author    Bjoern Brandenburg <bbb@mpi-sws.org>  2013-06-25 01:27:07 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>  2013-08-07 03:46:49 -0400
commit    543810eb67bea9c3046ecb58388493bca39fe796 (patch)
tree      cf65010367e53dfbd3e39a9eb6e89dacf92348f3
parent    1412c8b72e192a14b8dd620f58a75f55a5490783 (diff)
Add LITMUS^RT core implementation
This patch adds the core of LITMUS^RT:

  - library functionality (heaps, rt_domain, prioritization, etc.)
  - budget enforcement logic
  - job management
  - system call backends
  - virtual devices (control page, etc.)
  - scheduler plugin API (and dummy plugin)

This code compiles, but is not yet integrated with the rest of Linux.
-rw-r--r--  include/litmus/affinity.h      |  80
-rw-r--r--  include/litmus/bheap.h         |  77
-rw-r--r--  include/litmus/binheap.h       | 205
-rw-r--r--  include/litmus/budget.h        |  36
-rw-r--r--  include/litmus/clustered.h     |  44
-rw-r--r--  include/litmus/edf_common.h    |  25
-rw-r--r--  include/litmus/fdso.h          |  77
-rw-r--r--  include/litmus/fp_common.h     | 105
-rw-r--r--  include/litmus/fpmath.h        | 147
-rw-r--r--  include/litmus/jobs.h          |   8
-rw-r--r--  include/litmus/litmus.h        | 255
-rw-r--r--  include/litmus/litmus_proc.h   |  25
-rw-r--r--  include/litmus/locking.h       |  28
-rw-r--r--  include/litmus/preempt.h       | 164
-rw-r--r--  include/litmus/rt_domain.h     | 182
-rw-r--r--  include/litmus/rt_param.h      |  12
-rw-r--r--  include/litmus/sched_plugin.h  | 114
-rw-r--r--  include/litmus/srp.h           |  28
-rw-r--r--  include/litmus/unistd_32.h     |  21
-rw-r--r--  include/litmus/unistd_64.h     |  33
-rw-r--r--  include/litmus/wait.h          |  57
-rw-r--r--  kernel/sched/litmus.c          | 350
-rw-r--r--  litmus/Kconfig                 | 165
-rw-r--r--  litmus/Makefile                |  19
-rw-r--r--  litmus/affinity.c              |  41
-rw-r--r--  litmus/bheap.c                 | 316
-rw-r--r--  litmus/binheap.c               | 387
-rw-r--r--  litmus/budget.c                | 113
-rw-r--r--  litmus/clustered.c             | 111
-rw-r--r--  litmus/ctrldev.c               | 160
-rw-r--r--  litmus/edf_common.c            | 200
-rw-r--r--  litmus/fdso.c                  | 307
-rw-r--r--  litmus/fp_common.c             | 119
-rw-r--r--  litmus/jobs.c                  |  55
-rw-r--r--  litmus/litmus.c                | 576
-rw-r--r--  litmus/litmus_proc.c           | 407
-rw-r--r--  litmus/locking.c               | 188
-rw-r--r--  litmus/preempt.c               | 137
-rw-r--r--  litmus/rt_domain.c             | 348
-rw-r--r--  litmus/sched_plugin.c          | 224
-rw-r--r--  litmus/srp.c                   | 305
-rw-r--r--  litmus/sync.c                  | 166
42 files changed, 6411 insertions, 6 deletions
diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
new file mode 100644
index 000000000000..ca2e442eb547
--- /dev/null
+++ b/include/litmus/affinity.h
@@ -0,0 +1,80 @@
1#ifndef __LITMUS_AFFINITY_H
2#define __LITMUS_AFFINITY_H
3
4#include <linux/cpumask.h>
5
6/*
7 L1 (instr) = depth 0
8 L1 (data) = depth 1
9 L2 = depth 2
10 L3 = depth 3
11 */
12#define NUM_CACHE_LEVELS 4
13
14struct neighborhood
15{
16 unsigned int size[NUM_CACHE_LEVELS];
17 cpumask_var_t neighbors[NUM_CACHE_LEVELS];
18};
19
20/* topology info is stored redundantly in a big array for fast lookups */
21extern struct neighborhood neigh_info[NR_CPUS];
22
23void init_topology(void); /* called by Litmus module's _init_litmus() */
24
25/* Works like:
26void get_nearest_available_cpu(
27 cpu_entry_t **nearest,
28 cpu_entry_t *start,
29 cpu_entry_t *entries,
30 int release_master)
31
32Set release_master = NO_CPU for no Release Master.
33
34We use a macro here to exploit the fact that C-EDF and G-EDF
35have similar structures for their cpu_entry_t structs, even though
36they do not share a common base-struct. The macro allows us to
37avoid code duplication.
38
39TODO: Factor out the job-to-processor linking from C/G-EDF into
40a reusable "processor mapping". (See B.B.'s RTSS'09 paper &
41dissertation.)
42 */
43#define get_nearest_available_cpu(nearest, start, entries, release_master) \
44{ \
45 (nearest) = NULL; \
46 if (!(start)->linked) { \
47 (nearest) = (start); \
48 } else { \
49 int __level; \
50 int __cpu; \
51 int __release_master = ((release_master) == NO_CPU) ? -1 : (release_master); \
52 struct neighborhood *__neighbors = &neigh_info[(start)->cpu]; \
53 \
54 for (__level = 0; (__level < NUM_CACHE_LEVELS) && !(nearest); ++__level) { \
55 if (__neighbors->size[__level] > 1) { \
56 for_each_cpu(__cpu, __neighbors->neighbors[__level]) { \
57 if (__cpu != __release_master) { \
58 cpu_entry_t *__entry = &per_cpu((entries), __cpu); \
59 if (!__entry->linked) { \
60 (nearest) = __entry; \
61 break; \
62 } \
63 } \
64 } \
65 } else if (__neighbors->size[__level] == 0) { \
66 break; \
67 } \
68 } \
69 } \
70 \
71 if ((nearest)) { \
72 TRACE("P%d is closest available CPU to P%d\n", \
73 (nearest)->cpu, (start)->cpu); \
74 } else { \
75 TRACE("Could not find an available CPU close to P%d\n", \
76 (start)->cpu); \
77 } \
78}
79
80#endif
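
As a usage illustration only (not part of this patch), a global plugin with a per-CPU cpu_entry_t carrying the .cpu and .linked fields the macro expects might invoke it roughly as below; cpu_entry_t, demo_cpu_entries, and demo_find_target are hypothetical names, and TRACE() is assumed to come from litmus/debug_trace.h:

typedef struct {
        int cpu;
        struct task_struct *linked;
} cpu_entry_t;

static DEFINE_PER_CPU(cpu_entry_t, demo_cpu_entries);

static cpu_entry_t *demo_find_target(cpu_entry_t *preferred)
{
        cpu_entry_t *target;

        /* expands in place; leaves NULL in 'target' if no idle CPU shares
         * a cache level with 'preferred' */
        get_nearest_available_cpu(target, preferred, demo_cpu_entries, NO_CPU);
        return target;
}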
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
new file mode 100644
index 000000000000..cf4864a498d8
--- /dev/null
+++ b/include/litmus/bheap.h
@@ -0,0 +1,77 @@
1/* bheap.h -- Binomial Heaps
2 *
3 * (c) 2008, 2009 Bjoern Brandenburg
4 */
5
6#ifndef BHEAP_H
7#define BHEAP_H
8
9#define NOT_IN_HEAP UINT_MAX
10
11struct bheap_node {
12 struct bheap_node* parent;
13 struct bheap_node* next;
14 struct bheap_node* child;
15
16 unsigned int degree;
17 void* value;
18 struct bheap_node** ref;
19};
20
21struct bheap {
22 struct bheap_node* head;
23 /* We cache the minimum of the heap.
24 * This speeds up repeated peek operations.
25 */
26 struct bheap_node* min;
27};
28
29typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
30
31void bheap_init(struct bheap* heap);
32void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
33
34static inline int bheap_node_in_heap(struct bheap_node* h)
35{
36 return h->degree != NOT_IN_HEAP;
37}
38
39static inline int bheap_empty(struct bheap* heap)
40{
41 return heap->head == NULL && heap->min == NULL;
42}
43
44/* insert (and reinitialize) a node into the heap */
45void bheap_insert(bheap_prio_t higher_prio,
46 struct bheap* heap,
47 struct bheap_node* node);
48
49/* merge addition into target */
50void bheap_union(bheap_prio_t higher_prio,
51 struct bheap* target,
52 struct bheap* addition);
53
54struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
55 struct bheap* heap);
56
57struct bheap_node* bheap_take(bheap_prio_t higher_prio,
58 struct bheap* heap);
59
60void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
61int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
62
63void bheap_delete(bheap_prio_t higher_prio,
64 struct bheap* heap,
65 struct bheap_node* node);
66
67/* allocate from memcache */
68struct bheap_node* bheap_node_alloc(int gfp_flags);
69void bheap_node_free(struct bheap_node* hn);
70
71/* allocate a heap node for value and insert into the heap */
72int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
73 void* value, int gfp_flags);
74
75void* bheap_take_del(bheap_prio_t higher_prio,
76 struct bheap* heap);
77#endif
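
A minimal usage sketch (illustrative only, not part of the patch): the caller owns the heap nodes, binds each one to a value, and supplies its own comparator; struct demo_job and the demo_* helpers are hypothetical:

struct demo_job {
        unsigned long long deadline;
        struct bheap_node *hn;  /* owned node, e.g. from bheap_node_alloc() */
};

/* "higher priority" here means earlier deadline */
static int demo_order(struct bheap_node *a, struct bheap_node *b)
{
        struct demo_job *ja = a->value, *jb = b->value;
        return ja->deadline < jb->deadline;
}

static void demo_enqueue(struct bheap *h, struct demo_job *j)
{
        bheap_node_init(&j->hn, j);        /* (re)initialize and bind the value */
        bheap_insert(demo_order, h, j->hn);
}

static struct demo_job *demo_dequeue(struct bheap *h)
{
        struct bheap_node *hn = bheap_take(demo_order, h);
        return hn ? hn->value : NULL;      /* highest-priority value, or NULL */
}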
diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
new file mode 100644
index 000000000000..1cf364701da8
--- /dev/null
+++ b/include/litmus/binheap.h
@@ -0,0 +1,205 @@
1#ifndef LITMUS_BINARY_HEAP_H
2#define LITMUS_BINARY_HEAP_H
3
4#include <linux/kernel.h>
5
6/**
7 * Simple binary heap with add, arbitrary delete, delete_root, and top
8 * operations.
9 *
10 * Style meant to conform with list.h.
11 *
12 * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
13 * heap may be overkill (and perhaps not general enough) for some applications.
14 *
15 * Note: In order to make node swaps fast, a node inserted with a data pointer
16 * may not always hold said data pointer. This is similar to the binomial heap
17 * implementation. This does make node deletion tricky since we have to
18 * (1) locate the node that holds the data pointer to delete, and (2) the
19 * node that was originally inserted with said data pointer. These have to be
20 * coalesced into a single node before removal (see usage of
21 * __binheap_safe_swap()). We have to track node references to accomplish this.
22 */
23
24struct binheap_node {
25 void *data;
26 struct binheap_node *parent;
27 struct binheap_node *left;
28 struct binheap_node *right;
29
30 /* pointer to binheap_node that holds *data for which this binheap_node
31 * was originally inserted. (*data "owns" this node)
32 */
33 struct binheap_node *ref;
34 struct binheap_node **ref_ptr;
35};
36
37/**
38 * Signature of comparator function. Assumed 'less-than' (min-heap).
39 * Pass in 'greater-than' for max-heap.
40 *
41 * TODO: Consider macro-based implementation that allows comparator to be
42 * inlined (similar to Linux red/black tree) for greater efficiency.
43 */
44typedef int (*binheap_order_t)(struct binheap_node *a,
45 struct binheap_node *b);
46
47
48struct binheap {
49 struct binheap_node *root;
50
51 /* pointer to node to take next inserted child */
52 struct binheap_node *next;
53
54 /* pointer to last node in complete binary tree */
55 struct binheap_node *last;
56
57 /* comparator function pointer */
58 binheap_order_t compare;
59};
60
61
62/* Initialized heap nodes not in a heap have parent
63 * set to BINHEAP_POISON.
64 */
65#define BINHEAP_POISON ((void*)(0xdeadbeef))
66
67
68/**
69 * binheap_entry - get the struct for this heap node.
70 * Only valid when called upon heap nodes other than the root handle.
71 * @ptr: the heap node.
72 * @type: the type of struct pointed to by binheap_node::data.
73 * @member: unused.
74 */
75#define binheap_entry(ptr, type, member) \
76((type *)((ptr)->data))
77
78/**
79 * binheap_node_container - get the struct that contains this node.
80 * Only valid when called upon heap nodes other than the root handle.
81 * @ptr: the heap node.
82 * @type: the type of struct the node is embedded in.
83 * @member: the name of the binheap_struct within the (type) struct.
84 */
85#define binheap_node_container(ptr, type, member) \
86container_of((ptr), type, member)
87
88/**
89 * binheap_top_entry - get the struct for the node at the top of the heap.
90 * Only valid when called upon the heap handle node.
91 * @ptr: the special heap-handle node.
92 * @type: the type of the struct the head is embedded in.
93 * @member: the name of the binheap_struct within the (type) struct.
94 */
95#define binheap_top_entry(ptr, type, member) \
96binheap_entry((ptr)->root, type, member)
97
98/**
99 * binheap_delete_root - remove the root element from the heap.
100 * @handle: handle to the heap.
101 * @type: the type of the struct the head is embedded in.
102 * @member: the name of the binheap_struct within the (type) struct.
103 */
104#define binheap_delete_root(handle, type, member) \
105__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
106
107/**
108 * binheap_delete - remove an arbitrary element from the heap.
109 * @to_delete: pointer to node to be removed.
110 * @handle: handle to the heap.
111 */
112#define binheap_delete(to_delete, handle) \
113__binheap_delete((to_delete), (handle))
114
115/**
116 * binheap_add - insert an element to the heap
117 * new_node: node to add.
118 * @handle: handle to the heap.
119 * @type: the type of the struct the head is embedded in.
120 * @member: the name of the binheap_struct within the (type) struct.
121 */
122#define binheap_add(new_node, handle, type, member) \
123__binheap_add((new_node), (handle), container_of((new_node), type, member))
124
125/**
126 * binheap_decrease - re-eval the position of a node (based upon its
127 * original data pointer).
128 * @handle: handle to the heap.
129 * @orig_node: node that was associated with the data pointer
130 * (whose value has changed) when said pointer was
131 * added to the heap.
132 */
133#define binheap_decrease(orig_node, handle) \
134__binheap_decrease((orig_node), (handle))
135
136#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL , NULL, NULL}
137
138#define BINHEAP_NODE(name) \
139 struct binheap_node name = BINHEAP_NODE_INIT()
140
141
142static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
143{
144 n->data = NULL;
145 n->parent = BINHEAP_POISON;
146 n->left = NULL;
147 n->right = NULL;
148 n->ref = NULL;
149 n->ref_ptr = NULL;
150}
151
152static inline void INIT_BINHEAP_HANDLE(struct binheap *handle,
153 binheap_order_t compare)
154{
155 handle->root = NULL;
156 handle->next = NULL;
157 handle->last = NULL;
158 handle->compare = compare;
159}
160
161/* Returns true if binheap is empty. */
162static inline int binheap_empty(struct binheap *handle)
163{
164 return(handle->root == NULL);
165}
166
167/* Returns true if binheap node is in a heap. */
168static inline int binheap_is_in_heap(struct binheap_node *node)
169{
170 return (node->parent != BINHEAP_POISON);
171}
172
173/* Returns true if binheap node is in given heap. */
174int binheap_is_in_this_heap(struct binheap_node *node, struct binheap* heap);
175
176/* Add a node to a heap */
177void __binheap_add(struct binheap_node *new_node,
178 struct binheap *handle,
179 void *data);
180
181/**
182 * Removes the root node from the heap. The node is removed after coalescing
183 * the binheap_node with its original data pointer at the root of the tree.
184 *
185 * The 'last' node in the tree is then swapped up to the root and bubbled
186 * down.
187 */
188void __binheap_delete_root(struct binheap *handle,
189 struct binheap_node *container);
190
191/**
192 * Delete an arbitrary node. Bubble node to delete up to the root,
193 * and then delete the root.
194 */
195void __binheap_delete(struct binheap_node *node_to_delete,
196 struct binheap *handle);
197
198/**
199 * Bubble up a node whose pointer has decreased in value.
200 */
201void __binheap_decrease(struct binheap_node *orig_node,
202 struct binheap *handle);
203
204
205#endif
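
A usage sketch (illustrative only, not part of the patch): the node is embedded list.h-style and the handle is initialized with INIT_BINHEAP_HANDLE(h, demo_less); struct demo_item and the demo_* names are hypothetical:

struct demo_item {
        int prio;
        struct binheap_node node;       /* embedded heap node */
};

/* min-heap on prio */
static int demo_less(struct binheap_node *a, struct binheap_node *b)
{
        return binheap_entry(a, struct demo_item, node)->prio <
               binheap_entry(b, struct demo_item, node)->prio;
}

static void demo(struct binheap *h, struct demo_item *it)
{
        struct demo_item *top;

        INIT_BINHEAP_NODE(&it->node);
        binheap_add(&it->node, h, struct demo_item, node);

        top = binheap_top_entry(h, struct demo_item, node);  /* smallest prio */
        binheap_delete_root(h, struct demo_item, node);      /* removes 'top' */
        (void)top;
}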
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
new file mode 100644
index 000000000000..a766781b87d4
--- /dev/null
+++ b/include/litmus/budget.h
@@ -0,0 +1,36 @@
1#ifndef _LITMUS_BUDGET_H_
2#define _LITMUS_BUDGET_H_
3
4/* Update the per-processor enforcement timer (arm/reprogram/cancel) for
5 * the next task. */
6void update_enforcement_timer(struct task_struct* t);
7
8inline static int budget_exhausted(struct task_struct* t)
9{
10 return get_exec_time(t) >= get_exec_cost(t);
11}
12
13inline static lt_t budget_remaining(struct task_struct* t)
14{
15 if (!budget_exhausted(t))
16 return get_exec_cost(t) - get_exec_time(t);
17 else
18 /* avoid overflow */
19 return 0;
20}
21
22#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
23
24#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
25 == PRECISE_ENFORCEMENT)
26
27static inline int requeue_preempted_job(struct task_struct* t)
28{
29 /* Add task to ready queue only if not subject to budget enforcement or
30 * if the job has budget remaining. t may be NULL.
31 */
32 return t && !is_completed(t) && !tsk_rt(t)->dont_requeue
33 && (!budget_exhausted(t) || !budget_enforced(t));
34}
35
36#endif
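
A sketch (illustrative only, not part of the patch) of how a plugin might combine these helpers when a job is preempted; add_ready() is the rt_domain helper, TRACE_TASK() comes from the debug tracing header, and demo_job_preempted is a hypothetical name:

static void demo_job_preempted(rt_domain_t *dom, struct task_struct *t)
{
        if (requeue_preempted_job(t))
                /* still eligible: not complete, not flagged dont_requeue,
                 * and either no enforcement or budget remaining */
                add_ready(dom, t);
        else if (t && budget_enforced(t) && budget_exhausted(t))
                TRACE_TASK(t, "not requeued: budget exhausted\n");
}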
diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h
new file mode 100644
index 000000000000..0c18dcb15e6c
--- /dev/null
+++ b/include/litmus/clustered.h
@@ -0,0 +1,44 @@
1#ifndef CLUSTERED_H
2#define CLUSTERED_H
3
4/* Which cache level should be used to group CPUs into clusters?
5 * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under
6 * global scheduling).
7 */
8enum cache_level {
9 GLOBAL_CLUSTER = 0,
10 L1_CLUSTER = 1,
11 L2_CLUSTER = 2,
12 L3_CLUSTER = 3
13};
14
15int parse_cache_level(const char *str, enum cache_level *level);
16const char* cache_level_name(enum cache_level level);
17
18/* expose a cache level in a /proc dir */
19struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
20 enum cache_level* level);
21
22
23
24struct scheduling_cluster {
25 unsigned int id;
26 /* list of CPUs that are part of this cluster */
27 struct list_head cpus;
28};
29
30struct cluster_cpu {
31 unsigned int id; /* which CPU is this? */
32 struct list_head cluster_list; /* List of the CPUs in this cluster. */
33 struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */
34};
35
36int get_cluster_size(enum cache_level level);
37
38int assign_cpus_to_clusters(enum cache_level level,
39 struct scheduling_cluster* clusters[],
40 unsigned int num_clusters,
41 struct cluster_cpu* cpus[],
42 unsigned int num_cpus);
43
44#endif
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
new file mode 100644
index 000000000000..bbaf22ea7f12
--- /dev/null
+++ b/include/litmus/edf_common.h
@@ -0,0 +1,25 @@
1/*
2 * EDF common data structures and utility functions shared by all EDF
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_EDF_COMMON_H__
11#define __UNC_EDF_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int edf_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 000000000000..f2115b83f1e4
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,77 @@
1/* fdso.h - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 */
5
6#ifndef _LINUX_FDSO_H_
7#define _LINUX_FDSO_H_
8
9#include <linux/list.h>
10#include <asm/atomic.h>
11
12#include <linux/fs.h>
13#include <linux/slab.h>
14
15#define MAX_OBJECT_DESCRIPTORS 85
16
17typedef enum {
18 MIN_OBJ_TYPE = 0,
19
20 FMLP_SEM = 0,
21 SRP_SEM = 1,
22
23 MPCP_SEM = 2,
24 MPCP_VS_SEM = 3,
25 DPCP_SEM = 4,
26
27 PCP_SEM = 5,
28
29 MAX_OBJ_TYPE = 5
30} obj_type_t;
31
32struct inode_obj_id {
33 struct list_head list;
34 atomic_t count;
35 struct inode* inode;
36
37 obj_type_t type;
38 void* obj;
39 unsigned int id;
40};
41
42struct fdso_ops;
43
44struct od_table_entry {
45 unsigned int used;
46
47 struct inode_obj_id* obj;
48 const struct fdso_ops* class;
49};
50
51struct fdso_ops {
52 int (*create)(void** obj_ref, obj_type_t type, void* __user);
53 void (*destroy)(obj_type_t type, void*);
54 int (*open) (struct od_table_entry*, void* __user);
55 int (*close) (struct od_table_entry*);
56};
57
58/* translate a userspace supplied od into the raw table entry
59 * returns NULL if od is invalid
60 */
61struct od_table_entry* get_entry_for_od(int od);
62
63/* translate a userspace supplied od into the associated object
64 * returns NULL if od is invalid
65 */
66static inline void* od_lookup(int od, obj_type_t type)
67{
68 struct od_table_entry* e = get_entry_for_od(od);
69 return e && e->obj->type == type ? e->obj->obj : NULL;
70}
71
72#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
73#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
74#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
75
76
77#endif
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h
new file mode 100644
index 000000000000..19356c0fa6c1
--- /dev/null
+++ b/include/litmus/fp_common.h
@@ -0,0 +1,105 @@
1/* Fixed-priority scheduler support.
2 */
3
4#ifndef __FP_COMMON_H__
5#define __FP_COMMON_H__
6
7#include <litmus/rt_domain.h>
8
9#include <asm/bitops.h>
10
11
12void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
13 release_jobs_t release);
14
15int fp_higher_prio(struct task_struct* first,
16 struct task_struct* second);
17
18int fp_ready_order(struct bheap_node* a, struct bheap_node* b);
19
20#define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG)
21
22#if (LITMUS_MAX_PRIORITY % BITS_PER_LONG)
23#error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG
24#endif
25
26/* bitmask-indexed priority queue */
27struct fp_prio_queue {
28 unsigned long bitmask[FP_PRIO_BIT_WORDS];
29 struct bheap queue[LITMUS_MAX_PRIORITY];
30};
31
32void fp_prio_queue_init(struct fp_prio_queue* q);
33
34static inline void fpq_set(struct fp_prio_queue* q, unsigned int index)
35{
36 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
37 __set_bit(index % BITS_PER_LONG, word);
38}
39
40static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index)
41{
42 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
43 __clear_bit(index % BITS_PER_LONG, word);
44}
45
46static inline unsigned int fpq_find(struct fp_prio_queue* q)
47{
48 int i;
49
50 /* loop optimizer should unroll this */
51 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
52 if (q->bitmask[i])
53 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
54
55 return LITMUS_MAX_PRIORITY; /* nothing found */
56}
57
58static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
59{
60 BUG_ON(index >= LITMUS_MAX_PRIORITY);
61 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
62
63 fpq_set(q, index);
64 bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
65}
66
67static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
68{
69 BUG_ON(!is_queued(t));
70
71 bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
72 if (likely(bheap_empty(&q->queue[index])))
73 fpq_clear(q, index);
74}
75
76static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q)
77{
78 unsigned int idx = fpq_find(q);
79 struct bheap_node* hn;
80
81 if (idx < LITMUS_MAX_PRIORITY) {
82 hn = bheap_peek(fp_ready_order, &q->queue[idx]);
83 return bheap2task(hn);
84 } else
85 return NULL;
86}
87
88static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q)
89{
90 unsigned int idx = fpq_find(q);
91 struct bheap_node* hn;
92
93 if (idx < LITMUS_MAX_PRIORITY) {
94 hn = bheap_take(fp_ready_order, &q->queue[idx]);
95 if (likely(bheap_empty(&q->queue[idx])))
96 fpq_clear(q, idx);
97 return bheap2task(hn);
98 } else
99 return NULL;
100}
101
102int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t);
103
104
105#endif
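
A usage sketch (illustrative only, not part of the patch) for a partitioned fixed-priority plugin; demo_ready is assumed to have been set up once with fp_prio_queue_init(), and get_priority() is the task_params accessor from litmus.h:

static struct fp_prio_queue demo_ready;

static void demo_requeue(struct task_struct *t)
{
        fp_prio_add(&demo_ready, t, get_priority(t));
}

static struct task_struct *demo_pick_next(void)
{
        /* highest-priority ready task, or NULL if the bitmask is empty */
        return fp_prio_take(&demo_ready);
}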
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 000000000000..642de98542c8
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,147 @@
1#ifndef __FP_MATH_H__
2#define __FP_MATH_H__
3
4#include <linux/math64.h>
5
6#ifndef __KERNEL__
7#include <stdint.h>
8#define abs(x) (((x) < 0) ? -(x) : (x))
9#endif
10
11// Use 64-bit because we want to track things at the nanosecond scale.
12// This can lead to very large numbers.
13typedef int64_t fpbuf_t;
14typedef struct
15{
16 fpbuf_t val;
17} fp_t;
18
19#define FP_SHIFT 10
20#define ROUND_BIT (FP_SHIFT - 1)
21
22#define _fp(x) ((fp_t) {x})
23
24#ifdef __KERNEL__
25static const fp_t LITMUS_FP_ZERO = {.val = 0};
26static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
27#endif
28
29static inline fp_t FP(fpbuf_t x)
30{
31 return _fp(((fpbuf_t) x) << FP_SHIFT);
32}
33
34/* divide two integers to obtain a fixed point value */
35static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
36{
37 return _fp(div64_s64(FP(a).val, (b)));
38}
39
40static inline fpbuf_t _point(fp_t x)
41{
42 return (x.val % (1 << FP_SHIFT));
43
44}
45
46#define fp2str(x) x.val
47/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
48#define _FP_ "%ld/1024"
49
50static inline fpbuf_t _floor(fp_t x)
51{
52 return x.val >> FP_SHIFT;
53}
54
55/* FIXME: negative rounding */
56static inline fpbuf_t _round(fp_t x)
57{
58 return _floor(x) + ((x.val >> ROUND_BIT) & 1);
59}
60
61/* multiply two fixed point values */
62static inline fp_t _mul(fp_t a, fp_t b)
63{
64 return _fp((a.val * b.val) >> FP_SHIFT);
65}
66
67static inline fp_t _div(fp_t a, fp_t b)
68{
69#if !defined(__KERNEL__) && !defined(unlikely)
70#define unlikely(x) (x)
71#define DO_UNDEF_UNLIKELY
72#endif
73 /* try not to overflow */
74 if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
75 return _fp((a.val / b.val) << FP_SHIFT);
76 else
77 return _fp((a.val << FP_SHIFT) / b.val);
78#ifdef DO_UNDEF_UNLIKELY
79#undef unlikely
80#undef DO_UNDEF_UNLIKELY
81#endif
82}
83
84static inline fp_t _add(fp_t a, fp_t b)
85{
86 return _fp(a.val + b.val);
87}
88
89static inline fp_t _sub(fp_t a, fp_t b)
90{
91 return _fp(a.val - b.val);
92}
93
94static inline fp_t _neg(fp_t x)
95{
96 return _fp(-x.val);
97}
98
99static inline fp_t _abs(fp_t x)
100{
101 return _fp(abs(x.val));
102}
103
104/* works the same as casting float/double to integer */
105static inline fpbuf_t _fp_to_integer(fp_t x)
106{
107 return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
108}
109
110static inline fp_t _integer_to_fp(fpbuf_t x)
111{
112 return _frac(x,1);
113}
114
115static inline int _leq(fp_t a, fp_t b)
116{
117 return a.val <= b.val;
118}
119
120static inline int _geq(fp_t a, fp_t b)
121{
122 return a.val >= b.val;
123}
124
125static inline int _lt(fp_t a, fp_t b)
126{
127 return a.val < b.val;
128}
129
130static inline int _gt(fp_t a, fp_t b)
131{
132 return a.val > b.val;
133}
134
135static inline int _eq(fp_t a, fp_t b)
136{
137 return a.val == b.val;
138}
139
140static inline fp_t _max(fp_t a, fp_t b)
141{
142 if (a.val < b.val)
143 return b;
144 else
145 return a;
146}
147#endif
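
For reference (not part of the patch), the Q10 fixed-point helpers compose as follows; results are only approximate because values are truncated to 1/1024 steps:

static void demo_fp(void)
{
        fp_t util   = _frac(3, 10);             /* 0.3 -> 307/1024              */
        fp_t scaled = _mul(util, FP(200));      /* ~60 (307*200/1024 = 59.96)   */
        fpbuf_t n   = _fp_to_integer(scaled);   /* truncates toward zero: 59    */

        (void)n;
}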
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
new file mode 100644
index 000000000000..e1507d4314b8
--- /dev/null
+++ b/include/litmus/jobs.h
@@ -0,0 +1,8 @@
1#ifndef __LITMUS_JOBS_H__
2#define __LITMUS_JOBS_H__
3
4void prepare_for_next_period(struct task_struct *t);
5void release_at(struct task_struct *t, lt_t start);
6long complete_job(void);
7
8#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index c87863c9b231..e35c38c4c0a2 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -6,7 +6,41 @@
6#ifndef _LINUX_LITMUS_H_ 6#ifndef _LINUX_LITMUS_H_
7#define _LINUX_LITMUS_H_ 7#define _LINUX_LITMUS_H_
8 8
9#include <litmus/debug_trace.h>
10
11#ifdef CONFIG_RELEASE_MASTER
12extern atomic_t release_master_cpu;
13#endif
14
15/* in_list - is a given list_head queued on some list?
16 */
17static inline int in_list(struct list_head* list)
18{
19 return !( /* case 1: deleted */
20 (list->next == LIST_POISON1 &&
21 list->prev == LIST_POISON2)
22 ||
23 /* case 2: initialized */
24 (list->next == list &&
25 list->prev == list)
26 );
27}
28
29struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
30
31#define NO_CPU 0xffffffff
32
33void litmus_fork(struct task_struct *tsk);
34void litmus_exec(void);
35/* clean up real-time state of a task */
36void exit_litmus(struct task_struct *dead_tsk);
37
38long litmus_admit_task(struct task_struct *tsk);
39void litmus_exit_task(struct task_struct *tsk);
40
9#define is_realtime(t) ((t)->policy == SCHED_LITMUS) 41#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
42#define rt_transition_pending(t) \
43 ((t)->rt_param.transition_pending)
10 44
11#define tsk_rt(t) (&(t)->rt_param) 45#define tsk_rt(t) (&(t)->rt_param)
12 46
@@ -28,6 +62,7 @@
28#define get_partition(t) (tsk_rt(t)->task_params.cpu) 62#define get_partition(t) (tsk_rt(t)->task_params.cpu)
29#define get_priority(t) (tsk_rt(t)->task_params.priority) 63#define get_priority(t) (tsk_rt(t)->task_params.priority)
30#define get_class(t) (tsk_rt(t)->task_params.cls) 64#define get_class(t) (tsk_rt(t)->task_params.cls)
65#define get_release_policy(t) (tsk_rt(t)->task_params.release_policy)
31 66
32/* job_param macros */ 67/* job_param macros */
33#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) 68#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
@@ -35,6 +70,15 @@
35#define get_release(t) (tsk_rt(t)->job_params.release) 70#define get_release(t) (tsk_rt(t)->job_params.release)
36#define get_lateness(t) (tsk_rt(t)->job_params.lateness) 71#define get_lateness(t) (tsk_rt(t)->job_params.lateness)
37 72
73/* release policy macros */
74#define is_periodic(t) (get_release_policy(t) == TASK_PERIODIC)
75#define is_sporadic(t) (get_release_policy(t) == TASK_SPORADIC)
76#ifdef CONFIG_ALLOW_EARLY_RELEASE
77#define is_early_releasing(t) (get_release_policy(t) == TASK_EARLY)
78#else
79#define is_early_releasing(t) (0)
80#endif
81
38#define is_hrt(t) \ 82#define is_hrt(t) \
39 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) 83 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
40#define is_srt(t) \ 84#define is_srt(t) \
@@ -48,6 +92,192 @@ static inline lt_t litmus_clock(void)
48 return ktime_to_ns(ktime_get()); 92 return ktime_to_ns(ktime_get());
49} 93}
50 94
95/* A macro to convert from nanoseconds to ktime_t. */
96#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
97
98#define get_domain(t) (tsk_rt(t)->domain)
99
100/* Honor the flag in the preempt_count variable that is set
101 * when scheduling is in progress.
102 */
103#define is_running(t) \
104 ((t)->state == TASK_RUNNING || \
105 task_thread_info(t)->preempt_count & PREEMPT_ACTIVE)
106
107#define is_blocked(t) \
108 (!is_running(t))
109#define is_released(t, now) \
110 (lt_before_eq(get_release(t), now))
111#define is_tardy(t, now) \
112 (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
113
114/* real-time comparison macros */
115#define earlier_deadline(a, b) (lt_before(\
116 (a)->rt_param.job_params.deadline,\
117 (b)->rt_param.job_params.deadline))
118#define earlier_release(a, b) (lt_before(\
119 (a)->rt_param.job_params.release,\
120 (b)->rt_param.job_params.release))
121
122void preempt_if_preemptable(struct task_struct* t, int on_cpu);
123
124#ifdef CONFIG_LITMUS_LOCKING
125void srp_ceiling_block(void);
126#else
127#define srp_ceiling_block() /* nothing */
128#endif
129
130#define bheap2task(hn) ((struct task_struct*) hn->value)
131
132#ifdef CONFIG_NP_SECTION
133
134static inline int is_kernel_np(struct task_struct *t)
135{
136 return tsk_rt(t)->kernel_np;
137}
138
139static inline int is_user_np(struct task_struct *t)
140{
141 return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0;
142}
143
144static inline void request_exit_np(struct task_struct *t)
145{
146 if (is_user_np(t)) {
147 /* Set the flag that tells user space to call
148 * into the kernel at the end of a critical section. */
149 if (likely(tsk_rt(t)->ctrl_page)) {
150 TRACE_TASK(t, "setting delayed_preemption flag\n");
151 tsk_rt(t)->ctrl_page->sched.np.preempt = 1;
152 }
153 }
154}
155
156static inline void make_np(struct task_struct *t)
157{
158 tsk_rt(t)->kernel_np++;
159}
160
161/* Caller should check if preemption is necessary when
162 * the function return 0.
163 */
164static inline int take_np(struct task_struct *t)
165{
166 return --tsk_rt(t)->kernel_np;
167}
168
169/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
170static inline int request_exit_np_atomic(struct task_struct *t)
171{
172 union np_flag old, new;
173
174 if (tsk_rt(t)->ctrl_page) {
175 old.raw = tsk_rt(t)->ctrl_page->sched.raw;
176 if (old.np.flag == 0) {
177 /* no longer non-preemptive */
178 return 0;
179 } else if (old.np.preempt) {
180 /* already set, nothing for us to do */
181 return 1;
182 } else {
183 /* non-preemptive and flag not set */
184 new.raw = old.raw;
185 new.np.preempt = 1;
186 /* if we get old back, then we atomically set the flag */
187 return cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw;
188 /* If we raced with a concurrent change, then so be
189 * it. Deliver it by IPI. We don't want an unbounded
190 * retry loop here since tasks might exploit that to
191 * keep the kernel busy indefinitely. */
192 }
193 } else
194 return 0;
195}
196
197#else
198
199static inline int is_kernel_np(struct task_struct* t)
200{
201 return 0;
202}
203
204static inline int is_user_np(struct task_struct* t)
205{
206 return 0;
207}
208
209static inline void request_exit_np(struct task_struct *t)
210{
211 /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
212 BUG();
213}
214
215static inline int request_exit_np_atomic(struct task_struct *t)
216{
217 return 0;
218}
219
220#endif
221
222static inline void clear_exit_np(struct task_struct *t)
223{
224 if (likely(tsk_rt(t)->ctrl_page))
225 tsk_rt(t)->ctrl_page->sched.np.preempt = 0;
226}
227
228static inline int is_np(struct task_struct *t)
229{
230#ifdef CONFIG_SCHED_DEBUG_TRACE
231 int kernel, user;
232 kernel = is_kernel_np(t);
233 user = is_user_np(t);
234 if (kernel || user)
235 TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
236
237 kernel, user);
238 return kernel || user;
239#else
240 return unlikely(is_kernel_np(t) || is_user_np(t));
241#endif
242}
243
244static inline int is_present(struct task_struct* t)
245{
246 return t && tsk_rt(t)->present;
247}
248
249static inline int is_completed(struct task_struct* t)
250{
251 return t && tsk_rt(t)->completed;
252}
253
254
255/* make the unit explicit */
256typedef unsigned long quanta_t;
257
258enum round {
259 FLOOR,
260 CEIL
261};
262
263
264/* Tick period is used to convert ns-specified execution
265 * costs and periods into tick-based equivalents.
266 */
267extern ktime_t tick_period;
268
269static inline quanta_t time2quanta(lt_t time, enum round round)
270{
271 s64 quantum_length = ktime_to_ns(tick_period);
272
273 if (do_div(time, quantum_length) && round == CEIL)
274 time++;
275 return (quanta_t) time;
276}
277
278/* By how much is cpu staggered behind CPU 0? */
279u64 cpu_stagger_offset(int cpu);
280
51static inline struct control_page* get_control_page(struct task_struct *t) 281static inline struct control_page* get_control_page(struct task_struct *t)
52{ 282{
53 return tsk_rt(t)->ctrl_page; 283 return tsk_rt(t)->ctrl_page;
@@ -58,4 +288,29 @@ static inline int has_control_page(struct task_struct* t)
58 return tsk_rt(t)->ctrl_page != NULL; 288 return tsk_rt(t)->ctrl_page != NULL;
59} 289}
60 290
291
292#ifdef CONFIG_SCHED_OVERHEAD_TRACE
293
294#define TS_SYSCALL_IN_START \
295 if (has_control_page(current)) { \
296 __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start); \
297 }
298
299#define TS_SYSCALL_IN_END \
300 if (has_control_page(current)) { \
301 uint64_t irqs; \
302 local_irq_disable(); \
303 irqs = get_control_page(current)->irq_count - \
304 get_control_page(current)->irq_syscall_start; \
305 __TS_SYSCALL_IN_END(&irqs); \
306 local_irq_enable(); \
307 }
308
309#else
310
311#define TS_SYSCALL_IN_START
312#define TS_SYSCALL_IN_END
313
314#endif
315
61#endif 316#endif
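
As a small illustration (not part of the patch), time2quanta() converts a nanosecond quantity into scheduling quanta; with CEIL and a 1 ms tick_period, a 2.5 ms cost would yield 3 quanta. demo_cost_in_quanta is a hypothetical helper:

static quanta_t demo_cost_in_quanta(lt_t exec_cost_ns)
{
        /* round up so the reserved quanta cover the full cost */
        return time2quanta(exec_cost_ns, CEIL);
}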
diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h
new file mode 100644
index 000000000000..6800e725d48c
--- /dev/null
+++ b/include/litmus/litmus_proc.h
@@ -0,0 +1,25 @@
1#include <litmus/sched_plugin.h>
2#include <linux/proc_fs.h>
3
4int __init init_litmus_proc(void);
5void exit_litmus_proc(void);
6
7/*
8 * On success, returns 0 and sets the pointer to the location of the new
9 * proc dir entry, otherwise returns an error code and sets pde to NULL.
10 */
11long make_plugin_proc_dir(struct sched_plugin* plugin,
12 struct proc_dir_entry** pde);
13
14/*
15 * Plugins should deallocate all child proc directory entries before
16 * calling this, to avoid memory leaks.
17 */
18void remove_plugin_proc_dir(struct sched_plugin* plugin);
19
20
21/* Copy at most ksize-1 bytes from ubuf into kbuf, null-terminate kbuf, and
22 * remove a '\n' if present. Returns the number of bytes that were read or
23 * -EFAULT. */
24int copy_and_chomp(char *kbuf, unsigned long ksize,
25 __user const char* ubuf, unsigned long ulength);
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
new file mode 100644
index 000000000000..4d7b870cb443
--- /dev/null
+++ b/include/litmus/locking.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_LOCKING_H
2#define LITMUS_LOCKING_H
3
4struct litmus_lock_ops;
5
6/* Generic base struct for LITMUS^RT userspace semaphores.
7 * This structure should be embedded in protocol-specific semaphores.
8 */
9struct litmus_lock {
10 struct litmus_lock_ops *ops;
11 int type;
12};
13
14struct litmus_lock_ops {
15 /* Current task tries to obtain / drop a reference to a lock.
16 * Optional methods, allowed by default. */
17 int (*open)(struct litmus_lock*, void* __user);
18 int (*close)(struct litmus_lock*);
19
20 /* Current tries to lock/unlock this lock (mandatory methods). */
21 int (*lock)(struct litmus_lock*);
22 int (*unlock)(struct litmus_lock*);
23
24 /* The lock is no longer being referenced (mandatory method). */
25 void (*deallocate)(struct litmus_lock*);
26};
27
28#endif
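
A protocol implementation embeds struct litmus_lock and supplies an ops table; a minimal sketch (illustrative only, the demo_* functions are hypothetical and shown as prototypes):

struct demo_semaphore {
        struct litmus_lock litmus_lock;  /* generic part */
        struct task_struct *owner;
};

static int  demo_lock(struct litmus_lock *l);
static int  demo_unlock(struct litmus_lock *l);
static void demo_free(struct litmus_lock *l);

static struct litmus_lock_ops demo_ops = {
        /* .open/.close omitted: optional, allowed by default */
        .lock       = demo_lock,
        .unlock     = demo_unlock,
        .deallocate = demo_free,
};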
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
new file mode 100644
index 000000000000..4fd108a45333
--- /dev/null
+++ b/include/litmus/preempt.h
@@ -0,0 +1,164 @@
1#ifndef LITMUS_PREEMPT_H
2#define LITMUS_PREEMPT_H
3
4#include <linux/types.h>
5#include <linux/cache.h>
6#include <linux/percpu.h>
7#include <asm/atomic.h>
8
9#include <litmus/debug_trace.h>
10
11DECLARE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
12
13#ifdef CONFIG_PREEMPT_STATE_TRACE
14const char* sched_state_name(int s);
15#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args)
16#else
17#define TRACE_STATE(fmt, args...) /* ignore */
18#endif
19
20#define VERIFY_SCHED_STATE(x) \
21 do { int __s = get_sched_state(); \
22 if ((__s & (x)) == 0) \
23 TRACE_STATE("INVALID s=0x%x (%s) not " \
24 "in 0x%x (%s) [%s]\n", \
25 __s, sched_state_name(__s), \
26 (x), #x, __FUNCTION__); \
27 } while (0);
28
29#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
30 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
31 cpu, (x), sched_state_name(x), \
32 (y), sched_state_name(y))
33
34
35typedef enum scheduling_state {
36 TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that
37 * should be scheduled, and the processor does not
38 * plan to invoke schedule(). */
39 SHOULD_SCHEDULE = (1 << 1), /* A remote processor has determined that the
40 * processor should reschedule, but this has not
41 * been communicated yet (IPI still pending). */
42 WILL_SCHEDULE = (1 << 2), /* The processor has noticed that it has to
43 * reschedule and will do so shortly. */
44 TASK_PICKED = (1 << 3), /* The processor is currently executing schedule(),
45 * has selected a new task to schedule, but has not
46 * yet performed the actual context switch. */
47 PICKED_WRONG_TASK = (1 << 4), /* The processor has not yet performed the context
48 * switch, but a remote processor has already
49 * determined that a higher-priority task became
50 * eligible after the task was picked. */
51} sched_state_t;
52
53static inline sched_state_t get_sched_state_on(int cpu)
54{
55 return atomic_read(&per_cpu(resched_state, cpu));
56}
57
58static inline sched_state_t get_sched_state(void)
59{
60 return atomic_read(&__get_cpu_var(resched_state));
61}
62
63static inline int is_in_sched_state(int possible_states)
64{
65 return get_sched_state() & possible_states;
66}
67
68static inline int cpu_is_in_sched_state(int cpu, int possible_states)
69{
70 return get_sched_state_on(cpu) & possible_states;
71}
72
73static inline void set_sched_state(sched_state_t s)
74{
75 TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id());
76 atomic_set(&__get_cpu_var(resched_state), s);
77}
78
79static inline int sched_state_transition(sched_state_t from, sched_state_t to)
80{
81 sched_state_t old_state;
82
83 old_state = atomic_cmpxchg(&__get_cpu_var(resched_state), from, to);
84 if (old_state == from) {
85 TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id());
86 return 1;
87 } else
88 return 0;
89}
90
91static inline int sched_state_transition_on(int cpu,
92 sched_state_t from,
93 sched_state_t to)
94{
95 sched_state_t old_state;
96
97 old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to);
98 if (old_state == from) {
99 TRACE_SCHED_STATE_CHANGE(from, to, cpu);
100 return 1;
101 } else
102 return 0;
103}
104
105/* Plugins must call this function after they have decided which job to
106 * schedule next. IMPORTANT: this function must be called while still holding
107 * the lock that is used to serialize scheduling decisions.
108 *
109 * (Ideally, we would like to use runqueue locks for this purpose, but that
110 * would lead to deadlocks with the migration code.)
111 */
112static inline void sched_state_task_picked(void)
113{
114 VERIFY_SCHED_STATE(WILL_SCHEDULE);
115
116 /* WILL_SCHEDULE has only a local transition => simple store is ok */
117 set_sched_state(TASK_PICKED);
118}
119
120static inline void sched_state_entered_schedule(void)
121{
122 /* Update state for the case that we entered schedule() not due to
123 * set_tsk_need_resched() */
124 set_sched_state(WILL_SCHEDULE);
125}
126
127/* Called by schedule() to check if the scheduling decision is still valid
128 * after a context switch. Returns 1 if the CPU needs to reschedule. */
129static inline int sched_state_validate_switch(void)
130{
131 int left_state_ok = 0;
132
133 VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED);
134
135 if (is_in_sched_state(TASK_PICKED)) {
136 /* Might be good; let's try to transition out of this
137 * state. This must be done atomically since remote processors
138 * may try to change the state, too. */
139 left_state_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED);
140 }
141
142 if (!left_state_ok) {
143 /* We raced with a higher-priority task arrival => not
144 * valid. The CPU needs to reschedule. */
145 set_sched_state(WILL_SCHEDULE);
146 return 1;
147 } else
148 return 0;
149}
150
151/* State transition events. See litmus/preempt.c for details. */
152void sched_state_will_schedule(struct task_struct* tsk);
153void sched_state_ipi(void);
154/* Cause a CPU (remote or local) to reschedule. */
155void litmus_reschedule(int cpu);
156void litmus_reschedule_local(void);
157
158#ifdef CONFIG_DEBUG_KERNEL
159void sched_state_plugin_check(void);
160#else
161#define sched_state_plugin_check() /* no check */
162#endif
163
164#endif
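
Seen from a plugin's schedule() callback, the contract documented above looks roughly like this (sketch only; demo_lock and the job-picking step are hypothetical placeholders):

static DEFINE_RAW_SPINLOCK(demo_lock);

static struct task_struct *demo_schedule(struct task_struct *prev)
{
        struct task_struct *next;

        raw_spin_lock(&demo_lock);
        next = NULL;                    /* ... pick highest-priority ready job ... */
        sched_state_task_picked();      /* must happen under the scheduling lock */
        raw_spin_unlock(&demo_lock);

        return next;
}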
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
new file mode 100644
index 000000000000..ac249292e866
--- /dev/null
+++ b/include/litmus/rt_domain.h
@@ -0,0 +1,182 @@
1/* CLEANUP: Add comments and make it less messy.
2 *
3 */
4
5#ifndef __UNC_RT_DOMAIN_H__
6#define __UNC_RT_DOMAIN_H__
7
8#include <litmus/bheap.h>
9
10#define RELEASE_QUEUE_SLOTS 127 /* prime */
11
12struct _rt_domain;
13
14typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
15typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
16
17struct release_queue {
18 /* each slot maintains a list of release heaps sorted
19 * by release time */
20 struct list_head slot[RELEASE_QUEUE_SLOTS];
21};
22
23typedef struct _rt_domain {
24 /* runnable rt tasks are in here */
25 raw_spinlock_t ready_lock;
26 struct bheap ready_queue;
27
28 /* real-time tasks waiting for release are in here */
29 raw_spinlock_t release_lock;
30 struct release_queue release_queue;
31
32#ifdef CONFIG_RELEASE_MASTER
33 int release_master;
34#endif
35
36 /* for moving tasks to the release queue */
37 raw_spinlock_t tobe_lock;
38 struct list_head tobe_released;
39
40 /* how do we check if we need to kick another CPU? */
41 check_resched_needed_t check_resched;
42
43 /* how do we release jobs? */
44 release_jobs_t release_jobs;
45
46 /* how are tasks ordered in the ready queue? */
47 bheap_prio_t order;
48} rt_domain_t;
49
50struct release_heap {
51 /* list_head for per-time-slot list */
52 struct list_head list;
53 lt_t release_time;
54 /* all tasks to be released at release_time */
55 struct bheap heap;
56 /* used to trigger the release */
57 struct hrtimer timer;
58
59#ifdef CONFIG_RELEASE_MASTER
60 /* used to delegate releases */
61 struct hrtimer_start_on_info info;
62#endif
63 /* required for the timer callback */
64 rt_domain_t* dom;
65};
66
67
68static inline struct task_struct* __next_ready(rt_domain_t* rt)
69{
70 struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
71 if (hn)
72 return bheap2task(hn);
73 else
74 return NULL;
75}
76
77void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
78 check_resched_needed_t check,
 79 release_jobs_t release);
80
81void __add_ready(rt_domain_t* rt, struct task_struct *new);
82void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
83void __add_release(rt_domain_t* rt, struct task_struct *task);
84
85static inline struct task_struct* __take_ready(rt_domain_t* rt)
86{
87 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
88 if (hn)
89 return bheap2task(hn);
90 else
91 return NULL;
92}
93
94static inline struct task_struct* __peek_ready(rt_domain_t* rt)
95{
96 struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
97 if (hn)
98 return bheap2task(hn);
99 else
100 return NULL;
101}
102
103static inline int is_queued(struct task_struct *t)
104{
105 BUG_ON(!tsk_rt(t)->heap_node);
106 return bheap_node_in_heap(tsk_rt(t)->heap_node);
107}
108
109static inline void remove(rt_domain_t* rt, struct task_struct *t)
110{
111 bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
112}
113
114static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
115{
116 unsigned long flags;
117 /* first we need the write lock for rt_ready_queue */
118 raw_spin_lock_irqsave(&rt->ready_lock, flags);
119 __add_ready(rt, new);
120 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
121}
122
123static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
124{
125 unsigned long flags;
126 raw_spin_lock_irqsave(&rt->ready_lock, flags);
127 __merge_ready(rt, tasks);
128 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
129}
130
131static inline struct task_struct* take_ready(rt_domain_t* rt)
132{
133 unsigned long flags;
134 struct task_struct* ret;
135 /* first we need the write lock for rt_ready_queue */
136 raw_spin_lock_irqsave(&rt->ready_lock, flags);
137 ret = __take_ready(rt);
138 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
139 return ret;
140}
141
142
143static inline void add_release(rt_domain_t* rt, struct task_struct *task)
144{
145 unsigned long flags;
146 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
147 __add_release(rt, task);
148 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
149}
150
151#ifdef CONFIG_RELEASE_MASTER
152void __add_release_on(rt_domain_t* rt, struct task_struct *task,
153 int target_cpu);
154
155static inline void add_release_on(rt_domain_t* rt,
156 struct task_struct *task,
157 int target_cpu)
158{
159 unsigned long flags;
160 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
161 __add_release_on(rt, task, target_cpu);
162 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
163}
164#endif
165
166static inline int __jobs_pending(rt_domain_t* rt)
167{
168 return !bheap_empty(&rt->ready_queue);
169}
170
171static inline int jobs_pending(rt_domain_t* rt)
172{
173 unsigned long flags;
174 int ret;
175 /* first we need the write lock for rt_ready_queue */
176 raw_spin_lock_irqsave(&rt->ready_lock, flags);
177 ret = !bheap_empty(&rt->ready_queue);
178 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
179 return ret;
180}
181
182#endif
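
A typical setup/usage sketch (illustrative only, not part of the patch); the demo_* callbacks are hypothetical placeholders for a plugin's ordering, resched-check, and release logic, and the release-time helpers come from litmus.h:

static int  demo_ready_order(struct bheap_node *a, struct bheap_node *b);
static int  demo_check_resched(rt_domain_t *rt);
static void demo_release_jobs(rt_domain_t *rt, struct bheap *tasks);

static rt_domain_t demo_domain;

static void demo_init(void)
{
        rt_domain_init(&demo_domain, demo_ready_order,
                       demo_check_resched, demo_release_jobs);
}

static void demo_requeue(struct task_struct *t)
{
        if (is_early_releasing(t) || is_released(t, litmus_clock()))
                add_ready(&demo_domain, t);     /* eligible now */
        else
                add_release(&demo_domain, t);   /* wait for its release time */
}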
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index a1fed7653377..138799fbaad7 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -84,12 +84,12 @@ struct rt_task {
84}; 84};
85 85
86union np_flag { 86union np_flag {
87 uint64_t raw; 87 uint32_t raw;
88 struct { 88 struct {
89 /* Is the task currently in a non-preemptive section? */ 89 /* Is the task currently in a non-preemptive section? */
90 uint64_t flag:31; 90 uint32_t flag:31;
91 /* Should the task call into the scheduler? */ 91 /* Should the task call into the scheduler? */
92 uint64_t preempt:1; 92 uint32_t preempt:1;
93 } np; 93 } np;
94}; 94};
95 95
@@ -110,10 +110,10 @@ union np_flag {
110struct control_page { 110struct control_page {
111 /* This flag is used by userspace to communicate non-preemptive 111 /* This flag is used by userspace to communicate non-preemptive
112 * sections. */ 112 * sections. */
113 volatile union np_flag sched; 113 volatile __attribute__ ((aligned (8))) union np_flag sched;
114 114
115 volatile uint64_t irq_count; /* Incremented by the kernel each time an IRQ is 115 /* Incremented by the kernel each time an IRQ is handled. */
116 * handled. */ 116 volatile __attribute__ ((aligned (8))) uint64_t irq_count;
117 117
118 /* Locking overhead tracing: userspace records here the time stamp 118 /* Locking overhead tracing: userspace records here the time stamp
119 * and IRQ counter prior to starting the system call. */ 119 * and IRQ counter prior to starting the system call. */
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
new file mode 100644
index 000000000000..0f2fe90123db
--- /dev/null
+++ b/include/litmus/sched_plugin.h
@@ -0,0 +1,114 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_SCHED_PLUGIN_H_
6#define _LINUX_SCHED_PLUGIN_H_
7
8#include <linux/sched.h>
9
10#ifdef CONFIG_LITMUS_LOCKING
11#include <litmus/locking.h>
12#endif
13
14/************************ setup/tear down ********************/
15
16typedef long (*activate_plugin_t) (void);
17typedef long (*deactivate_plugin_t) (void);
18
19
20
21/********************* scheduler invocation ******************/
22
23/* Plugin-specific realtime tick handler */
24typedef void (*scheduler_tick_t) (struct task_struct *cur);
25/* The plugin's main scheduling-decision function */
26typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
27/* Clean up after the task switch has occurred.
28 * This function is called after every (even non-rt) task switch.
29 */
30typedef void (*finish_switch_t)(struct task_struct *prev);
31
32
33/********************* task state changes ********************/
34
35/* Called to setup a new real-time task.
36 * Release the first job, enqueue, etc.
37 * Task may already be running.
38 */
39typedef void (*task_new_t) (struct task_struct *task,
40 int on_rq,
41 int running);
42
43/* Called to re-introduce a task after blocking.
44 * Can potentially be called multiple times.
45 */
46typedef void (*task_wake_up_t) (struct task_struct *task);
47/* Called to notify the plugin of a blocking real-time task. It will
48 * only be called for real-time tasks and before schedule() is called. */
49typedef void (*task_block_t) (struct task_struct *task);
50/* Called when a real-time task exits or changes to a different scheduling
51 * class.
52 * Free any allocated resources
53 */
54typedef void (*task_exit_t) (struct task_struct *);
55
56#ifdef CONFIG_LITMUS_LOCKING
57/* Called when the current task attempts to create a new lock of a given
58 * protocol type. */
59typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
60 void* __user config);
61#endif
62
63
64/********************* sys call backends ********************/
65/* This function causes the caller to sleep until the next release */
66typedef long (*complete_job_t) (void);
67
68typedef long (*admit_task_t)(struct task_struct* tsk);
69
70typedef void (*release_at_t)(struct task_struct *t, lt_t start);
71
72struct sched_plugin {
73 struct list_head list;
74 /* basic info */
75 char *plugin_name;
76
77 /* setup */
78 activate_plugin_t activate_plugin;
79 deactivate_plugin_t deactivate_plugin;
80
81 /* scheduler invocation */
82 scheduler_tick_t tick;
83 schedule_t schedule;
84 finish_switch_t finish_switch;
85
86 /* syscall backend */
87 complete_job_t complete_job;
88 release_at_t release_at;
89
90 /* task state changes */
91 admit_task_t admit_task;
92
93 task_new_t task_new;
94 task_wake_up_t task_wake_up;
95 task_block_t task_block;
96 task_exit_t task_exit;
97
98#ifdef CONFIG_LITMUS_LOCKING
99 /* locking protocols */
100 allocate_lock_t allocate_lock;
101#endif
102} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
103
104
105extern struct sched_plugin *litmus;
106
107int register_sched_plugin(struct sched_plugin* plugin);
108struct sched_plugin* find_sched_plugin(const char* name);
109void print_sched_plugins(struct seq_file *m);
110
111
112extern struct sched_plugin linux_sched_plugin;
113
114#endif
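
A skeleton of a plugin declaration and registration (sketch only, not part of the patch); the demo_* callbacks are hypothetical prototypes, complete_job() is the generic backend from litmus/jobs.h, and registration via module_init() mirrors how in-tree plugins typically hook in:

static struct task_struct *demo_schedule(struct task_struct *prev);
static void demo_task_new(struct task_struct *t, int on_rq, int running);
static void demo_task_wake_up(struct task_struct *t);
static void demo_task_block(struct task_struct *t);
static void demo_task_exit(struct task_struct *t);
static long demo_admit_task(struct task_struct *t);

static struct sched_plugin demo_plugin = {
        .plugin_name  = "DEMO",
        .schedule     = demo_schedule,
        .task_new     = demo_task_new,
        .task_wake_up = demo_task_wake_up,
        .task_block   = demo_task_block,
        .task_exit    = demo_task_exit,
        .admit_task   = demo_admit_task,
        .complete_job = complete_job,
};

static int __init init_demo(void)
{
        return register_sched_plugin(&demo_plugin);
}
module_init(init_demo);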
diff --git a/include/litmus/srp.h b/include/litmus/srp.h
new file mode 100644
index 000000000000..c9a4552b2bf3
--- /dev/null
+++ b/include/litmus/srp.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_SRP_H
2#define LITMUS_SRP_H
3
4struct srp_semaphore;
5
6struct srp_priority {
7 struct list_head list;
8 unsigned int priority;
9 pid_t pid;
10};
11#define list2prio(l) list_entry(l, struct srp_priority, list)
12
13/* struct for uniprocessor SRP "semaphore" */
14struct srp_semaphore {
15 struct litmus_lock litmus_lock;
16 struct srp_priority ceiling;
17 struct task_struct* owner;
18 int cpu; /* cpu associated with this "semaphore" and resource */
19};
20
21/* map a task to its SRP preemption level priority */
22typedef unsigned int (*srp_prioritization_t)(struct task_struct* t);
23/* Must be updated by each plugin that uses SRP.*/
24extern srp_prioritization_t get_srp_prio;
25
26struct srp_semaphore* allocate_srp_semaphore(void);
27
28#endif
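
Sketch only (not part of the patch): a plugin that supports SRP points get_srp_prio at its own preemption-level mapping, typically from its activate_plugin() callback; the demo_* names are hypothetical, and a fixed-priority plugin could simply reuse the task priority:

/* map tasks to SRP preemption levels; here simply the fixed priority */
static unsigned int demo_srp_prio(struct task_struct *t)
{
        return get_priority(t);
}

static long demo_activate_plugin(void)
{
        get_srp_prio = demo_srp_prio;
        return 0;
}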
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
new file mode 100644
index 000000000000..94264c27d9ac
--- /dev/null
+++ b/include/litmus/unistd_32.h
@@ -0,0 +1,21 @@
1/*
2 * included from arch/x86/include/asm/unistd_32.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
6#define __LSC(x) (__NR_LITMUS + x)
7
8#define __NR_set_rt_task_param __LSC(0)
9#define __NR_get_rt_task_param __LSC(1)
10#define __NR_complete_job __LSC(2)
11#define __NR_od_open __LSC(3)
12#define __NR_od_close __LSC(4)
13#define __NR_litmus_lock __LSC(5)
14#define __NR_litmus_unlock __LSC(6)
15#define __NR_query_job_no __LSC(7)
16#define __NR_wait_for_job_release __LSC(8)
17#define __NR_wait_for_ts_release __LSC(9)
18#define __NR_release_ts __LSC(10)
19#define __NR_null_call __LSC(11)
20
21#define NR_litmus_syscalls 12
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
new file mode 100644
index 000000000000..d5ced0d2642c
--- /dev/null
+++ b/include/litmus/unistd_64.h
@@ -0,0 +1,33 @@
1/*
2 * included from arch/x86/include/asm/unistd_64.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
6#define __LSC(x) (__NR_LITMUS + x)
7
8#define __NR_set_rt_task_param __LSC(0)
9__SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param)
10#define __NR_get_rt_task_param __LSC(1)
11__SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param)
12#define __NR_complete_job __LSC(2)
13__SYSCALL(__NR_complete_job, sys_complete_job)
14#define __NR_od_open __LSC(3)
15__SYSCALL(__NR_od_open, sys_od_open)
16#define __NR_od_close __LSC(4)
17__SYSCALL(__NR_od_close, sys_od_close)
18#define __NR_litmus_lock __LSC(5)
19__SYSCALL(__NR_litmus_lock, sys_litmus_lock)
20#define __NR_litmus_unlock __LSC(6)
21__SYSCALL(__NR_litmus_unlock, sys_litmus_unlock)
22#define __NR_query_job_no __LSC(7)
23__SYSCALL(__NR_query_job_no, sys_query_job_no)
24#define __NR_wait_for_job_release __LSC(8)
25__SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release)
26#define __NR_wait_for_ts_release __LSC(9)
27__SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
28#define __NR_release_ts __LSC(10)
29__SYSCALL(__NR_release_ts, sys_release_ts)
30#define __NR_null_call __LSC(11)
31__SYSCALL(__NR_null_call, sys_null_call)
32
33#define NR_litmus_syscalls 12
diff --git a/include/litmus/wait.h b/include/litmus/wait.h
new file mode 100644
index 000000000000..ce1347c355f8
--- /dev/null
+++ b/include/litmus/wait.h
@@ -0,0 +1,57 @@
1#ifndef _LITMUS_WAIT_H_
2#define _LITMUS_WAIT_H_
3
4struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
5
6/* wrap regular wait_queue_t head */
7struct __prio_wait_queue {
8 wait_queue_t wq;
9
10 /* some priority point */
11 lt_t priority;
12 /* break ties in priority by lower tie_breaker */
13 unsigned int tie_breaker;
14};
15
16typedef struct __prio_wait_queue prio_wait_queue_t;
17
18static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq,
19 struct task_struct* t,
20 lt_t priority)
21{
22 init_waitqueue_entry(&pwq->wq, t);
23 pwq->priority = priority;
24 pwq->tie_breaker = 0;
25}
26
27static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq,
28 struct task_struct* t,
29 lt_t priority,
30 unsigned int tie_breaker)
31{
32 init_waitqueue_entry(&pwq->wq, t);
33 pwq->priority = priority;
34 pwq->tie_breaker = tie_breaker;
35}
36
37unsigned int __add_wait_queue_prio_exclusive(
38 wait_queue_head_t* head,
39 prio_wait_queue_t *new);
40
41static inline unsigned int add_wait_queue_prio_exclusive(
42 wait_queue_head_t* head,
43 prio_wait_queue_t *new)
44{
45 unsigned long flags;
46 unsigned int passed;
47
48 spin_lock_irqsave(&head->lock, flags);
49 passed = __add_wait_queue_prio_exclusive(head, new);
50
51 spin_unlock_irqrestore(&head->lock, flags);
52
53 return passed;
54}
55
56
57#endif
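
A usage sketch (illustrative only, not part of the patch): enqueueing the current task on a priority-ordered, exclusive wait queue; demo_wait_prio is hypothetical, and the caller is assumed to drop its own lock before calling schedule():

static void demo_wait_prio(wait_queue_head_t *wq, lt_t prio_point,
                           unsigned int seq_no)
{
        prio_wait_queue_t entry;

        /* order by priority point, break ties by lower sequence number */
        init_prio_waitqueue_entry_tie(&entry, current, prio_point, seq_no);

        set_current_state(TASK_UNINTERRUPTIBLE);
        add_wait_queue_prio_exclusive(wq, &entry);

        /* ... the caller now releases its lock and calls schedule(); the waker
         * later removes the head entry via __waitqueue_remove_first() ... */
}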
diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c
new file mode 100644
index 000000000000..59428036e2c8
--- /dev/null
+++ b/kernel/sched/litmus.c
@@ -0,0 +1,350 @@
1/* This file is included from kernel/sched.c */
2
3#include "sched.h"
4
5#include <litmus/trace.h>
6#include <litmus/sched_trace.h>
7
8#include <litmus/litmus.h>
9#include <litmus/budget.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/preempt.h>
12
13static void update_time_litmus(struct rq *rq, struct task_struct *p)
14{
15 u64 delta = rq->clock - p->se.exec_start;
16 if (unlikely((s64)delta < 0))
17 delta = 0;
18 /* per job counter */
19 p->rt_param.job_params.exec_time += delta;
20 /* task counter */
21 p->se.sum_exec_runtime += delta;
22 /* sched_clock() */
23 p->se.exec_start = rq->clock;
24 cpuacct_charge(p, delta);
25}
26
27static void double_rq_lock(struct rq *rq1, struct rq *rq2);
28static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
29
30/*
31 * litmus_tick() gets called by scheduler_tick() at HZ frequency.
32 * Interrupts are disabled.
33 */
34void litmus_tick(struct rq *rq, struct task_struct *p)
35{
36 TS_PLUGIN_TICK_START;
37
38 if (is_realtime(p))
39 update_time_litmus(rq, p);
40
41 /* plugin tick */
42 litmus->tick(p);
43
44 TS_PLUGIN_TICK_END;
45
46 return;
47}
48
49static struct task_struct *
50litmus_schedule(struct rq *rq, struct task_struct *prev)
51{
52 struct task_struct *next;
53
54#ifdef CONFIG_SMP
55 struct rq* other_rq;
56 long was_running;
57 lt_t _maybe_deadlock = 0;
58#endif
59
60 /* let the plugin schedule */
61 next = litmus->schedule(prev);
62
63 sched_state_plugin_check();
64
65#ifdef CONFIG_SMP
66 /* check if a global plugin pulled a task from a different RQ */
67 if (next && task_rq(next) != rq) {
68 /* we need to migrate the task */
69 other_rq = task_rq(next);
70 TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);
71
72 /* while we drop the lock, the prev task could change its
73 * state
74 */
75 was_running = is_running(prev);
76 mb();
77 raw_spin_unlock(&rq->lock);
78
79 /* Don't race with a concurrent switch. This could deadlock in
80 * the case of cross or circular migrations. It's the job of
81 * the plugin to make sure that doesn't happen.
82 */
83 TRACE_TASK(next, "stack_in_use=%d\n",
84 next->rt_param.stack_in_use);
85 if (next->rt_param.stack_in_use != NO_CPU) {
86 TRACE_TASK(next, "waiting to deschedule\n");
87 _maybe_deadlock = litmus_clock();
88 }
89 while (next->rt_param.stack_in_use != NO_CPU) {
90 cpu_relax();
91 mb();
92 if (next->rt_param.stack_in_use == NO_CPU)
93 TRACE_TASK(next,"descheduled. Proceeding.\n");
94
95 if (lt_before(_maybe_deadlock + 1000000000L,
96 litmus_clock())) {
97 /* We've been spinning for 1s.
98 * Something can't be right!
99 * Let's abandon the task and bail out; at least
100 * we will have debug info instead of a hard
101 * deadlock.
102 */
103#ifdef CONFIG_BUG_ON_MIGRATION_DEADLOCK
104 BUG();
105#else
106 TRACE_TASK(next,"stack too long in use. "
107 "Deadlock?\n");
108 next = NULL;
109
110 /* bail out */
111 raw_spin_lock(&rq->lock);
112 return next;
113#endif
114 }
115 }
116#ifdef __ARCH_WANT_UNLOCKED_CTXSW
117 if (next->on_cpu)
118 TRACE_TASK(next, "waiting for !oncpu");
119 while (next->on_cpu) {
120 cpu_relax();
121 mb();
122 }
123#endif
124 double_rq_lock(rq, other_rq);
125 mb();
126 if (is_realtime(prev) && is_running(prev) != was_running) {
127 TRACE_TASK(prev,
128 "state changed while we dropped"
129 " the lock: is_running=%d, was_running=%d\n",
130 is_running(prev), was_running);
131 if (is_running(prev) && !was_running) {
132				 * The prev task became unblocked; we need to
133				 * simulate the normal sequence of events for the
134				 * scheduler plugins.
135 */
136 litmus->task_block(prev);
137 litmus->task_wake_up(prev);
138 }
139 }
140
141 set_task_cpu(next, smp_processor_id());
142
143 /* DEBUG: now that we have the lock we need to make sure a
144 * couple of things still hold:
145 * - it is still a real-time task
146 * - it is still runnable (could have been stopped)
147 * If either is violated, then the active plugin is
148 * doing something wrong.
149 */
150 if (!is_realtime(next) || !is_running(next)) {
151 /* BAD BAD BAD */
152 TRACE_TASK(next,"BAD: migration invariant FAILED: "
153 "rt=%d running=%d\n",
154 is_realtime(next),
155 is_running(next));
156 /* drop the task */
157 next = NULL;
158 }
159 /* release the other CPU's runqueue, but keep ours */
160 raw_spin_unlock(&other_rq->lock);
161 }
162#endif
163
164 if (next) {
165#ifdef CONFIG_SMP
166 next->rt_param.stack_in_use = rq->cpu;
167#else
168 next->rt_param.stack_in_use = 0;
169#endif
170 next->se.exec_start = rq->clock;
171 }
172
173 update_enforcement_timer(next);
174 return next;
175}
176
177static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
178 int flags)
179{
180 if (flags & ENQUEUE_WAKEUP) {
181 sched_trace_task_resume(p);
182 tsk_rt(p)->present = 1;
183 /* LITMUS^RT plugins need to update the state
184 * _before_ making it available in global structures.
185 * Linux gets away with being lazy about the task state
186 * update. We can't do that, hence we update the task
187 * state already here.
188 *
189 * WARNING: this needs to be re-evaluated when porting
190 * to newer kernel versions.
191 */
192 p->state = TASK_RUNNING;
193 litmus->task_wake_up(p);
194
195 rq->litmus.nr_running++;
196 } else
197 TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
198}
199
200static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
201 int flags)
202{
203 if (flags & DEQUEUE_SLEEP) {
204 litmus->task_block(p);
205 tsk_rt(p)->present = 0;
206 sched_trace_task_block(p);
207
208 rq->litmus.nr_running--;
209 } else
210 TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
211}
212
213static void yield_task_litmus(struct rq *rq)
214{
215 TS_SYSCALL_IN_START;
216 TS_SYSCALL_IN_END;
217
218 BUG_ON(rq->curr != current);
219 /* sched_yield() is called to trigger delayed preemptions.
220 * Thus, mark the current task as needing to be rescheduled.
221 * This will cause the scheduler plugin to be invoked, which can
222 * then determine if a preemption is still required.
223 */
224 clear_exit_np(current);
225 litmus_reschedule_local();
226
227 TS_SYSCALL_OUT_START;
228}
229
230/* Plugins are responsible for this.
231 */
232static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
233{
234}
235
236static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
237{
238}
239
240#ifdef CONFIG_SMP
241static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev)
242{
243 update_time_litmus(rq, prev);
244 if (!is_running(prev))
245 tsk_rt(prev)->present = 0;
246}
247#endif
248
249/* pick_next_task_litmus() - wrapper around litmus_schedule()
250 *
251 * Returns the next task to be scheduled.
252 */
253static struct task_struct *pick_next_task_litmus(struct rq *rq)
254{
255 /* get the to-be-switched-out task (prev) */
256 struct task_struct *prev = rq->litmus.prev;
257 struct task_struct *next;
258
259 /* if not called from schedule() but from somewhere
260 * else (e.g., migration), return now!
261 */
262 if(!rq->litmus.prev)
263 return NULL;
264
265 rq->litmus.prev = NULL;
266
267 TS_PLUGIN_SCHED_START;
268 next = litmus_schedule(rq, prev);
269 TS_PLUGIN_SCHED_END;
270
271 return next;
272}
273
274static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
275{
276	/* nothing to do; tick-related work is done by litmus_tick() */
277 return;
278}
279
280static void switched_to_litmus(struct rq *rq, struct task_struct *p)
281{
282}
283
284static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
285 int oldprio)
286{
287}
288
289unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
290{
291 /* return infinity */
292 return 0;
293}
294
295/* This is called when a task becomes a real-time task, either due to a SCHED_*
296 * class transition or due to PI mutex inheritance. We don't handle Linux PI
297 * mutex inheritance yet (and probably never will). Use LITMUS^RT-provided
298 * synchronization primitives instead.
299 */
300static void set_curr_task_litmus(struct rq *rq)
301{
302 rq->curr->se.exec_start = rq->clock;
303}
304
305
306#ifdef CONFIG_SMP
307/* execve tries to rebalance the task in this scheduling domain.
308 * We don't care about the scheduling domain; this can get called from
309 * exec, fork, and wakeup.
310 */
311static int
312select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags)
313{
314 /* preemption is already disabled.
315 * We don't want to change cpu here
316 */
317 return task_cpu(p);
318}
319#endif
320
321const struct sched_class litmus_sched_class = {
322	/* Since commit 34f971f6, the stop/migrate worker threads have a class
323	 * of their own, which is the highest-priority class. We don't support
324	 * CPU hotplug or CPU throttling, which allows LITMUS^RT to use up to
325	 * 1.0 CPU capacity.
326 */
327 .next = &rt_sched_class,
328 .enqueue_task = enqueue_task_litmus,
329 .dequeue_task = dequeue_task_litmus,
330 .yield_task = yield_task_litmus,
331
332 .check_preempt_curr = check_preempt_curr_litmus,
333
334 .pick_next_task = pick_next_task_litmus,
335 .put_prev_task = put_prev_task_litmus,
336
337#ifdef CONFIG_SMP
338 .select_task_rq = select_task_rq_litmus,
339
340 .pre_schedule = pre_schedule_litmus,
341#endif
342
343 .set_curr_task = set_curr_task_litmus,
344 .task_tick = task_tick_litmus,
345
346 .get_rr_interval = get_rr_interval_litmus,
347
348 .prio_changed = prio_changed_litmus,
349 .switched_to = switched_to_litmus,
350};
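
[Editor's note] The class above delegates all real decisions to the active plugin through litmus->schedule(), ->tick(), ->task_wake_up(), and ->task_block(). A minimal sketch of a plugin that satisfies those callbacks while never picking a real-time task; the struct sched_plugin field names, register_sched_plugin(), and sched_state_task_picked() are assumptions taken from the plugin API headers, which are not part of this hunk.

/* Sketch only: a do-nothing plugin matching the callbacks invoked above. */
#include <linux/module.h>
#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	/* never pick a real-time task; background (Linux) scheduling only.
	 * sched_state_task_picked() keeps sched_state_plugin_check() happy. */
	sched_state_task_picked();
	return NULL;
}

static void demo_tick(struct task_struct *t)         { }
static void demo_task_wake_up(struct task_struct *t) { }
static void demo_task_block(struct task_struct *t)   { }

static struct sched_plugin demo_plugin = {
	.plugin_name	= "DEMO",
	.schedule	= demo_schedule,
	.tick		= demo_tick,
	.task_wake_up	= demo_task_wake_up,
	.task_block	= demo_task_block,
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);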
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 5408ef6b159b..32c18c6eb58d 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -1,5 +1,156 @@
1menu "LITMUS^RT" 1menu "LITMUS^RT"
2 2
3menu "Scheduling"
4
5config RELEASE_MASTER
6 bool "Release-master Support"
7 depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
8 default n
9 help
10 Allow one processor to act as a dedicated interrupt processor
11 that services all timer interrupts, but that does not schedule
12 real-time tasks. See RTSS'09 paper for details
13 (http://www.cs.unc.edu/~anderson/papers.html).
14
15
16config BUG_ON_MIGRATION_DEADLOCK
17 bool "Panic on suspected migration deadlock"
18 default y
19 help
20 This is a debugging option. The LITMUS^RT migration support code for
21 global scheduling contains a simple heuristic to detect when the
22 system deadlocks due to circular stack dependencies.
23
24 For example, such a deadlock exists if CPU 0 waits for task A's stack
25 to become available while using task B's stack, and CPU 1 waits for
26 task B's stack to become available while using task A's stack. Such
27 a situation can arise in (buggy) global scheduling plugins.
28
29	  With this option enabled, such a scenario will result in a BUG().
30 You can turn off this option when debugging on real hardware (e.g.,
31 to rescue traces, etc. that would be hard to get after a panic).
32
33 Only turn this off if you really know what you are doing. If this
34 BUG() triggers, the scheduler is broken and turning off this option
35 won't fix it.
36
37
38endmenu
39
40menu "Real-Time Synchronization"
41
42config NP_SECTION
43 bool "Non-preemptive section support"
44 default y
45 help
46 Allow tasks to become non-preemptable.
47 Note that plugins still need to explicitly support non-preemptivity.
48 Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support.
49
50 This is required to support locking protocols such as the FMLP.
51 If disabled, all tasks will be considered preemptable at all times.
52
53config LITMUS_LOCKING
54 bool "Support for real-time locking protocols"
55 depends on NP_SECTION
56 default y
57 help
58 Enable LITMUS^RT's multiprocessor real-time locking protocols with
59	  predictable maximum blocking times.
60
61 Say Yes if you want to include locking protocols such as the FMLP and
62 Baker's SRP.
63
64endmenu
65
66menu "Performance Enhancements"
67
68config SCHED_CPU_AFFINITY
69 bool "Local Migration Affinity"
70 depends on X86 && SYSFS
71 default y
72 help
73	  Rescheduled tasks prefer CPUs near their previously used CPU.
74 This may improve cache performance through possible preservation of
75 cache affinity, at the expense of (slightly) more involved scheduling
76 logic.
77
78 Warning: May make bugs harder to find since tasks may migrate less often.
79
80 NOTES:
81 * Feature is not utilized by PFair/PD^2.
82
83 Say Yes if unsure.
84
85config ALLOW_EARLY_RELEASE
86 bool "Allow Early Releasing"
87 default y
88 help
89 Allow tasks to release jobs early (while still maintaining job
90 precedence constraints). Only supported by EDF schedulers. Early
91 releasing must be explicitly requested by real-time tasks via
92	  the task_params passed to sys_set_rt_task_param().
93
94 Early releasing can improve job response times while maintaining
95 real-time correctness. However, it can easily peg your CPUs
96 since tasks never suspend to wait for their next job. As such, early
97 releasing is really only useful in the context of implementing
98 bandwidth servers, interrupt handling threads, or short-lived
99 computations.
100
101 Beware that early releasing may affect real-time analysis
102 if using locking protocols or I/O.
103
104 Say Yes if unsure.
105
106choice
107 prompt "EDF Tie-Break Behavior"
108 default EDF_TIE_BREAK_LATENESS_NORM
109 help
110 Allows the configuration of tie-breaking behavior when the deadlines
111 of two EDF-scheduled tasks are equal.
112
113 config EDF_TIE_BREAK_LATENESS
114 bool "Lateness-based Tie Break"
115 help
116 Break ties between two jobs, A and B, based upon the lateness of their
117 prior jobs. The job with the greatest lateness has priority. Note that
118 lateness has a negative value if the prior job finished before its
119 deadline.
120
121 config EDF_TIE_BREAK_LATENESS_NORM
122 bool "Normalized Lateness-based Tie Break"
123 help
124 Break ties between two jobs, A and B, based upon the lateness, normalized
125 by relative deadline, of their prior jobs. The job with the greatest
126 normalized lateness has priority. Note that lateness has a negative value
127 if the prior job finished before its deadline.
128
129	    Normalized lateness tie-breaks are likely desirable over non-normalized
130 tie-breaks if the execution times and/or relative deadlines of tasks in a
131 task set vary greatly.
132
133 config EDF_TIE_BREAK_HASH
134 bool "Hash-based Tie Breaks"
135 help
136 Break ties between two jobs, A and B, with equal deadlines by using a
137 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
138	    A has a ~50% chance of winning a given tie-break.
139
140 config EDF_PID_TIE_BREAK
141 bool "PID-based Tie Breaks"
142 help
143	    Break ties based upon OS-assigned thread IDs. Use this option if
144	    required by the algorithm's real-time analysis or if per-task
145	    response-time jitter must be minimized.
146
147 NOTES:
148	    * This tie-breaking method was the default in LITMUS^RT 2012.2 and earlier.
149
150endchoice
151
152endmenu
153
3menu "Tracing" 154menu "Tracing"
4 155
5config FEATHER_TRACE 156config FEATHER_TRACE
@@ -154,6 +305,20 @@ config SCHED_DEBUG_TRACE_CALLER
154 305
155 If unsure, say No. 306 If unsure, say No.
156 307
308config PREEMPT_STATE_TRACE
309 bool "Trace preemption state machine transitions"
310 depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
311 default n
312 help
313 With this option enabled, each CPU will log when it transitions
314 states in the preemption state machine. This state machine is
315 used to determine how to react to IPIs (avoid races with in-flight IPIs).
316
317 Warning: this creates a lot of information in the debug trace. Only
318 recommended when you are debugging preemption-related races.
319
320 If unsure, say No.
321
157endmenu 322endmenu
158 323
159endmenu 324endmenu
diff --git a/litmus/Makefile b/litmus/Makefile
index 6318f1c6fac8..fcb4d7533327 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -2,6 +2,25 @@
2# Makefile for LITMUS^RT 2# Makefile for LITMUS^RT
3# 3#
4 4
5obj-y = sched_plugin.o litmus.o \
6 preempt.o \
7 litmus_proc.o \
8 budget.o \
9 clustered.o \
10 jobs.o \
11 sync.o \
12 rt_domain.o \
13 edf_common.o \
14 fp_common.o \
15 fdso.o \
16 locking.o \
17 srp.o \
18 bheap.o \
19 binheap.o \
20 ctrldev.o
21
22obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
23
5obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 24obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
6obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 25obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
7obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 26obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
diff --git a/litmus/affinity.c b/litmus/affinity.c
new file mode 100644
index 000000000000..a5b437b80433
--- /dev/null
+++ b/litmus/affinity.c
@@ -0,0 +1,41 @@
1#include <linux/cpu.h>
2
3#include <litmus/affinity.h>
4
5struct neighborhood neigh_info[NR_CPUS];
6
7/* called by _init_litmus() */
8void init_topology(void) {
9 int cpu;
10 int i;
11 int chk;
12 int depth = num_cache_leaves;
13
14 if (depth > NUM_CACHE_LEVELS)
15 depth = NUM_CACHE_LEVELS;
16
17 for_each_online_cpu(cpu) {
18 for (i = 0; i < depth; ++i) {
19 chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
20 if (chk) {
21 /* failed */
22 neigh_info[cpu].size[i] = 0;
23 } else {
24 /* size = num bits in mask */
25 neigh_info[cpu].size[i] =
26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
27 }
28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
29 cpu, neigh_info[cpu].size[i], i,
30 *cpumask_bits(neigh_info[cpu].neighbors[i]));
31 }
32
33 /* set data for non-existent levels */
34 for (; i < NUM_CACHE_LEVELS; ++i) {
35 neigh_info[cpu].size[i] = 0;
36
37 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
38 cpu, neigh_info[cpu].size[i], i, 0lu);
39 }
40 }
41}
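
[Editor's note] The neigh_info table built above is consulted by the get_nearest_available_cpu() macro to prefer CPUs that share a cache with a task's previous CPU. A minimal sketch of walking the table directly, from the innermost to the outermost cache level; the helper itself is illustrative and not part of the patch.

/* Sketch only: report the first cache level at which two CPUs are
 * neighbors, or NUM_CACHE_LEVELS if they share no recorded level. */
#include <linux/cpumask.h>
#include <litmus/affinity.h>

static int cache_distance(int cpu_a, int cpu_b)
{
	int level;

	for (level = 0; level < NUM_CACHE_LEVELS; level++) {
		if (neigh_info[cpu_a].size[level] &&
		    cpumask_test_cpu(cpu_b, neigh_info[cpu_a].neighbors[level]))
			return level;
	}
	return NUM_CACHE_LEVELS;
}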
diff --git a/litmus/bheap.c b/litmus/bheap.c
new file mode 100644
index 000000000000..2707e0122b6d
--- /dev/null
+++ b/litmus/bheap.c
@@ -0,0 +1,316 @@
1#include <linux/bug.h>
2#include <linux/kernel.h>
3#include <litmus/bheap.h>
4
5void bheap_init(struct bheap* heap)
6{
7 heap->head = NULL;
8 heap->min = NULL;
9}
10
11void bheap_node_init(struct bheap_node** _h, void* value)
12{
13 struct bheap_node* h = *_h;
14 h->parent = NULL;
15 h->next = NULL;
16 h->child = NULL;
17 h->degree = NOT_IN_HEAP;
18 h->value = value;
19 h->ref = _h;
20}
21
22
23/* make child a subtree of root */
24static void __bheap_link(struct bheap_node* root,
25 struct bheap_node* child)
26{
27 child->parent = root;
28 child->next = root->child;
29 root->child = child;
30 root->degree++;
31}
32
33/* merge root lists */
34static struct bheap_node* __bheap_merge(struct bheap_node* a,
35 struct bheap_node* b)
36{
37 struct bheap_node* head = NULL;
38 struct bheap_node** pos = &head;
39
40 while (a && b) {
41 if (a->degree < b->degree) {
42 *pos = a;
43 a = a->next;
44 } else {
45 *pos = b;
46 b = b->next;
47 }
48 pos = &(*pos)->next;
49 }
50 if (a)
51 *pos = a;
52 else
53 *pos = b;
54 return head;
55}
56
57/* reverse a linked list of nodes. also clears parent pointer */
58static struct bheap_node* __bheap_reverse(struct bheap_node* h)
59{
60 struct bheap_node* tail = NULL;
61 struct bheap_node* next;
62
63 if (!h)
64 return h;
65
66 h->parent = NULL;
67 while (h->next) {
68 next = h->next;
69 h->next = tail;
70 tail = h;
71 h = next;
72 h->parent = NULL;
73 }
74 h->next = tail;
75 return h;
76}
77
78static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
79 struct bheap_node** prev, struct bheap_node** node)
80{
81 struct bheap_node *_prev, *cur;
82 *prev = NULL;
83
84 if (!heap->head) {
85 *node = NULL;
86 return;
87 }
88
89 *node = heap->head;
90 _prev = heap->head;
91 cur = heap->head->next;
92 while (cur) {
93 if (higher_prio(cur, *node)) {
94 *node = cur;
95 *prev = _prev;
96 }
97 _prev = cur;
98 cur = cur->next;
99 }
100}
101
102static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
103 struct bheap_node* h2)
104{
105 struct bheap_node* h1;
106 struct bheap_node *prev, *x, *next;
107 if (!h2)
108 return;
109 h1 = heap->head;
110 if (!h1) {
111 heap->head = h2;
112 return;
113 }
114 h1 = __bheap_merge(h1, h2);
115 prev = NULL;
116 x = h1;
117 next = x->next;
118 while (next) {
119 if (x->degree != next->degree ||
120 (next->next && next->next->degree == x->degree)) {
121 /* nothing to do, advance */
122 prev = x;
123 x = next;
124 } else if (higher_prio(x, next)) {
125 /* x becomes the root of next */
126 x->next = next->next;
127 __bheap_link(x, next);
128 } else {
129 /* next becomes the root of x */
130 if (prev)
131 prev->next = next;
132 else
133 h1 = next;
134 __bheap_link(next, x);
135 x = next;
136 }
137 next = x->next;
138 }
139 heap->head = h1;
140}
141
142static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
143 struct bheap* heap)
144{
145 struct bheap_node *prev, *node;
146 __bheap_min(higher_prio, heap, &prev, &node);
147 if (!node)
148 return NULL;
149 if (prev)
150 prev->next = node->next;
151 else
152 heap->head = node->next;
153 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
154 return node;
155}
156
157/* insert (and reinitialize) a node into the heap */
158void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
159 struct bheap_node* node)
160{
161 struct bheap_node *min;
162 node->child = NULL;
163 node->parent = NULL;
164 node->next = NULL;
165 node->degree = 0;
166 if (heap->min && higher_prio(node, heap->min)) {
167 /* swap min cache */
168 min = heap->min;
169 min->child = NULL;
170 min->parent = NULL;
171 min->next = NULL;
172 min->degree = 0;
173 __bheap_union(higher_prio, heap, min);
174 heap->min = node;
175 } else
176 __bheap_union(higher_prio, heap, node);
177}
178
179void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
180{
181 struct bheap_node* min;
182 if (heap->min) {
183 min = heap->min;
184 heap->min = NULL;
185 bheap_insert(higher_prio, heap, min);
186 }
187}
188
189/* merge addition into target */
190void bheap_union(bheap_prio_t higher_prio,
191 struct bheap* target, struct bheap* addition)
192{
193 /* first insert any cached minima, if necessary */
194 bheap_uncache_min(higher_prio, target);
195 bheap_uncache_min(higher_prio, addition);
196 __bheap_union(higher_prio, target, addition->head);
197 /* this is a destructive merge */
198 addition->head = NULL;
199}
200
201struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
202 struct bheap* heap)
203{
204 if (!heap->min)
205 heap->min = __bheap_extract_min(higher_prio, heap);
206 return heap->min;
207}
208
209struct bheap_node* bheap_take(bheap_prio_t higher_prio,
210 struct bheap* heap)
211{
212 struct bheap_node *node;
213 if (!heap->min)
214 heap->min = __bheap_extract_min(higher_prio, heap);
215 node = heap->min;
216 heap->min = NULL;
217 if (node)
218 node->degree = NOT_IN_HEAP;
219 return node;
220}
221
222int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
223{
224 struct bheap_node *parent;
225 struct bheap_node** tmp_ref;
226 void* tmp;
227
228 /* bubble up */
229 parent = node->parent;
230 while (parent && higher_prio(node, parent)) {
231 /* swap parent and node */
232 tmp = parent->value;
233 parent->value = node->value;
234 node->value = tmp;
235 /* swap references */
236 *(parent->ref) = node;
237 *(node->ref) = parent;
238 tmp_ref = parent->ref;
239 parent->ref = node->ref;
240 node->ref = tmp_ref;
241 /* step up */
242 node = parent;
243 parent = node->parent;
244 }
245
246 return parent != NULL;
247}
248
249void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
250 struct bheap_node* node)
251{
252 struct bheap_node *parent, *prev, *pos;
253 struct bheap_node** tmp_ref;
254 void* tmp;
255
256 if (heap->min != node) {
257 /* bubble up */
258 parent = node->parent;
259 while (parent) {
260 /* swap parent and node */
261 tmp = parent->value;
262 parent->value = node->value;
263 node->value = tmp;
264 /* swap references */
265 *(parent->ref) = node;
266 *(node->ref) = parent;
267 tmp_ref = parent->ref;
268 parent->ref = node->ref;
269 node->ref = tmp_ref;
270 /* step up */
271 node = parent;
272 parent = node->parent;
273 }
274 /* now delete:
275 * first find prev */
276 prev = NULL;
277 pos = heap->head;
278 while (pos != node) {
279 BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */
280 prev = pos;
281 pos = pos->next;
282 }
283 /* we have prev, now remove node */
284 if (prev)
285 prev->next = node->next;
286 else
287 heap->head = node->next;
288 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
289 } else
290 heap->min = NULL;
291 node->degree = NOT_IN_HEAP;
292}
293
294/* allocate a heap node for value and insert into the heap */
295int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
296 void* value, int gfp_flags)
297{
298 struct bheap_node* hn = bheap_node_alloc(gfp_flags);
299 if (likely(hn)) {
300 bheap_node_init(&hn, value);
301 bheap_insert(higher_prio, heap, hn);
302 }
303 return hn != NULL;
304}
305
306void* bheap_take_del(bheap_prio_t higher_prio,
307 struct bheap* heap)
308{
309 struct bheap_node* hn = bheap_take(higher_prio, heap);
310 void* ret = NULL;
311 if (hn) {
312 ret = hn->value;
313 bheap_node_free(hn);
314 }
315 return ret;
316}
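
[Editor's note] Callers supply a bheap_prio_t comparison over the opaque value pointers and then use bheap_insert()/bheap_take(). A usage sketch with a made-up element type; the kmalloc-based allocation is illustrative (the patch normally allocates bheap_nodes from a slab cache via bheap_node_alloc(), defined elsewhere).

/* Sketch only: min-heap of integers built on the binomial heap API above. */
#include <linux/errno.h>
#include <linux/slab.h>
#include <litmus/bheap.h>

struct int_item {
	int		   key;
	struct bheap_node *hn;
};

/* bheap_prio_t: return true if 'a' has higher priority (smaller key) */
static int int_higher_prio(struct bheap_node *a, struct bheap_node *b)
{
	return ((struct int_item *)a->value)->key <
	       ((struct int_item *)b->value)->key;
}

static int int_heap_push(struct bheap *heap, struct int_item *it)
{
	it->hn = kmalloc(sizeof(*it->hn), GFP_ATOMIC);
	if (!it->hn)
		return -ENOMEM;
	bheap_node_init(&it->hn, it);
	bheap_insert(int_higher_prio, heap, it->hn);
	return 0;
}

static struct int_item *int_heap_pop(struct bheap *heap)
{
	struct bheap_node *hn = bheap_take(int_higher_prio, heap);
	struct int_item *it = hn ? hn->value : NULL;

	kfree(hn);
	return it;
}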
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 000000000000..d3ab34b92096
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,387 @@
1#include <litmus/binheap.h>
2
3/* Returns true if the root ancestor of node is the root of the given heap. */
4int binheap_is_in_this_heap(struct binheap_node *node,
5 struct binheap* heap)
6{
7 if(!binheap_is_in_heap(node)) {
8 return 0;
9 }
10
11 while(node->parent != NULL) {
12 node = node->parent;
13 }
14
15 return (node == heap->root);
16}
17
18
19/* Update the node reference pointers. Same logic as Litmus binomial heap. */
20static void __update_ref(struct binheap_node *parent,
21 struct binheap_node *child)
22{
23 *(parent->ref_ptr) = child;
24 *(child->ref_ptr) = parent;
25
26 swap(parent->ref_ptr, child->ref_ptr);
27}
28
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34 swap(parent->data, child->data);
35 __update_ref(parent, child);
36}
37
38
39/* Swaps memory and data between two nodes. Actual nodes swap instead of
40 * just data. Needed when we delete nodes from the heap.
41 */
42static void __binheap_swap_safe(struct binheap *handle,
43 struct binheap_node *a,
44 struct binheap_node *b)
45{
46 swap(a->data, b->data);
47 __update_ref(a, b);
48
49 if((a->parent != NULL) && (a->parent == b->parent)) {
50 /* special case: shared parent */
51 swap(a->parent->left, a->parent->right);
52 }
53 else {
54 /* Update pointers to swap parents. */
55
56 if(a->parent) {
57 if(a == a->parent->left) {
58 a->parent->left = b;
59 }
60 else {
61 a->parent->right = b;
62 }
63 }
64
65 if(b->parent) {
66 if(b == b->parent->left) {
67 b->parent->left = a;
68 }
69 else {
70 b->parent->right = a;
71 }
72 }
73
74 swap(a->parent, b->parent);
75 }
76
77 /* swap children */
78
79 if(a->left) {
80 a->left->parent = b;
81
82 if(a->right) {
83 a->right->parent = b;
84 }
85 }
86
87 if(b->left) {
88 b->left->parent = a;
89
90 if(b->right) {
91 b->right->parent = a;
92 }
93 }
94
95 swap(a->left, b->left);
96 swap(a->right, b->right);
97
98
99 /* update next/last/root pointers */
100
101 if(a == handle->next) {
102 handle->next = b;
103 }
104 else if(b == handle->next) {
105 handle->next = a;
106 }
107
108 if(a == handle->last) {
109 handle->last = b;
110 }
111 else if(b == handle->last) {
112 handle->last = a;
113 }
114
115 if(a == handle->root) {
116 handle->root = b;
117 }
118 else if(b == handle->root) {
119 handle->root = a;
120 }
121}
122
123
124/**
125 * Update the pointer to the last node in the complete binary tree.
126 * Called internally after the root node has been deleted.
127 */
128static void __binheap_update_last(struct binheap *handle)
129{
130 struct binheap_node *temp = handle->last;
131
132 /* find a "bend" in the tree. */
133 while(temp->parent && (temp == temp->parent->left)) {
134 temp = temp->parent;
135 }
136
137 /* step over to sibling if we're not at root */
138 if(temp->parent != NULL) {
139 temp = temp->parent->left;
140 }
141
142 /* now travel right as far as possible. */
143 while(temp->right != NULL) {
144 temp = temp->right;
145 }
146
147 /* take one step to the left if we're not at the bottom-most level. */
148 if(temp->left != NULL) {
149 temp = temp->left;
150 }
151
152 handle->last = temp;
153}
154
155
156/**
157 * Update the pointer to the node that will take the next inserted node.
158 * Called internally after a node has been inserted.
159 */
160static void __binheap_update_next(struct binheap *handle)
161{
162 struct binheap_node *temp = handle->next;
163
164 /* find a "bend" in the tree. */
165 while(temp->parent && (temp == temp->parent->right)) {
166 temp = temp->parent;
167 }
168
169 /* step over to sibling if we're not at root */
170 if(temp->parent != NULL) {
171 temp = temp->parent->right;
172 }
173
174 /* now travel left as far as possible. */
175 while(temp->left != NULL) {
176 temp = temp->left;
177 }
178
179 handle->next = temp;
180}
181
182
183
184/* bubble node up towards root */
185static void __binheap_bubble_up(struct binheap *handle,
186 struct binheap_node *node)
187{
188 /* let BINHEAP_POISON data bubble to the top */
189
190 while((node->parent != NULL) &&
191 ((node->data == BINHEAP_POISON) ||
192 handle->compare(node, node->parent))) {
193 __binheap_swap(node->parent, node);
194 node = node->parent;
195 }
196}
197
198
199/* bubble node down, swapping with min-child */
200static void __binheap_bubble_down(struct binheap *handle)
201{
202 struct binheap_node *node = handle->root;
203
204 while(node->left != NULL) {
205 if(node->right && handle->compare(node->right, node->left)) {
206 if(handle->compare(node->right, node)) {
207 __binheap_swap(node, node->right);
208 node = node->right;
209 }
210 else {
211 break;
212 }
213 }
214 else {
215 if(handle->compare(node->left, node)) {
216 __binheap_swap(node, node->left);
217 node = node->left;
218 }
219 else {
220 break;
221 }
222 }
223 }
224}
225
226
227void __binheap_add(struct binheap_node *new_node,
228 struct binheap *handle,
229 void *data)
230{
231 new_node->data = data;
232 new_node->ref = new_node;
233 new_node->ref_ptr = &(new_node->ref);
234
235 if(!binheap_empty(handle)) {
236 /* insert left side first */
237 if(handle->next->left == NULL) {
238 handle->next->left = new_node;
239 new_node->parent = handle->next;
240 new_node->left = NULL;
241 new_node->right = NULL;
242
243 handle->last = new_node;
244
245 __binheap_bubble_up(handle, new_node);
246 }
247 else {
248 /* left occupied. insert right. */
249 handle->next->right = new_node;
250 new_node->parent = handle->next;
251 new_node->left = NULL;
252 new_node->right = NULL;
253
254 handle->last = new_node;
255
256 __binheap_update_next(handle);
257 __binheap_bubble_up(handle, new_node);
258 }
259 }
260 else {
261 /* first node in heap */
262
263 new_node->parent = NULL;
264 new_node->left = NULL;
265 new_node->right = NULL;
266
267 handle->root = new_node;
268 handle->next = new_node;
269 handle->last = new_node;
270 }
271}
272
273
274/**
275 * Removes the root node from the heap. The node is removed after coalescing
276 * the binheap_node with its original data pointer at the root of the tree.
277 *
278 * The 'last' node in the tree is then swapped up to the root and bubbled
279 * down.
280 */
281void __binheap_delete_root(struct binheap *handle,
282 struct binheap_node *container)
283{
284 struct binheap_node *root = handle->root;
285
286 if(root != container) {
287 /* coalesce */
288 __binheap_swap_safe(handle, root, container);
289 root = container;
290 }
291
292 if(handle->last != root) {
293 /* swap 'last' node up to root and bubble it down. */
294
295 struct binheap_node *to_move = handle->last;
296
297 if(to_move->parent != root) {
298 handle->next = to_move->parent;
299
300 if(handle->next->right == to_move) {
301 /* disconnect from parent */
302 to_move->parent->right = NULL;
303 handle->last = handle->next->left;
304 }
305 else {
306 /* find new 'last' before we disconnect */
307 __binheap_update_last(handle);
308
309 /* disconnect from parent */
310 to_move->parent->left = NULL;
311 }
312 }
313 else {
314 /* 'last' is direct child of root */
315
316 handle->next = to_move;
317
318 if(to_move == to_move->parent->right) {
319 to_move->parent->right = NULL;
320 handle->last = to_move->parent->left;
321 }
322 else {
323 to_move->parent->left = NULL;
324 handle->last = to_move;
325 }
326 }
327 to_move->parent = NULL;
328
329 /* reconnect as root. We can't just swap data ptrs since root node
330 * may be freed after this function returns.
331 */
332 to_move->left = root->left;
333 to_move->right = root->right;
334 if(to_move->left != NULL) {
335 to_move->left->parent = to_move;
336 }
337 if(to_move->right != NULL) {
338 to_move->right->parent = to_move;
339 }
340
341 handle->root = to_move;
342
343 /* bubble down */
344 __binheap_bubble_down(handle);
345 }
346 else {
347 /* removing last node in tree */
348 handle->root = NULL;
349 handle->next = NULL;
350 handle->last = NULL;
351 }
352
353 /* mark as removed */
354 container->parent = BINHEAP_POISON;
355}
356
357
358/**
359 * Delete an arbitrary node. Bubble node to delete up to the root,
360 * and then delete to root.
361 */
362void __binheap_delete(struct binheap_node *node_to_delete,
363 struct binheap *handle)
364{
365 struct binheap_node *target = node_to_delete->ref;
366 void *temp_data = target->data;
367
368	/* temporarily set data to BINHEAP_POISON to allow the node to bubble up to the top. */
369 target->data = BINHEAP_POISON;
370
371 __binheap_bubble_up(handle, target);
372 __binheap_delete_root(handle, node_to_delete);
373
374 node_to_delete->data = temp_data; /* restore node data pointer */
375}
376
377
378/**
379 * Bubble up a node whose pointer has decreased in value.
380 */
381void __binheap_decrease(struct binheap_node *orig_node,
382 struct binheap *handle)
383{
384 struct binheap_node *target = orig_node->ref;
385
386 __binheap_bubble_up(handle, target);
387}
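
[Editor's note] Heap order is determined entirely by the handle's compare callback over binheap_node pointers; node->data points at the caller's own element. A small sketch of such a callback and a manually initialized handle; the element struct is made up, and any handle fields beyond root/next/last/compare (declared in binheap.h, not shown here) are left zero-initialized.

/* Sketch only: order elements by an embedded deadline. */
#include <litmus/binheap.h>

struct demo_elem {
	unsigned long long deadline;
};

/* return true if 'a' should be closer to the root than 'b' */
static int demo_earlier_deadline(struct binheap_node *a,
				 struct binheap_node *b)
{
	struct demo_elem *ea = a->data;
	struct demo_elem *eb = b->data;

	return ea->deadline < eb->deadline;
}

static struct binheap demo_heap = {
	.root    = NULL,
	.next    = NULL,
	.last    = NULL,
	.compare = demo_earlier_deadline,
};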
diff --git a/litmus/budget.c b/litmus/budget.c
new file mode 100644
index 000000000000..f7712be29adb
--- /dev/null
+++ b/litmus/budget.c
@@ -0,0 +1,113 @@
1#include <linux/sched.h>
2#include <linux/percpu.h>
3#include <linux/hrtimer.h>
4
5#include <litmus/litmus.h>
6#include <litmus/preempt.h>
7
8#include <litmus/budget.h>
9
10struct enforcement_timer {
11 /* The enforcement timer is used to accurately police
12 * slice budgets. */
13 struct hrtimer timer;
14 int armed;
15};
16
17DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
18
19static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
20{
21 struct enforcement_timer* et = container_of(timer,
22 struct enforcement_timer,
23 timer);
24 unsigned long flags;
25
26 local_irq_save(flags);
27 TRACE("enforcement timer fired.\n");
28 et->armed = 0;
29 /* activate scheduler */
30 litmus_reschedule_local();
31 local_irq_restore(flags);
32
33 return HRTIMER_NORESTART;
34}
35
36/* assumes called with IRQs off */
37static void cancel_enforcement_timer(struct enforcement_timer* et)
38{
39 int ret;
40
41 TRACE("cancelling enforcement timer.\n");
42
43 /* Since interrupts are disabled and et->armed is only
44 * modified locally, we do not need any locks.
45 */
46
47 if (et->armed) {
48 ret = hrtimer_try_to_cancel(&et->timer);
49 /* Should never be inactive. */
50 BUG_ON(ret == 0);
51 /* Should never be running concurrently. */
52 BUG_ON(ret == -1);
53
54 et->armed = 0;
55 }
56}
57
58/* assumes called with IRQs off */
59static void arm_enforcement_timer(struct enforcement_timer* et,
60 struct task_struct* t)
61{
62 lt_t when_to_fire;
63 TRACE_TASK(t, "arming enforcement timer.\n");
64
65 /* Calling this when there is no budget left for the task
66 * makes no sense, unless the task is non-preemptive. */
67 BUG_ON(budget_exhausted(t) && (!is_np(t)));
68
69 /* __hrtimer_start_range_ns() cancels the timer
70 * anyway, so we don't have to check whether it is still armed */
71
72 if (likely(!is_np(t))) {
73 when_to_fire = litmus_clock() + budget_remaining(t);
74 __hrtimer_start_range_ns(&et->timer,
75 ns_to_ktime(when_to_fire),
76 0 /* delta */,
77 HRTIMER_MODE_ABS_PINNED,
78 0 /* no wakeup */);
79 et->armed = 1;
80 }
81}
82
83
84/* expects to be called with IRQs off */
85void update_enforcement_timer(struct task_struct* t)
86{
87 struct enforcement_timer* et = &__get_cpu_var(budget_timer);
88
89 if (t && budget_precisely_enforced(t)) {
90 /* Make sure we call into the scheduler when this budget
91 * expires. */
92 arm_enforcement_timer(et, t);
93 } else if (et->armed) {
94 /* Make sure we don't cause unnecessary interrupts. */
95 cancel_enforcement_timer(et);
96 }
97}
98
99
100static int __init init_budget_enforcement(void)
101{
102 int cpu;
103 struct enforcement_timer* et;
104
105 for (cpu = 0; cpu < NR_CPUS; cpu++) {
106 et = &per_cpu(budget_timer, cpu);
107 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
108 et->timer.function = on_enforcement_timeout;
109 }
110 return 0;
111}
112
113module_init(init_budget_enforcement);
diff --git a/litmus/clustered.c b/litmus/clustered.c
new file mode 100644
index 000000000000..979fac6bebb7
--- /dev/null
+++ b/litmus/clustered.c
@@ -0,0 +1,111 @@
1#include <linux/gfp.h>
2#include <linux/cpumask.h>
3#include <linux/list.h>
4
5#include <litmus/clustered.h>
6
7#if !defined(CONFIG_X86) || !defined(CONFIG_SYSFS)
8/* fake get_shared_cpu_map() on non-x86 architectures */
9
10int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
11{
12 if (index != 1)
13 return 1;
14 else {
15 /* Fake L1: CPU is all by itself. */
16 cpumask_clear(mask);
17 cpumask_set_cpu(cpu, mask);
18 return 0;
19 }
20}
21
22#endif
23
24int get_cluster_size(enum cache_level level)
25{
26 cpumask_var_t mask;
27 int ok;
28 int num_cpus;
29
30 if (level == GLOBAL_CLUSTER)
31 return num_online_cpus();
32 else {
33 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
34 return -ENOMEM;
35 /* assumes CPU 0 is representative of all CPUs */
36 ok = get_shared_cpu_map(mask, 0, level);
37 /* ok == 0 means we got the map; otherwise it's an invalid cache level */
38 if (ok == 0)
39 num_cpus = cpumask_weight(mask);
40 free_cpumask_var(mask);
41
42 if (ok == 0)
43 return num_cpus;
44 else
45 return -EINVAL;
46 }
47}
48
49int assign_cpus_to_clusters(enum cache_level level,
50 struct scheduling_cluster* clusters[],
51 unsigned int num_clusters,
52 struct cluster_cpu* cpus[],
53 unsigned int num_cpus)
54{
55 cpumask_var_t mask;
56 unsigned int i, free_cluster = 0, low_cpu;
57 int err = 0;
58
59 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
60 return -ENOMEM;
61
62 /* clear cluster pointers */
63 for (i = 0; i < num_cpus; i++) {
64 cpus[i]->id = i;
65 cpus[i]->cluster = NULL;
66 }
67
68 /* initialize clusters */
69 for (i = 0; i < num_clusters; i++) {
70 clusters[i]->id = i;
71 INIT_LIST_HEAD(&clusters[i]->cpus);
72 }
73
74	/* Assign each CPU. Two assumptions are made:
75 * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
76 * 2) All cpus that belong to some cluster are online.
77 */
78 for_each_online_cpu(i) {
79 /* get lowest-id CPU in cluster */
80 if (level != GLOBAL_CLUSTER) {
81 err = get_shared_cpu_map(mask, cpus[i]->id, level);
82 if (err != 0) {
83 /* ugh... wrong cache level? Either caller screwed up
84 * or the CPU topology is weird. */
85 printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
86 level, err);
87 err = -EINVAL;
88 goto out;
89 }
90 low_cpu = cpumask_first(mask);
91 } else
92 low_cpu = 0;
93 if (low_cpu == i) {
94 /* caller must provide an appropriate number of clusters */
95 BUG_ON(free_cluster >= num_clusters);
96
97 /* create new cluster */
98 cpus[i]->cluster = clusters[free_cluster++];
99 } else {
100 /* low_cpu points to the right cluster
101 * Assumption: low_cpu is actually online and was processed earlier. */
102 cpus[i]->cluster = cpus[low_cpu]->cluster;
103 }
104 /* enqueue in cpus list */
105 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
106		printk(KERN_INFO "Assigning CPU%u to cluster %u.\n", i, cpus[i]->cluster->id);
107 }
108out:
109 free_cpumask_var(mask);
110 return err;
111}
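
[Editor's note] A sketch of how a clustered plugin could size and populate its clusters at activation time using the two helpers above. The wrapper-free allocation, on-stack pointer arrays, and omitted error cleanup are simplifications for illustration; real plugins embed scheduling_cluster/cluster_cpu in their own per-cluster and per-CPU state.

/* Sketch only: build clusters for a given cache level. */
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <litmus/clustered.h>

static int demo_setup_clusters(enum cache_level level)
{
	struct scheduling_cluster *clusters[NR_CPUS];	/* on-stack for brevity */
	struct cluster_cpu *cpus[NR_CPUS];
	int cluster_size, num_clusters, num_cpus, i;

	cluster_size = get_cluster_size(level);
	if (cluster_size <= 0)
		return -EINVAL;

	num_cpus = num_online_cpus();
	num_clusters = num_cpus / cluster_size;

	/* allocation-failure handling omitted in this sketch */
	for (i = 0; i < num_clusters; i++)
		clusters[i] = kzalloc(sizeof(*clusters[i]), GFP_ATOMIC);
	for (i = 0; i < num_cpus; i++)
		cpus[i] = kzalloc(sizeof(*cpus[i]), GFP_ATOMIC);

	return assign_cpus_to_clusters(level, clusters, num_clusters,
				       cpus, num_cpus);
}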
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
new file mode 100644
index 000000000000..3e2ac2b3fbe8
--- /dev/null
+++ b/litmus/ctrldev.c
@@ -0,0 +1,160 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/module.h>
6
7#include <litmus/litmus.h>
8
9/* only one page for now, but we might want to add a RO version at some point */
10
11#define CTRL_NAME "litmus/ctrl"
12
13/* allocate t->rt_param.ctrl_page*/
14static int alloc_ctrl_page(struct task_struct *t)
15{
16 int err = 0;
17
18 /* only allocate if the task doesn't have one yet */
19 if (!tsk_rt(t)->ctrl_page) {
20 tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
21 if (!tsk_rt(t)->ctrl_page)
22 err = -ENOMEM;
23 /* will get de-allocated in task teardown */
24 TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
25 tsk_rt(t)->ctrl_page);
26 }
27 return err;
28}
29
30static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
31{
32 int err;
33
34 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
35
36 TRACE_CUR(CTRL_NAME
37 ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n",
38 tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start,
39 vma->vm_page_prot);
40
41 /* Map it into the vma. */
42 err = vm_insert_page(vma, vma->vm_start, ctrl);
43
44 if (err)
45 TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
46
47 return err;
48}
49
50static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
51{
52 TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
53 vma->vm_flags, vma->vm_page_prot);
54
55 TRACE_CUR(CTRL_NAME
56 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
57 (void*) vma->vm_start, (void*) vma->vm_end, vma,
58 vma->vm_private_data);
59}
60
61static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
62 struct vm_fault* vmf)
63{
64 TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__,
65 vma->vm_flags, vmf->pgoff);
66
67 /* This function should never be called, since all pages should have
68 * been mapped by mmap() already. */
69 WARN_ONCE(1, "Page faults should be impossible in the control page\n");
70
71 return VM_FAULT_SIGBUS;
72}
73
74static struct vm_operations_struct litmus_ctrl_vm_ops = {
75 .close = litmus_ctrl_vm_close,
76 .fault = litmus_ctrl_vm_fault,
77};
78
79static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
80{
81 int err = 0;
82
83 /* first make sure mapper knows what he's doing */
84
85 /* you can only get one page */
86 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
87 return -EINVAL;
88
89 /* you can only map the "first" page */
90 if (vma->vm_pgoff != 0)
91 return -EINVAL;
92
93 /* you can't share it with anyone */
94 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
95 return -EINVAL;
96
97 vma->vm_ops = &litmus_ctrl_vm_ops;
98 /* This mapping should not be kept across forks,
99 * cannot be expanded, and is not a "normal" page. */
100 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO;
101
102 /* We don't want the first write access to trigger a "minor" page fault
103 * to mark the page as dirty. This is transient, private memory, we
104 * don't care if it was touched or not. __S011 means RW access, but not
105 * execute, and avoids copy-on-write behavior.
106 * See protection_map in mmap.c. */
107 vma->vm_page_prot = __S011;
108
109 err = alloc_ctrl_page(current);
110 if (!err)
111 err = map_ctrl_page(current, vma);
112
113 TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
114 __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
115
116 return err;
117}
118
119static struct file_operations litmus_ctrl_fops = {
120 .owner = THIS_MODULE,
121 .mmap = litmus_ctrl_mmap,
122};
123
124static struct miscdevice litmus_ctrl_dev = {
125 .name = CTRL_NAME,
126 .minor = MISC_DYNAMIC_MINOR,
127 .fops = &litmus_ctrl_fops,
128};
129
130static int __init init_litmus_ctrl_dev(void)
131{
132 int err;
133
134 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
135
136 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
137
138 BUILD_BUG_ON(offsetof(struct control_page, sched.raw)
139 != LITMUS_CP_OFFSET_SCHED);
140 BUILD_BUG_ON(offsetof(struct control_page, irq_count)
141 != LITMUS_CP_OFFSET_IRQ_COUNT);
142 BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
143 != LITMUS_CP_OFFSET_TS_SC_START);
144 BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
145 != LITMUS_CP_OFFSET_IRQ_SC_START);
146
147 printk("Initializing LITMUS^RT control device.\n");
148 err = misc_register(&litmus_ctrl_dev);
149 if (err)
150 printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
151 return err;
152}
153
154static void __exit exit_litmus_ctrl_dev(void)
155{
156 misc_deregister(&litmus_ctrl_dev);
157}
158
159module_init(init_litmus_ctrl_dev);
160module_exit(exit_litmus_ctrl_dev);
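
[Editor's note] From user space, the control page is obtained by mapping exactly one page of the misc device (named "litmus/ctrl", so typically /dev/litmus/ctrl) with a private mapping, matching the checks in litmus_ctrl_mmap(). A minimal user-space sketch; the control page layout is defined in rt_param.h and is treated as opaque here.

/* User-space sketch only: map the LITMUS^RT control page. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	void *ctrl;
	int fd = open("/dev/litmus/ctrl", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* exactly one page, offset 0, private: anything else is rejected
	 * with -EINVAL by litmus_ctrl_mmap() */
	ctrl = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (ctrl == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	printf("control page mapped at %p\n", ctrl);
	munmap(ctrl, page);
	close(fd);
	return 0;
}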
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 000000000000..5aca2934a7b5
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,200 @@
1/*
2 * kernel/edf_common.c
3 *
4 * Common functions for EDF based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/edf_common.h>
16
17#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
18#include <litmus/fpmath.h>
19#endif
20
21#ifdef CONFIG_EDF_TIE_BREAK_HASH
22#include <linux/hash.h>
23static inline long edf_hash(struct task_struct *t)
24{
25 /* pid is 32 bits, so normally we would shove that into the
26	 * upper 32 bits and put the job number in the bottom
27	 * and hash the 64-bit number with hash_64(). Sadly,
28	 * in testing, hash_64() doesn't distribute keys where the
29 * upper bits are close together (as would be the case with
30 * pids) and job numbers are equal (as would be the case with
31 * synchronous task sets with all relative deadlines equal).
32 *
33 * A 2006 Linux patch proposed the following solution
34 * (but for some reason it wasn't accepted...).
35 *
36 * At least this workaround works for 32-bit systems as well.
37 */
38 return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
39}
40#endif
41
42
43/* edf_higher_prio - returns true if first has a higher EDF priority
44 * than second. Deadline ties are broken by the configured tie-break policy.
45 *
46 * both first and second may be NULL
47 */
48int edf_higher_prio(struct task_struct* first,
49 struct task_struct* second)
50{
51 struct task_struct *first_task = first;
52 struct task_struct *second_task = second;
53
54 /* There is no point in comparing a task to itself. */
55 if (first && first == second) {
56 TRACE_TASK(first,
57 "WARNING: pointless edf priority comparison.\n");
58 return 0;
59 }
60
61
62 /* check for NULL tasks */
63 if (!first || !second)
64 return first && !second;
65
66#ifdef CONFIG_LITMUS_LOCKING
67
68 /* Check for inherited priorities. Change task
69 * used for comparison in such a case.
70 */
71 if (unlikely(first->rt_param.inh_task))
72 first_task = first->rt_param.inh_task;
73 if (unlikely(second->rt_param.inh_task))
74 second_task = second->rt_param.inh_task;
75
76 /* Check for priority boosting. Tie-break by start of boosting.
77 */
78 if (unlikely(is_priority_boosted(first_task))) {
79 /* first_task is boosted, how about second_task? */
80 if (!is_priority_boosted(second_task) ||
81 lt_before(get_boost_start(first_task),
82 get_boost_start(second_task)))
83 return 1;
84 else
85 return 0;
86 } else if (unlikely(is_priority_boosted(second_task)))
87 /* second_task is boosted, first is not*/
88 return 0;
89
90#endif
91
92 if (earlier_deadline(first_task, second_task)) {
93 return 1;
94 }
95 else if (get_deadline(first_task) == get_deadline(second_task)) {
96		/* Need to tie-break. Each method must set pid_break to indicate
97		 * whether the tie should fall through to the PID-based tie-break below.
98 */
99 int pid_break;
100
101
102#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
103 /* Tie break by lateness. Jobs with greater lateness get
104 * priority. This should spread tardiness across all tasks,
105 * especially in task sets where all tasks have the same
106 * period and relative deadlines.
107 */
108 if (get_lateness(first_task) > get_lateness(second_task)) {
109 return 1;
110 }
111 pid_break = (get_lateness(first_task) == get_lateness(second_task));
112
113
114#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
115 /* Tie break by lateness, normalized by relative deadline. Jobs with
116 * greater normalized lateness get priority.
117 *
118 * Note: Considered using the algebraically equivalent
119 * lateness(first)*relative_deadline(second) >
120 lateness(second)*relative_deadline(first)
121 * to avoid fixed-point math, but values are prone to overflow if inputs
122 * are on the order of several seconds, even in 64-bit.
123 */
124 fp_t fnorm = _frac(get_lateness(first_task),
125 get_rt_relative_deadline(first_task));
126 fp_t snorm = _frac(get_lateness(second_task),
127 get_rt_relative_deadline(second_task));
128 if (_gt(fnorm, snorm)) {
129 return 1;
130 }
131 pid_break = _eq(fnorm, snorm);
132
133
134#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
135		/* Tie break by comparing hashes of the (pid, job#) tuple. There should be
136 * a 50% chance that first_task has a higher priority than second_task.
137 */
138 long fhash = edf_hash(first_task);
139 long shash = edf_hash(second_task);
140 if (fhash < shash) {
141 return 1;
142 }
143 pid_break = (fhash == shash);
144#else
145
146
147 /* CONFIG_EDF_PID_TIE_BREAK */
148		pid_break = 1; /* fall through to tie-break by PID */
149#endif
150
151 /* Tie break by pid */
152 if(pid_break) {
153 if (first_task->pid < second_task->pid) {
154 return 1;
155 }
156 else if (first_task->pid == second_task->pid) {
157 /* If the PIDs are the same then the task with the
158 * inherited priority wins.
159 */
160 if (!second->rt_param.inh_task) {
161 return 1;
162 }
163 }
164 }
165 }
166 return 0; /* fall-through. prio(second_task) > prio(first_task) */
167}
168
169int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
170{
171 return edf_higher_prio(bheap2task(a), bheap2task(b));
172}
173
174void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
175 release_jobs_t release)
176{
177 rt_domain_init(rt, edf_ready_order, resched, release);
178}
179
180/* need_to_preempt - check whether the task t needs to be preempted
181 * call only with irqs disabled and with ready_lock acquired
182 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
183 */
184int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
185{
186 /* we need the read lock for edf_ready_queue */
187 /* no need to preempt if there is nothing pending */
188 if (!__jobs_pending(rt))
189 return 0;
190 /* we need to reschedule if t doesn't exist */
191 if (!t)
192 return 1;
193
194 /* NOTE: We cannot check for non-preemptibility since we
195 * don't know what address space we're currently in.
196 */
197
198 /* make sure to get non-rt stuff out of the way */
199 return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
200}
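
[Editor's note] A plugin typically pairs edf_preemption_needed() with the preemption infrastructure: after a release or completion changes the ready queue, it checks whether the currently scheduled task must be preempted and, if so, forces a reschedule. A minimal sketch under the assumptions stated in the comment above (ready lock held, IRQs off); the helper itself is illustrative.

/* Sketch only: typical use of edf_preemption_needed() on the local CPU
 * of a partitioned plugin.  'scheduled' is the task currently running. */
#include <litmus/edf_common.h>
#include <litmus/preempt.h>
#include <litmus/rt_domain.h>

static void demo_check_for_preemption(rt_domain_t *edf,
				      struct task_struct *scheduled)
{
	if (edf_preemption_needed(edf, scheduled))
		/* force the local CPU back into the scheduler so that the
		 * plugin's schedule() callback can pick the new job */
		litmus_reschedule_local();
}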
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 000000000000..c4b450be4509
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,307 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 *   - object descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops generic_lock_ops;
22
23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */
26 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */
30};
31
32static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
33{
34 BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
35
36 if (fdso_ops[type]->create)
37 return fdso_ops[type]->create(obj_ref, type, config);
38 else
39 return -EINVAL;
40}
41
42static void fdso_destroy(obj_type_t type, void* obj)
43{
44 fdso_ops[type]->destroy(type, obj);
45}
46
47static int fdso_open(struct od_table_entry* entry, void* __user config)
48{
49 if (fdso_ops[entry->obj->type]->open)
50 return fdso_ops[entry->obj->type]->open(entry, config);
51 else
52 return 0;
53}
54
55static int fdso_close(struct od_table_entry* entry)
56{
57 if (fdso_ops[entry->obj->type]->close)
58 return fdso_ops[entry->obj->type]->close(entry);
59 else
60 return 0;
61}
62
63/* inode must be locked already */
64static int alloc_inode_obj(struct inode_obj_id** obj_ref,
65 struct inode* inode,
66 obj_type_t type,
67 unsigned int id,
68 void* __user config)
69{
70 struct inode_obj_id* obj;
71 void* raw_obj;
72 int err;
73
74 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
75 if (!obj) {
76 return -ENOMEM;
77 }
78
79 err = fdso_create(&raw_obj, type, config);
80 if (err != 0) {
81 kfree(obj);
82 return err;
83 }
84
85 INIT_LIST_HEAD(&obj->list);
86 atomic_set(&obj->count, 1);
87 obj->type = type;
88 obj->id = id;
89 obj->obj = raw_obj;
90 obj->inode = inode;
91
92 list_add(&obj->list, &inode->i_obj_list);
93 atomic_inc(&inode->i_count);
94
95 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
96
97 *obj_ref = obj;
98 return 0;
99}
100
101/* inode must be locked already */
102static struct inode_obj_id* get_inode_obj(struct inode* inode,
103 obj_type_t type,
104 unsigned int id)
105{
106 struct list_head* pos;
107 struct inode_obj_id* obj = NULL;
108
109 list_for_each(pos, &inode->i_obj_list) {
110 obj = list_entry(pos, struct inode_obj_id, list);
111 if (obj->id == id && obj->type == type) {
112 atomic_inc(&obj->count);
113 return obj;
114 }
115 }
116 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
117 return NULL;
118}
119
120
121static void put_inode_obj(struct inode_obj_id* obj)
122{
123 struct inode* inode;
124 int let_go = 0;
125
126 inode = obj->inode;
127 if (atomic_dec_and_test(&obj->count)) {
128
129 mutex_lock(&inode->i_obj_mutex);
130 /* no new references can be obtained */
131 if (!atomic_read(&obj->count)) {
132 list_del(&obj->list);
133 fdso_destroy(obj->type, obj->obj);
134 kfree(obj);
135 let_go = 1;
136 }
137 mutex_unlock(&inode->i_obj_mutex);
138 if (let_go)
139 iput(inode);
140 }
141}
142
143static struct od_table_entry* get_od_entry(struct task_struct* t)
144{
145 struct od_table_entry* table;
146 int i;
147
148
149 table = t->od_table;
150 if (!table) {
151 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
152 GFP_KERNEL);
153 t->od_table = table;
154 }
155
156 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
157 if (!table[i].used) {
158 table[i].used = 1;
159 return table + i;
160 }
161 return NULL;
162}
163
164static int put_od_entry(struct od_table_entry* od)
165{
166 put_inode_obj(od->obj);
167 od->used = 0;
168 return 0;
169}
170
171static long close_od_entry(struct od_table_entry *od)
172{
173 long ret;
174
175 /* Give the class a chance to reject the close. */
176 ret = fdso_close(od);
177 if (ret == 0)
178 ret = put_od_entry(od);
179
180 return ret;
181}
182
183void exit_od_table(struct task_struct* t)
184{
185 int i;
186
187 if (t->od_table) {
188 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
189 if (t->od_table[i].used)
190 close_od_entry(t->od_table + i);
191 kfree(t->od_table);
192 t->od_table = NULL;
193 }
194}
195
196static int do_sys_od_open(struct file* file, obj_type_t type, int id,
197 void* __user config)
198{
199 int idx = 0, err = 0;
200 struct inode* inode;
201 struct inode_obj_id* obj = NULL;
202 struct od_table_entry* entry;
203
204 inode = file->f_dentry->d_inode;
205
206 entry = get_od_entry(current);
207 if (!entry)
208 return -ENOMEM;
209
210 mutex_lock(&inode->i_obj_mutex);
211 obj = get_inode_obj(inode, type, id);
212 if (!obj)
213 err = alloc_inode_obj(&obj, inode, type, id, config);
214 if (err != 0) {
215 obj = NULL;
216 idx = err;
217 entry->used = 0;
218 } else {
219 entry->obj = obj;
220 entry->class = fdso_ops[type];
221 idx = entry - current->od_table;
222 }
223
224 mutex_unlock(&inode->i_obj_mutex);
225
226 /* open only if creation succeeded */
227 if (!err)
228 err = fdso_open(entry, config);
229 if (err < 0) {
230 /* The class rejected the open call.
231 * We need to clean up and tell user space.
232 */
233 if (obj)
234 put_od_entry(entry);
235 idx = err;
236 }
237
238 return idx;
239}
240
241
242struct od_table_entry* get_entry_for_od(int od)
243{
244 struct task_struct *t = current;
245
246 if (!t->od_table)
247 return NULL;
248 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
249 return NULL;
250 if (!t->od_table[od].used)
251 return NULL;
252 return t->od_table + od;
253}
254
255
256asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
257{
258 int ret = 0;
259 struct file* file;
260
261 /*
262 1) get file from fd, get inode from file
263 2) lock inode
264 3) try to lookup object
265 4) if not present create and enqueue object, inc inode refcnt
266 5) increment refcnt of object
267 6) alloc od_table_entry, setup ptrs
268 7) unlock inode
269 8) return offset in od_table as OD
270 */
271
272 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
273 ret = -EINVAL;
274 goto out;
275 }
276
277 file = fget(fd);
278 if (!file) {
279 ret = -EBADF;
280 goto out;
281 }
282
283 ret = do_sys_od_open(file, type, obj_id, config);
284
285 fput(file);
286
287out:
288 return ret;
289}
290
291
292asmlinkage long sys_od_close(int od)
293{
294 int ret = -EINVAL;
295 struct task_struct *t = current;
296
297 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
298 return ret;
299
300 if (!t->od_table || !t->od_table[od].used)
301 return ret;
302
303
304 ret = close_od_entry(t->od_table + od);
305
306 return ret;
307}
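
For illustration only (not part of the applied diff): a minimal stand-alone C sketch of the descriptor-table pattern used by get_od_entry()/put_od_entry() above — hand out the index of the first unused slot and recycle it on close. It deliberately ignores the inode-object reference counting that the real code handles with atomic_inc()/atomic_dec_and_test(); all demo_* names and MAX_DEMO_ODS are invented for this sketch.

#include <stdio.h>

#define MAX_DEMO_ODS 16   /* invented constant; the kernel uses MAX_OBJECT_DESCRIPTORS */

struct demo_od_entry {
	int used;
	void *obj;
};

static struct demo_od_entry demo_od_table[MAX_DEMO_ODS];

/* Mirror of get_od_entry()/do_sys_od_open(): return the index of the
 * first free slot, or -1 if the table is full. */
static int demo_od_open(void *obj)
{
	int i;
	for (i = 0; i < MAX_DEMO_ODS; i++)
		if (!demo_od_table[i].used) {
			demo_od_table[i].used = 1;
			demo_od_table[i].obj = obj;
			return i;
		}
	return -1;
}

/* Mirror of put_od_entry(): mark the slot as free again. */
static void demo_od_close(int od)
{
	if (od >= 0 && od < MAX_DEMO_ODS)
		demo_od_table[od].used = 0;
}

int main(void)
{
	int x = 42;
	int od = demo_od_open(&x);
	printf("opened od=%d\n", od);   /* prints 0 on first use */
	demo_od_close(od);
	return 0;
}
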
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 000000000000..964a4729deff
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,119 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fp_common.h>
16
17/* fp_higher_prio - returns true if first has a higher static priority
18 * than second. Ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int fp_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (unlikely(first && first == second)) {
30 TRACE_TASK(first,
31 "WARNING: pointless FP priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40 if (!is_realtime(second_task))
41 return 1;
42
43#ifdef CONFIG_LITMUS_LOCKING
44
45 /* Check for inherited priorities. Change task
46 * used for comparison in such a case.
47 */
48 if (unlikely(first->rt_param.inh_task))
49 first_task = first->rt_param.inh_task;
50 if (unlikely(second->rt_param.inh_task))
51 second_task = second->rt_param.inh_task;
52
53 /* Check for priority boosting. Tie-break by start of boosting.
54 */
55 if (unlikely(is_priority_boosted(first_task))) {
56 /* first_task is boosted, how about second_task? */
57 if (is_priority_boosted(second_task))
58 /* break by priority point */
59 return lt_before(get_boost_start(first_task),
60 get_boost_start(second_task));
61 else
62 /* priority boosting wins. */
63 return 1;
64 } else if (unlikely(is_priority_boosted(second_task)))
65 /* second_task is boosted, first is not*/
66 return 0;
67
68#endif
69
70 /* Comparisons to itself are not expected; priority inheritance
71 * should also not cause this to happen. */
72 BUG_ON(first_task == second_task);
73
74 if (get_priority(first_task) < get_priority(second_task))
75 return 1;
76 else if (get_priority(first_task) == get_priority(second_task))
77 /* Break by PID. */
78 return first_task->pid < second_task->pid;
79 else
80 return 0;
81}
82
83int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
84{
85 return fp_higher_prio(bheap2task(a), bheap2task(b));
86}
87
88void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
89 release_jobs_t release)
90{
91 rt_domain_init(rt, fp_ready_order, resched, release);
92}
93
94/* need_to_preempt - check whether the task t needs to be preempted
95 */
96int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
97{
98 struct task_struct *pending;
99
100 pending = fp_prio_peek(q);
101
102 if (!pending)
103 return 0;
104 if (!t)
105 return 1;
106
107 /* make sure to get non-rt stuff out of the way */
108 return !is_realtime(t) || fp_higher_prio(pending, t);
109}
110
111void fp_prio_queue_init(struct fp_prio_queue* q)
112{
113 int i;
114
115 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
116 q->bitmask[i] = 0;
117 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
118 bheap_init(&q->queue[i]);
119}
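
For illustration only (not part of the applied diff): a minimal sketch of the core ordering implemented by fp_higher_prio() above — lower numeric priority wins, ties broken by PID. It omits the NULL checks, priority inheritance, and priority boosting that the real function handles; the demo_* names are invented.

#include <stdio.h>

/* Simplified stand-in for the kernel's task descriptor; the real code
 * compares get_priority() values and falls back to the PID. */
struct demo_task {
	unsigned int priority;  /* lower value = higher priority */
	int pid;
};

/* Mirrors the core of fp_higher_prio(): lower numeric priority wins,
 * ties are broken in favour of the smaller PID. */
static int demo_fp_higher_prio(const struct demo_task *a, const struct demo_task *b)
{
	if (a->priority != b->priority)
		return a->priority < b->priority;
	return a->pid < b->pid;
}

int main(void)
{
	struct demo_task t1 = { .priority = 2, .pid = 100 };
	struct demo_task t2 = { .priority = 2, .pid = 101 };
	printf("%d\n", demo_fp_higher_prio(&t1, &t2)); /* 1: same priority, smaller PID wins */
	return 0;
}
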
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 000000000000..89a0810415fa
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,55 @@
1/* litmus/jobs.c - common job control code
2 */
3
4#include <linux/sched.h>
5
6#include <litmus/litmus.h>
7#include <litmus/jobs.h>
8
9static inline void setup_release(struct task_struct *t, lt_t release)
10{
11 /* prepare next release */
12 t->rt_param.job_params.release = release;
13 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
14 t->rt_param.job_params.exec_time = 0;
15
16 /* update job sequence number */
17 t->rt_param.job_params.job_no++;
18}
19
20void prepare_for_next_period(struct task_struct *t)
21{
22 BUG_ON(!t);
23
24 /* Record lateness before we set up the next job's
25 * release and deadline. Lateness may be negative.
26 */
27 t->rt_param.job_params.lateness =
28 (long long)litmus_clock() -
29 (long long)t->rt_param.job_params.deadline;
30
31 setup_release(t, get_release(t) + get_rt_period(t));
32 tsk_rt(t)->dont_requeue = 0;
33}
34
35void release_at(struct task_struct *t, lt_t start)
36{
37 BUG_ON(!t);
38 setup_release(t, start);
39 tsk_rt(t)->completed = 0;
40}
41
42
43/*
44 * Deactivate current task until the beginning of the next period.
45 */
46long complete_job(void)
47{
48	/* Mark that we do not execute anymore */
49 tsk_rt(current)->completed = 1;
50	/* Call schedule(); it will return when a new job arrives.
51	 * It also takes care of preparing for the next release.
52	 */
53 schedule();
54 return 0;
55}
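
For illustration only (not part of the applied diff): the release arithmetic performed by setup_release()/prepare_for_next_period() above as a stand-alone sketch — the next release is one period after the previous one, and the absolute deadline is the release plus the relative deadline. prepare_for_next_period() additionally records lateness = now - previous deadline, which may be negative. The demo_* names and the 10 ms / 5 ms values are invented for this sketch.

#include <stdio.h>

typedef unsigned long long lt_t;  /* nanoseconds, as in the kernel's lt_t */

struct demo_job {
	lt_t release;
	lt_t deadline;
	unsigned int job_no;
};

/* Mirrors setup_release(): advance by one period, re-derive the deadline,
 * and bump the job sequence number. */
static void demo_next_period(struct demo_job *j, lt_t period, lt_t rel_deadline)
{
	j->release += period;
	j->deadline = j->release + rel_deadline;
	j->job_no++;
}

int main(void)
{
	struct demo_job j = { .release = 0, .deadline = 5000000, .job_no = 1 };
	demo_next_period(&j, 10000000ULL, 5000000ULL);  /* period 10 ms, rel. deadline 5 ms */
	printf("job %u: release=%llu deadline=%llu\n", j.job_no, j.release, j.deadline);
	return 0;
}
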
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..7417a8fbda74
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,576 @@
1/*
2 * litmus.c -- Implementation of the LITMUS syscalls,
3 * the LITMUS initialization code,
4 * and the procfs interface.
5 */
6#include <asm/uaccess.h>
7#include <linux/uaccess.h>
8#include <linux/sysrq.h>
9#include <linux/sched.h>
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/reboot.h>
13#include <linux/stop_machine.h>
14
15#include <litmus/litmus.h>
16#include <litmus/bheap.h>
17#include <litmus/trace.h>
18#include <litmus/rt_domain.h>
19#include <litmus/litmus_proc.h>
20#include <litmus/sched_trace.h>
21
22#ifdef CONFIG_SCHED_CPU_AFFINITY
23#include <litmus/affinity.h>
24#endif
25
26/* Number of RT tasks that exist in the system */
27atomic_t rt_task_count = ATOMIC_INIT(0);
28
29#ifdef CONFIG_RELEASE_MASTER
30/* current master CPU for handling timer IRQs */
31atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
32#endif
33
34static struct kmem_cache * bheap_node_cache;
35extern struct kmem_cache * release_heap_cache;
36
37struct bheap_node* bheap_node_alloc(int gfp_flags)
38{
39 return kmem_cache_alloc(bheap_node_cache, gfp_flags);
40}
41
42void bheap_node_free(struct bheap_node* hn)
43{
44 kmem_cache_free(bheap_node_cache, hn);
45}
46
47struct release_heap* release_heap_alloc(int gfp_flags);
48void release_heap_free(struct release_heap* rh);
49
50/*
51 * sys_set_task_rt_param
52 * @pid: Pid of the task which scheduling parameters must be changed
53 * @param: New real-time extension parameters such as the execution cost and
54 * period
55 * Syscall for manipulating a task's rt extension params
56 * Returns EINVAL if param is NULL or pid is negative.
57 *	   EFAULT if param could not be copied from user space.
58 *	   ESRCH  if pid does not correspond to a valid task.
59 *	   EINVAL if either period or execution cost is <= 0.
60 *	   EBUSY  if the task is already a real-time task.
61 *	   0 if successful.
62 *
63 * Only non-real-time tasks may be configured with this system call
64 * to avoid races with the scheduler. In practice, this means that a
65 * task's parameters must be set _before_ calling sys_prepare_rt_task()
66 *
67 * find_task_by_vpid() assumes that we are in the same namespace as the
68 * target.
69 */
70asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
71{
72 struct rt_task tp;
73 struct task_struct *target;
74 int retval = -EINVAL;
75
76 printk("Setting up rt task parameters for process %d.\n", pid);
77
78 if (pid < 0 || param == 0) {
79 goto out;
80 }
81 if (copy_from_user(&tp, param, sizeof(tp))) {
82 retval = -EFAULT;
83 goto out;
84 }
85
86 /* Task search and manipulation must be protected */
87 read_lock_irq(&tasklist_lock);
88 if (!(target = find_task_by_vpid(pid))) {
89 retval = -ESRCH;
90 goto out_unlock;
91 }
92
93 if (is_realtime(target)) {
94 /* The task is already a real-time task.
95		 * We cannot allow parameter changes at this point.
96 */
97 retval = -EBUSY;
98 goto out_unlock;
99 }
100
101 /* set relative deadline to be implicit if left unspecified */
102 if (tp.relative_deadline == 0)
103 tp.relative_deadline = tp.period;
104
105 if (tp.exec_cost <= 0)
106 goto out_unlock;
107 if (tp.period <= 0)
108 goto out_unlock;
109 if (!cpu_online(tp.cpu))
110 goto out_unlock;
111 if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
112 {
113 printk(KERN_INFO "litmus: real-time task %d rejected "
114 "because task density > 1.0\n", pid);
115 goto out_unlock;
116 }
117 if (tp.cls != RT_CLASS_HARD &&
118 tp.cls != RT_CLASS_SOFT &&
119 tp.cls != RT_CLASS_BEST_EFFORT)
120 {
121 printk(KERN_INFO "litmus: real-time task %d rejected "
122 "because its class is invalid\n", pid);
123 goto out_unlock;
124 }
125 if (tp.budget_policy != NO_ENFORCEMENT &&
126 tp.budget_policy != QUANTUM_ENFORCEMENT &&
127 tp.budget_policy != PRECISE_ENFORCEMENT)
128 {
129 printk(KERN_INFO "litmus: real-time task %d rejected "
130 "because unsupported budget enforcement policy "
131 "specified (%d)\n",
132 pid, tp.budget_policy);
133 goto out_unlock;
134 }
135
136 target->rt_param.task_params = tp;
137
138 retval = 0;
139 out_unlock:
140 read_unlock_irq(&tasklist_lock);
141 out:
142 return retval;
143}
144
145/*
146 * Getter of task's RT params
147 * returns EINVAL if param is NULL or pid is invalid
148 * returns ESRCH if pid does not correspond to a valid task
149 * returns EFAULT if copying of parameters has failed.
150 *
151 * find_task_by_vpid() assumes that we are in the same namespace as the
152 * target.
153 */
154asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
155{
156 int retval = -EINVAL;
157 struct task_struct *source;
158 struct rt_task lp;
159 if (param == 0 || pid < 0)
160 goto out;
161 read_lock(&tasklist_lock);
162 if (!(source = find_task_by_vpid(pid))) {
163 retval = -ESRCH;
164 goto out_unlock;
165 }
166 lp = source->rt_param.task_params;
167 read_unlock(&tasklist_lock);
168 /* Do copying outside the lock */
169 retval =
170 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
171 return retval;
172 out_unlock:
173 read_unlock(&tasklist_lock);
174 out:
175 return retval;
176
177}
178
179/*
180 * This is the crucial function for the periodic task implementation.
181 * It checks whether the task is periodic, whether this kind of sleep
182 * is permitted, and calls the plugin-specific sleep, which puts the
183 * task into a wait array.
184 * returns 0 on successful wakeup
185 * returns EPERM if current conditions do not permit such sleep
186 * returns EINVAL if current task is not able to go to sleep
187 */
188asmlinkage long sys_complete_job(void)
189{
190 int retval = -EPERM;
191 if (!is_realtime(current)) {
192 retval = -EINVAL;
193 goto out;
194 }
195	/* A task with a negative or zero period cannot sleep */
196 if (get_rt_period(current) <= 0) {
197 retval = -EINVAL;
198 goto out;
199 }
200 /* The plugin has to put the task into an
201 * appropriate queue and call schedule
202 */
203 retval = litmus->complete_job();
204 out:
205 return retval;
206}
207
208/* This is an "improved" version of sys_complete_job that
209 * addresses the problem of unintentionally missing a job after
210 * an overrun.
211 *
212 * returns 0 on successful wakeup
213 * returns EPERM if current conditions do not permit such sleep
214 * returns EINVAL if current task is not able to go to sleep
215 */
216asmlinkage long sys_wait_for_job_release(unsigned int job)
217{
218 int retval = -EPERM;
219 if (!is_realtime(current)) {
220 retval = -EINVAL;
221 goto out;
222 }
223
224	/* A task with a negative or zero period cannot sleep */
225 if (get_rt_period(current) <= 0) {
226 retval = -EINVAL;
227 goto out;
228 }
229
230 retval = 0;
231
232 /* first wait until we have "reached" the desired job
233 *
234 * This implementation has at least two problems:
235 *
236 * 1) It doesn't gracefully handle the wrap around of
237 * job_no. Since LITMUS is a prototype, this is not much
238 * of a problem right now.
239 *
240 * 2) It is theoretically racy if a job release occurs
241 * between checking job_no and calling sleep_next_period().
242	 * A proper solution would require adding another callback
243 * in the plugin structure and testing the condition with
244 * interrupts disabled.
245 *
246 * FIXME: At least problem 2 should be taken care of eventually.
247 */
248 while (!retval && job > current->rt_param.job_params.job_no)
249 /* If the last job overran then job <= job_no and we
250 * don't send the task to sleep.
251 */
252 retval = litmus->complete_job();
253 out:
254 return retval;
255}
256
257/* This is a helper syscall to query the current job sequence number.
258 *
259 * returns 0 on successful query
260 * returns EPERM if task is not a real-time task.
261 * returns EFAULT if &job is not a valid pointer.
262 */
263asmlinkage long sys_query_job_no(unsigned int __user *job)
264{
265 int retval = -EPERM;
266 if (is_realtime(current))
267 retval = put_user(current->rt_param.job_params.job_no, job);
268
269 return retval;
270}
271
272/* sys_null_call() is only used for determining raw system call
273 * overheads (kernel entry, kernel exit). It has no useful side effects.
274 * If ts is non-NULL, then the current Feather-Trace time is recorded.
275 */
276asmlinkage long sys_null_call(cycles_t __user *ts)
277{
278 long ret = 0;
279 cycles_t now;
280
281 if (ts) {
282 now = get_cycles();
283 ret = put_user(now, ts);
284 }
285
286 return ret;
287}
288
289/* p is a real-time task. Re-init its state as a best-effort task. */
290static void reinit_litmus_state(struct task_struct* p, int restore)
291{
292 struct rt_task user_config = {};
293 void* ctrl_page = NULL;
294
295 if (restore) {
296		/* Save the user-space provided configuration data
297		 * and the allocated control page. */
298 user_config = p->rt_param.task_params;
299 ctrl_page = p->rt_param.ctrl_page;
300 }
301
302 /* We probably should not be inheriting any task's priority
303 * at this point in time.
304 */
305 WARN_ON(p->rt_param.inh_task);
306
307 /* Cleanup everything else. */
308 memset(&p->rt_param, 0, sizeof(p->rt_param));
309
310 /* Restore preserved fields. */
311 if (restore) {
312 p->rt_param.task_params = user_config;
313 p->rt_param.ctrl_page = ctrl_page;
314 }
315}
316
317long litmus_admit_task(struct task_struct* tsk)
318{
319 long retval = 0;
320
321 BUG_ON(is_realtime(tsk));
322
323 tsk_rt(tsk)->heap_node = NULL;
324 tsk_rt(tsk)->rel_heap = NULL;
325
326 if (get_rt_relative_deadline(tsk) == 0 ||
327 get_exec_cost(tsk) >
328 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
329 TRACE_TASK(tsk,
330 "litmus admit: invalid task parameters "
331 "(e = %lu, p = %lu, d = %lu)\n",
332 get_exec_cost(tsk), get_rt_period(tsk),
333 get_rt_relative_deadline(tsk));
334 retval = -EINVAL;
335 goto out;
336 }
337
338 if (!cpu_online(get_partition(tsk))) {
339 TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
340 get_partition(tsk));
341 retval = -EINVAL;
342 goto out;
343 }
344
345 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
346
347 /* allocate heap node for this task */
348 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
349 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
350
351 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
352 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
353
354 retval = -ENOMEM;
355 goto out;
356 } else {
357 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
358 }
359
360 preempt_disable();
361
362 retval = litmus->admit_task(tsk);
363
364 if (!retval) {
365 sched_trace_task_name(tsk);
366 sched_trace_task_param(tsk);
367 atomic_inc(&rt_task_count);
368 }
369
370 preempt_enable();
371
372out:
373 if (retval) {
374 bheap_node_free(tsk_rt(tsk)->heap_node);
375 release_heap_free(tsk_rt(tsk)->rel_heap);
376 }
377 return retval;
378}
379
380void litmus_exit_task(struct task_struct* tsk)
381{
382 if (is_realtime(tsk)) {
383 sched_trace_task_completion(tsk, 1);
384
385 litmus->task_exit(tsk);
386
387 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
388 bheap_node_free(tsk_rt(tsk)->heap_node);
389 release_heap_free(tsk_rt(tsk)->rel_heap);
390
391 atomic_dec(&rt_task_count);
392 reinit_litmus_state(tsk, 1);
393 }
394}
395
396static int do_plugin_switch(void *_plugin)
397{
398 int ret;
399 struct sched_plugin* plugin = _plugin;
400
401 /* don't switch if there are active real-time tasks */
402 if (atomic_read(&rt_task_count) == 0) {
403 ret = litmus->deactivate_plugin();
404 if (0 != ret)
405 goto out;
406 ret = plugin->activate_plugin();
407 if (0 != ret) {
408 printk(KERN_INFO "Can't activate %s (%d).\n",
409 plugin->plugin_name, ret);
410 plugin = &linux_sched_plugin;
411 }
412 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
413 litmus = plugin;
414 } else
415 ret = -EBUSY;
416out:
417 return ret;
418}
419
420/* Switching a plugin in use is tricky.
421 * We must watch out that no real-time tasks exist
422 * (and that none is created in parallel) and that the plugin is not
423 * currently in use on any processor (in theory).
424 */
425int switch_sched_plugin(struct sched_plugin* plugin)
426{
427 BUG_ON(!plugin);
428
429 if (atomic_read(&rt_task_count) == 0)
430 return stop_machine(do_plugin_switch, plugin, NULL);
431 else
432 return -EBUSY;
433}
434
435/* Called upon fork.
436 * p is the newly forked task.
437 */
438void litmus_fork(struct task_struct* p)
439{
440 if (is_realtime(p)) {
441 /* clean out any litmus related state, don't preserve anything */
442 reinit_litmus_state(p, 0);
443 /* Don't let the child be a real-time task. */
444 p->sched_reset_on_fork = 1;
445 } else
446 /* non-rt tasks might have ctrl_page set */
447 tsk_rt(p)->ctrl_page = NULL;
448
449 /* od tables are never inherited across a fork */
450 p->od_table = NULL;
451}
452
453/* Called upon execve().
454 * current is doing the exec.
455 * Don't let address space specific stuff leak.
456 */
457void litmus_exec(void)
458{
459 struct task_struct* p = current;
460
461 if (is_realtime(p)) {
462 WARN_ON(p->rt_param.inh_task);
463 if (tsk_rt(p)->ctrl_page) {
464 free_page((unsigned long) tsk_rt(p)->ctrl_page);
465 tsk_rt(p)->ctrl_page = NULL;
466 }
467 }
468}
469
470void exit_litmus(struct task_struct *dead_tsk)
471{
472	/* We also allow non-RT tasks to
473	 * allocate control pages so that
474	 * measurements can include non-RT tasks.
475	 * Hence, check in any case whether the
476	 * page needs to be freed.
477	 */
478 if (tsk_rt(dead_tsk)->ctrl_page) {
479 TRACE_TASK(dead_tsk,
480 "freeing ctrl_page %p\n",
481 tsk_rt(dead_tsk)->ctrl_page);
482 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
483 }
484
485 /* main cleanup only for RT tasks */
486 if (is_realtime(dead_tsk))
487 litmus_exit_task(dead_tsk);
488}
489
490
491#ifdef CONFIG_MAGIC_SYSRQ
492int sys_kill(int pid, int sig);
493
494static void sysrq_handle_kill_rt_tasks(int key)
495{
496 struct task_struct *t;
497 read_lock(&tasklist_lock);
498 for_each_process(t) {
499 if (is_realtime(t)) {
500 sys_kill(t->pid, SIGKILL);
501 }
502 }
503 read_unlock(&tasklist_lock);
504}
505
506static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
507 .handler = sysrq_handle_kill_rt_tasks,
508 .help_msg = "quit-rt-tasks(X)",
509 .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
510};
511#endif
512
513extern struct sched_plugin linux_sched_plugin;
514
515static int litmus_shutdown_nb(struct notifier_block *unused1,
516 unsigned long unused2, void *unused3)
517{
518 /* Attempt to switch back to regular Linux scheduling.
519 * Forces the active plugin to clean up.
520 */
521 if (litmus != &linux_sched_plugin) {
522 int ret = switch_sched_plugin(&linux_sched_plugin);
523 if (ret) {
524 printk("Auto-shutdown of active Litmus plugin failed.\n");
525 }
526 }
527 return NOTIFY_DONE;
528}
529
530static struct notifier_block shutdown_notifier = {
531 .notifier_call = litmus_shutdown_nb,
532};
533
534static int __init _init_litmus(void)
535{
536 /* Common initializers,
537 * mode change lock is used to enforce single mode change
538 * operation.
539 */
540 printk("Starting LITMUS^RT kernel\n");
541
542 register_sched_plugin(&linux_sched_plugin);
543
544 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
545 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
546
547#ifdef CONFIG_MAGIC_SYSRQ
548 /* offer some debugging help */
549 if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
550 printk("Registered kill rt tasks magic sysrq.\n");
551 else
552 printk("Could not register kill rt tasks magic sysrq.\n");
553#endif
554
555 init_litmus_proc();
556
557#ifdef CONFIG_SCHED_CPU_AFFINITY
558 init_topology();
559#endif
560
561 register_reboot_notifier(&shutdown_notifier);
562
563 return 0;
564}
565
566static void _exit_litmus(void)
567{
568 unregister_reboot_notifier(&shutdown_notifier);
569
570 exit_litmus_proc();
571 kmem_cache_destroy(bheap_node_cache);
572 kmem_cache_destroy(release_heap_cache);
573}
574
575module_init(_init_litmus);
576module_exit(_exit_litmus);
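
For illustration only (not part of the applied diff): a compact sketch of the admission checks performed by sys_set_rt_task_param() and litmus_admit_task() above — cost and period must be positive, an unspecified relative deadline defaults to the period, and the cost must not exceed min(relative deadline, period) ("density" at most 1). The class, budget-policy, and cpu_online() checks are omitted; demo_params_ok is an invented name.

#include <stdio.h>

/* Returns 1 iff the parameter set would pass the density check. */
static int demo_params_ok(unsigned long long exec_cost,
			  unsigned long long period,
			  unsigned long long rel_deadline)
{
	if (rel_deadline == 0)
		rel_deadline = period;          /* implicit deadline */
	if (exec_cost == 0 || period == 0)
		return 0;
	if (exec_cost > (rel_deadline < period ? rel_deadline : period))
		return 0;                       /* density > 1.0 */
	return 1;
}

int main(void)
{
	printf("%d\n", demo_params_ok(3, 10, 0));  /* 1: fits with implicit deadline */
	printf("%d\n", demo_params_ok(7, 10, 5));  /* 0: cost exceeds relative deadline */
	return 0;
}
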
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
new file mode 100644
index 000000000000..1ebf1277f5d3
--- /dev/null
+++ b/litmus/litmus_proc.c
@@ -0,0 +1,407 @@
1/*
2 * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
3 */
4
5#include <linux/sched.h>
6#include <linux/uaccess.h>
7#include <linux/seq_file.h>
8
9#include <litmus/litmus.h>
10#include <litmus/litmus_proc.h>
11
12#include <litmus/clustered.h>
13
14/* in litmus/litmus.c */
15extern atomic_t rt_task_count;
16
17static struct proc_dir_entry *litmus_dir = NULL,
18 *curr_file = NULL,
19 *stat_file = NULL,
20 *plugs_dir = NULL,
21#ifdef CONFIG_RELEASE_MASTER
22 *release_master_file = NULL,
23#endif
24 *plugs_file = NULL;
25
26/* in litmus/sync.c */
27int count_tasks_waiting_for_release(void);
28
29static int litmus_stats_proc_show(struct seq_file *m, void *v)
30{
31 seq_printf(m,
32 "real-time tasks = %d\n"
33 "ready for release = %d\n",
34 atomic_read(&rt_task_count),
35 count_tasks_waiting_for_release());
36 return 0;
37}
38
39static int litmus_stats_proc_open(struct inode *inode, struct file *file)
40{
41 return single_open(file, litmus_stats_proc_show, PDE_DATA(inode));
42}
43
44static const struct file_operations litmus_stats_proc_fops = {
45 .open = litmus_stats_proc_open,
46 .read = seq_read,
47 .llseek = seq_lseek,
48 .release = single_release,
49};
50
51
52static int litmus_loaded_proc_show(struct seq_file *m, void *v)
53{
54 print_sched_plugins(m);
55 return 0;
56}
57
58static int litmus_loaded_proc_open(struct inode *inode, struct file *file)
59{
60 return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode));
61}
62
63static const struct file_operations litmus_loaded_proc_fops = {
64 .open = litmus_loaded_proc_open,
65 .read = seq_read,
66 .llseek = seq_lseek,
67 .release = single_release,
68};
69
70
71
72
73/* in litmus/litmus.c */
74int switch_sched_plugin(struct sched_plugin*);
75
76static ssize_t litmus_active_proc_write(struct file *file,
77 const char __user *buffer, size_t count,
78 loff_t *ppos)
79{
80 char name[65];
81 struct sched_plugin* found;
82 ssize_t ret = -EINVAL;
83 int err;
84
85
86 ret = copy_and_chomp(name, sizeof(name), buffer, count);
87 if (ret < 0)
88 return ret;
89
90 found = find_sched_plugin(name);
91
92 if (found) {
93 err = switch_sched_plugin(found);
94 if (err) {
95 printk(KERN_INFO "Could not switch plugin: %d\n", err);
96 ret = err;
97 }
98 } else {
99 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
100 ret = -ESRCH;
101 }
102
103 return ret;
104}
105
106static int litmus_active_proc_show(struct seq_file *m, void *v)
107{
108 seq_printf(m, "%s\n", litmus->plugin_name);
109 return 0;
110}
111
112static int litmus_active_proc_open(struct inode *inode, struct file *file)
113{
114 return single_open(file, litmus_active_proc_show, PDE_DATA(inode));
115}
116
117static const struct file_operations litmus_active_proc_fops = {
118 .open = litmus_active_proc_open,
119 .read = seq_read,
120 .llseek = seq_lseek,
121 .release = single_release,
122 .write = litmus_active_proc_write,
123};
124
125
126#ifdef CONFIG_RELEASE_MASTER
127static ssize_t litmus_release_master_proc_write(
128 struct file *file,
129 const char __user *buffer, size_t count,
130 loff_t *ppos)
131{
132 int cpu, err, online = 0;
133 char msg[64];
134 ssize_t len;
135
136 len = copy_and_chomp(msg, sizeof(msg), buffer, count);
137
138 if (len < 0)
139 return len;
140
141 if (strcmp(msg, "NO_CPU") == 0)
142 atomic_set(&release_master_cpu, NO_CPU);
143 else {
144 err = sscanf(msg, "%d", &cpu);
145 if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
146 atomic_set(&release_master_cpu, cpu);
147 } else {
148 TRACE("invalid release master: '%s' "
149 "(err:%d cpu:%d online:%d)\n",
150 msg, err, cpu, online);
151 len = -EINVAL;
152 }
153 }
154 return len;
155}
156
157static int litmus_release_master_proc_show(struct seq_file *m, void *v)
158{
159 int master;
160 master = atomic_read(&release_master_cpu);
161 if (master == NO_CPU)
162 seq_printf(m, "NO_CPU\n");
163 else
164 seq_printf(m, "%d\n", master);
165 return 0;
166}
167
168static int litmus_release_master_proc_open(struct inode *inode, struct file *file)
169{
170 return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode));
171}
172
173static const struct file_operations litmus_release_master_proc_fops = {
174 .open = litmus_release_master_proc_open,
175 .read = seq_read,
176 .llseek = seq_lseek,
177 .release = single_release,
178 .write = litmus_release_master_proc_write,
179};
180#endif
181
182int __init init_litmus_proc(void)
183{
184 litmus_dir = proc_mkdir("litmus", NULL);
185 if (!litmus_dir) {
186 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
187 return -ENOMEM;
188 }
189
190 curr_file = proc_create("active_plugin", 0644, litmus_dir,
191 &litmus_active_proc_fops);
192
193 if (!curr_file) {
194 printk(KERN_ERR "Could not allocate active_plugin "
195 "procfs entry.\n");
196 return -ENOMEM;
197 }
198
199#ifdef CONFIG_RELEASE_MASTER
200 release_master_file = proc_create("release_master", 0644, litmus_dir,
201 &litmus_release_master_proc_fops);
202 if (!release_master_file) {
203 printk(KERN_ERR "Could not allocate release_master "
204 "procfs entry.\n");
205 return -ENOMEM;
206 }
207#endif
208
209 stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops);
210
211 plugs_dir = proc_mkdir("plugins", litmus_dir);
212 if (!plugs_dir){
213 printk(KERN_ERR "Could not allocate plugins directory "
214 "procfs entry.\n");
215 return -ENOMEM;
216 }
217
218 plugs_file = proc_create("loaded", 0444, plugs_dir,
219 &litmus_loaded_proc_fops);
220
221 return 0;
222}
223
224void exit_litmus_proc(void)
225{
226 if (plugs_file)
227 remove_proc_entry("loaded", plugs_dir);
228 if (plugs_dir)
229 remove_proc_entry("plugins", litmus_dir);
230 if (stat_file)
231 remove_proc_entry("stats", litmus_dir);
232 if (curr_file)
233 remove_proc_entry("active_plugin", litmus_dir);
234#ifdef CONFIG_RELEASE_MASTER
235 if (release_master_file)
236 remove_proc_entry("release_master", litmus_dir);
237#endif
238 if (litmus_dir)
239 remove_proc_entry("litmus", NULL);
240}
241
242long make_plugin_proc_dir(struct sched_plugin* plugin,
243 struct proc_dir_entry** pde_in)
244{
245 struct proc_dir_entry *pde_new = NULL;
246 long rv;
247
248 if (!plugin || !plugin->plugin_name){
249 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
250 __func__);
251 rv = -EINVAL;
252 goto out_no_pde;
253 }
254
255 if (!plugs_dir){
256 printk(KERN_ERR "Could not make plugin sub-directory, because "
257 "/proc/litmus/plugins does not exist.\n");
258 rv = -ENOENT;
259 goto out_no_pde;
260 }
261
262 pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
263 if (!pde_new){
264 printk(KERN_ERR "Could not make plugin sub-directory: "
265			"out of memory?\n");
266 rv = -ENOMEM;
267 goto out_no_pde;
268 }
269
270 rv = 0;
271 *pde_in = pde_new;
272 goto out_ok;
273
274out_no_pde:
275 *pde_in = NULL;
276out_ok:
277 return rv;
278}
279
280void remove_plugin_proc_dir(struct sched_plugin* plugin)
281{
282 if (!plugin || !plugin->plugin_name){
283 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
284 __func__);
285 return;
286 }
287 remove_proc_entry(plugin->plugin_name, plugs_dir);
288}
289
290
291
292/* misc. I/O helper functions */
293
294int copy_and_chomp(char *kbuf, unsigned long ksize,
295 __user const char* ubuf, unsigned long ulength)
296{
297 /* caller must provide buffer space */
298 BUG_ON(!ksize);
299
300 ksize--; /* leave space for null byte */
301
302 if (ksize > ulength)
303 ksize = ulength;
304
305 if(copy_from_user(kbuf, ubuf, ksize))
306 return -EFAULT;
307
308 kbuf[ksize] = '\0';
309
310 /* chomp kbuf */
311 if (ksize > 0 && kbuf[ksize - 1] == '\n')
312 kbuf[ksize - 1] = '\0';
313
314 return ksize;
315}
316
317/* helper functions for clustered plugins */
318static const char* cache_level_names[] = {
319 "ALL",
320 "L1",
321 "L2",
322 "L3",
323};
324
325int parse_cache_level(const char *cache_name, enum cache_level *level)
326{
327 int err = -EINVAL;
328 int i;
329 /* do a quick and dirty comparison to find the cluster size */
330 for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
331 if (!strcmp(cache_name, cache_level_names[i])) {
332 *level = (enum cache_level) i;
333 err = 0;
334 break;
335 }
336 return err;
337}
338
339const char* cache_level_name(enum cache_level level)
340{
341 int idx = level;
342
343 if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
344 return cache_level_names[idx];
345 else
346 return "INVALID";
347}
348
349
350
351
352/* proc file interface to configure the cluster size */
353
354static ssize_t litmus_cluster_proc_write(struct file *file,
355 const char __user *buffer, size_t count,
356 loff_t *ppos)
357{
358 enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file));
359 ssize_t len;
360 char cache_name[8];
361
362 len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
363
364 if (len > 0 && parse_cache_level(cache_name, level)) {
365 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
366 len = -EINVAL;
367 }
368
369 return len;
370}
371
372static int litmus_cluster_proc_show(struct seq_file *m, void *v)
373{
374 enum cache_level *level = (enum cache_level *) m->private;
375
376 seq_printf(m, "%s\n", cache_level_name(*level));
377 return 0;
378}
379
380static int litmus_cluster_proc_open(struct inode *inode, struct file *file)
381{
382 return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode));
383}
384
385static const struct file_operations litmus_cluster_proc_fops = {
386 .open = litmus_cluster_proc_open,
387 .read = seq_read,
388 .llseek = seq_lseek,
389 .release = single_release,
390 .write = litmus_cluster_proc_write,
391};
392
393struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
394 enum cache_level* level)
395{
396 struct proc_dir_entry* cluster_file;
397
398
399 cluster_file = proc_create_data("cluster", 0644, parent,
400 &litmus_cluster_proc_fops,
401 (void *) level);
402 if (!cluster_file) {
403 printk(KERN_ERR
404		       "Could not create cluster procfs entry.\n");
405 }
406 return cluster_file;
407}
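
For illustration only (not part of the applied diff): a user-space sketch of how the /proc/litmus/active_plugin file created above could be used to switch plugins. The plugin name "GSN-EDF" is only an example (no plugins are added by this patch), the program needs sufficient privileges, and the write is rejected with -EBUSY while real-time tasks exist.

#include <stdio.h>

int main(void)
{
	char buf[65];
	FILE *f = fopen("/proc/litmus/active_plugin", "w");

	if (!f) {
		perror("open active_plugin");
		return 1;
	}
	fputs("GSN-EDF\n", f);        /* trailing newline is chomped by copy_and_chomp() */
	if (fclose(f) != 0) {
		perror("switch plugin");
		return 1;
	}

	f = fopen("/proc/litmus/active_plugin", "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("active plugin: %s", buf);
	if (f)
		fclose(f);
	return 0;
}
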
diff --git a/litmus/locking.c b/litmus/locking.c
new file mode 100644
index 000000000000..43d9aece2e74
--- /dev/null
+++ b/litmus/locking.c
@@ -0,0 +1,188 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
3#include <litmus/fdso.h>
4
5#ifdef CONFIG_LITMUS_LOCKING
6
7#include <linux/sched.h>
8#include <litmus/litmus.h>
9#include <litmus/sched_plugin.h>
10#include <litmus/trace.h>
11#include <litmus/wait.h>
12
13static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
14static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
15static int close_generic_lock(struct od_table_entry* entry);
16static void destroy_generic_lock(obj_type_t type, void* sem);
17
18struct fdso_ops generic_lock_ops = {
19 .create = create_generic_lock,
20 .open = open_generic_lock,
21 .close = close_generic_lock,
22 .destroy = destroy_generic_lock
23};
24
25static inline bool is_lock(struct od_table_entry* entry)
26{
27 return entry->class == &generic_lock_ops;
28}
29
30static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
31{
32 BUG_ON(!is_lock(entry));
33 return (struct litmus_lock*) entry->obj->obj;
34}
35
36static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
37{
38 struct litmus_lock* lock;
39 int err;
40
41 err = litmus->allocate_lock(&lock, type, arg);
42 if (err == 0)
43 *obj_ref = lock;
44 return err;
45}
46
47static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
48{
49 struct litmus_lock* lock = get_lock(entry);
50 if (lock->ops->open)
51 return lock->ops->open(lock, arg);
52 else
53 return 0; /* default: any task can open it */
54}
55
56static int close_generic_lock(struct od_table_entry* entry)
57{
58 struct litmus_lock* lock = get_lock(entry);
59 if (lock->ops->close)
60 return lock->ops->close(lock);
61 else
62 return 0; /* default: closing succeeds */
63}
64
65static void destroy_generic_lock(obj_type_t type, void* obj)
66{
67 struct litmus_lock* lock = (struct litmus_lock*) obj;
68 lock->ops->deallocate(lock);
69}
70
71asmlinkage long sys_litmus_lock(int lock_od)
72{
73 long err = -EINVAL;
74 struct od_table_entry* entry;
75 struct litmus_lock* l;
76
77 TS_SYSCALL_IN_START;
78
79 TS_SYSCALL_IN_END;
80
81 TS_LOCK_START;
82
83 entry = get_entry_for_od(lock_od);
84 if (entry && is_lock(entry)) {
85 l = get_lock(entry);
86 TRACE_CUR("attempts to lock 0x%p\n", l);
87 err = l->ops->lock(l);
88 }
89
90	/* Note: the task may have been suspended or preempted in between! Take
91 * this into account when computing overheads. */
92 TS_LOCK_END;
93
94 TS_SYSCALL_OUT_START;
95
96 return err;
97}
98
99asmlinkage long sys_litmus_unlock(int lock_od)
100{
101 long err = -EINVAL;
102 struct od_table_entry* entry;
103 struct litmus_lock* l;
104
105 TS_SYSCALL_IN_START;
106
107 TS_SYSCALL_IN_END;
108
109 TS_UNLOCK_START;
110
111 entry = get_entry_for_od(lock_od);
112 if (entry && is_lock(entry)) {
113 l = get_lock(entry);
114 TRACE_CUR("attempts to unlock 0x%p\n", l);
115 err = l->ops->unlock(l);
116 }
117
118	/* Note: the task may have been preempted in between! Take this into
119 * account when computing overheads. */
120 TS_UNLOCK_END;
121
122 TS_SYSCALL_OUT_START;
123
124 return err;
125}
126
127struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
128{
129 wait_queue_t* q;
130 struct task_struct* t = NULL;
131
132 if (waitqueue_active(wq)) {
133 q = list_entry(wq->task_list.next,
134 wait_queue_t, task_list);
135 t = (struct task_struct*) q->private;
136 __remove_wait_queue(wq, q);
137 }
138 return(t);
139}
140
141unsigned int __add_wait_queue_prio_exclusive(
142 wait_queue_head_t* head,
143 prio_wait_queue_t *new)
144{
145 struct list_head *pos;
146 unsigned int passed = 0;
147
148 new->wq.flags |= WQ_FLAG_EXCLUSIVE;
149
150 /* find a spot where the new entry is less than the next */
151 list_for_each(pos, &head->task_list) {
152 prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
153 wq.task_list);
154
155 if (unlikely(lt_before(new->priority, queued->priority) ||
156 (new->priority == queued->priority &&
157 new->tie_breaker < queued->tie_breaker))) {
158			/* new has higher priority than pos; insert before pos */
159 __list_add(&new->wq.task_list, pos->prev, pos);
160 goto out;
161 }
162 passed++;
163 }
164
165	/* If we get to this point, either the list is empty or every
166	 * queued element is less than new.
167	 * Let's add new to the end. */
168 list_add_tail(&new->wq.task_list, &head->task_list);
169out:
170 return passed;
171}
172
173
174#else
175
176struct fdso_ops generic_lock_ops = {};
177
178asmlinkage long sys_litmus_lock(int sem_od)
179{
180 return -ENOSYS;
181}
182
183asmlinkage long sys_litmus_unlock(int sem_od)
184{
185 return -ENOSYS;
186}
187
188#endif
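
For illustration only (not part of the applied diff): a stand-alone sketch of the sorted insertion performed by __add_wait_queue_prio_exclusive() above — walk the queue, insert the new element before the first entry with a numerically larger priority, and report how many entries were passed. It ignores the tie_breaker field and keeps equal priorities in FIFO order; a plain singly linked list stands in for the kernel wait queue, and the demo_* names are invented.

#include <stdio.h>

struct demo_waiter {
	unsigned long long priority;   /* smaller = higher priority */
	struct demo_waiter *next;
};

/* Insert 'new' in priority order and return how many entries were passed. */
static unsigned int demo_enqueue(struct demo_waiter **head, struct demo_waiter *new)
{
	unsigned int passed = 0;
	while (*head && (*head)->priority <= new->priority) {
		head = &(*head)->next;
		passed++;
	}
	new->next = *head;
	*head = new;
	return passed;
}

int main(void)
{
	struct demo_waiter a = { .priority = 10 }, b = { .priority = 30 }, c = { .priority = 20 };
	struct demo_waiter *q = NULL;

	demo_enqueue(&q, &a);
	demo_enqueue(&q, &b);
	printf("passed %u entries\n", demo_enqueue(&q, &c));  /* passed 1 (only 'a') */
	return 0;
}
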
diff --git a/litmus/preempt.c b/litmus/preempt.c
new file mode 100644
index 000000000000..6be2f26728b8
--- /dev/null
+++ b/litmus/preempt.c
@@ -0,0 +1,137 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/preempt.h>
5#include <litmus/trace.h>
6
7/* The rescheduling state of each processor.
8 */
9DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
10
11void sched_state_will_schedule(struct task_struct* tsk)
12{
13 /* Litmus hack: we only care about processor-local invocations of
14 * set_tsk_need_resched(). We can't reliably set the flag remotely
15 * since it might race with other updates to the scheduling state. We
16 * can't rely on the runqueue lock protecting updates to the sched
17 * state since processors do not acquire the runqueue locks for all
18 * updates to the sched state (to avoid acquiring two runqueue locks at
19 * the same time). Further, if tsk is residing on a remote processor,
20 * then that processor doesn't actually know yet that it is going to
21 * reschedule; it still must receive an IPI (unless a local invocation
22 * races).
23 */
24 if (likely(task_cpu(tsk) == smp_processor_id())) {
25 VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
26 if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
27 set_sched_state(PICKED_WRONG_TASK);
28 else
29 set_sched_state(WILL_SCHEDULE);
30 } else
31 /* Litmus tasks should never be subject to a remote
32 * set_tsk_need_resched(). */
33 BUG_ON(is_realtime(tsk));
34#ifdef CONFIG_PREEMPT_STATE_TRACE
35 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
36 __builtin_return_address(0));
37#endif
38}
39
40/* Called by the IPI handler after another CPU called smp_send_resched(). */
41void sched_state_ipi(void)
42{
43 /* If the IPI was slow, we might be in any state right now. The IPI is
44 * only meaningful if we are in SHOULD_SCHEDULE. */
45 if (is_in_sched_state(SHOULD_SCHEDULE)) {
46 /* Cause scheduler to be invoked.
47 * This will cause a transition to WILL_SCHEDULE. */
48 set_tsk_need_resched(current);
49 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
50 current->comm, current->pid);
51 TS_SEND_RESCHED_END;
52 } else {
53 /* ignore */
54 TRACE_STATE("ignoring IPI in state %x (%s)\n",
55 get_sched_state(),
56 sched_state_name(get_sched_state()));
57 }
58}
59
60/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
61 * hold the lock that is used to serialize scheduling decisions. */
62void litmus_reschedule(int cpu)
63{
64 int picked_transition_ok = 0;
65 int scheduled_transition_ok = 0;
66
67 /* The (remote) CPU could be in any state. */
68
69 /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
70 * is not aware of the need to reschedule at this point. */
71
72 /* is a context switch in progress? */
73 if (cpu_is_in_sched_state(cpu, TASK_PICKED))
74 picked_transition_ok = sched_state_transition_on(
75 cpu, TASK_PICKED, PICKED_WRONG_TASK);
76
77 if (!picked_transition_ok &&
78 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
79 /* We either raced with the end of the context switch, or the
80 * CPU was in TASK_SCHEDULED anyway. */
81 scheduled_transition_ok = sched_state_transition_on(
82 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
83 }
84
85 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
86 * scheduler to be invoked. */
87 if (scheduled_transition_ok) {
88 if (smp_processor_id() == cpu)
89 set_tsk_need_resched(current);
90 else {
91 TS_SEND_RESCHED_START(cpu);
92 smp_send_reschedule(cpu);
93 }
94 }
95
96 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
97 __FUNCTION__,
98 picked_transition_ok,
99 scheduled_transition_ok);
100}
101
102void litmus_reschedule_local(void)
103{
104 if (is_in_sched_state(TASK_PICKED))
105 set_sched_state(PICKED_WRONG_TASK);
106 else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) {
107 set_sched_state(WILL_SCHEDULE);
108 set_tsk_need_resched(current);
109 }
110}
111
112#ifdef CONFIG_DEBUG_KERNEL
113
114void sched_state_plugin_check(void)
115{
116 if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
117		TRACE("!!!! plugin did not call sched_state_task_picked()! "
118 "Calling sched_state_task_picked() is mandatory---fix this.\n");
119 set_sched_state(TASK_PICKED);
120 }
121}
122
123#define NAME_CHECK(x) case x: return #x
124const char* sched_state_name(int s)
125{
126 switch (s) {
127 NAME_CHECK(TASK_SCHEDULED);
128 NAME_CHECK(SHOULD_SCHEDULE);
129 NAME_CHECK(WILL_SCHEDULE);
130 NAME_CHECK(TASK_PICKED);
131 NAME_CHECK(PICKED_WRONG_TASK);
132 default:
133 return "UNKNOWN";
134 };
135}
136
137#endif
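
For illustration only (not part of the applied diff): a toy model of the per-CPU rescheduling states driven by litmus_reschedule() and sched_state_will_schedule() above. The real transitions go through sched_state_transition_on() and the atomic resched_state variable (declared in preempt.h, not shown in this hunk); here a plain variable and invented DEMO_* names stand in just to show the flow.

#include <stdio.h>

enum demo_sched_state {
	DEMO_TASK_SCHEDULED,
	DEMO_SHOULD_SCHEDULE,
	DEMO_WILL_SCHEDULE,
	DEMO_TASK_PICKED,
	DEMO_PICKED_WRONG_TASK,
};

/* Transition only if the current state matches 'from'; returns 1 on success. */
static int demo_transition(enum demo_sched_state *state,
			   enum demo_sched_state from,
			   enum demo_sched_state to)
{
	if (*state == from) {
		*state = to;
		return 1;
	}
	return 0;
}

int main(void)
{
	enum demo_sched_state cpu_state = DEMO_TASK_SCHEDULED;

	/* A remote CPU asks us to reschedule: TASK_SCHEDULED -> SHOULD_SCHEDULE. */
	demo_transition(&cpu_state, DEMO_TASK_SCHEDULED, DEMO_SHOULD_SCHEDULE);
	/* The local scheduler is invoked: SHOULD_SCHEDULE -> WILL_SCHEDULE. */
	demo_transition(&cpu_state, DEMO_SHOULD_SCHEDULE, DEMO_WILL_SCHEDULE);

	printf("state=%d (expect %d)\n", cpu_state, DEMO_WILL_SCHEDULE);
	return 0;
}
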
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 000000000000..bfadf1284291
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,348 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h>
17
18#include <litmus/rt_domain.h>
19
20#include <litmus/trace.h>
21
22#include <litmus/bheap.h>
23
24/* Uncomment when debugging timer races... */
25#if 0
26#define VTRACE_TASK TRACE_TASK
27#define VTRACE TRACE
28#else
29#define VTRACE_TASK(t, fmt, args...) /* shut up */
30#define VTRACE(fmt, args...) /* be quiet already */
31#endif
32
33static int dummy_resched(rt_domain_t *rt)
34{
35 return 0;
36}
37
38static int dummy_order(struct bheap_node* a, struct bheap_node* b)
39{
40 return 0;
41}
42
43/* default implementation: use default lock */
44static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
45{
46 merge_ready(rt, tasks);
47}
48
49static unsigned int time2slot(lt_t time)
50{
51 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
52}
53
54static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
55{
56 unsigned long flags;
57 struct release_heap* rh;
58 rh = container_of(timer, struct release_heap, timer);
59
60 TS_RELEASE_LATENCY(rh->release_time);
61
62 VTRACE("on_release_timer(0x%p) starts.\n", timer);
63
64 TS_RELEASE_START;
65
66
67 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
68 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
69 /* remove from release queue */
70 list_del(&rh->list);
71 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
72 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
73
74 /* call release callback */
75 rh->dom->release_jobs(rh->dom, &rh->heap);
76 /* WARNING: rh can be referenced from other CPUs from now on. */
77
78 TS_RELEASE_END;
79
80 VTRACE("on_release_timer(0x%p) ends.\n", timer);
81
82 return HRTIMER_NORESTART;
83}
84
85/* allocated in litmus.c */
86struct kmem_cache * release_heap_cache;
87
88struct release_heap* release_heap_alloc(int gfp_flags)
89{
90 struct release_heap* rh;
91 rh= kmem_cache_alloc(release_heap_cache, gfp_flags);
92 if (rh) {
93 /* initialize timer */
94 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
95 rh->timer.function = on_release_timer;
96 }
97 return rh;
98}
99
100void release_heap_free(struct release_heap* rh)
101{
102 /* make sure timer is no longer in use */
103 hrtimer_cancel(&rh->timer);
104 kmem_cache_free(release_heap_cache, rh);
105}
106
107/* Caller must hold release lock.
108 * Will return the heap for the given release time. If no such heap
109 * exists prior to the invocation, it will be created.
110 */
111static struct release_heap* get_release_heap(rt_domain_t *rt,
112 struct task_struct* t,
113 int use_task_heap)
114{
115 struct list_head* pos;
116 struct release_heap* heap = NULL;
117 struct release_heap* rh;
118 lt_t release_time = get_release(t);
119 unsigned int slot = time2slot(release_time);
120
121 /* initialize pos for the case that the list is empty */
122 pos = rt->release_queue.slot[slot].next;
123 list_for_each(pos, &rt->release_queue.slot[slot]) {
124 rh = list_entry(pos, struct release_heap, list);
125 if (release_time == rh->release_time) {
126 /* perfect match -- this happens on hyperperiod
127 * boundaries
128 */
129 heap = rh;
130 break;
131 } else if (lt_before(release_time, rh->release_time)) {
132 /* we need to insert a new node since rh is
133 * already in the future
134 */
135 break;
136 }
137 }
138 if (!heap && use_task_heap) {
139 /* use pre-allocated release heap */
140 rh = tsk_rt(t)->rel_heap;
141
142 rh->dom = rt;
143 rh->release_time = release_time;
144
145 /* add to release queue */
146 list_add(&rh->list, pos->prev);
147 heap = rh;
148 }
149 return heap;
150}
151
152static void reinit_release_heap(struct task_struct* t)
153{
154 struct release_heap* rh;
155
156 /* use pre-allocated release heap */
157 rh = tsk_rt(t)->rel_heap;
158
159 /* Make sure it is safe to use. The timer callback could still
160 * be executing on another CPU; hrtimer_cancel() will wait
161 * until the timer callback has completed. However, under no
162 * circumstances should the timer be active (= yet to be
163 * triggered).
164 *
165 * WARNING: If the CPU still holds the release_lock at this point,
166 * deadlock may occur!
167 */
168 BUG_ON(hrtimer_cancel(&rh->timer));
169
170 /* initialize */
171 bheap_init(&rh->heap);
172#ifdef CONFIG_RELEASE_MASTER
173 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
174#endif
175}
176/* arm_release_timer() - start local release timer or trigger
177 * remote timer (pull timer)
178 *
179 * Called by add_release() with:
180 * - tobe_lock taken
181 * - IRQ disabled
182 */
183#ifdef CONFIG_RELEASE_MASTER
184#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
185static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
186#else
187static void arm_release_timer(rt_domain_t *_rt)
188#endif
189{
190 rt_domain_t *rt = _rt;
191 struct list_head list;
192 struct list_head *pos, *safe;
193 struct task_struct* t;
194 struct release_heap* rh;
195
196 VTRACE("arm_release_timer() at %llu\n", litmus_clock());
197 list_replace_init(&rt->tobe_released, &list);
198
199 list_for_each_safe(pos, safe, &list) {
200		/* pick task off the work list */
201 t = list_entry(pos, struct task_struct, rt_param.list);
202 sched_trace_task_release(t);
203 list_del(pos);
204
205 /* put into release heap while holding release_lock */
206 raw_spin_lock(&rt->release_lock);
207 VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
208
209 rh = get_release_heap(rt, t, 0);
210 if (!rh) {
211 /* need to use our own, but drop lock first */
212 raw_spin_unlock(&rt->release_lock);
213 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
214 &rt->release_lock);
215
216 reinit_release_heap(t);
217 VTRACE_TASK(t, "release_heap ready\n");
218
219 raw_spin_lock(&rt->release_lock);
220 VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
221 &rt->release_lock);
222
223 rh = get_release_heap(rt, t, 1);
224 }
225 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
226 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
227
228 raw_spin_unlock(&rt->release_lock);
229 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
230
231 /* To avoid arming the timer multiple times, we only let the
232 * owner do the arming (which is the "first" task to reference
233 * this release_heap anyway).
234 */
235 if (rh == tsk_rt(t)->rel_heap) {
236 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
237 /* we cannot arm the timer using hrtimer_start()
238 * as it may deadlock on rq->lock
239 *
240 * PINNED mode is ok on both local and remote CPU
241 */
242#ifdef CONFIG_RELEASE_MASTER
243 if (rt->release_master == NO_CPU &&
244 target_cpu == NO_CPU)
245#endif
246 __hrtimer_start_range_ns(&rh->timer,
247 ns_to_ktime(rh->release_time),
248 0, HRTIMER_MODE_ABS_PINNED, 0);
249#ifdef CONFIG_RELEASE_MASTER
250 else
251 hrtimer_start_on(
252 /* target_cpu overrides release master */
253 (target_cpu != NO_CPU ?
254 target_cpu : rt->release_master),
255 &rh->info, &rh->timer,
256 ns_to_ktime(rh->release_time),
257 HRTIMER_MODE_ABS_PINNED);
258#endif
259 } else
260 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
261 }
262}
263
264void rt_domain_init(rt_domain_t *rt,
265 bheap_prio_t order,
266 check_resched_needed_t check,
267 release_jobs_t release
268 )
269{
270 int i;
271
272 BUG_ON(!rt);
273 if (!check)
274 check = dummy_resched;
275 if (!release)
276 release = default_release_jobs;
277 if (!order)
278 order = dummy_order;
279
280#ifdef CONFIG_RELEASE_MASTER
281 rt->release_master = NO_CPU;
282#endif
283
284 bheap_init(&rt->ready_queue);
285 INIT_LIST_HEAD(&rt->tobe_released);
286 for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
287 INIT_LIST_HEAD(&rt->release_queue.slot[i]);
288
289 raw_spin_lock_init(&rt->ready_lock);
290 raw_spin_lock_init(&rt->release_lock);
291 raw_spin_lock_init(&rt->tobe_lock);
292
293 rt->check_resched = check;
294 rt->release_jobs = release;
295 rt->order = order;
296}
297
298/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
299 * @new: the newly released task
300 */
301void __add_ready(rt_domain_t* rt, struct task_struct *new)
302{
303 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu "
304 "to ready queue at %llu\n",
305 new->comm, new->pid,
306 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
307 get_release(new), litmus_clock());
308
309 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
310
311 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
312 rt->check_resched(rt);
313}
314
315/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
316 * @tasks - the newly released tasks
317 */
318void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
319{
320 bheap_union(rt->order, &rt->ready_queue, tasks);
321 rt->check_resched(rt);
322}
323
324
325#ifdef CONFIG_RELEASE_MASTER
326void __add_release_on(rt_domain_t* rt, struct task_struct *task,
327 int target_cpu)
328{
329 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
330 get_release(task), target_cpu);
331 list_add(&tsk_rt(task)->list, &rt->tobe_released);
332 task->rt_param.domain = rt;
333
334 arm_release_timer_on(rt, target_cpu);
335}
336#endif
337
338/* add_release - add a real-time task to the rt release queue.
339 * @task: the sleeping task
340 */
341void __add_release(rt_domain_t* rt, struct task_struct *task)
342{
343 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
344 list_add(&tsk_rt(task)->list, &rt->tobe_released);
345 task->rt_param.domain = rt;
346
347 arm_release_timer(rt);
348}
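
For illustration only (not part of the applied diff): a sketch of the bucketing done by time2slot() above — release times are converted to quanta and hashed into a fixed number of release-queue slots, so releases sharing a time (e.g. on hyperperiod boundaries) land in the same list. The 127-slot count and 1 ms quantum are assumed values for this sketch; the real RELEASE_QUEUE_SLOTS and quantum length come from rt_domain.h and the configured tick, which are not shown in this hunk.

#include <stdio.h>

#define DEMO_RELEASE_QUEUE_SLOTS 127   /* assumed; see RELEASE_QUEUE_SLOTS in rt_domain.h */
#define DEMO_QUANTUM_NS 1000000ULL     /* assumed 1 ms quantum */

/* Mirrors time2slot(): quantize the time, then hash it into a slot. */
static unsigned int demo_time2slot(unsigned long long time_ns)
{
	return (unsigned int)((time_ns / DEMO_QUANTUM_NS) % DEMO_RELEASE_QUEUE_SLOTS);
}

int main(void)
{
	unsigned long long t = 250ULL * DEMO_QUANTUM_NS;

	printf("slot(%llu) = %u\n", t, demo_time2slot(t));  /* 250 % 127 = 123 */
	/* Times that differ by a full table's worth of quanta collide in one slot. */
	printf("collision: %d\n",
	       demo_time2slot(t) ==
	       demo_time2slot(t + (unsigned long long)DEMO_RELEASE_QUEUE_SLOTS * DEMO_QUANTUM_NS));
	return 0;
}
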
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 000000000000..c4747e0ef2ab
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,224 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin, some dummy functions, and some helper functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9#include <linux/sched.h>
10#include <linux/seq_file.h>
11
12#include <litmus/litmus.h>
13#include <litmus/sched_plugin.h>
14#include <litmus/preempt.h>
15#include <litmus/jobs.h>
16
17/*
18 * Generic function to trigger preemption on either local or remote cpu
19 * from scheduler plugins. The key feature is that this function is
20 * non-preemptive section aware and does not invoke the scheduler / send
21 * IPIs if the to-be-preempted task is actually non-preemptive.
22 */
23void preempt_if_preemptable(struct task_struct* t, int cpu)
24{
25 /* t is the real-time task executing on CPU on_cpu If t is NULL, then
26 * on_cpu is currently scheduling background work.
27 */
28
29 int reschedule = 0;
30
31 if (!t)
32 /* move non-real-time task out of the way */
33 reschedule = 1;
34 else {
35 if (smp_processor_id() == cpu) {
36 /* local CPU case */
37 /* check if we need to poke userspace */
38 if (is_user_np(t))
39 /* Yes, poke it. This doesn't have to be atomic since
40 * the task is definitely not executing. */
41 request_exit_np(t);
42 else if (!is_kernel_np(t))
43 /* only if we are allowed to preempt the
44 * currently-executing task */
45 reschedule = 1;
46 } else {
47 /* Remote CPU case. Only notify if it's not a kernel
48 * NP section and if we didn't set the userspace
49 * flag. */
50 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
51 }
52 }
53 if (likely(reschedule))
54 litmus_reschedule(cpu);
55}
56
57
58/*************************************************************
59 * Dummy plugin functions *
60 *************************************************************/
61
62static void litmus_dummy_finish_switch(struct task_struct * prev)
63{
64}
65
66static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
67{
68 sched_state_task_picked();
69 return NULL;
70}
71
72static void litmus_dummy_tick(struct task_struct* tsk)
73{
74}
75
76static long litmus_dummy_admit_task(struct task_struct* tsk)
77{
78 printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
79 tsk->comm, tsk->pid);
80 return -EINVAL;
81}
82
83static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
84{
85}
86
87static void litmus_dummy_task_wake_up(struct task_struct *task)
88{
89}
90
91static void litmus_dummy_task_block(struct task_struct *task)
92{
93}
94
95static void litmus_dummy_task_exit(struct task_struct *task)
96{
97}
98
99static long litmus_dummy_complete_job(void)
100{
101 return -ENOSYS;
102}
103
104static long litmus_dummy_activate_plugin(void)
105{
106 return 0;
107}
108
109static long litmus_dummy_deactivate_plugin(void)
110{
111 return 0;
112}
113
114#ifdef CONFIG_LITMUS_LOCKING
115
116static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
117 void* __user config)
118{
119 return -ENXIO;
120}
121
122#endif
123
124
125/* The default scheduler plugin. It doesn't do anything and lets Linux do its
126 * job.
127 */
128struct sched_plugin linux_sched_plugin = {
129 .plugin_name = "Linux",
130 .tick = litmus_dummy_tick,
131 .task_new = litmus_dummy_task_new,
132 .task_exit = litmus_dummy_task_exit,
133 .task_wake_up = litmus_dummy_task_wake_up,
134 .task_block = litmus_dummy_task_block,
135 .complete_job = litmus_dummy_complete_job,
136 .schedule = litmus_dummy_schedule,
137 .finish_switch = litmus_dummy_finish_switch,
138 .activate_plugin = litmus_dummy_activate_plugin,
139 .deactivate_plugin = litmus_dummy_deactivate_plugin,
140#ifdef CONFIG_LITMUS_LOCKING
141 .allocate_lock = litmus_dummy_allocate_lock,
142#endif
143 .admit_task = litmus_dummy_admit_task
144};
145
146/*
147 * The reference to the current plugin that is used to schedule tasks
148 * within the system. It stores references to the actual function
149 * implementations and should be initialized by calling "init_***_plugin()".
150 */
151struct sched_plugin *litmus = &linux_sched_plugin;
152
153/* the list of registered scheduling plugins */
154static LIST_HEAD(sched_plugins);
155static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
156
157#define CHECK(func) {\
158 if (!plugin->func) \
159 plugin->func = litmus_dummy_ ## func;}
160
161/* FIXME: get reference to module */
162int register_sched_plugin(struct sched_plugin* plugin)
163{
164 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
165 plugin->plugin_name);
166
167 /* make sure we don't trip over null pointers later */
168 CHECK(finish_switch);
169 CHECK(schedule);
170 CHECK(tick);
171 CHECK(task_wake_up);
172 CHECK(task_exit);
173 CHECK(task_block);
174 CHECK(task_new);
175 CHECK(complete_job);
176 CHECK(activate_plugin);
177 CHECK(deactivate_plugin);
178#ifdef CONFIG_LITMUS_LOCKING
179 CHECK(allocate_lock);
180#endif
181 CHECK(admit_task);
182
183 if (!plugin->release_at)
184 plugin->release_at = release_at;
185
186 raw_spin_lock(&sched_plugins_lock);
187 list_add(&plugin->list, &sched_plugins);
188 raw_spin_unlock(&sched_plugins_lock);
189
190 return 0;
191}
192
193
194/* FIXME: reference counting, etc. */
195struct sched_plugin* find_sched_plugin(const char* name)
196{
197 struct list_head *pos;
198 struct sched_plugin *plugin;
199
200 raw_spin_lock(&sched_plugins_lock);
201 list_for_each(pos, &sched_plugins) {
202 plugin = list_entry(pos, struct sched_plugin, list);
203 if (!strcmp(plugin->plugin_name, name))
204 goto out_unlock;
205 }
206 plugin = NULL;
207
208out_unlock:
209 raw_spin_unlock(&sched_plugins_lock);
210 return plugin;
211}
212
213void print_sched_plugins(struct seq_file *m)
214{
215 struct list_head *pos;
216 struct sched_plugin *plugin;
217
218 raw_spin_lock(&sched_plugins_lock);
219 list_for_each(pos, &sched_plugins) {
220 plugin = list_entry(pos, struct sched_plugin, list);
221 seq_printf(m, "%s\n", plugin->plugin_name);
222 }
223 raw_spin_unlock(&sched_plugins_lock);
224}
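
To make the effect of the CHECK() backfilling concrete, here is a minimal sketch (hypothetical, not part of this patch) of a plugin that implements only admit_task; every callback it leaves NULL is replaced with the corresponding litmus_dummy_* stub by register_sched_plugin(). The "DEMO" plugin name and init_demo_plugin() are made up for illustration.

#include <linux/module.h>
#include <linux/sched.h>

#include <litmus/sched_plugin.h>

/* Hypothetical "DEMO" plugin: rejects all tasks and relies on the
 * litmus_dummy_* callbacks for everything else. */
static long demo_admit_task(struct task_struct *tsk)
{
	return -EINVAL;
}

static struct sched_plugin demo_plugin = {
	.plugin_name = "DEMO",
	.admit_task  = demo_admit_task,
	/* .schedule, .task_new, ... are deliberately left NULL; CHECK()
	 * in register_sched_plugin() fills them in with dummies. */
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);
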
diff --git a/litmus/srp.c b/litmus/srp.c
new file mode 100644
index 000000000000..c88dbf2f580f
--- /dev/null
+++ b/litmus/srp.c
@@ -0,0 +1,305 @@
1/* ************************************************************************** */
2/* STACK RESOURCE POLICY */
3/* ************************************************************************** */
4
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/wait.h>
8
9#include <litmus/litmus.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/fdso.h>
12#include <litmus/trace.h>
13
14
15#ifdef CONFIG_LITMUS_LOCKING
16
17#include <litmus/srp.h>
18
19srp_prioritization_t get_srp_prio;
20
21struct srp {
22 struct list_head ceiling;
23 wait_queue_head_t ceiling_blocked;
24};
25#define system_ceiling(srp) list2prio(srp->ceiling.next)
26#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
27
28#define UNDEF_SEM -2
29
30atomic_t srp_objects_in_use = ATOMIC_INIT(0);
31
32DEFINE_PER_CPU(struct srp, srp);
33
34/* Initialize SRP per-CPU ceiling state at boot time. */
35static int __init srp_init(void)
36{
37 int i;
38
39	printk(KERN_INFO "Initializing SRP per-CPU ceilings...");
40 for (i = 0; i < NR_CPUS; i++) {
41 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
42 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
43 }
44	printk(KERN_CONT " done!\n");
45
46 return 0;
47}
48module_init(srp_init);
49
50/* SRP task priority comparison function. Smaller numeric values have higher
51 * priority; ties are broken by PID. Special case: priority == 0 <=> no priority.
52 */
53static int srp_higher_prio(struct srp_priority* first,
54 struct srp_priority* second)
55{
56 if (!first->priority)
57 return 0;
58 else
59 return !second->priority ||
60 first->priority < second->priority || (
61 first->priority == second->priority &&
62 first->pid < second->pid);
63}
64
65
66static int srp_exceeds_ceiling(struct task_struct* first,
67 struct srp* srp)
68{
69 struct srp_priority prio;
70
71 if (list_empty(&srp->ceiling))
72 return 1;
73 else {
74 prio.pid = first->pid;
75 prio.priority = get_srp_prio(first);
76 return srp_higher_prio(&prio, system_ceiling(srp)) ||
77 ceiling2sem(system_ceiling(srp))->owner == first;
78 }
79}
80
81static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
82{
83 struct list_head *pos;
84 if (in_list(&prio->list)) {
85 printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
86 "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
87 return;
88 }
89 list_for_each(pos, &srp->ceiling)
90 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
91 __list_add(&prio->list, pos->prev, pos);
92 return;
93 }
94
95 list_add_tail(&prio->list, &srp->ceiling);
96}
97
98
99static int lock_srp_semaphore(struct litmus_lock* l)
100{
101 struct task_struct* t = current;
102 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
103
104 if (!is_realtime(t))
105 return -EPERM;
106
107 /* prevent acquisition of local locks in global critical sections */
108 if (tsk_rt(t)->num_locks_held)
109 return -EBUSY;
110
111 preempt_disable();
112
113 /* Update ceiling. */
114 srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
115
116 /* SRP invariant: all resources available */
117 BUG_ON(sem->owner != NULL);
118
119 sem->owner = t;
120 TRACE_CUR("acquired srp 0x%p\n", sem);
121
122 tsk_rt(t)->num_local_locks_held++;
123
124 preempt_enable();
125
126 return 0;
127}
128
129static int unlock_srp_semaphore(struct litmus_lock* l)
130{
131 struct task_struct* t = current;
132 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
133 int err = 0;
134
135 preempt_disable();
136
137 if (sem->owner != t) {
138 err = -EINVAL;
139 } else {
140 /* Determine new system priority ceiling for this CPU. */
141 BUG_ON(!in_list(&sem->ceiling.list));
142
143 list_del(&sem->ceiling.list);
144 sem->owner = NULL;
145
146 /* Wake tasks on this CPU, if they exceed current ceiling. */
147 TRACE_CUR("released srp 0x%p\n", sem);
148 wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
149
150 tsk_rt(t)->num_local_locks_held--;
151 }
152
153 preempt_enable();
154 return err;
155}
156
157static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
158{
159 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
160 int err = 0;
161 struct task_struct* t = current;
162 struct srp_priority t_prio;
163
164 if (!is_realtime(t))
165 return -EPERM;
166
167 TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
168
169 preempt_disable();
170
171 if (sem->owner != NULL)
172 err = -EBUSY;
173
174 if (err == 0) {
175 if (sem->cpu == UNDEF_SEM)
176 sem->cpu = get_partition(t);
177 else if (sem->cpu != get_partition(t))
178 err = -EPERM;
179 }
180
181 if (err == 0) {
182 t_prio.priority = get_srp_prio(t);
183 t_prio.pid = t->pid;
184 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
185 sem->ceiling.priority = t_prio.priority;
186 sem->ceiling.pid = t_prio.pid;
187 }
188 }
189
190 preempt_enable();
191
192 return err;
193}
194
195static int close_srp_semaphore(struct litmus_lock* l)
196{
197 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
198 int err = 0;
199
200 preempt_disable();
201
202 if (sem->owner == current)
203 unlock_srp_semaphore(l);
204
205 preempt_enable();
206
207 return err;
208}
209
210static void deallocate_srp_semaphore(struct litmus_lock* l)
211{
212 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
213 atomic_dec(&srp_objects_in_use);
214 kfree(sem);
215}
216
217static struct litmus_lock_ops srp_lock_ops = {
218 .open = open_srp_semaphore,
219 .close = close_srp_semaphore,
220 .lock = lock_srp_semaphore,
221 .unlock = unlock_srp_semaphore,
222 .deallocate = deallocate_srp_semaphore,
223};
224
225struct srp_semaphore* allocate_srp_semaphore(void)
226{
227 struct srp_semaphore* sem;
228
229 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
230 if (!sem)
231 return NULL;
232
233 INIT_LIST_HEAD(&sem->ceiling.list);
234 sem->ceiling.priority = 0;
235 sem->cpu = UNDEF_SEM;
236 sem->owner = NULL;
237
238 sem->litmus_lock.ops = &srp_lock_ops;
239
240 atomic_inc(&srp_objects_in_use);
241 return sem;
242}
243
244static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
245 void *key)
246{
247 int cpu = smp_processor_id();
248 struct task_struct *tsk = wait->private;
249 if (cpu != get_partition(tsk))
250		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
251 get_partition(tsk));
252 else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
253 return default_wake_function(wait, mode, sync, key);
254 return 0;
255}
256
257static void do_ceiling_block(struct task_struct *tsk)
258{
259 wait_queue_t wait = {
260 .private = tsk,
261 .func = srp_wake_up,
262 .task_list = {NULL, NULL}
263 };
264
265 tsk->state = TASK_UNINTERRUPTIBLE;
266 add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
267 tsk->rt_param.srp_non_recurse = 1;
268 preempt_enable_no_resched();
269 schedule();
270 preempt_disable();
271 tsk->rt_param.srp_non_recurse = 0;
272 remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
273}
274
275/* Wait for current task priority to exceed system-wide priority ceiling.
276 * FIXME: the hotpath should be inline.
277 */
278void srp_ceiling_block(void)
279{
280 struct task_struct *tsk = current;
281
282	/* Only applies to real-time tasks; the branch hint assumes an RT caller. */
283 if (unlikely(!is_realtime(tsk)))
284 return;
285
286 /* Avoid recursive ceiling blocking. */
287 if (unlikely(tsk->rt_param.srp_non_recurse))
288 return;
289
290 /* Bail out early if there aren't any SRP resources around. */
291 if (likely(!atomic_read(&srp_objects_in_use)))
292 return;
293
294 preempt_disable();
295 if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
296 TRACE_CUR("is priority ceiling blocked.\n");
297 while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
298 do_ceiling_block(tsk);
299 TRACE_CUR("finally exceeds system ceiling.\n");
300 } else
301 TRACE_CUR("is not priority ceiling blocked\n");
302 preempt_enable();
303}
304
305#endif
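
As a side note, the priority ordering that srp_higher_prio() and srp_exceeds_ceiling() rely on can be exercised in isolation. The sketch below is plain userspace C (not kernel code); the local struct merely mirrors the priority/pid fields used by the kernel comparator and the values are made up.

#include <stdio.h>

struct prio { unsigned int priority; int pid; };

/* Same ordering as srp_higher_prio(): smaller values win, 0 means
 * "no priority", PIDs break ties. */
static int higher_prio(const struct prio *a, const struct prio *b)
{
	if (!a->priority)
		return 0;			/* "no priority" never wins */
	return !b->priority ||
	       a->priority < b->priority ||
	       (a->priority == b->priority && a->pid < b->pid);
}

int main(void)
{
	struct prio a = { .priority = 10, .pid = 100 };
	struct prio b = { .priority = 20, .pid =  50 };
	struct prio c = { .priority = 10, .pid =  50 };
	struct prio n = { .priority =  0, .pid =   1 };

	printf("a > b: %d\n", higher_prio(&a, &b));	/* 1: 10 beats 20    */
	printf("c > a: %d\n", higher_prio(&c, &a));	/* 1: PID tie-break  */
	printf("n > a: %d\n", higher_prio(&n, &a));	/* 0: no priority    */
	return 0;
}
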
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 000000000000..61a95463e4d2
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,166 @@
1/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
2 *
3 *
4 */
5
6#include <asm/atomic.h>
7#include <asm/uaccess.h>
8#include <linux/spinlock.h>
9#include <linux/list.h>
10#include <linux/sched.h>
11#include <linux/completion.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/jobs.h>
16
17#include <litmus/sched_trace.h>
18
19struct ts_release_wait {
20 struct list_head list;
21 struct completion completion;
22 lt_t ts_release_time;
23};
24
25#define DECLARE_TS_RELEASE_WAIT(symb) \
26 struct ts_release_wait symb = \
27 { \
28 LIST_HEAD_INIT(symb.list), \
29 COMPLETION_INITIALIZER_ONSTACK(symb.completion), \
30 0 \
31 }
32
33static LIST_HEAD(task_release_list);
34static DEFINE_MUTEX(task_release_lock);
35
36static long do_wait_for_ts_release(void)
37{
38 DECLARE_TS_RELEASE_WAIT(wait);
39
40 long ret = -ERESTARTSYS;
41
42 if (mutex_lock_interruptible(&task_release_lock))
43 goto out;
44
45 list_add(&wait.list, &task_release_list);
46
47 mutex_unlock(&task_release_lock);
48
49	/* We are enqueued; now we wait for someone to wake us up. */
50 ret = wait_for_completion_interruptible(&wait.completion);
51
52 if (!ret) {
53 /* Setting this flag before releasing ensures that this CPU
54 * will be the next CPU to requeue the task on a ready or
55 * release queue. Cleared by prepare_for_next_period()
56 */
57 tsk_rt(current)->dont_requeue = 1;
58
59		/* Completion succeeded; set up the release time. complete_job()
60 * will indirectly cause the period to be added to the next
61 * release time, so subtract it here. */
62 litmus->release_at(current, wait.ts_release_time
63 + current->rt_param.task_params.phase
64 - current->rt_param.task_params.period);
65
66		/* Advance to the next job --- when complete_job() returns, the
67		 * first job has been released. Since we patched up the release
68		 * time, this occurs when all tasks synchronously release their
69		 * first job. */
70 ret = complete_job();
71 } else {
72 /* We were interrupted, must cleanup list. */
73 mutex_lock(&task_release_lock);
74 if (!wait.completion.done)
75 list_del(&wait.list);
76 mutex_unlock(&task_release_lock);
77 }
78
79out:
80 return ret;
81}
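
The release-time patch-up above is simple arithmetic: release_at() is handed ts_release_time + phase - period, and the subsequent complete_job() adds one period back, so the task's first job is released exactly at ts_release_time + phase. A standalone sketch of that arithmetic (plain userspace C with made-up nanosecond values, not part of this patch):

#include <stdio.h>

typedef unsigned long long lt_t;	/* stand-in for the kernel's lt_t */

int main(void)
{
	lt_t ts_release = 1000000000ULL;	/* synchronous release point */
	lt_t phase      =   20000000ULL;	/* task offset               */
	lt_t period     =  100000000ULL;

	/* value handed to litmus->release_at() in do_wait_for_ts_release() */
	lt_t patched = ts_release + phase - period;

	/* complete_job() advances the job by one period */
	lt_t first_job_release = patched + period;

	printf("first job released at %llu (= ts_release + phase)\n",
	       first_job_release);
	return 0;
}
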
82
83int count_tasks_waiting_for_release(void)
84{
85 int task_count = 0;
86 struct list_head *pos;
87
88 mutex_lock(&task_release_lock);
89
90 list_for_each(pos, &task_release_list) {
91 task_count++;
92 }
93
94 mutex_unlock(&task_release_lock);
95
96
97 return task_count;
98}
99
100static long do_release_ts(lt_t start)
101{
102 long task_count = 0;
103
104 struct list_head *pos, *safe;
105 struct ts_release_wait *wait;
106
107 if (mutex_lock_interruptible(&task_release_lock)) {
108 task_count = -ERESTARTSYS;
109 goto out;
110 }
111
112 TRACE("<<<<<< synchronous task system release >>>>>>\n");
113 sched_trace_sys_release(&start);
114
115 task_count = 0;
116 list_for_each_safe(pos, safe, &task_release_list) {
117 wait = (struct ts_release_wait*)
118 list_entry(pos, struct ts_release_wait, list);
119
120 task_count++;
121 wait->ts_release_time = start;
122 complete(&wait->completion);
123 }
124
125 /* clear stale list */
126 INIT_LIST_HEAD(&task_release_list);
127
128 mutex_unlock(&task_release_lock);
129
130out:
131 return task_count;
132}
133
134
135asmlinkage long sys_wait_for_ts_release(void)
136{
137 long ret = -EPERM;
138 struct task_struct *t = current;
139
140 if (is_realtime(t))
141 ret = do_wait_for_ts_release();
142
143 return ret;
144}
145
146#define ONE_MS 1000000
147
148asmlinkage long sys_release_ts(lt_t __user *__delay)
149{
150 long ret;
151 lt_t delay;
152 lt_t start_time;
153
154 /* FIXME: check capabilities... */
155
156	ret = copy_from_user(&delay, __delay, sizeof(delay)) ? -EFAULT : 0;
157 if (ret == 0) {
158		/* truncate to a whole-millisecond boundary */
159 start_time = litmus_clock();
160 do_div(start_time, ONE_MS);
161 start_time *= ONE_MS;
162 ret = do_release_ts(start_time + delay);
163 }
164
165 return ret;
166}
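
Finally, the do_div() sequence in sys_release_ts() truncates the current time to a whole millisecond before the user-supplied delay is added. The same arithmetic in a standalone userspace sketch (made-up values, not part of this patch):

#include <stdio.h>

#define ONE_MS 1000000ULL		/* nanoseconds per millisecond */

int main(void)
{
	unsigned long long now   = 123456789ULL; /* stand-in for litmus_clock() */
	unsigned long long delay =   5 * ONE_MS; /* user-supplied delay         */

	/* same effect as do_div(start_time, ONE_MS); start_time *= ONE_MS; */
	unsigned long long start = (now / ONE_MS) * ONE_MS;

	printf("release at %llu (truncated from %llu, plus delay %llu)\n",
	       start + delay, now, delay);
	return 0;
}
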