author		Bjoern B. Brandenburg <bbb@cs.unc.edu>	2008-02-13 14:13:15 -0500
committer	Bjoern B. Brandenburg <bbb@cs.unc.edu>	2008-02-13 14:13:15 -0500
commit		8ce9b0cb97d9266b3b64b2b57835e17f6e03f585 (patch)
tree		a6ef1acaf9c9dc116ccc9f24f5233fa7d25cd426
parent		49914084e797530d9baaf51df9eda77babc98fa8 (diff)
LITMUS 2008: Initial Port
This introduces the core changes ported from LITMUS 2007. The kernel seems to work under QEMU, but many bugs probably remain.
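
For orientation, the sketch below shows how a user-space task could switch into the new scheduling class using the constants this patch introduces (SCHED_LITMUS = 6 and __NR_set_rt_task_param = 325 from the 32-bit syscall table). The struct layout mirrors struct rt_task from include/litmus/rt_param.h; the call order, the nanosecond interpretation of lt_t, and the concrete parameter values are illustrative assumptions, not something this commit specifies.

/* Illustrative user-space sketch only. The struct mirrors the kernel's
 * struct rt_task; setting parameters before switching the policy is an
 * assumption (litmus_admit_task() is invoked from sched_setscheduler()).
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>

#define SCHED_LITMUS            6    /* from the include/linux/sched.h hunk */
#define __NR_set_rt_task_param  325  /* from the include/asm-x86/unistd_32.h hunk */

struct rt_task {                     /* user-space view of the kernel struct */
	unsigned long long exec_cost;
	unsigned long long period;
	unsigned int cpu;
	unsigned int cls;            /* task_class_t: 0 hard, 1 soft, 2 best effort */
};

int main(void)
{
	struct rt_task p;
	struct sched_param sp;

	memset(&p, 0, sizeof(p));
	p.exec_cost = 10000000ULL;   /* 10 ms, assuming lt_t is in nanoseconds */
	p.period    = 100000000ULL;  /* 100 ms */
	p.cpu       = 0;
	p.cls       = 1;             /* RT_CLASS_SOFT */

	if (syscall(__NR_set_rt_task_param, getpid(), &p) < 0)
		perror("set_rt_task_param");

	memset(&sp, 0, sizeof(sp));  /* sched_priority must be 0 for SCHED_LITMUS */
	if (sched_setscheduler(getpid(), SCHED_LITMUS, &sp) < 0)
		perror("sched_setscheduler(SCHED_LITMUS)");

	return 0;
}
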
-rw-r--r--   Makefile                               2
-rw-r--r--   arch/x86/Kconfig                       2
-rw-r--r--   arch/x86/kernel/syscall_table_32.S    15
-rw-r--r--   fs/exec.c                              3
-rw-r--r--   fs/inode.c                             2
-rw-r--r--   include/asm-x86/unistd_32.h           16
-rw-r--r--   include/linux/fs.h                     5
-rw-r--r--   include/linux/sched.h                 16
-rw-r--r--   include/linux/uaccess.h               16
-rw-r--r--   include/litmus/edf_common.h           35
-rw-r--r--   include/litmus/fdso.h                 69
-rw-r--r--   include/litmus/feather_buffer.h      108
-rw-r--r--   include/litmus/feather_trace.h        93
-rw-r--r--   include/litmus/litmus.h              192
-rw-r--r--   include/litmus/rt_domain.h            94
-rw-r--r--   include/litmus/rt_param.h            135
-rw-r--r--   include/litmus/sched_plugin.h        118
-rw-r--r--   include/litmus/sched_trace.h          31
-rw-r--r--   include/litmus/trace.h                74
-rw-r--r--   kernel/exit.c                          4
-rw-r--r--   kernel/fork.c                          5
-rw-r--r--   kernel/sched.c                        34
-rw-r--r--   kernel/sched_fair.c                    2
-rw-r--r--   kernel/sched_rt.c                      2
-rw-r--r--   litmus/Kconfig                        24
-rw-r--r--   litmus/Makefile                        8
-rw-r--r--   litmus/edf_common.c                  132
-rw-r--r--   litmus/fdso.c                        279
-rw-r--r--   litmus/ft_event.c                    104
-rw-r--r--   litmus/litmus.c                      799
-rw-r--r--   litmus/litmus_sem.c                  566
-rw-r--r--   litmus/rt_domain.c                   130
-rw-r--r--   litmus/sched_gsn_edf.c               719
-rw-r--r--   litmus/sched_litmus.c                149
-rw-r--r--   litmus/sched_plugin.c                174
-rw-r--r--   litmus/sched_psn_edf.c               440
-rw-r--r--   litmus/sched_trace.c                 541
-rw-r--r--   litmus/trace.c                       303
38 files changed, 5434 insertions, 7 deletions
diff --git a/Makefile b/Makefile
index 189d8ef416..d9e4495038 100644
--- a/Makefile
+++ b/Makefile
@@ -597,7 +597,7 @@ export mod_strip_cmd
 
 
 ifeq ($(KBUILD_EXTMOD),)
-core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/
+core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
 
 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 80b7ba4056..f99330fed0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1620,3 +1620,5 @@ source "security/Kconfig"
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "litmus/Kconfig"
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 8344c70adf..9c9ffbe8b6 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -324,3 +324,18 @@ ENTRY(sys_call_table)
 	.long sys_timerfd
 	.long sys_eventfd
 	.long sys_fallocate
+	/* LITMUS */
+	.long sys_set_rt_task_param	/* 325 */
+	.long sys_get_rt_task_param
+	.long sys_complete_job
+	.long sys_register_np_flag
+	.long sys_exit_np
+	.long sys_od_open		/* 330 */
+	.long sys_od_close
+	.long sys_pi_down
+	.long sys_pi_up
+	.long sys_srp_down
+	.long sys_srp_up		/* 335 */
+	.long sys_reg_task_srp_sem
+	.long sys_query_job_no
+	.long sys_wait_for_job_release	/* 338 */
diff --git a/fs/exec.c b/fs/exec.c
index 282240afe9..6f47786702 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -56,6 +56,8 @@
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
 
+#include <litmus/litmus.h>
+
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
@@ -1309,6 +1311,7 @@ int do_execve(char * filename,
 		goto out_kfree;
 
 	sched_exec();
+	litmus_exec();
 
 	bprm->file = file;
 	bprm->filename = filename;
diff --git a/fs/inode.c b/fs/inode.c
index ed35383d0b..ef71ea06c6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -220,6 +220,8 @@ void inode_init_once(struct inode *inode)
 	INIT_LIST_HEAD(&inode->inotify_watches);
 	mutex_init(&inode->inotify_mutex);
 #endif
+	INIT_LIST_HEAD(&inode->i_obj_list);
+	mutex_init(&inode->i_obj_mutex);
 }
 
 EXPORT_SYMBOL(inode_init_once);
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 9b15545eb9..063c5856f2 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -330,10 +330,24 @@
 #define __NR_timerfd		322
 #define __NR_eventfd		323
 #define __NR_fallocate		324
+#define __NR_set_rt_task_param	325
+#define __NR_get_rt_task_param	326
+#define __NR_sleep_next_period	327
+#define __NR_register_np_flag	328
+#define __NR_exit_np		329
+#define __NR_od_open		330
+#define __NR_od_close		331
+#define __NR_pi_down		332
+#define __NR_pi_up		333
+#define __NR_srp_down		334
+#define __NR_srp_up		335
+#define __NR_reg_task_srp_sem	336
+#define __NR_query_job_no	337
+#define __NR_wait_for_job_release 338
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 325
+#define NR_syscalls 339
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b3ec4a496d..22f856c14e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -588,6 +588,8 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
 #define i_size_ordered_init(inode) do { } while (0)
 #endif
 
+struct inode_obj_id_table;
+
 struct inode {
 	struct hlist_node	i_hash;
 	struct list_head	i_list;
@@ -653,6 +655,9 @@ struct inode {
 	void			*i_security;
 #endif
 	void			*i_private; /* fs or device private pointer */
+
+	struct list_head	i_obj_list;
+	struct mutex		i_obj_mutex;
 };
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cc14656f86..9541cc8fe8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -37,6 +37,7 @@
 #define SCHED_BATCH		3
 /* SCHED_ISO: reserved but not implemented yet */
 #define SCHED_IDLE		5
+#define SCHED_LITMUS		6
 
 #ifdef __KERNEL__
 
@@ -91,6 +92,8 @@ struct sched_param {
 
 #include <asm/processor.h>
 
+#include <litmus/rt_param.h>
+
 struct exec_domain;
 struct futex_pi_state;
 struct bio;
@@ -914,6 +917,8 @@ struct sched_entity {
 #endif
 };
 
+struct od_table_entry;
+
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	void *stack;
@@ -1178,6 +1183,17 @@ struct task_struct {
 	int make_it_fail;
 #endif
 	struct prop_local_single dirties;
+
+	/* litmus parameters and state */
+	struct rt_param rt_param;
+
+	/* allow scheduler plugins to queue in release lists, etc.
+	 * Cleanup: Move this into the rt_param struct.
+	 */
+	struct list_head rt_list;
+
+	/* references to PI semaphores, etc. */
+	struct od_table_entry* od_table;
 };
 
 /*
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 975c963e57..6ae0ff9494 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -84,4 +84,20 @@ static inline unsigned long __copy_from_user_nocache(void *to,
 		ret;						\
 	})
 
+/* This is a naive attempt at a write version of the above native Linux macro.
+ */
+#define poke_kernel_address(val, addr)				\
+	({							\
+		long ret;					\
+		mm_segment_t old_fs = get_fs();			\
+								\
+		set_fs(KERNEL_DS);				\
+		pagefault_disable();				\
+		ret = __put_user(val, (__force typeof(val) __user *)(addr)); \
+		pagefault_enable();				\
+		set_fs(old_fs);					\
+		ret;						\
+	})
+
+
 #endif		/* __LINUX_UACCESS_H__ */
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
new file mode 100644
index 0000000000..f3c930b137
--- /dev/null
+++ b/include/litmus/edf_common.h
@@ -0,0 +1,35 @@
1/* EDF common data structures and utility functions shared by all EDF
2 * based scheduler plugins
3 */
4
5/* CLEANUP: Add comments and make it less messy.
6 *
7 */
8
9#ifndef __UNC_EDF_COMMON_H__
10#define __UNC_EDF_COMMON_H__
11
12#include <litmus/rt_domain.h>
13
14
15void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched);
16
17int edf_higher_prio(struct task_struct* first,
18 struct task_struct* second);
19
20int edf_ready_order(struct list_head* a, struct list_head* b);
21
22void edf_release_at(struct task_struct *t, lt_t start);
23
24int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
25long edf_complete_job(void);
26
27void edf_prepare_for_next_period(struct task_struct *t);
28
29#define job_completed(t) (!is_be(t) && \
30 (t)->rt_param.job_params.exec_time == (t)->rt_param.task_params.exec_cost)
31
32int edf_set_hp_task(struct pi_semaphore *sem);
33int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu);
34
35#endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 0000000000..5a783555e7
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,69 @@
1/* fdso.h - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 */
5
6#ifndef _LINUX_FDSO_H_
7#define _LINUX_FDSO_H_
8
9#include <linux/list.h>
10#include <asm/atomic.h>
11
12#include <linux/fs.h>
13
14#define MAX_OBJECT_DESCRIPTORS 32
15
16typedef enum {
17 MIN_OBJ_TYPE = 0,
18
19 PI_SEM = 0,
20 SRP_SEM = 1,
21
22 MAX_OBJ_TYPE = 1
23} obj_type_t;
24
25struct inode_obj_id {
26 struct list_head list;
27 atomic_t count;
28 struct inode* inode;
29
30 obj_type_t type;
31 void* obj;
32 unsigned int id;
33};
34
35
36struct od_table_entry {
37 unsigned int used;
38
39 struct inode_obj_id* obj;
40 void* extra;
41};
42
43struct fdso_ops {
44 void* (*create) (void);
45 void (*destroy)(void*);
46 int (*open) (struct od_table_entry*, void* __user);
47 int (*close) (struct od_table_entry*);
48};
49
50/* translate a userspace supplied od into the raw table entry
51 * returns NULL if od is invalid
52 */
53struct od_table_entry* __od_lookup(int od);
54
55/* translate a userspace supplied od into the associated object
56 * returns NULL if od is invalid
57 */
58static inline void* od_lookup(int od, obj_type_t type)
59{
60 struct od_table_entry* e = __od_lookup(od);
61 return e && e->obj->type == type ? e->obj->obj : NULL;
62}
63
64#define lookup_pi_sem(od) ((struct pi_semaphore*) od_lookup(od, PI_SEM))
65#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
66#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
67
68
69#endif
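
A rough user-space sketch of how the object-descriptor interface above might be exercised. The syscall numbers come from the unistd_32.h hunk in this patch; the file used as a namespace and passing NULL as the per-type config argument for PI_SEM are assumptions, not something this commit documents.

/* Illustrative only: attach a PI semaphore object to an inode and release it. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define __NR_od_open   330
#define __NR_od_close  331
#define PI_SEM         0   /* obj_type_t from include/litmus/fdso.h */

int main(void)
{
	/* Shared objects are attached to inodes, so cooperating tasks open
	 * the same file and use the same object id.
	 */
	int fd = open("/tmp/litmus_ns", O_RDONLY | O_CREAT, 0666);
	int od;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	od = syscall(__NR_od_open, fd, PI_SEM, 0 /* object id */, NULL);
	if (od < 0)
		perror("od_open");
	else
		syscall(__NR_od_close, od);

	close(fd);
	return 0;
}
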
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
new file mode 100644
index 0000000000..c788227905
--- /dev/null
+++ b/include/litmus/feather_buffer.h
@@ -0,0 +1,108 @@
1#ifndef _FEATHER_BUFFER_H_
2#define _FEATHER_BUFFER_H_
3
4/* requires UINT_MAX and memcpy */
5
6static inline int fetch_and_inc(int *val)
7{
8 int ret = 1;
9 __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
10 return ret;
11}
12
13static inline int fetch_and_dec(int *val)
14{
15 int ret = -1;
16 __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
17 return ret;
18}
19
20#define SLOT_FREE 0
21#define SLOT_BUSY 1
22#define SLOT_READY 2
23
24struct ft_buffer {
25 unsigned int slot_count;
26 unsigned int slot_size;
27
28 int free_count;
29 unsigned int write_idx;
30 unsigned int read_idx;
31
32 char* slots;
33 void* buffer_mem;
34 unsigned int failed_writes;
35};
36
37static inline int init_ft_buffer(struct ft_buffer* buf,
38 unsigned int slot_count,
39 unsigned int slot_size,
40 char* slots,
41 void* buffer_mem)
42{
43 int i = 0;
44 if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
45 /* The slot count must divide UINT_MAX + 1 so that when it
46 * wraps around the index correctly points to 0.
47 */
48 return 0;
49 } else {
50 buf->slot_count = slot_count;
51 buf->slot_size = slot_size;
52 buf->slots = slots;
53 buf->buffer_mem = buffer_mem;
54 buf->free_count = slot_count;
55 buf->write_idx = 0;
56 buf->read_idx = 0;
57 buf->failed_writes = 0;
58 for (i = 0; i < slot_count; i++)
59 buf->slots[i] = SLOT_FREE;
60 return 1;
61 }
62}
63
64static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
65{
66 int free = fetch_and_dec(&buf->free_count);
67 unsigned int idx;
68 if (free <= 0) {
69 fetch_and_inc(&buf->free_count);
70 *ptr = 0;
71 fetch_and_inc(&buf->failed_writes);
72 return 0;
73 } else {
74 idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
75 buf->slots[idx] = SLOT_BUSY;
76 *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
77 return 1;
78 }
79}
80
81static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
82{
83 unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
84 buf->slots[idx] = SLOT_READY;
85}
86
87
88/* exclusive reader access is assumed */
89static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
90{
91 unsigned int idx;
92 if (buf->free_count == buf->slot_count)
93 /* nothing available */
94 return 0;
95 idx = buf->read_idx % buf->slot_count;
96 if (buf->slots[idx] == SLOT_READY) {
97 memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
98 buf->slot_size);
99 buf->slots[idx] = SLOT_FREE;
100 buf->read_idx++;
101 fetch_and_inc(&buf->free_count);
102 return 1;
103 } else
104 return 0;
105}
106
107
108#endif
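
Because this header only depends on UINT_MAX and memcpy (plus x86 for the xaddl-based atomics), it can be exercised from a single-threaded user-space program. A minimal sketch, assuming the header has been copied out of the tree as "feather_buffer.h"; a slot count of 8 satisfies the divide-UINT_MAX+1 requirement noted in init_ft_buffer().

/* Minimal single-threaded exercise of the ft_buffer API above. */
#include <limits.h>
#include <string.h>
#include <stdio.h>

#include "feather_buffer.h"   /* assumed local copy of the header above */

#define SLOTS 8

int main(void)
{
	struct ft_buffer buf;
	static char slot_states[SLOTS];
	static unsigned long slot_mem[SLOTS];
	unsigned long *in;
	unsigned long out;
	int i;

	if (!init_ft_buffer(&buf, SLOTS, sizeof(unsigned long),
			    slot_states, slot_mem))
		return 1;

	/* writer side: reserve a slot, fill it, mark it ready */
	for (i = 0; i < 3; i++) {
		if (ft_buffer_start_write(&buf, (void**) &in)) {
			*in = 100 + i;
			ft_buffer_finish_write(&buf, in);
		}
	}

	/* reader side: drain whatever is ready */
	while (ft_buffer_read(&buf, &out))
		printf("read %lu\n", out);

	return 0;
}
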
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
new file mode 100644
index 0000000000..5c37ea71ea
--- /dev/null
+++ b/include/litmus/feather_trace.h
@@ -0,0 +1,93 @@
1#ifndef _FEATHER_TRACE_H_
2#define _FEATHER_TRACE_H_
3
4#define feather_callback __attribute__((regparm(0)))
5
6/* make the compiler reload any register that is not saved in
7 * a cdecl function call
8 */
9#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx"
10
11#define ft_event(id, callback) \
12 __asm__ __volatile__( \
13 "1: jmp 2f \n\t" \
14 " call " #callback " \n\t" \
15 ".section __event_table, \"aw\" \n\t" \
16 ".long " #id ", 0, 1b, 2f \n\t" \
17 ".previous \n\t" \
18 "2: \n\t" \
19 : : : CLOBBER_LIST)
20
21#define ft_event0(id, callback) \
22 __asm__ __volatile__( \
23 "1: jmp 2f \n\t" \
24 " subl $4, %%esp \n\t" \
25 " movl $" #id ", (%%esp) \n\t" \
26 " call " #callback " \n\t" \
27 " addl $4, %%esp \n\t" \
28 ".section __event_table, \"aw\" \n\t" \
29 ".long " #id ", 0, 1b, 2f \n\t" \
30 ".previous \n\t" \
31 "2: \n\t" \
32 : : : CLOBBER_LIST)
33
34#define ft_event1(id, callback, param) \
35 __asm__ __volatile__( \
36 "1: jmp 2f \n\t" \
37 " subl $8, %%esp \n\t" \
38 " movl %0, 4(%%esp) \n\t" \
39 " movl $" #id ", (%%esp) \n\t" \
40 " call " #callback " \n\t" \
41 " addl $8, %%esp \n\t" \
42 ".section __event_table, \"aw\" \n\t" \
43 ".long " #id ", 0, 1b, 2f \n\t" \
44 ".previous \n\t" \
45 "2: \n\t" \
46 : : "r" (param) : CLOBBER_LIST)
47
48#define ft_event2(id, callback, param, param2) \
49 __asm__ __volatile__( \
50 "1: jmp 2f \n\t" \
51 " subl $12, %%esp \n\t" \
52 " movl %1, 8(%%esp) \n\t" \
53 " movl %0, 4(%%esp) \n\t" \
54 " movl $" #id ", (%%esp) \n\t" \
55 " call " #callback " \n\t" \
56 " addl $12, %%esp \n\t" \
57 ".section __event_table, \"aw\" \n\t" \
58 ".long " #id ", 0, 1b, 2f \n\t" \
59 ".previous \n\t" \
60 "2: \n\t" \
61 : : "r" (param), "r" (param2) : CLOBBER_LIST)
62
63
64#define ft_event3(id, callback, p, p2, p3) \
65 __asm__ __volatile__( \
66 "1: jmp 2f \n\t" \
67 " subl $16, %%esp \n\t" \
68 " movl %1, 12(%%esp) \n\t" \
69 " movl %1, 8(%%esp) \n\t" \
70 " movl %0, 4(%%esp) \n\t" \
71 " movl $" #id ", (%%esp) \n\t" \
72 " call " #callback " \n\t" \
73 " addl $16, %%esp \n\t" \
74 ".section __event_table, \"aw\" \n\t" \
75 ".long " #id ", 0, 1b, 2f \n\t" \
76 ".previous \n\t" \
77 "2: \n\t" \
78 : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
79
80
81static inline unsigned long long ft_read_tsc(void)
82{
83 unsigned long long ret;
84 __asm__ __volatile__("rdtsc" : "=A" (ret));
85 return ret;
86}
87
88int ft_enable_event(unsigned long id);
89int ft_disable_event(unsigned long id);
90int ft_is_event_enabled(unsigned long id);
91int ft_disable_all_events(void);
92
93#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
new file mode 100644
index 0000000000..6e99e651d7
--- /dev/null
+++ b/include/litmus/litmus.h
@@ -0,0 +1,192 @@
1/*
2 * Constant definitions related to
3 * scheduling policy.
4 */
5
6#ifndef _LINUX_LITMUS_H_
7#define _LINUX_LITMUS_H_
8
9#include <linux/jiffies.h>
10#include <litmus/sched_trace.h>
11
12typedef enum {
13 SCHED_LINUX = 0,
14 SCHED_GSN_EDF = 10,
15 SCHED_PSN_EDF = 11,
16 /* Add your scheduling policy here */
17
18 SCHED_DEFAULT = 0,
19 SCHED_INVALID = -1,
20} spolicy;
21
22
23typedef enum {
24 LITMUS_RESERVED_RANGE = 1024,
25
26} sched_setup_cmd_t;
27
28/* per-task modes */
29enum rt_task_mode_t {
30 BACKGROUND_TASK = 0,
31 LITMUS_RT_TASK = 1
32};
33
34/* Plugin boot options, for convenience */
35#define PLUGIN_LINUX "linux"
36#define PLUGIN_GSN_EDF "gsn_edf"
37#define PLUGIN_PSN_EDF "psn_edf"
38
39extern spolicy sched_policy;
40
41/* RT mode start time */
42extern volatile unsigned long rt_start_time;
43
44#define TRACE(fmt, args...) \
45 sched_trace_log_message("%d: " fmt, raw_smp_processor_id(), ## args)
46
47#define TRACE_TASK(t, fmt, args...) \
48 TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args)
49
50#define TRACE_CUR(fmt, args...) \
51 TRACE_TASK(current, fmt, ## args)
52
53#define TRACE_BUG_ON(cond) \
54 do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \
55 "called from %p current=%s/%d state=%d " \
56 "flags=%x partition=%d cpu=%d rtflags=%d"\
57 " job=%u knp=%d timeslice=%u\n", \
58 #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \
59 current->pid, current->state, current->flags, \
60 get_partition(current), smp_processor_id(), get_rt_flags(current), \
61 current->rt_param.job_params.job_no, current->rt_param.kernel_np, \
62 current->time_slice\
63 ); } while(0);
64
65
66/* in_list - is a given list_head queued on some list?
67 */
68static inline int in_list(struct list_head* list)
69{
70 return !( /* case 1: deleted */
71 (list->next == LIST_POISON1 &&
72 list->prev == LIST_POISON2)
73 ||
74 /* case 2: initialized */
75 (list->next == list &&
76 list->prev == list)
77 );
78}
79
80typedef int (*list_cmp_t)(struct list_head*, struct list_head*);
81
82static inline unsigned int list_insert(struct list_head* new,
83 struct list_head* head,
84 list_cmp_t order_before)
85{
86 struct list_head *pos;
87 unsigned int passed = 0;
88
89 BUG_ON(!new);
90
91 /* find a spot where the new entry is less than the next */
92 list_for_each(pos, head) {
93 if (unlikely(order_before(new, pos))) {
94 /* pos is not less than new, thus insert here */
95 __list_add(new, pos->prev, pos);
96 goto out;
97 }
98 passed++;
99 }
100 /* if we get to this point either the list is empty or every
101 * queued element is less than new.
102 * Let's add new to the end. */
103 list_add_tail(new, head);
104 out:
105 return passed;
106}
107
108void list_qsort(struct list_head* list, list_cmp_t less_than);
109
110
111#define RT_PREEMPTIVE 0x2050 /* = NP */
112#define RT_NON_PREEMPTIVE 0x4e50 /* = P */
113#define RT_EXIT_NP_REQUESTED 0x5251 /* = RQ */
114
115/* returns 1 if task t has registered np flag and set it to RT_NON_PREEMPTIVE
116 */
117int is_np(struct task_struct *t);
118
119/* request that the task should call sys_exit_np()
120 */
121void request_exit_np(struct task_struct *t);
122
123/* kill naughty tasks
124 */
125void scheduler_signal(struct task_struct *t, unsigned int signal);
126void send_scheduler_signals(void);
127void np_mem_kill(struct task_struct *t);
128
129void litmus_fork(struct task_struct *tsk);
130void litmus_exec(void);
131/* clean up real-time state of a task */
132void exit_litmus(struct task_struct *dead_tsk);
133
134long litmus_admit_task(struct task_struct *tsk);
135void litmus_exit_task(struct task_struct *tsk);
136
137#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
138#define rt_transition_pending(t) \
139 ((t)->rt_param.transition_pending)
140
141/* Realtime utility macros */
142#define get_rt_flags(t) ((t)->rt_param.flags)
143#define set_rt_flags(t,f) (t)->rt_param.flags=(f)
144#define get_exec_cost(t) ((t)->rt_param.task_params.exec_cost)
145#define get_exec_time(t) ((t)->rt_param.job_params.exec_time)
146#define get_rt_period(t) ((t)->rt_param.task_params.period)
147#define get_partition(t) (t)->rt_param.task_params.cpu
148#define get_deadline(t) ((t)->rt_param.job_params.deadline)
149#define get_class(t) ((t)->rt_param.task_params.cls)
150
151inline static int budget_exhausted(struct task_struct* t)
152{
153 return get_exec_time(t) >= get_exec_cost(t);
154}
155
156#define is_subject_to_srp(t) ((t)->rt_param.subject_to_srp)
157#define is_hrt(t) \
158 ((t)->rt_param.task_params.cls == RT_CLASS_HARD)
159#define is_srt(t) \
160 ((t)->rt_param.task_params.cls == RT_CLASS_SOFT)
161#define is_be(t) \
162 ((t)->rt_param.task_params.cls == RT_CLASS_BEST_EFFORT)
163
164#define get_release(t) ((t)->rt_param.job_params.release)
165
166/* Honor the flag in the preempt_count variable that is set
167 * when scheduling is in progress.
168 */
169#define is_running(t) \
170 ((t)->state == TASK_RUNNING || \
171 task_thread_info(t)->preempt_count & PREEMPT_ACTIVE)
172
173#define is_blocked(t) \
174 (!is_running(t))
175#define is_released(t, now) \
176 (lt_before_eq(get_release(t), now))
177#define is_tardy(t, now) \
178 (lt_before_eq((t)->rt_param.job_params.deadline, now))
179
180/* real-time comparison macros */
181#define earlier_deadline(a, b) (lt_before(\
182 (a)->rt_param.job_params.deadline,\
183 (b)->rt_param.job_params.deadline))
184#define earlier_release(a, b) (lt_before(\
185 (a)->rt_param.job_params.release,\
186 (b)->rt_param.job_params.release))
187
188#define make_np(t) do {t->rt_param.kernel_np++;} while(0);
189#define take_np(t) do {t->rt_param.kernel_np--;} while(0);
190
191
192#endif
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
new file mode 100644
index 0000000000..79b6034f22
--- /dev/null
+++ b/include/litmus/rt_domain.h
@@ -0,0 +1,94 @@
1/* CLEANUP: Add comments and make it less messy.
2 *
3 */
4
5#ifndef __UNC_RT_DOMAIN_H__
6#define __UNC_RT_DOMAIN_H__
7
8struct _rt_domain;
9
10typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
11typedef void (*release_at_t)(struct task_struct *t, lt_t start);
12
13typedef struct _rt_domain {
14 /* runnable rt tasks are in here */
15 rwlock_t ready_lock;
16 struct list_head ready_queue;
17
18 /* real-time tasks waiting for release are in here */
19 spinlock_t release_lock;
20 struct list_head release_queue;
21
22 /* how do we check if we need to kick another CPU? */
23 check_resched_needed_t check_resched;
24
25 /* how are tasks ordered in the ready queue? */
26 list_cmp_t order;
27} rt_domain_t;
28
29#define next_ready(rt) \
30 (list_entry((rt)->ready_queue.next, struct task_struct, rt_list))
31
32#define ready_jobs_pending(rt) \
33 (!list_empty(&(rt)->ready_queue))
34
35void rt_domain_init(rt_domain_t *rt, check_resched_needed_t f,
36 list_cmp_t order);
37
38void __add_ready(rt_domain_t* rt, struct task_struct *new);
39void __add_release(rt_domain_t* rt, struct task_struct *task);
40
41struct task_struct* __take_ready(rt_domain_t* rt);
42struct task_struct* __peek_ready(rt_domain_t* rt);
43
44void try_release_pending(rt_domain_t* rt);
45void __release_pending(rt_domain_t* rt);
46
47static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
48{
49 unsigned long flags;
50 /* first we need the write lock for rt_ready_queue */
51 write_lock_irqsave(&rt->ready_lock, flags);
52 __add_ready(rt, new);
53 write_unlock_irqrestore(&rt->ready_lock, flags);
54}
55
56static inline struct task_struct* take_ready(rt_domain_t* rt)
57{
58 unsigned long flags;
59 struct task_struct* ret;
60 /* first we need the write lock for rt_ready_queue */
61 write_lock_irqsave(&rt->ready_lock, flags);
62 ret = __take_ready(rt);
63 write_unlock_irqrestore(&rt->ready_lock, flags);
64 return ret;
65}
66
67
68static inline void add_release(rt_domain_t* rt, struct task_struct *task)
69{
70 unsigned long flags;
71 /* first we need the write lock for rt_ready_queue */
72 spin_lock_irqsave(&rt->release_lock, flags);
73 __add_release(rt, task);
74 spin_unlock_irqrestore(&rt->release_lock, flags);
75}
76
77static inline int __jobs_pending(rt_domain_t* rt)
78{
79 return !list_empty(&rt->ready_queue);
80}
81
82static inline int jobs_pending(rt_domain_t* rt)
83{
84 unsigned long flags;
85 int ret;
86 /* first we need the write lock for rt_ready_queue */
87 read_lock_irqsave(&rt->ready_lock, flags);
88 ret = __jobs_pending(rt);
89 read_unlock_irqrestore(&rt->ready_lock, flags);
90 return ret;
91}
92
93
94#endif
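
A kernel-side sketch of how a plugin might drive an rt_domain_t using the helpers above together with edf_domain_init() from later in this patch. The function names are made up and the resched callback is a stub; a real plugin would trigger a reschedule on the affected CPU.

/* Kernel-context sketch only; names are hypothetical. */
#include <litmus/litmus.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>

static rt_domain_t demo_domain;

static int demo_check_resched(rt_domain_t *rt)
{
	/* a real plugin would set need_resched on some CPU here */
	return 0;
}

static void demo_domain_setup(void)
{
	/* order the ready queue by EDF priority */
	edf_domain_init(&demo_domain, demo_check_resched);
}

static struct task_struct* demo_pick_next(void)
{
	/* would be called from the plugin's schedule() hook; takes the
	 * ready lock internally and dequeues the highest-priority job
	 */
	return take_ready(&demo_domain);
}
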
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
new file mode 100644
index 0000000000..9fb5b19b78
--- /dev/null
+++ b/include/litmus/rt_param.h
@@ -0,0 +1,135 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_RT_PARAM_H_
6#define _LINUX_RT_PARAM_H_
7
8/* Litmus time type. */
9typedef unsigned long long lt_t;
10
11static inline int lt_after(lt_t a, lt_t b)
12{
13 return ((long long) b) - ((long long) a) < 0;
14}
15#define lt_before(a, b) lt_after(b, a)
16
17static inline int lt_after_eq(lt_t a, lt_t b)
18{
19 return ((long long) a) - ((long long) b) >= 0;
20}
21#define lt_before_eq(a, b) lt_after_eq(b, a)
22
23/* different types of clients */
24typedef enum {
25 RT_CLASS_HARD,
26 RT_CLASS_SOFT,
27 RT_CLASS_BEST_EFFORT
28} task_class_t;
29
30struct rt_task {
31 lt_t exec_cost;
32 lt_t period;
33 unsigned int cpu;
34 task_class_t cls;
35};
36
37/* don't export internal data structures to user space (liblitmus) */
38#ifdef __KERNEL__
39
40struct rt_job {
41 /* Time instant at which the job was or will be released. */
42 lt_t release;
43 /* What is the current deadline? */
44 lt_t deadline;
45 /* How much service has this job received so far?
46 */
47 lt_t exec_time;
48
49 /* Which job is this. This is used to let user space
50 * specify which job to wait for, which is important if jobs
51 * overrun. If we just call sys_sleep_next_period() then we
52 * will unintentionally miss jobs after an overrun.
53 *
54 * Increase this sequence number when a job is released.
55 */
56 unsigned int job_no;
57
58 /* when did this job start executing? */
59 lt_t exec_start;
60};
61
62
63/* RT task parameters for scheduling extensions
64 * These parameters are inherited during clone and therefore must
65 * be explicitly set up before the task set is launched.
66 */
67struct rt_param {
68 /* is the task sleeping? */
69 unsigned int flags:8;
70
71 /* Did this task register any SRP controlled resource accesses?
72 * This, of course, should only ever be true under partitioning.
73 * However, this limitation is not currently enforced.
74 */
75 unsigned int subject_to_srp:1;
76
77 /* user controlled parameters */
78 struct rt_task task_params;
79
80 /* timing parameters */
81 struct rt_job job_params;
82
83 /* task representing the current "inherited" task
84 * priority, assigned by inherit_priority and
85 * return priority in the scheduler plugins.
86 * could point to self if PI does not result in
87 * an increased task priority.
88 */
89 struct task_struct* inh_task;
90
91 /* Don't just dereference this pointer in kernel space!
92 * It might very well point to junk or nothing at all.
93 * NULL indicates that the task has not requested any non-preemptable
94 * section support.
95 * Not inherited upon fork.
96 */
97 short* np_flag;
98
99 /* For the FMLP under PSN-EDF, it is required to make the task
100 * non-preemptive from kernel space. In order not to interfere with
101 * user space, this counter indicates the kernel space np setting.
102 * kernel_np > 0 => task is non-preemptive
103 */
104 unsigned int kernel_np;
105
106 /* This field can be used by plugins to store where the task
107 * is currently scheduled. It is the responsibility of the
108 * plugin to avoid race conditions.
109 *
110 * Used by GSN-EDF.
111 */
112 volatile int scheduled_on;
113
114 /* This field can be used by plugins to store where the task
115 * is currently linked. It is the responsibility of the plugin
116 * to avoid race conditions.
117 *
118 * Used by GSN-EDF.
119 */
120 volatile int linked_on;
121
122 /* Fields saved before BE->RT transition.
123 */
124 int old_policy;
125 int old_prio;
126};
127
128/* Possible RT flags */
129#define RT_F_RUNNING 0x00000000
130#define RT_F_SLEEP 0x00000001
131#define RT_F_EXIT_SEM 0x00000008
132
133#endif
134
135#endif
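
The lt_t comparison helpers rely on signed subtraction, so they stay correct even if the time counter wraps around. A small user-space check (the helpers are repeated here only to make the snippet standalone; the time values are made up):

/* Quick check of the wraparound behaviour of the lt_t helpers above. */
#include <assert.h>
#include <stdio.h>

typedef unsigned long long lt_t;

static inline int lt_after(lt_t a, lt_t b)
{
	return ((long long) b) - ((long long) a) < 0;
}
#define lt_before(a, b) lt_after(b, a)

int main(void)
{
	lt_t before_wrap = ~0ULL - 10;  /* just below the 64-bit limit */
	lt_t after_wrap  = 5;           /* clock has wrapped around */

	/* a plain unsigned comparison would get this wrong */
	assert(lt_after(after_wrap, before_wrap));
	assert(lt_before(before_wrap, after_wrap));

	printf("lt_t comparisons handle wraparound as expected\n");
	return 0;
}
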
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
new file mode 100644
index 0000000000..421c54f517
--- /dev/null
+++ b/include/litmus/sched_plugin.h
@@ -0,0 +1,118 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_SCHED_PLUGIN_H_
6#define _LINUX_SCHED_PLUGIN_H_
7
8#include <linux/sched.h>
9
10/* struct for semaphore with priority inheritance */
11struct pi_semaphore {
12 atomic_t count;
13 int sleepers;
14 wait_queue_head_t wait;
15 union {
16 /* highest-prio holder/waiter */
17 struct task_struct *task;
18 struct task_struct* cpu_task[NR_CPUS];
19 } hp;
20 /* current lock holder */
21 struct task_struct *holder;
22};
23
24
25/********************* scheduler invocation ******************/
26
27/* Plugin-specific realtime tick handler */
28typedef void (*scheduler_tick_t) (struct task_struct *cur);
29/* Novell make sched decision function */
30typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
31/* Clean up after the task switch has occurred.
32 * This function is called after every (even non-rt) task switch.
33 */
34typedef void (*finish_switch_t)(struct task_struct *prev);
35
36
37/********************* task state changes ********************/
38
39/* Called to setup a new real-time task.
40 * Release the first job, enqueue, etc.
41 * Task may already be running.
42 */
43typedef void (*task_new_t) (struct task_struct *task,
44 int on_rq,
45 int running);
46
47/* Called to re-introduce a task after blocking.
48 * Can potentially be called multiple times.
49 */
50typedef void (*task_wake_up_t) (struct task_struct *task);
51/* called to notify the plugin of a blocking real-time task
52 * it will only be called for real-time tasks and before schedule is called */
53typedef void (*task_block_t) (struct task_struct *task);
54/* Called when a real-time task exits or changes to a different scheduling
55 * class.
56 * Free any allocated resources
57 */
58typedef void (*task_exit_t) (struct task_struct *);
59
60/* Called when the new_owner is released from the wait queue
61 * it should now inherit the priority from sem, _before_ it gets readded
62 * to any queue
63 */
64typedef long (*inherit_priority_t) (struct pi_semaphore *sem,
65 struct task_struct *new_owner);
66
67/* Called when the current task releases a semaphore from which it might
68 * have inherited a priority
69 */
70typedef long (*return_priority_t) (struct pi_semaphore *sem);
71
72/* Called when a task tries to acquire a semaphore and fails. Check if its
73 * priority is higher than that of the current holder.
74 */
75typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t);
76
77
78/********************* sys call backends ********************/
79/* This function causes the caller to sleep until the next release */
80typedef long (*complete_job_t) (void);
81
82typedef long (*admit_task_t)(struct task_struct* tsk);
83
84struct sched_plugin {
85 struct list_head list;
86 /* basic info */
87 char *plugin_name;
88
89 /* scheduler invocation */
90 scheduler_tick_t tick;
91 schedule_t schedule;
92 finish_switch_t finish_switch;
93
94 /* syscall backend */
95 complete_job_t complete_job;
96
97 /* task state changes */
98 admit_task_t admit_task;
99
100 task_new_t task_new;
101 task_wake_up_t task_wake_up;
102 task_block_t task_block;
103 task_exit_t task_exit;
104
105 /* priority inheritance */
106 inherit_priority_t inherit_priority;
107 return_priority_t return_priority;
108 pi_block_t pi_block;
109} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
110
111
112extern struct sched_plugin *litmus;
113
114int register_sched_plugin(struct sched_plugin* plugin);
115struct sched_plugin* find_sched_plugin(const char* name);
116int print_sched_plugins(char* buf, int max);
117
118#endif
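
A kernel-side sketch of a do-nothing plugin against the interface above. The name "demo" and the stub bodies are made up; stubs are supplied for every hook because sched.c in this patch calls litmus->finish_switch() and litmus->task_new() unconditionally, so leaving callbacks NULL is assumed to be unsafe.

/* Kernel-context sketch only; "demo" is hypothetical. */
#include <linux/init.h>
#include <linux/errno.h>
#include <litmus/sched_plugin.h>

static void demo_tick(struct task_struct *cur) { }
static struct task_struct* demo_schedule(struct task_struct *prev)
{
	return NULL;   /* nothing to schedule: fall back to Linux tasks */
}
static void demo_finish_switch(struct task_struct *prev) { }
static long demo_complete_job(void) { return 0; }
static long demo_admit_task(struct task_struct *tsk) { return -EINVAL; }
static void demo_task_new(struct task_struct *t, int on_rq, int running) { }
static void demo_task_wake_up(struct task_struct *t) { }
static void demo_task_block(struct task_struct *t) { }
static void demo_task_exit(struct task_struct *t) { }
static long demo_inherit_priority(struct pi_semaphore *sem,
				  struct task_struct *new_owner) { return 0; }
static long demo_return_priority(struct pi_semaphore *sem) { return 0; }
static long demo_pi_block(struct pi_semaphore *sem,
			  struct task_struct *t) { return 0; }

static struct sched_plugin demo_plugin = {
	.plugin_name      = "demo",
	.tick             = demo_tick,
	.schedule         = demo_schedule,
	.finish_switch    = demo_finish_switch,
	.complete_job     = demo_complete_job,
	.admit_task       = demo_admit_task,
	.task_new         = demo_task_new,
	.task_wake_up     = demo_task_wake_up,
	.task_block       = demo_task_block,
	.task_exit        = demo_task_exit,
	.inherit_priority = demo_inherit_priority,
	.return_priority  = demo_return_priority,
	.pi_block         = demo_pi_block,
};

static int __init demo_plugin_init(void)
{
	return register_sched_plugin(&demo_plugin);
}
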
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
new file mode 100644
index 0000000000..60dcbfb0ae
--- /dev/null
+++ b/include/litmus/sched_trace.h
@@ -0,0 +1,31 @@
1/* sched_trace.h -- record scheduler events to a byte stream for offline analysis.
2 */
3#ifndef _LINUX_SCHED_TRACE_H_
4#define _LINUX_SCHED_TRACE_H_
5
6#include <linux/sched.h>
7
8/* dummies, need to be re-implemented */
9
10/* used in sched.c */
11#define sched_trace_task_arrival(t)
12#define sched_trace_task_departure(t)
13#define sched_trace_task_preemption(t, by)
14#define sched_trace_task_scheduled(t)
15
16/* used in scheduler plugins */
17#define sched_trace_job_release(t)
18#define sched_trace_job_completion(t)
19
20
21#ifdef CONFIG_SCHED_DEBUG_TRACE
22void sched_trace_log_message(const char* fmt, ...);
23
24#else
25
26#define sched_trace_log_message(fmt, ...)
27
28#endif
29
30
31#endif
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
new file mode 100644
index 0000000000..04510237ec
--- /dev/null
+++ b/include/litmus/trace.h
@@ -0,0 +1,74 @@
1
2#ifndef _SYS_TRACE_H_
3#define _SYS_TRACE_H_
4
5#include <litmus/feather_trace.h>
6#include <litmus/feather_buffer.h>
7
8
9/*********************** TIMESTAMPS ************************/
10
11struct timestamp {
12 unsigned long event;
13 unsigned long long timestamp;
14 unsigned int seq_no;
15 int cpu;
16};
17
18
19/* buffer holding time stamps - will be provided by driver */
20extern struct ft_buffer* trace_ts_buf;
21
22/* save_timestamp: stores current time as struct timestamp
23 * in trace_ts_buf
24 */
25asmlinkage void save_timestamp(unsigned long event);
26
27#define TIMESTAMP(id) ft_event0(id, save_timestamp)
28
29/* Convention for timestamps
30 * =========================
31 *
32 * In order to process the trace files with a common tool, we use the following
33 * convention to measure execution times: The end time id of a code segment is
34 * always the next number after the start time event id.
35 */
36
37#define TS_SCHED_START TIMESTAMP(100)
38#define TS_SCHED_END TIMESTAMP(101)
39#define TS_CXS_START TIMESTAMP(102)
40#define TS_CXS_END TIMESTAMP(103)
41
42#define TS_TICK_START TIMESTAMP(110)
43#define TS_TICK_END TIMESTAMP(111)
44
45#define TS_PLUGIN_SCHED_START TIMESTAMP(120)
46#define TS_PLUGIN_SCHED_END TIMESTAMP(121)
47
48#define TS_PLUGIN_TICK_START TIMESTAMP(130)
49#define TS_PLUGIN_TICK_END TIMESTAMP(131)
50
51#define TS_ENTER_NP_START TIMESTAMP(140)
52#define TS_ENTER_NP_END TIMESTAMP(141)
53
54#define TS_EXIT_NP_START TIMESTAMP(150)
55#define TS_EXIT_NP_END TIMESTAMP(151)
56
57#define TS_SRP_UP_START TIMESTAMP(160)
58#define TS_SRP_UP_END TIMESTAMP(161)
59#define TS_SRP_DOWN_START TIMESTAMP(162)
60#define TS_SRP_DOWN_END TIMESTAMP(163)
61
62#define TS_PI_UP_START TIMESTAMP(170)
63#define TS_PI_UP_END TIMESTAMP(171)
64#define TS_PI_DOWN_START TIMESTAMP(172)
65#define TS_PI_DOWN_END TIMESTAMP(173)
66
67#define TS_FIFO_UP_START TIMESTAMP(180)
68#define TS_FIFO_UP_END TIMESTAMP(181)
69#define TS_FIFO_DOWN_START TIMESTAMP(182)
70#define TS_FIFO_DOWN_END TIMESTAMP(183)
71
72
73
74#endif /* !_SYS_TRACE_H_ */
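
A short kernel-side sketch of the start/end convention described above: a measured region is bracketed by a pair of TIMESTAMP events whose ids differ by one, so an offline tool can pair them up.

/* Kernel-context sketch only; the measured region is hypothetical. */
#include <litmus/trace.h>

static void demo_measured_region(void)
{
	TS_SCHED_START;   /* records event id 100 with a TSC timestamp */
	/* ... code being measured ... */
	TS_SCHED_END;     /* records the matching end event, id 101 */
}
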
diff --git a/kernel/exit.c b/kernel/exit.c
index 549c0558ba..bc313b74a1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -52,6 +52,8 @@
 
 extern void sem_exit (void);
 
+extern void exit_od_table(struct task_struct* t);
+
 static void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
@@ -987,6 +989,8 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
 
+	exit_od_table(tsk);
+
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 8dd8ff2810..9e42d3a207 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -59,6 +59,9 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
@@ -121,6 +124,8 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	exit_litmus(tsk);
+
 	security_task_free(tsk);
 	free_uid(tsk->user);
 	put_group_info(tsk->group_info);
diff --git a/kernel/sched.c b/kernel/sched.c
index e76b11ca6d..4890a12786 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -324,6 +324,8 @@ struct rq {
 
 	atomic_t nr_iowait;
 
+	struct task_struct* litmus_next;
+
 #ifdef CONFIG_SMP
 	struct sched_domain *sd;
 
@@ -875,11 +877,12 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
 #include "sched_idletask.c"
 #include "sched_fair.c"
 #include "sched_rt.c"
+#include "../litmus/sched_litmus.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
 #endif
 
-#define sched_class_highest (&rt_sched_class)
+#define sched_class_highest (&litmus_sched_class)
 
 /*
  * Update delta_exec, delta_fair fields for rq.
@@ -1529,7 +1532,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	this_cpu = smp_processor_id();
 
 #ifdef CONFIG_SMP
-	if (unlikely(task_running(rq, p)))
+	if (unlikely(task_running(rq, p) || is_realtime(p)))
 		goto out_activate;
 
 	new_cpu = cpu;
@@ -1890,6 +1893,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
+	litmus->finish_switch(prev);
 	finish_lock_switch(rq, prev);
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
@@ -3491,6 +3495,7 @@ void scheduler_tick(void)
 	update_cpu_load(rq);
 	if (curr != rq->idle) /* FIXME: needed? */
 		curr->sched_class->task_tick(rq, curr);
+	litmus_tick(rq, curr);
 	spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
@@ -3641,6 +3646,10 @@ need_resched_nonpreemptible:
 	 */
 	local_irq_disable();
 	__update_rq_clock(rq);
+	/* do litmus scheduling outside of rq lock, so that we
+	 * can do proper migrations for global schedulers
+	 */
+	litmus_schedule(rq, prev);
 	spin_lock(&rq->lock);
 	clear_tsk_need_resched(prev);
 
@@ -4236,6 +4245,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 	case SCHED_RR:
 		p->sched_class = &rt_sched_class;
 		break;
+	case SCHED_LITMUS:
+		p->sched_class = &litmus_sched_class;
+		break;
 	}
 
 	p->rt_priority = prio;
@@ -4268,7 +4280,7 @@ recheck:
 		policy = oldpolicy = p->policy;
 	else if (policy != SCHED_FIFO && policy != SCHED_RR &&
 			policy != SCHED_NORMAL && policy != SCHED_BATCH &&
-			policy != SCHED_IDLE)
+			policy != SCHED_IDLE && policy != SCHED_LITMUS)
 		return -EINVAL;
 	/*
 	 * Valid priorities for SCHED_FIFO and SCHED_RR are
@@ -4282,6 +4294,9 @@ recheck:
 	if (rt_policy(policy) != (param->sched_priority != 0))
 		return -EINVAL;
 
+	if (policy == SCHED_LITMUS && policy == p->policy)
+		return -EINVAL;
+
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
@@ -4316,6 +4331,12 @@ recheck:
 			return -EPERM;
 	}
 
+	if (policy == SCHED_LITMUS) {
+		retval = litmus_admit_task(p);
+		if (retval)
+			return retval;
+	}
+
 	retval = security_task_setscheduler(p, policy, param);
 	if (retval)
 		return retval;
@@ -4345,9 +4366,15 @@ recheck:
 		p->sched_class->put_prev_task(rq, p);
 	}
 
+	if (p->policy == SCHED_LITMUS)
+		litmus_exit_task(p);
+
 	oldprio = p->prio;
 	__setscheduler(rq, p, policy, param->sched_priority);
 
+	if (policy == SCHED_LITMUS)
+		litmus->task_new(p, on_rq, running);
+
 	if (on_rq) {
 		if (running)
 			p->sched_class->set_curr_task(rq);
@@ -4364,6 +4391,7 @@ recheck:
 			check_preempt_curr(rq, p);
 		}
 	}
+
 	__task_rq_unlock(rq);
 	spin_unlock_irqrestore(&p->pi_lock, flags);
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index da7c061e72..de30496263 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -845,7 +845,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	unsigned long gran;
 
-	if (unlikely(rt_prio(p->prio))) {
+	if (unlikely(rt_prio(p->prio) || p->policy == SCHED_LITMUS)) {
 		update_rq_clock(rq);
 		update_curr(cfs_rq);
 		resched_task(curr);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 9ba3daa034..c7c938cee2 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -70,7 +70,7 @@ yield_task_rt(struct rq *rq)
  */
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 {
-	if (p->prio < rq->curr->prio)
+	if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS)
 		resched_task(rq->curr);
 }
 
diff --git a/litmus/Kconfig b/litmus/Kconfig
new file mode 100644
index 0000000000..e6c5469d70
--- /dev/null
+++ b/litmus/Kconfig
@@ -0,0 +1,24 @@
1menu "LITMUS^RT"
2
3config SCHED_TASK_TRACE
4 bool "Trace real-time tasks"
5 default y
6 help
7 Include support for the sched_trace_XXX() tracing functions. This
8 allows the collection of real-time task events such as job
9 completions, job releases, early completions, etc. This results in a
10 small overhead in the scheduling code. Disable if the overhead is not
11 acceptable (e.g., benchmarking).
12
13config SCHED_DEBUG_TRACE
14 bool "TRACE() debugging"
15 default y
16 help
17 Include support for sched_trace_log_message(), which is used to
18 implement TRACE(). If disabled, no TRACE() messages will be included
19 in the kernel, and no overheads due to debugging statements will be
20 incurred by the scheduler. Disable if the overhead is not acceptable
21 (e.g. benchmarking).
22
23
24endmenu
diff --git a/litmus/Makefile b/litmus/Makefile
new file mode 100644
index 0000000000..4ad854f117
--- /dev/null
+++ b/litmus/Makefile
@@ -0,0 +1,8 @@
1#
2# Makefile for LITMUS^RT
3#
4
5obj-y = sched_plugin.o litmus.o sched_trace.o \
6 edf_common.o \
7 sched_gsn_edf.o sched_psn_edf.o litmus_sem.o \
8 trace.o ft_event.o rt_domain.o fdso.o
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 0000000000..3d9dca852d
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,132 @@
1/*
2 * kernel/edf_common.c
3 *
4 * Common functions for EDF based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15
16#include <litmus/edf_common.h>
17
18/* edf_higher_prio - returns true if first has a higher EDF priority
19 * than second. Deadline ties are broken by PID.
20 *
21 * first must not be NULL and must be a real-time task.
22 * second may be NULL or a non-rt task.
23 */
24int edf_higher_prio(struct task_struct* first,
25 struct task_struct* second)
26{
27 struct task_struct *first_task = first;
28 struct task_struct *second_task = second;
29
30 /* Check for inherited priorities. Change task
31 * used for comparison in such a case.
32 */
33 if (first && first->rt_param.inh_task)
34 first_task = first->rt_param.inh_task;
35 if (second && second->rt_param.inh_task)
36 second_task = second->rt_param.inh_task;
37
38 return
39 /* does the second task exist and is it a real-time task? If
40 * not, the first task (which is a RT task) has higher
41 * priority.
42 */
43 !second_task || !is_realtime(second_task) ||
44
45 /* is the deadline of the first task earlier?
46 * Then it has higher priority.
47 */
48 earlier_deadline(first_task, second_task) ||
49
50 /* Do we have a deadline tie?
51 * Then break by PID.
52 */
53 (get_deadline(first_task) == get_deadline(second_task) &&
54 (first_task->pid < second_task->pid ||
55
56 /* If the PIDs are the same then the task with the inherited
57 * priority wins.
58 */
59 (first_task->pid == second_task->pid &&
60 !second->rt_param.inh_task)));
61}
62
63int edf_ready_order(struct list_head* a, struct list_head* b)
64{
65 return edf_higher_prio(
66 list_entry(a, struct task_struct, rt_list),
67 list_entry(b, struct task_struct, rt_list));
68}
69
70void edf_release_at(struct task_struct *t, lt_t start)
71{
72 t->rt_param.job_params.deadline = start;
73 edf_prepare_for_next_period(t);
74 set_rt_flags(t, RT_F_RUNNING);
75}
76
77void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched)
78{
79 rt_domain_init(rt, resched, edf_ready_order);
80}
81
82void edf_prepare_for_next_period(struct task_struct *t)
83{
84 BUG_ON(!t);
85 /* prepare next release */
86 t->rt_param.job_params.release = t->rt_param.job_params.deadline;
87 t->rt_param.job_params.deadline += get_rt_period(t);
88 t->rt_param.job_params.exec_time = 0;
89 /* update job sequence number */
90 t->rt_param.job_params.job_no++;
91
92 /* don't confuse Linux */
93 t->time_slice = 1;
94}
95
96/* need_to_preempt - check whether the task t needs to be preempted
97 * call only with irqs disabled and with ready_lock acquired
98 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
99 */
100int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
101{
102 /* we need the read lock for edf_ready_queue */
103 /* no need to preempt if there is nothing pending */
104 if (!ready_jobs_pending(rt))
105 return 0;
106 /* we need to reschedule if t doesn't exist */
107 if (!t)
108 return 1;
109
110 /* NOTE: We cannot check for non-preemptibility since we
111 * don't know what address space we're currently in.
112 */
113
114 /* make sure to get non-rt stuff out of the way */
115 return !is_realtime(t) || edf_higher_prio(next_ready(rt), t);
116}
117
118
119/*
120 * Deactivate current task until the beginning of the next period.
121 */
122long edf_complete_job(void)
123{
124 /* Mark that we do not execute anymore */
125 set_rt_flags(current, RT_F_SLEEP);
126 /* call schedule, this will return when a new job arrives
127 * it also takes care of preparing for the next release
128 */
129 schedule();
130 return 0;
131}
132
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 0000000000..ca9557d877
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,279 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - objects descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops pi_sem_ops;
22extern struct fdso_ops srp_sem_ops;
23
24static const struct fdso_ops* fdso_ops[] = {
25 &pi_sem_ops,
26 &srp_sem_ops,
27};
28
29static void* fdso_create(obj_type_t type)
30{
31 return fdso_ops[type]->create();
32}
33
34static void fdso_destroy(obj_type_t type, void* obj)
35{
36 fdso_ops[type]->destroy(obj);
37}
38
39static int fdso_open(struct od_table_entry* entry, void* __user config)
40{
41 if (fdso_ops[entry->obj->type]->open)
42 return fdso_ops[entry->obj->type]->open(entry, config);
43 else
44 return 0;
45}
46
47static int fdso_close(struct od_table_entry* entry)
48{
49 if (fdso_ops[entry->obj->type]->close)
50 return fdso_ops[entry->obj->type]->close(entry);
51 else
52 return 0;
53}
54
55/* inode must be locked already */
56static struct inode_obj_id* alloc_inode_obj(struct inode* inode,
57 obj_type_t type,
58 unsigned int id)
59{
60 struct inode_obj_id* obj;
61 void* raw_obj;
62
63 raw_obj = fdso_create(type);
64 if (!raw_obj)
65 return NULL;
66
67 obj = kmalloc(sizeof(struct inode_obj_id), GFP_KERNEL);
68 if (!obj)
69 return NULL;
70 INIT_LIST_HEAD(&obj->list);
71 atomic_set(&obj->count, 1);
72 obj->type = type;
73 obj->id = id;
74 obj->obj = raw_obj;
75 obj->inode = inode;
76
77 list_add(&obj->list, &inode->i_obj_list);
78 atomic_inc(&inode->i_count);
79
80 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
81 return obj;
82}
83
84/* inode must be locked already */
85static struct inode_obj_id* get_inode_obj(struct inode* inode,
86 obj_type_t type,
87 unsigned int id)
88{
89 struct list_head* pos;
90 struct inode_obj_id* obj = NULL;
91
92 list_for_each(pos, &inode->i_obj_list) {
93 obj = list_entry(pos, struct inode_obj_id, list);
94 if (obj->id == id && obj->type == type) {
95 atomic_inc(&obj->count);
96 return obj;
97 }
98 }
99 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
100 return NULL;
101}
102
103
104static void put_inode_obj(struct inode_obj_id* obj)
105{
106 struct inode* inode;
107 int let_go = 0;
108
109 inode = obj->inode;
110 if (atomic_dec_and_test(&obj->count)) {
111
112 mutex_lock(&inode->i_obj_mutex);
113 /* no new references can be obtained */
114 if (!atomic_read(&obj->count)) {
115 list_del(&obj->list);
116 fdso_destroy(obj->type, obj->obj);
117 kfree(obj);
118 let_go = 1;
119 }
120 mutex_unlock(&inode->i_obj_mutex);
121 if (let_go)
122 iput(inode);
123 }
124}
125
126static struct od_table_entry* get_od_entry(struct task_struct* t)
127{
128 struct od_table_entry* table;
129 int i;
130
131
132 table = t->od_table;
133 if (!table) {
134 table = (struct od_table_entry*)
135 kzalloc(sizeof(struct od_table_entry) *
136 MAX_OBJECT_DESCRIPTORS, GFP_KERNEL);
137 t->od_table = table;
138 }
139
140 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
141 if (!table[i].used) {
142 table[i].used = 1;
143 return table + i;
144 }
145 return NULL;
146}
147
148static int put_od_entry(struct od_table_entry* od)
149{
150 put_inode_obj(od->obj);
151 od->used = 0;
152 return 0;
153}
154
155void exit_od_table(struct task_struct* t)
156{
157 int i;
158
159 if (t->od_table) {
160 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
161 if (t->od_table[i].used)
162 put_od_entry(t->od_table + i);
163 kfree(t->od_table);
164 t->od_table = NULL;
165 }
166}
167
168static int do_sys_od_open(struct file* file, obj_type_t type, int id,
169 void* __user config)
170{
171 int idx = 0, err;
172 struct inode* inode;
173 struct inode_obj_id* obj = NULL;
174 struct od_table_entry* entry;
175
176 inode = file->f_dentry->d_inode;
177
178 entry = get_od_entry(current);
179 if (!entry)
180 return -ENOMEM;
181
182 mutex_lock(&inode->i_obj_mutex);
183 obj = get_inode_obj(inode, type, id);
184 if (!obj)
185 obj = alloc_inode_obj(inode, type, id);
186 if (!obj) {
187 idx = -ENOMEM;
188 entry->used = 0;
189 } else {
190 entry->obj = obj;
191 entry->extra = NULL;
192 idx = entry - current->od_table;
193 }
194
195 mutex_unlock(&inode->i_obj_mutex);
196
197 err = fdso_open(entry, config);
198 if (err < 0) {
199 /* The class rejected the open call.
200 * We need to clean up and tell user space.
201 */
202 put_od_entry(entry);
203 idx = err;
204 }
205
206 return idx;
207}
208
209
210struct od_table_entry* __od_lookup(int od)
211{
212 struct task_struct *t = current;
213
214 if (!t->od_table)
215 return NULL;
216 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
217 return NULL;
218 if (!t->od_table[od].used)
219 return NULL;
220 return t->od_table + od;
221}
222
223
224asmlinkage int sys_od_open(int fd, int type, int obj_id, void* __user config)
225{
226 int ret = 0;
227 struct file* file;
228
229 /*
230 1) get file from fd, get inode from file
231 2) lock inode
232 3) try to lookup object
233 4) if not present create and enqueue object, inc inode refcnt
234 5) increment refcnt of object
235 6) alloc od_table_entry, setup ptrs
236 7) unlock inode
237 8) return offset in od_table as OD
238 */
239
240 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
241 ret = -EINVAL;
242 goto out;
243 }
244
245 file = fget(fd);
246 if (!file) {
247 ret = -EBADF;
248 goto out;
249 }
250
251 ret = do_sys_od_open(file, type, obj_id, config);
252
253 fput(file);
254
255out:
256 return ret;
257}
258
259
260asmlinkage int sys_od_close(int od)
261{
262 int ret = -EINVAL;
263 struct task_struct *t = current;
264
265 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
266 return ret;
267
268 if (!t->od_table || !t->od_table[od].used)
269 return ret;
270
271
272 /* give the class a chance to reject the close
273 */
274 ret = fdso_close(t->od_table + od);
275 if (ret == 0)
276 ret = put_od_entry(t->od_table + od);
277
278 return ret;
279}
diff --git a/litmus/ft_event.c b/litmus/ft_event.c
new file mode 100644
index 0000000000..b1d80c52d7
--- /dev/null
+++ b/litmus/ft_event.c
@@ -0,0 +1,104 @@
1#include <linux/types.h>
2
3#include <litmus/feather_trace.h>
4
5/* the feather trace management functions assume
6 * exclusive access to the event table
7 */
8
9
10#define BYTE_JUMP 0xeb
11#define BYTE_JUMP_LEN 0x02
12
13/* for each event, there is an entry in the event table */
14struct trace_event {
15 long id;
16 long count;
17 long start_addr;
18 long end_addr;
19};
20
21extern struct trace_event __start___event_table[];
22extern struct trace_event __stop___event_table[];
23
24int ft_enable_event(unsigned long id)
25{
26 struct trace_event* te = __start___event_table;
27 int count = 0;
28 char* delta;
29 unsigned char* instr;
30
31 while (te < __stop___event_table) {
32 if (te->id == id && ++te->count == 1) {
33 instr = (unsigned char*) te->start_addr;
34 /* make sure we don't clobber something wrong */
35 if (*instr == BYTE_JUMP) {
36 delta = (((unsigned char*) te->start_addr) + 1);
37 *delta = 0;
38 }
39 }
40 if (te->id == id)
41 count++;
42 te++;
43 }
44 return count;
45}
46
47int ft_disable_event(unsigned long id)
48{
49 struct trace_event* te = __start___event_table;
50 int count = 0;
51 char* delta;
52 unsigned char* instr;
53
54 while (te < __stop___event_table) {
55 if (te->id == id && --te->count == 0) {
56 instr = (unsigned char*) te->start_addr;
57 if (*instr == BYTE_JUMP) {
58 delta = (((unsigned char*) te->start_addr) + 1);
59 *delta = te->end_addr - te->start_addr -
60 BYTE_JUMP_LEN;
61 }
62 }
63 if (te->id == id)
64 count++;
65 te++;
66 }
67 return count;
68}
69
70int ft_disable_all_events(void)
71{
72 struct trace_event* te = __start___event_table;
73 int count = 0;
74 char* delta;
75 unsigned char* instr;
76
77 while (te < __stop___event_table) {
78 if (te->count) {
79 instr = (unsigned char*) te->start_addr;
80 if (*instr == BYTE_JUMP) {
81 delta = (((unsigned char*) te->start_addr)
82 + 1);
83 *delta = te->end_addr - te->start_addr -
84 BYTE_JUMP_LEN;
85 te->count = 0;
86 count++;
87 }
88 }
89 te++;
90 }
91 return count;
92}
93
94int ft_is_event_enabled(unsigned long id)
95{
96 struct trace_event* te = __start___event_table;
97
98 while (te < __stop___event_table) {
99 if (te->id == id)
100 return te->count;
101 te++;
102 }
103 return 0;
104}
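
The three management functions above all patch the same 2-byte short jump; the common operation can be summarized in one helper. This is an illustrative sketch only; the code that actually emits the jump and the event table entries is presumably in litmus/feather_trace.h and is not shown in this hunk.

	/* Illustrative sketch of the patching performed by ft_enable_event()
	 * and ft_disable_event(): the byte at start_addr is expected to be a
	 * short jump (0xeb) whose displacement either falls through into the
	 * event body (enabled, delta = 0) or skips it entirely (disabled).
	 */
	static void ft_patch_event(struct trace_event *te, int enable)
	{
		unsigned char *instr = (unsigned char *) te->start_addr;
		char *delta = ((char *) te->start_addr) + 1;

		if (*instr != BYTE_JUMP)
			return;	/* don't clobber something that isn't our jump */

		if (enable)
			*delta = 0;
		else
			*delta = te->end_addr - te->start_addr - BYTE_JUMP_LEN;
	}
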
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 0000000000..8ab96452e6
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,799 @@
1/* litmus.c -- Implementation of the LITMUS syscalls, the LITMUS initialization
2 * code, and the procfs interface.
3 */
4#include <asm/uaccess.h>
5#include <linux/uaccess.h>
6#include <linux/sysrq.h>
7
8#include <linux/module.h>
9#include <linux/proc_fs.h>
10
11
12#include <litmus/litmus.h>
13#include <linux/sched.h>
14#include <litmus/sched_plugin.h>
15
16#include <litmus/trace.h>
17
18/* Number of RT tasks that exist in the system */
19atomic_t rt_task_count = ATOMIC_INIT(0);
20static DEFINE_SPINLOCK(task_transition_lock);
21
22/* To send signals from the scheduler
23 * Must drop locks first.
24 */
25static LIST_HEAD(sched_sig_list);
26static DEFINE_SPINLOCK(sched_sig_list_lock);
27
28/*
29 * sys_set_rt_task_param
30 * @pid: PID of the task whose scheduling parameters are to be changed
31 * @param: New real-time extension parameters such as the execution cost and
32 *         period
33 * Syscall for setting a task's real-time extension parameters.
34 * Returns -EINVAL if pid is negative, param is NULL, the execution cost or
35 *                 period is invalid, or the requested CPU is offline
36 *         -EFAULT if param could not be copied from user space
37 *         -ESRCH  if pid does not correspond to a valid task
38 *         -EBUSY  if the task is already a real-time task
39 *         0       on success
40 *
41 * Only non-real-time tasks may be configured with this system call
42 * to avoid races with the scheduler. In practice, this means that a
43 * task's parameters must be set _before_ calling sys_prepare_rt_task()
44 */
45asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
46{
47 struct rt_task tp;
48 struct task_struct *target;
49 int retval = -EINVAL;
50
51 printk("Setting up rt task parameters for process %d.\n", pid);
52
53 if (pid < 0 || param == 0) {
54 goto out;
55 }
56 if (copy_from_user(&tp, param, sizeof(tp))) {
57 retval = -EFAULT;
58 goto out;
59 }
60
61 /* Task search and manipulation must be protected */
62 read_lock_irq(&tasklist_lock);
63 if (!(target = find_task_by_pid(pid))) {
64 retval = -ESRCH;
65 goto out_unlock;
66 }
67
68 if (is_realtime(target)) {
69 /* The task is already a real-time task.
70		 * We cannot allow parameter changes at this point.
71 */
72 retval = -EBUSY;
73 goto out_unlock;
74 }
75
76 if (tp.exec_cost <= 0)
77 goto out_unlock;
78 if (tp.period <= 0)
79 goto out_unlock;
80 if (!cpu_online(tp.cpu))
81 goto out_unlock;
82 if (tp.period < tp.exec_cost)
83 {
84 printk(KERN_INFO "litmus: real-time task %d rejected "
85 "because wcet > period\n", pid);
86 goto out_unlock;
87 }
88
89 target->rt_param.task_params = tp;
90
91 retval = 0;
92 out_unlock:
93 read_unlock_irq(&tasklist_lock);
94 out:
95 return retval;
96}
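
A minimal user-space sketch of the contract above follows; the __NR_set_rt_task_param number, the availability of struct rt_task to user space, and the assumption that exec_cost/period are given in nanoseconds are not established by this patch.

	#include <unistd.h>
	#include <sys/types.h>
	#include <sys/syscall.h>
	#include <litmus/rt_param.h>	/* assumed user-space export of struct rt_task */

	/* Hypothetical wrapper: configure pid as an (exec_cost, period) task on
	 * the given CPU before it becomes a real-time task.
	 */
	int set_rt_task_param(pid_t pid, unsigned long long exec_cost,
			      unsigned long long period, int cpu)
	{
		struct rt_task tp = {
			.exec_cost = exec_cost,	/* assumed to be in nanoseconds */
			.period    = period,
			.cpu       = cpu,
		};

		return syscall(__NR_set_rt_task_param, pid, &tp);
	}
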
97
98/* Getter of a task's RT params
99 *   returns -EINVAL if param is NULL or pid is negative
100 * returns ESRCH if pid does not correspond to a valid task
101 * returns EFAULT if copying of parameters has failed.
102 */
103asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
104{
105 int retval = -EINVAL;
106 struct task_struct *source;
107 struct rt_task lp;
108 if (param == 0 || pid < 0)
109 goto out;
110 read_lock(&tasklist_lock);
111 if (!(source = find_task_by_pid(pid))) {
112 retval = -ESRCH;
113 goto out_unlock;
114 }
115 lp = source->rt_param.task_params;
116 read_unlock(&tasklist_lock);
117 /* Do copying outside the lock */
118 retval =
119 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
120 return retval;
121 out_unlock:
122 read_unlock(&tasklist_lock);
123 out:
124 return retval;
125
126}
127
128/* implemented in kernel/litmus_sem.c */
129void srp_ceiling_block(void);
130
131/*
132 * This is the crucial function for the periodic task implementation.
133 * It checks whether the task is periodic, whether this kind of sleep
134 * is permitted, and calls the plugin-specific sleep, which puts the
135 * task onto a wait queue.
136 * returns 0 on successful wakeup
137 * returns EPERM if current conditions do not permit such sleep
138 * returns EINVAL if current task is not able to go to sleep
139 */
140asmlinkage long sys_complete_job(void)
141{
142 int retval = -EPERM;
143 if (!is_realtime(current)) {
144 retval = -EINVAL;
145 goto out;
146 }
147 /* Task with negative or zero period cannot sleep */
148 if (get_rt_period(current) <= 0) {
149 retval = -EINVAL;
150 goto out;
151 }
152 /* The plugin has to put the task into an
153 * appropriate queue and call schedule
154 */
155 retval = litmus->complete_job();
156 if (!retval && is_subject_to_srp(current))
157 srp_ceiling_block();
158 out:
159 return retval;
160}
161
162/* This is an "improved" version of sys_complete_job that
163 * addresses the problem of unintentionally missing a job after
164 * an overrun.
165 *
166 * returns 0 on successful wakeup
167 * returns EPERM if current conditions do not permit such sleep
168 * returns EINVAL if current task is not able to go to sleep
169 */
170asmlinkage long sys_wait_for_job_release(unsigned int job)
171{
172 int retval = -EPERM;
173 if (!is_realtime(current)) {
174 retval = -EINVAL;
175 goto out;
176 }
177
178 /* Task with negative or zero period cannot sleep */
179 if (get_rt_period(current) <= 0) {
180 retval = -EINVAL;
181 goto out;
182 }
183
184 retval = 0;
185
186 /* first wait until we have "reached" the desired job
187 *
188 * This implementation has at least two problems:
189 *
190 * 1) It doesn't gracefully handle the wrap around of
191 * job_no. Since LITMUS is a prototype, this is not much
192 * of a problem right now.
193 *
194 * 2) It is theoretically racy if a job release occurs
195 * between checking job_no and calling sleep_next_period().
196	 * A proper solution would require adding another callback
197 * in the plugin structure and testing the condition with
198 * interrupts disabled.
199 *
200 * FIXME: At least problem 2 should be taken care of eventually.
201 */
202 while (!retval && job > current->rt_param.job_params.job_no)
203 /* If the last job overran then job <= job_no and we
204 * don't send the task to sleep.
205 */
206 retval = litmus->complete_job();
207
208 /* We still have to honor the SRP after the actual release.
209 */
210 if (!retval && is_subject_to_srp(current))
211 srp_ceiling_block();
212 out:
213 return retval;
214}
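
Taken together, sys_complete_job() and sys_wait_for_job_release() suggest the following periodic main loop in user space; a sketch that uses raw syscall() invocations with assumed __NR_* numbers instead of library wrappers.

	#include <unistd.h>
	#include <sys/syscall.h>

	/* Hypothetical periodic job loop: do one job's worth of work, then
	 * sleep until the next release via sys_complete_job().
	 */
	void periodic_loop(unsigned int jobs)
	{
		unsigned int job;

		for (job = 0; job < jobs; job++) {
			/* ... job body, at most exec_cost long ... */
			if (syscall(__NR_complete_job) != 0)
				break;	/* not a real-time task, or sleep not permitted */
		}
	}
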
215
216/* This is a helper syscall to query the current job sequence number.
217 *
218 * returns 0 on successful query
219 * returns EPERM if task is not a real-time task.
220 * returns EFAULT if &job is not a valid pointer.
221 */
222asmlinkage long sys_query_job_no(unsigned int __user *job)
223{
224 int retval = -EPERM;
225 if (is_realtime(current))
226 retval = put_user(current->rt_param.job_params.job_no, job);
227
228 return retval;
229}
230
231struct sched_sig {
232 struct list_head list;
233 struct task_struct* task;
234 unsigned int signal:31;
235 int force:1;
236};
237
238static void __scheduler_signal(struct task_struct *t, unsigned int signo,
239 int force)
240{
241 struct sched_sig* sig;
242
243	sig = kmalloc(sizeof(struct sched_sig), GFP_ATOMIC);
244 if (!sig) {
245		TRACE_TASK(t, "dropping signal: %u\n", signo);
246 return;
247 }
248
249 spin_lock(&sched_sig_list_lock);
250
251 sig->signal = signo;
252 sig->force = force;
253 sig->task = t;
254 get_task_struct(t);
255 list_add(&sig->list, &sched_sig_list);
256
257 spin_unlock(&sched_sig_list_lock);
258}
259
260void scheduler_signal(struct task_struct *t, unsigned int signo)
261{
262 __scheduler_signal(t, signo, 0);
263}
264
265void force_scheduler_signal(struct task_struct *t, unsigned int signo)
266{
267 __scheduler_signal(t, signo, 1);
268}
269
270/* FIXME: get rid of the locking and do this on a per-processor basis */
271void send_scheduler_signals(void)
272{
273 unsigned long flags;
274 struct list_head *p, *extra;
275 struct siginfo info;
276 struct sched_sig* sig;
277 struct task_struct* t;
278 struct list_head claimed;
279
280 if (spin_trylock_irqsave(&sched_sig_list_lock, flags)) {
281 if (list_empty(&sched_sig_list))
282 p = NULL;
283 else {
284 p = sched_sig_list.next;
285 list_del(&sched_sig_list);
286 INIT_LIST_HEAD(&sched_sig_list);
287 }
288 spin_unlock_irqrestore(&sched_sig_list_lock, flags);
289
290 /* abort if there are no signals */
291 if (!p)
292 return;
293
294 /* take signal list we just obtained */
295 list_add(&claimed, p);
296
297 list_for_each_safe(p, extra, &claimed) {
298 list_del(p);
299 sig = list_entry(p, struct sched_sig, list);
300 t = sig->task;
301 info.si_signo = sig->signal;
302 info.si_errno = 0;
303 info.si_code = SI_KERNEL;
304 info.si_pid = 1;
305 info.si_uid = 0;
306 TRACE("sending signal %d to %d\n", info.si_signo,
307 t->pid);
308 if (sig->force)
309 force_sig_info(sig->signal, &info, t);
310 else
311 send_sig_info(sig->signal, &info, t);
312 put_task_struct(t);
313 kfree(sig);
314 }
315 }
316
317}
318
319static inline void np_mem_error(struct task_struct* t, const char* reason)
320{
321 if (t->state != TASK_DEAD && !(t->flags & PF_EXITING)) {
322 TRACE("np section: %s => %s/%d killed\n",
323 reason, t->comm, t->pid);
324 force_scheduler_signal(t, SIGKILL);
325 }
326}
327
328/* sys_register_np_flag() allows real-time tasks to register an
329 * np section indicator.
330 * returns 0 if the flag was successfully registered
331 * returns EINVAL if current task is not a real-time task
332 * returns EFAULT if *flag couldn't be written
333 */
334asmlinkage long sys_register_np_flag(short __user *flag)
335{
336 int retval = -EINVAL;
337 short test_val = RT_PREEMPTIVE;
338
339 /* avoid races with the scheduler */
340 preempt_disable();
341 TRACE("reg_np_flag(%p) for %s/%d\n", flag,
342 current->comm, current->pid);
343
344 /* Let's first try to write to the address.
345 * That way it is initialized and any bugs
346	 * involving dangling pointers will be caught
347 * early.
348 * NULL indicates disabling np section support
349 * and should not be tested.
350 */
351 if (flag)
352 retval = poke_kernel_address(test_val, flag);
353 else
354 retval = 0;
355 TRACE("reg_np_flag: retval=%d\n", retval);
356 if (unlikely(0 != retval))
357 np_mem_error(current, "np flag: not writable");
358 else
359 /* the pointer is ok */
360 current->rt_param.np_flag = flag;
361
362 preempt_enable();
363 return retval;
364}
365
366
367void request_exit_np(struct task_struct *t)
368{
369 int ret;
370 short flag;
371
372 /* We can only do this if t is actually currently scheduled on this CPU
373 * because otherwise we are in the wrong address space. Thus make sure
374 * to check.
375 */
376 BUG_ON(t != current);
377
378 if (unlikely(!is_realtime(t) || !t->rt_param.np_flag)) {
379 TRACE_TASK(t, "request_exit_np(): BAD TASK!\n");
380 return;
381 }
382
383 flag = RT_EXIT_NP_REQUESTED;
384 ret = poke_kernel_address(flag, t->rt_param.np_flag + 1);
385 TRACE("request_exit_np(%s/%d)\n", t->comm, t->pid);
386 if (unlikely(0 != ret))
387 np_mem_error(current, "request_exit_np(): flag not writable");
388
389}
390
391
392int is_np(struct task_struct* t)
393{
394 int ret;
395 unsigned short flag = 0x5858; /* = XX, looks nicer in debug*/
396
397 BUG_ON(t != current);
398
399 if (unlikely(t->rt_param.kernel_np))
400 return 1;
401 else if (unlikely(t->rt_param.np_flag == NULL) ||
402 t->flags & PF_EXITING ||
403 t->state == TASK_DEAD)
404 return 0;
405 else {
406 /* This is the tricky part. The process has registered a
407 * non-preemptive section marker. We now need to check whether
408		 * it is set to RT_NON_PREEMPTIVE. Along the way we could
409 * discover that the pointer points to an unmapped region (=>
410 * kill the task) or that the location contains some garbage
411 * value (=> also kill the task). Killing the task in any case
412 * forces userspace to play nicely. Any bugs will be discovered
413 * immediately.
414 */
415 ret = probe_kernel_address(t->rt_param.np_flag, flag);
416 if (0 == ret && (flag == RT_NON_PREEMPTIVE ||
417 flag == RT_PREEMPTIVE))
418 return flag != RT_PREEMPTIVE;
419 else {
420 /* either we could not read from the address or
421 * it contained garbage => kill the process
422 * FIXME: Should we cause a SEGFAULT instead?
423 */
424 TRACE("is_np: ret=%d flag=%c%c (%x)\n", ret,
425 flag & 0xff, (flag >> 8) & 0xff, flag);
426 np_mem_error(t, "is_np() could not read");
427 return 0;
428 }
429 }
430}
431
432/*
433 * sys_exit_np() allows a real-time task to signal that it has left a
434 * non-preemptable section. It will be called after the kernel requested a
435 * callback in the preemption indicator flag.
436 * returns 0 if the signal was valid and processed.
437 * returns EINVAL if current task is not a real-time task
438 */
439asmlinkage long sys_exit_np(void)
440{
441 int retval = -EINVAL;
442
443 TS_EXIT_NP_START;
444
445 if (!is_realtime(current))
446 goto out;
447
448 TRACE("sys_exit_np(%s/%d)\n", current->comm, current->pid);
449 /* force rescheduling so that we can be preempted */
450 set_tsk_need_resched(current);
451 retval = 0;
452 out:
453
454 TS_EXIT_NP_END;
455 return retval;
456}
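
A user-space sketch of the non-preemptive-section protocol implied by sys_register_np_flag(), is_np(), request_exit_np(), and sys_exit_np(): the flag layout (state word at the registered address, exit request in the following short) follows the kernel code above, while the __NR_* numbers and the assumption that the RT_* constants are exported to user space are not part of this patch.

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <litmus/litmus.h>	/* assumed user-space export of the RT_* values */

	/* np_flag[0]: RT_PREEMPTIVE / RT_NON_PREEMPTIVE, read by is_np()
	 * np_flag[1]: set to RT_EXIT_NP_REQUESTED by request_exit_np()
	 */
	static volatile short np_flag[2];

	void np_init(void)
	{
		np_flag[0] = RT_PREEMPTIVE;
		syscall(__NR_register_np_flag, &np_flag[0]);
	}

	void np_enter(void)
	{
		np_flag[0] = RT_NON_PREEMPTIVE;
	}

	void np_exit(void)
	{
		np_flag[0] = RT_PREEMPTIVE;
		if (np_flag[1] == RT_EXIT_NP_REQUESTED) {
			np_flag[1] = 0;
			syscall(__NR_exit_np);	/* let the delayed preemption happen */
		}
	}
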
457
458/* p is a real-time task. Re-init its state as a best-effort task. */
459static void reinit_litmus_state(struct task_struct* p, int restore)
460{
461 struct rt_task user_config = {};
462 __user short *np_flag = NULL;
463
464 if (restore) {
465		/* Save user-space-provided configuration data.
466 * FIXME: This is missing service levels for adaptive tasks.
467 */
468 user_config = p->rt_param.task_params;
469 np_flag = p->rt_param.np_flag;
470 }
471
472 /* We probably should not be inheriting any task's priority
473 * at this point in time.
474 */
475 WARN_ON(p->rt_param.inh_task);
476
477 /* We need to restore the priority of the task. */
478// __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio);
479
480 /* Cleanup everything else. */
481	memset(&p->rt_param, 0, sizeof(p->rt_param));
482
483 /* Restore preserved fields. */
484 if (restore) {
485 p->rt_param.task_params = user_config;
486 p->rt_param.np_flag = np_flag;
487 }
488}
489
490long litmus_admit_task(struct task_struct* tsk)
491{
492 long retval;
493	unsigned long flags;
494
495 BUG_ON(is_realtime(tsk));
496
497 if (get_rt_period(tsk) == 0 ||
498 get_exec_cost(tsk) > get_rt_period(tsk)) {
499 TRACE_TASK(tsk, "litmus admit: invalid task parameters "
500 "(%lu, %lu)\n",
501 get_exec_cost(tsk), get_rt_period(tsk));
502 return -EINVAL;
503 }
504
505 if (!cpu_online(get_partition(tsk)))
506 {
507 TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
508 get_partition(tsk));
509 return -EINVAL;
510 }
511
512 INIT_LIST_HEAD(&tsk->rt_list);
513
514 /* avoid scheduler plugin changing underneath us */
515 spin_lock_irqsave(&task_transition_lock, flags);
516 retval = litmus->admit_task(tsk);
517
518 if (!retval)
519 atomic_inc(&rt_task_count);
520 spin_unlock_irqrestore(&task_transition_lock, flags);
521
522 return retval;
523
524}
525
526void litmus_exit_task(struct task_struct* tsk)
527{
528 if (is_realtime(tsk)) {
529 litmus->task_exit(tsk);
530 atomic_dec(&rt_task_count);
531 reinit_litmus_state(tsk, 1);
532 }
533}
534
535/* Switching a plugin in use is tricky.
536 * We must watch out that no real-time tasks exist
537 * (and that none are created in parallel) and that the plugin is not
538 * currently in use on any processor (in theory).
539 *
540 * For now, we don't enforce the second part since it is unlikely to cause
541 * any trouble by itself as long as we don't unload modules.
542 */
543int switch_sched_plugin(struct sched_plugin* plugin)
544{
545	unsigned long flags;
546 int ret = 0;
547
548 BUG_ON(!plugin);
549
550 /* stop task transitions */
551 spin_lock_irqsave(&task_transition_lock, flags);
552
553 /* don't switch if there are active real-time tasks */
554 if (atomic_read(&rt_task_count) == 0) {
555 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
556 litmus = plugin;
557 } else
558 ret = -EBUSY;
559
560 spin_unlock_irqrestore(&task_transition_lock, flags);
561 return ret;
562}
563
564/* Called upon fork.
565 * p is the newly forked task.
566 */
567void litmus_fork(struct task_struct* p)
568{
569 if (is_realtime(p))
570 /* clean out any litmus related state, don't preserve anything*/
571 reinit_litmus_state(p, 0);
572}
573
574/* Called upon execve().
575 * current is doing the exec.
576 * Don't let address space specific stuff leak.
577 */
578void litmus_exec(void)
579{
580 struct task_struct* p = current;
581
582 if (is_realtime(p)) {
583 WARN_ON(p->rt_param.inh_task);
584 p->rt_param.np_flag = NULL;
585 }
586}
587
588void exit_litmus(struct task_struct *dead_tsk)
589{
590 if (is_realtime(dead_tsk))
591 litmus_exit_task(dead_tsk);
592}
593
594
595void list_qsort(struct list_head* list, list_cmp_t less_than)
596{
597 struct list_head lt;
598 struct list_head geq;
599 struct list_head *pos, *extra, *pivot;
600 int n_lt = 0, n_geq = 0;
601 BUG_ON(!list);
602
603 if (list->next == list)
604 return;
605
606 INIT_LIST_HEAD(&lt);
607 INIT_LIST_HEAD(&geq);
608
609 pivot = list->next;
610 list_del(pivot);
611 list_for_each_safe(pos, extra, list) {
612 list_del(pos);
613 if (less_than(pos, pivot)) {
614 list_add(pos, &lt);
615 n_lt++;
616 } else {
617 list_add(pos, &geq);
618 n_geq++;
619 }
620 }
621 if (n_lt < n_geq) {
622 list_qsort(&lt, less_than);
623 list_qsort(&geq, less_than);
624 } else {
625 list_qsort(&geq, less_than);
626 list_qsort(&lt, less_than);
627 }
628 list_splice(&geq, list);
629 list_add(pivot, list);
630 list_splice(&lt, list);
631}
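
As a usage sketch (hypothetical helpers, not part of this patch), list_qsort() can order a list of tasks linked through rt_list with the same EDF comparison used by the semaphore code:

	#include <linux/list.h>
	#include <linux/sched.h>
	#include <litmus/edf_common.h>	/* for edf_higher_prio() */

	/* Hypothetical comparison callback for list_qsort(). */
	static int edf_list_order(struct list_head *a, struct list_head *b)
	{
		return edf_higher_prio(list_entry(a, struct task_struct, rt_list),
				       list_entry(b, struct task_struct, rt_list));
	}

	/* Hypothetical helper: sort a task list by EDF priority. */
	static void sort_by_edf_prio(struct list_head *tasks)
	{
		list_qsort(tasks, edf_list_order);
	}
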
632
633#ifdef CONFIG_MAGIC_SYSRQ
634int sys_kill(int pid, int sig);
635
636static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty)
637{
638 struct task_struct *t;
639 read_lock(&tasklist_lock);
640 for_each_process(t) {
641 if (is_realtime(t)) {
642 sys_kill(t->pid, SIGKILL);
643 }
644 }
645 read_unlock(&tasklist_lock);
646}
647
648static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
649 .handler = sysrq_handle_kill_rt_tasks,
650 .help_msg = "Quit-rt-tasks",
651 .action_msg = "sent SIGKILL to all real-time tasks",
652};
653#endif
654
655static int proc_read_stats(char *page, char **start,
656 off_t off, int count,
657 int *eof, void *data)
658{
659 int len;
660
661 len = snprintf(page, PAGE_SIZE,
662 "real-time task count = %d\n",
663 atomic_read(&rt_task_count));
664 return len;
665}
666
667static int proc_read_plugins(char *page, char **start,
668 off_t off, int count,
669 int *eof, void *data)
670{
671 int len;
672
673 len = print_sched_plugins(page, PAGE_SIZE);
674 return len;
675}
676
677static int proc_read_curr(char *page, char **start,
678 off_t off, int count,
679 int *eof, void *data)
680{
681 int len;
682
683 len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name);
684 return len;
685}
686
687static int proc_write_curr(struct file *file,
688 const char *buffer,
689 unsigned long count,
690 void *data)
691{
692 int len, ret;
693 char name[65];
694 struct sched_plugin* found;
695
696	if (count > 64)
697 len = 64;
698 else
699 len = count;
700
701	if (copy_from_user(name, buffer, len))
702 return -EFAULT;
703
704 name[len] = '\0';
705 /* chomp name */
706 if (len > 1 && name[len - 1] == '\n')
707 name[len - 1] = '\0';
708
709 found = find_sched_plugin(name);
710
711 if (found) {
712 ret = switch_sched_plugin(found);
713 if (ret != 0)
714 printk(KERN_INFO "Could not switch plugin: %d\n", ret);
715 } else
716 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
717
718 return len;
719}
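
From user space, the handler above amounts to writing a plugin name into /proc/litmus/active_plugin (created in init_litmus_proc() below); a sketch follows, where the plugin name "GSN-EDF" is an assumption about what litmus/sched_gsn_edf.c registers itself as.

	#include <stdio.h>

	/* Hypothetical helper: select the active scheduling plugin by name,
	 * e.g. select_plugin("GSN-EDF").
	 */
	int select_plugin(const char *name)
	{
		FILE *f = fopen("/proc/litmus/active_plugin", "w");

		if (!f)
			return -1;
		fprintf(f, "%s\n", name);
		return fclose(f);
	}
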
720
721
722static struct proc_dir_entry *litmus_dir = NULL,
723 *curr_file = NULL,
724 *stat_file = NULL,
725 *plugs_file = NULL;
726
727static int __init init_litmus_proc(void)
728{
729 litmus_dir = proc_mkdir("litmus", NULL);
730 if (!litmus_dir) {
731 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
732 return -ENOMEM;
733 }
734 litmus_dir->owner = THIS_MODULE;
735
736 curr_file = create_proc_entry("active_plugin",
737 0644, litmus_dir);
738 if (!curr_file) {
739 printk(KERN_ERR "Could not allocate active_plugin "
740 "procfs entry.\n");
741 return -ENOMEM;
742 }
743 curr_file->owner = THIS_MODULE;
744 curr_file->read_proc = proc_read_curr;
745 curr_file->write_proc = proc_write_curr;
746
747 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
748 proc_read_stats, NULL);
749
750 plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir,
751 proc_read_plugins, NULL);
752
753 return 0;
754}
755
756static void exit_litmus_proc(void)
757{
758 if (plugs_file)
759 remove_proc_entry("plugins", litmus_dir);
760 if (stat_file)
761 remove_proc_entry("stats", litmus_dir);
762 if (curr_file)
763 remove_proc_entry("active_plugin", litmus_dir);
764 if (litmus_dir)
765 remove_proc_entry("litmus", NULL);
766}
767
768extern struct sched_plugin linux_sched_plugin;
769
770static int __init _init_litmus(void)
771{
772 /* Common initializers,
773 * mode change lock is used to enforce single mode change
774 * operation.
775 */
776 printk("Starting LITMUS^RT kernel\n");
777
778 register_sched_plugin(&linux_sched_plugin);
779
780#ifdef CONFIG_MAGIC_SYSRQ
781 /* offer some debugging help */
782 if (!register_sysrq_key('q', &sysrq_kill_rt_tasks_op))
783 printk("Registered kill rt tasks magic sysrq.\n");
784 else
785 printk("Could not register kill rt tasks magic sysrq.\n");
786#endif
787
788 init_litmus_proc();
789
790 return 0;
791}
792
793static void _exit_litmus(void)
794{
795 exit_litmus_proc();
796}
797
798module_init(_init_litmus);
799module_exit(_exit_litmus);
diff --git a/litmus/litmus_sem.c b/litmus/litmus_sem.c
new file mode 100644
index 0000000000..f52941c5ca
--- /dev/null
+++ b/litmus/litmus_sem.c
@@ -0,0 +1,566 @@
1/*
2 * PI semaphores and SRP implementations.
3 * Much of the code here is borrowed from include/asm-i386/semaphore.h.
4 *
5 * NOTE: This implementation is very much a prototype and horribly insecure. It
6 * is intended to be a proof of concept, not a feature-complete solution.
7 */
8
9#include <asm/atomic.h>
10#include <asm/semaphore.h>
11#include <linux/sched.h>
12#include <linux/wait.h>
13#include <linux/spinlock.h>
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/edf_common.h>
17
18#include <litmus/fdso.h>
19
20#include <litmus/trace.h>
21
22/* ************************************************************************** */
23/* PRIORITY INHERITANCE */
24/* ************************************************************************** */
25
26static void* create_pi_semaphore(void)
27{
28 struct pi_semaphore* sem;
29 int i;
30
31 sem = kmalloc(sizeof(struct pi_semaphore), GFP_KERNEL);
32 if (!sem)
33 return NULL;
34 atomic_set(&sem->count, 1);
35 sem->sleepers = 0;
36 init_waitqueue_head(&sem->wait);
37 sem->hp.task = NULL;
38 sem->holder = NULL;
39 for (i = 0; i < NR_CPUS; i++)
40 sem->hp.cpu_task[i] = NULL;
41 return sem;
42}
43
44static void destroy_pi_semaphore(void* sem)
45{
46 /* XXX assert invariants */
47 kfree(sem);
48}
49
50struct fdso_ops pi_sem_ops = {
51 .create = create_pi_semaphore,
52 .destroy = destroy_pi_semaphore
53};
54
55struct wq_pair {
56 struct task_struct* tsk;
57 struct pi_semaphore* sem;
58};
59
60static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync,
61 void *key)
62{
63 struct wq_pair* wqp = (struct wq_pair*) wait->private;
64 set_rt_flags(wqp->tsk, RT_F_EXIT_SEM);
65 litmus->inherit_priority(wqp->sem, wqp->tsk);
66 TRACE_TASK(wqp->tsk,
67 "woken up by rt_pi_wake_up() (RT_F_SEM_EXIT, PI)\n");
68 /* point to task for default_wake_function() */
69 wait->private = wqp->tsk;
70 default_wake_function(wait, mode, sync, key);
71
72 /* Always return true since we know that if we encountered a task
73 * that was already running the wake_up raced with the schedule in
74 * rt_pi_down(). In that case the task in rt_pi_down() will be scheduled
75 * immediately and own the lock. We must not wake up another task in
76 * any case.
77 */
78 return 1;
79}
80
81/* caller is responsible for locking */
82int edf_set_hp_task(struct pi_semaphore *sem)
83{
84 struct list_head *tmp, *next;
85 struct task_struct *queued;
86 int ret = 0;
87
88 sem->hp.task = NULL;
89 list_for_each_safe(tmp, next, &sem->wait.task_list) {
90 queued = ((struct wq_pair*)
91 list_entry(tmp, wait_queue_t,
92 task_list)->private)->tsk;
93
94 /* Compare task prios, find high prio task. */
95 if (edf_higher_prio(queued, sem->hp.task)) {
96 sem->hp.task = queued;
97 ret = 1;
98 }
99 }
100 return ret;
101}
102
103/* caller is responsible for locking */
104int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu)
105{
106 struct list_head *tmp, *next;
107 struct task_struct *queued;
108 int ret = 0;
109
110 sem->hp.cpu_task[cpu] = NULL;
111 list_for_each_safe(tmp, next, &sem->wait.task_list) {
112 queued = ((struct wq_pair*)
113 list_entry(tmp, wait_queue_t,
114 task_list)->private)->tsk;
115
116 /* Compare task prios, find high prio task. */
117 if (get_partition(queued) == cpu &&
118 edf_higher_prio(queued, sem->hp.cpu_task[cpu])) {
119 sem->hp.cpu_task[cpu] = queued;
120 ret = 1;
121 }
122 }
123 return ret;
124}
125
126int do_pi_down(struct pi_semaphore* sem)
127{
128 unsigned long flags;
129 struct task_struct *tsk = current;
130 struct wq_pair pair;
131 int suspended = 1;
132 wait_queue_t wait = {
133 .private = &pair,
134 .func = rt_pi_wake_up,
135 .task_list = {NULL, NULL}
136 };
137
138 pair.tsk = tsk;
139 pair.sem = sem;
140 spin_lock_irqsave(&sem->wait.lock, flags);
141
142 if (atomic_dec_return(&sem->count) < 0 ||
143 waitqueue_active(&sem->wait)) {
144 /* we need to suspend */
145 tsk->state = TASK_UNINTERRUPTIBLE;
146 add_wait_queue_exclusive_locked(&sem->wait, &wait);
147
148 TRACE_CUR("suspends on PI lock %p\n", sem);
149 litmus->pi_block(sem, tsk);
150
151 /* release lock before sleeping */
152 spin_unlock_irqrestore(&sem->wait.lock, flags);
153
154 TS_PI_DOWN_END;
155 preempt_enable_no_resched();
156
157
158		/* We depend on the FIFO order of the wait queue:
159		 * there is no need to recheck after waking up, since we
160		 * are guaranteed to hold the lock because there is only
161		 * one wake-up per release.
162		 */
163 schedule();
164
165 TRACE_CUR("woke up, now owns PI lock %p\n", sem);
166
167 /* try_to_wake_up() set our state to TASK_RUNNING,
168 * all we need to do is to remove our wait queue entry
169 */
170 remove_wait_queue(&sem->wait, &wait);
171 } else {
172 /* no priority inheritance necessary, since there are no queued
173 * tasks.
174 */
175 suspended = 0;
176 TRACE_CUR("acquired PI lock %p, no contention\n", sem);
177 sem->holder = tsk;
178 sem->hp.task = tsk;
179 litmus->inherit_priority(sem, tsk);
180 spin_unlock_irqrestore(&sem->wait.lock, flags);
181 }
182 return suspended;
183}
184
185void do_pi_up(struct pi_semaphore* sem)
186{
187 unsigned long flags;
188
189 spin_lock_irqsave(&sem->wait.lock, flags);
190
191 TRACE_CUR("releases PI lock %p\n", sem);
192 litmus->return_priority(sem);
193 sem->holder = NULL;
194 if (atomic_inc_return(&sem->count) < 1)
195 /* there is a task queued */
196 wake_up_locked(&sem->wait);
197
198 spin_unlock_irqrestore(&sem->wait.lock, flags);
199}
200
201asmlinkage long sys_pi_down(int sem_od)
202{
203 long ret = 0;
204 struct pi_semaphore * sem;
205 int suspended = 0;
206
207 preempt_disable();
208 TS_PI_DOWN_START;
209
210 sem = lookup_pi_sem(sem_od);
211 if (sem)
212 suspended = do_pi_down(sem);
213 else
214 ret = -EINVAL;
215
216 if (!suspended) {
217 TS_PI_DOWN_END;
218 preempt_enable();
219 }
220
221 return ret;
222}
223
224asmlinkage long sys_pi_up(int sem_od)
225{
226 long ret = 0;
227 struct pi_semaphore * sem;
228
229 preempt_disable();
230 TS_PI_UP_START;
231
232 sem = lookup_pi_sem(sem_od);
233 if (sem)
234 do_pi_up(sem);
235 else
236 ret = -EINVAL;
237
238
239 TS_PI_UP_END;
240 preempt_enable();
241
242 return ret;
243}
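
Combined with the object-descriptor syscalls from litmus/fdso.c, the two calls above yield the usual lock/unlock pattern; a hedged user-space sketch, assuming the od was obtained earlier via sys_od_open() for a PI semaphore and that the __NR_* numbers are exported:

	#include <unistd.h>
	#include <sys/syscall.h>

	/* Hypothetical critical section guarded by a PI semaphore whose object
	 * descriptor 'od' was obtained earlier via sys_od_open().
	 */
	void pi_protected(int od, void (*critical_section)(void))
	{
		if (syscall(__NR_pi_down, od) != 0)
			return;		/* invalid object descriptor */
		critical_section();
		syscall(__NR_pi_up, od);
	}
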
244
245/* Clear wait queue and wakeup waiting tasks, and free semaphore. */
246/*
247asmlinkage long sys_pi_sema_free(int sem_id)
248{
249 struct list_head *tmp, *next;
250 unsigned long flags;
251
252 if (sem_id < 0 || sem_id >= MAX_PI_SEMAPHORES)
253 return -EINVAL;
254
255 if (!pi_sems[sem_id].used)
256 return -EINVAL;
257
258 spin_lock_irqsave(&pi_sems[sem_id].wait.lock, flags);
259 if (waitqueue_active(&pi_sems[sem_id].wait)) {
260 list_for_each_safe(tmp, next,
261 &pi_sems[sem_id].wait.task_list) {
262 wait_queue_t *curr = list_entry(tmp, wait_queue_t,
263 task_list);
264 list_del(tmp);
265 set_rt_flags((struct task_struct*)curr->private,
266 RT_F_EXIT_SEM);
267 curr->func(curr,
268 TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
269 0, NULL);
270 }
271 }
272
273 spin_unlock_irqrestore(&pi_sems[sem_id].wait.lock, flags);
274 pi_sems[sem_id].used = 0;
275
276 return 0;
277}
278*/
279
280
281
282/* ************************************************************************** */
283/* STACK RESOURCE POLICY */
284/* ************************************************************************** */
285
286
287struct srp_priority {
288 struct list_head list;
289 unsigned int period;
290 pid_t pid;
291};
292
293#define list2prio(l) list_entry(l, struct srp_priority, list)
294
295/* SRP task priority comparison function. Smaller periods have higher
296 * priority; ties are broken by PID. Special case: period == 0 <=> no priority.
297 */
298static int srp_higher_prio(struct srp_priority* first,
299 struct srp_priority* second)
300{
301 if (!first->period)
302 return 0;
303 else
304 return !second->period ||
305 first->period < second->period || (
306 first->period == second->period &&
307 first->pid < second->pid);
308}
309
310struct srp {
311 struct list_head ceiling;
312 wait_queue_head_t ceiling_blocked;
313};
314
315
316DEFINE_PER_CPU(struct srp, srp);
317
318#define system_ceiling(srp) list2prio(srp->ceiling.next)
319
320static int srp_exceeds_ceiling(struct task_struct* first,
321 struct srp* srp)
322{
323 return list_empty(&srp->ceiling) ||
324 get_rt_period(first) < system_ceiling(srp)->period ||
325 (get_rt_period(first) == system_ceiling(srp)->period &&
326 first->pid < system_ceiling(srp)->pid);
327}
328
329static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
330{
331 struct list_head *pos;
332 if (in_list(&prio->list)) {
333 TRACE_CUR("WARNING: SRP violation detected, prio is already in "
334 "ceiling list!\n");
335 return;
336 }
337 list_for_each(pos, &srp->ceiling)
338 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
339 __list_add(&prio->list, pos->prev, pos);
340 return;
341 }
342
343 list_add_tail(&prio->list, &srp->ceiling);
344}
345
346/* struct for uniprocessor SRP "semaphore" */
347struct srp_semaphore {
348 struct srp_priority ceiling;
349 int cpu; /* cpu associated with this "semaphore" and resource */
350 int claimed; /* is the resource claimed (ceiling should be used)? */
351};
352
353
354static void* create_srp_semaphore(void)
355{
356 struct srp_semaphore* sem;
357
358 if (!is_realtime(current))
359 /* XXX log error */
360 return NULL;
361
362 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
363 if (!sem)
364 return NULL;
365
366 INIT_LIST_HEAD(&sem->ceiling.list);
367 sem->ceiling.period = 0;
368 sem->claimed = 0;
369 sem->cpu = get_partition(current);
370 return sem;
371}
372
373static void destroy_srp_semaphore(void* sem)
374{
375 /* XXX invariants */
376 kfree(sem);
377}
378
379struct fdso_ops srp_sem_ops = {
380 .create = create_srp_semaphore,
381 .destroy = destroy_srp_semaphore
382};
383
384/* Initialize SRP semaphores at boot time. */
385static int __init srp_sema_boot_init(void)
386{
387 int i;
388
389 printk("Initializing SRP per-CPU ceilings...");
390 for (i = 0; i < NR_CPUS; i++) {
391 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
392 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
393 }
394 printk(" done!\n");
395
396 return 0;
397}
398__initcall(srp_sema_boot_init);
399
400
401void do_srp_down(struct srp_semaphore* sem)
402{
403 /* claim... */
404 sem->claimed = 1;
405 /* ...and update ceiling */
406 srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
407}
408
409void do_srp_up(struct srp_semaphore* sem)
410{
411 sem->claimed = 0;
412
413 /* Determine new system priority ceiling for this CPU. */
414 if (in_list(&sem->ceiling.list))
415 list_del(&sem->ceiling.list);
416 else
417 TRACE_CUR("WARNING: SRP violation detected, prio not in ceiling"
418 " list!\n");
419
420 /* Wake tasks on this CPU, if they exceed current ceiling. */
421 wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
422}
423
424/* Adjust the system-wide priority ceiling if resource is claimed. */
425asmlinkage long sys_srp_down(int sem_od)
426{
427 int cpu;
428 int ret = -EINVAL;
429 struct srp_semaphore* sem;
430
431 /* disabling preemptions is sufficient protection since
432 * SRP is strictly per CPU and we don't interfere with any
433 * interrupt handlers
434 */
435 preempt_disable();
436 TS_SRP_DOWN_START;
437
438 cpu = smp_processor_id();
439 sem = lookup_srp_sem(sem_od);
440 if (sem && sem->cpu == cpu) {
441 do_srp_down(sem);
442 ret = 0;
443 }
444
445 TS_SRP_DOWN_END;
446 preempt_enable();
447 return ret;
448}
449
450/* Adjust the system-wide priority ceiling if resource is freed. */
451asmlinkage long sys_srp_up(int sem_od)
452{
453 int cpu;
454 int ret = -EINVAL;
455 struct srp_semaphore* sem;
456
457 preempt_disable();
458 TS_SRP_UP_START;
459
460 cpu = smp_processor_id();
461 sem = lookup_srp_sem(sem_od);
462
463 if (sem && sem->cpu == cpu) {
464 do_srp_up(sem);
465 ret = 0;
466 }
467
468 TS_SRP_UP_END;
469 preempt_enable();
470 return ret;
471}
472
473/* Indicate that task will use a resource associated with a given
474 * semaphore. Should be done *a priori*, before the real-time task system
475 * is executed, so this does *not* update the system priority
476 * ceiling! (The ceiling would be meaningless anyway, as the SRP
477 * breaks without this a priori knowledge.)
478 */
479asmlinkage long sys_reg_task_srp_sem(int sem_od)
480{
481 /*
482 * FIXME: This whole concept is rather brittle!
483 * There must be a better solution. Maybe register on
484 * first reference?
485 */
486
487 struct task_struct *t = current;
488 struct srp_priority t_prio;
489 struct srp_semaphore* sem;
490
491 sem = lookup_srp_sem(sem_od);
492
493 if (!sem)
494 return -EINVAL;
495
496 if (!is_realtime(t))
497 return -EPERM;
498
499 if (sem->cpu != get_partition(t))
500 return -EINVAL;
501
502 preempt_disable();
503 t->rt_param.subject_to_srp = 1;
504 t_prio.period = get_rt_period(t);
505 t_prio.pid = t->pid;
506 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
507 sem->ceiling.period = t_prio.period;
508 sem->ceiling.pid = t_prio.pid;
509 }
510
511 preempt_enable();
512
513 return 0;
514}
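
The SRP calls imply a two-phase user-space protocol: register every SRP resource a priori, then bracket each access with srp_down/srp_up at run time. A sketch with assumed __NR_* numbers:

	#include <unistd.h>
	#include <sys/syscall.h>

	/* Hypothetical SRP usage: registration must happen before the task's
	 * real-time phase (it raises the semaphore's priority ceiling), while
	 * down/up only adjust the per-CPU system ceiling at run time.
	 */
	int srp_register(int od)
	{
		return syscall(__NR_reg_task_srp_sem, od);
	}

	void srp_protected(int od, void (*critical_section)(void))
	{
		if (syscall(__NR_srp_down, od) != 0)
			return;		/* wrong CPU or invalid object descriptor */
		critical_section();
		syscall(__NR_srp_up, od);
	}
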
515
516static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
517 void *key)
518{
519 int cpu = smp_processor_id();
520 struct task_struct *tsk = wait->private;
521 if (cpu != get_partition(tsk))
522		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
523 get_partition(tsk));
524 else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
525 return default_wake_function(wait, mode, sync, key);
526 return 0;
527}
528
529
530/* Wait for current task priority to exceed system-wide priority ceiling.
531 * Can be used to determine when it is safe to run a job after its release.
532 */
533void srp_ceiling_block(void)
534{
535 struct task_struct *tsk = current;
536 wait_queue_t wait = {
537 .private = tsk,
538 .func = srp_wake_up,
539 .task_list = {NULL, NULL}
540 };
541
542 preempt_disable();
543 if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
544 tsk->state = TASK_UNINTERRUPTIBLE;
545 add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
546 TRACE_CUR("is priority ceiling blocked.\n");
547 preempt_enable_no_resched();
548 schedule();
549 /* Access to CPU var must occur with preemptions disabled,
550 * otherwise Linux debug code complains loudly, even if it is
551 * ok here.
552 */
553 preempt_disable();
554 TRACE_CUR("finally exceeds system ceiling.\n");
555 remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
556 preempt_enable();
557 } else {
558 TRACE_CUR("is not priority ceiling blocked\n");
559 preempt_enable();
560 }
561}
562
563/* ************************************************************************** */
564
565
566
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 0000000000..fe7bd29b19
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,130 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/sched_trace.h>
16
17#include <litmus/rt_domain.h>
18
19
20static int dummy_resched(rt_domain_t *rt)
21{
22 return 0;
23}
24
25static int dummy_order(struct list_head* a, struct list_head* b)
26{
27 return 0;
28}
29
30int release_order(struct list_head* a, struct list_head* b)
31{
32 return earlier_release(
33 list_entry(a, struct task_struct, rt_list),
34 list_entry(b, struct task_struct, rt_list));
35}
36
37
38void rt_domain_init(rt_domain_t *rt,
39 check_resched_needed_t f,
40 list_cmp_t order)
41{
42 BUG_ON(!rt);
43 if (!f)
44 f = dummy_resched;
45 if (!order)
46 order = dummy_order;
47 INIT_LIST_HEAD(&rt->ready_queue);
48 INIT_LIST_HEAD(&rt->release_queue);
49 rt->ready_lock = RW_LOCK_UNLOCKED;
50 rt->release_lock = SPIN_LOCK_UNLOCKED;
51 rt->check_resched = f;
52 rt->order = order;
53}
54
55/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
56 * @new: the newly released task
57 */
58void __add_ready(rt_domain_t* rt, struct task_struct *new)
59{
60 TRACE("rt: adding %s/%d (%llu, %llu) to ready queue at %llu\n",
61 new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
62 sched_clock());
63
64 if (!list_insert(&new->rt_list, &rt->ready_queue, rt->order))
65 rt->check_resched(rt);
66}
67
68struct task_struct* __take_ready(rt_domain_t* rt)
69{
70 struct task_struct *t = __peek_ready(rt);
71
72 /* kick it out of the ready list */
73 if (t)
74 list_del(&t->rt_list);
75 return t;
76}
77
78struct task_struct* __peek_ready(rt_domain_t* rt)
79{
80 if (!list_empty(&rt->ready_queue))
81 return next_ready(rt);
82 else
83 return NULL;
84}
85
86/* add_release - add a real-time task to the rt release queue.
87 * @task: the sleeping task
88 */
89void __add_release(rt_domain_t* rt, struct task_struct *task)
90{
91 TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to release queue\n",
92 task->comm, task->pid, get_exec_cost(task), get_rt_period(task),
93 get_release(task));
94
95 list_insert(&task->rt_list, &rt->release_queue, release_order);
96}
97
98void __release_pending(rt_domain_t* rt)
99{
100 struct list_head *pos, *save;
101 struct task_struct *queued;
102 lt_t now = sched_clock();
103 list_for_each_safe(pos, save, &rt->release_queue) {
104 queued = list_entry(pos, struct task_struct, rt_list);
105 if (likely(is_released(queued, now))) {
106 /* this one is ready to go*/
107 list_del(pos);
108 set_rt_flags(queued, RT_F_RUNNING);
109
110 sched_trace_job_release(queued);
111
112 /* now it can be picked up */
113 barrier();
114 add_ready(rt, queued);
115 }
116 else
117 /* the release queue is ordered */
118 break;
119 }
120}
121
122void try_release_pending(rt_domain_t* rt)
123{
124 unsigned long flags;
125
126 if (spin_trylock_irqsave(&rt->release_lock, flags)) {
127 __release_pending(rt);
128 spin_unlock_irqrestore(&rt->release_lock, flags);
129 }
130}
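
For orientation, a scheduler plugin is expected to embed and initialize an rt_domain_t roughly as follows. This is a sketch only: the edf_ready_order() comparison is assumed to be provided by litmus/edf_common.c (part of this patch, but not shown in this hunk), and the check_resched callback is deliberately simplistic.

	#include <linux/sched.h>
	#include <litmus/rt_domain.h>
	#include <litmus/edf_common.h>

	static rt_domain_t demo_domain;

	/* Hypothetical check_resched callback: ask the local CPU to reschedule
	 * whenever a new task is added to the ready queue.
	 */
	static int demo_check_resched(rt_domain_t *rt)
	{
		set_tsk_need_resched(current);
		return 1;
	}

	static void demo_domain_setup(void)
	{
		rt_domain_init(&demo_domain, demo_check_resched, edf_ready_order);
	}
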
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
new file mode 100644
index 0000000000..e879b02888
--- /dev/null
+++ b/litmus/sched_gsn_edf.c
@@ -0,0 +1,719 @@
1/*
2 * litmus/sched_gsn_edf.c
3 *
4 * Implementation of the GSN-EDF scheduling algorithm.
5 *
6 * This version uses the simple approach and serializes all scheduling
7 * decisions by the use of a queue lock. This is probably not the
8 * best way to do it, but it should suffice for now.
9 */
10
11#include <linux/spinlock.h>
12#include <linux/percpu.h>
13#include <linux/sched.h>
14#include <linux/list.h>
15
16#include <litmus/litmus.h>
17#include <litmus/sched_plugin.h>
18#include <litmus/edf_common.h>
19#include <litmus/sched_trace.h>
20
21#include <linux/module.h>
22
23/* Overview of GSN-EDF operations.
24 *
25 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
26 * description only covers how the individual operations are implemented in
27 * LITMUS.
28 *
29 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
30 * structure (NOT the actually scheduled
31 * task). If there is another linked task To
32 * already it will set To->linked_on = NO_CPU
33 * (thereby removing its association with this
34 * CPU). However, it will not requeue the
35 * previously linked task (if any). It will set
36 * T's state to RT_F_RUNNING and check whether
37 * it is already running somewhere else. If T
38 * is scheduled somewhere else it will link
39 * it to that CPU instead (and pull the linked
40 * task to cpu). T may be NULL.
41 *
42 * unlink(T) - Unlink removes T from all scheduler data
43 * structures. If it is linked to some CPU it
44 * will link NULL to that CPU. If it is
45 * currently queued in the gsnedf queue it will
46 * be removed from the T->rt_list. It is safe to
47 * call unlink(T) if T is not linked. T may not
48 * be NULL.
49 *
50 * requeue(T) - Requeue will insert T into the appropriate
51 * queue. If the system is in real-time mode and
52 * the T is released already, it will go into the
53 * ready queue. If the system is not in
54 *			  real-time mode, then T will go into the
55 * release queue. If T's release time is in the
56 * future, it will go into the release
57 * queue. That means that T's release time/job
58 *			  no/etc. has to be updated before requeue(T) is
59 * called. It is not safe to call requeue(T)
60 * when T is already queued. T may not be NULL.
61 *
62 * gsnedf_job_arrival(T) - This is the catch all function when T enters
63 * the system after either a suspension or at a
64 * job release. It will queue T (which means it
65 * is not safe to call gsnedf_job_arrival(T) if
66 * T is already queued) and then check whether a
67 * preemption is necessary. If a preemption is
68 * necessary it will update the linkage
69 * accordingly and cause scheduled to be called
70 * (either with an IPI or need_resched). It is
71 * safe to call gsnedf_job_arrival(T) if T's
72 * next job has not been actually released yet
73 *				    (release time in the future). T will be put
74 * on the release queue in that case.
75 *
76 * job_completion(T) - Take care of everything that needs to be done
77 * to prepare T for its next release and place
78 * it in the right queue with
79 * gsnedf_job_arrival().
80 *
81 *
82 * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
83 * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
84 * the functions will automatically propagate pending tasks from the ready queue
85 * to a linked task. This is the job of the calling function (by means of
86 * __take_ready).
87 */
88
89
90/* cpu_entry_t - maintain the linked and scheduled state
91 */
92typedef struct {
93 int cpu;
94 struct task_struct* linked; /* only RT tasks */
95 struct task_struct* scheduled; /* only RT tasks */
96 struct list_head list;
97 atomic_t will_schedule; /* prevent unneeded IPIs */
98} cpu_entry_t;
99DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
100
101#define set_will_schedule() \
102 (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1))
103#define clear_will_schedule() \
104 (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0))
105#define test_will_schedule(cpu) \
106 (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule))
107
108
109#define NO_CPU 0xffffffff
110
111/* The gsnedf_lock is used to serialize all scheduling events.
112 * It protects the gsnedf rt_domain and the per-CPU linkage/queue state.
113 */
114static DEFINE_SPINLOCK(gsnedf_lock);
115/* the cpus queue themselves according to priority in here */
116static LIST_HEAD(gsnedf_cpu_queue);
117
118static rt_domain_t gsnedf;
119
120
121/* update_cpu_position - Move the cpu entry to the correct place to maintain
122 * order in the cpu queue. Caller must hold gsnedf lock.
123 *
124 * This really should be a heap.
125 */
126static void update_cpu_position(cpu_entry_t *entry)
127{
128 cpu_entry_t *other;
129 struct list_head *pos;
130
131 if (likely(in_list(&entry->list)))
132 list_del(&entry->list);
133 /* if we do not execute real-time jobs we just move
134 * to the end of the queue
135 */
136 if (entry->linked) {
137 list_for_each(pos, &gsnedf_cpu_queue) {
138 other = list_entry(pos, cpu_entry_t, list);
139 if (edf_higher_prio(entry->linked, other->linked)) {
140 __list_add(&entry->list, pos->prev, pos);
141 return;
142 }
143 }
144 }
145 /* if we get this far we have the lowest priority job */
146 list_add_tail(&entry->list, &gsnedf_cpu_queue);
147}
148
149/* link_task_to_cpu - Update the link of a CPU.
150 * Handles the case where the to-be-linked task is already
151 * scheduled on a different CPU.
152 */
153static noinline void link_task_to_cpu(struct task_struct* linked,
154 cpu_entry_t *entry)
155{
156 cpu_entry_t *sched;
157 struct task_struct* tmp;
158 int on_cpu;
159
160 BUG_ON(linked && !is_realtime(linked));
161
162 /* Currently linked task is set to be unlinked. */
163 if (entry->linked) {
164 entry->linked->rt_param.linked_on = NO_CPU;
165 }
166
167 /* Link new task to CPU. */
168 if (linked) {
169 set_rt_flags(linked, RT_F_RUNNING);
170		/* handle the case that the task is already scheduled somewhere else */
171 on_cpu = linked->rt_param.scheduled_on;
172 if (on_cpu != NO_CPU) {
173 sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
174 /* this should only happen if not linked already */
175 BUG_ON(sched->linked == linked);
176
177 /* If we are already scheduled on the CPU to which we
178 * wanted to link, we don't need to do the swap --
179 * we just link ourselves to the CPU and depend on
180 * the caller to get things right.
181 */
182 if (entry != sched) {
183 tmp = sched->linked;
184 linked->rt_param.linked_on = sched->cpu;
185 sched->linked = linked;
186 update_cpu_position(sched);
187 linked = tmp;
188 }
189 }
190 if (linked) /* might be NULL due to swap */
191 linked->rt_param.linked_on = entry->cpu;
192 }
193 entry->linked = linked;
194 update_cpu_position(entry);
195}
196
197/* unlink - Make sure a task is not linked any longer to an entry
198 * where it was linked before. Must hold gsnedf_lock.
199 */
200static noinline void unlink(struct task_struct* t)
201{
202 cpu_entry_t *entry;
203
204 if (unlikely(!t)) {
205 TRACE_BUG_ON(!t);
206 return;
207 }
208
209 if (t->rt_param.linked_on != NO_CPU) {
210 /* unlink */
211 entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
212 t->rt_param.linked_on = NO_CPU;
213 link_task_to_cpu(NULL, entry);
214 } else if (in_list(&t->rt_list)) {
215 /* This is an interesting situation: t is scheduled,
216 * but was just recently unlinked. It cannot be
217 * linked anywhere else (because then it would have
218 * been relinked to this CPU), thus it must be in some
219 * queue. We must remove it from the list in this
220 * case.
221 */
222 list_del(&t->rt_list);
223 }
224}
225
226
227/* preempt - force a CPU to reschedule
228 */
229static noinline void preempt(cpu_entry_t *entry)
230{
231 /* We cannot make the is_np() decision here if it is a remote CPU
232 * because requesting exit_np() requires that we currently use the
233 * address space of the task. Thus, in the remote case we just send
234 * the IPI and let schedule() handle the problem.
235 */
236
237 if (smp_processor_id() == entry->cpu) {
238 if (entry->scheduled && is_np(entry->scheduled))
239 request_exit_np(entry->scheduled);
240 else
241 set_tsk_need_resched(current);
242 } else
243		/* in case it is a remote CPU we have to defer the
244		 * decision to the remote CPU
245 * FIXME: We could save a few IPI's here if we leave the flag
246 * set when we are waiting for a np_exit().
247 */
248 if (!test_will_schedule(entry->cpu))
249 smp_send_reschedule(entry->cpu);
250}
251
252/* requeue - Put an unlinked task into gsn-edf domain.
253 * Caller must hold gsnedf_lock.
254 */
255static noinline void requeue(struct task_struct* task)
256{
257 BUG_ON(!task);
258 /* sanity check rt_list before insertion */
259 BUG_ON(in_list(&task->rt_list));
260
261 if (get_rt_flags(task) == RT_F_SLEEP) {
262		/* This task has expired:
263		 * _schedule has already taken care of updating
264		 * the release time and deadline. We only need to
265		 * check whether it has been released.
266 */
267 if (is_released(task, sched_clock()))
268 __add_ready(&gsnedf, task);
269 else {
270 /* it has got to wait */
271 __add_release(&gsnedf, task);
272 }
273
274 } else
275		/* this is a forced preemption:
276		 * the task stays in the ready_queue;
277		 * we only need to make it available to others again
278 */
279 __add_ready(&gsnedf, task);
280}
281
282/* gsnedf_job_arrival: task is either resumed or released */
283static noinline void gsnedf_job_arrival(struct task_struct* task)
284{
285 cpu_entry_t* last;
286
287 BUG_ON(list_empty(&gsnedf_cpu_queue));
288 BUG_ON(!task);
289
290 /* first queue arriving job */
291 requeue(task);
292
293 /* then check for any necessary preemptions */
294 last = list_entry(gsnedf_cpu_queue.prev, cpu_entry_t, list);
295 if (edf_preemption_needed(&gsnedf, last->linked)) {
296 /* preemption necessary */
297 task = __take_ready(&gsnedf);
298 TRACE("job_arrival: task %d linked to %d\n",
299 task->pid, last->cpu);
300 if (last->linked)
301 requeue(last->linked);
302
303 link_task_to_cpu(task, last);
304 preempt(last);
305 }
306}
307
308/* check for current job releases */
309static noinline void gsnedf_release_jobs(void)
310{
311 struct list_head *pos, *save;
312 struct task_struct *queued;
313 lt_t now = sched_clock();
314
315
316 list_for_each_safe(pos, save, &gsnedf.release_queue) {
317 queued = list_entry(pos, struct task_struct, rt_list);
318 if (likely(is_released(queued, now))) {
319 /* this one is ready to go*/
320 list_del(pos);
321 set_rt_flags(queued, RT_F_RUNNING);
322
323 sched_trace_job_release(queued);
324 gsnedf_job_arrival(queued);
325 }
326 else
327 /* the release queue is ordered */
328 break;
329 }
330}
331
332/* gsnedf_tick - this function is called for every local timer
333 * interrupt.
334 *
335 * checks whether the current task's budget has been exhausted and,
336 * if so, whether the preemption can take effect or must be delayed
337 */
338static void gsnedf_tick(struct task_struct* t)
339{
340 unsigned long flags;
341
342 if (is_realtime(t) && budget_exhausted(t)) {
343 if (!is_np(t)) {
344 /* np tasks will be preempted when they become
345 * preemptable again
346 */
347 set_tsk_need_resched(t);
348 set_will_schedule();
349 TRACE("gsnedf_scheduler_tick: "
350 "%d is preemptable "
351 " => FORCE_RESCHED\n", t->pid);
352 } else {
353 TRACE("gsnedf_scheduler_tick: "
354 "%d is non-preemptable, "
355 "preemption delayed.\n", t->pid);
356 request_exit_np(t);
357 }
358 }
359
360 /* only the first CPU needs to release jobs */
361 /* FIXME: drive this from a hrtimer */
362 if (smp_processor_id() == 0) {
363 spin_lock_irqsave(&gsnedf_lock, flags);
364
365 /* Try to release pending jobs */
366 gsnedf_release_jobs();
367
368 /* We don't need to check linked != scheduled since
369 * set_tsk_need_resched has been set by preempt() if necessary.
370 */
371
372 spin_unlock_irqrestore(&gsnedf_lock, flags);
373 }
374}
375
376/* caller holds gsnedf_lock */
377static noinline void job_completion(struct task_struct *t)
378{
379 BUG_ON(!t);
380
381 sched_trace_job_completion(t);
382
383 TRACE_TASK(t, "job_completion().\n");
384
385 /* set flags */
386 set_rt_flags(t, RT_F_SLEEP);
387 /* prepare for next period */
388 edf_prepare_for_next_period(t);
389 /* unlink */
390 unlink(t);
391 /* requeue
392 * But don't requeue a blocking task. */
393 if (is_running(t))
394 gsnedf_job_arrival(t);
395}
396
397
398/* Getting schedule() right is a bit tricky. schedule() may not make any
399 * assumptions on the state of the current task since it may be called for a
400 * number of reasons. The reasons include a scheduler_tick() determined that it
401 * was necessary, because sys_exit_np() was called, because some Linux
402 * subsystem determined so, or even (in the worst case) because there is a bug
403 * hidden somewhere. Thus, we must take extreme care to determine what the
404 * current state is.
405 *
406 * The CPU could currently be scheduling a task (or not), be linked (or not).
407 *
408 * The following assertions for the scheduled task could hold:
409 *
410 * - !is_running(scheduled) // the job blocks
411 * - scheduled->timeslice == 0 // the job completed (forcefully)
412 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
413 * - linked != scheduled // we need to reschedule (for any reason)
414 * - is_np(scheduled) // rescheduling must be delayed,
415 * sys_exit_np must be requested
416 *
417 * Any of these can occur together.
418 */
419static struct task_struct* gsnedf_schedule(struct task_struct * prev)
420{
421 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
422 int out_of_time, sleep, preempt, np, exists, blocks;
423 struct task_struct* next = NULL;
424
425 /* Will be released in finish_switch. */
426 spin_lock(&gsnedf_lock);
427 clear_will_schedule();
428
429 /* sanity checking */
430 BUG_ON(entry->scheduled && entry->scheduled != prev);
431 BUG_ON(entry->scheduled && !is_realtime(prev));
432 BUG_ON(is_realtime(prev) && !entry->scheduled);
433
434 /* (0) Determine state */
435 exists = entry->scheduled != NULL;
436 blocks = exists && !is_running(entry->scheduled);
437 out_of_time = exists && budget_exhausted(entry->scheduled);
438 np = exists && is_np(entry->scheduled);
439 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
440 preempt = entry->scheduled != entry->linked;
441
442 /* If a task blocks we have no choice but to reschedule.
443 */
444 if (blocks)
445 unlink(entry->scheduled);
446
447 /* Request a sys_exit_np() call if we would like to preempt but cannot.
448 * We need to make sure to update the link structure anyway in case
449 * that we are still linked. Multiple calls to request_exit_np() don't
450 * hurt.
451 */
452 if (np && (out_of_time || preempt || sleep)) {
453 unlink(entry->scheduled);
454 request_exit_np(entry->scheduled);
455 }
456
457 /* Any task that is preemptable and either exhausts its execution
458 * budget or wants to sleep completes. We may have to reschedule after
459 * this.
460 */
461 if (!np && (out_of_time || sleep))
462 job_completion(entry->scheduled);
463
464 /* Link pending task if we became unlinked.
465 */
466 if (!entry->linked)
467 link_task_to_cpu(__take_ready(&gsnedf), entry);
468
469 /* The final scheduling decision. Do we need to switch for some reason?
470 * If linked is different from scheduled, select linked as next.
471 */
472 if ((!np || blocks) &&
473 entry->linked != entry->scheduled) {
474 /* Schedule a linked job? */
475 if (entry->linked)
476 next = entry->linked;
477 } else
478 /* Only override Linux scheduler if we have real-time task
479 * scheduled that needs to continue.
480 */
481 if (exists)
482 next = prev;
483
484 spin_unlock(&gsnedf_lock);
485
486 /* don't race with a concurrent switch */
487 if (next && prev != next)
488 while (next->rt_param.scheduled_on != NO_CPU)
489 cpu_relax();
490 return next;
491}
492
493
494/* _finish_switch - we just finished the switch away from prev
495 */
496static void gsnedf_finish_switch(struct task_struct *prev)
497{
498 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
499
500 entry->scheduled = is_realtime(current) ? current : NULL;
501
502 prev->rt_param.scheduled_on = NO_CPU;
503 current->rt_param.scheduled_on = smp_processor_id();
504}
505
506
507/* Prepare a task for running in RT mode
508 */
509static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
510{
511 unsigned long flags;
512 cpu_entry_t* entry;
513
514 TRACE("gsn edf: task new %d\n", t->pid);
515
516 spin_lock_irqsave(&gsnedf_lock, flags);
517 if (running) {
518 entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
519 BUG_ON(entry->scheduled);
520 entry->scheduled = t;
521 t->rt_param.scheduled_on = task_cpu(t);
522 } else
523 t->rt_param.scheduled_on = NO_CPU;
524 t->rt_param.linked_on = NO_CPU;
525
526 /* setup job params */
527 edf_release_at(t, sched_clock());
528
529 gsnedf_job_arrival(t);
530 spin_unlock_irqrestore(&gsnedf_lock, flags);
531}
532
533static void gsnedf_task_wake_up(struct task_struct *task)
534{
535 unsigned long flags;
536 lt_t now;
537
538 spin_lock_irqsave(&gsnedf_lock, flags);
539 /* We need to take suspensions because of semaphores into
540 * account! If a job resumes after being suspended due to acquiring
541 * a semaphore, it should never be treated as a new job release.
542 */
543 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
544 set_rt_flags(task, RT_F_RUNNING);
545 } else {
546 now = sched_clock();
547 if (is_tardy(task, now)) {
548 /* new sporadic release */
549 edf_release_at(task, now);
550 sched_trace_job_release(task);
551 }
552 else if (task->time_slice)
553 /* came back in time before deadline
554 */
555 set_rt_flags(task, RT_F_RUNNING);
556 }
557 gsnedf_job_arrival(task);
558 spin_unlock_irqrestore(&gsnedf_lock, flags);
559}
560
561static void gsnedf_task_block(struct task_struct *t)
562{
563 unsigned long flags;
564
565 /* unlink if necessary */
566 spin_lock_irqsave(&gsnedf_lock, flags);
567 unlink(t);
568 spin_unlock_irqrestore(&gsnedf_lock, flags);
569
570 BUG_ON(!is_realtime(t));
571 BUG_ON(t->rt_list.next != LIST_POISON1);
572 BUG_ON(t->rt_list.prev != LIST_POISON2);
573}
574
575
576static void gsnedf_task_exit(struct task_struct * t)
577{
578 unsigned long flags;
579
580 /* unlink if necessary */
581 spin_lock_irqsave(&gsnedf_lock, flags);
582 unlink(t);
583 spin_unlock_irqrestore(&gsnedf_lock, flags);
584
585 BUG_ON(!is_realtime(t));
586 TRACE_TASK(t, "RIP\n");
587 BUG_ON(t->rt_list.next != LIST_POISON1);
588 BUG_ON(t->rt_list.prev != LIST_POISON2);
589}
590
591static long gsnedf_pi_block(struct pi_semaphore *sem,
592 struct task_struct *new_waiter)
593{
594 /* This callback has to handle the situation where a new waiter is
595 * added to the wait queue of the semaphore.
596 *
597 * We must check if it has a higher priority than the currently
598 * highest-priority task, and then potentially reschedule.
599 */
600
601 BUG_ON(!new_waiter);
602
603 if (edf_higher_prio(new_waiter, sem->hp.task)) {
604 TRACE_TASK(new_waiter, " boosts priority\n");
605 /* called with IRQs disabled */
606 spin_lock(&gsnedf_lock);
607 /* store new highest-priority task */
608 sem->hp.task = new_waiter;
609 if (sem->holder) {
610 /* let holder inherit */
611 sem->holder->rt_param.inh_task = new_waiter;
612 unlink(sem->holder);
613 gsnedf_job_arrival(sem->holder);
614 }
615 spin_unlock(&gsnedf_lock);
616 }
617
618 return 0;
619}
620
621static long gsnedf_inherit_priority(struct pi_semaphore *sem,
622 struct task_struct *new_owner)
623{
624 /* We don't need to acquire the gsnedf_lock since at the time of this
625 * call new_owner isn't actually scheduled yet (it's still sleeping)
626 * and since the calling function already holds sem->wait.lock, which
627 * prevents concurrent sem->hp.task changes.
628 */
629
630 if (sem->hp.task && sem->hp.task != new_owner) {
631 new_owner->rt_param.inh_task = sem->hp.task;
632 TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
633 sem->hp.task->comm, sem->hp.task->pid);
634 } else
635 TRACE_TASK(new_owner,
636 "cannot inherit priority, "
637 "no higher priority job waits.\n");
638 return 0;
639}
640
641/* This function is called on a semaphore release, and assumes that
642 * the current task is also the semaphore holder.
643 */
644static long gsnedf_return_priority(struct pi_semaphore *sem)
645{
646 struct task_struct* t = current;
647 int ret = 0;
648
649 /* Find new highest-priority semaphore task
650 * if holder task is the current hp.task.
651 *
652 * Calling function holds sem->wait.lock.
653 */
654 if (t == sem->hp.task)
655 edf_set_hp_task(sem);
656
657 TRACE_CUR("gsnedf_return_priority for lock %p\n", sem);
658
659 if (t->rt_param.inh_task) {
660 /* interrupts already disabled by PI code */
661 spin_lock(&gsnedf_lock);
662
663 /* Reset inh_task to NULL. */
664 t->rt_param.inh_task = NULL;
665
666 /* Check if rescheduling is necessary */
667 unlink(t);
668 gsnedf_job_arrival(t);
669 spin_unlock(&gsnedf_lock);
670 }
671
672 return ret;
673}
674
675static long gsnedf_admit_task(struct task_struct* tsk)
676{
677 return 0;
678}
679
680
681/* Plugin object */
682static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
683 .plugin_name = "GSN-EDF",
684 .finish_switch = gsnedf_finish_switch,
685 .tick = gsnedf_tick,
686 .task_new = gsnedf_task_new,
687 .complete_job = edf_complete_job,
688 .task_exit = gsnedf_task_exit,
689 .schedule = gsnedf_schedule,
690 .task_wake_up = gsnedf_task_wake_up,
691 .task_block = gsnedf_task_block,
692 .pi_block = gsnedf_pi_block,
693 .inherit_priority = gsnedf_inherit_priority,
694 .return_priority = gsnedf_return_priority,
695 .admit_task = gsnedf_admit_task
696};
697
698
699static int __init init_gsn_edf(void)
700{
701 int cpu;
702 cpu_entry_t *entry;
703
704 /* initialize CPU state */
705 for (cpu = 0; cpu < NR_CPUS; cpu++) {
706 entry = &per_cpu(gsnedf_cpu_entries, cpu);
707 atomic_set(&entry->will_schedule, 0);
708 entry->linked = NULL;
709 entry->scheduled = NULL;
710 entry->cpu = cpu;
711 INIT_LIST_HEAD(&entry->list);
712 }
713
714 edf_domain_init(&gsnedf, NULL);
715 return register_sched_plugin(&gsn_edf_plugin);
716}
717
718
719module_init(init_gsn_edf);
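
For illustration, the final switch test in gsnedf_schedule() above reduces to
(!np || blocks) && linked != scheduled. The standalone sketch below (plain
userspace C with hypothetical names, not kernel code) makes the relevant
combinations explicit:

#include <stdio.h>

/* 1 = hand the linked job (or the CPU) back to Linux, 0 = prev keeps running.
 * Blocking, budget exhaustion (out_of_time) and job completion (sleep) all
 * unlink the scheduled task beforehand, which is what makes linked differ
 * from scheduled by the time this test runs.
 */
static int needs_switch(int np, int blocks, int linked_is_scheduled)
{
        /* a non-preemptable section defers the switch unless the task blocked */
        return (!np || blocks) && !linked_is_scheduled;
}

int main(void)
{
        printf("np section, still linked:  %d\n", needs_switch(1, 0, 1));
        printf("np section, now unlinked:  %d\n", needs_switch(1, 0, 0));
        printf("blocked in np section:     %d\n", needs_switch(1, 1, 0));
        printf("preemption by linked job:  %d\n", needs_switch(0, 0, 0));
        return 0;
}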
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
new file mode 100644
index 0000000000..89ae3941db
--- /dev/null
+++ b/litmus/sched_litmus.c
@@ -0,0 +1,149 @@
1/* This file is included from kernel/sched.c */
2
3#include <litmus/litmus.h>
4#include <litmus/sched_plugin.h>
5
6static void update_time_litmus(struct rq *rq, struct task_struct *p)
7{
8 lt_t now = sched_clock();
9 p->rt_param.job_params.exec_time +=
10 now - p->rt_param.job_params.exec_start;
11 p->rt_param.job_params.exec_start = now;
12}
13
14static void double_rq_lock(struct rq *rq1, struct rq *rq2);
15static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
16
17static void litmus_tick(struct rq *rq, struct task_struct *p)
18{
19 if (is_realtime(p))
20 update_time_litmus(rq, p);
21 litmus->tick(p);
22}
23
24static void litmus_schedule(struct rq *rq, struct task_struct *prev)
25{
26 struct rq* other_rq;
27 int success = 0;
28 /* WARNING: rq is _not_ locked! */
29 if (is_realtime(prev))
30 update_time_litmus(rq, prev);
31
32 while (!success) {
33 /* let the plugin schedule */
34 rq->litmus_next = litmus->schedule(prev);
35
36 /* check if a global plugin pulled a task from a different RQ */
37 if (rq->litmus_next && task_rq(rq->litmus_next) != rq) {
38 /* we need to migrate the task */
39 other_rq = task_rq(rq->litmus_next);
40 double_rq_lock(rq, other_rq);
41 /* now that we have the lock we need to make sure a
42 * couple of things still hold:
43 * - it is still a real-time task
44 * - it is still runnable (could have been stopped)
45 */
46 if (is_realtime(rq->litmus_next) &&
47 is_running(rq->litmus_next)) {
48 set_task_cpu(rq->litmus_next, smp_processor_id());
49 success = 1;
50 } /* else something raced, retry */
51 double_rq_unlock(rq, other_rq);
52 } else
53 success = 1;
54 }
55}
56
57static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, int wakeup)
58{
59 if (wakeup)
60 litmus->task_wake_up(p);
61}
62
63static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, int sleep)
64{
65 if (sleep)
66 litmus->task_block(p);
67}
68
69static void yield_task_litmus(struct rq *rq)
70{
71 BUG_ON(rq->curr != current);
72 litmus->complete_job();
73}
74
75/* Plugins are responsible for this.
76 */
77static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p)
78{
79}
80
81/* has already been taken care of */
82static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
83{
84}
85
86static struct task_struct *pick_next_task_litmus(struct rq *rq)
87{
88 struct task_struct* picked = rq->litmus_next;
89 rq->litmus_next = NULL;
90 if (picked)
91 picked->rt_param.job_params.exec_start = sched_clock();
92 return picked;
93}
94
95static void task_tick_litmus(struct rq *rq, struct task_struct *p)
96{
97}
98
99/* This is called when a task becomes a real-time task, either due
100 * to a SCHED_* class transition or due to PI mutex inheritance.
101 * We don't handle Linux PI mutex inheritance yet; use the LITMUS-provided
102 * synchronization primitives instead.
103 */
104static void set_curr_task_litmus(struct rq *rq)
105{
106 rq->curr->rt_param.job_params.exec_start = sched_clock();
107}
108
109
110#ifdef CONFIG_SMP
111
112/* we don't repartition at runtime */
113
114static unsigned long
115load_balance_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest,
116 unsigned long max_load_move,
117 struct sched_domain *sd, enum cpu_idle_type idle,
118 int *all_pinned, int *this_best_prio)
119{
120 return 0;
121}
122
123static int
124move_one_task_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest,
125 struct sched_domain *sd, enum cpu_idle_type idle)
126{
127 return 0;
128}
129#endif
130
131const struct sched_class litmus_sched_class = {
132 .next = &rt_sched_class,
133 .enqueue_task = enqueue_task_litmus,
134 .dequeue_task = dequeue_task_litmus,
135 .yield_task = yield_task_litmus,
136
137 .check_preempt_curr = check_preempt_curr_litmus,
138
139 .pick_next_task = pick_next_task_litmus,
140 .put_prev_task = put_prev_task_litmus,
141
142#ifdef CONFIG_SMP
143 .load_balance = load_balance_litmus,
144 .move_one_task = move_one_task_litmus,
145#endif
146
147 .set_curr_task = set_curr_task_litmus,
148 .task_tick = task_tick_litmus,
149};
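
For illustration, the ".next = &rt_sched_class" entry above is what places the
LITMUS class ahead of the stock real-time and fair classes: kernel/sched.c walks
the class chain and takes the first class whose pick_next_task() returns a task.
The minimal userspace sketch below (made-up types, not the kernel API) mimics
that iteration:

#include <stdio.h>
#include <stddef.h>

struct rq;                                  /* opaque in this sketch */

struct fake_class {
        const char *name;
        const struct fake_class *next;
        const char *(*pick)(struct rq *rq); /* NULL result = nothing to run */
};

static const char *pick_litmus(struct rq *rq) { (void)rq; return NULL; }
static const char *pick_rt(struct rq *rq)     { (void)rq; return NULL; }
static const char *pick_fair(struct rq *rq)   { (void)rq; return "cfs task"; }

static const struct fake_class fair_cls   = { "fair",   NULL,      pick_fair };
static const struct fake_class rt_cls     = { "rt",     &fair_cls, pick_rt };
static const struct fake_class litmus_cls = { "litmus", &rt_cls,   pick_litmus };

int main(void)
{
        const struct fake_class *c;
        const char *task = NULL;

        /* highest class first: LITMUS gets a chance before rt and fair */
        for (c = &litmus_cls; c && !task; c = c->next)
                task = c->pick(NULL);
        printf("picked: %s\n", task ? task : "(idle)");
        return 0;
}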
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 0000000000..f7eb116ee4
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,174 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin and some dummy functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9
10#include <litmus/litmus.h>
11#include <litmus/sched_plugin.h>
12
13
14/*************************************************************
15 * Dummy plugin functions *
16 *************************************************************/
17
18static void litmus_dummy_finish_switch(struct task_struct * prev)
19{
20}
21
22static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
23{
24 return NULL;
25}
26
27static void litmus_dummy_tick(struct task_struct* tsk)
28{
29}
30
31static long litmus_dummy_admit_task(struct task_struct* tsk)
32{
33 printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
34 tsk->comm, tsk->pid);
35 return -EINVAL;
36}
37
38static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
39{
40}
41
42static void litmus_dummy_task_wake_up(struct task_struct *task)
43{
44}
45
46static void litmus_dummy_task_block(struct task_struct *task)
47{
48}
49
50static void litmus_dummy_task_exit(struct task_struct *task)
51{
52}
53
54static long litmus_dummy_complete_job(void)
55{
56 return -ENOSYS;
57}
58
59static long litmus_dummy_inherit_priority(struct pi_semaphore *sem,
60 struct task_struct *new_owner)
61{
62 return -ENOSYS;
63}
64
65static long litmus_dummy_return_priority(struct pi_semaphore *sem)
66{
67 return -ENOSYS;
68}
69
70static long litmus_dummy_pi_block(struct pi_semaphore *sem,
71 struct task_struct *new_waiter)
72{
73 return -ENOSYS;
74}
75
76
77
78/* The default scheduler plugin. It doesn't do anything and lets Linux do its
79 * job.
80 */
81struct sched_plugin linux_sched_plugin = {
82 .plugin_name = "Linux",
83 .tick = litmus_dummy_tick,
84 .task_new = litmus_dummy_task_new,
85 .task_exit = litmus_dummy_task_exit,
86 .task_wake_up = litmus_dummy_task_wake_up,
87 .task_block = litmus_dummy_task_block,
88 .complete_job = litmus_dummy_complete_job,
89 .schedule = litmus_dummy_schedule,
90 .finish_switch = litmus_dummy_finish_switch,
91 .inherit_priority = litmus_dummy_inherit_priority,
92 .return_priority = litmus_dummy_return_priority,
93 .pi_block = litmus_dummy_pi_block,
94 .admit_task = litmus_dummy_admit_task
95};
96
97/*
98 * The reference to current plugin that is used to schedule tasks within
99 * the system. It stores references to actual function implementations
100 * Should be initialized by calling "init_***_plugin()"
101 */
102struct sched_plugin *litmus = &linux_sched_plugin;
103
104/* the list of registered scheduling plugins */
105static LIST_HEAD(sched_plugins);
106static DEFINE_SPINLOCK(sched_plugins_lock);
107
108#define CHECK(func) {\
109 if (!plugin->func) \
110 plugin->func = litmus_dummy_ ## func;}
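/* Illustration (not part of the patch): CHECK(tick) expands to
 *   { if (!plugin->tick) plugin->tick = litmus_dummy_tick; }
 * so after register_sched_plugin() every callback pointer is non-NULL and
 * callers such as litmus_tick() can invoke litmus->tick(p) unconditionally.
 */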
111
112/* FIXME: get reference to module */
113int register_sched_plugin(struct sched_plugin* plugin)
114{
115 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
116 plugin->plugin_name);
117
118 /* make sure we don't trip over null pointers later */
119 CHECK(finish_switch);
120 CHECK(schedule);
121 CHECK(tick);
122 CHECK(task_wake_up);
123 CHECK(task_exit);
124 CHECK(task_block);
125 CHECK(task_new);
126 CHECK(complete_job);
127 CHECK(inherit_priority);
128 CHECK(return_priority);
129 CHECK(pi_block);
130 CHECK(admit_task);
131
132 spin_lock(&sched_plugins_lock);
133 list_add(&plugin->list, &sched_plugins);
134 spin_unlock(&sched_plugins_lock);
135
136 return 0;
137}
138
139
140/* FIXME: reference counting, etc. */
141struct sched_plugin* find_sched_plugin(const char* name)
142{
143 struct list_head *pos;
144 struct sched_plugin *plugin;
145
146 spin_lock(&sched_plugins_lock);
147 list_for_each(pos, &sched_plugins) {
148 plugin = list_entry(pos, struct sched_plugin, list);
149 if (!strcmp(plugin->plugin_name, name))
150 goto out_unlock;
151 }
152 plugin = NULL;
153
154out_unlock:
155 spin_unlock(&sched_plugins_lock);
156 return plugin;
157}
158
159int print_sched_plugins(char* buf, int max)
160{
161 int count = 0;
162 struct list_head *pos;
163 struct sched_plugin *plugin;
164
165 spin_lock(&sched_plugins_lock);
166 list_for_each(pos, &sched_plugins) {
167 plugin = list_entry(pos, struct sched_plugin, list);
168 count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name);
169 if (max - count <= 0)
170 break;
171 }
172 spin_unlock(&sched_plugins_lock);
173 return count;
174}
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 0000000000..961680d0a6
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,440 @@
1
2/*
3 * kernel/sched_psn_edf.c
4 *
5 * Implementation of the PSN-EDF scheduler plugin.
6 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
7 *
8 * Suspensions and non-preemptable sections are supported.
9 * Priority inheritance is not supported.
10 */
11
12#include <linux/percpu.h>
13#include <linux/sched.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16
17#include <linux/module.h>
18
19#include <litmus/litmus.h>
20#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h>
22
23
24typedef struct {
25 rt_domain_t domain;
26 int cpu;
27 struct task_struct* scheduled; /* only RT tasks */
28 spinlock_t lock; /* protects the domain and
29 * serializes scheduling decisions
30 */
31} psnedf_domain_t;
32
33DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
34
35#define local_edf (&__get_cpu_var(psnedf_domains).domain)
36#define local_pedf (&__get_cpu_var(psnedf_domains))
37#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
38#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
39#define task_edf(task) remote_edf(get_partition(task))
40#define task_pedf(task) remote_pedf(get_partition(task))
41
42
43static void psnedf_domain_init(psnedf_domain_t* pedf,
44 check_resched_needed_t check,
45 int cpu)
46{
47 edf_domain_init(&pedf->domain, check);
48 pedf->cpu = cpu;
49 pedf->lock = SPIN_LOCK_UNLOCKED;
50 pedf->scheduled = NULL;
51}
52
53static void requeue(struct task_struct* t, rt_domain_t *edf)
54{
55 /* only requeue if t is actually running */
56 BUG_ON(!is_running(t));
57
58 if (t->state != TASK_RUNNING)
59 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
60
61 set_rt_flags(t, RT_F_RUNNING);
62 if (is_released(t, sched_clock()))
63 __add_ready(edf, t);
64 else
65 __add_release(edf, t); /* it has got to wait */
66}
67
68/* we assume the lock is being held */
69static void preempt(psnedf_domain_t *pedf)
70{
71 if (smp_processor_id() == pedf->cpu) {
72 if (pedf->scheduled && is_np(pedf->scheduled))
73 request_exit_np(pedf->scheduled);
74 else
75 set_tsk_need_resched(current);
76 } else
77 /* in case it is a remote CPU, we have to defer the
78 * decision to the remote CPU
79 */
80 smp_send_reschedule(pedf->cpu);
81}
82
83/* This check is trivial in partitioned systems as we only have to consider
84 * the CPU of the partition.
85 */
86static int psnedf_check_resched(rt_domain_t *edf)
87{
88 psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
89 int ret = 0;
90
91 /* because this is a callback from rt_domain_t we already hold
92 * the necessary lock for the ready queue
93 */
94 if (edf_preemption_needed(edf, pedf->scheduled)) {
95 preempt(pedf);
96 ret = 1;
97 }
98 return ret;
99}
100
101
102static void psnedf_tick(struct task_struct *t)
103{
104 unsigned long flags;
105 rt_domain_t *edf = local_edf;
106 psnedf_domain_t *pedf = local_pedf;
107
108 /* Check for inconsistency. We don't need the lock for this since
109 * ->scheduled is only changed in schedule, which obviously is not
110 * executing in parallel on this CPU
111 */
112 BUG_ON(is_realtime(t) && t != pedf->scheduled);
113
114 if (is_realtime(t) && budget_exhausted(t)) {
115 if (!is_np(t))
116 set_tsk_need_resched(t);
117 else {
118 TRACE("psnedf_scheduler_tick: "
119 "%d is non-preemptable, "
120 "preemption delayed.\n", t->pid);
121 request_exit_np(t);
122 }
123 }
124
125 spin_lock_irqsave(&pedf->lock, flags);
126 /* FIXME: release via hrtimer */
127 __release_pending(edf);
128 spin_unlock_irqrestore(&pedf->lock, flags);
129}
130
131static void job_completion(struct task_struct* t)
132{
133 TRACE_TASK(t, "job_completion().\n");
134 set_rt_flags(t, RT_F_SLEEP);
135 edf_prepare_for_next_period(t);
136}
137
138static struct task_struct* psnedf_schedule(struct task_struct * prev)
139{
140 psnedf_domain_t* pedf = local_pedf;
141 rt_domain_t* edf = &pedf->domain;
142 struct task_struct* next;
143
144 int out_of_time, sleep, preempt,
145 np, exists, blocks, resched;
146
147 spin_lock(&pedf->lock);
148
149 /* sanity checking */
150 BUG_ON(pedf->scheduled && pedf->scheduled != prev);
151 BUG_ON(pedf->scheduled && !is_realtime(prev));
152
153 /* (0) Determine state */
154 exists = pedf->scheduled != NULL;
155 blocks = exists && !is_running(pedf->scheduled);
156 out_of_time = exists && budget_exhausted(pedf->scheduled);
157 np = exists && is_np(pedf->scheduled);
158 sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
159 preempt = edf_preemption_needed(edf, prev);
160
161 /* If we need to preempt do so.
162 * The following checks set resched to 1 in case of special
163 * circumstances.
164 */
165 resched = preempt;
166
167 /* If a task blocks we have no choice but to reschedule.
168 */
169 if (blocks)
170 resched = 1;
171
172 /* Request a sys_exit_np() call if we would like to preempt but cannot.
173 * Multiple calls to request_exit_np() don't hurt.
174 */
175 if (np && (out_of_time || preempt || sleep))
176 request_exit_np(pedf->scheduled);
177
178 /* Any task that is preemptable and either exhausts its execution
179 * budget or wants to sleep completes. We may have to reschedule after
180 * this.
181 */
182 if (!np && (out_of_time || sleep)) {
183 job_completion(pedf->scheduled);
184 resched = 1;
185 }
186
187 /* The final scheduling decision. Do we need to switch for some reason?
188 * Switch if we are in RT mode and have no task or if we need to
189 * resched.
190 */
191 next = NULL;
192 if ((!np || blocks) && (resched || !exists)) {
193 /* Take care of a previously scheduled
194 * job by taking it out of the Linux runqueue.
195 */
196 if (pedf->scheduled && !blocks)
197 requeue(pedf->scheduled, edf);
198 next = __take_ready(edf);
199 } else
200 /* Only override Linux scheduler if we have a real-time task
201 * scheduled that needs to continue.
202 */
203 if (exists)
204 next = prev;
205
206 if (next)
207 set_rt_flags(next, RT_F_RUNNING);
208
209 pedf->scheduled = next;
210 spin_unlock(&pedf->lock);
211 return next;
212}
213
214
215/* Prepare a task for running in RT mode
216 * Enqueues the task into the master queue data structure.
217 */
218static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
219{
220 rt_domain_t* edf = task_edf(t);
221 psnedf_domain_t* pedf = task_pedf(t);
222 unsigned long flags;
223
224 TRACE("[%d] psn edf: prepare new %d on CPU %d\n",
225 smp_processor_id(), t->pid, get_partition(t));
226
227 /* setup job parameters */
228 edf_release_at(t, sched_clock());
229
230 /* The task should be running in the queue, otherwise signal
231 * code will try to wake it up with fatal consequences.
232 */
233 spin_lock_irqsave(&pedf->lock, flags);
234 if (running) {
235 /* there shouldn't be anything else running at the time */
236 BUG_ON(pedf->scheduled);
237 pedf->scheduled = t;
238 } else {
239 requeue(t, edf);
240 /* maybe we have to reschedule */
241 preempt(pedf);
242 }
243 spin_unlock_irqrestore(&pedf->lock, flags);
244}
245
246static void psnedf_task_wake_up(struct task_struct *task)
247{
248 unsigned long flags;
249 psnedf_domain_t* pedf = task_pedf(task);
250 rt_domain_t* edf = task_edf(task);
251 lt_t now;
252
253 spin_lock_irqsave(&pedf->lock, flags);
254 BUG_ON(in_list(&task->rt_list));
255 /* We need to take suspensions because of semaphores into
256 * account! If a job resumes after being suspended due to acquiring
257 * a semaphore, it should never be treated as a new job release.
258 *
259 * FIXME: This should be handled in a more predictable, userspace-controlled way.
260 */
261 now = sched_clock();
262 if (is_tardy(task, now) &&
263 get_rt_flags(task) != RT_F_EXIT_SEM) {
264 /* new sporadic release */
265 edf_release_at(task, now);
266 sched_trace_job_release(task);
267 }
268 requeue(task, edf);
269 spin_unlock_irqrestore(&pedf->lock, flags);
270}
271
272static void psnedf_task_block(struct task_struct *t)
273{
274 /* only running tasks can block, thus t is in no queue */
275 BUG_ON(!is_realtime(t));
276 BUG_ON(in_list(&t->rt_list));
277}
278
279static void psnedf_task_exit(struct task_struct * t)
280{
281 unsigned long flags;
282 psnedf_domain_t* pedf = task_pedf(t);
283
284 spin_lock_irqsave(&pedf->lock, flags);
285
286 if (in_list(&t->rt_list))
287 /* dequeue */
288 list_del(&t->rt_list);
289 preempt(pedf);
290 spin_unlock_irqrestore(&pedf->lock, flags);
291}
292
293static long psnedf_pi_block(struct pi_semaphore *sem,
294 struct task_struct *new_waiter)
295{
296 psnedf_domain_t* pedf;
297 rt_domain_t* edf;
298 struct task_struct* t;
299 int cpu = get_partition(new_waiter);
300
301 BUG_ON(!new_waiter);
302
303 if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
304 TRACE_TASK(new_waiter, " boosts priority\n");
305 pedf = task_pedf(new_waiter);
306 edf = task_edf(new_waiter);
307
308 /* interrupts already disabled */
309 spin_lock(&pedf->lock);
310
311 /* store new highest-priority task */
312 sem->hp.cpu_task[cpu] = new_waiter;
313 if (sem->holder &&
314 get_partition(sem->holder) == get_partition(new_waiter)) {
315 /* let holder inherit */
316 sem->holder->rt_param.inh_task = new_waiter;
317 t = sem->holder;
318 if (in_list(&t->rt_list)) {
319 /* queued in domain*/
320 list_del(&t->rt_list);
321 /* readd to make priority change take place */
322 if (is_released(t, sched_clock()))
323 __add_ready(edf, t);
324 else
325 __add_release(edf, t);
326 }
327 }
328
329 /* check if we need to reschedule */
330 if (edf_preemption_needed(edf, current))
331 preempt(pedf);
332
333 spin_unlock(&pedf->lock);
334 }
335
336 return 0;
337}
338
339static long psnedf_inherit_priority(struct pi_semaphore *sem,
340 struct task_struct *new_owner)
341{
342 int cpu = get_partition(new_owner);
343
344 new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
345 if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
346 TRACE_TASK(new_owner,
347 "inherited priority from %s/%d\n",
348 sem->hp.cpu_task[cpu]->comm,
349 sem->hp.cpu_task[cpu]->pid);
350 } else
351 TRACE_TASK(new_owner,
352 "cannot inherit priority: "
353 "no higher priority job waits on this CPU!\n");
354 /* make new owner non-preemptable as required by FMLP under
355 * PSN-EDF.
356 */
357 make_np(new_owner);
358 return 0;
359}
360
361
362/* This function is called on a semaphore release, and assumes that
363 * the current task is also the semaphore holder.
364 */
365static long psnedf_return_priority(struct pi_semaphore *sem)
366{
367 struct task_struct* t = current;
368 psnedf_domain_t* pedf = task_pedf(t);
369 rt_domain_t* edf = task_edf(t);
370 int ret = 0;
371 int cpu = get_partition(current);
372
373
374 /* Find new highest-priority semaphore task
375 * if holder task is the current hp.cpu_task[cpu].
376 *
377 * Calling function holds sem->wait.lock.
378 */
379 if (t == sem->hp.cpu_task[cpu])
380 edf_set_hp_cpu_task(sem, cpu);
381
382 take_np(t);
383 if (current->rt_param.inh_task) {
384 TRACE_CUR("return priority of %s/%d\n",
385 current->rt_param.inh_task->comm,
386 current->rt_param.inh_task->pid);
387 spin_lock(&pedf->lock);
388
389 /* Reset inh_task to NULL. */
390 current->rt_param.inh_task = NULL;
391
392 /* check if we need to reschedule */
393 if (edf_preemption_needed(edf, current))
394 preempt(pedf);
395
396 spin_unlock(&pedf->lock);
397 } else
398 TRACE_CUR(" no priority to return %p\n", sem);
399
400 return ret;
401}
402
403
404static long psnedf_admit_task(struct task_struct* tsk)
405{
406 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
407}
408
409/* Plugin object */
410static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
411 .plugin_name = "PSN-EDF",
412 .tick = psnedf_tick,
413 .task_new = psnedf_task_new,
414 .complete_job = edf_complete_job,
415 .task_exit = psnedf_task_exit,
416 .schedule = psnedf_schedule,
417 .task_wake_up = psnedf_task_wake_up,
418 .task_block = psnedf_task_block,
419 .pi_block = psnedf_pi_block,
420 .inherit_priority = psnedf_inherit_priority,
421 .return_priority = psnedf_return_priority,
422 .admit_task = psnedf_admit_task
423};
424
425
426static int __init init_psn_edf(void)
427{
428 int i;
429
430 for (i = 0; i < NR_CPUS; i++)
431 {
432 psnedf_domain_init(remote_pedf(i),
433 psnedf_check_resched, i);
434 }
435 return register_sched_plugin(&psn_edf_plugin);
436}
437
438
439
440module_init(init_psn_edf);
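
For illustration, requeue() near the top of this file implements a simple
two-queue policy: a job whose release time has already passed is added to the
ready queue, while a future release waits on the release queue. The sketch
below (plain userspace C; it assumes, as the rt_domain code does, that
"released" means the job's release time is not later than the current time)
shows just that decision:

#include <stdio.h>

typedef unsigned long long lt_t;            /* stand-in for the kernel's lt_t */

struct job { lt_t release, deadline; };

static const char *requeue_target(const struct job *j, lt_t now)
{
        return j->release <= now ? "ready queue" : "release queue";
}

int main(void)
{
        struct job early = { .release = 100, .deadline = 200 };
        struct job late  = { .release = 900, .deadline = 1000 };
        lt_t now = 500;

        printf("early job -> %s\n", requeue_target(&early, now));
        printf("late job  -> %s\n", requeue_target(&late, now));
        return 0;
}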
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
new file mode 100644
index 0000000000..0976e830ad
--- /dev/null
+++ b/litmus/sched_trace.c
@@ -0,0 +1,541 @@
1/* sched_trace.c -- record scheduling events to a byte stream.
2 *
3 * TODO: Move ring buffer to a lockfree implementation.
4 */
5
6#include <linux/spinlock.h>
7#include <linux/fs.h>
8#include <linux/cdev.h>
9#include <asm/semaphore.h>
10#include <asm/uaccess.h>
11#include <linux/module.h>
12
13#include <litmus/sched_trace.h>
14#include <litmus/litmus.h>
15
16
17typedef struct {
18 /* guard read and write pointers */
19 spinlock_t lock;
20 /* guard against concurrent freeing of buffer */
21 rwlock_t del_lock;
22
23 /* memory allocated for ring buffer */
24 unsigned long order;
25 char* buf;
26 char* end;
27
28 /* Read/write pointers. They may not cross.
29 * They point to the position of the next write and
30 * the last read.
31 */
32 char* writep;
33 char* readp;
34
35} ring_buffer_t;
36
37#define EMPTY_RING_BUFFER { \
38 .lock = SPIN_LOCK_UNLOCKED, \
39 .del_lock = RW_LOCK_UNLOCKED, \
40 .buf = NULL, \
41 .end = NULL, \
42 .writep = NULL, \
43 .readp = NULL \
44}
45
46void rb_init(ring_buffer_t* buf)
47{
48 *buf = (ring_buffer_t) EMPTY_RING_BUFFER;
49}
50
51int rb_alloc_buf(ring_buffer_t* buf, unsigned long order)
52{
53 unsigned long flags;
54 int error = 0;
55 char *mem;
56
57 /* do memory allocation while not atomic */
58 mem = (char *) __get_free_pages(GFP_KERNEL, order);
59 if (!mem)
60 return -ENOMEM;
61 write_lock_irqsave(&buf->del_lock, flags);
62 BUG_ON(buf->buf);
63 buf->buf = mem;
64 buf->end = buf->buf + PAGE_SIZE * (1 << order) - 1;
65 memset(buf->buf, 0xff, buf->end - buf->buf);
66 buf->order = order;
67 buf->writep = buf->buf + 1;
68 buf->readp = buf->buf;
69 write_unlock_irqrestore(&buf->del_lock, flags);
70 return error;
71}
72
73int rb_free_buf(ring_buffer_t* buf)
74{
75 unsigned long flags;
76 int error = 0;
77 write_lock_irqsave(&buf->del_lock, flags);
78 BUG_ON(!buf->buf);
79 free_pages((unsigned long) buf->buf, buf->order);
80 buf->buf = NULL;
81 buf->end = NULL;
82 buf->writep = NULL;
83 buf->readp = NULL;
84 write_unlock_irqrestore(&buf->del_lock, flags);
85 return error;
86}
87
88/* Assumption: concurrent writes are serialized externally
89 *
90 * Will only succeed if there is enough space for all len bytes.
91 */
92int rb_put(ring_buffer_t* buf, char* mem, size_t len)
93{
94 unsigned long flags;
95 char* r , *w;
96 int error = 0;
97 read_lock_irqsave(&buf->del_lock, flags);
98 if (!buf->buf) {
99 error = -ENODEV;
100 goto out;
101 }
102 spin_lock(&buf->lock);
103 r = buf->readp;
104 w = buf->writep;
105 spin_unlock(&buf->lock);
106 if (r < w && buf->end - w >= len - 1) {
107 /* easy case: there is enough space in the buffer
108 * to write it in one continuous chunk */
109 memcpy(w, mem, len);
110 w += len;
111 if (w > buf->end)
112 /* special case: fit exactly into buffer
113 * w is now buf->end + 1
114 */
115 w = buf->buf;
116 } else if (w < r && r - w >= len) { /* >= len because may not cross */
117 /* we are constrained by the read pointer but there
118 * is enough space
119 */
120 memcpy(w, mem, len);
121 w += len;
122 } else if (r <= w && buf->end - w < len - 1) {
123 /* the wrap around case: there may or may not be space */
124 if ((buf->end - w) + (r - buf->buf) >= len - 1) {
125 /* copy chunk that fits at the end */
126 memcpy(w, mem, buf->end - w + 1);
127 mem += buf->end - w + 1;
128 len -= (buf->end - w + 1);
129 w = buf->buf;
130 /* copy the rest */
131 memcpy(w, mem, len);
132 w += len;
133 }
134 else
135 error = -ENOMEM;
136 } else {
137 error = -ENOMEM;
138 }
139 if (!error) {
140 spin_lock(&buf->lock);
141 buf->writep = w;
142 spin_unlock(&buf->lock);
143 }
144 out:
145 read_unlock_irqrestore(&buf->del_lock, flags);
146 return error;
147}
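/* Illustration (not part of the patch): a worked wrap-around example for
 * rb_put().  With an 8-byte buffer (buf = 0, end = 7), readp r = 5 and
 * writep w = 6, a 4-byte record does not fit contiguously, since
 * end - w = 1 < len - 1 = 3.  However, (end - w) + (r - buf) = 1 + 5 = 6
 * >= len - 1, so two bytes are copied into slots 6..7, w wraps to buf,
 * the remaining two bytes land in slots 0..1, and w ends up at 2, still
 * behind r = 5 -- the pointers never cross.
 */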
148
149/* Assumption: concurrent reads are serialized externally */
150int rb_get(ring_buffer_t* buf, char* mem, size_t len)
151{
152 unsigned long flags;
153 char* r , *w;
154 int error = 0;
155 read_lock_irqsave(&buf->del_lock, flags);
156 if (!buf->buf) {
157 error = -ENODEV;
158 goto out;
159 }
160 spin_lock(&buf->lock);
161 r = buf->readp;
162 w = buf->writep;
163 spin_unlock(&buf->lock);
164
165 if (w <= r && buf->end - r >= len) {
166 /* easy case: there is enough data in the buffer
167 * to get it in one chunk*/
168 memcpy(mem, r + 1, len);
169 r += len;
170 error = len;
171
172 } else if (r + 1 < w && w - r - 1 >= len) {
173 /* we are constrained by the write pointer but
174 * there is enough data
175 */
176 memcpy(mem, r + 1, len);
177 r += len;
178 error = len;
179
180 } else if (r + 1 < w && w - r - 1 < len) {
181 /* we are constrained by the write pointer and there
182 * is not enough data
183 */
184 memcpy(mem, r + 1, w - r - 1);
185 error = w - r - 1;
186 r += w - r - 1;
187
188 } else if (w <= r && buf->end - r < len) {
189 /* the wrap around case: there may or may not be enough data
190 * first let's get what is available
191 */
192 memcpy(mem, r + 1, buf->end - r);
193 error += (buf->end - r);
194 mem += (buf->end - r);
195 len -= (buf->end - r);
196 r += (buf->end - r);
197
198 if (w > buf->buf) {
199 /* there is more to get */
200 r = buf->buf - 1;
201 if (w - r >= len) {
202 /* plenty */
203 memcpy(mem, r + 1, len);
204 error += len;
205 r += len;
206 } else {
207 memcpy(mem, r + 1, w - r - 1);
208 error += w - r - 1;
209 r += w - r - 1;
210 }
211 }
212 } /* nothing available */
213
214 if (error > 0) {
215 spin_lock(&buf->lock);
216 buf->readp = r;
217 spin_unlock(&buf->lock);
218 }
219 out:
220 read_unlock_irqrestore(&buf->del_lock, flags);
221 return error;
222}
223
224
225
226/******************************************************************************/
227/* DEVICE FILE DRIVER */
228/******************************************************************************/
229
230
231
232/* Allocate a buffer of about 1 MB per CPU.
233 *
234 */
235#define BUFFER_ORDER 8
236
237typedef struct {
238 ring_buffer_t buf;
239 atomic_t reader_cnt;
240 struct semaphore reader_mutex;
241} trace_buffer_t;
242
243
244/* This does not initialize the semaphore!! */
245
246#define EMPTY_TRACE_BUFFER \
247 { .buf = EMPTY_RING_BUFFER, .reader_cnt = ATOMIC_INIT(0)}
248
249static DEFINE_PER_CPU(trace_buffer_t, trace_buffer);
250
251#ifdef CONFIG_SCHED_DEBUG_TRACE
252static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED;
253#endif
254static trace_buffer_t log_buffer = EMPTY_TRACE_BUFFER;
255
256static void init_buffers(void)
257{
258 int i;
259
260 for (i = 0; i < NR_CPUS; i++) {
261 rb_init(&per_cpu(trace_buffer, i).buf);
262 init_MUTEX(&per_cpu(trace_buffer, i).reader_mutex);
263 atomic_set(&per_cpu(trace_buffer, i).reader_cnt, 0);
264 }
265 /* only initialize the mutex, the rest was initialized as part
266 * of the static initialization macro
267 */
268 init_MUTEX(&log_buffer.reader_mutex);
269}
270
271static int trace_release(struct inode *in, struct file *filp)
272{
273 int error = -EINVAL;
274 trace_buffer_t* buf = filp->private_data;
275
276 BUG_ON(!filp->private_data);
277
278 if (down_interruptible(&buf->reader_mutex)) {
279 error = -ERESTARTSYS;
280 goto out;
281 }
282
283 /* last release must deallocate buffers */
284 if (atomic_dec_return(&buf->reader_cnt) == 0) {
285 error = rb_free_buf(&buf->buf);
286 }
287
288 up(&buf->reader_mutex);
289 out:
290 return error;
291}
292
293static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
294 loff_t *f_pos)
295{
296 /* we ignore f_pos, this is strictly sequential */
297
298 ssize_t error = -EINVAL;
299 char* mem;
300 trace_buffer_t *buf = filp->private_data;
301
302 if (down_interruptible(&buf->reader_mutex)) {
303 error = -ERESTARTSYS;
304 goto out;
305 }
306
307 if (len > 64 * 1024)
308 len = 64 * 1024;
309 mem = kmalloc(len, GFP_KERNEL);
310 if (!mem) {
311 error = -ENOMEM;
312 goto out_unlock;
313 }
314
315 error = rb_get(&buf->buf, mem, len);
316 while (!error) {
317 set_current_state(TASK_INTERRUPTIBLE);
318 schedule_timeout(110);
319 if (signal_pending(current))
320 error = -ERESTARTSYS;
321 else
322 error = rb_get(&buf->buf, mem, len);
323 }
324
325 if (error > 0 && copy_to_user(to, mem, error))
326 error = -EFAULT;
327
328 kfree(mem);
329 out_unlock:
330 up(&buf->reader_mutex);
331 out:
332 return error;
333}
334
335
336/* trace_open - Open one of the per-CPU sched_trace buffers.
337 */
338static int trace_open(struct inode *in, struct file *filp)
339{
340 int error = -EINVAL;
341 int cpu = MINOR(in->i_rdev);
342 trace_buffer_t* buf;
343
344 if (!cpu_online(cpu)) {
345 printk(KERN_WARNING "sched trace: "
346 "CPU #%d is not online. (open failed)\n", cpu);
347 error = -ENODEV;
348 goto out;
349 }
350
351 buf = &per_cpu(trace_buffer, cpu);
352
353 if (down_interruptible(&buf->reader_mutex)) {
354 error = -ERESTARTSYS;
355 goto out;
356 }
357
358 /* first open must allocate buffers */
359 if (atomic_inc_return(&buf->reader_cnt) == 1) {
360 if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
361 {
362 atomic_dec(&buf->reader_cnt);
363 goto out_unlock;
364 }
365 }
366
367 error = 0;
368 filp->private_data = buf;
369
370 out_unlock:
371 up(&buf->reader_mutex);
372 out:
373 return error;
374}
375
376/* log_open - open the global log message ring buffer.
377 */
378static int log_open(struct inode *in, struct file *filp)
379{
380 int error = -EINVAL;
381 trace_buffer_t* buf;
382
383 buf = &log_buffer;
384
385 if (down_interruptible(&buf->reader_mutex)) {
386 error = -ERESTARTSYS;
387 goto out;
388 }
389
390 /* first open must allocate buffers */
391 if (atomic_inc_return(&buf->reader_cnt) == 1) {
392 if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
393 {
394 atomic_dec(&buf->reader_cnt);
395 goto out_unlock;
396 }
397 }
398
399 error = 0;
400 filp->private_data = buf;
401
402 out_unlock:
403 up(&buf->reader_mutex);
404 out:
405 return error;
406}
407
408/******************************************************************************/
409/* Device Registration */
410/******************************************************************************/
411
412/* the major numbers are from the unassigned/local use block
413 *
414 * This should be converted to dynamic allocation at some point...
415 */
416#define TRACE_MAJOR 250
417#define LOG_MAJOR 251
418
419/* trace_fops - The file operations for accessing the per-CPU scheduling event
420 * trace buffers.
421 */
422struct file_operations trace_fops = {
423 .owner = THIS_MODULE,
424 .open = trace_open,
425 .release = trace_release,
426 .read = trace_read,
427};
428
429/* log_fops - The file operations for accessing the global LITMUS log message
430 * buffer.
431 *
432 * Except for opening the device file it uses the same operations as trace_fops.
433 */
434struct file_operations log_fops = {
435 .owner = THIS_MODULE,
436 .open = log_open,
437 .release = trace_release,
438 .read = trace_read,
439};
440
441static int __init register_buffer_dev(const char* name,
442 struct file_operations* fops,
443 int major, int count)
444{
445 dev_t trace_dev;
446 struct cdev *cdev;
447 int error = 0;
448
449 trace_dev = MKDEV(major, 0);
450 error = register_chrdev_region(trace_dev, count, name);
451 if (error)
452 {
453 printk(KERN_WARNING "sched trace: "
454 "Could not register major/minor number %d\n", major);
455 return error;
456 }
457 cdev = cdev_alloc();
458 if (!cdev) {
459 printk(KERN_WARNING "sched trace: "
460 "Could not get a cdev for %s.\n", name);
461 return -ENOMEM;
462 }
463 cdev->owner = THIS_MODULE;
464 cdev->ops = fops;
465 error = cdev_add(cdev, trace_dev, count);
466 if (error) {
467 printk(KERN_WARNING "sched trace: "
468 "add_cdev failed for %s.\n", name);
469 return -ENOMEM;
470 }
471 return error;
472
473}
474
475static int __init init_sched_trace(void)
476{
477 int error1 = 0, error2 = 0;
478
479 printk("Initializing scheduler trace device\n");
480 init_buffers();
481
482 error1 = register_buffer_dev("schedtrace", &trace_fops,
483 TRACE_MAJOR, NR_CPUS);
484
485 error2 = register_buffer_dev("litmus_log", &log_fops,
486 LOG_MAJOR, 1);
487 if (error1 || error2)
488 return min(error1, error2);
489 else
490 return 0;
491}
492
493module_init(init_sched_trace);
494
495/******************************************************************************/
496/* KERNEL API */
497/******************************************************************************/
498
499/* The per-CPU LITMUS log buffer. Don't put it on the stack, it is too big for
500 * that and the kernel gets very picky with nested interrupts and small stacks.
501 */
502
503#ifdef CONFIG_SCHED_DEBUG_TRACE
504
505#define MSG_SIZE 255
506static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
507
508/* sched_trace_log_message - This is the only function that accesses the
509 * log buffer inside the kernel for writing.
510 * Concurrent access to it is serialized via the
511 * log_buffer_lock.
512 *
513 * The maximum length of a formatted message is 255.
514 */
515void sched_trace_log_message(const char* fmt, ...)
516{
517 unsigned long flags;
518 va_list args;
519 size_t len;
520 char* buf;
521
522 va_start(args, fmt);
523 local_irq_save(flags);
524
525 /* format message */
526 buf = __get_cpu_var(fmt_buffer);
527 len = vscnprintf(buf, MSG_SIZE, fmt, args);
528
529 spin_lock(&log_buffer_lock);
530 /* Don't copy the trailing null byte, we don't want null bytes
531 * in a text file.
532 */
533 rb_put(&log_buffer.buf, buf, len);
534 spin_unlock(&log_buffer_lock);
535
536 local_irq_restore(flags);
537 va_end(args);
538}
539
540#endif
541
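
For illustration, a minimal userspace reader for the log device registered
above might look as follows. It assumes a device node created by hand, e.g.
"mknod /dev/litmus_log c 251 0" (LOG_MAJOR is 251 with a single minor); the
path itself is the reader's choice, not something the kernel code mandates:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/dev/litmus_log", O_RDONLY);

        if (fd < 0) {
                perror("open /dev/litmus_log");
                return 1;
        }
        /* log_fops.read blocks (polling in 110-jiffy steps) until the
         * in-kernel ring buffer has data, so a plain read loop suffices. */
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, (size_t)n, stdout);
        close(fd);
        return 0;
}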
diff --git a/litmus/trace.c b/litmus/trace.c
new file mode 100644
index 0000000000..90ef443bd9
--- /dev/null
+++ b/litmus/trace.c
@@ -0,0 +1,303 @@
1#include <linux/sched.h>
2#include <linux/fs.h>
3#include <linux/cdev.h>
4#include <asm/semaphore.h>
5#include <asm/uaccess.h>
6#include <linux/module.h>
7
8#include <litmus/trace.h>
9
10/******************************************************************************/
11/* Allocation */
12/******************************************************************************/
13
14struct ft_buffer* trace_ts_buf = NULL;
15
16static unsigned int ts_seq_no = 0;
17
18feather_callback void save_timestamp(unsigned long event)
19{
20 unsigned int seq_no = fetch_and_inc((int *) &ts_seq_no);
21 struct timestamp *ts;
22 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
23 ts->event = event;
24 ts->timestamp = ft_read_tsc();
25 ts->seq_no = seq_no;
26 ts->cpu = raw_smp_processor_id();
27 ft_buffer_finish_write(trace_ts_buf, ts);
28 }
29}
30
31static struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
32{
33 struct ft_buffer* buf;
34 size_t total = (size + 1) * count;
35 char* mem;
36 int order = 0, pages = 1;
37
38 buf = kmalloc(sizeof(struct ft_buffer), GFP_KERNEL);
39 if (!buf)
40 return NULL;
41
42 total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
43 while (pages < total) {
44 order++;
45 pages *= 2;
46 }
47
48 mem = (char*) __get_free_pages(GFP_KERNEL, order);
49 if (!mem) {
50 kfree(buf);
51 return NULL;
52 }
53
54 if (!init_ft_buffer(buf, count, size,
55 mem + (count * size), /* markers at the end */
56 mem)) { /* buffer objects */
57 free_pages((unsigned long) mem, order);
58 kfree(buf);
59 return NULL;
60 }
61 return buf;
62}
63
64static void free_ft_buffer(struct ft_buffer* buf)
65{
66 int order = 0, pages = 1;
67 size_t total;
68
69 if (buf) {
70 total = (buf->slot_size + 1) * buf->slot_count;
71 total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
72 while (pages < total) {
73 order++;
74 pages *= 2;
75 }
76 free_pages((unsigned long) buf->buffer_mem, order);
77 kfree(buf);
78 }
79}
80
81
82/******************************************************************************/
83/* DEVICE FILE DRIVER */
84/******************************************************************************/
85
86#define NO_TIMESTAMPS 262144
87
88static DECLARE_MUTEX(feather_lock);
89static int use_count = 0;
90
91static int trace_release(struct inode *in, struct file *filp)
92{
93 int err = -EINVAL;
94
95 if (down_interruptible(&feather_lock)) {
96 err = -ERESTARTSYS;
97 goto out;
98 }
99
100 printk(KERN_ALERT "%s/%d disconnects from feather trace device. "
101 "use_count=%d\n",
102 current->comm, current->pid, use_count);
103
104 if (use_count == 1) {
105 /* disable events */
106 ft_disable_all_events();
107
108 /* wait for any pending events to complete */
109 set_current_state(TASK_UNINTERRUPTIBLE);
110 schedule_timeout(HZ);
111
112 printk(KERN_ALERT "Failed trace writes: %u\n",
113 trace_ts_buf->failed_writes);
114
115 free_ft_buffer(trace_ts_buf);
116 trace_ts_buf = NULL;
117 }
118
119 use_count--;
120 up(&feather_lock);
121out:
122 return err;
123}
124
125
126static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
127 loff_t *f_pos)
128{
129 /* we ignore f_pos, this is strictly sequential */
130 ssize_t error = 0;
131 struct timestamp ts;
132
133 if (down_interruptible(&feather_lock)) {
134 error = -ERESTARTSYS;
135 goto out;
136 }
137
138
139 while (len >= sizeof(struct timestamp)) {
140 if (ft_buffer_read(trace_ts_buf, &ts)) {
141 if (copy_to_user(to, &ts, sizeof(struct timestamp))) {
142 error = -EFAULT;
143 break;
144 } else {
145 len -= sizeof(struct timestamp);
146 to += sizeof(struct timestamp);
147 error += sizeof(struct timestamp);
148 }
149 } else {
150 set_current_state(TASK_INTERRUPTIBLE);
151 schedule_timeout(50);
152 if (signal_pending(current)) {
153 error = -ERESTARTSYS;
154 break;
155 }
156 }
157 }
158 up(&feather_lock);
159out:
160 return error;
161}
162
163#define ENABLE_CMD 0
164#define DISABLE_CMD 1
165
166static ssize_t trace_write(struct file *filp, const char __user *from,
167 size_t len, loff_t *f_pos)
168{
169 ssize_t error = -EINVAL;
170 unsigned long cmd;
171 unsigned long id;
172
173 if (len % sizeof(long) || len < 2 * sizeof(long))
174 goto out;
175
176 if (copy_from_user(&cmd, from, sizeof(long))) {
177 error = -EFAULT;
178 goto out;
179 }
180 len -= sizeof(long);
181 from += sizeof(long);
182
183 if (cmd != ENABLE_CMD && cmd != DISABLE_CMD)
184 goto out;
185
186 if (down_interruptible(&feather_lock)) {
187 error = -ERESTARTSYS;
188 goto out;
189 }
190
191 error = sizeof(long);
192 while (len) {
193 if (copy_from_user(&id, from, sizeof(long))) {
194 error = -EFAULT;
195 goto out;
196 }
197 len -= sizeof(long);
198 from += sizeof(long);
199 if (cmd) {
200 printk(KERN_INFO
201 "Disabling feather-trace event %lu.\n", id);
202 ft_disable_event(id);
203 } else {
204 printk(KERN_INFO
205 "Enabling feather-trace event %lu.\n", id);
206 ft_enable_event(id);
207 }
208 error += sizeof(long);
209 }
210
211 up(&feather_lock);
212 out:
213 return error;
214}
215
216static int trace_open(struct inode *in, struct file *filp)
217{
218 int err = 0;
219 unsigned int count = NO_TIMESTAMPS;
220
221 if (down_interruptible(&feather_lock)) {
222 err = -ERESTARTSYS;
223 goto out;
224 }
225
226 while (count && !trace_ts_buf) {
227 printk("trace: trying to allocate %u time stamps.\n", count);
228 trace_ts_buf = alloc_ft_buffer(count, sizeof(struct timestamp));
229 count /= 2;
230 }
231 if (!trace_ts_buf)
232 err = -ENOMEM;
233 else
234 use_count++;
235
236 up(&feather_lock);
237out:
238 return err;
239}
240
241/******************************************************************************/
242/* Device Registration */
243/******************************************************************************/
244
245#define FT_TRACE_MAJOR 252
246
247struct file_operations ft_trace_fops = {
248 .owner = THIS_MODULE,
249 .open = trace_open,
250 .release = trace_release,
251 .write = trace_write,
252 .read = trace_read,
253};
254
255
256static int __init register_buffer_dev(const char* name,
257 struct file_operations* fops,
258 int major, int count)
259{
260 dev_t trace_dev;
261 struct cdev *cdev;
262 int error = 0;
263
264 trace_dev = MKDEV(major, 0);
265 error = register_chrdev_region(trace_dev, count, name);
266 if (error)
267 {
268 printk(KERN_WARNING "trace: "
269 "Could not register major/minor number %d\n", major);
270 return error;
271 }
272 cdev = cdev_alloc();
273 if (!cdev) {
274 printk(KERN_WARNING "trace: "
275 "Could not get a cdev for %s.\n", name);
276 return -ENOMEM;
277 }
278 cdev->owner = THIS_MODULE;
279 cdev->ops = fops;
280 error = cdev_add(cdev, trace_dev, count);
281 if (error) {
282 printk(KERN_WARNING "trace: "
283 "add_cdev failed for %s.\n", name);
284 return -ENOMEM;
285 }
286 return error;
287
288}
289
290static int __init init_sched_trace(void)
291{
292 int error = 0;
293
294 printk("Initializing Feather-Trace device\n");
295 /* dummy entry to make linker happy */
296 ft_event0(666, save_timestamp);
297
298 error = register_buffer_dev("ft_trace", &ft_trace_fops,
299 FT_TRACE_MAJOR, 1);
300 return error;
301}
302
303module_init(init_sched_trace);
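
For illustration, the Feather-Trace device above is driven from userspace by
writing an array of longs (a command word, 0 = enable or 1 = disable, followed
by event ids) and by reading whole struct timestamp records back. The sketch
below assumes a node created with "mknod /dev/ft_trace c 252 0"; the struct
layout is an assumption for the example only, the authoritative definition
lives in include/litmus/trace.h:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

struct timestamp {                      /* assumed layout, see litmus/trace.h */
        unsigned long long timestamp;
        unsigned int       seq_no;
        int                cpu;
        unsigned long      event;
};

int main(void)
{
        long cmd[2] = { 0 /* ENABLE_CMD */, 666 /* dummy event from init */ };
        struct timestamp ts;
        int fd = open("/dev/ft_trace", O_RDWR);

        if (fd < 0) {
                perror("open /dev/ft_trace");
                return 1;
        }
        if (write(fd, cmd, sizeof(cmd)) != sizeof(cmd)) {
                perror("enable event");
                return 1;
        }
        /* trace_read() polls in 50-jiffy steps until timestamps arrive */
        if (read(fd, &ts, sizeof(ts)) == sizeof(ts))
                printf("event %lu on cpu %d, seq %u\n",
                       ts.event, ts.cpu, ts.seq_no);
        close(fd);
        return 0;
}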