author | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2008-02-13 14:13:15 -0500 |
---|---|---|
committer | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2008-02-13 14:13:15 -0500 |
commit | 8ce9b0cb97d9266b3b64b2b57835e17f6e03f585 (patch) | |
tree | a6ef1acaf9c9dc116ccc9f24f5233fa7d25cd426 | |
parent | 49914084e797530d9baaf51df9eda77babc98fa8 (diff) | |
LITMUS 2008: Initial Port
This introduces the core changes ported from LITMUS 2007.
The kernel seems to work under QEMU, but many bugs probably remain.
38 files changed, 5434 insertions, 7 deletions
diff --git a/Makefile b/Makefile | |||
@@ -597,7 +597,7 @@ export mod_strip_cmd | |||
597 | 597 | ||
598 | 598 | ||
599 | ifeq ($(KBUILD_EXTMOD),) | 599 | ifeq ($(KBUILD_EXTMOD),) |
600 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ | 600 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/ |
601 | 601 | ||
602 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ | 602 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ |
603 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ | 603 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 80b7ba4056..f99330fed0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1620,3 +1620,5 @@ source "security/Kconfig" | |||
1620 | source "crypto/Kconfig" | 1620 | source "crypto/Kconfig" |
1621 | 1621 | ||
1622 | source "lib/Kconfig" | 1622 | source "lib/Kconfig" |
1623 | |||
1624 | source "litmus/Kconfig" | ||
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 8344c70adf..9c9ffbe8b6 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -324,3 +324,18 @@ ENTRY(sys_call_table) | |||
324 | .long sys_timerfd | 324 | .long sys_timerfd |
325 | .long sys_eventfd | 325 | .long sys_eventfd |
326 | .long sys_fallocate | 326 | .long sys_fallocate |
327 | /* LITMUS */ | ||
328 | .long sys_set_rt_task_param /* 325 */ | ||
329 | .long sys_get_rt_task_param | ||
330 | .long sys_complete_job | ||
331 | .long sys_register_np_flag | ||
332 | .long sys_exit_np | ||
333 | .long sys_od_open /* 330 */ | ||
334 | .long sys_od_close | ||
335 | .long sys_pi_down | ||
336 | .long sys_pi_up | ||
337 | .long sys_srp_down | ||
338 | .long sys_srp_up /* 335 */ | ||
339 | .long sys_reg_task_srp_sem | ||
340 | .long sys_query_job_no | ||
341 | .long sys_wait_for_job_release /* 338 */ | ||
diff --git a/fs/exec.c b/fs/exec.c | |||
@@ -56,6 +56,8 @@ | |||
56 | #include <asm/mmu_context.h> | 56 | #include <asm/mmu_context.h> |
57 | #include <asm/tlb.h> | 57 | #include <asm/tlb.h> |
58 | 58 | ||
59 | #include <litmus/litmus.h> | ||
60 | |||
59 | #ifdef CONFIG_KMOD | 61 | #ifdef CONFIG_KMOD |
60 | #include <linux/kmod.h> | 62 | #include <linux/kmod.h> |
61 | #endif | 63 | #endif |
@@ -1309,6 +1311,7 @@ int do_execve(char * filename, | |||
1309 | goto out_kfree; | 1311 | goto out_kfree; |
1310 | 1312 | ||
1311 | sched_exec(); | 1313 | sched_exec(); |
1314 | litmus_exec(); | ||
1312 | 1315 | ||
1313 | bprm->file = file; | 1316 | bprm->file = file; |
1314 | bprm->filename = filename; | 1317 | bprm->filename = filename; |
diff --git a/fs/inode.c b/fs/inode.c index ed35383d0b..ef71ea06c6 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -220,6 +220,8 @@ void inode_init_once(struct inode *inode) | |||
220 | INIT_LIST_HEAD(&inode->inotify_watches); | 220 | INIT_LIST_HEAD(&inode->inotify_watches); |
221 | mutex_init(&inode->inotify_mutex); | 221 | mutex_init(&inode->inotify_mutex); |
222 | #endif | 222 | #endif |
223 | INIT_LIST_HEAD(&inode->i_obj_list); | ||
224 | mutex_init(&inode->i_obj_mutex); | ||
223 | } | 225 | } |
224 | 226 | ||
225 | EXPORT_SYMBOL(inode_init_once); | 227 | EXPORT_SYMBOL(inode_init_once); |
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h index 9b15545eb9..063c5856f2 100644 --- a/include/asm-x86/unistd_32.h +++ b/include/asm-x86/unistd_32.h | |||
@@ -330,10 +330,24 @@ | |||
330 | #define __NR_timerfd 322 | 330 | #define __NR_timerfd 322 |
331 | #define __NR_eventfd 323 | 331 | #define __NR_eventfd 323 |
332 | #define __NR_fallocate 324 | 332 | #define __NR_fallocate 324 |
333 | #define __NR_set_rt_task_param 325 | ||
334 | #define __NR_get_rt_task_param 326 | ||
335 | #define __NR_sleep_next_period 327 | ||
336 | #define __NR_register_np_flag 328 | ||
337 | #define __NR_exit_np 329 | ||
338 | #define __NR_od_open 330 | ||
339 | #define __NR_od_close 331 | ||
340 | #define __NR_pi_down 332 | ||
341 | #define __NR_pi_up 333 | ||
342 | #define __NR_srp_down 334 | ||
343 | #define __NR_srp_up 335 | ||
344 | #define __NR_reg_task_srp_sem 336 | ||
345 | #define __NR_query_job_no 337 | ||
346 | #define __NR_wait_for_job_release 338 | ||
333 | 347 | ||
334 | #ifdef __KERNEL__ | 348 | #ifdef __KERNEL__ |
335 | 349 | ||
336 | #define NR_syscalls 325 | 350 | #define NR_syscalls 339 |
337 | 351 | ||
338 | #define __ARCH_WANT_IPC_PARSE_VERSION | 352 | #define __ARCH_WANT_IPC_PARSE_VERSION |
339 | #define __ARCH_WANT_OLD_READDIR | 353 | #define __ARCH_WANT_OLD_READDIR |
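The syscall numbers added above are what the user-space library will eventually wrap. As a rough illustration (not part of this patch), a task could invoke set_rt_task_param directly via syscall(2); the struct layout mirrors struct rt_task from include/litmus/rt_param.h later in this commit, while the (pid, param) argument order, the nanosecond time base, and the numeric values are assumptions.

```c
/* Hypothetical user-space sketch -- not part of this patch. */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#define __NR_set_rt_task_param 325	/* from unistd_32.h above */

/* mirrors struct rt_task in include/litmus/rt_param.h (layout assumed) */
struct rt_task {
	unsigned long long exec_cost;
	unsigned long long period;
	unsigned int cpu;
	int cls;			/* 0 = hard, 1 = soft, 2 = best effort */
};

int main(void)
{
	struct rt_task p = {
		.exec_cost = 10000000ULL,	/* 10 ms, assuming a ns time base */
		.period    = 100000000ULL,	/* 100 ms */
		.cpu       = 0,
		.cls       = 1,			/* RT_CLASS_SOFT */
	};

	/* the (pid, param) argument order is an assumption */
	if (syscall(__NR_set_rt_task_param, getpid(), &p) < 0) {
		perror("set_rt_task_param");
		return 1;
	}
	return 0;
}
```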
diff --git a/include/linux/fs.h b/include/linux/fs.h index b3ec4a496d..22f856c14e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -588,6 +588,8 @@ static inline int mapping_writably_mapped(struct address_space *mapping) | |||
588 | #define i_size_ordered_init(inode) do { } while (0) | 588 | #define i_size_ordered_init(inode) do { } while (0) |
589 | #endif | 589 | #endif |
590 | 590 | ||
591 | struct inode_obj_id_table; | ||
592 | |||
591 | struct inode { | 593 | struct inode { |
592 | struct hlist_node i_hash; | 594 | struct hlist_node i_hash; |
593 | struct list_head i_list; | 595 | struct list_head i_list; |
@@ -653,6 +655,9 @@ struct inode { | |||
653 | void *i_security; | 655 | void *i_security; |
654 | #endif | 656 | #endif |
655 | void *i_private; /* fs or device private pointer */ | 657 | void *i_private; /* fs or device private pointer */ |
658 | |||
659 | struct list_head i_obj_list; | ||
660 | struct mutex i_obj_mutex; | ||
656 | }; | 661 | }; |
657 | 662 | ||
658 | /* | 663 | /* |
diff --git a/include/linux/sched.h b/include/linux/sched.h index cc14656f86..9541cc8fe8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -37,6 +37,7 @@ | |||
37 | #define SCHED_BATCH 3 | 37 | #define SCHED_BATCH 3 |
38 | /* SCHED_ISO: reserved but not implemented yet */ | 38 | /* SCHED_ISO: reserved but not implemented yet */ |
39 | #define SCHED_IDLE 5 | 39 | #define SCHED_IDLE 5 |
40 | #define SCHED_LITMUS 6 | ||
40 | 41 | ||
41 | #ifdef __KERNEL__ | 42 | #ifdef __KERNEL__ |
42 | 43 | ||
@@ -91,6 +92,8 @@ struct sched_param { | |||
91 | 92 | ||
92 | #include <asm/processor.h> | 93 | #include <asm/processor.h> |
93 | 94 | ||
95 | #include <litmus/rt_param.h> | ||
96 | |||
94 | struct exec_domain; | 97 | struct exec_domain; |
95 | struct futex_pi_state; | 98 | struct futex_pi_state; |
96 | struct bio; | 99 | struct bio; |
@@ -914,6 +917,8 @@ struct sched_entity { | |||
914 | #endif | 917 | #endif |
915 | }; | 918 | }; |
916 | 919 | ||
920 | struct od_table_entry; | ||
921 | |||
917 | struct task_struct { | 922 | struct task_struct { |
918 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 923 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
919 | void *stack; | 924 | void *stack; |
@@ -1178,6 +1183,17 @@ struct task_struct { | |||
1178 | int make_it_fail; | 1183 | int make_it_fail; |
1179 | #endif | 1184 | #endif |
1180 | struct prop_local_single dirties; | 1185 | struct prop_local_single dirties; |
1186 | |||
1187 | /* litmus parameters and state */ | ||
1188 | struct rt_param rt_param; | ||
1189 | |||
1190 | /* allow scheduler plugins to queue in release lists, etc. | ||
1191 | * Cleanup: Move this into the rt_param struct. | ||
1192 | */ | ||
1193 | struct list_head rt_list; | ||
1194 | |||
1195 | /* references to PI semaphores, etc. */ | ||
1196 | struct od_table_entry* od_table; | ||
1181 | }; | 1197 | }; |
1182 | 1198 | ||
1183 | /* | 1199 | /* |
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 975c963e57..6ae0ff9494 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h | |||
@@ -84,4 +84,20 @@ static inline unsigned long __copy_from_user_nocache(void *to, | |||
84 | ret; \ | 84 | ret; \ |
85 | }) | 85 | }) |
86 | 86 | ||
87 | /* This is a naive attempt at a write version of the above native Linux macro. | ||
88 | */ | ||
89 | #define poke_kernel_address(val, addr) \ | ||
90 | ({ \ | ||
91 | long ret; \ | ||
92 | mm_segment_t old_fs = get_fs(); \ | ||
93 | \ | ||
94 | set_fs(KERNEL_DS); \ | ||
95 | pagefault_disable(); \ | ||
96 | ret = __put_user(val, (__force typeof(val) __user *)(addr)); \ | ||
97 | pagefault_enable(); \ | ||
98 | set_fs(old_fs); \ | ||
99 | ret; \ | ||
100 | }) | ||
101 | |||
102 | |||
87 | #endif /* __LINUX_UACCESS_H__ */ | 103 | #endif /* __LINUX_UACCESS_H__ */ |
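A quick sketch of how the new poke_kernel_address() pairs with the stock probe_kernel_address() it is modeled on; the target pointer and the bit flip are purely illustrative.

```c
/* Illustrative only -- not part of this patch. */
#include <linux/uaccess.h>
#include <linux/errno.h>

static int flip_low_bit(unsigned long *target)
{
	unsigned long val;

	if (probe_kernel_address(target, val))	/* non-zero on fault */
		return -EFAULT;
	if (poke_kernel_address(val ^ 1UL, target))
		return -EFAULT;
	return 0;
}
```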
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h new file mode 100644 index 0000000000..f3c930b137 --- /dev/null +++ b/include/litmus/edf_common.h | |||
@@ -0,0 +1,35 @@ | |||
1 | /* EDF common data structures and utility functions shared by all EDF | ||
2 | * based scheduler plugins | ||
3 | */ | ||
4 | |||
5 | /* CLEANUP: Add comments and make it less messy. | ||
6 | * | ||
7 | */ | ||
8 | |||
9 | #ifndef __UNC_EDF_COMMON_H__ | ||
10 | #define __UNC_EDF_COMMON_H__ | ||
11 | |||
12 | #include <litmus/rt_domain.h> | ||
13 | |||
14 | |||
15 | void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched); | ||
16 | |||
17 | int edf_higher_prio(struct task_struct* first, | ||
18 | struct task_struct* second); | ||
19 | |||
20 | int edf_ready_order(struct list_head* a, struct list_head* b); | ||
21 | |||
22 | void edf_release_at(struct task_struct *t, lt_t start); | ||
23 | |||
24 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t); | ||
25 | long edf_complete_job(void); | ||
26 | |||
27 | void edf_prepare_for_next_period(struct task_struct *t); | ||
28 | |||
29 | #define job_completed(t) (!is_be(t) && \ | ||
30 | (t)->rt_param.job_params.exec_time == (t)->rt_param.task_params.exec_cost) | ||
31 | |||
32 | int edf_set_hp_task(struct pi_semaphore *sem); | ||
33 | int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu); | ||
34 | |||
35 | #endif | ||
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h new file mode 100644 index 0000000000..5a783555e7 --- /dev/null +++ b/include/litmus/fdso.h | |||
@@ -0,0 +1,69 @@ | |||
1 | /* fdso.h - file descriptor attached shared objects | ||
2 | * | ||
3 | * (c) 2007 B. Brandenburg, LITMUS^RT project | ||
4 | */ | ||
5 | |||
6 | #ifndef _LINUX_FDSO_H_ | ||
7 | #define _LINUX_FDSO_H_ | ||
8 | |||
9 | #include <linux/list.h> | ||
10 | #include <asm/atomic.h> | ||
11 | |||
12 | #include <linux/fs.h> | ||
13 | |||
14 | #define MAX_OBJECT_DESCRIPTORS 32 | ||
15 | |||
16 | typedef enum { | ||
17 | MIN_OBJ_TYPE = 0, | ||
18 | |||
19 | PI_SEM = 0, | ||
20 | SRP_SEM = 1, | ||
21 | |||
22 | MAX_OBJ_TYPE = 1 | ||
23 | } obj_type_t; | ||
24 | |||
25 | struct inode_obj_id { | ||
26 | struct list_head list; | ||
27 | atomic_t count; | ||
28 | struct inode* inode; | ||
29 | |||
30 | obj_type_t type; | ||
31 | void* obj; | ||
32 | unsigned int id; | ||
33 | }; | ||
34 | |||
35 | |||
36 | struct od_table_entry { | ||
37 | unsigned int used; | ||
38 | |||
39 | struct inode_obj_id* obj; | ||
40 | void* extra; | ||
41 | }; | ||
42 | |||
43 | struct fdso_ops { | ||
44 | void* (*create) (void); | ||
45 | void (*destroy)(void*); | ||
46 | int (*open) (struct od_table_entry*, void* __user); | ||
47 | int (*close) (struct od_table_entry*); | ||
48 | }; | ||
49 | |||
50 | /* translate a userspace supplied od into the raw table entry | ||
51 | * returns NULL if od is invalid | ||
52 | */ | ||
53 | struct od_table_entry* __od_lookup(int od); | ||
54 | |||
55 | /* translate a userspace supplied od into the associated object | ||
56 | * returns NULL if od is invalid | ||
57 | */ | ||
58 | static inline void* od_lookup(int od, obj_type_t type) | ||
59 | { | ||
60 | struct od_table_entry* e = __od_lookup(od); | ||
61 | return e && e->obj->type == type ? e->obj->obj : NULL; | ||
62 | } | ||
63 | |||
64 | #define lookup_pi_sem(od) ((struct pi_semaphore*) od_lookup(od, PI_SEM)) | ||
65 | #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) | ||
66 | #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) | ||
67 | |||
68 | |||
69 | #endif | ||
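The od_lookup() helpers above are how syscall backends turn a user-supplied object descriptor into a kernel object. A minimal sketch, assuming the caller only needs a NULL check (invalid descriptors and type mismatches both yield NULL); the function name is made up.

```c
/* Sketch -- not part of this patch. */
#include <litmus/fdso.h>
#include <litmus/sched_plugin.h>	/* struct pi_semaphore */
#include <linux/errno.h>

static long example_pi_op(int od)
{
	struct pi_semaphore *sem = lookup_pi_sem(od);

	if (!sem)
		return -EINVAL;		/* bad od or wrong object type */
	/* ... operate on sem ... */
	return 0;
}
```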
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h new file mode 100644 index 0000000000..c788227905 --- /dev/null +++ b/include/litmus/feather_buffer.h | |||
@@ -0,0 +1,108 @@ | |||
1 | #ifndef _FEATHER_BUFFER_H_ | ||
2 | #define _FEATHER_BUFFER_H_ | ||
3 | |||
4 | /* requires UINT_MAX and memcpy */ | ||
5 | |||
6 | static inline int fetch_and_inc(int *val) | ||
7 | { | ||
8 | int ret = 1; | ||
9 | __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" ); | ||
10 | return ret; | ||
11 | } | ||
12 | |||
13 | static inline int fetch_and_dec(int *val) | ||
14 | { | ||
15 | int ret = -1; | ||
16 | __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" ); | ||
17 | return ret; | ||
18 | } | ||
19 | |||
20 | #define SLOT_FREE 0 | ||
21 | #define SLOT_BUSY 1 | ||
22 | #define SLOT_READY 2 | ||
23 | |||
24 | struct ft_buffer { | ||
25 | unsigned int slot_count; | ||
26 | unsigned int slot_size; | ||
27 | |||
28 | int free_count; | ||
29 | unsigned int write_idx; | ||
30 | unsigned int read_idx; | ||
31 | |||
32 | char* slots; | ||
33 | void* buffer_mem; | ||
34 | unsigned int failed_writes; | ||
35 | }; | ||
36 | |||
37 | static inline int init_ft_buffer(struct ft_buffer* buf, | ||
38 | unsigned int slot_count, | ||
39 | unsigned int slot_size, | ||
40 | char* slots, | ||
41 | void* buffer_mem) | ||
42 | { | ||
43 | int i = 0; | ||
44 | if (!slot_count || UINT_MAX % slot_count != slot_count - 1) { | ||
45 | /* The slot count must divide UINT_MAX + 1 so that when the | ||
46 | * write index wraps around it correctly points to slot 0. | ||
47 | */ | ||
48 | return 0; | ||
49 | } else { | ||
50 | buf->slot_count = slot_count; | ||
51 | buf->slot_size = slot_size; | ||
52 | buf->slots = slots; | ||
53 | buf->buffer_mem = buffer_mem; | ||
54 | buf->free_count = slot_count; | ||
55 | buf->write_idx = 0; | ||
56 | buf->read_idx = 0; | ||
57 | buf->failed_writes = 0; | ||
58 | for (i = 0; i < slot_count; i++) | ||
59 | buf->slots[i] = SLOT_FREE; | ||
60 | return 1; | ||
61 | } | ||
62 | } | ||
63 | |||
64 | static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr) | ||
65 | { | ||
66 | int free = fetch_and_dec(&buf->free_count); | ||
67 | unsigned int idx; | ||
68 | if (free <= 0) { | ||
69 | fetch_and_inc(&buf->free_count); | ||
70 | *ptr = 0; | ||
71 | fetch_and_inc(&buf->failed_writes); | ||
72 | return 0; | ||
73 | } else { | ||
74 | idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count; | ||
75 | buf->slots[idx] = SLOT_BUSY; | ||
76 | *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size; | ||
77 | return 1; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr) | ||
82 | { | ||
83 | unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size; | ||
84 | buf->slots[idx] = SLOT_READY; | ||
85 | } | ||
86 | |||
87 | |||
88 | /* exclusive reader access is assumed */ | ||
89 | static inline int ft_buffer_read(struct ft_buffer* buf, void* dest) | ||
90 | { | ||
91 | unsigned int idx; | ||
92 | if (buf->free_count == buf->slot_count) | ||
93 | /* nothing available */ | ||
94 | return 0; | ||
95 | idx = buf->read_idx % buf->slot_count; | ||
96 | if (buf->slots[idx] == SLOT_READY) { | ||
97 | memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size, | ||
98 | buf->slot_size); | ||
99 | buf->slots[idx] = SLOT_FREE; | ||
100 | buf->read_idx++; | ||
101 | fetch_and_inc(&buf->free_count); | ||
102 | return 1; | ||
103 | } else | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | |||
108 | #endif | ||
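A usage sketch for the wait-free ft_buffer above, with one writer-side event and a single reader, under the stated constraint that the slot count divides UINT_MAX + 1 (i.e., is a power of two). The record layout and all names are made up; the header itself notes it requires UINT_MAX and memcpy, hence the extra includes.

```c
/* Sketch -- not part of this patch. */
#include <linux/kernel.h>	/* UINT_MAX */
#include <linux/string.h>	/* memcpy */
#include <litmus/feather_buffer.h>

#define NSLOTS 16		/* power of two, as required */

struct rec { unsigned long long when; int cpu; };

static char rec_slots[NSLOTS];
static struct rec rec_mem[NSLOTS];
static struct ft_buffer rec_buf;

static void rec_setup(void)
{
	init_ft_buffer(&rec_buf, NSLOTS, sizeof(struct rec),
		       rec_slots, rec_mem);
}

static void rec_write(unsigned long long now, int cpu)
{
	struct rec *r;

	if (ft_buffer_start_write(&rec_buf, (void **) &r)) {
		r->when = now;
		r->cpu  = cpu;
		ft_buffer_finish_write(&rec_buf, r);
	}
	/* else: buffer full, failed_writes was incremented for us */
}

static int rec_drain(struct rec *out)
{
	return ft_buffer_read(&rec_buf, out);	/* single reader assumed */
}
```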
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h new file mode 100644 index 0000000000..5c37ea71ea --- /dev/null +++ b/include/litmus/feather_trace.h | |||
@@ -0,0 +1,93 @@ | |||
1 | #ifndef _FEATHER_TRACE_H_ | ||
2 | #define _FEATHER_TRACE_H_ | ||
3 | |||
4 | #define feather_callback __attribute__((regparm(0))) | ||
5 | |||
6 | /* make the compiler reload any register that is not saved in | ||
7 | * a cdecl function call | ||
8 | */ | ||
9 | #define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx" | ||
10 | |||
11 | #define ft_event(id, callback) \ | ||
12 | __asm__ __volatile__( \ | ||
13 | "1: jmp 2f \n\t" \ | ||
14 | " call " #callback " \n\t" \ | ||
15 | ".section __event_table, \"aw\" \n\t" \ | ||
16 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
17 | ".previous \n\t" \ | ||
18 | "2: \n\t" \ | ||
19 | : : : CLOBBER_LIST) | ||
20 | |||
21 | #define ft_event0(id, callback) \ | ||
22 | __asm__ __volatile__( \ | ||
23 | "1: jmp 2f \n\t" \ | ||
24 | " subl $4, %%esp \n\t" \ | ||
25 | " movl $" #id ", (%%esp) \n\t" \ | ||
26 | " call " #callback " \n\t" \ | ||
27 | " addl $4, %%esp \n\t" \ | ||
28 | ".section __event_table, \"aw\" \n\t" \ | ||
29 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
30 | ".previous \n\t" \ | ||
31 | "2: \n\t" \ | ||
32 | : : : CLOBBER_LIST) | ||
33 | |||
34 | #define ft_event1(id, callback, param) \ | ||
35 | __asm__ __volatile__( \ | ||
36 | "1: jmp 2f \n\t" \ | ||
37 | " subl $8, %%esp \n\t" \ | ||
38 | " movl %0, 4(%%esp) \n\t" \ | ||
39 | " movl $" #id ", (%%esp) \n\t" \ | ||
40 | " call " #callback " \n\t" \ | ||
41 | " addl $8, %%esp \n\t" \ | ||
42 | ".section __event_table, \"aw\" \n\t" \ | ||
43 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
44 | ".previous \n\t" \ | ||
45 | "2: \n\t" \ | ||
46 | : : "r" (param) : CLOBBER_LIST) | ||
47 | |||
48 | #define ft_event2(id, callback, param, param2) \ | ||
49 | __asm__ __volatile__( \ | ||
50 | "1: jmp 2f \n\t" \ | ||
51 | " subl $12, %%esp \n\t" \ | ||
52 | " movl %1, 8(%%esp) \n\t" \ | ||
53 | " movl %0, 4(%%esp) \n\t" \ | ||
54 | " movl $" #id ", (%%esp) \n\t" \ | ||
55 | " call " #callback " \n\t" \ | ||
56 | " addl $12, %%esp \n\t" \ | ||
57 | ".section __event_table, \"aw\" \n\t" \ | ||
58 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
59 | ".previous \n\t" \ | ||
60 | "2: \n\t" \ | ||
61 | : : "r" (param), "r" (param2) : CLOBBER_LIST) | ||
62 | |||
63 | |||
64 | #define ft_event3(id, callback, p, p2, p3) \ | ||
65 | __asm__ __volatile__( \ | ||
66 | "1: jmp 2f \n\t" \ | ||
67 | " subl $16, %%esp \n\t" \ | ||
68 | " movl %1, 12(%%esp) \n\t" \ | ||
69 | " movl %1, 8(%%esp) \n\t" \ | ||
70 | " movl %0, 4(%%esp) \n\t" \ | ||
71 | " movl $" #id ", (%%esp) \n\t" \ | ||
72 | " call " #callback " \n\t" \ | ||
73 | " addl $16, %%esp \n\t" \ | ||
74 | ".section __event_table, \"aw\" \n\t" \ | ||
75 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
76 | ".previous \n\t" \ | ||
77 | "2: \n\t" \ | ||
78 | : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST) | ||
79 | |||
80 | |||
81 | static inline unsigned long long ft_read_tsc(void) | ||
82 | { | ||
83 | unsigned long long ret; | ||
84 | __asm__ __volatile__("rdtsc" : "=A" (ret)); | ||
85 | return ret; | ||
86 | } | ||
87 | |||
88 | int ft_enable_event(unsigned long id); | ||
89 | int ft_disable_event(unsigned long id); | ||
90 | int ft_is_event_enabled(unsigned long id); | ||
91 | int ft_disable_all_events(void); | ||
92 | |||
93 | #endif | ||
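A sketch of how an instrumentation point built from these macros would look: the leading jmp in ft_event0() skips the call while the event is disabled, and ft_enable_event() patches the site so the callback actually runs. Event id 42 and all function names are hypothetical.

```c
/* Sketch -- not part of this patch. */
#include <litmus/feather_trace.h>

/* callbacks must use the cdecl/regparm(0) convention */
feather_callback void my_probe(unsigned long id)
{
	/* runs only while event 42 is enabled; keep it short */
}

void hot_path(void)
{
	ft_event0(42, my_probe);	/* near-zero cost while disabled */
}

int arm_my_probe(void)
{
	return ft_enable_event(42);	/* activate the instrumentation point */
}
```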
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h new file mode 100644 index 0000000000..6e99e651d7 --- /dev/null +++ b/include/litmus/litmus.h | |||
@@ -0,0 +1,192 @@ | |||
1 | /* | ||
2 | * Constant definitions related to | ||
3 | * scheduling policy. | ||
4 | */ | ||
5 | |||
6 | #ifndef _LINUX_LITMUS_H_ | ||
7 | #define _LINUX_LITMUS_H_ | ||
8 | |||
9 | #include <linux/jiffies.h> | ||
10 | #include <litmus/sched_trace.h> | ||
11 | |||
12 | typedef enum { | ||
13 | SCHED_LINUX = 0, | ||
14 | SCHED_GSN_EDF = 10, | ||
15 | SCHED_PSN_EDF = 11, | ||
16 | /* Add your scheduling policy here */ | ||
17 | |||
18 | SCHED_DEFAULT = 0, | ||
19 | SCHED_INVALID = -1, | ||
20 | } spolicy; | ||
21 | |||
22 | |||
23 | typedef enum { | ||
24 | LITMUS_RESERVED_RANGE = 1024, | ||
25 | |||
26 | } sched_setup_cmd_t; | ||
27 | |||
28 | /* per-task modes */ | ||
29 | enum rt_task_mode_t { | ||
30 | BACKGROUND_TASK = 0, | ||
31 | LITMUS_RT_TASK = 1 | ||
32 | }; | ||
33 | |||
34 | /* Plugin boot options, for convenience */ | ||
35 | #define PLUGIN_LINUX "linux" | ||
36 | #define PLUGIN_GSN_EDF "gsn_edf" | ||
37 | #define PLUGIN_PSN_EDF "psn_edf" | ||
38 | |||
39 | extern spolicy sched_policy; | ||
40 | |||
41 | /* RT mode start time */ | ||
42 | extern volatile unsigned long rt_start_time; | ||
43 | |||
44 | #define TRACE(fmt, args...) \ | ||
45 | sched_trace_log_message("%d: " fmt, raw_smp_processor_id(), ## args) | ||
46 | |||
47 | #define TRACE_TASK(t, fmt, args...) \ | ||
48 | TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args) | ||
49 | |||
50 | #define TRACE_CUR(fmt, args...) \ | ||
51 | TRACE_TASK(current, fmt, ## args) | ||
52 | |||
53 | #define TRACE_BUG_ON(cond) \ | ||
54 | do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \ | ||
55 | "called from %p current=%s/%d state=%d " \ | ||
56 | "flags=%x partition=%d cpu=%d rtflags=%d"\ | ||
57 | " job=%u knp=%d timeslice=%u\n", \ | ||
58 | #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \ | ||
59 | current->pid, current->state, current->flags, \ | ||
60 | get_partition(current), smp_processor_id(), get_rt_flags(current), \ | ||
61 | current->rt_param.job_params.job_no, current->rt_param.kernel_np, \ | ||
62 | current->time_slice\ | ||
63 | ); } while(0); | ||
64 | |||
65 | |||
66 | /* in_list - is a given list_head queued on some list? | ||
67 | */ | ||
68 | static inline int in_list(struct list_head* list) | ||
69 | { | ||
70 | return !( /* case 1: deleted */ | ||
71 | (list->next == LIST_POISON1 && | ||
72 | list->prev == LIST_POISON2) | ||
73 | || | ||
74 | /* case 2: initialized */ | ||
75 | (list->next == list && | ||
76 | list->prev == list) | ||
77 | ); | ||
78 | } | ||
79 | |||
80 | typedef int (*list_cmp_t)(struct list_head*, struct list_head*); | ||
81 | |||
82 | static inline unsigned int list_insert(struct list_head* new, | ||
83 | struct list_head* head, | ||
84 | list_cmp_t order_before) | ||
85 | { | ||
86 | struct list_head *pos; | ||
87 | unsigned int passed = 0; | ||
88 | |||
89 | BUG_ON(!new); | ||
90 | |||
91 | /* find a spot where the new entry is less than the next */ | ||
92 | list_for_each(pos, head) { | ||
93 | if (unlikely(order_before(new, pos))) { | ||
94 | /* pos is not less than new, thus insert here */ | ||
95 | __list_add(new, pos->prev, pos); | ||
96 | goto out; | ||
97 | } | ||
98 | passed++; | ||
99 | } | ||
100 | /* if we get to this point either the list is empty or every | ||
101 | * queued element is less than new. | ||
102 | * Let's add new to the end. */ | ||
103 | list_add_tail(new, head); | ||
104 | out: | ||
105 | return passed; | ||
106 | } | ||
107 | |||
108 | void list_qsort(struct list_head* list, list_cmp_t less_than); | ||
109 | |||
110 | |||
111 | #define RT_PREEMPTIVE 0x2050 /* = NP */ | ||
112 | #define RT_NON_PREEMPTIVE 0x4e50 /* = P */ | ||
113 | #define RT_EXIT_NP_REQUESTED 0x5251 /* = RQ */ | ||
114 | |||
115 | /* returns 1 if task t has registered an np flag and set it to RT_NON_PREEMPTIVE | ||
116 | */ | ||
117 | int is_np(struct task_struct *t); | ||
118 | |||
119 | /* request that the task should call sys_exit_np() | ||
120 | */ | ||
121 | void request_exit_np(struct task_struct *t); | ||
122 | |||
123 | /* kill naughty tasks | ||
124 | */ | ||
125 | void scheduler_signal(struct task_struct *t, unsigned int signal); | ||
126 | void send_scheduler_signals(void); | ||
127 | void np_mem_kill(struct task_struct *t); | ||
128 | |||
129 | void litmus_fork(struct task_struct *tsk); | ||
130 | void litmus_exec(void); | ||
131 | /* clean up real-time state of a task */ | ||
132 | void exit_litmus(struct task_struct *dead_tsk); | ||
133 | |||
134 | long litmus_admit_task(struct task_struct *tsk); | ||
135 | void litmus_exit_task(struct task_struct *tsk); | ||
136 | |||
137 | #define is_realtime(t) ((t)->policy == SCHED_LITMUS) | ||
138 | #define rt_transition_pending(t) \ | ||
139 | ((t)->rt_param.transition_pending) | ||
140 | |||
141 | /* Realtime utility macros */ | ||
142 | #define get_rt_flags(t) ((t)->rt_param.flags) | ||
143 | #define set_rt_flags(t,f) (t)->rt_param.flags=(f) | ||
144 | #define get_exec_cost(t) ((t)->rt_param.task_params.exec_cost) | ||
145 | #define get_exec_time(t) ((t)->rt_param.job_params.exec_time) | ||
146 | #define get_rt_period(t) ((t)->rt_param.task_params.period) | ||
147 | #define get_partition(t) (t)->rt_param.task_params.cpu | ||
148 | #define get_deadline(t) ((t)->rt_param.job_params.deadline) | ||
149 | #define get_class(t) ((t)->rt_param.task_params.cls) | ||
150 | |||
151 | inline static int budget_exhausted(struct task_struct* t) | ||
152 | { | ||
153 | return get_exec_time(t) >= get_exec_cost(t); | ||
154 | } | ||
155 | |||
156 | #define is_subject_to_srp(t) ((t)->rt_param.subject_to_srp) | ||
157 | #define is_hrt(t) \ | ||
158 | ((t)->rt_param.task_params.cls == RT_CLASS_HARD) | ||
159 | #define is_srt(t) \ | ||
160 | ((t)->rt_param.task_params.cls == RT_CLASS_SOFT) | ||
161 | #define is_be(t) \ | ||
162 | ((t)->rt_param.task_params.cls == RT_CLASS_BEST_EFFORT) | ||
163 | |||
164 | #define get_release(t) ((t)->rt_param.job_params.release) | ||
165 | |||
166 | /* Honor the flag in the preempt_count variable that is set | ||
167 | * when scheduling is in progress. | ||
168 | */ | ||
169 | #define is_running(t) \ | ||
170 | ((t)->state == TASK_RUNNING || \ | ||
171 | task_thread_info(t)->preempt_count & PREEMPT_ACTIVE) | ||
172 | |||
173 | #define is_blocked(t) \ | ||
174 | (!is_running(t)) | ||
175 | #define is_released(t, now) \ | ||
176 | (lt_before_eq(get_release(t), now)) | ||
177 | #define is_tardy(t, now) \ | ||
178 | (lt_before_eq((t)->rt_param.job_params.deadline, now)) | ||
179 | |||
180 | /* real-time comparison macros */ | ||
181 | #define earlier_deadline(a, b) (lt_before(\ | ||
182 | (a)->rt_param.job_params.deadline,\ | ||
183 | (b)->rt_param.job_params.deadline)) | ||
184 | #define earlier_release(a, b) (lt_before(\ | ||
185 | (a)->rt_param.job_params.release,\ | ||
186 | (b)->rt_param.job_params.release)) | ||
187 | |||
188 | #define make_np(t) do {t->rt_param.kernel_np++;} while(0); | ||
189 | #define take_np(t) do {t->rt_param.kernel_np--;} while(0); | ||
190 | |||
191 | |||
192 | #endif | ||
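The list_insert() helper above takes a list_cmp_t to keep a queue in priority order. A sketch of how a plugin might use it together with the earlier_deadline() macro; the queue and function names are made up.

```c
/* Sketch -- not part of this patch. */
#include <linux/list.h>
#include <linux/sched.h>
#include <litmus/litmus.h>

static LIST_HEAD(my_ready_queue);

static int by_deadline(struct list_head *a, struct list_head *b)
{
	struct task_struct *ta = list_entry(a, struct task_struct, rt_list);
	struct task_struct *tb = list_entry(b, struct task_struct, rt_list);

	return earlier_deadline(ta, tb);
}

static void enqueue_by_deadline(struct task_struct *t)
{
	/* return value counts how many queued entries stay ahead of t */
	unsigned int ahead = list_insert(&t->rt_list, &my_ready_queue,
					 by_deadline);
	(void) ahead;
}
```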
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h new file mode 100644 index 0000000000..79b6034f22 --- /dev/null +++ b/include/litmus/rt_domain.h | |||
@@ -0,0 +1,94 @@ | |||
1 | /* CLEANUP: Add comments and make it less messy. | ||
2 | * | ||
3 | */ | ||
4 | |||
5 | #ifndef __UNC_RT_DOMAIN_H__ | ||
6 | #define __UNC_RT_DOMAIN_H__ | ||
7 | |||
8 | struct _rt_domain; | ||
9 | |||
10 | typedef int (*check_resched_needed_t)(struct _rt_domain *rt); | ||
11 | typedef void (*release_at_t)(struct task_struct *t, lt_t start); | ||
12 | |||
13 | typedef struct _rt_domain { | ||
14 | /* runnable rt tasks are in here */ | ||
15 | rwlock_t ready_lock; | ||
16 | struct list_head ready_queue; | ||
17 | |||
18 | /* real-time tasks waiting for release are in here */ | ||
19 | spinlock_t release_lock; | ||
20 | struct list_head release_queue; | ||
21 | |||
22 | /* how do we check if we need to kick another CPU? */ | ||
23 | check_resched_needed_t check_resched; | ||
24 | |||
25 | /* how are tasks ordered in the ready queue? */ | ||
26 | list_cmp_t order; | ||
27 | } rt_domain_t; | ||
28 | |||
29 | #define next_ready(rt) \ | ||
30 | (list_entry((rt)->ready_queue.next, struct task_struct, rt_list)) | ||
31 | |||
32 | #define ready_jobs_pending(rt) \ | ||
33 | (!list_empty(&(rt)->ready_queue)) | ||
34 | |||
35 | void rt_domain_init(rt_domain_t *rt, check_resched_needed_t f, | ||
36 | list_cmp_t order); | ||
37 | |||
38 | void __add_ready(rt_domain_t* rt, struct task_struct *new); | ||
39 | void __add_release(rt_domain_t* rt, struct task_struct *task); | ||
40 | |||
41 | struct task_struct* __take_ready(rt_domain_t* rt); | ||
42 | struct task_struct* __peek_ready(rt_domain_t* rt); | ||
43 | |||
44 | void try_release_pending(rt_domain_t* rt); | ||
45 | void __release_pending(rt_domain_t* rt); | ||
46 | |||
47 | static inline void add_ready(rt_domain_t* rt, struct task_struct *new) | ||
48 | { | ||
49 | unsigned long flags; | ||
50 | /* first we need the write lock for rt_ready_queue */ | ||
51 | write_lock_irqsave(&rt->ready_lock, flags); | ||
52 | __add_ready(rt, new); | ||
53 | write_unlock_irqrestore(&rt->ready_lock, flags); | ||
54 | } | ||
55 | |||
56 | static inline struct task_struct* take_ready(rt_domain_t* rt) | ||
57 | { | ||
58 | unsigned long flags; | ||
59 | struct task_struct* ret; | ||
60 | /* first we need the write lock for rt_ready_queue */ | ||
61 | write_lock_irqsave(&rt->ready_lock, flags); | ||
62 | ret = __take_ready(rt); | ||
63 | write_unlock_irqrestore(&rt->ready_lock, flags); | ||
64 | return ret; | ||
65 | } | ||
66 | |||
67 | |||
68 | static inline void add_release(rt_domain_t* rt, struct task_struct *task) | ||
69 | { | ||
70 | unsigned long flags; | ||
71 | /* first we need the write lock for rt_ready_queue */ | ||
72 | spin_lock_irqsave(&rt->release_lock, flags); | ||
73 | __add_release(rt, task); | ||
74 | spin_unlock_irqrestore(&rt->release_lock, flags); | ||
75 | } | ||
76 | |||
77 | static inline int __jobs_pending(rt_domain_t* rt) | ||
78 | { | ||
79 | return !list_empty(&rt->ready_queue); | ||
80 | } | ||
81 | |||
82 | static inline int jobs_pending(rt_domain_t* rt) | ||
83 | { | ||
84 | unsigned long flags; | ||
85 | int ret; | ||
86 | /* first we need the write lock for rt_ready_queue */ | ||
87 | read_lock_irqsave(&rt->ready_lock, flags); | ||
88 | ret = __jobs_pending(rt); | ||
89 | read_unlock_irqrestore(&rt->ready_lock, flags); | ||
90 | return ret; | ||
91 | } | ||
92 | |||
93 | |||
94 | #endif | ||
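A sketch of the intended rt_domain workflow, combining it with edf_domain_init() from edf_common.h: initialize with a resched callback and an ordering, then use add_ready()/take_ready(), which handle the ready_lock internally. The stub callback and names are hypothetical.

```c
/* Sketch -- not part of this patch. */
#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>

static rt_domain_t my_domain;

static int my_check_resched(rt_domain_t *rt)
{
	/* a real plugin would decide here whether to kick another CPU */
	return 0;
}

static void my_domain_setup(void)
{
	edf_domain_init(&my_domain, my_check_resched);
}

static void my_enqueue(struct task_struct *t)
{
	add_ready(&my_domain, t);	/* ordered by edf_ready_order() */
}

static struct task_struct *my_pick_next(void)
{
	return take_ready(&my_domain);	/* dequeue the highest-priority job */
}
```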
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h new file mode 100644 index 0000000000..9fb5b19b78 --- /dev/null +++ b/include/litmus/rt_param.h | |||
@@ -0,0 +1,135 @@ | |||
1 | /* | ||
2 | * Definition of the scheduler plugin interface. | ||
3 | * | ||
4 | */ | ||
5 | #ifndef _LINUX_RT_PARAM_H_ | ||
6 | #define _LINUX_RT_PARAM_H_ | ||
7 | |||
8 | /* Litmus time type. */ | ||
9 | typedef unsigned long long lt_t; | ||
10 | |||
11 | static inline int lt_after(lt_t a, lt_t b) | ||
12 | { | ||
13 | return ((long long) b) - ((long long) a) < 0; | ||
14 | } | ||
15 | #define lt_before(a, b) lt_after(b, a) | ||
16 | |||
17 | static inline int lt_after_eq(lt_t a, lt_t b) | ||
18 | { | ||
19 | return ((long long) a) - ((long long) b) >= 0; | ||
20 | } | ||
21 | #define lt_before_eq(a, b) lt_after_eq(b, a) | ||
22 | |||
23 | /* different types of clients */ | ||
24 | typedef enum { | ||
25 | RT_CLASS_HARD, | ||
26 | RT_CLASS_SOFT, | ||
27 | RT_CLASS_BEST_EFFORT | ||
28 | } task_class_t; | ||
29 | |||
30 | struct rt_task { | ||
31 | lt_t exec_cost; | ||
32 | lt_t period; | ||
33 | unsigned int cpu; | ||
34 | task_class_t cls; | ||
35 | }; | ||
36 | |||
37 | /* don't export internal data structures to user space (liblitmus) */ | ||
38 | #ifdef __KERNEL__ | ||
39 | |||
40 | struct rt_job { | ||
41 | /* Time instant the job was or will be released. | ||
42 | lt_t release; | ||
43 | /* What is the current deadline? */ | ||
44 | lt_t deadline; | ||
45 | /* How much service has this job received so far? | ||
46 | */ | ||
47 | lt_t exec_time; | ||
48 | |||
49 | /* Which job is this. This is used to let user space | ||
50 | * specify which job to wait for, which is important if jobs | ||
51 | * overrun. If we just call sys_sleep_next_period() then we | ||
52 | * will unintentionally miss jobs after an overrun. | ||
53 | * | ||
54 | * Increase this sequence number when a job is released. | ||
55 | */ | ||
56 | unsigned int job_no; | ||
57 | |||
58 | /* when did this job start executing? */ | ||
59 | lt_t exec_start; | ||
60 | }; | ||
61 | |||
62 | |||
63 | /* RT task parameters for scheduling extensions | ||
64 | * These parameters are inherited during clone and therefore must | ||
65 | * be explicitly set up before the task set is launched. | ||
66 | */ | ||
67 | struct rt_param { | ||
68 | /* is the task sleeping? */ | ||
69 | unsigned int flags:8; | ||
70 | |||
71 | /* Did this task register any SRP controlled resource accesses? | ||
72 | * This, of course, should only ever be true under partitioning. | ||
73 | * However, this limitation is not currently enforced. | ||
74 | */ | ||
75 | unsigned int subject_to_srp:1; | ||
76 | |||
77 | /* user controlled parameters */ | ||
78 | struct rt_task task_params; | ||
79 | |||
80 | /* timing parameters */ | ||
81 | struct rt_job job_params; | ||
82 | |||
83 | /* task representing the current "inherited" task | ||
84 | * priority, assigned by inherit_priority and | ||
85 | * return_priority in the scheduler plugins. | ||
86 | * May point to self if PI does not result in | ||
87 | * an increased task priority. | ||
88 | */ | ||
89 | struct task_struct* inh_task; | ||
90 | |||
91 | /* Don't just dereference this pointer in kernel space! | ||
92 | * It might very well point to junk or nothing at all. | ||
93 | * NULL indicates that the task has not requested any non-preemptable | ||
94 | * section support. | ||
95 | * Not inherited upon fork. | ||
96 | */ | ||
97 | short* np_flag; | ||
98 | |||
99 | /* For the FMLP under PSN-EDF, it is required to make the task | ||
100 | * non-preemptive from kernel space. In order not to interfere with | ||
101 | * user space, this counter indicates the kernel space np setting. | ||
102 | * kernel_np > 0 => task is non-preemptive | ||
103 | */ | ||
104 | unsigned int kernel_np; | ||
105 | |||
106 | /* This field can be used by plugins to store where the task | ||
107 | * is currently scheduled. It is the responsibility of the | ||
108 | * plugin to avoid race conditions. | ||
109 | * | ||
110 | * Used by GSN-EDF. | ||
111 | */ | ||
112 | volatile int scheduled_on; | ||
113 | |||
114 | /* This field can be used by plugins to store where the task | ||
115 | * is currently linked. It is the responsibility of the plugin | ||
116 | * to avoid race conditions. | ||
117 | * | ||
118 | * Used by GSN-EDF. | ||
119 | */ | ||
120 | volatile int linked_on; | ||
121 | |||
122 | /* Fields saved before BE->RT transition. | ||
123 | */ | ||
124 | int old_policy; | ||
125 | int old_prio; | ||
126 | }; | ||
127 | |||
128 | /* Possible RT flags */ | ||
129 | #define RT_F_RUNNING 0x00000000 | ||
130 | #define RT_F_SLEEP 0x00000001 | ||
131 | #define RT_F_EXIT_SEM 0x00000008 | ||
132 | |||
133 | #endif | ||
134 | |||
135 | #endif | ||
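The lt_t helpers above compare time instants via signed differences, so the ordering stays meaningful even across a wrap of the 64-bit counter, as long as the compared instants lie within 2^63 of each other. A tiny illustration with made-up values:

```c
/* Sketch -- not part of this patch. */
#include <litmus/rt_param.h>

static void lt_compare_example(void)
{
	lt_t release  = 1000;
	lt_t deadline = release + 250;
	int a, b, c;

	a = lt_before(release, deadline);	/* 1: 1000 - 1250 < 0 */
	b = lt_before_eq(release, release);	/* 1: difference is 0 */

	/* across a wrap: ~0ULL reinterpreted as signed is -1, so
	 * (-1) - 5 < 0 and ~0ULL still counts as "before" 5.
	 */
	c = lt_before((lt_t) ~0ULL, (lt_t) 5);

	(void) a; (void) b; (void) c;
}
```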
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h new file mode 100644 index 0000000000..421c54f517 --- /dev/null +++ b/include/litmus/sched_plugin.h | |||
@@ -0,0 +1,118 @@ | |||
1 | /* | ||
2 | * Definition of the scheduler plugin interface. | ||
3 | * | ||
4 | */ | ||
5 | #ifndef _LINUX_SCHED_PLUGIN_H_ | ||
6 | #define _LINUX_SCHED_PLUGIN_H_ | ||
7 | |||
8 | #include <linux/sched.h> | ||
9 | |||
10 | /* struct for semaphore with priority inheritance */ | ||
11 | struct pi_semaphore { | ||
12 | atomic_t count; | ||
13 | int sleepers; | ||
14 | wait_queue_head_t wait; | ||
15 | union { | ||
16 | /* highest-prio holder/waiter */ | ||
17 | struct task_struct *task; | ||
18 | struct task_struct* cpu_task[NR_CPUS]; | ||
19 | } hp; | ||
20 | /* current lock holder */ | ||
21 | struct task_struct *holder; | ||
22 | }; | ||
23 | |||
24 | |||
25 | /********************* scheduler invocation ******************/ | ||
26 | |||
27 | /* Plugin-specific realtime tick handler */ | ||
28 | typedef void (*scheduler_tick_t) (struct task_struct *cur); | ||
29 | /* Plugin's main scheduling decision function */ | ||
30 | typedef struct task_struct* (*schedule_t)(struct task_struct * prev); | ||
31 | /* Clean up after the task switch has occurred. | ||
32 | * This function is called after every (even non-rt) task switch. | ||
33 | */ | ||
34 | typedef void (*finish_switch_t)(struct task_struct *prev); | ||
35 | |||
36 | |||
37 | /********************* task state changes ********************/ | ||
38 | |||
39 | /* Called to set up a new real-time task. | ||
40 | * Release the first job, enqueue, etc. | ||
41 | * Task may already be running. | ||
42 | */ | ||
43 | typedef void (*task_new_t) (struct task_struct *task, | ||
44 | int on_rq, | ||
45 | int running); | ||
46 | |||
47 | /* Called to re-introduce a task after blocking. | ||
48 | * Can potentially be called multiple times. | ||
49 | */ | ||
50 | typedef void (*task_wake_up_t) (struct task_struct *task); | ||
51 | /* Called to notify the plugin of a blocking real-time task; | ||
52 | * it will only be called for real-time tasks and before schedule() is called. */ | ||
53 | typedef void (*task_block_t) (struct task_struct *task); | ||
54 | /* Called when a real-time task exits or changes to a different scheduling | ||
55 | * class. | ||
56 | * Free any allocated resources | ||
57 | */ | ||
58 | typedef void (*task_exit_t) (struct task_struct *); | ||
59 | |||
60 | /* Called when new_owner is released from the wait queue; | ||
61 | * it should now inherit the priority from sem _before_ it gets re-added | ||
62 | * to any queue. | ||
63 | */ | ||
64 | typedef long (*inherit_priority_t) (struct pi_semaphore *sem, | ||
65 | struct task_struct *new_owner); | ||
66 | |||
67 | /* Called when the current task releases a semaphore from which it might | ||
68 | * have inherited a priority. | ||
69 | */ | ||
70 | typedef long (*return_priority_t) (struct pi_semaphore *sem); | ||
71 | |||
72 | /* Called when a task tries to acquire a semaphore and fails. Check if its | ||
73 | * priority is higher than that of the current holder. | ||
74 | */ | ||
75 | typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t); | ||
76 | |||
77 | |||
78 | /********************* sys call backends ********************/ | ||
79 | /* This function causes the caller to sleep until the next release */ | ||
80 | typedef long (*complete_job_t) (void); | ||
81 | |||
82 | typedef long (*admit_task_t)(struct task_struct* tsk); | ||
83 | |||
84 | struct sched_plugin { | ||
85 | struct list_head list; | ||
86 | /* basic info */ | ||
87 | char *plugin_name; | ||
88 | |||
89 | /* scheduler invocation */ | ||
90 | scheduler_tick_t tick; | ||
91 | schedule_t schedule; | ||
92 | finish_switch_t finish_switch; | ||
93 | |||
94 | /* syscall backend */ | ||
95 | complete_job_t complete_job; | ||
96 | |||
97 | /* task state changes */ | ||
98 | admit_task_t admit_task; | ||
99 | |||
100 | task_new_t task_new; | ||
101 | task_wake_up_t task_wake_up; | ||
102 | task_block_t task_block; | ||
103 | task_exit_t task_exit; | ||
104 | |||
105 | /* priority inheritance */ | ||
106 | inherit_priority_t inherit_priority; | ||
107 | return_priority_t return_priority; | ||
108 | pi_block_t pi_block; | ||
109 | } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); | ||
110 | |||
111 | |||
112 | extern struct sched_plugin *litmus; | ||
113 | |||
114 | int register_sched_plugin(struct sched_plugin* plugin); | ||
115 | struct sched_plugin* find_sched_plugin(const char* name); | ||
116 | int print_sched_plugins(char* buf, int max); | ||
117 | |||
118 | #endif | ||
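To make the callback table concrete, here is the smallest plausible plugin skeleton against the interface above; every body is a stub, the PI hooks are omitted, and all names are hypothetical. Real plugins such as sched_gsn_edf.c and sched_psn_edf.c (referenced in litmus/Makefile below) fill these in properly, and the late_initcall registration shown is only one plausible way to hook in.

```c
/* Sketch -- not part of this patch. */
#include <linux/init.h>
#include <litmus/sched_plugin.h>

static void demo_tick(struct task_struct *cur) { }

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	return NULL;			/* never selects a real-time task */
}

static void demo_finish_switch(struct task_struct *prev) { }
static long demo_complete_job(void) { return 0; }
static long demo_admit_task(struct task_struct *tsk) { return 0; }
static void demo_task_new(struct task_struct *t, int on_rq, int running) { }
static void demo_task_wake_up(struct task_struct *t) { }
static void demo_task_block(struct task_struct *t) { }
static void demo_task_exit(struct task_struct *t) { }

static struct sched_plugin demo_plugin = {
	.plugin_name   = "DEMO",
	.tick          = demo_tick,
	.schedule      = demo_schedule,
	.finish_switch = demo_finish_switch,
	.complete_job  = demo_complete_job,
	.admit_task    = demo_admit_task,
	.task_new      = demo_task_new,
	.task_wake_up  = demo_task_wake_up,
	.task_block    = demo_task_block,
	.task_exit     = demo_task_exit,
};

static int __init demo_plugin_init(void)
{
	return register_sched_plugin(&demo_plugin);
}
late_initcall(demo_plugin_init);
```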
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h new file mode 100644 index 0000000000..60dcbfb0ae --- /dev/null +++ b/include/litmus/sched_trace.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* sched_trace.h -- record scheduler events to a byte stream for offline analysis. | ||
2 | */ | ||
3 | #ifndef _LINUX_SCHED_TRACE_H_ | ||
4 | #define _LINUX_SCHED_TRACE_H_ | ||
5 | |||
6 | #include <linux/sched.h> | ||
7 | |||
8 | /* dummies, need to be re-implemented */ | ||
9 | |||
10 | /* used in sched.c */ | ||
11 | #define sched_trace_task_arrival(t) | ||
12 | #define sched_trace_task_departure(t) | ||
13 | #define sched_trace_task_preemption(t, by) | ||
14 | #define sched_trace_task_scheduled(t) | ||
15 | |||
16 | /* used in scheduler plugins */ | ||
17 | #define sched_trace_job_release(t) | ||
18 | #define sched_trace_job_completion(t) | ||
19 | |||
20 | |||
21 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
22 | void sched_trace_log_message(const char* fmt, ...); | ||
23 | |||
24 | #else | ||
25 | |||
26 | #define sched_trace_log_message(fmt, ...) | ||
27 | |||
28 | #endif | ||
29 | |||
30 | |||
31 | #endif | ||
diff --git a/include/litmus/trace.h b/include/litmus/trace.h new file mode 100644 index 0000000000..04510237ec --- /dev/null +++ b/include/litmus/trace.h | |||
@@ -0,0 +1,74 @@ | |||
1 | |||
2 | #ifndef _SYS_TRACE_H_ | ||
3 | #define _SYS_TRACE_H_ | ||
4 | |||
5 | #include <litmus/feather_trace.h> | ||
6 | #include <litmus/feather_buffer.h> | ||
7 | |||
8 | |||
9 | /*********************** TIMESTAMPS ************************/ | ||
10 | |||
11 | struct timestamp { | ||
12 | unsigned long event; | ||
13 | unsigned long long timestamp; | ||
14 | unsigned int seq_no; | ||
15 | int cpu; | ||
16 | }; | ||
17 | |||
18 | |||
19 | /* buffer holding time stamps - will be provided by driver */ | ||
20 | extern struct ft_buffer* trace_ts_buf; | ||
21 | |||
22 | /* save_timestamp: stores current time as struct timestamp | ||
23 | * in trace_ts_buf | ||
24 | */ | ||
25 | asmlinkage void save_timestamp(unsigned long event); | ||
26 | |||
27 | #define TIMESTAMP(id) ft_event0(id, save_timestamp) | ||
28 | |||
29 | /* Convention for timestamps | ||
30 | * ========================= | ||
31 | * | ||
32 | * In order to process the trace files with a common tool, we use the following | ||
33 | * convention to measure execution times: The end time id of a code segment is | ||
34 | * always the next number after the start time event id. | ||
35 | */ | ||
36 | |||
37 | #define TS_SCHED_START TIMESTAMP(100) | ||
38 | #define TS_SCHED_END TIMESTAMP(101) | ||
39 | #define TS_CXS_START TIMESTAMP(102) | ||
40 | #define TS_CXS_END TIMESTAMP(103) | ||
41 | |||
42 | #define TS_TICK_START TIMESTAMP(110) | ||
43 | #define TS_TICK_END TIMESTAMP(111) | ||
44 | |||
45 | #define TS_PLUGIN_SCHED_START TIMESTAMP(120) | ||
46 | #define TS_PLUGIN_SCHED_END TIMESTAMP(121) | ||
47 | |||
48 | #define TS_PLUGIN_TICK_START TIMESTAMP(130) | ||
49 | #define TS_PLUGIN_TICK_END TIMESTAMP(131) | ||
50 | |||
51 | #define TS_ENTER_NP_START TIMESTAMP(140) | ||
52 | #define TS_ENTER_NP_END TIMESTAMP(141) | ||
53 | |||
54 | #define TS_EXIT_NP_START TIMESTAMP(150) | ||
55 | #define TS_EXIT_NP_END TIMESTAMP(151) | ||
56 | |||
57 | #define TS_SRP_UP_START TIMESTAMP(160) | ||
58 | #define TS_SRP_UP_END TIMESTAMP(161) | ||
59 | #define TS_SRP_DOWN_START TIMESTAMP(162) | ||
60 | #define TS_SRP_DOWN_END TIMESTAMP(163) | ||
61 | |||
62 | #define TS_PI_UP_START TIMESTAMP(170) | ||
63 | #define TS_PI_UP_END TIMESTAMP(171) | ||
64 | #define TS_PI_DOWN_START TIMESTAMP(172) | ||
65 | #define TS_PI_DOWN_END TIMESTAMP(173) | ||
66 | |||
67 | #define TS_FIFO_UP_START TIMESTAMP(180) | ||
68 | #define TS_FIFO_UP_END TIMESTAMP(181) | ||
69 | #define TS_FIFO_DOWN_START TIMESTAMP(182) | ||
70 | #define TS_FIFO_DOWN_END TIMESTAMP(183) | ||
71 | |||
72 | |||
73 | |||
74 | #endif /* !_SYS_TRACE_H_ */ | ||
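A short sketch of the paired-timestamp convention described above: the end event id is always the start id plus one, which is what the offline analysis relies on to match samples.

```c
/* Sketch -- not part of this patch. */
#include <litmus/trace.h>

static void measured_section(void)
{
	TS_SCHED_START;		/* event 100, recorded via save_timestamp() */

	/* ... the code being timed ... */

	TS_SCHED_END;		/* event 101 = 100 + 1, paired offline */
}
```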
diff --git a/kernel/exit.c b/kernel/exit.c index 549c0558ba..bc313b74a1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -52,6 +52,8 @@ | |||
52 | 52 | ||
53 | extern void sem_exit (void); | 53 | extern void sem_exit (void); |
54 | 54 | ||
55 | extern void exit_od_table(struct task_struct* t); | ||
56 | |||
55 | static void exit_mm(struct task_struct * tsk); | 57 | static void exit_mm(struct task_struct * tsk); |
56 | 58 | ||
57 | static void __unhash_process(struct task_struct *p) | 59 | static void __unhash_process(struct task_struct *p) |
@@ -987,6 +989,8 @@ fastcall NORET_TYPE void do_exit(long code) | |||
987 | if (unlikely(tsk->audit_context)) | 989 | if (unlikely(tsk->audit_context)) |
988 | audit_free(tsk); | 990 | audit_free(tsk); |
989 | 991 | ||
992 | exit_od_table(tsk); | ||
993 | |||
990 | tsk->exit_code = code; | 994 | tsk->exit_code = code; |
991 | taskstats_exit(tsk, group_dead); | 995 | taskstats_exit(tsk, group_dead); |
992 | 996 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 8dd8ff2810..9e42d3a207 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -59,6 +59,9 @@ | |||
59 | #include <asm/cacheflush.h> | 59 | #include <asm/cacheflush.h> |
60 | #include <asm/tlbflush.h> | 60 | #include <asm/tlbflush.h> |
61 | 61 | ||
62 | #include <litmus/litmus.h> | ||
63 | #include <litmus/sched_plugin.h> | ||
64 | |||
62 | /* | 65 | /* |
63 | * Protected counters by write_lock_irq(&tasklist_lock) | 66 | * Protected counters by write_lock_irq(&tasklist_lock) |
64 | */ | 67 | */ |
@@ -121,6 +124,8 @@ void __put_task_struct(struct task_struct *tsk) | |||
121 | WARN_ON(atomic_read(&tsk->usage)); | 124 | WARN_ON(atomic_read(&tsk->usage)); |
122 | WARN_ON(tsk == current); | 125 | WARN_ON(tsk == current); |
123 | 126 | ||
127 | exit_litmus(tsk); | ||
128 | |||
124 | security_task_free(tsk); | 129 | security_task_free(tsk); |
125 | free_uid(tsk->user); | 130 | free_uid(tsk->user); |
126 | put_group_info(tsk->group_info); | 131 | put_group_info(tsk->group_info); |
diff --git a/kernel/sched.c b/kernel/sched.c index e76b11ca6d..4890a12786 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -324,6 +324,8 @@ struct rq { | |||
324 | 324 | ||
325 | atomic_t nr_iowait; | 325 | atomic_t nr_iowait; |
326 | 326 | ||
327 | struct task_struct* litmus_next; | ||
328 | |||
327 | #ifdef CONFIG_SMP | 329 | #ifdef CONFIG_SMP |
328 | struct sched_domain *sd; | 330 | struct sched_domain *sd; |
329 | 331 | ||
@@ -875,11 +877,12 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | |||
875 | #include "sched_idletask.c" | 877 | #include "sched_idletask.c" |
876 | #include "sched_fair.c" | 878 | #include "sched_fair.c" |
877 | #include "sched_rt.c" | 879 | #include "sched_rt.c" |
880 | #include "../litmus/sched_litmus.c" | ||
878 | #ifdef CONFIG_SCHED_DEBUG | 881 | #ifdef CONFIG_SCHED_DEBUG |
879 | # include "sched_debug.c" | 882 | # include "sched_debug.c" |
880 | #endif | 883 | #endif |
881 | 884 | ||
882 | #define sched_class_highest (&rt_sched_class) | 885 | #define sched_class_highest (&litmus_sched_class) |
883 | 886 | ||
884 | /* | 887 | /* |
885 | * Update delta_exec, delta_fair fields for rq. | 888 | * Update delta_exec, delta_fair fields for rq. |
@@ -1529,7 +1532,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1529 | this_cpu = smp_processor_id(); | 1532 | this_cpu = smp_processor_id(); |
1530 | 1533 | ||
1531 | #ifdef CONFIG_SMP | 1534 | #ifdef CONFIG_SMP |
1532 | if (unlikely(task_running(rq, p))) | 1535 | if (unlikely(task_running(rq, p) || is_realtime(p))) |
1533 | goto out_activate; | 1536 | goto out_activate; |
1534 | 1537 | ||
1535 | new_cpu = cpu; | 1538 | new_cpu = cpu; |
@@ -1890,6 +1893,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
1890 | */ | 1893 | */ |
1891 | prev_state = prev->state; | 1894 | prev_state = prev->state; |
1892 | finish_arch_switch(prev); | 1895 | finish_arch_switch(prev); |
1896 | litmus->finish_switch(prev); | ||
1893 | finish_lock_switch(rq, prev); | 1897 | finish_lock_switch(rq, prev); |
1894 | fire_sched_in_preempt_notifiers(current); | 1898 | fire_sched_in_preempt_notifiers(current); |
1895 | if (mm) | 1899 | if (mm) |
@@ -3491,6 +3495,7 @@ void scheduler_tick(void) | |||
3491 | update_cpu_load(rq); | 3495 | update_cpu_load(rq); |
3492 | if (curr != rq->idle) /* FIXME: needed? */ | 3496 | if (curr != rq->idle) /* FIXME: needed? */ |
3493 | curr->sched_class->task_tick(rq, curr); | 3497 | curr->sched_class->task_tick(rq, curr); |
3498 | litmus_tick(rq, curr); | ||
3494 | spin_unlock(&rq->lock); | 3499 | spin_unlock(&rq->lock); |
3495 | 3500 | ||
3496 | #ifdef CONFIG_SMP | 3501 | #ifdef CONFIG_SMP |
@@ -3641,6 +3646,10 @@ need_resched_nonpreemptible: | |||
3641 | */ | 3646 | */ |
3642 | local_irq_disable(); | 3647 | local_irq_disable(); |
3643 | __update_rq_clock(rq); | 3648 | __update_rq_clock(rq); |
3649 | /* do litmus scheduling outside of rq lock, so that we | ||
3650 | * can do proper migrations for global schedulers | ||
3651 | */ | ||
3652 | litmus_schedule(rq, prev); | ||
3644 | spin_lock(&rq->lock); | 3653 | spin_lock(&rq->lock); |
3645 | clear_tsk_need_resched(prev); | 3654 | clear_tsk_need_resched(prev); |
3646 | 3655 | ||
@@ -4236,6 +4245,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
4236 | case SCHED_RR: | 4245 | case SCHED_RR: |
4237 | p->sched_class = &rt_sched_class; | 4246 | p->sched_class = &rt_sched_class; |
4238 | break; | 4247 | break; |
4248 | case SCHED_LITMUS: | ||
4249 | p->sched_class = &litmus_sched_class; | ||
4250 | break; | ||
4239 | } | 4251 | } |
4240 | 4252 | ||
4241 | p->rt_priority = prio; | 4253 | p->rt_priority = prio; |
@@ -4268,7 +4280,7 @@ recheck: | |||
4268 | policy = oldpolicy = p->policy; | 4280 | policy = oldpolicy = p->policy; |
4269 | else if (policy != SCHED_FIFO && policy != SCHED_RR && | 4281 | else if (policy != SCHED_FIFO && policy != SCHED_RR && |
4270 | policy != SCHED_NORMAL && policy != SCHED_BATCH && | 4282 | policy != SCHED_NORMAL && policy != SCHED_BATCH && |
4271 | policy != SCHED_IDLE) | 4283 | policy != SCHED_IDLE && policy != SCHED_LITMUS) |
4272 | return -EINVAL; | 4284 | return -EINVAL; |
4273 | /* | 4285 | /* |
4274 | * Valid priorities for SCHED_FIFO and SCHED_RR are | 4286 | * Valid priorities for SCHED_FIFO and SCHED_RR are |
@@ -4282,6 +4294,9 @@ recheck: | |||
4282 | if (rt_policy(policy) != (param->sched_priority != 0)) | 4294 | if (rt_policy(policy) != (param->sched_priority != 0)) |
4283 | return -EINVAL; | 4295 | return -EINVAL; |
4284 | 4296 | ||
4297 | if (policy == SCHED_LITMUS && policy == p->policy) | ||
4298 | return -EINVAL; | ||
4299 | |||
4285 | /* | 4300 | /* |
4286 | * Allow unprivileged RT tasks to decrease priority: | 4301 | * Allow unprivileged RT tasks to decrease priority: |
4287 | */ | 4302 | */ |
@@ -4316,6 +4331,12 @@ recheck: | |||
4316 | return -EPERM; | 4331 | return -EPERM; |
4317 | } | 4332 | } |
4318 | 4333 | ||
4334 | if (policy == SCHED_LITMUS) { | ||
4335 | retval = litmus_admit_task(p); | ||
4336 | if (retval) | ||
4337 | return retval; | ||
4338 | } | ||
4339 | |||
4319 | retval = security_task_setscheduler(p, policy, param); | 4340 | retval = security_task_setscheduler(p, policy, param); |
4320 | if (retval) | 4341 | if (retval) |
4321 | return retval; | 4342 | return retval; |
@@ -4345,9 +4366,15 @@ recheck: | |||
4345 | p->sched_class->put_prev_task(rq, p); | 4366 | p->sched_class->put_prev_task(rq, p); |
4346 | } | 4367 | } |
4347 | 4368 | ||
4369 | if (p->policy == SCHED_LITMUS) | ||
4370 | litmus_exit_task(p); | ||
4371 | |||
4348 | oldprio = p->prio; | 4372 | oldprio = p->prio; |
4349 | __setscheduler(rq, p, policy, param->sched_priority); | 4373 | __setscheduler(rq, p, policy, param->sched_priority); |
4350 | 4374 | ||
4375 | if (policy == SCHED_LITMUS) | ||
4376 | litmus->task_new(p, on_rq, running); | ||
4377 | |||
4351 | if (on_rq) { | 4378 | if (on_rq) { |
4352 | if (running) | 4379 | if (running) |
4353 | p->sched_class->set_curr_task(rq); | 4380 | p->sched_class->set_curr_task(rq); |
@@ -4364,6 +4391,7 @@ recheck: | |||
4364 | check_preempt_curr(rq, p); | 4391 | check_preempt_curr(rq, p); |
4365 | } | 4392 | } |
4366 | } | 4393 | } |
4394 | |||
4367 | __task_rq_unlock(rq); | 4395 | __task_rq_unlock(rq); |
4368 | spin_unlock_irqrestore(&p->pi_lock, flags); | 4396 | spin_unlock_irqrestore(&p->pi_lock, flags); |
4369 | 4397 | ||
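The sched_setscheduler()/__setscheduler() changes above are what a task hits when it asks for the new policy. A hypothetical user-space sketch (not from this patch): SCHED_LITMUS is not an rt_policy(), so sched_priority must be 0, and the task is assumed to have set its rt_task parameters beforehand so that litmus_admit_task() accepts it.

```c
/* Hypothetical user-space sketch -- not part of this patch. */
#include <sched.h>
#include <stdio.h>

#define SCHED_LITMUS 6		/* from include/linux/sched.h above */

int become_litmus_task(void)
{
	struct sched_param sp = { .sched_priority = 0 };

	if (sched_setscheduler(0 /* self */, SCHED_LITMUS, &sp) < 0) {
		perror("sched_setscheduler(SCHED_LITMUS)");
		return -1;
	}
	return 0;
}
```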
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index da7c061e72..de30496263 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -845,7 +845,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) | |||
845 | struct sched_entity *se = &curr->se, *pse = &p->se; | 845 | struct sched_entity *se = &curr->se, *pse = &p->se; |
846 | unsigned long gran; | 846 | unsigned long gran; |
847 | 847 | ||
848 | if (unlikely(rt_prio(p->prio))) { | 848 | if (unlikely(rt_prio(p->prio) || p->policy == SCHED_LITMUS)) { |
849 | update_rq_clock(rq); | 849 | update_rq_clock(rq); |
850 | update_curr(cfs_rq); | 850 | update_curr(cfs_rq); |
851 | resched_task(curr); | 851 | resched_task(curr); |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 9ba3daa034..c7c938cee2 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -70,7 +70,7 @@ yield_task_rt(struct rq *rq) | |||
70 | */ | 70 | */ |
71 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) | 71 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) |
72 | { | 72 | { |
73 | if (p->prio < rq->curr->prio) | 73 | if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) |
74 | resched_task(rq->curr); | 74 | resched_task(rq->curr); |
75 | } | 75 | } |
76 | 76 | ||
diff --git a/litmus/Kconfig b/litmus/Kconfig new file mode 100644 index 0000000000..e6c5469d70 --- /dev/null +++ b/litmus/Kconfig | |||
@@ -0,0 +1,24 @@ | |||
1 | menu "LITMUS^RT" | ||
2 | |||
3 | config SCHED_TASK_TRACE | ||
4 | bool "Trace real-time tasks" | ||
5 | default y | ||
6 | help | ||
7 | Include support for the sched_trace_XXX() tracing functions. This | ||
8 | allows the collection of real-time task events such as job | ||
9 | completions, job releases, early completions, etc. This results in a | ||
10 | small overhead in the scheduling code. Disable if the overhead is not | ||
11 | acceptable (e.g., benchmarking). | ||
12 | |||
13 | config SCHED_DEBUG_TRACE | ||
14 | bool "TRACE() debugging" | ||
15 | default y | ||
16 | help | ||
17 | Include support for sched_trace_log_message(), which is used to | ||
18 | implement TRACE(). If disabled, no TRACE() messages will be included | ||
19 | in the kernel, and no overheads due to debugging statements will be | ||
20 | incurred by the scheduler. Disable if the overhead is not acceptable | ||
21 | (e.g. benchmarking). | ||
22 | |||
23 | |||
24 | endmenu | ||
diff --git a/litmus/Makefile b/litmus/Makefile new file mode 100644 index 0000000000..4ad854f117 --- /dev/null +++ b/litmus/Makefile | |||
@@ -0,0 +1,8 @@ | |||
1 | # | ||
2 | # Makefile for LITMUS^RT | ||
3 | # | ||
4 | |||
5 | obj-y = sched_plugin.o litmus.o sched_trace.o \ | ||
6 | edf_common.o \ | ||
7 | sched_gsn_edf.o sched_psn_edf.o litmus_sem.o \ | ||
8 | trace.o ft_event.o rt_domain.o fdso.o | ||
diff --git a/litmus/edf_common.c b/litmus/edf_common.c new file mode 100644 index 0000000000..3d9dca852d --- /dev/null +++ b/litmus/edf_common.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | * kernel/edf_common.c | ||
3 | * | ||
4 | * Common functions for EDF based scheduler. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | |||
16 | #include <litmus/edf_common.h> | ||
17 | |||
18 | /* edf_higher_prio - returns true if first has a higher EDF priority | ||
19 | * than second. Deadline ties are broken by PID. | ||
20 | * | ||
21 | * first must not be NULL and must be a real-time task. | ||
22 | * second may be NULL or a non-rt task. | ||
23 | */ | ||
24 | int edf_higher_prio(struct task_struct* first, | ||
25 | struct task_struct* second) | ||
26 | { | ||
27 | struct task_struct *first_task = first; | ||
28 | struct task_struct *second_task = second; | ||
29 | |||
30 | /* Check for inherited priorities. Change task | ||
31 | * used for comparison in such a case. | ||
32 | */ | ||
33 | if (first && first->rt_param.inh_task) | ||
34 | first_task = first->rt_param.inh_task; | ||
35 | if (second && second->rt_param.inh_task) | ||
36 | second_task = second->rt_param.inh_task; | ||
37 | |||
38 | return | ||
39 | /* does the second task exist and is it a real-time task? If | ||
40 | * not, the first task (which is an RT task) has higher | ||
41 | * priority. | ||
42 | */ | ||
43 | !second_task || !is_realtime(second_task) || | ||
44 | |||
45 | /* is the deadline of the first task earlier? | ||
46 | * Then it has higher priority. | ||
47 | */ | ||
48 | earlier_deadline(first_task, second_task) || | ||
49 | |||
50 | /* Do we have a deadline tie? | ||
51 | * Then break by PID. | ||
52 | */ | ||
53 | (get_deadline(first_task) == get_deadline(second_task) && | ||
54 | (first_task->pid < second_task->pid || | ||
55 | |||
56 | /* If the PIDs are the same then the task with the inherited | ||
57 | * priority wins. | ||
58 | */ | ||
59 | (first_task->pid == second_task->pid && | ||
60 | !second->rt_param.inh_task))); | ||
61 | } | ||
62 | |||
63 | int edf_ready_order(struct list_head* a, struct list_head* b) | ||
64 | { | ||
65 | return edf_higher_prio( | ||
66 | list_entry(a, struct task_struct, rt_list), | ||
67 | list_entry(b, struct task_struct, rt_list)); | ||
68 | } | ||
69 | |||
70 | void edf_release_at(struct task_struct *t, lt_t start) | ||
71 | { | ||
72 | t->rt_param.job_params.deadline = start; | ||
73 | edf_prepare_for_next_period(t); | ||
74 | set_rt_flags(t, RT_F_RUNNING); | ||
75 | } | ||
76 | |||
77 | void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched) | ||
78 | { | ||
79 | rt_domain_init(rt, resched, edf_ready_order); | ||
80 | } | ||
81 | |||
82 | void edf_prepare_for_next_period(struct task_struct *t) | ||
83 | { | ||
84 | BUG_ON(!t); | ||
85 | /* prepare next release */ | ||
86 | t->rt_param.job_params.release = t->rt_param.job_params.deadline; | ||
87 | t->rt_param.job_params.deadline += get_rt_period(t); | ||
88 | t->rt_param.job_params.exec_time = 0; | ||
89 | /* update job sequence number */ | ||
90 | t->rt_param.job_params.job_no++; | ||
91 | |||
92 | /* don't confuse Linux */ | ||
93 | t->time_slice = 1; | ||
94 | } | ||
95 | |||
96 | /* edf_preemption_needed - check whether the task t needs to be preempted | ||
97 | * call only with irqs disabled and with ready_lock acquired | ||
98 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
99 | */ | ||
100 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
101 | { | ||
102 | /* we need the read lock for edf_ready_queue */ | ||
103 | /* no need to preempt if there is nothing pending */ | ||
104 | if (!ready_jobs_pending(rt)) | ||
105 | return 0; | ||
106 | /* we need to reschedule if t doesn't exist */ | ||
107 | if (!t) | ||
108 | return 1; | ||
109 | |||
110 | /* NOTE: We cannot check for non-preemptibility since we | ||
111 | * don't know what address space we're currently in. | ||
112 | */ | ||
113 | |||
114 | /* make sure to get non-rt stuff out of the way */ | ||
115 | return !is_realtime(t) || edf_higher_prio(next_ready(rt), t); | ||
116 | } | ||
117 | |||
118 | |||
119 | /* | ||
120 | * Deactivate current task until the beginning of the next period. | ||
121 | */ | ||
122 | long edf_complete_job(void) | ||
123 | { | ||
124 | /* Mark that we do not execute anymore */ | ||
125 | set_rt_flags(current, RT_F_SLEEP); | ||
126 | /* call schedule, this will return when a new job arrives | ||
127 | * it also takes care of preparing for the next release | ||
128 | */ | ||
129 | schedule(); | ||
130 | return 0; | ||
131 | } | ||
132 | |||
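To make the ordering concrete: edf_higher_prio() sorts by earlier absolute deadline first and breaks deadline ties by lower PID. Below is a stand-alone sketch of just that comparison, using a hypothetical struct with made-up values; it is not part of the patch, and the inheritance and non-real-time cases handled above are omitted.

#include <assert.h>

/* hypothetical stand-in for the deadline/PID portion of edf_higher_prio() */
struct fake_task { unsigned long long deadline; int pid; };

static int fake_edf_higher_prio(struct fake_task *a, struct fake_task *b)
{
	return a->deadline < b->deadline ||
	       (a->deadline == b->deadline && a->pid < b->pid);
}

int main(void)
{
	struct fake_task t1 = { .deadline = 100, .pid = 42 };
	struct fake_task t2 = { .deadline = 100, .pid = 17 };
	struct fake_task t3 = { .deadline =  90, .pid = 99 };

	assert(fake_edf_higher_prio(&t3, &t1));   /* earlier deadline wins */
	assert(fake_edf_higher_prio(&t2, &t1));   /* deadline tie: lower PID wins */
	assert(!fake_edf_higher_prio(&t1, &t2));
	return 0;
}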
diff --git a/litmus/fdso.c b/litmus/fdso.c new file mode 100644 index 0000000000..ca9557d877 --- /dev/null +++ b/litmus/fdso.c | |||
@@ -0,0 +1,279 @@ | |||
1 | /* fdso.c - file descriptor attached shared objects | ||
2 | * | ||
3 | * (c) 2007 B. Brandenburg, LITMUS^RT project | ||
4 | * | ||
5 | * Notes: | ||
6 | * - object descriptor (OD) tables are not cloned during a fork. | ||
7 | * - objects are created on-demand, and freed after the last reference | ||
8 | * is dropped. | ||
9 | * - for now, object types are hard coded. | ||
10 | * - As long as we have live objects, we keep a reference to the inode. | ||
11 | */ | ||
12 | |||
13 | #include <linux/errno.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/mutex.h> | ||
16 | #include <linux/file.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | |||
19 | #include <litmus/fdso.h> | ||
20 | |||
21 | extern struct fdso_ops pi_sem_ops; | ||
22 | extern struct fdso_ops srp_sem_ops; | ||
23 | |||
24 | static const struct fdso_ops* fdso_ops[] = { | ||
25 | &pi_sem_ops, | ||
26 | &srp_sem_ops, | ||
27 | }; | ||
28 | |||
29 | static void* fdso_create(obj_type_t type) | ||
30 | { | ||
31 | return fdso_ops[type]->create(); | ||
32 | } | ||
33 | |||
34 | static void fdso_destroy(obj_type_t type, void* obj) | ||
35 | { | ||
36 | fdso_ops[type]->destroy(obj); | ||
37 | } | ||
38 | |||
39 | static int fdso_open(struct od_table_entry* entry, void* __user config) | ||
40 | { | ||
41 | if (fdso_ops[entry->obj->type]->open) | ||
42 | return fdso_ops[entry->obj->type]->open(entry, config); | ||
43 | else | ||
44 | return 0; | ||
45 | } | ||
46 | |||
47 | static int fdso_close(struct od_table_entry* entry) | ||
48 | { | ||
49 | if (fdso_ops[entry->obj->type]->close) | ||
50 | return fdso_ops[entry->obj->type]->close(entry); | ||
51 | else | ||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | /* inode must be locked already */ | ||
56 | static struct inode_obj_id* alloc_inode_obj(struct inode* inode, | ||
57 | obj_type_t type, | ||
58 | unsigned int id) | ||
59 | { | ||
60 | struct inode_obj_id* obj; | ||
61 | void* raw_obj; | ||
62 | |||
63 | raw_obj = fdso_create(type); | ||
64 | if (!raw_obj) | ||
65 | return NULL; | ||
66 | |||
67 | obj = kmalloc(sizeof(struct inode_obj_id), GFP_KERNEL); | ||
68 | if (!obj) | ||
69 | return NULL; | ||
70 | INIT_LIST_HEAD(&obj->list); | ||
71 | atomic_set(&obj->count, 1); | ||
72 | obj->type = type; | ||
73 | obj->id = id; | ||
74 | obj->obj = raw_obj; | ||
75 | obj->inode = inode; | ||
76 | |||
77 | list_add(&obj->list, &inode->i_obj_list); | ||
78 | atomic_inc(&inode->i_count); | ||
79 | |||
80 | printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id); | ||
81 | return obj; | ||
82 | } | ||
83 | |||
84 | /* inode must be locked already */ | ||
85 | static struct inode_obj_id* get_inode_obj(struct inode* inode, | ||
86 | obj_type_t type, | ||
87 | unsigned int id) | ||
88 | { | ||
89 | struct list_head* pos; | ||
90 | struct inode_obj_id* obj = NULL; | ||
91 | |||
92 | list_for_each(pos, &inode->i_obj_list) { | ||
93 | obj = list_entry(pos, struct inode_obj_id, list); | ||
94 | if (obj->id == id && obj->type == type) { | ||
95 | atomic_inc(&obj->count); | ||
96 | return obj; | ||
97 | } | ||
98 | } | ||
99 | printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id); | ||
100 | return NULL; | ||
101 | } | ||
102 | |||
103 | |||
104 | static void put_inode_obj(struct inode_obj_id* obj) | ||
105 | { | ||
106 | struct inode* inode; | ||
107 | int let_go = 0; | ||
108 | |||
109 | inode = obj->inode; | ||
110 | if (atomic_dec_and_test(&obj->count)) { | ||
111 | |||
112 | mutex_lock(&inode->i_obj_mutex); | ||
113 | /* no new references can be obtained */ | ||
114 | if (!atomic_read(&obj->count)) { | ||
115 | list_del(&obj->list); | ||
116 | fdso_destroy(obj->type, obj->obj); | ||
117 | kfree(obj); | ||
118 | let_go = 1; | ||
119 | } | ||
120 | mutex_unlock(&inode->i_obj_mutex); | ||
121 | if (let_go) | ||
122 | iput(inode); | ||
123 | } | ||
124 | } | ||
125 | |||
126 | static struct od_table_entry* get_od_entry(struct task_struct* t) | ||
127 | { | ||
128 | struct od_table_entry* table; | ||
129 | int i; | ||
130 | |||
131 | |||
132 | table = t->od_table; | ||
133 | if (!table) { | ||
134 | table = (struct od_table_entry*) | ||
135 | kzalloc(sizeof(struct od_table_entry) * | ||
136 | MAX_OBJECT_DESCRIPTORS, GFP_KERNEL); | ||
137 | t->od_table = table; | ||
138 | } | ||
139 | |||
140 | for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++) | ||
141 | if (!table[i].used) { | ||
142 | table[i].used = 1; | ||
143 | return table + i; | ||
144 | } | ||
145 | return NULL; | ||
146 | } | ||
147 | |||
148 | static int put_od_entry(struct od_table_entry* od) | ||
149 | { | ||
150 | put_inode_obj(od->obj); | ||
151 | od->used = 0; | ||
152 | return 0; | ||
153 | } | ||
154 | |||
155 | void exit_od_table(struct task_struct* t) | ||
156 | { | ||
157 | int i; | ||
158 | |||
159 | if (t->od_table) { | ||
160 | for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) | ||
161 | if (t->od_table[i].used) | ||
162 | put_od_entry(t->od_table + i); | ||
163 | kfree(t->od_table); | ||
164 | t->od_table = NULL; | ||
165 | } | ||
166 | } | ||
167 | |||
168 | static int do_sys_od_open(struct file* file, obj_type_t type, int id, | ||
169 | void* __user config) | ||
170 | { | ||
171 | int idx = 0, err; | ||
172 | struct inode* inode; | ||
173 | struct inode_obj_id* obj = NULL; | ||
174 | struct od_table_entry* entry; | ||
175 | |||
176 | inode = file->f_dentry->d_inode; | ||
177 | |||
178 | entry = get_od_entry(current); | ||
179 | if (!entry) | ||
180 | return -ENOMEM; | ||
181 | |||
182 | mutex_lock(&inode->i_obj_mutex); | ||
183 | obj = get_inode_obj(inode, type, id); | ||
184 | if (!obj) | ||
185 | obj = alloc_inode_obj(inode, type, id); | ||
186 | if (!obj) { | ||
187 | idx = -ENOMEM; | ||
188 | entry->used = 0; | ||
189 | } else { | ||
190 | entry->obj = obj; | ||
191 | entry->extra = NULL; | ||
192 | idx = entry - current->od_table; | ||
193 | } | ||
194 | |||
195 | mutex_unlock(&inode->i_obj_mutex); | ||
196 | |||
197 | err = fdso_open(entry, config); | ||
198 | if (err < 0) { | ||
199 | /* The class rejected the open call. | ||
200 | * We need to clean up and tell user space. | ||
201 | */ | ||
202 | put_od_entry(entry); | ||
203 | idx = err; | ||
204 | } | ||
205 | |||
206 | return idx; | ||
207 | } | ||
208 | |||
209 | |||
210 | struct od_table_entry* __od_lookup(int od) | ||
211 | { | ||
212 | struct task_struct *t = current; | ||
213 | |||
214 | if (!t->od_table) | ||
215 | return NULL; | ||
216 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
217 | return NULL; | ||
218 | if (!t->od_table[od].used) | ||
219 | return NULL; | ||
220 | return t->od_table + od; | ||
221 | } | ||
222 | |||
223 | |||
224 | asmlinkage int sys_od_open(int fd, int type, int obj_id, void* __user config) | ||
225 | { | ||
226 | int ret = 0; | ||
227 | struct file* file; | ||
228 | |||
229 | /* | ||
230 | 1) get file from fd, get inode from file | ||
231 | 2) lock inode | ||
232 | 3) try to lookup object | ||
233 | 4) if not present create and enqueue object, inc inode refcnt | ||
234 | 5) increment refcnt of object | ||
235 | 6) alloc od_table_entry, setup ptrs | ||
236 | 7) unlock inode | ||
237 | 8) return offset in od_table as OD | ||
238 | */ | ||
239 | |||
240 | if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) { | ||
241 | ret = -EINVAL; | ||
242 | goto out; | ||
243 | } | ||
244 | |||
245 | file = fget(fd); | ||
246 | if (!file) { | ||
247 | ret = -EBADF; | ||
248 | goto out; | ||
249 | } | ||
250 | |||
251 | ret = do_sys_od_open(file, type, obj_id, config); | ||
252 | |||
253 | fput(file); | ||
254 | |||
255 | out: | ||
256 | return ret; | ||
257 | } | ||
258 | |||
259 | |||
260 | asmlinkage int sys_od_close(int od) | ||
261 | { | ||
262 | int ret = -EINVAL; | ||
263 | struct task_struct *t = current; | ||
264 | |||
265 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
266 | return ret; | ||
267 | |||
268 | if (!t->od_table || !t->od_table[od].used) | ||
269 | return ret; | ||
270 | |||
271 | |||
272 | /* give the class a chance to reject the close | ||
273 | */ | ||
274 | ret = fdso_close(t->od_table + od); | ||
275 | if (ret == 0) | ||
276 | ret = put_od_entry(t->od_table + od); | ||
277 | |||
278 | return ret; | ||
279 | } | ||
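To illustrate how the object-descriptor interface above is meant to be driven from user space (this sketch is not part of the patch): tasks that share an inode and an object id end up sharing the same kernel object, and the returned OD is what the semaphore syscalls take. The wrapper functions and the PI_SEM type index are assumptions in the style of a user-space support library; the kernel entry points are the sys_od_open()/sys_od_close() calls shown above.

#include <fcntl.h>
#include <unistd.h>

/* hypothetical user-space wrappers around the LITMUS^RT syscalls */
extern int od_open(int fd, int obj_type, int obj_id, void *config); /* sys_od_open()  */
extern int od_close(int od);                                        /* sys_od_close() */
extern int pi_down(int od);                                         /* sys_pi_down()  */
extern int pi_up(int od);                                           /* sys_pi_up()    */

#define PI_SEM 0  /* assumed type index; pi_sem_ops is the first fdso_ops[] entry */

int shared_resource_example(void)
{
	int fd, od, ret = -1;

	/* any file will do; the inode + object id identify the shared semaphore */
	fd = open("/tmp/litmus_locks", O_RDONLY | O_CREAT, 0666);
	if (fd < 0)
		return -1;

	od = od_open(fd, PI_SEM, 0 /* object id */, NULL);
	if (od >= 0) {
		pi_down(od);
		/* ... critical section protected by the PI semaphore ... */
		pi_up(od);
		od_close(od);
		ret = 0;
	}
	close(fd);
	return ret;
}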
diff --git a/litmus/ft_event.c b/litmus/ft_event.c new file mode 100644 index 0000000000..b1d80c52d7 --- /dev/null +++ b/litmus/ft_event.c | |||
@@ -0,0 +1,104 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include <litmus/feather_trace.h> | ||
4 | |||
5 | /* the feather trace management functions assume | ||
6 | * exclusive access to the event table | ||
7 | */ | ||
8 | |||
9 | |||
10 | #define BYTE_JUMP 0xeb | ||
11 | #define BYTE_JUMP_LEN 0x02 | ||
12 | |||
13 | /* for each event, there is an entry in the event table */ | ||
14 | struct trace_event { | ||
15 | long id; | ||
16 | long count; | ||
17 | long start_addr; | ||
18 | long end_addr; | ||
19 | }; | ||
20 | |||
21 | extern struct trace_event __start___event_table[]; | ||
22 | extern struct trace_event __stop___event_table[]; | ||
23 | |||
24 | int ft_enable_event(unsigned long id) | ||
25 | { | ||
26 | struct trace_event* te = __start___event_table; | ||
27 | int count = 0; | ||
28 | char* delta; | ||
29 | unsigned char* instr; | ||
30 | |||
31 | while (te < __stop___event_table) { | ||
32 | if (te->id == id && ++te->count == 1) { | ||
33 | instr = (unsigned char*) te->start_addr; | ||
34 | /* make sure we don't clobber something wrong */ | ||
35 | if (*instr == BYTE_JUMP) { | ||
36 | delta = (((unsigned char*) te->start_addr) + 1); | ||
37 | *delta = 0; | ||
38 | } | ||
39 | } | ||
40 | if (te->id == id) | ||
41 | count++; | ||
42 | te++; | ||
43 | } | ||
44 | return count; | ||
45 | } | ||
46 | |||
47 | int ft_disable_event(unsigned long id) | ||
48 | { | ||
49 | struct trace_event* te = __start___event_table; | ||
50 | int count = 0; | ||
51 | char* delta; | ||
52 | unsigned char* instr; | ||
53 | |||
54 | while (te < __stop___event_table) { | ||
55 | if (te->id == id && --te->count == 0) { | ||
56 | instr = (unsigned char*) te->start_addr; | ||
57 | if (*instr == BYTE_JUMP) { | ||
58 | delta = (((unsigned char*) te->start_addr) + 1); | ||
59 | *delta = te->end_addr - te->start_addr - | ||
60 | BYTE_JUMP_LEN; | ||
61 | } | ||
62 | } | ||
63 | if (te->id == id) | ||
64 | count++; | ||
65 | te++; | ||
66 | } | ||
67 | return count; | ||
68 | } | ||
69 | |||
70 | int ft_disable_all_events(void) | ||
71 | { | ||
72 | struct trace_event* te = __start___event_table; | ||
73 | int count = 0; | ||
74 | char* delta; | ||
75 | unsigned char* instr; | ||
76 | |||
77 | while (te < __stop___event_table) { | ||
78 | if (te->count) { | ||
79 | instr = (unsigned char*) te->start_addr; | ||
80 | if (*instr == BYTE_JUMP) { | ||
81 | delta = (((unsigned char*) te->start_addr) | ||
82 | + 1); | ||
83 | *delta = te->end_addr - te->start_addr - | ||
84 | BYTE_JUMP_LEN; | ||
85 | te->count = 0; | ||
86 | count++; | ||
87 | } | ||
88 | } | ||
89 | te++; | ||
90 | } | ||
91 | return count; | ||
92 | } | ||
93 | |||
94 | int ft_is_event_enabled(unsigned long id) | ||
95 | { | ||
96 | struct trace_event* te = __start___event_table; | ||
97 | |||
98 | while (te < __stop___event_table) { | ||
99 | if (te->id == id) | ||
100 | return te->count; | ||
101 | te++; | ||
102 | } | ||
103 | return 0; | ||
104 | } | ||
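What the functions above actually do is patch the displacement byte of a two-byte x86 short jump (opcode 0xeb) that guards each event's tracing code: a displacement of zero makes execution fall through into the trace instructions, while a displacement of end_addr - start_addr - BYTE_JUMP_LEN skips over them. A self-contained sketch of that byte-level idea on a plain buffer follows; it is illustrative only and does not patch live code.

#include <assert.h>

#define BYTE_JUMP     0xeb
#define BYTE_JUMP_LEN 0x02

/* flip the guard jump of one event site: enabled == fall through */
static void set_event(unsigned char *site, unsigned char skip, int enable)
{
	assert(site[0] == BYTE_JUMP);   /* same sanity check as ft_enable_event() */
	site[1] = enable ? 0 : skip;    /* 0 => execute the traced instructions */
}

int main(void)
{
	/* "jmp +3" over three placeholder trace bytes, then a ret */
	unsigned char code[] = { BYTE_JUMP, 0x03, 0x90, 0x90, 0x90, 0xc3 };

	set_event(code, 0x03, 1);
	assert(code[1] == 0x00);        /* enabled: trace code is reached */

	set_event(code, 0x03, 0);
	assert(code[1] == 0x03);        /* disabled: trace code is skipped */
	return 0;
}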
diff --git a/litmus/litmus.c b/litmus/litmus.c new file mode 100644 index 0000000000..8ab96452e6 --- /dev/null +++ b/litmus/litmus.c | |||
@@ -0,0 +1,799 @@ | |||
1 | /* litmus.c -- Implementation of the LITMUS syscalls, the LITMUS initialization code, | ||
2 | * and the procfs interface. | ||
3 | */ | ||
4 | #include <asm/uaccess.h> | ||
5 | #include <linux/uaccess.h> | ||
6 | #include <linux/sysrq.h> | ||
7 | |||
8 | #include <linux/module.h> | ||
9 | #include <linux/proc_fs.h> | ||
10 | |||
11 | |||
12 | #include <litmus/litmus.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <litmus/sched_plugin.h> | ||
15 | |||
16 | #include <litmus/trace.h> | ||
17 | |||
18 | /* Number of RT tasks that exist in the system */ | ||
19 | atomic_t rt_task_count = ATOMIC_INIT(0); | ||
20 | static DEFINE_SPINLOCK(task_transition_lock); | ||
21 | |||
22 | /* To send signals from the scheduler | ||
23 | * Must drop locks first. | ||
24 | */ | ||
25 | static LIST_HEAD(sched_sig_list); | ||
26 | static DEFINE_SPINLOCK(sched_sig_list_lock); | ||
27 | |||
28 | /* | ||
29 | * sys_set_rt_task_param | ||
30 | * @pid: Pid of the task whose scheduling parameters must be changed | ||
31 | * @param: New real-time extension parameters such as the execution cost and | ||
32 | * period | ||
33 | * Syscall for manipulating a task's rt extension params | ||
34 | * Returns EFAULT if param is NULL. | ||
35 | * ESRCH if pid does not correspond | ||
36 | * to a valid task. | ||
37 | * EINVAL if either period or execution cost is <=0 | ||
38 | * EBUSY if pid refers to a task that is already a real-time task | ||
39 | * 0 if success | ||
40 | * | ||
41 | * Only non-real-time tasks may be configured with this system call | ||
42 | * to avoid races with the scheduler. In practice, this means that a | ||
43 | * task's parameters must be set _before_ calling sys_prepare_rt_task() | ||
44 | */ | ||
45 | asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
46 | { | ||
47 | struct rt_task tp; | ||
48 | struct task_struct *target; | ||
49 | int retval = -EINVAL; | ||
50 | |||
51 | printk("Setting up rt task parameters for process %d.\n", pid); | ||
52 | |||
53 | if (pid < 0 || param == 0) { | ||
54 | goto out; | ||
55 | } | ||
56 | if (copy_from_user(&tp, param, sizeof(tp))) { | ||
57 | retval = -EFAULT; | ||
58 | goto out; | ||
59 | } | ||
60 | |||
61 | /* Task search and manipulation must be protected */ | ||
62 | read_lock_irq(&tasklist_lock); | ||
63 | if (!(target = find_task_by_pid(pid))) { | ||
64 | retval = -ESRCH; | ||
65 | goto out_unlock; | ||
66 | } | ||
67 | |||
68 | if (is_realtime(target)) { | ||
69 | /* The task is already a real-time task. | ||
70 | * We do not allow parameter changes at this point. | ||
71 | */ | ||
72 | retval = -EBUSY; | ||
73 | goto out_unlock; | ||
74 | } | ||
75 | |||
76 | if (tp.exec_cost <= 0) | ||
77 | goto out_unlock; | ||
78 | if (tp.period <= 0) | ||
79 | goto out_unlock; | ||
80 | if (!cpu_online(tp.cpu)) | ||
81 | goto out_unlock; | ||
82 | if (tp.period < tp.exec_cost) | ||
83 | { | ||
84 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
85 | "because wcet > period\n", pid); | ||
86 | goto out_unlock; | ||
87 | } | ||
88 | |||
89 | target->rt_param.task_params = tp; | ||
90 | |||
91 | retval = 0; | ||
92 | out_unlock: | ||
93 | read_unlock_irq(&tasklist_lock); | ||
94 | out: | ||
95 | return retval; | ||
96 | } | ||
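From user space, a task is expected to fill in a struct rt_task and pass it to this call while it is still a best-effort task. A sketch of that step follows (not part of the patch); the struct layout only mirrors the exec_cost/period/cpu fields referenced above, and the field types, time unit and wrapper name are assumptions rather than the actual LITMUS^RT user-space API.

#include <string.h>
#include <sys/types.h>
#include <unistd.h>

/* hypothetical user-space mirror of struct rt_task (the real layout comes from
 * the litmus headers); only exec_cost, period and cpu appear in the code above.
 */
struct rt_task {
	unsigned long long exec_cost; /* worst-case execution time per job */
	unsigned long long period;
	unsigned int       cpu;       /* partition, checked against cpu_online() */
};

extern int set_rt_task_param(pid_t pid, struct rt_task *param); /* wraps the syscall */

int configure_self(void)
{
	struct rt_task tp;

	memset(&tp, 0, sizeof(tp));
	tp.exec_cost = 10000000;   /* e.g. 10 ms, assuming nanoseconds */
	tp.period    = 100000000;  /* e.g. 100 ms; must be >= exec_cost */
	tp.cpu       = 0;

	/* must happen before the task becomes real-time (EBUSY otherwise) */
	return set_rt_task_param(getpid(), &tp);
}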
97 | |||
98 | /* Getter of task's RT params | ||
99 | * returns EINVAL if param is NULL or pid is negative | ||
100 | * returns ESRCH if pid does not correspond to a valid task | ||
101 | * returns EFAULT if copying of parameters has failed. | ||
102 | */ | ||
103 | asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
104 | { | ||
105 | int retval = -EINVAL; | ||
106 | struct task_struct *source; | ||
107 | struct rt_task lp; | ||
108 | if (param == 0 || pid < 0) | ||
109 | goto out; | ||
110 | read_lock(&tasklist_lock); | ||
111 | if (!(source = find_task_by_pid(pid))) { | ||
112 | retval = -ESRCH; | ||
113 | goto out_unlock; | ||
114 | } | ||
115 | lp = source->rt_param.task_params; | ||
116 | read_unlock(&tasklist_lock); | ||
117 | /* Do copying outside the lock */ | ||
118 | retval = | ||
119 | copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0; | ||
120 | return retval; | ||
121 | out_unlock: | ||
122 | read_unlock(&tasklist_lock); | ||
123 | out: | ||
124 | return retval; | ||
125 | |||
126 | } | ||
127 | |||
128 | /* implemented in kernel/litmus_sem.c */ | ||
129 | void srp_ceiling_block(void); | ||
130 | |||
131 | /* | ||
132 | * This is the crucial function for the periodic task implementation. | ||
133 | * It checks whether the task is periodic and whether this kind of sleep | ||
134 | * is permitted, and then calls the plugin-specific sleep, which puts the | ||
135 | * task into a wait array. | ||
136 | * returns 0 on successful wakeup | ||
137 | * returns EPERM if current conditions do not permit such sleep | ||
138 | * returns EINVAL if current task is not able to go to sleep | ||
139 | */ | ||
140 | asmlinkage long sys_complete_job(void) | ||
141 | { | ||
142 | int retval = -EPERM; | ||
143 | if (!is_realtime(current)) { | ||
144 | retval = -EINVAL; | ||
145 | goto out; | ||
146 | } | ||
147 | /* Task with negative or zero period cannot sleep */ | ||
148 | if (get_rt_period(current) <= 0) { | ||
149 | retval = -EINVAL; | ||
150 | goto out; | ||
151 | } | ||
152 | /* The plugin has to put the task into an | ||
153 | * appropriate queue and call schedule | ||
154 | */ | ||
155 | retval = litmus->complete_job(); | ||
156 | if (!retval && is_subject_to_srp(current)) | ||
157 | srp_ceiling_block(); | ||
158 | out: | ||
159 | return retval; | ||
160 | } | ||
161 | |||
162 | /* This is an "improved" version of sys_complete_job that | ||
163 | * addresses the problem of unintentionally missing a job after | ||
164 | * an overrun. | ||
165 | * | ||
166 | * returns 0 on successful wakeup | ||
167 | * returns EPERM if current conditions do not permit such sleep | ||
168 | * returns EINVAL if current task is not able to go to sleep | ||
169 | */ | ||
170 | asmlinkage long sys_wait_for_job_release(unsigned int job) | ||
171 | { | ||
172 | int retval = -EPERM; | ||
173 | if (!is_realtime(current)) { | ||
174 | retval = -EINVAL; | ||
175 | goto out; | ||
176 | } | ||
177 | |||
178 | /* Task with negative or zero period cannot sleep */ | ||
179 | if (get_rt_period(current) <= 0) { | ||
180 | retval = -EINVAL; | ||
181 | goto out; | ||
182 | } | ||
183 | |||
184 | retval = 0; | ||
185 | |||
186 | /* first wait until we have "reached" the desired job | ||
187 | * | ||
188 | * This implementation has at least two problems: | ||
189 | * | ||
190 | * 1) It doesn't gracefully handle the wrap around of | ||
191 | * job_no. Since LITMUS is a prototype, this is not much | ||
192 | * of a problem right now. | ||
193 | * | ||
194 | * 2) It is theoretically racy if a job release occurs | ||
195 | * between checking job_no and calling sleep_next_period(). | ||
196 | * A proper solution would require adding another callback | ||
197 | * in the plugin structure and testing the condition with | ||
198 | * interrupts disabled. | ||
199 | * | ||
200 | * FIXME: At least problem 2 should be taken care of eventually. | ||
201 | */ | ||
202 | while (!retval && job > current->rt_param.job_params.job_no) | ||
203 | /* If the last job overran then job <= job_no and we | ||
204 | * don't send the task to sleep. | ||
205 | */ | ||
206 | retval = litmus->complete_job(); | ||
207 | |||
208 | /* We still have to honor the SRP after the actual release. | ||
209 | */ | ||
210 | if (!retval && is_subject_to_srp(current)) | ||
211 | srp_ceiling_block(); | ||
212 | out: | ||
213 | return retval; | ||
214 | } | ||
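Together with sys_complete_job(), this gives user space a simple periodic job loop. A sketch of the intended shape follows; it is not part of the patch, and the wrapper names are assumptions for the syscalls defined above.

/* hypothetical wrappers around the syscalls implemented above */
extern int sleep_next_period(void);                 /* sys_complete_job()         */
extern int wait_for_job_release(unsigned int job);  /* sys_wait_for_job_release() */
extern int get_job_no(unsigned int *job);           /* sys_query_job_no()         */

extern int do_one_job(void);  /* application work; returns 0 to stop */

void periodic_task_body(void)
{
	unsigned int job;

	while (do_one_job()) {
		/* Overrun-aware completion: waiting for job_no + 1 means an
		 * overrunning job does not accidentally sleep through a release;
		 * plain sleep_next_period() is the simpler alternative.
		 */
		if (get_job_no(&job) == 0)
			wait_for_job_release(job + 1);
		else
			sleep_next_period();
	}
}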
215 | |||
216 | /* This is a helper syscall to query the current job sequence number. | ||
217 | * | ||
218 | * returns 0 on successful query | ||
219 | * returns EPERM if task is not a real-time task. | ||
220 | * returns EFAULT if &job is not a valid pointer. | ||
221 | */ | ||
222 | asmlinkage long sys_query_job_no(unsigned int __user *job) | ||
223 | { | ||
224 | int retval = -EPERM; | ||
225 | if (is_realtime(current)) | ||
226 | retval = put_user(current->rt_param.job_params.job_no, job); | ||
227 | |||
228 | return retval; | ||
229 | } | ||
230 | |||
231 | struct sched_sig { | ||
232 | struct list_head list; | ||
233 | struct task_struct* task; | ||
234 | unsigned int signal:31; | ||
235 | int force:1; | ||
236 | }; | ||
237 | |||
238 | static void __scheduler_signal(struct task_struct *t, unsigned int signo, | ||
239 | int force) | ||
240 | { | ||
241 | struct sched_sig* sig; | ||
242 | |||
243 | sig = kmalloc(sizeof(struct sched_sig), GFP_ATOMIC); | ||
244 | if (!sig) { | ||
245 | TRACE_TASK(t, "dropping signal: %u\n", signo); | ||
246 | return; | ||
247 | } | ||
248 | |||
249 | spin_lock(&sched_sig_list_lock); | ||
250 | |||
251 | sig->signal = signo; | ||
252 | sig->force = force; | ||
253 | sig->task = t; | ||
254 | get_task_struct(t); | ||
255 | list_add(&sig->list, &sched_sig_list); | ||
256 | |||
257 | spin_unlock(&sched_sig_list_lock); | ||
258 | } | ||
259 | |||
260 | void scheduler_signal(struct task_struct *t, unsigned int signo) | ||
261 | { | ||
262 | __scheduler_signal(t, signo, 0); | ||
263 | } | ||
264 | |||
265 | void force_scheduler_signal(struct task_struct *t, unsigned int signo) | ||
266 | { | ||
267 | __scheduler_signal(t, signo, 1); | ||
268 | } | ||
269 | |||
270 | /* FIXME: get rid of the locking and do this on a per-processor basis */ | ||
271 | void send_scheduler_signals(void) | ||
272 | { | ||
273 | unsigned long flags; | ||
274 | struct list_head *p, *extra; | ||
275 | struct siginfo info; | ||
276 | struct sched_sig* sig; | ||
277 | struct task_struct* t; | ||
278 | struct list_head claimed; | ||
279 | |||
280 | if (spin_trylock_irqsave(&sched_sig_list_lock, flags)) { | ||
281 | if (list_empty(&sched_sig_list)) | ||
282 | p = NULL; | ||
283 | else { | ||
284 | p = sched_sig_list.next; | ||
285 | list_del(&sched_sig_list); | ||
286 | INIT_LIST_HEAD(&sched_sig_list); | ||
287 | } | ||
288 | spin_unlock_irqrestore(&sched_sig_list_lock, flags); | ||
289 | |||
290 | /* abort if there are no signals */ | ||
291 | if (!p) | ||
292 | return; | ||
293 | |||
294 | /* take signal list we just obtained */ | ||
295 | list_add(&claimed, p); | ||
296 | |||
297 | list_for_each_safe(p, extra, &claimed) { | ||
298 | list_del(p); | ||
299 | sig = list_entry(p, struct sched_sig, list); | ||
300 | t = sig->task; | ||
301 | info.si_signo = sig->signal; | ||
302 | info.si_errno = 0; | ||
303 | info.si_code = SI_KERNEL; | ||
304 | info.si_pid = 1; | ||
305 | info.si_uid = 0; | ||
306 | TRACE("sending signal %d to %d\n", info.si_signo, | ||
307 | t->pid); | ||
308 | if (sig->force) | ||
309 | force_sig_info(sig->signal, &info, t); | ||
310 | else | ||
311 | send_sig_info(sig->signal, &info, t); | ||
312 | put_task_struct(t); | ||
313 | kfree(sig); | ||
314 | } | ||
315 | } | ||
316 | |||
317 | } | ||
318 | |||
319 | static inline void np_mem_error(struct task_struct* t, const char* reason) | ||
320 | { | ||
321 | if (t->state != TASK_DEAD && !(t->flags & PF_EXITING)) { | ||
322 | TRACE("np section: %s => %s/%d killed\n", | ||
323 | reason, t->comm, t->pid); | ||
324 | force_scheduler_signal(t, SIGKILL); | ||
325 | } | ||
326 | } | ||
327 | |||
328 | /* sys_register_np_flag() allows real-time tasks to register an | ||
329 | * np section indicator. | ||
330 | * returns 0 if the flag was successfully registered | ||
331 | * returns EINVAL if current task is not a real-time task | ||
332 | * returns EFAULT if *flag couldn't be written | ||
333 | */ | ||
334 | asmlinkage long sys_register_np_flag(short __user *flag) | ||
335 | { | ||
336 | int retval = -EINVAL; | ||
337 | short test_val = RT_PREEMPTIVE; | ||
338 | |||
339 | /* avoid races with the scheduler */ | ||
340 | preempt_disable(); | ||
341 | TRACE("reg_np_flag(%p) for %s/%d\n", flag, | ||
342 | current->comm, current->pid); | ||
343 | |||
344 | /* Let's first try to write to the address. | ||
345 | * That way it is initialized and any bugs | ||
346 | * involving dangling pointers will be caught | ||
347 | * early. | ||
348 | * NULL indicates disabling np section support | ||
349 | * and should not be tested. | ||
350 | */ | ||
351 | if (flag) | ||
352 | retval = poke_kernel_address(test_val, flag); | ||
353 | else | ||
354 | retval = 0; | ||
355 | TRACE("reg_np_flag: retval=%d\n", retval); | ||
356 | if (unlikely(0 != retval)) | ||
357 | np_mem_error(current, "np flag: not writable"); | ||
358 | else | ||
359 | /* the pointer is ok */ | ||
360 | current->rt_param.np_flag = flag; | ||
361 | |||
362 | preempt_enable(); | ||
363 | return retval; | ||
364 | } | ||
365 | |||
366 | |||
367 | void request_exit_np(struct task_struct *t) | ||
368 | { | ||
369 | int ret; | ||
370 | short flag; | ||
371 | |||
372 | /* We can only do this if t is actually currently scheduled on this CPU | ||
373 | * because otherwise we are in the wrong address space. Thus make sure | ||
374 | * to check. | ||
375 | */ | ||
376 | BUG_ON(t != current); | ||
377 | |||
378 | if (unlikely(!is_realtime(t) || !t->rt_param.np_flag)) { | ||
379 | TRACE_TASK(t, "request_exit_np(): BAD TASK!\n"); | ||
380 | return; | ||
381 | } | ||
382 | |||
383 | flag = RT_EXIT_NP_REQUESTED; | ||
384 | ret = poke_kernel_address(flag, t->rt_param.np_flag + 1); | ||
385 | TRACE("request_exit_np(%s/%d)\n", t->comm, t->pid); | ||
386 | if (unlikely(0 != ret)) | ||
387 | np_mem_error(current, "request_exit_np(): flag not writable"); | ||
388 | |||
389 | } | ||
390 | |||
391 | |||
392 | int is_np(struct task_struct* t) | ||
393 | { | ||
394 | int ret; | ||
395 | unsigned short flag = 0x5858; /* = XX, looks nicer in debug*/ | ||
396 | |||
397 | BUG_ON(t != current); | ||
398 | |||
399 | if (unlikely(t->rt_param.kernel_np)) | ||
400 | return 1; | ||
401 | else if (unlikely(t->rt_param.np_flag == NULL) || | ||
402 | t->flags & PF_EXITING || | ||
403 | t->state == TASK_DEAD) | ||
404 | return 0; | ||
405 | else { | ||
406 | /* This is the tricky part. The process has registered a | ||
407 | * non-preemptive section marker. We now need to check whether | ||
408 | * it is set to RT_NON_PREEMPTIVE. Along the way we could | ||
409 | * discover that the pointer points to an unmapped region (=> | ||
410 | * kill the task) or that the location contains some garbage | ||
411 | * value (=> also kill the task). Killing the task in any case | ||
412 | * forces userspace to play nicely. Any bugs will be discovered | ||
413 | * immediately. | ||
414 | */ | ||
415 | ret = probe_kernel_address(t->rt_param.np_flag, flag); | ||
416 | if (0 == ret && (flag == RT_NON_PREEMPTIVE || | ||
417 | flag == RT_PREEMPTIVE)) | ||
418 | return flag != RT_PREEMPTIVE; | ||
419 | else { | ||
420 | /* either we could not read from the address or | ||
421 | * it contained garbage => kill the process | ||
422 | * FIXME: Should we cause a SEGFAULT instead? | ||
423 | */ | ||
424 | TRACE("is_np: ret=%d flag=%c%c (%x)\n", ret, | ||
425 | flag & 0xff, (flag >> 8) & 0xff, flag); | ||
426 | np_mem_error(t, "is_np() could not read"); | ||
427 | return 0; | ||
428 | } | ||
429 | } | ||
430 | } | ||
431 | |||
432 | /* | ||
433 | * sys_exit_np() allows a real-time task to signal that it has left a | ||
434 | * non-preemptable section. It will be called after the kernel requested a | ||
435 | * callback in the preemption indicator flag. | ||
436 | * returns 0 if the signal was valid and processed. | ||
437 | * returns EINVAL if current task is not a real-time task | ||
438 | */ | ||
439 | asmlinkage long sys_exit_np(void) | ||
440 | { | ||
441 | int retval = -EINVAL; | ||
442 | |||
443 | TS_EXIT_NP_START; | ||
444 | |||
445 | if (!is_realtime(current)) | ||
446 | goto out; | ||
447 | |||
448 | TRACE("sys_exit_np(%s/%d)\n", current->comm, current->pid); | ||
449 | /* force rescheduling so that we can be preempted */ | ||
450 | set_tsk_need_resched(current); | ||
451 | retval = 0; | ||
452 | out: | ||
453 | |||
454 | TS_EXIT_NP_END; | ||
455 | return retval; | ||
456 | } | ||
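Taken together, sys_register_np_flag(), is_np()/request_exit_np() and sys_exit_np() implement a user-space non-preemptive-section protocol: the kernel reads the first short of the registered flag to decide whether the task is currently non-preemptive, and writes the adjacent short when it wants the task to yield. The sketch below is not part of the patch; the RT_* constants and helper functions are assumed to come from a user-space support library.

/* hypothetical user-space helpers; the actual RT_PREEMPTIVE,
 * RT_NON_PREEMPTIVE and RT_EXIT_NP_REQUESTED values live in the litmus headers.
 */
extern int  register_np_flag(short *flag);  /* wraps sys_register_np_flag()        */
extern int  exit_np(void);                  /* wraps sys_exit_np()                 */
extern void enter_np(short *flag);          /* sets flag[0] = RT_NON_PREEMPTIVE    */
extern int  leave_np(short *flag);          /* sets flag[0] = RT_PREEMPTIVE and
                                             * reports whether flag[1] was set to
                                             * RT_EXIT_NP_REQUESTED by the kernel  */

static short np_flag[2];  /* [0] is read by is_np(), [1] is written by request_exit_np() */

void np_section_example(void)
{
	register_np_flag(&np_flag[0]);   /* once, after becoming a real-time task */

	enter_np(np_flag);
	/* ... short section that must not be preempted ... */
	if (leave_np(np_flag))
		exit_np();               /* the scheduler asked us to yield */
}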
457 | |||
458 | /* p is a real-time task. Re-init its state as a best-effort task. */ | ||
459 | static void reinit_litmus_state(struct task_struct* p, int restore) | ||
460 | { | ||
461 | struct rt_task user_config = {}; | ||
462 | __user short *np_flag = NULL; | ||
463 | |||
464 | if (restore) { | ||
465 | /* Save user-space provided configuration data. | ||
466 | * FIXME: This is missing service levels for adaptive tasks. | ||
467 | */ | ||
468 | user_config = p->rt_param.task_params; | ||
469 | np_flag = p->rt_param.np_flag; | ||
470 | } | ||
471 | |||
472 | /* We probably should not be inheriting any task's priority | ||
473 | * at this point in time. | ||
474 | */ | ||
475 | WARN_ON(p->rt_param.inh_task); | ||
476 | |||
477 | /* We need to restore the priority of the task. */ | ||
478 | // __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio); | ||
479 | |||
480 | /* Cleanup everything else. */ | ||
481 | memset(&p->rt_param, 0, sizeof(p->rt_param)); | ||
482 | |||
483 | /* Restore preserved fields. */ | ||
484 | if (restore) { | ||
485 | p->rt_param.task_params = user_config; | ||
486 | p->rt_param.np_flag = np_flag; | ||
487 | } | ||
488 | } | ||
489 | |||
490 | long litmus_admit_task(struct task_struct* tsk) | ||
491 | { | ||
492 | long retval; | ||
493 | unsigned long flags; | ||
494 | |||
495 | BUG_ON(is_realtime(tsk)); | ||
496 | |||
497 | if (get_rt_period(tsk) == 0 || | ||
498 | get_exec_cost(tsk) > get_rt_period(tsk)) { | ||
499 | TRACE_TASK(tsk, "litmus admit: invalid task parameters " | ||
500 | "(%lu, %lu)\n", | ||
501 | get_exec_cost(tsk), get_rt_period(tsk)); | ||
502 | return -EINVAL; | ||
503 | } | ||
504 | |||
505 | if (!cpu_online(get_partition(tsk))) | ||
506 | { | ||
507 | TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", | ||
508 | get_partition(tsk)); | ||
509 | return -EINVAL; | ||
510 | } | ||
511 | |||
512 | INIT_LIST_HEAD(&tsk->rt_list); | ||
513 | |||
514 | /* avoid scheduler plugin changing underneath us */ | ||
515 | spin_lock_irqsave(&task_transition_lock, flags); | ||
516 | retval = litmus->admit_task(tsk); | ||
517 | |||
518 | if (!retval) | ||
519 | atomic_inc(&rt_task_count); | ||
520 | spin_unlock_irqrestore(&task_transition_lock, flags); | ||
521 | |||
522 | return retval; | ||
523 | |||
524 | } | ||
525 | |||
526 | void litmus_exit_task(struct task_struct* tsk) | ||
527 | { | ||
528 | if (is_realtime(tsk)) { | ||
529 | litmus->task_exit(tsk); | ||
530 | atomic_dec(&rt_task_count); | ||
531 | reinit_litmus_state(tsk, 1); | ||
532 | } | ||
533 | } | ||
534 | |||
535 | /* Switching a plugin in use is tricky. | ||
536 | * We must watch out that no real-time tasks exist | ||
537 | * (and that none is created in parallel) and that the plugin is not | ||
538 | * currently in use on any processor (in theory). | ||
539 | * | ||
540 | * For now, we don't enforce the second part since it is unlikely to cause | ||
541 | * any trouble by itself as long as we don't unload modules. | ||
542 | */ | ||
543 | int switch_sched_plugin(struct sched_plugin* plugin) | ||
544 | { | ||
545 | unsigned long flags; | ||
546 | int ret = 0; | ||
547 | |||
548 | BUG_ON(!plugin); | ||
549 | |||
550 | /* stop task transitions */ | ||
551 | spin_lock_irqsave(&task_transition_lock, flags); | ||
552 | |||
553 | /* don't switch if there are active real-time tasks */ | ||
554 | if (atomic_read(&rt_task_count) == 0) { | ||
555 | printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); | ||
556 | litmus = plugin; | ||
557 | } else | ||
558 | ret = -EBUSY; | ||
559 | |||
560 | spin_unlock_irqrestore(&task_transition_lock, flags); | ||
561 | return ret; | ||
562 | } | ||
563 | |||
564 | /* Called upon fork. | ||
565 | * p is the newly forked task. | ||
566 | */ | ||
567 | void litmus_fork(struct task_struct* p) | ||
568 | { | ||
569 | if (is_realtime(p)) | ||
570 | /* clean out any litmus related state, don't preserve anything*/ | ||
571 | reinit_litmus_state(p, 0); | ||
572 | } | ||
573 | |||
574 | /* Called upon execve(). | ||
575 | * current is doing the exec. | ||
576 | * Don't let address space specific stuff leak. | ||
577 | */ | ||
578 | void litmus_exec(void) | ||
579 | { | ||
580 | struct task_struct* p = current; | ||
581 | |||
582 | if (is_realtime(p)) { | ||
583 | WARN_ON(p->rt_param.inh_task); | ||
584 | p->rt_param.np_flag = NULL; | ||
585 | } | ||
586 | } | ||
587 | |||
588 | void exit_litmus(struct task_struct *dead_tsk) | ||
589 | { | ||
590 | if (is_realtime(dead_tsk)) | ||
591 | litmus_exit_task(dead_tsk); | ||
592 | } | ||
593 | |||
594 | |||
595 | void list_qsort(struct list_head* list, list_cmp_t less_than) | ||
596 | { | ||
597 | struct list_head lt; | ||
598 | struct list_head geq; | ||
599 | struct list_head *pos, *extra, *pivot; | ||
600 | int n_lt = 0, n_geq = 0; | ||
601 | BUG_ON(!list); | ||
602 | |||
603 | if (list->next == list) | ||
604 | return; | ||
605 | |||
606 | INIT_LIST_HEAD(<); | ||
607 | INIT_LIST_HEAD(&geq); | ||
608 | |||
609 | pivot = list->next; | ||
610 | list_del(pivot); | ||
611 | list_for_each_safe(pos, extra, list) { | ||
612 | list_del(pos); | ||
613 | if (less_than(pos, pivot)) { | ||
614 | list_add(pos, <); | ||
615 | n_lt++; | ||
616 | } else { | ||
617 | list_add(pos, &geq); | ||
618 | n_geq++; | ||
619 | } | ||
620 | } | ||
621 | if (n_lt < n_geq) { | ||
622 | list_qsort(<, less_than); | ||
623 | list_qsort(&geq, less_than); | ||
624 | } else { | ||
625 | list_qsort(&geq, less_than); | ||
626 | list_qsort(<, less_than); | ||
627 | } | ||
628 | list_splice(&geq, list); | ||
629 | list_add(pivot, list); | ||
630 | list_splice(<, list); | ||
631 | } | ||
632 | |||
633 | #ifdef CONFIG_MAGIC_SYSRQ | ||
634 | int sys_kill(int pid, int sig); | ||
635 | |||
636 | static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty) | ||
637 | { | ||
638 | struct task_struct *t; | ||
639 | read_lock(&tasklist_lock); | ||
640 | for_each_process(t) { | ||
641 | if (is_realtime(t)) { | ||
642 | sys_kill(t->pid, SIGKILL); | ||
643 | } | ||
644 | } | ||
645 | read_unlock(&tasklist_lock); | ||
646 | } | ||
647 | |||
648 | static struct sysrq_key_op sysrq_kill_rt_tasks_op = { | ||
649 | .handler = sysrq_handle_kill_rt_tasks, | ||
650 | .help_msg = "Quit-rt-tasks", | ||
651 | .action_msg = "sent SIGKILL to all real-time tasks", | ||
652 | }; | ||
653 | #endif | ||
654 | |||
655 | static int proc_read_stats(char *page, char **start, | ||
656 | off_t off, int count, | ||
657 | int *eof, void *data) | ||
658 | { | ||
659 | int len; | ||
660 | |||
661 | len = snprintf(page, PAGE_SIZE, | ||
662 | "real-time task count = %d\n", | ||
663 | atomic_read(&rt_task_count)); | ||
664 | return len; | ||
665 | } | ||
666 | |||
667 | static int proc_read_plugins(char *page, char **start, | ||
668 | off_t off, int count, | ||
669 | int *eof, void *data) | ||
670 | { | ||
671 | int len; | ||
672 | |||
673 | len = print_sched_plugins(page, PAGE_SIZE); | ||
674 | return len; | ||
675 | } | ||
676 | |||
677 | static int proc_read_curr(char *page, char **start, | ||
678 | off_t off, int count, | ||
679 | int *eof, void *data) | ||
680 | { | ||
681 | int len; | ||
682 | |||
683 | len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name); | ||
684 | return len; | ||
685 | } | ||
686 | |||
687 | static int proc_write_curr(struct file *file, | ||
688 | const char *buffer, | ||
689 | unsigned long count, | ||
690 | void *data) | ||
691 | { | ||
692 | int len, ret; | ||
693 | char name[65]; | ||
694 | struct sched_plugin* found; | ||
695 | |||
696 | if(count > 64) | ||
697 | len = 64; | ||
698 | else | ||
699 | len = count; | ||
700 | |||
701 | if(copy_from_user(name, buffer, len)) | ||
702 | return -EFAULT; | ||
703 | |||
704 | name[len] = '\0'; | ||
705 | /* chomp name */ | ||
706 | if (len > 1 && name[len - 1] == '\n') | ||
707 | name[len - 1] = '\0'; | ||
708 | |||
709 | found = find_sched_plugin(name); | ||
710 | |||
711 | if (found) { | ||
712 | ret = switch_sched_plugin(found); | ||
713 | if (ret != 0) | ||
714 | printk(KERN_INFO "Could not switch plugin: %d\n", ret); | ||
715 | } else | ||
716 | printk(KERN_INFO "Plugin '%s' is unknown.\n", name); | ||
717 | |||
718 | return len; | ||
719 | } | ||
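proc_write_curr() makes the active scheduler selectable at run time by writing a plugin name to /proc/litmus/active_plugin; switch_sched_plugin() above refuses the switch with -EBUSY while real-time tasks exist. A minimal way to drive it from C is sketched below (the plugin name is only an example; the valid names are whatever /proc/litmus/plugins lists).

#include <stdio.h>

/* write a plugin name to the proc file created by init_litmus_proc() */
static int select_litmus_plugin(const char *name)
{
	FILE *f = fopen("/proc/litmus/active_plugin", "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", name);  /* proc_write_curr() chomps the newline */
	return fclose(f) ? -1 : 0;
}

/* e.g. select_litmus_plugin("GSN-EDF"); only succeeds while no real-time
 * tasks are active, otherwise the kernel logs "Could not switch plugin: -16".
 */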
720 | |||
721 | |||
722 | static struct proc_dir_entry *litmus_dir = NULL, | ||
723 | *curr_file = NULL, | ||
724 | *stat_file = NULL, | ||
725 | *plugs_file = NULL; | ||
726 | |||
727 | static int __init init_litmus_proc(void) | ||
728 | { | ||
729 | litmus_dir = proc_mkdir("litmus", NULL); | ||
730 | if (!litmus_dir) { | ||
731 | printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); | ||
732 | return -ENOMEM; | ||
733 | } | ||
734 | litmus_dir->owner = THIS_MODULE; | ||
735 | |||
736 | curr_file = create_proc_entry("active_plugin", | ||
737 | 0644, litmus_dir); | ||
738 | if (!curr_file) { | ||
739 | printk(KERN_ERR "Could not allocate active_plugin " | ||
740 | "procfs entry.\n"); | ||
741 | return -ENOMEM; | ||
742 | } | ||
743 | curr_file->owner = THIS_MODULE; | ||
744 | curr_file->read_proc = proc_read_curr; | ||
745 | curr_file->write_proc = proc_write_curr; | ||
746 | |||
747 | stat_file = create_proc_read_entry("stats", 0444, litmus_dir, | ||
748 | proc_read_stats, NULL); | ||
749 | |||
750 | plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir, | ||
751 | proc_read_plugins, NULL); | ||
752 | |||
753 | return 0; | ||
754 | } | ||
755 | |||
756 | static void exit_litmus_proc(void) | ||
757 | { | ||
758 | if (plugs_file) | ||
759 | remove_proc_entry("plugins", litmus_dir); | ||
760 | if (stat_file) | ||
761 | remove_proc_entry("stats", litmus_dir); | ||
762 | if (curr_file) | ||
763 | remove_proc_entry("active_plugin", litmus_dir); | ||
764 | if (litmus_dir) | ||
765 | remove_proc_entry("litmus", NULL); | ||
766 | } | ||
767 | |||
768 | extern struct sched_plugin linux_sched_plugin; | ||
769 | |||
770 | static int __init _init_litmus(void) | ||
771 | { | ||
772 | /* Common initializers, | ||
773 | * mode change lock is used to enforce single mode change | ||
774 | * operation. | ||
775 | */ | ||
776 | printk("Starting LITMUS^RT kernel\n"); | ||
777 | |||
778 | register_sched_plugin(&linux_sched_plugin); | ||
779 | |||
780 | #ifdef CONFIG_MAGIC_SYSRQ | ||
781 | /* offer some debugging help */ | ||
782 | if (!register_sysrq_key('q', &sysrq_kill_rt_tasks_op)) | ||
783 | printk("Registered kill rt tasks magic sysrq.\n"); | ||
784 | else | ||
785 | printk("Could not register kill rt tasks magic sysrq.\n"); | ||
786 | #endif | ||
787 | |||
788 | init_litmus_proc(); | ||
789 | |||
790 | return 0; | ||
791 | } | ||
792 | |||
793 | static void _exit_litmus(void) | ||
794 | { | ||
795 | exit_litmus_proc(); | ||
796 | } | ||
797 | |||
798 | module_init(_init_litmus); | ||
799 | module_exit(_exit_litmus); | ||
diff --git a/litmus/litmus_sem.c b/litmus/litmus_sem.c new file mode 100644 index 0000000000..f52941c5ca --- /dev/null +++ b/litmus/litmus_sem.c | |||
@@ -0,0 +1,566 @@ | |||
1 | /* | ||
2 | * PI semaphores and SRP implementations. | ||
3 | * Much of the code here is borrowed from include/asm-i386/semaphore.h. | ||
4 | * | ||
5 | * NOTE: This implementation is very much a prototype and horribly insecure. It | ||
6 | * is intended to be a proof of concept, not a feature-complete solution. | ||
7 | */ | ||
8 | |||
9 | #include <asm/atomic.h> | ||
10 | #include <asm/semaphore.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/wait.h> | ||
13 | #include <linux/spinlock.h> | ||
14 | #include <litmus/litmus.h> | ||
15 | #include <litmus/sched_plugin.h> | ||
16 | #include <litmus/edf_common.h> | ||
17 | |||
18 | #include <litmus/fdso.h> | ||
19 | |||
20 | #include <litmus/trace.h> | ||
21 | |||
22 | /* ************************************************************************** */ | ||
23 | /* PRIORITY INHERITANCE */ | ||
24 | /* ************************************************************************** */ | ||
25 | |||
26 | static void* create_pi_semaphore(void) | ||
27 | { | ||
28 | struct pi_semaphore* sem; | ||
29 | int i; | ||
30 | |||
31 | sem = kmalloc(sizeof(struct pi_semaphore), GFP_KERNEL); | ||
32 | if (!sem) | ||
33 | return NULL; | ||
34 | atomic_set(&sem->count, 1); | ||
35 | sem->sleepers = 0; | ||
36 | init_waitqueue_head(&sem->wait); | ||
37 | sem->hp.task = NULL; | ||
38 | sem->holder = NULL; | ||
39 | for (i = 0; i < NR_CPUS; i++) | ||
40 | sem->hp.cpu_task[i] = NULL; | ||
41 | return sem; | ||
42 | } | ||
43 | |||
44 | static void destroy_pi_semaphore(void* sem) | ||
45 | { | ||
46 | /* XXX assert invariants */ | ||
47 | kfree(sem); | ||
48 | } | ||
49 | |||
50 | struct fdso_ops pi_sem_ops = { | ||
51 | .create = create_pi_semaphore, | ||
52 | .destroy = destroy_pi_semaphore | ||
53 | }; | ||
54 | |||
55 | struct wq_pair { | ||
56 | struct task_struct* tsk; | ||
57 | struct pi_semaphore* sem; | ||
58 | }; | ||
59 | |||
60 | static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync, | ||
61 | void *key) | ||
62 | { | ||
63 | struct wq_pair* wqp = (struct wq_pair*) wait->private; | ||
64 | set_rt_flags(wqp->tsk, RT_F_EXIT_SEM); | ||
65 | litmus->inherit_priority(wqp->sem, wqp->tsk); | ||
66 | TRACE_TASK(wqp->tsk, | ||
67 | "woken up by rt_pi_wake_up() (RT_F_SEM_EXIT, PI)\n"); | ||
68 | /* point to task for default_wake_function() */ | ||
69 | wait->private = wqp->tsk; | ||
70 | default_wake_function(wait, mode, sync, key); | ||
71 | |||
72 | /* Always return true since we know that if we encountered a task | ||
73 | * that was already running the wake_up raced with the schedule in | ||
74 | * do_pi_down(). In that case the task in do_pi_down() will be scheduled | ||
75 | * immediately and own the lock. We must not wake up another task in | ||
76 | * any case. | ||
77 | */ | ||
78 | return 1; | ||
79 | } | ||
80 | |||
81 | /* caller is responsible for locking */ | ||
82 | int edf_set_hp_task(struct pi_semaphore *sem) | ||
83 | { | ||
84 | struct list_head *tmp, *next; | ||
85 | struct task_struct *queued; | ||
86 | int ret = 0; | ||
87 | |||
88 | sem->hp.task = NULL; | ||
89 | list_for_each_safe(tmp, next, &sem->wait.task_list) { | ||
90 | queued = ((struct wq_pair*) | ||
91 | list_entry(tmp, wait_queue_t, | ||
92 | task_list)->private)->tsk; | ||
93 | |||
94 | /* Compare task prios, find high prio task. */ | ||
95 | if (edf_higher_prio(queued, sem->hp.task)) { | ||
96 | sem->hp.task = queued; | ||
97 | ret = 1; | ||
98 | } | ||
99 | } | ||
100 | return ret; | ||
101 | } | ||
102 | |||
103 | /* caller is responsible for locking */ | ||
104 | int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu) | ||
105 | { | ||
106 | struct list_head *tmp, *next; | ||
107 | struct task_struct *queued; | ||
108 | int ret = 0; | ||
109 | |||
110 | sem->hp.cpu_task[cpu] = NULL; | ||
111 | list_for_each_safe(tmp, next, &sem->wait.task_list) { | ||
112 | queued = ((struct wq_pair*) | ||
113 | list_entry(tmp, wait_queue_t, | ||
114 | task_list)->private)->tsk; | ||
115 | |||
116 | /* Compare task prios, find high prio task. */ | ||
117 | if (get_partition(queued) == cpu && | ||
118 | edf_higher_prio(queued, sem->hp.cpu_task[cpu])) { | ||
119 | sem->hp.cpu_task[cpu] = queued; | ||
120 | ret = 1; | ||
121 | } | ||
122 | } | ||
123 | return ret; | ||
124 | } | ||
125 | |||
126 | int do_pi_down(struct pi_semaphore* sem) | ||
127 | { | ||
128 | unsigned long flags; | ||
129 | struct task_struct *tsk = current; | ||
130 | struct wq_pair pair; | ||
131 | int suspended = 1; | ||
132 | wait_queue_t wait = { | ||
133 | .private = &pair, | ||
134 | .func = rt_pi_wake_up, | ||
135 | .task_list = {NULL, NULL} | ||
136 | }; | ||
137 | |||
138 | pair.tsk = tsk; | ||
139 | pair.sem = sem; | ||
140 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
141 | |||
142 | if (atomic_dec_return(&sem->count) < 0 || | ||
143 | waitqueue_active(&sem->wait)) { | ||
144 | /* we need to suspend */ | ||
145 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
146 | add_wait_queue_exclusive_locked(&sem->wait, &wait); | ||
147 | |||
148 | TRACE_CUR("suspends on PI lock %p\n", sem); | ||
149 | litmus->pi_block(sem, tsk); | ||
150 | |||
151 | /* release lock before sleeping */ | ||
152 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
153 | |||
154 | TS_PI_DOWN_END; | ||
155 | preempt_enable_no_resched(); | ||
156 | |||
157 | |||
158 | /* we depend on the FIFO order | ||
159 | * Thus, we don't need to recheck when we wake up, we | ||
160 | * are guaranteed to have the lock since there is only one | ||
161 | * wake up per release | ||
162 | */ | ||
163 | schedule(); | ||
164 | |||
165 | TRACE_CUR("woke up, now owns PI lock %p\n", sem); | ||
166 | |||
167 | /* try_to_wake_up() set our state to TASK_RUNNING, | ||
168 | * all we need to do is to remove our wait queue entry | ||
169 | */ | ||
170 | remove_wait_queue(&sem->wait, &wait); | ||
171 | } else { | ||
172 | /* no priority inheritance necessary, since there are no queued | ||
173 | * tasks. | ||
174 | */ | ||
175 | suspended = 0; | ||
176 | TRACE_CUR("acquired PI lock %p, no contention\n", sem); | ||
177 | sem->holder = tsk; | ||
178 | sem->hp.task = tsk; | ||
179 | litmus->inherit_priority(sem, tsk); | ||
180 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
181 | } | ||
182 | return suspended; | ||
183 | } | ||
184 | |||
185 | void do_pi_up(struct pi_semaphore* sem) | ||
186 | { | ||
187 | unsigned long flags; | ||
188 | |||
189 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
190 | |||
191 | TRACE_CUR("releases PI lock %p\n", sem); | ||
192 | litmus->return_priority(sem); | ||
193 | sem->holder = NULL; | ||
194 | if (atomic_inc_return(&sem->count) < 1) | ||
195 | /* there is a task queued */ | ||
196 | wake_up_locked(&sem->wait); | ||
197 | |||
198 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
199 | } | ||
200 | |||
201 | asmlinkage long sys_pi_down(int sem_od) | ||
202 | { | ||
203 | long ret = 0; | ||
204 | struct pi_semaphore * sem; | ||
205 | int suspended = 0; | ||
206 | |||
207 | preempt_disable(); | ||
208 | TS_PI_DOWN_START; | ||
209 | |||
210 | sem = lookup_pi_sem(sem_od); | ||
211 | if (sem) | ||
212 | suspended = do_pi_down(sem); | ||
213 | else | ||
214 | ret = -EINVAL; | ||
215 | |||
216 | if (!suspended) { | ||
217 | TS_PI_DOWN_END; | ||
218 | preempt_enable(); | ||
219 | } | ||
220 | |||
221 | return ret; | ||
222 | } | ||
223 | |||
224 | asmlinkage long sys_pi_up(int sem_od) | ||
225 | { | ||
226 | long ret = 0; | ||
227 | struct pi_semaphore * sem; | ||
228 | |||
229 | preempt_disable(); | ||
230 | TS_PI_UP_START; | ||
231 | |||
232 | sem = lookup_pi_sem(sem_od); | ||
233 | if (sem) | ||
234 | do_pi_up(sem); | ||
235 | else | ||
236 | ret = -EINVAL; | ||
237 | |||
238 | |||
239 | TS_PI_UP_END; | ||
240 | preempt_enable(); | ||
241 | |||
242 | return ret; | ||
243 | } | ||
244 | |||
245 | /* Clear wait queue and wakeup waiting tasks, and free semaphore. */ | ||
246 | /* | ||
247 | asmlinkage long sys_pi_sema_free(int sem_id) | ||
248 | { | ||
249 | struct list_head *tmp, *next; | ||
250 | unsigned long flags; | ||
251 | |||
252 | if (sem_id < 0 || sem_id >= MAX_PI_SEMAPHORES) | ||
253 | return -EINVAL; | ||
254 | |||
255 | if (!pi_sems[sem_id].used) | ||
256 | return -EINVAL; | ||
257 | |||
258 | spin_lock_irqsave(&pi_sems[sem_id].wait.lock, flags); | ||
259 | if (waitqueue_active(&pi_sems[sem_id].wait)) { | ||
260 | list_for_each_safe(tmp, next, | ||
261 | &pi_sems[sem_id].wait.task_list) { | ||
262 | wait_queue_t *curr = list_entry(tmp, wait_queue_t, | ||
263 | task_list); | ||
264 | list_del(tmp); | ||
265 | set_rt_flags((struct task_struct*)curr->private, | ||
266 | RT_F_EXIT_SEM); | ||
267 | curr->func(curr, | ||
268 | TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, | ||
269 | 0, NULL); | ||
270 | } | ||
271 | } | ||
272 | |||
273 | spin_unlock_irqrestore(&pi_sems[sem_id].wait.lock, flags); | ||
274 | pi_sems[sem_id].used = 0; | ||
275 | |||
276 | return 0; | ||
277 | } | ||
278 | */ | ||
279 | |||
280 | |||
281 | |||
282 | /* ************************************************************************** */ | ||
283 | /* STACK RESOURCE POLICY */ | ||
284 | /* ************************************************************************** */ | ||
285 | |||
286 | |||
287 | struct srp_priority { | ||
288 | struct list_head list; | ||
289 | unsigned int period; | ||
290 | pid_t pid; | ||
291 | }; | ||
292 | |||
293 | #define list2prio(l) list_entry(l, struct srp_priority, list) | ||
294 | |||
295 | /* SRP task priority comparison function. Smaller periods have higher | ||
296 | * priority, ties are broken by PID. Special case: period == 0 <=> no priority | ||
297 | */ | ||
298 | static int srp_higher_prio(struct srp_priority* first, | ||
299 | struct srp_priority* second) | ||
300 | { | ||
301 | if (!first->period) | ||
302 | return 0; | ||
303 | else | ||
304 | return !second->period || | ||
305 | first->period < second->period || ( | ||
306 | first->period == second->period && | ||
307 | first->pid < second->pid); | ||
308 | } | ||
309 | |||
310 | struct srp { | ||
311 | struct list_head ceiling; | ||
312 | wait_queue_head_t ceiling_blocked; | ||
313 | }; | ||
314 | |||
315 | |||
316 | DEFINE_PER_CPU(struct srp, srp); | ||
317 | |||
318 | #define system_ceiling(srp) list2prio(srp->ceiling.next) | ||
319 | |||
320 | static int srp_exceeds_ceiling(struct task_struct* first, | ||
321 | struct srp* srp) | ||
322 | { | ||
323 | return list_empty(&srp->ceiling) || | ||
324 | get_rt_period(first) < system_ceiling(srp)->period || | ||
325 | (get_rt_period(first) == system_ceiling(srp)->period && | ||
326 | first->pid < system_ceiling(srp)->pid); | ||
327 | } | ||
328 | |||
329 | static void srp_add_prio(struct srp* srp, struct srp_priority* prio) | ||
330 | { | ||
331 | struct list_head *pos; | ||
332 | if (in_list(&prio->list)) { | ||
333 | TRACE_CUR("WARNING: SRP violation detected, prio is already in " | ||
334 | "ceiling list!\n"); | ||
335 | return; | ||
336 | } | ||
337 | list_for_each(pos, &srp->ceiling) | ||
338 | if (unlikely(srp_higher_prio(prio, list2prio(pos)))) { | ||
339 | __list_add(&prio->list, pos->prev, pos); | ||
340 | return; | ||
341 | } | ||
342 | |||
343 | list_add_tail(&prio->list, &srp->ceiling); | ||
344 | } | ||
345 | |||
346 | /* struct for uniprocessor SRP "semaphore" */ | ||
347 | struct srp_semaphore { | ||
348 | struct srp_priority ceiling; | ||
349 | int cpu; /* cpu associated with this "semaphore" and resource */ | ||
350 | int claimed; /* is the resource claimed (ceiling should be used)? */ | ||
351 | }; | ||
352 | |||
353 | |||
354 | static void* create_srp_semaphore(void) | ||
355 | { | ||
356 | struct srp_semaphore* sem; | ||
357 | |||
358 | if (!is_realtime(current)) | ||
359 | /* XXX log error */ | ||
360 | return NULL; | ||
361 | |||
362 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
363 | if (!sem) | ||
364 | return NULL; | ||
365 | |||
366 | INIT_LIST_HEAD(&sem->ceiling.list); | ||
367 | sem->ceiling.period = 0; | ||
368 | sem->claimed = 0; | ||
369 | sem->cpu = get_partition(current); | ||
370 | return sem; | ||
371 | } | ||
372 | |||
373 | static void destroy_srp_semaphore(void* sem) | ||
374 | { | ||
375 | /* XXX invariants */ | ||
376 | kfree(sem); | ||
377 | } | ||
378 | |||
379 | struct fdso_ops srp_sem_ops = { | ||
380 | .create = create_srp_semaphore, | ||
381 | .destroy = destroy_srp_semaphore | ||
382 | }; | ||
383 | |||
384 | /* Initialize SRP semaphores at boot time. */ | ||
385 | static int __init srp_sema_boot_init(void) | ||
386 | { | ||
387 | int i; | ||
388 | |||
389 | printk("Initializing SRP per-CPU ceilings..."); | ||
390 | for (i = 0; i < NR_CPUS; i++) { | ||
391 | init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked); | ||
392 | INIT_LIST_HEAD(&per_cpu(srp, i).ceiling); | ||
393 | } | ||
394 | printk(" done!\n"); | ||
395 | |||
396 | return 0; | ||
397 | } | ||
398 | __initcall(srp_sema_boot_init); | ||
399 | |||
400 | |||
401 | void do_srp_down(struct srp_semaphore* sem) | ||
402 | { | ||
403 | /* claim... */ | ||
404 | sem->claimed = 1; | ||
405 | /* ...and update ceiling */ | ||
406 | srp_add_prio(&__get_cpu_var(srp), &sem->ceiling); | ||
407 | } | ||
408 | |||
409 | void do_srp_up(struct srp_semaphore* sem) | ||
410 | { | ||
411 | sem->claimed = 0; | ||
412 | |||
413 | /* Determine new system priority ceiling for this CPU. */ | ||
414 | if (in_list(&sem->ceiling.list)) | ||
415 | list_del(&sem->ceiling.list); | ||
416 | else | ||
417 | TRACE_CUR("WARNING: SRP violation detected, prio not in ceiling" | ||
418 | " list!\n"); | ||
419 | |||
420 | /* Wake tasks on this CPU, if they exceed current ceiling. */ | ||
421 | wake_up_all(&__get_cpu_var(srp).ceiling_blocked); | ||
422 | } | ||
423 | |||
424 | /* Adjust the system-wide priority ceiling if resource is claimed. */ | ||
425 | asmlinkage long sys_srp_down(int sem_od) | ||
426 | { | ||
427 | int cpu; | ||
428 | int ret = -EINVAL; | ||
429 | struct srp_semaphore* sem; | ||
430 | |||
431 | /* disabling preemptions is sufficient protection since | ||
432 | * SRP is strictly per CPU and we don't interfere with any | ||
433 | * interrupt handlers | ||
434 | */ | ||
435 | preempt_disable(); | ||
436 | TS_SRP_DOWN_START; | ||
437 | |||
438 | cpu = smp_processor_id(); | ||
439 | sem = lookup_srp_sem(sem_od); | ||
440 | if (sem && sem->cpu == cpu) { | ||
441 | do_srp_down(sem); | ||
442 | ret = 0; | ||
443 | } | ||
444 | |||
445 | TS_SRP_DOWN_END; | ||
446 | preempt_enable(); | ||
447 | return ret; | ||
448 | } | ||
449 | |||
450 | /* Adjust the system-wide priority ceiling if resource is freed. */ | ||
451 | asmlinkage long sys_srp_up(int sem_od) | ||
452 | { | ||
453 | int cpu; | ||
454 | int ret = -EINVAL; | ||
455 | struct srp_semaphore* sem; | ||
456 | |||
457 | preempt_disable(); | ||
458 | TS_SRP_UP_START; | ||
459 | |||
460 | cpu = smp_processor_id(); | ||
461 | sem = lookup_srp_sem(sem_od); | ||
462 | |||
463 | if (sem && sem->cpu == cpu) { | ||
464 | do_srp_up(sem); | ||
465 | ret = 0; | ||
466 | } | ||
467 | |||
468 | TS_SRP_UP_END; | ||
469 | preempt_enable(); | ||
470 | return ret; | ||
471 | } | ||
472 | |||
473 | /* Indicate that task will use a resource associated with a given | ||
474 | * semaphore. Should be done *a priori* before the RT task system is | ||
475 | * executed, so this does *not* update the system priority | ||
476 | * ceiling! (The ceiling would be meaningless anyway, as the SRP | ||
477 | * breaks without this a priori knowledge.) | ||
478 | */ | ||
479 | asmlinkage long sys_reg_task_srp_sem(int sem_od) | ||
480 | { | ||
481 | /* | ||
482 | * FIXME: This whole concept is rather brittle! | ||
483 | * There must be a better solution. Maybe register on | ||
484 | * first reference? | ||
485 | */ | ||
486 | |||
487 | struct task_struct *t = current; | ||
488 | struct srp_priority t_prio; | ||
489 | struct srp_semaphore* sem; | ||
490 | |||
491 | sem = lookup_srp_sem(sem_od); | ||
492 | |||
493 | if (!sem) | ||
494 | return -EINVAL; | ||
495 | |||
496 | if (!is_realtime(t)) | ||
497 | return -EPERM; | ||
498 | |||
499 | if (sem->cpu != get_partition(t)) | ||
500 | return -EINVAL; | ||
501 | |||
502 | preempt_disable(); | ||
503 | t->rt_param.subject_to_srp = 1; | ||
504 | t_prio.period = get_rt_period(t); | ||
505 | t_prio.pid = t->pid; | ||
506 | if (srp_higher_prio(&t_prio, &sem->ceiling)) { | ||
507 | sem->ceiling.period = t_prio.period; | ||
508 | sem->ceiling.pid = t_prio.pid; | ||
509 | } | ||
510 | |||
511 | preempt_enable(); | ||
512 | |||
513 | return 0; | ||
514 | } | ||
515 | |||
516 | static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync, | ||
517 | void *key) | ||
518 | { | ||
519 | int cpu = smp_processor_id(); | ||
520 | struct task_struct *tsk = wait->private; | ||
521 | if (cpu != get_partition(tsk)) | ||
521 | TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n", | ||
523 | get_partition(tsk)); | ||
524 | else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) | ||
525 | return default_wake_function(wait, mode, sync, key); | ||
526 | return 0; | ||
527 | } | ||
528 | |||
529 | |||
530 | /* Wait for current task priority to exceed system-wide priority ceiling. | ||
531 | * Can be used to determine when it is safe to run a job after its release. | ||
532 | */ | ||
533 | void srp_ceiling_block(void) | ||
534 | { | ||
535 | struct task_struct *tsk = current; | ||
536 | wait_queue_t wait = { | ||
537 | .private = tsk, | ||
538 | .func = srp_wake_up, | ||
539 | .task_list = {NULL, NULL} | ||
540 | }; | ||
541 | |||
542 | preempt_disable(); | ||
543 | if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) { | ||
544 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
545 | add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); | ||
546 | TRACE_CUR("is priority ceiling blocked.\n"); | ||
547 | preempt_enable_no_resched(); | ||
548 | schedule(); | ||
549 | /* Access to CPU var must occur with preemptions disabled, | ||
550 | * otherwise Linux debug code complains loudly, even if it is | ||
551 | * ok here. | ||
552 | */ | ||
553 | preempt_disable(); | ||
554 | TRACE_CUR("finally exceeds system ceiling.\n"); | ||
555 | remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); | ||
556 | preempt_enable(); | ||
557 | } else { | ||
558 | TRACE_CUR("is not priority ceiling blocked\n"); | ||
559 | preempt_enable(); | ||
560 | } | ||
561 | } | ||
562 | |||
563 | /* ************************************************************************** */ | ||
564 | |||
565 | |||
566 | |||
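To make the intended call sequence of the SRP system calls above concrete, here is a minimal user-space sketch. It assumes that an object descriptor sem_od for an SRP semaphore on the task's own partition has already been obtained elsewhere, and the syscall numbers are placeholders that must be checked against the syscall table of the kernel actually being built.

#include <unistd.h>
#include <sys/syscall.h>

/* Placeholder syscall numbers -- verify against this kernel's syscall table. */
#define NR_srp_down          334
#define NR_srp_up            335
#define NR_reg_task_srp_sem  336

void srp_usage_sketch(int sem_od)
{
        /* a priori: declare use of the resource before the real-time
         * task system is started (fails unless the task is real-time
         * and on the semaphore's partition) */
        syscall(NR_reg_task_srp_sem, sem_od);

        /* ... later, inside a job ... */

        syscall(NR_srp_down, sem_od);   /* raises the per-CPU ceiling */
        /* access the shared resource */
        syscall(NR_srp_up, sem_od);     /* lowers it and wakes ceiling-blocked jobs */
}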
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c new file mode 100644 index 0000000000..fe7bd29b19 --- /dev/null +++ b/litmus/rt_domain.c | |||
@@ -0,0 +1,130 @@ | |||
1 | /* | ||
2 | * litmus/rt_domain.c | ||
3 | * | ||
4 | * LITMUS real-time infrastructure. This file contains the | ||
5 | * functions that manipulate RT domains. RT domains are an abstraction | ||
6 | * of a ready queue and a release queue. | ||
7 | */ | ||
8 | |||
9 | #include <linux/percpu.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/list.h> | ||
12 | |||
13 | #include <litmus/litmus.h> | ||
14 | #include <litmus/sched_plugin.h> | ||
15 | #include <litmus/sched_trace.h> | ||
16 | |||
17 | #include <litmus/rt_domain.h> | ||
18 | |||
19 | |||
20 | static int dummy_resched(rt_domain_t *rt) | ||
21 | { | ||
22 | return 0; | ||
23 | } | ||
24 | |||
25 | static int dummy_order(struct list_head* a, struct list_head* b) | ||
26 | { | ||
27 | return 0; | ||
28 | } | ||
29 | |||
30 | int release_order(struct list_head* a, struct list_head* b) | ||
31 | { | ||
32 | return earlier_release( | ||
33 | list_entry(a, struct task_struct, rt_list), | ||
34 | list_entry(b, struct task_struct, rt_list)); | ||
35 | } | ||
36 | |||
37 | |||
38 | void rt_domain_init(rt_domain_t *rt, | ||
39 | check_resched_needed_t f, | ||
40 | list_cmp_t order) | ||
41 | { | ||
42 | BUG_ON(!rt); | ||
43 | if (!f) | ||
44 | f = dummy_resched; | ||
45 | if (!order) | ||
46 | order = dummy_order; | ||
47 | INIT_LIST_HEAD(&rt->ready_queue); | ||
48 | INIT_LIST_HEAD(&rt->release_queue); | ||
49 | rt->ready_lock = RW_LOCK_UNLOCKED; | ||
50 | rt->release_lock = SPIN_LOCK_UNLOCKED; | ||
51 | rt->check_resched = f; | ||
52 | rt->order = order; | ||
53 | } | ||
54 | |||
55 | /* add_ready - add a real-time task to the rt ready queue. It must be runnable. | ||
56 | * @new: the newly released task | ||
57 | */ | ||
58 | void __add_ready(rt_domain_t* rt, struct task_struct *new) | ||
59 | { | ||
60 | TRACE("rt: adding %s/%d (%llu, %llu) to ready queue at %llu\n", | ||
61 | new->comm, new->pid, get_exec_cost(new), get_rt_period(new), | ||
62 | sched_clock()); | ||
63 | |||
64 | if (!list_insert(&new->rt_list, &rt->ready_queue, rt->order)) | ||
65 | rt->check_resched(rt); | ||
66 | } | ||
67 | |||
68 | struct task_struct* __take_ready(rt_domain_t* rt) | ||
69 | { | ||
70 | struct task_struct *t = __peek_ready(rt); | ||
71 | |||
72 | /* kick it out of the ready list */ | ||
73 | if (t) | ||
74 | list_del(&t->rt_list); | ||
75 | return t; | ||
76 | } | ||
77 | |||
78 | struct task_struct* __peek_ready(rt_domain_t* rt) | ||
79 | { | ||
80 | if (!list_empty(&rt->ready_queue)) | ||
81 | return next_ready(rt); | ||
82 | else | ||
83 | return NULL; | ||
84 | } | ||
85 | |||
86 | /* add_release - add a real-time task to the rt release queue. | ||
87 | * @task: the sleeping task | ||
88 | */ | ||
89 | void __add_release(rt_domain_t* rt, struct task_struct *task) | ||
90 | { | ||
91 | TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to release queue\n", | ||
92 | task->comm, task->pid, get_exec_cost(task), get_rt_period(task), | ||
93 | get_release(task)); | ||
94 | |||
95 | list_insert(&task->rt_list, &rt->release_queue, release_order); | ||
96 | } | ||
97 | |||
98 | void __release_pending(rt_domain_t* rt) | ||
99 | { | ||
100 | struct list_head *pos, *save; | ||
101 | struct task_struct *queued; | ||
102 | lt_t now = sched_clock(); | ||
103 | list_for_each_safe(pos, save, &rt->release_queue) { | ||
104 | queued = list_entry(pos, struct task_struct, rt_list); | ||
105 | if (likely(is_released(queued, now))) { | ||
106 | /* this one is ready to go */ | ||
107 | list_del(pos); | ||
108 | set_rt_flags(queued, RT_F_RUNNING); | ||
109 | |||
110 | sched_trace_job_release(queued); | ||
111 | |||
112 | /* now it can be picked up */ | ||
113 | barrier(); | ||
114 | add_ready(rt, queued); | ||
115 | } | ||
116 | else | ||
117 | /* the release queue is ordered */ | ||
118 | break; | ||
119 | } | ||
120 | } | ||
121 | |||
122 | void try_release_pending(rt_domain_t* rt) | ||
123 | { | ||
124 | unsigned long flags; | ||
125 | |||
126 | if (spin_trylock_irqsave(&rt->release_lock, flags)) { | ||
127 | __release_pending(rt); | ||
128 | spin_unlock_irqrestore(&rt->release_lock, flags); | ||
129 | } | ||
130 | } | ||
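rt_domain_t couples a ready queue and a release queue with a plugin-supplied comparator and resched callback. The plugins later in this patch go through the edf_domain_init() wrapper, but a rough sketch of the raw rt_domain_init() interface looks as follows (the demo_* names are illustrative only):

#include <linux/list.h>
#include <linux/sched.h>

#include <litmus/litmus.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>

static rt_domain_t demo_domain;

/* ready-queue comparator: EDF order, reusing edf_higher_prio() */
static int demo_ready_order(struct list_head *a, struct list_head *b)
{
        return edf_higher_prio(
                list_entry(a, struct task_struct, rt_list),
                list_entry(b, struct task_struct, rt_list));
}

/* invoked from __add_ready() so the plugin can trigger a preemption check */
static int demo_check_resched(rt_domain_t *rt)
{
        /* a real plugin compares the new ready-queue head against the
         * currently scheduled job and forces a reschedule if needed */
        return 0;
}

/* call once from the plugin's init code */
static void demo_domain_setup(void)
{
        rt_domain_init(&demo_domain, demo_check_resched, demo_ready_order);
}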
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c new file mode 100644 index 0000000000..e879b02888 --- /dev/null +++ b/litmus/sched_gsn_edf.c | |||
@@ -0,0 +1,719 @@ | |||
1 | /* | ||
2 | * litmus/sched_gsn_edf.c | ||
3 | * | ||
4 | * Implementation of the GSN-EDF scheduling algorithm. | ||
5 | * | ||
6 | * This version uses the simple approach and serializes all scheduling | ||
7 | * decisions by the use of a queue lock. This is probably not the | ||
8 | * best way to do it, but it should suffice for now. | ||
9 | */ | ||
10 | |||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/percpu.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/list.h> | ||
15 | |||
16 | #include <litmus/litmus.h> | ||
17 | #include <litmus/sched_plugin.h> | ||
18 | #include <litmus/edf_common.h> | ||
19 | #include <litmus/sched_trace.h> | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | |||
23 | /* Overview of GSN-EDF operations. | ||
24 | * | ||
25 | * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This | ||
26 | * description only covers how the individual operations are implemented in | ||
27 | * LITMUS. | ||
28 | * | ||
29 | * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage | ||
30 | * structure (NOT the actually scheduled | ||
31 | * task). If there is another linked task To | ||
32 | * already it will set To->linked_on = NO_CPU | ||
33 | * (thereby removing its association with this | ||
34 | * CPU). However, it will not requeue the | ||
35 | * previously linked task (if any). It will set | ||
36 | * T's state to RT_F_RUNNING and check whether | ||
37 | * it is already running somewhere else. If T | ||
38 | * is scheduled somewhere else it will link | ||
39 | * it to that CPU instead (and pull the linked | ||
40 | * task to cpu). T may be NULL. | ||
41 | * | ||
42 | * unlink(T) - Unlink removes T from all scheduler data | ||
43 | * structures. If it is linked to some CPU it | ||
44 | * will link NULL to that CPU. If it is | ||
45 | * currently queued in the gsnedf queue it will | ||
46 | * be removed from the T->rt_list. It is safe to | ||
47 | * call unlink(T) if T is not linked. T may not | ||
48 | * be NULL. | ||
49 | * | ||
50 | * requeue(T) - Requeue will insert T into the appropriate | ||
51 | * queue. If the system is in real-time mode and | ||
52 | * T is released already, it will go into the | ||
53 | * ready queue. If the system is not in | ||
54 | * real-time mode, then T will go into the | ||
55 | * release queue. If T's release time is in the | ||
56 | * future, it will go into the release | ||
57 | * queue. That means that T's release time/job | ||
58 | * no/etc. has to be updated before requeue(T) is | ||
59 | * called. It is not safe to call requeue(T) | ||
60 | * when T is already queued. T may not be NULL. | ||
61 | * | ||
62 | * gsnedf_job_arrival(T) - This is the catch all function when T enters | ||
63 | * the system after either a suspension or at a | ||
64 | * job release. It will queue T (which means it | ||
65 | * is not safe to call gsnedf_job_arrival(T) if | ||
66 | * T is already queued) and then check whether a | ||
67 | * preemption is necessary. If a preemption is | ||
68 | * necessary it will update the linkage | ||
69 | * accordingly and cause scheduled to be called | ||
70 | * (either with an IPI or need_resched). It is | ||
71 | * safe to call gsnedf_job_arrival(T) if T's | ||
72 | * next job has not been actually released yet | ||
73 | * (release time in the future). T will be put | ||
74 | * on the release queue in that case. | ||
75 | * | ||
76 | * job_completion(T) - Take care of everything that needs to be done | ||
77 | * to prepare T for its next release and place | ||
78 | * it in the right queue with | ||
79 | * gsnedf_job_arrival(). | ||
80 | * | ||
81 | * | ||
82 | * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is | ||
83 | * equivalent to unlink(T). Note that if you unlink a task from a CPU, none of | ||
84 | * the functions will automatically propagate a pending task from the ready | ||
85 | * queue to a linked task. This is the job of the calling function (by means | ||
86 | * of __take_ready). | ||
87 | */ | ||
88 | |||
89 | |||
90 | /* cpu_entry_t - maintain the linked and scheduled state | ||
91 | */ | ||
92 | typedef struct { | ||
93 | int cpu; | ||
94 | struct task_struct* linked; /* only RT tasks */ | ||
95 | struct task_struct* scheduled; /* only RT tasks */ | ||
96 | struct list_head list; | ||
97 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
98 | } cpu_entry_t; | ||
99 | DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); | ||
100 | |||
101 | #define set_will_schedule() \ | ||
102 | (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1)) | ||
103 | #define clear_will_schedule() \ | ||
104 | (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0)) | ||
105 | #define test_will_schedule(cpu) \ | ||
106 | (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule)) | ||
107 | |||
108 | |||
109 | #define NO_CPU 0xffffffff | ||
110 | |||
111 | /* The gsnedf_lock is used to serialize all scheduling events. | ||
112 | * It protects the CPU queue, the per-CPU linkage state, and the gsnedf domain. | ||
113 | */ | ||
114 | static DEFINE_SPINLOCK(gsnedf_lock); | ||
115 | /* the cpus queue themselves according to priority in here */ | ||
116 | static LIST_HEAD(gsnedf_cpu_queue); | ||
117 | |||
118 | static rt_domain_t gsnedf; | ||
119 | |||
120 | |||
121 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
122 | * order in the cpu queue. Caller must hold gsnedf lock. | ||
123 | * | ||
124 | * This really should be a heap. | ||
125 | */ | ||
126 | static void update_cpu_position(cpu_entry_t *entry) | ||
127 | { | ||
128 | cpu_entry_t *other; | ||
129 | struct list_head *pos; | ||
130 | |||
131 | if (likely(in_list(&entry->list))) | ||
132 | list_del(&entry->list); | ||
133 | /* if we do not execute real-time jobs we just move | ||
134 | * to the end of the queue | ||
135 | */ | ||
136 | if (entry->linked) { | ||
137 | list_for_each(pos, &gsnedf_cpu_queue) { | ||
138 | other = list_entry(pos, cpu_entry_t, list); | ||
139 | if (edf_higher_prio(entry->linked, other->linked)) { | ||
140 | __list_add(&entry->list, pos->prev, pos); | ||
141 | return; | ||
142 | } | ||
143 | } | ||
144 | } | ||
145 | /* if we get this far we have the lowest priority job */ | ||
146 | list_add_tail(&entry->list, &gsnedf_cpu_queue); | ||
147 | } | ||
148 | |||
149 | /* link_task_to_cpu - Update the link of a CPU. | ||
150 | * Handles the case where the to-be-linked task is already | ||
151 | * scheduled on a different CPU. | ||
152 | */ | ||
153 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
154 | cpu_entry_t *entry) | ||
155 | { | ||
156 | cpu_entry_t *sched; | ||
157 | struct task_struct* tmp; | ||
158 | int on_cpu; | ||
159 | |||
160 | BUG_ON(linked && !is_realtime(linked)); | ||
161 | |||
162 | /* Currently linked task is set to be unlinked. */ | ||
163 | if (entry->linked) { | ||
164 | entry->linked->rt_param.linked_on = NO_CPU; | ||
165 | } | ||
166 | |||
167 | /* Link new task to CPU. */ | ||
168 | if (linked) { | ||
169 | set_rt_flags(linked, RT_F_RUNNING); | ||
170 | /* handle the case where the task is already scheduled somewhere! */ | ||
171 | on_cpu = linked->rt_param.scheduled_on; | ||
172 | if (on_cpu != NO_CPU) { | ||
173 | sched = &per_cpu(gsnedf_cpu_entries, on_cpu); | ||
174 | /* this should only happen if not linked already */ | ||
175 | BUG_ON(sched->linked == linked); | ||
176 | |||
177 | /* If we are already scheduled on the CPU to which we | ||
178 | * wanted to link, we don't need to do the swap -- | ||
179 | * we just link ourselves to the CPU and depend on | ||
180 | * the caller to get things right. | ||
181 | */ | ||
182 | if (entry != sched) { | ||
183 | tmp = sched->linked; | ||
184 | linked->rt_param.linked_on = sched->cpu; | ||
185 | sched->linked = linked; | ||
186 | update_cpu_position(sched); | ||
187 | linked = tmp; | ||
188 | } | ||
189 | } | ||
190 | if (linked) /* might be NULL due to swap */ | ||
191 | linked->rt_param.linked_on = entry->cpu; | ||
192 | } | ||
193 | entry->linked = linked; | ||
194 | update_cpu_position(entry); | ||
195 | } | ||
196 | |||
197 | /* unlink - Make sure a task is not linked any longer to an entry | ||
198 | * where it was linked before. Must hold gsnedf_lock. | ||
199 | */ | ||
200 | static noinline void unlink(struct task_struct* t) | ||
201 | { | ||
202 | cpu_entry_t *entry; | ||
203 | |||
204 | if (unlikely(!t)) { | ||
205 | TRACE_BUG_ON(!t); | ||
206 | return; | ||
207 | } | ||
208 | |||
209 | if (t->rt_param.linked_on != NO_CPU) { | ||
210 | /* unlink */ | ||
211 | entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on); | ||
212 | t->rt_param.linked_on = NO_CPU; | ||
213 | link_task_to_cpu(NULL, entry); | ||
214 | } else if (in_list(&t->rt_list)) { | ||
215 | /* This is an interesting situation: t is scheduled, | ||
216 | * but was just recently unlinked. It cannot be | ||
217 | * linked anywhere else (because then it would have | ||
218 | * been relinked to this CPU), thus it must be in some | ||
219 | * queue. We must remove it from the list in this | ||
220 | * case. | ||
221 | */ | ||
222 | list_del(&t->rt_list); | ||
223 | } | ||
224 | } | ||
225 | |||
226 | |||
227 | /* preempt - force a CPU to reschedule | ||
228 | */ | ||
229 | static noinline void preempt(cpu_entry_t *entry) | ||
230 | { | ||
231 | /* We cannot make the is_np() decision here if it is a remote CPU | ||
232 | * because requesting exit_np() requires that we currently use the | ||
233 | * address space of the task. Thus, in the remote case we just send | ||
234 | * the IPI and let schedule() handle the problem. | ||
235 | */ | ||
236 | |||
237 | if (smp_processor_id() == entry->cpu) { | ||
238 | if (entry->scheduled && is_np(entry->scheduled)) | ||
239 | request_exit_np(entry->scheduled); | ||
240 | else | ||
241 | set_tsk_need_resched(current); | ||
242 | } else | ||
243 | /* in case that it is a remote CPU we have to defer the | ||
244 | * decision to the remote CPU. | ||
245 | * FIXME: We could save a few IPIs here if we leave the flag | ||
246 | * set when we are waiting for an np_exit(). | ||
247 | */ | ||
248 | if (!test_will_schedule(entry->cpu)) | ||
249 | smp_send_reschedule(entry->cpu); | ||
250 | } | ||
251 | |||
252 | /* requeue - Put an unlinked task into gsn-edf domain. | ||
253 | * Caller must hold gsnedf_lock. | ||
254 | */ | ||
255 | static noinline void requeue(struct task_struct* task) | ||
256 | { | ||
257 | BUG_ON(!task); | ||
258 | /* sanity check rt_list before insertion */ | ||
259 | BUG_ON(in_list(&task->rt_list)); | ||
260 | |||
261 | if (get_rt_flags(task) == RT_F_SLEEP) { | ||
262 | /* this task has expired | ||
263 | * _schedule has already taken care of updating | ||
264 | * the release time and | ||
265 | * deadline. We just have to check if it has been released. | ||
266 | */ | ||
267 | if (is_released(task, sched_clock())) | ||
268 | __add_ready(&gsnedf, task); | ||
269 | else { | ||
270 | /* it has got to wait */ | ||
271 | __add_release(&gsnedf, task); | ||
272 | } | ||
273 | |||
274 | } else | ||
275 | /* this is a forced preemption | ||
276 | * thus the task stays in the ready_queue | ||
277 | * we only must make it available to others | ||
278 | */ | ||
279 | __add_ready(&gsnedf, task); | ||
280 | } | ||
281 | |||
282 | /* gsnedf_job_arrival: task is either resumed or released */ | ||
283 | static noinline void gsnedf_job_arrival(struct task_struct* task) | ||
284 | { | ||
285 | cpu_entry_t* last; | ||
286 | |||
287 | BUG_ON(list_empty(&gsnedf_cpu_queue)); | ||
288 | BUG_ON(!task); | ||
289 | |||
290 | /* first queue arriving job */ | ||
291 | requeue(task); | ||
292 | |||
293 | /* then check for any necessary preemptions */ | ||
294 | last = list_entry(gsnedf_cpu_queue.prev, cpu_entry_t, list); | ||
295 | if (edf_preemption_needed(&gsnedf, last->linked)) { | ||
296 | /* preemption necessary */ | ||
297 | task = __take_ready(&gsnedf); | ||
298 | TRACE("job_arrival: task %d linked to %d\n", | ||
299 | task->pid, last->cpu); | ||
300 | if (last->linked) | ||
301 | requeue(last->linked); | ||
302 | |||
303 | link_task_to_cpu(task, last); | ||
304 | preempt(last); | ||
305 | } | ||
306 | } | ||
307 | |||
308 | /* check for current job releases */ | ||
309 | static noinline void gsnedf_release_jobs(void) | ||
310 | { | ||
311 | struct list_head *pos, *save; | ||
312 | struct task_struct *queued; | ||
313 | lt_t now = sched_clock(); | ||
314 | |||
315 | |||
316 | list_for_each_safe(pos, save, &gsnedf.release_queue) { | ||
317 | queued = list_entry(pos, struct task_struct, rt_list); | ||
318 | if (likely(is_released(queued, now))) { | ||
319 | /* this one is ready to go */ | ||
320 | list_del(pos); | ||
321 | set_rt_flags(queued, RT_F_RUNNING); | ||
322 | |||
323 | sched_trace_job_release(queued); | ||
324 | gsnedf_job_arrival(queued); | ||
325 | } | ||
326 | else | ||
327 | /* the release queue is ordered */ | ||
328 | break; | ||
329 | } | ||
330 | } | ||
331 | |||
332 | /* gsnedf_tick - this function is called for every local timer | ||
333 | * interrupt. | ||
334 | * | ||
335 | * checks whether the current task has expired and checks | ||
336 | * whether we need to preempt it if it has not expired | ||
337 | */ | ||
338 | static void gsnedf_tick(struct task_struct* t) | ||
339 | { | ||
340 | unsigned long flags; | ||
341 | |||
342 | if (is_realtime(t) && budget_exhausted(t)) { | ||
343 | if (!is_np(t)) { | ||
344 | /* np tasks will be preempted when they become | ||
345 | * preemptable again | ||
346 | */ | ||
347 | set_tsk_need_resched(t); | ||
348 | set_will_schedule(); | ||
349 | TRACE("gsnedf_scheduler_tick: " | ||
350 | "%d is preemptable " | ||
351 | " => FORCE_RESCHED\n", t->pid); | ||
352 | } else { | ||
353 | TRACE("gsnedf_scheduler_tick: " | ||
354 | "%d is non-preemptable, " | ||
355 | "preemption delayed.\n", t->pid); | ||
356 | request_exit_np(t); | ||
357 | } | ||
358 | } | ||
359 | |||
360 | /* only the first CPU needs to release jobs */ | ||
361 | /* FIXME: drive this from a hrtimer */ | ||
362 | if (smp_processor_id() == 0) { | ||
363 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
364 | |||
365 | /* Try to release pending jobs */ | ||
366 | gsnedf_release_jobs(); | ||
367 | |||
368 | /* We don't need to check linked != scheduled since | ||
369 | * set_tsk_need_resched has been set by preempt() if necessary. | ||
370 | */ | ||
371 | |||
372 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
373 | } | ||
374 | } | ||
375 | |||
376 | /* caller holds gsnedf_lock */ | ||
377 | static noinline void job_completion(struct task_struct *t) | ||
378 | { | ||
379 | BUG_ON(!t); | ||
380 | |||
381 | sched_trace_job_completion(t); | ||
382 | |||
383 | TRACE_TASK(t, "job_completion().\n"); | ||
384 | |||
385 | /* set flags */ | ||
386 | set_rt_flags(t, RT_F_SLEEP); | ||
387 | /* prepare for next period */ | ||
388 | edf_prepare_for_next_period(t); | ||
389 | /* unlink */ | ||
390 | unlink(t); | ||
391 | /* requeue, | ||
392 | * but don't requeue a blocking task. */ | ||
393 | if (is_running(t)) | ||
394 | gsnedf_job_arrival(t); | ||
395 | } | ||
396 | |||
397 | |||
398 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
399 | * assumptions on the state of the current task since it may be called for a | ||
400 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
401 | * was necessary, because sys_exit_np() was called, because some Linux | ||
402 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
403 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
404 | * current state is. | ||
405 | * | ||
406 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
407 | * | ||
408 | * The following assertions for the scheduled task could hold: | ||
409 | * | ||
410 | * - !is_running(scheduled) // the job blocks | ||
411 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
412 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
413 | * - linked != scheduled // we need to reschedule (for any reason) | ||
414 | * - is_np(scheduled) // rescheduling must be delayed, | ||
415 | * sys_exit_np must be requested | ||
416 | * | ||
417 | * Any of these can occur together. | ||
418 | */ | ||
419 | static struct task_struct* gsnedf_schedule(struct task_struct * prev) | ||
420 | { | ||
421 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | ||
422 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
423 | struct task_struct* next = NULL; | ||
424 | |||
425 | /* Will be released in finish_switch. */ | ||
426 | spin_lock(&gsnedf_lock); | ||
427 | clear_will_schedule(); | ||
428 | |||
429 | /* sanity checking */ | ||
430 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
431 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
432 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
433 | |||
434 | /* (0) Determine state */ | ||
435 | exists = entry->scheduled != NULL; | ||
436 | blocks = exists && !is_running(entry->scheduled); | ||
437 | out_of_time = exists && budget_exhausted(entry->scheduled); | ||
438 | np = exists && is_np(entry->scheduled); | ||
439 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; | ||
440 | preempt = entry->scheduled != entry->linked; | ||
441 | |||
442 | /* If a task blocks we have no choice but to reschedule. | ||
443 | */ | ||
444 | if (blocks) | ||
445 | unlink(entry->scheduled); | ||
446 | |||
447 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
448 | * We need to make sure to update the link structure anyway in case | ||
449 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
450 | * hurt. | ||
451 | */ | ||
452 | if (np && (out_of_time || preempt || sleep)) { | ||
453 | unlink(entry->scheduled); | ||
454 | request_exit_np(entry->scheduled); | ||
455 | } | ||
456 | |||
457 | /* Any task that is preemptable and either exhausts its execution | ||
458 | * budget or wants to sleep completes. We may have to reschedule after | ||
459 | * this. | ||
460 | */ | ||
461 | if (!np && (out_of_time || sleep)) | ||
462 | job_completion(entry->scheduled); | ||
463 | |||
464 | /* Link pending task if we became unlinked. | ||
465 | */ | ||
466 | if (!entry->linked) | ||
467 | link_task_to_cpu(__take_ready(&gsnedf), entry); | ||
468 | |||
469 | /* The final scheduling decision. Do we need to switch for some reason? | ||
470 | * If linked different from scheduled select linked as next. | ||
471 | */ | ||
472 | if ((!np || blocks) && | ||
473 | entry->linked != entry->scheduled) { | ||
474 | /* Schedule a linked job? */ | ||
475 | if (entry->linked) | ||
476 | next = entry->linked; | ||
477 | } else | ||
478 | /* Only override Linux scheduler if we have a real-time task | ||
479 | * scheduled that needs to continue. | ||
480 | */ | ||
481 | if (exists) | ||
482 | next = prev; | ||
483 | |||
484 | spin_unlock(&gsnedf_lock); | ||
485 | |||
486 | /* don't race with a concurrent switch */ | ||
487 | if (next && prev != next) | ||
488 | while (next->rt_param.scheduled_on != NO_CPU) | ||
489 | cpu_relax(); | ||
490 | return next; | ||
491 | } | ||
492 | |||
493 | |||
494 | /* _finish_switch - we just finished the switch away from prev | ||
495 | */ | ||
496 | static void gsnedf_finish_switch(struct task_struct *prev) | ||
497 | { | ||
498 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | ||
499 | |||
500 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
501 | |||
502 | prev->rt_param.scheduled_on = NO_CPU; | ||
503 | current->rt_param.scheduled_on = smp_processor_id(); | ||
504 | } | ||
505 | |||
506 | |||
507 | /* Prepare a task for running in RT mode | ||
508 | */ | ||
509 | static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) | ||
510 | { | ||
511 | unsigned long flags; | ||
512 | cpu_entry_t* entry; | ||
513 | |||
514 | TRACE("gsn edf: task new %d\n", t->pid); | ||
515 | |||
516 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
517 | if (running) { | ||
518 | entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t)); | ||
519 | BUG_ON(entry->scheduled); | ||
520 | entry->scheduled = t; | ||
521 | t->rt_param.scheduled_on = task_cpu(t); | ||
522 | } else | ||
523 | t->rt_param.scheduled_on = NO_CPU; | ||
524 | t->rt_param.linked_on = NO_CPU; | ||
525 | |||
526 | /* setup job params */ | ||
527 | edf_release_at(t, sched_clock()); | ||
528 | |||
529 | gsnedf_job_arrival(t); | ||
530 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
531 | } | ||
532 | |||
533 | static void gsnedf_task_wake_up(struct task_struct *task) | ||
534 | { | ||
535 | unsigned long flags; | ||
536 | lt_t now; | ||
537 | |||
538 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
539 | /* We need to take suspensions because of semaphores into | ||
540 | * account! If a job resumes after being suspended due to acquiring | ||
541 | * a semaphore, it should never be treated as a new job release. | ||
542 | */ | ||
543 | if (get_rt_flags(task) == RT_F_EXIT_SEM) { | ||
544 | set_rt_flags(task, RT_F_RUNNING); | ||
545 | } else { | ||
546 | now = sched_clock(); | ||
547 | if (is_tardy(task, now)) { | ||
548 | /* new sporadic release */ | ||
549 | edf_release_at(task, now); | ||
550 | sched_trace_job_release(task); | ||
551 | } | ||
552 | else if (task->time_slice) | ||
553 | /* came back in time before deadline | ||
554 | */ | ||
555 | set_rt_flags(task, RT_F_RUNNING); | ||
556 | } | ||
557 | gsnedf_job_arrival(task); | ||
558 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
559 | } | ||
560 | |||
561 | static void gsnedf_task_block(struct task_struct *t) | ||
562 | { | ||
563 | unsigned long flags; | ||
564 | |||
565 | /* unlink if necessary */ | ||
566 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
567 | unlink(t); | ||
568 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
569 | |||
570 | BUG_ON(!is_realtime(t)); | ||
571 | BUG_ON(t->rt_list.next != LIST_POISON1); | ||
572 | BUG_ON(t->rt_list.prev != LIST_POISON2); | ||
573 | } | ||
574 | |||
575 | |||
576 | static void gsnedf_task_exit(struct task_struct * t) | ||
577 | { | ||
578 | unsigned long flags; | ||
579 | |||
580 | /* unlink if necessary */ | ||
581 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
582 | unlink(t); | ||
583 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
584 | |||
585 | BUG_ON(!is_realtime(t)); | ||
586 | TRACE_TASK(t, "RIP\n"); | ||
587 | BUG_ON(t->rt_list.next != LIST_POISON1); | ||
588 | BUG_ON(t->rt_list.prev != LIST_POISON2); | ||
589 | } | ||
590 | |||
591 | static long gsnedf_pi_block(struct pi_semaphore *sem, | ||
592 | struct task_struct *new_waiter) | ||
593 | { | ||
594 | /* This callback has to handle the situation where a new waiter is | ||
595 | * added to the wait queue of the semaphore. | ||
596 | * | ||
597 | * We must check if it has a higher priority than the currently | ||
598 | * highest-priority task, and then potentially reschedule. | ||
599 | */ | ||
600 | |||
601 | BUG_ON(!new_waiter); | ||
602 | |||
603 | if (edf_higher_prio(new_waiter, sem->hp.task)) { | ||
604 | TRACE_TASK(new_waiter, " boosts priority\n"); | ||
605 | /* called with IRQs disabled */ | ||
606 | spin_lock(&gsnedf_lock); | ||
607 | /* store new highest-priority task */ | ||
608 | sem->hp.task = new_waiter; | ||
609 | if (sem->holder) { | ||
610 | /* let holder inherit */ | ||
611 | sem->holder->rt_param.inh_task = new_waiter; | ||
612 | unlink(sem->holder); | ||
613 | gsnedf_job_arrival(sem->holder); | ||
614 | } | ||
615 | spin_unlock(&gsnedf_lock); | ||
616 | } | ||
617 | |||
618 | return 0; | ||
619 | } | ||
620 | |||
621 | static long gsnedf_inherit_priority(struct pi_semaphore *sem, | ||
622 | struct task_struct *new_owner) | ||
623 | { | ||
624 | /* We don't need to acquire the gsnedf_lock since at the time of this | ||
625 | * call new_owner isn't actually scheduled yet (it's still sleeping) | ||
626 | * and since the calling function already holds sem->wait.lock, which | ||
627 | * prevents concurrent sem->hp.task changes. | ||
628 | */ | ||
629 | |||
630 | if (sem->hp.task && sem->hp.task != new_owner) { | ||
631 | new_owner->rt_param.inh_task = sem->hp.task; | ||
632 | TRACE_TASK(new_owner, "inherited priority from %s/%d\n", | ||
633 | sem->hp.task->comm, sem->hp.task->pid); | ||
634 | } else | ||
635 | TRACE_TASK(new_owner, | ||
636 | "cannot inherit priority, " | ||
637 | "no higher priority job waits.\n"); | ||
638 | return 0; | ||
639 | } | ||
640 | |||
641 | /* This function is called on a semaphore release, and assumes that | ||
642 | * the current task is also the semaphore holder. | ||
643 | */ | ||
644 | static long gsnedf_return_priority(struct pi_semaphore *sem) | ||
645 | { | ||
646 | struct task_struct* t = current; | ||
647 | int ret = 0; | ||
648 | |||
649 | /* Find new highest-priority semaphore task | ||
650 | * if holder task is the current hp.task. | ||
651 | * | ||
652 | * Calling function holds sem->wait.lock. | ||
653 | */ | ||
654 | if (t == sem->hp.task) | ||
655 | edf_set_hp_task(sem); | ||
656 | |||
657 | TRACE_CUR("gsnedf_return_priority for lock %p\n", sem); | ||
658 | |||
659 | if (t->rt_param.inh_task) { | ||
660 | /* interrupts already disabled by PI code */ | ||
661 | spin_lock(&gsnedf_lock); | ||
662 | |||
663 | /* Reset inh_task to NULL. */ | ||
664 | t->rt_param.inh_task = NULL; | ||
665 | |||
666 | /* Check if rescheduling is necessary */ | ||
667 | unlink(t); | ||
668 | gsnedf_job_arrival(t); | ||
669 | spin_unlock(&gsnedf_lock); | ||
670 | } | ||
671 | |||
672 | return ret; | ||
673 | } | ||
674 | |||
675 | static long gsnedf_admit_task(struct task_struct* tsk) | ||
676 | { | ||
677 | return 0; | ||
678 | } | ||
679 | |||
680 | |||
681 | /* Plugin object */ | ||
682 | static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { | ||
683 | .plugin_name = "GSN-EDF", | ||
684 | .finish_switch = gsnedf_finish_switch, | ||
685 | .tick = gsnedf_tick, | ||
686 | .task_new = gsnedf_task_new, | ||
687 | .complete_job = edf_complete_job, | ||
688 | .task_exit = gsnedf_task_exit, | ||
689 | .schedule = gsnedf_schedule, | ||
690 | .task_wake_up = gsnedf_task_wake_up, | ||
691 | .task_block = gsnedf_task_block, | ||
692 | .pi_block = gsnedf_pi_block, | ||
693 | .inherit_priority = gsnedf_inherit_priority, | ||
694 | .return_priority = gsnedf_return_priority, | ||
695 | .admit_task = gsnedf_admit_task | ||
696 | }; | ||
697 | |||
698 | |||
699 | static int __init init_gsn_edf(void) | ||
700 | { | ||
701 | int cpu; | ||
702 | cpu_entry_t *entry; | ||
703 | |||
704 | /* initialize CPU state */ | ||
705 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
706 | entry = &per_cpu(gsnedf_cpu_entries, cpu); | ||
707 | atomic_set(&entry->will_schedule, 0); | ||
708 | entry->linked = NULL; | ||
709 | entry->scheduled = NULL; | ||
710 | entry->cpu = cpu; | ||
711 | INIT_LIST_HEAD(&entry->list); | ||
712 | } | ||
713 | |||
714 | edf_domain_init(&gsnedf, NULL); | ||
715 | return register_sched_plugin(&gsn_edf_plugin); | ||
716 | } | ||
717 | |||
718 | |||
719 | module_init(init_gsn_edf); | ||
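The CPU queue maintained by update_cpu_position() exists so that the preemption candidate -- the CPU whose linked job has the lowest priority, or an idle CPU -- is always found at the tail. The stand-alone user-space model below illustrates just that decision, using a plain scan over deadlines instead of a sorted list; it is an illustration of the ordering logic, not kernel code.

#include <stdio.h>

#define NCPUS 4
#define IDLE  (~0ULL)   /* an idle CPU sorts last, like a NULL linked job */

struct cpu { int id; unsigned long long linked_deadline; };

/* find the CPU holding the lowest-priority (latest-deadline or idle) job */
static int preemption_candidate(struct cpu *cpus, int n)
{
        int worst = 0;
        for (int i = 1; i < n; i++)
                if (cpus[i].linked_deadline >= cpus[worst].linked_deadline)
                        worst = i;
        return worst;
}

int main(void)
{
        struct cpu cpus[NCPUS] = { {0, 100}, {1, IDLE}, {2, 40}, {3, 70} };
        unsigned long long new_job_deadline = 60;
        int tail = preemption_candidate(cpus, NCPUS);

        /* preempt only if the arriving job has an earlier deadline */
        if (new_job_deadline < cpus[tail].linked_deadline) {
                if (cpus[tail].linked_deadline == IDLE)
                        printf("link new job to idle cpu %d\n", cpus[tail].id);
                else
                        printf("link new job to cpu %d (preempting deadline %llu)\n",
                               cpus[tail].id, cpus[tail].linked_deadline);
        } else {
                printf("no preemption needed\n");
        }
        return 0;
}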
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c new file mode 100644 index 0000000000..89ae3941db --- /dev/null +++ b/litmus/sched_litmus.c | |||
@@ -0,0 +1,149 @@ | |||
1 | /* This file is included from kernel/sched.c */ | ||
2 | |||
3 | #include <litmus/litmus.h> | ||
4 | #include <litmus/sched_plugin.h> | ||
5 | |||
6 | static void update_time_litmus(struct rq *rq, struct task_struct *p) | ||
7 | { | ||
8 | lt_t now = sched_clock(); | ||
9 | p->rt_param.job_params.exec_time += | ||
10 | now - p->rt_param.job_params.exec_start; | ||
11 | p->rt_param.job_params.exec_start = now; | ||
12 | } | ||
13 | |||
14 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
15 | static void double_rq_unlock(struct rq *rq1, struct rq *rq2); | ||
16 | |||
17 | static void litmus_tick(struct rq *rq, struct task_struct *p) | ||
18 | { | ||
19 | if (is_realtime(p)) | ||
20 | update_time_litmus(rq, p); | ||
21 | litmus->tick(p); | ||
22 | } | ||
23 | |||
24 | static void litmus_schedule(struct rq *rq, struct task_struct *prev) | ||
25 | { | ||
26 | struct rq* other_rq; | ||
27 | int success = 0; | ||
28 | /* WARNING: rq is _not_ locked! */ | ||
29 | if (is_realtime(prev)) | ||
30 | update_time_litmus(rq, prev); | ||
31 | |||
32 | while (!success) { | ||
33 | /* let the plugin schedule */ | ||
34 | rq->litmus_next = litmus->schedule(prev); | ||
35 | |||
36 | /* check if a global plugin pulled a task from a different RQ */ | ||
37 | if (rq->litmus_next && task_rq(rq->litmus_next) != rq) { | ||
38 | /* we need to migrate the task */ | ||
39 | other_rq = task_rq(rq->litmus_next); | ||
40 | double_rq_lock(rq, other_rq); | ||
41 | /* now that we have the lock we need to make sure a | ||
42 | * couple of things still hold: | ||
43 | * - it is still a real-time task | ||
44 | * - it is still runnable (could have been stopped) | ||
45 | */ | ||
46 | if (is_realtime(rq->litmus_next) && | ||
47 | is_running(rq->litmus_next)) { | ||
48 | set_task_cpu(rq->litmus_next, smp_processor_id()); | ||
49 | success = 1; | ||
50 | } /* else something raced, retry */ | ||
51 | double_rq_unlock(rq, other_rq); | ||
52 | } else | ||
53 | success = 1; | ||
54 | } | ||
55 | } | ||
56 | |||
57 | static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, int wakeup) | ||
58 | { | ||
59 | if (wakeup) | ||
60 | litmus->task_wake_up(p); | ||
61 | } | ||
62 | |||
63 | static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, int sleep) | ||
64 | { | ||
65 | if (sleep) | ||
66 | litmus->task_block(p); | ||
67 | } | ||
68 | |||
69 | static void yield_task_litmus(struct rq *rq) | ||
70 | { | ||
71 | BUG_ON(rq->curr != current); | ||
72 | litmus->complete_job(); | ||
73 | } | ||
74 | |||
75 | /* Plugins are responsible for this. | ||
76 | */ | ||
77 | static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p) | ||
78 | { | ||
79 | } | ||
80 | |||
81 | /* has already been taken care of */ | ||
82 | static void put_prev_task_litmus(struct rq *rq, struct task_struct *p) | ||
83 | { | ||
84 | } | ||
85 | |||
86 | static struct task_struct *pick_next_task_litmus(struct rq *rq) | ||
87 | { | ||
88 | struct task_struct* picked = rq->litmus_next; | ||
89 | rq->litmus_next = NULL; | ||
90 | if (picked) | ||
91 | picked->rt_param.job_params.exec_start = sched_clock(); | ||
92 | return picked; | ||
93 | } | ||
94 | |||
95 | static void task_tick_litmus(struct rq *rq, struct task_struct *p) | ||
96 | { | ||
97 | } | ||
98 | |||
99 | /* This is called when a task becomes a real-time task, either due | ||
100 | * to a SCHED_* class transition or due to PI mutex inheritance. | ||
101 | * We don't handle Linux PI mutex inheritance yet. Use LITMUS provided | ||
102 | * synchronization primitives instead. | ||
103 | */ | ||
104 | static void set_curr_task_litmus(struct rq *rq) | ||
105 | { | ||
106 | rq->curr->rt_param.job_params.exec_start = sched_clock(); | ||
107 | } | ||
108 | |||
109 | |||
110 | #ifdef CONFIG_SMP | ||
111 | |||
112 | /* we don't repartition at runtime */ | ||
113 | |||
114 | static unsigned long | ||
115 | load_balance_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
116 | unsigned long max_load_move, | ||
117 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
118 | int *all_pinned, int *this_best_prio) | ||
119 | { | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | static int | ||
124 | move_one_task_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
125 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
126 | { | ||
127 | return 0; | ||
128 | } | ||
129 | #endif | ||
130 | |||
131 | const struct sched_class litmus_sched_class = { | ||
132 | .next = &rt_sched_class, | ||
133 | .enqueue_task = enqueue_task_litmus, | ||
134 | .dequeue_task = dequeue_task_litmus, | ||
135 | .yield_task = yield_task_litmus, | ||
136 | |||
137 | .check_preempt_curr = check_preempt_curr_litmus, | ||
138 | |||
139 | .pick_next_task = pick_next_task_litmus, | ||
140 | .put_prev_task = put_prev_task_litmus, | ||
141 | |||
142 | #ifdef CONFIG_SMP | ||
143 | .load_balance = load_balance_litmus, | ||
144 | .move_one_task = move_one_task_litmus, | ||
145 | #endif | ||
146 | |||
147 | .set_curr_task = set_curr_task_litmus, | ||
148 | .task_tick = task_tick_litmus, | ||
149 | }; | ||
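update_time_litmus() above charges a job's budget by accumulating the interval since exec_start on every tick and scheduling decision, then advancing exec_start. Below is a small user-space model of that accounting pattern; the job_params struct and the nanosecond unit are assumptions made for illustration only.

#include <stdio.h>
#include <time.h>

typedef unsigned long long lt_t;        /* assumed: a nanosecond time value */

struct job_params { lt_t exec_time, exec_start; };

static lt_t now_ns(void)
{
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (lt_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* charge the time consumed since the job was last put on the CPU */
static void charge(struct job_params *jp)
{
        lt_t now = now_ns();
        jp->exec_time += now - jp->exec_start;
        jp->exec_start = now;
}

int main(void)
{
        struct job_params jp = { .exec_time = 0, .exec_start = now_ns() };

        for (volatile long i = 0; i < 50000000L; i++)
                ;       /* burn some cycles to stand in for job execution */

        charge(&jp);
        printf("job consumed %llu ns of budget\n", jp.exec_time);
        return 0;
}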
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c new file mode 100644 index 0000000000..f7eb116ee4 --- /dev/null +++ b/litmus/sched_plugin.c | |||
@@ -0,0 +1,174 @@ | |||
1 | /* sched_plugin.c -- core infrastructure for the scheduler plugin system | ||
2 | * | ||
3 | * This file includes the initialization of the plugin system, the no-op Linux | ||
4 | * scheduler plugin and some dummy functions. | ||
5 | */ | ||
6 | |||
7 | #include <linux/list.h> | ||
8 | #include <linux/spinlock.h> | ||
9 | |||
10 | #include <litmus/litmus.h> | ||
11 | #include <litmus/sched_plugin.h> | ||
12 | |||
13 | |||
14 | /************************************************************* | ||
15 | * Dummy plugin functions * | ||
16 | *************************************************************/ | ||
17 | |||
18 | static void litmus_dummy_finish_switch(struct task_struct * prev) | ||
19 | { | ||
20 | } | ||
21 | |||
22 | static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) | ||
23 | { | ||
24 | return NULL; | ||
25 | } | ||
26 | |||
27 | static void litmus_dummy_tick(struct task_struct* tsk) | ||
28 | { | ||
29 | } | ||
30 | |||
31 | static long litmus_dummy_admit_task(struct task_struct* tsk) | ||
32 | { | ||
33 | printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", | ||
34 | tsk->comm, tsk->pid); | ||
35 | return -EINVAL; | ||
36 | } | ||
37 | |||
38 | static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) | ||
39 | { | ||
40 | } | ||
41 | |||
42 | static void litmus_dummy_task_wake_up(struct task_struct *task) | ||
43 | { | ||
44 | } | ||
45 | |||
46 | static void litmus_dummy_task_block(struct task_struct *task) | ||
47 | { | ||
48 | } | ||
49 | |||
50 | static void litmus_dummy_task_exit(struct task_struct *task) | ||
51 | { | ||
52 | } | ||
53 | |||
54 | static long litmus_dummy_complete_job(void) | ||
55 | { | ||
56 | return -ENOSYS; | ||
57 | } | ||
58 | |||
59 | static long litmus_dummy_inherit_priority(struct pi_semaphore *sem, | ||
60 | struct task_struct *new_owner) | ||
61 | { | ||
62 | return -ENOSYS; | ||
63 | } | ||
64 | |||
65 | static long litmus_dummy_return_priority(struct pi_semaphore *sem) | ||
66 | { | ||
67 | return -ENOSYS; | ||
68 | } | ||
69 | |||
70 | static long litmus_dummy_pi_block(struct pi_semaphore *sem, | ||
71 | struct task_struct *new_waiter) | ||
72 | { | ||
73 | return -ENOSYS; | ||
74 | } | ||
75 | |||
76 | |||
77 | |||
78 | /* The default scheduler plugin. It doesn't do anything and lets Linux do its | ||
79 | * job. | ||
80 | */ | ||
81 | struct sched_plugin linux_sched_plugin = { | ||
82 | .plugin_name = "Linux", | ||
83 | .tick = litmus_dummy_tick, | ||
84 | .task_new = litmus_dummy_task_new, | ||
85 | .task_exit = litmus_dummy_task_exit, | ||
86 | .task_wake_up = litmus_dummy_task_wake_up, | ||
87 | .task_block = litmus_dummy_task_block, | ||
88 | .complete_job = litmus_dummy_complete_job, | ||
89 | .schedule = litmus_dummy_schedule, | ||
90 | .finish_switch = litmus_dummy_finish_switch, | ||
91 | .inherit_priority = litmus_dummy_inherit_priority, | ||
92 | .return_priority = litmus_dummy_return_priority, | ||
93 | .pi_block = litmus_dummy_pi_block, | ||
94 | .admit_task = litmus_dummy_admit_task | ||
95 | }; | ||
96 | |||
97 | /* | ||
98 | * The reference to the current plugin that is used to schedule tasks within | ||
99 | * the system. It stores references to the actual function implementations. | ||
100 | * Should be initialized by calling "init_***_plugin()". | ||
101 | */ | ||
102 | struct sched_plugin *litmus = &linux_sched_plugin; | ||
103 | |||
104 | /* the list of registered scheduling plugins */ | ||
105 | static LIST_HEAD(sched_plugins); | ||
106 | static DEFINE_SPINLOCK(sched_plugins_lock); | ||
107 | |||
108 | #define CHECK(func) {\ | ||
109 | if (!plugin->func) \ | ||
110 | plugin->func = litmus_dummy_ ## func;} | ||
111 | |||
112 | /* FIXME: get reference to module */ | ||
113 | int register_sched_plugin(struct sched_plugin* plugin) | ||
114 | { | ||
115 | printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", | ||
116 | plugin->plugin_name); | ||
117 | |||
118 | /* make sure we don't trip over null pointers later */ | ||
119 | CHECK(finish_switch); | ||
120 | CHECK(schedule); | ||
121 | CHECK(tick); | ||
122 | CHECK(task_wake_up); | ||
123 | CHECK(task_exit); | ||
124 | CHECK(task_block); | ||
125 | CHECK(task_new); | ||
126 | CHECK(complete_job); | ||
127 | CHECK(inherit_priority); | ||
128 | CHECK(return_priority); | ||
129 | CHECK(pi_block); | ||
130 | CHECK(admit_task); | ||
131 | |||
132 | spin_lock(&sched_plugins_lock); | ||
133 | list_add(&plugin->list, &sched_plugins); | ||
134 | spin_unlock(&sched_plugins_lock); | ||
135 | |||
136 | return 0; | ||
137 | } | ||
138 | |||
139 | |||
140 | /* FIXME: reference counting, etc. */ | ||
141 | struct sched_plugin* find_sched_plugin(const char* name) | ||
142 | { | ||
143 | struct list_head *pos; | ||
144 | struct sched_plugin *plugin; | ||
145 | |||
146 | spin_lock(&sched_plugins_lock); | ||
147 | list_for_each(pos, &sched_plugins) { | ||
148 | plugin = list_entry(pos, struct sched_plugin, list); | ||
149 | if (!strcmp(plugin->plugin_name, name)) | ||
150 | goto out_unlock; | ||
151 | } | ||
152 | plugin = NULL; | ||
153 | |||
154 | out_unlock: | ||
155 | spin_unlock(&sched_plugins_lock); | ||
156 | return plugin; | ||
157 | } | ||
158 | |||
159 | int print_sched_plugins(char* buf, int max) | ||
160 | { | ||
161 | int count = 0; | ||
162 | struct list_head *pos; | ||
163 | struct sched_plugin *plugin; | ||
164 | |||
165 | spin_lock(&sched_plugins_lock); | ||
166 | list_for_each(pos, &sched_plugins) { | ||
167 | plugin = list_entry(pos, struct sched_plugin, list); | ||
168 | count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name); | ||
169 | if (max - count <= 0) | ||
170 | break; | ||
171 | } | ||
172 | spin_unlock(&sched_plugins_lock); | ||
173 | return count; | ||
174 | } | ||
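register_sched_plugin() fills every callback that a plugin leaves NULL with the corresponding litmus_dummy_* fallback via CHECK(), so a minimal plugin only has to supply the hooks it actually cares about. A hypothetical sketch (the "DEMO" plugin is not part of this patch):

#include <linux/module.h>

#include <litmus/litmus.h>
#include <litmus/sched_plugin.h>

static long demo_admit_task(struct task_struct *tsk)
{
        /* accept every task; a real plugin would validate task parameters */
        return 0;
}

static struct task_struct* demo_schedule(struct task_struct *prev)
{
        /* never pick a real-time task -- let Linux keep scheduling */
        return NULL;
}

static struct sched_plugin demo_plugin __cacheline_aligned_in_smp = {
        .plugin_name = "DEMO",
        .schedule    = demo_schedule,
        .admit_task  = demo_admit_task,
        /* all other callbacks are patched in by register_sched_plugin() */
};

static int __init init_demo_plugin(void)
{
        return register_sched_plugin(&demo_plugin);
}

module_init(init_demo_plugin);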
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c new file mode 100644 index 0000000000..961680d0a6 --- /dev/null +++ b/litmus/sched_psn_edf.c | |||
@@ -0,0 +1,440 @@ | |||
1 | |||
2 | /* | ||
3 | * litmus/sched_psn_edf.c | ||
4 | * | ||
5 | * Implementation of the PSN-EDF scheduler plugin. | ||
6 | * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c. | ||
7 | * | ||
8 | * Suspensions and non-preemptable sections are supported. | ||
9 | * Priority inheritance is not supported. | ||
10 | */ | ||
11 | |||
12 | #include <linux/percpu.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/list.h> | ||
15 | #include <linux/spinlock.h> | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | |||
19 | #include <litmus/litmus.h> | ||
20 | #include <litmus/sched_plugin.h> | ||
21 | #include <litmus/edf_common.h> | ||
22 | |||
23 | |||
24 | typedef struct { | ||
25 | rt_domain_t domain; | ||
26 | int cpu; | ||
27 | struct task_struct* scheduled; /* only RT tasks */ | ||
28 | spinlock_t lock; /* protects the domain and | ||
29 | * serializes scheduling decisions | ||
30 | */ | ||
31 | } psnedf_domain_t; | ||
32 | |||
33 | DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains); | ||
34 | |||
35 | #define local_edf (&__get_cpu_var(psnedf_domains).domain) | ||
36 | #define local_pedf (&__get_cpu_var(psnedf_domains)) | ||
37 | #define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain) | ||
38 | #define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu)) | ||
39 | #define task_edf(task) remote_edf(get_partition(task)) | ||
40 | #define task_pedf(task) remote_pedf(get_partition(task)) | ||
41 | |||
42 | |||
43 | static void psnedf_domain_init(psnedf_domain_t* pedf, | ||
44 | check_resched_needed_t check, | ||
45 | int cpu) | ||
46 | { | ||
47 | edf_domain_init(&pedf->domain, check); | ||
48 | pedf->cpu = cpu; | ||
49 | pedf->lock = SPIN_LOCK_UNLOCKED; | ||
50 | pedf->scheduled = NULL; | ||
51 | } | ||
52 | |||
53 | static void requeue(struct task_struct* t, rt_domain_t *edf) | ||
54 | { | ||
55 | /* only requeue if t is actually running */ | ||
56 | BUG_ON(!is_running(t)); | ||
57 | |||
58 | if (t->state != TASK_RUNNING) | ||
59 | TRACE_TASK(t, "requeue: !TASK_RUNNING"); | ||
60 | |||
61 | set_rt_flags(t, RT_F_RUNNING); | ||
62 | if (is_released(t, sched_clock())) | ||
63 | __add_ready(edf, t); | ||
64 | else | ||
65 | __add_release(edf, t); /* it has got to wait */ | ||
66 | } | ||
67 | |||
68 | /* we assume the lock is being held */ | ||
69 | static void preempt(psnedf_domain_t *pedf) | ||
70 | { | ||
71 | if (smp_processor_id() == pedf->cpu) { | ||
72 | if (pedf->scheduled && is_np(pedf->scheduled)) | ||
73 | request_exit_np(pedf->scheduled); | ||
74 | else | ||
75 | set_tsk_need_resched(current); | ||
76 | } else | ||
77 | /* in case that it is a remote CPU we have to defer the | ||
78 | * decision to the remote CPU | ||
79 | */ | ||
80 | smp_send_reschedule(pedf->cpu); | ||
81 | } | ||
82 | |||
83 | /* This check is trivial in partitioned systems as we only have to consider | ||
84 | * the CPU of the partition. | ||
85 | */ | ||
86 | static int psnedf_check_resched(rt_domain_t *edf) | ||
87 | { | ||
88 | psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain); | ||
89 | int ret = 0; | ||
90 | |||
91 | /* because this is a callback from rt_domain_t we already hold | ||
92 | * the necessary lock for the ready queue | ||
93 | */ | ||
94 | if (edf_preemption_needed(edf, pedf->scheduled)) { | ||
95 | preempt(pedf); | ||
96 | ret = 1; | ||
97 | } | ||
98 | return ret; | ||
99 | } | ||
100 | |||
101 | |||
102 | static void psnedf_tick(struct task_struct *t) | ||
103 | { | ||
104 | unsigned long flags; | ||
105 | rt_domain_t *edf = local_edf; | ||
106 | psnedf_domain_t *pedf = local_pedf; | ||
107 | |||
108 | /* Check for inconsistency. We don't need the lock for this since | ||
109 | * ->scheduled is only changed in schedule, which obviously is not | ||
110 | * executing in parallel on this CPU | ||
111 | */ | ||
112 | BUG_ON(is_realtime(t) && t != pedf->scheduled); | ||
113 | |||
114 | if (is_realtime(t) && budget_exhausted(t)) { | ||
115 | if (!is_np(t)) | ||
116 | set_tsk_need_resched(t); | ||
117 | else { | ||
118 | TRACE("psnedf_scheduler_tick: " | ||
119 | "%d is non-preemptable, " | ||
120 | "preemption delayed.\n", t->pid); | ||
121 | request_exit_np(t); | ||
122 | } | ||
123 | } | ||
124 | |||
125 | spin_lock_irqsave(&pedf->lock, flags); | ||
126 | /* FIXME: release via hrtimer */ | ||
127 | __release_pending(edf); | ||
128 | spin_unlock_irqrestore(&pedf->lock, flags); | ||
129 | } | ||
130 | |||
131 | static void job_completion(struct task_struct* t) | ||
132 | { | ||
133 | TRACE_TASK(t, "job_completion().\n"); | ||
134 | set_rt_flags(t, RT_F_SLEEP); | ||
135 | edf_prepare_for_next_period(t); | ||
136 | } | ||
137 | |||
138 | static struct task_struct* psnedf_schedule(struct task_struct * prev) | ||
139 | { | ||
140 | psnedf_domain_t* pedf = local_pedf; | ||
141 | rt_domain_t* edf = &pedf->domain; | ||
142 | struct task_struct* next; | ||
143 | |||
144 | int out_of_time, sleep, preempt, | ||
145 | np, exists, blocks, resched; | ||
146 | |||
147 | spin_lock(&pedf->lock); | ||
148 | |||
149 | /* sanity checking */ | ||
150 | BUG_ON(pedf->scheduled && pedf->scheduled != prev); | ||
151 | BUG_ON(pedf->scheduled && !is_realtime(prev)); | ||
152 | |||
153 | /* (0) Determine state */ | ||
154 | exists = pedf->scheduled != NULL; | ||
155 | blocks = exists && !is_running(pedf->scheduled); | ||
156 | out_of_time = exists && budget_exhausted(pedf->scheduled); | ||
157 | np = exists && is_np(pedf->scheduled); | ||
158 | sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP; | ||
159 | preempt = edf_preemption_needed(edf, prev); | ||
160 | |||
161 | /* If we need to preempt do so. | ||
162 | * The following checks set resched to 1 in case of special | ||
163 | * circumstances. | ||
164 | */ | ||
165 | resched = preempt; | ||
166 | |||
167 | /* If a task blocks we have no choice but to reschedule. | ||
168 | */ | ||
169 | if (blocks) | ||
170 | resched = 1; | ||
171 | |||
172 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
173 | * Multiple calls to request_exit_np() don't hurt. | ||
174 | */ | ||
175 | if (np && (out_of_time || preempt || sleep)) | ||
176 | request_exit_np(pedf->scheduled); | ||
177 | |||
178 | /* Any task that is preemptable and either exhausts its execution | ||
179 | * budget or wants to sleep completes. We may have to reschedule after | ||
180 | * this. | ||
181 | */ | ||
182 | if (!np && (out_of_time || sleep)) { | ||
183 | job_completion(pedf->scheduled); | ||
184 | resched = 1; | ||
185 | } | ||
186 | |||
187 | /* The final scheduling decision. Do we need to switch for some reason? | ||
188 | * Switch if we are in RT mode and have no task or if we need to | ||
189 | * resched. | ||
190 | */ | ||
191 | next = NULL; | ||
192 | if ((!np || blocks) && (resched || !exists)) { | ||
193 | /* Take care of a previously scheduled | ||
194 | * job by taking it out of the Linux runqueue. | ||
195 | */ | ||
196 | if (pedf->scheduled && !blocks) | ||
197 | requeue(pedf->scheduled, edf); | ||
198 | next = __take_ready(edf); | ||
199 | } else | ||
200 | /* Only override Linux scheduler if we have a real-time task | ||
201 | * scheduled that needs to continue. | ||
202 | */ | ||
203 | if (exists) | ||
204 | next = prev; | ||
205 | |||
206 | if (next) | ||
207 | set_rt_flags(next, RT_F_RUNNING); | ||
208 | |||
209 | pedf->scheduled = next; | ||
210 | spin_unlock(&pedf->lock); | ||
211 | return next; | ||
212 | } | ||
213 | |||
214 | |||
215 | /* Prepare a task for running in RT mode | ||
216 | * Enqueues the task into master queue data structure | ||
217 | */ | ||
218 | static void psnedf_task_new(struct task_struct * t, int on_rq, int running) | ||
219 | { | ||
220 | rt_domain_t* edf = task_edf(t); | ||
221 | psnedf_domain_t* pedf = task_pedf(t); | ||
222 | unsigned long flags; | ||
223 | |||
224 | TRACE("[%d] psn edf: prepare new %d on CPU %d\n", | ||
225 | smp_processor_id(), t->pid, get_partition(t)); | ||
226 | |||
227 | /* setup job parameters */ | ||
228 | edf_release_at(t, sched_clock()); | ||
229 | |||
230 | /* The task should be running in the queue; otherwise the signal | ||
231 | * code will try to wake it up, with fatal consequences. | ||
232 | */ | ||
233 | spin_lock_irqsave(&pedf->lock, flags); | ||
234 | if (running) { | ||
235 | /* there shouldn't be anything else running at the time */ | ||
236 | BUG_ON(pedf->scheduled); | ||
237 | pedf->scheduled = t; | ||
238 | } else { | ||
239 | requeue(t, edf); | ||
240 | /* maybe we have to reschedule */ | ||
241 | preempt(pedf); | ||
242 | } | ||
243 | spin_unlock_irqrestore(&pedf->lock, flags); | ||
244 | } | ||
245 | |||
246 | static void psnedf_task_wake_up(struct task_struct *task) | ||
247 | { | ||
248 | unsigned long flags; | ||
249 | psnedf_domain_t* pedf = task_pedf(task); | ||
250 | rt_domain_t* edf = task_edf(task); | ||
251 | lt_t now; | ||
252 | |||
253 | spin_lock_irqsave(&pedf->lock, flags); | ||
254 | BUG_ON(in_list(&task->rt_list)); | ||
255 | /* We need to take suspensions because of semaphores into | ||
256 | * account! If a job resumes after being suspended due to acquiring | ||
257 | * a semaphore, it should never be treated as a new job release. | ||
258 | * | ||
259 | * FIXME: This should be done in some more predictable and userspace-controlled way. | ||
260 | */ | ||
261 | now = sched_clock(); | ||
262 | if (is_tardy(task, now) && | ||
263 | get_rt_flags(task) != RT_F_EXIT_SEM) { | ||
264 | /* new sporadic release */ | ||
265 | edf_release_at(task, now); | ||
266 | sched_trace_job_release(task); | ||
267 | } | ||
268 | requeue(task, edf); | ||
269 | spin_unlock_irqrestore(&pedf->lock, flags); | ||
270 | } | ||
271 | |||
272 | static void psnedf_task_block(struct task_struct *t) | ||
273 | { | ||
274 | /* only running tasks can block, thus t is in no queue */ | ||
275 | BUG_ON(!is_realtime(t)); | ||
276 | BUG_ON(in_list(&t->rt_list)); | ||
277 | } | ||
278 | |||
279 | static void psnedf_task_exit(struct task_struct * t) | ||
280 | { | ||
281 | unsigned long flags; | ||
282 | psnedf_domain_t* pedf = task_pedf(t); | ||
283 | |||
284 | spin_lock_irqsave(&pedf->lock, flags); | ||
285 | |||
286 | if (in_list(&t->rt_list)) | ||
287 | /* dequeue */ | ||
288 | list_del(&t->rt_list); | ||
289 | preempt(pedf); | ||
290 | spin_unlock_irqrestore(&pedf->lock, flags); | ||
291 | } | ||
292 | |||
293 | static long psnedf_pi_block(struct pi_semaphore *sem, | ||
294 | struct task_struct *new_waiter) | ||
295 | { | ||
296 | psnedf_domain_t* pedf; | ||
297 | rt_domain_t* edf; | ||
298 | struct task_struct* t; | ||
299 | int cpu = get_partition(new_waiter); | ||
300 | |||
301 | BUG_ON(!new_waiter); | ||
302 | |||
303 | if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) { | ||
304 | TRACE_TASK(new_waiter, " boosts priority\n"); | ||
305 | pedf = task_pedf(new_waiter); | ||
306 | edf = task_edf(new_waiter); | ||
307 | |||
308 | /* interrupts already disabled */ | ||
309 | spin_lock(&pedf->lock); | ||
310 | |||
311 | /* store new highest-priority task */ | ||
312 | sem->hp.cpu_task[cpu] = new_waiter; | ||
313 | if (sem->holder && | ||
314 | get_partition(sem->holder) == get_partition(new_waiter)) { | ||
315 | /* let holder inherit */ | ||
316 | sem->holder->rt_param.inh_task = new_waiter; | ||
317 | t = sem->holder; | ||
318 | if (in_list(&t->rt_list)) { | ||
319 | /* queued in domain */ | ||
320 | list_del(&t->rt_list); | ||
321 | /* re-add so that the priority change takes effect */ | ||
322 | if (is_released(t, sched_clock())) | ||
323 | __add_ready(edf, t); | ||
324 | else | ||
325 | __add_release(edf, t); | ||
326 | } | ||
327 | } | ||
328 | |||
329 | /* check if we need to reschedule */ | ||
330 | if (edf_preemption_needed(edf, current)) | ||
331 | preempt(pedf); | ||
332 | |||
333 | spin_unlock(&pedf->lock); | ||
334 | } | ||
335 | |||
336 | return 0; | ||
337 | } | ||
338 | |||
339 | static long psnedf_inherit_priority(struct pi_semaphore *sem, | ||
340 | struct task_struct *new_owner) | ||
341 | { | ||
342 | int cpu = get_partition(new_owner); | ||
343 | |||
344 | new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu]; | ||
345 | if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) { | ||
346 | TRACE_TASK(new_owner, | ||
347 | "inherited priority from %s/%d\n", | ||
348 | sem->hp.cpu_task[cpu]->comm, | ||
349 | sem->hp.cpu_task[cpu]->pid); | ||
350 | } else | ||
351 | TRACE_TASK(new_owner, | ||
352 | "cannot inherit priority: " | ||
353 | "no higher priority job waits on this CPU!\n"); | ||
354 | /* make new owner non-preemptable as required by FMLP under | ||
355 | * PSN-EDF. | ||
356 | */ | ||
357 | make_np(new_owner); | ||
358 | return 0; | ||
359 | } | ||
360 | |||
361 | |||
362 | /* This function is called on a semaphore release, and assumes that | ||
363 | * the current task is also the semaphore holder. | ||
364 | */ | ||
365 | static long psnedf_return_priority(struct pi_semaphore *sem) | ||
366 | { | ||
367 | struct task_struct* t = current; | ||
368 | psnedf_domain_t* pedf = task_pedf(t); | ||
369 | rt_domain_t* edf = task_edf(t); | ||
370 | int ret = 0; | ||
371 | int cpu = get_partition(current); | ||
372 | |||
373 | |||
374 | /* Find new highest-priority semaphore task | ||
375 | * if holder task is the current hp.cpu_task[cpu]. | ||
376 | * | ||
377 | * Calling function holds sem->wait.lock. | ||
378 | */ | ||
379 | if (t == sem->hp.cpu_task[cpu]) | ||
380 | edf_set_hp_cpu_task(sem, cpu); | ||
381 | |||
382 | take_np(t); | ||
383 | if (current->rt_param.inh_task) { | ||
384 | TRACE_CUR("return priority of %s/%d\n", | ||
385 | current->rt_param.inh_task->comm, | ||
386 | current->rt_param.inh_task->pid); | ||
387 | spin_lock(&pedf->lock); | ||
388 | |||
389 | /* Reset inh_task to NULL. */ | ||
390 | current->rt_param.inh_task = NULL; | ||
391 | |||
392 | /* check if we need to reschedule */ | ||
393 | if (edf_preemption_needed(edf, current)) | ||
394 | preempt(pedf); | ||
395 | |||
396 | spin_unlock(&pedf->lock); | ||
397 | } else | ||
398 | TRACE_CUR(" no priority to return %p\n", sem); | ||
399 | |||
400 | return ret; | ||
401 | } | ||
402 | |||
403 | |||
404 | static long psnedf_admit_task(struct task_struct* tsk) | ||
405 | { | ||
406 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; | ||
407 | } | ||
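
psnedf_admit_task() only admits a task that already executes on its assigned partition CPU. Below is a minimal, hedged userspace sketch of pinning a task so that this check can succeed; the CPU number is an arbitrary example, and the LITMUS parameter-setup step that would follow is elided (it is not shown in this hunk).

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
    	cpu_set_t set;

    	/* Pin the task to its assigned partition (CPU 1 here is arbitrary)
    	 * so that task_cpu(tsk) == rt_param.task_params.cpu holds. */
    	CPU_ZERO(&set);
    	CPU_SET(1, &set);
    	if (sched_setaffinity(0, sizeof(set), &set)) {
    		perror("sched_setaffinity");
    		return 1;
    	}
    	/* ... set LITMUS RT parameters and enter real-time mode here ... */
    	return 0;
    }
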
408 | |||
409 | /* Plugin object */ | ||
410 | static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = { | ||
411 | .plugin_name = "PSN-EDF", | ||
412 | .tick = psnedf_tick, | ||
413 | .task_new = psnedf_task_new, | ||
414 | .complete_job = edf_complete_job, | ||
415 | .task_exit = psnedf_task_exit, | ||
416 | .schedule = psnedf_schedule, | ||
417 | .task_wake_up = psnedf_task_wake_up, | ||
418 | .task_block = psnedf_task_block, | ||
419 | .pi_block = psnedf_pi_block, | ||
420 | .inherit_priority = psnedf_inherit_priority, | ||
421 | .return_priority = psnedf_return_priority, | ||
422 | .admit_task = psnedf_admit_task | ||
423 | }; | ||
424 | |||
425 | |||
426 | static int __init init_psn_edf(void) | ||
427 | { | ||
428 | int i; | ||
429 | |||
430 | for (i = 0; i < NR_CPUS; i++) | ||
431 | { | ||
432 | psnedf_domain_init(remote_pedf(i), | ||
433 | psnedf_check_resched, i); | ||
434 | } | ||
435 | return register_sched_plugin(&psn_edf_plugin); | ||
436 | } | ||
437 | |||
438 | |||
439 | |||
440 | module_init(init_psn_edf); | ||
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c new file mode 100644 index 0000000000..0976e830ad --- /dev/null +++ b/litmus/sched_trace.c | |||
@@ -0,0 +1,541 @@ | |||
1 | /* sched_trace.c -- record scheduling events to a byte stream. | ||
2 | * | ||
3 | * TODO: Move ring buffer to a lockfree implementation. | ||
4 | */ | ||
5 | |||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/fs.h> | ||
8 | #include <linux/cdev.h> | ||
9 | #include <asm/semaphore.h> | ||
10 | #include <asm/uaccess.h> | ||
11 | #include <linux/module.h> | ||
12 | |||
13 | #include <litmus/sched_trace.h> | ||
14 | #include <litmus/litmus.h> | ||
15 | |||
16 | |||
17 | typedef struct { | ||
18 | /* guard read and write pointers */ | ||
19 | spinlock_t lock; | ||
20 | /* guard against concurrent freeing of buffer */ | ||
21 | rwlock_t del_lock; | ||
22 | |||
23 | /* memory allocated for ring buffer */ | ||
24 | unsigned long order; | ||
25 | char* buf; | ||
26 | char* end; | ||
27 | |||
28 | /* Read/write pointer. May not cross. | ||
29 | * They point to the position of next write and | ||
30 | * last read. | ||
31 | */ | ||
32 | char* writep; | ||
33 | char* readp; | ||
34 | |||
35 | } ring_buffer_t; | ||
36 | |||
37 | #define EMPTY_RING_BUFFER { \ | ||
38 | .lock = SPIN_LOCK_UNLOCKED, \ | ||
39 | .del_lock = RW_LOCK_UNLOCKED, \ | ||
40 | .buf = NULL, \ | ||
41 | .end = NULL, \ | ||
42 | .writep = NULL, \ | ||
43 | .readp = NULL \ | ||
44 | } | ||
45 | |||
46 | void rb_init(ring_buffer_t* buf) | ||
47 | { | ||
48 | *buf = (ring_buffer_t) EMPTY_RING_BUFFER; | ||
49 | } | ||
50 | |||
51 | int rb_alloc_buf(ring_buffer_t* buf, unsigned long order) | ||
52 | { | ||
53 | unsigned long flags; | ||
54 | int error = 0; | ||
55 | char *mem; | ||
56 | |||
57 | /* do memory allocation while not atomic */ | ||
58 | mem = (char *) __get_free_pages(GFP_KERNEL, order); | ||
59 | if (!mem) | ||
60 | return -ENOMEM; | ||
61 | write_lock_irqsave(&buf->del_lock, flags); | ||
62 | BUG_ON(buf->buf); | ||
63 | buf->buf = mem; | ||
64 | buf->end = buf->buf + PAGE_SIZE * (1 << order) - 1; | ||
65 | memset(buf->buf, 0xff, buf->end - buf->buf); | ||
66 | buf->order = order; | ||
67 | buf->writep = buf->buf + 1; | ||
68 | buf->readp = buf->buf; | ||
69 | write_unlock_irqrestore(&buf->del_lock, flags); | ||
70 | return error; | ||
71 | } | ||
72 | |||
73 | int rb_free_buf(ring_buffer_t* buf) | ||
74 | { | ||
75 | unsigned long flags; | ||
76 | int error = 0; | ||
77 | write_lock_irqsave(&buf->del_lock, flags); | ||
78 | BUG_ON(!buf->buf); | ||
79 | free_pages((unsigned long) buf->buf, buf->order); | ||
80 | buf->buf = NULL; | ||
81 | buf->end = NULL; | ||
82 | buf->writep = NULL; | ||
83 | buf->readp = NULL; | ||
84 | write_unlock_irqrestore(&buf->del_lock, flags); | ||
85 | return error; | ||
86 | } | ||
87 | |||
88 | /* Assumption: concurrent writes are serialized externally | ||
89 | * | ||
90 | * Will only succeed if there is enough space for all len bytes. | ||
91 | */ | ||
92 | int rb_put(ring_buffer_t* buf, char* mem, size_t len) | ||
93 | { | ||
94 | unsigned long flags; | ||
95 | char* r , *w; | ||
96 | int error = 0; | ||
97 | read_lock_irqsave(&buf->del_lock, flags); | ||
98 | if (!buf->buf) { | ||
99 | error = -ENODEV; | ||
100 | goto out; | ||
101 | } | ||
102 | spin_lock(&buf->lock); | ||
103 | r = buf->readp; | ||
104 | w = buf->writep; | ||
105 | spin_unlock(&buf->lock); | ||
106 | if (r < w && buf->end - w >= len - 1) { | ||
107 | /* easy case: there is enough space in the buffer | ||
108 |  * to write it in one continuous chunk */ | ||
109 | memcpy(w, mem, len); | ||
110 | w += len; | ||
111 | if (w > buf->end) | ||
112 | /* special case: fit exactly into buffer | ||
113 | * w is now buf->end + 1 | ||
114 | */ | ||
115 | w = buf->buf; | ||
116 | } else if (w < r && r - w >= len) { /* >= len because may not cross */ | ||
117 | /* we are constrained by the read pointer but there | ||
118 |  * is enough space | ||
119 |  */ | ||
120 | memcpy(w, mem, len); | ||
121 | w += len; | ||
122 | } else if (r <= w && buf->end - w < len - 1) { | ||
123 | /* the wrap around case: there may or may not be space */ | ||
124 | if ((buf->end - w) + (r - buf->buf) >= len - 1) { | ||
125 | /* copy chunk that fits at the end */ | ||
126 | memcpy(w, mem, buf->end - w + 1); | ||
127 | mem += buf->end - w + 1; | ||
128 | len -= (buf->end - w + 1); | ||
129 | w = buf->buf; | ||
130 | /* copy the rest */ | ||
131 | memcpy(w, mem, len); | ||
132 | w += len; | ||
133 | } | ||
134 | else | ||
135 | error = -ENOMEM; | ||
136 | } else { | ||
137 | error = -ENOMEM; | ||
138 | } | ||
139 | if (!error) { | ||
140 | spin_lock(&buf->lock); | ||
141 | buf->writep = w; | ||
142 | spin_unlock(&buf->lock); | ||
143 | } | ||
144 | out: | ||
145 | read_unlock_irqrestore(&buf->del_lock, flags); | ||
146 | return error; | ||
147 | } | ||
148 | |||
149 | /* Assumption: concurrent reads are serialized externally */ | ||
150 | int rb_get(ring_buffer_t* buf, char* mem, size_t len) | ||
151 | { | ||
152 | unsigned long flags; | ||
153 | char* r , *w; | ||
154 | int error = 0; | ||
155 | read_lock_irqsave(&buf->del_lock, flags); | ||
156 | if (!buf->buf) { | ||
157 | error = -ENODEV; | ||
158 | goto out; | ||
159 | } | ||
160 | spin_lock(&buf->lock); | ||
161 | r = buf->readp; | ||
162 | w = buf->writep; | ||
163 | spin_unlock(&buf->lock); | ||
164 | |||
165 | if (w <= r && buf->end - r >= len) { | ||
166 | /* easy case: there is enough data in the buffer | ||
167 |  * to get it in one chunk */ | ||
168 | memcpy(mem, r + 1, len); | ||
169 | r += len; | ||
170 | error = len; | ||
171 | |||
172 | } else if (r + 1 < w && w - r - 1 >= len) { | ||
173 | /* we are constrained by the write pointer but | ||
174 | * there is enough data | ||
175 | */ | ||
176 | memcpy(mem, r + 1, len); | ||
177 | r += len; | ||
178 | error = len; | ||
179 | |||
180 | } else if (r + 1 < w && w - r - 1 < len) { | ||
181 | /* we are constrained by the write pointer and there | ||
182 |  * is not enough data | ||
183 |  */ | ||
184 | memcpy(mem, r + 1, w - r - 1); | ||
185 | error = w - r - 1; | ||
186 | r += w - r - 1; | ||
187 | |||
188 | } else if (w <= r && buf->end - r < len) { | ||
189 | /* the wrap around case: there may or may not be enough data; | ||
190 |  * first, let's get what is available | ||
191 |  */ | ||
192 | memcpy(mem, r + 1, buf->end - r); | ||
193 | error += (buf->end - r); | ||
194 | mem += (buf->end - r); | ||
195 | len -= (buf->end - r); | ||
196 | r += (buf->end - r); | ||
197 | |||
198 | if (w > buf->buf) { | ||
199 | /* there is more to get */ | ||
200 | r = buf->buf - 1; | ||
201 | if (w - r >= len) { | ||
202 | /* plenty */ | ||
203 | memcpy(mem, r + 1, len); | ||
204 | error += len; | ||
205 | r += len; | ||
206 | } else { | ||
207 | memcpy(mem, r + 1, w - r - 1); | ||
208 | error += w - r - 1; | ||
209 | r += w - r - 1; | ||
210 | } | ||
211 | } | ||
212 | } /* nothing available */ | ||
213 | |||
214 | if (error > 0) { | ||
215 | spin_lock(&buf->lock); | ||
216 | buf->readp = r; | ||
217 | spin_unlock(&buf->lock); | ||
218 | } | ||
219 | out: | ||
220 | read_unlock_irqrestore(&buf->del_lock, flags); | ||
221 | return error; | ||
222 | } | ||
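
A minimal in-kernel sketch of how the ring-buffer helpers above fit together; the function and variable names are mine, and error handling is reduced to the essentials.

    static ring_buffer_t example_rb;

    static int example_rb_usage(void)
    {
    	char msg[] = "hello";
    	char out[8];
    	int n;

    	rb_init(&example_rb);
    	if (rb_alloc_buf(&example_rb, 0))	/* order 0 = one page */
    		return -ENOMEM;

    	/* rb_put() is all-or-nothing: it fails with -ENOMEM unless all
    	 * bytes fit without the write pointer crossing the read pointer. */
    	if (rb_put(&example_rb, msg, sizeof(msg) - 1))
    		return -ENOMEM;

    	/* rb_get() returns the number of bytes copied, which may be less
    	 * than requested, or 0 if the buffer is empty. */
    	n = rb_get(&example_rb, out, sizeof(out));	/* n == 5 here */

    	rb_free_buf(&example_rb);
    	return n;
    }
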
223 | |||
224 | |||
225 | |||
226 | /******************************************************************************/ | ||
227 | /* DEVICE FILE DRIVER */ | ||
228 | /******************************************************************************/ | ||
229 | |||
230 | |||
231 | |||
232 | /* Allocate a buffer of about 1 MB per CPU | ||
233 |  * (an order-8 allocation is 256 pages, i.e. 1 MB with 4 KB pages). | ||
234 |  */ | ||
235 | #define BUFFER_ORDER 8 | ||
236 | |||
237 | typedef struct { | ||
238 | ring_buffer_t buf; | ||
239 | atomic_t reader_cnt; | ||
240 | struct semaphore reader_mutex; | ||
241 | } trace_buffer_t; | ||
242 | |||
243 | |||
244 | /* This does not initialize the semaphore!! */ | ||
245 | |||
246 | #define EMPTY_TRACE_BUFFER \ | ||
247 | { .buf = EMPTY_RING_BUFFER, .reader_cnt = ATOMIC_INIT(0)} | ||
248 | |||
249 | static DEFINE_PER_CPU(trace_buffer_t, trace_buffer); | ||
250 | |||
251 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
252 | static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED; | ||
253 | #endif | ||
254 | static trace_buffer_t log_buffer = EMPTY_TRACE_BUFFER; | ||
255 | |||
256 | static void init_buffers(void) | ||
257 | { | ||
258 | int i; | ||
259 | |||
260 | for (i = 0; i < NR_CPUS; i++) { | ||
261 | rb_init(&per_cpu(trace_buffer, i).buf); | ||
262 | init_MUTEX(&per_cpu(trace_buffer, i).reader_mutex); | ||
263 | atomic_set(&per_cpu(trace_buffer, i).reader_cnt, 0); | ||
264 | } | ||
265 | /* only initialize the mutex, the rest was initialized as part | ||
266 | * of the static initialization macro | ||
267 | */ | ||
268 | init_MUTEX(&log_buffer.reader_mutex); | ||
269 | } | ||
270 | |||
271 | static int trace_release(struct inode *in, struct file *filp) | ||
272 | { | ||
273 | int error = 0; | ||
274 | trace_buffer_t* buf = filp->private_data; | ||
275 | |||
276 | BUG_ON(!filp->private_data); | ||
277 | |||
278 | if (down_interruptible(&buf->reader_mutex)) { | ||
279 | error = -ERESTARTSYS; | ||
280 | goto out; | ||
281 | } | ||
282 | |||
283 | /* last release must deallocate buffers */ | ||
284 | if (atomic_dec_return(&buf->reader_cnt) == 0) { | ||
285 | error = rb_free_buf(&buf->buf); | ||
286 | } | ||
287 | |||
288 | up(&buf->reader_mutex); | ||
289 | out: | ||
290 | return error; | ||
291 | } | ||
292 | |||
293 | static ssize_t trace_read(struct file *filp, char __user *to, size_t len, | ||
294 | loff_t *f_pos) | ||
295 | { | ||
296 | /* we ignore f_pos, this is strictly sequential */ | ||
297 | |||
298 | ssize_t error = -EINVAL; | ||
299 | char* mem; | ||
300 | trace_buffer_t *buf = filp->private_data; | ||
301 | |||
302 | if (down_interruptible(&buf->reader_mutex)) { | ||
303 | error = -ERESTARTSYS; | ||
304 | goto out; | ||
305 | } | ||
306 | |||
307 | if (len > 64 * 1024) | ||
308 | len = 64 * 1024; | ||
309 | mem = kmalloc(len, GFP_KERNEL); | ||
310 | if (!mem) { | ||
311 | error = -ENOMEM; | ||
312 | goto out_unlock; | ||
313 | } | ||
314 | |||
315 | error = rb_get(&buf->buf, mem, len); | ||
316 | while (!error) { | ||
317 | set_current_state(TASK_INTERRUPTIBLE); | ||
318 | schedule_timeout(110); | ||
319 | if (signal_pending(current)) | ||
320 | error = -ERESTARTSYS; | ||
321 | else | ||
322 | error = rb_get(&buf->buf, mem, len); | ||
323 | } | ||
324 | |||
325 | if (error > 0 && copy_to_user(to, mem, error)) | ||
326 | error = -EFAULT; | ||
327 | |||
328 | kfree(mem); | ||
329 | out_unlock: | ||
330 | up(&buf->reader_mutex); | ||
331 | out: | ||
332 | return error; | ||
333 | } | ||
334 | |||
335 | |||
336 | /* trace_open - Open one of the per-CPU sched_trace buffers. | ||
337 | */ | ||
338 | static int trace_open(struct inode *in, struct file *filp) | ||
339 | { | ||
340 | int error = -EINVAL; | ||
341 | int cpu = MINOR(in->i_rdev); | ||
342 | trace_buffer_t* buf; | ||
343 | |||
344 | if (!cpu_online(cpu)) { | ||
345 | printk(KERN_WARNING "sched trace: " | ||
346 | "CPU #%d is not online. (open failed)\n", cpu); | ||
347 | error = -ENODEV; | ||
348 | goto out; | ||
349 | } | ||
350 | |||
351 | buf = &per_cpu(trace_buffer, cpu); | ||
352 | |||
353 | if (down_interruptible(&buf->reader_mutex)) { | ||
354 | error = -ERESTARTSYS; | ||
355 | goto out; | ||
356 | } | ||
357 | |||
358 | /* first open must allocate buffers */ | ||
359 | if (atomic_inc_return(&buf->reader_cnt) == 1) { | ||
360 | if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER))) | ||
361 | { | ||
362 | atomic_dec(&buf->reader_cnt); | ||
363 | goto out_unlock; | ||
364 | } | ||
365 | } | ||
366 | |||
367 | error = 0; | ||
368 | filp->private_data = buf; | ||
369 | |||
370 | out_unlock: | ||
371 | up(&buf->reader_mutex); | ||
372 | out: | ||
373 | return error; | ||
374 | } | ||
375 | |||
376 | /* log_open - open the global log message ring buffer. | ||
377 | */ | ||
378 | static int log_open(struct inode *in, struct file *filp) | ||
379 | { | ||
380 | int error = -EINVAL; | ||
381 | trace_buffer_t* buf; | ||
382 | |||
383 | buf = &log_buffer; | ||
384 | |||
385 | if (down_interruptible(&buf->reader_mutex)) { | ||
386 | error = -ERESTARTSYS; | ||
387 | goto out; | ||
388 | } | ||
389 | |||
390 | /* first open must allocate buffers */ | ||
391 | if (atomic_inc_return(&buf->reader_cnt) == 1) { | ||
392 | if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER))) | ||
393 | { | ||
394 | atomic_dec(&buf->reader_cnt); | ||
395 | goto out_unlock; | ||
396 | } | ||
397 | } | ||
398 | |||
399 | error = 0; | ||
400 | filp->private_data = buf; | ||
401 | |||
402 | out_unlock: | ||
403 | up(&buf->reader_mutex); | ||
404 | out: | ||
405 | return error; | ||
406 | } | ||
407 | |||
408 | /******************************************************************************/ | ||
409 | /* Device Registration */ | ||
410 | /******************************************************************************/ | ||
411 | |||
412 | /* The major numbers are from the unassigned/local use block. | ||
413 |  * | ||
414 |  * This should be converted to dynamic allocation at some point... | ||
415 |  */ | ||
416 | #define TRACE_MAJOR 250 | ||
417 | #define LOG_MAJOR 251 | ||
418 | |||
419 | /* trace_fops - The file operations for accessing the per-CPU scheduling event | ||
420 | * trace buffers. | ||
421 | */ | ||
422 | struct file_operations trace_fops = { | ||
423 | .owner = THIS_MODULE, | ||
424 | .open = trace_open, | ||
425 | .release = trace_release, | ||
426 | .read = trace_read, | ||
427 | }; | ||
428 | |||
429 | /* log_fops - The file operations for accessing the global LITMUS log message | ||
430 | * buffer. | ||
431 | * | ||
432 | * Except for opening the device file it uses the same operations as trace_fops. | ||
433 | */ | ||
434 | struct file_operations log_fops = { | ||
435 | .owner = THIS_MODULE, | ||
436 | .open = log_open, | ||
437 | .release = trace_release, | ||
438 | .read = trace_read, | ||
439 | }; | ||
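
For context, a hedged userspace sketch of draining the global log buffer through the interface above. The device node name is an assumption; a character device node for LOG_MAJOR (251), minor 0 has to be created separately (e.g. with mknod), which this patch does not do.

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
    	char buf[4096];
    	ssize_t n;
    	int fd = open("/dev/litmus_log", O_RDONLY);	/* assumed node name */

    	if (fd < 0) {
    		perror("open");
    		return 1;
    	}
    	/* read() blocks (the kernel side polls the ring buffer) until
    	 * log data becomes available or a signal arrives. */
    	while ((n = read(fd, buf, sizeof(buf))) > 0)
    		fwrite(buf, 1, n, stdout);
    	close(fd);
    	return 0;
    }
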
440 | |||
441 | static int __init register_buffer_dev(const char* name, | ||
442 | struct file_operations* fops, | ||
443 | int major, int count) | ||
444 | { | ||
445 | dev_t trace_dev; | ||
446 | struct cdev *cdev; | ||
447 | int error = 0; | ||
448 | |||
449 | trace_dev = MKDEV(major, 0); | ||
450 | error = register_chrdev_region(trace_dev, count, name); | ||
451 | if (error) | ||
452 | { | ||
453 | printk(KERN_WARNING "sched trace: " | ||
454 | "Could not register major/minor number %d\n", major); | ||
455 | return error; | ||
456 | } | ||
457 | cdev = cdev_alloc(); | ||
458 | if (!cdev) { | ||
459 | printk(KERN_WARNING "sched trace: " | ||
460 | "Could not get a cdev for %s.\n", name); | ||
461 | return -ENOMEM; | ||
462 | } | ||
463 | cdev->owner = THIS_MODULE; | ||
464 | cdev->ops = fops; | ||
465 | error = cdev_add(cdev, trace_dev, count); | ||
466 | if (error) { | ||
467 | printk(KERN_WARNING "sched trace: " | ||
468 | "add_cdev failed for %s.\n", name); | ||
469 | return -ENOMEM; | ||
470 | } | ||
471 | return error; | ||
472 | |||
473 | } | ||
474 | |||
475 | static int __init init_sched_trace(void) | ||
476 | { | ||
477 | int error1 = 0, error2 = 0; | ||
478 | |||
479 | printk("Initializing scheduler trace device\n"); | ||
480 | init_buffers(); | ||
481 | |||
482 | error1 = register_buffer_dev("schedtrace", &trace_fops, | ||
483 | TRACE_MAJOR, NR_CPUS); | ||
484 | |||
485 | error2 = register_buffer_dev("litmus_log", &log_fops, | ||
486 | LOG_MAJOR, 1); | ||
487 | if (error1 || error2) | ||
488 | return min(error1, error2); | ||
489 | else | ||
490 | return 0; | ||
491 | } | ||
492 | |||
493 | module_init(init_sched_trace); | ||
494 | |||
495 | /******************************************************************************/ | ||
496 | /* KERNEL API */ | ||
497 | /******************************************************************************/ | ||
498 | |||
499 | /* The per-CPU buffer used to format LITMUS log messages. Don't put it on the | ||
500 |  * stack; it is too big and the kernel gets very picky with nested interrupts and small stacks. | ||
501 |  */ | ||
502 | |||
503 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
504 | |||
505 | #define MSG_SIZE 255 | ||
506 | static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer); | ||
507 | |||
508 | /* sched_trace_log_message - This is the only function that accesses the | ||
509 |  *                            log buffer inside the kernel for writing. | ||
510 |  *                            Concurrent access to it is serialized via the | ||
511 |  *                            log_buffer_lock. | ||
512 |  * | ||
513 |  * The maximum length of a formatted message is 255 bytes. | ||
514 |  */ | ||
515 | void sched_trace_log_message(const char* fmt, ...) | ||
516 | { | ||
517 | unsigned long flags; | ||
518 | va_list args; | ||
519 | size_t len; | ||
520 | char* buf; | ||
521 | |||
522 | va_start(args, fmt); | ||
523 | local_irq_save(flags); | ||
524 | |||
525 | /* format message */ | ||
526 | buf = __get_cpu_var(fmt_buffer); | ||
527 | len = vscnprintf(buf, MSG_SIZE, fmt, args); | ||
528 | |||
529 | spin_lock(&log_buffer_lock); | ||
530 | /* Don't copy the trailing null byte; we don't want null bytes | ||
531 |  * in a text file. | ||
532 |  */ | ||
533 | rb_put(&log_buffer.buf, buf, len); | ||
534 | spin_unlock(&log_buffer_lock); | ||
535 | |||
536 | local_irq_restore(flags); | ||
537 | va_end(args); | ||
538 | } | ||
539 | |||
540 | #endif | ||
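
A short sketch of how kernel code would emit a message through this interface; the format string and arguments are arbitrary examples, and the TRACE()/TRACE_TASK() macros used in the plugin code above presumably expand to calls like this when CONFIG_SCHED_DEBUG_TRACE is enabled.

    /* Hedged example: log which task a CPU just picked. */
    static void example_log_pick(struct task_struct *t)
    {
    	sched_trace_log_message("P%d: picked %s/%d\n",
    				smp_processor_id(), t->comm, t->pid);
    }
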
541 | |||
diff --git a/litmus/trace.c b/litmus/trace.c new file mode 100644 index 0000000000..90ef443bd9 --- /dev/null +++ b/litmus/trace.c | |||
@@ -0,0 +1,303 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/fs.h> | ||
3 | #include <linux/cdev.h> | ||
4 | #include <asm/semaphore.h> | ||
5 | #include <asm/uaccess.h> | ||
6 | #include <linux/module.h> | ||
7 | |||
8 | #include <litmus/trace.h> | ||
9 | |||
10 | /******************************************************************************/ | ||
11 | /* Allocation */ | ||
12 | /******************************************************************************/ | ||
13 | |||
14 | struct ft_buffer* trace_ts_buf = NULL; | ||
15 | |||
16 | static unsigned int ts_seq_no = 0; | ||
17 | |||
18 | feather_callback void save_timestamp(unsigned long event) | ||
19 | { | ||
20 | unsigned int seq_no = fetch_and_inc((int *) &ts_seq_no); | ||
21 | struct timestamp *ts; | ||
22 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | ||
23 | ts->event = event; | ||
24 | ts->timestamp = ft_read_tsc(); | ||
25 | ts->seq_no = seq_no; | ||
26 | ts->cpu = raw_smp_processor_id(); | ||
27 | ft_buffer_finish_write(trace_ts_buf, ts); | ||
28 | } | ||
29 | } | ||
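
save_timestamp() is meant to be invoked from Feather-Trace trigger points. A hedged sketch of such a trigger follows; the event ID 100 is an arbitrary placeholder (the patch itself only registers the dummy event 666 in init_sched_trace() below).

    static void example_trace_point(void)
    {
    	/* When event 100 is enabled via the device file below, this records
    	 * the event ID, TSC value, sequence number, and CPU. */
    	ft_event0(100, save_timestamp);
    }
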
30 | |||
31 | static struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size) | ||
32 | { | ||
33 | struct ft_buffer* buf; | ||
34 | size_t total = (size + 1) * count; | ||
35 | char* mem; | ||
36 | int order = 0, pages = 1; | ||
37 | |||
38 | buf = kmalloc(sizeof(struct ft_buffer), GFP_KERNEL); | ||
39 | if (!buf) | ||
40 | return NULL; | ||
41 | |||
42 | total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0); | ||
43 | while (pages < total) { | ||
44 | order++; | ||
45 | pages *= 2; | ||
46 | } | ||
47 | |||
48 | mem = (char*) __get_free_pages(GFP_KERNEL, order); | ||
49 | if (!mem) { | ||
50 | kfree(buf); | ||
51 | return NULL; | ||
52 | } | ||
53 | |||
54 | if (!init_ft_buffer(buf, count, size, | ||
55 | mem + (count * size), /* markers at the end */ | ||
56 | mem)) { /* buffer objects */ | ||
57 | free_pages((unsigned long) mem, order); | ||
58 | kfree(buf); | ||
59 | return NULL; | ||
60 | } | ||
61 | return buf; | ||
62 | } | ||
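
As a worked example of the sizing above (assuming a hypothetical 16-byte struct timestamp): a request for 4096 slots needs (16 + 1) * 4096 = 69,632 bytes, i.e. exactly 17 pages of 4 KB, which the power-of-two loop then rounds up to an order-5 (32-page) allocation.
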
63 | |||
64 | static void free_ft_buffer(struct ft_buffer* buf) | ||
65 | { | ||
66 | int order = 0, pages = 1; | ||
67 | size_t total; | ||
68 | |||
69 | if (buf) { | ||
70 | total = (buf->slot_size + 1) * buf->slot_count; | ||
71 | total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0); | ||
72 | while (pages < total) { | ||
73 | order++; | ||
74 | pages *= 2; | ||
75 | } | ||
76 | free_pages((unsigned long) buf->buffer_mem, order); | ||
77 | kfree(buf); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | |||
82 | /******************************************************************************/ | ||
83 | /* DEVICE FILE DRIVER */ | ||
84 | /******************************************************************************/ | ||
85 | |||
86 | #define NO_TIMESTAMPS 262144 | ||
87 | |||
88 | static DECLARE_MUTEX(feather_lock); | ||
89 | static int use_count = 0; | ||
90 | |||
91 | static int trace_release(struct inode *in, struct file *filp) | ||
92 | { | ||
93 | int err = 0; | ||
94 | |||
95 | if (down_interruptible(&feather_lock)) { | ||
96 | err = -ERESTARTSYS; | ||
97 | goto out; | ||
98 | } | ||
99 | |||
100 | printk(KERN_ALERT "%s/%d disconnects from feather trace device. " | ||
101 | "use_count=%d\n", | ||
102 | current->comm, current->pid, use_count); | ||
103 | |||
104 | if (use_count == 1) { | ||
105 | /* disable events */ | ||
106 | ft_disable_all_events(); | ||
107 | |||
108 | /* wait for any pending events to complete */ | ||
109 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
110 | schedule_timeout(HZ); | ||
111 | |||
112 | printk(KERN_ALERT "Failed trace writes: %u\n", | ||
113 | trace_ts_buf->failed_writes); | ||
114 | |||
115 | free_ft_buffer(trace_ts_buf); | ||
116 | trace_ts_buf = NULL; | ||
117 | } | ||
118 | |||
119 | use_count--; | ||
120 | up(&feather_lock); | ||
121 | out: | ||
122 | return err; | ||
123 | } | ||
124 | |||
125 | |||
126 | static ssize_t trace_read(struct file *filp, char __user *to, size_t len, | ||
127 | loff_t *f_pos) | ||
128 | { | ||
129 | /* we ignore f_pos, this is strictly sequential */ | ||
130 | ssize_t error = 0; | ||
131 | struct timestamp ts; | ||
132 | |||
133 | if (down_interruptible(&feather_lock)) { | ||
134 | error = -ERESTARTSYS; | ||
135 | goto out; | ||
136 | } | ||
137 | |||
138 | |||
139 | while (len >= sizeof(struct timestamp)) { | ||
140 | if (ft_buffer_read(trace_ts_buf, &ts)) { | ||
141 | if (copy_to_user(to, &ts, sizeof(struct timestamp))) { | ||
142 | error = -EFAULT; | ||
143 | break; | ||
144 | } else { | ||
145 | len -= sizeof(struct timestamp); | ||
146 | to += sizeof(struct timestamp); | ||
147 | error += sizeof(struct timestamp); | ||
148 | } | ||
149 | } else { | ||
150 | set_current_state(TASK_INTERRUPTIBLE); | ||
151 | schedule_timeout(50); | ||
152 | if (signal_pending(current)) { | ||
153 | error = -ERESTARTSYS; | ||
154 | break; | ||
155 | } | ||
156 | } | ||
157 | } | ||
158 | up(&feather_lock); | ||
159 | out: | ||
160 | return error; | ||
161 | } | ||
162 | |||
163 | #define ENABLE_CMD 0 | ||
164 | #define DISABLE_CMD 1 | ||
165 | |||
166 | static ssize_t trace_write(struct file *filp, const char __user *from, | ||
167 | size_t len, loff_t *f_pos) | ||
168 | { | ||
169 | ssize_t error = -EINVAL; | ||
170 | unsigned long cmd; | ||
171 | unsigned long id; | ||
172 | |||
173 | if (len % sizeof(long) || len < 2 * sizeof(long)) | ||
174 | goto out; | ||
175 | |||
176 | if (copy_from_user(&cmd, from, sizeof(long))) { | ||
177 | error = -EFAULT; | ||
178 | goto out; | ||
179 | } | ||
180 | len -= sizeof(long); | ||
181 | from += sizeof(long); | ||
182 | |||
183 | if (cmd != ENABLE_CMD && cmd != DISABLE_CMD) | ||
184 | goto out; | ||
185 | |||
186 | if (down_interruptible(&feather_lock)) { | ||
187 | error = -ERESTARTSYS; | ||
188 | goto out; | ||
189 | } | ||
190 | |||
191 | error = sizeof(long); | ||
192 | while (len) { | ||
193 | if (copy_from_user(&id, from, sizeof(long))) { | ||
194 | error = -EFAULT; | ||
195 | break; /* release feather_lock before returning */ | ||
196 | } | ||
197 | len -= sizeof(long); | ||
198 | from += sizeof(long); | ||
199 | if (cmd) { | ||
200 | printk(KERN_INFO | ||
201 | "Disabling feather-trace event %lu.\n", id); | ||
202 | ft_disable_event(id); | ||
203 | } else { | ||
204 | printk(KERN_INFO | ||
205 | "Enabling feather-trace event %lu.\n", id); | ||
206 | ft_enable_event(id); | ||
207 | } | ||
208 | error += sizeof(long); | ||
209 | } | ||
210 | |||
211 | up(&feather_lock); | ||
212 | out: | ||
213 | return error; | ||
214 | } | ||
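
The write() protocol above expects a command word (0 = enable, 1 = disable) followed by one or more event IDs, all native longs. A hedged userspace sketch of enabling two events; the device node name and the event IDs are assumptions.

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
    	/* ENABLE_CMD (0) followed by two arbitrary event IDs. */
    	unsigned long cmd[3] = { 0, 100, 101 };
    	int fd = open("/dev/ft_trace", O_WRONLY);	/* assumed node name */

    	if (fd < 0) {
    		perror("open");
    		return 1;
    	}
    	if (write(fd, cmd, sizeof(cmd)) != (ssize_t) sizeof(cmd))
    		perror("write");
    	close(fd);
    	return 0;
    }
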
215 | |||
216 | static int trace_open(struct inode *in, struct file *filp) | ||
217 | { | ||
218 | int err = 0; | ||
219 | unsigned int count = NO_TIMESTAMPS; | ||
220 | |||
221 | if (down_interruptible(&feather_lock)) { | ||
222 | err = -ERESTARTSYS; | ||
223 | goto out; | ||
224 | } | ||
225 | |||
226 | while (count && !trace_ts_buf) { | ||
227 | printk("trace: trying to allocate %u time stamps.\n", count); | ||
228 | trace_ts_buf = alloc_ft_buffer(count, sizeof(struct timestamp)); | ||
229 | count /= 2; | ||
230 | } | ||
231 | if (!trace_ts_buf) | ||
232 | err = -ENOMEM; | ||
233 | else | ||
234 | use_count++; | ||
235 | |||
236 | up(&feather_lock); | ||
237 | out: | ||
238 | return err; | ||
239 | } | ||
240 | |||
241 | /******************************************************************************/ | ||
242 | /* Device Registration */ | ||
243 | /******************************************************************************/ | ||
244 | |||
245 | #define FT_TRACE_MAJOR 252 | ||
246 | |||
247 | struct file_operations ft_trace_fops = { | ||
248 | .owner = THIS_MODULE, | ||
249 | .open = trace_open, | ||
250 | .release = trace_release, | ||
251 | .write = trace_write, | ||
252 | .read = trace_read, | ||
253 | }; | ||
254 | |||
255 | |||
256 | static int __init register_buffer_dev(const char* name, | ||
257 | struct file_operations* fops, | ||
258 | int major, int count) | ||
259 | { | ||
260 | dev_t trace_dev; | ||
261 | struct cdev *cdev; | ||
262 | int error = 0; | ||
263 | |||
264 | trace_dev = MKDEV(major, 0); | ||
265 | error = register_chrdev_region(trace_dev, count, name); | ||
266 | if (error) | ||
267 | { | ||
268 | printk(KERN_WARNING "trace: " | ||
269 | "Could not register major/minor number %d\n", major); | ||
270 | return error; | ||
271 | } | ||
272 | cdev = cdev_alloc(); | ||
273 | if (!cdev) { | ||
274 | printk(KERN_WARNING "trace: " | ||
275 | "Could not get a cdev for %s.\n", name); | ||
276 | return -ENOMEM; | ||
277 | } | ||
278 | cdev->owner = THIS_MODULE; | ||
279 | cdev->ops = fops; | ||
280 | error = cdev_add(cdev, trace_dev, count); | ||
281 | if (error) { | ||
282 | printk(KERN_WARNING "trace: " | ||
283 | "add_cdev failed for %s.\n", name); | ||
284 | return -ENOMEM; | ||
285 | } | ||
286 | return error; | ||
287 | |||
288 | } | ||
289 | |||
290 | static int __init init_sched_trace(void) | ||
291 | { | ||
292 | int error = 0; | ||
293 | |||
294 | printk("Initializing Feather-Trace device\n"); | ||
295 | /* dummy entry to make linker happy */ | ||
296 | ft_event0(666, save_timestamp); | ||
297 | |||
298 | error = register_buffer_dev("ft_trace", &ft_trace_fops, | ||
299 | FT_TRACE_MAJOR, 1); | ||
300 | return error; | ||
301 | } | ||
302 | |||
303 | module_init(init_sched_trace); | ||