| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-04-19 17:31:52 -0400 |
|---|---|---|
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-04-19 17:31:52 -0400 |
| commit | f70a290e8a889caa905ab7650c696f2bb299be1a (patch) | |
| tree | 56f0886d839499e9f522f189999024b3e86f9be2 | |
| parent | fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (diff) | |
| parent | 7ef4a793a624c6e66c16ca1051847f75161f5bec (diff) | |
Merge branch 'wip-nested-locking' into tegra-nested-locking
Conflicts:
Makefile
include/linux/fs.h
103 files changed, 16165 insertions, 40 deletions
diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile | |||
| @@ -708,7 +708,7 @@ export mod_strip_cmd | |||
| 708 | 708 | ||
| 709 | 709 | ||
| 710 | ifeq ($(KBUILD_EXTMOD),) | 710 | ifeq ($(KBUILD_EXTMOD),) |
| 711 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ | 711 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/ |
| 712 | 712 | ||
| 713 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ | 713 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ |
| 714 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ | 714 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ |
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3c3b868948a..6dc9a2f42ab 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig | |||
| @@ -2214,3 +2214,11 @@ source "security/Kconfig" | |||
| 2214 | source "crypto/Kconfig" | 2214 | source "crypto/Kconfig" |
| 2215 | 2215 | ||
| 2216 | source "lib/Kconfig" | 2216 | source "lib/Kconfig" |
| 2217 | |||
| 2218 | config ARCH_HAS_SEND_PULL_TIMERS | ||
| 2219 | def_bool n | ||
| 2220 | |||
| 2221 | config ARCH_HAS_FEATHER_TRACE | ||
| 2222 | def_bool n | ||
| 2223 | |||
| 2224 | source "litmus/Kconfig" | ||
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 3be8de3adab..8a102a383a3 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h | |||
| @@ -16,9 +16,11 @@ | |||
| 16 | 16 | ||
| 17 | typedef unsigned long cycles_t; | 17 | typedef unsigned long cycles_t; |
| 18 | 18 | ||
| 19 | #ifndef get_cycles | ||
| 19 | static inline cycles_t get_cycles (void) | 20 | static inline cycles_t get_cycles (void) |
| 20 | { | 21 | { |
| 21 | return 0; | 22 | return 0; |
| 22 | } | 23 | } |
| 24 | #endif | ||
| 23 | 25 | ||
| 24 | #endif | 26 | #endif |
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index c60a2944f95..23ae09ffc49 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h | |||
| @@ -403,6 +403,9 @@ | |||
| 403 | #define __NR_sendmmsg (__NR_SYSCALL_BASE+374) | 403 | #define __NR_sendmmsg (__NR_SYSCALL_BASE+374) |
| 404 | #define __NR_setns (__NR_SYSCALL_BASE+375) | 404 | #define __NR_setns (__NR_SYSCALL_BASE+375) |
| 405 | 405 | ||
| 406 | #define __NR_LITMUS (__NR_SYSCALL_BASE+376) | ||
| 407 | #include <litmus/unistd_32.h> | ||
| 408 | |||
| 406 | /* | 409 | /* |
| 407 | * The following SWIs are ARM private. | 410 | * The following SWIs are ARM private. |
| 408 | */ | 411 | */ |
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 9943e9e74a1..c0de805e4ea 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S | |||
| @@ -385,6 +385,20 @@ | |||
| 385 | CALL(sys_syncfs) | 385 | CALL(sys_syncfs) |
| 386 | CALL(sys_sendmmsg) | 386 | CALL(sys_sendmmsg) |
| 387 | /* 375 */ CALL(sys_setns) | 387 | /* 375 */ CALL(sys_setns) |
| 388 | CALL(sys_set_rt_task_param) | ||
| 389 | CALL(sys_get_rt_task_param) | ||
| 390 | CALL(sys_complete_job) | ||
| 391 | CALL(sys_od_open) | ||
| 392 | /* 380 */ CALL(sys_od_close) | ||
| 393 | CALL(sys_litmus_lock) | ||
| 394 | CALL(sys_litmus_unlock) | ||
| 395 | CALL(sys_query_job_no) | ||
| 396 | CALL(sys_wait_for_job_release) | ||
| 397 | /* 385 */ CALL(sys_wait_for_ts_release) | ||
| 398 | CALL(sys_release_ts) | ||
| 399 | CALL(sys_null_call) | ||
| 400 | CALL(sys_dynamic_group_lock) | ||
| 401 | CALL(sys_dynamic_group_unlock) | ||
| 388 | #ifndef syscalls_counted | 402 | #ifndef syscalls_counted |
| 389 | .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls | 403 | .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls |
| 390 | #define syscalls_counted | 404 | #define syscalls_counted |
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 14d06f50d16..a07ca050112 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c | |||
| @@ -40,6 +40,8 @@ | |||
| 40 | #include <asm/ptrace.h> | 40 | #include <asm/ptrace.h> |
| 41 | #include <asm/localtimer.h> | 41 | #include <asm/localtimer.h> |
| 42 | 42 | ||
| 43 | #include <litmus/preempt.h> | ||
| 44 | |||
| 43 | /* | 45 | /* |
| 44 | * as from 2.5, kernels no longer have an init_tasks structure | 46 | * as from 2.5, kernels no longer have an init_tasks structure |
| 45 | * so we need some other way of telling a new secondary core | 47 | * so we need some other way of telling a new secondary core |
| @@ -629,6 +631,8 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) | |||
| 629 | break; | 631 | break; |
| 630 | 632 | ||
| 631 | case IPI_RESCHEDULE: | 633 | case IPI_RESCHEDULE: |
| 634 | /* LITMUS^RT: take action based on scheduler state */ | ||
| 635 | sched_state_ipi(); | ||
| 632 | scheduler_ipi(); | 636 | scheduler_ipi(); |
| 633 | break; | 637 | break; |
| 634 | 638 | ||
diff --git a/arch/arm/mach-realview/include/mach/timex.h b/arch/arm/mach-realview/include/mach/timex.h index 4eeb069373c..e8bcc40d1f0 100644 --- a/arch/arm/mach-realview/include/mach/timex.h +++ b/arch/arm/mach-realview/include/mach/timex.h | |||
| @@ -21,3 +21,30 @@ | |||
| 21 | */ | 21 | */ |
| 22 | 22 | ||
| 23 | #define CLOCK_TICK_RATE (50000000 / 16) | 23 | #define CLOCK_TICK_RATE (50000000 / 16) |
| 24 | |||
| 25 | #if defined(CONFIG_MACH_REALVIEW_PB11MP) || defined(CONFIG_MACH_REALVIEW_PB1176) | ||
| 26 | |||
| 27 | static inline unsigned long realview_get_arm11_cp15_ccnt(void) | ||
| 28 | { | ||
| 29 | unsigned long cycles; | ||
| 30 | /* Read CP15 CCNT register. */ | ||
| 31 | asm volatile ("mrc p15, 0, %0, c15, c12, 1" : "=r" (cycles)); | ||
| 32 | return cycles; | ||
| 33 | } | ||
| 34 | |||
| 35 | #define get_cycles realview_get_arm11_cp15_ccnt | ||
| 36 | |||
| 37 | #elif defined(CONFIG_MACH_REALVIEW_PBA8) | ||
| 38 | |||
| 39 | |||
| 40 | static inline unsigned long realview_get_a8_cp15_ccnt(void) | ||
| 41 | { | ||
| 42 | unsigned long cycles; | ||
| 43 | /* Read CP15 CCNT register. */ | ||
| 44 | asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles)); | ||
| 45 | return cycles; | ||
| 46 | } | ||
| 47 | |||
| 48 | #define get_cycles realview_get_a8_cp15_ccnt | ||
| 49 | |||
| 50 | #endif | ||
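For context, here is a minimal, hypothetical sketch (not part of this commit) of how code built on this header could use the get_cycles() override introduced above; it assumes the CCNT has already been enabled by platform setup code, which this hunk does not show.

```c
#include <asm/timex.h>

/* Hypothetical helper: measure a code region in CPU cycles. */
static inline cycles_t cycles_elapsed(void (*region)(void))
{
	cycles_t start, end;

	start = get_cycles();	/* CP15 CCNT on PB11MP/PB1176/PBA8 */
	region();
	end = get_cycles();

	/* unsigned arithmetic tolerates a single counter wrap */
	return end - start;
}
```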
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b8cd5448b0e..4ff921c9f84 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -2151,3 +2151,11 @@ source "crypto/Kconfig" | |||
| 2151 | source "arch/x86/kvm/Kconfig" | 2151 | source "arch/x86/kvm/Kconfig" |
| 2152 | 2152 | ||
| 2153 | source "lib/Kconfig" | 2153 | source "lib/Kconfig" |
| 2154 | |||
| 2155 | config ARCH_HAS_FEATHER_TRACE | ||
| 2156 | def_bool y | ||
| 2157 | |||
| 2158 | config ARCH_HAS_SEND_PULL_TIMERS | ||
| 2159 | def_bool y | ||
| 2160 | |||
| 2161 | source "litmus/Kconfig" | ||
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 0baa628e330..e2c555f2191 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) | 13 | BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) |
| 14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) | 14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) |
| 15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) | 15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) |
| 16 | BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR) | ||
| 16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) | 17 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) |
| 17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | 18 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) |
| 18 | 19 | ||
diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h new file mode 100644 index 00000000000..4fd31633405 --- /dev/null +++ b/arch/x86/include/asm/feather_trace.h | |||
| @@ -0,0 +1,17 @@ | |||
| 1 | #ifndef _ARCH_FEATHER_TRACE_H | ||
| 2 | #define _ARCH_FEATHER_TRACE_H | ||
| 3 | |||
| 4 | #include <asm/msr.h> | ||
| 5 | |||
| 6 | static inline unsigned long long ft_timestamp(void) | ||
| 7 | { | ||
| 8 | return __native_read_tsc(); | ||
| 9 | } | ||
| 10 | |||
| 11 | #ifdef CONFIG_X86_32 | ||
| 12 | #include "feather_trace_32.h" | ||
| 13 | #else | ||
| 14 | #include "feather_trace_64.h" | ||
| 15 | #endif | ||
| 16 | |||
| 17 | #endif | ||
diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h new file mode 100644 index 00000000000..75e81a9f938 --- /dev/null +++ b/arch/x86/include/asm/feather_trace_32.h | |||
| @@ -0,0 +1,115 @@ | |||
| 1 | /* Copyright (c) 2007-2012 Björn Brandenburg, <bbb@mpi-sws.org> | ||
| 2 | * | ||
| 3 | * Permission is hereby granted, free of charge, to any person obtaining | ||
| 4 | * a copy of this software and associated documentation files (the | ||
| 5 | * "Software"), to deal in the Software without restriction, including | ||
| 6 | * without limitation the rights to use, copy, modify, merge, publish, | ||
| 7 | * distribute, sublicense, and/or sell copies of the Software, and to | ||
| 8 | * permit persons to whom the Software is furnished to do so, subject to | ||
| 9 | * the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be | ||
| 12 | * included in all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
| 15 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
| 16 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 17 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
| 18 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
| 19 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
| 20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| 21 | * SOFTWARE. | ||
| 22 | */ | ||
| 23 | |||
| 24 | /* Do not directly include this file. Include feather_trace.h instead */ | ||
| 25 | |||
| 26 | #define feather_callback __attribute__((regparm(3))) __attribute__((used)) | ||
| 27 | |||
| 28 | /* | ||
| 29 | * Make the compiler reload any register that is not saved in a cdecl function | ||
| 30 | * call (minus the registers that we explicitly clobber as output registers). | ||
| 31 | */ | ||
| 32 | #define __FT_CLOBBER_LIST0 "memory", "cc", "eax", "edx", "ecx" | ||
| 33 | #define __FT_CLOBBER_LIST1 "memory", "cc", "eax", "ecx" | ||
| 34 | #define __FT_CLOBBER_LIST2 "memory", "cc", "eax" | ||
| 35 | #define __FT_CLOBBER_LIST3 "memory", "cc", "eax" | ||
| 36 | |||
| 37 | #define __FT_TMP1(x) "=d" (x) | ||
| 38 | #define __FT_ARG1(x) "0" ((long) (x)) | ||
| 39 | #define __FT_TMP2(x) "=c" (x) | ||
| 40 | #define __FT_ARG2(x) "1" ((long) (x)) | ||
| 41 | |||
| 42 | #define __FT_ARG3(x) "r" ((long) (x)) | ||
| 43 | |||
| 44 | #define ft_event(id, callback) \ | ||
| 45 | __asm__ __volatile__( \ | ||
| 46 | "1: jmp 2f \n\t" \ | ||
| 47 | " call " #callback " \n\t" \ | ||
| 48 | ".section __event_table, \"aw\" \n\t" \ | ||
| 49 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
| 50 | ".previous \n\t" \ | ||
| 51 | "2: \n\t" \ | ||
| 52 | : : : __FT_CLOBBER_LIST0) | ||
| 53 | |||
| 54 | #define ft_event0(id, callback) \ | ||
| 55 | __asm__ __volatile__( \ | ||
| 56 | "1: jmp 2f \n\t" \ | ||
| 57 | " movl $" #id ", %%eax \n\t" \ | ||
| 58 | " call " #callback " \n\t" \ | ||
| 59 | ".section __event_table, \"aw\" \n\t" \ | ||
| 60 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
| 61 | ".previous \n\t" \ | ||
| 62 | "2: \n\t" \ | ||
| 63 | : : : __FT_CLOBBER_LIST0) | ||
| 64 | |||
| 65 | #define ft_event1(id, callback, param) \ | ||
| 66 | do { \ | ||
| 67 | long __ft_tmp1; \ | ||
| 68 | __asm__ __volatile__( \ | ||
| 69 | "1: jmp 2f \n\t" \ | ||
| 70 | " movl $" #id ", %%eax \n\t" \ | ||
| 71 | " call " #callback " \n\t" \ | ||
| 72 | ".section __event_table, \"aw\" \n\t" \ | ||
| 73 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
| 74 | ".previous \n\t" \ | ||
| 75 | "2: \n\t" \ | ||
| 76 | : __FT_TMP1(__ft_tmp1) \ | ||
| 77 | : __FT_ARG1(param) \ | ||
| 78 | : __FT_CLOBBER_LIST1); \ | ||
| 79 | } while (0); | ||
| 80 | |||
| 81 | #define ft_event2(id, callback, param, param2) \ | ||
| 82 | do { \ | ||
| 83 | long __ft_tmp1, __ft_tmp2; \ | ||
| 84 | __asm__ __volatile__( \ | ||
| 85 | "1: jmp 2f \n\t" \ | ||
| 86 | " movl $" #id ", %%eax \n\t" \ | ||
| 87 | " call " #callback " \n\t" \ | ||
| 88 | ".section __event_table, \"aw\" \n\t" \ | ||
| 89 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
| 90 | ".previous \n\t" \ | ||
| 91 | "2: \n\t" \ | ||
| 92 | : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \ | ||
| 93 | : __FT_ARG1(param), __FT_ARG2(param2) \ | ||
| 94 | : __FT_CLOBBER_LIST2); \ | ||
| 95 | } while (0); | ||
| 96 | |||
| 97 | |||
| 98 | #define ft_event3(id, callback, param, param2, param3) \ | ||
| 99 | do { \ | ||
| 100 | long __ft_tmp1, __ft_tmp2; \ | ||
| 101 | __asm__ __volatile__( \ | ||
| 102 | "1: jmp 2f \n\t" \ | ||
| 103 | " subl $4, %%esp \n\t" \ | ||
| 104 | " movl $" #id ", %%eax \n\t" \ | ||
| 105 | " movl %2, (%%esp) \n\t" \ | ||
| 106 | " call " #callback " \n\t" \ | ||
| 107 | " addl $4, %%esp \n\t" \ | ||
| 108 | ".section __event_table, \"aw\" \n\t" \ | ||
| 109 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
| 110 | ".previous \n\t" \ | ||
| 111 | "2: \n\t" \ | ||
| 112 | : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \ | ||
| 113 | : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \ | ||
| 114 | : __FT_CLOBBER_LIST3); \ | ||
| 115 | } while (0); | ||
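As a usage illustration only (the event id and function names below are invented, not from this commit), the macros above pair with a feather_callback handler roughly as follows; litmus/feather_trace.h is assumed to pull in ft_timestamp() via the arch header added earlier.

```c
#include <litmus/feather_trace.h>

/* With regparm(3) on x86-32, the event id arrives in %eax and the first
 * parameter in %edx, matching the register setup done by ft_event1(). */
feather_callback void record_event(unsigned long id, unsigned long arg)
{
	unsigned long long ts = ft_timestamp();
	/* store (id, arg, ts) in a trace buffer -- omitted in this sketch */
	(void) ts;
	(void) arg;
}

static void example_hot_path(unsigned long job_no)
{
	/* Compiles to a 2-byte short jump over the call; enabling event 1234
	 * rewrites the jump offset to 0 so the call actually executes. */
	ft_event1(1234, record_event, job_no);
}
```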
diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h new file mode 100644 index 00000000000..5ce49e2eebb --- /dev/null +++ b/arch/x86/include/asm/feather_trace_64.h | |||
| @@ -0,0 +1,124 @@ | |||
| 1 | /* Copyright (c) 2010 Andrea Bastoni, <bastoni@cs.unc.edu> | ||
| 2 | * Copyright (c) 2012 Björn Brandenburg, <bbb@mpi-sws.org> | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining | ||
| 5 | * a copy of this software and associated documentation files (the | ||
| 6 | * "Software"), to deal in the Software without restriction, including | ||
| 7 | * without limitation the rights to use, copy, modify, merge, publish, | ||
| 8 | * distribute, sublicense, and/or sell copies of the Software, and to | ||
| 9 | * permit persons to whom the Software is furnished to do so, subject to | ||
| 10 | * the following conditions: | ||
| 11 | * | ||
| 12 | * The above copyright notice and this permission notice shall be | ||
| 13 | * included in all copies or substantial portions of the Software. | ||
| 14 | * | ||
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
| 16 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
| 17 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
| 19 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
| 20 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
| 21 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| 22 | * SOFTWARE. | ||
| 23 | */ | ||
| 24 | |||
| 25 | /* Do not directly include this file. Include feather_trace.h instead */ | ||
| 26 | |||
| 27 | /* regparm is the default on x86_64 */ | ||
| 28 | #define feather_callback __attribute__((used)) | ||
| 29 | |||
| 30 | #define __FT_EVENT_TABLE(id,from,to) \ | ||
| 31 | ".section __event_table, \"aw\"\n\t" \ | ||
| 32 | ".balign 8\n\t" \ | ||
| 33 | ".quad " #id ", 0, " #from ", " #to " \n\t" \ | ||
| 34 | ".previous \n\t" | ||
| 35 | |||
| 36 | /* | ||
| 37 | * x86_64 caller only owns rbp, rbx, r12-r15; | ||
| 38 | * the callee can freely modify the others. | ||
| 39 | */ | ||
| 40 | #define __FT_CLOBBER_LIST0 "memory", "cc", "rdi", "rsi", "rdx", "rcx", \ | ||
| 41 | "r8", "r9", "r10", "r11", "rax" | ||
| 42 | |||
| 43 | #define __FT_CLOBBER_LIST1 "memory", "cc", "rdi", "rdx", "rcx", \ | ||
| 44 | "r8", "r9", "r10", "r11", "rax" | ||
| 45 | |||
| 46 | #define __FT_CLOBBER_LIST2 "memory", "cc", "rdi", "rcx", \ | ||
| 47 | "r8", "r9", "r10", "r11", "rax" | ||
| 48 | |||
| 49 | #define __FT_CLOBBER_LIST3 "memory", "cc", "rdi", \ | ||
| 50 | "r8", "r9", "r10", "r11", "rax" | ||
| 51 | |||
| 52 | /* The registers RDI, RSI, RDX, RCX, R8 and R9 are used for integer and pointer | ||
| 53 | * arguments. */ | ||
| 54 | |||
| 55 | /* RSI */ | ||
| 56 | #define __FT_TMP1(x) "=S" (x) | ||
| 57 | #define __FT_ARG1(x) "0" ((long) (x)) | ||
| 58 | |||
| 59 | /* RDX */ | ||
| 60 | #define __FT_TMP2(x) "=d" (x) | ||
| 61 | #define __FT_ARG2(x) "1" ((long) (x)) | ||
| 62 | |||
| 63 | /* RCX */ | ||
| 64 | #define __FT_TMP3(x) "=c" (x) | ||
| 65 | #define __FT_ARG3(x) "2" ((long) (x)) | ||
| 66 | |||
| 67 | #define ft_event(id, callback) \ | ||
| 68 | __asm__ __volatile__( \ | ||
| 69 | "1: jmp 2f \n\t" \ | ||
| 70 | " call " #callback " \n\t" \ | ||
| 71 | __FT_EVENT_TABLE(id,1b,2f) \ | ||
| 72 | "2: \n\t" \ | ||
| 73 | : : : __FT_CLOBBER_LIST0) | ||
| 74 | |||
| 75 | #define ft_event0(id, callback) \ | ||
| 76 | __asm__ __volatile__( \ | ||
| 77 | "1: jmp 2f \n\t" \ | ||
| 78 | " movq $" #id ", %%rdi \n\t" \ | ||
| 79 | " call " #callback " \n\t" \ | ||
| 80 | __FT_EVENT_TABLE(id,1b,2f) \ | ||
| 81 | "2: \n\t" \ | ||
| 82 | : : : __FT_CLOBBER_LIST0) | ||
| 83 | |||
| 84 | #define ft_event1(id, callback, param) \ | ||
| 85 | do { \ | ||
| 86 | long __ft_tmp1; \ | ||
| 87 | __asm__ __volatile__( \ | ||
| 88 | "1: jmp 2f \n\t" \ | ||
| 89 | " movq $" #id ", %%rdi \n\t" \ | ||
| 90 | " call " #callback " \n\t" \ | ||
| 91 | __FT_EVENT_TABLE(id,1b,2f) \ | ||
| 92 | "2: \n\t" \ | ||
| 93 | : __FT_TMP1(__ft_tmp1) \ | ||
| 94 | : __FT_ARG1(param) \ | ||
| 95 | : __FT_CLOBBER_LIST1); \ | ||
| 96 | } while (0); | ||
| 97 | |||
| 98 | #define ft_event2(id, callback, param, param2) \ | ||
| 99 | do { \ | ||
| 100 | long __ft_tmp1, __ft_tmp2; \ | ||
| 101 | __asm__ __volatile__( \ | ||
| 102 | "1: jmp 2f \n\t" \ | ||
| 103 | " movq $" #id ", %%rdi \n\t" \ | ||
| 104 | " call " #callback " \n\t" \ | ||
| 105 | __FT_EVENT_TABLE(id,1b,2f) \ | ||
| 106 | "2: \n\t" \ | ||
| 107 | : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \ | ||
| 108 | : __FT_ARG1(param), __FT_ARG2(param2) \ | ||
| 109 | : __FT_CLOBBER_LIST2); \ | ||
| 110 | } while (0); | ||
| 111 | |||
| 112 | #define ft_event3(id, callback, param, param2, param3) \ | ||
| 113 | do { \ | ||
| 114 | long __ft_tmp1, __ft_tmp2, __ft_tmp3; \ | ||
| 115 | __asm__ __volatile__( \ | ||
| 116 | "1: jmp 2f \n\t" \ | ||
| 117 | " movq $" #id ", %%rdi \n\t" \ | ||
| 118 | " call " #callback " \n\t" \ | ||
| 119 | __FT_EVENT_TABLE(id,1b,2f) \ | ||
| 120 | "2: \n\t" \ | ||
| 121 | : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2), __FT_TMP3(__ft_tmp3) \ | ||
| 122 | : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \ | ||
| 123 | : __FT_CLOBBER_LIST3); \ | ||
| 124 | } while (0); | ||
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index eb92a6ed2be..8f1e5445d37 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
| @@ -76,6 +76,8 @@ extern void threshold_interrupt(void); | |||
| 76 | extern void call_function_interrupt(void); | 76 | extern void call_function_interrupt(void); |
| 77 | extern void call_function_single_interrupt(void); | 77 | extern void call_function_single_interrupt(void); |
| 78 | 78 | ||
| 79 | extern void pull_timers_interrupt(void); | ||
| 80 | |||
| 79 | /* IOAPIC */ | 81 | /* IOAPIC */ |
| 80 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) | 82 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) |
| 81 | extern unsigned long io_apic_irqs; | 83 | extern unsigned long io_apic_irqs; |
| @@ -154,6 +156,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void); | |||
| 154 | extern void smp_reschedule_interrupt(struct pt_regs *); | 156 | extern void smp_reschedule_interrupt(struct pt_regs *); |
| 155 | extern void smp_call_function_interrupt(struct pt_regs *); | 157 | extern void smp_call_function_interrupt(struct pt_regs *); |
| 156 | extern void smp_call_function_single_interrupt(struct pt_regs *); | 158 | extern void smp_call_function_single_interrupt(struct pt_regs *); |
| 159 | extern void smp_pull_timers_interrupt(struct pt_regs *); | ||
| 157 | #ifdef CONFIG_X86_32 | 160 | #ifdef CONFIG_X86_32 |
| 158 | extern void smp_invalidate_interrupt(struct pt_regs *); | 161 | extern void smp_invalidate_interrupt(struct pt_regs *); |
| 159 | #else | 162 | #else |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 7e50f06393a..7de6ad70365 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
| @@ -130,6 +130,13 @@ | |||
| 130 | #define INVALIDATE_TLB_VECTOR_START \ | 130 | #define INVALIDATE_TLB_VECTOR_START \ |
| 131 | (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1) | 131 | (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1) |
| 132 | 132 | ||
| 133 | /* | ||
| 134 | * LITMUS^RT pull timers IRQ vector | ||
| 135 | * Make sure it's below the above max 32 vectors. | ||
| 136 | */ | ||
| 137 | #define PULL_TIMERS_VECTOR 0xce | ||
| 138 | |||
| 139 | |||
| 133 | #define NR_VECTORS 256 | 140 | #define NR_VECTORS 256 |
| 134 | 141 | ||
| 135 | #define FPU_IRQ 13 | 142 | #define FPU_IRQ 13 |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0d1171c9772..7e6a7b66203 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
| @@ -166,6 +166,10 @@ extern void print_cpu_info(struct cpuinfo_x86 *); | |||
| 166 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | 166 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); |
| 167 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | 167 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); |
| 168 | extern unsigned short num_cache_leaves; | 168 | extern unsigned short num_cache_leaves; |
| 169 | #ifdef CONFIG_SYSFS | ||
| 170 | extern int get_shared_cpu_map(cpumask_var_t mask, | ||
| 171 | unsigned int cpu, int index); | ||
| 172 | #endif | ||
| 169 | 173 | ||
| 170 | extern void detect_extended_topology(struct cpuinfo_x86 *c); | 174 | extern void detect_extended_topology(struct cpuinfo_x86 *c); |
| 171 | extern void detect_ht(struct cpuinfo_x86 *c); | 175 | extern void detect_ht(struct cpuinfo_x86 *c); |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 593485b38ab..2f6e127db30 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
| @@ -353,9 +353,13 @@ | |||
| 353 | #define __NR_sendmmsg 345 | 353 | #define __NR_sendmmsg 345 |
| 354 | #define __NR_setns 346 | 354 | #define __NR_setns 346 |
| 355 | 355 | ||
| 356 | #define __NR_LITMUS 347 | ||
| 357 | |||
| 358 | #include "litmus/unistd_32.h" | ||
| 359 | |||
| 356 | #ifdef __KERNEL__ | 360 | #ifdef __KERNEL__ |
| 357 | 361 | ||
| 358 | #define NR_syscalls 347 | 362 | #define NR_syscalls 347 + NR_litmus_syscalls |
| 359 | 363 | ||
| 360 | #define __ARCH_WANT_IPC_PARSE_VERSION | 364 | #define __ARCH_WANT_IPC_PARSE_VERSION |
| 361 | #define __ARCH_WANT_OLD_READDIR | 365 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 20104057344..f6f37d0ca33 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
| @@ -684,6 +684,10 @@ __SYSCALL(__NR_setns, sys_setns) | |||
| 684 | #define __NR_getcpu 309 | 684 | #define __NR_getcpu 309 |
| 685 | __SYSCALL(__NR_getcpu, sys_getcpu) | 685 | __SYSCALL(__NR_getcpu, sys_getcpu) |
| 686 | 686 | ||
| 687 | #define __NR_LITMUS 309 | ||
| 688 | |||
| 689 | #include "litmus/unistd_64.h" | ||
| 690 | |||
| 687 | #ifndef __NO_STUBS | 691 | #ifndef __NO_STUBS |
| 688 | #define __ARCH_WANT_OLD_READDIR | 692 | #define __ARCH_WANT_OLD_READDIR |
| 689 | #define __ARCH_WANT_OLD_STAT | 693 | #define __ARCH_WANT_OLD_STAT |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 82f2912155a..c84954ad12f 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -99,6 +99,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | |||
| 99 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 99 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
| 100 | obj-$(CONFIG_OF) += devicetree.o | 100 | obj-$(CONFIG_OF) += devicetree.o |
| 101 | 101 | ||
| 102 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o | ||
| 103 | |||
| 102 | ### | 104 | ### |
| 103 | # 64 bit specific files | 105 | # 64 bit specific files |
| 104 | ifeq ($(CONFIG_X86_64),y) | 106 | ifeq ($(CONFIG_X86_64),y) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index c105c533ed9..0bf12644aa7 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -747,6 +747,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
| 747 | static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); | 747 | static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); |
| 748 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) | 748 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) |
| 749 | 749 | ||
| 750 | /* returns CPUs that share the index cache with cpu */ | ||
| 751 | int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) | ||
| 752 | { | ||
| 753 | int ret = 0; | ||
| 754 | struct _cpuid4_info *this_leaf; | ||
| 755 | |||
| 756 | if (index >= num_cache_leaves) { | ||
| 757 | index = num_cache_leaves - 1; | ||
| 758 | ret = index; | ||
| 759 | } | ||
| 760 | |||
| 761 | this_leaf = CPUID4_INFO_IDX(cpu,index); | ||
| 762 | cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map)); | ||
| 763 | |||
| 764 | return ret; | ||
| 765 | } | ||
| 766 | |||
| 750 | #ifdef CONFIG_SMP | 767 | #ifdef CONFIG_SMP |
| 751 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | 768 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) |
| 752 | { | 769 | { |
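A hedged sketch (not in this commit) of how the new get_shared_cpu_map() helper can be queried once the cacheinfo structures have been initialized; the LITMUS^RT affinity code presumably builds its neighborhood table this way, but that call site is not shown in this excerpt.

```c
#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <asm/processor.h>

static void example_show_shared_cache(unsigned int cpu, int index)
{
	cpumask_var_t mask;
	int clamped, sibling;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return;

	/* An out-of-range index is clamped to the deepest cache level and
	 * the clamped value is returned; otherwise the return value is 0. */
	clamped = get_shared_cpu_map(mask, cpu, index);

	for_each_cpu(sibling, mask)
		printk(KERN_INFO "cpu%u shares cache level %d with cpu%d\n",
		       cpu, clamped ? clamped : index, sibling);

	free_cpumask_var(mask);
}
```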
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 6419bb05ecd..e5d2d3fa7a0 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -993,6 +993,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \ | |||
| 993 | call_function_interrupt smp_call_function_interrupt | 993 | call_function_interrupt smp_call_function_interrupt |
| 994 | apicinterrupt RESCHEDULE_VECTOR \ | 994 | apicinterrupt RESCHEDULE_VECTOR \ |
| 995 | reschedule_interrupt smp_reschedule_interrupt | 995 | reschedule_interrupt smp_reschedule_interrupt |
| 996 | apicinterrupt PULL_TIMERS_VECTOR \ | ||
| 997 | pull_timers_interrupt smp_pull_timers_interrupt | ||
| 996 | #endif | 998 | #endif |
| 997 | 999 | ||
| 998 | apicinterrupt ERROR_APIC_VECTOR \ | 1000 | apicinterrupt ERROR_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c new file mode 100644 index 00000000000..37cc3325271 --- /dev/null +++ b/arch/x86/kernel/ft_event.c | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | #include <linux/types.h> | ||
| 2 | |||
| 3 | #include <litmus/feather_trace.h> | ||
| 4 | |||
| 5 | /* the feather trace management functions assume | ||
| 6 | * exclusive access to the event table | ||
| 7 | */ | ||
| 8 | |||
| 9 | #ifndef CONFIG_DEBUG_RODATA | ||
| 10 | |||
| 11 | #define BYTE_JUMP 0xeb | ||
| 12 | #define BYTE_JUMP_LEN 0x02 | ||
| 13 | |||
| 14 | /* for each event, there is an entry in the event table */ | ||
| 15 | struct trace_event { | ||
| 16 | long id; | ||
| 17 | long count; | ||
| 18 | long start_addr; | ||
| 19 | long end_addr; | ||
| 20 | }; | ||
| 21 | |||
| 22 | extern struct trace_event __start___event_table[]; | ||
| 23 | extern struct trace_event __stop___event_table[]; | ||
| 24 | |||
| 25 | /* Workaround: if no events are defined, then the event_table section does not | ||
| 26 | * exist and the above references cause linker errors. This could probably be | ||
| 27 | * fixed by adjusting the linker script, but it is easier to maintain for us if | ||
| 28 | * we simply create a dummy symbol in the event table section. | ||
| 29 | */ | ||
| 30 | int __event_table_dummy[0] __attribute__ ((section("__event_table"))); | ||
| 31 | |||
| 32 | int ft_enable_event(unsigned long id) | ||
| 33 | { | ||
| 34 | struct trace_event* te = __start___event_table; | ||
| 35 | int count = 0; | ||
| 36 | char* delta; | ||
| 37 | unsigned char* instr; | ||
| 38 | |||
| 39 | while (te < __stop___event_table) { | ||
| 40 | if (te->id == id && ++te->count == 1) { | ||
| 41 | instr = (unsigned char*) te->start_addr; | ||
| 42 | /* make sure we don't clobber something wrong */ | ||
| 43 | if (*instr == BYTE_JUMP) { | ||
| 44 | delta = (((unsigned char*) te->start_addr) + 1); | ||
| 45 | *delta = 0; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | if (te->id == id) | ||
| 49 | count++; | ||
| 50 | te++; | ||
| 51 | } | ||
| 52 | |||
| 53 | printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count); | ||
| 54 | return count; | ||
| 55 | } | ||
| 56 | |||
| 57 | int ft_disable_event(unsigned long id) | ||
| 58 | { | ||
| 59 | struct trace_event* te = __start___event_table; | ||
| 60 | int count = 0; | ||
| 61 | char* delta; | ||
| 62 | unsigned char* instr; | ||
| 63 | |||
| 64 | while (te < __stop___event_table) { | ||
| 65 | if (te->id == id && --te->count == 0) { | ||
| 66 | instr = (unsigned char*) te->start_addr; | ||
| 67 | if (*instr == BYTE_JUMP) { | ||
| 68 | delta = (((unsigned char*) te->start_addr) + 1); | ||
| 69 | *delta = te->end_addr - te->start_addr - | ||
| 70 | BYTE_JUMP_LEN; | ||
| 71 | } | ||
| 72 | } | ||
| 73 | if (te->id == id) | ||
| 74 | count++; | ||
| 75 | te++; | ||
| 76 | } | ||
| 77 | |||
| 78 | printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count); | ||
| 79 | return count; | ||
| 80 | } | ||
| 81 | |||
| 82 | int ft_disable_all_events(void) | ||
| 83 | { | ||
| 84 | struct trace_event* te = __start___event_table; | ||
| 85 | int count = 0; | ||
| 86 | char* delta; | ||
| 87 | unsigned char* instr; | ||
| 88 | |||
| 89 | while (te < __stop___event_table) { | ||
| 90 | if (te->count) { | ||
| 91 | instr = (unsigned char*) te->start_addr; | ||
| 92 | if (*instr == BYTE_JUMP) { | ||
| 93 | delta = (((unsigned char*) te->start_addr) | ||
| 94 | + 1); | ||
| 95 | *delta = te->end_addr - te->start_addr - | ||
| 96 | BYTE_JUMP_LEN; | ||
| 97 | te->count = 0; | ||
| 98 | count++; | ||
| 99 | } | ||
| 100 | } | ||
| 101 | te++; | ||
| 102 | } | ||
| 103 | return count; | ||
| 104 | } | ||
| 105 | |||
| 106 | int ft_is_event_enabled(unsigned long id) | ||
| 107 | { | ||
| 108 | struct trace_event* te = __start___event_table; | ||
| 109 | |||
| 110 | while (te < __stop___event_table) { | ||
| 111 | if (te->id == id) | ||
| 112 | return te->count; | ||
| 113 | te++; | ||
| 114 | } | ||
| 115 | return 0; | ||
| 116 | } | ||
| 117 | |||
| 118 | #endif | ||
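For illustration only, a hypothetical control path (e.g. a /proc or debugfs handler, not included in this commit) driving the management functions above; note that this jump-patching implementation is only compiled when CONFIG_DEBUG_RODATA is disabled.

```c
#include <linux/kernel.h>
#include <litmus/feather_trace.h>

static void example_toggle_event(unsigned long id)
{
	int n;

	n = ft_enable_event(id);   /* jump offsets patched to 0: calls run */
	printk(KERN_INFO "event %lu: %d matching site(s)\n", id, n);

	/* ... collect samples for a while ... */

	n = ft_disable_event(id);  /* restore the jump that skips the call */
	printk(KERN_INFO "event %lu: %d matching site(s)\n", id, n);
}
```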
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index b3300e6bace..f3a90e926f5 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
| @@ -252,6 +252,9 @@ static void __init smp_intr_init(void) | |||
| 252 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, | 252 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, |
| 253 | call_function_single_interrupt); | 253 | call_function_single_interrupt); |
| 254 | 254 | ||
| 255 | /* IPI for hrtimer pulling on remote cpus */ | ||
| 256 | alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt); | ||
| 257 | |||
| 255 | /* Low priority IPI to cleanup after moving an irq */ | 258 | /* Low priority IPI to cleanup after moving an irq */ |
| 256 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 259 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
| 257 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | 260 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 013e7eba83b..7539d84628f 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
| @@ -23,6 +23,9 @@ | |||
| 23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
| 24 | #include <linux/gfp.h> | 24 | #include <linux/gfp.h> |
| 25 | 25 | ||
| 26 | #include <litmus/preempt.h> | ||
| 27 | #include <litmus/debug_trace.h> | ||
| 28 | |||
| 26 | #include <asm/mtrr.h> | 29 | #include <asm/mtrr.h> |
| 27 | #include <asm/tlbflush.h> | 30 | #include <asm/tlbflush.h> |
| 28 | #include <asm/mmu_context.h> | 31 | #include <asm/mmu_context.h> |
| @@ -147,6 +150,16 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
| 147 | free_cpumask_var(allbutself); | 150 | free_cpumask_var(allbutself); |
| 148 | } | 151 | } |
| 149 | 152 | ||
| 153 | /* trigger timers on remote cpu */ | ||
| 154 | void smp_send_pull_timers(int cpu) | ||
| 155 | { | ||
| 156 | if (unlikely(cpu_is_offline(cpu))) { | ||
| 157 | WARN_ON(1); | ||
| 158 | return; | ||
| 159 | } | ||
| 160 | apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR); | ||
| 161 | } | ||
| 162 | |||
| 150 | /* | 163 | /* |
| 151 | * this function calls the 'stop' function on all other CPUs in the system. | 164 | * this function calls the 'stop' function on all other CPUs in the system. |
| 152 | */ | 165 | */ |
| @@ -204,6 +217,11 @@ void smp_reschedule_interrupt(struct pt_regs *regs) | |||
| 204 | /* | 217 | /* |
| 205 | * KVM uses this interrupt to force a cpu out of guest mode | 218 | * KVM uses this interrupt to force a cpu out of guest mode |
| 206 | */ | 219 | */ |
| 220 | |||
| 221 | /* LITMUS^RT: this IPI might need to trigger the sched state machine. | ||
| 222 | * Starting from 3.0 schedule_ipi() actually does something. This may | ||
| 223 | * increase IPI latencies compared with previous versions. */ | ||
| 224 | sched_state_ipi(); | ||
| 207 | } | 225 | } |
| 208 | 226 | ||
| 209 | void smp_call_function_interrupt(struct pt_regs *regs) | 227 | void smp_call_function_interrupt(struct pt_regs *regs) |
| @@ -224,6 +242,17 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
| 224 | irq_exit(); | 242 | irq_exit(); |
| 225 | } | 243 | } |
| 226 | 244 | ||
| 245 | extern void hrtimer_pull(void); | ||
| 246 | |||
| 247 | void smp_pull_timers_interrupt(struct pt_regs *regs) | ||
| 248 | { | ||
| 249 | ack_APIC_irq(); | ||
| 250 | irq_enter(); | ||
| 251 | TRACE("pull timer interrupt\n"); | ||
| 252 | hrtimer_pull(); | ||
| 253 | irq_exit(); | ||
| 254 | } | ||
| 255 | |||
| 227 | struct smp_ops smp_ops = { | 256 | struct smp_ops smp_ops = { |
| 228 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 257 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
| 229 | .smp_prepare_cpus = native_smp_prepare_cpus, | 258 | .smp_prepare_cpus = native_smp_prepare_cpus, |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index bc19be332bc..058cac30916 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
| @@ -346,3 +346,17 @@ ENTRY(sys_call_table) | |||
| 346 | .long sys_syncfs | 346 | .long sys_syncfs |
| 347 | .long sys_sendmmsg /* 345 */ | 347 | .long sys_sendmmsg /* 345 */ |
| 348 | .long sys_setns | 348 | .long sys_setns |
| 349 | .long sys_set_rt_task_param /* LITMUS^RT 347 */ | ||
| 350 | .long sys_get_rt_task_param | ||
| 351 | .long sys_complete_job | ||
| 352 | .long sys_od_open | ||
| 353 | .long sys_od_close | ||
| 354 | .long sys_litmus_lock /* +5 */ | ||
| 355 | .long sys_litmus_unlock | ||
| 356 | .long sys_query_job_no | ||
| 357 | .long sys_wait_for_job_release | ||
| 358 | .long sys_wait_for_ts_release | ||
| 359 | .long sys_release_ts /* +10 */ | ||
| 360 | .long sys_null_call | ||
| 361 | .long sys_dynamic_group_lock | ||
| 362 | .long sys_dynamic_group_unlock | ||
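Purely as a userspace illustration (not part of this commit): the new entries can be reached by raw syscall number, where 347 is __NR_LITMUS on x86-32 and sys_null_call sits 11 slots later in the table above. Whether null_call tolerates a NULL argument is an assumption not shown in this diff, and real applications would use the liblitmus wrappers instead of raw numbers.

```c
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	/* 347 = __NR_LITMUS (x86-32), +11 = sys_null_call per the table above;
	 * passing NULL is an assumption made for this sketch. */
	long ret = syscall(347 + 11, NULL);
	printf("null_call returned %ld\n", ret);
	return 0;
}
```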
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | * current->executable is only used by the procfs. This allows a dispatch | 19 | * current->executable is only used by the procfs. This allows a dispatch |
| 20 | * table to check for several different types of binary formats. We keep | 20 | * table to check for several different types of binary formats. We keep |
| 21 | * trying until we recognize the file or we run out of supported binary | 21 | * trying until we recognize the file or we run out of supported binary |
| 22 | * formats. | 22 | * formats. |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| @@ -56,6 +56,8 @@ | |||
| 56 | #include <linux/oom.h> | 56 | #include <linux/oom.h> |
| 57 | #include <linux/compat.h> | 57 | #include <linux/compat.h> |
| 58 | 58 | ||
| 59 | #include <litmus/litmus.h> | ||
| 60 | |||
| 59 | #include <asm/uaccess.h> | 61 | #include <asm/uaccess.h> |
| 60 | #include <asm/mmu_context.h> | 62 | #include <asm/mmu_context.h> |
| 61 | #include <asm/tlb.h> | 63 | #include <asm/tlb.h> |
| @@ -85,7 +87,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert) | |||
| 85 | insert ? list_add(&fmt->lh, &formats) : | 87 | insert ? list_add(&fmt->lh, &formats) : |
| 86 | list_add_tail(&fmt->lh, &formats); | 88 | list_add_tail(&fmt->lh, &formats); |
| 87 | write_unlock(&binfmt_lock); | 89 | write_unlock(&binfmt_lock); |
| 88 | return 0; | 90 | return 0; |
| 89 | } | 91 | } |
| 90 | 92 | ||
| 91 | EXPORT_SYMBOL(__register_binfmt); | 93 | EXPORT_SYMBOL(__register_binfmt); |
| @@ -1170,7 +1172,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
| 1170 | group */ | 1172 | group */ |
| 1171 | 1173 | ||
| 1172 | current->self_exec_id++; | 1174 | current->self_exec_id++; |
| 1173 | 1175 | ||
| 1174 | flush_signal_handlers(current, 0); | 1176 | flush_signal_handlers(current, 0); |
| 1175 | flush_old_files(current->files); | 1177 | flush_old_files(current->files); |
| 1176 | } | 1178 | } |
| @@ -1265,8 +1267,8 @@ int check_unsafe_exec(struct linux_binprm *bprm) | |||
| 1265 | return res; | 1267 | return res; |
| 1266 | } | 1268 | } |
| 1267 | 1269 | ||
| 1268 | /* | 1270 | /* |
| 1269 | * Fill the binprm structure from the inode. | 1271 | * Fill the binprm structure from the inode. |
| 1270 | * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes | 1272 | * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes |
| 1271 | * | 1273 | * |
| 1272 | * This may be called multiple times for binary chains (scripts for example). | 1274 | * This may be called multiple times for binary chains (scripts for example). |
| @@ -1502,6 +1504,7 @@ static int do_execve_common(const char *filename, | |||
| 1502 | goto out_unmark; | 1504 | goto out_unmark; |
| 1503 | 1505 | ||
| 1504 | sched_exec(); | 1506 | sched_exec(); |
| 1507 | litmus_exec(); | ||
| 1505 | 1508 | ||
| 1506 | bprm->file = file; | 1509 | bprm->file = file; |
| 1507 | bprm->filename = filename; | 1510 | bprm->filename = filename; |
diff --git a/fs/inode.c b/fs/inode.c index ec7924696a1..d858c6b9823 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
| @@ -299,6 +299,8 @@ void inode_init_once(struct inode *inode) | |||
| 299 | #ifdef CONFIG_FSNOTIFY | 299 | #ifdef CONFIG_FSNOTIFY |
| 300 | INIT_HLIST_HEAD(&inode->i_fsnotify_marks); | 300 | INIT_HLIST_HEAD(&inode->i_fsnotify_marks); |
| 301 | #endif | 301 | #endif |
| 302 | INIT_LIST_HEAD(&inode->i_obj_list); | ||
| 303 | mutex_init(&inode->i_obj_mutex); | ||
| 302 | } | 304 | } |
| 303 | EXPORT_SYMBOL(inode_init_once); | 305 | EXPORT_SYMBOL(inode_init_once); |
| 304 | 306 | ||
diff --git a/include/linux/fs.h b/include/linux/fs.h index cf7bc25928c..78987e9a384 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -17,8 +17,8 @@ | |||
| 17 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute | 17 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute |
| 18 | * upper limit on files-per-process. | 18 | * upper limit on files-per-process. |
| 19 | * | 19 | * |
| 20 | * Some programs (notably those using select()) may have to be | 20 | * Some programs (notably those using select()) may have to be |
| 21 | * recompiled to take full advantage of the new limits.. | 21 | * recompiled to take full advantage of the new limits.. |
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | /* Fixed constants first: */ | 24 | /* Fixed constants first: */ |
| @@ -173,7 +173,7 @@ struct inodes_stat_t { | |||
| 173 | #define SEL_EX 4 | 173 | #define SEL_EX 4 |
| 174 | 174 | ||
| 175 | /* public flags for file_system_type */ | 175 | /* public flags for file_system_type */ |
| 176 | #define FS_REQUIRES_DEV 1 | 176 | #define FS_REQUIRES_DEV 1 |
| 177 | #define FS_BINARY_MOUNTDATA 2 | 177 | #define FS_BINARY_MOUNTDATA 2 |
| 178 | #define FS_HAS_SUBTYPE 4 | 178 | #define FS_HAS_SUBTYPE 4 |
| 179 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | 179 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ |
| @@ -481,7 +481,7 @@ struct iattr { | |||
| 481 | */ | 481 | */ |
| 482 | #include <linux/quota.h> | 482 | #include <linux/quota.h> |
| 483 | 483 | ||
| 484 | /** | 484 | /** |
| 485 | * enum positive_aop_returns - aop return codes with specific semantics | 485 | * enum positive_aop_returns - aop return codes with specific semantics |
| 486 | * | 486 | * |
| 487 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has | 487 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has |
| @@ -491,7 +491,7 @@ struct iattr { | |||
| 491 | * be a candidate for writeback again in the near | 491 | * be a candidate for writeback again in the near |
| 492 | * future. Other callers must be careful to unlock | 492 | * future. Other callers must be careful to unlock |
| 493 | * the page if they get this return. Returned by | 493 | * the page if they get this return. Returned by |
| 494 | * writepage(); | 494 | * writepage(); |
| 495 | * | 495 | * |
| 496 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has | 496 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has |
| 497 | * unlocked it and the page might have been truncated. | 497 | * unlocked it and the page might have been truncated. |
| @@ -735,6 +735,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping) | |||
| 735 | 735 | ||
| 736 | struct posix_acl; | 736 | struct posix_acl; |
| 737 | #define ACL_NOT_CACHED ((void *)(-1)) | 737 | #define ACL_NOT_CACHED ((void *)(-1)) |
| 738 | struct inode_obj_id_table; | ||
| 738 | 739 | ||
| 739 | #define IOP_FASTPERM 0x0001 | 740 | #define IOP_FASTPERM 0x0001 |
| 740 | #define IOP_LOOKUP 0x0002 | 741 | #define IOP_LOOKUP 0x0002 |
| @@ -1048,10 +1049,10 @@ static inline int file_check_writeable(struct file *filp) | |||
| 1048 | 1049 | ||
| 1049 | #define MAX_NON_LFS ((1UL<<31) - 1) | 1050 | #define MAX_NON_LFS ((1UL<<31) - 1) |
| 1050 | 1051 | ||
| 1051 | /* Page cache limit. The filesystems should put that into their s_maxbytes | 1052 | /* Page cache limit. The filesystems should put that into their s_maxbytes |
| 1052 | limits, otherwise bad things can happen in VM. */ | 1053 | limits, otherwise bad things can happen in VM. */ |
| 1053 | #if BITS_PER_LONG==32 | 1054 | #if BITS_PER_LONG==32 |
| 1054 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) | 1055 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) |
| 1055 | #elif BITS_PER_LONG==64 | 1056 | #elif BITS_PER_LONG==64 |
| 1056 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL | 1057 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL |
| 1057 | #endif | 1058 | #endif |
| @@ -2282,7 +2283,7 @@ extern void free_write_pipe(struct file *); | |||
| 2282 | 2283 | ||
| 2283 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); | 2284 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); |
| 2284 | extern struct file * open_exec(const char *); | 2285 | extern struct file * open_exec(const char *); |
| 2285 | 2286 | ||
| 2286 | /* fs/dcache.c -- generic fs support functions */ | 2287 | /* fs/dcache.c -- generic fs support functions */ |
| 2287 | extern int is_subdir(struct dentry *, struct dentry *); | 2288 | extern int is_subdir(struct dentry *, struct dentry *); |
| 2288 | extern int path_is_under(struct path *, struct path *); | 2289 | extern int path_is_under(struct path *, struct path *); |
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f743883f769..ef18786a7b4 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h | |||
| @@ -6,6 +6,8 @@ | |||
| 6 | #include <linux/ftrace_irq.h> | 6 | #include <linux/ftrace_irq.h> |
| 7 | #include <asm/hardirq.h> | 7 | #include <asm/hardirq.h> |
| 8 | 8 | ||
| 9 | #include <litmus/trace_irq.h> | ||
| 10 | |||
| 9 | /* | 11 | /* |
| 10 | * We put the hardirq and softirq counter into the preemption | 12 | * We put the hardirq and softirq counter into the preemption |
| 11 | * counter. The bitmask has the following meaning: | 13 | * counter. The bitmask has the following meaning: |
| @@ -186,6 +188,7 @@ extern void rcu_nmi_exit(void); | |||
| 186 | account_system_vtime(current); \ | 188 | account_system_vtime(current); \ |
| 187 | add_preempt_count(HARDIRQ_OFFSET); \ | 189 | add_preempt_count(HARDIRQ_OFFSET); \ |
| 188 | trace_hardirq_enter(); \ | 190 | trace_hardirq_enter(); \ |
| 191 | ft_irq_fired(); \ | ||
| 189 | } while (0) | 192 | } while (0) |
| 190 | 193 | ||
| 191 | /* | 194 | /* |
| @@ -216,6 +219,7 @@ extern void irq_exit(void); | |||
| 216 | lockdep_off(); \ | 219 | lockdep_off(); \ |
| 217 | rcu_nmi_enter(); \ | 220 | rcu_nmi_enter(); \ |
| 218 | trace_hardirq_enter(); \ | 221 | trace_hardirq_enter(); \ |
| 222 | ft_irq_fired(); \ | ||
| 219 | } while (0) | 223 | } while (0) |
| 220 | 224 | ||
| 221 | #define nmi_exit() \ | 225 | #define nmi_exit() \ |
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0dc30c9f1..d91bba539ca 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h | |||
| @@ -174,6 +174,7 @@ enum hrtimer_base_type { | |||
| 174 | * @nr_hangs: Total number of hrtimer interrupt hangs | 174 | * @nr_hangs: Total number of hrtimer interrupt hangs |
| 175 | * @max_hang_time: Maximum time spent in hrtimer_interrupt | 175 | * @max_hang_time: Maximum time spent in hrtimer_interrupt |
| 176 | * @clock_base: array of clock bases for this cpu | 176 | * @clock_base: array of clock bases for this cpu |
| 177 | * @to_pull: LITMUS^RT list of timers to be pulled on this cpu | ||
| 177 | */ | 178 | */ |
| 178 | struct hrtimer_cpu_base { | 179 | struct hrtimer_cpu_base { |
| 179 | raw_spinlock_t lock; | 180 | raw_spinlock_t lock; |
| @@ -188,8 +189,32 @@ struct hrtimer_cpu_base { | |||
| 188 | ktime_t max_hang_time; | 189 | ktime_t max_hang_time; |
| 189 | #endif | 190 | #endif |
| 190 | struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; | 191 | struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; |
| 192 | struct list_head to_pull; | ||
| 191 | }; | 193 | }; |
| 192 | 194 | ||
| 195 | #ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS | ||
| 196 | |||
| 197 | #define HRTIMER_START_ON_INACTIVE 0 | ||
| 198 | #define HRTIMER_START_ON_QUEUED 1 | ||
| 199 | |||
| 200 | /* | ||
| 201 | * struct hrtimer_start_on_info - save timer info on remote cpu | ||
| 202 | * @list: list of hrtimer_start_on_info on remote cpu (to_pull) | ||
| 203 | * @timer: timer to be triggered on remote cpu | ||
| 204 | * @time: time event | ||
| 205 | * @mode: timer mode | ||
| 206 | * @state: activity flag | ||
| 207 | */ | ||
| 208 | struct hrtimer_start_on_info { | ||
| 209 | struct list_head list; | ||
| 210 | struct hrtimer *timer; | ||
| 211 | ktime_t time; | ||
| 212 | enum hrtimer_mode mode; | ||
| 213 | atomic_t state; | ||
| 214 | }; | ||
| 215 | |||
| 216 | #endif | ||
| 217 | |||
| 193 | static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) | 218 | static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) |
| 194 | { | 219 | { |
| 195 | timer->node.expires = time; | 220 | timer->node.expires = time; |
| @@ -355,6 +380,13 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
| 355 | unsigned long delta_ns, | 380 | unsigned long delta_ns, |
| 356 | const enum hrtimer_mode mode, int wakeup); | 381 | const enum hrtimer_mode mode, int wakeup); |
| 357 | 382 | ||
| 383 | #ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS | ||
| 384 | extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info); | ||
| 385 | extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info, | ||
| 386 | struct hrtimer *timer, ktime_t time, | ||
| 387 | const enum hrtimer_mode mode); | ||
| 388 | #endif | ||
| 389 | |||
| 358 | extern int hrtimer_cancel(struct hrtimer *timer); | 390 | extern int hrtimer_cancel(struct hrtimer *timer); |
| 359 | extern int hrtimer_try_to_cancel(struct hrtimer *timer); | 391 | extern int hrtimer_try_to_cancel(struct hrtimer *timer); |
| 360 | 392 | ||
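A minimal sketch (names invented, not from this commit) of how the new remote-arming interface might be driven; the queuing and pulling logic itself lives in the kernel/hrtimer.c changes of this patch, which are not shown in this excerpt.

```c
#include <linux/hrtimer.h>

static struct hrtimer release_timer;              /* hrtimer_init()'d elsewhere */
static struct hrtimer_start_on_info release_info; /* one per in-flight request  */

static void arm_release_on(int target_cpu, ktime_t when)
{
	hrtimer_start_on_info_init(&release_info);

	/* Based on the other hunks above: if target_cpu is remote, the request
	 * is queued on that CPU's to_pull list and a PULL_TIMERS_VECTOR IPI
	 * (smp_send_pull_timers) asks it to arm the timer locally. */
	hrtimer_start_on(target_cpu, &release_info, &release_timer,
			 when, HRTIMER_MODE_ABS);
}
```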
diff --git a/include/linux/sched.h b/include/linux/sched.h index 5bb4dd2e4c5..096834c7c63 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #define SCHED_BATCH 3 | 39 | #define SCHED_BATCH 3 |
| 40 | /* SCHED_ISO: reserved but not implemented yet */ | 40 | /* SCHED_ISO: reserved but not implemented yet */ |
| 41 | #define SCHED_IDLE 5 | 41 | #define SCHED_IDLE 5 |
| 42 | #define SCHED_LITMUS 6 | ||
| 42 | /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ | 43 | /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ |
| 43 | #define SCHED_RESET_ON_FORK 0x40000000 | 44 | #define SCHED_RESET_ON_FORK 0x40000000 |
| 44 | 45 | ||
| @@ -93,6 +94,10 @@ struct sched_param { | |||
| 93 | 94 | ||
| 94 | #include <asm/processor.h> | 95 | #include <asm/processor.h> |
| 95 | 96 | ||
| 97 | #include <litmus/rt_param.h> | ||
| 98 | #include <litmus/preempt.h> | ||
| 99 | #include <litmus/fdso.h> | ||
| 100 | |||
| 96 | struct exec_domain; | 101 | struct exec_domain; |
| 97 | struct futex_pi_state; | 102 | struct futex_pi_state; |
| 98 | struct robust_list_head; | 103 | struct robust_list_head; |
| @@ -1209,6 +1214,7 @@ struct sched_rt_entity { | |||
| 1209 | }; | 1214 | }; |
| 1210 | 1215 | ||
| 1211 | struct rcu_node; | 1216 | struct rcu_node; |
| 1217 | struct od_table_entry; | ||
| 1212 | 1218 | ||
| 1213 | enum perf_event_task_context { | 1219 | enum perf_event_task_context { |
| 1214 | perf_invalid_context = -1, | 1220 | perf_invalid_context = -1, |
| @@ -1313,9 +1319,9 @@ struct task_struct { | |||
| 1313 | unsigned long stack_canary; | 1319 | unsigned long stack_canary; |
| 1314 | #endif | 1320 | #endif |
| 1315 | 1321 | ||
| 1316 | /* | 1322 | /* |
| 1317 | * pointers to (original) parent process, youngest child, younger sibling, | 1323 | * pointers to (original) parent process, youngest child, younger sibling, |
| 1318 | * older sibling, respectively. (p->father can be replaced with | 1324 | * older sibling, respectively. (p->father can be replaced with |
| 1319 | * p->real_parent->pid) | 1325 | * p->real_parent->pid) |
| 1320 | */ | 1326 | */ |
| 1321 | struct task_struct *real_parent; /* real parent process */ | 1327 | struct task_struct *real_parent; /* real parent process */ |
| @@ -1525,6 +1531,15 @@ struct task_struct { | |||
| 1525 | int make_it_fail; | 1531 | int make_it_fail; |
| 1526 | #endif | 1532 | #endif |
| 1527 | struct prop_local_single dirties; | 1533 | struct prop_local_single dirties; |
| 1534 | |||
| 1535 | /* LITMUS RT parameters and state */ | ||
| 1536 | struct rt_param rt_param; | ||
| 1537 | |||
| 1538 | /* references to PI semaphores, etc. */ | ||
| 1539 | struct od_table_entry *od_table; | ||
| 1540 | |||
| 1541 | resource_mask_t resources; | ||
| 1542 | |||
| 1528 | #ifdef CONFIG_LATENCYTOP | 1543 | #ifdef CONFIG_LATENCYTOP |
| 1529 | int latency_record_count; | 1544 | int latency_record_count; |
| 1530 | struct latency_record latency_record[LT_SAVECOUNT]; | 1545 | struct latency_record latency_record[LT_SAVECOUNT]; |
| @@ -2464,6 +2479,7 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) | |||
| 2464 | static inline void set_tsk_need_resched(struct task_struct *tsk) | 2479 | static inline void set_tsk_need_resched(struct task_struct *tsk) |
| 2465 | { | 2480 | { |
| 2466 | set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); | 2481 | set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); |
| 2482 | sched_state_will_schedule(tsk); | ||
| 2467 | } | 2483 | } |
| 2468 | 2484 | ||
| 2469 | static inline void clear_tsk_need_resched(struct task_struct *tsk) | 2485 | static inline void clear_tsk_need_resched(struct task_struct *tsk) |
diff --git a/include/linux/smp.h b/include/linux/smp.h index 8cc38d3bab0..53b1beef27a 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
| @@ -82,6 +82,11 @@ int smp_call_function_any(const struct cpumask *mask, | |||
| 82 | smp_call_func_t func, void *info, int wait); | 82 | smp_call_func_t func, void *info, int wait); |
| 83 | 83 | ||
| 84 | /* | 84 | /* |
| 85 | * sends a 'pull timer' event to a remote CPU | ||
| 86 | */ | ||
| 87 | extern void smp_send_pull_timers(int cpu); | ||
| 88 | |||
| 89 | /* | ||
| 85 | * Generic and arch helpers | 90 | * Generic and arch helpers |
| 86 | */ | 91 | */ |
| 87 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS | 92 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS |
diff --git a/include/linux/tick.h b/include/linux/tick.h index b232ccc0ee2..1e29bd5b18a 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h | |||
| @@ -74,6 +74,11 @@ extern int tick_is_oneshot_available(void); | |||
| 74 | extern struct tick_device *tick_get_device(int cpu); | 74 | extern struct tick_device *tick_get_device(int cpu); |
| 75 | 75 | ||
| 76 | # ifdef CONFIG_HIGH_RES_TIMERS | 76 | # ifdef CONFIG_HIGH_RES_TIMERS |
| 77 | /* LITMUS^RT tick alignment */ | ||
| 78 | #define LINUX_DEFAULT_TICKS 0 | ||
| 79 | #define LITMUS_ALIGNED_TICKS 1 | ||
| 80 | #define LITMUS_STAGGERED_TICKS 2 | ||
| 81 | |||
| 77 | extern int tick_init_highres(void); | 82 | extern int tick_init_highres(void); |
| 78 | extern int tick_program_event(ktime_t expires, int force); | 83 | extern int tick_program_event(ktime_t expires, int force); |
| 79 | extern void tick_setup_sched_timer(void); | 84 | extern void tick_setup_sched_timer(void); |
diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h new file mode 100644 index 00000000000..ca2e442eb54 --- /dev/null +++ b/include/litmus/affinity.h | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | #ifndef __LITMUS_AFFINITY_H | ||
| 2 | #define __LITMUS_AFFINITY_H | ||
| 3 | |||
| 4 | #include <linux/cpumask.h> | ||
| 5 | |||
| 6 | /* | ||
| 7 | L1 (instr) = depth 0 | ||
| 8 | L1 (data) = depth 1 | ||
| 9 | L2 = depth 2 | ||
| 10 | L3 = depth 3 | ||
| 11 | */ | ||
| 12 | #define NUM_CACHE_LEVELS 4 | ||
| 13 | |||
| 14 | struct neighborhood | ||
| 15 | { | ||
| 16 | unsigned int size[NUM_CACHE_LEVELS]; | ||
| 17 | cpumask_var_t neighbors[NUM_CACHE_LEVELS]; | ||
| 18 | }; | ||
| 19 | |||
| 20 | /* topology info is stored redundently in a big array for fast lookups */ | ||
| 21 | extern struct neighborhood neigh_info[NR_CPUS]; | ||
| 22 | |||
| 23 | void init_topology(void); /* called by Litmus module's _init_litmus() */ | ||
| 24 | |||
| 25 | /* Works like: | ||
| 26 | void get_nearest_available_cpu( | ||
| 27 | cpu_entry_t **nearest, | ||
| 28 | cpu_entry_t *start, | ||
| 29 | cpu_entry_t *entries, | ||
| 30 | int release_master) | ||
| 31 | |||
| 32 | Set release_master = NO_CPU for no Release Master. | ||
| 33 | |||
| 34 | We use a macro here to exploit the fact that C-EDF and G-EDF | ||
| 35 | have similar structures for their cpu_entry_t structs, even though | ||
| 36 | they do not share a common base-struct. The macro allows us to | ||
| 37 | avoid code duplication. | ||
| 38 | |||
| 39 | TODO: Factor out the job-to-processor linking from C/G-EDF into | ||
| 40 | a reusable "processor mapping". (See B.B.'s RTSS'09 paper & | ||
| 41 | dissertation.) | ||
| 42 | */ | ||
| 43 | #define get_nearest_available_cpu(nearest, start, entries, release_master) \ | ||
| 44 | { \ | ||
| 45 | (nearest) = NULL; \ | ||
| 46 | if (!(start)->linked) { \ | ||
| 47 | (nearest) = (start); \ | ||
| 48 | } else { \ | ||
| 49 | int __level; \ | ||
| 50 | int __cpu; \ | ||
| 51 | int __release_master = ((release_master) == NO_CPU) ? -1 : (release_master); \ | ||
| 52 | struct neighborhood *__neighbors = &neigh_info[(start)->cpu]; \ | ||
| 53 | \ | ||
| 54 | for (__level = 0; (__level < NUM_CACHE_LEVELS) && !(nearest); ++__level) { \ | ||
| 55 | if (__neighbors->size[__level] > 1) { \ | ||
| 56 | for_each_cpu(__cpu, __neighbors->neighbors[__level]) { \ | ||
| 57 | if (__cpu != __release_master) { \ | ||
| 58 | cpu_entry_t *__entry = &per_cpu((entries), __cpu); \ | ||
| 59 | if (!__entry->linked) { \ | ||
| 60 | (nearest) = __entry; \ | ||
| 61 | break; \ | ||
| 62 | } \ | ||
| 63 | } \ | ||
| 64 | } \ | ||
| 65 | } else if (__neighbors->size[__level] == 0) { \ | ||
| 66 | break; \ | ||
| 67 | } \ | ||
| 68 | } \ | ||
| 69 | } \ | ||
| 70 | \ | ||
| 71 | if ((nearest)) { \ | ||
| 72 | TRACE("P%d is closest available CPU to P%d\n", \ | ||
| 73 | (nearest)->cpu, (start)->cpu); \ | ||
| 74 | } else { \ | ||
| 75 | TRACE("Could not find an available CPU close to P%d\n", \ | ||
| 76 | (start)->cpu); \ | ||
| 77 | } \ | ||
| 78 | } | ||
| 79 | |||
| 80 | #endif | ||
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h new file mode 100644 index 00000000000..cf4864a498d --- /dev/null +++ b/include/litmus/bheap.h | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | /* bheap.h -- Binomial Heaps | ||
| 2 | * | ||
| 3 | * (c) 2008, 2009 Bjoern Brandenburg | ||
| 4 | */ | ||
| 5 | |||
| 6 | #ifndef BHEAP_H | ||
| 7 | #define BHEAP_H | ||
| 8 | |||
| 9 | #define NOT_IN_HEAP UINT_MAX | ||
| 10 | |||
| 11 | struct bheap_node { | ||
| 12 | struct bheap_node* parent; | ||
| 13 | struct bheap_node* next; | ||
| 14 | struct bheap_node* child; | ||
| 15 | |||
| 16 | unsigned int degree; | ||
| 17 | void* value; | ||
| 18 | struct bheap_node** ref; | ||
| 19 | }; | ||
| 20 | |||
| 21 | struct bheap { | ||
| 22 | struct bheap_node* head; | ||
| 23 | /* We cache the minimum of the heap. | ||
| 24 | * This speeds up repeated peek operations. | ||
| 25 | */ | ||
| 26 | struct bheap_node* min; | ||
| 27 | }; | ||
| 28 | |||
| 29 | typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b); | ||
| 30 | |||
| 31 | void bheap_init(struct bheap* heap); | ||
| 32 | void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value); | ||
| 33 | |||
| 34 | static inline int bheap_node_in_heap(struct bheap_node* h) | ||
| 35 | { | ||
| 36 | return h->degree != NOT_IN_HEAP; | ||
| 37 | } | ||
| 38 | |||
| 39 | static inline int bheap_empty(struct bheap* heap) | ||
| 40 | { | ||
| 41 | return heap->head == NULL && heap->min == NULL; | ||
| 42 | } | ||
| 43 | |||
| 44 | /* insert (and reinitialize) a node into the heap */ | ||
| 45 | void bheap_insert(bheap_prio_t higher_prio, | ||
| 46 | struct bheap* heap, | ||
| 47 | struct bheap_node* node); | ||
| 48 | |||
| 49 | /* merge addition into target */ | ||
| 50 | void bheap_union(bheap_prio_t higher_prio, | ||
| 51 | struct bheap* target, | ||
| 52 | struct bheap* addition); | ||
| 53 | |||
| 54 | struct bheap_node* bheap_peek(bheap_prio_t higher_prio, | ||
| 55 | struct bheap* heap); | ||
| 56 | |||
| 57 | struct bheap_node* bheap_take(bheap_prio_t higher_prio, | ||
| 58 | struct bheap* heap); | ||
| 59 | |||
| 60 | void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap); | ||
| 61 | int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node); | ||
| 62 | |||
| 63 | void bheap_delete(bheap_prio_t higher_prio, | ||
| 64 | struct bheap* heap, | ||
| 65 | struct bheap_node* node); | ||
| 66 | |||
| 67 | /* allocate from memcache */ | ||
| 68 | struct bheap_node* bheap_node_alloc(int gfp_flags); | ||
| 69 | void bheap_node_free(struct bheap_node* hn); | ||
| 70 | |||
| 71 | /* allocate a heap node for value and insert into the heap */ | ||
| 72 | int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, | ||
| 73 | void* value, int gfp_flags); | ||
| 74 | |||
| 75 | void* bheap_take_del(bheap_prio_t higher_prio, | ||
| 76 | struct bheap* heap); | ||
| 77 | #endif | ||
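Usage sketch (not part of the patch): callers store a pointer to their own element in a bheap_node and supply a priority comparator. The struct demo_item type, demo_higher_prio(), and demo_bheap() below are hypothetical; only the bheap_* calls come from bheap.h.

    #include <linux/slab.h>          /* GFP_ATOMIC */
    #include <litmus/bheap.h>

    struct demo_item {
            int prio;
            struct bheap_node *hn;
    };

    /* Comparator: non-zero iff 'a' has higher priority (smaller value). */
    static int demo_higher_prio(struct bheap_node *a, struct bheap_node *b)
    {
            return ((struct demo_item *) a->value)->prio <
                   ((struct demo_item *) b->value)->prio;
    }

    static void demo_bheap(struct demo_item *it)
    {
            struct bheap heap;
            struct bheap_node *top;

            bheap_init(&heap);
            it->hn = bheap_node_alloc(GFP_ATOMIC);
            if (!it->hn)
                    return;
            bheap_node_init(&it->hn, it);
            bheap_insert(demo_higher_prio, &heap, it->hn);

            top = bheap_take(demo_higher_prio, &heap); /* highest-priority node */
            bheap_node_free(top);
    }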
diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h new file mode 100644 index 00000000000..901a30a3e29 --- /dev/null +++ b/include/litmus/binheap.h | |||
| @@ -0,0 +1,206 @@ | |||
| 1 | #ifndef LITMUS_BINARY_HEAP_H | ||
| 2 | #define LITMUS_BINARY_HEAP_H | ||
| 3 | |||
| 4 | #include <linux/kernel.h> | ||
| 5 | |||
| 6 | /** | ||
| 7 | * Simple binary heap with add, arbitrary delete, delete_root, and top | ||
| 8 | * operations. | ||
| 9 | * | ||
| 10 | * Style meant to conform with list.h. | ||
| 11 | * | ||
| 12 | * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial | ||
| 13 | * heap may be overkill (and perhaps not general enough) for some applications. | ||
| 14 | * | ||
| 15 | * Note: In order to make node swaps fast, a node inserted with a data pointer | ||
| 16 | * may not always hold said data pointer. This is similar to the binomial heap | ||
| 17 | * implementation. This does make node deletion tricky since we have to | ||
| 18 | * (1) locate the node that holds the data pointer to delete, and (2) find | ||
| 19 | * the node that was originally inserted with said data pointer. These have to be | ||
| 20 | * coalesced into a single node before removal (see usage of | ||
| 21 | * __binheap_safe_swap()). We have to track node references to accomplish this. | ||
| 22 | */ | ||
| 23 | |||
| 24 | struct binheap_node { | ||
| 25 | void *data; | ||
| 26 | struct binheap_node *parent; | ||
| 27 | struct binheap_node *left; | ||
| 28 | struct binheap_node *right; | ||
| 29 | |||
| 30 | /* pointer to binheap_node that holds *data for which this binheap_node | ||
| 31 | * was originally inserted. (*data "owns" this node) | ||
| 32 | */ | ||
| 33 | struct binheap_node *ref; | ||
| 34 | struct binheap_node **ref_ptr; | ||
| 35 | }; | ||
| 36 | |||
| 37 | /** | ||
| 38 | * Signature of comparator function. Assumed 'less-than' (min-heap). | ||
| 39 | * Pass in 'greater-than' for max-heap. | ||
| 40 | * | ||
| 41 | * TODO: Consider macro-based implementation that allows comparator to be | ||
| 42 | * inlined (similar to Linux red/black tree) for greater efficiency. | ||
| 43 | */ | ||
| 44 | typedef int (*binheap_order_t)(struct binheap_node *a, | ||
| 45 | struct binheap_node *b); | ||
| 46 | |||
| 47 | |||
| 48 | struct binheap { | ||
| 49 | struct binheap_node *root; | ||
| 50 | |||
| 51 | /* pointer to node to take next inserted child */ | ||
| 52 | struct binheap_node *next; | ||
| 53 | |||
| 54 | /* pointer to last node in complete binary tree */ | ||
| 55 | struct binheap_node *last; | ||
| 56 | |||
| 57 | /* comparator function pointer */ | ||
| 58 | binheap_order_t compare; | ||
| 59 | }; | ||
| 60 | |||
| 61 | |||
| 62 | /* Initialized heap nodes not in a heap have parent | ||
| 63 | * set to BINHEAP_POISON. | ||
| 64 | */ | ||
| 65 | #define BINHEAP_POISON ((void*)(0xdeadbeef)) | ||
| 66 | |||
| 67 | |||
| 68 | /** | ||
| 69 | * binheap_entry - get the struct for this heap node. | ||
| 70 | * Only valid when called upon heap nodes other than the root handle. | ||
| 71 | * @ptr: the heap node. | ||
| 72 | * @type: the type of struct pointed to by binheap_node::data. | ||
| 73 | * @member: unused. | ||
| 74 | */ | ||
| 75 | #define binheap_entry(ptr, type, member) \ | ||
| 76 | ((type *)((ptr)->data)) | ||
| 77 | |||
| 78 | /** | ||
| 79 | * binheap_node_container - get the struct that contains this node. | ||
| 80 | * Only valid when called upon heap nodes other than the root handle. | ||
| 81 | * @ptr: the heap node. | ||
| 82 | * @type: the type of struct the node is embedded in. | ||
| 83 | * @member: the name of the binheap_struct within the (type) struct. | ||
| 84 | */ | ||
| 85 | #define binheap_node_container(ptr, type, member) \ | ||
| 86 | container_of((ptr), type, member) | ||
| 87 | |||
| 88 | /** | ||
| 89 | * binheap_top_entry - get the struct for the node at the top of the heap. | ||
| 90 | * Only valid when called upon the heap handle node. | ||
| 91 | * @ptr: the special heap-handle node. | ||
| 92 | * @type: the type of the struct the head is embedded in. | ||
| 93 | * @member: the name of the binheap_struct within the (type) struct. | ||
| 94 | */ | ||
| 95 | #define binheap_top_entry(ptr, type, member) \ | ||
| 96 | binheap_entry((ptr)->root, type, member) | ||
| 97 | |||
| 98 | /** | ||
| 99 | * binheap_delete_root - remove the root element from the heap. | ||
| 100 | * @handle: handle to the heap. | ||
| 101 | * @type: the type of the struct the head is embedded in. | ||
| 102 | * @member: the name of the binheap_struct within the (type) struct. | ||
| 103 | */ | ||
| 104 | #define binheap_delete_root(handle, type, member) \ | ||
| 105 | __binheap_delete_root((handle), &((type *)((handle)->root->data))->member) | ||
| 106 | |||
| 107 | /** | ||
| 108 | * binheap_delete - remove an arbitrary element from the heap. | ||
| 109 | * @to_delete: pointer to node to be removed. | ||
| 110 | * @handle: handle to the heap. | ||
| 111 | */ | ||
| 112 | #define binheap_delete(to_delete, handle) \ | ||
| 113 | __binheap_delete((to_delete), (handle)) | ||
| 114 | |||
| 115 | /** | ||
| 116 | * binheap_add - insert an element to the heap | ||
| 117 | * new_node: node to add. | ||
| 118 | * @handle: handle to the heap. | ||
| 119 | * @type: the type of the struct the head is embedded in. | ||
| 120 | * @member: the name of the binheap_struct within the (type) struct. | ||
| 121 | */ | ||
| 122 | #define binheap_add(new_node, handle, type, member) \ | ||
| 123 | __binheap_add((new_node), (handle), container_of((new_node), type, member)) | ||
| 124 | |||
| 125 | /** | ||
| 126 | * binheap_decrease - re-eval the position of a node (based upon its | ||
| 127 | * original data pointer). | ||
| 128 | * @handle: handle to the heap. | ||
| 129 | * @orig_node: node that was associated with the data pointer | ||
| 130 | * (whose value has changed) when said pointer was | ||
| 131 | * added to the heap. | ||
| 132 | */ | ||
| 133 | #define binheap_decrease(orig_node, handle) \ | ||
| 134 | __binheap_decrease((orig_node), (handle)) | ||
| 135 | |||
| 136 | #define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL, NULL, NULL } | ||
| 137 | |||
| 138 | #define BINHEAP_NODE(name) \ | ||
| 139 | struct binheap_node name = BINHEAP_NODE_INIT() | ||
| 140 | |||
| 141 | |||
| 142 | static inline void INIT_BINHEAP_NODE(struct binheap_node *n) | ||
| 143 | { | ||
| 144 | n->data = NULL; | ||
| 145 | n->parent = BINHEAP_POISON; | ||
| 146 | n->left = NULL; | ||
| 147 | n->right = NULL; | ||
| 148 | n->ref = NULL; | ||
| 149 | n->ref_ptr = NULL; | ||
| 150 | } | ||
| 151 | |||
| 152 | static inline void INIT_BINHEAP_HANDLE(struct binheap *handle, | ||
| 153 | binheap_order_t compare) | ||
| 154 | { | ||
| 155 | handle->root = NULL; | ||
| 156 | handle->next = NULL; | ||
| 157 | handle->last = NULL; | ||
| 158 | handle->compare = compare; | ||
| 159 | } | ||
| 160 | |||
| 161 | /* Returns true if binheap is empty. */ | ||
| 162 | static inline int binheap_empty(struct binheap *handle) | ||
| 163 | { | ||
| 164 | return(handle->root == NULL); | ||
| 165 | } | ||
| 166 | |||
| 167 | /* Returns true if binheap node is in a heap. */ | ||
| 168 | static inline int binheap_is_in_heap(struct binheap_node *node) | ||
| 169 | { | ||
| 170 | return (node->parent != BINHEAP_POISON); | ||
| 171 | } | ||
| 172 | |||
| 173 | /* Returns true if binheap node is in given heap. */ | ||
| 174 | int binheap_is_in_this_heap(struct binheap_node *node, struct binheap* heap); | ||
| 175 | |||
| 176 | /* Add a node to a heap */ | ||
| 177 | void __binheap_add(struct binheap_node *new_node, | ||
| 178 | struct binheap *handle, | ||
| 179 | void *data); | ||
| 180 | |||
| 181 | /** | ||
| 182 | * Removes the root node from the heap. The node is removed after coalescing | ||
| 183 | * the binheap_node with its original data pointer at the root of the tree. | ||
| 184 | * | ||
| 185 | * The 'last' node in the tree is then swapped up to the root and bubbled | ||
| 186 | * down. | ||
| 187 | */ | ||
| 188 | void __binheap_delete_root(struct binheap *handle, | ||
| 189 | struct binheap_node *container); | ||
| 190 | |||
| 191 | /** | ||
| 192 | * Delete an arbitrary node. Bubble node to delete up to the root, | ||
| 193 | * and then delete to root. | ||
| 194 | */ | ||
| 195 | void __binheap_delete(struct binheap_node *node_to_delete, | ||
| 196 | struct binheap *handle); | ||
| 197 | |||
| 198 | /** | ||
| 199 | * Bubble up a node whose key has decreased in value. | ||
| 200 | */ | ||
| 201 | void __binheap_decrease(struct binheap_node *orig_node, | ||
| 202 | struct binheap *handle); | ||
| 203 | |||
| 204 | |||
| 205 | #endif | ||
| 206 | |||
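Usage sketch (hypothetical struct demo_job and demo_lt(); only the binheap API comes from binheap.h). The node is embedded in the element, and the comparator recovers the element via binheap_entry():

    #include <litmus/binheap.h>

    struct demo_job {
            unsigned long long deadline;
            struct binheap_node node;        /* embedded heap node */
    };

    /* Min-heap ordered by deadline. */
    static int demo_lt(struct binheap_node *a, struct binheap_node *b)
    {
            return binheap_entry(a, struct demo_job, node)->deadline <
                   binheap_entry(b, struct demo_job, node)->deadline;
    }

    static void demo_binheap(void)
    {
            struct binheap handle;
            struct demo_job j = { .deadline = 10 };

            INIT_BINHEAP_HANDLE(&handle, demo_lt);
            INIT_BINHEAP_NODE(&j.node);

            binheap_add(&j.node, &handle, struct demo_job, node);

            if (!binheap_empty(&handle)) {
                    struct demo_job *top =
                            binheap_top_entry(&handle, struct demo_job, node);
                    binheap_delete_root(&handle, struct demo_job, node);
                    (void) top;
            }
    }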
diff --git a/include/litmus/budget.h b/include/litmus/budget.h new file mode 100644 index 00000000000..33344ee8d5f --- /dev/null +++ b/include/litmus/budget.h | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | #ifndef _LITMUS_BUDGET_H_ | ||
| 2 | #define _LITMUS_BUDGET_H_ | ||
| 3 | |||
| 4 | /* Update the per-processor enforcement timer (arm/reprogram/cancel) for | ||
| 5 | * the next task. */ | ||
| 6 | void update_enforcement_timer(struct task_struct* t); | ||
| 7 | |||
| 8 | inline static int budget_exhausted(struct task_struct* t) | ||
| 9 | { | ||
| 10 | return get_exec_time(t) >= get_exec_cost(t); | ||
| 11 | } | ||
| 12 | |||
| 13 | inline static lt_t budget_remaining(struct task_struct* t) | ||
| 14 | { | ||
| 15 | if (!budget_exhausted(t)) | ||
| 16 | return get_exec_cost(t) - get_exec_time(t); | ||
| 17 | else | ||
| 18 | /* avoid overflow */ | ||
| 19 | return 0; | ||
| 20 | } | ||
| 21 | |||
| 22 | #define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) | ||
| 23 | |||
| 24 | #define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \ | ||
| 25 | == PRECISE_ENFORCEMENT) | ||
| 26 | |||
| 27 | static inline int requeue_preempted_job(struct task_struct* t) | ||
| 28 | { | ||
| 29 | /* Add task to ready queue only if not subject to budget enforcement or | ||
| 30 | * if the job has budget remaining. t may be NULL. | ||
| 31 | */ | ||
| 32 | return t && (!budget_exhausted(t) || !budget_enforced(t)); | ||
| 33 | } | ||
| 34 | |||
| 35 | #endif | ||
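For context, a plugin would typically combine these helpers roughly as sketched below; demo_check_budget() is illustrative only and assumes the usual LITMUS^RT task state (tsk_rt(t) initialized).

    #include <linux/sched.h>
    #include <litmus/litmus.h>
    #include <litmus/budget.h>
    #include <litmus/debug_trace.h>

    static void demo_check_budget(struct task_struct *t)
    {
            if (budget_enforced(t) && budget_exhausted(t)) {
                    /* overran its budget; budget_remaining() is clamped to 0 */
                    TRACE_TASK(t, "budget exhausted (%llu ns left)\n",
                               (unsigned long long) budget_remaining(t));
            } else if (requeue_preempted_job(t)) {
                    /* t is non-NULL and either unenforced or has budget left;
                     * a plugin would put it back on its ready queue here */
            }
    }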
diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h new file mode 100644 index 00000000000..0c18dcb15e6 --- /dev/null +++ b/include/litmus/clustered.h | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | #ifndef CLUSTERED_H | ||
| 2 | #define CLUSTERED_H | ||
| 3 | |||
| 4 | /* Which cache level should be used to group CPUs into clusters? | ||
| 5 | * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under | ||
| 6 | * global scheduling). | ||
| 7 | */ | ||
| 8 | enum cache_level { | ||
| 9 | GLOBAL_CLUSTER = 0, | ||
| 10 | L1_CLUSTER = 1, | ||
| 11 | L2_CLUSTER = 2, | ||
| 12 | L3_CLUSTER = 3 | ||
| 13 | }; | ||
| 14 | |||
| 15 | int parse_cache_level(const char *str, enum cache_level *level); | ||
| 16 | const char* cache_level_name(enum cache_level level); | ||
| 17 | |||
| 18 | /* expose a cache level in a /proc dir */ | ||
| 19 | struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent, | ||
| 20 | enum cache_level* level); | ||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | struct scheduling_cluster { | ||
| 25 | unsigned int id; | ||
| 26 | /* list of CPUs that are part of this cluster */ | ||
| 27 | struct list_head cpus; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct cluster_cpu { | ||
| 31 | unsigned int id; /* which CPU is this? */ | ||
| 32 | struct list_head cluster_list; /* List of the CPUs in this cluster. */ | ||
| 33 | struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */ | ||
| 34 | }; | ||
| 35 | |||
| 36 | int get_cluster_size(enum cache_level level); | ||
| 37 | |||
| 38 | int assign_cpus_to_clusters(enum cache_level level, | ||
| 39 | struct scheduling_cluster* clusters[], | ||
| 40 | unsigned int num_clusters, | ||
| 41 | struct cluster_cpu* cpus[], | ||
| 42 | unsigned int num_cpus); | ||
| 43 | |||
| 44 | #endif | ||
diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h new file mode 100644 index 00000000000..928b1dfd1db --- /dev/null +++ b/include/litmus/debug_trace.h | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | #ifndef LITMUS_DEBUG_TRACE_H | ||
| 2 | #define LITMUS_DEBUG_TRACE_H | ||
| 3 | |||
| 4 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
| 5 | void sched_trace_log_message(const char* fmt, ...); | ||
| 6 | void dump_trace_buffer(int max); | ||
| 7 | #else | ||
| 8 | |||
| 9 | #define sched_trace_log_message(fmt, ...) | ||
| 10 | |||
| 11 | #endif | ||
| 12 | |||
| 13 | extern atomic_t __log_seq_no; | ||
| 14 | |||
| 15 | #ifdef CONFIG_SCHED_DEBUG_TRACE_CALLER | ||
| 16 | #define TRACE_PREFIX "%d P%d [%s@%s:%d]: " | ||
| 17 | #define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \ | ||
| 18 | raw_smp_processor_id(), \ | ||
| 19 | __FUNCTION__, __FILE__, __LINE__ | ||
| 20 | #define STRACE(fmt, args...) \ | ||
| 21 | sched_trace_log_message("%d P%d [%s@%s:%d]: " fmt, \ | ||
| 22 | TRACE_ARGS, ## args) | ||
| 23 | #define STRACE2(fmt, args...) \ | ||
| 24 | sched_trace_log_message("%d P%d [%s@%s:%d]: " fmt, \ | ||
| 25 | TRACE_ARGS, ## args) | ||
| 26 | #else | ||
| 27 | #define TRACE_PREFIX "%d P%d: " | ||
| 28 | #define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \ | ||
| 29 | raw_smp_processor_id() | ||
| 30 | #define STRACE(fmt, args...) \ | ||
| 31 | sched_trace_log_message("%d P%d : " fmt, \ | ||
| 32 | TRACE_ARGS, ## args) | ||
| 33 | #define STRACE2(fmt, args...) \ | ||
| 34 | sched_trace_log_message("%d P%d : " fmt, \ | ||
| 35 | TRACE_ARGS, ## args) | ||
| 36 | #endif | ||
| 37 | |||
| 38 | #define TRACE(fmt, args...) \ | ||
| 39 | sched_trace_log_message(TRACE_PREFIX fmt, \ | ||
| 40 | TRACE_ARGS, ## args) | ||
| 41 | |||
| 42 | #define TRACE_TASK(t, fmt, args...) \ | ||
| 43 | TRACE("(%s/%d:%d) " fmt, \ | ||
| 44 | t ? (t)->comm : "null", \ | ||
| 45 | t ? (t)->pid : 0, \ | ||
| 46 | t ? (t)->rt_param.job_params.job_no : 0, \ | ||
| 47 | ##args) | ||
| 48 | |||
| 49 | #define STRACE_TASK(t, fmt, args...) \ | ||
| 50 | STRACE("(%s/%d:%d) " fmt, (t)->comm, (t)->pid, \ | ||
| 51 | (t)->rt_param.job_params.job_no, ##args) | ||
| 52 | |||
| 53 | #define TRACE_CUR(fmt, args...) \ | ||
| 54 | TRACE_TASK(current, fmt, ## args) | ||
| 55 | |||
| 56 | |||
| 57 | |||
| 58 | #endif | ||
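Usage sketch of the tracing macros (the messages are arbitrary and demo_trace() is not part of the patch):

    #include <linux/sched.h>
    #include <litmus/debug_trace.h>

    static void demo_trace(struct task_struct *t)
    {
            TRACE("plugin-level event with no task context\n");
            TRACE_TASK(t, "was selected for execution\n");
            TRACE_CUR("current task reached a checkpoint\n");
    }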
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h new file mode 100644 index 00000000000..bbaf22ea7f1 --- /dev/null +++ b/include/litmus/edf_common.h | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | /* | ||
| 2 | * EDF common data structures and utility functions shared by all EDF | ||
| 3 | * based scheduler plugins | ||
| 4 | */ | ||
| 5 | |||
| 6 | /* CLEANUP: Add comments and make it less messy. | ||
| 7 | * | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __UNC_EDF_COMMON_H__ | ||
| 11 | #define __UNC_EDF_COMMON_H__ | ||
| 12 | |||
| 13 | #include <litmus/rt_domain.h> | ||
| 14 | |||
| 15 | void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
| 16 | release_jobs_t release); | ||
| 17 | |||
| 18 | int edf_higher_prio(struct task_struct* first, | ||
| 19 | struct task_struct* second); | ||
| 20 | |||
| 21 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b); | ||
| 22 | |||
| 23 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t); | ||
| 24 | |||
| 25 | #endif | ||
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h new file mode 100644 index 00000000000..85a649e2722 --- /dev/null +++ b/include/litmus/fdso.h | |||
| @@ -0,0 +1,81 @@ | |||
| 1 | /* fdso.h - file descriptor attached shared objects | ||
| 2 | * | ||
| 3 | * (c) 2007 B. Brandenburg, LITMUS^RT project | ||
| 4 | */ | ||
| 5 | |||
| 6 | #ifndef _LINUX_FDSO_H_ | ||
| 7 | #define _LINUX_FDSO_H_ | ||
| 8 | |||
| 9 | #include <linux/list.h> | ||
| 10 | #include <asm/atomic.h> | ||
| 11 | |||
| 12 | #include <linux/fs.h> | ||
| 13 | #include <linux/slab.h> | ||
| 14 | |||
| 15 | #define MAX_OBJECT_DESCRIPTORS 32 | ||
| 16 | |||
| 17 | typedef unsigned int resource_mask_t; | ||
| 18 | |||
| 19 | typedef enum { | ||
| 20 | MIN_OBJ_TYPE = 0, | ||
| 21 | |||
| 22 | FMLP_SEM = 0, | ||
| 23 | SRP_SEM = 1, | ||
| 24 | |||
| 25 | MPCP_SEM = 2, | ||
| 26 | MPCP_VS_SEM = 3, | ||
| 27 | DPCP_SEM = 4, | ||
| 28 | |||
| 29 | PCP_SEM = 5, | ||
| 30 | |||
| 31 | DGL_SEM = 6, | ||
| 32 | |||
| 33 | MAX_OBJ_TYPE = 6 | ||
| 34 | } obj_type_t; | ||
| 35 | |||
| 36 | struct inode_obj_id { | ||
| 37 | struct list_head list; | ||
| 38 | atomic_t count; | ||
| 39 | struct inode* inode; | ||
| 40 | |||
| 41 | obj_type_t type; | ||
| 42 | void* obj; | ||
| 43 | unsigned int id; | ||
| 44 | }; | ||
| 45 | |||
| 46 | struct fdso_ops; | ||
| 47 | |||
| 48 | struct od_table_entry { | ||
| 49 | unsigned int used; | ||
| 50 | |||
| 51 | struct inode_obj_id* obj; | ||
| 52 | const struct fdso_ops* class; | ||
| 53 | }; | ||
| 54 | |||
| 55 | struct fdso_ops { | ||
| 56 | int (*create)(void** obj_ref, obj_type_t type, void* __user); | ||
| 57 | void (*destroy)(obj_type_t type, void*); | ||
| 58 | int (*open) (struct od_table_entry*, void* __user); | ||
| 59 | int (*close) (struct od_table_entry*); | ||
| 60 | }; | ||
| 61 | |||
| 62 | /* translate a userspace supplied od into the raw table entry | ||
| 63 | * returns NULL if od is invalid | ||
| 64 | */ | ||
| 65 | struct od_table_entry* get_entry_for_od(int od); | ||
| 66 | |||
| 67 | /* translate a userspace supplied od into the associated object | ||
| 68 | * returns NULL if od is invalid | ||
| 69 | */ | ||
| 70 | static inline void* od_lookup(int od, obj_type_t type) | ||
| 71 | { | ||
| 72 | struct od_table_entry* e = get_entry_for_od(od); | ||
| 73 | return e && e->obj->type == type ? e->obj->obj : NULL; | ||
| 74 | } | ||
| 75 | |||
| 76 | #define lookup_fmlp_sem(od)((struct fmlp_semaphore*) od_lookup(od, FMLP_SEM)) | ||
| 77 | #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) | ||
| 78 | #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) | ||
| 79 | |||
| 80 | |||
| 81 | #endif | ||
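Usage sketch: a syscall handler resolves a userspace object descriptor to a typed object. demo_get_srp() is hypothetical; struct srp_semaphore is defined elsewhere in the patch and only forward-declared here.

    #include <litmus/fdso.h>

    struct srp_semaphore;   /* defined by the SRP implementation */

    static struct srp_semaphore *demo_get_srp(int od)
    {
            /* NULL if 'od' is invalid or not an SRP semaphore */
            return (struct srp_semaphore *) od_lookup(od, SRP_SEM);
    }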
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h new file mode 100644 index 00000000000..6c18277fdfc --- /dev/null +++ b/include/litmus/feather_buffer.h | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | #ifndef _FEATHER_BUFFER_H_ | ||
| 2 | #define _FEATHER_BUFFER_H_ | ||
| 3 | |||
| 4 | /* requires UINT_MAX and memcpy */ | ||
| 5 | |||
| 6 | #define SLOT_FREE 0 | ||
| 7 | #define SLOT_BUSY 1 | ||
| 8 | #define SLOT_READY 2 | ||
| 9 | |||
| 10 | struct ft_buffer { | ||
| 11 | unsigned int slot_count; | ||
| 12 | unsigned int slot_size; | ||
| 13 | |||
| 14 | int free_count; | ||
| 15 | unsigned int write_idx; | ||
| 16 | unsigned int read_idx; | ||
| 17 | |||
| 18 | char* slots; | ||
| 19 | void* buffer_mem; | ||
| 20 | unsigned int failed_writes; | ||
| 21 | }; | ||
| 22 | |||
| 23 | static inline int init_ft_buffer(struct ft_buffer* buf, | ||
| 24 | unsigned int slot_count, | ||
| 25 | unsigned int slot_size, | ||
| 26 | char* slots, | ||
| 27 | void* buffer_mem) | ||
| 28 | { | ||
| 29 | int i = 0; | ||
| 30 | if (!slot_count || UINT_MAX % slot_count != slot_count - 1) { | ||
| 31 | /* The slot count must divide UINT_MAX + 1 so that when it | ||
| 32 | * wraps around the index correctly points to 0. | ||
| 33 | */ | ||
| 34 | return 0; | ||
| 35 | } else { | ||
| 36 | buf->slot_count = slot_count; | ||
| 37 | buf->slot_size = slot_size; | ||
| 38 | buf->slots = slots; | ||
| 39 | buf->buffer_mem = buffer_mem; | ||
| 40 | buf->free_count = slot_count; | ||
| 41 | buf->write_idx = 0; | ||
| 42 | buf->read_idx = 0; | ||
| 43 | buf->failed_writes = 0; | ||
| 44 | for (i = 0; i < slot_count; i++) | ||
| 45 | buf->slots[i] = SLOT_FREE; | ||
| 46 | return 1; | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr) | ||
| 51 | { | ||
| 52 | int free = fetch_and_dec(&buf->free_count); | ||
| 53 | unsigned int idx; | ||
| 54 | if (free <= 0) { | ||
| 55 | fetch_and_inc(&buf->free_count); | ||
| 56 | *ptr = 0; | ||
| 57 | fetch_and_inc(&buf->failed_writes); | ||
| 58 | return 0; | ||
| 59 | } else { | ||
| 60 | idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count; | ||
| 61 | buf->slots[idx] = SLOT_BUSY; | ||
| 62 | *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size; | ||
| 63 | return 1; | ||
| 64 | } | ||
| 65 | } | ||
| 66 | |||
| 67 | static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr) | ||
| 68 | { | ||
| 69 | unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size; | ||
| 70 | buf->slots[idx] = SLOT_READY; | ||
| 71 | } | ||
| 72 | |||
| 73 | |||
| 74 | /* exclusive reader access is assumed */ | ||
| 75 | static inline int ft_buffer_read(struct ft_buffer* buf, void* dest) | ||
| 76 | { | ||
| 77 | unsigned int idx; | ||
| 78 | if (buf->free_count == buf->slot_count) | ||
| 79 | /* nothing available */ | ||
| 80 | return 0; | ||
| 81 | idx = buf->read_idx % buf->slot_count; | ||
| 82 | if (buf->slots[idx] == SLOT_READY) { | ||
| 83 | memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size, | ||
| 84 | buf->slot_size); | ||
| 85 | buf->slots[idx] = SLOT_FREE; | ||
| 86 | buf->read_idx++; | ||
| 87 | fetch_and_inc(&buf->free_count); | ||
| 88 | return 1; | ||
| 89 | } else | ||
| 90 | return 0; | ||
| 91 | } | ||
| 92 | |||
| 93 | |||
| 94 | #endif | ||
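Usage sketch of one write/read cycle (the sample type and buffer sizes are assumptions; fetch_and_inc()/fetch_and_dec() come from feather_trace.h, which must be included first):

    #include <litmus/feather_trace.h>
    #include <litmus/feather_buffer.h>

    struct demo_sample { unsigned long long timestamp; };

    #define DEMO_SLOTS 16   /* must divide UINT_MAX + 1 */
    static char demo_flags[DEMO_SLOTS];
    static struct demo_sample demo_mem[DEMO_SLOTS];
    static struct ft_buffer demo_buf;

    static void demo_ft_buffer(void)
    {
            void *raw;
            struct demo_sample out;

            init_ft_buffer(&demo_buf, DEMO_SLOTS, sizeof(struct demo_sample),
                           demo_flags, demo_mem);

            if (ft_buffer_start_write(&demo_buf, &raw)) {
                    ((struct demo_sample *) raw)->timestamp = 42;
                    ft_buffer_finish_write(&demo_buf, raw);
            }

            if (ft_buffer_read(&demo_buf, &out)) {
                    /* 'out' now holds the oldest ready sample */
            }
    }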
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h new file mode 100644 index 00000000000..028dfb206fb --- /dev/null +++ b/include/litmus/feather_trace.h | |||
| @@ -0,0 +1,65 @@ | |||
| 1 | #ifndef _FEATHER_TRACE_H_ | ||
| 2 | #define _FEATHER_TRACE_H_ | ||
| 3 | |||
| 4 | #include <asm/atomic.h> | ||
| 5 | |||
| 6 | int ft_enable_event(unsigned long id); | ||
| 7 | int ft_disable_event(unsigned long id); | ||
| 8 | int ft_is_event_enabled(unsigned long id); | ||
| 9 | int ft_disable_all_events(void); | ||
| 10 | |||
| 11 | /* atomic_* functions are inline anyway */ | ||
| 12 | static inline int fetch_and_inc(int *val) | ||
| 13 | { | ||
| 14 | return atomic_add_return(1, (atomic_t*) val) - 1; | ||
| 15 | } | ||
| 16 | |||
| 17 | static inline int fetch_and_dec(int *val) | ||
| 18 | { | ||
| 19 | return atomic_sub_return(1, (atomic_t*) val) + 1; | ||
| 20 | } | ||
| 21 | |||
| 22 | /* Don't use rewriting implementation if kernel text pages are read-only. | ||
| 23 | * Ftrace gets around this by using the identity mapping, but that's more | ||
| 24 | * effort than is warranted right now for Feather-Trace. | ||
| 25 | * Eventually, it may make sense to replace Feather-Trace with ftrace. | ||
| 26 | */ | ||
| 27 | #if defined(CONFIG_ARCH_HAS_FEATHER_TRACE) && !defined(CONFIG_DEBUG_RODATA) | ||
| 28 | |||
| 29 | #include <asm/feather_trace.h> | ||
| 30 | |||
| 31 | #else /* !CONFIG_ARCH_HAS_FEATHER_TRACE */ | ||
| 32 | |||
| 33 | /* provide default implementation */ | ||
| 34 | |||
| 35 | #include <asm/timex.h> /* for get_cycles() */ | ||
| 36 | |||
| 37 | static inline unsigned long long ft_timestamp(void) | ||
| 38 | { | ||
| 39 | return get_cycles(); | ||
| 40 | } | ||
| 41 | |||
| 42 | #define feather_callback | ||
| 43 | |||
| 44 | #define MAX_EVENTS 1024 | ||
| 45 | |||
| 46 | extern int ft_events[MAX_EVENTS]; | ||
| 47 | |||
| 48 | #define ft_event(id, callback) \ | ||
| 49 | if (ft_events[id]) callback(); | ||
| 50 | |||
| 51 | #define ft_event0(id, callback) \ | ||
| 52 | if (ft_events[id]) callback(id); | ||
| 53 | |||
| 54 | #define ft_event1(id, callback, param) \ | ||
| 55 | if (ft_events[id]) callback(id, param); | ||
| 56 | |||
| 57 | #define ft_event2(id, callback, param, param2) \ | ||
| 58 | if (ft_events[id]) callback(id, param, param2); | ||
| 59 | |||
| 60 | #define ft_event3(id, callback, p, p2, p3) \ | ||
| 61 | if (ft_events[id]) callback(id, p, p2, p3); | ||
| 62 | |||
| 63 | #endif /* CONFIG_ARCH_HAS_FEATHER_TRACE */ | ||
| 64 | |||
| 65 | #endif | ||
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h new file mode 100644 index 00000000000..19356c0fa6c --- /dev/null +++ b/include/litmus/fp_common.h | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | /* Fixed-priority scheduler support. | ||
| 2 | */ | ||
| 3 | |||
| 4 | #ifndef __FP_COMMON_H__ | ||
| 5 | #define __FP_COMMON_H__ | ||
| 6 | |||
| 7 | #include <litmus/rt_domain.h> | ||
| 8 | |||
| 9 | #include <asm/bitops.h> | ||
| 10 | |||
| 11 | |||
| 12 | void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
| 13 | release_jobs_t release); | ||
| 14 | |||
| 15 | int fp_higher_prio(struct task_struct* first, | ||
| 16 | struct task_struct* second); | ||
| 17 | |||
| 18 | int fp_ready_order(struct bheap_node* a, struct bheap_node* b); | ||
| 19 | |||
| 20 | #define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG) | ||
| 21 | |||
| 22 | #if (LITMUS_MAX_PRIORITY % BITS_PER_LONG) | ||
| 23 | #error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG | ||
| 24 | #endif | ||
| 25 | |||
| 26 | /* bitmask-indexed priority queue */ | ||
| 27 | struct fp_prio_queue { | ||
| 28 | unsigned long bitmask[FP_PRIO_BIT_WORDS]; | ||
| 29 | struct bheap queue[LITMUS_MAX_PRIORITY]; | ||
| 30 | }; | ||
| 31 | |||
| 32 | void fp_prio_queue_init(struct fp_prio_queue* q); | ||
| 33 | |||
| 34 | static inline void fpq_set(struct fp_prio_queue* q, unsigned int index) | ||
| 35 | { | ||
| 36 | unsigned long *word = q->bitmask + (index / BITS_PER_LONG); | ||
| 37 | __set_bit(index % BITS_PER_LONG, word); | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index) | ||
| 41 | { | ||
| 42 | unsigned long *word = q->bitmask + (index / BITS_PER_LONG); | ||
| 43 | __clear_bit(index % BITS_PER_LONG, word); | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline unsigned int fpq_find(struct fp_prio_queue* q) | ||
| 47 | { | ||
| 48 | int i; | ||
| 49 | |||
| 50 | /* loop optimizer should unroll this */ | ||
| 51 | for (i = 0; i < FP_PRIO_BIT_WORDS; i++) | ||
| 52 | if (q->bitmask[i]) | ||
| 53 | return __ffs(q->bitmask[i]) + i * BITS_PER_LONG; | ||
| 54 | |||
| 55 | return LITMUS_MAX_PRIORITY; /* nothing found */ | ||
| 56 | } | ||
| 57 | |||
| 58 | static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index) | ||
| 59 | { | ||
| 60 | BUG_ON(index >= LITMUS_MAX_PRIORITY); | ||
| 61 | BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node)); | ||
| 62 | |||
| 63 | fpq_set(q, index); | ||
| 64 | bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node); | ||
| 65 | } | ||
| 66 | |||
| 67 | static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index) | ||
| 68 | { | ||
| 69 | BUG_ON(!is_queued(t)); | ||
| 70 | |||
| 71 | bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node); | ||
| 72 | if (likely(bheap_empty(&q->queue[index]))) | ||
| 73 | fpq_clear(q, index); | ||
| 74 | } | ||
| 75 | |||
| 76 | static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q) | ||
| 77 | { | ||
| 78 | unsigned int idx = fpq_find(q); | ||
| 79 | struct bheap_node* hn; | ||
| 80 | |||
| 81 | if (idx < LITMUS_MAX_PRIORITY) { | ||
| 82 | hn = bheap_peek(fp_ready_order, &q->queue[idx]); | ||
| 83 | return bheap2task(hn); | ||
| 84 | } else | ||
| 85 | return NULL; | ||
| 86 | } | ||
| 87 | |||
| 88 | static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q) | ||
| 89 | { | ||
| 90 | unsigned int idx = fpq_find(q); | ||
| 91 | struct bheap_node* hn; | ||
| 92 | |||
| 93 | if (idx < LITMUS_MAX_PRIORITY) { | ||
| 94 | hn = bheap_take(fp_ready_order, &q->queue[idx]); | ||
| 95 | if (likely(bheap_empty(&q->queue[idx]))) | ||
| 96 | fpq_clear(q, idx); | ||
| 97 | return bheap2task(hn); | ||
| 98 | } else | ||
| 99 | return NULL; | ||
| 100 | } | ||
| 101 | |||
| 102 | int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t); | ||
| 103 | |||
| 104 | |||
| 105 | #endif | ||
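Usage sketch of the fixed-priority queue (demo_fp() is illustrative; it assumes t is a LITMUS^RT task whose heap_node has already been allocated and initialized):

    #include <linux/sched.h>
    #include <litmus/litmus.h>
    #include <litmus/debug_trace.h>
    #include <litmus/fp_common.h>

    static void demo_fp(struct fp_prio_queue *q, struct task_struct *t,
                        unsigned int prio)
    {
            fp_prio_queue_init(q);

            fp_prio_add(q, t, prio);          /* enqueue at priority 'prio' */
            if (fp_prio_peek(q) == t)
                    TRACE_TASK(t, "is the highest-priority queued task\n");
            fp_prio_take(q);                  /* dequeue it again */
    }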
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h new file mode 100644 index 00000000000..642de98542c --- /dev/null +++ b/include/litmus/fpmath.h | |||
| @@ -0,0 +1,147 @@ | |||
| 1 | #ifndef __FP_MATH_H__ | ||
| 2 | #define __FP_MATH_H__ | ||
| 3 | |||
| 4 | #include <linux/math64.h> | ||
| 5 | |||
| 6 | #ifndef __KERNEL__ | ||
| 7 | #include <stdint.h> | ||
| 8 | #define abs(x) (((x) < 0) ? -(x) : x) | ||
| 9 | #endif | ||
| 10 | |||
| 11 | // Use 64-bit because we want to track things at the nanosecond scale. | ||
| 12 | // This can lead to very large numbers. | ||
| 13 | typedef int64_t fpbuf_t; | ||
| 14 | typedef struct | ||
| 15 | { | ||
| 16 | fpbuf_t val; | ||
| 17 | } fp_t; | ||
| 18 | |||
| 19 | #define FP_SHIFT 10 | ||
| 20 | #define ROUND_BIT (FP_SHIFT - 1) | ||
| 21 | |||
| 22 | #define _fp(x) ((fp_t) {x}) | ||
| 23 | |||
| 24 | #ifdef __KERNEL__ | ||
| 25 | static const fp_t LITMUS_FP_ZERO = {.val = 0}; | ||
| 26 | static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)}; | ||
| 27 | #endif | ||
| 28 | |||
| 29 | static inline fp_t FP(fpbuf_t x) | ||
| 30 | { | ||
| 31 | return _fp(((fpbuf_t) x) << FP_SHIFT); | ||
| 32 | } | ||
| 33 | |||
| 34 | /* divide two integers to obtain a fixed point value */ | ||
| 35 | static inline fp_t _frac(fpbuf_t a, fpbuf_t b) | ||
| 36 | { | ||
| 37 | return _fp(div64_s64(FP(a).val, (b))); | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline fpbuf_t _point(fp_t x) | ||
| 41 | { | ||
| 42 | return (x.val % (1 << FP_SHIFT)); | ||
| 43 | |||
| 44 | } | ||
| 45 | |||
| 46 | #define fp2str(x) x.val | ||
| 47 | /*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */ | ||
| 48 | #define _FP_ "%ld/1024" | ||
| 49 | |||
| 50 | static inline fpbuf_t _floor(fp_t x) | ||
| 51 | { | ||
| 52 | return x.val >> FP_SHIFT; | ||
| 53 | } | ||
| 54 | |||
| 55 | /* FIXME: negative rounding */ | ||
| 56 | static inline fpbuf_t _round(fp_t x) | ||
| 57 | { | ||
| 58 | return _floor(x) + ((x.val >> ROUND_BIT) & 1); | ||
| 59 | } | ||
| 60 | |||
| 61 | /* multiply two fixed point values */ | ||
| 62 | static inline fp_t _mul(fp_t a, fp_t b) | ||
| 63 | { | ||
| 64 | return _fp((a.val * b.val) >> FP_SHIFT); | ||
| 65 | } | ||
| 66 | |||
| 67 | static inline fp_t _div(fp_t a, fp_t b) | ||
| 68 | { | ||
| 69 | #if !defined(__KERNEL__) && !defined(unlikely) | ||
| 70 | #define unlikely(x) (x) | ||
| 71 | #define DO_UNDEF_UNLIKELY | ||
| 72 | #endif | ||
| 73 | /* try not to overflow */ | ||
| 74 | if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) )) | ||
| 75 | return _fp((a.val / b.val) << FP_SHIFT); | ||
| 76 | else | ||
| 77 | return _fp((a.val << FP_SHIFT) / b.val); | ||
| 78 | #ifdef DO_UNDEF_UNLIKELY | ||
| 79 | #undef unlikely | ||
| 80 | #undef DO_UNDEF_UNLIKELY | ||
| 81 | #endif | ||
| 82 | } | ||
| 83 | |||
| 84 | static inline fp_t _add(fp_t a, fp_t b) | ||
| 85 | { | ||
| 86 | return _fp(a.val + b.val); | ||
| 87 | } | ||
| 88 | |||
| 89 | static inline fp_t _sub(fp_t a, fp_t b) | ||
| 90 | { | ||
| 91 | return _fp(a.val - b.val); | ||
| 92 | } | ||
| 93 | |||
| 94 | static inline fp_t _neg(fp_t x) | ||
| 95 | { | ||
| 96 | return _fp(-x.val); | ||
| 97 | } | ||
| 98 | |||
| 99 | static inline fp_t _abs(fp_t x) | ||
| 100 | { | ||
| 101 | return _fp(abs(x.val)); | ||
| 102 | } | ||
| 103 | |||
| 104 | /* works the same as casting float/double to integer */ | ||
| 105 | static inline fpbuf_t _fp_to_integer(fp_t x) | ||
| 106 | { | ||
| 107 | return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1); | ||
| 108 | } | ||
| 109 | |||
| 110 | static inline fp_t _integer_to_fp(fpbuf_t x) | ||
| 111 | { | ||
| 112 | return _frac(x,1); | ||
| 113 | } | ||
| 114 | |||
| 115 | static inline int _leq(fp_t a, fp_t b) | ||
| 116 | { | ||
| 117 | return a.val <= b.val; | ||
| 118 | } | ||
| 119 | |||
| 120 | static inline int _geq(fp_t a, fp_t b) | ||
| 121 | { | ||
| 122 | return a.val >= b.val; | ||
| 123 | } | ||
| 124 | |||
| 125 | static inline int _lt(fp_t a, fp_t b) | ||
| 126 | { | ||
| 127 | return a.val < b.val; | ||
| 128 | } | ||
| 129 | |||
| 130 | static inline int _gt(fp_t a, fp_t b) | ||
| 131 | { | ||
| 132 | return a.val > b.val; | ||
| 133 | } | ||
| 134 | |||
| 135 | static inline int _eq(fp_t a, fp_t b) | ||
| 136 | { | ||
| 137 | return a.val == b.val; | ||
| 138 | } | ||
| 139 | |||
| 140 | static inline fp_t _max(fp_t a, fp_t b) | ||
| 141 | { | ||
| 142 | if (a.val < b.val) | ||
| 143 | return b; | ||
| 144 | else | ||
| 145 | return a; | ||
| 146 | } | ||
| 147 | #endif | ||
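A small worked example: with FP_SHIFT = 10, one fixed-point unit is 1/1024 (demo_fp_math() and the chosen values are illustrative):

    #include <litmus/fpmath.h>

    static void demo_fp_math(void)
    {
            fp_t third = _frac(1, 3);       /* ~0.333  -> 341/1024 */
            fp_t half  = _frac(1, 2);       /*  0.5    -> 512/1024 */
            fp_t sum   = _add(third, half); /* ~0.833  -> 853/1024 */
            fp_t prod  = _mul(third, half); /* ~0.166  -> 170/1024 */

            /* _fp_to_integer truncates toward zero, so both results are 0 */
            fpbuf_t s = _fp_to_integer(sum);
            fpbuf_t p = _fp_to_integer(prod);
            (void) s; (void) p;
    }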
diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h new file mode 100644 index 00000000000..0b959874dd7 --- /dev/null +++ b/include/litmus/ftdev.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | #ifndef _LITMUS_FTDEV_H_ | ||
| 2 | #define _LITMUS_FTDEV_H_ | ||
| 3 | |||
| 4 | #include <litmus/feather_trace.h> | ||
| 5 | #include <litmus/feather_buffer.h> | ||
| 6 | #include <linux/mutex.h> | ||
| 7 | #include <linux/cdev.h> | ||
| 8 | |||
| 9 | #define FTDEV_ENABLE_CMD 0 | ||
| 10 | #define FTDEV_DISABLE_CMD 1 | ||
| 11 | |||
| 12 | struct ftdev; | ||
| 13 | |||
| 14 | /* return 0 if buffer can be opened, otherwise -$REASON */ | ||
| 15 | typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no); | ||
| 16 | /* return 0 on success, otherwise -$REASON */ | ||
| 17 | typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no); | ||
| 18 | typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no); | ||
| 19 | /* Let devices handle writes from userspace. No synchronization provided. */ | ||
| 20 | typedef ssize_t (*ftdev_write_t)(struct ft_buffer* buf, size_t len, const char __user *from); | ||
| 21 | |||
| 22 | struct ftdev_event; | ||
| 23 | |||
| 24 | struct ftdev_minor { | ||
| 25 | struct ft_buffer* buf; | ||
| 26 | unsigned int readers; | ||
| 27 | struct mutex lock; | ||
| 28 | /* FIXME: filter for authorized events */ | ||
| 29 | struct ftdev_event* events; | ||
| 30 | struct device* device; | ||
| 31 | struct ftdev* ftdev; | ||
| 32 | }; | ||
| 33 | |||
| 34 | struct ftdev { | ||
| 35 | dev_t major; | ||
| 36 | struct cdev cdev; | ||
| 37 | struct class* class; | ||
| 38 | const char* name; | ||
| 39 | struct ftdev_minor* minor; | ||
| 40 | unsigned int minor_cnt; | ||
| 41 | ftdev_alloc_t alloc; | ||
| 42 | ftdev_free_t free; | ||
| 43 | ftdev_can_open_t can_open; | ||
| 44 | ftdev_write_t write; | ||
| 45 | }; | ||
| 46 | |||
| 47 | struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size); | ||
| 48 | void free_ft_buffer(struct ft_buffer* buf); | ||
| 49 | |||
| 50 | int ftdev_init( struct ftdev* ftdev, struct module* owner, | ||
| 51 | const int minor_cnt, const char* name); | ||
| 52 | void ftdev_exit(struct ftdev* ftdev); | ||
| 53 | int register_ftdev(struct ftdev* ftdev); | ||
| 54 | |||
| 55 | #endif | ||
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h new file mode 100644 index 00000000000..9bd361ef394 --- /dev/null +++ b/include/litmus/jobs.h | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | #ifndef __LITMUS_JOBS_H__ | ||
| 2 | #define __LITMUS_JOBS_H__ | ||
| 3 | |||
| 4 | void prepare_for_next_period(struct task_struct *t); | ||
| 5 | void release_at(struct task_struct *t, lt_t start); | ||
| 6 | long complete_job(void); | ||
| 7 | |||
| 8 | #endif | ||
| 9 | |||
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h new file mode 100644 index 00000000000..6a1a59da6b5 --- /dev/null +++ b/include/litmus/litmus.h | |||
| @@ -0,0 +1,312 @@ | |||
| 1 | /* | ||
| 2 | * Constant definitions related to | ||
| 3 | * scheduling policy. | ||
| 4 | */ | ||
| 5 | |||
| 6 | #ifndef _LINUX_LITMUS_H_ | ||
| 7 | #define _LINUX_LITMUS_H_ | ||
| 8 | |||
| 9 | #include <litmus/debug_trace.h> | ||
| 10 | |||
| 11 | #ifdef CONFIG_RELEASE_MASTER | ||
| 12 | extern atomic_t release_master_cpu; | ||
| 13 | #endif | ||
| 14 | |||
| 15 | /* in_list - is a given list_head queued on some list? | ||
| 16 | */ | ||
| 17 | static inline int in_list(struct list_head* list) | ||
| 18 | { | ||
| 19 | return !( /* case 1: deleted */ | ||
| 20 | (list->next == LIST_POISON1 && | ||
| 21 | list->prev == LIST_POISON2) | ||
| 22 | || | ||
| 23 | /* case 2: initialized */ | ||
| 24 | (list->next == list && | ||
| 25 | list->prev == list) | ||
| 26 | ); | ||
| 27 | } | ||
| 28 | |||
| 29 | struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); | ||
| 30 | struct task_struct* __waitqueue_peek_first(wait_queue_head_t *wq); | ||
| 31 | |||
| 32 | #define NO_CPU 0xffffffff | ||
| 33 | |||
| 34 | void litmus_fork(struct task_struct *tsk); | ||
| 35 | void litmus_exec(void); | ||
| 36 | /* clean up real-time state of a task */ | ||
| 37 | void exit_litmus(struct task_struct *dead_tsk); | ||
| 38 | |||
| 39 | long litmus_admit_task(struct task_struct *tsk); | ||
| 40 | void litmus_exit_task(struct task_struct *tsk); | ||
| 41 | |||
| 42 | #define is_realtime(t) ((t)->policy == SCHED_LITMUS) | ||
| 43 | #define rt_transition_pending(t) \ | ||
| 44 | ((t)->rt_param.transition_pending) | ||
| 45 | |||
| 46 | #define tsk_rt(t) (&(t)->rt_param) | ||
| 47 | |||
| 48 | /* Realtime utility macros */ | ||
| 49 | #define is_priority_boosted(t) (tsk_rt(t)->priority_boosted) | ||
| 50 | #define get_boost_start(t) (tsk_rt(t)->boost_start_time) | ||
| 51 | |||
| 52 | /* task_params macros */ | ||
| 53 | #define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost) | ||
| 54 | #define get_rt_period(t) (tsk_rt(t)->task_params.period) | ||
| 55 | #define get_rt_relative_deadline(t) (tsk_rt(t)->task_params.relative_deadline) | ||
| 56 | #define get_rt_phase(t) (tsk_rt(t)->task_params.phase) | ||
| 57 | #define get_partition(t) (tsk_rt(t)->task_params.cpu) | ||
| 58 | #define get_priority(t) (tsk_rt(t)->task_params.priority) | ||
| 59 | #define get_class(t) (tsk_rt(t)->task_params.cls) | ||
| 60 | #define get_release_policy(t) (tsk_rt(t)->task_params.release_policy) | ||
| 61 | |||
| 62 | /* job_param macros */ | ||
| 63 | #define get_job_no(t) (tsk_rt(t)->job_params.job_no) | ||
| 64 | #define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) | ||
| 65 | #define get_deadline(t) (tsk_rt(t)->job_params.deadline) | ||
| 66 | #define get_release(t) (tsk_rt(t)->job_params.release) | ||
| 67 | #define get_lateness(t) (tsk_rt(t)->job_params.lateness) | ||
| 68 | |||
| 69 | /* release policy macros */ | ||
| 70 | #define is_periodic(t) (get_release_policy(t) == PERIODIC) | ||
| 71 | #define is_sporadic(t) (get_release_policy(t) == SPORADIC) | ||
| 72 | #ifdef CONFIG_ALLOW_EARLY_RELEASE | ||
| 73 | #define is_early_releasing(t) (get_release_policy(t) == EARLY) | ||
| 74 | #else | ||
| 75 | #define is_early_releasing(t) (0) | ||
| 76 | #endif | ||
| 77 | |||
| 78 | #define is_hrt(t) \ | ||
| 79 | (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) | ||
| 80 | #define is_srt(t) \ | ||
| 81 | (tsk_rt(t)->task_params.cls == RT_CLASS_SOFT) | ||
| 82 | #define is_be(t) \ | ||
| 83 | (tsk_rt(t)->task_params.cls == RT_CLASS_BEST_EFFORT) | ||
| 84 | |||
| 85 | /* Our notion of time within LITMUS: kernel monotonic time. */ | ||
| 86 | static inline lt_t litmus_clock(void) | ||
| 87 | { | ||
| 88 | return ktime_to_ns(ktime_get()); | ||
| 89 | } | ||
| 90 | |||
| 91 | /* A macro to convert from nanoseconds to ktime_t. */ | ||
| 92 | #define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t) | ||
| 93 | |||
| 94 | #define get_domain(t) (tsk_rt(t)->domain) | ||
| 95 | |||
| 96 | /* Honor the flag in the preempt_count variable that is set | ||
| 97 | * when scheduling is in progress. | ||
| 98 | */ | ||
| 99 | #define is_running(t) \ | ||
| 100 | ((t)->state == TASK_RUNNING || \ | ||
| 101 | task_thread_info(t)->preempt_count & PREEMPT_ACTIVE) | ||
| 102 | |||
| 103 | #define is_blocked(t) \ | ||
| 104 | (!is_running(t)) | ||
| 105 | #define is_released(t, now) \ | ||
| 106 | (lt_before_eq(get_release(t), now)) | ||
| 107 | #define is_tardy(t, now) \ | ||
| 108 | (lt_before_eq(tsk_rt(t)->job_params.deadline, now)) | ||
| 109 | |||
| 110 | /* real-time comparison macros */ | ||
| 111 | #define earlier_deadline(a, b) (lt_before(\ | ||
| 112 | (a)->rt_param.job_params.deadline,\ | ||
| 113 | (b)->rt_param.job_params.deadline)) | ||
| 114 | #define earlier_release(a, b) (lt_before(\ | ||
| 115 | (a)->rt_param.job_params.release,\ | ||
| 116 | (b)->rt_param.job_params.release)) | ||
| 117 | |||
| 118 | void preempt_if_preemptable(struct task_struct* t, int on_cpu); | ||
| 119 | |||
| 120 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 121 | void srp_ceiling_block(void); | ||
| 122 | #else | ||
| 123 | #define srp_ceiling_block() /* nothing */ | ||
| 124 | #endif | ||
| 125 | |||
| 126 | #define bheap2task(hn) ((struct task_struct*) hn->value) | ||
| 127 | |||
| 128 | #ifdef CONFIG_NP_SECTION | ||
| 129 | |||
| 130 | static inline int is_kernel_np(struct task_struct *t) | ||
| 131 | { | ||
| 132 | return tsk_rt(t)->kernel_np; | ||
| 133 | } | ||
| 134 | |||
| 135 | static inline int is_user_np(struct task_struct *t) | ||
| 136 | { | ||
| 137 | return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0; | ||
| 138 | } | ||
| 139 | |||
| 140 | static inline void request_exit_np(struct task_struct *t) | ||
| 141 | { | ||
| 142 | if (is_user_np(t)) { | ||
| 143 | /* Set the flag that tells user space to call | ||
| 144 | * into the kernel at the end of a critical section. */ | ||
| 145 | if (likely(tsk_rt(t)->ctrl_page)) { | ||
| 146 | TRACE_TASK(t, "setting delayed_preemption flag\n"); | ||
| 147 | tsk_rt(t)->ctrl_page->sched.np.preempt = 1; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | static inline void make_np(struct task_struct *t) | ||
| 153 | { | ||
| 154 | tsk_rt(t)->kernel_np++; | ||
| 155 | } | ||
| 156 | |||
| 157 | /* Caller should check if preemption is necessary when | ||
| 158 | * the function returns 0. | ||
| 159 | */ | ||
| 160 | static inline int take_np(struct task_struct *t) | ||
| 161 | { | ||
| 162 | return --tsk_rt(t)->kernel_np; | ||
| 163 | } | ||
| 164 | |||
| 165 | /* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */ | ||
| 166 | static inline int request_exit_np_atomic(struct task_struct *t) | ||
| 167 | { | ||
| 168 | union np_flag old, new; | ||
| 169 | |||
| 170 | if (tsk_rt(t)->ctrl_page) { | ||
| 171 | old.raw = tsk_rt(t)->ctrl_page->sched.raw; | ||
| 172 | if (old.np.flag == 0) { | ||
| 173 | /* no longer non-preemptive */ | ||
| 174 | return 0; | ||
| 175 | } else if (old.np.preempt) { | ||
| 176 | /* already set, nothing for us to do */ | ||
| 177 | return 1; | ||
| 178 | } else { | ||
| 179 | /* non preemptive and flag not set */ | ||
| 180 | new.raw = old.raw; | ||
| 181 | new.np.preempt = 1; | ||
| 182 | /* if we get old back, then we atomically set the flag */ | ||
| 183 | return cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw; | ||
| 184 | /* If we raced with a concurrent change, then so be | ||
| 185 | * it. Deliver it by IPI. We don't want an unbounded | ||
| 186 | * retry loop here since tasks might exploit that to | ||
| 187 | * keep the kernel busy indefinitely. */ | ||
| 188 | } | ||
| 189 | } else | ||
| 190 | return 0; | ||
| 191 | } | ||
| 192 | |||
| 193 | #else | ||
| 194 | |||
| 195 | static inline int is_kernel_np(struct task_struct* t) | ||
| 196 | { | ||
| 197 | return 0; | ||
| 198 | } | ||
| 199 | |||
| 200 | static inline int is_user_np(struct task_struct* t) | ||
| 201 | { | ||
| 202 | return 0; | ||
| 203 | } | ||
| 204 | |||
| 205 | static inline void request_exit_np(struct task_struct *t) | ||
| 206 | { | ||
| 207 | /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */ | ||
| 208 | BUG(); | ||
| 209 | } | ||
| 210 | |||
| 211 | static inline int request_exit_np_atomic(struct task_struct *t) | ||
| 212 | { | ||
| 213 | return 0; | ||
| 214 | } | ||
| 215 | |||
| 216 | #endif | ||
| 217 | |||
| 218 | static inline void clear_exit_np(struct task_struct *t) | ||
| 219 | { | ||
| 220 | if (likely(tsk_rt(t)->ctrl_page)) | ||
| 221 | tsk_rt(t)->ctrl_page->sched.np.preempt = 0; | ||
| 222 | } | ||
| 223 | |||
| 224 | static inline int is_np(struct task_struct *t) | ||
| 225 | { | ||
| 226 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
| 227 | int kernel, user; | ||
| 228 | kernel = is_kernel_np(t); | ||
| 229 | user = is_user_np(t); | ||
| 230 | if (kernel || user) | ||
| 231 | TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n", | ||
| 232 | |||
| 233 | kernel, user); | ||
| 234 | return kernel || user; | ||
| 235 | #else | ||
| 236 | return unlikely(is_kernel_np(t) || is_user_np(t)); | ||
| 237 | #endif | ||
| 238 | } | ||
| 239 | |||
| 240 | static inline int is_present(struct task_struct* t) | ||
| 241 | { | ||
| 242 | return t && tsk_rt(t)->present; | ||
| 243 | } | ||
| 244 | |||
| 245 | static inline int is_completed(struct task_struct* t) | ||
| 246 | { | ||
| 247 | return t && tsk_rt(t)->completed; | ||
| 248 | } | ||
| 249 | |||
| 250 | |||
| 251 | /* make the unit explicit */ | ||
| 252 | typedef unsigned long quanta_t; | ||
| 253 | |||
| 254 | enum round { | ||
| 255 | FLOOR, | ||
| 256 | CEIL | ||
| 257 | }; | ||
| 258 | |||
| 259 | |||
| 260 | /* Tick period is used to convert ns-specified execution | ||
| 261 | * costs and periods into tick-based equivalents. | ||
| 262 | */ | ||
| 263 | extern ktime_t tick_period; | ||
| 264 | |||
| 265 | static inline quanta_t time2quanta(lt_t time, enum round round) | ||
| 266 | { | ||
| 267 | s64 quantum_length = ktime_to_ns(tick_period); | ||
| 268 | |||
| 269 | if (do_div(time, quantum_length) && round == CEIL) | ||
| 270 | time++; | ||
| 271 | return (quanta_t) time; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* By how much is cpu staggered behind CPU 0? */ | ||
| 275 | u64 cpu_stagger_offset(int cpu); | ||
| 276 | |||
| 277 | static inline struct control_page* get_control_page(struct task_struct *t) | ||
| 278 | { | ||
| 279 | return tsk_rt(t)->ctrl_page; | ||
| 280 | } | ||
| 281 | |||
| 282 | static inline int has_control_page(struct task_struct* t) | ||
| 283 | { | ||
| 284 | return tsk_rt(t)->ctrl_page != NULL; | ||
| 285 | } | ||
| 286 | |||
| 287 | |||
| 288 | #ifdef CONFIG_SCHED_OVERHEAD_TRACE | ||
| 289 | |||
| 290 | #define TS_SYSCALL_IN_START \ | ||
| 291 | if (has_control_page(current)) { \ | ||
| 292 | __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start); \ | ||
| 293 | } | ||
| 294 | |||
| 295 | #define TS_SYSCALL_IN_END \ | ||
| 296 | if (has_control_page(current)) { \ | ||
| 297 | uint64_t irqs; \ | ||
| 298 | local_irq_disable(); \ | ||
| 299 | irqs = get_control_page(current)->irq_count - \ | ||
| 300 | get_control_page(current)->irq_syscall_start; \ | ||
| 301 | __TS_SYSCALL_IN_END(&irqs); \ | ||
| 302 | local_irq_enable(); \ | ||
| 303 | } | ||
| 304 | |||
| 305 | #else | ||
| 306 | |||
| 307 | #define TS_SYSCALL_IN_START | ||
| 308 | #define TS_SYSCALL_IN_END | ||
| 309 | |||
| 310 | #endif | ||
| 311 | |||
| 312 | #endif | ||
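Usage sketch of the accessor macros (demo_admissible() and the per-task utilization rule shown are illustrative only, not the admission test used by the patch):

    #include <linux/sched.h>
    #include <litmus/litmus.h>

    static int demo_admissible(struct task_struct *t)
    {
            /* a job may not demand more execution time than its period */
            return is_realtime(t) && get_exec_cost(t) <= get_rt_period(t);
    }

    static void demo_deadline_check(struct task_struct *t)
    {
            lt_t now = litmus_clock();

            if (is_released(t, now) && is_tardy(t, now))
                    TRACE_TASK(t, "has missed its deadline\n");
    }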
diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h new file mode 100644 index 00000000000..6800e725d48 --- /dev/null +++ b/include/litmus/litmus_proc.h | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | #include <litmus/sched_plugin.h> | ||
| 2 | #include <linux/proc_fs.h> | ||
| 3 | |||
| 4 | int __init init_litmus_proc(void); | ||
| 5 | void exit_litmus_proc(void); | ||
| 6 | |||
| 7 | /* | ||
| 8 | * On success, returns 0 and sets the pointer to the location of the new | ||
| 9 | * proc dir entry, otherwise returns an error code and sets pde to NULL. | ||
| 10 | */ | ||
| 11 | long make_plugin_proc_dir(struct sched_plugin* plugin, | ||
| 12 | struct proc_dir_entry** pde); | ||
| 13 | |||
| 14 | /* | ||
| 15 | * Plugins should deallocate all child proc directory entries before | ||
| 16 | * calling this, to avoid memory leaks. | ||
| 17 | */ | ||
| 18 | void remove_plugin_proc_dir(struct sched_plugin* plugin); | ||
| 19 | |||
| 20 | |||
| 21 | /* Copy at most ksize-1 bytes from ubuf into kbuf, null-terminate kbuf, and | ||
| 22 | * remove a '\n' if present. Returns the number of bytes that were read or | ||
| 23 | * -EFAULT. */ | ||
| 24 | int copy_and_chomp(char *kbuf, unsigned long ksize, | ||
| 25 | __user const char* ubuf, unsigned long ulength); | ||
diff --git a/include/litmus/locking.h b/include/litmus/locking.h new file mode 100644 index 00000000000..968ba6fa828 --- /dev/null +++ b/include/litmus/locking.h | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | #ifndef LITMUS_LOCKING_H | ||
| 2 | #define LITMUS_LOCKING_H | ||
| 3 | |||
| 4 | #include <litmus/fdso.h> | ||
| 5 | |||
| 6 | struct litmus_lock_ops; | ||
| 7 | |||
| 8 | extern struct fdso_ops generic_lock_ops; | ||
| 9 | |||
| 10 | /* Generic base struct for LITMUS^RT userspace semaphores. | ||
| 11 | * This structure should be embedded in protocol-specific semaphores. | ||
| 12 | */ | ||
| 13 | struct litmus_lock { | ||
| 14 | struct litmus_lock_ops *ops; | ||
| 15 | int type; | ||
| 16 | }; | ||
| 17 | |||
| 18 | struct litmus_lock_ops { | ||
| 19 | /* Current task tries to obtain / drop a reference to a lock. | ||
| 20 | * Optional methods, allowed by default. */ | ||
| 21 | int (*open)(struct litmus_lock*, void* __user); | ||
| 22 | int (*close)(struct litmus_lock*); | ||
| 23 | |||
| 24 | /* Current tries to lock/unlock this lock (mandatory methods). */ | ||
| 25 | int (*lock)(struct litmus_lock*); | ||
| 26 | int (*unlock)(struct litmus_lock*); | ||
| 27 | |||
| 28 | int (*dynamic_group_lock)(struct litmus_lock*, resource_mask_t); | ||
| 29 | int (*dynamic_group_unlock)(struct litmus_lock*, resource_mask_t); | ||
| 30 | |||
| 31 | /* The lock is no longer being referenced (mandatory method). */ | ||
| 32 | void (*deallocate)(struct litmus_lock*); | ||
| 33 | }; | ||
| 34 | |||
| 35 | static inline bool is_lock(struct od_table_entry* entry) | ||
| 36 | { | ||
| 37 | return entry->class == &generic_lock_ops; | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline struct litmus_lock* get_lock(struct od_table_entry* entry) | ||
| 41 | { | ||
| 42 | BUG_ON(!is_lock(entry)); | ||
| 43 | return (struct litmus_lock*) entry->obj->obj; | ||
| 44 | } | ||
| 45 | |||
| 46 | #endif | ||
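Usage sketch of dispatching through litmus_lock_ops after resolving an object descriptor (demo_lock_od() is hypothetical; get_entry_for_od() comes from fdso.h):

    #include <linux/errno.h>
    #include <litmus/locking.h>

    static int demo_lock_od(int od)
    {
            struct od_table_entry *entry = get_entry_for_od(od);

            if (!entry || !is_lock(entry))
                    return -EINVAL;

            /* ->lock() is one of the mandatory methods */
            return get_lock(entry)->ops->lock(get_lock(entry));
    }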
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h new file mode 100644 index 00000000000..380b886d78f --- /dev/null +++ b/include/litmus/preempt.h | |||
| @@ -0,0 +1,164 @@ | |||
| 1 | #ifndef LITMUS_PREEMPT_H | ||
| 2 | #define LITMUS_PREEMPT_H | ||
| 3 | |||
| 4 | #include <linux/types.h> | ||
| 5 | #include <linux/cache.h> | ||
| 6 | #include <linux/percpu.h> | ||
| 7 | #include <asm/atomic.h> | ||
| 8 | |||
| 9 | #include <litmus/debug_trace.h> | ||
| 10 | |||
| 11 | extern DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); | ||
| 12 | |||
| 13 | #ifdef CONFIG_PREEMPT_STATE_TRACE | ||
| 14 | const char* sched_state_name(int s); | ||
| 15 | #define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args) | ||
| 16 | #else | ||
| 17 | #define TRACE_STATE(fmt, args...) /* ignore */ | ||
| 18 | #endif | ||
| 19 | |||
| 20 | #define VERIFY_SCHED_STATE(x) \ | ||
| 21 | do { int __s = get_sched_state(); \ | ||
| 22 | if ((__s & (x)) == 0) \ | ||
| 23 | TRACE_STATE("INVALID s=0x%x (%s) not " \ | ||
| 24 | "in 0x%x (%s) [%s]\n", \ | ||
| 25 | __s, sched_state_name(__s), \ | ||
| 26 | (x), #x, __FUNCTION__); \ | ||
| 27 | } while (0); | ||
| 28 | |||
| 29 | #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \ | ||
| 30 | TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \ | ||
| 31 | cpu, (x), sched_state_name(x), \ | ||
| 32 | (y), sched_state_name(y)) | ||
| 33 | |||
| 34 | |||
| 35 | typedef enum scheduling_state { | ||
| 36 | TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that | ||
| 37 | * should be scheduled, and the processor does not | ||
| 38 | * plan to invoke schedule(). */ | ||
| 39 | SHOULD_SCHEDULE = (1 << 1), /* A remote processor has determined that the | ||
| 40 | * processor should reschedule, but this has not | ||
| 41 | * been communicated yet (IPI still pending). */ | ||
| 42 | WILL_SCHEDULE = (1 << 2), /* The processor has noticed that it has to | ||
| 43 | * reschedule and will do so shortly. */ | ||
| 44 | TASK_PICKED = (1 << 3), /* The processor is currently executing schedule(), | ||
| 45 | * has selected a new task to schedule, but has not | ||
| 46 | * yet performed the actual context switch. */ | ||
| 47 | PICKED_WRONG_TASK = (1 << 4), /* The processor has not yet performed the context | ||
| 48 | * switch, but a remote processor has already | ||
| 49 | * determined that a higher-priority task became | ||
| 50 | * eligible after the task was picked. */ | ||
| 51 | } sched_state_t; | ||
| 52 | |||
| 53 | static inline sched_state_t get_sched_state_on(int cpu) | ||
| 54 | { | ||
| 55 | return atomic_read(&per_cpu(resched_state, cpu)); | ||
| 56 | } | ||
| 57 | |||
| 58 | static inline sched_state_t get_sched_state(void) | ||
| 59 | { | ||
| 60 | return atomic_read(&__get_cpu_var(resched_state)); | ||
| 61 | } | ||
| 62 | |||
| 63 | static inline int is_in_sched_state(int possible_states) | ||
| 64 | { | ||
| 65 | return get_sched_state() & possible_states; | ||
| 66 | } | ||
| 67 | |||
| 68 | static inline int cpu_is_in_sched_state(int cpu, int possible_states) | ||
| 69 | { | ||
| 70 | return get_sched_state_on(cpu) & possible_states; | ||
| 71 | } | ||
| 72 | |||
| 73 | static inline void set_sched_state(sched_state_t s) | ||
| 74 | { | ||
| 75 | TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id()); | ||
| 76 | atomic_set(&__get_cpu_var(resched_state), s); | ||
| 77 | } | ||
| 78 | |||
| 79 | static inline int sched_state_transition(sched_state_t from, sched_state_t to) | ||
| 80 | { | ||
| 81 | sched_state_t old_state; | ||
| 82 | |||
| 83 | old_state = atomic_cmpxchg(&__get_cpu_var(resched_state), from, to); | ||
| 84 | if (old_state == from) { | ||
| 85 | TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id()); | ||
| 86 | return 1; | ||
| 87 | } else | ||
| 88 | return 0; | ||
| 89 | } | ||
| 90 | |||
| 91 | static inline int sched_state_transition_on(int cpu, | ||
| 92 | sched_state_t from, | ||
| 93 | sched_state_t to) | ||
| 94 | { | ||
| 95 | sched_state_t old_state; | ||
| 96 | |||
| 97 | old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to); | ||
| 98 | if (old_state == from) { | ||
| 99 | TRACE_SCHED_STATE_CHANGE(from, to, cpu); | ||
| 100 | return 1; | ||
| 101 | } else | ||
| 102 | return 0; | ||
| 103 | } | ||
| 104 | |||
| 105 | /* Plugins must call this function after they have decided which job to | ||
| 106 | * schedule next. IMPORTANT: this function must be called while still holding | ||
| 107 | * the lock that is used to serialize scheduling decisions. | ||
| 108 | * | ||
| 109 | * (Ideally, we would like to use runqueue locks for this purpose, but that | ||
| 110 | * would lead to deadlocks with the migration code.) | ||
| 111 | */ | ||
| 112 | static inline void sched_state_task_picked(void) | ||
| 113 | { | ||
| 114 | VERIFY_SCHED_STATE(WILL_SCHEDULE); | ||
| 115 | |||
| 116 | /* WILL_SCHEDULE has only a local transition => simple store is ok */ | ||
| 117 | set_sched_state(TASK_PICKED); | ||
| 118 | } | ||
| 119 | |||
| 120 | static inline void sched_state_entered_schedule(void) | ||
| 121 | { | ||
| 122 | /* Update state for the case that we entered schedule() not due to | ||
| 123 | * set_tsk_need_resched() */ | ||
| 124 | set_sched_state(WILL_SCHEDULE); | ||
| 125 | } | ||
| 126 | |||
| 127 | /* Called by schedule() to check if the scheduling decision is still valid | ||
| 128 | * after a context switch. Returns 1 if the CPU needs to reschedule. */ | ||
| 129 | static inline int sched_state_validate_switch(void) | ||
| 130 | { | ||
| 131 | int left_state_ok = 0; | ||
| 132 | |||
| 133 | VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED); | ||
| 134 | |||
| 135 | if (is_in_sched_state(TASK_PICKED)) { | ||
| 136 | /* Might be good; let's try to transition out of this | ||
| 137 | * state. This must be done atomically since remote processors | ||
| 138 | * may try to change the state, too. */ | ||
| 139 | left_state_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED); | ||
| 140 | } | ||
| 141 | |||
| 142 | if (!left_state_ok) { | ||
| 143 | /* We raced with a higher-priority task arrival => not | ||
| 144 | * valid. The CPU needs to reschedule. */ | ||
| 145 | set_sched_state(WILL_SCHEDULE); | ||
| 146 | return 1; | ||
| 147 | } else | ||
| 148 | return 0; | ||
| 149 | } | ||
| 150 | |||
| 151 | /* State transition events. See litmus/preempt.c for details. */ | ||
| 152 | void sched_state_will_schedule(struct task_struct* tsk); | ||
| 153 | void sched_state_ipi(void); | ||
| 154 | /* Cause a CPU (remote or local) to reschedule. */ | ||
| 155 | void litmus_reschedule(int cpu); | ||
| 156 | void litmus_reschedule_local(void); | ||
| 157 | |||
| 158 | #ifdef CONFIG_DEBUG_KERNEL | ||
| 159 | void sched_state_plugin_check(void); | ||
| 160 | #else | ||
| 161 | #define sched_state_plugin_check() /* no check */ | ||
| 162 | #endif | ||
| 163 | |||
| 164 | #endif | ||
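The state machine above is easiest to see from a plugin's point of view: the plugin makes its decision under its own lock, marks the state TASK_PICKED before dropping that lock, and the core later uses sched_state_validate_switch() to detect races with remote wake-ups. The following is a minimal sketch of that usage; `demo_lock`, `demo_pick_next_task()`, and `demo_schedule()` are hypothetical names used only for illustration, not part of this patch.

```c
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <litmus/preempt.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

/* Placeholder policy: a real plugin would consult its ready queue here. */
static struct task_struct* demo_pick_next_task(struct task_struct *prev)
{
	return prev;
}

/* Hypothetical plugin schedule() callback. */
static struct task_struct* demo_schedule(struct task_struct *prev)
{
	struct task_struct *next;

	raw_spin_lock(&demo_lock);
	next = demo_pick_next_task(prev);

	/* Must happen while the decision is still serialized by demo_lock:
	 * WILL_SCHEDULE -> TASK_PICKED (local transition, simple store). */
	sched_state_task_picked();

	raw_spin_unlock(&demo_lock);
	return next;
}
```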
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h new file mode 100644 index 00000000000..ac249292e86 --- /dev/null +++ b/include/litmus/rt_domain.h | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | /* CLEANUP: Add comments and make it less messy. | ||
| 2 | * | ||
| 3 | */ | ||
| 4 | |||
| 5 | #ifndef __UNC_RT_DOMAIN_H__ | ||
| 6 | #define __UNC_RT_DOMAIN_H__ | ||
| 7 | |||
| 8 | #include <litmus/bheap.h> | ||
| 9 | |||
| 10 | #define RELEASE_QUEUE_SLOTS 127 /* prime */ | ||
| 11 | |||
| 12 | struct _rt_domain; | ||
| 13 | |||
| 14 | typedef int (*check_resched_needed_t)(struct _rt_domain *rt); | ||
| 15 | typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks); | ||
| 16 | |||
| 17 | struct release_queue { | ||
| 18 | /* each slot maintains a list of release heaps sorted | ||
| 19 | * by release time */ | ||
| 20 | struct list_head slot[RELEASE_QUEUE_SLOTS]; | ||
| 21 | }; | ||
| 22 | |||
| 23 | typedef struct _rt_domain { | ||
| 24 | /* runnable rt tasks are in here */ | ||
| 25 | raw_spinlock_t ready_lock; | ||
| 26 | struct bheap ready_queue; | ||
| 27 | |||
| 28 | /* real-time tasks waiting for release are in here */ | ||
| 29 | raw_spinlock_t release_lock; | ||
| 30 | struct release_queue release_queue; | ||
| 31 | |||
| 32 | #ifdef CONFIG_RELEASE_MASTER | ||
| 33 | int release_master; | ||
| 34 | #endif | ||
| 35 | |||
| 36 | /* for moving tasks to the release queue */ | ||
| 37 | raw_spinlock_t tobe_lock; | ||
| 38 | struct list_head tobe_released; | ||
| 39 | |||
| 40 | /* how do we check if we need to kick another CPU? */ | ||
| 41 | check_resched_needed_t check_resched; | ||
| 42 | |||
| 43 | /* how do we release jobs? */ | ||
| 44 | release_jobs_t release_jobs; | ||
| 45 | |||
| 46 | /* how are tasks ordered in the ready queue? */ | ||
| 47 | bheap_prio_t order; | ||
| 48 | } rt_domain_t; | ||
| 49 | |||
| 50 | struct release_heap { | ||
| 51 | /* list_head for per-time-slot list */ | ||
| 52 | struct list_head list; | ||
| 53 | lt_t release_time; | ||
| 54 | /* all tasks to be released at release_time */ | ||
| 55 | struct bheap heap; | ||
| 56 | /* used to trigger the release */ | ||
| 57 | struct hrtimer timer; | ||
| 58 | |||
| 59 | #ifdef CONFIG_RELEASE_MASTER | ||
| 60 | /* used to delegate releases */ | ||
| 61 | struct hrtimer_start_on_info info; | ||
| 62 | #endif | ||
| 63 | /* required for the timer callback */ | ||
| 64 | rt_domain_t* dom; | ||
| 65 | }; | ||
| 66 | |||
| 67 | |||
| 68 | static inline struct task_struct* __next_ready(rt_domain_t* rt) | ||
| 69 | { | ||
| 70 | struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue); | ||
| 71 | if (hn) | ||
| 72 | return bheap2task(hn); | ||
| 73 | else | ||
| 74 | return NULL; | ||
| 75 | } | ||
| 76 | |||
| 77 | void rt_domain_init(rt_domain_t *rt, bheap_prio_t order, | ||
| 78 | check_resched_needed_t check, | ||
| 79 | release_jobs_t release); | ||
| 80 | |||
| 81 | void __add_ready(rt_domain_t* rt, struct task_struct *new); | ||
| 82 | void __merge_ready(rt_domain_t* rt, struct bheap *tasks); | ||
| 83 | void __add_release(rt_domain_t* rt, struct task_struct *task); | ||
| 84 | |||
| 85 | static inline struct task_struct* __take_ready(rt_domain_t* rt) | ||
| 86 | { | ||
| 87 | struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue); | ||
| 88 | if (hn) | ||
| 89 | return bheap2task(hn); | ||
| 90 | else | ||
| 91 | return NULL; | ||
| 92 | } | ||
| 93 | |||
| 94 | static inline struct task_struct* __peek_ready(rt_domain_t* rt) | ||
| 95 | { | ||
| 96 | struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue); | ||
| 97 | if (hn) | ||
| 98 | return bheap2task(hn); | ||
| 99 | else | ||
| 100 | return NULL; | ||
| 101 | } | ||
| 102 | |||
| 103 | static inline int is_queued(struct task_struct *t) | ||
| 104 | { | ||
| 105 | BUG_ON(!tsk_rt(t)->heap_node); | ||
| 106 | return bheap_node_in_heap(tsk_rt(t)->heap_node); | ||
| 107 | } | ||
| 108 | |||
| 109 | static inline void remove(rt_domain_t* rt, struct task_struct *t) | ||
| 110 | { | ||
| 111 | bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node); | ||
| 112 | } | ||
| 113 | |||
| 114 | static inline void add_ready(rt_domain_t* rt, struct task_struct *new) | ||
| 115 | { | ||
| 116 | unsigned long flags; | ||
| 117 | /* first we need the write lock for rt_ready_queue */ | ||
| 118 | raw_spin_lock_irqsave(&rt->ready_lock, flags); | ||
| 119 | __add_ready(rt, new); | ||
| 120 | raw_spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
| 121 | } | ||
| 122 | |||
| 123 | static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks) | ||
| 124 | { | ||
| 125 | unsigned long flags; | ||
| 126 | raw_spin_lock_irqsave(&rt->ready_lock, flags); | ||
| 127 | __merge_ready(rt, tasks); | ||
| 128 | raw_spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
| 129 | } | ||
| 130 | |||
| 131 | static inline struct task_struct* take_ready(rt_domain_t* rt) | ||
| 132 | { | ||
| 133 | unsigned long flags; | ||
| 134 | struct task_struct* ret; | ||
| 135 | /* first we need the write lock for rt_ready_queue */ | ||
| 136 | raw_spin_lock_irqsave(&rt->ready_lock, flags); | ||
| 137 | ret = __take_ready(rt); | ||
| 138 | raw_spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
| 139 | return ret; | ||
| 140 | } | ||
| 141 | |||
| 142 | |||
| 143 | static inline void add_release(rt_domain_t* rt, struct task_struct *task) | ||
| 144 | { | ||
| 145 | unsigned long flags; | ||
| 146 | raw_spin_lock_irqsave(&rt->tobe_lock, flags); | ||
| 147 | __add_release(rt, task); | ||
| 148 | raw_spin_unlock_irqrestore(&rt->tobe_lock, flags); | ||
| 149 | } | ||
| 150 | |||
| 151 | #ifdef CONFIG_RELEASE_MASTER | ||
| 152 | void __add_release_on(rt_domain_t* rt, struct task_struct *task, | ||
| 153 | int target_cpu); | ||
| 154 | |||
| 155 | static inline void add_release_on(rt_domain_t* rt, | ||
| 156 | struct task_struct *task, | ||
| 157 | int target_cpu) | ||
| 158 | { | ||
| 159 | unsigned long flags; | ||
| 160 | raw_spin_lock_irqsave(&rt->tobe_lock, flags); | ||
| 161 | __add_release_on(rt, task, target_cpu); | ||
| 162 | raw_spin_unlock_irqrestore(&rt->tobe_lock, flags); | ||
| 163 | } | ||
| 164 | #endif | ||
| 165 | |||
| 166 | static inline int __jobs_pending(rt_domain_t* rt) | ||
| 167 | { | ||
| 168 | return !bheap_empty(&rt->ready_queue); | ||
| 169 | } | ||
| 170 | |||
| 171 | static inline int jobs_pending(rt_domain_t* rt) | ||
| 172 | { | ||
| 173 | unsigned long flags; | ||
| 174 | int ret; | ||
| 175 | /* first we need the write lock for rt_ready_queue */ | ||
| 176 | raw_spin_lock_irqsave(&rt->ready_lock, flags); | ||
| 177 | ret = !bheap_empty(&rt->ready_queue); | ||
| 178 | raw_spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
| 179 | return ret; | ||
| 180 | } | ||
| 181 | |||
| 182 | #endif | ||
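A hedged sketch of how a plugin might instantiate an rt_domain with the API above: supply a priority order for the ready heap, a callback deciding whether another CPU must be kicked, and a callback that moves freshly released jobs into the ready queue. All `demo_*` names are illustrative, and the two-node comparator signature for bheap_prio_t is assumed from its use with bheap_peek()/bheap_take() above.

```c
#include <litmus/rt_domain.h>

static rt_domain_t demo_domain;

/* Placeholder comparator: a real plugin would, e.g., compare the deadlines
 * of the tasks stored in the two heap nodes. */
static int demo_higher_prio(struct bheap_node *a, struct bheap_node *b)
{
	return 0;
}

/* Nothing to kick in this single-domain sketch. */
static int demo_check_resched(rt_domain_t *rt)
{
	return 0;
}

/* Merge newly released jobs into the ready queue; this sketch assumes the
 * callback is invoked without the ready lock already held. */
static void demo_release_jobs(rt_domain_t *rt, struct bheap *tasks)
{
	merge_ready(rt, tasks);
}

static void demo_domain_init(void)
{
	rt_domain_init(&demo_domain, demo_higher_prio,
		       demo_check_resched, demo_release_jobs);
}
```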
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h new file mode 100644 index 00000000000..70c09acbeb2 --- /dev/null +++ b/include/litmus/rt_param.h | |||
| @@ -0,0 +1,283 @@ | |||
| 1 | /* | ||
| 2 | * Definition of the LITMUS^RT real-time task parameters. | ||
| 3 | * | ||
| 4 | */ | ||
| 5 | #ifndef _LINUX_RT_PARAM_H_ | ||
| 6 | #define _LINUX_RT_PARAM_H_ | ||
| 7 | |||
| 8 | /* Litmus time type. */ | ||
| 9 | typedef unsigned long long lt_t; | ||
| 10 | |||
| 11 | static inline int lt_after(lt_t a, lt_t b) | ||
| 12 | { | ||
| 13 | return ((long long) b) - ((long long) a) < 0; | ||
| 14 | } | ||
| 15 | #define lt_before(a, b) lt_after(b, a) | ||
| 16 | |||
| 17 | static inline int lt_after_eq(lt_t a, lt_t b) | ||
| 18 | { | ||
| 19 | return ((long long) a) - ((long long) b) >= 0; | ||
| 20 | } | ||
| 21 | #define lt_before_eq(a, b) lt_after_eq(b, a) | ||
| 22 | |||
| 23 | /* different types of clients */ | ||
| 24 | typedef enum { | ||
| 25 | RT_CLASS_HARD, | ||
| 26 | RT_CLASS_SOFT, | ||
| 27 | RT_CLASS_BEST_EFFORT | ||
| 28 | } task_class_t; | ||
| 29 | |||
| 30 | typedef enum { | ||
| 31 | NO_ENFORCEMENT, /* job may overrun unhindered */ | ||
| 32 | QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */ | ||
| 33 | PRECISE_ENFORCEMENT /* budgets are enforced with hrtimers */ | ||
| 34 | } budget_policy_t; | ||
| 35 | |||
| 36 | /* Release behaviors for jobs. PERIODIC and EARLY jobs | ||
| 37 | must end by calling sys_complete_job() (or equivalent) | ||
| 38 | to set up their next release and deadline. */ | ||
| 39 | typedef enum { | ||
| 40 | /* Jobs are released sporadically (provided job precedence | ||
| 41 | constraints are met). */ | ||
| 42 | SPORADIC, | ||
| 43 | |||
| 44 | /* Jobs are released periodically (provided job precedence | ||
| 45 | constraints are met). */ | ||
| 46 | PERIODIC, | ||
| 47 | |||
| 48 | /* Jobs are released immediately after meeting precedence | ||
| 49 | constraints. Beware this can peg your CPUs if used in | ||
| 50 | the wrong applications. Only supported by EDF schedulers. */ | ||
| 51 | EARLY | ||
| 52 | } release_policy_t; | ||
| 53 | |||
| 54 | /* We use the common priority interpretation "lower index == higher priority", | ||
| 55 | * which is commonly used in fixed-priority schedulability analysis papers. | ||
| 56 | * So, a numerically lower priority value implies higher scheduling priority, | ||
| 57 | * with priority 1 being the highest priority. Priority 0 is reserved for | ||
| 58 | * priority boosting. LITMUS_MAX_PRIORITY denotes the maximum priority value | ||
| 59 | * range. | ||
| 60 | */ | ||
| 61 | |||
| 62 | #define LITMUS_MAX_PRIORITY 512 | ||
| 63 | #define LITMUS_HIGHEST_PRIORITY 1 | ||
| 64 | #define LITMUS_LOWEST_PRIORITY (LITMUS_MAX_PRIORITY - 1) | ||
| 65 | |||
| 66 | /* Provide generic comparison macros for userspace, | ||
| 67 | * in case that we change this later. */ | ||
| 68 | #define litmus_higher_fixed_prio(a, b) (a < b) | ||
| 69 | #define litmus_lower_fixed_prio(a, b) (a > b) | ||
| 70 | #define litmus_is_valid_fixed_prio(p) \ | ||
| 71 | ((p) >= LITMUS_HIGHEST_PRIORITY && \ | ||
| 72 | (p) <= LITMUS_LOWEST_PRIORITY) | ||
| 73 | |||
| 74 | struct rt_task { | ||
| 75 | lt_t exec_cost; /* worst-case execution cost (budget) per job */ | ||
| 76 | lt_t period; /* period / minimum inter-release separation */ | ||
| 77 | lt_t relative_deadline; /* deadline relative to each release */ | ||
| 78 | lt_t phase; /* offset of the first release */ | ||
| 79 | unsigned int cpu; /* CPU assignment for partitioned plugins */ | ||
| 80 | unsigned int priority; /* fixed priority (see range macros above) */ | ||
| 81 | task_class_t cls; /* hard / soft / best-effort classification */ | ||
| 82 | budget_policy_t budget_policy; /* ignored by pfair */ | ||
| 83 | release_policy_t release_policy; | ||
| 84 | }; | ||
| 85 | |||
| 86 | union np_flag { | ||
| 87 | uint64_t raw; | ||
| 88 | struct { | ||
| 89 | /* Is the task currently in a non-preemptive section? */ | ||
| 90 | uint64_t flag:31; | ||
| 91 | /* Should the task call into the scheduler? */ | ||
| 92 | uint64_t preempt:1; | ||
| 93 | } np; | ||
| 94 | }; | ||
| 95 | |||
| 96 | /* The definition of the data that is shared between the kernel and real-time | ||
| 97 | * tasks via a shared page (see litmus/ctrldev.c). | ||
| 98 | * | ||
| 99 | * WARNING: User space can write to this, so don't trust | ||
| 100 | * the correctness of the fields! | ||
| 101 | * | ||
| 102 | * This serves two purposes: to enable efficient signaling | ||
| 103 | * of non-preemptive sections (user->kernel) and | ||
| 104 | * delayed preemptions (kernel->user), and to export | ||
| 105 | * some real-time relevant statistics such as preemption and | ||
| 106 | * migration data to user space. We can't use a device to export | ||
| 107 | * statistics because we want to avoid system call overhead when | ||
| 108 | * determining preemption/migration overheads. | ||
| 109 | */ | ||
| 110 | struct control_page { | ||
| 111 | /* This flag is used by userspace to communicate non-preemptive | ||
| 112 | * sections. */ | ||
| 113 | volatile union np_flag sched; | ||
| 114 | |||
| 115 | volatile uint64_t irq_count; /* Incremented by the kernel each time an IRQ is | ||
| 116 | * handled. */ | ||
| 117 | |||
| 118 | /* Locking overhead tracing: userspace records here the time stamp | ||
| 119 | * and IRQ counter prior to starting the system call. */ | ||
| 120 | uint64_t ts_syscall_start; /* Feather-Trace cycles */ | ||
| 121 | uint64_t irq_syscall_start; /* Snapshot of irq_count when the syscall | ||
| 122 | * started. */ | ||
| 123 | |||
| 124 | /* to be extended */ | ||
| 125 | }; | ||
| 126 | |||
| 127 | /* Expected offsets within the control page. */ | ||
| 128 | |||
| 129 | #define LITMUS_CP_OFFSET_SCHED 0 | ||
| 130 | #define LITMUS_CP_OFFSET_IRQ_COUNT 8 | ||
| 131 | #define LITMUS_CP_OFFSET_TS_SC_START 16 | ||
| 132 | #define LITMUS_CP_OFFSET_IRQ_SC_START 24 | ||
| 133 | |||
| 134 | /* don't export internal data structures to user space (liblitmus) */ | ||
| 135 | #ifdef __KERNEL__ | ||
| 136 | |||
| 137 | struct _rt_domain; | ||
| 138 | struct bheap_node; | ||
| 139 | struct release_heap; | ||
| 140 | |||
| 141 | struct rt_job { | ||
| 142 | /* Time instant at which the job was or will be released. */ | ||
| 143 | lt_t release; | ||
| 144 | /* What is the current deadline? */ | ||
| 145 | lt_t deadline; | ||
| 146 | |||
| 147 | /* How much service has this job received so far? */ | ||
| 148 | lt_t exec_time; | ||
| 149 | |||
| 150 | /* By how much did the prior job miss its deadline? | ||
| 151 | * Value differs from tardiness in that lateness may | ||
| 152 | * be negative (when job finishes before its deadline). | ||
| 153 | */ | ||
| 154 | long long lateness; | ||
| 155 | |||
| 156 | /* Which job is this? This is used to let user space | ||
| 157 | * specify which job to wait for, which is important if jobs | ||
| 158 | * overrun. If we just call sys_sleep_next_period() then we | ||
| 159 | * will unintentionally miss jobs after an overrun. | ||
| 160 | * | ||
| 161 | * Increase this sequence number when a job is released. | ||
| 162 | */ | ||
| 163 | unsigned int job_no; | ||
| 164 | }; | ||
| 165 | |||
| 166 | struct pfair_param; | ||
| 167 | |||
| 168 | /* RT task parameters for scheduling extensions | ||
| 169 | * These parameters are inherited during clone and therefore must | ||
| 170 | * be explicitly set up before the task set is launched. | ||
| 171 | */ | ||
| 172 | struct rt_param { | ||
| 173 | /* is the task sleeping? */ | ||
| 174 | unsigned int flags:8; | ||
| 175 | |||
| 176 | /* do we need to check for srp blocking? */ | ||
| 177 | unsigned int srp_non_recurse:1; | ||
| 178 | |||
| 179 | /* is the task present? (true if it can be scheduled) */ | ||
| 180 | unsigned int present:1; | ||
| 181 | |||
| 182 | /* has the task completed? */ | ||
| 183 | unsigned int completed:1; | ||
| 184 | |||
| 185 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 186 | /* Is the task being priority-boosted by a locking protocol? */ | ||
| 187 | unsigned int priority_boosted:1; | ||
| 188 | /* If so, when did this start? */ | ||
| 189 | lt_t boost_start_time; | ||
| 190 | |||
| 191 | /* How many LITMUS^RT locks does the task currently hold/wait for? */ | ||
| 192 | unsigned int num_locks_held; | ||
| 193 | /* How many PCP/SRP locks does the task currently hold/wait for? */ | ||
| 194 | unsigned int num_local_locks_held; | ||
| 195 | #endif | ||
| 196 | |||
| 197 | /* user controlled parameters */ | ||
| 198 | struct rt_task task_params; | ||
| 199 | |||
| 200 | /* timing parameters */ | ||
| 201 | struct rt_job job_params; | ||
| 202 | |||
| 203 | /* Task representing the current "inherited" task | ||
| 204 | * priority, as assigned by inherit_priority and | ||
| 205 | * return_priority in the scheduler plugins. | ||
| 206 | * May point to self if priority inheritance does not | ||
| 207 | * result in an increased task priority. | ||
| 208 | */ | ||
| 209 | struct task_struct* inh_task; | ||
| 210 | |||
| 211 | #ifdef CONFIG_NP_SECTION | ||
| 212 | /* For the FMLP under PSN-EDF, it is required to make the task | ||
| 213 | * non-preemptive from kernel space. In order not to interfere with | ||
| 214 | * user space, this counter indicates the kernel space np setting. | ||
| 215 | * kernel_np > 0 => task is non-preemptive | ||
| 216 | */ | ||
| 217 | unsigned int kernel_np; | ||
| 218 | #endif | ||
| 219 | |||
| 220 | /* This field can be used by plugins to store where the task | ||
| 221 | * is currently scheduled. It is the responsibility of the | ||
| 222 | * plugin to avoid race conditions. | ||
| 223 | * | ||
| 224 | * This is used by GSN-EDF and PFAIR. | ||
| 225 | */ | ||
| 226 | volatile int scheduled_on; | ||
| 227 | |||
| 228 | /* Is the stack of the task currently in use? This is updated by | ||
| 229 | * the LITMUS core. | ||
| 230 | * | ||
| 231 | * Be careful to avoid deadlocks! | ||
| 232 | */ | ||
| 233 | volatile int stack_in_use; | ||
| 234 | |||
| 235 | /* This field can be used by plugins to store where the task | ||
| 236 | * is currently linked. It is the responsibility of the plugin | ||
| 237 | * to avoid race conditions. | ||
| 238 | * | ||
| 239 | * Used by GSN-EDF. | ||
| 240 | */ | ||
| 241 | volatile int linked_on; | ||
| 242 | |||
| 243 | /* PFAIR/PD^2 state. Allocated on demand. */ | ||
| 244 | struct pfair_param* pfair; | ||
| 245 | |||
| 246 | /* Fields saved before BE->RT transition. | ||
| 247 | */ | ||
| 248 | int old_policy; | ||
| 249 | int old_prio; | ||
| 250 | |||
| 251 | /* ready queue for this task */ | ||
| 252 | struct _rt_domain* domain; | ||
| 253 | |||
| 254 | /* heap element for this task | ||
| 255 | * | ||
| 256 | * Warning: Don't statically allocate this node. The heap | ||
| 257 | * implementation swaps these between tasks, thus after | ||
| 258 | * dequeuing from a heap you may end up with a different node | ||
| 259 | * than the one you had when enqueuing the task. For the same | ||
| 260 | * reason, don't obtain and store references to this node | ||
| 261 | * other than this pointer (which is updated by the heap | ||
| 262 | * implementation). | ||
| 263 | */ | ||
| 264 | struct bheap_node* heap_node; | ||
| 265 | struct release_heap* rel_heap; | ||
| 266 | |||
| 267 | /* Used by rt_domain to queue task in release list. | ||
| 268 | */ | ||
| 269 | struct list_head list; | ||
| 270 | |||
| 271 | /* Pointer to the page shared between userspace and kernel. */ | ||
| 272 | struct control_page * ctrl_page; | ||
| 273 | |||
| 274 | lt_t total_tardy; | ||
| 275 | lt_t max_tardy; | ||
| 276 | unsigned int missed; | ||
| 277 | lt_t max_exec_time; | ||
| 278 | lt_t tot_exec_time; | ||
| 279 | }; | ||
| 280 | |||
| 281 | #endif | ||
| 282 | |||
| 283 | #endif | ||
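The LITMUS_CP_OFFSET_* constants above exist so that user space can locate fields in the shared page without depending on kernel internals. A natural companion (not part of this patch; placement and name are assumptions) is a compile-time check that struct control_page really matches those offsets:

```c
#include <linux/bug.h>
#include <linux/stddef.h>
#include <litmus/rt_param.h>

/* Fails the build if struct control_page drifts away from the offsets
 * that user space (liblitmus) has been promised. */
static inline void check_control_page_layout(void)
{
	BUILD_BUG_ON(offsetof(struct control_page, sched)
		     != LITMUS_CP_OFFSET_SCHED);
	BUILD_BUG_ON(offsetof(struct control_page, irq_count)
		     != LITMUS_CP_OFFSET_IRQ_COUNT);
	BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
		     != LITMUS_CP_OFFSET_TS_SC_START);
	BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
		     != LITMUS_CP_OFFSET_IRQ_SC_START);
}
```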
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h new file mode 100644 index 00000000000..1546ab7f1d6 --- /dev/null +++ b/include/litmus/sched_plugin.h | |||
| @@ -0,0 +1,113 @@ | |||
| 1 | /* | ||
| 2 | * Definition of the scheduler plugin interface. | ||
| 3 | * | ||
| 4 | */ | ||
| 5 | #ifndef _LINUX_SCHED_PLUGIN_H_ | ||
| 6 | #define _LINUX_SCHED_PLUGIN_H_ | ||
| 7 | |||
| 8 | #include <linux/sched.h> | ||
| 9 | |||
| 10 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 11 | #include <litmus/locking.h> | ||
| 12 | #endif | ||
| 13 | |||
| 14 | /************************ setup/tear down ********************/ | ||
| 15 | |||
| 16 | typedef long (*activate_plugin_t) (void); | ||
| 17 | typedef long (*deactivate_plugin_t) (void); | ||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | /********************* scheduler invocation ******************/ | ||
| 22 | |||
| 23 | /* Plugin-specific realtime tick handler */ | ||
| 24 | typedef void (*scheduler_tick_t) (struct task_struct *cur); | ||
| 25 | /* The main scheduling decision function: selects the next task to run. */ | ||
| 26 | typedef struct task_struct* (*schedule_t)(struct task_struct * prev); | ||
| 27 | /* Clean up after the task switch has occurred. | ||
| 28 | * This function is called after every (even non-rt) task switch. | ||
| 29 | */ | ||
| 30 | typedef void (*finish_switch_t)(struct task_struct *prev); | ||
| 31 | |||
| 32 | |||
| 33 | /********************* task state changes ********************/ | ||
| 34 | |||
| 35 | /* Called to setup a new real-time task. | ||
| 36 | * Release the first job, enqueue, etc. | ||
| 37 | * Task may already be running. | ||
| 38 | */ | ||
| 39 | typedef void (*task_new_t) (struct task_struct *task, | ||
| 40 | int on_rq, | ||
| 41 | int running); | ||
| 42 | |||
| 43 | /* Called to re-introduce a task after blocking. | ||
| 44 | * Can potentially be called multiple times. | ||
| 45 | */ | ||
| 46 | typedef void (*task_wake_up_t) (struct task_struct *task); | ||
| 47 | /* Called to notify the plugin of a blocking real-time task. | ||
| 48 | * It will only be called for real-time tasks and before schedule() is called. */ | ||
| 49 | typedef void (*task_block_t) (struct task_struct *task); | ||
| 50 | /* Called when a real-time task exits or changes to a different scheduling | ||
| 51 | * class. | ||
| 52 | * Free any allocated resources | ||
| 53 | */ | ||
| 54 | typedef void (*task_exit_t) (struct task_struct *); | ||
| 55 | |||
| 56 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 57 | /* Called when the current task attempts to create a new lock of a given | ||
| 58 | * protocol type. */ | ||
| 59 | typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, | ||
| 60 | void* __user config); | ||
| 61 | #endif | ||
| 62 | |||
| 63 | |||
| 64 | /********************* sys call backends ********************/ | ||
| 65 | /* This function causes the caller to sleep until the next release */ | ||
| 66 | typedef long (*complete_job_t) (void); | ||
| 67 | |||
| 68 | typedef long (*admit_task_t)(struct task_struct* tsk); | ||
| 69 | |||
| 70 | typedef void (*release_at_t)(struct task_struct *t, lt_t start); | ||
| 71 | |||
| 72 | struct sched_plugin { | ||
| 73 | struct list_head list; | ||
| 74 | /* basic info */ | ||
| 75 | char *plugin_name; | ||
| 76 | |||
| 77 | /* setup */ | ||
| 78 | activate_plugin_t activate_plugin; | ||
| 79 | deactivate_plugin_t deactivate_plugin; | ||
| 80 | |||
| 81 | /* scheduler invocation */ | ||
| 82 | scheduler_tick_t tick; | ||
| 83 | schedule_t schedule; | ||
| 84 | finish_switch_t finish_switch; | ||
| 85 | |||
| 86 | /* syscall backend */ | ||
| 87 | complete_job_t complete_job; | ||
| 88 | release_at_t release_at; | ||
| 89 | |||
| 90 | /* task state changes */ | ||
| 91 | admit_task_t admit_task; | ||
| 92 | |||
| 93 | task_new_t task_new; | ||
| 94 | task_wake_up_t task_wake_up; | ||
| 95 | task_block_t task_block; | ||
| 96 | task_exit_t task_exit; | ||
| 97 | |||
| 98 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 99 | /* locking protocols */ | ||
| 100 | allocate_lock_t allocate_lock; | ||
| 101 | #endif | ||
| 102 | } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); | ||
| 103 | |||
| 104 | |||
| 105 | extern struct sched_plugin *litmus; | ||
| 106 | |||
| 107 | int register_sched_plugin(struct sched_plugin* plugin); | ||
| 108 | struct sched_plugin* find_sched_plugin(const char* name); | ||
| 109 | int print_sched_plugins(char* buf, int max); | ||
| 110 | |||
| 111 | extern struct sched_plugin linux_sched_plugin; | ||
| 112 | |||
| 113 | #endif | ||
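To make the callback table concrete, here is a hedged skeleton of the smallest plausible plugin built only from the hooks declared above. The "DEMO" name, the trivial callbacks, and registering from a module init function are all assumptions for illustration; a real plugin would at least supply a scheduling policy.

```c
#include <linux/errno.h>
#include <linux/module.h>
#include <litmus/preempt.h>
#include <litmus/sched_plugin.h>

static long demo_activate_plugin(void)
{
	return 0;	/* nothing to set up */
}

static long demo_deactivate_plugin(void)
{
	return 0;	/* nothing to tear down */
}

static long demo_admit_task(struct task_struct *tsk)
{
	return -EINVAL;	/* this sketch never admits real-time tasks */
}

static struct task_struct* demo_schedule(struct task_struct *prev)
{
	/* honor the contract documented in preempt.h */
	sched_state_task_picked();
	return NULL;	/* never picks a real-time task */
}

static struct sched_plugin demo_plugin = {
	.plugin_name       = "DEMO",
	.activate_plugin   = demo_activate_plugin,
	.deactivate_plugin = demo_deactivate_plugin,
	.admit_task        = demo_admit_task,
	.schedule          = demo_schedule,
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);
```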
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h new file mode 100644 index 00000000000..fa7042f744c --- /dev/null +++ b/include/litmus/sched_trace.h | |||
| @@ -0,0 +1,342 @@ | |||
| 1 | /* | ||
| 2 | * sched_trace.h -- record scheduler events to a byte stream for offline analysis. | ||
| 3 | */ | ||
| 4 | #ifndef _LINUX_SCHED_TRACE_H_ | ||
| 5 | #define _LINUX_SCHED_TRACE_H_ | ||
| 6 | |||
| 7 | /* all times in nanoseconds */ | ||
| 8 | |||
| 9 | struct st_trace_header { | ||
| 10 | u8 type; /* Of what type is this record? */ | ||
| 11 | u8 cpu; /* On which CPU was it recorded? */ | ||
| 12 | u16 pid; /* PID of the task. */ | ||
| 13 | u32 job; /* The job sequence number. */ | ||
| 14 | }; | ||
| 15 | |||
| 16 | #define ST_NAME_LEN 16 | ||
| 17 | struct st_name_data { | ||
| 18 | char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ | ||
| 19 | }; | ||
| 20 | |||
| 21 | struct st_param_data { /* regular params */ | ||
| 22 | u32 wcet; | ||
| 23 | u32 period; | ||
| 24 | u32 phase; | ||
| 25 | u8 partition; | ||
| 26 | u8 class; | ||
| 27 | u8 __unused[2]; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct st_release_data { /* A job was or is going to be released. */ | ||
| 31 | u64 release; /* What's the release time? */ | ||
| 32 | u64 deadline; /* By when must it finish? */ | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct st_assigned_data { /* A job was assigned to a CPU. */ | ||
| 36 | u64 when; | ||
| 37 | u8 target; /* Where should it execute? */ | ||
| 38 | u8 __unused[7]; | ||
| 39 | }; | ||
| 40 | |||
| 41 | struct st_switch_to_data { /* A process was switched to on a given CPU. */ | ||
| 42 | u64 when; /* When did this occur? */ | ||
| 43 | u32 exec_time; /* Time the current job has executed. */ | ||
| 44 | u8 __unused[4]; | ||
| 45 | |||
| 46 | }; | ||
| 47 | |||
| 48 | struct st_switch_away_data { /* A process was switched away from on a given CPU. */ | ||
| 49 | u64 when; | ||
| 50 | u64 exec_time; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct st_completion_data { /* A job completed. */ | ||
| 54 | u64 when; | ||
| 55 | u8 forced:1; /* Set to 1 if job overran and kernel advanced to the | ||
| 56 | * next task automatically; set to 0 otherwise. | ||
| 57 | */ | ||
| 58 | u8 __uflags:7; | ||
| 59 | u8 __unused[7]; | ||
| 60 | }; | ||
| 61 | |||
| 62 | struct st_block_data { /* A task blocks. */ | ||
| 63 | u64 when; | ||
| 64 | u64 __unused; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct st_resume_data { /* A task resumes. */ | ||
| 68 | u64 when; | ||
| 69 | u64 __unused; | ||
| 70 | }; | ||
| 71 | |||
| 72 | struct st_action_data { | ||
| 73 | u64 when; | ||
| 74 | u8 action; | ||
| 75 | u8 __unused[7]; | ||
| 76 | }; | ||
| 77 | |||
| 78 | struct st_sys_release_data { | ||
| 79 | u64 when; | ||
| 80 | u64 release; | ||
| 81 | }; | ||
| 82 | |||
| 83 | struct st_task_exit_data { | ||
| 84 | u64 avg_exec_time; | ||
| 85 | u64 max_exec_time; | ||
| 86 | }; | ||
| 87 | |||
| 88 | struct st_task_tardy_data { | ||
| 89 | u64 total_tardy; | ||
| 90 | u32 max_tardy; | ||
| 91 | u32 missed; | ||
| 92 | }; | ||
| 93 | |||
| 94 | #define DATA(x) struct st_ ## x ## _data x; | ||
| 95 | |||
| 96 | typedef enum { | ||
| 97 | ST_NAME = 1, /* Start at one, so that we can spot | ||
| 98 | * uninitialized records. */ | ||
| 99 | ST_PARAM, | ||
| 100 | ST_RELEASE, | ||
| 101 | ST_ASSIGNED, | ||
| 102 | ST_SWITCH_TO, | ||
| 103 | ST_SWITCH_AWAY, | ||
| 104 | ST_COMPLETION, | ||
| 105 | ST_BLOCK, | ||
| 106 | ST_RESUME, | ||
| 107 | ST_ACTION, | ||
| 108 | ST_SYS_RELEASE, | ||
| 109 | ST_TASK_EXIT, | ||
| 110 | ST_TASK_TARDY, | ||
| 111 | } st_event_record_type_t; | ||
| 112 | |||
| 113 | struct st_event_record { | ||
| 114 | struct st_trace_header hdr; | ||
| 115 | union { | ||
| 116 | u64 raw[2]; | ||
| 117 | |||
| 118 | DATA(name); | ||
| 119 | DATA(param); | ||
| 120 | DATA(release); | ||
| 121 | DATA(assigned); | ||
| 122 | DATA(switch_to); | ||
| 123 | DATA(switch_away); | ||
| 124 | DATA(completion); | ||
| 125 | DATA(block); | ||
| 126 | DATA(resume); | ||
| 127 | DATA(action); | ||
| 128 | DATA(sys_release); | ||
| 129 | DATA(task_exit); | ||
| 130 | DATA(task_tardy); | ||
| 131 | } data; | ||
| 132 | }; | ||
| 133 | |||
| 134 | #undef DATA | ||
| 135 | |||
| 136 | #ifdef __KERNEL__ | ||
| 137 | |||
| 138 | #include <linux/sched.h> | ||
| 139 | #include <litmus/feather_trace.h> | ||
| 140 | |||
| 141 | #ifdef CONFIG_SCHED_TASK_TRACE | ||
| 142 | |||
| 143 | #define SCHED_TRACE(id, callback, task) \ | ||
| 144 | ft_event1(id, callback, task) | ||
| 145 | #define SCHED_TRACE2(id, callback, task, xtra) \ | ||
| 146 | ft_event2(id, callback, task, xtra) | ||
| 147 | |||
| 148 | /* provide prototypes; needed on sparc64 */ | ||
| 149 | #ifndef NO_TASK_TRACE_DECLS | ||
| 150 | feather_callback void do_sched_trace_task_name(unsigned long id, | ||
| 151 | struct task_struct* task); | ||
| 152 | feather_callback void do_sched_trace_task_param(unsigned long id, | ||
| 153 | struct task_struct* task); | ||
| 154 | feather_callback void do_sched_trace_task_release(unsigned long id, | ||
| 155 | struct task_struct* task); | ||
| 156 | feather_callback void do_sched_trace_task_switch_to(unsigned long id, | ||
| 157 | struct task_struct* task); | ||
| 158 | feather_callback void do_sched_trace_task_switch_away(unsigned long id, | ||
| 159 | struct task_struct* task); | ||
| 160 | feather_callback void do_sched_trace_task_completion(unsigned long id, | ||
| 161 | struct task_struct* task, | ||
| 162 | unsigned long forced); | ||
| 163 | feather_callback void do_sched_trace_task_block(unsigned long id, | ||
| 164 | struct task_struct* task); | ||
| 165 | feather_callback void do_sched_trace_task_resume(unsigned long id, | ||
| 166 | struct task_struct* task); | ||
| 167 | feather_callback void do_sched_trace_action(unsigned long id, | ||
| 168 | struct task_struct* task, | ||
| 169 | unsigned long action); | ||
| 170 | feather_callback void do_sched_trace_sys_release(unsigned long id, | ||
| 171 | lt_t* start); | ||
| 172 | feather_callback void do_sched_trace_task_exit(unsigned long id, | ||
| 173 | struct task_struct* task); | ||
| 174 | feather_callback void do_sched_trace_task_tardy(unsigned long id, | ||
| 175 | struct task_struct* task); | ||
| 176 | |||
| 177 | #endif | ||
| 178 | |||
| 179 | #else | ||
| 180 | |||
| 181 | #define SCHED_TRACE(id, callback, task) /* no tracing */ | ||
| 182 | #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ | ||
| 183 | |||
| 184 | #endif | ||
| 185 | |||
| 186 | #ifdef CONFIG_SCHED_LITMUS_TRACEPOINT | ||
| 187 | |||
| 188 | #include <trace/events/litmus.h> | ||
| 189 | |||
| 190 | #else | ||
| 191 | |||
| 192 | /* Override trace macros to actually do nothing */ | ||
| 193 | #define trace_litmus_task_param(t) | ||
| 194 | #define trace_litmus_task_release(t) | ||
| 195 | #define trace_litmus_switch_to(t) | ||
| 196 | #define trace_litmus_switch_away(prev) | ||
| 197 | #define trace_litmus_task_completion(t, forced) | ||
| 198 | #define trace_litmus_task_block(t, i) | ||
| 199 | #define trace_litmus_task_resume(t, i) | ||
| 200 | #define trace_litmus_sys_release(start) | ||
| 201 | #define trace_litmus_task_exit(t) | ||
| 202 | #define trace_litmus_task_tardy(t) | ||
| 203 | |||
| 204 | #define trace_litmus_resource_acquire(t, i) | ||
| 205 | #define trace_litmus_resource_released(t, i) | ||
| 206 | |||
| 211 | #define trace_litmus_container_param(cid, name) | ||
| 212 | #define trace_litmus_server_param(sid, cid, wcet, time) | ||
| 213 | #define trace_litmus_server_switch_to(sid, job, tid, tjob, cpu) | ||
| 214 | #define trace_litmus_server_switch_away(sid, job, tid, tjob, cpu) | ||
| 215 | #define trace_litmus_server_release(sid, job, release, deadline) | ||
| 216 | #define trace_litmus_server_completion(sid, job) | ||
| 217 | #define trace_litmus_server_block(sid) | ||
| 218 | #define trace_litmus_server_resume(sid) | ||
| 219 | |||
| 220 | #endif | ||
| 221 | |||
| 222 | |||
| 223 | #define SCHED_TRACE_BASE_ID 500 | ||
| 224 | |||
| 225 | |||
| 226 | #define sched_trace_task_name(t) \ | ||
| 227 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, \ | ||
| 228 | do_sched_trace_task_name, t) | ||
| 229 | |||
| 230 | #define sched_trace_task_param(t) \ | ||
| 231 | do { \ | ||
| 232 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, \ | ||
| 233 | do_sched_trace_task_param, t); \ | ||
| 234 | trace_litmus_task_param(t); \ | ||
| 235 | } while (0) | ||
| 236 | |||
| 237 | #define sched_trace_task_release(t) \ | ||
| 238 | do { \ | ||
| 239 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, \ | ||
| 240 | do_sched_trace_task_release, t); \ | ||
| 241 | trace_litmus_task_release(t); \ | ||
| 242 | } while (0) | ||
| 243 | |||
| 244 | #define sched_trace_task_switch_to(t) \ | ||
| 245 | do { \ | ||
| 246 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, \ | ||
| 247 | do_sched_trace_task_switch_to, t); \ | ||
| 248 | trace_litmus_switch_to(t); \ | ||
| 249 | } while (0) | ||
| 250 | |||
| 251 | #define sched_trace_task_switch_away(t) \ | ||
| 252 | do { \ | ||
| 253 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, \ | ||
| 254 | do_sched_trace_task_switch_away, t); \ | ||
| 255 | trace_litmus_switch_away(t); \ | ||
| 256 | } while (0) | ||
| 257 | |||
| 258 | #define sched_trace_task_completion(t, forced) \ | ||
| 259 | do { \ | ||
| 260 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, \ | ||
| 261 | do_sched_trace_task_completion, t, \ | ||
| 262 | (unsigned long) forced); \ | ||
| 263 | trace_litmus_task_completion(t, forced); \ | ||
| 264 | } while (0) | ||
| 265 | |||
| 266 | #define sched_trace_task_block_on(t, i) \ | ||
| 267 | do { \ | ||
| 268 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, \ | ||
| 269 | do_sched_trace_task_block, t); \ | ||
| 270 | trace_litmus_task_block(t, i); \ | ||
| 271 | } while (0) | ||
| 272 | |||
| 273 | #define sched_trace_task_block(t) \ | ||
| 274 | sched_trace_task_block_on(t, 0) | ||
| 275 | |||
| 276 | #define sched_trace_task_resume_on(t, i) \ | ||
| 277 | do { \ | ||
| 278 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, \ | ||
| 279 | do_sched_trace_task_resume, t); \ | ||
| 280 | trace_litmus_task_resume(t, i); \ | ||
| 281 | } while (0) | ||
| 282 | |||
| 283 | #define sched_trace_task_resume(t) \ | ||
| 284 | sched_trace_task_resume_on(t, 0) | ||
| 285 | |||
| 286 | #define sched_trace_resource_acquire(t, i) \ | ||
| 287 | do { \ | ||
| 288 | trace_litmus_resource_acquire(t, i); \ | ||
| 289 | } while (0) | ||
| 290 | |||
| 291 | #define sched_trace_resource_released(t, i) \ | ||
| 292 | do { \ | ||
| 293 | trace_litmus_resource_released(t, i); \ | ||
| 294 | } while (0) | ||
| 295 | |||
| 296 | #define sched_trace_action(t, action) \ | ||
| 297 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9, \ | ||
| 298 | do_sched_trace_action, t, (unsigned long) action); | ||
| 299 | |||
| 300 | /* when is a pointer, it does not need an explicit cast to unsigned long */ | ||
| 301 | #define sched_trace_sys_release(when) \ | ||
| 302 | do { \ | ||
| 303 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, \ | ||
| 304 | do_sched_trace_sys_release, when); \ | ||
| 305 | trace_litmus_sys_release(when); \ | ||
| 306 | } while (0) | ||
| 307 | |||
| 308 | #define sched_trace_container_param(cid, name) \ | ||
| 309 | do { \ | ||
| 310 | trace_litmus_container_param(cid, name); \ | ||
| 311 | } while (0) | ||
| 312 | |||
| 313 | #define sched_trace_server_param(sid, cid, wcet, period) \ | ||
| 314 | do { \ | ||
| 315 | trace_litmus_server_param(sid, cid, wcet, period); \ | ||
| 316 | } while(0) | ||
| 317 | |||
| 318 | #define sched_trace_server_switch_to(sid, job, tid, tjob, cpu) \ | ||
| 319 | do { \ | ||
| 320 | trace_litmus_server_switch_to(sid, job, tid, tjob, cpu);\ | ||
| 321 | } while(0) | ||
| 322 | |||
| 323 | #define sched_trace_server_switch_away(sid, job, tid, tjob, cpu) \ | ||
| 324 | do { \ | ||
| 325 | trace_litmus_server_switch_away(sid, job, tid, tjob, cpu);\ | ||
| 326 | } while (0) | ||
| 327 | |||
| 328 | #define sched_trace_server_release(sid, job, release, deadline) \ | ||
| 329 | do { \ | ||
| 330 | trace_litmus_server_release(sid, job, release, deadline); \ | ||
| 331 | } while (0) | ||
| 332 | |||
| 333 | #define sched_trace_server_completion(sid, job) \ | ||
| 334 | do { \ | ||
| 335 | trace_litmus_server_completion(sid, job); \ | ||
| 336 | } while (0) | ||
| 337 | |||
| 338 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ | ||
| 339 | |||
| 340 | #endif /* __KERNEL__ */ | ||
| 341 | |||
| 342 | #endif | ||
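Because the record layout above is fixed (an 8-byte header plus a 16-byte payload union), offline analysis boils down to reading records and switching on hdr.type. The sketch below assumes a user-space build in which the u8/u16/u32/u64 typedefs are available and the recorded byte stream has already been saved to a file; both are assumptions about tooling rather than something this patch provides.

```c
#include <stdio.h>
#include <litmus/sched_trace.h>

/* Print release and completion events from a saved trace stream. */
static void dump_records(FILE *stream)
{
	struct st_event_record rec;

	while (fread(&rec, sizeof(rec), 1, stream) == 1) {
		switch (rec.hdr.type) {
		case ST_RELEASE:
			printf("P%u/%u released=%llu deadline=%llu\n",
			       (unsigned) rec.hdr.pid, rec.hdr.job,
			       (unsigned long long) rec.data.release.release,
			       (unsigned long long) rec.data.release.deadline);
			break;
		case ST_COMPLETION:
			printf("P%u/%u completed=%llu forced=%u\n",
			       (unsigned) rec.hdr.pid, rec.hdr.job,
			       (unsigned long long) rec.data.completion.when,
			       (unsigned) rec.data.completion.forced);
			break;
		default:
			/* other record types are ignored in this sketch */
			break;
		}
	}
}
```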
diff --git a/include/litmus/srp.h b/include/litmus/srp.h new file mode 100644 index 00000000000..c9a4552b2bf --- /dev/null +++ b/include/litmus/srp.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | #ifndef LITMUS_SRP_H | ||
| 2 | #define LITMUS_SRP_H | ||
| 3 | |||
| 4 | struct srp_semaphore; | ||
| 5 | |||
| 6 | struct srp_priority { | ||
| 7 | struct list_head list; | ||
| 8 | unsigned int priority; | ||
| 9 | pid_t pid; | ||
| 10 | }; | ||
| 11 | #define list2prio(l) list_entry(l, struct srp_priority, list) | ||
| 12 | |||
| 13 | /* struct for uniprocessor SRP "semaphore" */ | ||
| 14 | struct srp_semaphore { | ||
| 15 | struct litmus_lock litmus_lock; | ||
| 16 | struct srp_priority ceiling; | ||
| 17 | struct task_struct* owner; | ||
| 18 | int cpu; /* cpu associated with this "semaphore" and resource */ | ||
| 19 | }; | ||
| 20 | |||
| 21 | /* map a task to its SRP preemption level priority */ | ||
| 22 | typedef unsigned int (*srp_prioritization_t)(struct task_struct* t); | ||
| 23 | /* Must be updated by each plugin that uses SRP.*/ | ||
| 24 | extern srp_prioritization_t get_srp_prio; | ||
| 25 | |||
| 26 | struct srp_semaphore* allocate_srp_semaphore(void); | ||
| 27 | |||
| 28 | #endif | ||
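get_srp_prio must be pointed at a plugin-supplied function before any SRP semaphore is used. A hedged sketch follows, assuming a plugin that simply reuses the fixed priority from task_params as the preemption level (whether that mapping is appropriate depends on the plugin), and using the tsk_rt() accessor seen in rt_domain.h above.

```c
#include <litmus/litmus.h>
#include <litmus/rt_param.h>
#include <litmus/srp.h>

/* Map a task to its SRP preemption level; here simply its fixed priority. */
static unsigned int demo_srp_prio(struct task_struct *t)
{
	return tsk_rt(t)->task_params.priority;
}

static void demo_enable_srp(void)
{
	/* must be set before the plugin hands out any SRP semaphores */
	get_srp_prio = demo_srp_prio;
}
```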
diff --git a/include/litmus/trace.h b/include/litmus/trace.h new file mode 100644 index 00000000000..8ad4966c602 --- /dev/null +++ b/include/litmus/trace.h | |||
| @@ -0,0 +1,145 @@ | |||
| 1 | #ifndef _SYS_TRACE_H_ | ||
| 2 | #define _SYS_TRACE_H_ | ||
| 3 | |||
| 4 | #ifdef CONFIG_SCHED_OVERHEAD_TRACE | ||
| 5 | |||
| 6 | |||
| 7 | #include <litmus/feather_trace.h> | ||
| 8 | #include <litmus/feather_buffer.h> | ||
| 9 | |||
| 10 | |||
| 11 | /*********************** TIMESTAMPS ************************/ | ||
| 12 | |||
| 13 | enum task_type_marker { | ||
| 14 | TSK_BE, | ||
| 15 | TSK_RT, | ||
| 16 | TSK_UNKNOWN | ||
| 17 | }; | ||
| 18 | |||
| 19 | struct timestamp { | ||
| 20 | uint64_t timestamp:48; | ||
| 21 | uint64_t pid:16; | ||
| 22 | uint32_t seq_no; | ||
| 23 | uint8_t cpu; | ||
| 24 | uint8_t event; | ||
| 25 | uint8_t task_type:2; | ||
| 26 | uint8_t irq_flag:1; | ||
| 27 | uint8_t irq_count:5; | ||
| 28 | }; | ||
| 29 | |||
| 30 | /* tracing callbacks */ | ||
| 31 | feather_callback void save_timestamp(unsigned long event); | ||
| 32 | feather_callback void save_timestamp_def(unsigned long event, unsigned long type); | ||
| 33 | feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr); | ||
| 34 | feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu); | ||
| 35 | feather_callback void save_task_latency(unsigned long event, unsigned long when_ptr); | ||
| 36 | feather_callback void save_timestamp_time(unsigned long event, unsigned long time_ptr); | ||
| 37 | feather_callback void save_timestamp_irq(unsigned long event, unsigned long irq_count_ptr); | ||
| 38 | feather_callback void save_timestamp_hide_irq(unsigned long event); | ||
| 39 | |||
| 40 | #define TIMESTAMP(id) ft_event0(id, save_timestamp) | ||
| 41 | |||
| 42 | #define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def) | ||
| 43 | |||
| 44 | #define TIMESTAMP_CUR(id) DTIMESTAMP(id, is_realtime(current) ? TSK_RT : TSK_BE) | ||
| 45 | |||
| 46 | #define TTIMESTAMP(id, task) \ | ||
| 47 | ft_event1(id, save_timestamp_task, (unsigned long) task) | ||
| 48 | |||
| 49 | #define CTIMESTAMP(id, cpu) \ | ||
| 50 | ft_event1(id, save_timestamp_cpu, (unsigned long) cpu) | ||
| 51 | |||
| 52 | #define LTIMESTAMP(id, task) \ | ||
| 53 | ft_event1(id, save_task_latency, (unsigned long) task) | ||
| 54 | |||
| 55 | #define TIMESTAMP_TIME(id, time_ptr) \ | ||
| 56 | ft_event1(id, save_timestamp_time, (unsigned long) time_ptr) | ||
| 57 | |||
| 58 | #define TIMESTAMP_IRQ(id, irq_count_ptr) \ | ||
| 59 | ft_event1(id, save_timestamp_irq, (unsigned long) irq_count_ptr) | ||
| 60 | |||
| 61 | #define TIMESTAMP_IN_IRQ(id) \ | ||
| 62 | ft_event0(id, save_timestamp_hide_irq) | ||
| 63 | |||
| 64 | #else /* !CONFIG_SCHED_OVERHEAD_TRACE */ | ||
| 65 | |||
| 66 | #define TIMESTAMP(id) /* no tracing */ | ||
| 67 | |||
| 68 | #define DTIMESTAMP(id, def) /* no tracing */ | ||
| 69 | |||
| 70 | #define TIMESTAMP_CUR(id) /* no tracing */ | ||
| 71 | |||
| 72 | #define TTIMESTAMP(id, task) /* no tracing */ | ||
| 73 | |||
| 74 | #define CTIMESTAMP(id, cpu) /* no tracing */ | ||
| 75 | |||
| 76 | #define LTIMESTAMP(id, when_ptr) /* no tracing */ | ||
| 77 | |||
| 78 | #define TIMESTAMP_TIME(id, time_ptr) /* no tracing */ | ||
| 79 | |||
| 80 | #define TIMESTAMP_IRQ(id, irq_count_ptr) /* no tracing */ | ||
| 81 | |||
| 82 | #define TIMESTAMP_IN_IRQ(id) /* no tracing */ | ||
| 83 | |||
| 84 | #endif | ||
| 85 | |||
| 86 | |||
| 87 | /* Convention for timestamps | ||
| 88 | * ========================= | ||
| 89 | * | ||
| 90 | * In order to process the trace files with a common tool, we use the following | ||
| 91 | * convention to measure execution times: The end time id of a code segment is | ||
| 92 | * always the next number after the start time event id. | ||
| 93 | */ | ||
| 94 | |||
| 95 | #define __TS_SYSCALL_IN_START(p) TIMESTAMP_TIME(10, p) | ||
| 96 | #define __TS_SYSCALL_IN_END(p) TIMESTAMP_IRQ(11, p) | ||
| 97 | |||
| 98 | #define TS_SYSCALL_OUT_START TIMESTAMP_CUR(20) | ||
| 99 | #define TS_SYSCALL_OUT_END TIMESTAMP_CUR(21) | ||
| 100 | |||
| 101 | #define TS_LOCK_START TIMESTAMP_CUR(30) | ||
| 102 | #define TS_LOCK_END TIMESTAMP_CUR(31) | ||
| 103 | |||
| 104 | #define TS_LOCK_SUSPEND TIMESTAMP_CUR(38) | ||
| 105 | #define TS_LOCK_RESUME TIMESTAMP_CUR(39) | ||
| 106 | |||
| 107 | #define TS_UNLOCK_START TIMESTAMP_CUR(40) | ||
| 108 | #define TS_UNLOCK_END TIMESTAMP_CUR(41) | ||
| 109 | |||
| 110 | #define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only | ||
| 111 | * care | ||
| 112 | * about | ||
| 113 | * next */ | ||
| 114 | #define TS_SCHED_END(t) TTIMESTAMP(101, t) | ||
| 115 | #define TS_SCHED2_START(t) TTIMESTAMP(102, t) | ||
| 116 | #define TS_SCHED2_END(t) TTIMESTAMP(103, t) | ||
| 117 | |||
| 118 | #define TS_CXS_START(t) TTIMESTAMP(104, t) | ||
| 119 | #define TS_CXS_END(t) TTIMESTAMP(105, t) | ||
| 120 | |||
| 121 | #define TS_RELEASE_START DTIMESTAMP(106, TSK_RT) | ||
| 122 | #define TS_RELEASE_END DTIMESTAMP(107, TSK_RT) | ||
| 123 | |||
| 124 | #define TS_TICK_START(t) TTIMESTAMP(110, t) | ||
| 125 | #define TS_TICK_END(t) TTIMESTAMP(111, t) | ||
| 126 | |||
| 127 | |||
| 128 | #define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ | ||
| 129 | #define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ | ||
| 130 | |||
| 131 | #define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */ | ||
| 132 | #define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */ | ||
| 133 | |||
| 134 | #define TS_ENTER_NP_START TIMESTAMP(140) | ||
| 135 | #define TS_ENTER_NP_END TIMESTAMP(141) | ||
| 136 | |||
| 137 | #define TS_EXIT_NP_START TIMESTAMP(150) | ||
| 138 | #define TS_EXIT_NP_END TIMESTAMP(151) | ||
| 139 | |||
| 140 | #define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) | ||
| 141 | #define TS_SEND_RESCHED_END TIMESTAMP_IN_IRQ(191) | ||
| 142 | |||
| 143 | #define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when)) | ||
| 144 | |||
| 145 | #endif /* !_SYS_TRACE_H_ */ | ||
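The pairing convention above (end id == start id + 1) is what lets a common tool match start/end timestamps into overhead samples. A sketch of adding a new measurement that follows the convention; the id values 160/161 and the TS_DEMO_OP_* names are made up for illustration and would have to be chosen so that they do not collide with existing events.

```c
#include <litmus/trace.h>

#define TS_DEMO_OP_START	TIMESTAMP(160)
#define TS_DEMO_OP_END		TIMESTAMP(161)

static void demo_measured_operation(void)
{
	TS_DEMO_OP_START;
	/* ... the code segment whose overhead is being measured ... */
	TS_DEMO_OP_END;
}
```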
diff --git a/include/litmus/trace_irq.h b/include/litmus/trace_irq.h new file mode 100644 index 00000000000..0d0c042ba9c --- /dev/null +++ b/include/litmus/trace_irq.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | #ifndef _LITMUS_TRACE_IRQ_H_ | ||
| 2 | #define _LITMUS_TRACE_IRQ_H_ | ||
| 3 | |||
| 4 | #ifdef CONFIG_SCHED_OVERHEAD_TRACE | ||
| 5 | |||
| 6 | void ft_irq_fired(void); | ||
| 7 | |||
| 8 | #else | ||
| 9 | |||
| 10 | #define ft_irq_fired() /* nothing to do */ | ||
| 11 | |||
| 12 | #endif | ||
| 13 | |||
| 14 | #endif | ||
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h new file mode 100644 index 00000000000..04e453e8991 --- /dev/null +++ b/include/litmus/unistd_32.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | /* | ||
| 2 | * included from arch/x86/include/asm/unistd_32.h | ||
| 3 | * | ||
| 4 | * LITMUS^RT syscalls with "relative" numbers | ||
| 5 | */ | ||
| 6 | #define __LSC(x) (__NR_LITMUS + x) | ||
| 7 | |||
| 8 | #define __NR_set_rt_task_param __LSC(0) | ||
| 9 | #define __NR_get_rt_task_param __LSC(1) | ||
| 10 | #define __NR_complete_job __LSC(2) | ||
| 11 | #define __NR_od_open __LSC(3) | ||
| 12 | #define __NR_od_close __LSC(4) | ||
| 13 | #define __NR_litmus_lock __LSC(5) | ||
| 14 | #define __NR_litmus_unlock __LSC(6) | ||
| 15 | #define __NR_query_job_no __LSC(7) | ||
| 16 | #define __NR_wait_for_job_release __LSC(8) | ||
| 17 | #define __NR_wait_for_ts_release __LSC(9) | ||
| 18 | #define __NR_release_ts __LSC(10) | ||
| 19 | #define __NR_null_call __LSC(11) | ||
| 20 | #define __NR_dynamic_group_lock __LSC(12) | ||
| 21 | #define __NR_dynamic_group_unlock __LSC(13) | ||
| 22 | |||
| 23 | #define NR_litmus_syscalls 14 | ||
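Because the numbers above are relative, the absolute syscall number is the architecture's __NR_LITMUS base plus the offset; for example, __NR_complete_job expands to __NR_LITMUS + 2. A user-space sketch is shown below; it assumes the patched kernel headers are installed so that the definitions above are visible, and real applications would normally use liblitmus wrappers rather than raw syscall(2).

```c
#include <unistd.h>
#include <sys/syscall.h>	/* assumes headers from the patched kernel */

/* Sleep until the next job release via the LITMUS^RT syscall interface. */
static long demo_complete_job(void)
{
	return syscall(__NR_complete_job);
}
```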
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h new file mode 100644 index 00000000000..ae55b488466 --- /dev/null +++ b/include/litmus/unistd_64.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | /* | ||
| 2 | * included from arch/x86/include/asm/unistd_64.h | ||
| 3 | * | ||
| 4 | * LITMUS^RT syscalls with "relative" numbers | ||
| 5 | */ | ||
| 6 | #define __LSC(x) (__NR_LITMUS + x) | ||
| 7 | |||
| 8 | #define __NR_set_rt_task_param __LSC(0) | ||
| 9 | __SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param) | ||
| 10 | #define __NR_get_rt_task_param __LSC(1) | ||
| 11 | __SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param) | ||
| 12 | #define __NR_complete_job __LSC(2) | ||
| 13 | __SYSCALL(__NR_complete_job, sys_complete_job) | ||
| 14 | #define __NR_od_open __LSC(3) | ||
| 15 | __SYSCALL(__NR_od_open, sys_od_open) | ||
| 16 | #define __NR_od_close __LSC(4) | ||
| 17 | __SYSCALL(__NR_od_close, sys_od_close) | ||
| 18 | #define __NR_litmus_lock __LSC(5) | ||
| 19 | __SYSCALL(__NR_litmus_lock, sys_litmus_lock) | ||
| 20 | #define __NR_litmus_unlock __LSC(6) | ||
| 21 | __SYSCALL(__NR_litmus_unlock, sys_litmus_unlock) | ||
| 22 | #define __NR_query_job_no __LSC(7) | ||
| 23 | __SYSCALL(__NR_query_job_no, sys_query_job_no) | ||
| 24 | #define __NR_wait_for_job_release __LSC(8) | ||
| 25 | __SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release) | ||
| 26 | #define __NR_wait_for_ts_release __LSC(9) | ||
| 27 | __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) | ||
| 28 | #define __NR_release_ts __LSC(10) | ||
| 29 | __SYSCALL(__NR_release_ts, sys_release_ts) | ||
| 30 | #define __NR_null_call __LSC(11) | ||
| 31 | __SYSCALL(__NR_null_call, sys_null_call) | ||
| 32 | #define __NR_dynamic_group_lock __LSC(12) | ||
| 33 | __SYSCALL(__NR_dynamic_group_lock, sys_dynamic_group_lock) | ||
| 34 | #define __NR_dynamic_group_unlock __LSC(13) | ||
| 35 | __SYSCALL(__NR_dynamic_group_unlock, sys_dynamic_group_unlock) | ||
| 36 | |||
| 37 | #define NR_litmus_syscalls 14 | ||
diff --git a/include/litmus/wait.h b/include/litmus/wait.h new file mode 100644 index 00000000000..7e20c0a4a1f --- /dev/null +++ b/include/litmus/wait.h | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | #ifndef _LITMUS_WAIT_H_ | ||
| 2 | #define _LITMUS_WAIT_H_ | ||
| 3 | |||
| 4 | struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); | ||
| 5 | struct task_struct* __waitqueue_peek_first(wait_queue_head_t *wq); | ||
| 6 | |||
| 7 | /* wrap a regular wait_queue_t entry */ | ||
| 8 | struct __prio_wait_queue { | ||
| 9 | wait_queue_t wq; | ||
| 10 | |||
| 11 | /* some priority point */ | ||
| 12 | lt_t priority; | ||
| 13 | /* break ties in priority by lower tie_breaker */ | ||
| 14 | unsigned int tie_breaker; | ||
| 15 | }; | ||
| 16 | |||
| 17 | typedef struct __prio_wait_queue prio_wait_queue_t; | ||
| 18 | |||
| 19 | static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq, | ||
| 20 | struct task_struct* t, | ||
| 21 | lt_t priority) | ||
| 22 | { | ||
| 23 | init_waitqueue_entry(&pwq->wq, t); | ||
| 24 | pwq->priority = priority; | ||
| 25 | pwq->tie_breaker = 0; | ||
| 26 | } | ||
| 27 | |||
| 28 | static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq, | ||
| 29 | struct task_struct* t, | ||
| 30 | lt_t priority, | ||
| 31 | unsigned int tie_breaker) | ||
| 32 | { | ||
| 33 | init_waitqueue_entry(&pwq->wq, t); | ||
| 34 | pwq->priority = priority; | ||
| 35 | pwq->tie_breaker = tie_breaker; | ||
| 36 | } | ||
| 37 | |||
| 38 | unsigned int __add_wait_queue_prio_exclusive( | ||
| 39 | wait_queue_head_t* head, | ||
| 40 | prio_wait_queue_t *new); | ||
| 41 | |||
| 42 | static inline unsigned int add_wait_queue_prio_exclusive( | ||
| 43 | wait_queue_head_t* head, | ||
| 44 | prio_wait_queue_t *new) | ||
| 45 | { | ||
| 46 | unsigned long flags; | ||
| 47 | unsigned int passed; | ||
| 48 | |||
| 49 | spin_lock_irqsave(&head->lock, flags); | ||
| 50 | passed = __add_wait_queue_prio_exclusive(head, new); | ||
| 51 | |||
| 52 | spin_unlock_irqrestore(&head->lock, flags); | ||
| 53 | |||
| 54 | return passed; | ||
| 55 | } | ||
| 56 | |||
| 57 | |||
| 58 | #endif | ||
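A hedged sketch of how a locking protocol could use the priority-ordered, exclusive wait-queue entries declared above. Choosing the waiter's priority point (e.g., its deadline) and the exact sleep pattern are assumptions for illustration; the return value of add_wait_queue_prio_exclusive() is presumed to count how far the new waiter was inserted past existing ones.

```c
#include <linux/sched.h>
#include <linux/wait.h>
#include <litmus/rt_param.h>
#include <litmus/wait.h>

/* Block the current task on wq, ordered by the given priority point. */
static void demo_wait_in_prio_order(wait_queue_head_t *wq, lt_t prio_point)
{
	prio_wait_queue_t waiter;
	unsigned int passed;

	init_prio_waitqueue_entry(&waiter, current, prio_point);
	set_task_state(current, TASK_UNINTERRUPTIBLE);

	/* insert in priority order; the count of passed waiters is unused here */
	passed = add_wait_queue_prio_exclusive(wq, &waiter);
	(void) passed;

	schedule();	/* sleep until the lock holder wakes us */
}
```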
diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h new file mode 100644 index 00000000000..ed50bc809e8 --- /dev/null +++ b/include/trace/events/litmus.h | |||
| @@ -0,0 +1,423 @@ | |||
| 1 | /* | ||
| 2 | * LITMUS^RT kernel style scheduling tracepoints | ||
| 3 | */ | ||
| 4 | #undef TRACE_SYSTEM | ||
| 5 | #define TRACE_SYSTEM litmus | ||
| 6 | |||
| 7 | #if !defined(_SCHED_TASK_TRACEPOINT_H) || defined(TRACE_HEADER_MULTI_READ) | ||
| 8 | #define _SCHED_TASK_TRACEPOINT_H | ||
| 9 | |||
| 10 | #include <linux/tracepoint.h> | ||
| 11 | |||
| 12 | #include <litmus/litmus.h> | ||
| 13 | #include <litmus/rt_param.h> | ||
| 14 | TRACE_EVENT(litmus_task_param, | ||
| 15 | |||
| 16 | TP_PROTO(struct task_struct *t), | ||
| 17 | |||
| 18 | TP_ARGS(t), | ||
| 19 | |||
| 20 | TP_STRUCT__entry( | ||
| 21 | __field( pid_t, pid ) | ||
| 22 | __field( unsigned int, job ) | ||
| 23 | __field( unsigned long long, wcet ) | ||
| 24 | __field( unsigned long long, period ) | ||
| 25 | __field( unsigned long long, phase ) | ||
| 26 | __field( int, partition ) | ||
| 27 | ), | ||
| 28 | |||
| 29 | TP_fast_assign( | ||
| 30 | __entry->pid = t ? t->pid : 0; | ||
| 31 | __entry->job = t ? t->rt_param.job_params.job_no : 0; | ||
| 32 | __entry->wcet = get_exec_cost(t); | ||
| 33 | __entry->period = get_rt_period(t); | ||
| 34 | __entry->phase = get_rt_phase(t); | ||
| 35 | __entry->partition = get_partition(t); | ||
| 36 | ), | ||
| 37 | |||
| 38 | TP_printk("period(%d, %Lu).\nwcet(%d, %Lu).\n", | ||
| 39 | __entry->pid, __entry->period, | ||
| 40 | __entry->pid, __entry->wcet) | ||
| 41 | ); | ||
| 42 | |||
| 43 | /* | ||
| 44 | * Tracing jobs release | ||
| 45 | */ | ||
| 46 | TRACE_EVENT(litmus_task_release, | ||
| 47 | |||
| 48 | TP_PROTO(struct task_struct *t), | ||
| 49 | |||
| 50 | TP_ARGS(t), | ||
| 51 | |||
| 52 | TP_STRUCT__entry( | ||
| 53 | __field( pid_t, pid ) | ||
| 54 | __field( unsigned int, job ) | ||
| 55 | __field( unsigned long long, release ) | ||
| 56 | __field( unsigned long long, deadline ) | ||
| 57 | ), | ||
| 58 | |||
| 59 | TP_fast_assign( | ||
| 60 | __entry->pid = t ? t->pid : 0; | ||
| 61 | __entry->job = t ? t->rt_param.job_params.job_no : 0; | ||
| 62 | __entry->release = get_release(t); | ||
| 63 | __entry->deadline = get_deadline(t); | ||
| 64 | ), | ||
| 65 | |||
| 66 | TP_printk("release(job(%u, %u)): %Lu\ndeadline(job(%u, %u)): %Lu\n", | ||
| 67 | __entry->pid, __entry->job, __entry->release, | ||
| 68 | __entry->pid, __entry->job, __entry->deadline) | ||
| 69 | ); | ||
| 70 | |||
| 71 | /* | ||
| 72 | * Tracepoint for switching to new task | ||
| 73 | */ | ||
| 74 | TRACE_EVENT(litmus_switch_to, | ||
| 75 | |||
| 76 | TP_PROTO(struct task_struct *t), | ||
| 77 | |||
| 78 | TP_ARGS(t), | ||
| 79 | |||
| 80 | TP_STRUCT__entry( | ||
| 81 | __field( pid_t, pid ) | ||
| 82 | __field( unsigned int, job ) | ||
| 83 | __field( unsigned long long, when ) | ||
| 84 | __field( unsigned long long, exec_time ) | ||
| 85 | ), | ||
| 86 | |||
| 87 | TP_fast_assign( | ||
| 88 | __entry->pid = is_realtime(t) ? t->pid : 0; | ||
| 89 | __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0; | ||
| 90 | __entry->when = litmus_clock(); | ||
| 91 | __entry->exec_time = get_exec_time(t); | ||
| 92 | ), | ||
| 93 | |||
| 94 | TP_printk("switch_to(job(%u, %u)): %Lu (exec: %Lu)\n", | ||
| 95 | __entry->pid, __entry->job, | ||
| 96 | __entry->when, __entry->exec_time) | ||
| 97 | ); | ||
| 98 | |||
| 99 | /* | ||
| 100 | * Tracepoint for switching away previous task | ||
| 101 | */ | ||
| 102 | TRACE_EVENT(litmus_switch_away, | ||
| 103 | |||
| 104 | TP_PROTO(struct task_struct *t), | ||
| 105 | |||
| 106 | TP_ARGS(t), | ||
| 107 | |||
| 108 | TP_STRUCT__entry( | ||
| 109 | __field( pid_t, pid ) | ||
| 110 | __field( unsigned int, job ) | ||
| 111 | __field( unsigned long long, when ) | ||
| 112 | __field( unsigned long long, exec_time ) | ||
| 113 | ), | ||
| 114 | |||
| 115 | TP_fast_assign( | ||
| 116 | __entry->pid = is_realtime(t) ? t->pid : 0; | ||
| 117 | __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0; | ||
| 118 | __entry->when = litmus_clock(); | ||
| 119 | __entry->exec_time = get_exec_time(t); | ||
| 120 | ), | ||
| 121 | |||
| 122 | TP_printk("switch_away(job(%u, %u)): %Lu (exec: %Lu)\n", | ||
| 123 | __entry->pid, __entry->job, | ||
| 124 | __entry->when, __entry->exec_time) | ||
| 125 | ); | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Tracing jobs completion | ||
| 129 | */ | ||
| 130 | TRACE_EVENT(litmus_task_completion, | ||
| 131 | |||
| 132 | TP_PROTO(struct task_struct *t, unsigned long forced), | ||
| 133 | |||
| 134 | TP_ARGS(t, forced), | ||
| 135 | |||
| 136 | TP_STRUCT__entry( | ||
| 137 | __field( pid_t, pid ) | ||
| 138 | __field( unsigned int, job ) | ||
| 139 | __field( unsigned long long, when ) | ||
| 140 | __field( unsigned long, forced ) | ||
| 141 | ), | ||
| 142 | |||
| 143 | TP_fast_assign( | ||
| 144 | __entry->pid = t ? t->pid : 0; | ||
| 145 | __entry->job = t ? t->rt_param.job_params.job_no : 0; | ||
| 146 | __entry->when = litmus_clock(); | ||
| 147 | __entry->forced = forced; | ||
| 148 | ), | ||
| 149 | |||
| 150 | TP_printk("completed(job(%u, %u)): %Lu (forced: %lu)\n", | ||
| 151 | __entry->pid, __entry->job, | ||
| 152 | __entry->when, __entry->forced) | ||
| 153 | ); | ||
| 154 | |||
| 155 | /* | ||
| 156 | * Trace blocking tasks. | ||
| 157 | */ | ||
| 158 | TRACE_EVENT(litmus_task_block, | ||
| 159 | |||
| 160 | TP_PROTO(struct task_struct *t, int lid), | ||
| 161 | |||
| 162 | TP_ARGS(t, lid), | ||
| 163 | |||
| 164 | TP_STRUCT__entry( | ||
| 165 | __field( pid_t, pid ) | ||
| 166 | __field( int, lid ) | ||
| 167 | __field( unsigned long long, when ) | ||
| 168 | ), | ||
| 169 | |||
| 170 | TP_fast_assign( | ||
| 171 | __entry->pid = t ? t->pid : 0; | ||
| 172 | __entry->lid = lid; | ||
| 173 | __entry->when = litmus_clock(); | ||
| 174 | ), | ||
| 175 | |||
| 176 | TP_printk("(%u) blocks on %d: %Lu\n", __entry->pid, | ||
| 177 | __entry->lid, __entry->when) | ||
| 178 | ); | ||
| 179 | |||
| 180 | /* | ||
| 181 | * Lock events | ||
| 182 | */ | ||
| 183 | TRACE_EVENT(litmus_resource_acquire, | ||
| 184 | |||
| 185 | TP_PROTO(struct task_struct *t, int lid), | ||
| 186 | |||
| 187 | TP_ARGS(t, lid), | ||
| 188 | |||
| 189 | TP_STRUCT__entry( | ||
| 190 | __field( pid_t, pid ) | ||
| 191 | __field( int, lid ) | ||
| 192 | __field( unsigned long long, when ) | ||
| 193 | ), | ||
| 194 | |||
| 195 | TP_fast_assign( | ||
| 196 | __entry->pid = t ? t->pid : 0; | ||
| 197 | __entry->lid = lid; | ||
| 198 | __entry->when = litmus_clock(); | ||
| 199 | ), | ||
| 200 | |||
| 201 | TP_printk("(%u) acquires %d: %Lu\n", __entry->pid, | ||
| 202 | __entry->lid, __entry->when) | ||
| 203 | ); | ||
| 204 | |||
| 205 | TRACE_EVENT(litmus_resource_release, | ||
| 206 | |||
| 207 | TP_PROTO(struct task_struct *t, int lid), | ||
| 208 | |||
| 209 | TP_ARGS(t, lid), | ||
| 210 | |||
| 211 | TP_STRUCT__entry( | ||
| 212 | __field( pid_t, pid ) | ||
| 213 | __field( int, lid ) | ||
| 214 | __field( unsigned long long, when ) | ||
| 215 | ), | ||
| 216 | |||
| 217 | TP_fast_assign( | ||
| 218 | __entry->pid = t ? t->pid : 0; | ||
| 219 | __entry->lid = lid; | ||
| 220 | __entry->when = litmus_clock(); | ||
| 221 | ), | ||
| 222 | |||
| 223 | TP_printk("(%u) releases %d: %Lu\n", __entry->pid, | ||
| 224 | __entry->lid, __entry->when) | ||
| 225 | ); | ||
| 226 | |||
| 227 | /* | ||
| 228 | * Tracing jobs resume | ||
| 229 | */ | ||
| 230 | TRACE_EVENT(litmus_task_resume, | ||
| 231 | |||
| 232 | TP_PROTO(struct task_struct *t, int lid), | ||
| 233 | |||
| 234 | TP_ARGS(t, lid), | ||
| 235 | |||
| 236 | TP_STRUCT__entry( | ||
| 237 | __field( pid_t, pid ) | ||
| 238 | __field( int, lid ) | ||
| 239 | __field( unsigned int, job ) | ||
| 240 | __field( unsigned long long, when ) | ||
| 241 | ), | ||
| 242 | |||
| 243 | TP_fast_assign( | ||
| 244 | __entry->pid = t ? t->pid : 0; | ||
| 245 | __entry->job = t ? t->rt_param.job_params.job_no : 0; | ||
| 246 | __entry->when = litmus_clock(); | ||
| 247 | __entry->lid = lid; | ||
| 248 | ), | ||
| 249 | |||
| 250 | TP_printk("resume(job(%u, %u)) on %d: %Lu\n", | ||
| 251 | __entry->pid, __entry->job, | ||
| 252 | __entry->lid, __entry->when) | ||
| 253 | ); | ||
| 254 | |||
| 255 | /* | ||
| 256 | * Trace synchronous release | ||
| 257 | */ | ||
| 258 | TRACE_EVENT(litmus_sys_release, | ||
| 259 | |||
| 260 | TP_PROTO(unsigned long long *start), | ||
| 261 | |||
| 262 | TP_ARGS(start), | ||
| 263 | |||
| 264 | TP_STRUCT__entry( | ||
| 265 | __field( unsigned long long, rel ) | ||
| 266 | __field( unsigned long long, when ) | ||
| 267 | ), | ||
| 268 | |||
| 269 | TP_fast_assign( | ||
| 270 | __entry->rel = *start; | ||
| 271 | __entry->when = litmus_clock(); | ||
| 272 | ), | ||
| 273 | |||
| 274 | TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when) | ||
| 275 | ); | ||
| 276 | |||
| 277 | /* | ||
| 278 | * Containers | ||
| 279 | */ | ||
| 280 | TRACE_EVENT(litmus_container_param, | ||
| 281 | |||
| 282 | TP_PROTO(int cid, const char *name), | ||
| 283 | |||
| 284 | TP_ARGS(cid, name), | ||
| 285 | |||
| 286 | TP_STRUCT__entry( | ||
| 287 | __field( int, cid ) | ||
| 288 | __array( char, name, TASK_COMM_LEN ) | ||
| 289 | ), | ||
| 290 | |||
| 291 | TP_fast_assign( | ||
| 292 | memcpy(__entry->name, name, TASK_COMM_LEN); | ||
| 293 | __entry->cid = cid; | ||
| 294 | ), | ||
| 295 | |||
| 296 | TP_printk("container, name: %s, id: %d\n", __entry->name, __entry->cid) | ||
| 297 | ); | ||
| 298 | |||
| 299 | TRACE_EVENT(litmus_server_param, | ||
| 300 | |||
| 301 | TP_PROTO(int sid, int cid, unsigned long long wcet, unsigned long long period), | ||
| 302 | |||
| 303 | TP_ARGS(sid, cid, wcet, period), | ||
| 304 | |||
| 305 | TP_STRUCT__entry( | ||
| 306 | __field( int, sid ) | ||
| 307 | __field( int, cid ) | ||
| 308 | __field( unsigned long long, wcet ) | ||
| 309 | __field( unsigned long long, period ) | ||
| 310 | ), | ||
| 311 | |||
| 312 | TP_fast_assign( | ||
| 313 | __entry->cid = cid; | ||
| 314 | __entry->sid = sid; | ||
| 315 | __entry->wcet = wcet; | ||
| 316 | __entry->period = period; | ||
| 317 | ), | ||
| 318 | |||
| 319 | TP_printk("server(%llu, %llu), sid: %d, cont: %d\n", | ||
| 320 | __entry->wcet, __entry->period, __entry->sid, __entry->cid) | ||
| 321 | ); | ||
| 322 | |||
| 323 | TRACE_EVENT(litmus_server_switch_to, | ||
| 324 | |||
| 325 | TP_PROTO(int sid, unsigned int job, int tid, unsigned int tjob, int cpu), | ||
| 326 | |||
| 327 | TP_ARGS(sid, job, tid, tjob, cpu), | ||
| 328 | |||
| 329 | TP_STRUCT__entry( | ||
| 330 | __field( int, sid) | ||
| 331 | __field( unsigned int, job) | ||
| 332 | __field( int, tid) | ||
| 333 | __field( unsigned int, tjob) | ||
| 334 | __field( int, cpu) | ||
| 335 | ), | ||
| 336 | |||
| 337 | TP_fast_assign( | ||
| 338 | __entry->sid = sid; | ||
| 339 | __entry->tid = tid; | ||
| 340 | __entry->job = job; | ||
| 341 | __entry->tjob = tjob; | ||
| 342 | __entry->cpu = cpu; | ||
| 343 | ), | ||
| 344 | |||
| 345 | TP_printk("switch_to(server(%d, %u)): (%d, %d) on %d\n", | ||
| 346 | __entry->sid, __entry->job, __entry->tid, __entry->tjob, __entry->cpu) | ||
| 347 | ); | ||
| 348 | |||
| 349 | TRACE_EVENT(litmus_server_switch_away, | ||
| 350 | |||
| 351 | TP_PROTO(int sid, unsigned int job, int tid, unsigned int tjob, int cpu), | ||
| 352 | |||
| 353 | TP_ARGS(sid, job, tid, tjob, cpu), | ||
| 354 | |||
| 355 | TP_STRUCT__entry( | ||
| 356 | __field( int, sid) | ||
| 357 | __field( unsigned int, job) | ||
| 358 | __field( int, tid) | ||
| 359 | __field( unsigned int, tjob) | ||
| 360 | __field( int, cpu) | ||
| 361 | ), | ||
| 362 | |||
| 363 | TP_fast_assign( | ||
| 364 | __entry->sid = sid; | ||
| 365 | __entry->tid = tid; | ||
| 366 | __entry->job = job; | ||
| 367 | __entry->tjob = tjob; | ||
| 368 | __entry->cpu = cpu; | ||
| 369 | ), | ||
| 370 | |||
| 371 | TP_printk("switch_away(server(%d, %u)): (%d, %d) on %d\n", | ||
| 372 | __entry->sid, __entry->job, __entry->tid, __entry->tjob, __entry->cpu) | ||
| 373 | ); | ||
| 374 | |||
| 375 | TRACE_EVENT(litmus_server_release, | ||
| 376 | |||
| 377 | TP_PROTO(int sid, unsigned int job, | ||
| 378 | unsigned long long release, | ||
| 379 | unsigned long long deadline), | ||
| 380 | |||
| 381 | TP_ARGS(sid, job, release, deadline), | ||
| 382 | |||
| 383 | TP_STRUCT__entry( | ||
| 384 | __field( int, sid) | ||
| 385 | __field( unsigned int, job) | ||
| 386 | __field( unsigned long long, release) | ||
| 387 | __field( unsigned long long, deadline) | ||
| 388 | ), | ||
| 389 | |||
| 390 | TP_fast_assign( | ||
| 391 | __entry->sid = sid; | ||
| 392 | __entry->job = job; | ||
| 393 | __entry->release = release; | ||
| 394 | __entry->deadline = deadline; | ||
| 395 | ), | ||
| 396 | |||
| 397 | TP_printk("release(server(%d, %u)), release: %llu, deadline: %llu\n", | ||
| 398 | __entry->sid, __entry->job, __entry->release, __entry->deadline) | ||
| 399 | ); | ||
| 400 | |||
| 401 | TRACE_EVENT(litmus_server_completion, | ||
| 402 | |||
| 403 | TP_PROTO(int sid, int job), | ||
| 404 | |||
| 405 | TP_ARGS(sid, job), | ||
| 406 | |||
| 407 | TP_STRUCT__entry( | ||
| 408 | __field( int, sid) | ||
| 409 | __field( unsigned int, job) | ||
| 410 | ), | ||
| 411 | |||
| 412 | TP_fast_assign( | ||
| 413 | __entry->sid = sid; | ||
| 414 | __entry->job = job; | ||
| 415 | ), | ||
| 416 | |||
| 417 | TP_printk("completion(server(%d, %d))\n", __entry->sid, __entry->job) | ||
| 418 | ); | ||
| 419 | |||
| 420 | #endif /* _SCHED_TASK_TRACEPOINT_H */ | ||
| 421 | |||
| 422 | /* Must stay outside the protection */ | ||
| 423 | #include <trace/define_trace.h> | ||
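The trace_litmus_*() helpers generated from the TRACE_EVENT() definitions above can be fired from scheduler code like any other kernel tracepoint once the corresponding config option is enabled. A minimal, hedged sketch of two call sites; the include path and the assumption that the tracepoints are instantiated elsewhere with CREATE_TRACE_POINTS are not shown in this diff:

    /* Sketch only: assumes this header lives at include/trace/events/litmus.h
     * and that CREATE_TRACE_POINTS is defined in exactly one other file. */
    #include <linux/sched.h>
    #include <trace/events/litmus.h>

    static void example_on_release(struct task_struct *t)
    {
            /* records pid, job number, release time, and deadline */
            trace_litmus_task_release(t);
    }

    static void example_on_completion(struct task_struct *t)
    {
            /* forced == 0: the job signalled completion on its own */
            trace_litmus_task_completion(t, 0);
    }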
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 533c49f4804..4d6f3474e8f 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/ftrace_event.h> | 19 | #include <linux/ftrace_event.h> |
| 20 | #include <litmus/litmus.h> | ||
| 20 | 21 | ||
| 21 | /* | 22 | /* |
| 22 | * DECLARE_EVENT_CLASS can be used to add a generic function | 23 | * DECLARE_EVENT_CLASS can be used to add a generic function |
| @@ -54,7 +55,7 @@ | |||
| 54 | #define __string(item, src) __dynamic_array(char, item, -1) | 55 | #define __string(item, src) __dynamic_array(char, item, -1) |
| 55 | 56 | ||
| 56 | #undef TP_STRUCT__entry | 57 | #undef TP_STRUCT__entry |
| 57 | #define TP_STRUCT__entry(args...) args | 58 | #define TP_STRUCT__entry(args...) args __field( unsigned long long, __rt_ts ) |
| 58 | 59 | ||
| 59 | #undef DECLARE_EVENT_CLASS | 60 | #undef DECLARE_EVENT_CLASS |
| 60 | #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ | 61 | #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ |
| @@ -507,7 +508,7 @@ static inline notrace int ftrace_get_offsets_##call( \ | |||
| 507 | strcpy(__get_str(dst), src); | 508 | strcpy(__get_str(dst), src); |
| 508 | 509 | ||
| 509 | #undef TP_fast_assign | 510 | #undef TP_fast_assign |
| 510 | #define TP_fast_assign(args...) args | 511 | #define TP_fast_assign(args...) args; __entry->__rt_ts = litmus_clock(); |
| 511 | 512 | ||
| 512 | #undef TP_perf_assign | 513 | #undef TP_perf_assign |
| 513 | #define TP_perf_assign(args...) | 514 | #define TP_perf_assign(args...) |
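The two overrides above quietly extend every ftrace event with a LITMUS^RT timestamp: TP_STRUCT__entry() appends an extra __rt_ts field to the record, and TP_fast_assign() stores litmus_clock() into it after the event's own assignments have run. Roughly, the effective expansion for any event looks like the following sketch (all names other than __rt_ts and litmus_clock() are illustrative):

    /* Effective record layout after the TP_STRUCT__entry() override: */
    struct example_trace_entry {
            /* ... fields declared by the event's own TP_STRUCT__entry() ... */
            unsigned long long __rt_ts;     /* appended by the override above */
    };

    /* Effective assignment step after the TP_fast_assign() override:
     *     <the event's own TP_fast_assign() statements>;
     *     __entry->__rt_ts = litmus_clock();
     */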
diff --git a/kernel/exit.c b/kernel/exit.c index 9e316ae4984..9d13da8a8c2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -57,6 +57,8 @@ | |||
| 57 | #include <asm/pgtable.h> | 57 | #include <asm/pgtable.h> |
| 58 | #include <asm/mmu_context.h> | 58 | #include <asm/mmu_context.h> |
| 59 | 59 | ||
| 60 | extern void exit_od_table(struct task_struct *t); | ||
| 61 | |||
| 60 | static void exit_mm(struct task_struct * tsk); | 62 | static void exit_mm(struct task_struct * tsk); |
| 61 | 63 | ||
| 62 | static void __unhash_process(struct task_struct *p, bool group_dead) | 64 | static void __unhash_process(struct task_struct *p, bool group_dead) |
| @@ -970,6 +972,8 @@ NORET_TYPE void do_exit(long code) | |||
| 970 | if (unlikely(tsk->audit_context)) | 972 | if (unlikely(tsk->audit_context)) |
| 971 | audit_free(tsk); | 973 | audit_free(tsk); |
| 972 | 974 | ||
| 975 | exit_od_table(tsk); | ||
| 976 | |||
| 973 | tsk->exit_code = code; | 977 | tsk->exit_code = code; |
| 974 | taskstats_exit(tsk, group_dead); | 978 | taskstats_exit(tsk, group_dead); |
| 975 | 979 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index f65fa0627c0..067992d4838 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -76,6 +76,9 @@ | |||
| 76 | 76 | ||
| 77 | #include <trace/events/sched.h> | 77 | #include <trace/events/sched.h> |
| 78 | 78 | ||
| 79 | #include <litmus/litmus.h> | ||
| 80 | #include <litmus/sched_plugin.h> | ||
| 81 | |||
| 79 | /* | 82 | /* |
| 80 | * Protected counters by write_lock_irq(&tasklist_lock) | 83 | * Protected counters by write_lock_irq(&tasklist_lock) |
| 81 | */ | 84 | */ |
| @@ -205,6 +208,7 @@ void __put_task_struct(struct task_struct *tsk) | |||
| 205 | WARN_ON(atomic_read(&tsk->usage)); | 208 | WARN_ON(atomic_read(&tsk->usage)); |
| 206 | WARN_ON(tsk == current); | 209 | WARN_ON(tsk == current); |
| 207 | 210 | ||
| 211 | exit_litmus(tsk); | ||
| 208 | exit_creds(tsk); | 212 | exit_creds(tsk); |
| 209 | delayacct_tsk_free(tsk); | 213 | delayacct_tsk_free(tsk); |
| 210 | put_signal_struct(tsk->signal); | 214 | put_signal_struct(tsk->signal); |
| @@ -290,6 +294,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
| 290 | 294 | ||
| 291 | tsk->stack = ti; | 295 | tsk->stack = ti; |
| 292 | 296 | ||
| 297 | /* Don't let the new task be a real-time task. */ | ||
| 298 | litmus_fork(tsk); | ||
| 299 | |||
| 293 | err = prop_local_init_single(&tsk->dirties); | 300 | err = prop_local_init_single(&tsk->dirties); |
| 294 | if (err) | 301 | if (err) |
| 295 | goto out; | 302 | goto out; |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 2043c08d36c..2391745f656 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
| @@ -46,6 +46,8 @@ | |||
| 46 | #include <linux/sched.h> | 46 | #include <linux/sched.h> |
| 47 | #include <linux/timer.h> | 47 | #include <linux/timer.h> |
| 48 | 48 | ||
| 49 | #include <litmus/litmus.h> | ||
| 50 | |||
| 49 | #include <asm/uaccess.h> | 51 | #include <asm/uaccess.h> |
| 50 | 52 | ||
| 51 | #include <trace/events/timer.h> | 53 | #include <trace/events/timer.h> |
| @@ -1028,6 +1030,98 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
| 1028 | } | 1030 | } |
| 1029 | EXPORT_SYMBOL_GPL(hrtimer_start); | 1031 | EXPORT_SYMBOL_GPL(hrtimer_start); |
| 1030 | 1032 | ||
| 1033 | #ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS | ||
| 1034 | |||
| 1035 | /** | ||
| 1036 | * hrtimer_start_on_info_init - Initialize hrtimer_start_on_info | ||
| 1037 | */ | ||
| 1038 | void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info) | ||
| 1039 | { | ||
| 1040 | memset(info, 0, sizeof(struct hrtimer_start_on_info)); | ||
| 1041 | atomic_set(&info->state, HRTIMER_START_ON_INACTIVE); | ||
| 1042 | } | ||
| 1043 | |||
| 1044 | /** | ||
| 1045 | * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu | ||
| 1046 | */ | ||
| 1047 | void hrtimer_pull(void) | ||
| 1048 | { | ||
| 1049 | struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); | ||
| 1050 | struct hrtimer_start_on_info *info; | ||
| 1051 | struct list_head *pos, *safe, list; | ||
| 1052 | |||
| 1053 | raw_spin_lock(&base->lock); | ||
| 1054 | list_replace_init(&base->to_pull, &list); | ||
| 1055 | raw_spin_unlock(&base->lock); | ||
| 1056 | |||
| 1057 | list_for_each_safe(pos, safe, &list) { | ||
| 1058 | info = list_entry(pos, struct hrtimer_start_on_info, list); | ||
| 1059 | TRACE("pulled timer %p\n", info->timer); | ||
| 1060 | list_del(pos); | ||
| 1061 | hrtimer_start(info->timer, info->time, info->mode); | ||
| 1062 | } | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | /** | ||
| 1066 | * hrtimer_start_on - trigger timer arming on remote cpu | ||
| 1067 | * @cpu: remote cpu | ||
| 1068 | * @info: save timer information for enqueuing on remote cpu | ||
| 1069 | * @timer: timer to be pulled | ||
| 1070 | * @time: expire time | ||
| 1071 | * @mode: timer mode | ||
| 1072 | */ | ||
| 1073 | int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info, | ||
| 1074 | struct hrtimer *timer, ktime_t time, | ||
| 1075 | const enum hrtimer_mode mode) | ||
| 1076 | { | ||
| 1077 | unsigned long flags; | ||
| 1078 | struct hrtimer_cpu_base* base; | ||
| 1079 | int in_use = 0, was_empty; | ||
| 1080 | |||
| 1081 | /* serialize access to info through the timer base */ | ||
| 1082 | lock_hrtimer_base(timer, &flags); | ||
| 1083 | |||
| 1084 | in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE); | ||
| 1085 | if (!in_use) { | ||
| 1086 | INIT_LIST_HEAD(&info->list); | ||
| 1087 | info->timer = timer; | ||
| 1088 | info->time = time; | ||
| 1089 | info->mode = mode; | ||
| 1090 | /* mark as in use */ | ||
| 1091 | atomic_set(&info->state, HRTIMER_START_ON_QUEUED); | ||
| 1092 | } | ||
| 1093 | |||
| 1094 | unlock_hrtimer_base(timer, &flags); | ||
| 1095 | |||
| 1096 | if (!in_use) { | ||
| 1097 | /* initiate pull */ | ||
| 1098 | preempt_disable(); | ||
| 1099 | if (cpu == smp_processor_id()) { | ||
| 1100 | /* start timer locally; we may get called | ||
| 1101 | * with rq->lock held, do not wake up anything | ||
| 1102 | */ | ||
| 1103 | TRACE("hrtimer_start_on: starting on local CPU\n"); | ||
| 1104 | __hrtimer_start_range_ns(info->timer, info->time, | ||
| 1105 | 0, info->mode, 0); | ||
| 1106 | } else { | ||
| 1107 | TRACE("hrtimer_start_on: pulling to remote CPU\n"); | ||
| 1108 | base = &per_cpu(hrtimer_bases, cpu); | ||
| 1109 | raw_spin_lock_irqsave(&base->lock, flags); | ||
| 1110 | was_empty = list_empty(&base->to_pull); | ||
| 1111 | list_add(&info->list, &base->to_pull); | ||
| 1112 | raw_spin_unlock_irqrestore(&base->lock, flags); | ||
| 1113 | if (was_empty) | ||
| 1114 | /* only send an IPI if no one else | ||
| 1115 | * has done so already | ||
| 1116 | */ | ||
| 1117 | smp_send_pull_timers(cpu); | ||
| 1118 | } | ||
| 1119 | preempt_enable(); | ||
| 1120 | } | ||
| 1121 | return in_use; | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | #endif | ||
| 1031 | 1125 | ||
| 1032 | /** | 1126 | /** |
| 1033 | * hrtimer_try_to_cancel - try to deactivate a timer | 1127 | * hrtimer_try_to_cancel - try to deactivate a timer |
| @@ -1627,6 +1721,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) | |||
| 1627 | } | 1721 | } |
| 1628 | 1722 | ||
| 1629 | hrtimer_init_hres(cpu_base); | 1723 | hrtimer_init_hres(cpu_base); |
| 1724 | INIT_LIST_HEAD(&cpu_base->to_pull); | ||
| 1630 | } | 1725 | } |
| 1631 | 1726 | ||
| 1632 | #ifdef CONFIG_HOTPLUG_CPU | 1727 | #ifdef CONFIG_HOTPLUG_CPU |
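The hrtimer_start_on() interface added above lets a caller arm a timer on a remote CPU: if the info block is free, the request is queued on that CPU's to_pull list and an IPI is sent (unless one is already pending), and hrtimer_pull() later re-arms the timer locally on the target CPU. A hedged sketch of how a release-master-style user might drive this interface; the wrapper struct and function names are illustrative, only the hrtimer_start_on*() calls come from this patch:

    #include <linux/hrtimer.h>

    struct example_release_state {
            struct hrtimer timer;
            struct hrtimer_start_on_info info;
    };

    static void example_setup(struct example_release_state *s,
                              enum hrtimer_restart (*fn)(struct hrtimer *))
    {
            hrtimer_init(&s->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
            s->timer.function = fn;
            hrtimer_start_on_info_init(&s->info);
    }

    static void example_arm_on_cpu(struct example_release_state *s,
                                   int cpu, unsigned long long when_ns)
    {
            /* returns non-zero if the info block was still in use */
            hrtimer_start_on(cpu, &s->info, &s->timer,
                             ns_to_ktime(when_ns), HRTIMER_MODE_ABS);
    }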
diff --git a/kernel/printk.c b/kernel/printk.c index 1baace7d867..cbebc142be1 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -74,6 +74,13 @@ int console_printk[4] = { | |||
| 74 | }; | 74 | }; |
| 75 | 75 | ||
| 76 | /* | 76 | /* |
| 77 | * divert printk() messages when there is a LITMUS^RT debug listener | ||
| 78 | */ | ||
| 79 | #include <litmus/litmus.h> | ||
| 80 | int trace_override = 0; | ||
| 81 | int trace_recurse = 0; | ||
| 82 | |||
| 83 | /* | ||
| 77 | * Low level drivers may need that to know if they can schedule in | 84 | * Low level drivers may need that to know if they can schedule in |
| 78 | * their unblank() callback or not. So let's export it. | 85 | * their unblank() callback or not. So let's export it. |
| 79 | */ | 86 | */ |
| @@ -926,6 +933,9 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
| 926 | /* Emit the output into the temporary buffer */ | 933 | /* Emit the output into the temporary buffer */ |
| 927 | printed_len += vscnprintf(printk_buf + printed_len, | 934 | printed_len += vscnprintf(printk_buf + printed_len, |
| 928 | sizeof(printk_buf) - printed_len, fmt, args); | 935 | sizeof(printk_buf) - printed_len, fmt, args); |
| 936 | /* if LITMUS^RT tracer is active divert printk() msgs */ | ||
| 937 | if (trace_override && !trace_recurse) | ||
| 938 | TRACE("%s", printk_buf); | ||
| 929 | 939 | ||
| 930 | #ifdef CONFIG_DEBUG_LL | 940 | #ifdef CONFIG_DEBUG_LL |
| 931 | printascii(printk_buf); | 941 | printascii(printk_buf); |
| @@ -1006,7 +1016,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
| 1006 | * Try to acquire and then immediately release the | 1016 | * Try to acquire and then immediately release the |
| 1007 | * console semaphore. The release will do all the | 1017 | * console semaphore. The release will do all the |
| 1008 | * actual magic (print out buffers, wake up klogd, | 1018 | * actual magic (print out buffers, wake up klogd, |
| 1009 | * etc). | 1019 | * etc). |
| 1010 | * | 1020 | * |
| 1011 | * The console_trylock_for_printk() function | 1021 | * The console_trylock_for_printk() function |
| 1012 | * will release 'logbuf_lock' regardless of whether it | 1022 | * will release 'logbuf_lock' regardless of whether it |
| @@ -1278,7 +1288,7 @@ int printk_needs_cpu(int cpu) | |||
| 1278 | 1288 | ||
| 1279 | void wake_up_klogd(void) | 1289 | void wake_up_klogd(void) |
| 1280 | { | 1290 | { |
| 1281 | if (waitqueue_active(&log_wait)) | 1291 | if (!trace_override && waitqueue_active(&log_wait)) |
| 1282 | this_cpu_write(printk_pending, 1); | 1292 | this_cpu_write(printk_pending, 1); |
| 1283 | } | 1293 | } |
| 1284 | 1294 | ||
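With trace_override set (presumably flipped by the LITMUS^RT debug-log device when a reader attaches), vprintk() copies each message into the TRACE() stream and klogd wake-ups are suppressed. The companion trace_recurse flag suggests a guard so that the TRACE() implementation itself can print without re-entering the diversion; a sketch of that pattern, with all names other than the two flags hypothetical:

    extern int trace_override;
    extern int trace_recurse;

    /* Hypothetical writer inside the debug-trace implementation. */
    static void example_trace_emit(const char *msg)
    {
            trace_recurse = 1;      /* anything we print must not be diverted */
            /* ... format msg and append it to the debug ring buffer ... */
            trace_recurse = 0;
    }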
diff --git a/kernel/sched.c b/kernel/sched.c index f6cf5cbc64b..a1bf2646d12 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -87,6 +87,11 @@ | |||
| 87 | #define CREATE_TRACE_POINTS | 87 | #define CREATE_TRACE_POINTS |
| 88 | #include <trace/events/sched.h> | 88 | #include <trace/events/sched.h> |
| 89 | 89 | ||
| 90 | #include <litmus/sched_trace.h> | ||
| 91 | #include <litmus/trace.h> | ||
| 92 | |||
| 93 | static void litmus_tick(struct rq*, struct task_struct*); | ||
| 94 | |||
| 90 | /* | 95 | /* |
| 91 | * Convert user-nice values [ -20 ... 0 ... 19 ] | 96 | * Convert user-nice values [ -20 ... 0 ... 19 ] |
| 92 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], | 97 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], |
| @@ -414,6 +419,12 @@ struct rt_rq { | |||
| 414 | #endif | 419 | #endif |
| 415 | }; | 420 | }; |
| 416 | 421 | ||
| 422 | /* Litmus related fields in a runqueue */ | ||
| 423 | struct litmus_rq { | ||
| 424 | unsigned long nr_running; | ||
| 425 | struct task_struct *prev; | ||
| 426 | }; | ||
| 427 | |||
| 417 | #ifdef CONFIG_SMP | 428 | #ifdef CONFIG_SMP |
| 418 | 429 | ||
| 419 | /* | 430 | /* |
| @@ -479,6 +490,7 @@ struct rq { | |||
| 479 | 490 | ||
| 480 | struct cfs_rq cfs; | 491 | struct cfs_rq cfs; |
| 481 | struct rt_rq rt; | 492 | struct rt_rq rt; |
| 493 | struct litmus_rq litmus; | ||
| 482 | 494 | ||
| 483 | #ifdef CONFIG_FAIR_GROUP_SCHED | 495 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 484 | /* list of leaf cfs_rq on this cpu: */ | 496 | /* list of leaf cfs_rq on this cpu: */ |
| @@ -1054,6 +1066,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
| 1054 | raw_spin_lock(&rq->lock); | 1066 | raw_spin_lock(&rq->lock); |
| 1055 | update_rq_clock(rq); | 1067 | update_rq_clock(rq); |
| 1056 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); | 1068 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); |
| 1069 | litmus_tick(rq, rq->curr); | ||
| 1057 | raw_spin_unlock(&rq->lock); | 1070 | raw_spin_unlock(&rq->lock); |
| 1058 | 1071 | ||
| 1059 | return HRTIMER_NORESTART; | 1072 | return HRTIMER_NORESTART; |
| @@ -1750,7 +1763,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
| 1750 | 1763 | ||
| 1751 | static const struct sched_class rt_sched_class; | 1764 | static const struct sched_class rt_sched_class; |
| 1752 | 1765 | ||
| 1753 | #define sched_class_highest (&stop_sched_class) | 1766 | #define sched_class_highest (&litmus_sched_class) |
| 1754 | #define for_each_class(class) \ | 1767 | #define for_each_class(class) \ |
| 1755 | for (class = sched_class_highest; class; class = class->next) | 1768 | for (class = sched_class_highest; class; class = class->next) |
| 1756 | 1769 | ||
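Redefining sched_class_highest makes the LITMUS^RT scheduling class the first class consulted on every for_each_class() walk; presumably litmus_sched_class.next chains on to the stock classes (stop, rt, fair, idle) so that non-real-time tasks are scheduled exactly as before. The traversal this hooks into looks roughly like the following sketch (the chain order in the comment is an assumption about sched_litmus.c, which is not shown here):

    /* Sketch of the class walk performed by pick_next_task(). */
    static struct task_struct *example_pick(struct rq *rq)
    {
            const struct sched_class *class;
            struct task_struct *p;

            /* assumed order: litmus -> stop -> rt -> fair -> idle */
            for_each_class(class) {
                    p = class->pick_next_task(rq);
                    if (p)
                            return p;
            }
            return NULL;    /* unreachable: the idle class always returns a task */
    }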
| @@ -2044,6 +2057,7 @@ static int irqtime_account_si_update(void) | |||
| 2044 | #include "sched_rt.c" | 2057 | #include "sched_rt.c" |
| 2045 | #include "sched_autogroup.c" | 2058 | #include "sched_autogroup.c" |
| 2046 | #include "sched_stoptask.c" | 2059 | #include "sched_stoptask.c" |
| 2060 | #include "../litmus/sched_litmus.c" | ||
| 2047 | #ifdef CONFIG_SCHED_DEBUG | 2061 | #ifdef CONFIG_SCHED_DEBUG |
| 2048 | # include "sched_debug.c" | 2062 | # include "sched_debug.c" |
| 2049 | #endif | 2063 | #endif |
| @@ -2166,6 +2180,10 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
| 2166 | * A queue event has occurred, and we're going to schedule. In | 2180 | * A queue event has occurred, and we're going to schedule. In |
| 2167 | * this case, we can save a useless back to back clock update. | 2181 | * this case, we can save a useless back to back clock update. |
| 2168 | */ | 2182 | */ |
| 2183 | /* LITMUS^RT: | ||
| 2184 | * The "disable-clock-update" approach was buggy in Linux 2.6.36. | ||
| 2185 | * The issue has been solved in 2.6.37. | ||
| 2186 | */ | ||
| 2169 | if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) | 2187 | if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) |
| 2170 | rq->skip_clock_update = 1; | 2188 | rq->skip_clock_update = 1; |
| 2171 | } | 2189 | } |
| @@ -2592,8 +2610,12 @@ void scheduler_ipi(void) | |||
| 2592 | struct rq *rq = this_rq(); | 2610 | struct rq *rq = this_rq(); |
| 2593 | struct task_struct *list = xchg(&rq->wake_list, NULL); | 2611 | struct task_struct *list = xchg(&rq->wake_list, NULL); |
| 2594 | 2612 | ||
| 2595 | if (!list) | 2613 | if (!list) { |
| 2614 | /* If we don't call irq_enter(), we need to trigger the IRQ | ||
| 2615 | * tracing manually. */ | ||
| 2616 | ft_irq_fired(); | ||
| 2596 | return; | 2617 | return; |
| 2618 | } | ||
| 2597 | 2619 | ||
| 2598 | /* | 2620 | /* |
| 2599 | * Not all reschedule IPI handlers call irq_enter/irq_exit, since | 2621 | * Not all reschedule IPI handlers call irq_enter/irq_exit, since |
| @@ -2656,7 +2678,12 @@ static void ttwu_queue(struct task_struct *p, int cpu) | |||
| 2656 | struct rq *rq = cpu_rq(cpu); | 2678 | struct rq *rq = cpu_rq(cpu); |
| 2657 | 2679 | ||
| 2658 | #if defined(CONFIG_SMP) | 2680 | #if defined(CONFIG_SMP) |
| 2659 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { | 2681 | /* |
| 2682 | * LITMUS^RT: whether to send an IPI to the remote CPU | ||
| 2683 | * is plugin specific. | ||
| 2684 | */ | ||
| 2685 | if (!is_realtime(p) && | ||
| 2686 | sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { | ||
| 2660 | sched_clock_cpu(cpu); /* sync clocks x-cpu */ | 2687 | sched_clock_cpu(cpu); /* sync clocks x-cpu */ |
| 2661 | ttwu_queue_remote(p, cpu); | 2688 | ttwu_queue_remote(p, cpu); |
| 2662 | return; | 2689 | return; |
| @@ -2689,6 +2716,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
| 2689 | unsigned long flags; | 2716 | unsigned long flags; |
| 2690 | int cpu, success = 0; | 2717 | int cpu, success = 0; |
| 2691 | 2718 | ||
| 2719 | if (is_realtime(p)) | ||
| 2720 | TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); | ||
| 2721 | |||
| 2692 | smp_wmb(); | 2722 | smp_wmb(); |
| 2693 | raw_spin_lock_irqsave(&p->pi_lock, flags); | 2723 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
| 2694 | if (!(p->state & state)) | 2724 | if (!(p->state & state)) |
| @@ -2725,6 +2755,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
| 2725 | */ | 2755 | */ |
| 2726 | smp_rmb(); | 2756 | smp_rmb(); |
| 2727 | 2757 | ||
| 2758 | /* LITMUS^RT: once the task can be safely referenced by this | ||
| 2759 | * CPU, don't mess with Linux load balancing. | ||
| 2760 | */ | ||
| 2761 | if (is_realtime(p)) | ||
| 2762 | goto litmus_out_activate; | ||
| 2763 | |||
| 2728 | p->sched_contributes_to_load = !!task_contributes_to_load(p); | 2764 | p->sched_contributes_to_load = !!task_contributes_to_load(p); |
| 2729 | p->state = TASK_WAKING; | 2765 | p->state = TASK_WAKING; |
| 2730 | 2766 | ||
| @@ -2736,12 +2772,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
| 2736 | wake_flags |= WF_MIGRATED; | 2772 | wake_flags |= WF_MIGRATED; |
| 2737 | set_task_cpu(p, cpu); | 2773 | set_task_cpu(p, cpu); |
| 2738 | } | 2774 | } |
| 2775 | |||
| 2776 | litmus_out_activate: | ||
| 2739 | #endif /* CONFIG_SMP */ | 2777 | #endif /* CONFIG_SMP */ |
| 2740 | 2778 | ||
| 2741 | ttwu_queue(p, cpu); | 2779 | ttwu_queue(p, cpu); |
| 2742 | stat: | 2780 | stat: |
| 2743 | ttwu_stat(p, cpu, wake_flags); | 2781 | ttwu_stat(p, cpu, wake_flags); |
| 2744 | out: | 2782 | out: |
| 2783 | if (is_realtime(p)) | ||
| 2784 | TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state); | ||
| 2745 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | 2785 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
| 2746 | 2786 | ||
| 2747 | return success; | 2787 | return success; |
| @@ -2852,7 +2892,8 @@ void sched_fork(struct task_struct *p) | |||
| 2852 | * Revert to default priority/policy on fork if requested. | 2892 | * Revert to default priority/policy on fork if requested. |
| 2853 | */ | 2893 | */ |
| 2854 | if (unlikely(p->sched_reset_on_fork)) { | 2894 | if (unlikely(p->sched_reset_on_fork)) { |
| 2855 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { | 2895 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR || |
| 2896 | p->policy == SCHED_LITMUS) { | ||
| 2856 | p->policy = SCHED_NORMAL; | 2897 | p->policy = SCHED_NORMAL; |
| 2857 | p->normal_prio = p->static_prio; | 2898 | p->normal_prio = p->static_prio; |
| 2858 | } | 2899 | } |
| @@ -3063,6 +3104,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 3063 | */ | 3104 | */ |
| 3064 | prev_state = prev->state; | 3105 | prev_state = prev->state; |
| 3065 | finish_arch_switch(prev); | 3106 | finish_arch_switch(prev); |
| 3107 | litmus->finish_switch(prev); | ||
| 3108 | prev->rt_param.stack_in_use = NO_CPU; | ||
| 3066 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 3109 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
| 3067 | local_irq_disable(); | 3110 | local_irq_disable(); |
| 3068 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 3111 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ |
| @@ -3092,6 +3135,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev) | |||
| 3092 | { | 3135 | { |
| 3093 | if (prev->sched_class->pre_schedule) | 3136 | if (prev->sched_class->pre_schedule) |
| 3094 | prev->sched_class->pre_schedule(rq, prev); | 3137 | prev->sched_class->pre_schedule(rq, prev); |
| 3138 | |||
| 3139 | /* LITMUS^RT: not a very clean hack; we need to save the prev task | ||
| 3140 | * because our scheduling decision relies on it (once we drop the rq | ||
| 3141 | * lock, something in prev can change...). There is no way to escape | ||
| 3142 | * this hack apart from modifying pick_next_task(rq, _prev_) or | ||
| 3143 | * falling back on the previous solution of decoupling | ||
| 3144 | * scheduling decisions. | ||
| 3145 | */ | ||
| 3146 | rq->litmus.prev = prev; | ||
| 3095 | } | 3147 | } |
| 3096 | 3148 | ||
| 3097 | /* rq->lock is NOT held, but preemption is disabled */ | 3149 | /* rq->lock is NOT held, but preemption is disabled */ |
| @@ -3128,16 +3180,26 @@ static inline void post_schedule(struct rq *rq) | |||
| 3128 | asmlinkage void schedule_tail(struct task_struct *prev) | 3180 | asmlinkage void schedule_tail(struct task_struct *prev) |
| 3129 | __releases(rq->lock) | 3181 | __releases(rq->lock) |
| 3130 | { | 3182 | { |
| 3131 | struct rq *rq = this_rq(); | 3183 | struct rq *rq; |
| 3132 | 3184 | ||
| 3185 | preempt_disable(); | ||
| 3186 | |||
| 3187 | rq = this_rq(); | ||
| 3133 | finish_task_switch(rq, prev); | 3188 | finish_task_switch(rq, prev); |
| 3134 | 3189 | ||
| 3190 | sched_trace_task_switch_to(current); | ||
| 3191 | |||
| 3135 | /* | 3192 | /* |
| 3136 | * FIXME: do we need to worry about rq being invalidated by the | 3193 | * FIXME: do we need to worry about rq being invalidated by the |
| 3137 | * task_switch? | 3194 | * task_switch? |
| 3138 | */ | 3195 | */ |
| 3139 | post_schedule(rq); | 3196 | post_schedule(rq); |
| 3140 | 3197 | ||
| 3198 | if (sched_state_validate_switch()) | ||
| 3199 | litmus_reschedule_local(); | ||
| 3200 | |||
| 3201 | preempt_enable(); | ||
| 3202 | |||
| 3141 | #ifdef __ARCH_WANT_UNLOCKED_CTXSW | 3203 | #ifdef __ARCH_WANT_UNLOCKED_CTXSW |
| 3142 | /* In this case, finish_task_switch does not reenable preemption */ | 3204 | /* In this case, finish_task_switch does not reenable preemption */ |
| 3143 | preempt_enable(); | 3205 | preempt_enable(); |
| @@ -4108,18 +4170,26 @@ void scheduler_tick(void) | |||
| 4108 | 4170 | ||
| 4109 | sched_clock_tick(); | 4171 | sched_clock_tick(); |
| 4110 | 4172 | ||
| 4173 | TS_TICK_START(current); | ||
| 4174 | |||
| 4111 | raw_spin_lock(&rq->lock); | 4175 | raw_spin_lock(&rq->lock); |
| 4112 | update_rq_clock(rq); | 4176 | update_rq_clock(rq); |
| 4113 | update_cpu_load_active(rq); | 4177 | update_cpu_load_active(rq); |
| 4114 | curr->sched_class->task_tick(rq, curr, 0); | 4178 | curr->sched_class->task_tick(rq, curr, 0); |
| 4179 | |||
| 4180 | /* litmus_tick may force current to resched */ | ||
| 4181 | litmus_tick(rq, curr); | ||
| 4182 | |||
| 4115 | raw_spin_unlock(&rq->lock); | 4183 | raw_spin_unlock(&rq->lock); |
| 4116 | 4184 | ||
| 4117 | perf_event_task_tick(); | 4185 | perf_event_task_tick(); |
| 4118 | 4186 | ||
| 4119 | #ifdef CONFIG_SMP | 4187 | #ifdef CONFIG_SMP |
| 4120 | rq->idle_at_tick = idle_cpu(cpu); | 4188 | rq->idle_at_tick = idle_cpu(cpu); |
| 4121 | trigger_load_balance(rq, cpu); | 4189 | if (!is_realtime(current)) |
| 4190 | trigger_load_balance(rq, cpu); | ||
| 4122 | #endif | 4191 | #endif |
| 4192 | TS_TICK_END(current); | ||
| 4123 | } | 4193 | } |
| 4124 | 4194 | ||
| 4125 | notrace unsigned long get_parent_ip(unsigned long addr) | 4195 | notrace unsigned long get_parent_ip(unsigned long addr) |
| @@ -4239,12 +4309,20 @@ pick_next_task(struct rq *rq) | |||
| 4239 | /* | 4309 | /* |
| 4240 | * Optimization: we know that if all tasks are in | 4310 | * Optimization: we know that if all tasks are in |
| 4241 | * the fair class we can call that function directly: | 4311 | * the fair class we can call that function directly: |
| 4242 | */ | 4312 | |
| 4243 | if (likely(rq->nr_running == rq->cfs.nr_running)) { | 4313 | * NOT IN LITMUS^RT! |
| 4314 | |||
| 4315 | * This breaks many assumptions in the plugins. | ||
| 4316 | * Do not uncomment without thinking long and hard | ||
| 4317 | * about how this affects global plugins such as GSN-EDF. | ||
| 4318 | |||
| 4319 | if (rq->nr_running == rq->cfs.nr_running) { | ||
| 4320 | TRACE("taking shortcut in pick_next_task()\n"); | ||
| 4244 | p = fair_sched_class.pick_next_task(rq); | 4321 | p = fair_sched_class.pick_next_task(rq); |
| 4245 | if (likely(p)) | 4322 | if (likely(p)) |
| 4246 | return p; | 4323 | return p; |
| 4247 | } | 4324 | } |
| 4325 | */ | ||
| 4248 | 4326 | ||
| 4249 | for_each_class(class) { | 4327 | for_each_class(class) { |
| 4250 | p = class->pick_next_task(rq); | 4328 | p = class->pick_next_task(rq); |
| @@ -4267,11 +4345,19 @@ static void __sched __schedule(void) | |||
| 4267 | 4345 | ||
| 4268 | need_resched: | 4346 | need_resched: |
| 4269 | preempt_disable(); | 4347 | preempt_disable(); |
| 4348 | sched_state_entered_schedule(); | ||
| 4270 | cpu = smp_processor_id(); | 4349 | cpu = smp_processor_id(); |
| 4271 | rq = cpu_rq(cpu); | 4350 | rq = cpu_rq(cpu); |
| 4272 | rcu_note_context_switch(cpu); | 4351 | rcu_note_context_switch(cpu); |
| 4273 | prev = rq->curr; | 4352 | prev = rq->curr; |
| 4274 | 4353 | ||
| 4354 | /* LITMUS^RT: quickly re-evaluate the scheduling decision | ||
| 4355 | * if the previous one is no longer valid after the context switch. | ||
| 4356 | */ | ||
| 4357 | litmus_need_resched_nonpreemptible: | ||
| 4358 | TS_SCHED_START; | ||
| 4359 | sched_trace_task_switch_away(prev); | ||
| 4360 | |||
| 4275 | schedule_debug(prev); | 4361 | schedule_debug(prev); |
| 4276 | 4362 | ||
| 4277 | if (sched_feat(HRTICK)) | 4363 | if (sched_feat(HRTICK)) |
| @@ -4318,7 +4404,10 @@ need_resched: | |||
| 4318 | rq->curr = next; | 4404 | rq->curr = next; |
| 4319 | ++*switch_count; | 4405 | ++*switch_count; |
| 4320 | 4406 | ||
| 4407 | TS_SCHED_END(next); | ||
| 4408 | TS_CXS_START(next); | ||
| 4321 | context_switch(rq, prev, next); /* unlocks the rq */ | 4409 | context_switch(rq, prev, next); /* unlocks the rq */ |
| 4410 | TS_CXS_END(current); | ||
| 4322 | /* | 4411 | /* |
| 4323 | * The context switch have flipped the stack from under us | 4412 | * The context switch have flipped the stack from under us |
| 4324 | * and restored the local variables which were saved when | 4413 | * and restored the local variables which were saved when |
| @@ -4327,14 +4416,29 @@ need_resched: | |||
| 4327 | */ | 4416 | */ |
| 4328 | cpu = smp_processor_id(); | 4417 | cpu = smp_processor_id(); |
| 4329 | rq = cpu_rq(cpu); | 4418 | rq = cpu_rq(cpu); |
| 4330 | } else | 4419 | } else { |
| 4420 | TS_SCHED_END(prev); | ||
| 4331 | raw_spin_unlock_irq(&rq->lock); | 4421 | raw_spin_unlock_irq(&rq->lock); |
| 4422 | } | ||
| 4423 | |||
| 4424 | TS_SCHED2_START(prev); | ||
| 4425 | sched_trace_task_switch_to(current); | ||
| 4332 | 4426 | ||
| 4333 | post_schedule(rq); | 4427 | post_schedule(rq); |
| 4334 | 4428 | ||
| 4429 | if (sched_state_validate_switch()) { | ||
| 4430 | TS_SCHED2_END(prev); | ||
| 4431 | goto litmus_need_resched_nonpreemptible; | ||
| 4432 | } | ||
| 4433 | |||
| 4335 | preempt_enable_no_resched(); | 4434 | preempt_enable_no_resched(); |
| 4435 | |||
| 4436 | TS_SCHED2_END(prev); | ||
| 4437 | |||
| 4336 | if (need_resched()) | 4438 | if (need_resched()) |
| 4337 | goto need_resched; | 4439 | goto need_resched; |
| 4440 | |||
| 4441 | srp_ceiling_block(); | ||
| 4338 | } | 4442 | } |
| 4339 | 4443 | ||
| 4340 | static inline void sched_submit_work(struct task_struct *tsk) | 4444 | static inline void sched_submit_work(struct task_struct *tsk) |
| @@ -5056,7 +5160,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
| 5056 | p->normal_prio = normal_prio(p); | 5160 | p->normal_prio = normal_prio(p); |
| 5057 | /* we are holding p->pi_lock already */ | 5161 | /* we are holding p->pi_lock already */ |
| 5058 | p->prio = rt_mutex_getprio(p); | 5162 | p->prio = rt_mutex_getprio(p); |
| 5059 | if (rt_prio(p->prio)) | 5163 | if (p->policy == SCHED_LITMUS) |
| 5164 | p->sched_class = &litmus_sched_class; | ||
| 5165 | else if (rt_prio(p->prio)) | ||
| 5060 | p->sched_class = &rt_sched_class; | 5166 | p->sched_class = &rt_sched_class; |
| 5061 | else | 5167 | else |
| 5062 | p->sched_class = &fair_sched_class; | 5168 | p->sched_class = &fair_sched_class; |
| @@ -5104,7 +5210,7 @@ recheck: | |||
| 5104 | 5210 | ||
| 5105 | if (policy != SCHED_FIFO && policy != SCHED_RR && | 5211 | if (policy != SCHED_FIFO && policy != SCHED_RR && |
| 5106 | policy != SCHED_NORMAL && policy != SCHED_BATCH && | 5212 | policy != SCHED_NORMAL && policy != SCHED_BATCH && |
| 5107 | policy != SCHED_IDLE) | 5213 | policy != SCHED_IDLE && policy != SCHED_LITMUS) |
| 5108 | return -EINVAL; | 5214 | return -EINVAL; |
| 5109 | } | 5215 | } |
| 5110 | 5216 | ||
| @@ -5119,6 +5225,8 @@ recheck: | |||
| 5119 | return -EINVAL; | 5225 | return -EINVAL; |
| 5120 | if (rt_policy(policy) != (param->sched_priority != 0)) | 5226 | if (rt_policy(policy) != (param->sched_priority != 0)) |
| 5121 | return -EINVAL; | 5227 | return -EINVAL; |
| 5228 | if (policy == SCHED_LITMUS && policy == p->policy) | ||
| 5229 | return -EINVAL; | ||
| 5122 | 5230 | ||
| 5123 | /* | 5231 | /* |
| 5124 | * Allow unprivileged RT tasks to decrease priority: | 5232 | * Allow unprivileged RT tasks to decrease priority: |
| @@ -5162,6 +5270,12 @@ recheck: | |||
| 5162 | return retval; | 5270 | return retval; |
| 5163 | } | 5271 | } |
| 5164 | 5272 | ||
| 5273 | if (policy == SCHED_LITMUS) { | ||
| 5274 | retval = litmus_admit_task(p); | ||
| 5275 | if (retval) | ||
| 5276 | return retval; | ||
| 5277 | } | ||
| 5278 | |||
| 5165 | /* | 5279 | /* |
| 5166 | * make sure no PI-waiters arrive (or leave) while we are | 5280 | * make sure no PI-waiters arrive (or leave) while we are |
| 5167 | * changing the priority of the task: | 5281 | * changing the priority of the task: |
| @@ -5220,10 +5334,19 @@ recheck: | |||
| 5220 | 5334 | ||
| 5221 | p->sched_reset_on_fork = reset_on_fork; | 5335 | p->sched_reset_on_fork = reset_on_fork; |
| 5222 | 5336 | ||
| 5337 | if (p->policy == SCHED_LITMUS) | ||
| 5338 | litmus_exit_task(p); | ||
| 5339 | |||
| 5223 | oldprio = p->prio; | 5340 | oldprio = p->prio; |
| 5224 | prev_class = p->sched_class; | 5341 | prev_class = p->sched_class; |
| 5225 | __setscheduler(rq, p, policy, param->sched_priority); | 5342 | __setscheduler(rq, p, policy, param->sched_priority); |
| 5226 | 5343 | ||
| 5344 | if (policy == SCHED_LITMUS) { | ||
| 5345 | p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU; | ||
| 5346 | p->rt_param.present = running; | ||
| 5347 | litmus->task_new(p, on_rq, running); | ||
| 5348 | } | ||
| 5349 | |||
| 5227 | if (running) | 5350 | if (running) |
| 5228 | p->sched_class->set_curr_task(rq); | 5351 | p->sched_class->set_curr_task(rq); |
| 5229 | if (on_rq) | 5352 | if (on_rq) |
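With these hooks in place, moving a thread into SCHED_LITMUS runs litmus_admit_task() for admission control and then hands the task to the active plugin via task_new(). From user space the switch is an ordinary sched_setscheduler() call, except that the LITMUS^RT task parameters must be installed first (via the set_rt_task_param system call, normally through its liblitmus wrapper). A hedged userspace sketch, assuming the liblitmus header <litmus.h> provides struct rt_task, SCHED_LITMUS, and set_rt_task_param():

    #include <sched.h>
    #include <string.h>
    #include <unistd.h>
    #include <litmus.h>     /* liblitmus; assumed to define the names below */

    static int example_become_rt(unsigned long long wcet_ns,
                                 unsigned long long period_ns)
    {
            struct rt_task param;
            struct sched_param sp = { .sched_priority = 0 };

            memset(&param, 0, sizeof(param));
            param.exec_cost = wcet_ns;
            param.period    = period_ns;

            if (set_rt_task_param(getpid(), &param) < 0)
                    return -1;
            /* litmus_admit_task() and litmus->task_new() run inside here */
            return sched_setscheduler(getpid(), SCHED_LITMUS, &sp);
    }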
| @@ -5391,10 +5514,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
| 5391 | rcu_read_lock(); | 5514 | rcu_read_lock(); |
| 5392 | 5515 | ||
| 5393 | p = find_process_by_pid(pid); | 5516 | p = find_process_by_pid(pid); |
| 5394 | if (!p) { | 5517 | /* Don't set affinity if the task was not found or is a LITMUS task */ |
| 5518 | if (!p || is_realtime(p)) { | ||
| 5395 | rcu_read_unlock(); | 5519 | rcu_read_unlock(); |
| 5396 | put_online_cpus(); | 5520 | put_online_cpus(); |
| 5397 | return -ESRCH; | 5521 | return p ? -EPERM : -ESRCH; |
| 5398 | } | 5522 | } |
| 5399 | 5523 | ||
| 5400 | /* Prevent p going away */ | 5524 | /* Prevent p going away */ |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index bc8ee999381..22999b257ad 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -1872,6 +1872,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
| 1872 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1872 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
| 1873 | int next_buddy_marked = 0; | 1873 | int next_buddy_marked = 0; |
| 1874 | 1874 | ||
| 1875 | if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS) | ||
| 1876 | goto preempt; | ||
| 1877 | |||
| 1875 | if (unlikely(se == pse)) | 1878 | if (unlikely(se == pse)) |
| 1876 | return; | 1879 | return; |
| 1877 | 1880 | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index af1177858be..b827550a0d0 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | * policies) | 3 | * policies) |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| 6 | #include <litmus/litmus.h> | ||
| 7 | |||
| 6 | #ifdef CONFIG_RT_GROUP_SCHED | 8 | #ifdef CONFIG_RT_GROUP_SCHED |
| 7 | 9 | ||
| 8 | #define rt_entity_is_task(rt_se) (!(rt_se)->my_q) | 10 | #define rt_entity_is_task(rt_se) (!(rt_se)->my_q) |
| @@ -240,8 +242,11 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | |||
| 240 | if (rt_rq->rt_nr_running) { | 242 | if (rt_rq->rt_nr_running) { |
| 241 | if (rt_se && !on_rt_rq(rt_se)) | 243 | if (rt_se && !on_rt_rq(rt_se)) |
| 242 | enqueue_rt_entity(rt_se, false); | 244 | enqueue_rt_entity(rt_se, false); |
| 243 | if (rt_rq->highest_prio.curr < curr->prio) | 245 | if (rt_rq->highest_prio.curr < curr->prio && |
| 246 | /* Don't subject LITMUS tasks to remote reschedules */ | ||
| 247 | !is_realtime(curr)) { | ||
| 244 | resched_task(curr); | 248 | resched_task(curr); |
| 249 | } | ||
| 245 | } | 250 | } |
| 246 | } | 251 | } |
| 247 | 252 | ||
| @@ -334,8 +339,10 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | |||
| 334 | 339 | ||
| 335 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | 340 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
| 336 | { | 341 | { |
| 337 | if (rt_rq->rt_nr_running) | 342 | struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; |
| 338 | resched_task(rq_of_rt_rq(rt_rq)->curr); | 343 | |
| 344 | if (rt_rq->rt_nr_running && !is_realtime(curr)) | ||
| 345 | resched_task(curr); | ||
| 339 | } | 346 | } |
| 340 | 347 | ||
| 341 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) | 348 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
| @@ -1090,7 +1097,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
| 1090 | */ | 1097 | */ |
| 1091 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) | 1098 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) |
| 1092 | { | 1099 | { |
| 1093 | if (p->prio < rq->curr->prio) { | 1100 | if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) { |
| 1094 | resched_task(rq->curr); | 1101 | resched_task(rq->curr); |
| 1095 | return; | 1102 | return; |
| 1096 | } | 1103 | } |
diff --git a/kernel/softirq.c b/kernel/softirq.c index fca82c32042..2f2df08df39 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -211,6 +211,9 @@ asmlinkage void __do_softirq(void) | |||
| 211 | int max_restart = MAX_SOFTIRQ_RESTART; | 211 | int max_restart = MAX_SOFTIRQ_RESTART; |
| 212 | int cpu; | 212 | int cpu; |
| 213 | 213 | ||
| 214 | /* Mark Feather-Trace samples as "disturbed". */ | ||
| 215 | ft_irq_fired(); | ||
| 216 | |||
| 214 | pending = local_softirq_pending(); | 217 | pending = local_softirq_pending(); |
| 215 | account_system_vtime(current); | 218 | account_system_vtime(current); |
| 216 | 219 | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d5097c44b40..0c0e02f1b81 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -766,12 +766,53 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
| 766 | } | 766 | } |
| 767 | 767 | ||
| 768 | /** | 768 | /** |
| 769 | * tick_set_quanta_type - get the quanta type as a boot option | ||
| 770 | * Default is standard setup with ticks staggered over first | ||
| 771 | * half of tick period. | ||
| 772 | */ | ||
| 773 | int quanta_type = LINUX_DEFAULT_TICKS; | ||
| 774 | static int __init tick_set_quanta_type(char *str) | ||
| 775 | { | ||
| 776 | if (strcmp("aligned", str) == 0) { | ||
| 777 | quanta_type = LITMUS_ALIGNED_TICKS; | ||
| 778 | printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n"); | ||
| 779 | } | ||
| 780 | else if (strcmp("staggered", str) == 0) { | ||
| 781 | quanta_type = LITMUS_STAGGERED_TICKS; | ||
| 782 | printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n"); | ||
| 783 | } | ||
| 784 | return 1; | ||
| 785 | } | ||
| 786 | __setup("quanta=", tick_set_quanta_type); | ||
| 787 | |||
| 788 | u64 cpu_stagger_offset(int cpu) | ||
| 789 | { | ||
| 790 | u64 offset = 0; | ||
| 791 | switch (quanta_type) { | ||
| 792 | case LITMUS_ALIGNED_TICKS: | ||
| 793 | offset = 0; | ||
| 794 | break; | ||
| 795 | case LITMUS_STAGGERED_TICKS: | ||
| 796 | offset = ktime_to_ns(tick_period); | ||
| 797 | do_div(offset, num_possible_cpus()); | ||
| 798 | offset *= cpu; | ||
| 799 | break; | ||
| 800 | default: | ||
| 801 | offset = ktime_to_ns(tick_period) >> 1; | ||
| 802 | do_div(offset, num_possible_cpus()); | ||
| 803 | offset *= cpu; | ||
| 804 | } | ||
| 805 | return offset; | ||
| 806 | } | ||
| 807 | |||
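cpu_stagger_offset() spreads the per-CPU tick timers across the tick period: aligned quanta use no offset, staggered quanta distribute all CPUs over the full period, and the Linux default distributes them over the first half. For example, booting with quanta=staggered on a 4-CPU system with a 1 ms tick yields offsets of 0, 250, 500, and 750 microseconds for CPUs 0-3. A small user-space mirror of the staggered case (illustrative only, not kernel code):

    #include <stdio.h>

    int main(void)
    {
            const unsigned long long tick_ns = 1000000ULL;  /* 1 ms tick */
            const int ncpus = 4;

            for (int cpu = 0; cpu < ncpus; cpu++) {
                    /* LITMUS_STAGGERED_TICKS: tick_period / ncpus * cpu */
                    unsigned long long off = tick_ns / ncpus * cpu;
                    printf("cpu %d: offset %llu ns\n", cpu, off);
            }
            return 0;
    }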
| 808 | /** | ||
| 769 | * tick_setup_sched_timer - setup the tick emulation timer | 809 | * tick_setup_sched_timer - setup the tick emulation timer |
| 770 | */ | 810 | */ |
| 771 | void tick_setup_sched_timer(void) | 811 | void tick_setup_sched_timer(void) |
| 772 | { | 812 | { |
| 773 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 813 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
| 774 | ktime_t now = ktime_get(); | 814 | ktime_t now = ktime_get(); |
| 815 | u64 offset; | ||
| 775 | 816 | ||
| 776 | /* | 817 | /* |
| 777 | * Emulate tick processing via per-CPU hrtimers: | 818 | * Emulate tick processing via per-CPU hrtimers: |
| @@ -782,6 +823,12 @@ void tick_setup_sched_timer(void) | |||
| 782 | /* Get the next period (per cpu) */ | 823 | /* Get the next period (per cpu) */ |
| 783 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); | 824 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); |
| 784 | 825 | ||
| 826 | /* Offset must be set correctly to achieve desired quanta type. */ | ||
| 827 | offset = cpu_stagger_offset(smp_processor_id()); | ||
| 828 | |||
| 829 | /* Add the correct offset to expiration time */ | ||
| 830 | hrtimer_add_expires_ns(&ts->sched_timer, offset); | ||
| 831 | |||
| 785 | for (;;) { | 832 | for (;;) { |
| 786 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 833 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
| 787 | hrtimer_start_expires(&ts->sched_timer, | 834 | hrtimer_start_expires(&ts->sched_timer, |
diff --git a/litmus/Kconfig b/litmus/Kconfig new file mode 100644 index 00000000000..795fbe1a769 --- /dev/null +++ b/litmus/Kconfig | |||
| @@ -0,0 +1,303 @@ | |||
| 1 | menu "LITMUS^RT" | ||
| 2 | |||
| 3 | menu "Scheduling" | ||
| 4 | |||
| 5 | config PLUGIN_CEDF | ||
| 6 | bool "Clustered-EDF" | ||
| 7 | depends on X86 && SYSFS | ||
| 8 | default y | ||
| 9 | help | ||
| 10 | Include the Clustered EDF (C-EDF) plugin in the kernel. | ||
| 11 | This is appropriate for large platforms with shared caches. | ||
| 12 | On smaller platforms (e.g., ARM PB11MPCore), using C-EDF | ||
| 13 | makes little sense since there aren't any shared caches. | ||
| 14 | |||
| 15 | config PLUGIN_PFAIR | ||
| 16 | bool "PFAIR" | ||
| 17 | depends on HIGH_RES_TIMERS && !NO_HZ | ||
| 18 | default y | ||
| 19 | help | ||
| 20 | Include the PFAIR plugin (i.e., the PD^2 scheduler) in the kernel. | ||
| 21 | The PFAIR plugin requires high resolution timers (for staggered quanta) | ||
| 22 | and does not support NO_HZ (quanta could be missed when the system is idle). | ||
| 23 | |||
| 24 | If unsure, say Yes. | ||
| 25 | |||
| 26 | config RELEASE_MASTER | ||
| 27 | bool "Release-master Support" | ||
| 28 | depends on ARCH_HAS_SEND_PULL_TIMERS | ||
| 29 | default n | ||
| 30 | help | ||
| 31 | Allow one processor to act as a dedicated interrupt processor | ||
| 32 | that services all timer interrupts, but that does not schedule | ||
| 33 | real-time tasks. See RTSS'09 paper for details | ||
| 34 | (http://www.cs.unc.edu/~anderson/papers.html). | ||
| 35 | Currently only supported by GSN-EDF. | ||
| 36 | |||
| 37 | endmenu | ||
| 38 | |||
| 39 | menu "Real-Time Synchronization" | ||
| 40 | |||
| 41 | config NP_SECTION | ||
| 42 | bool "Non-preemptive section support" | ||
| 43 | default n | ||
| 44 | help | ||
| 45 | Allow tasks to become non-preemptable. | ||
| 46 | Note that plugins still need to explicitly support non-preemptive sections. | ||
| 47 | Currently, only GSN-EDF and PSN-EDF have such support. | ||
| 48 | |||
| 49 | This is required to support locking protocols such as the FMLP. | ||
| 50 | If disabled, all tasks will be considered preemptable at all times. | ||
| 51 | |||
| 52 | config LITMUS_LOCKING | ||
| 53 | bool "Support for real-time locking protocols" | ||
| 54 | depends on NP_SECTION | ||
| 55 | default n | ||
| 56 | help | ||
| 57 | Enable LITMUS^RT's deterministic multiprocessor real-time | ||
| 58 | locking protocols. | ||
| 59 | |||
| 60 | Say Yes if you want to include locking protocols such as the FMLP and | ||
| 61 | Baker's SRP. | ||
| 62 | |||
| 63 | endmenu | ||
| 64 | |||
| 65 | menu "Performance Enhancements" | ||
| 66 | |||
| 67 | config SCHED_CPU_AFFINITY | ||
| 68 | bool "Local Migration Affinity" | ||
| 69 | depends on X86 | ||
| 70 | default y | ||
| 71 | help | ||
| 72 | Rescheduled tasks prefer CPUs near to their previously used CPU. This | ||
| 73 | may improve performance through possible preservation of cache affinity. | ||
| 74 | |||
| 75 | Warning: May make bugs harder to find since tasks may migrate less often. | ||
| 76 | |||
| 77 | NOTES: | ||
| 78 | * This feature is not utilized by PFair/PD^2. | ||
| 79 | |||
| 80 | Say Yes if unsure. | ||
| 81 | |||
| 82 | config ALLOW_EARLY_RELEASE | ||
| 83 | bool "Allow Early Releasing" | ||
| 84 | default y | ||
| 85 | help | ||
| 86 | Allow tasks to release jobs early (while still maintaining job | ||
| 87 | precedence constraints). Only supported by EDF schedulers. Early | ||
| 88 | releasing must be explicitly requested by real-time tasks via | ||
| 89 | the task_params passed to sys_set_rt_task_param(). | ||
| 90 | |||
| 91 | Early releasing can improve job response times while maintaining | ||
| 92 | real-time correctness. However, it can easily peg your CPUs | ||
| 93 | since tasks never suspend to wait for their next job. As such, early | ||
| 94 | releasing is really only useful in the context of implementing | ||
| 95 | bandwidth servers, interrupt handling threads, or short-lived | ||
| 96 | computations. | ||
| 97 | |||
| 98 | Beware that early releasing may affect real-time analysis | ||
| 99 | if using locking protocols or I/O. | ||
| 100 | |||
| 101 | Say Yes if unsure. | ||
| 102 | |||
| 103 | choice | ||
| 104 | prompt "EDF Tie-Break Behavior" | ||
| 105 | default EDF_TIE_BREAK_LATENESS_NORM | ||
| 106 | help | ||
| 107 | Allows the configuration of tie-breaking behavior when the deadlines | ||
| 108 | of two EDF-scheduled tasks are equal. | ||
| 109 | |||
| 110 | config EDF_TIE_BREAK_LATENESS | ||
| 111 | bool "Lateness-based Tie Break" | ||
| 112 | help | ||
| 113 | Break ties between two jobs, A and B, based upon the lateness of their | ||
| 114 | prior jobs. The job with the greatest lateness has priority. Note that | ||
| 115 | lateness has a negative value if the prior job finished before its | ||
| 116 | deadline. | ||
| 117 | |||
| 118 | config EDF_TIE_BREAK_LATENESS_NORM | ||
| 119 | bool "Normalized Lateness-based Tie Break" | ||
| 120 | help | ||
| 121 | Break ties between two jobs, A and B, based upon the lateness, normalized | ||
| 122 | by relative deadline, of their prior jobs. The job with the greatest | ||
| 123 | normalized lateness has priority. Note that lateness has a negative value | ||
| 124 | if the prior job finished before its deadline. | ||
| 125 | |||
| 126 | Normalized lateness tie-breaks are likely preferable to non-normalized | ||
| 127 | tie-breaks if the execution times and/or relative deadlines of tasks in a | ||
| 128 | task set vary greatly. | ||
| 129 | |||
| 130 | config EDF_TIE_BREAK_HASH | ||
| 131 | bool "Hash-based Tie Breaks" | ||
| 132 | help | ||
| 133 | Break ties between two jobs, A and B, with equal deadlines by using a | ||
| 134 | uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job | ||
| 135 | A has a ~50% chance of winning a given tie-break. | ||
| 136 | |||
| 137 | config EDF_PID_TIE_BREAK | ||
| 138 | bool "PID-based Tie Breaks" | ||
| 139 | help | ||
| 140 | Break ties based upon OS-assigned thread IDs. Use this option if it | ||
| 141 | is required by an algorithm's real-time analysis or if per-task | ||
| 142 | response-time jitter must be minimized. | ||
| 143 | |||
| 144 | NOTES: | ||
| 145 | * This tie-breaking method was the default in Litmus 2012.2 and earlier. | ||
| 146 | |||
| 147 | endchoice | ||
| 148 | |||
| 149 | endmenu | ||
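The tie-break choices above only matter when two EDF-scheduled jobs share a deadline; the lateness-based variants then compare how late each task's previous job completed. A hedged sketch of such a comparison (get_deadline() appears elsewhere in this patch; the lateness accessor and the exact structure of LITMUS^RT's edf_higher_prio() are assumptions):

    #include <linux/sched.h>
    #include <litmus/litmus.h>

    /* Sketch: 1 if 'a' has higher priority than 'b' under lateness tie-breaks. */
    static int example_edf_higher_prio(struct task_struct *a,
                                       struct task_struct *b)
    {
            if (get_deadline(a) != get_deadline(b))
                    return get_deadline(a) < get_deadline(b);
            /* equal deadlines: the task whose previous job was later wins */
            return get_lateness(a) > get_lateness(b);
    }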
| 150 | |||
| 151 | menu "Tracing" | ||
| 152 | |||
| 153 | config FEATHER_TRACE | ||
| 154 | bool "Feather-Trace Infrastructure" | ||
| 155 | default y | ||
| 156 | help | ||
| 157 | Feather-Trace basic tracing infrastructure. Includes device file | ||
| 158 | driver and instrumentation point support. | ||
| 159 | |||
| 160 | There are actually two implementations of Feather-Trace. | ||
| 161 | 1) A slower, but portable, default implementation. | ||
| 162 | 2) Architecture-specific implementations that rewrite kernel .text at runtime. | ||
| 163 | |||
| 164 | If enabled, Feather-Trace will be based on 2) if available (currently only for x86). | ||
| 165 | However, if DEBUG_RODATA=y, then Feather-Trace will choose option 1) in any case | ||
| 166 | to avoid problems with write-protected .text pages. | ||
| 167 | |||
| 168 | Bottom line: to avoid increased overheads, choose DEBUG_RODATA=n. | ||
| 169 | |||
| 170 | Note that this option only enables the basic Feather-Trace infrastructure; | ||
| 171 | you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to | ||
| 172 | actually enable any events. | ||
| 173 | |||
| 174 | config SCHED_TASK_TRACE | ||
| 175 | bool "Trace real-time tasks" | ||
| 176 | depends on FEATHER_TRACE | ||
| 177 | default y | ||
| 178 | help | ||
| 179 | Include support for the sched_trace_XXX() tracing functions. This | ||
| 180 | allows the collection of real-time task events such as job | ||
| 181 | completions, job releases, early completions, etc. This results in a | ||
| 182 | small overhead in the scheduling code. Disable if the overhead is not | ||
| 183 | acceptable (e.g., benchmarking). | ||
| 184 | |||
| 185 | Say Yes for debugging. | ||
| 186 | Say No for overhead tracing. | ||
| 187 | |||
| 188 | config SCHED_TASK_TRACE_SHIFT | ||
| 189 | int "Buffer size for sched_trace_xxx() events" | ||
| 190 | depends on SCHED_TASK_TRACE | ||
| 191 | range 8 13 | ||
| 192 | default 9 | ||
| 193 | help | ||
| 194 | |||
| 195 | Select the buffer size of sched_trace_xxx() events as a power of two. | ||
| 196 | These buffers are statically allocated as per-CPU data. Each event | ||
| 197 | requires 24 bytes of storage plus one additional flag byte. Too large | ||
| 198 | buffers can cause issues with the per-cpu allocator (and waste | ||
| 199 | memory). Too small buffers can cause scheduling events to be lost. The | ||
| 200 | "right" size is workload dependent and depends on the number of tasks, | ||
| 201 | each task's period, each task's number of suspensions, and how often | ||
| 202 | the buffer is flushed. | ||
| 203 | |||
| 204 | Examples: 12 => 4k events | ||
| 205 | 10 => 1k events | ||
| 206 | 8 => 512 events | ||
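As a rough sizing aid, the help text above fixes the per-event footprint at 25 bytes (24 bytes plus a flag byte), so the per-CPU memory cost of each allowed shift value is easy to tabulate. A small, stand-alone user-space sketch (not part of this patch):

```c
#include <stdio.h>

/* Per-CPU footprint of the sched_trace_xxx() buffer for each allowed shift,
 * assuming 24 bytes per event plus one flag byte, as stated above. */
int main(void)
{
	for (int shift = 8; shift <= 13; shift++) {
		unsigned long events = 1UL << shift;
		unsigned long bytes  = events * (24 + 1);
		printf("shift %2d: %5lu events, %6lu bytes per CPU\n",
		       shift, events, bytes);
	}
	return 0;
}
```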
| 207 | |||
| 208 | config SCHED_LITMUS_TRACEPOINT | ||
| 209 | bool "Enable Event/Tracepoint Tracing for real-time task tracing" | ||
| 210 | depends on TRACEPOINTS | ||
| 211 | default n | ||
| 212 | help | ||
| 213 | Enable kernel-style events (tracepoint) for Litmus. Litmus events | ||
| 214 | trace the same functions as the above sched_trace_XXX(), but can | ||
| 215 | be enabled independently. | ||
| 216 | Litmus tracepoints can be recorded and analyzed together (single | ||
| 217 | time reference) with all other kernel tracing events (e.g., | ||
| 218 | sched:sched_switch, etc.). | ||
| 219 | |||
| 220 | This also enables a quick way to visualize schedule traces using the | ||
| 221 | trace-cmd utility and the kernelshark visualizer. | ||
| 222 | |||
| 223 | Say Yes for debugging and visualization purposes. | ||
| 224 | Say No for overhead tracing. | ||
| 225 | |||
| 226 | config SCHED_OVERHEAD_TRACE | ||
| 227 | bool "Record timestamps for overhead measurements" | ||
| 228 | depends on FEATHER_TRACE | ||
| 229 | default n | ||
| 230 | help | ||
| 231 | Export event stream for overhead tracing. | ||
| 232 | Say Yes for overhead tracing. | ||
| 233 | |||
| 234 | config SCHED_DEBUG_TRACE | ||
| 235 | bool "TRACE() debugging" | ||
| 236 | default y | ||
| 237 | help | ||
| 238 | Include support for sched_trace_log_message(), which is used to | ||
| 239 | implement TRACE(). If disabled, no TRACE() messages will be included | ||
| 240 | in the kernel, and no overheads due to debugging statements will be | ||
| 241 | incurred by the scheduler. Disable if the overhead is not acceptable | ||
| 242 | (e.g. benchmarking). | ||
| 243 | |||
| 244 | Say Yes for debugging. | ||
| 245 | Say No for overhead tracing. | ||
| 246 | |||
| 247 | config SCHED_DEBUG_TRACE_SHIFT | ||
| 248 | int "Buffer size for TRACE() buffer" | ||
| 249 | depends on SCHED_DEBUG_TRACE | ||
| 250 | range 14 22 | ||
| 251 | default 18 | ||
| 252 | help | ||
| 253 | |||
| 254 | Select the amount of memory needed for the TRACE() buffer, as a | ||
| 255 | power of two. The TRACE() buffer is global and statically allocated. If | ||
| 256 | the buffer is too small, there will be holes in the TRACE() log if the | ||
| 257 | buffer-flushing task is starved. | ||
| 258 | |||
| 259 | The default should be sufficient for most systems. Increase the buffer | ||
| 260 | size if the log contains holes. Reduce the buffer size when running on | ||
| 261 | a memory-constrained system. | ||
| 262 | |||
| 263 | Examples: 14 => 16KB | ||
| 264 | 18 => 256KB | ||
| 265 | 20 => 1MB | ||
| 266 | |||
| 267 | This buffer is exported to userspace using a misc device as | ||
| 268 | 'litmus/log'. On a system with default udev rules, a corresponding | ||
| 269 | character device node should be created at /dev/litmus/log. The buffer | ||
| 270 | can be flushed using cat, e.g., 'cat /dev/litmus/log > my_log_file.txt'. | ||
| 271 | |||
| 272 | config SCHED_DEBUG_TRACE_CALLER | ||
| 273 | bool "Include [function@file:line] tag in TRACE() log" | ||
| 274 | depends on SCHED_DEBUG_TRACE | ||
| 275 | default n | ||
| 276 | help | ||
| 277 | With this option enabled, TRACE() prepends | ||
| 278 | |||
| 279 | "[<function name>@<filename>:<line number>]" | ||
| 280 | |||
| 281 | to each message in the debug log. Enable this to aid in figuring out | ||
| 282 | what was called in which order. The downside is that it adds a lot of | ||
| 283 | clutter. | ||
| 284 | |||
| 285 | If unsure, say No. | ||
| 286 | |||
| 287 | config PREEMPT_STATE_TRACE | ||
| 288 | bool "Trace preemption state machine transitions" | ||
| 289 | depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL | ||
| 290 | default n | ||
| 291 | help | ||
| 292 | With this option enabled, each CPU will log when it transitions | ||
| 293 | states in the preemption state machine. This state machine is | ||
| 294 | used to determine how to react to IPIs (avoid races with in-flight IPIs). | ||
| 295 | |||
| 296 | Warning: this creates a lot of information in the debug trace. Only | ||
| 297 | recommended when you are debugging preemption-related races. | ||
| 298 | |||
| 299 | If unsure, say No. | ||
| 300 | |||
| 301 | endmenu | ||
| 302 | |||
| 303 | endmenu | ||
diff --git a/litmus/Makefile b/litmus/Makefile new file mode 100644 index 00000000000..2bddc94a399 --- /dev/null +++ b/litmus/Makefile | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | # | ||
| 2 | # Makefile for LITMUS^RT | ||
| 3 | # | ||
| 4 | |||
| 5 | obj-y = sched_plugin.o litmus.o \ | ||
| 6 | preempt.o \ | ||
| 7 | litmus_proc.o \ | ||
| 8 | budget.o \ | ||
| 9 | clustered.o \ | ||
| 10 | jobs.o \ | ||
| 11 | sync.o \ | ||
| 12 | rt_domain.o \ | ||
| 13 | edf_common.o \ | ||
| 14 | fp_common.o \ | ||
| 15 | fdso.o \ | ||
| 16 | locking.o \ | ||
| 17 | srp.o \ | ||
| 18 | bheap.o \ | ||
| 19 | binheap.o \ | ||
| 20 | ctrldev.o \ | ||
| 21 | uncachedev.o \ | ||
| 22 | sched_gsn_edf.o \ | ||
| 23 | sched_psn_edf.o \ | ||
| 24 | sched_pfp.o | ||
| 25 | |||
| 26 | obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o | ||
| 27 | obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o | ||
| 28 | obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o | ||
| 29 | |||
| 30 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o | ||
| 31 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o | ||
| 32 | obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o | ||
| 33 | obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o | ||
diff --git a/litmus/affinity.c b/litmus/affinity.c new file mode 100644 index 00000000000..3fa6dd78940 --- /dev/null +++ b/litmus/affinity.c | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | #include <linux/cpu.h> | ||
| 2 | |||
| 3 | #include <litmus/affinity.h> | ||
| 4 | |||
| 5 | struct neighborhood neigh_info[NR_CPUS]; | ||
| 6 | |||
| 7 | /* called by _init_litmus() */ | ||
| 8 | void init_topology(void) { | ||
| 9 | int cpu; | ||
| 10 | int i; | ||
| 11 | int chk; | ||
| 12 | int depth = num_cache_leaves; | ||
| 13 | |||
| 14 | if (depth > NUM_CACHE_LEVELS) | ||
| 15 | depth = NUM_CACHE_LEVELS; | ||
| 16 | |||
| 17 | for_each_online_cpu(cpu) { | ||
| 18 | for (i = 0; i < depth; ++i) { | ||
| 19 | chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i); | ||
| 20 | if (chk) { | ||
| 21 | /* failed */ | ||
| 22 | neigh_info[cpu].size[i] = 0; | ||
| 23 | } else { | ||
| 24 | /* size = num bits in mask */ | ||
| 25 | neigh_info[cpu].size[i] = | ||
| 26 | cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); | ||
| 27 | } | ||
| 28 | printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", | ||
| 29 | cpu, neigh_info[cpu].size[i], i, | ||
| 30 | *cpumask_bits(neigh_info[cpu].neighbors[i])); | ||
| 31 | } | ||
| 32 | |||
| 33 | /* set data for non-existent levels */ | ||
| 34 | for (; i < NUM_CACHE_LEVELS; ++i) { | ||
| 35 | neigh_info[cpu].size[i] = 0; | ||
| 36 | |||
| 37 | printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", | ||
| 38 | cpu, neigh_info[cpu].size[i], i, 0lu); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
diff --git a/litmus/bheap.c b/litmus/bheap.c new file mode 100644 index 00000000000..528af97f18a --- /dev/null +++ b/litmus/bheap.c | |||
| @@ -0,0 +1,314 @@ | |||
| 1 | #include "linux/kernel.h" | ||
| 2 | #include "litmus/bheap.h" | ||
| 3 | |||
| 4 | void bheap_init(struct bheap* heap) | ||
| 5 | { | ||
| 6 | heap->head = NULL; | ||
| 7 | heap->min = NULL; | ||
| 8 | } | ||
| 9 | |||
| 10 | void bheap_node_init(struct bheap_node** _h, void* value) | ||
| 11 | { | ||
| 12 | struct bheap_node* h = *_h; | ||
| 13 | h->parent = NULL; | ||
| 14 | h->next = NULL; | ||
| 15 | h->child = NULL; | ||
| 16 | h->degree = NOT_IN_HEAP; | ||
| 17 | h->value = value; | ||
| 18 | h->ref = _h; | ||
| 19 | } | ||
| 20 | |||
| 21 | |||
| 22 | /* make child a subtree of root */ | ||
| 23 | static void __bheap_link(struct bheap_node* root, | ||
| 24 | struct bheap_node* child) | ||
| 25 | { | ||
| 26 | child->parent = root; | ||
| 27 | child->next = root->child; | ||
| 28 | root->child = child; | ||
| 29 | root->degree++; | ||
| 30 | } | ||
| 31 | |||
| 32 | /* merge root lists */ | ||
| 33 | static struct bheap_node* __bheap_merge(struct bheap_node* a, | ||
| 34 | struct bheap_node* b) | ||
| 35 | { | ||
| 36 | struct bheap_node* head = NULL; | ||
| 37 | struct bheap_node** pos = &head; | ||
| 38 | |||
| 39 | while (a && b) { | ||
| 40 | if (a->degree < b->degree) { | ||
| 41 | *pos = a; | ||
| 42 | a = a->next; | ||
| 43 | } else { | ||
| 44 | *pos = b; | ||
| 45 | b = b->next; | ||
| 46 | } | ||
| 47 | pos = &(*pos)->next; | ||
| 48 | } | ||
| 49 | if (a) | ||
| 50 | *pos = a; | ||
| 51 | else | ||
| 52 | *pos = b; | ||
| 53 | return head; | ||
| 54 | } | ||
| 55 | |||
| 56 | /* reverse a linked list of nodes. also clears parent pointer */ | ||
| 57 | static struct bheap_node* __bheap_reverse(struct bheap_node* h) | ||
| 58 | { | ||
| 59 | struct bheap_node* tail = NULL; | ||
| 60 | struct bheap_node* next; | ||
| 61 | |||
| 62 | if (!h) | ||
| 63 | return h; | ||
| 64 | |||
| 65 | h->parent = NULL; | ||
| 66 | while (h->next) { | ||
| 67 | next = h->next; | ||
| 68 | h->next = tail; | ||
| 69 | tail = h; | ||
| 70 | h = next; | ||
| 71 | h->parent = NULL; | ||
| 72 | } | ||
| 73 | h->next = tail; | ||
| 74 | return h; | ||
| 75 | } | ||
| 76 | |||
| 77 | static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap, | ||
| 78 | struct bheap_node** prev, struct bheap_node** node) | ||
| 79 | { | ||
| 80 | struct bheap_node *_prev, *cur; | ||
| 81 | *prev = NULL; | ||
| 82 | |||
| 83 | if (!heap->head) { | ||
| 84 | *node = NULL; | ||
| 85 | return; | ||
| 86 | } | ||
| 87 | |||
| 88 | *node = heap->head; | ||
| 89 | _prev = heap->head; | ||
| 90 | cur = heap->head->next; | ||
| 91 | while (cur) { | ||
| 92 | if (higher_prio(cur, *node)) { | ||
| 93 | *node = cur; | ||
| 94 | *prev = _prev; | ||
| 95 | } | ||
| 96 | _prev = cur; | ||
| 97 | cur = cur->next; | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap, | ||
| 102 | struct bheap_node* h2) | ||
| 103 | { | ||
| 104 | struct bheap_node* h1; | ||
| 105 | struct bheap_node *prev, *x, *next; | ||
| 106 | if (!h2) | ||
| 107 | return; | ||
| 108 | h1 = heap->head; | ||
| 109 | if (!h1) { | ||
| 110 | heap->head = h2; | ||
| 111 | return; | ||
| 112 | } | ||
| 113 | h1 = __bheap_merge(h1, h2); | ||
| 114 | prev = NULL; | ||
| 115 | x = h1; | ||
| 116 | next = x->next; | ||
| 117 | while (next) { | ||
| 118 | if (x->degree != next->degree || | ||
| 119 | (next->next && next->next->degree == x->degree)) { | ||
| 120 | /* nothing to do, advance */ | ||
| 121 | prev = x; | ||
| 122 | x = next; | ||
| 123 | } else if (higher_prio(x, next)) { | ||
| 124 | /* x becomes the root of next */ | ||
| 125 | x->next = next->next; | ||
| 126 | __bheap_link(x, next); | ||
| 127 | } else { | ||
| 128 | /* next becomes the root of x */ | ||
| 129 | if (prev) | ||
| 130 | prev->next = next; | ||
| 131 | else | ||
| 132 | h1 = next; | ||
| 133 | __bheap_link(next, x); | ||
| 134 | x = next; | ||
| 135 | } | ||
| 136 | next = x->next; | ||
| 137 | } | ||
| 138 | heap->head = h1; | ||
| 139 | } | ||
| 140 | |||
| 141 | static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio, | ||
| 142 | struct bheap* heap) | ||
| 143 | { | ||
| 144 | struct bheap_node *prev, *node; | ||
| 145 | __bheap_min(higher_prio, heap, &prev, &node); | ||
| 146 | if (!node) | ||
| 147 | return NULL; | ||
| 148 | if (prev) | ||
| 149 | prev->next = node->next; | ||
| 150 | else | ||
| 151 | heap->head = node->next; | ||
| 152 | __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); | ||
| 153 | return node; | ||
| 154 | } | ||
| 155 | |||
| 156 | /* insert (and reinitialize) a node into the heap */ | ||
| 157 | void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap, | ||
| 158 | struct bheap_node* node) | ||
| 159 | { | ||
| 160 | struct bheap_node *min; | ||
| 161 | node->child = NULL; | ||
| 162 | node->parent = NULL; | ||
| 163 | node->next = NULL; | ||
| 164 | node->degree = 0; | ||
| 165 | if (heap->min && higher_prio(node, heap->min)) { | ||
| 166 | /* swap min cache */ | ||
| 167 | min = heap->min; | ||
| 168 | min->child = NULL; | ||
| 169 | min->parent = NULL; | ||
| 170 | min->next = NULL; | ||
| 171 | min->degree = 0; | ||
| 172 | __bheap_union(higher_prio, heap, min); | ||
| 173 | heap->min = node; | ||
| 174 | } else | ||
| 175 | __bheap_union(higher_prio, heap, node); | ||
| 176 | } | ||
| 177 | |||
| 178 | void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap) | ||
| 179 | { | ||
| 180 | struct bheap_node* min; | ||
| 181 | if (heap->min) { | ||
| 182 | min = heap->min; | ||
| 183 | heap->min = NULL; | ||
| 184 | bheap_insert(higher_prio, heap, min); | ||
| 185 | } | ||
| 186 | } | ||
| 187 | |||
| 188 | /* merge addition into target */ | ||
| 189 | void bheap_union(bheap_prio_t higher_prio, | ||
| 190 | struct bheap* target, struct bheap* addition) | ||
| 191 | { | ||
| 192 | /* first insert any cached minima, if necessary */ | ||
| 193 | bheap_uncache_min(higher_prio, target); | ||
| 194 | bheap_uncache_min(higher_prio, addition); | ||
| 195 | __bheap_union(higher_prio, target, addition->head); | ||
| 196 | /* this is a destructive merge */ | ||
| 197 | addition->head = NULL; | ||
| 198 | } | ||
| 199 | |||
| 200 | struct bheap_node* bheap_peek(bheap_prio_t higher_prio, | ||
| 201 | struct bheap* heap) | ||
| 202 | { | ||
| 203 | if (!heap->min) | ||
| 204 | heap->min = __bheap_extract_min(higher_prio, heap); | ||
| 205 | return heap->min; | ||
| 206 | } | ||
| 207 | |||
| 208 | struct bheap_node* bheap_take(bheap_prio_t higher_prio, | ||
| 209 | struct bheap* heap) | ||
| 210 | { | ||
| 211 | struct bheap_node *node; | ||
| 212 | if (!heap->min) | ||
| 213 | heap->min = __bheap_extract_min(higher_prio, heap); | ||
| 214 | node = heap->min; | ||
| 215 | heap->min = NULL; | ||
| 216 | if (node) | ||
| 217 | node->degree = NOT_IN_HEAP; | ||
| 218 | return node; | ||
| 219 | } | ||
| 220 | |||
| 221 | int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node) | ||
| 222 | { | ||
| 223 | struct bheap_node *parent; | ||
| 224 | struct bheap_node** tmp_ref; | ||
| 225 | void* tmp; | ||
| 226 | |||
| 227 | /* bubble up */ | ||
| 228 | parent = node->parent; | ||
| 229 | while (parent && higher_prio(node, parent)) { | ||
| 230 | /* swap parent and node */ | ||
| 231 | tmp = parent->value; | ||
| 232 | parent->value = node->value; | ||
| 233 | node->value = tmp; | ||
| 234 | /* swap references */ | ||
| 235 | *(parent->ref) = node; | ||
| 236 | *(node->ref) = parent; | ||
| 237 | tmp_ref = parent->ref; | ||
| 238 | parent->ref = node->ref; | ||
| 239 | node->ref = tmp_ref; | ||
| 240 | /* step up */ | ||
| 241 | node = parent; | ||
| 242 | parent = node->parent; | ||
| 243 | } | ||
| 244 | |||
| 245 | return parent != NULL; | ||
| 246 | } | ||
| 247 | |||
| 248 | void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, | ||
| 249 | struct bheap_node* node) | ||
| 250 | { | ||
| 251 | struct bheap_node *parent, *prev, *pos; | ||
| 252 | struct bheap_node** tmp_ref; | ||
| 253 | void* tmp; | ||
| 254 | |||
| 255 | if (heap->min != node) { | ||
| 256 | /* bubble up */ | ||
| 257 | parent = node->parent; | ||
| 258 | while (parent) { | ||
| 259 | /* swap parent and node */ | ||
| 260 | tmp = parent->value; | ||
| 261 | parent->value = node->value; | ||
| 262 | node->value = tmp; | ||
| 263 | /* swap references */ | ||
| 264 | *(parent->ref) = node; | ||
| 265 | *(node->ref) = parent; | ||
| 266 | tmp_ref = parent->ref; | ||
| 267 | parent->ref = node->ref; | ||
| 268 | node->ref = tmp_ref; | ||
| 269 | /* step up */ | ||
| 270 | node = parent; | ||
| 271 | parent = node->parent; | ||
| 272 | } | ||
| 273 | /* now delete: | ||
| 274 | * first find prev */ | ||
| 275 | prev = NULL; | ||
| 276 | pos = heap->head; | ||
| 277 | while (pos != node) { | ||
| 278 | prev = pos; | ||
| 279 | pos = pos->next; | ||
| 280 | } | ||
| 281 | /* we have prev, now remove node */ | ||
| 282 | if (prev) | ||
| 283 | prev->next = node->next; | ||
| 284 | else | ||
| 285 | heap->head = node->next; | ||
| 286 | __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); | ||
| 287 | } else | ||
| 288 | heap->min = NULL; | ||
| 289 | node->degree = NOT_IN_HEAP; | ||
| 290 | } | ||
| 291 | |||
| 292 | /* allocate a heap node for value and insert into the heap */ | ||
| 293 | int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, | ||
| 294 | void* value, int gfp_flags) | ||
| 295 | { | ||
| 296 | struct bheap_node* hn = bheap_node_alloc(gfp_flags); | ||
| 297 | if (likely(hn)) { | ||
| 298 | bheap_node_init(&hn, value); | ||
| 299 | bheap_insert(higher_prio, heap, hn); | ||
| 300 | } | ||
| 301 | return hn != NULL; | ||
| 302 | } | ||
| 303 | |||
| 304 | void* bheap_take_del(bheap_prio_t higher_prio, | ||
| 305 | struct bheap* heap) | ||
| 306 | { | ||
| 307 | struct bheap_node* hn = bheap_take(higher_prio, heap); | ||
| 308 | void* ret = NULL; | ||
| 309 | if (hn) { | ||
| 310 | ret = hn->value; | ||
| 311 | bheap_node_free(hn); | ||
| 312 | } | ||
| 313 | return ret; | ||
| 314 | } | ||
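The functions above form the complete binomial-heap API used by the rt_domain code later in this patch. Below is a hedged usage sketch (kernel context assumed; not part of the patch) showing the calling convention: callers supply a bheap_prio_t comparator and keep their payload in a node's ->value field. The demo_* names are invented for illustration, and bheap_node_alloc()/bheap_node_free() are assumed to be provided by litmus/bheap.h.

```c
#include <litmus/bheap.h>

struct demo_item {
	int prio;                 /* smaller value = higher priority */
	struct bheap_node *hn;    /* heap node allocated via bheap_node_alloc() */
};

/* bheap_prio_t comparator: return nonzero if 'a' has higher priority than 'b' */
static int demo_higher_prio(struct bheap_node *a, struct bheap_node *b)
{
	struct demo_item *ia = a->value, *ib = b->value;
	return ia->prio < ib->prio;
}

static void demo(struct demo_item *items, int n)
{
	struct bheap heap;
	int i;

	bheap_init(&heap);
	for (i = 0; i < n; i++) {
		/* bheap_node_init() records &items[i].hn as the back-reference */
		bheap_node_init(&items[i].hn, &items[i]);
		bheap_insert(demo_higher_prio, &heap, items[i].hn);
	}

	/* drain the heap in priority order */
	while (bheap_peek(demo_higher_prio, &heap)) {
		struct bheap_node *hn = bheap_take(demo_higher_prio, &heap);
		struct demo_item *it = hn->value;
		/* ... consume 'it' here, then release hn via bheap_node_free() ... */
		(void) it;
	}
}
```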
diff --git a/litmus/binheap.c b/litmus/binheap.c new file mode 100644 index 00000000000..40a913f4b5a --- /dev/null +++ b/litmus/binheap.c | |||
| @@ -0,0 +1,388 @@ | |||
| 1 | #include <litmus/binheap.h> | ||
| 2 | |||
| 3 | /* Returns true if the root ancestor of node is the root of the given heap. */ | ||
| 4 | int binheap_is_in_this_heap(struct binheap_node *node, | ||
| 5 | struct binheap* heap) | ||
| 6 | { | ||
| 7 | if(!binheap_is_in_heap(node)) { | ||
| 8 | return 0; | ||
| 9 | } | ||
| 10 | |||
| 11 | while(node->parent != NULL) { | ||
| 12 | node = node->parent; | ||
| 13 | } | ||
| 14 | |||
| 15 | return (node == heap->root); | ||
| 16 | } | ||
| 17 | |||
| 18 | |||
| 19 | /* Update the node reference pointers. Same logic as Litmus binomial heap. */ | ||
| 20 | static void __update_ref(struct binheap_node *parent, | ||
| 21 | struct binheap_node *child) | ||
| 22 | { | ||
| 23 | *(parent->ref_ptr) = child; | ||
| 24 | *(child->ref_ptr) = parent; | ||
| 25 | |||
| 26 | swap(parent->ref_ptr, child->ref_ptr); | ||
| 27 | } | ||
| 28 | |||
| 29 | |||
| 30 | /* Swaps data between two nodes. */ | ||
| 31 | static void __binheap_swap(struct binheap_node *parent, | ||
| 32 | struct binheap_node *child) | ||
| 33 | { | ||
| 34 | swap(parent->data, child->data); | ||
| 35 | __update_ref(parent, child); | ||
| 36 | } | ||
| 37 | |||
| 38 | |||
| 39 | /* Swaps memory and data between two nodes. Actual nodes swap instead of | ||
| 40 | * just data. Needed when we delete nodes from the heap. | ||
| 41 | */ | ||
| 42 | static void __binheap_swap_safe(struct binheap *handle, | ||
| 43 | struct binheap_node *a, | ||
| 44 | struct binheap_node *b) | ||
| 45 | { | ||
| 46 | swap(a->data, b->data); | ||
| 47 | __update_ref(a, b); | ||
| 48 | |||
| 49 | if((a->parent != NULL) && (a->parent == b->parent)) { | ||
| 50 | /* special case: shared parent */ | ||
| 51 | swap(a->parent->left, a->parent->right); | ||
| 52 | } | ||
| 53 | else { | ||
| 54 | /* Update pointers to swap parents. */ | ||
| 55 | |||
| 56 | if(a->parent) { | ||
| 57 | if(a == a->parent->left) { | ||
| 58 | a->parent->left = b; | ||
| 59 | } | ||
| 60 | else { | ||
| 61 | a->parent->right = b; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | if(b->parent) { | ||
| 66 | if(b == b->parent->left) { | ||
| 67 | b->parent->left = a; | ||
| 68 | } | ||
| 69 | else { | ||
| 70 | b->parent->right = a; | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | swap(a->parent, b->parent); | ||
| 75 | } | ||
| 76 | |||
| 77 | /* swap children */ | ||
| 78 | |||
| 79 | if(a->left) { | ||
| 80 | a->left->parent = b; | ||
| 81 | |||
| 82 | if(a->right) { | ||
| 83 | a->right->parent = b; | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | if(b->left) { | ||
| 88 | b->left->parent = a; | ||
| 89 | |||
| 90 | if(b->right) { | ||
| 91 | b->right->parent = a; | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | swap(a->left, b->left); | ||
| 96 | swap(a->right, b->right); | ||
| 97 | |||
| 98 | |||
| 99 | /* update next/last/root pointers */ | ||
| 100 | |||
| 101 | if(a == handle->next) { | ||
| 102 | handle->next = b; | ||
| 103 | } | ||
| 104 | else if(b == handle->next) { | ||
| 105 | handle->next = a; | ||
| 106 | } | ||
| 107 | |||
| 108 | if(a == handle->last) { | ||
| 109 | handle->last = b; | ||
| 110 | } | ||
| 111 | else if(b == handle->last) { | ||
| 112 | handle->last = a; | ||
| 113 | } | ||
| 114 | |||
| 115 | if(a == handle->root) { | ||
| 116 | handle->root = b; | ||
| 117 | } | ||
| 118 | else if(b == handle->root) { | ||
| 119 | handle->root = a; | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | |||
| 124 | /** | ||
| 125 | * Update the pointer to the last node in the complete binary tree. | ||
| 126 | * Called internally after the root node has been deleted. | ||
| 127 | */ | ||
| 128 | static void __binheap_update_last(struct binheap *handle) | ||
| 129 | { | ||
| 130 | struct binheap_node *temp = handle->last; | ||
| 131 | |||
| 132 | /* find a "bend" in the tree. */ | ||
| 133 | while(temp->parent && (temp == temp->parent->left)) { | ||
| 134 | temp = temp->parent; | ||
| 135 | } | ||
| 136 | |||
| 137 | /* step over to sibling if we're not at root */ | ||
| 138 | if(temp->parent != NULL) { | ||
| 139 | temp = temp->parent->left; | ||
| 140 | } | ||
| 141 | |||
| 142 | /* now travel right as far as possible. */ | ||
| 143 | while(temp->right != NULL) { | ||
| 144 | temp = temp->right; | ||
| 145 | } | ||
| 146 | |||
| 147 | /* take one step to the left if we're not at the bottom-most level. */ | ||
| 148 | if(temp->left != NULL) { | ||
| 149 | temp = temp->left; | ||
| 150 | } | ||
| 151 | |||
| 152 | handle->last = temp; | ||
| 153 | } | ||
| 154 | |||
| 155 | |||
| 156 | /** | ||
| 157 | * Update the pointer to the node that will take the next inserted node. | ||
| 158 | * Called internally after a node has been inserted. | ||
| 159 | */ | ||
| 160 | static void __binheap_update_next(struct binheap *handle) | ||
| 161 | { | ||
| 162 | struct binheap_node *temp = handle->next; | ||
| 163 | |||
| 164 | /* find a "bend" in the tree. */ | ||
| 165 | while(temp->parent && (temp == temp->parent->right)) { | ||
| 166 | temp = temp->parent; | ||
| 167 | } | ||
| 168 | |||
| 169 | /* step over to sibling if we're not at root */ | ||
| 170 | if(temp->parent != NULL) { | ||
| 171 | temp = temp->parent->right; | ||
| 172 | } | ||
| 173 | |||
| 174 | /* now travel left as far as possible. */ | ||
| 175 | while(temp->left != NULL) { | ||
| 176 | temp = temp->left; | ||
| 177 | } | ||
| 178 | |||
| 179 | handle->next = temp; | ||
| 180 | } | ||
| 181 | |||
| 182 | |||
| 183 | |||
| 184 | /* bubble node up towards root */ | ||
| 185 | static void __binheap_bubble_up(struct binheap *handle, | ||
| 186 | struct binheap_node *node) | ||
| 187 | { | ||
| 188 | /* let BINHEAP_POISON data bubble to the top */ | ||
| 189 | |||
| 190 | while((node->parent != NULL) && | ||
| 191 | ((node->data == BINHEAP_POISON) || | ||
| 192 | handle->compare(node, node->parent))) { | ||
| 193 | __binheap_swap(node->parent, node); | ||
| 194 | node = node->parent; | ||
| 195 | } | ||
| 196 | } | ||
| 197 | |||
| 198 | |||
| 199 | /* bubble node down, swapping with min-child */ | ||
| 200 | static void __binheap_bubble_down(struct binheap *handle) | ||
| 201 | { | ||
| 202 | struct binheap_node *node = handle->root; | ||
| 203 | |||
| 204 | while(node->left != NULL) { | ||
| 205 | if(node->right && handle->compare(node->right, node->left)) { | ||
| 206 | if(handle->compare(node->right, node)) { | ||
| 207 | __binheap_swap(node, node->right); | ||
| 208 | node = node->right; | ||
| 209 | } | ||
| 210 | else { | ||
| 211 | break; | ||
| 212 | } | ||
| 213 | } | ||
| 214 | else { | ||
| 215 | if(handle->compare(node->left, node)) { | ||
| 216 | __binheap_swap(node, node->left); | ||
| 217 | node = node->left; | ||
| 218 | } | ||
| 219 | else { | ||
| 220 | break; | ||
| 221 | } | ||
| 222 | } | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | |||
| 227 | void __binheap_add(struct binheap_node *new_node, | ||
| 228 | struct binheap *handle, | ||
| 229 | void *data) | ||
| 230 | { | ||
| 231 | new_node->data = data; | ||
| 232 | new_node->ref = new_node; | ||
| 233 | new_node->ref_ptr = &(new_node->ref); | ||
| 234 | |||
| 235 | if(!binheap_empty(handle)) { | ||
| 236 | /* insert left side first */ | ||
| 237 | if(handle->next->left == NULL) { | ||
| 238 | handle->next->left = new_node; | ||
| 239 | new_node->parent = handle->next; | ||
| 240 | new_node->left = NULL; | ||
| 241 | new_node->right = NULL; | ||
| 242 | |||
| 243 | handle->last = new_node; | ||
| 244 | |||
| 245 | __binheap_bubble_up(handle, new_node); | ||
| 246 | } | ||
| 247 | else { | ||
| 248 | /* left occupied. insert right. */ | ||
| 249 | handle->next->right = new_node; | ||
| 250 | new_node->parent = handle->next; | ||
| 251 | new_node->left = NULL; | ||
| 252 | new_node->right = NULL; | ||
| 253 | |||
| 254 | handle->last = new_node; | ||
| 255 | |||
| 256 | __binheap_update_next(handle); | ||
| 257 | __binheap_bubble_up(handle, new_node); | ||
| 258 | } | ||
| 259 | } | ||
| 260 | else { | ||
| 261 | /* first node in heap */ | ||
| 262 | |||
| 263 | new_node->parent = NULL; | ||
| 264 | new_node->left = NULL; | ||
| 265 | new_node->right = NULL; | ||
| 266 | |||
| 267 | handle->root = new_node; | ||
| 268 | handle->next = new_node; | ||
| 269 | handle->last = new_node; | ||
| 270 | } | ||
| 271 | } | ||
| 272 | |||
| 273 | |||
| 274 | /** | ||
| 275 | * Removes the root node from the heap. The node is removed after coalescing | ||
| 276 | * the binheap_node with its original data pointer at the root of the tree. | ||
| 277 | * | ||
| 278 | * The 'last' node in the tree is then swapped up to the root and bubbled | ||
| 279 | * down. | ||
| 280 | */ | ||
| 281 | void __binheap_delete_root(struct binheap *handle, | ||
| 282 | struct binheap_node *container) | ||
| 283 | { | ||
| 284 | struct binheap_node *root = handle->root; | ||
| 285 | |||
| 286 | if(root != container) { | ||
| 287 | /* coalesce */ | ||
| 288 | __binheap_swap_safe(handle, root, container); | ||
| 289 | root = container; | ||
| 290 | } | ||
| 291 | |||
| 292 | if(handle->last != root) { | ||
| 293 | /* swap 'last' node up to root and bubble it down. */ | ||
| 294 | |||
| 295 | struct binheap_node *to_move = handle->last; | ||
| 296 | |||
| 297 | if(to_move->parent != root) { | ||
| 298 | handle->next = to_move->parent; | ||
| 299 | |||
| 300 | if(handle->next->right == to_move) { | ||
| 301 | /* disconnect from parent */ | ||
| 302 | to_move->parent->right = NULL; | ||
| 303 | handle->last = handle->next->left; | ||
| 304 | } | ||
| 305 | else { | ||
| 306 | /* find new 'last' before we disconnect */ | ||
| 307 | __binheap_update_last(handle); | ||
| 308 | |||
| 309 | /* disconnect from parent */ | ||
| 310 | to_move->parent->left = NULL; | ||
| 311 | } | ||
| 312 | } | ||
| 313 | else { | ||
| 314 | /* 'last' is direct child of root */ | ||
| 315 | |||
| 316 | handle->next = to_move; | ||
| 317 | |||
| 318 | if(to_move == to_move->parent->right) { | ||
| 319 | to_move->parent->right = NULL; | ||
| 320 | handle->last = to_move->parent->left; | ||
| 321 | } | ||
| 322 | else { | ||
| 323 | to_move->parent->left = NULL; | ||
| 324 | handle->last = to_move; | ||
| 325 | } | ||
| 326 | } | ||
| 327 | to_move->parent = NULL; | ||
| 328 | |||
| 329 | /* reconnect as root. We can't just swap data ptrs since root node | ||
| 330 | * may be freed after this function returns. | ||
| 331 | */ | ||
| 332 | to_move->left = root->left; | ||
| 333 | to_move->right = root->right; | ||
| 334 | if(to_move->left != NULL) { | ||
| 335 | to_move->left->parent = to_move; | ||
| 336 | } | ||
| 337 | if(to_move->right != NULL) { | ||
| 338 | to_move->right->parent = to_move; | ||
| 339 | } | ||
| 340 | |||
| 341 | handle->root = to_move; | ||
| 342 | |||
| 343 | /* bubble down */ | ||
| 344 | __binheap_bubble_down(handle); | ||
| 345 | } | ||
| 346 | else { | ||
| 347 | /* removing last node in tree */ | ||
| 348 | handle->root = NULL; | ||
| 349 | handle->next = NULL; | ||
| 350 | handle->last = NULL; | ||
| 351 | } | ||
| 352 | |||
| 353 | /* mark as removed */ | ||
| 354 | container->parent = BINHEAP_POISON; | ||
| 355 | } | ||
| 356 | |||
| 357 | |||
| 358 | /** | ||
| 359 | * Delete an arbitrary node. Bubble the node to delete up to the root, | ||
| 360 | * and then delete the root. | ||
| 361 | */ | ||
| 362 | void __binheap_delete(struct binheap_node *node_to_delete, | ||
| 363 | struct binheap *handle) | ||
| 364 | { | ||
| 365 | struct binheap_node *target = node_to_delete->ref; | ||
| 366 | void *temp_data = target->data; | ||
| 367 | |||
| 368 | /* temporarily set data to null to allow node to bubble up to the top. */ | ||
| 369 | target->data = BINHEAP_POISON; | ||
| 370 | |||
| 371 | __binheap_bubble_up(handle, target); | ||
| 372 | __binheap_delete_root(handle, node_to_delete); | ||
| 373 | |||
| 374 | node_to_delete->data = temp_data; /* restore node data pointer */ | ||
| 375 | } | ||
| 376 | |||
| 377 | |||
| 378 | /** | ||
| 379 | * Bubble up a node whose key has decreased in value (i.e., whose priority has increased). | ||
| 380 | */ | ||
| 381 | void __binheap_decrease(struct binheap_node *orig_node, | ||
| 382 | struct binheap *handle) | ||
| 383 | { | ||
| 384 | struct binheap_node *target = orig_node->ref; | ||
| 385 | |||
| 386 | __binheap_bubble_up(handle, target); | ||
| 387 | } | ||
| 388 | |||
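The "bend" walks in __binheap_update_last() and __binheap_update_next() above navigate a pointer-based complete binary tree to the last occupied slot and to the parent of the next free slot, respectively. The index arithmetic below is an assumption made only for this stand-alone illustration (the kernel code keeps no indices at all); with 1-based array indexing and n nodes present, the last node sits at index n and the next insertion hangs under index (n+1)/2, which is exactly what the pointer walks compute.

```c
#include <stdio.h>

int main(void)
{
	for (int n = 1; n <= 8; n++)
		printf("n=%d nodes: last node at index %d, next insert under index %d\n",
		       n, n, (n + 1) / 2);
	return 0;
}
```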
diff --git a/litmus/budget.c b/litmus/budget.c new file mode 100644 index 00000000000..f7712be29ad --- /dev/null +++ b/litmus/budget.c | |||
| @@ -0,0 +1,113 @@ | |||
| 1 | #include <linux/sched.h> | ||
| 2 | #include <linux/percpu.h> | ||
| 3 | #include <linux/hrtimer.h> | ||
| 4 | |||
| 5 | #include <litmus/litmus.h> | ||
| 6 | #include <litmus/preempt.h> | ||
| 7 | |||
| 8 | #include <litmus/budget.h> | ||
| 9 | |||
| 10 | struct enforcement_timer { | ||
| 11 | /* The enforcement timer is used to accurately police | ||
| 12 | * slice budgets. */ | ||
| 13 | struct hrtimer timer; | ||
| 14 | int armed; | ||
| 15 | }; | ||
| 16 | |||
| 17 | DEFINE_PER_CPU(struct enforcement_timer, budget_timer); | ||
| 18 | |||
| 19 | static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer) | ||
| 20 | { | ||
| 21 | struct enforcement_timer* et = container_of(timer, | ||
| 22 | struct enforcement_timer, | ||
| 23 | timer); | ||
| 24 | unsigned long flags; | ||
| 25 | |||
| 26 | local_irq_save(flags); | ||
| 27 | TRACE("enforcement timer fired.\n"); | ||
| 28 | et->armed = 0; | ||
| 29 | /* activate scheduler */ | ||
| 30 | litmus_reschedule_local(); | ||
| 31 | local_irq_restore(flags); | ||
| 32 | |||
| 33 | return HRTIMER_NORESTART; | ||
| 34 | } | ||
| 35 | |||
| 36 | /* assumes called with IRQs off */ | ||
| 37 | static void cancel_enforcement_timer(struct enforcement_timer* et) | ||
| 38 | { | ||
| 39 | int ret; | ||
| 40 | |||
| 41 | TRACE("cancelling enforcement timer.\n"); | ||
| 42 | |||
| 43 | /* Since interrupts are disabled and et->armed is only | ||
| 44 | * modified locally, we do not need any locks. | ||
| 45 | */ | ||
| 46 | |||
| 47 | if (et->armed) { | ||
| 48 | ret = hrtimer_try_to_cancel(&et->timer); | ||
| 49 | /* Should never be inactive. */ | ||
| 50 | BUG_ON(ret == 0); | ||
| 51 | /* Should never be running concurrently. */ | ||
| 52 | BUG_ON(ret == -1); | ||
| 53 | |||
| 54 | et->armed = 0; | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | /* assumes called with IRQs off */ | ||
| 59 | static void arm_enforcement_timer(struct enforcement_timer* et, | ||
| 60 | struct task_struct* t) | ||
| 61 | { | ||
| 62 | lt_t when_to_fire; | ||
| 63 | TRACE_TASK(t, "arming enforcement timer.\n"); | ||
| 64 | |||
| 65 | /* Calling this when there is no budget left for the task | ||
| 66 | * makes no sense, unless the task is non-preemptive. */ | ||
| 67 | BUG_ON(budget_exhausted(t) && (!is_np(t))); | ||
| 68 | |||
| 69 | /* __hrtimer_start_range_ns() cancels the timer | ||
| 70 | * anyway, so we don't have to check whether it is still armed */ | ||
| 71 | |||
| 72 | if (likely(!is_np(t))) { | ||
| 73 | when_to_fire = litmus_clock() + budget_remaining(t); | ||
| 74 | __hrtimer_start_range_ns(&et->timer, | ||
| 75 | ns_to_ktime(when_to_fire), | ||
| 76 | 0 /* delta */, | ||
| 77 | HRTIMER_MODE_ABS_PINNED, | ||
| 78 | 0 /* no wakeup */); | ||
| 79 | et->armed = 1; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | |||
| 84 | /* expects to be called with IRQs off */ | ||
| 85 | void update_enforcement_timer(struct task_struct* t) | ||
| 86 | { | ||
| 87 | struct enforcement_timer* et = &__get_cpu_var(budget_timer); | ||
| 88 | |||
| 89 | if (t && budget_precisely_enforced(t)) { | ||
| 90 | /* Make sure we call into the scheduler when this budget | ||
| 91 | * expires. */ | ||
| 92 | arm_enforcement_timer(et, t); | ||
| 93 | } else if (et->armed) { | ||
| 94 | /* Make sure we don't cause unnecessary interrupts. */ | ||
| 95 | cancel_enforcement_timer(et); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | |||
| 100 | static int __init init_budget_enforcement(void) | ||
| 101 | { | ||
| 102 | int cpu; | ||
| 103 | struct enforcement_timer* et; | ||
| 104 | |||
| 105 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
| 106 | et = &per_cpu(budget_timer, cpu); | ||
| 107 | hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
| 108 | et->timer.function = on_enforcement_timeout; | ||
| 109 | } | ||
| 110 | return 0; | ||
| 111 | } | ||
| 112 | |||
| 113 | module_init(init_budget_enforcement); | ||
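The arming rule in arm_enforcement_timer() is simply "fire when the remaining budget would run out if the task executed continuously from now on". The following user-space illustration (not part of the patch) spells that out; the field names are assumptions chosen for the example and do not redefine the kernel's rt_param layout.

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t lt_t;   /* nanoseconds, mirroring LITMUS^RT's lt_t */

struct job {
	lt_t exec_cost;      /* total budget per job */
	lt_t exec_time;      /* budget consumed so far */
};

static lt_t budget_remaining(const struct job *j)
{
	return j->exec_cost - j->exec_time;
}

int main(void)
{
	struct job j = { .exec_cost = 10000000, .exec_time = 6500000 };
	lt_t now = 123456789;                 /* stand-in for litmus_clock() */

	/* one-shot enforcement timer fires exactly when the budget expires */
	printf("arm timer at t = %llu ns (in %llu ns)\n",
	       (unsigned long long)(now + budget_remaining(&j)),
	       (unsigned long long)budget_remaining(&j));
	return 0;
}
```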
diff --git a/litmus/clustered.c b/litmus/clustered.c new file mode 100644 index 00000000000..6fe1b512f62 --- /dev/null +++ b/litmus/clustered.c | |||
| @@ -0,0 +1,111 @@ | |||
| 1 | #include <linux/gfp.h> | ||
| 2 | #include <linux/cpumask.h> | ||
| 3 | #include <linux/list.h> | ||
| 4 | |||
| 5 | #include <litmus/clustered.h> | ||
| 6 | |||
| 7 | #ifndef CONFIG_X86 | ||
| 8 | /* fake get_shared_cpu_map() on non-x86 architectures */ | ||
| 9 | |||
| 10 | int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) | ||
| 11 | { | ||
| 12 | if (index != 1) | ||
| 13 | return 1; | ||
| 14 | else { | ||
| 15 | /* Fake L1: CPU is all by itself. */ | ||
| 16 | cpumask_clear(mask); | ||
| 17 | cpumask_set_cpu(cpu, mask); | ||
| 18 | return 0; | ||
| 19 | } | ||
| 20 | } | ||
| 21 | |||
| 22 | #endif | ||
| 23 | |||
| 24 | int get_cluster_size(enum cache_level level) | ||
| 25 | { | ||
| 26 | cpumask_var_t mask; | ||
| 27 | int ok; | ||
| 28 | int num_cpus; | ||
| 29 | |||
| 30 | if (level == GLOBAL_CLUSTER) | ||
| 31 | return num_online_cpus(); | ||
| 32 | else { | ||
| 33 | if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
| 34 | return -ENOMEM; | ||
| 35 | /* assumes CPU 0 is representative of all CPUs */ | ||
| 36 | ok = get_shared_cpu_map(mask, 0, level); | ||
| 37 | /* ok == 0 means we got the map; otherwise it's an invalid cache level */ | ||
| 38 | if (ok == 0) | ||
| 39 | num_cpus = cpumask_weight(mask); | ||
| 40 | free_cpumask_var(mask); | ||
| 41 | |||
| 42 | if (ok == 0) | ||
| 43 | return num_cpus; | ||
| 44 | else | ||
| 45 | return -EINVAL; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | int assign_cpus_to_clusters(enum cache_level level, | ||
| 50 | struct scheduling_cluster* clusters[], | ||
| 51 | unsigned int num_clusters, | ||
| 52 | struct cluster_cpu* cpus[], | ||
| 53 | unsigned int num_cpus) | ||
| 54 | { | ||
| 55 | cpumask_var_t mask; | ||
| 56 | unsigned int i, free_cluster = 0, low_cpu; | ||
| 57 | int err = 0; | ||
| 58 | |||
| 59 | if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
| 60 | return -ENOMEM; | ||
| 61 | |||
| 62 | /* clear cluster pointers */ | ||
| 63 | for (i = 0; i < num_cpus; i++) { | ||
| 64 | cpus[i]->id = i; | ||
| 65 | cpus[i]->cluster = NULL; | ||
| 66 | } | ||
| 67 | |||
| 68 | /* initialize clusters */ | ||
| 69 | for (i = 0; i < num_clusters; i++) { | ||
| 70 | clusters[i]->id = i; | ||
| 71 | INIT_LIST_HEAD(&clusters[i]->cpus); | ||
| 72 | } | ||
| 73 | |||
| 74 | /* Assign each CPU. Two assumptions are made: | ||
| 75 | * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask). | ||
| 76 | * 2) All cpus that belong to some cluster are online. | ||
| 77 | */ | ||
| 78 | for_each_online_cpu(i) { | ||
| 79 | /* get lowest-id CPU in cluster */ | ||
| 80 | if (level != GLOBAL_CLUSTER) { | ||
| 81 | err = get_shared_cpu_map(mask, cpus[i]->id, level); | ||
| 82 | if (err != 0) { | ||
| 83 | /* ugh... wrong cache level? Either caller screwed up | ||
| 84 | * or the CPU topology is weird. */ | ||
| 85 | printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n", | ||
| 86 | level, err); | ||
| 87 | err = -EINVAL; | ||
| 88 | goto out; | ||
| 89 | } | ||
| 90 | low_cpu = cpumask_first(mask); | ||
| 91 | } else | ||
| 92 | low_cpu = 0; | ||
| 93 | if (low_cpu == i) { | ||
| 94 | /* caller must provide an appropriate number of clusters */ | ||
| 95 | BUG_ON(free_cluster >= num_clusters); | ||
| 96 | |||
| 97 | /* create new cluster */ | ||
| 98 | cpus[i]->cluster = clusters[free_cluster++]; | ||
| 99 | } else { | ||
| 100 | /* low_cpu points to the right cluster | ||
| 101 | * Assumption: low_cpu is actually online and was processed earlier. */ | ||
| 102 | cpus[i]->cluster = cpus[low_cpu]->cluster; | ||
| 103 | } | ||
| 104 | /* enqueue in cpus list */ | ||
| 105 | list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); | ||
| 106 | printk(KERN_INFO "Assigning CPU%u to cluster %u.\n", i, cpus[i]->cluster->id); | ||
| 107 | } | ||
| 108 | out: | ||
| 109 | free_cpumask_var(mask); | ||
| 110 | return err; | ||
| 111 | } | ||
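The clustering rule implemented by assign_cpus_to_clusters() is: the lowest-numbered CPU of each shared-cache mask founds a new cluster, and every other CPU joins the cluster of the lowest CPU in its own mask. A stand-alone user-space sketch of that rule (not part of the patch; the four CPUs and their masks are made up for illustration):

```c
#include <stdio.h>

int main(void)
{
	/* 4 CPUs; pairs {0,1} and {2,3} share a cache level (e.g., L2) */
	unsigned masks[4] = { 0x3, 0x3, 0xc, 0xc };
	int cluster_of[4], next_cluster = 0;

	for (int cpu = 0; cpu < 4; cpu++) {
		/* lowest-numbered CPU sharing this CPU's cache */
		int low = __builtin_ctz(masks[cpu]);
		cluster_of[cpu] = (low == cpu) ? next_cluster++ : cluster_of[low];
		printf("CPU %d -> cluster %d\n", cpu, cluster_of[cpu]);
	}
	return 0;
}
```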
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c new file mode 100644 index 00000000000..41919b2714c --- /dev/null +++ b/litmus/ctrldev.c | |||
| @@ -0,0 +1,160 @@ | |||
| 1 | #include <linux/sched.h> | ||
| 2 | #include <linux/mm.h> | ||
| 3 | #include <linux/fs.h> | ||
| 4 | #include <linux/miscdevice.h> | ||
| 5 | #include <linux/module.h> | ||
| 6 | |||
| 7 | #include <litmus/litmus.h> | ||
| 8 | |||
| 9 | /* only one page for now, but we might want to add a RO version at some point */ | ||
| 10 | |||
| 11 | #define CTRL_NAME "litmus/ctrl" | ||
| 12 | |||
| 13 | /* allocate t->rt_param.ctrl_page*/ | ||
| 14 | static int alloc_ctrl_page(struct task_struct *t) | ||
| 15 | { | ||
| 16 | int err = 0; | ||
| 17 | |||
| 18 | /* only allocate if the task doesn't have one yet */ | ||
| 19 | if (!tsk_rt(t)->ctrl_page) { | ||
| 20 | tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL); | ||
| 21 | if (!tsk_rt(t)->ctrl_page) | ||
| 22 | err = -ENOMEM; | ||
| 23 | /* will get de-allocated in task teardown */ | ||
| 24 | TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__, | ||
| 25 | tsk_rt(t)->ctrl_page); | ||
| 26 | } | ||
| 27 | return err; | ||
| 28 | } | ||
| 29 | |||
| 30 | static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma) | ||
| 31 | { | ||
| 32 | int err; | ||
| 33 | |||
| 34 | struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page); | ||
| 35 | |||
| 36 | TRACE_CUR(CTRL_NAME | ||
| 37 | ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n", | ||
| 38 | tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start, | ||
| 39 | vma->vm_page_prot); | ||
| 40 | |||
| 41 | /* Map it into the vma. */ | ||
| 42 | err = vm_insert_page(vma, vma->vm_start, ctrl); | ||
| 43 | |||
| 44 | if (err) | ||
| 45 | TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err); | ||
| 46 | |||
| 47 | return err; | ||
| 48 | } | ||
| 49 | |||
| 50 | static void litmus_ctrl_vm_close(struct vm_area_struct* vma) | ||
| 51 | { | ||
| 52 | TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__, | ||
| 53 | vma->vm_flags, vma->vm_page_prot); | ||
| 54 | |||
| 55 | TRACE_CUR(CTRL_NAME | ||
| 56 | ": %p:%p vma:%p vma->vm_private_data:%p closed.\n", | ||
| 57 | (void*) vma->vm_start, (void*) vma->vm_end, vma, | ||
| 58 | vma->vm_private_data); | ||
| 59 | } | ||
| 60 | |||
| 61 | static int litmus_ctrl_vm_fault(struct vm_area_struct* vma, | ||
| 62 | struct vm_fault* vmf) | ||
| 63 | { | ||
| 64 | TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__, | ||
| 65 | vma->vm_flags, vmf->pgoff); | ||
| 66 | |||
| 67 | /* This function should never be called, since all pages should have | ||
| 68 | * been mapped by mmap() already. */ | ||
| 69 | WARN_ONCE(1, "Page faults should be impossible in the control page\n"); | ||
| 70 | |||
| 71 | return VM_FAULT_SIGBUS; | ||
| 72 | } | ||
| 73 | |||
| 74 | static struct vm_operations_struct litmus_ctrl_vm_ops = { | ||
| 75 | .close = litmus_ctrl_vm_close, | ||
| 76 | .fault = litmus_ctrl_vm_fault, | ||
| 77 | }; | ||
| 78 | |||
| 79 | static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma) | ||
| 80 | { | ||
| 81 | int err = 0; | ||
| 82 | |||
| 83 | /* first make sure mapper knows what he's doing */ | ||
| 84 | |||
| 85 | /* you can only get one page */ | ||
| 86 | if (vma->vm_end - vma->vm_start != PAGE_SIZE) | ||
| 87 | return -EINVAL; | ||
| 88 | |||
| 89 | /* you can only map the "first" page */ | ||
| 90 | if (vma->vm_pgoff != 0) | ||
| 91 | return -EINVAL; | ||
| 92 | |||
| 93 | /* you can't share it with anyone */ | ||
| 94 | if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) | ||
| 95 | return -EINVAL; | ||
| 96 | |||
| 97 | vma->vm_ops = &litmus_ctrl_vm_ops; | ||
| 98 | /* This mapping should not be kept across forks, | ||
| 99 | * cannot be expanded, and is not a "normal" page. */ | ||
| 100 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO; | ||
| 101 | |||
| 102 | /* We don't want the first write access to trigger a "minor" page fault | ||
| 103 | * to mark the page as dirty. This is transient, private memory, we | ||
| 104 | * don't care if it was touched or not. __S011 means RW access, but not | ||
| 105 | * execute, and avoids copy-on-write behavior. | ||
| 106 | * See protection_map in mmap.c. */ | ||
| 107 | vma->vm_page_prot = __S011; | ||
| 108 | |||
| 109 | err = alloc_ctrl_page(current); | ||
| 110 | if (!err) | ||
| 111 | err = map_ctrl_page(current, vma); | ||
| 112 | |||
| 113 | TRACE_CUR("%s flags=0x%x prot=0x%lx\n", | ||
| 114 | __FUNCTION__, vma->vm_flags, vma->vm_page_prot); | ||
| 115 | |||
| 116 | return err; | ||
| 117 | } | ||
| 118 | |||
| 119 | static struct file_operations litmus_ctrl_fops = { | ||
| 120 | .owner = THIS_MODULE, | ||
| 121 | .mmap = litmus_ctrl_mmap, | ||
| 122 | }; | ||
| 123 | |||
| 124 | static struct miscdevice litmus_ctrl_dev = { | ||
| 125 | .name = CTRL_NAME, | ||
| 126 | .minor = MISC_DYNAMIC_MINOR, | ||
| 127 | .fops = &litmus_ctrl_fops, | ||
| 128 | }; | ||
| 129 | |||
| 130 | static int __init init_litmus_ctrl_dev(void) | ||
| 131 | { | ||
| 132 | int err; | ||
| 133 | |||
| 134 | BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE); | ||
| 135 | |||
| 136 | BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint64_t)); | ||
| 137 | |||
| 138 | BUILD_BUG_ON(offsetof(struct control_page, sched.raw) | ||
| 139 | != LITMUS_CP_OFFSET_SCHED); | ||
| 140 | BUILD_BUG_ON(offsetof(struct control_page, irq_count) | ||
| 141 | != LITMUS_CP_OFFSET_IRQ_COUNT); | ||
| 142 | BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start) | ||
| 143 | != LITMUS_CP_OFFSET_TS_SC_START); | ||
| 144 | BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start) | ||
| 145 | != LITMUS_CP_OFFSET_IRQ_SC_START); | ||
| 146 | |||
| 147 | printk("Initializing LITMUS^RT control device.\n"); | ||
| 148 | err = misc_register(&litmus_ctrl_dev); | ||
| 149 | if (err) | ||
| 150 | printk("Could not allocate %s device (%d).\n", CTRL_NAME, err); | ||
| 151 | return err; | ||
| 152 | } | ||
| 153 | |||
| 154 | static void __exit exit_litmus_ctrl_dev(void) | ||
| 155 | { | ||
| 156 | misc_deregister(&litmus_ctrl_dev); | ||
| 157 | } | ||
| 158 | |||
| 159 | module_init(init_litmus_ctrl_dev); | ||
| 160 | module_exit(exit_litmus_ctrl_dev); | ||
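From user space, the control page exported above is obtained by mmap()-ing the misc device with exactly one page, offset zero, and a private mapping, since litmus_ctrl_mmap() rejects anything else. A minimal sketch (not part of the patch); the /dev/litmus/ctrl path assumes default udev naming, and the mapped page would normally be interpreted via the LITMUS^RT user-space definition of struct control_page, which is not redefined here.

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/litmus/ctrl", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* exactly one page, offset 0, private mapping */
	void *ctrl = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ | PROT_WRITE,
			  MAP_PRIVATE, fd, 0);
	if (ctrl == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	printf("control page mapped at %p\n", ctrl);
	munmap(ctrl, sysconf(_SC_PAGESIZE));
	close(fd);
	return 0;
}
```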
diff --git a/litmus/edf_common.c b/litmus/edf_common.c new file mode 100644 index 00000000000..5aca2934a7b --- /dev/null +++ b/litmus/edf_common.c | |||
| @@ -0,0 +1,200 @@ | |||
| 1 | /* | ||
| 2 | * kernel/edf_common.c | ||
| 3 | * | ||
| 4 | * Common functions for EDF based scheduler. | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <linux/percpu.h> | ||
| 8 | #include <linux/sched.h> | ||
| 9 | #include <linux/list.h> | ||
| 10 | |||
| 11 | #include <litmus/litmus.h> | ||
| 12 | #include <litmus/sched_plugin.h> | ||
| 13 | #include <litmus/sched_trace.h> | ||
| 14 | |||
| 15 | #include <litmus/edf_common.h> | ||
| 16 | |||
| 17 | #ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM | ||
| 18 | #include <litmus/fpmath.h> | ||
| 19 | #endif | ||
| 20 | |||
| 21 | #ifdef CONFIG_EDF_TIE_BREAK_HASH | ||
| 22 | #include <linux/hash.h> | ||
| 23 | static inline long edf_hash(struct task_struct *t) | ||
| 24 | { | ||
| 25 | /* pid is 32 bits, so normally we would shove that into the | ||
| 26 | * upper 32 bits and put the job number in the bottom | ||
| 27 | * and hash the 64-bit number with hash_64(). Sadly, | ||
| 28 | * in testing, hash_64() doesn't distribute keys where the | ||
| 29 | * upper bits are close together (as would be the case with | ||
| 30 | * pids) and job numbers are equal (as would be the case with | ||
| 31 | * synchronous task sets with all relative deadlines equal). | ||
| 32 | * | ||
| 33 | * A 2006 Linux patch proposed the following solution | ||
| 34 | * (but for some reason it wasn't accepted...). | ||
| 35 | * | ||
| 36 | * At least this workaround works for 32-bit systems as well. | ||
| 37 | */ | ||
| 38 | return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); | ||
| 39 | } | ||
| 40 | #endif | ||
| 41 | |||
| 42 | |||
| 43 | /* edf_higher_prio - returns true if first has a higher EDF priority | ||
| 44 | * than second. Deadline ties are broken by PID. | ||
| 45 | * | ||
| 46 | * both first and second may be NULL | ||
| 47 | */ | ||
| 48 | int edf_higher_prio(struct task_struct* first, | ||
| 49 | struct task_struct* second) | ||
| 50 | { | ||
| 51 | struct task_struct *first_task = first; | ||
| 52 | struct task_struct *second_task = second; | ||
| 53 | |||
| 54 | /* There is no point in comparing a task to itself. */ | ||
| 55 | if (first && first == second) { | ||
| 56 | TRACE_TASK(first, | ||
| 57 | "WARNING: pointless edf priority comparison.\n"); | ||
| 58 | return 0; | ||
| 59 | } | ||
| 60 | |||
| 61 | |||
| 62 | /* check for NULL tasks */ | ||
| 63 | if (!first || !second) | ||
| 64 | return first && !second; | ||
| 65 | |||
| 66 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 67 | |||
| 68 | /* Check for inherited priorities. Change task | ||
| 69 | * used for comparison in such a case. | ||
| 70 | */ | ||
| 71 | if (unlikely(first->rt_param.inh_task)) | ||
| 72 | first_task = first->rt_param.inh_task; | ||
| 73 | if (unlikely(second->rt_param.inh_task)) | ||
| 74 | second_task = second->rt_param.inh_task; | ||
| 75 | |||
| 76 | /* Check for priority boosting. Tie-break by start of boosting. | ||
| 77 | */ | ||
| 78 | if (unlikely(is_priority_boosted(first_task))) { | ||
| 79 | /* first_task is boosted, how about second_task? */ | ||
| 80 | if (!is_priority_boosted(second_task) || | ||
| 81 | lt_before(get_boost_start(first_task), | ||
| 82 | get_boost_start(second_task))) | ||
| 83 | return 1; | ||
| 84 | else | ||
| 85 | return 0; | ||
| 86 | } else if (unlikely(is_priority_boosted(second_task))) | ||
| 87 | /* second_task is boosted, first is not*/ | ||
| 88 | return 0; | ||
| 89 | |||
| 90 | #endif | ||
| 91 | |||
| 92 | if (earlier_deadline(first_task, second_task)) { | ||
| 93 | return 1; | ||
| 94 | } | ||
| 95 | else if (get_deadline(first_task) == get_deadline(second_task)) { | ||
| 96 | /* Need to tie break. All methods must set pid_break to 0/1 if | ||
| 97 | * first_task does not have priority over second_task. | ||
| 98 | */ | ||
| 99 | int pid_break; | ||
| 100 | |||
| 101 | |||
| 102 | #if defined(CONFIG_EDF_TIE_BREAK_LATENESS) | ||
| 103 | /* Tie break by lateness. Jobs with greater lateness get | ||
| 104 | * priority. This should spread tardiness across all tasks, | ||
| 105 | * especially in task sets where all tasks have the same | ||
| 106 | * period and relative deadlines. | ||
| 107 | */ | ||
| 108 | if (get_lateness(first_task) > get_lateness(second_task)) { | ||
| 109 | return 1; | ||
| 110 | } | ||
| 111 | pid_break = (get_lateness(first_task) == get_lateness(second_task)); | ||
| 112 | |||
| 113 | |||
| 114 | #elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM) | ||
| 115 | /* Tie break by lateness, normalized by relative deadline. Jobs with | ||
| 116 | * greater normalized lateness get priority. | ||
| 117 | * | ||
| 118 | * Note: Considered using the algebraically equivalent | ||
| 119 | * lateness(first)*relative_deadline(second) > | ||
| 120 | lateness(second)*relative_deadline(first) | ||
| 121 | * to avoid fixed-point math, but values are prone to overflow if inputs | ||
| 122 | * are on the order of several seconds, even in 64-bit. | ||
| 123 | */ | ||
| 124 | fp_t fnorm = _frac(get_lateness(first_task), | ||
| 125 | get_rt_relative_deadline(first_task)); | ||
| 126 | fp_t snorm = _frac(get_lateness(second_task), | ||
| 127 | get_rt_relative_deadline(second_task)); | ||
| 128 | if (_gt(fnorm, snorm)) { | ||
| 129 | return 1; | ||
| 130 | } | ||
| 131 | pid_break = _eq(fnorm, snorm); | ||
| 132 | |||
| 133 | |||
| 134 | #elif defined(CONFIG_EDF_TIE_BREAK_HASH) | ||
| 135 | /* Tie break by comparing hashes of the (pid, job#) tuple. There should be | ||
| 136 | * a 50% chance that first_task has a higher priority than second_task. | ||
| 137 | */ | ||
| 138 | long fhash = edf_hash(first_task); | ||
| 139 | long shash = edf_hash(second_task); | ||
| 140 | if (fhash < shash) { | ||
| 141 | return 1; | ||
| 142 | } | ||
| 143 | pid_break = (fhash == shash); | ||
| 144 | #else | ||
| 145 | |||
| 146 | |||
| 147 | /* CONFIG_EDF_PID_TIE_BREAK */ | ||
| 148 | pid_break = 1; // fall through to tie-break by pid; | ||
| 149 | #endif | ||
| 150 | |||
| 151 | /* Tie break by pid */ | ||
| 152 | if(pid_break) { | ||
| 153 | if (first_task->pid < second_task->pid) { | ||
| 154 | return 1; | ||
| 155 | } | ||
| 156 | else if (first_task->pid == second_task->pid) { | ||
| 157 | /* If the PIDs are the same then the task with the | ||
| 158 | * inherited priority wins. | ||
| 159 | */ | ||
| 160 | if (!second->rt_param.inh_task) { | ||
| 161 | return 1; | ||
| 162 | } | ||
| 163 | } | ||
| 164 | } | ||
| 165 | } | ||
| 166 | return 0; /* fall-through. prio(second_task) > prio(first_task) */ | ||
| 167 | } | ||
| 168 | |||
| 169 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
| 170 | { | ||
| 171 | return edf_higher_prio(bheap2task(a), bheap2task(b)); | ||
| 172 | } | ||
| 173 | |||
| 174 | void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
| 175 | release_jobs_t release) | ||
| 176 | { | ||
| 177 | rt_domain_init(rt, edf_ready_order, resched, release); | ||
| 178 | } | ||
| 179 | |||
| 180 | /* need_to_preempt - check whether the task t needs to be preempted | ||
| 181 | * call only with irqs disabled and with ready_lock acquired | ||
| 182 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
| 183 | */ | ||
| 184 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
| 185 | { | ||
| 186 | /* we need the read lock for edf_ready_queue */ | ||
| 187 | /* no need to preempt if there is nothing pending */ | ||
| 188 | if (!__jobs_pending(rt)) | ||
| 189 | return 0; | ||
| 190 | /* we need to reschedule if t doesn't exist */ | ||
| 191 | if (!t) | ||
| 192 | return 1; | ||
| 193 | |||
| 194 | /* NOTE: We cannot check for non-preemptibility since we | ||
| 195 | * don't know what address space we're currently in. | ||
| 196 | */ | ||
| 197 | |||
| 198 | /* make sure to get non-rt stuff out of the way */ | ||
| 199 | return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t); | ||
| 200 | } | ||
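The comment in the CONFIG_EDF_TIE_BREAK_LATENESS_NORM branch above notes why the algebraically equivalent cross-multiplication (lateness(first) * relative_deadline(second) vs. the mirror product) is avoided: with nanosecond-scale operands of a few seconds each, the product exceeds the signed 64-bit range. This stand-alone user-space check is illustrative only and not part of the patch.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* both values a few seconds, expressed in nanoseconds */
	int64_t lateness     = 4000000000LL;  /* 4 s */
	int64_t rel_deadline = 4000000000LL;  /* 4 s */

	/* 4e9 * 4e9 = 1.6e19 > INT64_MAX (~9.22e18), so the product would
	 * overflow; test without actually triggering signed overflow. */
	if (lateness > INT64_MAX / rel_deadline)
		printf("64-bit signed cross-multiplication would overflow\n");
	else
		printf("product fits in 64 bits\n");
	return 0;
}
```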
diff --git a/litmus/fdso.c b/litmus/fdso.c new file mode 100644 index 00000000000..41852d7b14d --- /dev/null +++ b/litmus/fdso.c | |||
| @@ -0,0 +1,306 @@ | |||
| 1 | /* fdso.c - file descriptor attached shared objects | ||
| 2 | * | ||
| 3 | * (c) 2007 B. Brandenburg, LITMUS^RT project | ||
| 4 | * | ||
| 5 | * Notes: | ||
| 6 | * - objects descriptor (OD) tables are not cloned during a fork. | ||
| 7 | * - objects are created on-demand, and freed after the last reference | ||
| 8 | * is dropped. | ||
| 9 | * - for now, object types are hard coded. | ||
| 10 | * - As long as we have live objects, we keep a reference to the inode. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/errno.h> | ||
| 14 | #include <linux/sched.h> | ||
| 15 | #include <linux/mutex.h> | ||
| 16 | #include <linux/file.h> | ||
| 17 | #include <asm/uaccess.h> | ||
| 18 | |||
| 19 | #include <litmus/fdso.h> | ||
| 20 | |||
| 21 | extern struct fdso_ops generic_lock_ops; | ||
| 22 | |||
| 23 | static const struct fdso_ops* fdso_ops[] = { | ||
| 24 | &generic_lock_ops, /* FMLP_SEM */ | ||
| 25 | &generic_lock_ops, /* SRP_SEM */ | ||
| 26 | &generic_lock_ops, /* MPCP_SEM */ | ||
| 27 | &generic_lock_ops, /* MPCP_VS_SEM */ | ||
| 28 | &generic_lock_ops, /* DPCP_SEM */ | ||
| 29 | &generic_lock_ops, /* PCP_SEM */ | ||
| 30 | &generic_lock_ops, /* DGL_SEM */ | ||
| 31 | }; | ||
| 32 | |||
| 33 | static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) | ||
| 34 | { | ||
| 35 | BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1); | ||
| 36 | |||
| 37 | if (fdso_ops[type]->create) | ||
| 38 | return fdso_ops[type]->create(obj_ref, type, config); | ||
| 39 | else | ||
| 40 | return -EINVAL; | ||
| 41 | } | ||
| 42 | |||
| 43 | static void fdso_destroy(obj_type_t type, void* obj) | ||
| 44 | { | ||
| 45 | fdso_ops[type]->destroy(type, obj); | ||
| 46 | } | ||
| 47 | |||
| 48 | static int fdso_open(struct od_table_entry* entry, void* __user config) | ||
| 49 | { | ||
| 50 | if (fdso_ops[entry->obj->type]->open) | ||
| 51 | return fdso_ops[entry->obj->type]->open(entry, config); | ||
| 52 | else | ||
| 53 | return 0; | ||
| 54 | } | ||
| 55 | |||
| 56 | static int fdso_close(struct od_table_entry* entry) | ||
| 57 | { | ||
| 58 | if (fdso_ops[entry->obj->type]->close) | ||
| 59 | return fdso_ops[entry->obj->type]->close(entry); | ||
| 60 | else | ||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | |||
| 64 | /* inode must be locked already */ | ||
| 65 | static int alloc_inode_obj(struct inode_obj_id** obj_ref, | ||
| 66 | struct inode* inode, | ||
| 67 | obj_type_t type, | ||
| 68 | unsigned int id, | ||
| 69 | void* __user config) | ||
| 70 | { | ||
| 71 | struct inode_obj_id* obj; | ||
| 72 | void* raw_obj; | ||
| 73 | int err; | ||
| 74 | |||
| 75 | obj = kmalloc(sizeof(*obj), GFP_KERNEL); | ||
| 76 | if (!obj) { | ||
| 77 | return -ENOMEM; | ||
| 78 | } | ||
| 79 | |||
| 80 | err = fdso_create(&raw_obj, type, config); | ||
| 81 | if (err != 0) { | ||
| 82 | kfree(obj); | ||
| 83 | return err; | ||
| 84 | } | ||
| 85 | |||
| 86 | INIT_LIST_HEAD(&obj->list); | ||
| 87 | atomic_set(&obj->count, 1); | ||
| 88 | obj->type = type; | ||
| 89 | obj->id = id; | ||
| 90 | obj->obj = raw_obj; | ||
| 91 | obj->inode = inode; | ||
| 92 | |||
| 93 | list_add(&obj->list, &inode->i_obj_list); | ||
| 94 | atomic_inc(&inode->i_count); | ||
| 95 | |||
| 96 | printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id); | ||
| 97 | |||
| 98 | *obj_ref = obj; | ||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 102 | /* inode must be locked already */ | ||
| 103 | static struct inode_obj_id* get_inode_obj(struct inode* inode, | ||
| 104 | obj_type_t type, | ||
| 105 | unsigned int id) | ||
| 106 | { | ||
| 107 | struct list_head* pos; | ||
| 108 | struct inode_obj_id* obj = NULL; | ||
| 109 | |||
| 110 | list_for_each(pos, &inode->i_obj_list) { | ||
| 111 | obj = list_entry(pos, struct inode_obj_id, list); | ||
| 112 | if (obj->id == id && obj->type == type) { | ||
| 113 | atomic_inc(&obj->count); | ||
| 114 | return obj; | ||
| 115 | } | ||
| 116 | } | ||
| 117 | printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id); | ||
| 118 | return NULL; | ||
| 119 | } | ||
| 120 | |||
| 121 | |||
| 122 | static void put_inode_obj(struct inode_obj_id* obj) | ||
| 123 | { | ||
| 124 | struct inode* inode; | ||
| 125 | int let_go = 0; | ||
| 126 | |||
| 127 | inode = obj->inode; | ||
| 128 | if (atomic_dec_and_test(&obj->count)) { | ||
| 129 | |||
| 130 | mutex_lock(&inode->i_obj_mutex); | ||
| 131 | /* no new references can be obtained */ | ||
| 132 | if (!atomic_read(&obj->count)) { | ||
| 133 | list_del(&obj->list); | ||
| 134 | fdso_destroy(obj->type, obj->obj); | ||
| 135 | kfree(obj); | ||
| 136 | let_go = 1; | ||
| 137 | } | ||
| 138 | mutex_unlock(&inode->i_obj_mutex); | ||
| 139 | if (let_go) | ||
| 140 | iput(inode); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | static struct od_table_entry* get_od_entry(struct task_struct* t) | ||
| 145 | { | ||
| 146 | struct od_table_entry* table; | ||
| 147 | int i; | ||
| 148 | |||
| 149 | |||
| 150 | table = t->od_table; | ||
| 151 | if (!table) { | ||
| 152 | table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS, | ||
| 153 | GFP_KERNEL); | ||
| 154 | t->od_table = table; | ||
| 155 | } | ||
| 156 | |||
| 157 | for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++) | ||
| 158 | if (!table[i].used) { | ||
| 159 | table[i].used = 1; | ||
| 160 | return table + i; | ||
| 161 | } | ||
| 162 | return NULL; | ||
| 163 | } | ||
| 164 | |||
| 165 | static int put_od_entry(struct od_table_entry* od) | ||
| 166 | { | ||
| 167 | put_inode_obj(od->obj); | ||
| 168 | od->used = 0; | ||
| 169 | return 0; | ||
| 170 | } | ||
| 171 | |||
| 172 | static long close_od_entry(struct od_table_entry *od) | ||
| 173 | { | ||
| 174 | long ret; | ||
| 175 | |||
| 176 | /* Give the class a chance to reject the close. */ | ||
| 177 | ret = fdso_close(od); | ||
| 178 | if (ret == 0) | ||
| 179 | ret = put_od_entry(od); | ||
| 180 | |||
| 181 | return ret; | ||
| 182 | } | ||
| 183 | |||
| 184 | void exit_od_table(struct task_struct* t) | ||
| 185 | { | ||
| 186 | int i; | ||
| 187 | |||
| 188 | if (t->od_table) { | ||
| 189 | for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) | ||
| 190 | if (t->od_table[i].used) | ||
| 191 | close_od_entry(t->od_table + i); | ||
| 192 | kfree(t->od_table); | ||
| 193 | t->od_table = NULL; | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | static int do_sys_od_open(struct file* file, obj_type_t type, int id, | ||
| 198 | void* __user config) | ||
| 199 | { | ||
| 200 | int idx = 0, err = 0; | ||
| 201 | struct inode* inode; | ||
| 202 | struct inode_obj_id* obj = NULL; | ||
| 203 | struct od_table_entry* entry; | ||
| 204 | |||
| 205 | inode = file->f_dentry->d_inode; | ||
| 206 | |||
| 207 | entry = get_od_entry(current); | ||
| 208 | if (!entry) | ||
| 209 | return -ENOMEM; | ||
| 210 | |||
| 211 | mutex_lock(&inode->i_obj_mutex); | ||
| 212 | obj = get_inode_obj(inode, type, id); | ||
| 213 | if (!obj) | ||
| 214 | err = alloc_inode_obj(&obj, inode, type, id, config); | ||
| 215 | if (err != 0) { | ||
| 216 | obj = NULL; | ||
| 217 | idx = err; | ||
| 218 | entry->used = 0; | ||
| 219 | } else { | ||
| 220 | entry->obj = obj; | ||
| 221 | entry->class = fdso_ops[type]; | ||
| 222 | idx = entry - current->od_table; | ||
| 223 | } | ||
| 224 | |||
| 225 | mutex_unlock(&inode->i_obj_mutex); | ||
| 226 | |||
| 227 | /* open only if creation succeeded */ | ||
| 228 | if (!err) | ||
| 229 | err = fdso_open(entry, config); | ||
| 230 | if (err < 0) { | ||
| 231 | /* The class rejected the open call. | ||
| 232 | * We need to clean up and tell user space. | ||
| 233 | */ | ||
| 234 | if (obj) | ||
| 235 | put_od_entry(entry); | ||
| 236 | idx = err; | ||
| 237 | } | ||
| 238 | |||
| 239 | return idx; | ||
| 240 | } | ||
| 241 | |||
| 242 | struct od_table_entry* get_entry_for_od(int od) | ||
| 243 | { | ||
| 244 | struct task_struct *t = current; | ||
| 245 | |||
| 246 | if (!t->od_table) | ||
| 247 | return NULL; | ||
| 248 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
| 249 | return NULL; | ||
| 250 | if (!t->od_table[od].used) | ||
| 251 | return NULL; | ||
| 252 | return t->od_table + od; | ||
| 253 | } | ||
| 254 | |||
| 255 | asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config) | ||
| 256 | { | ||
| 257 | int ret = 0; | ||
| 258 | struct file* file; | ||
| 259 | |||
| 260 | /* | ||
| 261 | 1) get file from fd, get inode from file | ||
| 262 | 2) lock inode | ||
| 263 | 3) try to lookup object | ||
| 264 | 4) if not present create and enqueue object, inc inode refcnt | ||
| 265 | 5) increment refcnt of object | ||
| 266 | 6) alloc od_table_entry, setup ptrs | ||
| 267 | 7) unlock inode | ||
| 268 | 8) return offset in od_table as OD | ||
| 269 | */ | ||
| 270 | |||
| 271 | if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) { | ||
| 272 | ret = -EINVAL; | ||
| 273 | goto out; | ||
| 274 | } | ||
| 275 | |||
| 276 | file = fget(fd); | ||
| 277 | if (!file) { | ||
| 278 | ret = -EBADF; | ||
| 279 | goto out; | ||
| 280 | } | ||
| 281 | |||
| 282 | ret = do_sys_od_open(file, type, obj_id, config); | ||
| 283 | |||
| 284 | fput(file); | ||
| 285 | |||
| 286 | out: | ||
| 287 | return ret; | ||
| 288 | } | ||
| 289 | |||
| 290 | |||
| 291 | asmlinkage long sys_od_close(int od) | ||
| 292 | { | ||
| 293 | int ret = -EINVAL; | ||
| 294 | struct task_struct *t = current; | ||
| 295 | |||
| 296 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
| 297 | return ret; | ||
| 298 | |||
| 299 | if (!t->od_table || !t->od_table[od].used) | ||
| 300 | return ret; | ||
| 301 | |||
| 302 | |||
| 303 | ret = close_od_entry(t->od_table + od); | ||
| 304 | |||
| 305 | return ret; | ||
| 306 | } | ||
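The syscalls above hand back an object descriptor (OD) that is simply an index into the calling task's `od_table`, allocated on first use and never inherited across `fork()`. A self-contained sketch of that slot-allocation scheme (the names and table size below are stand-ins for illustration, not the kernel's):

```c
#include <stdio.h>

#define DEMO_MAX_ODS 32 /* stand-in for MAX_OBJECT_DESCRIPTORS */

struct demo_od_entry {
	int used;
	int type;
	unsigned int id;
};

static struct demo_od_entry demo_od_table[DEMO_MAX_ODS];

/* Like get_od_entry(): hand out the first unused slot; the returned index
 * is what user space sees as the "OD". */
static int demo_od_open(int type, unsigned int id)
{
	int i;
	for (i = 0; i < DEMO_MAX_ODS; i++) {
		if (!demo_od_table[i].used) {
			demo_od_table[i].used = 1;
			demo_od_table[i].type = type;
			demo_od_table[i].id = id;
			return i;
		}
	}
	return -1; /* table full; the kernel returns -ENOMEM here */
}

static int demo_od_close(int od)
{
	if (od < 0 || od >= DEMO_MAX_ODS || !demo_od_table[od].used)
		return -1; /* invalid OD; the kernel returns -EINVAL */
	demo_od_table[od].used = 0;
	return 0;
}

int main(void)
{
	int od1 = demo_od_open(0 /* e.g. FMLP_SEM */, 17);
	int od2 = demo_od_open(1 /* e.g. SRP_SEM  */, 17);
	printf("od1=%d od2=%d\n", od1, od2);		/* 0 and 1 */
	demo_od_close(od1);
	printf("reused=%d\n", demo_od_open(0, 42));	/* slot 0 is reused */
	return 0;
}
```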
diff --git a/litmus/fp_common.c b/litmus/fp_common.c new file mode 100644 index 00000000000..964a4729def --- /dev/null +++ b/litmus/fp_common.c | |||
| @@ -0,0 +1,119 @@ | |||
| 1 | /* | ||
| 2 | * litmus/fp_common.c | ||
| 3 | * | ||
| 4 | * Common functions for fixed-priority scheduler. | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <linux/percpu.h> | ||
| 8 | #include <linux/sched.h> | ||
| 9 | #include <linux/list.h> | ||
| 10 | |||
| 11 | #include <litmus/litmus.h> | ||
| 12 | #include <litmus/sched_plugin.h> | ||
| 13 | #include <litmus/sched_trace.h> | ||
| 14 | |||
| 15 | #include <litmus/fp_common.h> | ||
| 16 | |||
| 17 | /* fp_higher_prio - returns true if first has a higher static priority | ||
| 18 | * than second. Ties are broken by PID. | ||
| 19 | * | ||
| 20 | * both first and second may be NULL | ||
| 21 | */ | ||
| 22 | int fp_higher_prio(struct task_struct* first, | ||
| 23 | struct task_struct* second) | ||
| 24 | { | ||
| 25 | struct task_struct *first_task = first; | ||
| 26 | struct task_struct *second_task = second; | ||
| 27 | |||
| 28 | /* There is no point in comparing a task to itself. */ | ||
| 29 | if (unlikely(first && first == second)) { | ||
| 30 | TRACE_TASK(first, | ||
| 31 | "WARNING: pointless FP priority comparison.\n"); | ||
| 32 | return 0; | ||
| 33 | } | ||
| 34 | |||
| 35 | |||
| 36 | /* check for NULL tasks */ | ||
| 37 | if (!first || !second) | ||
| 38 | return first && !second; | ||
| 39 | |||
| 40 | if (!is_realtime(second_task)) | ||
| 41 | return 1; | ||
| 42 | |||
| 43 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 44 | |||
| 45 | /* Check for inherited priorities. Change task | ||
| 46 | * used for comparison in such a case. | ||
| 47 | */ | ||
| 48 | if (unlikely(first->rt_param.inh_task)) | ||
| 49 | first_task = first->rt_param.inh_task; | ||
| 50 | if (unlikely(second->rt_param.inh_task)) | ||
| 51 | second_task = second->rt_param.inh_task; | ||
| 52 | |||
| 53 | /* Check for priority boosting. Tie-break by start of boosting. | ||
| 54 | */ | ||
| 55 | if (unlikely(is_priority_boosted(first_task))) { | ||
| 56 | /* first_task is boosted, how about second_task? */ | ||
| 57 | if (is_priority_boosted(second_task)) | ||
| 58 | /* break by priority point */ | ||
| 59 | return lt_before(get_boost_start(first_task), | ||
| 60 | get_boost_start(second_task)); | ||
| 61 | else | ||
| 62 | /* priority boosting wins. */ | ||
| 63 | return 1; | ||
| 64 | } else if (unlikely(is_priority_boosted(second_task))) | ||
| 65 | /* second_task is boosted, first is not */ | ||
| 66 | return 0; | ||
| 67 | |||
| 68 | #endif | ||
| 69 | |||
| 70 | /* Comparisons to itself are not expected; priority inheritance | ||
| 71 | * should also not cause this to happen. */ | ||
| 72 | BUG_ON(first_task == second_task); | ||
| 73 | |||
| 74 | if (get_priority(first_task) < get_priority(second_task)) | ||
| 75 | return 1; | ||
| 76 | else if (get_priority(first_task) == get_priority(second_task)) | ||
| 77 | /* Break by PID. */ | ||
| 78 | return first_task->pid < second_task->pid; | ||
| 79 | else | ||
| 80 | return 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | int fp_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
| 84 | { | ||
| 85 | return fp_higher_prio(bheap2task(a), bheap2task(b)); | ||
| 86 | } | ||
| 87 | |||
| 88 | void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
| 89 | release_jobs_t release) | ||
| 90 | { | ||
| 91 | rt_domain_init(rt, fp_ready_order, resched, release); | ||
| 92 | } | ||
| 93 | |||
| 94 | /* need_to_preempt - check whether the task t needs to be preempted | ||
| 95 | */ | ||
| 96 | int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t) | ||
| 97 | { | ||
| 98 | struct task_struct *pending; | ||
| 99 | |||
| 100 | pending = fp_prio_peek(q); | ||
| 101 | |||
| 102 | if (!pending) | ||
| 103 | return 0; | ||
| 104 | if (!t) | ||
| 105 | return 1; | ||
| 106 | |||
| 107 | /* make sure to get non-rt stuff out of the way */ | ||
| 108 | return !is_realtime(t) || fp_higher_prio(pending, t); | ||
| 109 | } | ||
| 110 | |||
| 111 | void fp_prio_queue_init(struct fp_prio_queue* q) | ||
| 112 | { | ||
| 113 | int i; | ||
| 114 | |||
| 115 | for (i = 0; i < FP_PRIO_BIT_WORDS; i++) | ||
| 116 | q->bitmask[i] = 0; | ||
| 117 | for (i = 0; i < LITMUS_MAX_PRIORITY; i++) | ||
| 118 | bheap_init(&q->queue[i]); | ||
| 119 | } | ||
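Once priority inheritance and boosting are out of the picture, `fp_higher_prio()` boils down to: a numerically smaller priority value wins, and equal priorities are tie-broken by the smaller PID. A standalone sketch of that ordering (hypothetical names, userspace only):

```c
#include <stdbool.h>
#include <stdio.h>

struct demo_fp_task {
	int pid;
	unsigned int prio;	/* numerically smaller = higher priority */
};

/* Same ordering rule as fp_higher_prio() without boosting/inheritance:
 * lower priority value wins, ties are broken by the smaller PID. */
static bool demo_fp_higher_prio(const struct demo_fp_task *a,
				const struct demo_fp_task *b)
{
	if (!a || !b)
		return a && !b;
	if (a->prio != b->prio)
		return a->prio < b->prio;
	return a->pid < b->pid;
}

int main(void)
{
	struct demo_fp_task t1 = { .pid = 100, .prio = 2 };
	struct demo_fp_task t2 = { .pid = 200, .prio = 5 };
	struct demo_fp_task t3 = { .pid = 50,  .prio = 2 };

	printf("t1 > t2: %d\n", demo_fp_higher_prio(&t1, &t2)); /* 1: better priority value */
	printf("t1 > t3: %d\n", demo_fp_higher_prio(&t1, &t3)); /* 0: tie broken by smaller PID */
	return 0;
}
```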
diff --git a/litmus/ft_event.c b/litmus/ft_event.c new file mode 100644 index 00000000000..399a07becca --- /dev/null +++ b/litmus/ft_event.c | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | #include <linux/types.h> | ||
| 2 | |||
| 3 | #include <litmus/feather_trace.h> | ||
| 4 | |||
| 5 | #if !defined(CONFIG_ARCH_HAS_FEATHER_TRACE) || defined(CONFIG_DEBUG_RODATA) | ||
| 6 | /* provide dummy implementation */ | ||
| 7 | |||
| 8 | int ft_events[MAX_EVENTS]; | ||
| 9 | |||
| 10 | int ft_enable_event(unsigned long id) | ||
| 11 | { | ||
| 12 | if (id < MAX_EVENTS) { | ||
| 13 | ft_events[id]++; | ||
| 14 | return 1; | ||
| 15 | } else | ||
| 16 | return 0; | ||
| 17 | } | ||
| 18 | |||
| 19 | int ft_disable_event(unsigned long id) | ||
| 20 | { | ||
| 21 | if (id < MAX_EVENTS && ft_events[id]) { | ||
| 22 | ft_events[id]--; | ||
| 23 | return 1; | ||
| 24 | } else | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | |||
| 28 | int ft_disable_all_events(void) | ||
| 29 | { | ||
| 30 | int i; | ||
| 31 | |||
| 32 | for (i = 0; i < MAX_EVENTS; i++) | ||
| 33 | ft_events[i] = 0; | ||
| 34 | |||
| 35 | return MAX_EVENTS; | ||
| 36 | } | ||
| 37 | |||
| 38 | int ft_is_event_enabled(unsigned long id) | ||
| 39 | { | ||
| 40 | return id < MAX_EVENTS && ft_events[id]; | ||
| 41 | } | ||
| 42 | |||
| 43 | #endif | ||
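The dummy implementation treats enable/disable as reference counts, so nested enable calls keep an event active until a matching number of disables. A tiny userspace model of that counting behavior (names here are placeholders, not the Feather-Trace API):

```c
#include <stdio.h>

#define DEMO_MAX_EVENTS 16 /* stand-in for MAX_EVENTS */

static int demo_events[DEMO_MAX_EVENTS];

static int demo_enable(unsigned long id)
{
	if (id >= DEMO_MAX_EVENTS)
		return 0;
	demo_events[id]++;	/* one more user of this event */
	return 1;
}

static int demo_disable(unsigned long id)
{
	if (id >= DEMO_MAX_EVENTS || !demo_events[id])
		return 0;
	demo_events[id]--;	/* drop one user */
	return 1;
}

int main(void)
{
	demo_enable(3);
	demo_enable(3);						/* two users enabled event 3 */
	demo_disable(3);
	printf("still enabled: %d\n", demo_events[3] != 0);	/* 1: one user remains */
	demo_disable(3);
	printf("still enabled: %d\n", demo_events[3] != 0);	/* 0 */
	return 0;
}
```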
diff --git a/litmus/ftdev.c b/litmus/ftdev.c new file mode 100644 index 00000000000..99bc39ffbce --- /dev/null +++ b/litmus/ftdev.c | |||
| @@ -0,0 +1,446 @@ | |||
| 1 | #include <linux/sched.h> | ||
| 2 | #include <linux/fs.h> | ||
| 3 | #include <linux/slab.h> | ||
| 4 | #include <linux/cdev.h> | ||
| 5 | #include <asm/uaccess.h> | ||
| 6 | #include <linux/module.h> | ||
| 7 | #include <linux/device.h> | ||
| 8 | |||
| 9 | #include <litmus/litmus.h> | ||
| 10 | #include <litmus/feather_trace.h> | ||
| 11 | #include <litmus/ftdev.h> | ||
| 12 | |||
| 13 | struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size) | ||
| 14 | { | ||
| 15 | struct ft_buffer* buf; | ||
| 16 | size_t total = (size + 1) * count; | ||
| 17 | char* mem; | ||
| 18 | int order = 0, pages = 1; | ||
| 19 | |||
| 20 | buf = kmalloc(sizeof(*buf), GFP_KERNEL); | ||
| 21 | if (!buf) | ||
| 22 | return NULL; | ||
| 23 | |||
| 24 | total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0); | ||
| 25 | while (pages < total) { | ||
| 26 | order++; | ||
| 27 | pages *= 2; | ||
| 28 | } | ||
| 29 | |||
| 30 | mem = (char*) __get_free_pages(GFP_KERNEL, order); | ||
| 31 | if (!mem) { | ||
| 32 | kfree(buf); | ||
| 33 | return NULL; | ||
| 34 | } | ||
| 35 | |||
| 36 | if (!init_ft_buffer(buf, count, size, | ||
| 37 | mem + (count * size), /* markers at the end */ | ||
| 38 | mem)) { /* buffer objects */ | ||
| 39 | free_pages((unsigned long) mem, order); | ||
| 40 | kfree(buf); | ||
| 41 | return NULL; | ||
| 42 | } | ||
| 43 | return buf; | ||
| 44 | } | ||
| 45 | |||
| 46 | void free_ft_buffer(struct ft_buffer* buf) | ||
| 47 | { | ||
| 48 | int order = 0, pages = 1; | ||
| 49 | size_t total; | ||
| 50 | |||
| 51 | if (buf) { | ||
| 52 | total = (buf->slot_size + 1) * buf->slot_count; | ||
| 53 | total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0); | ||
| 54 | while (pages < total) { | ||
| 55 | order++; | ||
| 56 | pages *= 2; | ||
| 57 | } | ||
| 58 | free_pages((unsigned long) buf->buffer_mem, order); | ||
| 59 | kfree(buf); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | struct ftdev_event { | ||
| 64 | int id; | ||
| 65 | struct ftdev_event* next; | ||
| 66 | }; | ||
| 67 | |||
| 68 | static int activate(struct ftdev_event** chain, int id) | ||
| 69 | { | ||
| 70 | struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL); | ||
| 71 | if (ev) { | ||
| 72 | printk(KERN_INFO | ||
| 73 | "Enabling feather-trace event %d.\n", (int) id); | ||
| 74 | ft_enable_event(id); | ||
| 75 | ev->id = id; | ||
| 76 | ev->next = *chain; | ||
| 77 | *chain = ev; | ||
| 78 | } | ||
| 79 | return ev ? 0 : -ENOMEM; | ||
| 80 | } | ||
| 81 | |||
| 82 | static void deactivate(struct ftdev_event** chain, int id) | ||
| 83 | { | ||
| 84 | struct ftdev_event **cur = chain; | ||
| 85 | struct ftdev_event *nxt; | ||
| 86 | while (*cur) { | ||
| 87 | if ((*cur)->id == id) { | ||
| 88 | nxt = (*cur)->next; | ||
| 89 | kfree(*cur); | ||
| 90 | *cur = nxt; | ||
| 91 | printk(KERN_INFO | ||
| 92 | "Disabling feather-trace event %d.\n", (int) id); | ||
| 93 | ft_disable_event(id); | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | cur = &(*cur)->next; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | static int ftdev_open(struct inode *in, struct file *filp) | ||
| 101 | { | ||
| 102 | struct ftdev* ftdev; | ||
| 103 | struct ftdev_minor* ftdm; | ||
| 104 | unsigned int buf_idx = iminor(in); | ||
| 105 | int err = 0; | ||
| 106 | |||
| 107 | ftdev = container_of(in->i_cdev, struct ftdev, cdev); | ||
| 108 | |||
| 109 | if (buf_idx >= ftdev->minor_cnt) { | ||
| 110 | err = -ENODEV; | ||
| 111 | goto out; | ||
| 112 | } | ||
| 113 | if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx))) | ||
| 114 | goto out; | ||
| 115 | |||
| 116 | ftdm = ftdev->minor + buf_idx; | ||
| 117 | ftdm->ftdev = ftdev; | ||
| 118 | filp->private_data = ftdm; | ||
| 119 | |||
| 120 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
| 121 | err = -ERESTARTSYS; | ||
| 122 | goto out; | ||
| 123 | } | ||
| 124 | |||
| 125 | if (!ftdm->readers && ftdev->alloc) | ||
| 126 | err = ftdev->alloc(ftdev, buf_idx); | ||
| 127 | if (0 == err) | ||
| 128 | ftdm->readers++; | ||
| 129 | |||
| 130 | mutex_unlock(&ftdm->lock); | ||
| 131 | out: | ||
| 132 | return err; | ||
| 133 | } | ||
| 134 | |||
| 135 | static int ftdev_release(struct inode *in, struct file *filp) | ||
| 136 | { | ||
| 137 | struct ftdev* ftdev; | ||
| 138 | struct ftdev_minor* ftdm; | ||
| 139 | unsigned int buf_idx = iminor(in); | ||
| 140 | int err = 0; | ||
| 141 | |||
| 142 | ftdev = container_of(in->i_cdev, struct ftdev, cdev); | ||
| 143 | |||
| 144 | if (buf_idx >= ftdev->minor_cnt) { | ||
| 145 | err = -ENODEV; | ||
| 146 | goto out; | ||
| 147 | } | ||
| 148 | ftdm = ftdev->minor + buf_idx; | ||
| 149 | |||
| 150 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
| 151 | err = -ERESTARTSYS; | ||
| 152 | goto out; | ||
| 153 | } | ||
| 154 | |||
| 155 | if (ftdm->readers == 1) { | ||
| 156 | while (ftdm->events) | ||
| 157 | deactivate(&ftdm->events, ftdm->events->id); | ||
| 158 | |||
| 159 | /* wait for any pending events to complete */ | ||
| 160 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 161 | schedule_timeout(HZ); | ||
| 162 | |||
| 163 | printk(KERN_ALERT "Failed trace writes: %u\n", | ||
| 164 | ftdm->buf->failed_writes); | ||
| 165 | |||
| 166 | if (ftdev->free) | ||
| 167 | ftdev->free(ftdev, buf_idx); | ||
| 168 | } | ||
| 169 | |||
| 170 | ftdm->readers--; | ||
| 171 | mutex_unlock(&ftdm->lock); | ||
| 172 | out: | ||
| 173 | return err; | ||
| 174 | } | ||
| 175 | |||
| 176 | /* based on ft_buffer_read | ||
| 177 | * @returns < 0 : page fault | ||
| 178 | * = 0 : no data available | ||
| 179 | * = 1 : one slot copied | ||
| 180 | */ | ||
| 181 | static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest) | ||
| 182 | { | ||
| 183 | unsigned int idx; | ||
| 184 | int err = 0; | ||
| 185 | if (buf->free_count != buf->slot_count) { | ||
| 186 | /* data available */ | ||
| 187 | idx = buf->read_idx % buf->slot_count; | ||
| 188 | if (buf->slots[idx] == SLOT_READY) { | ||
| 189 | err = copy_to_user(dest, ((char*) buf->buffer_mem) + | ||
| 190 | idx * buf->slot_size, | ||
| 191 | buf->slot_size); | ||
| 192 | if (err == 0) { | ||
| 193 | /* copy ok */ | ||
| 194 | buf->slots[idx] = SLOT_FREE; | ||
| 195 | buf->read_idx++; | ||
| 196 | fetch_and_inc(&buf->free_count); | ||
| 197 | err = 1; | ||
| 198 | } | ||
| 199 | } | ||
| 200 | } | ||
| 201 | return err; | ||
| 202 | } | ||
| 203 | |||
| 204 | static ssize_t ftdev_read(struct file *filp, | ||
| 205 | char __user *to, size_t len, loff_t *f_pos) | ||
| 206 | { | ||
| 207 | /* we ignore f_pos, this is strictly sequential */ | ||
| 208 | |||
| 209 | ssize_t err = 0; | ||
| 210 | size_t chunk; | ||
| 211 | int copied; | ||
| 212 | struct ftdev_minor* ftdm = filp->private_data; | ||
| 213 | |||
| 214 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
| 215 | err = -ERESTARTSYS; | ||
| 216 | goto out; | ||
| 217 | } | ||
| 218 | |||
| 219 | |||
| 220 | chunk = ftdm->buf->slot_size; | ||
| 221 | while (len >= chunk) { | ||
| 222 | copied = ft_buffer_copy_to_user(ftdm->buf, to); | ||
| 223 | if (copied == 1) { | ||
| 224 | len -= chunk; | ||
| 225 | to += chunk; | ||
| 226 | err += chunk; | ||
| 227 | } else if (err == 0 && copied == 0 && ftdm->events) { | ||
| 228 | /* Only wait if there are any events enabled and only | ||
| 229 | * if we haven't copied some data yet. We cannot wait | ||
| 230 | * here with copied data because that data would get | ||
| 231 | * lost if the task is interrupted (e.g., killed). | ||
| 232 | */ | ||
| 233 | mutex_unlock(&ftdm->lock); | ||
| 234 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 235 | |||
| 236 | schedule_timeout(50); | ||
| 237 | |||
| 238 | if (signal_pending(current)) { | ||
| 239 | if (err == 0) | ||
| 240 | /* nothing read yet, signal problem */ | ||
| 241 | err = -ERESTARTSYS; | ||
| 242 | goto out; | ||
| 243 | } | ||
| 244 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
| 245 | err = -ERESTARTSYS; | ||
| 246 | goto out; | ||
| 247 | } | ||
| 248 | } else if (copied < 0) { | ||
| 249 | /* page fault */ | ||
| 250 | err = copied; | ||
| 251 | break; | ||
| 252 | } else | ||
| 253 | /* nothing left to get, return to user space */ | ||
| 254 | break; | ||
| 255 | } | ||
| 256 | mutex_unlock(&ftdm->lock); | ||
| 257 | out: | ||
| 258 | return err; | ||
| 259 | } | ||
| 260 | |||
| 261 | static long ftdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
| 262 | { | ||
| 263 | long err = -ENOIOCTLCMD; | ||
| 264 | struct ftdev_minor* ftdm = filp->private_data; | ||
| 265 | |||
| 266 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
| 267 | err = -ERESTARTSYS; | ||
| 268 | goto out; | ||
| 269 | } | ||
| 270 | |||
| 271 | /* FIXME: check id against list of acceptable events */ | ||
| 272 | |||
| 273 | switch (cmd) { | ||
| 274 | case FTDEV_ENABLE_CMD: | ||
| 275 | if (activate(&ftdm->events, arg)) | ||
| 276 | err = -ENOMEM; | ||
| 277 | else | ||
| 278 | err = 0; | ||
| 279 | break; | ||
| 280 | |||
| 281 | case FTDEV_DISABLE_CMD: | ||
| 282 | deactivate(&ftdm->events, arg); | ||
| 283 | err = 0; | ||
| 284 | break; | ||
| 285 | |||
| 286 | default: | ||
| 287 | printk(KERN_DEBUG "ftdev: strange ioctl (%u, %lu)\n", cmd, arg); | ||
| 288 | }; | ||
| 289 | |||
| 290 | mutex_unlock(&ftdm->lock); | ||
| 291 | out: | ||
| 292 | return err; | ||
| 293 | } | ||
| 294 | |||
| 295 | static ssize_t ftdev_write(struct file *filp, const char __user *from, | ||
| 296 | size_t len, loff_t *f_pos) | ||
| 297 | { | ||
| 298 | struct ftdev_minor* ftdm = filp->private_data; | ||
| 299 | ssize_t err = -EINVAL; | ||
| 300 | struct ftdev* ftdev = ftdm->ftdev; | ||
| 301 | |||
| 302 | /* dispatch write to buffer-specific code, if available */ | ||
| 303 | if (ftdev->write) | ||
| 304 | err = ftdev->write(ftdm->buf, len, from); | ||
| 305 | |||
| 306 | return err; | ||
| 307 | } | ||
| 308 | |||
| 309 | struct file_operations ftdev_fops = { | ||
| 310 | .owner = THIS_MODULE, | ||
| 311 | .open = ftdev_open, | ||
| 312 | .release = ftdev_release, | ||
| 313 | .write = ftdev_write, | ||
| 314 | .read = ftdev_read, | ||
| 315 | .unlocked_ioctl = ftdev_ioctl, | ||
| 316 | }; | ||
| 317 | |||
| 318 | int ftdev_init( struct ftdev* ftdev, struct module* owner, | ||
| 319 | const int minor_cnt, const char* name) | ||
| 320 | { | ||
| 321 | int i, err; | ||
| 322 | |||
| 323 | BUG_ON(minor_cnt < 1); | ||
| 324 | |||
| 325 | cdev_init(&ftdev->cdev, &ftdev_fops); | ||
| 326 | ftdev->name = name; | ||
| 327 | ftdev->minor_cnt = minor_cnt; | ||
| 328 | ftdev->cdev.owner = owner; | ||
| 329 | ftdev->cdev.ops = &ftdev_fops; | ||
| 330 | ftdev->alloc = NULL; | ||
| 331 | ftdev->free = NULL; | ||
| 332 | ftdev->can_open = NULL; | ||
| 333 | ftdev->write = NULL; | ||
| 334 | |||
| 335 | ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor), | ||
| 336 | GFP_KERNEL); | ||
| 337 | if (!ftdev->minor) { | ||
| 338 | printk(KERN_WARNING "ftdev(%s): Could not allocate memory\n", | ||
| 339 | ftdev->name); | ||
| 340 | err = -ENOMEM; | ||
| 341 | goto err_out; | ||
| 342 | } | ||
| 343 | |||
| 344 | for (i = 0; i < ftdev->minor_cnt; i++) { | ||
| 345 | mutex_init(&ftdev->minor[i].lock); | ||
| 346 | ftdev->minor[i].readers = 0; | ||
| 347 | ftdev->minor[i].buf = NULL; | ||
| 348 | ftdev->minor[i].events = NULL; | ||
| 349 | } | ||
| 350 | |||
| 351 | ftdev->class = class_create(owner, ftdev->name); | ||
| 352 | if (IS_ERR(ftdev->class)) { | ||
| 353 | err = PTR_ERR(ftdev->class); | ||
| 354 | printk(KERN_WARNING "ftdev(%s): " | ||
| 355 | "Could not create device class.\n", ftdev->name); | ||
| 356 | goto err_dealloc; | ||
| 357 | } | ||
| 358 | |||
| 359 | return 0; | ||
| 360 | |||
| 361 | err_dealloc: | ||
| 362 | kfree(ftdev->minor); | ||
| 363 | err_out: | ||
| 364 | return err; | ||
| 365 | } | ||
| 366 | |||
| 367 | /* | ||
| 368 | * Destroy minor devices up to, but not including, up_to. | ||
| 369 | */ | ||
| 370 | static void ftdev_device_destroy(struct ftdev* ftdev, unsigned int up_to) | ||
| 371 | { | ||
| 372 | dev_t minor_cntr; | ||
| 373 | |||
| 374 | if (up_to < 1) | ||
| 375 | up_to = (ftdev->minor_cnt < 1) ? 0 : ftdev->minor_cnt; | ||
| 376 | |||
| 377 | for (minor_cntr = 0; minor_cntr < up_to; ++minor_cntr) | ||
| 378 | device_destroy(ftdev->class, MKDEV(ftdev->major, minor_cntr)); | ||
| 379 | } | ||
| 380 | |||
| 381 | void ftdev_exit(struct ftdev* ftdev) | ||
| 382 | { | ||
| 383 | printk("ftdev(%s): Exiting\n", ftdev->name); | ||
| 384 | ftdev_device_destroy(ftdev, -1); | ||
| 385 | cdev_del(&ftdev->cdev); | ||
| 386 | unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt); | ||
| 387 | class_destroy(ftdev->class); | ||
| 388 | kfree(ftdev->minor); | ||
| 389 | } | ||
| 390 | |||
| 391 | int register_ftdev(struct ftdev* ftdev) | ||
| 392 | { | ||
| 393 | struct device **device; | ||
| 394 | dev_t trace_dev_tmp, minor_cntr; | ||
| 395 | int err; | ||
| 396 | |||
| 397 | err = alloc_chrdev_region(&trace_dev_tmp, 0, ftdev->minor_cnt, | ||
| 398 | ftdev->name); | ||
| 399 | if (err) { | ||
| 400 | printk(KERN_WARNING "ftdev(%s): " | ||
| 401 | "Could not allocate char. device region (%d minors)\n", | ||
| 402 | ftdev->name, ftdev->minor_cnt); | ||
| 403 | goto err_out; | ||
| 404 | } | ||
| 405 | |||
| 406 | ftdev->major = MAJOR(trace_dev_tmp); | ||
| 407 | |||
| 408 | err = cdev_add(&ftdev->cdev, trace_dev_tmp, ftdev->minor_cnt); | ||
| 409 | if (err) { | ||
| 410 | printk(KERN_WARNING "ftdev(%s): " | ||
| 411 | "Could not add cdev for major %u with %u minor(s).\n", | ||
| 412 | ftdev->name, ftdev->major, ftdev->minor_cnt); | ||
| 413 | goto err_unregister; | ||
| 414 | } | ||
| 415 | |||
| 416 | /* create the minor device(s) */ | ||
| 417 | for (minor_cntr = 0; minor_cntr < ftdev->minor_cnt; ++minor_cntr) | ||
| 418 | { | ||
| 419 | trace_dev_tmp = MKDEV(ftdev->major, minor_cntr); | ||
| 420 | device = &ftdev->minor[minor_cntr].device; | ||
| 421 | |||
| 422 | *device = device_create(ftdev->class, NULL, trace_dev_tmp, NULL, | ||
| 423 | "litmus/%s%d", ftdev->name, minor_cntr); | ||
| 424 | if (IS_ERR(*device)) { | ||
| 425 | err = PTR_ERR(*device); | ||
| 426 | printk(KERN_WARNING "ftdev(%s): " | ||
| 427 | "Could not create device major/minor number " | ||
| 428 | "%u/%u\n", ftdev->name, ftdev->major, | ||
| 429 | minor_cntr); | ||
| 430 | printk(KERN_WARNING "ftdev(%s): " | ||
| 431 | "will attempt deletion of allocated devices.\n", | ||
| 432 | ftdev->name); | ||
| 433 | goto err_minors; | ||
| 434 | } | ||
| 435 | } | ||
| 436 | |||
| 437 | return 0; | ||
| 438 | |||
| 439 | err_minors: | ||
| 440 | ftdev_device_destroy(ftdev, minor_cntr); | ||
| 441 | cdev_del(&ftdev->cdev); | ||
| 442 | err_unregister: | ||
| 443 | unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt); | ||
| 444 | err_out: | ||
| 445 | return err; | ||
| 446 | } | ||
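User space consumes these buffers by reading whole slots from the character devices that `register_ftdev()` creates via `device_create(..., "litmus/%s%d", ...)`; with devtmpfs/udev these typically appear under `/dev/litmus/`, and trace events must first be enabled via the `FTDEV_ENABLE_CMD` ioctl shown above. A hedged reader sketch — the device path and slot size below are assumptions for illustration, and the real slot size depends on the buffer being read:

```c
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Both the device path and the record size are assumptions for this sketch;
 * the actual slot size depends on which ftdev buffer is being read. */
#define DEMO_TRACE_DEV  "/dev/litmus/ft_trace0"
#define DEMO_SLOT_SIZE  16

int main(void)
{
	char slot[DEMO_SLOT_SIZE];
	ssize_t got;
	int fd = open(DEMO_TRACE_DEV, O_RDONLY);

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* ftdev_read() hands back whole slots only; a zero-length read means
	 * no complete record was available (e.g., no events enabled). */
	while ((got = read(fd, slot, sizeof(slot))) > 0)
		printf("got %zd bytes of trace data\n", got);

	close(fd);
	return EXIT_SUCCESS;
}
```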
diff --git a/litmus/jobs.c b/litmus/jobs.c new file mode 100644 index 00000000000..7bc75bba863 --- /dev/null +++ b/litmus/jobs.c | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | /* litmus/jobs.c - common job control code | ||
| 2 | */ | ||
| 3 | |||
| 4 | #include <linux/sched.h> | ||
| 5 | |||
| 6 | #include <litmus/litmus.h> | ||
| 7 | #include <litmus/jobs.h> | ||
| 8 | #include <litmus/trace.h> | ||
| 9 | |||
| 10 | static inline void setup_release(struct task_struct *t, lt_t release) | ||
| 11 | { | ||
| 12 | /* prepare next release */ | ||
| 13 | tsk_rt(t)->job_params.release = release; | ||
| 14 | tsk_rt(t)->job_params.deadline = release + get_rt_relative_deadline(t); | ||
| 15 | tsk_rt(t)->job_params.exec_time = 0; | ||
| 16 | /* update job sequence number */ | ||
| 17 | tsk_rt(t)->job_params.job_no++; | ||
| 18 | |||
| 19 | /* don't confuse Linux */ | ||
| 20 | t->rt.time_slice = 1; | ||
| 21 | } | ||
| 22 | |||
| 23 | void prepare_for_next_period(struct task_struct *t) | ||
| 24 | { | ||
| 25 | BUG_ON(!t); | ||
| 26 | |||
| 27 | /* Record lateness before we set up the next job's | ||
| 28 | * release and deadline. Lateness may be negative. | ||
| 29 | */ | ||
| 30 | t->rt_param.job_params.lateness = | ||
| 31 | (long long)litmus_clock() - | ||
| 32 | (long long)t->rt_param.job_params.deadline; | ||
| 33 | |||
| 34 | setup_release(t, get_release(t) + get_rt_period(t)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void release_at(struct task_struct *t, lt_t start) | ||
| 38 | { | ||
| 39 | BUG_ON(!t); | ||
| 40 | setup_release(t, start); | ||
| 41 | tsk_rt(t)->completed = 0; | ||
| 42 | } | ||
| 43 | |||
| 44 | |||
| 45 | /* | ||
| 46 | * Deactivate current task until the beginning of the next period. | ||
| 47 | */ | ||
| 48 | long complete_job(void) | ||
| 49 | { | ||
| 50 | lt_t amount; | ||
| 51 | lt_t now = litmus_clock(); | ||
| 52 | lt_t exec_time = tsk_rt(current)->job_params.exec_time; | ||
| 53 | |||
| 54 | tsk_rt(current)->tot_exec_time += exec_time; | ||
| 55 | if (lt_before(tsk_rt(current)->max_exec_time, exec_time)) | ||
| 56 | tsk_rt(current)->max_exec_time = exec_time; | ||
| 57 | |||
| 58 | if (is_tardy(current, now)) { | ||
| 59 | amount = now - get_deadline(current); | ||
| 60 | if (lt_after(amount, tsk_rt(current)->max_tardy)) | ||
| 61 | tsk_rt(current)->max_tardy = amount; | ||
| 62 | tsk_rt(current)->total_tardy += amount; | ||
| 63 | ++tsk_rt(current)->missed; | ||
| 64 | } | ||
| 65 | |||
| 66 | /* Mark that we do not execute anymore */ | ||
| 67 | tsk_rt(current)->completed = 1; | ||
| 68 | /* call schedule, this will return when a new job arrives | ||
| 69 | * it also takes care of preparing for the next release | ||
| 70 | */ | ||
| 71 | schedule(); | ||
| 72 | return 0; | ||
| 73 | } | ||
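A worked example of the bookkeeping in `setup_release()` and `prepare_for_next_period()`: the next release is one period after the previous one, the new deadline is the release plus the relative deadline, and lateness is the completion time minus the old deadline (it may be negative when a job finishes early). The sketch below uses plain signed integers in place of `lt_t` and hypothetical names:

```c
#include <stdio.h>

/* Simplified job state; lt_t in the kernel is an unsigned 64-bit time in ns. */
struct demo_job {
	long long release;
	long long deadline;
	unsigned int job_no;
};

static void demo_setup_release(struct demo_job *j, long long release,
			       long long relative_deadline)
{
	j->release = release;
	j->deadline = release + relative_deadline;
	j->job_no++;
}

int main(void)
{
	const long long period = 10000000;		/* 10 ms */
	const long long relative_deadline = 8000000;	/*  8 ms */
	struct demo_job j = { .release = 0, .deadline = relative_deadline, .job_no = 1 };

	long long now = 9000000;	/* job completes 9 ms in: 1 ms past its deadline */
	long long lateness = now - j.deadline;

	demo_setup_release(&j, j.release + period, relative_deadline);

	printf("lateness = %lld ns\n", lateness);		/* 1000000 */
	printf("job %u: release=%lld deadline=%lld\n",
	       j.job_no, j.release, j.deadline);		/* job 2: 10 ms, 18 ms */
	return 0;
}
```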
diff --git a/litmus/litmus.c b/litmus/litmus.c new file mode 100644 index 00000000000..dc94be71bfb --- /dev/null +++ b/litmus/litmus.c | |||
| @@ -0,0 +1,579 @@ | |||
| 1 | /* | ||
| 2 | * litmus.c -- Implementation of the LITMUS syscalls, | ||
| 3 | * the LITMUS initialization code, | ||
| 4 | * and the procfs interface. | ||
| 5 | */ | ||
| 6 | #include <asm/uaccess.h> | ||
| 7 | #include <linux/uaccess.h> | ||
| 8 | #include <linux/sysrq.h> | ||
| 9 | #include <linux/sched.h> | ||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/reboot.h> | ||
| 13 | #include <linux/stop_machine.h> | ||
| 14 | |||
| 15 | #include <litmus/litmus.h> | ||
| 16 | #include <litmus/bheap.h> | ||
| 17 | #include <litmus/trace.h> | ||
| 18 | #include <litmus/rt_domain.h> | ||
| 19 | #include <litmus/litmus_proc.h> | ||
| 20 | #include <litmus/sched_trace.h> | ||
| 21 | |||
| 22 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
| 23 | #include <litmus/affinity.h> | ||
| 24 | #endif | ||
| 25 | |||
| 26 | /* Number of RT tasks that exist in the system */ | ||
| 27 | atomic_t rt_task_count = ATOMIC_INIT(0); | ||
| 28 | |||
| 29 | /* Give log messages sequential IDs. */ | ||
| 30 | atomic_t __log_seq_no = ATOMIC_INIT(0); | ||
| 31 | |||
| 32 | #ifdef CONFIG_RELEASE_MASTER | ||
| 33 | /* current master CPU for handling timer IRQs */ | ||
| 34 | atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); | ||
| 35 | #endif | ||
| 36 | |||
| 37 | static struct kmem_cache * bheap_node_cache; | ||
| 38 | extern struct kmem_cache * release_heap_cache; | ||
| 39 | |||
| 40 | struct bheap_node* bheap_node_alloc(int gfp_flags) | ||
| 41 | { | ||
| 42 | return kmem_cache_alloc(bheap_node_cache, gfp_flags); | ||
| 43 | } | ||
| 44 | |||
| 45 | void bheap_node_free(struct bheap_node* hn) | ||
| 46 | { | ||
| 47 | kmem_cache_free(bheap_node_cache, hn); | ||
| 48 | } | ||
| 49 | |||
| 50 | struct release_heap* release_heap_alloc(int gfp_flags); | ||
| 51 | void release_heap_free(struct release_heap* rh); | ||
| 52 | |||
| 53 | /* | ||
| 54 | * sys_set_task_rt_param | ||
| 55 | * @pid: Pid of the task whose scheduling parameters must be changed | ||
| 56 | * @param: New real-time extension parameters such as the execution cost and | ||
| 57 | * period | ||
| 58 | * Syscall for manipulating a task's rt extension params. | ||
| 59 | * Returns EFAULT if copying of parameters has failed, | ||
| 60 | * ESRCH if pid does not correspond | ||
| 61 | * to a valid task, | ||
| 62 | * EINVAL if either period or execution cost is <= 0, | ||
| 63 | * EBUSY if pid is already a real-time task, | ||
| 64 | * 0 on success. | ||
| 65 | * | ||
| 66 | * Only non-real-time tasks may be configured with this system call | ||
| 67 | * to avoid races with the scheduler. In practice, this means that a | ||
| 68 | * task's parameters must be set _before_ calling sys_prepare_rt_task() | ||
| 69 | * | ||
| 70 | * find_task_by_vpid() assumes that we are in the same namespace of the | ||
| 71 | * target. | ||
| 72 | */ | ||
| 73 | asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
| 74 | { | ||
| 75 | struct rt_task tp; | ||
| 76 | struct task_struct *target; | ||
| 77 | int retval = -EINVAL; | ||
| 78 | |||
| 79 | printk("Setting up rt task parameters for process %d.\n", pid); | ||
| 80 | |||
| 81 | if (pid < 0 || param == 0) { | ||
| 82 | goto out; | ||
| 83 | } | ||
| 84 | if (copy_from_user(&tp, param, sizeof(tp))) { | ||
| 85 | retval = -EFAULT; | ||
| 86 | goto out; | ||
| 87 | } | ||
| 88 | |||
| 89 | /* Task search and manipulation must be protected */ | ||
| 90 | read_lock_irq(&tasklist_lock); | ||
| 91 | if (!(target = find_task_by_vpid(pid))) { | ||
| 92 | retval = -ESRCH; | ||
| 93 | goto out_unlock; | ||
| 94 | } | ||
| 95 | |||
| 96 | if (is_realtime(target)) { | ||
| 97 | /* The task is already a real-time task. | ||
| 98 | * We cannot allow parameter changes at this point. | ||
| 99 | */ | ||
| 100 | retval = -EBUSY; | ||
| 101 | goto out_unlock; | ||
| 102 | } | ||
| 103 | |||
| 104 | /* set relative deadline to be implicit if left unspecified */ | ||
| 105 | if (tp.relative_deadline == 0) | ||
| 106 | tp.relative_deadline = tp.period; | ||
| 107 | |||
| 108 | if (tp.exec_cost <= 0) | ||
| 109 | goto out_unlock; | ||
| 110 | if (tp.period <= 0) | ||
| 111 | goto out_unlock; | ||
| 112 | if (!cpu_online(tp.cpu)) | ||
| 113 | goto out_unlock; | ||
| 114 | if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /* density check */ | ||
| 115 | { | ||
| 116 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
| 117 | "because task density > 1.0\n", pid); | ||
| 118 | goto out_unlock; | ||
| 119 | } | ||
| 120 | if (tp.cls != RT_CLASS_HARD && | ||
| 121 | tp.cls != RT_CLASS_SOFT && | ||
| 122 | tp.cls != RT_CLASS_BEST_EFFORT) | ||
| 123 | { | ||
| 124 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
| 125 | "because its class is invalid\n", pid); | ||
| 126 | goto out_unlock; | ||
| 127 | } | ||
| 128 | if (tp.budget_policy != NO_ENFORCEMENT && | ||
| 129 | tp.budget_policy != QUANTUM_ENFORCEMENT && | ||
| 130 | tp.budget_policy != PRECISE_ENFORCEMENT) | ||
| 131 | { | ||
| 132 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
| 133 | "because unsupported budget enforcement policy " | ||
| 134 | "specified (%d)\n", | ||
| 135 | pid, tp.budget_policy); | ||
| 136 | goto out_unlock; | ||
| 137 | } | ||
| 138 | |||
| 139 | target->rt_param.task_params = tp; | ||
| 140 | |||
| 141 | retval = 0; | ||
| 142 | out_unlock: | ||
| 143 | read_unlock_irq(&tasklist_lock); | ||
| 144 | out: | ||
| 145 | return retval; | ||
| 146 | } | ||
| 147 | |||
| 148 | /* | ||
| 149 | * Getter of task's RT params | ||
| 150 | * returns EINVAL if param is NULL or pid is negative | ||
| 151 | * returns ESRCH if pid does not correspond to a valid task | ||
| 152 | * returns EFAULT if copying of parameters has failed. | ||
| 153 | * | ||
| 154 | * find_task_by_vpid() assumes that we are in the same namespace of the | ||
| 155 | * target. | ||
| 156 | */ | ||
| 157 | asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
| 158 | { | ||
| 159 | int retval = -EINVAL; | ||
| 160 | struct task_struct *source; | ||
| 161 | struct rt_task lp; | ||
| 162 | if (param == 0 || pid < 0) | ||
| 163 | goto out; | ||
| 164 | read_lock(&tasklist_lock); | ||
| 165 | if (!(source = find_task_by_vpid(pid))) { | ||
| 166 | retval = -ESRCH; | ||
| 167 | goto out_unlock; | ||
| 168 | } | ||
| 169 | lp = source->rt_param.task_params; | ||
| 170 | read_unlock(&tasklist_lock); | ||
| 171 | /* Do copying outside the lock */ | ||
| 172 | retval = | ||
| 173 | copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0; | ||
| 174 | return retval; | ||
| 175 | out_unlock: | ||
| 176 | read_unlock(&tasklist_lock); | ||
| 177 | out: | ||
| 178 | return retval; | ||
| 179 | |||
| 180 | } | ||
| 181 | |||
| 182 | /* | ||
| 183 | * This is the central function of the periodic task implementation. | ||
| 184 | * It checks whether the task is periodic and whether such a sleep | ||
| 185 | * is permitted, and calls the plugin-specific sleep code, which puts | ||
| 186 | * the task onto a wait queue. | ||
| 187 | * returns 0 on successful wakeup | ||
| 188 | * returns EPERM if current conditions do not permit such sleep | ||
| 189 | * returns EINVAL if current task is not able to go to sleep | ||
| 190 | */ | ||
| 191 | asmlinkage long sys_complete_job(void) | ||
| 192 | { | ||
| 193 | int retval = -EPERM; | ||
| 194 | if (!is_realtime(current)) { | ||
| 195 | retval = -EINVAL; | ||
| 196 | goto out; | ||
| 197 | } | ||
| 198 | /* Task with negative or zero period cannot sleep */ | ||
| 199 | if (get_rt_period(current) <= 0) { | ||
| 200 | retval = -EINVAL; | ||
| 201 | goto out; | ||
| 202 | } | ||
| 203 | /* The plugin has to put the task into an | ||
| 204 | * appropriate queue and call schedule | ||
| 205 | */ | ||
| 206 | retval = litmus->complete_job(); | ||
| 207 | out: | ||
| 208 | return retval; | ||
| 209 | } | ||
| 210 | |||
| 211 | /* This is an "improved" version of sys_complete_job that | ||
| 212 | * addresses the problem of unintentionally missing a job after | ||
| 213 | * an overrun. | ||
| 214 | * | ||
| 215 | * returns 0 on successful wakeup | ||
| 216 | * returns EPERM if current conditions do not permit such sleep | ||
| 217 | * returns EINVAL if current task is not able to go to sleep | ||
| 218 | */ | ||
| 219 | asmlinkage long sys_wait_for_job_release(unsigned int job) | ||
| 220 | { | ||
| 221 | int retval = -EPERM; | ||
| 222 | if (!is_realtime(current)) { | ||
| 223 | retval = -EINVAL; | ||
| 224 | goto out; | ||
| 225 | } | ||
| 226 | |||
| 227 | /* Task with negative or zero period cannot sleep */ | ||
| 228 | if (get_rt_period(current) <= 0) { | ||
| 229 | retval = -EINVAL; | ||
| 230 | goto out; | ||
| 231 | } | ||
| 232 | |||
| 233 | retval = 0; | ||
| 234 | |||
| 235 | /* first wait until we have "reached" the desired job | ||
| 236 | * | ||
| 237 | * This implementation has at least two problems: | ||
| 238 | * | ||
| 239 | * 1) It doesn't gracefully handle the wrap around of | ||
| 240 | * job_no. Since LITMUS is a prototype, this is not much | ||
| 241 | * of a problem right now. | ||
| 242 | * | ||
| 243 | * 2) It is theoretically racy if a job release occurs | ||
| 244 | * between checking job_no and calling sleep_next_period(). | ||
| 245 | * A proper solution would require adding another callback | ||
| 246 | * in the plugin structure and testing the condition with | ||
| 247 | * interrupts disabled. | ||
| 248 | * | ||
| 249 | * FIXME: At least problem 2 should be taken care of eventually. | ||
| 250 | */ | ||
| 251 | while (!retval && job > current->rt_param.job_params.job_no) | ||
| 252 | /* If the last job overran then job <= job_no and we | ||
| 253 | * don't send the task to sleep. | ||
| 254 | */ | ||
| 255 | retval = litmus->complete_job(); | ||
| 256 | out: | ||
| 257 | return retval; | ||
| 258 | } | ||
| 259 | |||
| 260 | /* This is a helper syscall to query the current job sequence number. | ||
| 261 | * | ||
| 262 | * returns 0 on successful query | ||
| 263 | * returns EPERM if task is not a real-time task. | ||
| 264 | * returns EFAULT if &job is not a valid pointer. | ||
| 265 | */ | ||
| 266 | asmlinkage long sys_query_job_no(unsigned int __user *job) | ||
| 267 | { | ||
| 268 | int retval = -EPERM; | ||
| 269 | if (is_realtime(current)) | ||
| 270 | retval = put_user(current->rt_param.job_params.job_no, job); | ||
| 271 | |||
| 272 | return retval; | ||
| 273 | } | ||
| 274 | |||
| 275 | /* sys_null_call() is only used for determining raw system call | ||
| 276 | * overheads (kernel entry, kernel exit). It has no useful side effects. | ||
| 277 | * If ts is non-NULL, then the current Feather-Trace time is recorded. | ||
| 278 | */ | ||
| 279 | asmlinkage long sys_null_call(cycles_t __user *ts) | ||
| 280 | { | ||
| 281 | long ret = 0; | ||
| 282 | cycles_t now; | ||
| 283 | |||
| 284 | if (ts) { | ||
| 285 | now = get_cycles(); | ||
| 286 | ret = put_user(now, ts); | ||
| 287 | } | ||
| 288 | |||
| 289 | return ret; | ||
| 290 | } | ||
| 291 | |||
| 292 | /* p is a real-time task. Re-init its state as a best-effort task. */ | ||
| 293 | static void reinit_litmus_state(struct task_struct* p, int restore) | ||
| 294 | { | ||
| 295 | struct rt_task user_config = {}; | ||
| 296 | void* ctrl_page = NULL; | ||
| 297 | |||
| 298 | if (restore) { | ||
| 299 | /* Save user-space provided configuration data | ||
| 300 | * and the allocated control page. */ | ||
| 301 | user_config = p->rt_param.task_params; | ||
| 302 | ctrl_page = p->rt_param.ctrl_page; | ||
| 303 | } | ||
| 304 | |||
| 305 | /* We probably should not be inheriting any task's priority | ||
| 306 | * at this point in time. | ||
| 307 | */ | ||
| 308 | WARN_ON(p->rt_param.inh_task); | ||
| 309 | |||
| 310 | /* Cleanup everything else. */ | ||
| 311 | memset(&p->rt_param, 0, sizeof(p->rt_param)); | ||
| 312 | |||
| 313 | /* Restore preserved fields. */ | ||
| 314 | if (restore) { | ||
| 315 | p->rt_param.task_params = user_config; | ||
| 316 | p->rt_param.ctrl_page = ctrl_page; | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 320 | long litmus_admit_task(struct task_struct* tsk) | ||
| 321 | { | ||
| 322 | long retval = 0; | ||
| 323 | |||
| 324 | BUG_ON(is_realtime(tsk)); | ||
| 325 | |||
| 326 | tsk_rt(tsk)->heap_node = NULL; | ||
| 327 | tsk_rt(tsk)->rel_heap = NULL; | ||
| 328 | |||
| 329 | if (get_rt_relative_deadline(tsk) == 0 || | ||
| 330 | get_exec_cost(tsk) > | ||
| 331 | min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { | ||
| 332 | TRACE_TASK(tsk, | ||
| 333 | "litmus admit: invalid task parameters " | ||
| 334 | "(e = %lu, p = %lu, d = %lu)\n", | ||
| 335 | get_exec_cost(tsk), get_rt_period(tsk), | ||
| 336 | get_rt_relative_deadline(tsk)); | ||
| 337 | retval = -EINVAL; | ||
| 338 | goto out; | ||
| 339 | } | ||
| 340 | |||
| 341 | if (!cpu_online(get_partition(tsk))) { | ||
| 342 | TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", | ||
| 343 | get_partition(tsk)); | ||
| 344 | retval = -EINVAL; | ||
| 345 | goto out; | ||
| 346 | } | ||
| 347 | |||
| 348 | INIT_LIST_HEAD(&tsk_rt(tsk)->list); | ||
| 349 | |||
| 350 | /* allocate heap node for this task */ | ||
| 351 | tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC); | ||
| 352 | tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC); | ||
| 353 | |||
| 354 | if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) { | ||
| 355 | printk(KERN_WARNING "litmus: no more heap node memory!?\n"); | ||
| 356 | |||
| 357 | retval = -ENOMEM; | ||
| 358 | goto out; | ||
| 359 | } else { | ||
| 360 | bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); | ||
| 361 | } | ||
| 362 | |||
| 363 | preempt_disable(); | ||
| 364 | |||
| 365 | retval = litmus->admit_task(tsk); | ||
| 366 | |||
| 367 | if (!retval) { | ||
| 368 | sched_trace_task_name(tsk); | ||
| 369 | sched_trace_task_param(tsk); | ||
| 370 | atomic_inc(&rt_task_count); | ||
| 371 | } | ||
| 372 | |||
| 373 | preempt_enable(); | ||
| 374 | |||
| 375 | out: | ||
| 376 | if (retval) { | ||
| 377 | bheap_node_free(tsk_rt(tsk)->heap_node); | ||
| 378 | release_heap_free(tsk_rt(tsk)->rel_heap); | ||
| 379 | } | ||
| 380 | return retval; | ||
| 381 | } | ||
| 382 | |||
| 383 | void litmus_exit_task(struct task_struct* tsk) | ||
| 384 | { | ||
| 385 | if (is_realtime(tsk)) { | ||
| 386 | sched_trace_task_completion(tsk, 1); | ||
| 387 | |||
| 388 | litmus->task_exit(tsk); | ||
| 389 | |||
| 390 | BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); | ||
| 391 | bheap_node_free(tsk_rt(tsk)->heap_node); | ||
| 392 | release_heap_free(tsk_rt(tsk)->rel_heap); | ||
| 393 | |||
| 394 | atomic_dec(&rt_task_count); | ||
| 395 | reinit_litmus_state(tsk, 1); | ||
| 396 | } | ||
| 397 | } | ||
| 398 | |||
| 399 | static int do_plugin_switch(void *_plugin) | ||
| 400 | { | ||
| 401 | int ret; | ||
| 402 | struct sched_plugin* plugin = _plugin; | ||
| 403 | |||
| 404 | /* don't switch if there are active real-time tasks */ | ||
| 405 | if (atomic_read(&rt_task_count) == 0) { | ||
| 406 | ret = litmus->deactivate_plugin(); | ||
| 407 | if (0 != ret) | ||
| 408 | goto out; | ||
| 409 | ret = plugin->activate_plugin(); | ||
| 410 | if (0 != ret) { | ||
| 411 | printk(KERN_INFO "Can't activate %s (%d).\n", | ||
| 412 | plugin->plugin_name, ret); | ||
| 413 | plugin = &linux_sched_plugin; | ||
| 414 | } | ||
| 415 | printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); | ||
| 416 | litmus = plugin; | ||
| 417 | } else | ||
| 418 | ret = -EBUSY; | ||
| 419 | out: | ||
| 420 | return ret; | ||
| 421 | } | ||
| 422 | |||
| 423 | /* Switching a plugin in use is tricky. | ||
| 424 | * We must make sure that no real-time tasks exist | ||
| 425 | * (and that none is created in parallel) and that the plugin is not | ||
| 426 | * currently in use on any processor (in theory). | ||
| 427 | */ | ||
| 428 | int switch_sched_plugin(struct sched_plugin* plugin) | ||
| 429 | { | ||
| 430 | BUG_ON(!plugin); | ||
| 431 | |||
| 432 | if (atomic_read(&rt_task_count) == 0) | ||
| 433 | return stop_machine(do_plugin_switch, plugin, NULL); | ||
| 434 | else | ||
| 435 | return -EBUSY; | ||
| 436 | } | ||
| 437 | |||
| 438 | /* Called upon fork. | ||
| 439 | * p is the newly forked task. | ||
| 440 | */ | ||
| 441 | void litmus_fork(struct task_struct* p) | ||
| 442 | { | ||
| 443 | if (is_realtime(p)) { | ||
| 444 | /* clean out any litmus related state, don't preserve anything */ | ||
| 445 | reinit_litmus_state(p, 0); | ||
| 446 | /* Don't let the child be a real-time task. */ | ||
| 447 | p->sched_reset_on_fork = 1; | ||
| 448 | } else | ||
| 449 | /* non-rt tasks might have ctrl_page set */ | ||
| 450 | tsk_rt(p)->ctrl_page = NULL; | ||
| 451 | |||
| 452 | /* od tables are never inherited across a fork */ | ||
| 453 | p->od_table = NULL; | ||
| 454 | } | ||
| 455 | |||
| 456 | /* Called upon execve(). | ||
| 457 | * current is doing the exec. | ||
| 458 | * Don't let address space specific stuff leak. | ||
| 459 | */ | ||
| 460 | void litmus_exec(void) | ||
| 461 | { | ||
| 462 | struct task_struct* p = current; | ||
| 463 | |||
| 464 | if (is_realtime(p)) { | ||
| 465 | WARN_ON(p->rt_param.inh_task); | ||
| 466 | if (tsk_rt(p)->ctrl_page) { | ||
| 467 | free_page((unsigned long) tsk_rt(p)->ctrl_page); | ||
| 468 | tsk_rt(p)->ctrl_page = NULL; | ||
| 469 | } | ||
| 470 | } | ||
| 471 | } | ||
| 472 | |||
| 473 | void exit_litmus(struct task_struct *dead_tsk) | ||
| 474 | { | ||
| 475 | /* We also allow non-RT tasks to | ||
| 476 | * allocate control pages to allow | ||
| 477 | * measurements with non-RT tasks. | ||
| 478 | * So check if we need to free the page | ||
| 479 | * in any case. | ||
| 480 | */ | ||
| 481 | if (tsk_rt(dead_tsk)->ctrl_page) { | ||
| 482 | TRACE_TASK(dead_tsk, | ||
| 483 | "freeing ctrl_page %p\n", | ||
| 484 | tsk_rt(dead_tsk)->ctrl_page); | ||
| 485 | free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); | ||
| 486 | } | ||
| 487 | |||
| 488 | /* main cleanup only for RT tasks */ | ||
| 489 | if (is_realtime(dead_tsk)) | ||
| 490 | litmus_exit_task(dead_tsk); | ||
| 491 | } | ||
| 492 | |||
| 493 | |||
| 494 | #ifdef CONFIG_MAGIC_SYSRQ | ||
| 495 | int sys_kill(int pid, int sig); | ||
| 496 | |||
| 497 | static void sysrq_handle_kill_rt_tasks(int key) | ||
| 498 | { | ||
| 499 | struct task_struct *t; | ||
| 500 | read_lock(&tasklist_lock); | ||
| 501 | for_each_process(t) { | ||
| 502 | if (is_realtime(t)) { | ||
| 503 | sys_kill(t->pid, SIGKILL); | ||
| 504 | } | ||
| 505 | } | ||
| 506 | read_unlock(&tasklist_lock); | ||
| 507 | } | ||
| 508 | |||
| 509 | static struct sysrq_key_op sysrq_kill_rt_tasks_op = { | ||
| 510 | .handler = sysrq_handle_kill_rt_tasks, | ||
| 511 | .help_msg = "quit-rt-tasks(X)", | ||
| 512 | .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks", | ||
| 513 | }; | ||
| 514 | #endif | ||
| 515 | |||
| 516 | extern struct sched_plugin linux_sched_plugin; | ||
| 517 | |||
| 518 | static int litmus_shutdown_nb(struct notifier_block *unused1, | ||
| 519 | unsigned long unused2, void *unused3) | ||
| 520 | { | ||
| 521 | /* Attempt to switch back to regular Linux scheduling. | ||
| 522 | * Forces the active plugin to clean up. | ||
| 523 | */ | ||
| 524 | if (litmus != &linux_sched_plugin) { | ||
| 525 | int ret = switch_sched_plugin(&linux_sched_plugin); | ||
| 526 | if (ret) { | ||
| 527 | printk("Auto-shutdown of active Litmus plugin failed.\n"); | ||
| 528 | } | ||
| 529 | } | ||
| 530 | return NOTIFY_DONE; | ||
| 531 | } | ||
| 532 | |||
| 533 | static struct notifier_block shutdown_notifier = { | ||
| 534 | .notifier_call = litmus_shutdown_nb, | ||
| 535 | }; | ||
| 536 | |||
| 537 | static int __init _init_litmus(void) | ||
| 538 | { | ||
| 539 | /* Common initializers, | ||
| 540 | * mode change lock is used to enforce single mode change | ||
| 541 | * operation. | ||
| 542 | */ | ||
| 543 | printk("Starting LITMUS^RT kernel\n"); | ||
| 544 | |||
| 545 | register_sched_plugin(&linux_sched_plugin); | ||
| 546 | |||
| 547 | bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); | ||
| 548 | release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC); | ||
| 549 | |||
| 550 | #ifdef CONFIG_MAGIC_SYSRQ | ||
| 551 | /* offer some debugging help */ | ||
| 552 | if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op)) | ||
| 553 | printk("Registered kill rt tasks magic sysrq.\n"); | ||
| 554 | else | ||
| 555 | printk("Could not register kill rt tasks magic sysrq.\n"); | ||
| 556 | #endif | ||
| 557 | |||
| 558 | init_litmus_proc(); | ||
| 559 | |||
| 560 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
| 561 | init_topology(); | ||
| 562 | #endif | ||
| 563 | |||
| 564 | register_reboot_notifier(&shutdown_notifier); | ||
| 565 | |||
| 566 | return 0; | ||
| 567 | } | ||
| 568 | |||
| 569 | static void _exit_litmus(void) | ||
| 570 | { | ||
| 571 | unregister_reboot_notifier(&shutdown_notifier); | ||
| 572 | |||
| 573 | exit_litmus_proc(); | ||
| 574 | kmem_cache_destroy(bheap_node_cache); | ||
| 575 | kmem_cache_destroy(release_heap_cache); | ||
| 576 | } | ||
| 577 | |||
| 578 | module_init(_init_litmus); | ||
| 579 | module_exit(_exit_litmus); | ||
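Both `sys_set_rt_task_param()` and `litmus_admit_task()` reject tasks whose execution cost exceeds min(relative deadline, period), i.e. whose density exceeds 1.0, and treat a zero relative deadline as implicit (equal to the period). A standalone sketch of that admission check (hypothetical helper name, values in nanoseconds):

```c
#include <stdbool.h>
#include <stdio.h>

/* Mirrors the admission check: a task is rejected if its execution cost
 * exceeds the smaller of its relative deadline and its period
 * ("density > 1.0" in the kernel message). */
static bool demo_params_ok(unsigned long long exec_cost,
			   unsigned long long period,
			   unsigned long long relative_deadline)
{
	if (relative_deadline == 0)
		relative_deadline = period; /* implicit deadline, as in the syscall */
	if (exec_cost == 0 || period == 0)
		return false;
	return exec_cost <= (relative_deadline < period ? relative_deadline : period);
}

int main(void)
{
	/* 3 ms of work every 10 ms with an 8 ms relative deadline: accepted. */
	printf("%d\n", demo_params_ok(3000000, 10000000, 8000000));	/* 1 */
	/* 9 ms of work with an 8 ms relative deadline: rejected. */
	printf("%d\n", demo_params_ok(9000000, 10000000, 8000000));	/* 0 */
	return 0;
}
```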
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c new file mode 100644 index 00000000000..4bf725a36c9 --- /dev/null +++ b/litmus/litmus_proc.c | |||
| @@ -0,0 +1,347 @@ | |||
| 1 | /* | ||
| 2 | * litmus_proc.c -- Implementation of the /proc/litmus directory tree. | ||
| 3 | */ | ||
| 4 | |||
| 5 | #include <linux/sched.h> | ||
| 6 | #include <linux/uaccess.h> | ||
| 7 | |||
| 8 | #include <litmus/litmus.h> | ||
| 9 | #include <litmus/litmus_proc.h> | ||
| 10 | |||
| 11 | #include <litmus/clustered.h> | ||
| 12 | |||
| 13 | /* in litmus/litmus.c */ | ||
| 14 | extern atomic_t rt_task_count; | ||
| 15 | |||
| 16 | static struct proc_dir_entry *litmus_dir = NULL, | ||
| 17 | *curr_file = NULL, | ||
| 18 | *stat_file = NULL, | ||
| 19 | *plugs_dir = NULL, | ||
| 20 | #ifdef CONFIG_RELEASE_MASTER | ||
| 21 | *release_master_file = NULL, | ||
| 22 | #endif | ||
| 23 | *plugs_file = NULL; | ||
| 24 | |||
| 25 | /* in litmus/sync.c */ | ||
| 26 | int count_tasks_waiting_for_release(void); | ||
| 27 | |||
| 28 | static int proc_read_stats(char *page, char **start, | ||
| 29 | off_t off, int count, | ||
| 30 | int *eof, void *data) | ||
| 31 | { | ||
| 32 | int len; | ||
| 33 | |||
| 34 | len = snprintf(page, PAGE_SIZE, | ||
| 35 | "real-time tasks = %d\n" | ||
| 36 | "ready for release = %d\n", | ||
| 37 | atomic_read(&rt_task_count), | ||
| 38 | count_tasks_waiting_for_release()); | ||
| 39 | return len; | ||
| 40 | } | ||
| 41 | |||
| 42 | static int proc_read_plugins(char *page, char **start, | ||
| 43 | off_t off, int count, | ||
| 44 | int *eof, void *data) | ||
| 45 | { | ||
| 46 | int len; | ||
| 47 | |||
| 48 | len = print_sched_plugins(page, PAGE_SIZE); | ||
| 49 | return len; | ||
| 50 | } | ||
| 51 | |||
| 52 | static int proc_read_curr(char *page, char **start, | ||
| 53 | off_t off, int count, | ||
| 54 | int *eof, void *data) | ||
| 55 | { | ||
| 56 | int len; | ||
| 57 | |||
| 58 | len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name); | ||
| 59 | return len; | ||
| 60 | } | ||
| 61 | |||
| 62 | /* in litmus/litmus.c */ | ||
| 63 | int switch_sched_plugin(struct sched_plugin*); | ||
| 64 | |||
| 65 | static int proc_write_curr(struct file *file, | ||
| 66 | const char *buffer, | ||
| 67 | unsigned long count, | ||
| 68 | void *data) | ||
| 69 | { | ||
| 70 | int len, ret; | ||
| 71 | char name[65]; | ||
| 72 | struct sched_plugin* found; | ||
| 73 | |||
| 74 | len = copy_and_chomp(name, sizeof(name), buffer, count); | ||
| 75 | if (len < 0) | ||
| 76 | return len; | ||
| 77 | |||
| 78 | found = find_sched_plugin(name); | ||
| 79 | |||
| 80 | if (found) { | ||
| 81 | ret = switch_sched_plugin(found); | ||
| 82 | if (ret != 0) | ||
| 83 | printk(KERN_INFO "Could not switch plugin: %d\n", ret); | ||
| 84 | } else | ||
| 85 | printk(KERN_INFO "Plugin '%s' is unknown.\n", name); | ||
| 86 | |||
| 87 | return len; | ||
| 88 | } | ||
| 89 | |||
| 90 | #ifdef CONFIG_RELEASE_MASTER | ||
| 91 | static int proc_read_release_master(char *page, char **start, | ||
| 92 | off_t off, int count, | ||
| 93 | int *eof, void *data) | ||
| 94 | { | ||
| 95 | int len, master; | ||
| 96 | master = atomic_read(&release_master_cpu); | ||
| 97 | if (master == NO_CPU) | ||
| 98 | len = snprintf(page, PAGE_SIZE, "NO_CPU\n"); | ||
| 99 | else | ||
| 100 | len = snprintf(page, PAGE_SIZE, "%d\n", master); | ||
| 101 | return len; | ||
| 102 | } | ||
| 103 | |||
| 104 | static int proc_write_release_master(struct file *file, | ||
| 105 | const char *buffer, | ||
| 106 | unsigned long count, | ||
| 107 | void *data) | ||
| 108 | { | ||
| 109 | int cpu, err, len, online = 0; | ||
| 110 | char msg[64]; | ||
| 111 | |||
| 112 | len = copy_and_chomp(msg, sizeof(msg), buffer, count); | ||
| 113 | |||
| 114 | if (len < 0) | ||
| 115 | return len; | ||
| 116 | |||
| 117 | if (strcmp(msg, "NO_CPU") == 0) | ||
| 118 | atomic_set(&release_master_cpu, NO_CPU); | ||
| 119 | else { | ||
| 120 | err = sscanf(msg, "%d", &cpu); | ||
| 121 | if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) { | ||
| 122 | atomic_set(&release_master_cpu, cpu); | ||
| 123 | } else { | ||
| 124 | TRACE("invalid release master: '%s' " | ||
| 125 | "(err:%d cpu:%d online:%d)\n", | ||
| 126 | msg, err, cpu, online); | ||
| 127 | len = -EINVAL; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | return len; | ||
| 131 | } | ||
| 132 | #endif | ||
| 133 | |||
| 134 | int __init init_litmus_proc(void) | ||
| 135 | { | ||
| 136 | litmus_dir = proc_mkdir("litmus", NULL); | ||
| 137 | if (!litmus_dir) { | ||
| 138 | printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); | ||
| 139 | return -ENOMEM; | ||
| 140 | } | ||
| 141 | |||
| 142 | curr_file = create_proc_entry("active_plugin", | ||
| 143 | 0644, litmus_dir); | ||
| 144 | if (!curr_file) { | ||
| 145 | printk(KERN_ERR "Could not allocate active_plugin " | ||
| 146 | "procfs entry.\n"); | ||
| 147 | return -ENOMEM; | ||
| 148 | } | ||
| 149 | curr_file->read_proc = proc_read_curr; | ||
| 150 | curr_file->write_proc = proc_write_curr; | ||
| 151 | |||
| 152 | #ifdef CONFIG_RELEASE_MASTER | ||
| 153 | release_master_file = create_proc_entry("release_master", | ||
| 154 | 0644, litmus_dir); | ||
| 155 | if (!release_master_file) { | ||
| 156 | printk(KERN_ERR "Could not allocate release_master " | ||
| 157 | "procfs entry.\n"); | ||
| 158 | return -ENOMEM; | ||
| 159 | } | ||
| 160 | release_master_file->read_proc = proc_read_release_master; | ||
| 161 | release_master_file->write_proc = proc_write_release_master; | ||
| 162 | #endif | ||
| 163 | |||
| 164 | stat_file = create_proc_read_entry("stats", 0444, litmus_dir, | ||
| 165 | proc_read_stats, NULL); | ||
| 166 | |||
| 167 | plugs_dir = proc_mkdir("plugins", litmus_dir); | ||
| 168 | if (!plugs_dir){ | ||
| 169 | printk(KERN_ERR "Could not allocate plugins directory " | ||
| 170 | "procfs entry.\n"); | ||
| 171 | return -ENOMEM; | ||
| 172 | } | ||
| 173 | |||
| 174 | plugs_file = create_proc_read_entry("loaded", 0444, plugs_dir, | ||
| 175 | proc_read_plugins, NULL); | ||
| 176 | |||
| 177 | return 0; | ||
| 178 | } | ||
| 179 | |||
| 180 | void exit_litmus_proc(void) | ||
| 181 | { | ||
| 182 | if (plugs_file) | ||
| 183 | remove_proc_entry("loaded", plugs_dir); | ||
| 184 | if (plugs_dir) | ||
| 185 | remove_proc_entry("plugins", litmus_dir); | ||
| 186 | if (stat_file) | ||
| 187 | remove_proc_entry("stats", litmus_dir); | ||
| 188 | if (curr_file) | ||
| 189 | remove_proc_entry("active_plugin", litmus_dir); | ||
| 190 | #ifdef CONFIG_RELEASE_MASTER | ||
| 191 | if (release_master_file) | ||
| 192 | remove_proc_entry("release_master", litmus_dir); | ||
| 193 | #endif | ||
| 194 | if (litmus_dir) | ||
| 195 | remove_proc_entry("litmus", NULL); | ||
| 196 | } | ||
| 197 | |||
| 198 | long make_plugin_proc_dir(struct sched_plugin* plugin, | ||
| 199 | struct proc_dir_entry** pde_in) | ||
| 200 | { | ||
| 201 | struct proc_dir_entry *pde_new = NULL; | ||
| 202 | long rv; | ||
| 203 | |||
| 204 | if (!plugin || !plugin->plugin_name){ | ||
| 205 | printk(KERN_ERR "Invalid plugin struct passed to %s.\n", | ||
| 206 | __func__); | ||
| 207 | rv = -EINVAL; | ||
| 208 | goto out_no_pde; | ||
| 209 | } | ||
| 210 | |||
| 211 | if (!plugs_dir){ | ||
| 212 | printk(KERN_ERR "Could not make plugin sub-directory, because " | ||
| 213 | "/proc/litmus/plugins does not exist.\n"); | ||
| 214 | rv = -ENOENT; | ||
| 215 | goto out_no_pde; | ||
| 216 | } | ||
| 217 | |||
| 218 | pde_new = proc_mkdir(plugin->plugin_name, plugs_dir); | ||
| 219 | if (!pde_new){ | ||
| 220 | printk(KERN_ERR "Could not make plugin sub-directory: " | ||
| 221 | "out of memory?\n"); | ||
| 222 | rv = -ENOMEM; | ||
| 223 | goto out_no_pde; | ||
| 224 | } | ||
| 225 | |||
| 226 | rv = 0; | ||
| 227 | *pde_in = pde_new; | ||
| 228 | goto out_ok; | ||
| 229 | |||
| 230 | out_no_pde: | ||
| 231 | *pde_in = NULL; | ||
| 232 | out_ok: | ||
| 233 | return rv; | ||
| 234 | } | ||
| 235 | |||
| 236 | void remove_plugin_proc_dir(struct sched_plugin* plugin) | ||
| 237 | { | ||
| 238 | if (!plugin || !plugin->plugin_name){ | ||
| 239 | printk(KERN_ERR "Invalid plugin struct passed to %s.\n", | ||
| 240 | __func__); | ||
| 241 | return; | ||
| 242 | } | ||
| 243 | remove_proc_entry(plugin->plugin_name, plugs_dir); | ||
| 244 | } | ||
| 245 | |||
| 246 | |||
| 247 | |||
| 248 | /* misc. I/O helper functions */ | ||
| 249 | |||
| 250 | int copy_and_chomp(char *kbuf, unsigned long ksize, | ||
| 251 | __user const char* ubuf, unsigned long ulength) | ||
| 252 | { | ||
| 253 | /* caller must provide buffer space */ | ||
| 254 | BUG_ON(!ksize); | ||
| 255 | |||
| 256 | ksize--; /* leave space for null byte */ | ||
| 257 | |||
| 258 | if (ksize > ulength) | ||
| 259 | ksize = ulength; | ||
| 260 | |||
| 261 | if(copy_from_user(kbuf, ubuf, ksize)) | ||
| 262 | return -EFAULT; | ||
| 263 | |||
| 264 | kbuf[ksize] = '\0'; | ||
| 265 | |||
| 266 | /* chomp kbuf */ | ||
| 267 | if (ksize > 0 && kbuf[ksize - 1] == '\n') | ||
| 268 | kbuf[ksize - 1] = '\0'; | ||
| 269 | |||
| 270 | return ksize; | ||
| 271 | } | ||
| 272 | |||
| 273 | /* helper functions for clustered plugins */ | ||
| 274 | static const char* cache_level_names[] = { | ||
| 275 | "ALL", | ||
| 276 | "L1", | ||
| 277 | "L2", | ||
| 278 | "L3", | ||
| 279 | }; | ||
| 280 | |||
| 281 | int parse_cache_level(const char *cache_name, enum cache_level *level) | ||
| 282 | { | ||
| 283 | int err = -EINVAL; | ||
| 284 | int i; | ||
| 285 | /* do a quick and dirty comparison to find the cluster size */ | ||
| 286 | for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++) | ||
| 287 | if (!strcmp(cache_name, cache_level_names[i])) { | ||
| 288 | *level = (enum cache_level) i; | ||
| 289 | err = 0; | ||
| 290 | break; | ||
| 291 | } | ||
| 292 | return err; | ||
| 293 | } | ||
| 294 | |||
| 295 | const char* cache_level_name(enum cache_level level) | ||
| 296 | { | ||
| 297 | int idx = level; | ||
| 298 | |||
| 299 | if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER) | ||
| 300 | return cache_level_names[idx]; | ||
| 301 | else | ||
| 302 | return "INVALID"; | ||
| 303 | } | ||
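For reference, the two helpers above are inverses over the valid range; a minimal round-trip sketch (illustration only, not part of the patch):

    /* Round-trip sketch: parse a cluster name, then print it back. */
    static void cluster_name_example(void)
    {
            enum cache_level lvl;

            if (!parse_cache_level("L2", &lvl))
                    printk(KERN_INFO "parsed cluster level: %s\n",
                           cache_level_name(lvl));   /* prints "L2" */
    }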
| 304 | |||
| 305 | |||
| 306 | /* proc file interface to configure the cluster size */ | ||
| 307 | static int proc_read_cluster_size(char *page, char **start, | ||
| 308 | off_t off, int count, | ||
| 309 | int *eof, void *data) | ||
| 310 | { | ||
| 311 | return snprintf(page, PAGE_SIZE, "%s\n", | ||
| 312 | cache_level_name(*((enum cache_level*) data))); | ||
| 313 | } | ||
| 314 | |||
| 315 | static int proc_write_cluster_size(struct file *file, | ||
| 316 | const char *buffer, | ||
| 317 | unsigned long count, | ||
| 318 | void *data) | ||
| 319 | { | ||
| 320 | int len; | ||
| 321 | char cache_name[8]; | ||
| 322 | |||
| 323 | len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count); | ||
| 324 | |||
| 325 | if (len > 0 && parse_cache_level(cache_name, (enum cache_level*) data)) | ||
| 326 | printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name); | ||
| 327 | |||
| 328 | return len; | ||
| 329 | } | ||
| 330 | |||
| 331 | struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent, | ||
| 332 | enum cache_level* level) | ||
| 333 | { | ||
| 334 | struct proc_dir_entry* cluster_file; | ||
| 335 | |||
| 336 | cluster_file = create_proc_entry("cluster", 0644, parent); | ||
| 337 | if (!cluster_file) { | ||
| 338 | printk(KERN_ERR "Could not allocate %s/cluster " | ||
| 339 | "procfs entry.\n", parent->name); | ||
| 340 | } else { | ||
| 341 | cluster_file->read_proc = proc_read_cluster_size; | ||
| 342 | cluster_file->write_proc = proc_write_cluster_size; | ||
| 343 | cluster_file->data = level; | ||
| 344 | } | ||
| 345 | return cluster_file; | ||
| 346 | } | ||
| 347 | |||
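Taken together, a clustered plugin only has to hand create_cluster_file() a pointer to its configuration variable. A minimal sketch of that wiring (my_plugin_dir and my_cluster_config are assumed plugin-local names, with my_plugin_dir obtained via make_plugin_proc_dir()):

    static enum cache_level my_cluster_config = GLOBAL_CLUSTER;
    static struct proc_dir_entry *my_plugin_dir;     /* from make_plugin_proc_dir() */
    static struct proc_dir_entry *my_cluster_file;

    static int my_plugin_proc_init(void)
    {
            /* creates <plugin dir>/cluster; writing e.g. "L2" to it goes
             * through proc_write_cluster_size() -> parse_cache_level()
             * and updates my_cluster_config in place */
            my_cluster_file = create_cluster_file(my_plugin_dir,
                                                  &my_cluster_config);
            return my_cluster_file ? 0 : -ENOMEM;
    }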
diff --git a/litmus/locking.c b/litmus/locking.c new file mode 100644 index 00000000000..1d46d148e9e --- /dev/null +++ b/litmus/locking.c | |||
| @@ -0,0 +1,236 @@ | |||
| 1 | #include <linux/sched.h> | ||
| 2 | #include <litmus/litmus.h> | ||
| 3 | #include <litmus/fdso.h> | ||
| 4 | |||
| 5 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 6 | |||
| 7 | #include <linux/sched.h> | ||
| 8 | #include <litmus/litmus.h> | ||
| 9 | #include <litmus/sched_plugin.h> | ||
| 10 | #include <litmus/trace.h> | ||
| 11 | #include <litmus/wait.h> | ||
| 12 | |||
| 13 | static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); | ||
| 14 | static int open_generic_lock(struct od_table_entry* entry, void* __user arg); | ||
| 15 | static int close_generic_lock(struct od_table_entry* entry); | ||
| 16 | static void destroy_generic_lock(obj_type_t type, void* sem); | ||
| 17 | |||
| 18 | struct fdso_ops generic_lock_ops = { | ||
| 19 | .create = create_generic_lock, | ||
| 20 | .open = open_generic_lock, | ||
| 21 | .close = close_generic_lock, | ||
| 22 | .destroy = destroy_generic_lock | ||
| 23 | }; | ||
| 24 | |||
| 25 | static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg) | ||
| 26 | { | ||
| 27 | struct litmus_lock* lock; | ||
| 28 | int err; | ||
| 29 | |||
| 30 | err = litmus->allocate_lock(&lock, type, arg); | ||
| 31 | if (err == 0) | ||
| 32 | *obj_ref = lock; | ||
| 33 | return err; | ||
| 34 | } | ||
| 35 | |||
| 36 | static int open_generic_lock(struct od_table_entry* entry, void* __user arg) | ||
| 37 | { | ||
| 38 | struct litmus_lock* lock = get_lock(entry); | ||
| 39 | if (lock->ops->open) | ||
| 40 | return lock->ops->open(lock, arg); | ||
| 41 | else | ||
| 42 | return 0; /* default: any task can open it */ | ||
| 43 | } | ||
| 44 | |||
| 45 | static int close_generic_lock(struct od_table_entry* entry) | ||
| 46 | { | ||
| 47 | struct litmus_lock* lock = get_lock(entry); | ||
| 48 | if (lock->ops->close) | ||
| 49 | return lock->ops->close(lock); | ||
| 50 | else | ||
| 51 | return 0; /* default: closing succeeds */ | ||
| 52 | } | ||
| 53 | |||
| 54 | static void destroy_generic_lock(obj_type_t type, void* obj) | ||
| 55 | { | ||
| 56 | struct litmus_lock* lock = (struct litmus_lock*) obj; | ||
| 57 | lock->ops->deallocate(lock); | ||
| 58 | } | ||
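The generic fdso layer above only dispatches through the per-lock ops table; what a plugin's allocate_lock() hands back looks roughly like the following sketch (the exact struct litmus_lock_ops field list and signatures are assumed from the call sites in this file):

    static long my_sem_lock(struct litmus_lock *l)   { /* acquire or suspend */ return 0; }
    static long my_sem_unlock(struct litmus_lock *l) { /* pass to next waiter */ return 0; }
    static void my_sem_free(struct litmus_lock *l)   { kfree(l); /* <linux/slab.h> */ }

    static struct litmus_lock_ops my_sem_ops = {
            .lock       = my_sem_lock,    /* reached via sys_litmus_lock()      */
            .unlock     = my_sem_unlock,  /* reached via sys_litmus_unlock()    */
            .deallocate = my_sem_free,    /* reached via destroy_generic_lock() */
            /* .open/.close left NULL: the defaults above then apply */
    };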
| 59 | |||
| 60 | asmlinkage long sys_dynamic_group_lock(resource_mask_t lock_ods) | ||
| 61 | { | ||
| 62 | long err = -EINVAL; | ||
| 63 | struct od_table_entry* entry; | ||
| 64 | struct litmus_lock* l; | ||
| 65 | |||
| 66 | TS_LOCK_START; | ||
| 67 | |||
| 68 | entry = get_entry_for_od(ffs(lock_ods)-1); | ||
| 69 | if (entry && is_lock(entry)) { | ||
| 70 | l = get_lock(entry); | ||
| 71 | if (l->type == DGL_SEM){ | ||
| 72 | err = l->ops->dynamic_group_lock(l, lock_ods); | ||
| 73 | } else { | ||
| 74 | TRACE("Attempted to DG-lock type: %d\n", l->type); | ||
| 75 | } | ||
| 76 | } else { | ||
| 77 | TRACE_CUR("Attempted to lock invalid entry %d\n", entry); | ||
| 78 | } | ||
| 79 | |||
| 80 | TS_LOCK_END; | ||
| 81 | |||
| 82 | return err; | ||
| 83 | } | ||
| 84 | |||
| 85 | asmlinkage long sys_dynamic_group_unlock(resource_mask_t lock_ods) | ||
| 86 | { | ||
| 87 | long err = -EINVAL; | ||
| 88 | struct od_table_entry* entry; | ||
| 89 | struct litmus_lock* l; | ||
| 90 | |||
| 91 | TS_UNLOCK_START; | ||
| 92 | |||
| 93 | entry = get_entry_for_od(ffs(lock_ods)-1); | ||
| 94 | if (entry && is_lock(entry)) { | ||
| 95 | l = get_lock(entry); | ||
| 96 | if (l->type == DGL_SEM){ | ||
| 97 | err = l->ops->dynamic_group_unlock(l, lock_ods); | ||
| 98 | } else { | ||
| 99 | TRACE_CUR("Attempted to DG-unlock type: %d\n", l->type); | ||
| 100 | } | ||
| 101 | } else { | ||
| 102 | TRACE_CUR("Attempted to unlock invalid entry %d\n", entry); | ||
| 103 | } | ||
| 104 | |||
| 105 | TS_UNLOCK_END; | ||
| 106 | |||
| 107 | return err; | ||
| 108 | } | ||
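Both DGL system calls take a single bitmask of object descriptors rather than one descriptor; the kernel locates the group lock via ffs(lock_ods) - 1, i.e. the lowest set bit. A userspace-side sketch of building such a mask (litmus_dgl_lock()/litmus_dgl_unlock() are hypothetical wrapper names, and the width of resource_mask_t is assumed):

    #include <stdint.h>

    typedef uint32_t resource_mask_t;    /* assumed to match the kernel type */

    /* OR one bit per lock object descriptor; all ODs must refer to
     * resources of the same DGL_SEM group for the syscall to succeed. */
    static resource_mask_t make_dgl_mask(const int *lock_ods, int n)
    {
            resource_mask_t mask = 0;
            int i;

            for (i = 0; i < n; i++)
                    mask |= (resource_mask_t)1 << lock_ods[i];
            return mask;     /* pass to litmus_dgl_lock()/litmus_dgl_unlock() */
    }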
| 109 | |||
| 110 | asmlinkage long sys_litmus_lock(int lock_od) | ||
| 111 | { | ||
| 112 | long err = -EINVAL; | ||
| 113 | struct od_table_entry* entry; | ||
| 114 | struct litmus_lock* l; | ||
| 115 | |||
| 116 | TS_LOCK_START; | ||
| 117 | |||
| 118 | entry = get_entry_for_od(lock_od); | ||
| 119 | if (entry && is_lock(entry)) { | ||
| 120 | l = get_lock(entry); | ||
| 121 | TRACE_CUR("attempts to lock 0x%p\n", l); | ||
| 122 | err = l->ops->lock(l); | ||
| 123 | } | ||
| 124 | |||
| 125 | /* Note: task may have been suspended or preempted in between! Take | ||
| 126 | * this into account when computing overheads. */ | ||
| 127 | TS_LOCK_END; | ||
| 128 | |||
| 129 | TS_SYSCALL_OUT_START; | ||
| 130 | |||
| 131 | return err; | ||
| 132 | } | ||
| 133 | |||
| 134 | asmlinkage long sys_litmus_unlock(int lock_od) | ||
| 135 | { | ||
| 136 | long err = -EINVAL; | ||
| 137 | struct od_table_entry* entry; | ||
| 138 | struct litmus_lock* l; | ||
| 139 | |||
| 140 | TS_SYSCALL_IN_START; | ||
| 141 | |||
| 142 | TS_SYSCALL_IN_END; | ||
| 143 | |||
| 144 | TS_UNLOCK_START; | ||
| 145 | |||
| 146 | entry = get_entry_for_od(lock_od); | ||
| 147 | if (entry && is_lock(entry)) { | ||
| 148 | l = get_lock(entry); | ||
| 149 | TRACE_CUR("attempts to unlock 0x%p\n", l); | ||
| 150 | err = l->ops->unlock(l); | ||
| 151 | } | ||
| 152 | |||
| 153 | /* Note: task may have been preempted in between! Take this into | ||
| 154 | * account when computing overheads. */ | ||
| 155 | TS_UNLOCK_END; | ||
| 156 | |||
| 157 | TS_SYSCALL_OUT_START; | ||
| 158 | |||
| 159 | return err; | ||
| 160 | } | ||
| 161 | |||
| 162 | struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) | ||
| 163 | { | ||
| 164 | wait_queue_t* q; | ||
| 165 | struct task_struct* t = NULL; | ||
| 166 | |||
| 167 | if (waitqueue_active(wq)) { | ||
| 168 | q = list_entry(wq->task_list.next, | ||
| 169 | wait_queue_t, task_list); | ||
| 170 | t = (struct task_struct*) q->private; | ||
| 171 | __remove_wait_queue(wq, q); | ||
| 172 | } | ||
| 173 | return(t); | ||
| 174 | } | ||
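A typical consumer of __waitqueue_remove_first() is a lock's unlock path, roughly as sketched below (the helper name is a placeholder; the caller is assumed to hold the lock's own spinlock):

    /* Hand the resource to the next waiter, if any (sketch). */
    static void my_pass_to_next_waiter(wait_queue_head_t *wait)
    {
            struct task_struct *next = __waitqueue_remove_first(wait);

            if (next)
                    wake_up_process(next);   /* resume the dequeued waiter */
    }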
| 175 | |||
| 176 | struct task_struct* __waitqueue_peek_first(wait_queue_head_t *wq) | ||
| 177 | { | ||
| 178 | wait_queue_t* q; | ||
| 179 | struct task_struct* t = NULL; | ||
| 180 | |||
| 181 | if (waitqueue_active(wq)) { | ||
| 182 | q = list_entry(wq->task_list.next, | ||
| 183 | wait_queue_t, task_list); | ||
| 184 | t = (struct task_struct*) q->private; | ||
| 185 | } | ||
| 186 | return(t); | ||
| 187 | } | ||
| 188 | |||
| 189 | unsigned int __add_wait_queue_prio_exclusive( | ||
| 190 | wait_queue_head_t* head, | ||
| 191 | prio_wait_queue_t *new) | ||
| 192 | { | ||
| 193 | struct list_head *pos; | ||
| 194 | unsigned int passed = 0; | ||
| 195 | |||
| 196 | new->wq.flags |= WQ_FLAG_EXCLUSIVE; | ||
| 197 | |||
| 198 | /* find a spot where the new entry is less than the next */ | ||
| 199 | list_for_each(pos, &head->task_list) { | ||
| 200 | prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t, | ||
| 201 | wq.task_list); | ||
| 202 | |||
| 203 | if (unlikely(lt_before(new->priority, queued->priority) || | ||
| 204 | (new->priority == queued->priority && | ||
| 205 | new->tie_breaker < queued->tie_breaker))) { | ||
| 206 | /* pos is not less than new, thus insert here */ | ||
| 207 | __list_add(&new->wq.task_list, pos->prev, pos); | ||
| 208 | goto out; | ||
| 209 | } | ||
| 210 | passed++; | ||
| 211 | } | ||
| 212 | |||
| 213 | /* If we get to this point, either the list is empty or every | ||
| 214 | * queued element is less than new. | ||
| 215 | * Let's add new to the end. */ | ||
| 216 | list_add_tail(&new->wq.task_list, &head->task_list); | ||
| 217 | out: | ||
| 218 | return passed; | ||
| 219 | } | ||
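A priority-ordered wait queue is populated by filling in the prio_wait_queue_t fields used above before calling the helper; a sketch (get_deadline() as the priority source is an assumption, as is the field layout beyond what this function touches):

    static void my_enqueue_by_priority(wait_queue_head_t *waiters,
                                       prio_wait_queue_t *wait,
                                       struct task_struct *t)
    {
            init_waitqueue_entry(&wait->wq, t);     /* wake t on removal */
            wait->priority    = get_deadline(t);    /* earlier deadline = higher prio */
            wait->tie_breaker = t->pid;             /* stable tie break  */

            /* returns how many equal- or higher-priority waiters precede us */
            __add_wait_queue_prio_exclusive(waiters, wait);
    }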
| 220 | |||
| 221 | |||
| 222 | #else | ||
| 223 | |||
| 224 | struct fdso_ops generic_lock_ops = {}; | ||
| 225 | |||
| 226 | asmlinkage long sys_litmus_lock(int sem_od) | ||
| 227 | { | ||
| 228 | return -ENOSYS; | ||
| 229 | } | ||
| 230 | |||
| 231 | asmlinkage long sys_litmus_unlock(int sem_od) | ||
| 232 | { | ||
| 233 | return -ENOSYS; | ||
| 234 | } | ||
| 235 | |||
| 236 | #endif | ||
diff --git a/litmus/preempt.c b/litmus/preempt.c new file mode 100644 index 00000000000..a202d70c627 --- /dev/null +++ b/litmus/preempt.c | |||
| @@ -0,0 +1,137 @@ | |||
| 1 | #include <linux/sched.h> | ||
| 2 | |||
| 3 | #include <litmus/litmus.h> | ||
| 4 | #include <litmus/preempt.h> | ||
| 5 | #include <litmus/trace.h> | ||
| 6 | |||
| 7 | /* The rescheduling state of each processor. | ||
| 8 | */ | ||
| 9 | DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); | ||
| 10 | |||
| 11 | void sched_state_will_schedule(struct task_struct* tsk) | ||
| 12 | { | ||
| 13 | /* Litmus hack: we only care about processor-local invocations of | ||
| 14 | * set_tsk_need_resched(). We can't reliably set the flag remotely | ||
| 15 | * since it might race with other updates to the scheduling state. We | ||
| 16 | * can't rely on the runqueue lock protecting updates to the sched | ||
| 17 | * state since processors do not acquire the runqueue locks for all | ||
| 18 | * updates to the sched state (to avoid acquiring two runqueue locks at | ||
| 19 | * the same time). Further, if tsk is residing on a remote processor, | ||
| 20 | * then that processor doesn't actually know yet that it is going to | ||
| 21 | * reschedule; it still must receive an IPI (unless a local invocation | ||
| 22 | * races). | ||
| 23 | */ | ||
| 24 | if (likely(task_cpu(tsk) == smp_processor_id())) { | ||
| 25 | VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE); | ||
| 26 | if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) | ||
| 27 | set_sched_state(PICKED_WRONG_TASK); | ||
| 28 | else | ||
| 29 | set_sched_state(WILL_SCHEDULE); | ||
| 30 | } /* else */ | ||
| 31 | /* /\* Litmus tasks should never be subject to a remote */ | ||
| 32 | /* * set_tsk_need_resched(). *\/ */ | ||
| 33 | /* BUG_ON(is_realtime(tsk)); */ | ||
| 34 | #ifdef CONFIG_PREEMPT_STATE_TRACE | ||
| 35 | TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", | ||
| 36 | __builtin_return_address(0)); | ||
| 37 | #endif | ||
| 38 | } | ||
| 39 | |||
| 40 | /* Called by the IPI handler after another CPU called smp_send_resched(). */ | ||
| 41 | void sched_state_ipi(void) | ||
| 42 | { | ||
| 43 | /* If the IPI was slow, we might be in any state right now. The IPI is | ||
| 44 | * only meaningful if we are in SHOULD_SCHEDULE. */ | ||
| 45 | if (is_in_sched_state(SHOULD_SCHEDULE)) { | ||
| 46 | /* Cause scheduler to be invoked. | ||
| 47 | * This will cause a transition to WILL_SCHEDULE. */ | ||
| 48 | set_tsk_need_resched(current); | ||
| 49 | TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", | ||
| 50 | current->comm, current->pid); | ||
| 51 | TS_SEND_RESCHED_END; | ||
| 52 | } else { | ||
| 53 | /* ignore */ | ||
| 54 | TRACE_STATE("ignoring IPI in state %x (%s)\n", | ||
| 55 | get_sched_state(), | ||
| 56 | sched_state_name(get_sched_state())); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | /* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must | ||
| 61 | * hold the lock that is used to serialize scheduling decisions. */ | ||
| 62 | void litmus_reschedule(int cpu) | ||
| 63 | { | ||
| 64 | int picked_transition_ok = 0; | ||
| 65 | int scheduled_transition_ok = 0; | ||
| 66 | |||
| 67 | /* The (remote) CPU could be in any state. */ | ||
| 68 | |||
| 69 | /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU | ||
| 70 | * is not aware of the need to reschedule at this point. */ | ||
| 71 | |||
| 72 | /* is a context switch in progress? */ | ||
| 73 | if (cpu_is_in_sched_state(cpu, TASK_PICKED)) | ||
| 74 | picked_transition_ok = sched_state_transition_on( | ||
| 75 | cpu, TASK_PICKED, PICKED_WRONG_TASK); | ||
| 76 | |||
| 77 | if (!picked_transition_ok && | ||
| 78 | cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) { | ||
| 79 | /* We either raced with the end of the context switch, or the | ||
| 80 | * CPU was in TASK_SCHEDULED anyway. */ | ||
| 81 | scheduled_transition_ok = sched_state_transition_on( | ||
| 82 | cpu, TASK_SCHEDULED, SHOULD_SCHEDULE); | ||
| 83 | } | ||
| 84 | |||
| 85 | /* If the CPU was in state TASK_SCHEDULED, then we need to cause the | ||
| 86 | * scheduler to be invoked. */ | ||
| 87 | if (scheduled_transition_ok) { | ||
| 88 | if (smp_processor_id() == cpu) | ||
| 89 | set_tsk_need_resched(current); | ||
| 90 | else { | ||
| 91 | TS_SEND_RESCHED_START(cpu); | ||
| 92 | smp_send_reschedule(cpu); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | TRACE_STATE("%s picked-ok:%d sched-ok:%d\n", | ||
| 97 | __FUNCTION__, | ||
| 98 | picked_transition_ok, | ||
| 99 | scheduled_transition_ok); | ||
| 100 | } | ||
| 101 | |||
| 102 | void litmus_reschedule_local(void) | ||
| 103 | { | ||
| 104 | if (is_in_sched_state(TASK_PICKED)) | ||
| 105 | set_sched_state(PICKED_WRONG_TASK); | ||
| 106 | else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) { | ||
| 107 | set_sched_state(WILL_SCHEDULE); | ||
| 108 | set_tsk_need_resched(current); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | #ifdef CONFIG_DEBUG_KERNEL | ||
| 113 | |||
| 114 | void sched_state_plugin_check(void) | ||
| 115 | { | ||
| 116 | if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) { | ||
| 117 | TRACE("!!!! plugin did not call sched_state_task_picked()!" | ||
| 118 | "Calling sched_state_task_picked() is mandatory---fix this.\n"); | ||
| 119 | set_sched_state(TASK_PICKED); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | #define NAME_CHECK(x) case x: return #x | ||
| 124 | const char* sched_state_name(int s) | ||
| 125 | { | ||
| 126 | switch (s) { | ||
| 127 | NAME_CHECK(TASK_SCHEDULED); | ||
| 128 | NAME_CHECK(SHOULD_SCHEDULE); | ||
| 129 | NAME_CHECK(WILL_SCHEDULE); | ||
| 130 | NAME_CHECK(TASK_PICKED); | ||
| 131 | NAME_CHECK(PICKED_WRONG_TASK); | ||
| 132 | default: | ||
| 133 | return "UNKNOWN"; | ||
| 134 | }; | ||
| 135 | } | ||
| 136 | |||
| 137 | #endif | ||
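In short, the state machine imposes two obligations on a plugin; the sketch below spells them out (everything except litmus_reschedule() and sched_state_task_picked() is a placeholder):

    /* 1) To push a (possibly remote) CPU into the scheduler, call
     *    litmus_reschedule() while holding the lock that serializes the
     *    plugin's scheduling decisions. */
    static void my_request_resched(raw_spinlock_t *sched_lock, int cpu)
    {
            raw_spin_lock(sched_lock);
            litmus_reschedule(cpu);   /* TASK_PICKED -> PICKED_WRONG_TASK, or
                                       * TASK_SCHEDULED -> SHOULD_SCHEDULE (+IPI) */
            raw_spin_unlock(sched_lock);
    }

    /* 2) Every plugin schedule() callback must acknowledge its decision,
     *    or sched_state_plugin_check() will complain and repair the state. */
    static struct task_struct *my_schedule(struct task_struct *prev)
    {
            struct task_struct *next = NULL;   /* pick the next job here */

            sched_state_task_picked();         /* mandatory */
            return next;
    }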
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c new file mode 100644 index 00000000000..1683d384756 --- /dev/null +++ b/litmus/rt_domain.c | |||
| @@ -0,0 +1,349 @@ | |||
| 1 | /* | ||
| 2 | * litmus/rt_domain.c | ||
| 3 | * | ||
| 4 | * LITMUS real-time infrastructure. This file contains the | ||
| 5 | * functions that manipulate RT domains. RT domains are an abstraction | ||
| 6 | * of a ready queue and a release queue. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/percpu.h> | ||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/list.h> | ||
| 12 | #include <linux/slab.h> | ||
| 13 | |||
| 14 | #include <litmus/litmus.h> | ||
| 15 | #include <litmus/sched_plugin.h> | ||
| 16 | #include <litmus/sched_trace.h> | ||
| 17 | |||
| 18 | #include <litmus/rt_domain.h> | ||
| 19 | |||
| 20 | #include <litmus/trace.h> | ||
| 21 | |||
| 22 | #include <litmus/bheap.h> | ||
| 23 | |||
| 24 | /* Uncomment when debugging timer races... */ | ||
| 25 | #if 0 | ||
| 26 | #define VTRACE_TASK TRACE_TASK | ||
| 27 | #define VTRACE TRACE | ||
| 28 | #else | ||
| 29 | #define VTRACE_TASK(t, fmt, args...) /* shut up */ | ||
| 30 | #define VTRACE(fmt, args...) /* be quiet already */ | ||
| 31 | #endif | ||
| 32 | |||
| 33 | static int dummy_resched(rt_domain_t *rt) | ||
| 34 | { | ||
| 35 | return 0; | ||
| 36 | } | ||
| 37 | |||
| 38 | static int dummy_order(struct bheap_node* a, struct bheap_node* b) | ||
| 39 | { | ||
| 40 | return 0; | ||
| 41 | } | ||
| 42 | |||
| 43 | /* default implementation: use default lock */ | ||
| 44 | static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
| 45 | { | ||
| 46 | merge_ready(rt, tasks); | ||
| 47 | } | ||
| 48 | |||
| 49 | static unsigned int time2slot(lt_t time) | ||
| 50 | { | ||
| 51 | return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS; | ||
| 52 | } | ||
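As an aside, the slot hash is what lets jobs with identical release times (e.g. on a hyperperiod boundary) share a single release_heap and thus a single hrtimer; a trivial illustration:

    /* Illustration only: releases in the same quantum map to the same
     * bucket of release_queue.slot[] and are merged by get_release_heap(). */
    static int same_release_slot(lt_t a, lt_t b)
    {
            return time2slot(a) == time2slot(b);
    }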
| 53 | |||
| 54 | static enum hrtimer_restart on_release_timer(struct hrtimer *timer) | ||
| 55 | { | ||
| 56 | unsigned long flags; | ||
| 57 | struct release_heap* rh; | ||
| 58 | rh = container_of(timer, struct release_heap, timer); | ||
| 59 | |||
| 60 | TS_RELEASE_LATENCY(rh->release_time); | ||
| 61 | |||
| 62 | VTRACE("on_release_timer(0x%p) starts.\n", timer); | ||
| 63 | |||
| 64 | TS_RELEASE_START; | ||
| 65 | |||
| 66 | |||
| 67 | raw_spin_lock_irqsave(&rh->dom->release_lock, flags); | ||
| 68 | VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); | ||
| 69 | /* remove from release queue */ | ||
| 70 | list_del(&rh->list); | ||
| 71 | raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags); | ||
| 72 | VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock); | ||
| 73 | |||
| 74 | /* call release callback */ | ||
| 75 | rh->dom->release_jobs(rh->dom, &rh->heap); | ||
| 76 | /* WARNING: rh can be referenced from other CPUs from now on. */ | ||
| 77 | |||
| 78 | TS_RELEASE_END; | ||
| 79 | |||
| 80 | VTRACE("on_release_timer(0x%p) ends.\n", timer); | ||
| 81 | |||
| 82 | return HRTIMER_NORESTART; | ||
| 83 | } | ||
| 84 | |||
| 85 | /* allocated in litmus.c */ | ||
| 86 | struct kmem_cache * release_heap_cache; | ||
| 87 | |||
| 88 | struct release_heap* release_heap_alloc(int gfp_flags) | ||
| 89 | { | ||
| 90 | struct release_heap* rh; | ||
| 91 | rh= kmem_cache_alloc(release_heap_cache, gfp_flags); | ||
| 92 | if (rh) { | ||
| 93 | /* initialize timer */ | ||
| 94 | hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
| 95 | rh->timer.function = on_release_timer; | ||
| 96 | } | ||
| 97 | return rh; | ||
| 98 | } | ||
| 99 | |||
| 100 | void release_heap_free(struct release_heap* rh) | ||
| 101 | { | ||
| 102 | /* make sure timer is no longer in use */ | ||
| 103 | hrtimer_cancel(&rh->timer); | ||
| 104 | kmem_cache_free(release_heap_cache, rh); | ||
| 105 | } | ||
| 106 | |||
| 107 | /* Caller must hold release lock. | ||
| 108 | * Will return heap for given time. If no such heap exists prior to | ||
| 109 | * the invocation it will be created. | ||
| 110 | */ | ||
| 111 | static struct release_heap* get_release_heap(rt_domain_t *rt, | ||
| 112 | struct task_struct* t, | ||
| 113 | int use_task_heap) | ||
| 114 | { | ||
| 115 | struct list_head* pos; | ||
| 116 | struct release_heap* heap = NULL; | ||
| 117 | struct release_heap* rh; | ||
| 118 | lt_t release_time = get_release(t); | ||
| 119 | unsigned int slot = time2slot(release_time); | ||
| 120 | |||
| 121 | /* initialize pos for the case that the list is empty */ | ||
| 122 | pos = rt->release_queue.slot[slot].next; | ||
| 123 | list_for_each(pos, &rt->release_queue.slot[slot]) { | ||
| 124 | rh = list_entry(pos, struct release_heap, list); | ||
| 125 | if (release_time == rh->release_time) { | ||
| 126 | /* perfect match -- this happens on hyperperiod | ||
| 127 | * boundaries | ||
| 128 | */ | ||
| 129 | heap = rh; | ||
| 130 | break; | ||
| 131 | } else if (lt_before(release_time, rh->release_time)) { | ||
| 132 | /* we need to insert a new node since rh is | ||
| 133 | * already in the future | ||
| 134 | */ | ||
| 135 | break; | ||
| 136 | } | ||
| 137 | } | ||
| 138 | if (!heap && use_task_heap) { | ||
| 139 | /* use pre-allocated release heap */ | ||
| 140 | rh = tsk_rt(t)->rel_heap; | ||
| 141 | |||
| 142 | rh->dom = rt; | ||
| 143 | rh->release_time = release_time; | ||
| 144 | |||
| 145 | /* add to release queue */ | ||
| 146 | list_add(&rh->list, pos->prev); | ||
| 147 | heap = rh; | ||
| 148 | } | ||
| 149 | return heap; | ||
| 150 | } | ||
| 151 | |||
| 152 | static void reinit_release_heap(struct task_struct* t) | ||
| 153 | { | ||
| 154 | struct release_heap* rh; | ||
| 155 | |||
| 156 | /* use pre-allocated release heap */ | ||
| 157 | rh = tsk_rt(t)->rel_heap; | ||
| 158 | |||
| 159 | /* Make sure it is safe to use. The timer callback could still | ||
| 160 | * be executing on another CPU; hrtimer_cancel() will wait | ||
| 161 | * until the timer callback has completed. However, under no | ||
| 162 | * circumstances should the timer be active (= yet to be | ||
| 163 | * triggered). | ||
| 164 | * | ||
| 165 | * WARNING: If the CPU still holds the release_lock at this point, | ||
| 166 | * deadlock may occur! | ||
| 167 | */ | ||
| 168 | BUG_ON(hrtimer_cancel(&rh->timer)); | ||
| 169 | |||
| 170 | /* initialize */ | ||
| 171 | bheap_init(&rh->heap); | ||
| 172 | #ifdef CONFIG_RELEASE_MASTER | ||
| 173 | atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE); | ||
| 174 | #endif | ||
| 175 | } | ||
| 176 | /* arm_release_timer() - start local release timer or trigger | ||
| 177 | * remote timer (pull timer) | ||
| 178 | * | ||
| 179 | * Called by add_release() with: | ||
| 180 | * - tobe_lock taken | ||
| 181 | * - IRQ disabled | ||
| 182 | */ | ||
| 183 | #ifdef CONFIG_RELEASE_MASTER | ||
| 184 | #define arm_release_timer(t) arm_release_timer_on((t), NO_CPU) | ||
| 185 | static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu) | ||
| 186 | #else | ||
| 187 | static void arm_release_timer(rt_domain_t *_rt) | ||
| 188 | #endif | ||
| 189 | { | ||
| 190 | rt_domain_t *rt = _rt; | ||
| 191 | struct list_head list; | ||
| 192 | struct list_head *pos, *safe; | ||
| 193 | struct task_struct* t; | ||
| 194 | struct release_heap* rh; | ||
| 195 | |||
| 196 | VTRACE("arm_release_timer() at %llu\n", litmus_clock()); | ||
| 197 | list_replace_init(&rt->tobe_released, &list); | ||
| 198 | |||
| 199 | list_for_each_safe(pos, safe, &list) { | ||
| 200 | /* pick task off the work list */ | ||
| 201 | t = list_entry(pos, struct task_struct, rt_param.list); | ||
| 202 | sched_trace_task_release(t); | ||
| 203 | list_del(pos); | ||
| 204 | |||
| 205 | /* put into release heap while holding release_lock */ | ||
| 206 | raw_spin_lock(&rt->release_lock); | ||
| 207 | VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock); | ||
| 208 | |||
| 209 | rh = get_release_heap(rt, t, 0); | ||
| 210 | if (!rh) { | ||
| 211 | /* need to use our own, but drop lock first */ | ||
| 212 | raw_spin_unlock(&rt->release_lock); | ||
| 213 | VTRACE_TASK(t, "Dropped release_lock 0x%p\n", | ||
| 214 | &rt->release_lock); | ||
| 215 | |||
| 216 | reinit_release_heap(t); | ||
| 217 | VTRACE_TASK(t, "release_heap ready\n"); | ||
| 218 | |||
| 219 | raw_spin_lock(&rt->release_lock); | ||
| 220 | VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n", | ||
| 221 | &rt->release_lock); | ||
| 222 | |||
| 223 | rh = get_release_heap(rt, t, 1); | ||
| 224 | } | ||
| 225 | bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node); | ||
| 226 | VTRACE_TASK(t, "arm_release_timer(): added to release heap\n"); | ||
| 227 | |||
| 228 | raw_spin_unlock(&rt->release_lock); | ||
| 229 | VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock); | ||
| 230 | |||
| 231 | /* To avoid arming the timer multiple times, we only let the | ||
| 232 | * owner do the arming (which is the "first" task to reference | ||
| 233 | * this release_heap anyway). | ||
| 234 | */ | ||
| 235 | if (rh == tsk_rt(t)->rel_heap) { | ||
| 236 | VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer); | ||
| 237 | /* we cannot arm the timer using hrtimer_start() | ||
| 238 | * as it may deadlock on rq->lock | ||
| 239 | * | ||
| 240 | * PINNED mode is ok on both local and remote CPU | ||
| 241 | */ | ||
| 242 | #ifdef CONFIG_RELEASE_MASTER | ||
| 243 | if (rt->release_master == NO_CPU && | ||
| 244 | target_cpu == NO_CPU) | ||
| 245 | #endif | ||
| 246 | __hrtimer_start_range_ns(&rh->timer, | ||
| 247 | ns_to_ktime(rh->release_time), | ||
| 248 | 0, HRTIMER_MODE_ABS_PINNED, 0); | ||
| 249 | #ifdef CONFIG_RELEASE_MASTER | ||
| 250 | else | ||
| 251 | hrtimer_start_on( | ||
| 252 | /* target_cpu overrides release master */ | ||
| 253 | (target_cpu != NO_CPU ? | ||
| 254 | target_cpu : rt->release_master), | ||
| 255 | &rh->info, &rh->timer, | ||
| 256 | ns_to_ktime(rh->release_time), | ||
| 257 | HRTIMER_MODE_ABS_PINNED); | ||
| 258 | #endif | ||
| 259 | } else | ||
| 260 | VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer); | ||
| 261 | } | ||
| 262 | } | ||
| 263 | |||
| 264 | void rt_domain_init(rt_domain_t *rt, | ||
| 265 | bheap_prio_t order, | ||
| 266 | check_resched_needed_t check, | ||
| 267 | release_jobs_t release | ||
| 268 | ) | ||
| 269 | { | ||
| 270 | int i; | ||
| 271 | |||
| 272 | BUG_ON(!rt); | ||
| 273 | if (!check) | ||
| 274 | check = dummy_resched; | ||
| 275 | if (!release) | ||
| 276 | release = default_release_jobs; | ||
| 277 | if (!order) | ||
| 278 | order = dummy_order; | ||
| 279 | |||
| 280 | #ifdef CONFIG_RELEASE_MASTER | ||
| 281 | rt->release_master = NO_CPU; | ||
| 282 | #endif | ||
| 283 | |||
| 284 | bheap_init(&rt->ready_queue); | ||
| 285 | INIT_LIST_HEAD(&rt->tobe_released); | ||
| 286 | for (i = 0; i < RELEASE_QUEUE_SLOTS; i++) | ||
| 287 | INIT_LIST_HEAD(&rt->release_queue.slot[i]); | ||
| 288 | |||
| 289 | raw_spin_lock_init(&rt->ready_lock); | ||
| 290 | raw_spin_lock_init(&rt->release_lock); | ||
| 291 | raw_spin_lock_init(&rt->tobe_lock); | ||
| 292 | |||
| 293 | rt->check_resched = check; | ||
| 294 | rt->release_jobs = release; | ||
| 295 | rt->order = order; | ||
| 296 | } | ||
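A plugin typically calls the initializer once per domain at activation time, roughly as follows (edf_ready_order as the heap order is an assumption; the resched callback is a placeholder):

    static rt_domain_t my_domain;

    /* invoked from __add_ready()/__merge_ready() with the ready lock held */
    static int my_check_resched(rt_domain_t *rt)
    {
            litmus_reschedule_local();
            return 1;
    }

    static void my_plugin_activate(void)
    {
            /* passing NULL for the release callback selects
             * default_release_jobs(), which just merges released jobs
             * into the ready queue */
            rt_domain_init(&my_domain, edf_ready_order, my_check_resched, NULL);
    }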
| 297 | |||
| 298 | /* add_ready - add a real-time task to the rt ready queue. It must be runnable. | ||
| 299 | * @new: the newly released task | ||
| 300 | */ | ||
| 301 | void __add_ready(rt_domain_t* rt, struct task_struct *new) | ||
| 302 | { | ||
| 303 | TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " | ||
| 304 | "to ready queue at %llu\n", | ||
| 305 | new->comm, new->pid, | ||
| 306 | get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), | ||
| 307 | get_release(new), litmus_clock()); | ||
| 308 | |||
| 309 | BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); | ||
| 310 | |||
| 311 | bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node); | ||
| 312 | rt->check_resched(rt); | ||
| 313 | } | ||
| 314 | |||
| 315 | /* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable. | ||
| 316 | * @tasks - the newly released tasks | ||
| 317 | */ | ||
| 318 | void __merge_ready(rt_domain_t* rt, struct bheap* tasks) | ||
| 319 | { | ||
| 320 | bheap_union(rt->order, &rt->ready_queue, tasks); | ||
| 321 | rt->check_resched(rt); | ||
| 322 | } | ||
| 323 | |||
| 324 | |||
| 325 | #ifdef CONFIG_RELEASE_MASTER | ||
| 326 | void __add_release_on(rt_domain_t* rt, struct task_struct *task, | ||
| 327 | int target_cpu) | ||
| 328 | { | ||
| 329 | TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n", | ||
| 330 | get_release(task), target_cpu); | ||
| 331 | list_add(&tsk_rt(task)->list, &rt->tobe_released); | ||
| 332 | task->rt_param.domain = rt; | ||
| 333 | |||
| 334 | arm_release_timer_on(rt, target_cpu); | ||
| 335 | } | ||
| 336 | #endif | ||
| 337 | |||
| 338 | /* add_release - add a real-time task to the rt release queue. | ||
| 339 | * @task: the sleeping task | ||
| 340 | */ | ||
| 341 | void __add_release(rt_domain_t* rt, struct task_struct *task) | ||
| 342 | { | ||
| 343 | TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task)); | ||
| 344 | list_add(&tsk_rt(task)->list, &rt->tobe_released); | ||
| 345 | task->rt_param.domain = rt; | ||
| 346 | |||
| 347 | arm_release_timer(rt); | ||
| 348 | } | ||
| 349 | |||
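Callers of the double-underscore entry points are expected to follow the locking noted above for arm_release_timer(): tobe_lock held and interrupts off for releases, ready_lock for the ready-queue variants. A minimal wrapper sketch:

    /* Sketch: queue a sleeping job for a future release. */
    static void queue_for_release(rt_domain_t *rt, struct task_struct *t)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&rt->tobe_lock, flags);
            __add_release(rt, t);     /* adds to tobe_released and arms a timer */
            raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
    }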
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c new file mode 100644 index 00000000000..6e1327bbf50 --- /dev/null +++ b/litmus/sched_cedf.c | |||
| @@ -0,0 +1,857 @@ | |||
| 1 | /* | ||
| 2 | * litmus/sched_cedf.c | ||
| 3 | * | ||
| 4 | * Implementation of the C-EDF scheduling algorithm. | ||
| 5 | * | ||
| 6 | * This implementation is based on G-EDF: | ||
| 7 | * - CPUs are clustered around L2 or L3 caches. | ||
| 8 | * - Cluster topology is automatically detected (this is arch-dependent | ||
| 9 | * and currently works only on x86 --- and only with modern | ||
| 10 | * CPUs that export cpuid4 information). | ||
| 11 | * - The plugin _does not_ attempt to put tasks in the right cluster, i.e., | ||
| 12 | * the programmer needs to be aware of the topology to place tasks | ||
| 13 | * in the desired cluster. | ||
| 14 | * - Default clustering is around the L2 cache (cache index = 2); | ||
| 15 | * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all | ||
| 16 | * online CPUs are placed in a single cluster). | ||
| 17 | * | ||
| 18 | * For details on functions, take a look at sched_gsn_edf.c | ||
| 19 | * | ||
| 20 | * Currently, we do not support changes in the number of online cpus. | ||
| 21 | * If the num_online_cpus() dynamically changes, the plugin is broken. | ||
| 22 | * | ||
| 23 | * This version uses the simple approach and serializes all scheduling | ||
| 24 | * decisions by the use of a queue lock. This is probably not the | ||
| 25 | * best way to do it, but it should suffice for now. | ||
| 26 | */ | ||
| 27 | |||
| 28 | #include <linux/spinlock.h> | ||
| 29 | #include <linux/percpu.h> | ||
| 30 | #include <linux/sched.h> | ||
| 31 | #include <linux/slab.h> | ||
| 32 | |||
| 33 | #include <linux/module.h> | ||
| 34 | |||
| 35 | #include <litmus/litmus.h> | ||
| 36 | #include <litmus/jobs.h> | ||
| 37 | #include <litmus/preempt.h> | ||
| 38 | #include <litmus/budget.h> | ||
| 39 | #include <litmus/sched_plugin.h> | ||
| 40 | #include <litmus/edf_common.h> | ||
| 41 | #include <litmus/sched_trace.h> | ||
| 42 | |||
| 43 | #include <litmus/clustered.h> | ||
| 44 | |||
| 45 | #include <litmus/bheap.h> | ||
| 46 | |||
| 47 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
| 48 | #include <litmus/affinity.h> | ||
| 49 | #endif | ||
| 50 | |||
| 51 | /* to configure the cluster size */ | ||
| 52 | #include <litmus/litmus_proc.h> | ||
| 53 | #include <linux/uaccess.h> | ||
| 54 | |||
| 55 | /* Reference configuration variable. Determines which cache level is used to | ||
| 56 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | ||
| 57 | * all CPUs form a single cluster (just like GSN-EDF). | ||
| 58 | */ | ||
| 59 | static enum cache_level cluster_config = GLOBAL_CLUSTER; | ||
| 60 | |||
| 61 | struct clusterdomain; | ||
| 62 | |||
| 63 | /* cpu_entry_t - maintain the linked and scheduled state | ||
| 64 | * | ||
| 65 | * A cpu also contains a pointer to the cedf_domain_t cluster | ||
| 66 | * that owns it (struct clusterdomain*) | ||
| 67 | */ | ||
| 68 | typedef struct { | ||
| 69 | int cpu; | ||
| 70 | struct clusterdomain* cluster; /* owning cluster */ | ||
| 71 | struct task_struct* linked; /* only RT tasks */ | ||
| 72 | struct task_struct* scheduled; /* only RT tasks */ | ||
| 73 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
| 74 | struct bheap_node* hn; | ||
| 75 | } cpu_entry_t; | ||
| 76 | |||
| 77 | /* one cpu_entry_t per CPU */ | ||
| 78 | DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); | ||
| 79 | |||
| 80 | #define set_will_schedule() \ | ||
| 81 | (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1)) | ||
| 82 | #define clear_will_schedule() \ | ||
| 83 | (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0)) | ||
| 84 | #define test_will_schedule(cpu) \ | ||
| 85 | (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) | ||
| 86 | |||
| 87 | /* | ||
| 88 | * In C-EDF there is a cedf domain _per_ cluster. | ||
| 89 | * The number of clusters is dynamically determined according to the | ||
| 90 | * total number of CPUs and the cluster size. | ||
| 91 | */ | ||
| 92 | typedef struct clusterdomain { | ||
| 93 | /* rt_domain for this cluster */ | ||
| 94 | rt_domain_t domain; | ||
| 95 | /* cpus in this cluster */ | ||
| 96 | cpu_entry_t* *cpus; | ||
| 97 | /* map of this cluster cpus */ | ||
| 98 | cpumask_var_t cpu_map; | ||
| 99 | /* the cpus queue themselves according to priority in here */ | ||
| 100 | struct bheap_node *heap_node; | ||
| 101 | struct bheap cpu_heap; | ||
| 102 | /* lock for this cluster */ | ||
| 103 | #define cluster_lock domain.ready_lock | ||
| 104 | } cedf_domain_t; | ||
| 105 | |||
| 106 | /* a cedf_domain per cluster; allocation is done at init/activation time */ | ||
| 107 | cedf_domain_t *cedf; | ||
| 108 | |||
| 109 | #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) | ||
| 110 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
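To make the cluster/size relationship concrete, here is a worked example under an assumed topology (8 online CPUs, each L2 cache shared by 2 CPUs):

    /*
     * cluster_config = GLOBAL_CLUSTER -> cluster_size = 8, num_clusters = 1
     *                                    (single cluster, behaves like GSN-EDF)
     * cluster_config = L2             -> cluster_size = 2, num_clusters = 8 / 2 = 4
     *
     * i.e. num_clusters = num_online_cpus() / cluster_size, and each cluster
     * gets its own cedf_domain_t, cpu_heap and cluster_lock.
     */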
| 111 | |||
| 112 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | ||
| 113 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | ||
| 114 | * information during the initialization of the plugin (e.g., topology) | ||
| 115 | #define WANT_ALL_SCHED_EVENTS | ||
| 116 | */ | ||
| 117 | #define VERBOSE_INIT | ||
| 118 | |||
| 119 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
| 120 | { | ||
| 121 | cpu_entry_t *a, *b; | ||
| 122 | a = _a->value; | ||
| 123 | b = _b->value; | ||
| 124 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
| 125 | * the top of the heap. | ||
| 126 | */ | ||
| 127 | return edf_higher_prio(b->linked, a->linked); | ||
| 128 | } | ||
| 129 | |||
| 130 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
| 131 | * order in the cpu queue. Caller must hold cedf lock. | ||
| 132 | */ | ||
| 133 | static void update_cpu_position(cpu_entry_t *entry) | ||
| 134 | { | ||
| 135 | cedf_domain_t *cluster = entry->cluster; | ||
| 136 | |||
| 137 | if (likely(bheap_node_in_heap(entry->hn))) | ||
| 138 | bheap_delete(cpu_lower_prio, | ||
| 139 | &cluster->cpu_heap, | ||
| 140 | entry->hn); | ||
| 141 | |||
| 142 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); | ||
| 143 | } | ||
| 144 | |||
| 145 | /* caller must hold cedf lock */ | ||
| 146 | static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) | ||
| 147 | { | ||
| 148 | struct bheap_node* hn; | ||
| 149 | hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); | ||
| 150 | return hn->value; | ||
| 151 | } | ||
| 152 | |||
| 153 | |||
| 154 | /* link_task_to_cpu - Update the link of a CPU. | ||
| 155 | * Handles the case where the to-be-linked task is already | ||
| 156 | * scheduled on a different CPU. | ||
| 157 | */ | ||
| 158 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
| 159 | cpu_entry_t *entry) | ||
| 160 | { | ||
| 161 | cpu_entry_t *sched; | ||
| 162 | struct task_struct* tmp; | ||
| 163 | int on_cpu; | ||
| 164 | |||
| 165 | BUG_ON(linked && !is_realtime(linked)); | ||
| 166 | |||
| 167 | /* Currently linked task is set to be unlinked. */ | ||
| 168 | if (entry->linked) { | ||
| 169 | entry->linked->rt_param.linked_on = NO_CPU; | ||
| 170 | } | ||
| 171 | |||
| 172 | /* Link new task to CPU. */ | ||
| 173 | if (linked) { | ||
| 174 | tsk_rt(linked)->completed = 0; | ||
| 175 | /* handle the case that the task is already scheduled somewhere! */ | ||
| 176 | on_cpu = linked->rt_param.scheduled_on; | ||
| 177 | if (on_cpu != NO_CPU) { | ||
| 178 | sched = &per_cpu(cedf_cpu_entries, on_cpu); | ||
| 179 | /* this should only happen if not linked already */ | ||
| 180 | BUG_ON(sched->linked == linked); | ||
| 181 | |||
| 182 | /* If we are already scheduled on the CPU to which we | ||
| 183 | * wanted to link, we don't need to do the swap -- | ||
| 184 | * we just link ourselves to the CPU and depend on | ||
| 185 | * the caller to get things right. | ||
| 186 | */ | ||
| 187 | if (entry != sched) { | ||
| 188 | TRACE_TASK(linked, | ||
| 189 | "already scheduled on %d, updating link.\n", | ||
| 190 | sched->cpu); | ||
| 191 | tmp = sched->linked; | ||
| 192 | linked->rt_param.linked_on = sched->cpu; | ||
| 193 | sched->linked = linked; | ||
| 194 | update_cpu_position(sched); | ||
| 195 | linked = tmp; | ||
| 196 | } | ||
| 197 | } | ||
| 198 | if (linked) /* might be NULL due to swap */ | ||
| 199 | linked->rt_param.linked_on = entry->cpu; | ||
| 200 | } | ||
| 201 | entry->linked = linked; | ||
| 202 | #ifdef WANT_ALL_SCHED_EVENTS | ||
| 203 | if (linked) | ||
| 204 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
| 205 | else | ||
| 206 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
| 207 | #endif | ||
| 208 | update_cpu_position(entry); | ||
| 209 | } | ||
| 210 | |||
| 211 | /* unlink - Make sure a task is not linked any longer to an entry | ||
| 212 | * where it was linked before. Must hold cedf_lock. | ||
| 213 | */ | ||
| 214 | static noinline void unlink(struct task_struct* t) | ||
| 215 | { | ||
| 216 | cpu_entry_t *entry; | ||
| 217 | |||
| 218 | if (t->rt_param.linked_on != NO_CPU) { | ||
| 219 | /* unlink */ | ||
| 220 | entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on); | ||
| 221 | t->rt_param.linked_on = NO_CPU; | ||
| 222 | link_task_to_cpu(NULL, entry); | ||
| 223 | } else if (is_queued(t)) { | ||
| 224 | /* This is an interesting situation: t is scheduled, | ||
| 225 | * but was just recently unlinked. It cannot be | ||
| 226 | * linked anywhere else (because then it would have | ||
| 227 | * been relinked to this CPU), thus it must be in some | ||
| 228 | * queue. We must remove it from the list in this | ||
| 229 | * case. | ||
| 230 | * | ||
| 231 | * In the C-EDF case it should be somewhere in the queue of | ||
| 232 | * its domain, so we can get the domain using | ||
| 233 | * task_cpu_cluster. | ||
| 234 | */ | ||
| 235 | remove(&(task_cpu_cluster(t))->domain, t); | ||
| 236 | } | ||
| 237 | } | ||
| 238 | |||
| 239 | |||
| 240 | /* preempt - force a CPU to reschedule | ||
| 241 | */ | ||
| 242 | static void preempt(cpu_entry_t *entry) | ||
| 243 | { | ||
| 244 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
| 245 | } | ||
| 246 | |||
| 247 | /* requeue - Put an unlinked task into its c-edf domain. | ||
| 248 | * Caller must hold cedf_lock. | ||
| 249 | */ | ||
| 250 | static noinline void requeue(struct task_struct* task) | ||
| 251 | { | ||
| 252 | cedf_domain_t *cluster = task_cpu_cluster(task); | ||
| 253 | BUG_ON(!task); | ||
| 254 | /* sanity check before insertion */ | ||
| 255 | BUG_ON(is_queued(task)); | ||
| 256 | |||
| 257 | if (is_early_releasing(task) || is_released(task, litmus_clock())) | ||
| 258 | __add_ready(&cluster->domain, task); | ||
| 259 | else { | ||
| 260 | /* it has got to wait */ | ||
| 261 | add_release(&cluster->domain, task); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
| 265 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
| 266 | static cpu_entry_t* cedf_get_nearest_available_cpu( | ||
| 267 | cedf_domain_t *cluster, cpu_entry_t *start) | ||
| 268 | { | ||
| 269 | cpu_entry_t *affinity; | ||
| 270 | |||
| 271 | get_nearest_available_cpu(affinity, start, cedf_cpu_entries, | ||
| 272 | #ifdef CONFIG_RELEASE_MASTER | ||
| 273 | cluster->domain.release_master | ||
| 274 | #else | ||
| 275 | NO_CPU | ||
| 276 | #endif | ||
| 277 | ); | ||
| 278 | |||
| 279 | /* make sure CPU is in our cluster */ | ||
| 280 | if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) | ||
| 281 | return(affinity); | ||
| 282 | else | ||
| 283 | return(NULL); | ||
| 284 | } | ||
| 285 | #endif | ||
| 286 | |||
| 287 | |||
| 288 | /* check for any necessary preemptions */ | ||
| 289 | static void check_for_preemptions(cedf_domain_t *cluster) | ||
| 290 | { | ||
| 291 | struct task_struct *task; | ||
| 292 | cpu_entry_t *last; | ||
| 293 | |||
| 294 | for(last = lowest_prio_cpu(cluster); | ||
| 295 | edf_preemption_needed(&cluster->domain, last->linked); | ||
| 296 | last = lowest_prio_cpu(cluster)) { | ||
| 297 | /* preemption necessary */ | ||
| 298 | task = __take_ready(&cluster->domain); | ||
| 299 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | ||
| 300 | task->pid, last->cpu); | ||
| 301 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
| 302 | { | ||
| 303 | cpu_entry_t *affinity = | ||
| 304 | cedf_get_nearest_available_cpu(cluster, | ||
| 305 | &per_cpu(cedf_cpu_entries, task_cpu(task))); | ||
| 306 | if(affinity) | ||
| 307 | last = affinity; | ||
| 308 | else if(requeue_preempted_job(last->linked)) | ||
| 309 | requeue(last->linked); | ||
| 310 | } | ||
| 311 | #else | ||
| 312 | if (requeue_preempted_job(last->linked)) | ||
| 313 | requeue(last->linked); | ||
| 314 | #endif | ||
| 315 | link_task_to_cpu(task, last); | ||
| 316 | preempt(last); | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 320 | /* cedf_job_arrival: task is either resumed or released */ | ||
| 321 | static noinline void cedf_job_arrival(struct task_struct* task) | ||
| 322 | { | ||
| 323 | cedf_domain_t *cluster = task_cpu_cluster(task); | ||
| 324 | BUG_ON(!task); | ||
| 325 | |||
| 326 | requeue(task); | ||
| 327 | check_for_preemptions(cluster); | ||
| 328 | } | ||
| 329 | |||
| 330 | static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
| 331 | { | ||
| 332 | cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); | ||
| 333 | unsigned long flags; | ||
| 334 | |||
| 335 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
| 336 | |||
| 337 | __merge_ready(&cluster->domain, tasks); | ||
| 338 | check_for_preemptions(cluster); | ||
| 339 | |||
| 340 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
| 341 | } | ||
| 342 | |||
| 343 | /* caller holds cedf_lock */ | ||
| 344 | static noinline void job_completion(struct task_struct *t, int forced) | ||
| 345 | { | ||
| 346 | BUG_ON(!t); | ||
| 347 | |||
| 348 | sched_trace_task_completion(t, forced); | ||
| 349 | |||
| 350 | TRACE_TASK(t, "job_completion().\n"); | ||
| 351 | |||
| 352 | /* set flags */ | ||
| 353 | tsk_rt(t)->completed = 1; | ||
| 354 | /* prepare for next period */ | ||
| 355 | prepare_for_next_period(t); | ||
| 356 | if (is_early_releasing(t) || is_released(t, litmus_clock())) | ||
| 357 | sched_trace_task_release(t); | ||
| 358 | /* unlink */ | ||
| 359 | unlink(t); | ||
| 360 | /* requeue | ||
| 361 | * But don't requeue a blocking task. */ | ||
| 362 | if (is_running(t)) | ||
| 363 | cedf_job_arrival(t); | ||
| 364 | } | ||
| 365 | |||
| 366 | /* cedf_tick - this function is called for every local timer | ||
| 367 | * interrupt. | ||
| 368 | * | ||
| 369 | * checks whether the current task has expired and checks | ||
| 370 | * whether we need to preempt it if it has not expired | ||
| 371 | */ | ||
| 372 | static void cedf_tick(struct task_struct* t) | ||
| 373 | { | ||
| 374 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
| 375 | if (!is_np(t)) { | ||
| 376 | /* np tasks will be preempted when they become | ||
| 377 | * preemptable again | ||
| 378 | */ | ||
| 379 | litmus_reschedule_local(); | ||
| 380 | set_will_schedule(); | ||
| 381 | TRACE("cedf_scheduler_tick: " | ||
| 382 | "%d is preemptable " | ||
| 383 | " => FORCE_RESCHED\n", t->pid); | ||
| 384 | } else if (is_user_np(t)) { | ||
| 385 | TRACE("cedf_scheduler_tick: " | ||
| 386 | "%d is non-preemptable, " | ||
| 387 | "preemption delayed.\n", t->pid); | ||
| 388 | request_exit_np(t); | ||
| 389 | } | ||
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 393 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
| 394 | * assumptions on the state of the current task since it may be called for a | ||
| 395 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
| 396 | * was necessary, because sys_exit_np() was called, because some Linux | ||
| 397 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
| 398 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
| 399 | * current state is. | ||
| 400 | * | ||
| 401 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
| 402 | * | ||
| 403 | * The following assertions for the scheduled task could hold: | ||
| 404 | * | ||
| 405 | * - !is_running(scheduled) // the job blocks | ||
| 406 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
| 407 | * - is_completed() // the job completed (by syscall) | ||
| 408 | * - linked != scheduled // we need to reschedule (for any reason) | ||
| 409 | * - is_np(scheduled) // rescheduling must be delayed, | ||
| 410 | * sys_exit_np must be requested | ||
| 411 | * | ||
| 412 | * Any of these can occur together. | ||
| 413 | */ | ||
| 414 | static struct task_struct* cedf_schedule(struct task_struct * prev) | ||
| 415 | { | ||
| 416 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | ||
| 417 | cedf_domain_t *cluster = entry->cluster; | ||
| 418 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
| 419 | struct task_struct* next = NULL; | ||
| 420 | |||
| 421 | #ifdef CONFIG_RELEASE_MASTER | ||
| 422 | /* Bail out early if we are the release master. | ||
| 423 | * The release master never schedules any real-time tasks. | ||
| 424 | */ | ||
| 425 | if (unlikely(cluster->domain.release_master == entry->cpu)) { | ||
| 426 | sched_state_task_picked(); | ||
| 427 | return NULL; | ||
| 428 | } | ||
| 429 | #endif | ||
| 430 | |||
| 431 | raw_spin_lock(&cluster->cluster_lock); | ||
| 432 | clear_will_schedule(); | ||
| 433 | |||
| 434 | /* sanity checking */ | ||
| 435 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
| 436 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
| 437 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
| 438 | |||
| 439 | /* (0) Determine state */ | ||
| 440 | exists = entry->scheduled != NULL; | ||
| 441 | blocks = exists && !is_running(entry->scheduled); | ||
| 442 | out_of_time = exists && | ||
| 443 | budget_enforced(entry->scheduled) && | ||
| 444 | budget_exhausted(entry->scheduled); | ||
| 445 | np = exists && is_np(entry->scheduled); | ||
| 446 | sleep = exists && is_completed(entry->scheduled); | ||
| 447 | preempt = entry->scheduled != entry->linked; | ||
| 448 | |||
| 449 | #ifdef WANT_ALL_SCHED_EVENTS | ||
| 450 | TRACE_TASK(prev, "invoked cedf_schedule.\n"); | ||
| 451 | #endif | ||
| 452 | |||
| 453 | if (exists) | ||
| 454 | TRACE_TASK(prev, | ||
| 455 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
| 456 | "state:%d sig:%d\n", | ||
| 457 | blocks, out_of_time, np, sleep, preempt, | ||
| 458 | prev->state, signal_pending(prev)); | ||
| 459 | if (entry->linked && preempt) | ||
| 460 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
| 461 | entry->linked->comm, entry->linked->pid); | ||
| 462 | |||
| 463 | |||
| 464 | /* If a task blocks we have no choice but to reschedule. | ||
| 465 | */ | ||
| 466 | if (blocks) | ||
| 467 | unlink(entry->scheduled); | ||
| 468 | |||
| 469 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
| 470 | * We need to make sure to update the link structure anyway in case | ||
| 471 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
| 472 | * hurt. | ||
| 473 | */ | ||
| 474 | if (np && (out_of_time || preempt || sleep)) { | ||
| 475 | unlink(entry->scheduled); | ||
| 476 | request_exit_np(entry->scheduled); | ||
| 477 | } | ||
| 478 | |||
| 479 | /* Any task that is preemptable and either exhausts its execution | ||
| 480 | * budget or wants to sleep completes. We may have to reschedule after | ||
| 481 | * this. Don't do a job completion if we block (can't have timers running | ||
| 482 | * for blocked jobs). | ||
| 483 | */ | ||
| 484 | if (!np && (out_of_time || sleep) && !blocks) | ||
| 485 | job_completion(entry->scheduled, !sleep); | ||
| 486 | |||
| 487 | /* Link pending task if we became unlinked. | ||
| 488 | */ | ||
| 489 | if (!entry->linked) | ||
| 490 | link_task_to_cpu(__take_ready(&cluster->domain), entry); | ||
| 491 | |||
| 492 | /* The final scheduling decision. Do we need to switch for some reason? | ||
| 493 | * If linked is different from scheduled, then select linked as next. | ||
| 494 | */ | ||
| 495 | if ((!np || blocks) && | ||
| 496 | entry->linked != entry->scheduled) { | ||
| 497 | /* Schedule a linked job? */ | ||
| 498 | if (entry->linked) { | ||
| 499 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
| 500 | next = entry->linked; | ||
| 501 | } | ||
| 502 | if (entry->scheduled) { | ||
| 503 | /* not gonna be scheduled soon */ | ||
| 504 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
| 505 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
| 506 | } | ||
| 507 | } else | ||
| 508 | /* Only override Linux scheduler if we have a real-time task | ||
| 509 | * scheduled that needs to continue. | ||
| 510 | */ | ||
| 511 | if (exists) | ||
| 512 | next = prev; | ||
| 513 | |||
| 514 | sched_state_task_picked(); | ||
| 515 | raw_spin_unlock(&cluster->cluster_lock); | ||
| 516 | |||
| 517 | #ifdef WANT_ALL_SCHED_EVENTS | ||
| 518 | TRACE("cedf_lock released, next=0x%p\n", next); | ||
| 519 | |||
| 520 | if (next) | ||
| 521 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
| 522 | else if (exists && !next) | ||
| 523 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
| 524 | #endif | ||
| 525 | |||
| 526 | |||
| 527 | return next; | ||
| 528 | } | ||
| 529 | |||
| 530 | |||
| 531 | /* _finish_switch - we just finished the switch away from prev | ||
| 532 | */ | ||
| 533 | static void cedf_finish_switch(struct task_struct *prev) | ||
| 534 | { | ||
| 535 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | ||
| 536 | |||
| 537 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
| 538 | #ifdef WANT_ALL_SCHED_EVENTS | ||
| 539 | TRACE_TASK(prev, "switched away from\n"); | ||
| 540 | #endif | ||
| 541 | } | ||
| 542 | |||
| 543 | |||
| 544 | /* Prepare a task for running in RT mode | ||
| 545 | */ | ||
| 546 | static void cedf_task_new(struct task_struct * t, int on_rq, int running) | ||
| 547 | { | ||
| 548 | unsigned long flags; | ||
| 549 | cpu_entry_t* entry; | ||
| 550 | cedf_domain_t* cluster; | ||
| 551 | |||
| 552 | TRACE("gsn edf: task new %d\n", t->pid); | ||
| 553 | |||
| 554 | /* the cluster doesn't change even if t is running */ | ||
| 555 | cluster = task_cpu_cluster(t); | ||
| 556 | |||
| 557 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
| 558 | |||
| 559 | /* setup job params */ | ||
| 560 | release_at(t, litmus_clock()); | ||
| 561 | |||
| 562 | if (running) { | ||
| 563 | entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); | ||
| 564 | BUG_ON(entry->scheduled); | ||
| 565 | |||
| 566 | #ifdef CONFIG_RELEASE_MASTER | ||
| 567 | if (entry->cpu != cluster->domain.release_master) { | ||
| 568 | #endif | ||
| 569 | entry->scheduled = t; | ||
| 570 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
| 571 | #ifdef CONFIG_RELEASE_MASTER | ||
| 572 | } else { | ||
| 573 | /* do not schedule on release master */ | ||
| 574 | preempt(entry); /* force resched */ | ||
| 575 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
| 576 | } | ||
| 577 | #endif | ||
| 578 | } else { | ||
| 579 | t->rt_param.scheduled_on = NO_CPU; | ||
| 580 | } | ||
| 581 | t->rt_param.linked_on = NO_CPU; | ||
| 582 | |||
| 583 | cedf_job_arrival(t); | ||
| 584 | raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags); | ||
| 585 | } | ||
| 586 | |||
| 587 | static void cedf_task_wake_up(struct task_struct *task) | ||
| 588 | { | ||
| 589 | unsigned long flags; | ||
| 590 | lt_t now; | ||
| 591 | cedf_domain_t *cluster; | ||
| 592 | |||
| 593 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
| 594 | |||
| 595 | cluster = task_cpu_cluster(task); | ||
| 596 | |||
| 597 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
| 598 | now = litmus_clock(); | ||
| 599 | if (is_sporadic(task) && is_tardy(task, now)) { | ||
| 600 | /* new sporadic release */ | ||
| 601 | release_at(task, now); | ||
| 602 | sched_trace_task_release(task); | ||
| 603 | } | ||
| 604 | else { | ||
| 605 | if (task->rt.time_slice) { | ||
| 606 | /* came back in time before deadline | ||
| 607 | */ | ||
| 608 | tsk_rt(task)->completed = 0; | ||
| 609 | } | ||
| 610 | } | ||
| 611 | cedf_job_arrival(task); | ||
| 612 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
| 613 | } | ||
| 614 | |||
| 615 | static void cedf_task_block(struct task_struct *t) | ||
| 616 | { | ||
| 617 | unsigned long flags; | ||
| 618 | cedf_domain_t *cluster; | ||
| 619 | |||
| 620 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
| 621 | |||
| 622 | cluster = task_cpu_cluster(t); | ||
| 623 | |||
| 624 | /* unlink if necessary */ | ||
| 625 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
| 626 | unlink(t); | ||
| 627 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
| 628 | |||
| 629 | BUG_ON(!is_realtime(t)); | ||
| 630 | } | ||
| 631 | |||
| 632 | |||
| 633 | static void cedf_task_exit(struct task_struct * t) | ||
| 634 | { | ||
| 635 | unsigned long flags; | ||
| 636 | cedf_domain_t *cluster = task_cpu_cluster(t); | ||
| 637 | |||
| 638 | /* unlink if necessary */ | ||
| 639 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
| 640 | unlink(t); | ||
| 641 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
| 642 | cpu_entry_t *cpu; | ||
| 643 | cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on); | ||
| 644 | cpu->scheduled = NULL; | ||
| 645 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
| 646 | } | ||
| 647 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
| 648 | |||
| 649 | BUG_ON(!is_realtime(t)); | ||
| 650 | TRACE_TASK(t, "RIP\n"); | ||
| 651 | } | ||
| 652 | |||
| 653 | static long cedf_admit_task(struct task_struct* tsk) | ||
| 654 | { | ||
| 655 | return (remote_cluster(task_cpu(tsk)) == task_cpu_cluster(tsk)) ? | ||
| 656 | 0 : -EINVAL; | ||
| 657 | } | ||
| 658 | |||
| 659 | /* total number of clusters */ | ||
| 660 | static int num_clusters; | ||
| 661 | /* we do not support clusters of different sizes */ | ||
| 662 | static unsigned int cluster_size; | ||
| 663 | |||
| 664 | #ifdef VERBOSE_INIT | ||
| 665 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | ||
| 666 | { | ||
| 667 | int chk; | ||
| 668 | char buf[255]; | ||
| 669 | |||
| 670 | chk = cpulist_scnprintf(buf, 254, mask); | ||
| 671 | buf[chk] = '\0'; | ||
| 672 | printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); | ||
| 673 | |||
| 674 | } | ||
| 675 | #endif | ||
| 676 | |||
| 677 | static int clusters_allocated = 0; | ||
| 678 | |||
| 679 | static void cleanup_cedf(void) | ||
| 680 | { | ||
| 681 | int i; | ||
| 682 | |||
| 683 | if (clusters_allocated) { | ||
| 684 | for (i = 0; i < num_clusters; i++) { | ||
| 685 | kfree(cedf[i].cpus); | ||
| 686 | kfree(cedf[i].heap_node); | ||
| 687 | free_cpumask_var(cedf[i].cpu_map); | ||
| 688 | } | ||
| 689 | |||
| 690 | kfree(cedf); | ||
| 691 | } | ||
| 692 | } | ||
| 693 | |||
| 694 | static long cedf_activate_plugin(void) | ||
| 695 | { | ||
| 696 | int i, j, cpu, ccpu, cpu_count; | ||
| 697 | cpu_entry_t *entry; | ||
| 698 | |||
| 699 | cpumask_var_t mask; | ||
| 700 | int chk = 0; | ||
| 701 | |||
| 702 | /* de-allocate old clusters, if any */ | ||
| 703 | cleanup_cedf(); | ||
| 704 | |||
| 705 | printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n", | ||
| 706 | cluster_config); | ||
| 707 | |||
| 708 | /* need to get cluster_size first */ | ||
| 709 | if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
| 710 | return -ENOMEM; | ||
| 711 | |||
| 712 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) { | ||
| 713 | cluster_size = num_online_cpus(); | ||
| 714 | } else { | ||
| 715 | chk = get_shared_cpu_map(mask, 0, cluster_config); | ||
| 716 | if (chk) { | ||
| 717 | /* if chk != 0 then it is the max allowed index */ | ||
| 718 | printk(KERN_INFO "C-EDF: Cluster configuration = %d " | ||
| 719 | "is not supported on this hardware.\n", | ||
| 720 | cluster_config); | ||
| 721 | /* User should notice that the configuration failed, so | ||
| 722 | * let's bail out. */ | ||
| 723 | return -EINVAL; | ||
| 724 | } | ||
| 725 | |||
| 726 | cluster_size = cpumask_weight(mask); | ||
| 727 | } | ||
| 728 | |||
| 729 | if ((num_online_cpus() % cluster_size) != 0) { | ||
| 730 | /* this can't be right, some cpus are left out */ | ||
| 731 | printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n", | ||
| 732 | num_online_cpus(), cluster_size); | ||
| 733 | return -1; | ||
| 734 | } | ||
| 735 | |||
| 736 | num_clusters = num_online_cpus() / cluster_size; | ||
| 737 | printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", | ||
| 738 | num_clusters, cluster_size); | ||
| 739 | |||
| 740 | /* initialize clusters */ | ||
| 741 | cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); | ||
| 742 | for (i = 0; i < num_clusters; i++) { | ||
| 743 | |||
| 744 | cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), | ||
| 745 | GFP_ATOMIC); | ||
| 746 | cedf[i].heap_node = kmalloc( | ||
| 747 | cluster_size * sizeof(struct bheap_node), | ||
| 748 | GFP_ATOMIC); | ||
| 749 | bheap_init(&(cedf[i].cpu_heap)); | ||
| 750 | edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); | ||
| 751 | |||
| 752 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) | ||
| 753 | return -ENOMEM; | ||
| 754 | #ifdef CONFIG_RELEASE_MASTER | ||
| 755 | cedf[i].domain.release_master = atomic_read(&release_master_cpu); | ||
| 756 | #endif | ||
| 757 | } | ||
| 758 | |||
| 759 | /* cycle through clusters and add cpus to them */ | ||
| 760 | for (i = 0; i < num_clusters; i++) { | ||
| 761 | |||
| 762 | for_each_online_cpu(cpu) { | ||
| 763 | /* check if the cpu is already in a cluster */ | ||
| 764 | for (j = 0; j < num_clusters; j++) | ||
| 765 | if (cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
| 766 | break; | ||
| 767 | /* if it is in a cluster go to next cpu */ | ||
| 768 | if (j < num_clusters && | ||
| 769 | cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
| 770 | continue; | ||
| 771 | |||
| 772 | /* this cpu isn't in any cluster */ | ||
| 773 | /* get the shared cpus */ | ||
| 774 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) | ||
| 775 | cpumask_copy(mask, cpu_online_mask); | ||
| 776 | else | ||
| 777 | get_shared_cpu_map(mask, cpu, cluster_config); | ||
| 778 | |||
| 779 | cpumask_copy(cedf[i].cpu_map, mask); | ||
| 780 | #ifdef VERBOSE_INIT | ||
| 781 | print_cluster_topology(mask, cpu); | ||
| 782 | #endif | ||
| 783 | /* add cpus to current cluster and init cpu_entry_t */ | ||
| 784 | cpu_count = 0; | ||
| 785 | for_each_cpu(ccpu, cedf[i].cpu_map) { | ||
| 786 | |||
| 787 | entry = &per_cpu(cedf_cpu_entries, ccpu); | ||
| 788 | cedf[i].cpus[cpu_count] = entry; | ||
| 789 | atomic_set(&entry->will_schedule, 0); | ||
| 790 | entry->cpu = ccpu; | ||
| 791 | entry->cluster = &cedf[i]; | ||
| 792 | entry->hn = &(cedf[i].heap_node[cpu_count]); | ||
| 793 | bheap_node_init(&entry->hn, entry); | ||
| 794 | |||
| 795 | cpu_count++; | ||
| 796 | |||
| 797 | entry->linked = NULL; | ||
| 798 | entry->scheduled = NULL; | ||
| 799 | #ifdef CONFIG_RELEASE_MASTER | ||
| 800 | /* only add CPUs that should schedule jobs */ | ||
| 801 | if (entry->cpu != entry->cluster->domain.release_master) | ||
| 802 | #endif | ||
| 803 | update_cpu_position(entry); | ||
| 804 | } | ||
| 805 | /* done with this cluster */ | ||
| 806 | break; | ||
| 807 | } | ||
| 808 | } | ||
| 809 | |||
| 810 | free_cpumask_var(mask); | ||
| 811 | clusters_allocated = 1; | ||
| 812 | return 0; | ||
| 813 | } | ||
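
As a quick illustration of the cluster arithmetic in cedf_activate_plugin() above, the following small user-space sketch (not part of the plugin; the CPU counts are invented) shows why the online CPU count must be an exact multiple of the detected cluster size:

#include <stdio.h>

int main(void)
{
        int online = 8;         /* pretend num_online_cpus() */
        int cluster_size = 3;   /* pretend size detected via get_shared_cpu_map() */

        if (online % cluster_size) {
                /* mirrors the error path above: some CPUs would be left out */
                printf("cannot group %d cpus into clusters of %d\n",
                       online, cluster_size);
                return 1;
        }
        printf("%d cluster(s) of size %d\n", online / cluster_size, cluster_size);
        return 0;
}
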
| 814 | |||
| 815 | /* Plugin object */ | ||
| 816 | static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { | ||
| 817 | .plugin_name = "C-EDF", | ||
| 818 | .finish_switch = cedf_finish_switch, | ||
| 819 | .tick = cedf_tick, | ||
| 820 | .task_new = cedf_task_new, | ||
| 821 | .complete_job = complete_job, | ||
| 822 | .task_exit = cedf_task_exit, | ||
| 823 | .schedule = cedf_schedule, | ||
| 824 | .task_wake_up = cedf_task_wake_up, | ||
| 825 | .task_block = cedf_task_block, | ||
| 826 | .admit_task = cedf_admit_task, | ||
| 827 | .activate_plugin = cedf_activate_plugin, | ||
| 828 | }; | ||
| 829 | |||
| 830 | static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; | ||
| 831 | |||
| 832 | static int __init init_cedf(void) | ||
| 833 | { | ||
| 834 | int err, fs; | ||
| 835 | |||
| 836 | err = register_sched_plugin(&cedf_plugin); | ||
| 837 | if (!err) { | ||
| 838 | fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir); | ||
| 839 | if (!fs) | ||
| 840 | cluster_file = create_cluster_file(cedf_dir, &cluster_config); | ||
| 841 | else | ||
| 842 | printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n"); | ||
| 843 | } | ||
| 844 | return err; | ||
| 845 | } | ||
| 846 | |||
| 847 | static void clean_cedf(void) | ||
| 848 | { | ||
| 849 | cleanup_cedf(); | ||
| 850 | if (cluster_file) | ||
| 851 | remove_proc_entry("cluster", cedf_dir); | ||
| 852 | if (cedf_dir) | ||
| 853 | remove_plugin_proc_dir(&cedf_plugin); | ||
| 854 | } | ||
| 855 | |||
| 856 | module_init(init_cedf); | ||
| 857 | module_exit(clean_cedf); | ||
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c new file mode 100644 index 00000000000..5956978ccdb --- /dev/null +++ b/litmus/sched_gsn_edf.c | |||
| @@ -0,0 +1,1030 @@ | |||
| 1 | /* | ||
| 2 | * litmus/sched_gsn_edf.c | ||
| 3 | * | ||
| 4 | * Implementation of the GSN-EDF scheduling algorithm. | ||
| 5 | * | ||
| 6 | * This version uses the simple approach and serializes all scheduling | ||
| 7 | * decisions by the use of a queue lock. This is probably not the | ||
| 8 | * best way to do it, but it should suffice for now. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/spinlock.h> | ||
| 12 | #include <linux/percpu.h> | ||
| 13 | #include <linux/sched.h> | ||
| 14 | #include <linux/slab.h> | ||
| 15 | |||
| 16 | #include <litmus/litmus.h> | ||
| 17 | #include <litmus/jobs.h> | ||
| 18 | #include <litmus/sched_plugin.h> | ||
| 19 | #include <litmus/edf_common.h> | ||
| 20 | #include <litmus/sched_trace.h> | ||
| 21 | #include <litmus/trace.h> | ||
| 22 | |||
| 23 | #include <litmus/preempt.h> | ||
| 24 | #include <litmus/budget.h> | ||
| 25 | |||
| 26 | #include <litmus/bheap.h> | ||
| 27 | |||
| 28 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
| 29 | #include <litmus/affinity.h> | ||
| 30 | #endif | ||
| 31 | |||
| 32 | #include <linux/module.h> | ||
| 33 | |||
| 34 | /* Overview of GSN-EDF operations. | ||
| 35 | * | ||
| 36 | * For a detailed explanation of GSN-EDF, have a look at the FMLP paper. This | ||
| 37 | * description only covers how the individual operations are implemented in | ||
| 38 | * LITMUS. | ||
| 39 | * | ||
| 40 | * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage | ||
| 41 | * structure (NOT the actually scheduled | ||
| 42 | * task). If another task To is already linked, | ||
| 43 | * it will set To->linked_on = NO_CPU | ||
| 44 | * (thereby removing its association with this | ||
| 45 | * CPU). However, it will not requeue the | ||
| 46 | * previously linked task (if any). It will set | ||
| 47 | * T's state to not completed and check whether | ||
| 48 | * it is already running somewhere else. If T | ||
| 49 | * is scheduled somewhere else it will link | ||
| 50 | * it to that CPU instead (and pull the linked | ||
| 51 | * task to cpu). T may be NULL. | ||
| 52 | * | ||
| 53 | * unlink(T) - Unlink removes T from all scheduler data | ||
| 54 | * structures. If it is linked to some CPU it | ||
| 55 | * will link NULL to that CPU. If it is | ||
| 56 | * currently queued in the gsnedf queue it will | ||
| 57 | * be removed from the rt_domain. It is safe to | ||
| 58 | * call unlink(T) if T is not linked. T may not | ||
| 59 | * be NULL. | ||
| 60 | * | ||
| 61 | * requeue(T) - Requeue will insert T into the appropriate | ||
| 62 | * queue. If the system is in real-time mode and | ||
| 63 | * T has already been released, it will go into the | ||
| 64 | * ready queue. If the system is not in | ||
| 65 | * real-time mode, then T will go into the | ||
| 66 | * release queue. If T's release time is in the | ||
| 67 | * future, it will also go into the release | ||
| 68 | * queue. That means that T's release time/job | ||
| 69 | * no/etc. has to be updated before requeue(T) is | ||
| 70 | * called. It is not safe to call requeue(T) | ||
| 71 | * when T is already queued. T may not be NULL. | ||
| 72 | * | ||
| 73 | * gsnedf_job_arrival(T) - This is the catch all function when T enters | ||
| 74 | * the system after either a suspension or at a | ||
| 75 | * job release. It will queue T (which means it | ||
| 76 | * is not safe to call gsnedf_job_arrival(T) if | ||
| 77 | * T is already queued) and then check whether a | ||
| 78 | * preemption is necessary. If a preemption is | ||
| 79 | * necessary it will update the linkage | ||
| 80 | * accordingly and cause scheduled to be called | ||
| 81 | * (either with an IPI or need_resched). It is | ||
| 82 | * safe to call gsnedf_job_arrival(T) if T's | ||
| 83 | * next job has not been actually released yet | ||
| 84 | * (release time in the future). T will be put | ||
| 85 | * on the release queue in that case. | ||
| 86 | * | ||
| 87 | * job_completion(T) - Take care of everything that needs to be done | ||
| 88 | * to prepare T for its next release and place | ||
| 89 | * it in the right queue with | ||
| 90 | * gsnedf_job_arrival(). | ||
| 91 | * | ||
| 92 | * | ||
| 93 | * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is | ||
| 94 | * equivalent to unlink(T). Note that if you unlink a task from a CPU none of | ||
| 95 | * the functions will automatically propagate a pending task from the ready queue | ||
| 96 | * to a linked task. This is the job of the calling function (by means of | ||
| 97 | * __take_ready). | ||
| 98 | */ | ||
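
To make the linked/scheduled distinction above concrete, here is a minimal user-space sketch (purely illustrative; the toy_* names are invented and this is not the plugin's code). Linking only records which task *should* run on a CPU; the later schedule() invocation makes the actually-scheduled task catch up with the link:

#include <stdio.h>
#include <stddef.h>

struct toy_cpu {
        const char *linked;     /* what the plugin wants to run */
        const char *scheduled;  /* what is actually running */
};

static void toy_link(struct toy_cpu *e, const char *task)
{
        e->linked = task;       /* no context switch happens here */
}

static const char *toy_schedule(struct toy_cpu *e)
{
        /* the real plugin picks entry->linked when it differs from
         * entry->scheduled; here we just mirror that decision */
        if (e->linked != e->scheduled)
                e->scheduled = e->linked;
        return e->scheduled;
}

int main(void)
{
        struct toy_cpu cpu0 = { NULL, NULL };

        toy_link(&cpu0, "T1");
        printf("after link:     scheduled=%s\n",
               cpu0.scheduled ? cpu0.scheduled : "(idle)");
        printf("after schedule: scheduled=%s\n", toy_schedule(&cpu0));
        return 0;
}
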
| 99 | |||
| 100 | |||
| 101 | /* cpu_entry_t - maintain the linked and scheduled state | ||
| 102 | */ | ||
| 103 | typedef struct { | ||
| 104 | int cpu; | ||
| 105 | struct task_struct* linked; /* only RT tasks */ | ||
| 106 | struct task_struct* scheduled; /* only RT tasks */ | ||
| 107 | struct bheap_node* hn; | ||
| 108 | } cpu_entry_t; | ||
| 109 | DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); | ||
| 110 | |||
| 111 | cpu_entry_t* gsnedf_cpus[NR_CPUS]; | ||
| 112 | |||
| 113 | /* the cpus queue themselves according to priority in here */ | ||
| 114 | static struct bheap_node gsnedf_heap_node[NR_CPUS]; | ||
| 115 | static struct bheap gsnedf_cpu_heap; | ||
| 116 | |||
| 117 | static rt_domain_t gsnedf; | ||
| 118 | #define gsnedf_lock (gsnedf.ready_lock) | ||
| 119 | |||
| 120 | |||
| 121 | /* Uncomment this if you want to see all scheduling decisions in the | ||
| 122 | * TRACE() log. | ||
| 123 | #define WANT_ALL_SCHED_EVENTS | ||
| 124 | */ | ||
| 125 | |||
| 126 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
| 127 | { | ||
| 128 | cpu_entry_t *a, *b; | ||
| 129 | a = _a->value; | ||
| 130 | b = _b->value; | ||
| 131 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
| 132 | * the top of the heap. | ||
| 133 | */ | ||
| 134 | return edf_higher_prio(b->linked, a->linked); | ||
| 135 | } | ||
| 136 | |||
| 137 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
| 138 | * order in the cpu queue. Caller must hold gsnedf lock. | ||
| 139 | */ | ||
| 140 | static void update_cpu_position(cpu_entry_t *entry) | ||
| 141 | { | ||
| 142 | if (likely(bheap_node_in_heap(entry->hn))) | ||
| 143 | bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); | ||
| 144 | bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); | ||
| 145 | } | ||
| 146 | |||
| 147 | /* caller must hold gsnedf lock */ | ||
| 148 | static cpu_entry_t* lowest_prio_cpu(void) | ||
| 149 | { | ||
| 150 | struct bheap_node* hn; | ||
| 151 | hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap); | ||
| 152 | return hn->value; | ||
| 153 | } | ||
| 154 | |||
| 155 | |||
| 156 | /* link_task_to_cpu - Update the link of a CPU. | ||
| 157 | * Handles the case where the to-be-linked task is already | ||
| 158 | * scheduled on a different CPU. | ||
| 159 | */ | ||
| 160 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
| 161 | cpu_entry_t *entry) | ||
| 162 | { | ||
| 163 | cpu_entry_t *sched; | ||
| 164 | struct task_struct* tmp; | ||
| 165 | int on_cpu; | ||
| 166 | |||
| 167 | BUG_ON(linked && !is_realtime(linked)); | ||
| 168 | |||
| 169 | /* Currently linked task is set to be unlinked. */ | ||
| 170 | if (entry->linked) { | ||
| 171 | entry->linked->rt_param.linked_on = NO_CPU; | ||
| 172 | } | ||
| 173 | |||
| 174 | /* Link new task to CPU. */ | ||
| 175 | if (linked) { | ||
| 176 | tsk_rt(linked)->completed = 0; | ||
| 177 | /* handle task is already scheduled somewhere! */ | ||
| 178 | on_cpu = linked->rt_param.scheduled_on; | ||
| 179 | if (on_cpu != NO_CPU) { | ||
| 180 | sched = &per_cpu(gsnedf_cpu_entries, on_cpu); | ||
| 181 | /* this should only happen if not linked already */ | ||
| 182 | BUG_ON(sched->linked == linked); | ||
| 183 | |||
| 184 | /* If we are already scheduled on the CPU to which we | ||
| 185 | * wanted to link, we don't need to do the swap -- | ||
| 186 | * we just link ourselves to the CPU and depend on | ||
| 187 | * the caller to get things right. | ||
| 188 | */ | ||
| 189 | if (entry != sched) { | ||
| 190 | TRACE_TASK(linked, | ||
| 191 | "already scheduled on %d, updating link.\n", | ||
| 192 | sched->cpu); | ||
| 193 | tmp = sched->linked; | ||
| 194 | linked->rt_param.linked_on = sched->cpu; | ||
| 195 | sched->linked = linked; | ||
| 196 | update_cpu_position(sched); | ||
| 197 | linked = tmp; | ||
| 198 | } | ||
| 199 | } | ||
| 200 | if (linked) /* might be NULL due to swap */ | ||
| 201 | linked->rt_param.linked_on = entry->cpu; | ||
| 202 | } | ||
| 203 | entry->linked = linked; | ||
| 204 | #ifdef WANT_ALL_SCHED_EVENTS | ||
| 205 | if (linked) | ||
| 206 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
| 207 | else | ||
| 208 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
| 209 | #endif | ||
| 210 | update_cpu_position(entry); | ||
| 211 | } | ||
| 212 | |||
| 213 | /* unlink - Make sure a task is not linked any longer to an entry | ||
| 214 | * where it was linked before. Must hold gsnedf_lock. | ||
| 215 | */ | ||
| 216 | static noinline void unlink(struct task_struct* t) | ||
| 217 | { | ||
| 218 | cpu_entry_t *entry; | ||
| 219 | |||
| 220 | if (t->rt_param.linked_on != NO_CPU) { | ||
| 221 | /* unlink */ | ||
| 222 | entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on); | ||
| 223 | t->rt_param.linked_on = NO_CPU; | ||
| 224 | link_task_to_cpu(NULL, entry); | ||
| 225 | } else if (is_queued(t)) { | ||
| 226 | /* This is an interesting situation: t is scheduled, | ||
| 227 | * but was just recently unlinked. It cannot be | ||
| 228 | * linked anywhere else (because then it would have | ||
| 229 | * been relinked to this CPU), thus it must be in some | ||
| 230 | * queue. We must remove it from the list in this | ||
| 231 | * case. | ||
| 232 | */ | ||
| 233 | remove(&gsnedf, t); | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | |||
| 238 | /* preempt - force a CPU to reschedule | ||
| 239 | */ | ||
| 240 | static void preempt(cpu_entry_t *entry) | ||
| 241 | { | ||
| 242 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
| 243 | } | ||
| 244 | |||
| 245 | /* requeue - Put an unlinked task into gsn-edf domain. | ||
| 246 | * Caller must hold gsnedf_lock. | ||
| 247 | */ | ||
| 248 | static noinline void requeue(struct task_struct* task) | ||
| 249 | { | ||
| 250 | BUG_ON(!task); | ||
| 251 | /* sanity check before insertion */ | ||
| 252 | BUG_ON(is_queued(task)); | ||
| 253 | |||
| 254 | if (is_early_releasing(task) || is_released(task, litmus_clock())) | ||
| 255 | __add_ready(&gsnedf, task); | ||
| 256 | else { | ||
| 257 | /* it has got to wait */ | ||
| 258 | add_release(&gsnedf, task); | ||
| 259 | } | ||
| 260 | } | ||
| 261 | |||
| 262 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
| 263 | static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start) | ||
| 264 | { | ||
| 265 | cpu_entry_t *affinity; | ||
| 266 | |||
| 267 | get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries, | ||
| 268 | #ifdef CONFIG_RELEASE_MASTER | ||
| 269 | gsnedf.release_master | ||
| 270 | #else | ||
| 271 | NO_CPU | ||
| 272 | #endif | ||
| 273 | ); | ||
| 274 | |||
| 275 | return(affinity); | ||
| 276 | } | ||
| 277 | #endif | ||
| 278 | |||
| 279 | /* check for any necessary preemptions */ | ||
| 280 | static void check_for_preemptions(void) | ||
| 281 | { | ||
| 282 | struct task_struct *task; | ||
| 283 | cpu_entry_t *last; | ||
| 284 | |||
| 285 | for (last = lowest_prio_cpu(); | ||
| 286 | edf_preemption_needed(&gsnedf, last->linked); | ||
| 287 | last = lowest_prio_cpu()) { | ||
| 288 | /* preemption necessary */ | ||
| 289 | task = __take_ready(&gsnedf); | ||
| 290 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | ||
| 291 | task->pid, last->cpu); | ||
| 292 | |||
| 293 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
| 294 | { | ||
| 295 | cpu_entry_t *affinity = | ||
| 296 | gsnedf_get_nearest_available_cpu( | ||
| 297 | &per_cpu(gsnedf_cpu_entries, task_cpu(task))); | ||
| 298 | if (affinity) | ||
| 299 | last = affinity; | ||
| 300 | else if (requeue_preempted_job(last->linked)) | ||
| 301 | requeue(last->linked); | ||
| 302 | } | ||
| 303 | #else | ||
| 304 | if (requeue_preempted_job(last->linked)) | ||
| 305 | requeue(last->linked); | ||
| 306 | #endif | ||
| 307 | |||
| 308 | link_task_to_cpu(task, last); | ||
| 309 | preempt(last); | ||
| 310 | } | ||
| 311 | } | ||
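
The loop in check_for_preemptions() can be pictured with a small standalone example (illustrative only; deadlines are invented, lower deadline means higher priority, and the requeueing of preempted jobs is omitted): keep linking the head of the ready queue to the lowest-priority CPU until that CPU's linked job is no longer beaten.

#include <stdio.h>

#define NCPUS 2

int main(void)
{
        long cpu_linked[NCPUS] = { 50, 20 };    /* deadlines of linked jobs */
        long ready[] = { 10, 30, 60 };          /* sorted ready queue */
        int head = 0, nready = 3;

        for (;;) {
                /* find the lowest-priority CPU (largest deadline) */
                int worst = 0, c;
                for (c = 1; c < NCPUS; c++)
                        if (cpu_linked[c] > cpu_linked[worst])
                                worst = c;

                if (head >= nready || ready[head] >= cpu_linked[worst])
                        break;  /* no preemption needed */

                printf("link job (deadline %ld) to CPU %d, preempting %ld\n",
                       ready[head], worst, cpu_linked[worst]);
                cpu_linked[worst] = ready[head++];
        }
        return 0;
}
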
| 312 | |||
| 313 | /* gsnedf_job_arrival: task is either resumed or released */ | ||
| 314 | static noinline void gsnedf_job_arrival(struct task_struct* task) | ||
| 315 | { | ||
| 316 | BUG_ON(!task); | ||
| 317 | |||
| 318 | requeue(task); | ||
| 319 | check_for_preemptions(); | ||
| 320 | } | ||
| 321 | |||
| 322 | static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
| 323 | { | ||
| 324 | unsigned long flags; | ||
| 325 | |||
| 326 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
| 327 | |||
| 328 | __merge_ready(rt, tasks); | ||
| 329 | check_for_preemptions(); | ||
| 330 | |||
| 331 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
| 332 | } | ||
| 333 | |||
| 334 | /* caller holds gsnedf_lock */ | ||
| 335 | static noinline void job_completion(struct task_struct *t, int forced) | ||
| 336 | { | ||
| 337 | BUG_ON(!t); | ||
| 338 | |||
| 339 | sched_trace_task_completion(t, forced); | ||
| 340 | |||
| 341 | TRACE_TASK(t, "job_completion().\n"); | ||
| 342 | |||
| 343 | /* set flags */ | ||
| 344 | tsk_rt(t)->completed = 1; | ||
| 345 | /* prepare for next period */ | ||
| 346 | prepare_for_next_period(t); | ||
| 347 | if (is_early_releasing(t) || is_released(t, litmus_clock())) | ||
| 348 | sched_trace_task_release(t); | ||
| 349 | /* unlink */ | ||
| 350 | unlink(t); | ||
| 351 | /* requeue | ||
| 352 | * But don't requeue a blocking task. */ | ||
| 353 | if (is_running(t)) | ||
| 354 | gsnedf_job_arrival(t); | ||
| 355 | } | ||
| 356 | |||
| 357 | /* gsnedf_tick - this function is called for every local timer | ||
| 358 | * interrupt. | ||
| 359 | * | ||
| 360 | * checks whether the current task has expired and checks | ||
| 361 | * whether we need to preempt it if it has not expired | ||
| 362 | */ | ||
| 363 | static void gsnedf_tick(struct task_struct* t) | ||
| 364 | { | ||
| 365 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
| 366 | if (!is_np(t)) { | ||
| 367 | /* np tasks will be preempted when they become | ||
| 368 | * preemptable again | ||
| 369 | */ | ||
| 370 | litmus_reschedule_local(); | ||
| 371 | TRACE("gsnedf_scheduler_tick: " | ||
| 372 | "%d is preemptable " | ||
| 373 | " => FORCE_RESCHED\n", t->pid); | ||
| 374 | } else if (is_user_np(t)) { | ||
| 375 | TRACE("gsnedf_scheduler_tick: " | ||
| 376 | "%d is non-preemptable, " | ||
| 377 | "preemption delayed.\n", t->pid); | ||
| 378 | request_exit_np(t); | ||
| 379 | } | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
| 384 | * assumptions on the state of the current task since it may be called for a | ||
| 385 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
| 386 | * was necessary, because sys_exit_np() was called, because some Linux | ||
| 387 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
| 388 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
| 389 | * current state is. | ||
| 390 | * | ||
| 391 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
| 392 | * | ||
| 393 | * The following assertions for the scheduled task could hold: | ||
| 394 | * | ||
| 395 | * - !is_running(scheduled) // the job blocks | ||
| 396 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
| 397 | * - is_completed() // the job completed (by syscall) | ||
| 398 | * - linked != scheduled // we need to reschedule (for any reason) | ||
| 399 | * - is_np(scheduled) // rescheduling must be delayed, | ||
| 400 | * sys_exit_np must be requested | ||
| 401 | * | ||
| 402 | * Any of these can occur together. | ||
| 403 | */ | ||
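
The state flags enumerated above drive the decision tree in gsnedf_schedule(); the helper below is an illustrative stand-in (not the real code, and it reports only the dominant action for a flag combination, ignoring the subsequent linking step):

#include <stdio.h>
#include <stdbool.h>

static const char *decide(bool blocks, bool out_of_time, bool np,
                          bool sleep, bool preempt)
{
        if (blocks)
                return "unlink: a blocked job cannot stay linked";
        if (np && (out_of_time || preempt || sleep))
                return "request_exit_np: preemption waits for the np-section to end";
        if (!np && (out_of_time || sleep))
                return "job_completion: budget exhausted or job signalled completion";
        return "no special action: the decision follows the linked task";
}

int main(void)
{
        printf("%s\n", decide(true,  false, false, false, false));
        printf("%s\n", decide(false, true,  false, false, false));
        printf("%s\n", decide(false, false, true,  true,  false));
        return 0;
}
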
| 404 | static struct task_struct* gsnedf_schedule(struct task_struct * prev) | ||
| 405 | { | ||
| 406 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | ||
| 407 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
| 408 | struct task_struct* next = NULL; | ||
| 409 | |||
| 410 | #ifdef CONFIG_RELEASE_MASTER | ||
| 411 | /* Bail out early if we are the release master. | ||
| 412 | * The release master never schedules any real-time tasks. | ||
| 413 | */ | ||
| 414 | if (unlikely(gsnedf.release_master == entry->cpu)) { | ||
| 415 | sched_state_task_picked(); | ||
| 416 | return NULL; | ||
| 417 | } | ||
| 418 | #endif | ||
| 419 | |||
| 420 | raw_spin_lock(&gsnedf_lock); | ||
| 421 | |||
| 422 | /* sanity checking */ | ||
| 423 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
| 424 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
| 425 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
| 426 | |||
| 427 | /* (0) Determine state */ | ||
| 428 | exists = entry->scheduled != NULL; | ||
| 429 | blocks = exists && !is_running(entry->scheduled); | ||
| 430 | out_of_time = exists && budget_enforced(entry->scheduled) | ||
| 431 | && budget_exhausted(entry->scheduled); | ||
| 432 | np = exists && is_np(entry->scheduled); | ||
| 433 | sleep = exists && is_completed(entry->scheduled); | ||
| 434 | preempt = entry->scheduled != entry->linked; | ||
| 435 | |||
| 436 | #ifdef WANT_ALL_SCHED_EVENTS | ||
| 437 | TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); | ||
| 438 | #endif | ||
| 439 | |||
| 440 | if (exists) | ||
| 441 | TRACE_TASK(prev, | ||
| 442 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
| 443 | "state:%d sig:%d\n", | ||
| 444 | blocks, out_of_time, np, sleep, preempt, | ||
| 445 | prev->state, signal_pending(prev)); | ||
| 446 | if (entry->linked && preempt) | ||
| 447 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
| 448 | entry->linked->comm, entry->linked->pid); | ||
| 449 | |||
| 450 | |||
| 451 | /* If a task blocks we have no choice but to reschedule. | ||
| 452 | */ | ||
| 453 | if (blocks) | ||
| 454 | unlink(entry->scheduled); | ||
| 455 | |||
| 456 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
| 457 | * We need to make sure to update the link structure anyway in case | ||
| 458 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
| 459 | * hurt. | ||
| 460 | */ | ||
| 461 | if (np && (out_of_time || preempt || sleep)) { | ||
| 462 | unlink(entry->scheduled); | ||
| 463 | request_exit_np(entry->scheduled); | ||
| 464 | } | ||
| 465 | |||
| 466 | /* Any task that is preemptable and either exhausts its execution | ||
| 467 | * budget or wants to sleep completes. We may have to reschedule after | ||
| 468 | * this. Don't do a job completion if we block (can't have timers running | ||
| 469 | * for blocked jobs). | ||
| 470 | */ | ||
| 471 | if (!np && (out_of_time || sleep) && !blocks) | ||
| 472 | job_completion(entry->scheduled, !sleep); | ||
| 473 | |||
| 474 | /* Link pending task if we became unlinked. | ||
| 475 | */ | ||
| 476 | if (!entry->linked) | ||
| 477 | link_task_to_cpu(__take_ready(&gsnedf), entry); | ||
| 478 | |||
| 479 | /* The final scheduling decision. Do we need to switch for some reason? | ||
| 480 | * If linked is different from scheduled, then select linked as next. | ||
| 481 | */ | ||
| 482 | if ((!np || blocks) && | ||
| 483 | entry->linked != entry->scheduled) { | ||
| 484 | /* Schedule a linked job? */ | ||
| 485 | if (entry->linked) { | ||
| 486 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
| 487 | next = entry->linked; | ||
| 488 | TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id()); | ||
| 489 | } | ||
| 490 | if (entry->scheduled) { | ||
| 491 | /* not gonna be scheduled soon */ | ||
| 492 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
| 493 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
| 494 | } | ||
| 495 | } else | ||
| 496 | /* Only override Linux scheduler if we have a real-time task | ||
| 497 | * scheduled that needs to continue. | ||
| 498 | */ | ||
| 499 | if (exists) | ||
| 500 | next = prev; | ||
| 501 | |||
| 502 | sched_state_task_picked(); | ||
| 503 | |||
| 504 | raw_spin_unlock(&gsnedf_lock); | ||
| 505 | |||
| 506 | #ifdef WANT_ALL_SCHED_EVENTS | ||
| 507 | TRACE("gsnedf_lock released, next=0x%p\n", next); | ||
| 508 | |||
| 509 | if (next) | ||
| 510 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
| 511 | else if (exists && !next) | ||
| 512 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
| 513 | #endif | ||
| 514 | |||
| 515 | |||
| 516 | return next; | ||
| 517 | } | ||
| 518 | |||
| 519 | |||
| 520 | /* _finish_switch - we just finished the switch away from prev | ||
| 521 | */ | ||
| 522 | static void gsnedf_finish_switch(struct task_struct *prev) | ||
| 523 | { | ||
| 524 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | ||
| 525 | |||
| 526 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
| 527 | #ifdef WANT_ALL_SCHED_EVENTS | ||
| 528 | TRACE_TASK(prev, "switched away from\n"); | ||
| 529 | #endif | ||
| 530 | } | ||
| 531 | |||
| 532 | |||
| 533 | /* Prepare a task for running in RT mode | ||
| 534 | */ | ||
| 535 | static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) | ||
| 536 | { | ||
| 537 | unsigned long flags; | ||
| 538 | cpu_entry_t* entry; | ||
| 539 | |||
| 540 | TRACE("gsn edf: task new %d\n", t->pid); | ||
| 541 | |||
| 542 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
| 543 | |||
| 544 | /* setup job params */ | ||
| 545 | release_at(t, litmus_clock()); | ||
| 546 | |||
| 547 | if (running) { | ||
| 548 | entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t)); | ||
| 549 | BUG_ON(entry->scheduled); | ||
| 550 | |||
| 551 | #ifdef CONFIG_RELEASE_MASTER | ||
| 552 | if (entry->cpu != gsnedf.release_master) { | ||
| 553 | #endif | ||
| 554 | entry->scheduled = t; | ||
| 555 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
| 556 | #ifdef CONFIG_RELEASE_MASTER | ||
| 557 | } else { | ||
| 558 | /* do not schedule on release master */ | ||
| 559 | preempt(entry); /* force resched */ | ||
| 560 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
| 561 | } | ||
| 562 | #endif | ||
| 563 | } else { | ||
| 564 | t->rt_param.scheduled_on = NO_CPU; | ||
| 565 | } | ||
| 566 | t->rt_param.linked_on = NO_CPU; | ||
| 567 | |||
| 568 | gsnedf_job_arrival(t); | ||
| 569 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
| 570 | } | ||
| 571 | |||
| 572 | static void gsnedf_task_wake_up(struct task_struct *task) | ||
| 573 | { | ||
| 574 | unsigned long flags; | ||
| 575 | lt_t now; | ||
| 576 | |||
| 577 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
| 578 | |||
| 579 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
| 580 | now = litmus_clock(); | ||
| 581 | if (is_sporadic(task) && is_tardy(task, now)) { | ||
| 582 | /* new sporadic release */ | ||
| 583 | release_at(task, now); | ||
| 584 | sched_trace_task_release(task); | ||
| 585 | } | ||
| 586 | else { | ||
| 587 | if (task->rt.time_slice) { | ||
| 588 | /* came back in time before deadline | ||
| 589 | */ | ||
| 590 | tsk_rt(task)->completed = 0; | ||
| 591 | } | ||
| 592 | } | ||
| 593 | gsnedf_job_arrival(task); | ||
| 594 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
| 595 | } | ||
| 596 | |||
| 597 | static void gsnedf_task_block(struct task_struct *t) | ||
| 598 | { | ||
| 599 | unsigned long flags; | ||
| 600 | |||
| 601 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
| 602 | |||
| 603 | /* unlink if necessary */ | ||
| 604 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
| 605 | unlink(t); | ||
| 606 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
| 607 | |||
| 608 | BUG_ON(!is_realtime(t)); | ||
| 609 | } | ||
| 610 | |||
| 611 | |||
| 612 | static void gsnedf_task_exit(struct task_struct * t) | ||
| 613 | { | ||
| 614 | unsigned long flags; | ||
| 615 | |||
| 616 | /* unlink if necessary */ | ||
| 617 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
| 618 | unlink(t); | ||
| 619 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
| 620 | gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; | ||
| 621 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
| 622 | } | ||
| 623 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
| 624 | |||
| 625 | BUG_ON(!is_realtime(t)); | ||
| 626 | TRACE_TASK(t, "RIP\n"); | ||
| 627 | } | ||
| 628 | |||
| 629 | |||
| 630 | static long gsnedf_admit_task(struct task_struct* tsk) | ||
| 631 | { | ||
| 632 | return 0; | ||
| 633 | } | ||
| 634 | |||
| 635 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 636 | |||
| 637 | #include <litmus/fdso.h> | ||
| 638 | |||
| 639 | /* called with IRQs off */ | ||
| 640 | static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
| 641 | { | ||
| 642 | int linked_on; | ||
| 643 | int check_preempt = 0; | ||
| 644 | |||
| 645 | raw_spin_lock(&gsnedf_lock); | ||
| 646 | |||
| 647 | TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); | ||
| 648 | tsk_rt(t)->inh_task = prio_inh; | ||
| 649 | |||
| 650 | linked_on = tsk_rt(t)->linked_on; | ||
| 651 | |||
| 652 | /* If it is scheduled, then we need to reorder the CPU heap. */ | ||
| 653 | if (linked_on != NO_CPU) { | ||
| 654 | TRACE_TASK(t, "%s: linked on %d\n", | ||
| 655 | __FUNCTION__, linked_on); | ||
| 656 | /* Holder is scheduled; need to re-order CPUs. | ||
| 657 | * We can't use heap_decrease() here since | ||
| 658 | * the cpu_heap is ordered in reverse direction, so | ||
| 659 | * it is actually an increase. */ | ||
| 660 | bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, | ||
| 661 | gsnedf_cpus[linked_on]->hn); | ||
| 662 | bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, | ||
| 663 | gsnedf_cpus[linked_on]->hn); | ||
| 664 | } else { | ||
| 665 | /* holder may be queued: first stop queue changes */ | ||
| 666 | raw_spin_lock(&gsnedf.release_lock); | ||
| 667 | if (is_queued(t)) { | ||
| 668 | TRACE_TASK(t, "%s: is queued\n", | ||
| 669 | __FUNCTION__); | ||
| 670 | /* We need to update the position of holder in some | ||
| 671 | * heap. Note that this could be a release heap if | ||
| 672 | * budget enforcement is used and this job overran. */ | ||
| 673 | check_preempt = | ||
| 674 | !bheap_decrease(edf_ready_order, | ||
| 675 | tsk_rt(t)->heap_node); | ||
| 676 | } else { | ||
| 677 | /* Nothing to do: if it is not queued and not linked | ||
| 678 | * then it is either sleeping or currently being moved | ||
| 679 | * by other code (e.g., a timer interrupt handler) that | ||
| 680 | * will use the correct priority when enqueuing the | ||
| 681 | * task. */ | ||
| 682 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", | ||
| 683 | __FUNCTION__); | ||
| 684 | } | ||
| 685 | raw_spin_unlock(&gsnedf.release_lock); | ||
| 686 | |||
| 687 | /* If holder was enqueued in a release heap, then the following | ||
| 688 | * preemption check is pointless, but we can't easily detect | ||
| 689 | * that case. If you want to fix this, then consider that | ||
| 690 | * simply adding a state flag requires O(n) time to update when | ||
| 691 | * releasing n tasks, which conflicts with the goal to have | ||
| 692 | * O(log n) merges. */ | ||
| 693 | if (check_preempt) { | ||
| 694 | /* heap_decrease() hit the top level of the heap: make | ||
| 695 | * sure preemption checks get the right task, not the | ||
| 696 | * potentially stale cache. */ | ||
| 697 | bheap_uncache_min(edf_ready_order, | ||
| 698 | &gsnedf.ready_queue); | ||
| 699 | check_for_preemptions(); | ||
| 700 | } | ||
| 701 | } | ||
| 702 | |||
| 703 | raw_spin_unlock(&gsnedf_lock); | ||
| 704 | } | ||
| 705 | |||
| 706 | /* called with IRQs off */ | ||
| 707 | static void clear_priority_inheritance(struct task_struct* t) | ||
| 708 | { | ||
| 709 | raw_spin_lock(&gsnedf_lock); | ||
| 710 | |||
| 711 | /* A job only stops inheriting a priority when it releases a | ||
| 712 | * resource. Thus we can make the following assumption.*/ | ||
| 713 | BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); | ||
| 714 | |||
| 715 | TRACE_TASK(t, "priority restored\n"); | ||
| 716 | tsk_rt(t)->inh_task = NULL; | ||
| 717 | |||
| 718 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
| 719 | * since the priority was effectively lowered. */ | ||
| 720 | unlink(t); | ||
| 721 | gsnedf_job_arrival(t); | ||
| 722 | |||
| 723 | raw_spin_unlock(&gsnedf_lock); | ||
| 724 | } | ||
| 725 | |||
| 726 | |||
| 727 | /* ******************** FMLP support ********************** */ | ||
| 728 | |||
| 729 | /* struct for semaphore with priority inheritance */ | ||
| 730 | struct fmlp_semaphore { | ||
| 731 | struct litmus_lock litmus_lock; | ||
| 732 | |||
| 733 | /* current resource holder */ | ||
| 734 | struct task_struct *owner; | ||
| 735 | |||
| 736 | /* highest-priority waiter */ | ||
| 737 | struct task_struct *hp_waiter; | ||
| 738 | |||
| 739 | /* FIFO queue of waiting tasks */ | ||
| 740 | wait_queue_head_t wait; | ||
| 741 | }; | ||
| 742 | |||
| 743 | static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock) | ||
| 744 | { | ||
| 745 | return container_of(lock, struct fmlp_semaphore, litmus_lock); | ||
| 746 | } | ||
| 747 | |||
| 748 | /* caller is responsible for locking */ | ||
| 749 | struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem, | ||
| 750 | struct task_struct* skip) | ||
| 751 | { | ||
| 752 | struct list_head *pos; | ||
| 753 | struct task_struct *queued, *found = NULL; | ||
| 754 | |||
| 755 | list_for_each(pos, &sem->wait.task_list) { | ||
| 756 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
| 757 | task_list)->private; | ||
| 758 | |||
| 759 | /* Compare task prios, find high prio task. */ | ||
| 760 | if (queued != skip && edf_higher_prio(queued, found)) | ||
| 761 | found = queued; | ||
| 762 | } | ||
| 763 | return found; | ||
| 764 | } | ||
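
For reference, the same highest-priority-waiter scan can be written as a self-contained user-space sketch (illustrative only; struct waiter and the deadline-based comparison are invented stand-ins for the real wait queue and edf_higher_prio()):

#include <stdio.h>
#include <stddef.h>

struct waiter { const char *name; long deadline; };     /* lower = higher prio */

static const struct waiter *find_hp(const struct waiter *w, size_t n,
                                    const struct waiter *skip)
{
        const struct waiter *found = NULL;
        size_t i;

        for (i = 0; i < n; i++) {
                if (&w[i] == skip)
                        continue;
                if (!found || w[i].deadline < found->deadline)
                        found = &w[i];
        }
        return found;
}

int main(void)
{
        struct waiter q[] = { {"A", 30}, {"B", 10}, {"C", 20} };
        const struct waiter *hp = find_hp(q, 3, &q[1]); /* skip B, the new owner */

        printf("new hp_waiter: %s\n", hp ? hp->name : "(none)");
        return 0;
}
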
| 765 | |||
| 766 | int gsnedf_fmlp_lock(struct litmus_lock* l) | ||
| 767 | { | ||
| 768 | struct task_struct* t = current; | ||
| 769 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
| 770 | wait_queue_t wait; | ||
| 771 | unsigned long flags; | ||
| 772 | |||
| 773 | if (!is_realtime(t)) | ||
| 774 | return -EPERM; | ||
| 775 | |||
| 776 | /* prevent nested lock acquisition --- not supported by FMLP */ | ||
| 777 | if (tsk_rt(t)->num_locks_held) | ||
| 778 | return -EBUSY; | ||
| 779 | |||
| 780 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 781 | |||
| 782 | if (sem->owner) { | ||
| 783 | /* resource is not free => must suspend and wait */ | ||
| 784 | |||
| 785 | init_waitqueue_entry(&wait, t); | ||
| 786 | |||
| 787 | /* FIXME: interruptible would be nice some day */ | ||
| 788 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
| 789 | |||
| 790 | __add_wait_queue_tail_exclusive(&sem->wait, &wait); | ||
| 791 | |||
| 792 | /* check if we need to activate priority inheritance */ | ||
| 793 | if (edf_higher_prio(t, sem->hp_waiter)) { | ||
| 794 | sem->hp_waiter = t; | ||
| 795 | if (edf_higher_prio(t, sem->owner)) | ||
| 796 | set_priority_inheritance(sem->owner, sem->hp_waiter); | ||
| 797 | } | ||
| 798 | |||
| 799 | TS_LOCK_SUSPEND; | ||
| 800 | |||
| 801 | /* release lock before sleeping */ | ||
| 802 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 803 | |||
| 804 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
| 805 | * when we wake up; we are guaranteed to have the lock since | ||
| 806 | * there is only one wake up per release. | ||
| 807 | */ | ||
| 808 | |||
| 809 | schedule(); | ||
| 810 | |||
| 811 | TS_LOCK_RESUME; | ||
| 812 | |||
| 813 | /* Since we hold the lock, no other task will change | ||
| 814 | * ->owner. We can thus check it without acquiring the spin | ||
| 815 | * lock. */ | ||
| 816 | BUG_ON(sem->owner != t); | ||
| 817 | } else { | ||
| 818 | /* it's ours now */ | ||
| 819 | sem->owner = t; | ||
| 820 | |||
| 821 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 822 | } | ||
| 823 | |||
| 824 | tsk_rt(t)->num_locks_held++; | ||
| 825 | |||
| 826 | return 0; | ||
| 827 | } | ||
| 828 | |||
| 829 | int gsnedf_fmlp_unlock(struct litmus_lock* l) | ||
| 830 | { | ||
| 831 | struct task_struct *t = current, *next; | ||
| 832 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
| 833 | unsigned long flags; | ||
| 834 | int err = 0; | ||
| 835 | |||
| 836 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 837 | |||
| 838 | if (sem->owner != t) { | ||
| 839 | err = -EINVAL; | ||
| 840 | goto out; | ||
| 841 | } | ||
| 842 | |||
| 843 | tsk_rt(t)->num_locks_held--; | ||
| 844 | |||
| 845 | /* check if there are jobs waiting for this resource */ | ||
| 846 | next = __waitqueue_remove_first(&sem->wait); | ||
| 847 | if (next) { | ||
| 848 | /* next becomes the resource holder */ | ||
| 849 | sem->owner = next; | ||
| 850 | TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid); | ||
| 851 | |||
| 852 | /* determine new hp_waiter if necessary */ | ||
| 853 | if (next == sem->hp_waiter) { | ||
| 854 | TRACE_TASK(next, "was highest-prio waiter\n"); | ||
| 855 | /* next has the highest priority --- it doesn't need to | ||
| 856 | * inherit. However, we need to make sure that the | ||
| 857 | * next-highest priority in the queue is reflected in | ||
| 858 | * hp_waiter. */ | ||
| 859 | sem->hp_waiter = find_hp_waiter(sem, next); | ||
| 860 | if (sem->hp_waiter) | ||
| 861 | TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n"); | ||
| 862 | else | ||
| 863 | TRACE("no further waiters\n"); | ||
| 864 | } else { | ||
| 865 | /* Well, if next is not the highest-priority waiter, | ||
| 866 | * then it ought to inherit the highest-priority | ||
| 867 | * waiter's priority. */ | ||
| 868 | set_priority_inheritance(next, sem->hp_waiter); | ||
| 869 | } | ||
| 870 | |||
| 871 | /* wake up next */ | ||
| 872 | wake_up_process(next); | ||
| 873 | } else | ||
| 874 | /* becomes available */ | ||
| 875 | sem->owner = NULL; | ||
| 876 | |||
| 877 | /* we lose the benefit of priority inheritance (if any) */ | ||
| 878 | if (tsk_rt(t)->inh_task) | ||
| 879 | clear_priority_inheritance(t); | ||
| 880 | |||
| 881 | out: | ||
| 882 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 883 | |||
| 884 | return err; | ||
| 885 | } | ||
| 886 | |||
| 887 | int gsnedf_fmlp_close(struct litmus_lock* l) | ||
| 888 | { | ||
| 889 | struct task_struct *t = current; | ||
| 890 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
| 891 | unsigned long flags; | ||
| 892 | |||
| 893 | int owner; | ||
| 894 | |||
| 895 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 896 | |||
| 897 | owner = sem->owner == t; | ||
| 898 | |||
| 899 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 900 | |||
| 901 | if (owner) | ||
| 902 | gsnedf_fmlp_unlock(l); | ||
| 903 | |||
| 904 | return 0; | ||
| 905 | } | ||
| 906 | |||
| 907 | void gsnedf_fmlp_free(struct litmus_lock* lock) | ||
| 908 | { | ||
| 909 | kfree(fmlp_from_lock(lock)); | ||
| 910 | } | ||
| 911 | |||
| 912 | static struct litmus_lock_ops gsnedf_fmlp_lock_ops = { | ||
| 913 | .close = gsnedf_fmlp_close, | ||
| 914 | .lock = gsnedf_fmlp_lock, | ||
| 915 | .unlock = gsnedf_fmlp_unlock, | ||
| 916 | .deallocate = gsnedf_fmlp_free, | ||
| 917 | }; | ||
| 918 | |||
| 919 | static struct litmus_lock* gsnedf_new_fmlp(void) | ||
| 920 | { | ||
| 921 | struct fmlp_semaphore* sem; | ||
| 922 | |||
| 923 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
| 924 | if (!sem) | ||
| 925 | return NULL; | ||
| 926 | |||
| 927 | sem->owner = NULL; | ||
| 928 | sem->hp_waiter = NULL; | ||
| 929 | init_waitqueue_head(&sem->wait); | ||
| 930 | sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops; | ||
| 931 | |||
| 932 | return &sem->litmus_lock; | ||
| 933 | } | ||
| 934 | |||
| 935 | /* **** lock constructor **** */ | ||
| 936 | |||
| 937 | |||
| 938 | static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, | ||
| 939 | void* __user unused) | ||
| 940 | { | ||
| 941 | int err = -ENXIO; | ||
| 942 | |||
| 943 | /* GSN-EDF currently only supports the FMLP for global resources. */ | ||
| 944 | switch (type) { | ||
| 945 | |||
| 946 | case FMLP_SEM: | ||
| 947 | /* Flexible Multiprocessor Locking Protocol */ | ||
| 948 | *lock = gsnedf_new_fmlp(); | ||
| 949 | if (*lock) | ||
| 950 | err = 0; | ||
| 951 | else | ||
| 952 | err = -ENOMEM; | ||
| 953 | break; | ||
| 954 | |||
| 955 | }; | ||
| 956 | |||
| 957 | return err; | ||
| 958 | } | ||
| 959 | |||
| 960 | #endif | ||
| 961 | |||
| 962 | |||
| 963 | static long gsnedf_activate_plugin(void) | ||
| 964 | { | ||
| 965 | int cpu; | ||
| 966 | cpu_entry_t *entry; | ||
| 967 | |||
| 968 | bheap_init(&gsnedf_cpu_heap); | ||
| 969 | #ifdef CONFIG_RELEASE_MASTER | ||
| 970 | gsnedf.release_master = atomic_read(&release_master_cpu); | ||
| 971 | #endif | ||
| 972 | |||
| 973 | for_each_online_cpu(cpu) { | ||
| 974 | entry = &per_cpu(gsnedf_cpu_entries, cpu); | ||
| 975 | bheap_node_init(&entry->hn, entry); | ||
| 976 | entry->linked = NULL; | ||
| 977 | entry->scheduled = NULL; | ||
| 978 | #ifdef CONFIG_RELEASE_MASTER | ||
| 979 | if (cpu != gsnedf.release_master) { | ||
| 980 | #endif | ||
| 981 | TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu); | ||
| 982 | update_cpu_position(entry); | ||
| 983 | #ifdef CONFIG_RELEASE_MASTER | ||
| 984 | } else { | ||
| 985 | TRACE("GSN-EDF: CPU %d is release master.\n", cpu); | ||
| 986 | } | ||
| 987 | #endif | ||
| 988 | } | ||
| 989 | return 0; | ||
| 990 | } | ||
| 991 | |||
| 992 | /* Plugin object */ | ||
| 993 | static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { | ||
| 994 | .plugin_name = "GSN-EDF", | ||
| 995 | .finish_switch = gsnedf_finish_switch, | ||
| 996 | .tick = gsnedf_tick, | ||
| 997 | .task_new = gsnedf_task_new, | ||
| 998 | .complete_job = complete_job, | ||
| 999 | .task_exit = gsnedf_task_exit, | ||
| 1000 | .schedule = gsnedf_schedule, | ||
| 1001 | .task_wake_up = gsnedf_task_wake_up, | ||
| 1002 | .task_block = gsnedf_task_block, | ||
| 1003 | .admit_task = gsnedf_admit_task, | ||
| 1004 | .activate_plugin = gsnedf_activate_plugin, | ||
| 1005 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 1006 | .allocate_lock = gsnedf_allocate_lock, | ||
| 1007 | #endif | ||
| 1008 | }; | ||
| 1009 | |||
| 1010 | |||
| 1011 | static int __init init_gsn_edf(void) | ||
| 1012 | { | ||
| 1013 | int cpu; | ||
| 1014 | cpu_entry_t *entry; | ||
| 1015 | |||
| 1016 | bheap_init(&gsnedf_cpu_heap); | ||
| 1017 | /* initialize CPU state */ | ||
| 1018 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
| 1019 | entry = &per_cpu(gsnedf_cpu_entries, cpu); | ||
| 1020 | gsnedf_cpus[cpu] = entry; | ||
| 1021 | entry->cpu = cpu; | ||
| 1022 | entry->hn = &gsnedf_heap_node[cpu]; | ||
| 1023 | bheap_node_init(&entry->hn, entry); | ||
| 1024 | } | ||
| 1025 | edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); | ||
| 1026 | return register_sched_plugin(&gsn_edf_plugin); | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | |||
| 1030 | module_init(init_gsn_edf); | ||
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c new file mode 100644 index 00000000000..6b32cf09abb --- /dev/null +++ b/litmus/sched_litmus.c | |||
| @@ -0,0 +1,330 @@ | |||
| 1 | /* This file is included from kernel/sched.c */ | ||
| 2 | |||
| 3 | #include <litmus/litmus.h> | ||
| 4 | #include <litmus/budget.h> | ||
| 5 | #include <litmus/sched_plugin.h> | ||
| 6 | #include <litmus/preempt.h> | ||
| 7 | |||
| 8 | static void update_time_litmus(struct rq *rq, struct task_struct *p) | ||
| 9 | { | ||
| 10 | u64 delta = rq->clock - p->se.exec_start; | ||
| 11 | if (unlikely((s64)delta < 0)) | ||
| 12 | delta = 0; | ||
| 13 | /* per job counter */ | ||
| 14 | p->rt_param.job_params.exec_time += delta; | ||
| 15 | /* task counter */ | ||
| 16 | p->se.sum_exec_runtime += delta; | ||
| 17 | /* sched_clock() */ | ||
| 18 | p->se.exec_start = rq->clock; | ||
| 19 | cpuacct_charge(p, delta); | ||
| 20 | } | ||
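
The signed-delta clamp at the top of update_time_litmus() guards against a clock reading that appears to run backwards; a tiny standalone illustration (not kernel code) of the same pattern:

#include <stdio.h>
#include <stdint.h>

static uint64_t charge(uint64_t now, uint64_t start)
{
        int64_t delta = (int64_t)(now - start);
        /* charge zero rather than a huge unsigned value */
        return delta < 0 ? 0 : (uint64_t)delta;
}

int main(void)
{
        printf("%llu\n", (unsigned long long)charge(100, 40));  /* 60 */
        printf("%llu\n", (unsigned long long)charge(40, 100));  /* 0, clamped */
        return 0;
}
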
| 21 | |||
| 22 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
| 23 | static void double_rq_unlock(struct rq *rq1, struct rq *rq2); | ||
| 24 | |||
| 25 | /* | ||
| 26 | * litmus_tick gets called by scheduler_tick() with HZ freq | ||
| 27 | * Interrupts are disabled | ||
| 28 | */ | ||
| 29 | static void litmus_tick(struct rq *rq, struct task_struct *p) | ||
| 30 | { | ||
| 31 | TS_PLUGIN_TICK_START; | ||
| 32 | |||
| 33 | if (is_realtime(p)) | ||
| 34 | update_time_litmus(rq, p); | ||
| 35 | |||
| 36 | /* plugin tick */ | ||
| 37 | litmus->tick(p); | ||
| 38 | |||
| 39 | TS_PLUGIN_TICK_END; | ||
| 40 | |||
| 41 | return; | ||
| 42 | } | ||
| 43 | |||
| 44 | static struct task_struct * | ||
| 45 | litmus_schedule(struct rq *rq, struct task_struct *prev) | ||
| 46 | { | ||
| 47 | struct rq* other_rq; | ||
| 48 | struct task_struct *next; | ||
| 49 | |||
| 50 | long was_running; | ||
| 51 | lt_t _maybe_deadlock = 0; | ||
| 52 | |||
| 53 | /* let the plugin schedule */ | ||
| 54 | next = litmus->schedule(prev); | ||
| 55 | |||
| 56 | sched_state_plugin_check(); | ||
| 57 | |||
| 58 | /* check if a global plugin pulled a task from a different RQ */ | ||
| 59 | if (next && task_rq(next) != rq) { | ||
| 60 | /* we need to migrate the task */ | ||
| 61 | other_rq = task_rq(next); | ||
| 62 | TRACE_TASK(next, "migrate from %d\n", other_rq->cpu); | ||
| 63 | |||
| 64 | /* while we drop the lock, the prev task could change its | ||
| 65 | * state | ||
| 66 | */ | ||
| 67 | was_running = is_running(prev); | ||
| 68 | mb(); | ||
| 69 | raw_spin_unlock(&rq->lock); | ||
| 70 | |||
| 71 | /* Don't race with a concurrent switch. This could deadlock in | ||
| 72 | * the case of cross or circular migrations. It's the job of | ||
| 73 | * the plugin to make sure that doesn't happen. | ||
| 74 | */ | ||
| 75 | TRACE_TASK(next, "stack_in_use=%d\n", | ||
| 76 | next->rt_param.stack_in_use); | ||
| 77 | if (next->rt_param.stack_in_use != NO_CPU) { | ||
| 78 | TRACE_TASK(next, "waiting to deschedule\n"); | ||
| 79 | _maybe_deadlock = litmus_clock(); | ||
| 80 | } | ||
| 81 | while (next->rt_param.stack_in_use != NO_CPU) { | ||
| 82 | cpu_relax(); | ||
| 83 | mb(); | ||
| 84 | if (next->rt_param.stack_in_use == NO_CPU) | ||
| 85 | TRACE_TASK(next,"descheduled. Proceeding.\n"); | ||
| 86 | |||
| 87 | if (lt_before(_maybe_deadlock + 10000000, | ||
| 88 | litmus_clock())) { | ||
| 89 | /* We've been spinning for 10ms. | ||
| 90 | * Something can't be right! | ||
| 91 | * Let's abandon the task and bail out; at least | ||
| 92 | * we will have debug info instead of a hard | ||
| 93 | * deadlock. | ||
| 94 | */ | ||
| 95 | TRACE_TASK(next,"stack too long in use. " | ||
| 96 | "Deadlock?\n"); | ||
| 97 | next = NULL; | ||
| 98 | |||
| 99 | /* bail out */ | ||
| 100 | raw_spin_lock(&rq->lock); | ||
| 101 | return next; | ||
| 102 | } | ||
| 103 | } | ||
| 104 | #ifdef __ARCH_WANT_UNLOCKED_CTXSW | ||
| 105 | if (next->on_cpu) | ||
| 106 | TRACE_TASK(next, "waiting for !oncpu"); | ||
| 107 | while (next->on_cpu) { | ||
| 108 | cpu_relax(); | ||
| 109 | mb(); | ||
| 110 | } | ||
| 111 | #endif | ||
| 112 | double_rq_lock(rq, other_rq); | ||
| 113 | mb(); | ||
| 114 | if (is_realtime(prev) && is_running(prev) != was_running) { | ||
| 115 | TRACE_TASK(prev, | ||
| 116 | "state changed while we dropped" | ||
| 117 | " the lock: is_running=%d, was_running=%d\n", | ||
| 118 | is_running(prev), was_running); | ||
| 119 | if (is_running(prev) && !was_running) { | ||
| 120 | /* prev task became unblocked | ||
| 121 | * we need to simulate normal sequence of events | ||
| 122 | * to scheduler plugins. | ||
| 123 | */ | ||
| 124 | litmus->task_block(prev); | ||
| 125 | litmus->task_wake_up(prev); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | set_task_cpu(next, smp_processor_id()); | ||
| 130 | |||
| 131 | /* DEBUG: now that we have the lock we need to make sure a | ||
| 132 | * couple of things still hold: | ||
| 133 | * - it is still a real-time task | ||
| 134 | * - it is still runnable (could have been stopped) | ||
| 135 | * If either is violated, then the active plugin is | ||
| 136 | * doing something wrong. | ||
| 137 | */ | ||
| 138 | if (!is_realtime(next) || !is_running(next)) { | ||
| 139 | /* BAD BAD BAD */ | ||
| 140 | TRACE_TASK(next,"BAD: migration invariant FAILED: " | ||
| 141 | "rt=%d running=%d\n", | ||
| 142 | is_realtime(next), | ||
| 143 | is_running(next)); | ||
| 144 | /* drop the task */ | ||
| 145 | next = NULL; | ||
| 146 | } | ||
| 147 | /* release the other CPU's runqueue, but keep ours */ | ||
| 148 | raw_spin_unlock(&other_rq->lock); | ||
| 149 | } | ||
| 150 | if (next) { | ||
| 151 | next->rt_param.stack_in_use = rq->cpu; | ||
| 152 | next->se.exec_start = rq->clock; | ||
| 153 | } | ||
| 154 | |||
| 155 | update_enforcement_timer(next); | ||
| 156 | return next; | ||
| 157 | } | ||
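
The bounded busy-wait in litmus_schedule() above (spin on stack_in_use, but give up after roughly 10 ms) is a general pattern; here is a hedged user-space sketch of the same idea, with invented names and CLOCK_MONOTONIC standing in for litmus_clock():

#include <stdio.h>
#include <time.h>
#include <stdbool.h>

static long long now_ns(void)
{
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static bool wait_for(volatile int *flag, long long timeout_ns)
{
        long long start = now_ns();

        while (*flag) {
                if (now_ns() - start > timeout_ns) {
                        fprintf(stderr, "still busy after %lld ns, bailing out\n",
                                timeout_ns);
                        return false;   /* caller must handle the failure */
                }
        }
        return true;
}

int main(void)
{
        volatile int in_use = 0;        /* stands in for stack_in_use */

        if (wait_for(&in_use, 10000000LL))      /* ~10 ms budget */
                printf("resource free, proceeding\n");
        return 0;
}
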
| 158 | |||
| 159 | static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, | ||
| 160 | int flags) | ||
| 161 | { | ||
| 162 | if (flags & ENQUEUE_WAKEUP) { | ||
| 163 | sched_trace_task_resume(p); | ||
| 164 | tsk_rt(p)->present = 1; | ||
| 165 | /* LITMUS^RT plugins need to update the state | ||
| 166 | * _before_ making it available in global structures. | ||
| 167 | * Linux gets away with being lazy about the task state | ||
| 168 | * update. We can't do that, hence we update the task | ||
| 169 | * state already here. | ||
| 170 | * | ||
| 171 | * WARNING: this needs to be re-evaluated when porting | ||
| 172 | * to newer kernel versions. | ||
| 173 | */ | ||
| 174 | p->state = TASK_RUNNING; | ||
| 175 | litmus->task_wake_up(p); | ||
| 176 | |||
| 177 | rq->litmus.nr_running++; | ||
| 178 | } else | ||
| 179 | TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); | ||
| 180 | } | ||
| 181 | |||
| 182 | static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, | ||
| 183 | int flags) | ||
| 184 | { | ||
| 185 | if (flags & DEQUEUE_SLEEP) { | ||
| 186 | litmus->task_block(p); | ||
| 187 | tsk_rt(p)->present = 0; | ||
| 188 | sched_trace_task_block(p); | ||
| 189 | |||
| 190 | rq->litmus.nr_running--; | ||
| 191 | } else | ||
| 192 | TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n"); | ||
| 193 | } | ||
| 194 | |||
| 195 | static void yield_task_litmus(struct rq *rq) | ||
| 196 | { | ||
| 197 | TS_SYSCALL_IN_START; | ||
| 198 | TS_SYSCALL_IN_END; | ||
| 199 | |||
| 200 | BUG_ON(rq->curr != current); | ||
| 201 | /* sched_yield() is called to trigger delayed preemptions. | ||
| 202 | * Thus, mark the current task as needing to be rescheduled. | ||
| 203 | * This will cause the scheduler plugin to be invoked, which can | ||
| 204 | * then determine if a preemption is still required. | ||
| 205 | */ | ||
| 206 | clear_exit_np(current); | ||
| 207 | litmus_reschedule_local(); | ||
| 208 | |||
| 209 | TS_SYSCALL_OUT_START; | ||
| 210 | } | ||
| 211 | |||
| 212 | /* Plugins are responsible for this. | ||
| 213 | */ | ||
| 214 | static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags) | ||
| 215 | { | ||
| 216 | } | ||
| 217 | |||
| 218 | static void put_prev_task_litmus(struct rq *rq, struct task_struct *p) | ||
| 219 | { | ||
| 220 | } | ||
| 221 | |||
| 222 | static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev) | ||
| 223 | { | ||
| 224 | update_time_litmus(rq, prev); | ||
| 225 | if (!is_running(prev)) | ||
| 226 | tsk_rt(prev)->present = 0; | ||
| 227 | } | ||
| 228 | |||
| 229 | /* pick_next_task_litmus() - hands off to litmus_schedule() | ||
| 230 | * | ||
| 231 | * Returns the next task to be scheduled. | ||
| 232 | */ | ||
| 233 | static struct task_struct *pick_next_task_litmus(struct rq *rq) | ||
| 234 | { | ||
| 235 | /* get the to-be-switched-out task (prev) */ | ||
| 236 | struct task_struct *prev = rq->litmus.prev; | ||
| 237 | struct task_struct *next; | ||
| 238 | |||
| 239 | /* if not called from schedule() but from somewhere | ||
| 240 | * else (e.g., migration), return now! | ||
| 241 | */ | ||
| 242 | if(!rq->litmus.prev) | ||
| 243 | return NULL; | ||
| 244 | |||
| 245 | rq->litmus.prev = NULL; | ||
| 246 | |||
| 247 | TS_PLUGIN_SCHED_START; | ||
| 248 | next = litmus_schedule(rq, prev); | ||
| 249 | TS_PLUGIN_SCHED_END; | ||
| 250 | |||
| 251 | return next; | ||
| 252 | } | ||
| 253 | |||
| 254 | static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued) | ||
| 255 | { | ||
| 256 | /* nothing to do; tick-related work is done by litmus_tick() */ | ||
| 257 | return; | ||
| 258 | } | ||
| 259 | |||
| 260 | static void switched_to_litmus(struct rq *rq, struct task_struct *p) | ||
| 261 | { | ||
| 262 | } | ||
| 263 | |||
| 264 | static void prio_changed_litmus(struct rq *rq, struct task_struct *p, | ||
| 265 | int oldprio) | ||
| 266 | { | ||
| 267 | } | ||
| 268 | |||
| 269 | unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p) | ||
| 270 | { | ||
| 271 | /* return infinity */ | ||
| 272 | return 0; | ||
| 273 | } | ||
| 274 | |||
| 275 | /* This is called when a task becomes a real-time task, either due to a SCHED_* | ||
| 276 | * class transition or due to PI mutex inheritance. We don't handle Linux PI | ||
| 277 | * mutex inheritance yet (and probably never will). Use LITMUS-provided | ||
| 278 | * synchronization primitives instead. | ||
| 279 | */ | ||
| 280 | static void set_curr_task_litmus(struct rq *rq) | ||
| 281 | { | ||
| 282 | rq->curr->se.exec_start = rq->clock; | ||
| 283 | } | ||
| 284 | |||
| 285 | |||
| 286 | #ifdef CONFIG_SMP | ||
| 287 | /* execve tries to rebalance the task in this scheduling domain. | ||
| 288 | * We don't care about the scheduling domain; this can get called from | ||
| 289 | * exec, fork, and wakeup. | ||
| 290 | */ | ||
| 291 | static int | ||
| 292 | select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags) | ||
| 293 | { | ||
| 294 | /* preemption is already disabled. | ||
| 295 | * We don't want to change cpu here | ||
| 296 | */ | ||
| 297 | return task_cpu(p); | ||
| 298 | } | ||
| 299 | #endif | ||
| 300 | |||
| 301 | static const struct sched_class litmus_sched_class = { | ||
| 302 | /* Since commit 34f971f6 the stop/migrate worker threads have a class of | ||
| 303 | * their own, which is the highest-priority class. We don't support | ||
| 304 | * CPU hotplug or CPU throttling. This allows LITMUS to use up to 1.0 | ||
| 305 | * CPU capacity. | ||
| 306 | */ | ||
| 307 | .next = &stop_sched_class, | ||
| 308 | .enqueue_task = enqueue_task_litmus, | ||
| 309 | .dequeue_task = dequeue_task_litmus, | ||
| 310 | .yield_task = yield_task_litmus, | ||
| 311 | |||
| 312 | .check_preempt_curr = check_preempt_curr_litmus, | ||
| 313 | |||
| 314 | .pick_next_task = pick_next_task_litmus, | ||
| 315 | .put_prev_task = put_prev_task_litmus, | ||
| 316 | |||
| 317 | #ifdef CONFIG_SMP | ||
| 318 | .select_task_rq = select_task_rq_litmus, | ||
| 319 | |||
| 320 | .pre_schedule = pre_schedule_litmus, | ||
| 321 | #endif | ||
| 322 | |||
| 323 | .set_curr_task = set_curr_task_litmus, | ||
| 324 | .task_tick = task_tick_litmus, | ||
| 325 | |||
| 326 | .get_rr_interval = get_rr_interval_litmus, | ||
| 327 | |||
| 328 | .prio_changed = prio_changed_litmus, | ||
| 329 | .switched_to = switched_to_litmus, | ||
| 330 | }; | ||
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c new file mode 100644 index 00000000000..d5fb3a832ad --- /dev/null +++ b/litmus/sched_pfair.c | |||
| @@ -0,0 +1,1074 @@ | |||
| 1 | /* | ||
| 2 | * kernel/sched_pfair.c | ||
| 3 | * | ||
| 4 | * Implementation of the PD^2 pfair scheduling algorithm. This | ||
| 5 | * implementation realizes "early releasing," i.e., it is work-conserving. | ||
| 6 | * | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <asm/div64.h> | ||
| 10 | #include <linux/delay.h> | ||
| 11 | #include <linux/module.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/percpu.h> | ||
| 14 | #include <linux/sched.h> | ||
| 15 | #include <linux/list.h> | ||
| 16 | #include <linux/slab.h> | ||
| 17 | |||
| 18 | #include <litmus/litmus.h> | ||
| 19 | #include <litmus/jobs.h> | ||
| 20 | #include <litmus/preempt.h> | ||
| 21 | #include <litmus/rt_domain.h> | ||
| 22 | #include <litmus/sched_plugin.h> | ||
| 23 | #include <litmus/sched_trace.h> | ||
| 24 | |||
| 25 | #include <litmus/bheap.h> | ||
| 26 | |||
| 27 | /* to configure the cluster size */ | ||
| 28 | #include <litmus/litmus_proc.h> | ||
| 29 | |||
| 30 | #include <litmus/clustered.h> | ||
| 31 | |||
| 32 | static enum cache_level pfair_cluster_level = GLOBAL_CLUSTER; | ||
| 33 | |||
| 34 | struct subtask { | ||
| 35 | /* measured in quanta relative to job release */ | ||
| 36 | quanta_t release; | ||
| 37 | quanta_t deadline; | ||
| 38 | quanta_t overlap; /* called "b bit" by PD^2 */ | ||
| 39 | quanta_t group_deadline; | ||
| 40 | }; | ||
| 41 | |||
| 42 | struct pfair_param { | ||
| 43 | quanta_t quanta; /* number of subtasks */ | ||
| 44 | quanta_t cur; /* index of current subtask */ | ||
| 45 | |||
| 46 | quanta_t release; /* in quanta */ | ||
| 47 | quanta_t period; /* in quanta */ | ||
| 48 | |||
| 49 | quanta_t last_quantum; /* when scheduled last */ | ||
| 50 | int last_cpu; /* where scheduled last */ | ||
| 51 | |||
| 52 | struct pfair_cluster* cluster; /* where this task is scheduled */ | ||
| 53 | |||
| 54 | struct subtask subtasks[0]; /* allocate together with pfair_param */ | ||
| 55 | }; | ||
| 56 | |||
| 57 | #define tsk_pfair(tsk) ((tsk)->rt_param.pfair) | ||
| 58 | |||
| 59 | struct pfair_state { | ||
| 60 | struct cluster_cpu topology; | ||
| 61 | |||
| 62 | volatile quanta_t cur_tick; /* updated by the CPU that is advancing | ||
| 63 | * the time */ | ||
| 64 | volatile quanta_t local_tick; /* What tick is the local CPU currently | ||
| 65 | * executing? Updated only by the local | ||
| 66 | * CPU. In QEMU, this may lag behind the | ||
| 67 | * current tick. In a real system, with | ||
| 68 | * proper timers and aligned quanta, | ||
| 69 | * that should only be the case for a | ||
| 70 | * very short time after the time | ||
| 71 | * advanced. With staggered quanta, it | ||
| 72 | * will lag for the duration of the | ||
| 73 | * offset. | ||
| 74 | */ | ||
| 75 | |||
| 76 | struct task_struct* linked; /* the task that should be executing */ | ||
| 77 | struct task_struct* local; /* the local copy of linked */ | ||
| 78 | struct task_struct* scheduled; /* what is actually scheduled */ | ||
| 79 | |||
| 80 | lt_t offset; /* stagger offset */ | ||
| 81 | unsigned int missed_updates; | ||
| 82 | unsigned int missed_quanta; | ||
| 83 | }; | ||
| 84 | |||
| 85 | struct pfair_cluster { | ||
| 86 | struct scheduling_cluster topology; | ||
| 87 | |||
| 88 | /* The "global" time in this cluster. */ | ||
| 89 | quanta_t pfair_time; /* the "official" PFAIR clock */ | ||
| 90 | |||
| 91 | /* The ready queue for this cluster. */ | ||
| 92 | rt_domain_t pfair; | ||
| 93 | |||
| 94 | /* The set of jobs that should have their release enacted at the next | ||
| 95 | * quantum boundary. | ||
| 96 | */ | ||
| 97 | struct bheap release_queue; | ||
| 98 | raw_spinlock_t release_lock; | ||
| 99 | }; | ||
| 100 | |||
| 101 | #define RT_F_REQUEUE 0x2 | ||
| 102 | |||
| 103 | static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state) | ||
| 104 | { | ||
| 105 | return container_of(state->topology.cluster, struct pfair_cluster, topology); | ||
| 106 | } | ||
| 107 | |||
| 108 | static inline int cpu_id(struct pfair_state* state) | ||
| 109 | { | ||
| 110 | return state->topology.id; | ||
| 111 | } | ||
| 112 | |||
| 113 | static inline struct pfair_state* from_cluster_list(struct list_head* pos) | ||
| 114 | { | ||
| 115 | return list_entry(pos, struct pfair_state, topology.cluster_list); | ||
| 116 | } | ||
| 117 | |||
| 118 | static inline struct pfair_cluster* from_domain(rt_domain_t* rt) | ||
| 119 | { | ||
| 120 | return container_of(rt, struct pfair_cluster, pfair); | ||
| 121 | } | ||
| 122 | |||
| 123 | static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster) | ||
| 124 | { | ||
| 125 | /* The ready_lock is used to serialize all scheduling events. */ | ||
| 126 | return &cluster->pfair.ready_lock; | ||
| 127 | } | ||
| 128 | |||
| 129 | static inline raw_spinlock_t* cpu_lock(struct pfair_state* state) | ||
| 130 | { | ||
| 131 | return cluster_lock(cpu_cluster(state)); | ||
| 132 | } | ||
| 133 | |||
| 134 | DEFINE_PER_CPU(struct pfair_state, pfair_state); | ||
| 135 | struct pfair_state* *pstate; /* short cut */ | ||
| 136 | |||
| 137 | static struct pfair_cluster* pfair_clusters; | ||
| 138 | static int num_pfair_clusters; | ||
| 139 | |||
| 140 | /* Enable for lots of trace info. | ||
| 141 | * #define PFAIR_DEBUG | ||
| 142 | */ | ||
| 143 | |||
| 144 | #ifdef PFAIR_DEBUG | ||
| 145 | #define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args) | ||
| 146 | #define PTRACE(f, args...) TRACE(f, ## args) | ||
| 147 | #else | ||
| 148 | #define PTRACE_TASK(t, f, args...) | ||
| 149 | #define PTRACE(f, args...) | ||
| 150 | #endif | ||
| 151 | |||
| 152 | /* gcc will inline all of these accessor functions... */ | ||
| 153 | static struct subtask* cur_subtask(struct task_struct* t) | ||
| 154 | { | ||
| 155 | return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur; | ||
| 156 | } | ||
| 157 | |||
| 158 | static quanta_t cur_deadline(struct task_struct* t) | ||
| 159 | { | ||
| 160 | return cur_subtask(t)->deadline + tsk_pfair(t)->release; | ||
| 161 | } | ||
| 162 | |||
| 163 | static quanta_t cur_release(struct task_struct* t) | ||
| 164 | { | ||
| 165 | /* This is early releasing: only the release of the first subtask | ||
| 166 | * counts. */ | ||
| 167 | return tsk_pfair(t)->release; | ||
| 168 | } | ||
| 169 | |||
| 170 | static quanta_t cur_overlap(struct task_struct* t) | ||
| 171 | { | ||
| 172 | return cur_subtask(t)->overlap; | ||
| 173 | } | ||
| 174 | |||
| 175 | static quanta_t cur_group_deadline(struct task_struct* t) | ||
| 176 | { | ||
| 177 | quanta_t gdl = cur_subtask(t)->group_deadline; | ||
| 178 | if (gdl) | ||
| 179 | return gdl + tsk_pfair(t)->release; | ||
| 180 | else | ||
| 181 | return gdl; | ||
| 182 | } | ||
| 183 | |||
| 184 | |||
| 185 | static int pfair_higher_prio(struct task_struct* first, | ||
| 186 | struct task_struct* second) | ||
| 187 | { | ||
| 188 | return /* first task must exist */ | ||
| 189 | first && ( | ||
| 190 | /* Does the second task exist and is it a real-time task? If | ||
| 191 | * not, the first task (which is a RT task) has higher | ||
| 192 | * priority. | ||
| 193 | */ | ||
| 194 | !second || !is_realtime(second) || | ||
| 195 | |||
| 196 | /* Is the (subtask) deadline of the first task earlier? | ||
| 197 | * Then it has higher priority. | ||
| 198 | */ | ||
| 199 | time_before(cur_deadline(first), cur_deadline(second)) || | ||
| 200 | |||
| 201 | /* Do we have a deadline tie? | ||
| 202 | * Then break by B-bit. | ||
| 203 | */ | ||
| 204 | (cur_deadline(first) == cur_deadline(second) && | ||
| 205 | (cur_overlap(first) > cur_overlap(second) || | ||
| 206 | |||
| 207 | /* Do we have a B-bit tie? | ||
| 208 | * Then break by group deadline. | ||
| 209 | */ | ||
| 210 | (cur_overlap(first) == cur_overlap(second) && | ||
| 211 | (time_after(cur_group_deadline(first), | ||
| 212 | cur_group_deadline(second)) || | ||
| 213 | |||
| 214 | /* Do we have a group deadline tie? | ||
| 215 | * Then break by PID, which are unique. | ||
| 216 | */ | ||
| 217 | (cur_group_deadline(first) == | ||
| 218 | cur_group_deadline(second) && | ||
| 219 | first->pid < second->pid)))))); | ||
| 220 | } | ||
| 221 | |||
| 222 | int pfair_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
| 223 | { | ||
| 224 | return pfair_higher_prio(bheap2task(a), bheap2task(b)); | ||
| 225 | } | ||
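pfair_higher_prio() above encodes the PD^2 tie-break chain: earlier subtask deadline first, then larger b-bit, then later group deadline, then lower PID. The standalone sketch below reproduces that chain on a made-up parameter struct (ignoring the time_before() wraparound handling the kernel code uses); it is an illustration, not the plugin's comparator.

/*
 * Sketch of the PD^2 tie-break chain: deadline, b-bit, group deadline, PID.
 * The struct and values are invented for the example.
 */
#include <stdio.h>

struct pd2_prio {
    unsigned long deadline;        /* subtask deadline (quanta) */
    unsigned long b_bit;           /* overlap ("b") bit */
    unsigned long group_deadline;  /* 0 for light tasks */
    int pid;
};

static int pd2_higher_prio(const struct pd2_prio *a, const struct pd2_prio *b)
{
    if (a->deadline != b->deadline)
        return a->deadline < b->deadline;          /* earlier deadline wins */
    if (a->b_bit != b->b_bit)
        return a->b_bit > b->b_bit;                /* larger b-bit wins */
    if (a->group_deadline != b->group_deadline)
        return a->group_deadline > b->group_deadline; /* later group deadline wins */
    return a->pid < b->pid;                        /* PID breaks the final tie */
}

int main(void)
{
    struct pd2_prio x = { .deadline = 4, .b_bit = 1, .group_deadline = 5, .pid = 100 };
    struct pd2_prio y = { .deadline = 4, .b_bit = 0, .group_deadline = 0, .pid = 99  };
    /* equal deadlines, so x wins on the b-bit */
    printf("x has higher priority: %d\n", pd2_higher_prio(&x, &y));
    return 0;
}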
| 226 | |||
| 227 | static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
| 228 | { | ||
| 229 | struct pfair_cluster* cluster = from_domain(rt); | ||
| 230 | unsigned long flags; | ||
| 231 | |||
| 232 | raw_spin_lock_irqsave(&cluster->release_lock, flags); | ||
| 233 | |||
| 234 | bheap_union(pfair_ready_order, &cluster->release_queue, tasks); | ||
| 235 | |||
| 236 | raw_spin_unlock_irqrestore(&cluster->release_lock, flags); | ||
| 237 | } | ||
| 238 | |||
| 239 | static void prepare_release(struct task_struct* t, quanta_t at) | ||
| 240 | { | ||
| 241 | tsk_pfair(t)->release = at; | ||
| 242 | tsk_pfair(t)->cur = 0; | ||
| 243 | } | ||
| 244 | |||
| 245 | /* pull released tasks from the release queue */ | ||
| 246 | static void poll_releases(struct pfair_cluster* cluster) | ||
| 247 | { | ||
| 248 | raw_spin_lock(&cluster->release_lock); | ||
| 249 | __merge_ready(&cluster->pfair, &cluster->release_queue); | ||
| 250 | raw_spin_unlock(&cluster->release_lock); | ||
| 251 | } | ||
| 252 | |||
| 253 | static void check_preempt(struct task_struct* t) | ||
| 254 | { | ||
| 255 | int cpu = NO_CPU; | ||
| 256 | if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on && | ||
| 257 | is_present(t)) { | ||
| 258 | /* the task can be scheduled and | ||
| 259 | * is not scheduled where it ought to be scheduled | ||
| 260 | */ | ||
| 261 | cpu = tsk_rt(t)->linked_on != NO_CPU ? | ||
| 262 | tsk_rt(t)->linked_on : | ||
| 263 | tsk_rt(t)->scheduled_on; | ||
| 264 | PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n", | ||
| 265 | tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on); | ||
| 266 | /* preempt */ | ||
| 267 | litmus_reschedule(cpu); | ||
| 268 | } | ||
| 269 | } | ||
| 270 | |||
| 271 | /* caller must hold pfair.ready_lock */ | ||
| 272 | static void drop_all_references(struct task_struct *t) | ||
| 273 | { | ||
| 274 | int cpu; | ||
| 275 | struct pfair_state* s; | ||
| 276 | struct pfair_cluster* cluster; | ||
| 277 | if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { | ||
| 278 | /* It must be in the ready queue; drop_all_references() isn't called | ||
| 279 | * when the job is in a release queue. */ | ||
| 280 | cluster = tsk_pfair(t)->cluster; | ||
| 281 | bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue, | ||
| 282 | tsk_rt(t)->heap_node); | ||
| 283 | } | ||
| 284 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | ||
| 285 | s = &per_cpu(pfair_state, cpu); | ||
| 286 | if (s->linked == t) | ||
| 287 | s->linked = NULL; | ||
| 288 | if (s->local == t) | ||
| 289 | s->local = NULL; | ||
| 290 | if (s->scheduled == t) | ||
| 291 | s->scheduled = NULL; | ||
| 292 | } | ||
| 293 | /* make sure we don't have a stale linked_on field */ | ||
| 294 | tsk_rt(t)->linked_on = NO_CPU; | ||
| 295 | } | ||
| 296 | |||
| 297 | static void pfair_prepare_next_period(struct task_struct* t) | ||
| 298 | { | ||
| 299 | struct pfair_param* p = tsk_pfair(t); | ||
| 300 | |||
| 301 | prepare_for_next_period(t); | ||
| 302 | tsk_rt(t)->completed = 0; | ||
| 303 | p->release += p->period; | ||
| 304 | } | ||
| 305 | |||
| 306 | /* returns 1 if the task needs to go the release queue */ | ||
| 307 | static int advance_subtask(quanta_t time, struct task_struct* t, int cpu) | ||
| 308 | { | ||
| 309 | struct pfair_param* p = tsk_pfair(t); | ||
| 310 | int to_relq; | ||
| 311 | p->cur = (p->cur + 1) % p->quanta; | ||
| 312 | if (!p->cur) { | ||
| 313 | if (is_present(t)) { | ||
| 314 | /* The job overran; we start a new budget allocation. */ | ||
| 315 | pfair_prepare_next_period(t); | ||
| 316 | } else { | ||
| 317 | /* remove task from system until it wakes */ | ||
| 318 | drop_all_references(t); | ||
| 319 | tsk_rt(t)->flags = RT_F_REQUEUE; | ||
| 320 | TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n", | ||
| 321 | cpu, p->cur); | ||
| 322 | return 0; | ||
| 323 | } | ||
| 324 | } | ||
| 325 | to_relq = time_after(cur_release(t), time); | ||
| 326 | TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n", | ||
| 327 | cpu, p->cur, to_relq, cur_release(t), time); | ||
| 328 | return to_relq; | ||
| 329 | } | ||
| 330 | |||
| 331 | static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time) | ||
| 332 | { | ||
| 333 | struct task_struct* l; | ||
| 334 | struct pfair_param* p; | ||
| 335 | struct list_head* pos; | ||
| 336 | struct pfair_state* cpu; | ||
| 337 | |||
| 338 | list_for_each(pos, &cluster->topology.cpus) { | ||
| 339 | cpu = from_cluster_list(pos); | ||
| 340 | l = cpu->linked; | ||
| 341 | cpu->missed_updates += cpu->linked != cpu->local; | ||
| 342 | if (l) { | ||
| 343 | p = tsk_pfair(l); | ||
| 344 | p->last_quantum = time; | ||
| 345 | p->last_cpu = cpu_id(cpu); | ||
| 346 | if (advance_subtask(time, l, cpu_id(cpu))) { | ||
| 347 | //cpu->linked = NULL; | ||
| 348 | PTRACE_TASK(l, "should go to release queue. " | ||
| 349 | "scheduled_on=%d present=%d\n", | ||
| 350 | tsk_rt(l)->scheduled_on, | ||
| 351 | tsk_rt(l)->present); | ||
| 352 | } | ||
| 353 | } | ||
| 354 | } | ||
| 355 | } | ||
| 356 | |||
| 357 | static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu) | ||
| 358 | { | ||
| 359 | int cpu; | ||
| 360 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
| 361 | /* always observe scheduled_on linkage */ | ||
| 362 | default_cpu = tsk_rt(t)->scheduled_on; | ||
| 363 | } else if (tsk_pfair(t)->last_quantum == time - 1) { | ||
| 364 | /* back2back quanta */ | ||
| 365 | /* Only observe last_quantum if no scheduled_on is in the way. | ||
| 366 | * This should only kick in if a CPU missed quanta, and that | ||
| 367 | * *should* only happen in QEMU. | ||
| 368 | */ | ||
| 369 | cpu = tsk_pfair(t)->last_cpu; | ||
| 370 | if (!pstate[cpu]->linked || | ||
| 371 | tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) { | ||
| 372 | default_cpu = cpu; | ||
| 373 | } | ||
| 374 | } | ||
| 375 | return default_cpu; | ||
| 376 | } | ||
| 377 | |||
| 378 | /* returns one if linking was redirected */ | ||
| 379 | static int pfair_link(quanta_t time, int cpu, | ||
| 380 | struct task_struct* t) | ||
| 381 | { | ||
| 382 | int target = target_cpu(time, t, cpu); | ||
| 383 | struct task_struct* prev = pstate[cpu]->linked; | ||
| 384 | struct task_struct* other; | ||
| 385 | struct pfair_cluster* cluster = cpu_cluster(pstate[cpu]); | ||
| 386 | |||
| 387 | if (target != cpu) { | ||
| 388 | BUG_ON(pstate[target]->topology.cluster != pstate[cpu]->topology.cluster); | ||
| 389 | other = pstate[target]->linked; | ||
| 390 | pstate[target]->linked = t; | ||
| 391 | tsk_rt(t)->linked_on = target; | ||
| 392 | if (!other) | ||
| 393 | /* linked ok, but reschedule this CPU */ | ||
| 394 | return 1; | ||
| 395 | if (target < cpu) { | ||
| 396 | /* link other to cpu instead */ | ||
| 397 | tsk_rt(other)->linked_on = cpu; | ||
| 398 | pstate[cpu]->linked = other; | ||
| 399 | if (prev) { | ||
| 400 | /* prev got pushed back into the ready queue */ | ||
| 401 | tsk_rt(prev)->linked_on = NO_CPU; | ||
| 402 | __add_ready(&cluster->pfair, prev); | ||
| 403 | } | ||
| 404 | /* we are done with this cpu */ | ||
| 405 | return 0; | ||
| 406 | } else { | ||
| 407 | /* re-add other; its original CPU was not considered yet */ | ||
| 408 | tsk_rt(other)->linked_on = NO_CPU; | ||
| 409 | __add_ready(&cluster->pfair, other); | ||
| 410 | /* reschedule this CPU */ | ||
| 411 | return 1; | ||
| 412 | } | ||
| 413 | } else { | ||
| 414 | pstate[cpu]->linked = t; | ||
| 415 | tsk_rt(t)->linked_on = cpu; | ||
| 416 | if (prev) { | ||
| 417 | /* prev got pushed back into the ready queue */ | ||
| 418 | tsk_rt(prev)->linked_on = NO_CPU; | ||
| 419 | __add_ready(&cluster->pfair, prev); | ||
| 420 | } | ||
| 421 | /* we are done with this CPU */ | ||
| 422 | return 0; | ||
| 423 | } | ||
| 424 | } | ||
| 425 | |||
| 426 | static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time) | ||
| 427 | { | ||
| 428 | int retry; | ||
| 429 | struct list_head *pos; | ||
| 430 | struct pfair_state *cpu_state; | ||
| 431 | |||
| 432 | list_for_each(pos, &cluster->topology.cpus) { | ||
| 433 | cpu_state = from_cluster_list(pos); | ||
| 434 | retry = 1; | ||
| 435 | #ifdef CONFIG_RELEASE_MASTER | ||
| 436 | /* skip release master */ | ||
| 437 | if (cluster->pfair.release_master == cpu_id(cpu_state)) | ||
| 438 | continue; | ||
| 439 | #endif | ||
| 440 | while (retry) { | ||
| 441 | if (pfair_higher_prio(__peek_ready(&cluster->pfair), | ||
| 442 | cpu_state->linked)) | ||
| 443 | retry = pfair_link(time, cpu_id(cpu_state), | ||
| 444 | __take_ready(&cluster->pfair)); | ||
| 445 | else | ||
| 446 | retry = 0; | ||
| 447 | } | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 451 | static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time) | ||
| 452 | { | ||
| 453 | struct pfair_state *cpu; | ||
| 454 | struct list_head* pos; | ||
| 455 | |||
| 456 | /* called with interrupts disabled */ | ||
| 457 | PTRACE("--- Q %lu at %llu PRE-SPIN\n", | ||
| 458 | time, litmus_clock()); | ||
| 459 | raw_spin_lock(cluster_lock(cluster)); | ||
| 460 | PTRACE("<<< Q %lu at %llu\n", | ||
| 461 | time, litmus_clock()); | ||
| 462 | |||
| 463 | sched_trace_quantum_boundary(); | ||
| 464 | |||
| 465 | advance_subtasks(cluster, time); | ||
| 466 | poll_releases(cluster); | ||
| 467 | schedule_subtasks(cluster, time); | ||
| 468 | |||
| 469 | list_for_each(pos, &cluster->topology.cpus) { | ||
| 470 | cpu = from_cluster_list(pos); | ||
| 471 | if (cpu->linked) | ||
| 472 | PTRACE_TASK(cpu->linked, | ||
| 473 | " linked on %d.\n", cpu_id(cpu)); | ||
| 474 | else | ||
| 475 | PTRACE("(null) linked on %d.\n", cpu_id(cpu)); | ||
| 476 | } | ||
| 477 | /* We are done. Advance time. */ | ||
| 478 | mb(); | ||
| 479 | list_for_each(pos, &cluster->topology.cpus) { | ||
| 480 | cpu = from_cluster_list(pos); | ||
| 481 | if (cpu->local_tick != cpu->cur_tick) { | ||
| 482 | TRACE("BAD Quantum not acked on %d " | ||
| 483 | "(l:%lu c:%lu p:%lu)\n", | ||
| 484 | cpu_id(cpu), | ||
| 485 | cpu->local_tick, | ||
| 486 | cpu->cur_tick, | ||
| 487 | cluster->pfair_time); | ||
| 488 | cpu->missed_quanta++; | ||
| 489 | } | ||
| 490 | cpu->cur_tick = time; | ||
| 491 | } | ||
| 492 | PTRACE(">>> Q %lu at %llu\n", | ||
| 493 | time, litmus_clock()); | ||
| 494 | raw_spin_unlock(cluster_lock(cluster)); | ||
| 495 | } | ||
| 496 | |||
| 497 | static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state) | ||
| 498 | { | ||
| 499 | quanta_t loc; | ||
| 500 | |||
| 501 | goto first; /* skip mb() on first iteration */ | ||
| 502 | do { | ||
| 503 | cpu_relax(); | ||
| 504 | mb(); | ||
| 505 | first: loc = state->cur_tick; | ||
| 506 | /* FIXME: what if loc > cur? */ | ||
| 507 | } while (time_before(loc, q)); | ||
| 508 | PTRACE("observed cur_tick:%lu >= q:%lu\n", | ||
| 509 | loc, q); | ||
| 510 | } | ||
| 511 | |||
| 512 | static quanta_t current_quantum(struct pfair_state* state) | ||
| 513 | { | ||
| 514 | lt_t t = litmus_clock() - state->offset; | ||
| 515 | return time2quanta(t, FLOOR); | ||
| 516 | } | ||
| 517 | |||
| 518 | static void catchup_quanta(quanta_t from, quanta_t target, | ||
| 519 | struct pfair_state* state) | ||
| 520 | { | ||
| 521 | quanta_t cur = from, time; | ||
| 522 | TRACE("+++< BAD catching up quanta from %lu to %lu\n", | ||
| 523 | from, target); | ||
| 524 | while (time_before(cur, target)) { | ||
| 525 | wait_for_quantum(cur, state); | ||
| 526 | cur++; | ||
| 527 | time = cmpxchg(&cpu_cluster(state)->pfair_time, | ||
| 528 | cur - 1, /* expected */ | ||
| 529 | cur /* next */ | ||
| 530 | ); | ||
| 531 | if (time == cur - 1) | ||
| 532 | schedule_next_quantum(cpu_cluster(state), cur); | ||
| 533 | } | ||
| 534 | TRACE("+++> catching up done\n"); | ||
| 535 | } | ||
| 536 | |||
| 537 | /* pfair_tick - this function is called for every local timer | ||
| 538 | * interrupt. | ||
| 539 | */ | ||
| 540 | static void pfair_tick(struct task_struct* t) | ||
| 541 | { | ||
| 542 | struct pfair_state* state = &__get_cpu_var(pfair_state); | ||
| 543 | quanta_t time, cur; | ||
| 544 | int retry = 10; | ||
| 545 | |||
| 546 | do { | ||
| 547 | cur = current_quantum(state); | ||
| 548 | PTRACE("q %lu at %llu\n", cur, litmus_clock()); | ||
| 549 | |||
| 550 | /* Attempt to advance time. First CPU to get here | ||
| 551 | * will prepare the next quantum. | ||
| 552 | */ | ||
| 553 | time = cmpxchg(&cpu_cluster(state)->pfair_time, | ||
| 554 | cur - 1, /* expected */ | ||
| 555 | cur /* next */ | ||
| 556 | ); | ||
| 557 | if (time == cur - 1) { | ||
| 558 | /* exchange succeeded */ | ||
| 559 | wait_for_quantum(cur - 1, state); | ||
| 560 | schedule_next_quantum(cpu_cluster(state), cur); | ||
| 561 | retry = 0; | ||
| 562 | } else if (time_before(time, cur - 1)) { | ||
| 563 | /* the whole system missed a tick!? */ | ||
| 564 | catchup_quanta(time, cur, state); | ||
| 565 | retry--; | ||
| 566 | } else if (time_after(time, cur)) { | ||
| 567 | /* our timer lagging behind!? */ | ||
| 568 | TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur); | ||
| 569 | retry--; | ||
| 570 | } else { | ||
| 571 | /* Some other CPU already started scheduling | ||
| 572 | * this quantum. Let it do its job and then update. | ||
| 573 | */ | ||
| 574 | retry = 0; | ||
| 575 | } | ||
| 576 | } while (retry); | ||
| 577 | |||
| 578 | /* Spin locally until time advances. */ | ||
| 579 | wait_for_quantum(cur, state); | ||
| 580 | |||
| 581 | /* copy assignment */ | ||
| 582 | /* FIXME: what if we race with a future update? Corrupted state? */ | ||
| 583 | state->local = state->linked; | ||
| 584 | /* signal that we are done */ | ||
| 585 | mb(); | ||
| 586 | state->local_tick = state->cur_tick; | ||
| 587 | |||
| 588 | if (state->local != current | ||
| 589 | && (is_realtime(current) || is_present(state->local))) | ||
| 590 | litmus_reschedule_local(); | ||
| 591 | } | ||
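The cmpxchg on pfair_time in pfair_tick() lets exactly one CPU per quantum boundary win the right to call schedule_next_quantum(); every other CPU either spins in wait_for_quantum() or falls into the catch-up path. A stripped-down C11 model of that "first CPU to advance the clock wins" step is sketched below; the names are invented for the example and atomic_compare_exchange_strong() stands in for the kernel's cmpxchg().

/*
 * Toy model: two CPUs race to advance the per-cluster quantum clock.
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long toy_pfair_time = 41;  /* last scheduled quantum */

/* Returns 1 iff this caller advanced the clock from cur-1 to cur. */
static int try_advance(unsigned long cur)
{
    unsigned long expected = cur - 1;
    return atomic_compare_exchange_strong(&toy_pfair_time, &expected, cur);
}

int main(void)
{
    /* Two hypothetical CPUs observe the same quantum boundary (cur = 42). */
    printf("cpu0 schedules quantum 42: %d\n", try_advance(42)); /* 1: it wins */
    printf("cpu1 schedules quantum 42: %d\n", try_advance(42)); /* 0: already advanced */
    return 0;
}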
| 592 | |||
| 593 | static int safe_to_schedule(struct task_struct* t, int cpu) | ||
| 594 | { | ||
| 595 | int where = tsk_rt(t)->scheduled_on; | ||
| 596 | if (where != NO_CPU && where != cpu) { | ||
| 597 | TRACE_TASK(t, "BAD: can't be scheduled on %d, " | ||
| 598 | "scheduled already on %d.\n", cpu, where); | ||
| 599 | return 0; | ||
| 600 | } else | ||
| 601 | return is_present(t) && !is_completed(t); | ||
| 602 | } | ||
| 603 | |||
| 604 | static struct task_struct* pfair_schedule(struct task_struct * prev) | ||
| 605 | { | ||
| 606 | struct pfair_state* state = &__get_cpu_var(pfair_state); | ||
| 607 | struct pfair_cluster* cluster = cpu_cluster(state); | ||
| 608 | int blocks, completion, out_of_time; | ||
| 609 | struct task_struct* next = NULL; | ||
| 610 | |||
| 611 | #ifdef CONFIG_RELEASE_MASTER | ||
| 612 | /* Bail out early if we are the release master. | ||
| 613 | * The release master never schedules any real-time tasks. | ||
| 614 | */ | ||
| 615 | if (unlikely(cluster->pfair.release_master == cpu_id(state))) { | ||
| 616 | sched_state_task_picked(); | ||
| 617 | return NULL; | ||
| 618 | } | ||
| 619 | #endif | ||
| 620 | |||
| 621 | raw_spin_lock(cpu_lock(state)); | ||
| 622 | |||
| 623 | blocks = is_realtime(prev) && !is_running(prev); | ||
| 624 | completion = is_realtime(prev) && is_completed(prev); | ||
| 625 | out_of_time = is_realtime(prev) && time_after(cur_release(prev), | ||
| 626 | state->local_tick); | ||
| 627 | |||
| 628 | if (is_realtime(prev)) | ||
| 629 | PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n", | ||
| 630 | blocks, completion, out_of_time); | ||
| 631 | |||
| 632 | if (completion) { | ||
| 633 | sched_trace_task_completion(prev, 0); | ||
| 634 | pfair_prepare_next_period(prev); | ||
| 635 | prepare_release(prev, cur_release(prev)); | ||
| 636 | } | ||
| 637 | |||
| 638 | if (!blocks && (completion || out_of_time)) { | ||
| 639 | drop_all_references(prev); | ||
| 640 | sched_trace_task_release(prev); | ||
| 641 | add_release(&cluster->pfair, prev); | ||
| 642 | } | ||
| 643 | |||
| 644 | if (state->local && safe_to_schedule(state->local, cpu_id(state))) | ||
| 645 | next = state->local; | ||
| 646 | |||
| 647 | if (prev != next) { | ||
| 648 | tsk_rt(prev)->scheduled_on = NO_CPU; | ||
| 649 | if (next) | ||
| 650 | tsk_rt(next)->scheduled_on = cpu_id(state); | ||
| 651 | } | ||
| 652 | sched_state_task_picked(); | ||
| 653 | raw_spin_unlock(cpu_lock(state)); | ||
| 654 | |||
| 655 | if (next) | ||
| 656 | TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n", | ||
| 657 | tsk_pfair(next)->release, cpu_cluster(state)->pfair_time, litmus_clock()); | ||
| 658 | else if (is_realtime(prev)) | ||
| 659 | TRACE("Becomes idle at %lu (%llu)\n", cpu_cluster(state)->pfair_time, litmus_clock()); | ||
| 660 | |||
| 661 | return next; | ||
| 662 | } | ||
| 663 | |||
| 664 | static void pfair_task_new(struct task_struct * t, int on_rq, int running) | ||
| 665 | { | ||
| 666 | unsigned long flags; | ||
| 667 | struct pfair_cluster* cluster; | ||
| 668 | |||
| 669 | TRACE("pfair: task new %d state:%d\n", t->pid, t->state); | ||
| 670 | |||
| 671 | cluster = tsk_pfair(t)->cluster; | ||
| 672 | |||
| 673 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); | ||
| 674 | |||
| 675 | prepare_release(t, cluster->pfair_time + 1); | ||
| 676 | |||
| 677 | t->rt_param.scheduled_on = NO_CPU; | ||
| 678 | |||
| 679 | if (running) { | ||
| 680 | #ifdef CONFIG_RELEASE_MASTER | ||
| 681 | if (task_cpu(t) != cluster->pfair.release_master) | ||
| 682 | #endif | ||
| 683 | t->rt_param.scheduled_on = task_cpu(t); | ||
| 684 | __add_ready(&cluster->pfair, t); | ||
| 685 | } | ||
| 686 | |||
| 687 | check_preempt(t); | ||
| 688 | |||
| 689 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | ||
| 690 | } | ||
| 691 | |||
| 692 | static void pfair_task_wake_up(struct task_struct *t) | ||
| 693 | { | ||
| 694 | unsigned long flags; | ||
| 695 | lt_t now; | ||
| 696 | int requeue = 0; | ||
| 697 | struct pfair_cluster* cluster; | ||
| 698 | |||
| 699 | cluster = tsk_pfair(t)->cluster; | ||
| 700 | |||
| 701 | TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n", | ||
| 702 | litmus_clock(), cur_release(t), cluster->pfair_time); | ||
| 703 | |||
| 704 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); | ||
| 705 | |||
| 706 | /* If a task blocks and wakes before its next job release, | ||
| 707 | * then it may resume if it is currently linked somewhere | ||
| 708 | * (as if it never blocked at all). Otherwise, we have a | ||
| 709 | * new sporadic job release. | ||
| 710 | */ | ||
| 711 | requeue = tsk_rt(t)->flags == RT_F_REQUEUE; | ||
| 712 | now = litmus_clock(); | ||
| 713 | if (is_tardy(t, now)) { | ||
| 714 | TRACE_TASK(t, "sporadic release!\n"); | ||
| 715 | release_at(t, now); | ||
| 716 | prepare_release(t, time2quanta(now, CEIL)); | ||
| 717 | sched_trace_task_release(t); | ||
| 718 | } | ||
| 719 | |||
| 720 | /* only add to ready queue if the task isn't still linked somewhere */ | ||
| 721 | if (requeue) { | ||
| 722 | TRACE_TASK(t, "requeueing required\n"); | ||
| 723 | tsk_rt(t)->completed = 0; | ||
| 724 | __add_ready(&cluster->pfair, t); | ||
| 725 | } | ||
| 726 | |||
| 727 | check_preempt(t); | ||
| 728 | |||
| 729 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | ||
| 730 | TRACE_TASK(t, "wake up done at %llu\n", litmus_clock()); | ||
| 731 | } | ||
| 732 | |||
| 733 | static void pfair_task_block(struct task_struct *t) | ||
| 734 | { | ||
| 735 | BUG_ON(!is_realtime(t)); | ||
| 736 | TRACE_TASK(t, "blocks at %llu, state:%d\n", | ||
| 737 | litmus_clock(), t->state); | ||
| 738 | } | ||
| 739 | |||
| 740 | static void pfair_task_exit(struct task_struct * t) | ||
| 741 | { | ||
| 742 | unsigned long flags; | ||
| 743 | struct pfair_cluster *cluster; | ||
| 744 | |||
| 745 | BUG_ON(!is_realtime(t)); | ||
| 746 | |||
| 747 | cluster = tsk_pfair(t)->cluster; | ||
| 748 | |||
| 749 | /* Remove the task from the release or ready queue, and ensure | ||
| 750 | * that it is not the scheduled task for ANY CPU. We | ||
| 751 | * do this blanket check because occasionally, when | ||
| 752 | * tasks exit while blocked, the task_cpu of the task | ||
| 753 | * might not be the same as the CPU that the PFAIR scheduler | ||
| 754 | * has chosen for it. | ||
| 755 | */ | ||
| 756 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); | ||
| 757 | |||
| 758 | TRACE_TASK(t, "RIP, state:%d\n", t->state); | ||
| 759 | drop_all_references(t); | ||
| 760 | |||
| 761 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | ||
| 762 | |||
| 763 | kfree(t->rt_param.pfair); | ||
| 764 | t->rt_param.pfair = NULL; | ||
| 765 | } | ||
| 766 | |||
| 767 | |||
| 768 | static void pfair_release_at(struct task_struct* task, lt_t start) | ||
| 769 | { | ||
| 770 | unsigned long flags; | ||
| 771 | quanta_t release; | ||
| 772 | |||
| 773 | struct pfair_cluster *cluster; | ||
| 774 | |||
| 775 | cluster = tsk_pfair(task)->cluster; | ||
| 776 | |||
| 777 | BUG_ON(!is_realtime(task)); | ||
| 778 | |||
| 779 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); | ||
| 780 | release_at(task, start); | ||
| 781 | release = time2quanta(start, CEIL); | ||
| 782 | |||
| 783 | TRACE_TASK(task, "sys release at %lu\n", release); | ||
| 784 | |||
| 785 | drop_all_references(task); | ||
| 786 | prepare_release(task, release); | ||
| 787 | add_release(&cluster->pfair, task); | ||
| 788 | |||
| 789 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | ||
| 790 | } | ||
| 791 | |||
| 792 | static void init_subtask(struct subtask* sub, unsigned long i, | ||
| 793 | lt_t quanta, lt_t period) | ||
| 794 | { | ||
| 795 | /* since i is zero-based, the formulas are shifted by one */ | ||
| 796 | lt_t tmp; | ||
| 797 | |||
| 798 | /* release */ | ||
| 799 | tmp = period * i; | ||
| 800 | do_div(tmp, quanta); /* floor */ | ||
| 801 | sub->release = (quanta_t) tmp; | ||
| 802 | |||
| 803 | /* deadline */ | ||
| 804 | tmp = period * (i + 1); | ||
| 805 | if (do_div(tmp, quanta)) /* ceil */ | ||
| 806 | tmp++; | ||
| 807 | sub->deadline = (quanta_t) tmp; | ||
| 808 | |||
| 809 | /* next release */ | ||
| 810 | tmp = period * (i + 1); | ||
| 811 | do_div(tmp, quanta); /* floor */ | ||
| 812 | sub->overlap = sub->deadline - (quanta_t) tmp; | ||
| 813 | |||
| 814 | /* Group deadline. | ||
| 815 | * Based on the formula given in Uma's thesis. | ||
| 816 | */ | ||
| 817 | if (2 * quanta >= period) { | ||
| 818 | /* heavy */ | ||
| 819 | tmp = (sub->deadline - (i + 1)) * period; | ||
| 820 | if (period > quanta && | ||
| 821 | do_div(tmp, (period - quanta))) /* ceil */ | ||
| 822 | tmp++; | ||
| 823 | sub->group_deadline = (quanta_t) tmp; | ||
| 824 | } else | ||
| 825 | sub->group_deadline = 0; | ||
| 826 | } | ||
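The formulas in init_subtask() are the standard PD^2 subtask parameters (floor for releases, ceiling for deadlines, and the heavy-task group deadline). The sketch below recomputes them in user space for a hypothetical task that needs 3 quanta every 5 quanta; the integer ceil/floor tricks are equivalent to the do_div() usage above, and the explicit period > quanta guard mirrors the kernel's check for the weight-1.0 case.

/*
 * Subtask parameters for a hypothetical weight-3/5 task.
 * Expected output (rel/dl/b-bit/group dl): 0/2/1/3, 1/4/1/5, 3/5/0/5.
 */
#include <stdio.h>

int main(void)
{
    unsigned long quanta = 3, period = 5, i;

    for (i = 0; i < quanta; i++) {
        unsigned long rel = period * i / quanta;                      /* floor */
        unsigned long dl  = (period * (i + 1) + quanta - 1) / quanta; /* ceil  */
        unsigned long b   = dl - period * (i + 1) / quanta;           /* b-bit */
        unsigned long gdl = 0;

        if (2 * quanta >= period && period > quanta)                  /* heavy task */
            gdl = ((dl - (i + 1)) * period + (period - quanta) - 1)
                  / (period - quanta);                                /* ceil  */

        printf("subtask %lu: rel=%lu dl=%lu b=%lu gdl=%lu\n",
               i + 1, rel, dl, b, gdl);
    }
    return 0;
}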
| 827 | |||
| 828 | static void dump_subtasks(struct task_struct* t) | ||
| 829 | { | ||
| 830 | unsigned long i; | ||
| 831 | for (i = 0; i < t->rt_param.pfair->quanta; i++) | ||
| 832 | TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n", | ||
| 833 | i + 1, | ||
| 834 | t->rt_param.pfair->subtasks[i].release, | ||
| 835 | t->rt_param.pfair->subtasks[i].deadline, | ||
| 836 | t->rt_param.pfair->subtasks[i].overlap, | ||
| 837 | t->rt_param.pfair->subtasks[i].group_deadline); | ||
| 838 | } | ||
| 839 | |||
| 840 | static long pfair_admit_task(struct task_struct* t) | ||
| 841 | { | ||
| 842 | lt_t quanta; | ||
| 843 | lt_t period; | ||
| 844 | s64 quantum_length = ktime_to_ns(tick_period); | ||
| 845 | struct pfair_param* param; | ||
| 846 | unsigned long i; | ||
| 847 | |||
| 848 | /* first check that the task is in the right cluster */ | ||
| 849 | if (cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]) != | ||
| 850 | cpu_cluster(pstate[task_cpu(t)])) | ||
| 851 | return -EINVAL; | ||
| 852 | |||
| 853 | if (get_rt_period(t) != get_rt_relative_deadline(t)) { | ||
| 854 | printk(KERN_INFO "%s: Admission rejected. " | ||
| 855 | "Only implicit deadlines are currently supported.\n", | ||
| 856 | litmus->plugin_name); | ||
| 857 | return -EINVAL; | ||
| 858 | } | ||
| 859 | |||
| 860 | /* Pfair is a tick-based method, so the time | ||
| 861 | * of interest is jiffies. Calculate tick-based | ||
| 862 | * times for everything. | ||
| 863 | * (Ceiling of exec cost, floor of period.) | ||
| 864 | */ | ||
| 865 | |||
| 866 | quanta = get_exec_cost(t); | ||
| 867 | period = get_rt_period(t); | ||
| 868 | |||
| 869 | quanta = time2quanta(get_exec_cost(t), CEIL); | ||
| 870 | |||
| 871 | if (do_div(period, quantum_length)) | ||
| 872 | printk(KERN_WARNING | ||
| 873 | "The period of %s/%d is not a multiple of %llu.\n", | ||
| 874 | t->comm, t->pid, (unsigned long long) quantum_length); | ||
| 875 | |||
| 876 | if (quanta == period) { | ||
| 877 | /* special case: task has weight 1.0 */ | ||
| 878 | printk(KERN_INFO | ||
| 879 | "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n", | ||
| 880 | t->comm, t->pid, quanta, period); | ||
| 881 | quanta = 1; | ||
| 882 | period = 1; | ||
| 883 | } | ||
| 884 | |||
| 885 | param = kmalloc(sizeof(*param) + | ||
| 886 | quanta * sizeof(struct subtask), GFP_ATOMIC); | ||
| 887 | |||
| 888 | if (!param) | ||
| 889 | return -ENOMEM; | ||
| 890 | |||
| 891 | param->quanta = quanta; | ||
| 892 | param->cur = 0; | ||
| 893 | param->release = 0; | ||
| 894 | param->period = period; | ||
| 895 | |||
| 896 | param->cluster = cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]); | ||
| 897 | |||
| 898 | for (i = 0; i < quanta; i++) | ||
| 899 | init_subtask(param->subtasks + i, i, quanta, period); | ||
| 900 | |||
| 901 | if (t->rt_param.pfair) | ||
| 902 | /* get rid of stale allocation */ | ||
| 903 | kfree(t->rt_param.pfair); | ||
| 904 | |||
| 905 | t->rt_param.pfair = param; | ||
| 906 | |||
| 907 | /* spew out some debug info */ | ||
| 908 | dump_subtasks(t); | ||
| 909 | |||
| 910 | return 0; | ||
| 911 | } | ||
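Admission converts the nanosecond task parameters into quanta: the execution cost is rounded up to whole quanta while the period is rounded down, with a warning if the period is not a multiple of the quantum. The sketch below shows that conversion under the assumption of a 1 ms quantum; tick_period and time2quanta() are kernel helpers that are not part of this diff, so plain integer arithmetic stands in for them.

/*
 * Sketch of the ns-to-quanta conversion done at admission, assuming a
 * hypothetical 1 ms quantum.
 */
#include <stdio.h>

#define QUANTUM_NS 1000000ULL   /* assumed 1 ms quantum */

int main(void)
{
    unsigned long long exec_cost = 2500000ULL;   /* 2.5 ms -> 3 quanta (ceil)   */
    unsigned long long period    = 10000000ULL;  /* 10 ms  -> 10 quanta (floor) */

    unsigned long long quanta = (exec_cost + QUANTUM_NS - 1) / QUANTUM_NS;
    unsigned long long rem    = period % QUANTUM_NS;
    unsigned long long p_q    = period / QUANTUM_NS;

    if (rem)
        printf("warning: period is not a multiple of the quantum\n");
    printf("quanta=%llu period=%llu (weight %llu/%llu)\n",
           quanta, p_q, quanta, p_q);
    return 0;
}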
| 912 | |||
| 913 | static void pfair_init_cluster(struct pfair_cluster* cluster) | ||
| 914 | { | ||
| 915 | rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs); | ||
| 916 | bheap_init(&cluster->release_queue); | ||
| 917 | raw_spin_lock_init(&cluster->release_lock); | ||
| 918 | INIT_LIST_HEAD(&cluster->topology.cpus); | ||
| 919 | } | ||
| 920 | |||
| 921 | static void cleanup_clusters(void) | ||
| 922 | { | ||
| 923 | int i; | ||
| 924 | |||
| 925 | if (num_pfair_clusters) | ||
| 926 | kfree(pfair_clusters); | ||
| 927 | pfair_clusters = NULL; | ||
| 928 | num_pfair_clusters = 0; | ||
| 929 | |||
| 930 | /* avoid stale pointers */ | ||
| 931 | for (i = 0; i < num_online_cpus(); i++) { | ||
| 932 | pstate[i]->topology.cluster = NULL; | ||
| 933 | printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]), | ||
| 934 | pstate[i]->missed_updates, pstate[i]->missed_quanta); | ||
| 935 | } | ||
| 936 | } | ||
| 937 | |||
| 938 | static long pfair_activate_plugin(void) | ||
| 939 | { | ||
| 940 | int err, i; | ||
| 941 | struct pfair_state* state; | ||
| 942 | struct pfair_cluster* cluster ; | ||
| 943 | quanta_t now; | ||
| 944 | int cluster_size; | ||
| 945 | struct cluster_cpu* cpus[NR_CPUS]; | ||
| 946 | struct scheduling_cluster* clust[NR_CPUS]; | ||
| 947 | |||
| 948 | cluster_size = get_cluster_size(pfair_cluster_level); | ||
| 949 | |||
| 950 | if (cluster_size <= 0 || num_online_cpus() % cluster_size != 0) | ||
| 951 | return -EINVAL; | ||
| 952 | |||
| 953 | num_pfair_clusters = num_online_cpus() / cluster_size; | ||
| 954 | |||
| 955 | pfair_clusters = kzalloc(num_pfair_clusters * sizeof(struct pfair_cluster), GFP_ATOMIC); | ||
| 956 | if (!pfair_clusters) { | ||
| 957 | num_pfair_clusters = 0; | ||
| 958 | printk(KERN_ERR "Could not allocate Pfair clusters!\n"); | ||
| 959 | return -ENOMEM; | ||
| 960 | } | ||
| 961 | |||
| 962 | state = &__get_cpu_var(pfair_state); | ||
| 963 | now = current_quantum(state); | ||
| 964 | TRACE("Activating PFAIR at q=%lu\n", now); | ||
| 965 | |||
| 966 | for (i = 0; i < num_pfair_clusters; i++) { | ||
| 967 | cluster = &pfair_clusters[i]; | ||
| 968 | pfair_init_cluster(cluster); | ||
| 969 | cluster->pfair_time = now; | ||
| 970 | clust[i] = &cluster->topology; | ||
| 971 | #ifdef CONFIG_RELEASE_MASTER | ||
| 972 | cluster->pfair.release_master = atomic_read(&release_master_cpu); | ||
| 973 | #endif | ||
| 974 | } | ||
| 975 | |||
| 976 | for (i = 0; i < num_online_cpus(); i++) { | ||
| 977 | state = &per_cpu(pfair_state, i); | ||
| 978 | state->cur_tick = now; | ||
| 979 | state->local_tick = now; | ||
| 980 | state->missed_quanta = 0; | ||
| 981 | state->missed_updates = 0; | ||
| 982 | state->offset = cpu_stagger_offset(i); | ||
| 983 | printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus()); | ||
| 984 | cpus[i] = &state->topology; | ||
| 985 | } | ||
| 986 | |||
| 987 | err = assign_cpus_to_clusters(pfair_cluster_level, clust, num_pfair_clusters, | ||
| 988 | cpus, num_online_cpus()); | ||
| 989 | |||
| 990 | if (err < 0) | ||
| 991 | cleanup_clusters(); | ||
| 992 | |||
| 993 | return err; | ||
| 994 | } | ||
| 995 | |||
| 996 | static long pfair_deactivate_plugin(void) | ||
| 997 | { | ||
| 998 | cleanup_clusters(); | ||
| 999 | return 0; | ||
| 1000 | } | ||
| 1001 | |||
| 1002 | /* Plugin object */ | ||
| 1003 | static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = { | ||
| 1004 | .plugin_name = "PFAIR", | ||
| 1005 | .tick = pfair_tick, | ||
| 1006 | .task_new = pfair_task_new, | ||
| 1007 | .task_exit = pfair_task_exit, | ||
| 1008 | .schedule = pfair_schedule, | ||
| 1009 | .task_wake_up = pfair_task_wake_up, | ||
| 1010 | .task_block = pfair_task_block, | ||
| 1011 | .admit_task = pfair_admit_task, | ||
| 1012 | .release_at = pfair_release_at, | ||
| 1013 | .complete_job = complete_job, | ||
| 1014 | .activate_plugin = pfair_activate_plugin, | ||
| 1015 | .deactivate_plugin = pfair_deactivate_plugin, | ||
| 1016 | }; | ||
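The plugin object above is simply a table of callbacks: the core keeps a pointer to the active plugin and dispatches through it (the litmus->schedule(), litmus->task_wake_up() calls seen earlier). Below is a minimal model of that vtable pattern with invented names; it illustrates the indirection only, not the actual registration API.

/*
 * Toy model of the plugin-as-vtable pattern. All names are invented.
 */
#include <stdio.h>

struct toy_plugin {
    const char *name;
    void (*task_wake_up)(int pid);
};

static void pfair_like_wake_up(int pid)
{
    printf("plugin handles wake-up of task %d\n", pid);
}

static struct toy_plugin the_plugin = {
    .name = "PFAIR",
    .task_wake_up = pfair_like_wake_up,
};

/* the "core" only ever sees this pointer */
static struct toy_plugin *active = &the_plugin;

int main(void)
{
    printf("active plugin: %s\n", active->name);
    active->task_wake_up(42);   /* corresponds to litmus->task_wake_up(t) */
    return 0;
}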
| 1017 | |||
| 1018 | |||
| 1019 | static struct proc_dir_entry *cluster_file = NULL, *pfair_dir = NULL; | ||
| 1020 | |||
| 1021 | static int __init init_pfair(void) | ||
| 1022 | { | ||
| 1023 | int cpu, err, fs; | ||
| 1024 | struct pfair_state *state; | ||
| 1025 | |||
| 1026 | /* | ||
| 1027 | * Initialize the pstate shortcut to the per-CPU pfair state. | ||
| 1028 | * There may be a problem here if someone removes a CPU | ||
| 1029 | * while we are doing this initialization... and if CPUs | ||
| 1030 | * are added/removed later... but we don't support CPU hotplug at the moment anyway. | ||
| 1031 | */ | ||
| 1032 | pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL); | ||
| 1033 | |||
| 1034 | /* initialize CPU state */ | ||
| 1035 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | ||
| 1036 | state = &per_cpu(pfair_state, cpu); | ||
| 1037 | state->topology.id = cpu; | ||
| 1038 | state->cur_tick = 0; | ||
| 1039 | state->local_tick = 0; | ||
| 1040 | state->linked = NULL; | ||
| 1041 | state->local = NULL; | ||
| 1042 | state->scheduled = NULL; | ||
| 1043 | state->missed_quanta = 0; | ||
| 1044 | state->offset = cpu_stagger_offset(cpu); | ||
| 1045 | pstate[cpu] = state; | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | pfair_clusters = NULL; | ||
| 1049 | num_pfair_clusters = 0; | ||
| 1050 | |||
| 1051 | err = register_sched_plugin(&pfair_plugin); | ||
| 1052 | if (!err) { | ||
| 1053 | fs = make_plugin_proc_dir(&pfair_plugin, &pfair_dir); | ||
| 1054 | if (!fs) | ||
| 1055 | cluster_file = create_cluster_file(pfair_dir, &pfair_cluster_level); | ||
| 1056 | else | ||
| 1057 | printk(KERN_ERR "Could not allocate PFAIR procfs dir.\n"); | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | return err; | ||
| 1061 | } | ||
| 1062 | |||
| 1063 | static void __exit clean_pfair(void) | ||
| 1064 | { | ||
| 1065 | kfree(pstate); | ||
| 1066 | |||
| 1067 | if (cluster_file) | ||
| 1068 | remove_proc_entry("cluster", pfair_dir); | ||
| 1069 | if (pfair_dir) | ||
| 1070 | remove_plugin_proc_dir(&pfair_plugin); | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | module_init(init_pfair); | ||
| 1074 | module_exit(clean_pfair); | ||
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c new file mode 100644 index 00000000000..aade0904491 --- /dev/null +++ b/litmus/sched_pfp.c | |||
| @@ -0,0 +1,1751 @@ | |||
| 1 | /* | ||
| 2 | * litmus/sched_pfp.c | ||
| 3 | * | ||
| 4 | * Implementation of partitioned fixed-priority scheduling. | ||
| 5 | * Based on PSN-EDF. | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/percpu.h> | ||
| 9 | #include <linux/sched.h> | ||
| 10 | #include <linux/list.h> | ||
| 11 | #include <linux/spinlock.h> | ||
| 12 | #include <linux/module.h> | ||
| 13 | |||
| 14 | #include <litmus/litmus.h> | ||
| 15 | #include <litmus/wait.h> | ||
| 16 | #include <litmus/jobs.h> | ||
| 17 | #include <litmus/preempt.h> | ||
| 18 | #include <litmus/fp_common.h> | ||
| 19 | #include <litmus/sched_plugin.h> | ||
| 20 | #include <litmus/sched_trace.h> | ||
| 21 | #include <litmus/trace.h> | ||
| 22 | #include <litmus/budget.h> | ||
| 23 | |||
| 24 | #include <linux/uaccess.h> | ||
| 25 | |||
| 26 | |||
| 27 | typedef struct { | ||
| 28 | rt_domain_t domain; | ||
| 29 | struct fp_prio_queue ready_queue; | ||
| 30 | int cpu; | ||
| 31 | struct task_struct* scheduled; /* only RT tasks */ | ||
| 32 | /* | ||
| 33 | * scheduling lock slock | ||
| 34 | * protects the domain and serializes scheduling decisions | ||
| 35 | */ | ||
| 36 | #define slock domain.ready_lock | ||
| 37 | |||
| 38 | } pfp_domain_t; | ||
| 39 | |||
| 40 | DEFINE_PER_CPU(pfp_domain_t, pfp_domains); | ||
| 41 | |||
| 42 | pfp_domain_t* pfp_doms[NR_CPUS]; | ||
| 43 | |||
| 44 | #define local_pfp (&__get_cpu_var(pfp_domains)) | ||
| 45 | #define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain) | ||
| 46 | #define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu)) | ||
| 47 | #define task_dom(task) remote_dom(get_partition(task)) | ||
| 48 | #define task_pfp(task) remote_pfp(get_partition(task)) | ||
| 49 | |||
| 50 | /* we assume the lock is being held */ | ||
| 51 | static void preempt(pfp_domain_t *pfp) | ||
| 52 | { | ||
| 53 | preempt_if_preemptable(pfp->scheduled, pfp->cpu); | ||
| 54 | } | ||
| 55 | |||
| 56 | static unsigned int priority_index(struct task_struct* t) | ||
| 57 | { | ||
| 58 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 59 | if (unlikely(t->rt_param.inh_task)) | ||
| 60 | /* use effective priority */ | ||
| 61 | t = t->rt_param.inh_task; | ||
| 62 | |||
| 63 | if (is_priority_boosted(t)) { | ||
| 64 | /* zero is reserved for priority-boosted tasks */ | ||
| 65 | return 0; | ||
| 66 | } else | ||
| 67 | #endif | ||
| 68 | return get_priority(t); | ||
| 69 | } | ||
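priority_index() maps priority-boosted tasks to index 0 so that they always beat any regular fixed priority; the index is then used by the bitmap-based ready queue (fp_prio_queue, which is not part of this diff). The sketch below is a toy model of such a queue with invented names: lowest set bit equals highest priority, index 0 reserved for boosted tasks.

/*
 * Toy bitmap-indexed fixed-priority ready queue (one head per index).
 */
#include <stdio.h>
#include <strings.h>   /* ffs() */

#define NUM_PRIOS 32

struct toy_queue {
    unsigned int bitmap;          /* bit i set => some task queued at index i */
    int          head[NUM_PRIOS]; /* pid of the first task per index */
};

static void toy_add(struct toy_queue *q, int pid, unsigned int idx)
{
    q->head[idx] = pid;
    q->bitmap |= 1u << idx;
}

static int toy_peek(const struct toy_queue *q)
{
    int bit = ffs((int) q->bitmap);   /* lowest set bit = highest priority */
    return bit ? q->head[bit - 1] : -1;
}

int main(void)
{
    struct toy_queue q = { 0 };
    toy_add(&q, 101, 5);   /* regular task at fixed priority 5 */
    toy_add(&q, 202, 0);   /* priority-boosted task -> index 0 */
    printf("highest-priority pid: %d\n", toy_peek(&q)); /* prints 202 */
    return 0;
}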
| 70 | |||
| 71 | |||
| 72 | static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
| 73 | { | ||
| 74 | pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain); | ||
| 75 | unsigned long flags; | ||
| 76 | struct task_struct* t; | ||
| 77 | struct bheap_node* hn; | ||
| 78 | |||
| 79 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
| 80 | |||
| 81 | while (!bheap_empty(tasks)) { | ||
| 82 | hn = bheap_take(fp_ready_order, tasks); | ||
| 83 | t = bheap2task(hn); | ||
| 84 | TRACE_TASK(t, "released (part:%d prio:%d)\n", | ||
| 85 | get_partition(t), get_priority(t)); | ||
| 86 | fp_prio_add(&pfp->ready_queue, t, priority_index(t)); | ||
| 87 | } | ||
| 88 | |||
| 89 | /* do we need to preempt? */ | ||
| 90 | if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) { | ||
| 91 | TRACE_CUR("preempted by new release\n"); | ||
| 92 | preempt(pfp); | ||
| 93 | } | ||
| 94 | |||
| 95 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
| 96 | } | ||
| 97 | |||
| 98 | static void pfp_preempt_check(pfp_domain_t *pfp) | ||
| 99 | { | ||
| 100 | if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) | ||
| 101 | preempt(pfp); | ||
| 102 | } | ||
| 103 | |||
| 104 | static void pfp_domain_init(pfp_domain_t* pfp, | ||
| 105 | int cpu) | ||
| 106 | { | ||
| 107 | fp_domain_init(&pfp->domain, NULL, pfp_release_jobs); | ||
| 108 | pfp->cpu = cpu; | ||
| 109 | pfp->scheduled = NULL; | ||
| 110 | fp_prio_queue_init(&pfp->ready_queue); | ||
| 111 | } | ||
| 112 | |||
| 113 | static void requeue(struct task_struct* t, pfp_domain_t *pfp) | ||
| 114 | { | ||
| 115 | BUG_ON(!is_running(t)); | ||
| 116 | |||
| 117 | tsk_rt(t)->completed = 0; | ||
| 118 | if (is_released(t, litmus_clock())) | ||
| 119 | fp_prio_add(&pfp->ready_queue, t, priority_index(t)); | ||
| 120 | else | ||
| 121 | add_release(&pfp->domain, t); /* it has got to wait */ | ||
| 122 | } | ||
| 123 | |||
| 124 | static void job_completion(struct task_struct* t, int forced) | ||
| 125 | { | ||
| 126 | sched_trace_task_completion(t,forced); | ||
| 127 | TRACE_TASK(t, "job_completion().\n"); | ||
| 128 | |||
| 129 | tsk_rt(t)->completed = 1; | ||
| 130 | prepare_for_next_period(t); | ||
| 131 | if (is_released(t, litmus_clock())) | ||
| 132 | sched_trace_task_release(t); | ||
| 133 | } | ||
| 134 | |||
| 135 | static void pfp_tick(struct task_struct *t) | ||
| 136 | { | ||
| 137 | pfp_domain_t *pfp = local_pfp; | ||
| 138 | |||
| 139 | /* Check for inconsistency. We don't need the lock for this since | ||
| 140 | * ->scheduled is only changed in schedule, which obviously is not | ||
| 141 | * executing in parallel on this CPU | ||
| 142 | */ | ||
| 143 | BUG_ON(is_realtime(t) && t != pfp->scheduled); | ||
| 144 | |||
| 145 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
| 146 | if (!is_np(t)) { | ||
| 147 | litmus_reschedule_local(); | ||
| 148 | TRACE("pfp_scheduler_tick: " | ||
| 149 | "%d is preemptable " | ||
| 150 | " => FORCE_RESCHED\n", t->pid); | ||
| 151 | } else if (is_user_np(t)) { | ||
| 152 | TRACE("pfp_scheduler_tick: " | ||
| 153 | "%d is non-preemptable, " | ||
| 154 | "preemption delayed.\n", t->pid); | ||
| 155 | request_exit_np(t); | ||
| 156 | } | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | static struct task_struct* pfp_schedule(struct task_struct * prev) | ||
| 161 | { | ||
| 162 | pfp_domain_t* pfp = local_pfp; | ||
| 163 | struct task_struct* next; | ||
| 164 | |||
| 165 | int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate; | ||
| 166 | |||
| 167 | raw_spin_lock(&pfp->slock); | ||
| 168 | |||
| 169 | /* Sanity checking: | ||
| 170 | * differently from G-EDF, when a task exits (is dead), | ||
| 171 | * pfp->scheduled may be NULL while prev _is_ real-time. | ||
| 172 | */ | ||
| 173 | BUG_ON(pfp->scheduled && pfp->scheduled != prev); | ||
| 174 | BUG_ON(pfp->scheduled && !is_realtime(prev)); | ||
| 175 | |||
| 176 | /* (0) Determine state */ | ||
| 177 | exists = pfp->scheduled != NULL; | ||
| 178 | blocks = exists && !is_running(pfp->scheduled); | ||
| 179 | out_of_time = exists && | ||
| 180 | budget_enforced(pfp->scheduled) && | ||
| 181 | budget_exhausted(pfp->scheduled); | ||
| 182 | np = exists && is_np(pfp->scheduled); | ||
| 183 | sleep = exists && is_completed(pfp->scheduled); | ||
| 184 | migrate = exists && get_partition(pfp->scheduled) != pfp->cpu; | ||
| 185 | preempt = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev)); | ||
| 186 | |||
| 187 | /* If we need to preempt do so. | ||
| 188 | * The following checks set resched to 1 in case of special | ||
| 189 | * circumstances. | ||
| 190 | */ | ||
| 191 | resched = preempt; | ||
| 192 | |||
| 193 | /* If a task blocks we have no choice but to reschedule. | ||
| 194 | */ | ||
| 195 | if (blocks) | ||
| 196 | resched = 1; | ||
| 197 | |||
| 198 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
| 199 | * Multiple calls to request_exit_np() don't hurt. | ||
| 200 | */ | ||
| 201 | if (np && (out_of_time || preempt || sleep)) | ||
| 202 | request_exit_np(pfp->scheduled); | ||
| 203 | |||
| 204 | /* Any task that is preemptable and either exhausts its execution | ||
| 205 | * budget or wants to sleep completes. We may have to reschedule after | ||
| 206 | * this. | ||
| 207 | */ | ||
| 208 | if (!np && (out_of_time || sleep) && !blocks && !migrate) { | ||
| 209 | job_completion(pfp->scheduled, !sleep); | ||
| 210 | resched = 1; | ||
| 211 | } | ||
| 212 | |||
| 213 | /* The final scheduling decision. Do we need to switch for some reason? | ||
| 214 | * Switch if we are in RT mode and have no task or if we need to | ||
| 215 | * resched. | ||
| 216 | */ | ||
| 217 | next = NULL; | ||
| 218 | if ((!np || blocks) && (resched || !exists)) { | ||
| 219 | /* When preempting a task that does not block, | ||
| 220 | * re-insert it into either the ready queue or the | ||
| 221 | * release queue (if it completed). requeue() picks | ||
| 222 | * the appropriate queue. | ||
| 223 | */ | ||
| 224 | if (pfp->scheduled && !blocks && !migrate) | ||
| 225 | requeue(pfp->scheduled, pfp); | ||
| 226 | next = fp_prio_take(&pfp->ready_queue); | ||
| 227 | if (next == prev) { | ||
| 228 | struct task_struct *t = fp_prio_peek(&pfp->ready_queue); | ||
| 229 | TRACE_TASK(next, "next==prev sleep=%d oot=%d np=%d preempt=%d migrate=%d " | ||
| 230 | "boost=%d empty=%d prio-idx=%u prio=%u\n", | ||
| 231 | sleep, out_of_time, np, preempt, migrate, | ||
| 232 | is_priority_boosted(next), | ||
| 233 | t == NULL, | ||
| 234 | priority_index(next), | ||
| 235 | get_priority(next)); | ||
| 236 | if (t) | ||
| 237 | TRACE_TASK(t, "waiter boost=%d prio-idx=%u prio=%u\n", | ||
| 238 | is_priority_boosted(t), | ||
| 239 | priority_index(t), | ||
| 240 | get_priority(t)); | ||
| 241 | } | ||
| 242 | /* If preempt is set, we should not see the same task again. */ | ||
| 243 | BUG_ON(preempt && next == prev); | ||
| 244 | /* Similarly, if preempt is set, then next may not be NULL, | ||
| 245 | * unless it's a migration. */ | ||
| 246 | BUG_ON(preempt && !migrate && next == NULL); | ||
| 247 | } else | ||
| 248 | /* Only override Linux scheduler if we have a real-time task | ||
| 249 | * scheduled that needs to continue. | ||
| 250 | */ | ||
| 251 | if (exists) | ||
| 252 | next = prev; | ||
| 253 | |||
| 254 | if (next) { | ||
| 255 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
| 256 | tsk_rt(next)->completed = 0; | ||
| 257 | } else { | ||
| 258 | TRACE("becoming idle at %llu\n", litmus_clock()); | ||
| 259 | } | ||
| 260 | |||
| 261 | pfp->scheduled = next; | ||
| 262 | sched_state_task_picked(); | ||
| 263 | raw_spin_unlock(&pfp->slock); | ||
| 264 | |||
| 265 | return next; | ||
| 266 | } | ||
| 267 | |||
| 268 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 269 | |||
| 270 | /* prev is no longer scheduled --- see if it needs to migrate */ | ||
| 271 | static void pfp_finish_switch(struct task_struct *prev) | ||
| 272 | { | ||
| 273 | pfp_domain_t *to; | ||
| 274 | |||
| 275 | if (is_realtime(prev) && | ||
| 276 | is_running(prev) && | ||
| 277 | get_partition(prev) != smp_processor_id()) { | ||
| 278 | TRACE_TASK(prev, "needs to migrate from P%d to P%d\n", | ||
| 279 | smp_processor_id(), get_partition(prev)); | ||
| 280 | |||
| 281 | to = task_pfp(prev); | ||
| 282 | |||
| 283 | raw_spin_lock(&to->slock); | ||
| 284 | |||
| 285 | TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu); | ||
| 286 | requeue(prev, to); | ||
| 287 | if (fp_preemption_needed(&to->ready_queue, to->scheduled)) | ||
| 288 | preempt(to); | ||
| 289 | |||
| 290 | raw_spin_unlock(&to->slock); | ||
| 291 | |||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | #endif | ||
| 296 | |||
| 297 | /* Prepare a task for running in RT mode | ||
| 298 | */ | ||
| 299 | static void pfp_task_new(struct task_struct * t, int on_rq, int running) | ||
| 300 | { | ||
| 301 | pfp_domain_t* pfp = task_pfp(t); | ||
| 302 | unsigned long flags; | ||
| 303 | |||
| 304 | TRACE_TASK(t, "P-FP: task new, cpu = %d\n", | ||
| 305 | t->rt_param.task_params.cpu); | ||
| 306 | |||
| 307 | /* setup job parameters */ | ||
| 308 | release_at(t, litmus_clock()); | ||
| 309 | |||
| 310 | /* The task must either be running or queued; otherwise the signal | ||
| 311 | * code will try to wake it up, with fatal consequences. | ||
| 312 | */ | ||
| 313 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
| 314 | if (running) { | ||
| 315 | /* there shouldn't be anything else running at the time */ | ||
| 316 | BUG_ON(pfp->scheduled); | ||
| 317 | pfp->scheduled = t; | ||
| 318 | } else { | ||
| 319 | requeue(t, pfp); | ||
| 320 | /* maybe we have to reschedule */ | ||
| 321 | pfp_preempt_check(pfp); | ||
| 322 | } | ||
| 323 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
| 324 | } | ||
| 325 | |||
| 326 | static void pfp_task_wake_up(struct task_struct *task) | ||
| 327 | { | ||
| 328 | unsigned long flags; | ||
| 329 | pfp_domain_t* pfp = task_pfp(task); | ||
| 330 | lt_t now; | ||
| 331 | |||
| 332 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
| 333 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
| 334 | |||
| 335 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 336 | /* Should only be queued when processing a fake-wake up due to a | ||
| 337 | * migration-related state change. */ | ||
| 338 | if (unlikely(is_queued(task))) { | ||
| 339 | TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n"); | ||
| 340 | goto out_unlock; | ||
| 341 | } | ||
| 342 | #else | ||
| 343 | BUG_ON(is_queued(task)); | ||
| 344 | #endif | ||
| 345 | now = litmus_clock(); | ||
| 346 | if (is_sporadic(task) && is_tardy(task, now) | ||
| 347 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 348 | /* We need to take suspensions because of semaphores into | ||
| 349 | * account! If a job resumes after being suspended due to acquiring | ||
| 350 | * a semaphore, it should never be treated as a new job release. | ||
| 351 | */ | ||
| 352 | && !is_priority_boosted(task) | ||
| 353 | #endif | ||
| 354 | ) { | ||
| 355 | /* new sporadic release */ | ||
| 356 | release_at(task, now); | ||
| 357 | sched_trace_task_release(task); | ||
| 358 | } | ||
| 359 | |||
| 360 | /* Only add to ready queue if it is not the currently-scheduled | ||
| 361 | * task. This could be the case if a task was woken up concurrently | ||
| 362 | * on a remote CPU before the executing CPU got around to actually | ||
| 363 | * de-scheduling the task, i.e., wake_up() raced with schedule() | ||
| 364 | * and won. Also, don't requeue if it is still queued, which can | ||
| 365 | * happen under the DPCP due to wake-ups racing with migrations. | ||
| 366 | */ | ||
| 367 | if (pfp->scheduled != task) { | ||
| 368 | requeue(task, pfp); | ||
| 369 | pfp_preempt_check(pfp); | ||
| 370 | } | ||
| 371 | |||
| 372 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 373 | out_unlock: | ||
| 374 | #endif | ||
| 375 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
| 376 | TRACE_TASK(task, "wake up done\n"); | ||
| 377 | } | ||
| 378 | |||
| 379 | static void pfp_task_block(struct task_struct *t) | ||
| 380 | { | ||
| 381 | /* only running tasks can block, thus t is in no queue */ | ||
| 382 | TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); | ||
| 383 | |||
| 384 | BUG_ON(!is_realtime(t)); | ||
| 385 | |||
| 386 | /* If this task blocked normally, it shouldn't be queued. The exception is | ||
| 387 | * if this is a simulated block()/wakeup() pair from the pull-migration code path. | ||
| 388 | * This should only happen if the DPCP is being used. | ||
| 389 | */ | ||
| 390 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 391 | if (unlikely(is_queued(t))) | ||
| 392 | TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n"); | ||
| 393 | #else | ||
| 394 | BUG_ON(is_queued(t)); | ||
| 395 | #endif | ||
| 396 | } | ||
| 397 | |||
| 398 | static void pfp_task_exit(struct task_struct * t) | ||
| 399 | { | ||
| 400 | unsigned long flags; | ||
| 401 | pfp_domain_t* pfp = task_pfp(t); | ||
| 402 | rt_domain_t* dom; | ||
| 403 | |||
| 404 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
| 405 | if (is_queued(t)) { | ||
| 406 | BUG(); /* This currently doesn't work. */ | ||
| 407 | /* dequeue */ | ||
| 408 | dom = task_dom(t); | ||
| 409 | remove(dom, t); | ||
| 410 | } | ||
| 411 | if (pfp->scheduled == t) { | ||
| 412 | pfp->scheduled = NULL; | ||
| 413 | preempt(pfp); | ||
| 414 | } | ||
| 415 | TRACE_TASK(t, "RIP, now reschedule\n"); | ||
| 416 | |||
| 417 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
| 418 | } | ||
| 419 | |||
| 420 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 421 | |||
| 422 | #include <litmus/fdso.h> | ||
| 423 | #include <litmus/srp.h> | ||
| 424 | |||
| 425 | static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t) | ||
| 426 | { | ||
| 427 | BUG_ON(pfp->scheduled == t && is_queued(t)); | ||
| 428 | if (is_queued(t)) | ||
| 429 | fp_prio_remove(&pfp->ready_queue, t, priority_index(t)); | ||
| 430 | } | ||
| 431 | |||
| 432 | static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t, | ||
| 433 | struct task_struct* prio_inh) | ||
| 434 | { | ||
| 435 | int requeue; | ||
| 436 | |||
| 437 | if (!t || t->rt_param.inh_task == prio_inh) { | ||
| 438 | /* no update required */ | ||
| 439 | if (t) | ||
| 440 | TRACE_TASK(t, "no prio-inh update required\n"); | ||
| 441 | return; | ||
| 442 | } | ||
| 443 | |||
| 444 | requeue = is_queued(t); | ||
| 445 | TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue); | ||
| 446 | |||
| 447 | if (requeue) | ||
| 448 | /* first remove */ | ||
| 449 | fp_dequeue(pfp, t); | ||
| 450 | |||
| 451 | t->rt_param.inh_task = prio_inh; | ||
| 452 | |||
| 453 | if (requeue) | ||
| 454 | /* add again to the right queue */ | ||
| 455 | fp_prio_add(&pfp->ready_queue, t, priority_index(t)); | ||
| 456 | } | ||
| 457 | |||
| 458 | static int effective_agent_priority(int prio) | ||
| 459 | { | ||
| 460 | /* make sure agents have higher priority */ | ||
| 461 | return prio - LITMUS_MAX_PRIORITY; | ||
| 462 | } | ||
| 463 | |||
| 464 | static lt_t prio_point(int eprio) | ||
| 465 | { | ||
| 466 | /* make sure we have non-negative prio points */ | ||
| 467 | return eprio + LITMUS_MAX_PRIORITY; | ||
| 468 | } | ||
| 469 | |||
| 470 | static int prio_from_point(lt_t prio_point) | ||
| 471 | { | ||
| 472 | return ((int) prio_point) - LITMUS_MAX_PRIORITY; | ||
| 473 | } | ||
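The three helpers above implement a simple offset scheme: agent priorities are shifted below all regular task priorities (lower numbers mean higher priority), and `prio_point()` shifts them back into a non-negative range suitable as a wait-queue key. A standalone sketch of the round trip, assuming `LITMUS_MAX_PRIORITY` is 512 (the actual value comes from the LITMUS^RT headers):

```c
/* Standalone sketch of the agent-priority mapping above; LITMUS_MAX_PRIORITY
 * is assumed to be 512 here (the real value comes from the LITMUS^RT headers). */
#include <assert.h>
#include <stdio.h>

#define LITMUS_MAX_PRIORITY 512

static int effective_agent_priority(int prio)
{
	/* agents are shifted below all regular priorities (lower = higher prio) */
	return prio - LITMUS_MAX_PRIORITY;
}

static long prio_point(int eprio)
{
	/* shift back into a non-negative range usable as a wait-queue key
	 * (the kernel uses lt_t; a plain long is enough for this sketch) */
	return eprio + LITMUS_MAX_PRIORITY;
}

static int prio_from_point(long point)
{
	return (int) point - LITMUS_MAX_PRIORITY;
}

int main(void)
{
	int prio  = 10;                              /* a regular task priority */
	int eprio = effective_agent_priority(prio);  /* -502: beats every regular prio */

	assert(prio_from_point(prio_point(eprio)) == eprio);
	printf("prio=%d eprio=%d point=%ld\n", prio, eprio, prio_point(eprio));
	return 0;
}
```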
| 474 | |||
| 475 | static void boost_priority(struct task_struct* t, lt_t priority_point) | ||
| 476 | { | ||
| 477 | unsigned long flags; | ||
| 478 | pfp_domain_t* pfp = task_pfp(t); | ||
| 479 | |||
| 480 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
| 481 | |||
| 482 | |||
| 483 | TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock()); | ||
| 484 | |||
| 485 | tsk_rt(t)->priority_boosted = 1; | ||
| 486 | /* tie-break by protocol-specific priority point */ | ||
| 487 | tsk_rt(t)->boost_start_time = priority_point; | ||
| 488 | |||
| 489 | /* Priority boosting currently only takes effect for already-scheduled | ||
| 490 | * tasks. This is sufficient since priority boosting only kicks in as | ||
| 491 | * part of lock acquisitions. */ | ||
| 492 | BUG_ON(pfp->scheduled != t); | ||
| 493 | |||
| 494 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
| 495 | } | ||
| 496 | |||
| 497 | static void unboost_priority(struct task_struct* t) | ||
| 498 | { | ||
| 499 | unsigned long flags; | ||
| 500 | pfp_domain_t* pfp = task_pfp(t); | ||
| 501 | lt_t now; | ||
| 502 | |||
| 503 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
| 504 | now = litmus_clock(); | ||
| 505 | |||
| 506 | /* assumption: this only happens when the job is scheduled */ | ||
| 507 | BUG_ON(pfp->scheduled != t); | ||
| 508 | |||
| 509 | TRACE_TASK(t, "priority restored at %llu\n", now); | ||
| 510 | |||
| 511 | /* priority boosted jobs must be scheduled */ | ||
| 512 | BUG_ON(pfp->scheduled != t); | ||
| 513 | |||
| 514 | tsk_rt(t)->priority_boosted = 0; | ||
| 515 | tsk_rt(t)->boost_start_time = 0; | ||
| 516 | |||
| 517 | /* check if this changes anything */ | ||
| 518 | if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled)) | ||
| 519 | preempt(pfp); | ||
| 520 | |||
| 521 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
| 522 | } | ||
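Note that `boost_priority()`/`unboost_priority()` only flip the `priority_boosted` flag and record a protocol-specific time stamp; the scheduling effect comes from the fixed-priority comparator (implemented elsewhere, in fp_common.c), where boosted jobs are assumed to outrank non-boosted ones and ties among boosted jobs are broken by the recorded stamp. A standalone sketch of that assumed ordering, not the kernel's actual comparator:

```c
/* Standalone sketch of how boosting is assumed to enter the fixed-priority
 * comparison (the kernel's actual comparator lives in fp_common.c):
 * boosted jobs beat non-boosted ones; ties among boosted jobs are broken
 * by the recorded boost time stamp (earlier wins). */
#include <stdio.h>

struct job {
	int boosted;
	unsigned long long boost_time;	/* protocol-specific priority point */
	unsigned int prio;		/* base priority, lower = higher */
};

static int higher_prio(const struct job *a, const struct job *b)
{
	if (a->boosted != b->boosted)
		return a->boosted;			/* boosted always wins */
	if (a->boosted)
		return a->boost_time < b->boost_time;	/* FIFO among boosted jobs */
	return a->prio < b->prio;
}

int main(void)
{
	struct job lock_holder = { 1, 100, 50 };	/* low base prio, but boosted */
	struct job high_prio   = { 0,   0,  1 };

	printf("%d\n", higher_prio(&lock_holder, &high_prio)); /* 1: boosting overrides */
	return 0;
}
```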
| 523 | |||
| 524 | /* ******************** SRP support ************************ */ | ||
| 525 | |||
| 526 | static unsigned int pfp_get_srp_prio(struct task_struct* t) | ||
| 527 | { | ||
| 528 | return get_priority(t); | ||
| 529 | } | ||
| 530 | |||
| 531 | /* ******************** FMLP support ********************** */ | ||
| 532 | |||
| 533 | struct fmlp_semaphore { | ||
| 534 | struct litmus_lock litmus_lock; | ||
| 535 | |||
| 536 | /* current resource holder */ | ||
| 537 | struct task_struct *owner; | ||
| 538 | |||
| 539 | /* FIFO queue of waiting tasks */ | ||
| 540 | wait_queue_head_t wait; | ||
| 541 | }; | ||
| 542 | |||
| 543 | static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock) | ||
| 544 | { | ||
| 545 | return container_of(lock, struct fmlp_semaphore, litmus_lock); | ||
| 546 | } | ||
| 547 | int pfp_fmlp_lock(struct litmus_lock* l) | ||
| 548 | { | ||
| 549 | struct task_struct* t = current; | ||
| 550 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
| 551 | wait_queue_t wait; | ||
| 552 | unsigned long flags; | ||
| 553 | lt_t time_of_request; | ||
| 554 | |||
| 555 | if (!is_realtime(t)) | ||
| 556 | return -EPERM; | ||
| 557 | |||
| 558 | /* prevent nested lock acquisition --- not supported by FMLP */ | ||
| 559 | if (tsk_rt(t)->num_locks_held || | ||
| 560 | tsk_rt(t)->num_local_locks_held) | ||
| 561 | return -EBUSY; | ||
| 562 | |||
| 563 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 564 | |||
| 565 | /* tie-break by this point in time */ | ||
| 566 | time_of_request = litmus_clock(); | ||
| 567 | |||
| 568 | /* Priority-boost ourselves *before* we suspend so that | ||
| 569 | * our priority is boosted when we resume. */ | ||
| 570 | boost_priority(t, time_of_request); | ||
| 571 | |||
| 572 | if (sem->owner) { | ||
| 573 | /* resource is not free => must suspend and wait */ | ||
| 574 | |||
| 575 | init_waitqueue_entry(&wait, t); | ||
| 576 | |||
| 577 | /* FIXME: interruptible would be nice some day */ | ||
| 578 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
| 579 | |||
| 580 | __add_wait_queue_tail_exclusive(&sem->wait, &wait); | ||
| 581 | |||
| 582 | TS_LOCK_SUSPEND; | ||
| 583 | |||
| 584 | /* release lock before sleeping */ | ||
| 585 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 586 | |||
| 587 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
| 588 | * when we wake up; we are guaranteed to have the lock since | ||
| 589 | * there is only one wake up per release. | ||
| 590 | */ | ||
| 591 | |||
| 592 | schedule(); | ||
| 593 | |||
| 594 | TS_LOCK_RESUME; | ||
| 595 | |||
| 596 | /* Since we hold the lock, no other task will change | ||
| 597 | * ->owner. We can thus check it without acquiring the spin | ||
| 598 | * lock. */ | ||
| 599 | BUG_ON(sem->owner != t); | ||
| 600 | } else { | ||
| 601 | /* it's ours now */ | ||
| 602 | sem->owner = t; | ||
| 603 | |||
| 604 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 605 | } | ||
| 606 | |||
| 607 | tsk_rt(t)->num_locks_held++; | ||
| 608 | |||
| 609 | return 0; | ||
| 610 | } | ||
| 611 | |||
| 612 | int pfp_fmlp_unlock(struct litmus_lock* l) | ||
| 613 | { | ||
| 614 | struct task_struct *t = current, *next; | ||
| 615 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
| 616 | unsigned long flags; | ||
| 617 | int err = 0; | ||
| 618 | |||
| 619 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 620 | |||
| 621 | if (sem->owner != t) { | ||
| 622 | err = -EINVAL; | ||
| 623 | goto out; | ||
| 624 | } | ||
| 625 | |||
| 626 | tsk_rt(t)->num_locks_held--; | ||
| 627 | |||
| 628 | /* we lose the benefit of priority boosting */ | ||
| 629 | |||
| 630 | unboost_priority(t); | ||
| 631 | |||
| 632 | /* check if there are jobs waiting for this resource */ | ||
| 633 | next = __waitqueue_remove_first(&sem->wait); | ||
| 634 | if (next) { | ||
| 635 | /* next becomes the resource holder */ | ||
| 636 | sem->owner = next; | ||
| 637 | |||
| 638 | /* Wake up next. The waiting job is already priority-boosted. */ | ||
| 639 | wake_up_process(next); | ||
| 640 | } else | ||
| 641 | /* resource becomes available */ | ||
| 642 | sem->owner = NULL; | ||
| 643 | |||
| 644 | out: | ||
| 645 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 646 | return err; | ||
| 647 | } | ||
| 648 | |||
| 649 | int pfp_fmlp_close(struct litmus_lock* l) | ||
| 650 | { | ||
| 651 | struct task_struct *t = current; | ||
| 652 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
| 653 | unsigned long flags; | ||
| 654 | |||
| 655 | int owner; | ||
| 656 | |||
| 657 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 658 | |||
| 659 | owner = sem->owner == t; | ||
| 660 | |||
| 661 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 662 | |||
| 663 | if (owner) | ||
| 664 | pfp_fmlp_unlock(l); | ||
| 665 | |||
| 666 | return 0; | ||
| 667 | } | ||
| 668 | |||
| 669 | void pfp_fmlp_free(struct litmus_lock* lock) | ||
| 670 | { | ||
| 671 | kfree(fmlp_from_lock(lock)); | ||
| 672 | } | ||
| 673 | |||
| 674 | static struct litmus_lock_ops pfp_fmlp_lock_ops = { | ||
| 675 | .close = pfp_fmlp_close, | ||
| 676 | .lock = pfp_fmlp_lock, | ||
| 677 | .unlock = pfp_fmlp_unlock, | ||
| 678 | .deallocate = pfp_fmlp_free, | ||
| 679 | }; | ||
| 680 | |||
| 681 | static struct litmus_lock* pfp_new_fmlp(void) | ||
| 682 | { | ||
| 683 | struct fmlp_semaphore* sem; | ||
| 684 | |||
| 685 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
| 686 | if (!sem) | ||
| 687 | return NULL; | ||
| 688 | |||
| 689 | sem->owner = NULL; | ||
| 690 | init_waitqueue_head(&sem->wait); | ||
| 691 | sem->litmus_lock.ops = &pfp_fmlp_lock_ops; | ||
| 692 | |||
| 693 | return &sem->litmus_lock; | ||
| 694 | } | ||
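For context, a hypothetical userspace use of such a semaphore through the liblitmus companion library might look as follows. The helper names (`open_fmlp_sem()`, `litmus_lock()`, `litmus_unlock()`, `od_close()`) and their signatures are assumptions and may differ between liblitmus versions; the namespace file path is made up for the example.

```c
/* Hypothetical userspace sketch (liblitmus helper names are assumptions). */
#include <fcntl.h>
#include <unistd.h>
#include <litmus.h>	/* assumed: open_fmlp_sem(), litmus_lock(), litmus_unlock() */

static void use_shared_resource(void)
{
	/* a file shared by all tasks serves as the lock namespace */
	int fd = open("/tmp/fmlp-ns", O_RDWR | O_CREAT, 0666);
	int od = open_fmlp_sem(fd, 0);	/* resource id 0 -> pfp_new_fmlp() in-kernel */

	litmus_lock(od);	/* may boost our priority and suspend (pfp_fmlp_lock) */
	/* ... critical section ... */
	litmus_unlock(od);	/* hands the lock to the next FIFO waiter (pfp_fmlp_unlock) */

	od_close(od);
	close(fd);
}
```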
| 695 | |||
| 696 | /* ******************** MPCP support ********************** */ | ||
| 697 | |||
| 698 | struct mpcp_semaphore { | ||
| 699 | struct litmus_lock litmus_lock; | ||
| 700 | |||
| 701 | /* current resource holder */ | ||
| 702 | struct task_struct *owner; | ||
| 703 | |||
| 704 | /* priority queue of waiting tasks */ | ||
| 705 | wait_queue_head_t wait; | ||
| 706 | |||
| 707 | /* priority ceiling per cpu */ | ||
| 708 | unsigned int prio_ceiling[NR_CPUS]; | ||
| 709 | |||
| 710 | /* should jobs spin "virtually" for this resource? */ | ||
| 711 | int vspin; | ||
| 712 | }; | ||
| 713 | |||
| 714 | #define OMEGA_CEILING UINT_MAX | ||
| 715 | |||
| 716 | /* Since jobs spin "virtually" while waiting to acquire a lock, | ||
| 717 | * they first must acquire a local per-cpu resource. | ||
| 718 | */ | ||
| 719 | static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait); | ||
| 720 | static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin); | ||
| 721 | |||
| 722 | /* called with preemptions off <=> no local modifications */ | ||
| 723 | static void mpcp_vspin_enter(void) | ||
| 724 | { | ||
| 725 | struct task_struct* t = current; | ||
| 726 | |||
| 727 | while (1) { | ||
| 728 | if (__get_cpu_var(mpcpvs_vspin) == NULL) { | ||
| 729 | /* good, we get to issue our request */ | ||
| 730 | __get_cpu_var(mpcpvs_vspin) = t; | ||
| 731 | break; | ||
| 732 | } else { | ||
| 733 | /* some job is spinning => enqueue in request queue */ | ||
| 734 | prio_wait_queue_t wait; | ||
| 735 | wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait); | ||
| 736 | unsigned long flags; | ||
| 737 | |||
| 738 | /* ordered by regular priority */ | ||
| 739 | init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t))); | ||
| 740 | |||
| 741 | spin_lock_irqsave(&vspin->lock, flags); | ||
| 742 | |||
| 743 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
| 744 | |||
| 745 | __add_wait_queue_prio_exclusive(vspin, &wait); | ||
| 746 | |||
| 747 | spin_unlock_irqrestore(&vspin->lock, flags); | ||
| 748 | |||
| 749 | TS_LOCK_SUSPEND; | ||
| 750 | |||
| 751 | preempt_enable_no_resched(); | ||
| 752 | |||
| 753 | schedule(); | ||
| 754 | |||
| 755 | preempt_disable(); | ||
| 756 | |||
| 757 | TS_LOCK_RESUME; | ||
| 758 | /* Recheck if we got it --- some higher-priority process might | ||
| 759 | * have swooped in. */ | ||
| 760 | } | ||
| 761 | } | ||
| 762 | /* ok, now it is ours */ | ||
| 763 | } | ||
| 764 | |||
| 765 | /* called with preemptions off */ | ||
| 766 | static void mpcp_vspin_exit(void) | ||
| 767 | { | ||
| 768 | struct task_struct* t = current, *next; | ||
| 769 | unsigned long flags; | ||
| 770 | wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait); | ||
| 771 | |||
| 772 | BUG_ON(__get_cpu_var(mpcpvs_vspin) != t); | ||
| 773 | |||
| 774 | /* no spinning job */ | ||
| 775 | __get_cpu_var(mpcpvs_vspin) = NULL; | ||
| 776 | |||
| 777 | /* see if anyone is waiting for us to stop "spinning" */ | ||
| 778 | spin_lock_irqsave(&vspin->lock, flags); | ||
| 779 | next = __waitqueue_remove_first(vspin); | ||
| 780 | |||
| 781 | if (next) | ||
| 782 | wake_up_process(next); | ||
| 783 | |||
| 784 | spin_unlock_irqrestore(&vspin->lock, flags); | ||
| 785 | } | ||
| 786 | |||
| 787 | static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock) | ||
| 788 | { | ||
| 789 | return container_of(lock, struct mpcp_semaphore, litmus_lock); | ||
| 790 | } | ||
| 791 | |||
| 792 | int pfp_mpcp_lock(struct litmus_lock* l) | ||
| 793 | { | ||
| 794 | struct task_struct* t = current; | ||
| 795 | struct mpcp_semaphore *sem = mpcp_from_lock(l); | ||
| 796 | prio_wait_queue_t wait; | ||
| 797 | unsigned long flags; | ||
| 798 | |||
| 799 | if (!is_realtime(t)) | ||
| 800 | return -EPERM; | ||
| 801 | |||
| 802 | /* prevent nested lock acquisition */ | ||
| 803 | if (tsk_rt(t)->num_locks_held || | ||
| 804 | tsk_rt(t)->num_local_locks_held) | ||
| 805 | return -EBUSY; | ||
| 806 | |||
| 807 | preempt_disable(); | ||
| 808 | |||
| 809 | if (sem->vspin) | ||
| 810 | mpcp_vspin_enter(); | ||
| 811 | |||
| 812 | /* Priority-boost ourselves *before* we suspend so that | ||
| 813 | * our priority is boosted when we resume. Use the priority | ||
| 814 | * ceiling for the local partition. */ | ||
| 815 | boost_priority(t, sem->prio_ceiling[get_partition(t)]); | ||
| 816 | |||
| 817 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 818 | |||
| 819 | preempt_enable_no_resched(); | ||
| 820 | |||
| 821 | if (sem->owner) { | ||
| 822 | /* resource is not free => must suspend and wait */ | ||
| 823 | |||
| 824 | /* ordered by regular priority */ | ||
| 825 | init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t))); | ||
| 826 | |||
| 827 | /* FIXME: interruptible would be nice some day */ | ||
| 828 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
| 829 | |||
| 830 | __add_wait_queue_prio_exclusive(&sem->wait, &wait); | ||
| 831 | |||
| 832 | TS_LOCK_SUSPEND; | ||
| 833 | |||
| 834 | /* release lock before sleeping */ | ||
| 835 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 836 | |||
| 837 | /* Ownership is transferred before the wake-up and there is only one | ||
| 838 | * wake-up per release (the wait queue here is priority-ordered, unlike | ||
| 839 | * the FMLP's FIFO queue), so we need not recheck when we wake up. | ||
| 840 | */ | ||
| 841 | |||
| 842 | schedule(); | ||
| 843 | |||
| 844 | TS_LOCK_RESUME; | ||
| 845 | |||
| 846 | /* Since we hold the lock, no other task will change | ||
| 847 | * ->owner. We can thus check it without acquiring the spin | ||
| 848 | * lock. */ | ||
| 849 | BUG_ON(sem->owner != t); | ||
| 850 | } else { | ||
| 851 | /* it's ours now */ | ||
| 852 | sem->owner = t; | ||
| 853 | |||
| 854 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 855 | } | ||
| 856 | |||
| 857 | tsk_rt(t)->num_locks_held++; | ||
| 858 | |||
| 859 | return 0; | ||
| 860 | } | ||
| 861 | |||
| 862 | int pfp_mpcp_unlock(struct litmus_lock* l) | ||
| 863 | { | ||
| 864 | struct task_struct *t = current, *next; | ||
| 865 | struct mpcp_semaphore *sem = mpcp_from_lock(l); | ||
| 866 | unsigned long flags; | ||
| 867 | int err = 0; | ||
| 868 | |||
| 869 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 870 | |||
| 871 | if (sem->owner != t) { | ||
| 872 | err = -EINVAL; | ||
| 873 | goto out; | ||
| 874 | } | ||
| 875 | |||
| 876 | |||
| 877 | tsk_rt(t)->num_locks_held--; | ||
| 878 | |||
| 879 | /* we lose the benefit of priority boosting */ | ||
| 880 | |||
| 881 | unboost_priority(t); | ||
| 882 | |||
| 883 | /* check if there are jobs waiting for this resource */ | ||
| 884 | next = __waitqueue_remove_first(&sem->wait); | ||
| 885 | if (next) { | ||
| 886 | /* next becomes the resource holder */ | ||
| 887 | sem->owner = next; | ||
| 888 | |||
| 889 | /* Wake up next. The waiting job is already priority-boosted. */ | ||
| 890 | wake_up_process(next); | ||
| 891 | } else | ||
| 892 | /* resource becomes available */ | ||
| 893 | sem->owner = NULL; | ||
| 894 | |||
| 895 | out: | ||
| 896 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 897 | |||
| 898 | if (sem->vspin && err == 0) { | ||
| 899 | preempt_disable(); | ||
| 900 | mpcp_vspin_exit(); | ||
| 901 | preempt_enable(); | ||
| 902 | } | ||
| 903 | |||
| 904 | return err; | ||
| 905 | } | ||
| 906 | |||
| 907 | int pfp_mpcp_open(struct litmus_lock* l, void* config) | ||
| 908 | { | ||
| 909 | struct task_struct *t = current; | ||
| 910 | struct mpcp_semaphore *sem = mpcp_from_lock(l); | ||
| 911 | int cpu, local_cpu; | ||
| 912 | unsigned long flags; | ||
| 913 | |||
| 914 | if (!is_realtime(t)) | ||
| 915 | /* we need to know the real-time priority */ | ||
| 916 | return -EPERM; | ||
| 917 | |||
| 918 | local_cpu = get_partition(t); | ||
| 919 | |||
| 920 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 921 | |||
| 922 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
| 923 | if (cpu != local_cpu) | ||
| 924 | { | ||
| 925 | sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu], | ||
| 926 | get_priority(t)); | ||
| 927 | TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n", | ||
| 928 | sem, sem->prio_ceiling[cpu], cpu); | ||
| 929 | } | ||
| 930 | |||
| 931 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 932 | |||
| 933 | return 0; | ||
| 934 | } | ||
| 935 | |||
| 936 | int pfp_mpcp_close(struct litmus_lock* l) | ||
| 937 | { | ||
| 938 | struct task_struct *t = current; | ||
| 939 | struct mpcp_semaphore *sem = mpcp_from_lock(l); | ||
| 940 | unsigned long flags; | ||
| 941 | |||
| 942 | int owner; | ||
| 943 | |||
| 944 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 945 | |||
| 946 | owner = sem->owner == t; | ||
| 947 | |||
| 948 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 949 | |||
| 950 | if (owner) | ||
| 951 | pfp_mpcp_unlock(l); | ||
| 952 | |||
| 953 | return 0; | ||
| 954 | } | ||
| 955 | |||
| 956 | void pfp_mpcp_free(struct litmus_lock* lock) | ||
| 957 | { | ||
| 958 | kfree(mpcp_from_lock(lock)); | ||
| 959 | } | ||
| 960 | |||
| 961 | static struct litmus_lock_ops pfp_mpcp_lock_ops = { | ||
| 962 | .close = pfp_mpcp_close, | ||
| 963 | .lock = pfp_mpcp_lock, | ||
| 964 | .open = pfp_mpcp_open, | ||
| 965 | .unlock = pfp_mpcp_unlock, | ||
| 966 | .deallocate = pfp_mpcp_free, | ||
| 967 | }; | ||
| 968 | |||
| 969 | static struct litmus_lock* pfp_new_mpcp(int vspin) | ||
| 970 | { | ||
| 971 | struct mpcp_semaphore* sem; | ||
| 972 | int cpu; | ||
| 973 | |||
| 974 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
| 975 | if (!sem) | ||
| 976 | return NULL; | ||
| 977 | |||
| 978 | sem->owner = NULL; | ||
| 979 | init_waitqueue_head(&sem->wait); | ||
| 980 | sem->litmus_lock.ops = &pfp_mpcp_lock_ops; | ||
| 981 | |||
| 982 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
| 983 | sem->prio_ceiling[cpu] = OMEGA_CEILING; | ||
| 984 | |||
| 985 | /* mark as virtual spinning */ | ||
| 986 | sem->vspin = vspin; | ||
| 987 | |||
| 988 | return &sem->litmus_lock; | ||
| 989 | } | ||
| 990 | |||
| 991 | |||
| 992 | /* ******************** PCP support ********************** */ | ||
| 993 | |||
| 994 | |||
| 995 | struct pcp_semaphore { | ||
| 996 | struct litmus_lock litmus_lock; | ||
| 997 | |||
| 998 | struct list_head ceiling; | ||
| 999 | |||
| 1000 | /* current resource holder */ | ||
| 1001 | struct task_struct *owner; | ||
| 1002 | |||
| 1003 | /* priority ceiling --- can be negative due to DPCP support */ | ||
| 1004 | int prio_ceiling; | ||
| 1005 | |||
| 1006 | /* on which processor is this PCP semaphore allocated? */ | ||
| 1007 | int on_cpu; | ||
| 1008 | }; | ||
| 1009 | |||
| 1010 | static inline struct pcp_semaphore* pcp_from_lock(struct litmus_lock* lock) | ||
| 1011 | { | ||
| 1012 | return container_of(lock, struct pcp_semaphore, litmus_lock); | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | |||
| 1016 | struct pcp_state { | ||
| 1017 | struct list_head system_ceiling; | ||
| 1018 | |||
| 1019 | /* highest-priority waiting task */ | ||
| 1020 | struct task_struct* hp_waiter; | ||
| 1021 | |||
| 1022 | /* list of jobs waiting to get past the system ceiling */ | ||
| 1023 | wait_queue_head_t ceiling_blocked; | ||
| 1024 | }; | ||
| 1025 | |||
| 1026 | static void pcp_init_state(struct pcp_state* s) | ||
| 1027 | { | ||
| 1028 | INIT_LIST_HEAD(&s->system_ceiling); | ||
| 1029 | s->hp_waiter = NULL; | ||
| 1030 | init_waitqueue_head(&s->ceiling_blocked); | ||
| 1031 | } | ||
| 1032 | |||
| 1033 | static DEFINE_PER_CPU(struct pcp_state, pcp_state); | ||
| 1034 | |||
| 1035 | /* assumes preemptions are off */ | ||
| 1036 | static struct pcp_semaphore* pcp_get_ceiling(void) | ||
| 1037 | { | ||
| 1038 | struct list_head* top = __get_cpu_var(pcp_state).system_ceiling.next; | ||
| 1039 | |||
| 1040 | if (top != &__get_cpu_var(pcp_state).system_ceiling) /* list non-empty? */ | ||
| 1041 | return list_entry(top, struct pcp_semaphore, ceiling); | ||
| 1042 | else | ||
| 1043 | return NULL; | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | /* assumes preempt off */ | ||
| 1047 | static void pcp_add_ceiling(struct pcp_semaphore* sem) | ||
| 1048 | { | ||
| 1049 | struct list_head *pos; | ||
| 1050 | struct list_head *in_use = &__get_cpu_var(pcp_state).system_ceiling; | ||
| 1051 | struct pcp_semaphore* held; | ||
| 1052 | |||
| 1053 | BUG_ON(sem->on_cpu != smp_processor_id()); | ||
| 1054 | BUG_ON(in_list(&sem->ceiling)); | ||
| 1055 | |||
| 1056 | list_for_each(pos, in_use) { | ||
| 1057 | held = list_entry(pos, struct pcp_semaphore, ceiling); | ||
| 1058 | if (held->prio_ceiling >= sem->prio_ceiling) { | ||
| 1059 | __list_add(&sem->ceiling, pos->prev, pos); | ||
| 1060 | return; | ||
| 1061 | } | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | /* we hit the end of the list */ | ||
| 1065 | |||
| 1066 | list_add_tail(&sem->ceiling, in_use); | ||
| 1067 | } | ||
| 1068 | |||
| 1069 | /* assumes preempt off */ | ||
| 1070 | static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling, | ||
| 1071 | struct task_struct* task, | ||
| 1072 | int effective_prio) | ||
| 1073 | { | ||
| 1074 | return ceiling == NULL || | ||
| 1075 | ceiling->prio_ceiling > effective_prio || | ||
| 1076 | ceiling->owner == task; | ||
| 1077 | } | ||
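The test above is the core PCP rule: a job may proceed only if its effective priority is higher (numerically smaller) than the current system ceiling, or if it itself owns the ceiling-setting semaphore. A standalone illustration with made-up values:

```c
/* Minimal standalone illustration of the ceiling test above; lower numbers
 * mean higher priority, and all values are made up for the example. */
#include <stdio.h>

struct sem { int prio_ceiling; int owner_pid; };

static int exceeds_ceiling(const struct sem *ceiling, int task_pid, int eprio)
{
	return ceiling == NULL ||
	       ceiling->prio_ceiling > eprio ||
	       ceiling->owner_pid == task_pid;
}

int main(void)
{
	struct sem held = { .prio_ceiling = 5, .owner_pid = 42 };

	printf("%d\n", exceeds_ceiling(&held, 7, 3));   /* 1: prio 3 beats ceiling 5 */
	printf("%d\n", exceeds_ceiling(&held, 7, 6));   /* 0: blocked by the ceiling */
	printf("%d\n", exceeds_ceiling(&held, 42, 6));  /* 1: the owner may always pass */
	printf("%d\n", exceeds_ceiling(NULL, 7, 6));    /* 1: no ceiling in effect */
	return 0;
}
```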
| 1078 | |||
| 1079 | /* assumes preempt off */ | ||
| 1080 | static void pcp_priority_inheritance(void) | ||
| 1081 | { | ||
| 1082 | unsigned long flags; | ||
| 1083 | pfp_domain_t* pfp = local_pfp; | ||
| 1084 | |||
| 1085 | struct pcp_semaphore* ceiling = pcp_get_ceiling(); | ||
| 1086 | struct task_struct *blocker, *blocked; | ||
| 1087 | |||
| 1088 | blocker = ceiling ? ceiling->owner : NULL; | ||
| 1089 | blocked = __get_cpu_var(pcp_state).hp_waiter; | ||
| 1090 | |||
| 1091 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
| 1092 | |||
| 1093 | /* Current is no longer inheriting anything by default. This should be | ||
| 1094 | * the currently scheduled job, and hence not currently queued. */ | ||
| 1095 | BUG_ON(current != pfp->scheduled); | ||
| 1096 | |||
| 1097 | fp_set_prio_inh(pfp, current, NULL); | ||
| 1098 | fp_set_prio_inh(pfp, blocked, NULL); | ||
| 1099 | fp_set_prio_inh(pfp, blocker, NULL); | ||
| 1100 | |||
| 1101 | |||
| 1102 | /* Let blocking job inherit priority of blocked job, if required. */ | ||
| 1103 | if (blocker && blocked && | ||
| 1104 | fp_higher_prio(blocked, blocker)) { | ||
| 1105 | TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n", | ||
| 1106 | blocked->comm, blocked->pid, | ||
| 1107 | get_priority(blocker), get_priority(blocked)); | ||
| 1108 | fp_set_prio_inh(pfp, blocker, blocked); | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | /* Check if anything changed. If the blocked job is current, then it is | ||
| 1112 | * just blocking and hence is going to call the scheduler anyway. */ | ||
| 1113 | if (blocked != current && | ||
| 1114 | fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) | ||
| 1115 | preempt(pfp); | ||
| 1116 | |||
| 1117 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
| 1118 | } | ||
| 1119 | |||
| 1120 | /* called with preemptions off */ | ||
| 1121 | static void pcp_raise_ceiling(struct pcp_semaphore* sem, | ||
| 1122 | int effective_prio) | ||
| 1123 | { | ||
| 1124 | struct task_struct* t = current; | ||
| 1125 | struct pcp_semaphore* ceiling; | ||
| 1126 | prio_wait_queue_t wait; | ||
| 1127 | unsigned int waiting_higher_prio; | ||
| 1128 | |||
| 1129 | do { | ||
| 1130 | ceiling = pcp_get_ceiling(); | ||
| 1131 | if (pcp_exceeds_ceiling(ceiling, t, effective_prio)) | ||
| 1132 | break; | ||
| 1133 | |||
| 1134 | TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n", | ||
| 1135 | sem, ceiling->owner->comm, ceiling->owner->pid); | ||
| 1136 | |||
| 1137 | /* we need to wait until the ceiling is lowered */ | ||
| 1138 | |||
| 1139 | /* enqueue in priority order */ | ||
| 1140 | init_prio_waitqueue_entry(&wait, t, prio_point(effective_prio)); | ||
| 1141 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
| 1142 | waiting_higher_prio = add_wait_queue_prio_exclusive( | ||
| 1143 | &__get_cpu_var(pcp_state).ceiling_blocked, &wait); | ||
| 1144 | |||
| 1145 | if (waiting_higher_prio == 0) { | ||
| 1146 | TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n"); | ||
| 1147 | |||
| 1148 | /* we are the new highest-priority waiting job | ||
| 1149 | * => update inheritance */ | ||
| 1150 | __get_cpu_var(pcp_state).hp_waiter = t; | ||
| 1151 | pcp_priority_inheritance(); | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | TS_LOCK_SUSPEND; | ||
| 1155 | |||
| 1156 | preempt_enable_no_resched(); | ||
| 1157 | schedule(); | ||
| 1158 | preempt_disable(); | ||
| 1159 | |||
| 1160 | /* pcp_resume_unblocked() removed us from wait queue */ | ||
| 1161 | |||
| 1162 | TS_LOCK_RESUME; | ||
| 1163 | } while(1); | ||
| 1164 | |||
| 1165 | TRACE_CUR("PCP got the ceiling and sem %p\n", sem); | ||
| 1166 | |||
| 1167 | /* We are good to go. The semaphore should be available. */ | ||
| 1168 | BUG_ON(sem->owner != NULL); | ||
| 1169 | |||
| 1170 | sem->owner = t; | ||
| 1171 | |||
| 1172 | pcp_add_ceiling(sem); | ||
| 1173 | } | ||
| 1174 | |||
| 1175 | static void pcp_resume_unblocked(void) | ||
| 1176 | { | ||
| 1177 | wait_queue_head_t *blocked = &__get_cpu_var(pcp_state).ceiling_blocked; | ||
| 1178 | unsigned long flags; | ||
| 1179 | prio_wait_queue_t* q; | ||
| 1180 | struct task_struct* t = NULL; | ||
| 1181 | |||
| 1182 | struct pcp_semaphore* ceiling = pcp_get_ceiling(); | ||
| 1183 | |||
| 1184 | spin_lock_irqsave(&blocked->lock, flags); | ||
| 1185 | |||
| 1186 | while (waitqueue_active(blocked)) { | ||
| 1187 | /* check first == highest-priority waiting job */ | ||
| 1188 | q = list_entry(blocked->task_list.next, | ||
| 1189 | prio_wait_queue_t, wq.task_list); | ||
| 1190 | t = (struct task_struct*) q->wq.private; | ||
| 1191 | |||
| 1192 | /* can it proceed now? => let it go */ | ||
| 1193 | if (pcp_exceeds_ceiling(ceiling, t, | ||
| 1194 | prio_from_point(q->priority))) { | ||
| 1195 | __remove_wait_queue(blocked, &q->wq); | ||
| 1196 | wake_up_process(t); | ||
| 1197 | } else { | ||
| 1198 | /* We are done. Update highest-priority waiter. */ | ||
| 1199 | __get_cpu_var(pcp_state).hp_waiter = t; | ||
| 1200 | goto out; | ||
| 1201 | } | ||
| 1202 | } | ||
| 1203 | /* If we get here, then there are no more waiting | ||
| 1204 | * jobs. */ | ||
| 1205 | __get_cpu_var(pcp_state).hp_waiter = NULL; | ||
| 1206 | out: | ||
| 1207 | spin_unlock_irqrestore(&blocked->lock, flags); | ||
| 1208 | } | ||
| 1209 | |||
| 1210 | /* assumes preempt off */ | ||
| 1211 | static void pcp_lower_ceiling(struct pcp_semaphore* sem) | ||
| 1212 | { | ||
| 1213 | BUG_ON(!in_list(&sem->ceiling)); | ||
| 1214 | BUG_ON(sem->owner != current); | ||
| 1215 | BUG_ON(sem->on_cpu != smp_processor_id()); | ||
| 1216 | |||
| 1217 | /* remove from ceiling list */ | ||
| 1218 | list_del(&sem->ceiling); | ||
| 1219 | |||
| 1220 | /* release */ | ||
| 1221 | sem->owner = NULL; | ||
| 1222 | |||
| 1223 | TRACE_CUR("PCP released sem %p\n", sem); | ||
| 1224 | |||
| 1225 | pcp_priority_inheritance(); | ||
| 1226 | |||
| 1227 | /* Wake up all ceiling-blocked jobs that now pass the ceiling. */ | ||
| 1228 | pcp_resume_unblocked(); | ||
| 1229 | } | ||
| 1230 | |||
| 1231 | static void pcp_update_prio_ceiling(struct pcp_semaphore* sem, | ||
| 1232 | int effective_prio) | ||
| 1233 | { | ||
| 1234 | /* This needs to be synchronized on something. | ||
| 1235 | * We might as well use the processor's waitqueue lock. | ||
| 1236 | * We assume this happens only before the task set starts execution | ||
| 1237 | * (i.e., during initialization), but it may happen on multiple | ||
| 1238 | * processors at the same time. | ||
| 1239 | */ | ||
| 1240 | unsigned long flags; | ||
| 1241 | |||
| 1242 | struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu); | ||
| 1243 | |||
| 1244 | spin_lock_irqsave(&s->ceiling_blocked.lock, flags); | ||
| 1245 | |||
| 1246 | sem->prio_ceiling = min(sem->prio_ceiling, effective_prio); | ||
| 1247 | |||
| 1248 | spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags); | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu) | ||
| 1252 | { | ||
| 1253 | sem->owner = NULL; | ||
| 1254 | INIT_LIST_HEAD(&sem->ceiling); | ||
| 1255 | sem->prio_ceiling = INT_MAX; | ||
| 1256 | sem->on_cpu = cpu; | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | int pfp_pcp_lock(struct litmus_lock* l) | ||
| 1260 | { | ||
| 1261 | struct task_struct* t = current; | ||
| 1262 | struct pcp_semaphore *sem = pcp_from_lock(l); | ||
| 1263 | |||
| 1264 | int eprio = effective_agent_priority(get_priority(t)); | ||
| 1265 | int from = get_partition(t); | ||
| 1266 | int to = sem->on_cpu; | ||
| 1267 | |||
| 1268 | if (!is_realtime(t) || from != to) | ||
| 1269 | return -EPERM; | ||
| 1270 | |||
| 1271 | /* prevent nested lock acquisition in global critical section */ | ||
| 1272 | if (tsk_rt(t)->num_locks_held) | ||
| 1273 | return -EBUSY; | ||
| 1274 | |||
| 1275 | preempt_disable(); | ||
| 1276 | |||
| 1277 | pcp_raise_ceiling(sem, eprio); | ||
| 1278 | |||
| 1279 | preempt_enable(); | ||
| 1280 | |||
| 1281 | tsk_rt(t)->num_local_locks_held++; | ||
| 1282 | |||
| 1283 | return 0; | ||
| 1284 | } | ||
| 1285 | |||
| 1286 | int pfp_pcp_unlock(struct litmus_lock* l) | ||
| 1287 | { | ||
| 1288 | struct task_struct *t = current; | ||
| 1289 | struct pcp_semaphore *sem = pcp_from_lock(l); | ||
| 1290 | |||
| 1291 | int err = 0; | ||
| 1292 | |||
| 1293 | preempt_disable(); | ||
| 1294 | |||
| 1295 | if (sem->on_cpu != smp_processor_id() || sem->owner != t) { | ||
| 1296 | err = -EINVAL; | ||
| 1297 | goto out; | ||
| 1298 | } | ||
| 1299 | |||
| 1300 | tsk_rt(t)->num_local_locks_held--; | ||
| 1301 | |||
| 1302 | /* give it back */ | ||
| 1303 | pcp_lower_ceiling(sem); | ||
| 1304 | |||
| 1305 | out: | ||
| 1306 | preempt_enable(); | ||
| 1307 | |||
| 1308 | return err; | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | int pfp_pcp_open(struct litmus_lock* l, void* __user config) | ||
| 1312 | { | ||
| 1313 | struct task_struct *t = current; | ||
| 1314 | struct pcp_semaphore *sem = pcp_from_lock(l); | ||
| 1315 | |||
| 1316 | int cpu, eprio; | ||
| 1317 | |||
| 1318 | if (!is_realtime(t)) | ||
| 1319 | /* we need to know the real-time priority */ | ||
| 1320 | return -EPERM; | ||
| 1321 | |||
| 1322 | if (!config) | ||
| 1323 | cpu = get_partition(t); | ||
| 1324 | else if (get_user(cpu, (int*) config)) | ||
| 1325 | return -EFAULT; | ||
| 1326 | |||
| 1327 | /* make sure the resource location matches */ | ||
| 1328 | if (cpu != sem->on_cpu) | ||
| 1329 | return -EINVAL; | ||
| 1330 | |||
| 1331 | eprio = effective_agent_priority(get_priority(t)); | ||
| 1332 | |||
| 1333 | pcp_update_prio_ceiling(sem, eprio); | ||
| 1334 | |||
| 1335 | return 0; | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | int pfp_pcp_close(struct litmus_lock* l) | ||
| 1339 | { | ||
| 1340 | struct task_struct *t = current; | ||
| 1341 | struct pcp_semaphore *sem = pcp_from_lock(l); | ||
| 1342 | |||
| 1343 | int owner = 0; | ||
| 1344 | |||
| 1345 | preempt_disable(); | ||
| 1346 | |||
| 1347 | if (sem->on_cpu == smp_processor_id()) | ||
| 1348 | owner = sem->owner == t; | ||
| 1349 | |||
| 1350 | preempt_enable(); | ||
| 1351 | |||
| 1352 | if (owner) | ||
| 1353 | pfp_pcp_unlock(l); | ||
| 1354 | |||
| 1355 | return 0; | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | void pfp_pcp_free(struct litmus_lock* lock) | ||
| 1359 | { | ||
| 1360 | kfree(pcp_from_lock(lock)); | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | |||
| 1364 | static struct litmus_lock_ops pfp_pcp_lock_ops = { | ||
| 1365 | .close = pfp_pcp_close, | ||
| 1366 | .lock = pfp_pcp_lock, | ||
| 1367 | .open = pfp_pcp_open, | ||
| 1368 | .unlock = pfp_pcp_unlock, | ||
| 1369 | .deallocate = pfp_pcp_free, | ||
| 1370 | }; | ||
| 1371 | |||
| 1372 | |||
| 1373 | static struct litmus_lock* pfp_new_pcp(int on_cpu) | ||
| 1374 | { | ||
| 1375 | struct pcp_semaphore* sem; | ||
| 1376 | |||
| 1377 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
| 1378 | if (!sem) | ||
| 1379 | return NULL; | ||
| 1380 | |||
| 1381 | sem->litmus_lock.ops = &pfp_pcp_lock_ops; | ||
| 1382 | pcp_init_semaphore(sem, on_cpu); | ||
| 1383 | |||
| 1384 | return &sem->litmus_lock; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | /* ******************** DPCP support ********************** */ | ||
| 1388 | |||
| 1389 | struct dpcp_semaphore { | ||
| 1390 | struct litmus_lock litmus_lock; | ||
| 1391 | struct pcp_semaphore pcp; | ||
| 1392 | int owner_cpu; | ||
| 1393 | }; | ||
| 1394 | |||
| 1395 | static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock) | ||
| 1396 | { | ||
| 1397 | return container_of(lock, struct dpcp_semaphore, litmus_lock); | ||
| 1398 | } | ||
| 1399 | |||
| 1400 | /* called with preemptions disabled */ | ||
| 1401 | static void pfp_migrate_to(int target_cpu) | ||
| 1402 | { | ||
| 1403 | struct task_struct* t = current; | ||
| 1404 | pfp_domain_t *from; | ||
| 1405 | |||
| 1406 | if (get_partition(t) == target_cpu) | ||
| 1407 | return; | ||
| 1408 | |||
| 1409 | /* make sure target_cpu makes sense */ | ||
| 1410 | BUG_ON(!cpu_online(target_cpu)); | ||
| 1411 | |||
| 1412 | local_irq_disable(); | ||
| 1413 | |||
| 1414 | /* scheduled task should not be in any ready or release queue */ | ||
| 1415 | BUG_ON(is_queued(t)); | ||
| 1416 | |||
| 1417 | /* only the source domain must be locked; pfp_finish_switch() handles the target */ | ||
| 1418 | from = task_pfp(t); | ||
| 1419 | |||
| 1420 | raw_spin_lock(&from->slock); | ||
| 1421 | |||
| 1422 | /* switch partitions */ | ||
| 1423 | tsk_rt(t)->task_params.cpu = target_cpu; | ||
| 1424 | |||
| 1425 | raw_spin_unlock(&from->slock); | ||
| 1426 | |||
| 1427 | /* Don't trace scheduler costs as part of | ||
| 1428 | * locking overhead. Scheduling costs are accounted for | ||
| 1429 | * explicitly. */ | ||
| 1430 | TS_LOCK_SUSPEND; | ||
| 1431 | |||
| 1432 | local_irq_enable(); | ||
| 1433 | preempt_enable_no_resched(); | ||
| 1434 | |||
| 1435 | /* deschedule to be migrated */ | ||
| 1436 | schedule(); | ||
| 1437 | |||
| 1438 | /* we are now on the target processor */ | ||
| 1439 | preempt_disable(); | ||
| 1440 | |||
| 1441 | /* start recording costs again */ | ||
| 1442 | TS_LOCK_RESUME; | ||
| 1443 | |||
| 1444 | BUG_ON(smp_processor_id() != target_cpu); | ||
| 1445 | } | ||
| 1446 | |||
| 1447 | int pfp_dpcp_lock(struct litmus_lock* l) | ||
| 1448 | { | ||
| 1449 | struct task_struct* t = current; | ||
| 1450 | struct dpcp_semaphore *sem = dpcp_from_lock(l); | ||
| 1451 | int eprio = effective_agent_priority(get_priority(t)); | ||
| 1452 | int from = get_partition(t); | ||
| 1453 | int to = sem->pcp.on_cpu; | ||
| 1454 | |||
| 1455 | if (!is_realtime(t)) | ||
| 1456 | return -EPERM; | ||
| 1457 | |||
| 1458 | /* prevent nested lock acquisition */ | ||
| 1459 | if (tsk_rt(t)->num_locks_held || | ||
| 1460 | tsk_rt(t)->num_local_locks_held) | ||
| 1461 | return -EBUSY; | ||
| 1462 | |||
| 1463 | preempt_disable(); | ||
| 1464 | |||
| 1465 | /* Priority-boost ourselves *before* we suspend so that | ||
| 1466 | * our priority is boosted when we resume. */ | ||
| 1467 | |||
| 1468 | boost_priority(t, get_priority(t)); | ||
| 1469 | |||
| 1470 | pfp_migrate_to(to); | ||
| 1471 | |||
| 1472 | pcp_raise_ceiling(&sem->pcp, eprio); | ||
| 1473 | |||
| 1474 | /* yep, we got it => execute request */ | ||
| 1475 | sem->owner_cpu = from; | ||
| 1476 | |||
| 1477 | preempt_enable(); | ||
| 1478 | |||
| 1479 | tsk_rt(t)->num_locks_held++; | ||
| 1480 | |||
| 1481 | return 0; | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | int pfp_dpcp_unlock(struct litmus_lock* l) | ||
| 1485 | { | ||
| 1486 | struct task_struct *t = current; | ||
| 1487 | struct dpcp_semaphore *sem = dpcp_from_lock(l); | ||
| 1488 | int err = 0; | ||
| 1489 | int home; | ||
| 1490 | |||
| 1491 | preempt_disable(); | ||
| 1492 | |||
| 1493 | if (sem->pcp.on_cpu != smp_processor_id() || sem->pcp.owner != t) { | ||
| 1494 | err = -EINVAL; | ||
| 1495 | goto out; | ||
| 1496 | } | ||
| 1497 | |||
| 1498 | tsk_rt(t)->num_locks_held--; | ||
| 1499 | |||
| 1500 | home = sem->owner_cpu; | ||
| 1501 | |||
| 1502 | /* give it back */ | ||
| 1503 | pcp_lower_ceiling(&sem->pcp); | ||
| 1504 | |||
| 1505 | /* we lose the benefit of priority boosting */ | ||
| 1506 | unboost_priority(t); | ||
| 1507 | |||
| 1508 | pfp_migrate_to(home); | ||
| 1509 | |||
| 1510 | out: | ||
| 1511 | preempt_enable(); | ||
| 1512 | |||
| 1513 | return err; | ||
| 1514 | } | ||
| 1515 | |||
| 1516 | int pfp_dpcp_open(struct litmus_lock* l, void* __user config) | ||
| 1517 | { | ||
| 1518 | struct task_struct *t = current; | ||
| 1519 | struct dpcp_semaphore *sem = dpcp_from_lock(l); | ||
| 1520 | int cpu, eprio; | ||
| 1521 | |||
| 1522 | if (!is_realtime(t)) | ||
| 1523 | /* we need to know the real-time priority */ | ||
| 1524 | return -EPERM; | ||
| 1525 | |||
| 1526 | if (get_user(cpu, (int*) config)) | ||
| 1527 | return -EFAULT; | ||
| 1528 | |||
| 1529 | /* make sure the resource location matches */ | ||
| 1530 | if (cpu != sem->pcp.on_cpu) | ||
| 1531 | return -EINVAL; | ||
| 1532 | |||
| 1533 | eprio = effective_agent_priority(get_priority(t)); | ||
| 1534 | |||
| 1535 | pcp_update_prio_ceiling(&sem->pcp, eprio); | ||
| 1536 | |||
| 1537 | return 0; | ||
| 1538 | } | ||
| 1539 | |||
| 1540 | int pfp_dpcp_close(struct litmus_lock* l) | ||
| 1541 | { | ||
| 1542 | struct task_struct *t = current; | ||
| 1543 | struct dpcp_semaphore *sem = dpcp_from_lock(l); | ||
| 1544 | int owner = 0; | ||
| 1545 | |||
| 1546 | preempt_disable(); | ||
| 1547 | |||
| 1548 | if (sem->pcp.on_cpu == smp_processor_id()) | ||
| 1549 | owner = sem->pcp.owner == t; | ||
| 1550 | |||
| 1551 | preempt_enable(); | ||
| 1552 | |||
| 1553 | if (owner) | ||
| 1554 | pfp_dpcp_unlock(l); | ||
| 1555 | |||
| 1556 | return 0; | ||
| 1557 | } | ||
| 1558 | |||
| 1559 | void pfp_dpcp_free(struct litmus_lock* lock) | ||
| 1560 | { | ||
| 1561 | kfree(dpcp_from_lock(lock)); | ||
| 1562 | } | ||
| 1563 | |||
| 1564 | static struct litmus_lock_ops pfp_dpcp_lock_ops = { | ||
| 1565 | .close = pfp_dpcp_close, | ||
| 1566 | .lock = pfp_dpcp_lock, | ||
| 1567 | .open = pfp_dpcp_open, | ||
| 1568 | .unlock = pfp_dpcp_unlock, | ||
| 1569 | .deallocate = pfp_dpcp_free, | ||
| 1570 | }; | ||
| 1571 | |||
| 1572 | static struct litmus_lock* pfp_new_dpcp(int on_cpu) | ||
| 1573 | { | ||
| 1574 | struct dpcp_semaphore* sem; | ||
| 1575 | |||
| 1576 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
| 1577 | if (!sem) | ||
| 1578 | return NULL; | ||
| 1579 | |||
| 1580 | sem->litmus_lock.ops = &pfp_dpcp_lock_ops; | ||
| 1581 | sem->owner_cpu = NO_CPU; | ||
| 1582 | pcp_init_semaphore(&sem->pcp, on_cpu); | ||
| 1583 | |||
| 1584 | return &sem->litmus_lock; | ||
| 1585 | } | ||
| 1586 | |||
| 1587 | |||
| 1588 | /* **** lock constructor **** */ | ||
| 1589 | |||
| 1590 | |||
| 1591 | static long pfp_allocate_lock(struct litmus_lock **lock, int type, | ||
| 1592 | void* __user config) | ||
| 1593 | { | ||
| 1594 | int err = -ENXIO, cpu; | ||
| 1595 | struct srp_semaphore* srp; | ||
| 1596 | |||
| 1597 | /* P-FP currently supports the SRP and the PCP for local resources and | ||
| 1598 | * the FMLP, the MPCP (with and without virtual spinning), and the DPCP | ||
| 1599 | * for global resources. */ | ||
| 1599 | switch (type) { | ||
| 1600 | case FMLP_SEM: | ||
| 1601 | /* FIFO Mutex Locking Protocol */ | ||
| 1602 | *lock = pfp_new_fmlp(); | ||
| 1603 | if (*lock) | ||
| 1604 | err = 0; | ||
| 1605 | else | ||
| 1606 | err = -ENOMEM; | ||
| 1607 | break; | ||
| 1608 | |||
| 1609 | case MPCP_SEM: | ||
| 1610 | /* Multiprocessor Priority Ceiling Protocol */ | ||
| 1611 | *lock = pfp_new_mpcp(0); | ||
| 1612 | if (*lock) | ||
| 1613 | err = 0; | ||
| 1614 | else | ||
| 1615 | err = -ENOMEM; | ||
| 1616 | break; | ||
| 1617 | |||
| 1618 | case MPCP_VS_SEM: | ||
| 1619 | /* Multiprocessor Priority Ceiling Protocol with virtual spinning */ | ||
| 1620 | *lock = pfp_new_mpcp(1); | ||
| 1621 | if (*lock) | ||
| 1622 | err = 0; | ||
| 1623 | else | ||
| 1624 | err = -ENOMEM; | ||
| 1625 | break; | ||
| 1626 | |||
| 1627 | case DPCP_SEM: | ||
| 1628 | /* Distributed Priority Ceiling Protocol */ | ||
| 1629 | if (get_user(cpu, (int*) config)) | ||
| 1630 | return -EFAULT; | ||
| 1631 | |||
| 1632 | if (!cpu_online(cpu)) | ||
| 1633 | return -EINVAL; | ||
| 1634 | |||
| 1635 | *lock = pfp_new_dpcp(cpu); | ||
| 1636 | if (*lock) | ||
| 1637 | err = 0; | ||
| 1638 | else | ||
| 1639 | err = -ENOMEM; | ||
| 1640 | break; | ||
| 1641 | |||
| 1642 | case SRP_SEM: | ||
| 1643 | /* Baker's Stack Resource Policy */ | ||
| 1644 | srp = allocate_srp_semaphore(); | ||
| 1645 | if (srp) { | ||
| 1646 | *lock = &srp->litmus_lock; | ||
| 1647 | err = 0; | ||
| 1648 | } else | ||
| 1649 | err = -ENOMEM; | ||
| 1650 | break; | ||
| 1651 | |||
| 1652 | case PCP_SEM: | ||
| 1653 | /* Priority Ceiling Protocol */ | ||
| 1654 | if (!config) | ||
| 1655 | cpu = get_partition(current); | ||
| 1656 | else if (get_user(cpu, (int*) config)) | ||
| 1657 | return -EFAULT; | ||
| 1658 | |||
| 1659 | if (!cpu_online(cpu)) | ||
| 1660 | return -EINVAL; | ||
| 1661 | |||
| 1662 | *lock = pfp_new_pcp(cpu); | ||
| 1663 | if (*lock) | ||
| 1664 | err = 0; | ||
| 1665 | else | ||
| 1666 | err = -ENOMEM; | ||
| 1667 | break; | ||
| 1668 | }; | ||
| 1669 | |||
| 1670 | return err; | ||
| 1671 | } | ||
| 1672 | |||
| 1673 | #endif | ||
| 1674 | |||
| 1675 | static long pfp_admit_task(struct task_struct* tsk) | ||
| 1676 | { | ||
| 1677 | if (task_cpu(tsk) == tsk->rt_param.task_params.cpu && | ||
| 1678 | #ifdef CONFIG_RELEASE_MASTER | ||
| 1679 | /* don't allow tasks on release master CPU */ | ||
| 1680 | task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master && | ||
| 1681 | #endif | ||
| 1682 | litmus_is_valid_fixed_prio(get_priority(tsk))) | ||
| 1683 | return 0; | ||
| 1684 | else | ||
| 1685 | return -EINVAL; | ||
| 1686 | } | ||
| 1687 | |||
| 1688 | static long pfp_activate_plugin(void) | ||
| 1689 | { | ||
| 1690 | #if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING) | ||
| 1691 | int cpu; | ||
| 1692 | #endif | ||
| 1693 | |||
| 1694 | #ifdef CONFIG_RELEASE_MASTER | ||
| 1695 | for_each_online_cpu(cpu) { | ||
| 1696 | remote_dom(cpu)->release_master = atomic_read(&release_master_cpu); | ||
| 1697 | } | ||
| 1698 | #endif | ||
| 1699 | |||
| 1700 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 1701 | get_srp_prio = pfp_get_srp_prio; | ||
| 1702 | |||
| 1703 | for_each_online_cpu(cpu) { | ||
| 1704 | init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu)); | ||
| 1705 | per_cpu(mpcpvs_vspin, cpu) = NULL; | ||
| 1706 | |||
| 1707 | pcp_init_state(&per_cpu(pcp_state, cpu)); | ||
| 1708 | pfp_doms[cpu] = remote_pfp(cpu); | ||
| 1709 | } | ||
| 1710 | |||
| 1711 | #endif | ||
| 1712 | |||
| 1713 | return 0; | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | |||
| 1717 | /* Plugin object */ | ||
| 1718 | static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = { | ||
| 1719 | .plugin_name = "P-FP", | ||
| 1720 | .tick = pfp_tick, | ||
| 1721 | .task_new = pfp_task_new, | ||
| 1722 | .complete_job = complete_job, | ||
| 1723 | .task_exit = pfp_task_exit, | ||
| 1724 | .schedule = pfp_schedule, | ||
| 1725 | .task_wake_up = pfp_task_wake_up, | ||
| 1726 | .task_block = pfp_task_block, | ||
| 1727 | .admit_task = pfp_admit_task, | ||
| 1728 | .activate_plugin = pfp_activate_plugin, | ||
| 1729 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 1730 | .allocate_lock = pfp_allocate_lock, | ||
| 1731 | .finish_switch = pfp_finish_switch, | ||
| 1732 | #endif | ||
| 1733 | }; | ||
| 1734 | |||
| 1735 | |||
| 1736 | static int __init init_pfp(void) | ||
| 1737 | { | ||
| 1738 | int i; | ||
| 1739 | |||
| 1740 | /* We do not really want to support CPU hotplug. If we ever did, | ||
| 1741 | * we could not rely on num_online_cpus() to enumerate the | ||
| 1742 | * partitions here. | ||
| 1743 | */ | ||
| 1744 | for (i = 0; i < num_online_cpus(); i++) { | ||
| 1745 | pfp_domain_init(remote_pfp(i), i); | ||
| 1746 | } | ||
| 1747 | return register_sched_plugin(&pfp_plugin); | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | module_init(init_pfp); | ||
| 1751 | |||
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c new file mode 100644 index 00000000000..00a1900d645 --- /dev/null +++ b/litmus/sched_plugin.c | |||
| @@ -0,0 +1,227 @@ | |||
| 1 | /* sched_plugin.c -- core infrastructure for the scheduler plugin system | ||
| 2 | * | ||
| 3 | * This file includes the initialization of the plugin system, the no-op Linux | ||
| 4 | * scheduler plugin, some dummy functions, and some helper functions. | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <linux/list.h> | ||
| 8 | #include <linux/spinlock.h> | ||
| 9 | #include <linux/sched.h> | ||
| 10 | |||
| 11 | #include <litmus/litmus.h> | ||
| 12 | #include <litmus/sched_plugin.h> | ||
| 13 | #include <litmus/preempt.h> | ||
| 14 | #include <litmus/jobs.h> | ||
| 15 | |||
| 16 | /* | ||
| 17 | * Generic function to trigger preemption on either local or remote cpu | ||
| 18 | * from scheduler plugins. The key feature is that this function is | ||
| 19 | * non-preemptive section aware and does not invoke the scheduler / send | ||
| 20 | * IPIs if the to-be-preempted task is actually non-preemptive. | ||
| 21 | */ | ||
| 22 | void preempt_if_preemptable(struct task_struct* t, int cpu) | ||
| 23 | { | ||
| 24 | /* t is the real-time task executing on CPU cpu. If t is NULL, then | ||
| 25 | * cpu is currently scheduling background work. | ||
| 26 | */ | ||
| 27 | |||
| 28 | int reschedule = 0; | ||
| 29 | |||
| 30 | if (!t) | ||
| 31 | /* move non-real-time task out of the way */ | ||
| 32 | reschedule = 1; | ||
| 33 | else { | ||
| 34 | if (smp_processor_id() == cpu) { | ||
| 35 | /* local CPU case */ | ||
| 36 | /* check if we need to poke userspace */ | ||
| 37 | if (is_user_np(t)) | ||
| 38 | /* Yes, poke it. This doesn't have to be atomic since | ||
| 39 | * the task is definitely not executing. */ | ||
| 40 | request_exit_np(t); | ||
| 41 | else if (!is_kernel_np(t)) | ||
| 42 | /* only if we are allowed to preempt the | ||
| 43 | * currently-executing task */ | ||
| 44 | reschedule = 1; | ||
| 45 | } else { | ||
| 46 | /* Remote CPU case. Only notify if it's not a kernel | ||
| 47 | * NP section and if we didn't set the userspace | ||
| 48 | * flag. */ | ||
| 49 | reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | if (likely(reschedule)) | ||
| 53 | litmus_reschedule(cpu); | ||
| 54 | } | ||
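A condensed, standalone sketch of the decision logic above; the atomic userspace exit-flag handshake (`request_exit_np_atomic()`) is reduced to a plain boolean parameter here, so this is only an approximation of the real control flow:

```c
/* Standalone approximation of preempt_if_preemptable()'s decision. */
#include <stdio.h>

static int should_send_resched(int has_rt_task, int on_local_cpu,
			       int user_np, int kernel_np, int exit_flag_set)
{
	if (!has_rt_task)
		return 1;			/* background work: always kick the CPU */
	if (on_local_cpu)
		return !user_np && !kernel_np;	/* user NP: poke userspace instead */
	/* remote CPU: skip the IPI if the task is in a kernel NP section or the
	 * exit-request flag could be set (userspace will yield on its own) */
	return !(kernel_np || exit_flag_set);
}

int main(void)
{
	printf("%d\n", should_send_resched(1, 1, 0, 0, 0)); /* 1: preempt local RT task */
	printf("%d\n", should_send_resched(1, 0, 0, 1, 0)); /* 0: remote task is kernel-NP */
	return 0;
}
```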
| 55 | |||
| 56 | |||
| 57 | /************************************************************* | ||
| 58 | * Dummy plugin functions * | ||
| 59 | *************************************************************/ | ||
| 60 | |||
| 61 | static void litmus_dummy_finish_switch(struct task_struct * prev) | ||
| 62 | { | ||
| 63 | } | ||
| 64 | |||
| 65 | static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) | ||
| 66 | { | ||
| 67 | sched_state_task_picked(); | ||
| 68 | return NULL; | ||
| 69 | } | ||
| 70 | |||
| 71 | static void litmus_dummy_tick(struct task_struct* tsk) | ||
| 72 | { | ||
| 73 | } | ||
| 74 | |||
| 75 | static long litmus_dummy_admit_task(struct task_struct* tsk) | ||
| 76 | { | ||
| 77 | printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", | ||
| 78 | tsk->comm, tsk->pid); | ||
| 79 | return -EINVAL; | ||
| 80 | } | ||
| 81 | |||
| 82 | static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) | ||
| 83 | { | ||
| 84 | } | ||
| 85 | |||
| 86 | static void litmus_dummy_task_wake_up(struct task_struct *task) | ||
| 87 | { | ||
| 88 | } | ||
| 89 | |||
| 90 | static void litmus_dummy_task_block(struct task_struct *task) | ||
| 91 | { | ||
| 92 | } | ||
| 93 | |||
| 94 | static void litmus_dummy_task_exit(struct task_struct *task) | ||
| 95 | { | ||
| 96 | } | ||
| 97 | |||
| 98 | static long litmus_dummy_complete_job(void) | ||
| 99 | { | ||
| 100 | return -ENOSYS; | ||
| 101 | } | ||
| 102 | |||
| 103 | static long litmus_dummy_activate_plugin(void) | ||
| 104 | { | ||
| 105 | return 0; | ||
| 106 | } | ||
| 107 | |||
| 108 | static long litmus_dummy_deactivate_plugin(void) | ||
| 109 | { | ||
| 110 | return 0; | ||
| 111 | } | ||
| 112 | |||
| 113 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 114 | |||
| 115 | static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, | ||
| 116 | void* __user config) | ||
| 117 | { | ||
| 118 | return -ENXIO; | ||
| 119 | } | ||
| 120 | |||
| 121 | #endif | ||
| 122 | |||
| 123 | |||
| 124 | /* The default scheduler plugin. It doesn't do anything and lets Linux do its | ||
| 125 | * job. | ||
| 126 | */ | ||
| 127 | struct sched_plugin linux_sched_plugin = { | ||
| 128 | .plugin_name = "Linux", | ||
| 129 | .tick = litmus_dummy_tick, | ||
| 130 | .task_new = litmus_dummy_task_new, | ||
| 131 | .task_exit = litmus_dummy_task_exit, | ||
| 132 | .task_wake_up = litmus_dummy_task_wake_up, | ||
| 133 | .task_block = litmus_dummy_task_block, | ||
| 134 | .complete_job = litmus_dummy_complete_job, | ||
| 135 | .schedule = litmus_dummy_schedule, | ||
| 136 | .finish_switch = litmus_dummy_finish_switch, | ||
| 137 | .activate_plugin = litmus_dummy_activate_plugin, | ||
| 138 | .deactivate_plugin = litmus_dummy_deactivate_plugin, | ||
| 139 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 140 | .allocate_lock = litmus_dummy_allocate_lock, | ||
| 141 | #endif | ||
| 142 | .admit_task = litmus_dummy_admit_task | ||
| 143 | }; | ||
| 144 | |||
| 145 | /* | ||
| 146 | * The reference to the current plugin that is used to schedule tasks within | ||
| 147 | * the system. It stores references to the actual function implementations. | ||
| 148 | * It should be initialized by calling "init_***_plugin()". | ||
| 149 | */ | ||
| 150 | struct sched_plugin *litmus = &linux_sched_plugin; | ||
| 151 | |||
| 152 | /* the list of registered scheduling plugins */ | ||
| 153 | static LIST_HEAD(sched_plugins); | ||
| 154 | static DEFINE_RAW_SPINLOCK(sched_plugins_lock); | ||
| 155 | |||
| 156 | #define CHECK(func) {\ | ||
| 157 | if (!plugin->func) \ | ||
| 158 | plugin->func = litmus_dummy_ ## func;} | ||
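The CHECK() macro lets plugins leave callbacks unset: `register_sched_plugin()` below substitutes the matching `litmus_dummy_*` implementation for every NULL pointer. A hypothetical skeleton plugin relying on that (the plugin name and choice of callbacks are made up for illustration):

```c
/* Hypothetical skeleton plugin; every callback left NULL is filled in
 * with its litmus_dummy_* counterpart by register_sched_plugin(). */
static struct sched_plugin demo_plugin = {
	.plugin_name = "DEMO",
	.schedule    = litmus_dummy_schedule,	/* stand-in for a real scheduler */
	.admit_task  = litmus_dummy_admit_task,	/* reject all tasks for now */
	/* .tick, .task_new, .task_exit, ... intentionally left NULL */
};

/* somewhere in the plugin's module init:
 *	register_sched_plugin(&demo_plugin);
 */
```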
| 159 | |||
| 160 | /* FIXME: get reference to module */ | ||
| 161 | int register_sched_plugin(struct sched_plugin* plugin) | ||
| 162 | { | ||
| 163 | printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", | ||
| 164 | plugin->plugin_name); | ||
| 165 | |||
| 166 | /* make sure we don't trip over null pointers later */ | ||
| 167 | CHECK(finish_switch); | ||
| 168 | CHECK(schedule); | ||
| 169 | CHECK(tick); | ||
| 170 | CHECK(task_wake_up); | ||
| 171 | CHECK(task_exit); | ||
| 172 | CHECK(task_block); | ||
| 173 | CHECK(task_new); | ||
| 174 | CHECK(complete_job); | ||
| 175 | CHECK(activate_plugin); | ||
| 176 | CHECK(deactivate_plugin); | ||
| 177 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 178 | CHECK(allocate_lock); | ||
| 179 | #endif | ||
| 180 | CHECK(admit_task); | ||
| 181 | |||
| 182 | if (!plugin->release_at) | ||
| 183 | plugin->release_at = release_at; | ||
| 184 | |||
| 185 | raw_spin_lock(&sched_plugins_lock); | ||
| 186 | list_add(&plugin->list, &sched_plugins); | ||
| 187 | raw_spin_unlock(&sched_plugins_lock); | ||
| 188 | |||
| 189 | return 0; | ||
| 190 | } | ||
| 191 | |||
| 192 | |||
| 193 | /* FIXME: reference counting, etc. */ | ||
| 194 | struct sched_plugin* find_sched_plugin(const char* name) | ||
| 195 | { | ||
| 196 | struct list_head *pos; | ||
| 197 | struct sched_plugin *plugin; | ||
| 198 | |||
| 199 | raw_spin_lock(&sched_plugins_lock); | ||
| 200 | list_for_each(pos, &sched_plugins) { | ||
| 201 | plugin = list_entry(pos, struct sched_plugin, list); | ||
| 202 | if (!strcmp(plugin->plugin_name, name)) | ||
| 203 | goto out_unlock; | ||
| 204 | } | ||
| 205 | plugin = NULL; | ||
| 206 | |||
| 207 | out_unlock: | ||
| 208 | raw_spin_unlock(&sched_plugins_lock); | ||
| 209 | return plugin; | ||
| 210 | } | ||
| 211 | |||
| 212 | int print_sched_plugins(char* buf, int max) | ||
| 213 | { | ||
| 214 | int count = 0; | ||
| 215 | struct list_head *pos; | ||
| 216 | struct sched_plugin *plugin; | ||
| 217 | |||
| 218 | raw_spin_lock(&sched_plugins_lock); | ||
| 219 | list_for_each(pos, &sched_plugins) { | ||
| 220 | plugin = list_entry(pos, struct sched_plugin, list); | ||
| 221 | count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name); | ||
| 222 | if (max - count <= 0) | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | raw_spin_unlock(&sched_plugins_lock); | ||
| 226 | return count; | ||
| 227 | } | ||
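The CHECK() macro above backfills every callback that a plugin leaves NULL with the corresponding litmus_dummy_* stub, so the rest of the kernel can invoke plugin callbacks without NULL checks. The following self-contained userspace sketch illustrates the same pattern; the toy_* names are made up for illustration and are not part of LITMUS^RT.

```c
#include <stdio.h>

/* Toy analogue of struct sched_plugin: a table of callbacks. */
struct toy_plugin {
	const char *name;
	long (*complete_job)(void);
	long (*activate)(void);
};

static long toy_dummy_complete_job(void) { return -1; /* stand-in for -ENOSYS */ }
static long toy_dummy_activate(void)     { return 0; }

/* Same idea as CHECK(func): substitute a dummy for any NULL slot. */
#define TOY_CHECK(p, func) do {                 \
	if (!(p)->func)                         \
		(p)->func = toy_dummy_ ## func; \
} while (0)

static void toy_register(struct toy_plugin *p)
{
	TOY_CHECK(p, complete_job);
	TOY_CHECK(p, activate);
	printf("registered %s\n", p->name);
}

int main(void)
{
	/* Plugin that only provides activate(); complete_job is left NULL. */
	struct toy_plugin p = { .name = "demo", .activate = toy_dummy_activate };

	toy_register(&p);
	/* Safe to call: the NULL slot was replaced by the dummy. */
	printf("complete_job() -> %ld\n", p.complete_job());
	return 0;
}
```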
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c new file mode 100644 index 00000000000..6f4d4adcec0 --- /dev/null +++ b/litmus/sched_psn_edf.c | |||
| @@ -0,0 +1,999 @@ | |||
| 1 | /* | ||
| 2 | * kernel/sched_psn_edf.c | ||
| 3 | * | ||
| 4 | * Implementation of the PSN-EDF scheduler plugin. | ||
| 5 | * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c. | ||
| 6 | * | ||
| 7 | * Suspensions and non-preemptable sections are supported. | ||
| 8 | * Priority inheritance is not supported. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/percpu.h> | ||
| 12 | #include <linux/sched.h> | ||
| 13 | #include <linux/list.h> | ||
| 14 | #include <linux/spinlock.h> | ||
| 15 | #include <linux/module.h> | ||
| 16 | |||
| 17 | #include <litmus/litmus.h> | ||
| 18 | #include <litmus/jobs.h> | ||
| 19 | #include <litmus/preempt.h> | ||
| 20 | #include <litmus/budget.h> | ||
| 21 | #include <litmus/sched_plugin.h> | ||
| 22 | #include <litmus/edf_common.h> | ||
| 23 | #include <litmus/sched_trace.h> | ||
| 24 | #include <litmus/trace.h> | ||
| 25 | #include <litmus/fdso.h> | ||
| 26 | |||
| 27 | typedef struct { | ||
| 28 | rt_domain_t domain; | ||
| 29 | int cpu; | ||
| 30 | struct task_struct* scheduled; /* only RT tasks */ | ||
| 31 | /* | ||
| 32 | * scheduling lock slock | ||
| 33 | * protects the domain and serializes scheduling decisions | ||
| 34 | */ | ||
| 35 | #define slock domain.ready_lock | ||
| 36 | |||
| 37 | } psnedf_domain_t; | ||
| 38 | |||
| 39 | DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains); | ||
| 40 | |||
| 41 | #define local_edf (&__get_cpu_var(psnedf_domains).domain) | ||
| 42 | #define local_pedf (&__get_cpu_var(psnedf_domains)) | ||
| 43 | #define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain) | ||
| 44 | #define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu)) | ||
| 45 | #define task_edf(task) remote_edf(get_partition(task)) | ||
| 46 | #define task_pedf(task) remote_pedf(get_partition(task)) | ||
| 47 | |||
| 48 | |||
| 49 | static void psnedf_domain_init(psnedf_domain_t* pedf, | ||
| 50 | check_resched_needed_t check, | ||
| 51 | release_jobs_t release, | ||
| 52 | int cpu) | ||
| 53 | { | ||
| 54 | edf_domain_init(&pedf->domain, check, release); | ||
| 55 | pedf->cpu = cpu; | ||
| 56 | pedf->scheduled = NULL; | ||
| 57 | } | ||
| 58 | |||
| 59 | static void requeue(struct task_struct* t, rt_domain_t *edf) | ||
| 60 | { | ||
| 61 | if (t->state != TASK_RUNNING) | ||
| 62 | STRACE_TASK(t, "requeue: !TASK_RUNNING\n"); | ||
| 63 | |||
| 64 | tsk_rt(t)->completed = 0; | ||
| 65 | if (is_early_releasing(t) || is_released(t, litmus_clock())) | ||
| 66 | __add_ready(edf, t); | ||
| 67 | else | ||
| 68 | add_release(edf, t); /* it has got to wait */ | ||
| 69 | } | ||
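requeue() sends a job either to the ready queue (it is eligible to run now) or to the release queue (its next release lies in the future). A minimal userspace restatement of just that decision, under the assumption of a monotone clock and with purely illustrative names:

```c
#include <stdio.h>
#include <stdbool.h>

/* A job goes on the ready queue only if it may run now (early releasing,
 * or its release time has passed); otherwise it waits on the release queue.
 */
static const char *requeue_target(bool early_releasing,
				  unsigned long long release_time,
				  unsigned long long now)
{
	if (early_releasing || release_time <= now)
		return "ready queue";
	return "release queue";
}

int main(void)
{
	printf("%s\n", requeue_target(false, 1000, 1500));	/* ready queue   */
	printf("%s\n", requeue_target(false, 2000, 1500));	/* release queue */
	return 0;
}
```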
| 70 | |||
| 71 | /* we assume the lock is being held */ | ||
| 72 | static void preempt(psnedf_domain_t *pedf) | ||
| 73 | { | ||
| 74 | preempt_if_preemptable(pedf->scheduled, pedf->cpu); | ||
| 75 | } | ||
| 76 | |||
| 77 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 78 | |||
| 79 | static void boost_priority(struct task_struct* t) | ||
| 80 | { | ||
| 81 | unsigned long flags; | ||
| 82 | psnedf_domain_t* pedf = task_pedf(t); | ||
| 83 | lt_t now; | ||
| 84 | |||
| 85 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
| 86 | now = litmus_clock(); | ||
| 87 | |||
| 88 | STRACE_TASK(t, "priority boosted at %llu\n", now); | ||
| 89 | |||
| 90 | tsk_rt(t)->priority_boosted = 1; | ||
| 91 | tsk_rt(t)->boost_start_time = now; | ||
| 92 | |||
| 93 | if (pedf->scheduled != t) { | ||
| 94 | /* holder may be queued: first stop queue changes */ | ||
| 95 | raw_spin_lock(&pedf->domain.release_lock); | ||
| 96 | if (is_queued(t) && | ||
| 97 | /* If it is queued, then we need to re-order. */ | ||
| 98 | bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) && | ||
| 99 | /* If we bubbled to the top, then we need to check for preemptions. */ | ||
| 100 | edf_preemption_needed(&pedf->domain, pedf->scheduled)) | ||
| 101 | preempt(pedf); | ||
| 102 | raw_spin_unlock(&pedf->domain.release_lock); | ||
| 103 | } /* else: nothing to do since the job is not queued while scheduled */ | ||
| 104 | |||
| 105 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
| 106 | } | ||
| 107 | |||
| 108 | static void unboost_priority(struct task_struct* t) | ||
| 109 | { | ||
| 110 | unsigned long flags; | ||
| 111 | psnedf_domain_t* pedf = task_pedf(t); | ||
| 112 | lt_t now; | ||
| 113 | |||
| 114 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
| 115 | now = litmus_clock(); | ||
| 116 | |||
| 117 | /* assumption: this only happens when the job is scheduled */ | ||
| 118 | BUG_ON(pedf->scheduled != t); | ||
| 119 | |||
| 120 | STRACE_TASK(t, "priority restored at %llu\n", now); | ||
| 121 | |||
| 122 | /* priority boosted jobs must be scheduled */ | ||
| 123 | BUG_ON(pedf->scheduled != t); | ||
| 124 | |||
| 125 | tsk_rt(t)->priority_boosted = 0; | ||
| 126 | tsk_rt(t)->boost_start_time = 0; | ||
| 127 | |||
| 128 | /* check if this changes anything */ | ||
| 129 | if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) | ||
| 130 | preempt(pedf); | ||
| 131 | |||
| 132 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
| 133 | } | ||
| 134 | |||
| 135 | #endif | ||
| 136 | |||
| 137 | static int psnedf_preempt_check(psnedf_domain_t *pedf) | ||
| 138 | { | ||
| 139 | if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) { | ||
| 140 | preempt(pedf); | ||
| 141 | return 1; | ||
| 142 | } else | ||
| 143 | return 0; | ||
| 144 | } | ||
| 145 | |||
| 146 | /* This check is trivial in partitioned systems as we only have to consider | ||
| 147 | * the CPU of the partition. | ||
| 148 | */ | ||
| 149 | static int psnedf_check_resched(rt_domain_t *edf) | ||
| 150 | { | ||
| 151 | psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain); | ||
| 152 | |||
| 153 | /* because this is a callback from rt_domain_t we already hold | ||
| 154 | * the necessary lock for the ready queue | ||
| 155 | */ | ||
| 156 | return psnedf_preempt_check(pedf); | ||
| 157 | } | ||
| 158 | |||
| 159 | static void job_completion(struct task_struct* t, int forced) | ||
| 160 | { | ||
| 161 | sched_trace_task_completion(t,forced); | ||
| 162 | STRACE_TASK(t, "job_completion().\n"); | ||
| 163 | |||
| 164 | tsk_rt(t)->completed = 1; | ||
| 165 | prepare_for_next_period(t); | ||
| 166 | } | ||
| 167 | |||
| 168 | static void psnedf_tick(struct task_struct *t) | ||
| 169 | { | ||
| 170 | psnedf_domain_t *pedf = local_pedf; | ||
| 171 | |||
| 172 | /* Check for inconsistency. We don't need the lock for this since | ||
| 173 | * ->scheduled is only changed in schedule, which obviously is not | ||
| 174 | * executing in parallel on this CPU | ||
| 175 | */ | ||
| 176 | BUG_ON(is_realtime(t) && t != pedf->scheduled); | ||
| 177 | |||
| 178 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
| 179 | if (!is_np(t)) { | ||
| 180 | litmus_reschedule_local(); | ||
| 181 | TRACE("psnedf_scheduler_tick: " | ||
| 182 | "%d is preemptable " | ||
| 183 | " => FORCE_RESCHED\n", t->pid); | ||
| 184 | } else if (is_user_np(t)) { | ||
| 185 | TRACE("psnedf_scheduler_tick: " | ||
| 186 | "%d is non-preemptable, " | ||
| 187 | "preemption delayed.\n", t->pid); | ||
| 188 | request_exit_np(t); | ||
| 189 | } | ||
| 190 | } | ||
| 191 | } | ||
| 192 | |||
| 193 | static struct task_struct* psnedf_schedule(struct task_struct * prev) | ||
| 194 | { | ||
| 195 | psnedf_domain_t* pedf = local_pedf; | ||
| 196 | rt_domain_t* edf = &pedf->domain; | ||
| 197 | struct task_struct* next; | ||
| 198 | |||
| 199 | int out_of_time, sleep, preempt, | ||
| 200 | np, exists, blocks, resched; | ||
| 201 | |||
| 202 | if (prev && is_realtime(prev)){ | ||
| 203 | TRACE_TASK(prev, "Rescheduling\n"); | ||
| 204 | }else{ | ||
| 205 | TRACE("Rescheduling\n"); | ||
| 206 | } | ||
| 207 | |||
| 208 | raw_spin_lock(&pedf->slock); | ||
| 209 | |||
| 210 | /* sanity checking | ||
| 211 | * unlike GSN-EDF, when a task exits (dies), | ||
| 212 | * pedf->scheduled may be NULL and prev _is_ realtime | ||
| 213 | */ | ||
| 214 | BUG_ON(pedf->scheduled && pedf->scheduled != prev); | ||
| 215 | BUG_ON(pedf->scheduled && !is_realtime(prev)); | ||
| 216 | |||
| 217 | /* (0) Determine state */ | ||
| 218 | exists = pedf->scheduled != NULL; | ||
| 219 | blocks = exists && !is_running(pedf->scheduled); | ||
| 220 | out_of_time = exists && | ||
| 221 | budget_enforced(pedf->scheduled) && | ||
| 222 | budget_exhausted(pedf->scheduled); | ||
| 223 | np = exists && is_np(pedf->scheduled); | ||
| 224 | sleep = exists && is_completed(pedf->scheduled); | ||
| 225 | preempt = edf_preemption_needed(edf, prev); | ||
| 226 | |||
| 227 | /* If we need to preempt do so. | ||
| 228 | * The following checks set resched to 1 in case of special | ||
| 229 | * circumstances. | ||
| 230 | */ | ||
| 231 | resched = preempt; | ||
| 232 | |||
| 233 | /* If a task blocks we have no choice but to reschedule. | ||
| 234 | */ | ||
| 235 | if (blocks) | ||
| 236 | resched = 1; | ||
| 237 | |||
| 238 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
| 239 | * Multiple calls to request_exit_np() don't hurt. | ||
| 240 | */ | ||
| 241 | if (np && (out_of_time || preempt || sleep)) | ||
| 242 | request_exit_np(pedf->scheduled); | ||
| 243 | |||
| 244 | /* Any task that is preemptable and either exhausts its execution | ||
| 245 | * budget or wants to sleep completes. We may have to reschedule after | ||
| 246 | * this. | ||
| 247 | */ | ||
| 248 | if (!np && (out_of_time || sleep) && !blocks) { | ||
| 249 | job_completion(pedf->scheduled, !sleep); | ||
| 250 | resched = 1; | ||
| 251 | } | ||
| 252 | |||
| 253 | /* The final scheduling decision. Do we need to switch for some reason? | ||
| 254 | * Switch if we are in RT mode and have no task or if we need to | ||
| 255 | * resched. | ||
| 256 | */ | ||
| 257 | next = NULL; | ||
| 258 | if ((!np || blocks) && (resched || !exists)) { | ||
| 259 | /* When preempting a task that does not block, then | ||
| 260 | * re-insert it into either the ready queue or the | ||
| 261 | * release queue (if it completed). requeue() picks | ||
| 262 | * the appropriate queue. | ||
| 263 | */ | ||
| 264 | if (pedf->scheduled && !blocks) | ||
| 265 | requeue(pedf->scheduled, edf); | ||
| 266 | next = __take_ready(edf); | ||
| 267 | } else | ||
| 268 | /* Only override Linux scheduler if we have a real-time task | ||
| 269 | * scheduled that needs to continue. | ||
| 270 | */ | ||
| 271 | if (exists) | ||
| 272 | next = prev; | ||
| 273 | |||
| 274 | if (next) { | ||
| 275 | STRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
| 276 | tsk_rt(next)->completed = 0; | ||
| 277 | } else { | ||
| 278 | STRACE("becoming idle at %llu\n", litmus_clock()); | ||
| 279 | } | ||
| 280 | |||
| 281 | pedf->scheduled = next; | ||
| 282 | sched_state_task_picked(); | ||
| 283 | raw_spin_unlock(&pedf->slock); | ||
| 284 | |||
| 285 | return next; | ||
| 286 | } | ||
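psnedf_schedule() reduces the scheduling decision to a handful of state flags. The sketch below restates that boolean logic as a plain userspace function so the individual cases can be checked in isolation; it deliberately omits the queues and locking, and every name is illustrative.

```c
#include <stdio.h>
#include <stdbool.h>

/* Given the per-job state flags, decide whether the next job is taken
 * from the ready queue (true) or the current job keeps the processor.
 */
static bool pick_from_ready_queue(bool exists, bool blocks, bool out_of_time,
				  bool np, bool sleep, bool preempt)
{
	/* resched becomes true on preemption, blocking, or job completion */
	bool resched = preempt || blocks ||
		(!np && (out_of_time || sleep) && !blocks);

	/* take a new job unless a non-preemptive job keeps running */
	return (!np || blocks) && (resched || !exists);
}

int main(void)
{
	/* preemptible job exhausted its budget -> pick from ready queue */
	printf("%d\n", pick_from_ready_queue(true, false, true, false, false, false));
	/* non-preemptive section, higher-priority job waiting -> keep running */
	printf("%d\n", pick_from_ready_queue(true, false, false, true, false, true));
	return 0;
}
```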
| 287 | |||
| 288 | |||
| 289 | /* Prepare a task for running in RT mode | ||
| 290 | */ | ||
| 291 | static void psnedf_task_new(struct task_struct * t, int on_rq, int running) | ||
| 292 | { | ||
| 293 | rt_domain_t* edf = task_edf(t); | ||
| 294 | psnedf_domain_t* pedf = task_pedf(t); | ||
| 295 | unsigned long flags; | ||
| 296 | |||
| 297 | TRACE_TASK(t, "psn edf: task new, cpu = %d\n", | ||
| 298 | t->rt_param.task_params.cpu); | ||
| 299 | |||
| 300 | /* setup job parameters */ | ||
| 301 | release_at(t, litmus_clock()); | ||
| 302 | |||
| 303 | /* The task should be running in the queue, otherwise signal | ||
| 304 | * code will try to wake it up with fatal consequences. | ||
| 305 | */ | ||
| 306 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
| 307 | if (running) { | ||
| 308 | /* there shouldn't be anything else running at the time */ | ||
| 309 | BUG_ON(pedf->scheduled); | ||
| 310 | pedf->scheduled = t; | ||
| 311 | } else { | ||
| 312 | requeue(t, edf); | ||
| 313 | /* maybe we have to reschedule */ | ||
| 314 | psnedf_preempt_check(pedf); | ||
| 315 | } | ||
| 316 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
| 317 | } | ||
| 318 | |||
| 319 | static void psnedf_task_wake_up(struct task_struct *task) | ||
| 320 | { | ||
| 321 | unsigned long flags; | ||
| 322 | psnedf_domain_t* pedf = task_pedf(task); | ||
| 323 | rt_domain_t* edf = task_edf(task); | ||
| 324 | lt_t now; | ||
| 325 | |||
| 326 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
| 327 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
| 328 | BUG_ON(is_queued(task)); | ||
| 329 | now = litmus_clock(); | ||
| 330 | if (is_sporadic(task) && is_tardy(task, now) | ||
| 331 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 332 | /* We need to take suspensions because of semaphores into | ||
| 333 | * account! If a job resumes after being suspended due to acquiring | ||
| 334 | * a semaphore, it should never be treated as a new job release. | ||
| 335 | */ | ||
| 336 | && !is_priority_boosted(task) | ||
| 337 | #endif | ||
| 338 | ) { | ||
| 339 | /* new sporadic release */ | ||
| 340 | release_at(task, now); | ||
| 341 | sched_trace_task_release(task); | ||
| 342 | } | ||
| 343 | |||
| 344 | /* Only add to ready queue if it is not the currently-scheduled | ||
| 345 | * task. This could be the case if a task was woken up concurrently | ||
| 346 | * on a remote CPU before the executing CPU got around to actually | ||
| 347 | * de-scheduling the task, i.e., wake_up() raced with schedule() | ||
| 348 | * and won. | ||
| 349 | */ | ||
| 350 | if (pedf->scheduled != task) { | ||
| 351 | requeue(task, edf); | ||
| 352 | psnedf_preempt_check(pedf); | ||
| 353 | } | ||
| 354 | |||
| 355 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
| 356 | STRACE_TASK(task, "wake up done\n"); | ||
| 357 | } | ||
| 358 | |||
| 359 | static void psnedf_task_block(struct task_struct *t) | ||
| 360 | { | ||
| 361 | /* only running tasks can block, thus t is in no queue */ | ||
| 362 | TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); | ||
| 363 | |||
| 364 | BUG_ON(!is_realtime(t)); | ||
| 365 | BUG_ON(is_queued(t)); | ||
| 366 | } | ||
| 367 | |||
| 368 | static void psnedf_task_exit(struct task_struct * t) | ||
| 369 | { | ||
| 370 | unsigned long flags; | ||
| 371 | psnedf_domain_t* pedf = task_pedf(t); | ||
| 372 | rt_domain_t* edf; | ||
| 373 | |||
| 374 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
| 375 | if (is_queued(t)) { | ||
| 376 | /* dequeue */ | ||
| 377 | edf = task_edf(t); | ||
| 378 | remove(edf, t); | ||
| 379 | } | ||
| 380 | if (pedf->scheduled == t) | ||
| 381 | pedf->scheduled = NULL; | ||
| 382 | |||
| 383 | TRACE_TASK(t, "RIP, now reschedule\n"); | ||
| 384 | |||
| 385 | preempt(pedf); | ||
| 386 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
| 387 | } | ||
| 388 | |||
| 389 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 390 | |||
| 391 | #include <litmus/fdso.h> | ||
| 392 | #include <litmus/srp.h> | ||
| 393 | |||
| 394 | /* ******************** SRP support ************************ */ | ||
| 395 | |||
| 396 | static unsigned int psnedf_get_srp_prio(struct task_struct* t) | ||
| 397 | { | ||
| 398 | /* assumes implicit deadlines */ | ||
| 399 | return get_rt_period(t); | ||
| 400 | } | ||
| 401 | |||
| 402 | /* ******************** FMLP support ********************** */ | ||
| 403 | |||
| 404 | /* struct for semaphore with priority inheritance */ | ||
| 405 | struct fmlp_semaphore { | ||
| 406 | struct litmus_lock litmus_lock; | ||
| 407 | |||
| 408 | /* current resource holder */ | ||
| 409 | struct task_struct *owner; | ||
| 410 | |||
| 411 | /* FIFO queue of waiting tasks */ | ||
| 412 | wait_queue_head_t wait; | ||
| 413 | }; | ||
| 414 | |||
| 415 | struct dgl_semaphore { | ||
| 416 | struct litmus_lock litmus_lock; | ||
| 417 | |||
| 418 | /* bitmask of resources that are currently locked. */ | ||
| 419 | resource_mask_t locked; | ||
| 420 | |||
| 421 | /* bitmask of resources in the file descriptor table that are controlled by | ||
| 422 | * this dgl_semaphore. | ||
| 423 | */ | ||
| 424 | resource_mask_t dgl_resources; | ||
| 425 | |||
| 426 | /* There can be no more than $m$ resource holders, because under | ||
| 427 | * partitioned scheduling, the resource holders are priority boosted, and | ||
| 428 | * it is impossible to have $>m$ boosted jobs. | ||
| 429 | */ | ||
| 430 | bool boosted[NR_CPUS]; | ||
| 431 | |||
| 432 | /* FIFO queue of waiting tasks */ | ||
| 433 | wait_queue_head_t wait; | ||
| 434 | }; | ||
| 435 | |||
| 436 | static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock) | ||
| 437 | { | ||
| 438 | return container_of(lock, struct fmlp_semaphore, litmus_lock); | ||
| 439 | } | ||
| 440 | |||
| 441 | static inline struct dgl_semaphore* dgl_from_lock(struct litmus_lock* lock) | ||
| 442 | { | ||
| 443 | return container_of(lock, struct dgl_semaphore, litmus_lock); | ||
| 444 | } | ||
| 445 | |||
| 446 | int psnedf_fmlp_lock(struct litmus_lock* l) | ||
| 447 | { | ||
| 448 | struct task_struct* t = current; | ||
| 449 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
| 450 | wait_queue_t wait; | ||
| 451 | unsigned long flags; | ||
| 452 | |||
| 453 | if (!is_realtime(t)) | ||
| 454 | return -EPERM; | ||
| 455 | |||
| 456 | /* prevent nested lock acquisition --- not supported by FMLP */ | ||
| 457 | if (tsk_rt(t)->num_locks_held || | ||
| 458 | tsk_rt(t)->num_local_locks_held) | ||
| 459 | return -EBUSY; | ||
| 460 | |||
| 461 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 462 | |||
| 463 | if (sem->owner) { | ||
| 464 | /* resource is not free => must suspend and wait */ | ||
| 465 | |||
| 466 | init_waitqueue_entry(&wait, t); | ||
| 467 | |||
| 468 | /* FIXME: interruptible would be nice some day */ | ||
| 469 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
| 470 | |||
| 471 | __add_wait_queue_tail_exclusive(&sem->wait, &wait); | ||
| 472 | |||
| 473 | TS_LOCK_SUSPEND; | ||
| 474 | |||
| 475 | /* release lock before sleeping */ | ||
| 476 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 477 | |||
| 478 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
| 479 | * when we wake up; we are guaranteed to have the lock since | ||
| 480 | * there is only one wake up per release. | ||
| 481 | */ | ||
| 482 | |||
| 483 | schedule(); | ||
| 484 | |||
| 485 | TS_LOCK_RESUME; | ||
| 486 | |||
| 487 | /* Since we hold the lock, no other task will change | ||
| 488 | * ->owner. We can thus check it without acquiring the spin | ||
| 489 | * lock. */ | ||
| 490 | BUG_ON(sem->owner != t); | ||
| 491 | } else { | ||
| 492 | /* it's ours now */ | ||
| 493 | sem->owner = t; | ||
| 494 | |||
| 495 | /* mark the task as priority-boosted. */ | ||
| 496 | boost_priority(t); | ||
| 497 | |||
| 498 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 499 | } | ||
| 500 | |||
| 501 | tsk_rt(t)->num_locks_held++; | ||
| 502 | |||
| 503 | return 0; | ||
| 504 | } | ||
| 505 | |||
| 506 | int psnedf_fmlp_unlock(struct litmus_lock* l) | ||
| 507 | { | ||
| 508 | struct task_struct *t = current, *next; | ||
| 509 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
| 510 | unsigned long flags; | ||
| 511 | int err = 0; | ||
| 512 | |||
| 513 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 514 | |||
| 515 | if (sem->owner != t) { | ||
| 516 | err = -EINVAL; | ||
| 517 | goto out; | ||
| 518 | } | ||
| 519 | |||
| 520 | tsk_rt(t)->num_locks_held--; | ||
| 521 | |||
| 522 | /* we lose the benefit of priority boosting */ | ||
| 523 | |||
| 524 | unboost_priority(t); | ||
| 525 | |||
| 526 | /* check if there are jobs waiting for this resource */ | ||
| 527 | next = __waitqueue_remove_first(&sem->wait); | ||
| 528 | if (next) { | ||
| 529 | /* boost next job */ | ||
| 530 | boost_priority(next); | ||
| 531 | |||
| 532 | /* next becomes the resource holder */ | ||
| 533 | sem->owner = next; | ||
| 534 | |||
| 535 | /* wake up next */ | ||
| 536 | wake_up_process(next); | ||
| 537 | } else | ||
| 538 | /* resource becomes available */ | ||
| 539 | sem->owner = NULL; | ||
| 540 | |||
| 541 | out: | ||
| 542 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 543 | return err; | ||
| 544 | } | ||
| 545 | |||
| 546 | int psnedf_fmlp_close(struct litmus_lock* l) | ||
| 547 | { | ||
| 548 | struct task_struct *t = current; | ||
| 549 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
| 550 | unsigned long flags; | ||
| 551 | |||
| 552 | int owner; | ||
| 553 | |||
| 554 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 555 | |||
| 556 | owner = sem->owner == t; | ||
| 557 | |||
| 558 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 559 | |||
| 560 | if (owner) | ||
| 561 | psnedf_fmlp_unlock(l); | ||
| 562 | |||
| 563 | return 0; | ||
| 564 | } | ||
| 565 | |||
| 566 | void psnedf_fmlp_free(struct litmus_lock* lock) | ||
| 567 | { | ||
| 568 | kfree(fmlp_from_lock(lock)); | ||
| 569 | } | ||
| 570 | |||
| 571 | static struct litmus_lock_ops psnedf_fmlp_lock_ops = { | ||
| 572 | .close = psnedf_fmlp_close, | ||
| 573 | .lock = psnedf_fmlp_lock, | ||
| 574 | .unlock = psnedf_fmlp_unlock, | ||
| 575 | .deallocate = psnedf_fmlp_free, | ||
| 576 | }; | ||
| 577 | |||
| 578 | int psnedf_dgl_close(struct litmus_lock* l) | ||
| 579 | { | ||
| 580 | return 0; | ||
| 581 | } | ||
| 582 | |||
| 583 | /* for compatibility, assume lock requests the whole group. */ | ||
| 584 | int psnedf_dgl_lock(struct litmus_lock* l) | ||
| 585 | { | ||
| 586 | return l->ops->dynamic_group_lock(l, dgl_from_lock(l)->dgl_resources); | ||
| 587 | } | ||
| 588 | |||
| 589 | /* for compatibility, assume unlock releases the whole group. */ | ||
| 590 | int psnedf_dgl_unlock(struct litmus_lock* l) | ||
| 591 | { | ||
| 592 | return l->ops->dynamic_group_unlock(l, dgl_from_lock(l)->dgl_resources); | ||
| 593 | } | ||
| 594 | |||
| 595 | /** | ||
| 596 | * This function checks that all resources requested in the mask are | ||
| 597 | * controlled by the DGL in l. This is validated in one of two ways. | ||
| 598 | * | ||
| 599 | * The dgl struct maintains a cache of resources known to be controlled by that | ||
| 600 | * particular dgl. If the requested resources are in that cache, return true. | ||
| 601 | * | ||
| 602 | * Note that this cache is not updated immediately when a resource is added to | ||
| 603 | * a group (there is no convenient hook for that). The first time a resource | ||
| 604 | * is requested, the cache is updated in the while loop below. This is done by | ||
| 605 | * checking that both fdso entries point to the same lock object. | ||
| 606 | */ | ||
| 607 | bool is_mask_valid(struct litmus_lock* l, resource_mask_t mask) | ||
| 608 | { | ||
| 609 | struct dgl_semaphore* d; | ||
| 610 | struct od_table_entry* entry; | ||
| 611 | resource_mask_t tmp; | ||
| 612 | int prev = -1; | ||
| 613 | |||
| 614 | if (l->type != DGL_SEM) | ||
| 615 | return false; | ||
| 616 | |||
| 617 | d = dgl_from_lock(l); | ||
| 618 | |||
| 619 | // mask -> d->dgl_resources (bitwise logical implication) | ||
| 620 | tmp = ~mask | d->dgl_resources; | ||
| 621 | |||
| 622 | // n.b. if tmp is 0xffffffff, then ffs(~tmp) - 1 == -1, which never exceeds prev, so the loop stops. | ||
| 623 | while (prev < ffs(~tmp) - 1) | ||
| 624 | { | ||
| 625 | prev = ffs(~tmp) - 1; | ||
| 626 | entry = get_entry_for_od( ffs(~tmp) - 1); | ||
| 627 | if (entry && is_lock(entry) && get_lock(entry) == l){ | ||
| 628 | d->dgl_resources = d->dgl_resources | ( 1 << (ffs(~tmp) -1) ); | ||
| 629 | } | ||
| 630 | tmp = ~mask|d->dgl_resources; | ||
| 631 | } | ||
| 632 | |||
| 633 | // 2's complement: -1 is 0xffffffff | ||
| 634 | |||
| 635 | if ( tmp == -1){ | ||
| 636 | return true; | ||
| 637 | } else { | ||
| 638 | return false; | ||
| 639 | } | ||
| 640 | |||
| 641 | } | ||
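is_mask_valid() relies on the identity that "every requested resource belongs to the group" is exactly the bitwise implication ~mask | d->dgl_resources evaluating to all ones. A small self-contained userspace example of that check, assuming resource_mask_t is a 32-bit unsigned type (an assumption, not confirmed by this patch):

```c
#include <stdio.h>
#include <stdint.h>

typedef uint32_t resource_mask_t;	/* assumed width for illustration */

/* "Every bit set in request is also set in owned", written as the bitwise
 * implication request -> owned, i.e. ~request | owned, which holds iff the
 * result is all ones.
 */
static int mask_implies(resource_mask_t request, resource_mask_t owned)
{
	return (resource_mask_t)(~request | owned) == (resource_mask_t)-1;
}

int main(void)
{
	resource_mask_t owned = 0x0000000f;	/* group controls resources 0-3 */

	printf("%d\n", mask_implies(0x00000005, owned));	/* 1: subset of the group */
	printf("%d\n", mask_implies(0x00000015, owned));	/* 0: bit 4 is outside it */
	return 0;
}
```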
| 642 | |||
| 643 | #define DGL_CONTAINER 1 | ||
| 644 | #define for_each_bit(field, idx) \ | ||
| 645 | for (idx = find_first_bit(&field, sizeof(field)*8); \ | ||
| 646 | idx < sizeof(field)*8; \ | ||
| 647 | idx = find_next_bit(&field, sizeof(field)*8, idx)) | ||
| 648 | |||
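for_each_bit() visits the index of every set bit in a resource mask, which is how the lock and unlock paths emit one trace event per resource in a group request. The userspace loop below, based on ffs(), mirrors that iteration; it is only an illustration of the idiom, not the kernel helpers themselves.

```c
#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	unsigned int resources = 0x29;	/* bits 0, 3, 5 set */
	unsigned int tmp = resources;

	/* visit each set bit, lowest first, clearing it as we go */
	while (tmp) {
		int resource = ffs(tmp) - 1;
		printf("resource %d\n", resource);
		tmp &= tmp - 1;		/* clear the lowest set bit */
	}
	return 0;
}
```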
| 649 | int psnedf_dgl_dynamic_group_lock(struct litmus_lock* l, resource_mask_t resources) | ||
| 650 | { | ||
| 651 | struct task_struct* t = current; | ||
| 652 | struct dgl_semaphore *sem = dgl_from_lock(l); | ||
| 653 | wait_queue_t wait; | ||
| 654 | int resource; | ||
| 655 | unsigned long flags; | ||
| 656 | |||
| 657 | TRACE_CUR("Trying to lock a DGL\n"); | ||
| 658 | |||
| 659 | if (!is_realtime(t)) | ||
| 660 | return -EPERM; | ||
| 661 | |||
| 662 | if ( !is_mask_valid(l, resources) ) | ||
| 663 | return -EINVAL; | ||
| 664 | |||
| 665 | t->resources = resources; | ||
| 666 | |||
| 667 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 668 | |||
| 669 | // if sem->locked & resources == 0, then all resources are available, | ||
| 670 | // otherwise we must suspend. | ||
| 671 | if (sem->locked & resources){ | ||
| 672 | |||
| 673 | STRACE("Resources locked, suspending\n"); | ||
| 674 | |||
| 675 | init_waitqueue_entry(&wait, t); | ||
| 676 | |||
| 677 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
| 678 | |||
| 679 | __add_wait_queue_tail_exclusive(&sem->wait, &wait); | ||
| 680 | |||
| 681 | TS_LOCK_SUSPEND; | ||
| 682 | |||
| 683 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 684 | |||
| 685 | schedule(); | ||
| 686 | |||
| 687 | TS_LOCK_RESUME; | ||
| 688 | } else { | ||
| 689 | |||
| 690 | STRACE("Acquired a resource\n"); | ||
| 691 | |||
| 692 | sem->locked = sem->locked | resources; | ||
| 693 | |||
| 694 | // if a job requests a resource, then it was scheduled, and therefore | ||
| 695 | // no other job on this CPU is currently boosted, so this is safe. | ||
| 696 | BUG_ON(sem->boosted[task_cpu(t)]); | ||
| 697 | |||
| 698 | boost_priority(t); | ||
| 699 | |||
| 700 | sem->boosted[task_cpu(t)] = true; | ||
| 701 | |||
| 702 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 703 | } | ||
| 704 | |||
| 705 | for_each_bit(resources, resource) | ||
| 706 | sched_trace_server_switch_to(resource, 0, t->pid, get_job_no(t), | ||
| 707 | get_partition(t)); | ||
| 708 | |||
| 709 | return 0; | ||
| 710 | } | ||
| 711 | |||
| 712 | inline int num_boosted(struct dgl_semaphore *sem) | ||
| 713 | { | ||
| 714 | int ret = 0; | ||
| 715 | int i; | ||
| 716 | for(i = 0; i < NR_CPUS; i++){ | ||
| 717 | ret += sem->boosted[i]; | ||
| 718 | } | ||
| 719 | return ret; | ||
| 720 | } | ||
| 721 | |||
| 722 | int psnedf_dgl_dynamic_group_unlock(struct litmus_lock* l, resource_mask_t resources) | ||
| 723 | { | ||
| 724 | struct task_struct *t = current, *tsk; | ||
| 725 | struct dgl_semaphore *sem = dgl_from_lock(l); | ||
| 726 | unsigned long flags; | ||
| 727 | int err = 0, resource; | ||
| 728 | resource_mask_t logically_locked; | ||
| 729 | struct list_head *pos, *tmp; | ||
| 730 | |||
| 731 | TRACE_CUR("Trying to unlock a DGL\n"); | ||
| 732 | |||
| 733 | // a job releasing resources must currently be priority-boosted | ||
| 734 | BUG_ON(!sem->boosted[task_cpu(t)]); | ||
| 735 | |||
| 736 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
| 737 | |||
| 738 | // ~resources | t->resources checks that t owns the resources being released | ||
| 739 | // note that a job can release a subset of the resources it has acquired. | ||
| 740 | if ( !is_mask_valid(l, resources)){ | ||
| 741 | STRACE("Invalid mask %d\n", resources); | ||
| 742 | err = -EINVAL; | ||
| 743 | goto out; | ||
| 744 | } else if ( (~resources | t->resources) != -1){ | ||
| 745 | STRACE("Trying to lock unowned resources: %d\t%d\n", resources, t->resources); | ||
| 746 | err = -EINVAL; | ||
| 747 | goto out; | ||
| 748 | } else { | ||
| 749 | sem->locked -= resources; | ||
| 750 | } | ||
| 751 | |||
| 752 | // if the job released all of the resources it owned, then unboost. | ||
| 753 | if (resources == t->resources){ | ||
| 754 | STRACE("Released all resources\n"); | ||
| 755 | unboost_priority(t); | ||
| 756 | sem->boosted[task_cpu(t)] = false; | ||
| 757 | } else { | ||
| 758 | // update t->resources to reflect the resources currently owned. | ||
| 759 | STRACE("Unlocked a subset of locked resources\n"); | ||
| 760 | t->resources = t->resources & ~resources; | ||
| 761 | } | ||
| 762 | |||
| 763 | logically_locked = sem->locked; | ||
| 764 | |||
| 765 | list_for_each_safe(pos, tmp, &sem->wait.task_list) { | ||
| 766 | tsk = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
| 767 | task_list)->private; | ||
| 768 | STRACE_TASK(tsk, "Evaluating\n"); | ||
| 769 | |||
| 770 | if ( (logically_locked == -1) || (num_boosted(sem) == NR_CPUS) ){ | ||
| 771 | STRACE_TASK(tsk, "All procs boosted, or all resources locked\n"); | ||
| 772 | break; | ||
| 773 | } | ||
| 774 | |||
| 775 | //STRACE_TASK(tsk, "Logically locked: %o\n", logically_locked); | ||
| 776 | //STRACE_TASK(tsk, "tsk->resources: %o\n", tsk->resources); | ||
| 777 | //STRACE_TASK(tsk, "!(tsk->resources & logically_locked): %o\n", !(tsk->resources & logically_locked)); | ||
| 778 | //STRACE_TASK(tsk, "!sem->boosted: %d\n", !sem->boosted[task_cpu(tsk)]); | ||
| 779 | |||
| 780 | // the resources requested are unlocked, tsk acquires its resources | ||
| 781 | if( !(tsk->resources & logically_locked) && !sem->boosted[task_cpu(tsk)]) { | ||
| 782 | |||
| 783 | STRACE_TASK(tsk, "Acquired a resource\n"); | ||
| 784 | |||
| 785 | list_del_init(pos); | ||
| 786 | |||
| 787 | sem->locked = sem->locked | tsk->resources; | ||
| 788 | |||
| 789 | sem->boosted[task_cpu(tsk)] = true; | ||
| 790 | boost_priority(tsk); | ||
| 791 | |||
| 792 | wake_up_process(tsk); | ||
| 793 | } | ||
| 794 | |||
| 795 | logically_locked = logically_locked | tsk->resources; | ||
| 796 | } | ||
| 797 | |||
| 798 | for_each_bit(resources, resource) | ||
| 799 | sched_trace_server_switch_away(resource, 0, t->pid, get_job_no(t), | ||
| 800 | get_partition(t)); | ||
| 801 | |||
| 802 | out: | ||
| 803 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
| 804 | return err; | ||
| 805 | } | ||
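The wake-up pass in psnedf_dgl_dynamic_group_unlock() scans waiters in FIFO order and uses logically_locked so that a later waiter can never be granted resources that overlap an earlier waiter's still-pending request. The sketch below reproduces just that pass in userspace; it drops the per-CPU boosting bookkeeping, and all names are illustrative.

```c
#include <stdio.h>

typedef unsigned int resource_mask_t;	/* assumed type, for illustration */

/* Scan waiters in FIFO order; grant a waiter its resources only if they do
 * not overlap anything locked or requested by an earlier waiter, which is
 * what accumulating into logically_locked enforces.
 */
static void grant_pass(resource_mask_t locked,
		       const resource_mask_t *waiters, int n)
{
	resource_mask_t logically_locked = locked;
	int i;

	for (i = 0; i < n; i++) {
		if (!(waiters[i] & logically_locked))
			printf("waiter %d granted mask 0x%x\n", i, waiters[i]);
		/* earlier requests shadow later ones, granted or not */
		logically_locked |= waiters[i];
	}
}

int main(void)
{
	/* resource 0 still locked; waiters ask for {0}, {1,2}, {2} */
	resource_mask_t waiters[] = { 0x1, 0x6, 0x4 };

	grant_pass(0x1, waiters, 3);	/* only the second waiter is granted */
	return 0;
}
```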
| 806 | |||
| 807 | void psnedf_dgl_free(struct litmus_lock* l) | ||
| 808 | { | ||
| 809 | //if (l) | ||
| 810 | // kfree(dgl_from_lock(l)); | ||
| 811 | TRACE("I'll free things later!\n"); | ||
| 812 | } | ||
| 813 | |||
| 814 | static struct litmus_lock_ops psnedf_dgl_lock_ops = { | ||
| 815 | .close = psnedf_dgl_close, | ||
| 816 | .lock = psnedf_dgl_lock, | ||
| 817 | .unlock = psnedf_dgl_unlock, | ||
| 818 | .dynamic_group_lock = psnedf_dgl_dynamic_group_lock, | ||
| 819 | .dynamic_group_unlock = psnedf_dgl_dynamic_group_unlock, | ||
| 820 | .deallocate = psnedf_dgl_free, | ||
| 821 | }; | ||
| 822 | |||
| 823 | static struct litmus_lock* psnedf_new_fmlp(void) | ||
| 824 | { | ||
| 825 | struct fmlp_semaphore* sem; | ||
| 826 | |||
| 827 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
| 828 | if (!sem) | ||
| 829 | return NULL; | ||
| 830 | |||
| 831 | sem->owner = NULL; | ||
| 832 | init_waitqueue_head(&sem->wait); | ||
| 833 | sem->litmus_lock.ops = &psnedf_fmlp_lock_ops; | ||
| 834 | |||
| 835 | return &sem->litmus_lock; | ||
| 836 | } | ||
| 837 | |||
| 838 | static struct litmus_lock* psnedf_new_dgl(void) | ||
| 839 | { | ||
| 840 | struct dgl_semaphore* sem; | ||
| 841 | int i; | ||
| 842 | |||
| 843 | TRACE("Creating another DGL\n"); | ||
| 844 | |||
| 845 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
| 846 | if (!sem) | ||
| 847 | return NULL; | ||
| 848 | |||
| 849 | sem->locked = 0; | ||
| 850 | sem->dgl_resources = 0; | ||
| 851 | |||
| 852 | for(i = 0; i < NR_CPUS; i++) | ||
| 853 | sem->boosted[i] = false; | ||
| 854 | |||
| 855 | init_waitqueue_head(&sem->wait); | ||
| 856 | sem->litmus_lock.ops = &psnedf_dgl_lock_ops; | ||
| 857 | sem->litmus_lock.type = DGL_SEM; | ||
| 858 | |||
| 859 | sched_trace_container_param(DGL_CONTAINER, "dgl"); | ||
| 860 | for (i = 0; i < sizeof(sem->dgl_resources)*8; ++i) { | ||
| 861 | sched_trace_server_param(i, DGL_CONTAINER, 0, 0); | ||
| 862 | } | ||
| 863 | |||
| 864 | return &sem->litmus_lock; | ||
| 865 | } | ||
| 866 | |||
| 867 | /* **** lock constructor **** */ | ||
| 868 | |||
| 869 | |||
| 870 | static long psnedf_allocate_lock(struct litmus_lock **lock, int type, | ||
| 871 | void* __user config) | ||
| 872 | { | ||
| 873 | int err = -ENXIO; | ||
| 874 | int config_num; | ||
| 875 | struct srp_semaphore* srp; | ||
| 876 | struct od_table_entry* entry; | ||
| 877 | |||
| 878 | /* PSN-EDF currently supports the SRP for local resources and the FMLP | ||
| 879 | * for global resources. */ | ||
| 880 | switch (type) { | ||
| 881 | case FMLP_SEM: | ||
| 882 | /* Flexible Multiprocessor Locking Protocol */ | ||
| 883 | *lock = psnedf_new_fmlp(); | ||
| 884 | if (*lock) | ||
| 885 | err = 0; | ||
| 886 | else | ||
| 887 | err = -ENOMEM; | ||
| 888 | break; | ||
| 889 | |||
| 890 | case SRP_SEM: | ||
| 891 | /* Baker's Stack Resource Policy */ | ||
| 892 | srp = allocate_srp_semaphore(); | ||
| 893 | if (srp) { | ||
| 894 | *lock = &srp->litmus_lock; | ||
| 895 | err = 0; | ||
| 896 | } else | ||
| 897 | err = -ENOMEM; | ||
| 898 | break; | ||
| 899 | |||
| 900 | case DGL_SEM: | ||
| 901 | /* assume that config is an int, and that config < 0 means create a new DGL, | ||
| 902 | * while config >= 0 means point this resource to the existing DGL | ||
| 903 | * for the resource in entry number config. | ||
| 904 | */ | ||
| 905 | config_num = *(int*)(config); | ||
| 906 | TRACE("config: %d\n", config_num); | ||
| 907 | if (config_num < 0){ | ||
| 908 | *lock = psnedf_new_dgl(); | ||
| 909 | if (*lock) | ||
| 910 | err = 0; | ||
| 911 | else | ||
| 912 | err = -ENOMEM; | ||
| 913 | /* In this case, we are adding a resource to an existing lock */ | ||
| 914 | } else { | ||
| 915 | entry = get_entry_for_od(config_num); | ||
| 916 | if (entry && entry->obj && entry->obj->type == DGL_SEM){ | ||
| 917 | *lock = (struct litmus_lock*) entry->obj->obj; | ||
| 918 | err = 0; | ||
| 919 | } else { | ||
| 920 | err = -EINVAL; | ||
| 921 | printk(KERN_DEBUG "Cannot add to that group!\n"); | ||
| 922 | while(1); | ||
| 923 | } | ||
| 924 | } | ||
| 925 | |||
| 926 | break; | ||
| 927 | }; | ||
| 928 | |||
| 929 | return err; | ||
| 930 | } | ||
| 931 | |||
| 932 | #endif | ||
| 933 | |||
| 934 | static long psnedf_activate_plugin(void) | ||
| 935 | { | ||
| 936 | #ifdef CONFIG_RELEASE_MASTER | ||
| 937 | int cpu; | ||
| 938 | |||
| 939 | for_each_online_cpu(cpu) { | ||
| 940 | remote_edf(cpu)->release_master = atomic_read(&release_master_cpu); | ||
| 941 | } | ||
| 942 | #endif | ||
| 943 | |||
| 944 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 945 | get_srp_prio = psnedf_get_srp_prio; | ||
| 946 | #endif | ||
| 947 | |||
| 948 | return 0; | ||
| 949 | } | ||
| 950 | |||
| 951 | static long psnedf_admit_task(struct task_struct* tsk) | ||
| 952 | { | ||
| 953 | if (task_cpu(tsk) == tsk->rt_param.task_params.cpu | ||
| 954 | #ifdef CONFIG_RELEASE_MASTER | ||
| 955 | /* don't allow tasks on release master CPU */ | ||
| 956 | && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master | ||
| 957 | #endif | ||
| 958 | ) | ||
| 959 | return 0; | ||
| 960 | else | ||
| 961 | return -EINVAL; | ||
| 962 | } | ||
| 963 | |||
| 964 | /* Plugin object */ | ||
| 965 | static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = { | ||
| 966 | .plugin_name = "PSN-EDF", | ||
| 967 | .tick = psnedf_tick, | ||
| 968 | .task_new = psnedf_task_new, | ||
| 969 | .complete_job = complete_job, | ||
| 970 | .task_exit = psnedf_task_exit, | ||
| 971 | .schedule = psnedf_schedule, | ||
| 972 | .task_wake_up = psnedf_task_wake_up, | ||
| 973 | .task_block = psnedf_task_block, | ||
| 974 | .admit_task = psnedf_admit_task, | ||
| 975 | .activate_plugin = psnedf_activate_plugin, | ||
| 976 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 977 | .allocate_lock = psnedf_allocate_lock, | ||
| 978 | #endif | ||
| 979 | }; | ||
| 980 | |||
| 981 | |||
| 982 | static int __init init_psn_edf(void) | ||
| 983 | { | ||
| 984 | int i; | ||
| 985 | |||
| 986 | /* We do not really want to support cpu hotplug, do we? ;) | ||
| 987 | * However, if we ever were to support it, | ||
| 988 | * we could not rely on num_online_cpus() here. | ||
| 989 | */ | ||
| 990 | for (i = 0; i < num_online_cpus(); i++) { | ||
| 991 | psnedf_domain_init(remote_pedf(i), | ||
| 992 | psnedf_check_resched, | ||
| 993 | NULL, i); | ||
| 994 | } | ||
| 995 | return register_sched_plugin(&psn_edf_plugin); | ||
| 996 | } | ||
| 997 | |||
| 998 | module_init(init_psn_edf); | ||
| 999 | |||
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c new file mode 100644 index 00000000000..3c42dfedac1 --- /dev/null +++ b/litmus/sched_task_trace.c | |||
| @@ -0,0 +1,272 @@ | |||
| 1 | /* | ||
| 2 | * sched_task_trace.c -- record scheduling events to a byte stream | ||
| 3 | */ | ||
| 4 | |||
| 5 | #define NO_TASK_TRACE_DECLS | ||
| 6 | |||
| 7 | #include <linux/module.h> | ||
| 8 | #include <linux/sched.h> | ||
| 9 | #include <linux/percpu.h> | ||
| 10 | |||
| 11 | #include <litmus/ftdev.h> | ||
| 12 | #include <litmus/litmus.h> | ||
| 13 | |||
| 14 | #include <litmus/sched_trace.h> | ||
| 15 | #include <litmus/feather_trace.h> | ||
| 16 | #include <litmus/ftdev.h> | ||
| 17 | |||
| 18 | #ifdef CONFIG_SCHED_LITMUS_TRACEPOINT | ||
| 19 | #define CREATE_TRACE_POINTS | ||
| 20 | #endif | ||
| 21 | |||
| 22 | #define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) | ||
| 23 | |||
| 24 | #define now() litmus_clock() | ||
| 25 | |||
| 26 | struct local_buffer { | ||
| 27 | struct st_event_record record[NO_EVENTS]; | ||
| 28 | char flag[NO_EVENTS]; | ||
| 29 | struct ft_buffer ftbuf; | ||
| 30 | }; | ||
| 31 | |||
| 32 | DEFINE_PER_CPU(struct local_buffer, st_event_buffer); | ||
| 33 | |||
| 34 | static struct ftdev st_dev; | ||
| 35 | |||
| 36 | static int st_dev_can_open(struct ftdev *dev, unsigned int cpu) | ||
| 37 | { | ||
| 38 | return cpu_online(cpu) ? 0 : -ENODEV; | ||
| 39 | } | ||
| 40 | |||
| 41 | static int __init init_sched_task_trace(void) | ||
| 42 | { | ||
| 43 | struct local_buffer* buf; | ||
| 44 | int i, ok = 0, err; | ||
| 45 | printk("Allocated %u sched_trace_xxx() events per CPU " | ||
| 46 | "(buffer size: %d bytes)\n", | ||
| 47 | NO_EVENTS, (int) sizeof(struct local_buffer)); | ||
| 48 | |||
| 49 | err = ftdev_init(&st_dev, THIS_MODULE, | ||
| 50 | num_online_cpus(), "sched_trace"); | ||
| 51 | if (err) | ||
| 52 | goto err_out; | ||
| 53 | |||
| 54 | for (i = 0; i < st_dev.minor_cnt; i++) { | ||
| 55 | buf = &per_cpu(st_event_buffer, i); | ||
| 56 | ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, | ||
| 57 | sizeof(struct st_event_record), | ||
| 58 | buf->flag, | ||
| 59 | buf->record); | ||
| 60 | st_dev.minor[i].buf = &buf->ftbuf; | ||
| 61 | } | ||
| 62 | if (ok == st_dev.minor_cnt) { | ||
| 63 | st_dev.can_open = st_dev_can_open; | ||
| 64 | err = register_ftdev(&st_dev); | ||
| 65 | if (err) | ||
| 66 | goto err_dealloc; | ||
| 67 | } else { | ||
| 68 | err = -EINVAL; | ||
| 69 | goto err_dealloc; | ||
| 70 | } | ||
| 71 | |||
| 72 | return 0; | ||
| 73 | |||
| 74 | err_dealloc: | ||
| 75 | ftdev_exit(&st_dev); | ||
| 76 | err_out: | ||
| 77 | printk(KERN_WARNING "Could not register sched_trace module\n"); | ||
| 78 | return err; | ||
| 79 | } | ||
| 80 | |||
| 81 | static void __exit exit_sched_task_trace(void) | ||
| 82 | { | ||
| 83 | ftdev_exit(&st_dev); | ||
| 84 | } | ||
| 85 | |||
| 86 | module_init(init_sched_task_trace); | ||
| 87 | module_exit(exit_sched_task_trace); | ||
| 88 | |||
| 89 | |||
| 90 | static inline struct st_event_record* get_record(u8 type, struct task_struct* t) | ||
| 91 | { | ||
| 92 | struct st_event_record* rec = NULL; | ||
| 93 | struct local_buffer* buf; | ||
| 94 | |||
| 95 | buf = &get_cpu_var(st_event_buffer); | ||
| 96 | if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) { | ||
| 97 | rec->hdr.type = type; | ||
| 98 | rec->hdr.cpu = smp_processor_id(); | ||
| 99 | rec->hdr.pid = t ? t->pid : 0; | ||
| 100 | rec->hdr.job = t ? t->rt_param.job_params.job_no : 0; | ||
| 101 | } else { | ||
| 102 | put_cpu_var(st_event_buffer); | ||
| 103 | } | ||
| 104 | /* rec will be NULL if it failed */ | ||
| 105 | return rec; | ||
| 106 | } | ||
| 107 | |||
| 108 | static inline void put_record(struct st_event_record* rec) | ||
| 109 | { | ||
| 110 | struct local_buffer* buf; | ||
| 111 | buf = &__get_cpu_var(st_event_buffer); | ||
| 112 | ft_buffer_finish_write(&buf->ftbuf, rec); | ||
| 113 | put_cpu_var(st_event_buffer); | ||
| 114 | } | ||
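get_record() and put_record() implement a claim/fill/commit protocol over a preallocated per-CPU ring: a slot is claimed and filled in, then committed by the caller, and when no slot is available the event is silently dropped. A much-simplified, single-threaded userspace sketch of that protocol (no per-CPU buffers, hypothetical names):

```c
#include <stdio.h>

#define SLOTS 4

struct record { int type; int pid; };

static struct record ring[SLOTS];
static int committed;

/* claim a free slot and fill in the header, or return NULL if full */
static struct record *claim(int type, int pid)
{
	if (committed >= SLOTS)
		return NULL;	/* buffer full: the event is dropped */
	ring[committed].type = type;
	ring[committed].pid  = pid;
	return &ring[committed];
}

/* make the filled-in record visible to readers */
static void commit(struct record *rec)
{
	(void)rec;
	committed++;
}

int main(void)
{
	struct record *rec = claim(1, 1234);

	if (rec)
		commit(rec);
	printf("records committed: %d\n", committed);
	return 0;
}
```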
| 115 | |||
| 116 | feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task) | ||
| 117 | { | ||
| 118 | struct task_struct *t = (struct task_struct*) _task; | ||
| 119 | struct st_event_record* rec = get_record(ST_NAME, t); | ||
| 120 | int i; | ||
| 121 | if (rec) { | ||
| 122 | for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++) | ||
| 123 | rec->data.name.cmd[i] = t->comm[i]; | ||
| 124 | put_record(rec); | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task) | ||
| 129 | { | ||
| 130 | struct task_struct *t = (struct task_struct*) _task; | ||
| 131 | struct st_event_record* rec = get_record(ST_PARAM, t); | ||
| 132 | if (rec) { | ||
| 133 | rec->data.param.wcet = get_exec_cost(t); | ||
| 134 | rec->data.param.period = get_rt_period(t); | ||
| 135 | rec->data.param.phase = get_rt_phase(t); | ||
| 136 | rec->data.param.partition = get_partition(t); | ||
| 137 | rec->data.param.class = get_class(t); | ||
| 138 | put_record(rec); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task) | ||
| 143 | { | ||
| 144 | struct task_struct *t = (struct task_struct*) _task; | ||
| 145 | struct st_event_record* rec = get_record(ST_RELEASE, t); | ||
| 146 | if (rec) { | ||
| 147 | rec->data.release.release = get_release(t); | ||
| 148 | rec->data.release.deadline = get_deadline(t); | ||
| 149 | put_record(rec); | ||
| 150 | } | ||
| 151 | } | ||
| 152 | |||
| 153 | /* skipped: st_assigned_data, we don't use it atm */ | ||
| 154 | |||
| 155 | feather_callback void do_sched_trace_task_switch_to(unsigned long id, | ||
| 156 | unsigned long _task) | ||
| 157 | { | ||
| 158 | struct task_struct *t = (struct task_struct*) _task; | ||
| 159 | struct st_event_record* rec; | ||
| 160 | if (is_realtime(t)) { | ||
| 161 | rec = get_record(ST_SWITCH_TO, t); | ||
| 162 | if (rec) { | ||
| 163 | rec->data.switch_to.when = now(); | ||
| 164 | rec->data.switch_to.exec_time = get_exec_time(t); | ||
| 165 | put_record(rec); | ||
| 166 | } | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | feather_callback void do_sched_trace_task_switch_away(unsigned long id, | ||
| 171 | unsigned long _task) | ||
| 172 | { | ||
| 173 | struct task_struct *t = (struct task_struct*) _task; | ||
| 174 | struct st_event_record* rec; | ||
| 175 | if (is_realtime(t)) { | ||
| 176 | rec = get_record(ST_SWITCH_AWAY, t); | ||
| 177 | if (rec) { | ||
| 178 | rec->data.switch_away.when = now(); | ||
| 179 | rec->data.switch_away.exec_time = get_exec_time(t); | ||
| 180 | put_record(rec); | ||
| 181 | } | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
| 185 | feather_callback void do_sched_trace_task_completion(unsigned long id, | ||
| 186 | unsigned long _task, | ||
| 187 | unsigned long forced) | ||
| 188 | { | ||
| 189 | struct task_struct *t = (struct task_struct*) _task; | ||
| 190 | struct st_event_record* rec = get_record(ST_COMPLETION, t); | ||
| 191 | if (rec) { | ||
| 192 | rec->data.completion.when = get_exec_time(t); | ||
| 193 | rec->data.completion.forced = forced; | ||
| 194 | put_record(rec); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | |||
| 198 | feather_callback void do_sched_trace_task_block(unsigned long id, | ||
| 199 | unsigned long _task) | ||
| 200 | { | ||
| 201 | struct task_struct *t = (struct task_struct*) _task; | ||
| 202 | struct st_event_record* rec = get_record(ST_BLOCK, t); | ||
| 203 | if (rec) { | ||
| 204 | rec->data.block.when = now(); | ||
| 205 | put_record(rec); | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | feather_callback void do_sched_trace_task_resume(unsigned long id, | ||
| 210 | unsigned long _task) | ||
| 211 | { | ||
| 212 | struct task_struct *t = (struct task_struct*) _task; | ||
| 213 | struct st_event_record* rec = get_record(ST_RESUME, t); | ||
| 214 | if (rec) { | ||
| 215 | rec->data.resume.when = now(); | ||
| 216 | put_record(rec); | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
| 220 | feather_callback void do_sched_trace_sys_release(unsigned long id, | ||
| 221 | unsigned long _start) | ||
| 222 | { | ||
| 223 | lt_t *start = (lt_t*) _start; | ||
| 224 | struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL); | ||
| 225 | if (rec) { | ||
| 226 | rec->data.sys_release.when = now(); | ||
| 227 | rec->data.sys_release.release = *start; | ||
| 228 | put_record(rec); | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | feather_callback void do_sched_trace_task_exit(unsigned long id, | ||
| 233 | unsigned long _task) | ||
| 234 | { | ||
| 235 | struct task_struct *t = (struct task_struct*) _task; | ||
| 236 | const lt_t max_exec_time = tsk_rt(t)->max_exec_time; | ||
| 237 | const lt_t avg_exec_time = tsk_rt(t)->tot_exec_time / (get_job_no(t) - 1); | ||
| 238 | |||
| 239 | struct st_event_record *rec = get_record(ST_TASK_EXIT, t); | ||
| 240 | if (rec) { | ||
| 241 | rec->data.task_exit.avg_exec_time = avg_exec_time; | ||
| 242 | rec->data.task_exit.max_exec_time = max_exec_time; | ||
| 243 | put_record(rec); | ||
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | feather_callback void do_sched_trace_task_tardy(unsigned long id, | ||
| 248 | unsigned long _task) | ||
| 249 | { | ||
| 250 | struct task_struct *t = (struct task_struct*) _task; | ||
| 251 | struct st_event_record *rec = get_record(ST_TASK_TARDY, t); | ||
| 252 | if (rec) { | ||
| 253 | rec->data.task_tardy.max_tardy = tsk_rt(t)->max_tardy; | ||
| 254 | rec->data.task_tardy.total_tardy = tsk_rt(t)->total_tardy; | ||
| 255 | rec->data.task_tardy.missed = tsk_rt(t)->missed; | ||
| 256 | put_record(rec); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | feather_callback void do_sched_trace_action(unsigned long id, | ||
| 261 | unsigned long _task, | ||
| 262 | unsigned long action) | ||
| 263 | { | ||
| 264 | struct task_struct *t = (struct task_struct*) _task; | ||
| 265 | struct st_event_record* rec = get_record(ST_ACTION, t); | ||
| 266 | |||
| 267 | if (rec) { | ||
| 268 | rec->data.action.when = now(); | ||
| 269 | rec->data.action.action = action; | ||
| 270 | put_record(rec); | ||
| 271 | } | ||
| 272 | } | ||
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c new file mode 100644 index 00000000000..f4171fddbbb --- /dev/null +++ b/litmus/sched_trace.c | |||
| @@ -0,0 +1,252 @@ | |||
| 1 | /* | ||
| 2 | * sched_trace.c -- record scheduling events to a byte stream. | ||
| 3 | */ | ||
| 4 | #include <linux/spinlock.h> | ||
| 5 | #include <linux/mutex.h> | ||
| 6 | |||
| 7 | #include <linux/fs.h> | ||
| 8 | #include <linux/slab.h> | ||
| 9 | #include <linux/miscdevice.h> | ||
| 10 | #include <asm/uaccess.h> | ||
| 11 | #include <linux/module.h> | ||
| 12 | #include <linux/sysrq.h> | ||
| 13 | |||
| 14 | #include <linux/kfifo.h> | ||
| 15 | |||
| 16 | #include <litmus/sched_trace.h> | ||
| 17 | #include <litmus/litmus.h> | ||
| 18 | |||
| 19 | #define SCHED_TRACE_NAME "litmus/log" | ||
| 20 | |||
| 21 | /* Compute size of TRACE() buffer */ | ||
| 22 | #define LITMUS_TRACE_BUF_SIZE (1 << CONFIG_SCHED_DEBUG_TRACE_SHIFT) | ||
| 23 | |||
| 24 | /* Max length of one read from the buffer */ | ||
| 25 | #define MAX_READ_LEN (64 * 1024) | ||
| 26 | |||
| 27 | /* Max length for one write --- by TRACE() --- to the buffer. This is used to | ||
| 28 | * allocate a per-cpu buffer for printf() formatting. */ | ||
| 29 | #define MSG_SIZE 255 | ||
| 30 | |||
| 31 | |||
| 32 | static DEFINE_MUTEX(reader_mutex); | ||
| 33 | static atomic_t reader_cnt = ATOMIC_INIT(0); | ||
| 34 | static DEFINE_KFIFO(debug_buffer, char, LITMUS_TRACE_BUF_SIZE); | ||
| 35 | |||
| 36 | |||
| 37 | static DEFINE_RAW_SPINLOCK(log_buffer_lock); | ||
| 38 | static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer); | ||
| 39 | |||
| 40 | /* | ||
| 41 | * sched_trace_log_message - Write to the trace buffer (log_buffer) | ||
| 42 | * | ||
| 43 | * This is the only function accessing the log_buffer from inside the | ||
| 44 | * kernel for writing. | ||
| 45 | * Concurrent access to sched_trace_log_message must be serialized using | ||
| 46 | * log_buffer_lock | ||
| 47 | * The maximum length of a formatted message is 255 | ||
| 48 | */ | ||
| 49 | void sched_trace_log_message(const char* fmt, ...) | ||
| 50 | { | ||
| 51 | unsigned long flags; | ||
| 52 | va_list args; | ||
| 53 | size_t len; | ||
| 54 | char* buf; | ||
| 55 | |||
| 56 | if (!atomic_read(&reader_cnt)) | ||
| 57 | /* early exit if nobody is listening */ | ||
| 58 | return; | ||
| 59 | |||
| 60 | va_start(args, fmt); | ||
| 61 | local_irq_save(flags); | ||
| 62 | |||
| 63 | /* format message */ | ||
| 64 | buf = __get_cpu_var(fmt_buffer); | ||
| 65 | len = vscnprintf(buf, MSG_SIZE, fmt, args); | ||
| 66 | |||
| 67 | raw_spin_lock(&log_buffer_lock); | ||
| 68 | /* Don't copy the trailing null byte, we don't want null bytes in a | ||
| 69 | * text file. | ||
| 70 | */ | ||
| 71 | kfifo_in(&debug_buffer, buf, len); | ||
| 72 | raw_spin_unlock(&log_buffer_lock); | ||
| 73 | |||
| 74 | local_irq_restore(flags); | ||
| 75 | va_end(args); | ||
| 76 | } | ||
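sched_trace_log_message() formats into a bounded per-CPU scratch buffer first and then copies only the formatted bytes, without the trailing NUL, into the kfifo, so the log stays a plain byte stream. A userspace sketch of that two-step write path, with made-up buffer sizes and without the locking:

```c
#include <stdio.h>
#include <stdarg.h>
#include <string.h>

#define MSG_SIZE  255
#define FIFO_SIZE 4096

static char   fifo[FIFO_SIZE];
static size_t fifo_len;

/* Format into a bounded scratch buffer, then append only the formatted
 * bytes (no trailing NUL) to a byte FIFO.
 */
static void toy_trace(const char *fmt, ...)
{
	char buf[MSG_SIZE];
	va_list args;
	int len;

	va_start(args, fmt);
	len = vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	if (len > (int)sizeof(buf) - 1)
		len = sizeof(buf) - 1;	/* truncated, as vscnprintf() reports */

	if (fifo_len + len <= FIFO_SIZE) {
		memcpy(fifo + fifo_len, buf, len);
		fifo_len += len;
	}
}

int main(void)
{
	toy_trace("job %d released at %llu\n", 3, 1000ULL);
	fwrite(fifo, 1, fifo_len, stdout);
	return 0;
}
```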
| 77 | |||
| 78 | |||
| 79 | /* | ||
| 80 | * log_read - Read the trace buffer | ||
| 81 | * | ||
| 82 | * This function is called as a file operation from userspace. | ||
| 83 | * Readers can sleep. Access is serialized through reader_mutex | ||
| 84 | */ | ||
| 85 | static ssize_t log_read(struct file *filp, | ||
| 86 | char __user *to, size_t len, | ||
| 87 | loff_t *f_pos) | ||
| 88 | { | ||
| 89 | /* we ignore f_pos, this is strictly sequential */ | ||
| 90 | |||
| 91 | ssize_t error = -EINVAL; | ||
| 92 | char* mem; | ||
| 93 | |||
| 94 | if (mutex_lock_interruptible(&reader_mutex)) { | ||
| 95 | error = -ERESTARTSYS; | ||
| 96 | goto out; | ||
| 97 | } | ||
| 98 | |||
| 99 | if (len > MAX_READ_LEN) | ||
| 100 | len = MAX_READ_LEN; | ||
| 101 | |||
| 102 | mem = kmalloc(len, GFP_KERNEL); | ||
| 103 | if (!mem) { | ||
| 104 | error = -ENOMEM; | ||
| 105 | goto out_unlock; | ||
| 106 | } | ||
| 107 | |||
| 108 | error = kfifo_out(&debug_buffer, mem, len); | ||
| 109 | while (!error) { | ||
| 110 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 111 | schedule_timeout(110); | ||
| 112 | if (signal_pending(current)) | ||
| 113 | error = -ERESTARTSYS; | ||
| 114 | else | ||
| 115 | error = kfifo_out(&debug_buffer, mem, len); | ||
| 116 | } | ||
| 117 | |||
| 118 | if (error > 0 && copy_to_user(to, mem, error)) | ||
| 119 | error = -EFAULT; | ||
| 120 | |||
| 121 | kfree(mem); | ||
| 122 | out_unlock: | ||
| 123 | mutex_unlock(&reader_mutex); | ||
| 124 | out: | ||
| 125 | return error; | ||
| 126 | } | ||
| 127 | |||
| 128 | /* | ||
| 129 | * Enable redirection of printk() messages to the trace buffer. | ||
| 130 | * Defined in kernel/printk.c | ||
| 131 | */ | ||
| 132 | extern int trace_override; | ||
| 133 | extern int trace_recurse; | ||
| 134 | |||
| 135 | /* | ||
| 136 | * log_open - open the global log message ring buffer. | ||
| 137 | */ | ||
| 138 | static int log_open(struct inode *in, struct file *filp) | ||
| 139 | { | ||
| 140 | int error = -EINVAL; | ||
| 141 | |||
| 142 | if (mutex_lock_interruptible(&reader_mutex)) { | ||
| 143 | error = -ERESTARTSYS; | ||
| 144 | goto out; | ||
| 145 | } | ||
| 146 | |||
| 147 | atomic_inc(&reader_cnt); | ||
| 148 | error = 0; | ||
| 149 | |||
| 150 | printk(KERN_DEBUG | ||
| 151 | "sched_trace kfifo with buffer starting at: 0x%p\n", | ||
| 152 | debug_buffer.buf); | ||
| 153 | |||
| 154 | /* override printk() */ | ||
| 155 | trace_override++; | ||
| 156 | |||
| 157 | mutex_unlock(&reader_mutex); | ||
| 158 | out: | ||
| 159 | return error; | ||
| 160 | } | ||
| 161 | |||
| 162 | static int log_release(struct inode *in, struct file *filp) | ||
| 163 | { | ||
| 164 | int error = -EINVAL; | ||
| 165 | |||
| 166 | if (mutex_lock_interruptible(&reader_mutex)) { | ||
| 167 | error = -ERESTARTSYS; | ||
| 168 | goto out; | ||
| 169 | } | ||
| 170 | |||
| 171 | atomic_dec(&reader_cnt); | ||
| 172 | |||
| 173 | /* release printk() overriding */ | ||
| 174 | trace_override--; | ||
| 175 | |||
| 176 | printk(KERN_DEBUG "sched_trace kfifo released\n"); | ||
| 177 | |||
| 178 | mutex_unlock(&reader_mutex); | ||
| 179 | out: | ||
| 180 | return error; | ||
| 181 | } | ||
| 182 | |||
| 183 | /* | ||
| 184 | * log_fops - The file operations for accessing the global LITMUS log message | ||
| 185 | * buffer. | ||
| 186 | * | ||
| 187 | * Except for opening the device file it uses the same operations as trace_fops. | ||
| 188 | */ | ||
| 189 | static struct file_operations log_fops = { | ||
| 190 | .owner = THIS_MODULE, | ||
| 191 | .open = log_open, | ||
| 192 | .release = log_release, | ||
| 193 | .read = log_read, | ||
| 194 | }; | ||
| 195 | |||
| 196 | static struct miscdevice litmus_log_dev = { | ||
| 197 | .name = SCHED_TRACE_NAME, | ||
| 198 | .minor = MISC_DYNAMIC_MINOR, | ||
| 199 | .fops = &log_fops, | ||
| 200 | }; | ||
| 201 | |||
| 202 | #ifdef CONFIG_MAGIC_SYSRQ | ||
| 203 | void dump_trace_buffer(int max) | ||
| 204 | { | ||
| 205 | char line[80]; | ||
| 206 | int len; | ||
| 207 | int count = 0; | ||
| 208 | |||
| 209 | /* potential, but very unlikely, race... */ | ||
| 210 | trace_recurse = 1; | ||
| 211 | while ((max == 0 || count++ < max) && | ||
| 212 | (len = kfifo_out(&debug_buffer, line, sizeof(line - 1))) > 0) { | ||
| 213 | line[len] = '\0'; | ||
| 214 | printk("%s", line); | ||
| 215 | } | ||
| 216 | trace_recurse = 0; | ||
| 217 | } | ||
| 218 | |||
| 219 | static void sysrq_dump_trace_buffer(int key) | ||
| 220 | { | ||
| 221 | dump_trace_buffer(100); | ||
| 222 | } | ||
| 223 | |||
| 224 | static struct sysrq_key_op sysrq_dump_trace_buffer_op = { | ||
| 225 | .handler = sysrq_dump_trace_buffer, | ||
| 226 | .help_msg = "dump-trace-buffer(Y)", | ||
| 227 | .action_msg = "writing content of TRACE() buffer", | ||
| 228 | }; | ||
| 229 | #endif | ||
| 230 | |||
| 231 | static int __init init_sched_trace(void) | ||
| 232 | { | ||
| 233 | printk("Initializing TRACE() device\n"); | ||
| 234 | |||
| 235 | #ifdef CONFIG_MAGIC_SYSRQ | ||
| 236 | /* offer some debugging help */ | ||
| 237 | if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op)) | ||
| 238 | printk("Registered dump-trace-buffer(Y) magic sysrq.\n"); | ||
| 239 | else | ||
| 240 | printk("Could not register dump-trace-buffer(Y) magic sysrq.\n"); | ||
| 241 | #endif | ||
| 242 | |||
| 243 | return misc_register(&litmus_log_dev); | ||
| 244 | } | ||
| 245 | |||
| 246 | static void __exit exit_sched_trace(void) | ||
| 247 | { | ||
| 248 | misc_deregister(&litmus_log_dev); | ||
| 249 | } | ||
| 250 | |||
| 251 | module_init(init_sched_trace); | ||
| 252 | module_exit(exit_sched_trace); | ||
diff --git a/litmus/srp.c b/litmus/srp.c new file mode 100644 index 00000000000..c88dbf2f580 --- /dev/null +++ b/litmus/srp.c | |||
| @@ -0,0 +1,305 @@ | |||
| 1 | /* ************************************************************************** */ | ||
| 2 | /* STACK RESOURCE POLICY */ | ||
| 3 | /* ************************************************************************** */ | ||
| 4 | |||
| 5 | #include <asm/atomic.h> | ||
| 6 | #include <linux/sched.h> | ||
| 7 | #include <linux/wait.h> | ||
| 8 | |||
| 9 | #include <litmus/litmus.h> | ||
| 10 | #include <litmus/sched_plugin.h> | ||
| 11 | #include <litmus/fdso.h> | ||
| 12 | #include <litmus/trace.h> | ||
| 13 | |||
| 14 | |||
| 15 | #ifdef CONFIG_LITMUS_LOCKING | ||
| 16 | |||
| 17 | #include <litmus/srp.h> | ||
| 18 | |||
| 19 | srp_prioritization_t get_srp_prio; | ||
| 20 | |||
| 21 | struct srp { | ||
| 22 | struct list_head ceiling; | ||
| 23 | wait_queue_head_t ceiling_blocked; | ||
| 24 | }; | ||
| 25 | #define system_ceiling(srp) list2prio(srp->ceiling.next) | ||
| 26 | #define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling) | ||
| 27 | |||
| 28 | #define UNDEF_SEM -2 | ||
| 29 | |||
| 30 | atomic_t srp_objects_in_use = ATOMIC_INIT(0); | ||
| 31 | |||
| 32 | DEFINE_PER_CPU(struct srp, srp); | ||
| 33 | |||
| 34 | /* Initialize SRP semaphores at boot time. */ | ||
| 35 | static int __init srp_init(void) | ||
| 36 | { | ||
| 37 | int i; | ||
| 38 | |||
| 39 | printk("Initializing SRP per-CPU ceilings..."); | ||
| 40 | for (i = 0; i < NR_CPUS; i++) { | ||
| 41 | init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked); | ||
| 42 | INIT_LIST_HEAD(&per_cpu(srp, i).ceiling); | ||
| 43 | } | ||
| 44 | printk(" done!\n"); | ||
| 45 | |||
| 46 | return 0; | ||
| 47 | } | ||
| 48 | module_init(srp_init); | ||
| 49 | |||
| 50 | /* SRP task priority comparison function. Smaller numeric values have higher | ||
| 51 | * priority, tie-break is PID. Special case: priority == 0 <=> no priority | ||
| 52 | */ | ||
| 53 | static int srp_higher_prio(struct srp_priority* first, | ||
| 54 | struct srp_priority* second) | ||
| 55 | { | ||
| 56 | if (!first->priority) | ||
| 57 | return 0; | ||
| 58 | else | ||
| 59 | return !second->priority || | ||
| 60 | first->priority < second->priority || ( | ||
| 61 | first->priority == second->priority && | ||
| 62 | first->pid < second->pid); | ||
| 63 | } | ||
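Because "smaller value = higher priority" is easy to invert, here is a small stand-alone illustration of the ordering that srp_higher_prio() implements. The struct is a stripped-down stand-in for struct srp_priority (declared in litmus/srp.h), used for illustration only.

/* Stand-in for struct srp_priority; illustration only, not the real type. */
struct prio_example { unsigned long priority; int pid; };

int example_higher_prio(struct prio_example *a, struct prio_example *b)
{
	if (!a->priority)            /* 0 means "no priority": never higher */
		return 0;
	return !b->priority ||                        /* any priority beats none */
	       a->priority < b->priority ||           /* smaller value wins      */
	       (a->priority == b->priority &&         /* equal values: lower PID */
	        a->pid < b->pid);                     /* breaks the tie          */
}

/* Examples: {2, pid 7} is higher than {5, pid 3};
 *           {2, pid 3} beats {2, pid 7} on the PID tie-break;
 *           {0, pid 1} is never higher than anything. */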
| 64 | |||
| 65 | |||
| 66 | static int srp_exceeds_ceiling(struct task_struct* first, | ||
| 67 | struct srp* srp) | ||
| 68 | { | ||
| 69 | struct srp_priority prio; | ||
| 70 | |||
| 71 | if (list_empty(&srp->ceiling)) | ||
| 72 | return 1; | ||
| 73 | else { | ||
| 74 | prio.pid = first->pid; | ||
| 75 | prio.priority = get_srp_prio(first); | ||
| 76 | return srp_higher_prio(&prio, system_ceiling(srp)) || | ||
| 77 | ceiling2sem(system_ceiling(srp))->owner == first; | ||
| 78 | } | ||
| 79 | } | ||
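A concrete (made-up) scenario: suppose the per-CPU ceiling list currently holds ceilings 3 and 5 (smaller numeric value = higher priority), so system_ceiling() is 3. A task with SRP priority 2 exceeds the ceiling (2 < 3) and may proceed; a task with priority 4 does not and will block in srp_ceiling_block() below, unless it is itself the owner of the semaphore that contributed the ceiling of 3, in which case the second clause lets it through.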
| 80 | |||
| 81 | static void srp_add_prio(struct srp* srp, struct srp_priority* prio) | ||
| 82 | { | ||
| 83 | struct list_head *pos; | ||
| 84 | if (in_list(&prio->list)) { | ||
| 85 | printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in " | ||
| 86 | "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio)); | ||
| 87 | return; | ||
| 88 | } | ||
| 89 | list_for_each(pos, &srp->ceiling) | ||
| 90 | if (unlikely(srp_higher_prio(prio, list2prio(pos)))) { | ||
| 91 | __list_add(&prio->list, pos->prev, pos); | ||
| 92 | return; | ||
| 93 | } | ||
| 94 | |||
| 95 | list_add_tail(&prio->list, &srp->ceiling); | ||
| 96 | } | ||
| 97 | |||
| 98 | |||
| 99 | static int lock_srp_semaphore(struct litmus_lock* l) | ||
| 100 | { | ||
| 101 | struct task_struct* t = current; | ||
| 102 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
| 103 | |||
| 104 | if (!is_realtime(t)) | ||
| 105 | return -EPERM; | ||
| 106 | |||
| 107 | /* prevent acquisition of local locks in global critical sections */ | ||
| 108 | if (tsk_rt(t)->num_locks_held) | ||
| 109 | return -EBUSY; | ||
| 110 | |||
| 111 | preempt_disable(); | ||
| 112 | |||
| 113 | /* Update ceiling. */ | ||
| 114 | srp_add_prio(&__get_cpu_var(srp), &sem->ceiling); | ||
| 115 | |||
| 116 | /* SRP invariant: all resources available */ | ||
| 117 | BUG_ON(sem->owner != NULL); | ||
| 118 | |||
| 119 | sem->owner = t; | ||
| 120 | TRACE_CUR("acquired srp 0x%p\n", sem); | ||
| 121 | |||
| 122 | tsk_rt(t)->num_local_locks_held++; | ||
| 123 | |||
| 124 | preempt_enable(); | ||
| 125 | |||
| 126 | return 0; | ||
| 127 | } | ||
| 128 | |||
| 129 | static int unlock_srp_semaphore(struct litmus_lock* l) | ||
| 130 | { | ||
| 131 | struct task_struct* t = current; | ||
| 132 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
| 133 | int err = 0; | ||
| 134 | |||
| 135 | preempt_disable(); | ||
| 136 | |||
| 137 | if (sem->owner != t) { | ||
| 138 | err = -EINVAL; | ||
| 139 | } else { | ||
| 140 | /* Determine new system priority ceiling for this CPU. */ | ||
| 141 | BUG_ON(!in_list(&sem->ceiling.list)); | ||
| 142 | |||
| 143 | list_del(&sem->ceiling.list); | ||
| 144 | sem->owner = NULL; | ||
| 145 | |||
| 146 | /* Wake tasks on this CPU, if they exceed current ceiling. */ | ||
| 147 | TRACE_CUR("released srp 0x%p\n", sem); | ||
| 148 | wake_up_all(&__get_cpu_var(srp).ceiling_blocked); | ||
| 149 | |||
| 150 | tsk_rt(t)->num_local_locks_held--; | ||
| 151 | } | ||
| 152 | |||
| 153 | preempt_enable(); | ||
| 154 | return err; | ||
| 155 | } | ||
| 156 | |||
| 157 | static int open_srp_semaphore(struct litmus_lock* l, void* __user arg) | ||
| 158 | { | ||
| 159 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
| 160 | int err = 0; | ||
| 161 | struct task_struct* t = current; | ||
| 162 | struct srp_priority t_prio; | ||
| 163 | |||
| 164 | if (!is_realtime(t)) | ||
| 165 | return -EPERM; | ||
| 166 | |||
| 167 | TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu); | ||
| 168 | |||
| 169 | preempt_disable(); | ||
| 170 | |||
| 171 | if (sem->owner != NULL) | ||
| 172 | err = -EBUSY; | ||
| 173 | |||
| 174 | if (err == 0) { | ||
| 175 | if (sem->cpu == UNDEF_SEM) | ||
| 176 | sem->cpu = get_partition(t); | ||
| 177 | else if (sem->cpu != get_partition(t)) | ||
| 178 | err = -EPERM; | ||
| 179 | } | ||
| 180 | |||
| 181 | if (err == 0) { | ||
| 182 | t_prio.priority = get_srp_prio(t); | ||
| 183 | t_prio.pid = t->pid; | ||
| 184 | if (srp_higher_prio(&t_prio, &sem->ceiling)) { | ||
| 185 | sem->ceiling.priority = t_prio.priority; | ||
| 186 | sem->ceiling.pid = t_prio.pid; | ||
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | preempt_enable(); | ||
| 191 | |||
| 192 | return err; | ||
| 193 | } | ||
| 194 | |||
| 195 | static int close_srp_semaphore(struct litmus_lock* l) | ||
| 196 | { | ||
| 197 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
| 198 | int err = 0; | ||
| 199 | |||
| 200 | preempt_disable(); | ||
| 201 | |||
| 202 | if (sem->owner == current) | ||
| 203 | unlock_srp_semaphore(l); | ||
| 204 | |||
| 205 | preempt_enable(); | ||
| 206 | |||
| 207 | return err; | ||
| 208 | } | ||
| 209 | |||
| 210 | static void deallocate_srp_semaphore(struct litmus_lock* l) | ||
| 211 | { | ||
| 212 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
| 213 | atomic_dec(&srp_objects_in_use); | ||
| 214 | kfree(sem); | ||
| 215 | } | ||
| 216 | |||
| 217 | static struct litmus_lock_ops srp_lock_ops = { | ||
| 218 | .open = open_srp_semaphore, | ||
| 219 | .close = close_srp_semaphore, | ||
| 220 | .lock = lock_srp_semaphore, | ||
| 221 | .unlock = unlock_srp_semaphore, | ||
| 222 | .deallocate = deallocate_srp_semaphore, | ||
| 223 | }; | ||
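Applications never call these ops directly; they are reached through the FDSO layer (the sys_od_open()/sys_od_close() syscalls in the table above) and the generic locking syscalls, for which liblitmus provides wrappers. The fragment below is only a rough sketch of that flow; srp_attach(), srp_lock(), srp_unlock() and srp_detach() are hypothetical names standing in for the real liblitmus helpers.

/* Hypothetical user-space wrappers (stand-ins for liblitmus helpers),
 * shown only to indicate which kernel ops end up running. */
int od = srp_attach(namespace_fd, resource_id);  /* -> open_srp_semaphore()   */

srp_lock(od);                                    /* -> lock_srp_semaphore()   */
/* ... critical section; tasks that do not exceed the raised ceiling
 *     cannot start on this partition ... */
srp_unlock(od);                                  /* -> unlock_srp_semaphore() */

srp_detach(od);                                  /* -> close/deallocate path  */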
| 224 | |||
| 225 | struct srp_semaphore* allocate_srp_semaphore(void) | ||
| 226 | { | ||
| 227 | struct srp_semaphore* sem; | ||
| 228 | |||
| 229 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
| 230 | if (!sem) | ||
| 231 | return NULL; | ||
| 232 | |||
| 233 | INIT_LIST_HEAD(&sem->ceiling.list); | ||
| 234 | sem->ceiling.priority = 0; | ||
| 235 | sem->cpu = UNDEF_SEM; | ||
| 236 | sem->owner = NULL; | ||
| 237 | |||
| 238 | sem->litmus_lock.ops = &srp_lock_ops; | ||
| 239 | |||
| 240 | atomic_inc(&srp_objects_in_use); | ||
| 241 | return sem; | ||
| 242 | } | ||
| 243 | |||
| 244 | static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync, | ||
| 245 | void *key) | ||
| 246 | { | ||
| 247 | int cpu = smp_processor_id(); | ||
| 248 | struct task_struct *tsk = wait->private; | ||
| 249 | if (cpu != get_partition(tsk)) | ||
| 250 | TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n", | ||
| 251 | get_partition(tsk)); | ||
| 252 | else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) | ||
| 253 | return default_wake_function(wait, mode, sync, key); | ||
| 254 | return 0; | ||
| 255 | } | ||
| 256 | |||
| 257 | static void do_ceiling_block(struct task_struct *tsk) | ||
| 258 | { | ||
| 259 | wait_queue_t wait = { | ||
| 260 | .private = tsk, | ||
| 261 | .func = srp_wake_up, | ||
| 262 | .task_list = {NULL, NULL} | ||
| 263 | }; | ||
| 264 | |||
| 265 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
| 266 | add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); | ||
| 267 | tsk->rt_param.srp_non_recurse = 1; | ||
| 268 | preempt_enable_no_resched(); | ||
| 269 | schedule(); | ||
| 270 | preempt_disable(); | ||
| 271 | tsk->rt_param.srp_non_recurse = 0; | ||
| 272 | remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); | ||
| 273 | } | ||
| 274 | |||
| 275 | /* Wait for current task priority to exceed system-wide priority ceiling. | ||
| 276 | * FIXME: the hotpath should be inline. | ||
| 277 | */ | ||
| 278 | void srp_ceiling_block(void) | ||
| 279 | { | ||
| 280 | struct task_struct *tsk = current; | ||
| 281 | |||
| 282 | /* Only applies to real-time tasks, but optimize for RT tasks. */ | ||
| 283 | if (unlikely(!is_realtime(tsk))) | ||
| 284 | return; | ||
| 285 | |||
| 286 | /* Avoid recursive ceiling blocking. */ | ||
| 287 | if (unlikely(tsk->rt_param.srp_non_recurse)) | ||
| 288 | return; | ||
| 289 | |||
| 290 | /* Bail out early if there aren't any SRP resources around. */ | ||
| 291 | if (likely(!atomic_read(&srp_objects_in_use))) | ||
| 292 | return; | ||
| 293 | |||
| 294 | preempt_disable(); | ||
| 295 | if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) { | ||
| 296 | TRACE_CUR("is priority ceiling blocked.\n"); | ||
| 297 | while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) | ||
| 298 | do_ceiling_block(tsk); | ||
| 299 | TRACE_CUR("finally exceeds system ceiling.\n"); | ||
| 300 | } else | ||
| 301 | TRACE_CUR("is not priority ceiling blocked\n"); | ||
| 302 | preempt_enable(); | ||
| 303 | } | ||
| 304 | |||
| 305 | #endif | ||
diff --git a/litmus/sync.c b/litmus/sync.c new file mode 100644 index 00000000000..3e79e0a12a5 --- /dev/null +++ b/litmus/sync.c | |||
| @@ -0,0 +1,152 @@ | |||
| 1 | /* litmus/sync.c - Support for synchronous and asynchronous task system releases. | ||
| 2 | * | ||
| 3 | * | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include <asm/atomic.h> | ||
| 7 | #include <asm/uaccess.h> | ||
| 8 | #include <linux/spinlock.h> | ||
| 9 | #include <linux/list.h> | ||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/completion.h> | ||
| 12 | |||
| 13 | #include <litmus/litmus.h> | ||
| 14 | #include <litmus/sched_plugin.h> | ||
| 15 | #include <litmus/jobs.h> | ||
| 16 | |||
| 17 | #include <litmus/sched_trace.h> | ||
| 18 | |||
| 19 | struct ts_release_wait { | ||
| 20 | struct list_head list; | ||
| 21 | struct completion completion; | ||
| 22 | lt_t ts_release_time; | ||
| 23 | }; | ||
| 24 | |||
| 25 | #define DECLARE_TS_RELEASE_WAIT(symb) \ | ||
| 26 | struct ts_release_wait symb = \ | ||
| 27 | { \ | ||
| 28 | LIST_HEAD_INIT(symb.list), \ | ||
| 29 | COMPLETION_INITIALIZER_ONSTACK(symb.completion), \ | ||
| 30 | 0 \ | ||
| 31 | } | ||
| 32 | |||
| 33 | static LIST_HEAD(task_release_list); | ||
| 34 | static DEFINE_MUTEX(task_release_lock); | ||
| 35 | |||
| 36 | static long do_wait_for_ts_release(void) | ||
| 37 | { | ||
| 38 | DECLARE_TS_RELEASE_WAIT(wait); | ||
| 39 | |||
| 40 | long ret = -ERESTARTSYS; | ||
| 41 | |||
| 42 | if (mutex_lock_interruptible(&task_release_lock)) | ||
| 43 | goto out; | ||
| 44 | |||
| 45 | list_add(&wait.list, &task_release_list); | ||
| 46 | |||
| 47 | mutex_unlock(&task_release_lock); | ||
| 48 | |||
| 49 | /* We are enqueued, now we wait for someone to wake us up. */ | ||
| 50 | ret = wait_for_completion_interruptible(&wait.completion); | ||
| 51 | |||
| 52 | if (!ret) { | ||
| 53 | /* Completion succeeded, setup release. */ | ||
| 54 | litmus->release_at(current, wait.ts_release_time | ||
| 55 | + current->rt_param.task_params.phase | ||
| 56 | - current->rt_param.task_params.period); | ||
| 57 | /* trigger advance to next job release at the programmed time */ | ||
| 58 | ret = complete_job(); | ||
| 59 | } else { | ||
| 60 | /* We were interrupted, must cleanup list. */ | ||
| 61 | mutex_lock(&task_release_lock); | ||
| 62 | if (!wait.completion.done) | ||
| 63 | list_del(&wait.list); | ||
| 64 | mutex_unlock(&task_release_lock); | ||
| 65 | } | ||
| 66 | |||
| 67 | out: | ||
| 68 | return ret; | ||
| 69 | } | ||
| 70 | |||
| 71 | int count_tasks_waiting_for_release(void) | ||
| 72 | { | ||
| 73 | int task_count = 0; | ||
| 74 | struct list_head *pos; | ||
| 75 | |||
| 76 | mutex_lock(&task_release_lock); | ||
| 77 | |||
| 78 | list_for_each(pos, &task_release_list) { | ||
| 79 | task_count++; | ||
| 80 | } | ||
| 81 | |||
| 82 | mutex_unlock(&task_release_lock); | ||
| 83 | |||
| 84 | |||
| 85 | return task_count; | ||
| 86 | } | ||
| 87 | |||
| 88 | static long do_release_ts(lt_t start) | ||
| 89 | { | ||
| 90 | long task_count = 0; | ||
| 91 | |||
| 92 | struct list_head *pos, *safe; | ||
| 93 | struct ts_release_wait *wait; | ||
| 94 | |||
| 95 | if (mutex_lock_interruptible(&task_release_lock)) { | ||
| 96 | task_count = -ERESTARTSYS; | ||
| 97 | goto out; | ||
| 98 | } | ||
| 99 | |||
| 100 | TRACE("<<<<<< synchronous task system release >>>>>>\n"); | ||
| 101 | sched_trace_sys_release(&start); | ||
| 102 | |||
| 103 | task_count = 0; | ||
| 104 | list_for_each_safe(pos, safe, &task_release_list) { | ||
| 105 | wait = (struct ts_release_wait*) | ||
| 106 | list_entry(pos, struct ts_release_wait, list); | ||
| 107 | |||
| 108 | task_count++; | ||
| 109 | wait->ts_release_time = start; | ||
| 110 | complete(&wait->completion); | ||
| 111 | } | ||
| 112 | |||
| 113 | /* clear stale list */ | ||
| 114 | INIT_LIST_HEAD(&task_release_list); | ||
| 115 | |||
| 116 | mutex_unlock(&task_release_lock); | ||
| 117 | |||
| 118 | out: | ||
| 119 | return task_count; | ||
| 120 | } | ||
| 121 | |||
| 122 | |||
| 123 | asmlinkage long sys_wait_for_ts_release(void) | ||
| 124 | { | ||
| 125 | long ret = -EPERM; | ||
| 126 | struct task_struct *t = current; | ||
| 127 | |||
| 128 | if (is_realtime(t)) | ||
| 129 | ret = do_wait_for_ts_release(); | ||
| 130 | |||
| 131 | return ret; | ||
| 132 | } | ||
| 133 | |||
| 134 | #define ONE_MS 1000000 | ||
| 135 | |||
| 136 | asmlinkage long sys_release_ts(lt_t __user *__delay) | ||
| 137 | { | ||
| 138 | long ret; | ||
| 139 | lt_t delay; | ||
| 140 | lt_t start_time; | ||
| 141 | |||
| 142 | /* FIXME: check capabilities... */ | ||
| 143 | |||
| 144 | ret = copy_from_user(&delay, __delay, sizeof(delay)); | ||
| 145 | if (ret == 0) { | ||
| 146 | /* round up to next larger integral millisecond */ | ||
| 147 | start_time = ((litmus_clock() / ONE_MS) + 1) * ONE_MS; | ||
| 148 | ret = do_release_ts(start_time + delay); | ||
| 149 | } | ||
| 150 | |||
| 151 | return ret; | ||
| 152 | } | ||
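To make the protocol concrete: each real-time task blocks in the wait syscall, and a separate launcher triggers the release for all of them at once. Note that sys_release_ts() rounds the current time up to the next full millisecond before adding the delay, so a delay of 0 means "at the next millisecond boundary". The sketch below assumes the liblitmus wrappers wait_for_ts_release() and release_ts(); exact names and units may differ between liblitmus versions.

/* Hedged sketch of a synchronous task-system release (liblitmus wrapper
 * names are assumptions). */

/* in each admitted real-time task: */
wait_for_ts_release();      /* blocks in do_wait_for_ts_release() */
/* ... the first job runs here, offset by the task's phase ... */

/* in a separate launcher, once all tasks are enqueued: */
lt_t delay = 0;             /* 0 = release at the next ms boundary */
release_ts(&delay);         /* wakes every waiter via do_release_ts() */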
diff --git a/litmus/trace.c b/litmus/trace.c new file mode 100644 index 00000000000..7dbb98e4a3c --- /dev/null +++ b/litmus/trace.c | |||
| @@ -0,0 +1,300 @@ | |||
| 1 | #include <linux/sched.h> | ||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/uaccess.h> | ||
| 4 | |||
| 5 | #include <litmus/ftdev.h> | ||
| 6 | #include <litmus/litmus.h> | ||
| 7 | #include <litmus/trace.h> | ||
| 8 | |||
| 9 | /******************************************************************************/ | ||
| 10 | /* Allocation */ | ||
| 11 | /******************************************************************************/ | ||
| 12 | |||
| 13 | static struct ftdev overhead_dev; | ||
| 14 | |||
| 15 | #define trace_ts_buf overhead_dev.minor[0].buf | ||
| 16 | |||
| 17 | static unsigned int ts_seq_no = 0; | ||
| 18 | |||
| 19 | DEFINE_PER_CPU(atomic_t, irq_fired_count); | ||
| 20 | |||
| 21 | void ft_irq_fired(void) | ||
| 22 | { | ||
| 23 | /* Only called with preemptions disabled. */ | ||
| 24 | atomic_inc(&__get_cpu_var(irq_fired_count)); | ||
| 25 | |||
| 26 | if (has_control_page(current)) | ||
| 27 | get_control_page(current)->irq_count++; | ||
| 28 | } | ||
| 29 | |||
| 30 | static inline void clear_irq_fired(void) | ||
| 31 | { | ||
| 32 | atomic_set(&__raw_get_cpu_var(irq_fired_count), 0); | ||
| 33 | } | ||
| 34 | |||
| 35 | static inline unsigned int get_and_clear_irq_fired(void) | ||
| 36 | { | ||
| 37 | /* This is potentially not atomic since we might migrate if | ||
| 38 | * preemptions are not disabled. As a tradeoff between | ||
| 39 | * accuracy and tracing overheads, this seems acceptable. | ||
| 40 | * If it proves to be a problem, then one could add a callback | ||
| 41 | * from the migration code to invalidate irq_fired_count. | ||
| 42 | */ | ||
| 43 | return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0); | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline void save_irq_flags(struct timestamp *ts, unsigned int irq_count) | ||
| 47 | { | ||
| 48 | /* Store how many interrupts occurred. */ | ||
| 49 | ts->irq_count = irq_count; | ||
| 50 | /* Extra flag because ts->irq_count overflows quickly. */ | ||
| 51 | ts->irq_flag = irq_count > 0; | ||
| 52 | |||
| 53 | } | ||
| 54 | |||
| 55 | static inline void write_timestamp(uint8_t event, | ||
| 56 | uint8_t type, | ||
| 57 | uint8_t cpu, | ||
| 58 | uint16_t pid_fragment, | ||
| 59 | unsigned int irq_count, | ||
| 60 | int record_irq, | ||
| 61 | int hide_irq, | ||
| 62 | uint64_t timestamp, | ||
| 63 | int record_timestamp) | ||
| 64 | { | ||
| 65 | unsigned long flags; | ||
| 66 | unsigned int seq_no; | ||
| 67 | struct timestamp *ts; | ||
| 68 | |||
| 69 | /* Avoid preemptions while recording the timestamp. This reduces the | ||
| 70 | * number of "out of order" timestamps in the stream and makes | ||
| 71 | * post-processing easier. */ | ||
| 72 | |||
| 73 | local_irq_save(flags); | ||
| 74 | |||
| 75 | seq_no = fetch_and_inc((int *) &ts_seq_no); | ||
| 76 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | ||
| 77 | ts->event = event; | ||
| 78 | ts->seq_no = seq_no; | ||
| 79 | |||
| 80 | ts->task_type = type; | ||
| 81 | ts->pid = pid_fragment; | ||
| 82 | |||
| 83 | ts->cpu = cpu; | ||
| 84 | |||
| 85 | if (record_irq) | ||
| 86 | irq_count = get_and_clear_irq_fired(); | ||
| 87 | |||
| 88 | save_irq_flags(ts, irq_count - hide_irq); | ||
| 89 | |||
| 90 | if (record_timestamp) | ||
| 91 | timestamp = ft_timestamp(); | ||
| 92 | |||
| 93 | ts->timestamp = timestamp; | ||
| 94 | ft_buffer_finish_write(trace_ts_buf, ts); | ||
| 95 | } | ||
| 96 | |||
| 97 | local_irq_restore(flags); | ||
| 98 | } | ||
| 99 | |||
| 100 | static void __add_timestamp_user(struct timestamp *pre_recorded) | ||
| 101 | { | ||
| 102 | unsigned long flags; | ||
| 103 | unsigned int seq_no; | ||
| 104 | struct timestamp *ts; | ||
| 105 | |||
| 106 | |||
| 107 | local_irq_save(flags); | ||
| 108 | |||
| 109 | seq_no = fetch_and_inc((int *) &ts_seq_no); | ||
| 110 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | ||
| 111 | *ts = *pre_recorded; | ||
| 112 | ts->seq_no = seq_no; | ||
| 113 | ts->cpu = raw_smp_processor_id(); | ||
| 114 | save_irq_flags(ts, get_and_clear_irq_fired()); | ||
| 115 | ft_buffer_finish_write(trace_ts_buf, ts); | ||
| 116 | } | ||
| 117 | |||
| 118 | local_irq_restore(flags); | ||
| 119 | } | ||
| 120 | |||
| 121 | feather_callback void save_timestamp(unsigned long event) | ||
| 122 | { | ||
| 123 | write_timestamp(event, TSK_UNKNOWN, | ||
| 124 | raw_smp_processor_id(), | ||
| 125 | current->pid, | ||
| 126 | 0, 1, 0, | ||
| 127 | 0, 1); | ||
| 128 | } | ||
| 129 | |||
| 130 | feather_callback void save_timestamp_def(unsigned long event, | ||
| 131 | unsigned long type) | ||
| 132 | { | ||
| 133 | write_timestamp(event, type, | ||
| 134 | raw_smp_processor_id(), | ||
| 135 | current->pid, | ||
| 136 | 0, 1, 0, | ||
| 137 | 0, 1); | ||
| 138 | } | ||
| 139 | |||
| 140 | feather_callback void save_timestamp_task(unsigned long event, | ||
| 141 | unsigned long t_ptr) | ||
| 142 | { | ||
| 143 | struct task_struct *t = (struct task_struct *) t_ptr; | ||
| 144 | int rt = is_realtime(t); | ||
| 145 | |||
| 146 | write_timestamp(event, rt ? TSK_RT : TSK_BE, | ||
| 147 | raw_smp_processor_id(), | ||
| 148 | t->pid, | ||
| 149 | 0, 1, 0, | ||
| 150 | 0, 1); | ||
| 151 | } | ||
| 152 | |||
| 153 | feather_callback void save_timestamp_cpu(unsigned long event, | ||
| 154 | unsigned long cpu) | ||
| 155 | { | ||
| 156 | write_timestamp(event, TSK_UNKNOWN, cpu, current->pid, | ||
| 157 | 0, 1, 0, | ||
| 158 | 0, 1); | ||
| 159 | } | ||
| 160 | |||
| 161 | feather_callback void save_task_latency(unsigned long event, | ||
| 162 | unsigned long when_ptr) | ||
| 163 | { | ||
| 164 | lt_t now = litmus_clock(); | ||
| 165 | lt_t *when = (lt_t*) when_ptr; | ||
| 166 | |||
| 167 | write_timestamp(event, TSK_RT, raw_smp_processor_id(), 0, | ||
| 168 | 0, 1, 0, | ||
| 169 | now - *when, 0); | ||
| 170 | } | ||
| 171 | |||
| 172 | /* record a timestamp using a user-reported time instead of ft_timestamp() */ | ||
| 173 | feather_callback void save_timestamp_time(unsigned long event, | ||
| 174 | unsigned long ptr) | ||
| 175 | { | ||
| 176 | uint64_t* time = (uint64_t*) ptr; | ||
| 177 | |||
| 178 | write_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE, | ||
| 179 | raw_smp_processor_id(), current->pid, | ||
| 180 | 0, 1, 0, | ||
| 181 | *time, 0); | ||
| 182 | } | ||
| 183 | |||
| 184 | /* Record user-reported IRQ count */ | ||
| 185 | feather_callback void save_timestamp_irq(unsigned long event, | ||
| 186 | unsigned long irq_counter_ptr) | ||
| 187 | { | ||
| 188 | uint64_t* irqs = (uint64_t*) irq_counter_ptr; | ||
| 189 | |||
| 190 | write_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE, | ||
| 191 | raw_smp_processor_id(), current->pid, | ||
| 192 | *irqs, 0, 0, | ||
| 193 | 0, 1); | ||
| 194 | } | ||
| 195 | |||
| 196 | /* Suppress one IRQ from the irq count. Used by TS_SEND_RESCHED_END, which is | ||
| 197 | * called from within an interrupt that is expected. */ | ||
| 198 | feather_callback void save_timestamp_hide_irq(unsigned long event) | ||
| 199 | { | ||
| 200 | write_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE, | ||
| 201 | raw_smp_processor_id(), current->pid, | ||
| 202 | 0, 1, 1, | ||
| 203 | 0, 1); | ||
| 204 | } | ||
| 205 | |||
| 206 | /******************************************************************************/ | ||
| 207 | /* DEVICE FILE DRIVER */ | ||
| 208 | /******************************************************************************/ | ||
| 209 | |||
| 210 | /* | ||
| 211 |  * ideally 8 MB; that is the most we can ask of the buddy system allocator | ||
| 212 |  * (MAX_ORDER), and we might not get even that much | ||
| 213 | */ | ||
| 214 | #define NO_TIMESTAMPS (2 << 16) | ||
| 215 | |||
| 216 | static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) | ||
| 217 | { | ||
| 218 | unsigned int count = NO_TIMESTAMPS; | ||
| 219 | |||
| 220 | /* An overhead-tracing timestamp should be exactly 16 bytes long. */ | ||
| 221 | BUILD_BUG_ON(sizeof(struct timestamp) != 16); | ||
| 222 | |||
| 223 | while (count && !trace_ts_buf) { | ||
| 224 | printk("time stamp buffer: trying to allocate %u time stamps.\n", count); | ||
| 225 | ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp)); | ||
| 226 | count /= 2; | ||
| 227 | } | ||
| 228 | return ftdev->minor[idx].buf ? 0 : -ENOMEM; | ||
| 229 | } | ||
| 230 | |||
| 231 | static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) | ||
| 232 | { | ||
| 233 | free_ft_buffer(ftdev->minor[idx].buf); | ||
| 234 | ftdev->minor[idx].buf = NULL; | ||
| 235 | } | ||
| 236 | |||
| 237 | static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len, | ||
| 238 | const char __user *from) | ||
| 239 | { | ||
| 240 | ssize_t consumed = 0; | ||
| 241 | struct timestamp ts; | ||
| 242 | |||
| 243 | /* don't give us partial timestamps */ | ||
| 244 | if (len % sizeof(ts)) | ||
| 245 | return -EINVAL; | ||
| 246 | |||
| 247 | while (len >= sizeof(ts)) { | ||
| 248 | if (copy_from_user(&ts, from, sizeof(ts))) { | ||
| 249 | consumed = -EFAULT; | ||
| 250 | goto out; | ||
| 251 | } | ||
| 252 | len -= sizeof(ts); | ||
| 253 | from += sizeof(ts); | ||
| 254 | consumed += sizeof(ts); | ||
| 255 | |||
| 256 | __add_timestamp_user(&ts); | ||
| 257 | } | ||
| 258 | |||
| 259 | out: | ||
| 260 | return consumed; | ||
| 261 | } | ||
| 262 | |||
| 263 | static int __init init_ft_overhead_trace(void) | ||
| 264 | { | ||
| 265 | int err, cpu; | ||
| 266 | |||
| 267 | printk("Initializing Feather-Trace overhead tracing device.\n"); | ||
| 268 | err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace"); | ||
| 269 | if (err) | ||
| 270 | goto err_out; | ||
| 271 | |||
| 272 | overhead_dev.alloc = alloc_timestamp_buffer; | ||
| 273 | overhead_dev.free = free_timestamp_buffer; | ||
| 274 | overhead_dev.write = write_timestamp_from_user; | ||
| 275 | |||
| 276 | err = register_ftdev(&overhead_dev); | ||
| 277 | if (err) | ||
| 278 | goto err_dealloc; | ||
| 279 | |||
| 280 | /* initialize IRQ flags */ | ||
| 281 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
| 282 | atomic_set(&per_cpu(irq_fired_count, cpu), 0); | ||
| 283 | } | ||
| 284 | |||
| 285 | return 0; | ||
| 286 | |||
| 287 | err_dealloc: | ||
| 288 | ftdev_exit(&overhead_dev); | ||
| 289 | err_out: | ||
| 290 | printk(KERN_WARNING "Could not register ft_trace module.\n"); | ||
| 291 | return err; | ||
| 292 | } | ||
| 293 | |||
| 294 | static void __exit exit_ft_overhead_trace(void) | ||
| 295 | { | ||
| 296 | ftdev_exit(&overhead_dev); | ||
| 297 | } | ||
| 298 | |||
| 299 | module_init(init_ft_overhead_trace); | ||
| 300 | module_exit(exit_ft_overhead_trace); | ||
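For orientation, a user-space sketch of draining the overhead stream. The device node name (/dev/litmus/ft_trace0) is an assumption derived from the "ft_trace" name passed to ftdev_init(); in practice the Feather-Trace tools (e.g. ftcat) also enable the individual events via the ftdev ioctl interface first, which is omitted here, and then decode the 16-byte records.

/* Hedged sketch: count raw timestamp records from the overhead device.
 * Device path is an assumption; records are treated as opaque 16-byte
 * blobs (matching the BUILD_BUG_ON in alloc_timestamp_buffer()). */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char rec[16];
	ssize_t n;
	long count = 0;
	int fd = open("/dev/litmus/ft_trace0", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	while ((n = read(fd, rec, sizeof(rec))) == (ssize_t) sizeof(rec))
		count++;                /* decoding is left to the ft tools */
	printf("read %ld timestamp records\n", count);
	close(fd);
	return 0;
}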
diff --git a/litmus/uncachedev.c b/litmus/uncachedev.c new file mode 100644 index 00000000000..06a6a7c1798 --- /dev/null +++ b/litmus/uncachedev.c | |||
| @@ -0,0 +1,102 @@ | |||
| 1 | #include <linux/sched.h> | ||
| 2 | #include <linux/kernel.h> | ||
| 3 | #include <linux/mm.h> | ||
| 4 | #include <linux/fs.h> | ||
| 5 | #include <linux/errno.h> | ||
| 6 | #include <linux/highmem.h> | ||
| 7 | #include <asm/page.h> | ||
| 8 | #include <linux/miscdevice.h> | ||
| 9 | #include <linux/module.h> | ||
| 10 | |||
| 11 | #include <litmus/litmus.h> | ||
| 12 | |||
| 13 | /* device for allocating pages not cached by the CPU */ | ||
| 14 | |||
| 15 | #define UNCACHE_NAME "litmus/uncache" | ||
| 16 | |||
| 17 | void litmus_uncache_vm_open(struct vm_area_struct *vma) | ||
| 18 | { | ||
| 19 | } | ||
| 20 | |||
| 21 | void litmus_uncache_vm_close(struct vm_area_struct *vma) | ||
| 22 | { | ||
| 23 | } | ||
| 24 | |||
| 25 | int litmus_uncache_vm_fault(struct vm_area_struct* vma, | ||
| 26 | struct vm_fault* vmf) | ||
| 27 | { | ||
| 28 | /* modeled after SG DMA video4linux, but without DMA. */ | ||
| 29 | /* (see drivers/media/video/videobuf-dma-sg.c) */ | ||
| 30 | struct page *page; | ||
| 31 | |||
| 32 | page = alloc_page(GFP_USER); | ||
| 33 | if (!page) | ||
| 34 | return VM_FAULT_OOM; | ||
| 35 | |||
| 36 | clear_user_highpage(page, (unsigned long)vmf->virtual_address); | ||
| 37 | vmf->page = page; | ||
| 38 | |||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | |||
| 42 | static struct vm_operations_struct litmus_uncache_vm_ops = { | ||
| 43 | .open = litmus_uncache_vm_open, | ||
| 44 | .close = litmus_uncache_vm_close, | ||
| 45 | .fault = litmus_uncache_vm_fault, | ||
| 46 | }; | ||
| 47 | |||
| 48 | static int litmus_uncache_mmap(struct file* filp, struct vm_area_struct* vma) | ||
| 49 | { | ||
| 50 | /* first make sure the mapper knows what they are doing */ | ||
| 51 | |||
| 52 | /* you can only map the "first" page */ | ||
| 53 | if (vma->vm_pgoff != 0) | ||
| 54 | return -EINVAL; | ||
| 55 | |||
| 56 | /* you can't share it with anyone */ | ||
| 57 | if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) | ||
| 58 | return -EINVAL; | ||
| 59 | |||
| 60 | /* cannot be expanded, and is not a "normal" page. */ | ||
| 61 | vma->vm_flags |= VM_DONTEXPAND; | ||
| 62 | |||
| 63 | /* noncached pages are not explicitly locked in memory (for now). */ | ||
| 64 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | ||
| 65 | |||
| 66 | vma->vm_ops = &litmus_uncache_vm_ops; | ||
| 67 | |||
| 68 | return 0; | ||
| 69 | } | ||
| 70 | |||
| 71 | static struct file_operations litmus_uncache_fops = { | ||
| 72 | .owner = THIS_MODULE, | ||
| 73 | .mmap = litmus_uncache_mmap, | ||
| 74 | }; | ||
| 75 | |||
| 76 | static struct miscdevice litmus_uncache_dev = { | ||
| 77 | .name = UNCACHE_NAME, | ||
| 78 | .minor = MISC_DYNAMIC_MINOR, | ||
| 79 | .fops = &litmus_uncache_fops, | ||
| 80 | /* pages are not locked, so there is no reason why | ||
| 81 | anyone should not be allowed to allocate an uncached page */ | ||
| 82 | .mode = (S_IRUGO | S_IWUGO), | ||
| 83 | }; | ||
| 84 | |||
| 85 | static int __init init_litmus_uncache_dev(void) | ||
| 86 | { | ||
| 87 | int err; | ||
| 88 | |||
| 89 | printk("Initializing LITMUS^RT uncache device.\n"); | ||
| 90 | err = misc_register(&litmus_uncache_dev); | ||
| 91 | if (err) | ||
| 92 | printk("Could not allocate %s device (%d).\n", UNCACHE_NAME, err); | ||
| 93 | return err; | ||
| 94 | } | ||
| 95 | |||
| 96 | static void __exit exit_litmus_uncache_dev(void) | ||
| 97 | { | ||
| 98 | misc_deregister(&litmus_uncache_dev); | ||
| 99 | } | ||
| 100 | |||
| 101 | module_init(init_litmus_uncache_dev); | ||
| 102 | module_exit(exit_litmus_uncache_dev); | ||
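A short user-space sketch of how this device is meant to be used: map exactly one page, privately, at offset 0, which is what litmus_uncache_mmap() enforces. The /dev/litmus/uncache path follows from UNCACHE_NAME, assuming the usual misc-device node layout.

/* Map one uncached page from /dev/litmus/uncache (path assumed from
 * UNCACHE_NAME). The mapping must be MAP_PRIVATE and start at offset 0. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = open("/dev/litmus/uncache", O_RDWR);
	char *mem;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	mem = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(mem, 0, page);       /* first touch faults in the uncached page */
	munmap(mem, page);
	close(fd);
	return 0;
}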
