-rw-r--r--Documentation/vm/page-types.c71
-rw-r--r--arch/sparc/Kconfig9
-rw-r--r--arch/sparc/include/asm/spinlock_64.h92
-rw-r--r--arch/sparc/include/asm/spinlock_types.h5
-rw-r--r--arch/sparc/include/asm/unistd.h6
-rw-r--r--arch/sparc/kernel/systbls_32.S3
-rw-r--r--arch/sparc/kernel/systbls_64.S8
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c16
-rw-r--r--arch/x86/kernel/syscall_table_32.S1
-rw-r--r--include/linux/hrtimer.h2
-rw-r--r--include/litmus/budget.h43
-rw-r--r--include/litmus/ce_domain.h27
-rw-r--r--include/litmus/color.h51
-rw-r--r--include/litmus/dgl.h65
-rw-r--r--include/litmus/domain.h50
-rw-r--r--include/litmus/event_group.h91
-rw-r--r--include/litmus/fifo_common.h25
-rw-r--r--include/litmus/litmus.h47
-rw-r--r--include/litmus/locking.h1
-rw-r--r--include/litmus/preempt.h4
-rw-r--r--include/litmus/rm_common.h25
-rw-r--r--include/litmus/rt_domain.h30
-rw-r--r--include/litmus/rt_param.h56
-rw-r--r--include/litmus/rt_server.h31
-rw-r--r--include/litmus/sched_mc.h134
-rw-r--r--include/litmus/sched_plugin.h7
-rw-r--r--include/litmus/sched_trace.h20
-rw-r--r--include/litmus/trace.h27
-rw-r--r--include/litmus/unistd_32.h3
-rw-r--r--include/litmus/unistd_64.h4
-rw-r--r--include/trace/events/litmus.h92
-rw-r--r--kernel/hrtimer.c32
-rw-r--r--kernel/sched.c6
-rw-r--r--litmus/Kconfig79
-rw-r--r--litmus/Makefile35
-rw-r--r--litmus/bheap.c3
-rw-r--r--litmus/budget.c55
-rw-r--r--litmus/ce_domain.c102
-rw-r--r--litmus/color.c357
-rw-r--r--litmus/color_dev.c351
-rw-r--r--litmus/color_proc.c220
-rw-r--r--litmus/dgl.c300
-rw-r--r--litmus/domain.c21
-rw-r--r--litmus/event_group.c334
-rw-r--r--litmus/fifo_common.c58
-rw-r--r--litmus/ftdev.c10
-rw-r--r--litmus/jobs.c15
-rw-r--r--litmus/litmus.c142
-rw-r--r--litmus/locking.c8
-rw-r--r--litmus/preempt.c3
-rw-r--r--litmus/rm_common.c91
-rw-r--r--litmus/rt_domain.c246
-rw-r--r--litmus/rt_server.c23
-rw-r--r--litmus/sched_color.c889
-rw-r--r--litmus/sched_gsn_edf.c6
-rw-r--r--litmus/sched_mc.c1373
-rw-r--r--litmus/sched_mc_ce.c1052
-rw-r--r--litmus/sched_plugin.c6
-rw-r--r--litmus/sched_psn_edf.c3
-rw-r--r--litmus/sched_task_trace.c13
-rw-r--r--litmus/sync.c3
-rw-r--r--litmus/trace.c51
-rw-r--r--mm/memory.c35
63 files changed, 6686 insertions, 282 deletions
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index 7445caa26d05..fe230def50d6 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -36,6 +36,8 @@
36#include <sys/statfs.h> 36#include <sys/statfs.h>
37#include "../../include/linux/magic.h" 37#include "../../include/linux/magic.h"
38 38
39#define COLOR_MASK 0x1fULL
40
39 41
40#ifndef MAX_PATH 42#ifndef MAX_PATH
41# define MAX_PATH 256 43# define MAX_PATH 256
@@ -181,13 +183,13 @@ static pid_t opt_pid; /* process to walk */
181 183
182#define MAX_ADDR_RANGES 1024 184#define MAX_ADDR_RANGES 1024
183static int nr_addr_ranges; 185static int nr_addr_ranges;
184static unsigned long opt_offset[MAX_ADDR_RANGES]; 186static unsigned long long opt_offset[MAX_ADDR_RANGES];
185static unsigned long opt_size[MAX_ADDR_RANGES]; 187static unsigned long long opt_size[MAX_ADDR_RANGES];
186 188
187#define MAX_VMAS 10240 189#define MAX_VMAS 10240
188static int nr_vmas; 190static int nr_vmas;
189static unsigned long pg_start[MAX_VMAS]; 191static unsigned long long pg_start[MAX_VMAS];
190static unsigned long pg_end[MAX_VMAS]; 192static unsigned long long pg_end[MAX_VMAS];
191 193
192#define MAX_BIT_FILTERS 64 194#define MAX_BIT_FILTERS 64
193static int nr_bit_filters; 195static int nr_bit_filters;
@@ -259,21 +261,26 @@ static int checked_open(const char *pathname, int flags)
259 return fd; 261 return fd;
260} 262}
261 263
264#define _LARGEFILE64_SOURCE
265
262/* 266/*
263 * pagemap/kpageflags routines 267 * pagemap/kpageflags routines
264 */ 268 */
265 269
266static unsigned long do_u64_read(int fd, char *name, 270static unsigned long do_u64_read(int fd, char *name,
267 uint64_t *buf, 271 uint64_t *buf,
268 unsigned long index, 272 unsigned long long index,
269 unsigned long count) 273 unsigned long count)
270{ 274{
271 long bytes; 275 long bytes;
276 long long lseek_ret;
272 277
273 if (index > ULONG_MAX / 8) 278 if (index > ULLONG_MAX / 8)
274 fatal("index overflow: %lu\n", index); 279 fatal("index overflow: %llu\n", index);
275 280
276 if (lseek(fd, index * 8, SEEK_SET) < 0) { 281
282 lseek_ret = lseek64(fd, index * 8, SEEK_SET);
283 if (lseek_ret < 0) {
277 perror(name); 284 perror(name);
278 exit(EXIT_FAILURE); 285 exit(EXIT_FAILURE);
279 } 286 }
@@ -290,14 +297,14 @@ static unsigned long do_u64_read(int fd, char *name,
290} 297}
291 298
292static unsigned long kpageflags_read(uint64_t *buf, 299static unsigned long kpageflags_read(uint64_t *buf,
293 unsigned long index, 300 unsigned long long index,
294 unsigned long pages) 301 unsigned long pages)
295{ 302{
296 return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); 303 return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
297} 304}
298 305
299static unsigned long pagemap_read(uint64_t *buf, 306static unsigned long pagemap_read(uint64_t *buf,
300 unsigned long index, 307 unsigned long long index,
301 unsigned long pages) 308 unsigned long pages)
302{ 309{
303 return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages); 310 return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages);
@@ -364,7 +371,7 @@ static char *page_flag_longname(uint64_t flags)
364 */ 371 */
365 372
366static void show_page_range(unsigned long voffset, 373static void show_page_range(unsigned long voffset,
367 unsigned long offset, uint64_t flags) 374 unsigned long long offset, uint64_t flags)
368{ 375{
369 static uint64_t flags0; 376 static uint64_t flags0;
370 static unsigned long voff; 377 static unsigned long voff;
@@ -391,11 +398,11 @@ static void show_page_range(unsigned long voffset,
391} 398}
392 399
393static void show_page(unsigned long voffset, 400static void show_page(unsigned long voffset,
394 unsigned long offset, uint64_t flags) 401 unsigned long long offset, uint64_t flags)
395{ 402{
396 if (opt_pid) 403 if (opt_pid)
397 printf("%lx\t", voffset); 404 printf("%lx\t", voffset);
398 printf("%lx\t%s\n", offset, page_flag_name(flags)); 405 printf("%llx (%llu)\t%s\n", offset, offset & COLOR_MASK, page_flag_name(flags));
399} 406}
400 407
401static void show_summary(void) 408static void show_summary(void)
@@ -640,7 +647,7 @@ static int hash_slot(uint64_t flags)
640} 647}
641 648
642static void add_page(unsigned long voffset, 649static void add_page(unsigned long voffset,
643 unsigned long offset, uint64_t flags) 650 unsigned long long offset, uint64_t flags)
644{ 651{
645 flags = kpageflags_flags(flags); 652 flags = kpageflags_flags(flags);
646 653
@@ -663,7 +670,7 @@ static void add_page(unsigned long voffset,
663 670
664#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */ 671#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */
665static void walk_pfn(unsigned long voffset, 672static void walk_pfn(unsigned long voffset,
666 unsigned long index, 673 unsigned long long index,
667 unsigned long count) 674 unsigned long count)
668{ 675{
669 uint64_t buf[KPAGEFLAGS_BATCH]; 676 uint64_t buf[KPAGEFLAGS_BATCH];
@@ -686,10 +693,10 @@ static void walk_pfn(unsigned long voffset,
686} 693}
687 694
688#define PAGEMAP_BATCH (64 << 10) 695#define PAGEMAP_BATCH (64 << 10)
689static void walk_vma(unsigned long index, unsigned long count) 696static void walk_vma(unsigned long long index, unsigned long count)
690{ 697{
691 uint64_t buf[PAGEMAP_BATCH]; 698 uint64_t buf[PAGEMAP_BATCH];
692 unsigned long batch; 699 unsigned long long batch;
693 unsigned long pages; 700 unsigned long pages;
694 unsigned long pfn; 701 unsigned long pfn;
695 unsigned long i; 702 unsigned long i;
@@ -711,10 +718,10 @@ static void walk_vma(unsigned long index, unsigned long count)
711 } 718 }
712} 719}
713 720
714static void walk_task(unsigned long index, unsigned long count) 721static void walk_task(unsigned long long index, unsigned long long count)
715{ 722{
716 const unsigned long end = index + count; 723 const unsigned long long end = index + count;
717 unsigned long start; 724 unsigned long long start;
718 int i = 0; 725 int i = 0;
719 726
720 while (index < end) { 727 while (index < end) {
@@ -725,21 +732,21 @@ static void walk_task(unsigned long index, unsigned long count)
725 if (pg_start[i] >= end) 732 if (pg_start[i] >= end)
726 return; 733 return;
727 734
728 start = max_t(unsigned long, pg_start[i], index); 735 start = max_t(unsigned long long, pg_start[i], index);
729 index = min_t(unsigned long, pg_end[i], end); 736 index = min_t(unsigned long long, pg_end[i], end);
730 737
731 assert(start < index); 738 assert(start < index);
732 walk_vma(start, index - start); 739 walk_vma(start, index - start);
733 } 740 }
734} 741}
735 742
736static void add_addr_range(unsigned long offset, unsigned long size) 743static void add_addr_range(unsigned long long offset, unsigned long long size)
737{ 744{
738 if (nr_addr_ranges >= MAX_ADDR_RANGES) 745 if (nr_addr_ranges >= MAX_ADDR_RANGES)
739 fatal("too many addr ranges\n"); 746 fatal("too many addr ranges\n");
740 747
741 opt_offset[nr_addr_ranges] = offset; 748 opt_offset[nr_addr_ranges] = offset;
742 opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); 749 opt_size[nr_addr_ranges] = min_t(unsigned long long, size, ULLONG_MAX-offset);
743 nr_addr_ranges++; 750 nr_addr_ranges++;
744} 751}
745 752
@@ -750,7 +757,7 @@ static void walk_addr_ranges(void)
750 kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); 757 kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
751 758
752 if (!nr_addr_ranges) 759 if (!nr_addr_ranges)
753 add_addr_range(0, ULONG_MAX); 760 add_addr_range(0, ULLONG_MAX);
754 761
755 for (i = 0; i < nr_addr_ranges; i++) 762 for (i = 0; i < nr_addr_ranges; i++)
756 if (!opt_pid) 763 if (!opt_pid)
@@ -857,15 +864,15 @@ static void parse_pid(const char *str)
857 } 864 }
858 865
859 while (fgets(buf, sizeof(buf), file) != NULL) { 866 while (fgets(buf, sizeof(buf), file) != NULL) {
860 unsigned long vm_start; 867 unsigned long long vm_start;
861 unsigned long vm_end; 868 unsigned long long vm_end;
862 unsigned long long pgoff; 869 unsigned long long pgoff;
863 int major, minor; 870 int major, minor;
864 char r, w, x, s; 871 char r, w, x, s;
865 unsigned long ino; 872 unsigned long ino;
866 int n; 873 int n;
867 874
868 n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", 875 n = sscanf(buf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
869 &vm_start, 876 &vm_start,
870 &vm_end, 877 &vm_end,
871 &r, &w, &x, &s, 878 &r, &w, &x, &s,
@@ -892,8 +899,8 @@ static void parse_file(const char *name)
892 899
893static void parse_addr_range(const char *optarg) 900static void parse_addr_range(const char *optarg)
894{ 901{
895 unsigned long offset; 902 unsigned long long offset;
896 unsigned long size; 903 unsigned long long size;
897 char *p; 904 char *p;
898 905
899 p = strchr(optarg, ','); 906 p = strchr(optarg, ',');
@@ -906,12 +913,12 @@ static void parse_addr_range(const char *optarg)
906 } else if (p) { 913 } else if (p) {
907 offset = parse_number(optarg); 914 offset = parse_number(optarg);
908 if (p[1] == '\0') 915 if (p[1] == '\0')
909 size = ULONG_MAX; 916 size = ULLONG_MAX;
910 else { 917 else {
911 size = parse_number(p + 1); 918 size = parse_number(p + 1);
912 if (*p == ',') { 919 if (*p == ',') {
913 if (size < offset) 920 if (size < offset)
914 fatal("invalid range: %lu,%lu\n", 921 fatal("invalid range: %llu,%llu\n",
915 offset, size); 922 offset, size);
916 size -= offset; 923 size -= offset;
917 } 924 }
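The page-types.c changes widen offsets to 64 bits and make show_page() print each page's color, derived by masking the PFN with COLOR_MASK. A minimal standalone sketch of that calculation (the 0x1f mask implies a 32-color configuration; the value comes from the hunk above, not from the kernel coloring code):

#include <stdint.h>
#include <stdio.h>

#define COLOR_MASK 0x1fULL	/* low 5 PFN bits -> 32 page colors */

/* Same arithmetic show_page() now prints as "offset & COLOR_MASK". */
static unsigned long long page_color(unsigned long long pfn)
{
	return pfn & COLOR_MASK;
}

int main(void)
{
	unsigned long long pfn = 0x12345ULL;
	printf("pfn %llx -> color %llu\n", pfn, page_color(pfn));
	return 0;
}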
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 253986bd6bb6..6bc40d9e3007 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -606,3 +606,12 @@ source "security/Kconfig"
606source "crypto/Kconfig" 606source "crypto/Kconfig"
607 607
608source "lib/Kconfig" 608source "lib/Kconfig"
609
610config ARCH_HAS_FEATHER_TRACE
611 def_bool n
612
613# Probably add these later
614config ARCH_HAS_SEND_PULL_TIMERS
615 def_bool n
616
617source "litmus/Kconfig"
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 073936a8b275..0415e1867e92 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -13,82 +13,72 @@
13 * and rebuild your kernel. 13 * and rebuild your kernel.
14 */ 14 */
15 15
16/* Because we play games to save cycles in the non-contention case, we 16#define arch_spin_is_locked(lp) ((lp)->tail != (lp)->head)
17 * need to be extra careful about branch targets into the "spinning"
18 * code. They live in their own section, but the newer V9 branches
19 * have a shorter range than the traditional 32-bit sparc branch
20 * variants. The rule is that the branches that go into and out of
21 * the spinner sections must be pre-V9 branches.
22 */
23
24#define arch_spin_is_locked(lp) ((lp)->lock != 0)
25 17
26#define arch_spin_unlock_wait(lp) \ 18#define arch_spin_unlock_wait(lp) \
27 do { rmb(); \ 19 do { rmb(); \
28 } while((lp)->lock) 20 } while((lp)->tail != (lp)->head)
29 21
30static inline void arch_spin_lock(arch_spinlock_t *lock) 22static inline void arch_spin_lock(arch_spinlock_t *lock)
31{ 23{
32 unsigned long tmp; 24 int ticket, tmp;
33 25
34 __asm__ __volatile__( 26 __asm__ __volatile__(
35"1: ldstub [%1], %0\n" 27"1: lduw [%2], %0 \n" /* read ticket */
36" brnz,pn %0, 2f\n" 28" add %0, 1, %1 \n"
37" nop\n" 29" cas [%2], %0, %1 \n"
38" .subsection 2\n" 30" cmp %0, %1 \n"
39"2: ldub [%1], %0\n" 31" be,a,pt %%icc, 2f \n"
40" brnz,pt %0, 2b\n" 32" nop \n"
41" nop\n" 33" ba 1b\n"
42" ba,a,pt %%xcc, 1b\n" 34" nop \n"
43" .previous" 35"2: lduw [%3], %1 \n"
44 : "=&r" (tmp) 36" cmp %0, %1 \n"
45 : "r" (lock) 37" be,a,pt %%icc, 3f \n"
38" nop \n"
39" ba 2b\n"
40"3: nop"
41 : "=&r" (ticket), "=&r" (tmp)
42 : "r" (&lock->tail), "r" (&lock->head)
46 : "memory"); 43 : "memory");
47} 44}
48 45
49static inline int arch_spin_trylock(arch_spinlock_t *lock) 46static inline int arch_spin_trylock(arch_spinlock_t *lock)
50{ 47{
51 unsigned long result; 48 int tail, head;
52
53 __asm__ __volatile__( 49 __asm__ __volatile__(
54" ldstub [%1], %0\n" 50" lduw [%2], %0 \n" /* read tail */
55 : "=r" (result) 51" lduw [%3], %1 \n" /* read head */
56 : "r" (lock) 52" cmp %0, %1 \n"
53" bne,a,pn %%icc, 1f \n"
54" nop \n"
55" inc %1 \n"
56" cas [%2], %0, %1 \n" /* try to inc ticket */
57"1: "
58 : "=&r" (tail), "=&r" (head)
59 : "r" (&lock->tail), "r" (&lock->head)
57 : "memory"); 60 : "memory");
58 61
59 return (result == 0UL); 62 return (tail == head);
60} 63}
61 64
62static inline void arch_spin_unlock(arch_spinlock_t *lock) 65static inline void arch_spin_unlock(arch_spinlock_t *lock)
63{ 66{
67 int tmp;
64 __asm__ __volatile__( 68 __asm__ __volatile__(
65" stb %%g0, [%0]" 69" lduw [%1], %0 \n"
66 : /* No outputs */ 70" inc %0 \n"
67 : "r" (lock) 71" st %0, [%1] \n"
72 : "=&r" (tmp)
73 : "r" (&lock->head)
68 : "memory"); 74 : "memory");
69} 75}
70 76
71static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) 77/* We don't handle this yet, but it looks like not re-enabling the interrupts
72{ 78 * works fine, too. For example, lockdep also does it like this.
73 unsigned long tmp1, tmp2; 79 */
80#define arch_spin_lock_flags(l, f) arch_spin_lock(l)
74 81
75 __asm__ __volatile__(
76"1: ldstub [%2], %0\n"
77" brnz,pn %0, 2f\n"
78" nop\n"
79" .subsection 2\n"
80"2: rdpr %%pil, %1\n"
81" wrpr %3, %%pil\n"
82"3: ldub [%2], %0\n"
83" brnz,pt %0, 3b\n"
84" nop\n"
85" ba,pt %%xcc, 1b\n"
86" wrpr %1, %%pil\n"
87" .previous"
88 : "=&r" (tmp1), "=&r" (tmp2)
89 : "r"(lock), "r"(flags)
90 : "memory");
91}
92 82
93/* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ 83/* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
94 84
diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h
index 9c454fdeaad8..49b89fe2ccfc 100644
--- a/arch/sparc/include/asm/spinlock_types.h
+++ b/arch/sparc/include/asm/spinlock_types.h
@@ -6,10 +6,11 @@
6#endif 6#endif
7 7
8typedef struct { 8typedef struct {
9 volatile unsigned char lock; 9 volatile int tail;
10 volatile int head;
10} arch_spinlock_t; 11} arch_spinlock_t;
11 12
12#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } 13#define __ARCH_SPIN_LOCK_UNLOCKED { 0, 0 }
13 14
14typedef struct { 15typedef struct {
15 volatile unsigned int lock; 16 volatile unsigned int lock;
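Together, these two sparc64 hunks replace the old ldstub test-and-set byte lock with a ticket lock built from separate tail/head counters. A C sketch of the logic the new assembly implements (illustration only, using GCC atomic builtins instead of inline asm; the kernel's lock path uses a cas retry loop, which is equivalent to the fetch-and-add below):

typedef struct {
	volatile int tail;	/* next ticket to hand out */
	volatile int head;	/* ticket currently being served */
} ticket_lock_t;

static void ticket_lock(ticket_lock_t *lock)
{
	/* take a ticket by atomically advancing tail */
	int ticket = __sync_fetch_and_add(&lock->tail, 1);

	/* spin until our ticket is served (cpu_relax() in real code) */
	while (lock->head != ticket)
		;
}

static int ticket_trylock(ticket_lock_t *lock)
{
	int head = lock->head;
	int tail = lock->tail;

	if (tail != head)
		return 0;	/* lock is held */
	/* lock looked free: try to take the next ticket atomically */
	return __sync_bool_compare_and_swap(&lock->tail, tail, tail + 1);
}

static void ticket_unlock(ticket_lock_t *lock)
{
	/* a real implementation needs a release barrier before this */
	lock->head++;	/* hand the lock to the next ticket */
}

Unlike the ldstub lock, waiters are served in FIFO order, which bounds the lock-acquisition delay of any single CPU.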
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 6260d5deeabc..54c43b6bc1d2 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -407,7 +407,11 @@
407#define __NR_sendmmsg 336 407#define __NR_sendmmsg 336
408#define __NR_setns 337 408#define __NR_setns 337
409 409
410#define NR_syscalls 338 410#define __NR_LITMUS 338
411
412#include "litmus/unistd_32.h"
413
414#define NR_syscalls 338 + NR_litmus_syscalls
411 415
412#ifdef __32bit_syscall_numbers__ 416#ifdef __32bit_syscall_numbers__
413/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, 417/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 6e492d59f6b1..941f04216a6c 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -85,3 +85,6 @@ sys_call_table:
85/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init 85/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
86/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime 86/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
87/*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns 87/*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns
88/*338*/ sys_set_rt_task_param, sys_get_rt_task_param
89/*340*/ sys_complete_job, sys_od_open, sys_od_close, sys_litmus_lock, sys_litmus_unlock
90/*345*/ sys_query_job_no, sys_wait_for_job_release, sys_wait_for_ts_release, sys_release_ts, sys_null_call
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index f566518483b5..8543ae0db2d4 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,9 @@ sys_call_table32:
85/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv 85/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
86 .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init 86 .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
87/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime 87/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
88 .word sys_syncfs, compat_sys_sendmmsg, sys_setns 88 .word sys_syncfs, sys_sendmmsg, sys_setns, sys_set_rt_task_param, sys_get_rt_task_param
89/*340*/ .word sys_complete_job, sys_od_open, sys_od_close, sys_litmus_lock, sys_litmus_unlock
90 .word sys_query_job_no, sys_wait_for_job_release, sys_wait_for_ts_release, sys_release_ts, sys_null_call
89 91
90#endif /* CONFIG_COMPAT */ 92#endif /* CONFIG_COMPAT */
91 93
@@ -162,4 +164,6 @@ sys_call_table:
162/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv 164/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
163 .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init 165 .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
164/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime 166/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
165 .word sys_syncfs, sys_sendmmsg, sys_setns 167 .word sys_syncfs, sys_sendmmsg, sys_setns, sys_set_rt_task_param, sys_get_rt_task_param
168/*340*/ .word sys_complete_job, sys_od_open, sys_od_close, sys_litmus_lock, sys_litmus_unlock
169 .word sys_query_job_no, sys_wait_for_job_release, sys_wait_for_ts_release, sys_release_ts, sys_null_call
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0bf12644aa73..95d5636720a6 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1215,6 +1215,21 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1215 .notifier_call = cacheinfo_cpu_callback, 1215 .notifier_call = cacheinfo_cpu_callback,
1216}; 1216};
1217 1217
1218#include <litmus/color.h>
1219
1220static void set_color_vars(void)
1221{
1222 struct _cpuid4_info *leaf = CPUID4_INFO_IDX(
1223 smp_processor_id(), num_cache_leaves - 1);
1224 color_cache_info.size = leaf->size;
1225 color_cache_info.line_size =
1226 (unsigned long)leaf->ebx.split.coherency_line_size + 1;
1227 color_cache_info.ways =
1228 (unsigned long)leaf->ebx.split.ways_of_associativity + 1;
1229 color_cache_info.sets =
1230 (unsigned long)leaf->ecx.split.number_of_sets + 1;
1231}
1232
1218static int __cpuinit cache_sysfs_init(void) 1233static int __cpuinit cache_sysfs_init(void)
1219{ 1234{
1220 int i; 1235 int i;
@@ -1231,6 +1246,7 @@ static int __cpuinit cache_sysfs_init(void)
1231 return err; 1246 return err;
1232 } 1247 }
1233 register_hotcpu_notifier(&cacheinfo_cpu_notifier); 1248 register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1249 set_color_vars();
1234 return 0; 1250 return 0;
1235} 1251}
1236 1252
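set_color_vars() snapshots the last-level cache geometry (size, line size, ways, sets) for litmus/color.c. This hunk does not show how nr_colors is filled in; under the usual page-coloring arithmetic it would follow from the set count and line size, as in this sketch:

/* Assumed derivation (the real computation lives in litmus/color.c and is
 * not part of this hunk): a page color is the group of cache sets a page
 * maps to, so
 *
 *   nr_colors = (sets * line_size) / PAGE_SIZE
 *
 * Example: a 2 MB, 16-way L2 with 64-byte lines has
 *   sets      = 2097152 / (16 * 64) = 2048
 *   nr_colors = (2048 * 64) / 4096  = 32
 * matching the 32 colors implied by COLOR_MASK 0x1f in page-types.c.
 */
static unsigned long nr_colors(unsigned long sets, unsigned long line_size,
			       unsigned long page_size)
{
	return (sets * line_size) / page_size;
}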
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d0126222b394..a40c15970421 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -358,3 +358,4 @@ ENTRY(sys_call_table)
358 .long sys_wait_for_ts_release 358 .long sys_wait_for_ts_release
359 .long sys_release_ts /* +10 */ 359 .long sys_release_ts /* +10 */
360 .long sys_null_call 360 .long sys_null_call
361 .long sys_set_rt_task_mc_param
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index d91bba539ca8..dbb3fcd28928 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -385,6 +385,8 @@ extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info);
385extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info, 385extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
386 struct hrtimer *timer, ktime_t time, 386 struct hrtimer *timer, ktime_t time,
387 const enum hrtimer_mode mode); 387 const enum hrtimer_mode mode);
388extern int hrtimer_pull_cancel(int cpu, struct hrtimer *timer,
389 struct hrtimer_start_on_info *info);
388#endif 390#endif
389 391
390extern int hrtimer_cancel(struct hrtimer *timer); 392extern int hrtimer_cancel(struct hrtimer *timer);
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
index 33344ee8d5f9..d1c73f5cf73e 100644
--- a/include/litmus/budget.h
+++ b/include/litmus/budget.h
@@ -1,10 +1,49 @@
1#ifndef _LITMUS_BUDGET_H_ 1#ifndef _LITMUS_BUDGET_H_
2#define _LITMUS_BUDGET_H_ 2#define _LITMUS_BUDGET_H_
3 3
4/* Update the per-processor enforcement timer (arm/reproram/cancel) for 4struct enforcement_timer {
5 * the next task. */ 5 struct hrtimer timer;
6 int armed;
7};
8
9/**
10 * update_enforcement_timer() - Update per-processor enforcement timer for
11 * the next scheduled task.
12 *
13 * If @t is not NULL and has a precisely enforced budget, the timer will be
14 * armed to trigger a reschedule when the budget is exhausted. Otherwise,
15 * the timer will be cancelled.
16*/
6void update_enforcement_timer(struct task_struct* t); 17void update_enforcement_timer(struct task_struct* t);
7 18
19void init_enforcement_timer(struct enforcement_timer *et);
20
21void arm_enforcement_timer(struct enforcement_timer* et, struct task_struct* t);
22
23void cancel_enforcement_timer(struct enforcement_timer* et);
24
25/* True if a task's server has progressed farther than the task
26 * itself. This happens when budget enforcement has caused a task to be
27 * booted off until the next period.
28 */
29#define behind_server(t)\
30 (lt_before((t)->rt_param.job_params.real_release, get_release(t)))
31
32/**
33 * server_release() - Prepare the task server parameters for the next period.
34 * The server for @t is what is actually executed from the schedulers
35 * perspective.
36 */
37void server_release(struct task_struct *t);
38
39/**
40 * task_release() - Prepare actual task parameters for the next period.
41 * The actual task parameters for @t, real_deadline and real_release, are
42 * the deadline and release from the tasks perspective. We only record these
43 * so that we can write them to feather trace.
44 */
45void task_release(struct task_struct *t);
46
8inline static int budget_exhausted(struct task_struct* t) 47inline static int budget_exhausted(struct task_struct* t)
9{ 48{
10 return get_exec_time(t) >= get_exec_cost(t); 49 return get_exec_time(t) >= get_exec_cost(t);
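The header now exposes the enforcement_timer directly so plugins can arm and cancel it themselves. A sketch of what arming presumably looks like (an assumption based on these declarations only; the actual code is in litmus/budget.c and is not shown here; budget_precisely_enforced(), budget_remaining() and litmus_clock() are existing LITMUS^RT helpers):

#include <linux/hrtimer.h>
#include <litmus/litmus.h>
#include <litmus/budget.h>

static void arm_enforcement_timer_sketch(struct enforcement_timer *et,
					 struct task_struct *t)
{
	lt_t when_to_fire;

	/* only arm for tasks whose budget is precisely enforced */
	if (budget_precisely_enforced(t) && !budget_exhausted(t)) {
		when_to_fire = litmus_clock() + budget_remaining(t);
		hrtimer_start(&et->timer, ns_to_ktime(when_to_fire),
			      HRTIMER_MODE_ABS_PINNED);
		et->armed = 1;
	}
}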
diff --git a/include/litmus/ce_domain.h b/include/litmus/ce_domain.h
new file mode 100644
index 000000000000..5d5fdf7d1efc
--- /dev/null
+++ b/include/litmus/ce_domain.h
@@ -0,0 +1,27 @@
1#ifndef _LITMUS_CE_DOMAIN_H
2#define _LITMUS_CE_DOMAIN_H
3
4/*
5 * Functions that the MC plugin needs to call through a domain pointer.
6 */
7void ce_requeue(domain_t*, struct task_struct*);
8struct task_struct* ce_peek_and_take_ready(domain_t*);
9int ce_higher_prio(struct task_struct*, struct task_struct*);
10
11#ifdef CONFIG_MERGE_TIMERS
12typedef void (*ce_timer_callback_t)(struct rt_event*);
13#else
14typedef enum hrtimer_restart (*ce_timer_callback_t)(struct hrtimer*);
15#endif
16
17void ce_domain_init(domain_t*,
18 raw_spinlock_t*,
19 requeue_t,
20 peek_ready_t,
21 take_ready_t,
22 preempt_needed_t,
23 task_prio_t,
24 struct ce_dom_data*,
25 const int,
26 ce_timer_callback_t);
27#endif
diff --git a/include/litmus/color.h b/include/litmus/color.h
new file mode 100644
index 000000000000..eefb6c6dddf5
--- /dev/null
+++ b/include/litmus/color.h
@@ -0,0 +1,51 @@
1#ifndef LITMUS_COLOR_H
2#define LITMUS_COLOR_H
3
4#ifdef __KERNEL__
5
6#define ONE_COLOR_LEN 11
7#define ONE_COLOR_FMT "%4lu: %4d\n"
8
9struct color_cache_info {
10 unsigned long size;
11 unsigned long line_size;
12 unsigned long ways;
13 unsigned long sets;
14 unsigned long nr_colors;
15};
16
17/* defined in litmus/color.c */
18extern struct color_cache_info color_cache_info;
19extern unsigned long color_chunk;
20
21struct page* get_colored_page(unsigned long);
22void add_page_to_color_list(struct page*);
23void add_page_to_alloced_list(struct page*, struct vm_area_struct*);
24void reclaim_pages(struct vm_area_struct*);
25
26int color_server_params(int cpu, unsigned long *wcet, unsigned long *period);
27
28int color_add_pages_handler(struct ctl_table *, int, void __user *,
29 size_t *, loff_t *);
30int color_nr_pages_handler(struct ctl_table *, int, void __user *,
31 size_t *, loff_t *);
32int color_reclaim_pages_handler(struct ctl_table *, int, void __user *,
33 size_t *, loff_t *);
34
35#ifdef CONFIG_LOCKDEP
36#define LITMUS_LOCKDEP_NAME_MAX_LEN 50
37#define LOCKDEP_DYNAMIC_ALLOC(lock, key, name_buf, fmt, args...) \
38 do { \
39 snprintf(name_buf, LITMUS_LOCKDEP_NAME_MAX_LEN, \
40 fmt, ## args); \
41 lockdep_set_class_and_name(lock, key, name_buf); \
42 } while (0)
43#else
44#define LITMUS_LOCKDEP_NAME_MAX_LEN 0
45#define LOCKDEP_DYNAMIC_ALLOC(lock, key, name_buf, fmt, args) \
46 do { (void)(key); } while (0)
47#endif
48
49#endif
50
51#endif
diff --git a/include/litmus/dgl.h b/include/litmus/dgl.h
new file mode 100644
index 000000000000..acd58f80b58b
--- /dev/null
+++ b/include/litmus/dgl.h
@@ -0,0 +1,65 @@
1#ifndef __DGL_H_
2#define __DGL_H_
3
4#include <litmus/color.h>
5#include <linux/list.h>
6
7/*
8 * A request for @replica amount of a single resource.
9 */
10struct dgl_req {
11 unsigned short replicas;
12 struct list_head list;
13 struct dgl_group_req *greq;
14};
15
16/*
17 * Simultaneous @requests for multiple resources.
18 */
19struct dgl_group_req {
20 int cpu;
21 unsigned long *requested;
22 unsigned long *waiting;
23
24 struct dgl_req *requests;
25
26 unsigned long long ts;
27};
28
29/*
30 * A single resource.
31 */
32struct dgl_resource {
33 unsigned long free_replicas;
34 struct list_head waiting;
35};
36
37/*
38 * A group of resources.
39 */
40struct dgl {
41 struct dgl_resource *resources;
42 struct dgl_group_req* *acquired;
43
44 char requests;
45 char running;
46 unsigned long long ts;
47
48 unsigned long num_resources;
49 unsigned long num_replicas;
50};
51
52void dgl_init(struct dgl *dgl, unsigned long num_resources,
53 unsigned long num_replicas);
54void dgl_free(struct dgl *dgl);
55
56void dgl_group_req_init(struct dgl *dgl, struct dgl_group_req *greq);
57void dgl_group_req_free(struct dgl_group_req *greq);
58
59void set_req(struct dgl *dgl, struct dgl_group_req *greq,
60 int resource, int replicas);
61
62void add_group_req(struct dgl *dgl, struct dgl_group_req *greq, int cpu);
63void remove_group_req(struct dgl *dgl, struct dgl_group_req *greq);
64
65#endif
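A usage sketch for the dynamic group lock (DGL) API declared above. The sizes and resource indices are made up, and whether add_group_req() blocks or merely enqueues the request when replicas are unavailable is not visible from the header, so the critical-section comment is an assumption:

#include <litmus/dgl.h>

static struct dgl sys_dgl;
static struct dgl_group_req my_req;

static void dgl_example(int cpu)
{
	/* e.g. 16 resources (cache colors) with 4 replicas (ways) each */
	dgl_init(&sys_dgl, 16, 4);

	dgl_group_req_init(&sys_dgl, &my_req);

	/* this request needs 2 replicas of resource 3 and 1 of resource 7 */
	set_req(&sys_dgl, &my_req, 3, 2);
	set_req(&sys_dgl, &my_req, 7, 1);

	/* acquire (or wait for) all requested replicas as one group */
	add_group_req(&sys_dgl, &my_req, cpu);

	/* ... critical section using the acquired replicas ... */

	remove_group_req(&sys_dgl, &my_req);
	dgl_group_req_free(&my_req);
	dgl_free(&sys_dgl);
}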
diff --git a/include/litmus/domain.h b/include/litmus/domain.h
new file mode 100644
index 000000000000..d16ed1872a52
--- /dev/null
+++ b/include/litmus/domain.h
@@ -0,0 +1,50 @@
1/**
2 * --Todo--
3 * Naming: this should become rt_domain while the old rt_domain should be
4 * changed to sd_domain (sporadic) or pd_domain (periodic).
5 * task_new: need to add and use this method
6 */
7#ifndef _LITMUS_DOMAIN_H_
8#define _LITMUS_DOMAIN_H_
9
10struct domain;
11
12typedef void (*requeue_t)(struct domain*, struct task_struct*);
13typedef void (*remove_t)(struct domain*, struct task_struct*);
14typedef struct task_struct* (*peek_ready_t)(struct domain*);
15typedef struct task_struct* (*take_ready_t)(struct domain*);
16typedef int (*preempt_needed_t)(struct domain*, struct task_struct*);
17typedef int (*task_prio_t)(struct task_struct*, struct task_struct*);
18
19typedef struct domain {
20 raw_spinlock_t* lock; /* for coarse serialization */
21 struct list_head list; /* list membership */
22 void* data; /* implementation-specific data */
23 char* name; /* for debugging */
24
25 /* add a task to the domain */
26 requeue_t requeue;
27 /* prevent a task from being returned by the domain */
28 remove_t remove;
29 /* return next ready task */
30 peek_ready_t peek_ready;
31 /* remove and return next ready task */
32 take_ready_t take_ready;
33 /* return true if the domain has a task which should preempt the
34 * task given
35 */
36 preempt_needed_t preempt_needed;
37 /* for tasks within this domain, returns true if the first has
38 * has a higher priority than the second
39 */
40 task_prio_t higher_prio;
41} domain_t;
42
43void domain_init(domain_t *dom,
44 raw_spinlock_t *lock,
45 requeue_t requeue,
46 peek_ready_t peek_ready,
47 take_ready_t take_ready,
48 preempt_needed_t preempt_needed,
49 task_prio_t priority);
50#endif
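domain_t is essentially a vtable over a ready queue. A sketch of how a plugin might drive it through the function pointers above (illustration only; real plugin code also uses remove() and peek_ready() and tracks linking/preemption state):

#include <linux/spinlock.h>
#include <litmus/domain.h>

static struct task_struct *pick_next(domain_t *dom,
				     struct task_struct *prev)
{
	struct task_struct *next = prev;

	raw_spin_lock(dom->lock);
	if (!prev || dom->preempt_needed(dom, prev)) {
		next = dom->take_ready(dom);	/* pop highest-priority task */
		if (prev)
			dom->requeue(dom, prev);/* put preempted task back */
	}
	raw_spin_unlock(dom->lock);
	return next;
}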
diff --git a/include/litmus/event_group.h b/include/litmus/event_group.h
new file mode 100644
index 000000000000..b0654e0ec5e6
--- /dev/null
+++ b/include/litmus/event_group.h
@@ -0,0 +1,91 @@
1#ifndef _LINUX_EVENT_QUEUE_H_
2#define _LINUX_EVENT_QUEUE_H_
3
4#define EVENT_QUEUE_SLOTS 127 /* prime */
5
6#define NUM_EVENT_PRIORITIES 4 /* num crit levels really */
7
8struct rt_event;
9typedef void (*fire_event_t)(struct rt_event *e);
10
11struct event_group {
12 lt_t res;
13 int cpu;
14 struct list_head event_queue[EVENT_QUEUE_SLOTS];
15 raw_spinlock_t queue_lock;
16};
17
18/**
19 * A group of actions to fire at a given time
20 */
21struct event_list {
22 /* Use multiple list heads so that inserts are O(1) */
23 struct list_head events[NUM_EVENT_PRIORITIES];
24
25 /* For timer firing */
26 lt_t fire_time;
27 struct hrtimer timer;
28 struct hrtimer_start_on_info info;
29
30 struct list_head queue_node; /* For event_queue */
31 struct event_group* group; /* For callback */
32};
33
34/**
35 * A single action to fire at a time
36 */
37struct rt_event {
38 /* Function to call on event expiration */
39 fire_event_t function;
40 /* Priority of this event (lower is better) */
41 int prio;
42
43 /* For membership in the event_list */
44 struct list_head events_node;
45 /* To avoid runtime allocation. This is NOT necessarily
46 * the event_list containing this event. This is just a
47 * pre-allocated event list which can be used for merging
48 * events.
49 */
50 struct event_list* event_list;
51 /* Pointer set by add_event() so that we can cancel this event
52 * without knowing what group it is in (don't touch it).
53 */
54 struct event_group* _event_group;
55};
56
57/**
58 * add_event() - Add timer to event group.
59 * @group Group with which to merge event. If NULL, use the event
60 * group of whatever CPU currently executing on.
61 * @e Event to be fired at a specific time
62 * @time Time to fire event
63 */
64void add_event(struct event_group* group, struct rt_event* e, lt_t time);
65
66/**
67 * cancel_event() - Remove event from the group.
68 */
69void cancel_event(struct rt_event*);
70
71/**
72 * init_event() - Create an event.
73 * @e Event to create
74 * @prio Priority of the event (lower is better)
75 * @function Function to fire when event expires
76 * @el Pre-allocated event list for timer merging
77 */
78void init_event(struct rt_event* e, int prio, fire_event_t function,
79 struct event_list *el);
80
81struct event_list* event_list_alloc(int);
82void event_list_free(struct event_list *el);
83
84/**
85 * get_event_group_for() - Get the event group for a CPU.
86 * @cpu The CPU to get the event group for. Use NO_CPU to get the
87 * event group of the CPU that the call is executing on.
88 */
89struct event_group *get_event_group_for(const int cpu);
90
91#endif
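A usage sketch for the event-group API: events added to the same CPU's group within its res window share a single hrtimer. The callback, priority value and GFP flag below are placeholders; litmus_reschedule_local() and NO_CPU are existing LITMUS^RT symbols:

#include <litmus/event_group.h>
#include <litmus/preempt.h>

static struct rt_event budget_event;

/* Runs from the shared hrtimer when the merged event fires. */
static void on_budget_exhausted(struct rt_event *e)
{
	litmus_reschedule_local();
}

static void example_setup(void)
{
	/* priority 1 and GFP_ATOMIC are placeholder choices */
	init_event(&budget_event, 1, on_budget_exhausted,
		   event_list_alloc(GFP_ATOMIC));
}

static void example_arm(lt_t when)
{
	/* NO_CPU merges into the local CPU's event group */
	add_event(get_event_group_for(NO_CPU), &budget_event, when);
}

static void example_cancel(void)
{
	cancel_event(&budget_event);
}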
diff --git a/include/litmus/fifo_common.h b/include/litmus/fifo_common.h
new file mode 100644
index 000000000000..4756f77bd511
--- /dev/null
+++ b/include/litmus/fifo_common.h
@@ -0,0 +1,25 @@
1/*
2 * FIFO common data structures and utility functions shared by all
3 * FIFO-based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __FIFO_COMMON_H__
11#define __FIFO_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int fifo_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int fifo_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 3e78b9c61580..c3b91fe8115c 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -44,6 +44,8 @@ void litmus_exit_task(struct task_struct *tsk);
44 44
45#define tsk_rt(t) (&(t)->rt_param) 45#define tsk_rt(t) (&(t)->rt_param)
46 46
47#define get_server_job(t) (tsk_rt(t)->job_params.fake_job_no)
48
47/* Realtime utility macros */ 49/* Realtime utility macros */
48#define get_rt_flags(t) (tsk_rt(t)->flags) 50#define get_rt_flags(t) (tsk_rt(t)->flags)
49#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f)) 51#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f))
@@ -55,6 +57,7 @@ void litmus_exit_task(struct task_struct *tsk);
55#define get_rt_period(t) (tsk_rt(t)->task_params.period) 57#define get_rt_period(t) (tsk_rt(t)->task_params.period)
56#define get_rt_relative_deadline(t) (tsk_rt(t)->task_params.relative_deadline) 58#define get_rt_relative_deadline(t) (tsk_rt(t)->task_params.relative_deadline)
57#define get_rt_phase(t) (tsk_rt(t)->task_params.phase) 59#define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
60#define get_rt_job(t) (tsk_rt(t)->job_params.job_no)
58#define get_partition(t) (tsk_rt(t)->task_params.cpu) 61#define get_partition(t) (tsk_rt(t)->task_params.cpu)
59#define get_priority(t) (tsk_rt(t)->task_params.priority) 62#define get_priority(t) (tsk_rt(t)->task_params.priority)
60#define get_class(t) (tsk_rt(t)->task_params.cls) 63#define get_class(t) (tsk_rt(t)->task_params.cls)
@@ -64,6 +67,14 @@ void litmus_exit_task(struct task_struct *tsk);
64#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) 67#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
65#define get_deadline(t) (tsk_rt(t)->job_params.deadline) 68#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
66#define get_release(t) (tsk_rt(t)->job_params.release) 69#define get_release(t) (tsk_rt(t)->job_params.release)
70#define get_class(t) (tsk_rt(t)->task_params.cls)
71
72#define get_task_domain(t) (tsk_rt(t)->_domain)
73#define is_server(t) (tsk_rt(t)->is_server)
74#define get_task_server(task) (tsk_rt(task)->server)
75
76#define is_priority_boosted(t) (tsk_rt(t)->priority_boosted)
77#define get_boost_start(t) (tsk_rt(t)->boost_start_time)
67#define get_lateness(t) (tsk_rt(t)->job_params.lateness) 78#define get_lateness(t) (tsk_rt(t)->job_params.lateness)
68 79
69#define is_hrt(t) \ 80#define is_hrt(t) \
@@ -116,6 +127,16 @@ void srp_ceiling_block(void);
116 127
117#define bheap2task(hn) ((struct task_struct*) hn->value) 128#define bheap2task(hn) ((struct task_struct*) hn->value)
118 129
130static inline struct control_page* get_control_page(struct task_struct *t)
131{
132 return tsk_rt(t)->ctrl_page;
133}
134
135static inline int has_control_page(struct task_struct* t)
136{
137 return tsk_rt(t)->ctrl_page != NULL;
138}
139
119#ifdef CONFIG_NP_SECTION 140#ifdef CONFIG_NP_SECTION
120 141
121static inline int is_kernel_np(struct task_struct *t) 142static inline int is_kernel_np(struct task_struct *t)
@@ -142,7 +163,7 @@ static inline void request_exit_np(struct task_struct *t)
142 163
143static inline void make_np(struct task_struct *t) 164static inline void make_np(struct task_struct *t)
144{ 165{
145 tsk_rt(t)->kernel_np++; 166 tsk_rt(t)->kernel_np = 1;
146} 167}
147 168
148/* Caller should check if preemption is necessary when 169/* Caller should check if preemption is necessary when
@@ -150,7 +171,7 @@ static inline void make_np(struct task_struct *t)
150 */ 171 */
151static inline int take_np(struct task_struct *t) 172static inline int take_np(struct task_struct *t)
152{ 173{
153 return --tsk_rt(t)->kernel_np; 174 return tsk_rt(t)->kernel_np = 0;
154} 175}
155 176
156/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */ 177/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
@@ -183,6 +204,20 @@ static inline int request_exit_np_atomic(struct task_struct *t)
183 204
184#else 205#else
185 206
207
208static inline void make_np(struct task_struct *t)
209{
210
211}
212
213/* Caller should check if preemption is necessary when
214 * the function return 0.
215 */
216static inline int take_np(struct task_struct *t)
217{
218 return 0;
219}
220
186static inline int is_kernel_np(struct task_struct* t) 221static inline int is_kernel_np(struct task_struct* t)
187{ 222{
188 return 0; 223 return 0;
@@ -218,10 +253,6 @@ static inline int is_np(struct task_struct *t)
218 int kernel, user; 253 int kernel, user;
219 kernel = is_kernel_np(t); 254 kernel = is_kernel_np(t);
220 user = is_user_np(t); 255 user = is_user_np(t);
221 if (kernel || user)
222 TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
223
224 kernel, user);
225 return kernel || user; 256 return kernel || user;
226#else 257#else
227 return unlikely(is_kernel_np(t) || is_user_np(t)); 258 return unlikely(is_kernel_np(t) || is_user_np(t));
@@ -260,4 +291,8 @@ static inline quanta_t time2quanta(lt_t time, enum round round)
260/* By how much is cpu staggered behind CPU 0? */ 291/* By how much is cpu staggered behind CPU 0? */
261u64 cpu_stagger_offset(int cpu); 292u64 cpu_stagger_offset(int cpu);
262 293
294#define TS_SYSCALL_IN_START \
295 if (has_control_page(current)) \
296 __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start);
297
263#endif 298#endif
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
index 4d7b870cb443..41991d5af01b 100644
--- a/include/litmus/locking.h
+++ b/include/litmus/locking.h
@@ -9,6 +9,7 @@ struct litmus_lock_ops;
9struct litmus_lock { 9struct litmus_lock {
10 struct litmus_lock_ops *ops; 10 struct litmus_lock_ops *ops;
11 int type; 11 int type;
12 int id;
12}; 13};
13 14
14struct litmus_lock_ops { 15struct litmus_lock_ops {
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
index 380b886d78ff..5b69e26fc57d 100644
--- a/include/litmus/preempt.h
+++ b/include/litmus/preempt.h
@@ -8,11 +8,13 @@
8 8
9#include <litmus/debug_trace.h> 9#include <litmus/debug_trace.h>
10 10
11
11extern DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); 12extern DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
12 13
13#ifdef CONFIG_PREEMPT_STATE_TRACE 14#ifdef CONFIG_PREEMPT_STATE_TRACE
14const char* sched_state_name(int s); 15const char* sched_state_name(int s);
15#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args) 16//#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args)
17#define TRACE_STATE(fmt, args...) /* ignore */
16#else 18#else
17#define TRACE_STATE(fmt, args...) /* ignore */ 19#define TRACE_STATE(fmt, args...) /* ignore */
18#endif 20#endif
diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h
new file mode 100644
index 000000000000..3e03d9b5d140
--- /dev/null
+++ b/include/litmus/rm_common.h
@@ -0,0 +1,25 @@
1/*
2 * RM common data structures and utility functions shared by all RM
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_RM_COMMON_H__
11#define __UNC_RM_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int rm_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int rm_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
index ac249292e866..03826228dbd9 100644
--- a/include/litmus/rt_domain.h
+++ b/include/litmus/rt_domain.h
@@ -6,6 +6,9 @@
6#define __UNC_RT_DOMAIN_H__ 6#define __UNC_RT_DOMAIN_H__
7 7
8#include <litmus/bheap.h> 8#include <litmus/bheap.h>
9#include <litmus/domain.h>
10#include <litmus/event_group.h>
11#include <litmus/sched_mc.h>
9 12
10#define RELEASE_QUEUE_SLOTS 127 /* prime */ 13#define RELEASE_QUEUE_SLOTS 127 /* prime */
11 14
@@ -29,7 +32,11 @@ typedef struct _rt_domain {
29 raw_spinlock_t release_lock; 32 raw_spinlock_t release_lock;
30 struct release_queue release_queue; 33 struct release_queue release_queue;
31 34
32#ifdef CONFIG_RELEASE_MASTER 35#if defined(CONFIG_MERGE_TIMERS)
36 struct event_group* event_group;
37 int prio;
38#endif
39#if defined(CONFIG_RELEASE_MASTER)
33 int release_master; 40 int release_master;
34#endif 41#endif
35 42
@@ -45,6 +52,8 @@ typedef struct _rt_domain {
45 52
46 /* how are tasks ordered in the ready queue? */ 53 /* how are tasks ordered in the ready queue? */
47 bheap_prio_t order; 54 bheap_prio_t order;
55
56 enum crit_level level;
48} rt_domain_t; 57} rt_domain_t;
49 58
50struct release_heap { 59struct release_heap {
@@ -53,13 +62,18 @@ struct release_heap {
53 lt_t release_time; 62 lt_t release_time;
54 /* all tasks to be released at release_time */ 63 /* all tasks to be released at release_time */
55 struct bheap heap; 64 struct bheap heap;
65
66#ifdef CONFIG_MERGE_TIMERS
67 /* used to merge timer calls */
68 struct rt_event event;
69#else
56 /* used to trigger the release */ 70 /* used to trigger the release */
57 struct hrtimer timer; 71 struct hrtimer timer;
58
59#ifdef CONFIG_RELEASE_MASTER 72#ifdef CONFIG_RELEASE_MASTER
60 /* used to delegate releases */ 73 /* used to delegate releases */
61 struct hrtimer_start_on_info info; 74 struct hrtimer_start_on_info info;
62#endif 75#endif
76#endif
63 /* required for the timer callback */ 77 /* required for the timer callback */
64 rt_domain_t* dom; 78 rt_domain_t* dom;
65}; 79};
@@ -76,12 +90,22 @@ static inline struct task_struct* __next_ready(rt_domain_t* rt)
76 90
77void rt_domain_init(rt_domain_t *rt, bheap_prio_t order, 91void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
78 check_resched_needed_t check, 92 check_resched_needed_t check,
79 release_jobs_t relase); 93 release_jobs_t release);
94
95void pd_domain_init(domain_t *dom,
96 rt_domain_t *rt,
97 bheap_prio_t order,
98 check_resched_needed_t check,
99 release_jobs_t release,
100 preempt_needed_t preempt_needed,
101 task_prio_t priority);
80 102
81void __add_ready(rt_domain_t* rt, struct task_struct *new); 103void __add_ready(rt_domain_t* rt, struct task_struct *new);
82void __merge_ready(rt_domain_t* rt, struct bheap *tasks); 104void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
83void __add_release(rt_domain_t* rt, struct task_struct *task); 105void __add_release(rt_domain_t* rt, struct task_struct *task);
84 106
107struct release_heap* release_heap_alloc(int gfp_flags);
108
85static inline struct task_struct* __take_ready(rt_domain_t* rt) 109static inline struct task_struct* __take_ready(rt_domain_t* rt)
86{ 110{
87 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue); 111 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 20268190757f..a8c82eed5562 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -91,7 +91,31 @@ union np_flag {
91struct control_page { 91struct control_page {
92 volatile union np_flag sched; 92 volatile union np_flag sched;
93 93
94 /* to be extended */ 94 /* locking overhead tracing: time stamp prior to system call */
95 uint64_t ts_syscall_start; /* Feather-Trace cycles */
96
97 int colors_updated:8;
98};
99
100#ifndef __KERNEL__
101/*
102 * XXX This is a terrible hack so liblitmus can use the PAGE_SIZE macro.
103 * We should fix liblitmus to set up the page size at runtime.
104 */
105#define CACHE_LINE_SIZE 64
106#if (ARCH == x86_64)
107#define PAGE_SIZE 4096
108#elif (ARCH == sparc64)
109#define PAGE_SIZE 8192
110#endif
111#endif /* ifndef __KERNEL__ */
112
113typedef uint8_t color_t;
114#define COLORS_PER_CONTROL_PAGE (PAGE_SIZE / (2 * sizeof(color_t)))
115struct color_ctrl_page {
116 color_t colors[COLORS_PER_CONTROL_PAGE];
117 /* must be same type to guarantee equal array sizes */
118 color_t pages[COLORS_PER_CONTROL_PAGE];
95}; 119};
96 120
97/* don't export internal data structures to user space (liblitmus) */ 121/* don't export internal data structures to user space (liblitmus) */
@@ -100,6 +124,9 @@ struct control_page {
100struct _rt_domain; 124struct _rt_domain;
101struct bheap_node; 125struct bheap_node;
102struct release_heap; 126struct release_heap;
127struct domain;
128struct rt_server;
129struct dgl_group_req;
103 130
104struct rt_job { 131struct rt_job {
105 /* Time instant the the job was or will be released. */ 132 /* Time instant the the job was or will be released. */
@@ -107,6 +134,10 @@ struct rt_job {
107 /* What is the current deadline? */ 134 /* What is the current deadline? */
108 lt_t deadline; 135 lt_t deadline;
109 136
137 lt_t real_release;
138 lt_t real_deadline;
139 unsigned int fake_job_no;
140
110 /* How much service has this job received so far? */ 141 /* How much service has this job received so far? */
111 lt_t exec_time; 142 lt_t exec_time;
112 143
@@ -127,6 +158,9 @@ struct rt_job {
127}; 158};
128 159
129struct pfair_param; 160struct pfair_param;
161#ifdef CONFIG_PLUGIN_MC
162struct mc_data;
163#endif
130 164
131/* RT task parameters for scheduling extensions 165/* RT task parameters for scheduling extensions
132 * These parameters are inherited during clone and therefore must 166 * These parameters are inherited during clone and therefore must
@@ -142,6 +176,8 @@ struct rt_param {
142 /* is the task present? (true if it can be scheduled) */ 176 /* is the task present? (true if it can be scheduled) */
143 unsigned int present:1; 177 unsigned int present:1;
144 178
179 unsigned int is_server:1;
180
145#ifdef CONFIG_LITMUS_LOCKING 181#ifdef CONFIG_LITMUS_LOCKING
146 /* Is the task being priority-boosted by a locking protocol? */ 182 /* Is the task being priority-boosted by a locking protocol? */
147 unsigned int priority_boosted:1; 183 unsigned int priority_boosted:1;
@@ -149,6 +185,17 @@ struct rt_param {
149 lt_t boost_start_time; 185 lt_t boost_start_time;
150#endif 186#endif
151 187
188#ifdef CONFIG_PLUGIN_MC
189 /* mixed criticality specific data */
190 struct mc_data *mc_data;
191#endif
192#ifdef CONFIG_MERGE_TIMERS
193 struct rt_event *event;
194#endif
195
196 struct rt_server *server;
197
198
152 /* user controlled parameters */ 199 /* user controlled parameters */
153 struct rt_task task_params; 200 struct rt_task task_params;
154 201
@@ -203,6 +250,9 @@ struct rt_param {
203 int old_policy; 250 int old_policy;
204 int old_prio; 251 int old_prio;
205 252
253 /* TODO: rename */
254 struct domain *_domain;
255
206 /* ready queue for this task */ 256 /* ready queue for this task */
207 struct _rt_domain* domain; 257 struct _rt_domain* domain;
208 258
@@ -229,8 +279,12 @@ struct rt_param {
229 lt_t total_tardy; 279 lt_t total_tardy;
230 lt_t max_tardy; 280 lt_t max_tardy;
231 unsigned int missed; 281 unsigned int missed;
282
232 lt_t max_exec_time; 283 lt_t max_exec_time;
233 lt_t tot_exec_time; 284 lt_t tot_exec_time;
285 lt_t last_exec_time;
286 struct color_ctrl_page *color_ctrl_page;
287 struct dgl_group_req *req;
234}; 288};
235 289
236/* Possible RT flags */ 290/* Possible RT flags */
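Worked sizing for the new color_ctrl_page, following directly from the definitions above:

/* With color_t = uint8_t and a 4 KB page (the x86-64 case above):
 *
 *   COLORS_PER_CONTROL_PAGE = PAGE_SIZE / (2 * sizeof(color_t))
 *                           = 4096 / 2 = 2048
 *
 * so one color_ctrl_page holds 2048 (color, page-count) pairs, and the two
 * arrays together fill the page exactly: 2 * 2048 * 1 byte = 4096 bytes.
 * On sparc64 (8 KB pages) the same formula gives 4096 entries per array.
 */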
diff --git a/include/litmus/rt_server.h b/include/litmus/rt_server.h
new file mode 100644
index 000000000000..0e2feb6c6b0e
--- /dev/null
+++ b/include/litmus/rt_server.h
@@ -0,0 +1,31 @@
1#ifndef __RT_SERVER_H
2#define __RT_SERVER_H
3
4#include <linux/sched.h>
5#include <litmus/litmus.h>
6#include <litmus/rt_domain.h>
7
8struct rt_server;
9
10typedef int (*need_preempt_t)(rt_domain_t *rt, struct task_struct *t);
11typedef void (*server_update_t)(struct rt_server *srv);
12
13struct rt_server {
14 int sid;
15 int cpu;
16 struct task_struct* linked;
17 rt_domain_t* domain;
18 int running;
19
20 /* Does this server have a higher-priority task? */
21 need_preempt_t need_preempt;
22 /* System state has changed, so should server */
23 server_update_t update;
24};
25
26void init_rt_server(struct rt_server *server,
27 int sid, int cpu, rt_domain_t *domain,
28 need_preempt_t need_preempt,
29 server_update_t update);
30
31#endif
diff --git a/include/litmus/sched_mc.h b/include/litmus/sched_mc.h
new file mode 100644
index 000000000000..1d491ce6a31a
--- /dev/null
+++ b/include/litmus/sched_mc.h
@@ -0,0 +1,134 @@
1#ifndef _LINUX_SCHED_MC_H_
2#define _LINUX_SCHED_MC_H_
3
4/* criticality levels */
5enum crit_level {
6 /* probably don't need to assign these (paranoid) */
7 CRIT_LEVEL_A = 0,
8 CRIT_LEVEL_B = 1,
9 CRIT_LEVEL_C = 2,
10 NUM_CRIT_LEVELS = 3,
11};
12
13struct mc_task {
14 enum crit_level crit;
15 int lvl_a_id;
16 int lvl_a_eligible;
17};
18
19struct mc_job {
20 int is_ghost:1;
21 lt_t ghost_budget;
22};
23
24#ifdef __KERNEL__
25/*
26 * These are used only in the kernel. Userspace programs like RTSpin won't see
27 * them.
28 */
29struct mc_data {
30 struct mc_task mc_task;
31 struct mc_job mc_job;
32};
33
34#define tsk_mc_data(t) (tsk_rt(t)->mc_data)
35#define tsk_mc_crit(t) (tsk_mc_data(t) ? tsk_mc_data(t)->mc_task.crit : CRIT_LEVEL_C)
36#define is_ghost(t) (tsk_mc_data(t)->mc_job.is_ghost)
37
38#define TS "(%s/%d:%d:%s)"
39#define TA(t) (t) ? tsk_mc_data(t) ? is_ghost(t) ? "ghost" : t->comm \
40 : t->comm : "NULL", \
41 (t) ? t->pid : 1, \
42 (t) ? t->rt_param.job_params.job_no : 1, \
43 (t && get_task_domain(t)) ? get_task_domain(t)->name : ""
44#define STRACE(fmt, args...) \
45 sched_trace_log_message("%d P%d [%s@%s:%d]: " fmt, \
46 TRACE_ARGS, ## args)
47#define TRACE_MC_TASK(t, fmt, args...) \
48 STRACE(TS " " fmt, TA(t), ##args)
49
50/*
51 * The MC-CE scheduler uses this as domain data.
52 */
53struct ce_dom_data {
54 int cpu;
55 struct task_struct *scheduled, *should_schedule;
56#ifdef CONFIG_MERGE_TIMERS
57 struct rt_event event;
58#else
59 struct hrtimer_start_on_info timer_info;
60 struct hrtimer timer;
61#endif
62};
63
64/**
65 * enum crit_state - Logically add / remove CPUs from criticality levels.
66 *
67 * Global crit levels need to use a two step process to remove CPUs so
68 * that the CPUs can be removed without holding domain locks.
69 *
70 * @CS_ACTIVE The criticality entry can run a task
71 * @CS_ACTIVATE The criticality entry can run a task, but hasn't had its
72 * position updated in a global heap. Set with ONLY CPU lock.
73 * @CS_REMOVE The criticality entry is logically removed, but hasn't had its
74 * position adjusted in a global heap. This should be set when
75 * ONLY the CPU state is locked.
76 * @CS_REMOVED The criticality entry has been removed from the crit level
77 */
78enum crit_state { CS_ACTIVE, CS_ACTIVATE, CS_REMOVE, CS_REMOVED };
79
80/**
81 * struct crit_entry - State of a CPU within each criticality level system.
82 * @level Criticality level of this entry
83 * @linked Logically running task, ghost or regular
84 * @domain Domain from which to draw tasks
85 * @usable False if a higher criticality task is running
86 * @event For ghost task budget enforcement (merge timers)
87 * @timer For ghost task budget enforcement (not merge timers)
88 * @node Used to sort crit_entries by preemptability in global domains
89 */
90struct crit_entry {
91 enum crit_level level;
92 struct task_struct* linked;
93 struct domain* domain;
94 enum crit_state state;
95#ifdef CONFIG_MERGE_TIMERS
96 struct rt_event event;
97#else
98 struct hrtimer timer;
99#endif
100 struct bheap_node* node;
101};
102
103/**
104 * struct domain_data - Wrap domains with related CPU state
105 * @domain A domain for a criticality level
106 * @heap The preemptable heap of crit entries (for global domains)
107 * @crit_entry The crit entry for this domain (for partitioned domains)
108 */
109struct domain_data {
110 struct domain domain;
111 struct bheap* heap;
112 struct crit_entry* crit_entry;
113};
114
115/*
116 * Functions that are used with the MC-CE plugin.
117 */
118long mc_ce_set_domains(const int, struct domain_data*[]);
119unsigned int mc_ce_get_expected_job(const int, const int);
120
121/*
122 * These functions are (lazily) inserted into the MC plugin code so that it
123 * manipulates the MC-CE state.
124 */
125long mc_ce_admit_task_common(struct task_struct*);
126void mc_ce_task_exit_common(struct task_struct*);
127lt_t mc_ce_timer_callback_common(domain_t*);
128void mc_ce_release_at_common(struct task_struct*, lt_t);
129long mc_ce_activate_plugin_common(void);
130long mc_ce_deactivate_plugin_common(void);
131
132#endif /* __KERNEL__ */
133
134#endif
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 6e7cabdddae8..0f529fa78b4d 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -11,6 +11,8 @@
11#include <litmus/locking.h> 11#include <litmus/locking.h>
12#endif 12#endif
13 13
14struct litmus_lock;
15
14/************************ setup/tear down ********************/ 16/************************ setup/tear down ********************/
15 17
16typedef long (*activate_plugin_t) (void); 18typedef long (*activate_plugin_t) (void);
@@ -67,6 +69,9 @@ typedef long (*admit_task_t)(struct task_struct* tsk);
67 69
68typedef void (*release_at_t)(struct task_struct *t, lt_t start); 70typedef void (*release_at_t)(struct task_struct *t, lt_t start);
69 71
72/* TODO remove me */
73typedef void (*release_ts_t)(lt_t time);
74
70struct sched_plugin { 75struct sched_plugin {
71 struct list_head list; 76 struct list_head list;
72 /* basic info */ 77 /* basic info */
@@ -93,6 +98,8 @@ struct sched_plugin {
93 task_block_t task_block; 98 task_block_t task_block;
94 task_exit_t task_exit; 99 task_exit_t task_exit;
95 100
101 release_ts_t release_ts;
102
96#ifdef CONFIG_LITMUS_LOCKING 103#ifdef CONFIG_LITMUS_LOCKING
97 /* locking protocols */ 104 /* locking protocols */
98 allocate_lock_t allocate_lock; 105 allocate_lock_t allocate_lock;
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 2f992789affb..0e050ac3748c 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -24,7 +24,8 @@ struct st_param_data { /* regular params */
24 u32 phase; 24 u32 phase;
25 u8 partition; 25 u8 partition;
26 u8 class; 26 u8 class;
27 u8 __unused[2]; 27 u8 level;
28 u8 __unused[1];
28}; 29};
29 30
30struct st_release_data { /* A job is was/is going to be released. */ 31struct st_release_data { /* A job is was/is going to be released. */
@@ -71,8 +72,8 @@ struct st_resume_data { /* A task resumes. */
71 72
72struct st_action_data { 73struct st_action_data {
73 u64 when; 74 u64 when;
74 u8 action; 75 u32 action;
75 u8 __unused[7]; 76 u8 __unused[4];
76}; 77};
77 78
78struct st_sys_release_data { 79struct st_sys_release_data {
@@ -195,8 +196,9 @@ feather_callback void do_sched_trace_task_tardy(unsigned long id,
195#define trace_litmus_switch_to(t) 196#define trace_litmus_switch_to(t)
196#define trace_litmus_switch_away(prev) 197#define trace_litmus_switch_away(prev)
197#define trace_litmus_task_completion(t, forced) 198#define trace_litmus_task_completion(t, forced)
198#define trace_litmus_task_block(t) 199
199#define trace_litmus_task_resume(t) 200#define trace_litmus_task_block(t, i)
201#define trace_litmus_task_resume(t, i)
200#define trace_litmus_sys_release(start) 202#define trace_litmus_sys_release(start)
201#define trace_litmus_task_exit(t) 203#define trace_litmus_task_exit(t)
202#define trace_litmus_task_tardy(t) 204#define trace_litmus_task_tardy(t)
@@ -325,14 +327,14 @@ feather_callback void do_sched_trace_task_tardy(unsigned long id,
325 trace_litmus_server_param(sid, cid, wcet, period); \ 327 trace_litmus_server_param(sid, cid, wcet, period); \
326 } while(0) 328 } while(0)
327 329
328#define sched_trace_server_switch_to(sid, job, tid) \ 330#define sched_trace_server_switch_to(sid, job, tid, tjob) \
329 do { \ 331 do { \
330 trace_litmus_server_switch_to(sid, job, tid); \ 332 trace_litmus_server_switch_to(sid, job, tid, tjob); \
331 } while(0) 333 } while(0)
332 334
333#define sched_trace_server_switch_away(sid, job, tid) \ 335#define sched_trace_server_switch_away(sid, job, tid, tjob) \
334 do { \ 336 do { \
335 trace_litmus_server_switch_away(sid, job, tid); \ 337 trace_litmus_server_switch_away(sid, job, tid, tjob); \
336 } while (0) 338 } while (0)
337 339
338#define sched_trace_server_release(sid, job, rel, dead) \ 340#define sched_trace_server_release(sid, job, rel, dead) \
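
With the extra tjob argument, the server trace now records both the server's own job number and the job number of the task it executes. A sketch of a call site, reusing the negative-pid server-id convention that budget.c (below) uses for sched_trace_server_release; tsk is whichever task the server just picked:

/* server -tsk->pid, in its get_server_job(tsk)-th job, starts
 * running job job_no of task tsk->pid */
sched_trace_server_switch_to(-tsk->pid, get_server_job(tsk),
			     tsk->pid, tsk_rt(tsk)->job_params.job_no);

/* ...and reports the same four values when it is descheduled */
sched_trace_server_switch_away(-tsk->pid, get_server_job(tsk),
			       tsk->pid, tsk_rt(tsk)->job_params.job_no);
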
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
index e809376d6487..d868144f6928 100644
--- a/include/litmus/trace.h
+++ b/include/litmus/trace.h
@@ -12,7 +12,10 @@
12enum task_type_marker { 12enum task_type_marker {
13 TSK_BE, 13 TSK_BE,
14 TSK_RT, 14 TSK_RT,
15 TSK_UNKNOWN 15 TSK_UNKNOWN,
16 TSK_LVLA,
17 TSK_LVLB,
18 TSK_LVLC
16}; 19};
17 20
18struct timestamp { 21struct timestamp {
@@ -68,8 +71,6 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
68 * always the next number after the start time event id. 71 * always the next number after the start time event id.
69 */ 72 */
70 73
71
72
73#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only 74#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only
74 * care 75 * care
75 * about 76 * about
@@ -87,6 +88,26 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
87#define TS_TICK_START(t) TTIMESTAMP(110, t) 88#define TS_TICK_START(t) TTIMESTAMP(110, t)
88#define TS_TICK_END(t) TTIMESTAMP(111, t) 89#define TS_TICK_END(t) TTIMESTAMP(111, t)
89 90
91#define TS_LVLA_RELEASE_START DTIMESTAMP(112, TSK_RT)
92#define TS_LVLA_RELEASE_END DTIMESTAMP(113, TSK_RT)
93
94#define TS_LVLA_SCHED_START DTIMESTAMP(114, TSK_UNKNOWN)
95#define TS_LVLA_SCHED_END_ID 115
96#define TS_LVLA_SCHED_END(t) TTIMESTAMP(TS_LVLA_SCHED_END_ID, t)
97
98#define TS_LVLB_RELEASE_START DTIMESTAMP(116, TSK_RT)
99#define TS_LVLB_RELEASE_END DTIMESTAMP(117, TSK_RT)
100
101#define TS_LVLB_SCHED_START DTIMESTAMP(118, TSK_UNKNOWN)
102#define TS_LVLB_SCHED_END_ID 119
103#define TS_LVLB_SCHED_END(t) TTIMESTAMP(TS_LVLB_SCHED_END_ID, t)
104
105#define TS_LVLC_RELEASE_START DTIMESTAMP(120, TSK_RT)
106#define TS_LVLC_RELEASE_END DTIMESTAMP(121, TSK_RT)
107
108#define TS_LVLC_SCHED_START DTIMESTAMP(122, TSK_UNKNOWN)
109#define TS_LVLC_SCHED_END_ID 123
110#define TS_LVLC_SCHED_END(t) TTIMESTAMP(TS_LVLC_SCHED_END_ID, t)
90 111
91#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ 112#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
92#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ 113#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
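
Each new probe pair keeps the existing convention that the END event id is the START id plus one, so the feather-trace tooling can pair the samples. Bracketing a plugin's level-A release path would look roughly like this sketch (the surrounding handler is hypothetical):

/* somewhere inside the plugin's level-A release handler */
TS_LVLA_RELEASE_START;
/* move the released level-A jobs onto the ready queue */
TS_LVLA_RELEASE_END;
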
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
index 94264c27d9ac..71be3cd8d469 100644
--- a/include/litmus/unistd_32.h
+++ b/include/litmus/unistd_32.h
@@ -17,5 +17,6 @@
17#define __NR_wait_for_ts_release __LSC(9) 17#define __NR_wait_for_ts_release __LSC(9)
18#define __NR_release_ts __LSC(10) 18#define __NR_release_ts __LSC(10)
19#define __NR_null_call __LSC(11) 19#define __NR_null_call __LSC(11)
20#define __NR_set_rt_task_mc_param __LSC(12)
20 21
21#define NR_litmus_syscalls 12 22#define NR_litmus_syscalls 13
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
index d5ced0d2642c..95cb74495104 100644
--- a/include/litmus/unistd_64.h
+++ b/include/litmus/unistd_64.h
@@ -29,5 +29,7 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
29__SYSCALL(__NR_release_ts, sys_release_ts) 29__SYSCALL(__NR_release_ts, sys_release_ts)
30#define __NR_null_call __LSC(11) 30#define __NR_null_call __LSC(11)
31__SYSCALL(__NR_null_call, sys_null_call) 31__SYSCALL(__NR_null_call, sys_null_call)
32#define __NR_set_rt_task_mc_param __LSC(12)
33__SYSCALL(__NR_set_rt_task_mc_param, sys_set_rt_task_mc_param)
32 34
33#define NR_litmus_syscalls 12 35#define NR_litmus_syscalls 13
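
From userspace the new call is reachable with syscall(2) using the number defined above; liblitmus would normally provide the wrapper. The argument list and parameter struct below are assumptions for illustration only, loosely modeled on the lvl_a_id field the MC-CE code uses; the real definition lives in rt_param.h, which this hunk does not show:

#include <unistd.h>
#include <sys/syscall.h>
/* __NR_set_rt_task_mc_param comes from litmus/unistd_*.h above */

struct mc_task_param {          /* hypothetical layout */
	int crit;               /* criticality level of the task */
	int lvl_a_id;           /* slot in the per-CPU level-A table */
};

static int set_rt_task_mc_param(pid_t pid, struct mc_task_param *param)
{
	/* argument order is a guess; see sys_set_rt_task_mc_param */
	return syscall(__NR_set_rt_task_mc_param, pid, param);
}
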
diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h
index c3a92f8ec6ef..474aa129c233 100644
--- a/include/trace/events/litmus.h
+++ b/include/trace/events/litmus.h
@@ -11,6 +11,7 @@
11 11
12#include <litmus/litmus.h> 12#include <litmus/litmus.h>
13#include <litmus/rt_param.h> 13#include <litmus/rt_param.h>
14
14TRACE_EVENT(litmus_task_param, 15TRACE_EVENT(litmus_task_param,
15 16
16 TP_PROTO(struct task_struct *t), 17 TP_PROTO(struct task_struct *t),
@@ -80,20 +81,17 @@ TRACE_EVENT(litmus_switch_to,
80 TP_STRUCT__entry( 81 TP_STRUCT__entry(
81 __field( pid_t, pid ) 82 __field( pid_t, pid )
82 __field( unsigned int, job ) 83 __field( unsigned int, job )
83 __field( unsigned long long, when )
84 __field( unsigned long long, exec_time ) 84 __field( unsigned long long, exec_time )
85 ), 85 ),
86 86
87 TP_fast_assign( 87 TP_fast_assign(
88 __entry->pid = is_realtime(t) ? t->pid : 0; 88 __entry->pid = is_realtime(t) ? t->pid : 0;
89 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0; 89 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
90 __entry->when = litmus_clock();
91 __entry->exec_time = get_exec_time(t); 90 __entry->exec_time = get_exec_time(t);
92 ), 91 ),
93 92
94 TP_printk("switch_to(job(%u, %u)): %Lu (exec: %Lu)\n", 93 TP_printk("switch_to(job(%u, %u)): (exec: %Lu)\n",
95 __entry->pid, __entry->job, 94 __entry->pid, __entry->job, __entry->exec_time)
96 __entry->when, __entry->exec_time)
97); 95);
98 96
99/* 97/*
@@ -108,20 +106,17 @@ TRACE_EVENT(litmus_switch_away,
108 TP_STRUCT__entry( 106 TP_STRUCT__entry(
109 __field( pid_t, pid ) 107 __field( pid_t, pid )
110 __field( unsigned int, job ) 108 __field( unsigned int, job )
111 __field( unsigned long long, when )
112 __field( unsigned long long, exec_time ) 109 __field( unsigned long long, exec_time )
113 ), 110 ),
114 111
115 TP_fast_assign( 112 TP_fast_assign(
116 __entry->pid = is_realtime(t) ? t->pid : 0; 113 __entry->pid = is_realtime(t) ? t->pid : 0;
117 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0; 114 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
118 __entry->when = litmus_clock();
119 __entry->exec_time = get_exec_time(t); 115 __entry->exec_time = get_exec_time(t);
120 ), 116 ),
121 117
122 TP_printk("switch_away(job(%u, %u)): %Lu (exec: %Lu)\n", 118 TP_printk("switch_away(job(%u, %u)): (exec: %Lu)\n",
123 __entry->pid, __entry->job, 119 __entry->pid, __entry->job, __entry->exec_time)
124 __entry->when, __entry->exec_time)
125); 120);
126 121
127/* 122/*
@@ -136,20 +131,17 @@ TRACE_EVENT(litmus_task_completion,
136 TP_STRUCT__entry( 131 TP_STRUCT__entry(
137 __field( pid_t, pid ) 132 __field( pid_t, pid )
138 __field( unsigned int, job ) 133 __field( unsigned int, job )
139 __field( unsigned long long, when )
140 __field( unsigned long, forced ) 134 __field( unsigned long, forced )
141 ), 135 ),
142 136
143 TP_fast_assign( 137 TP_fast_assign(
144 __entry->pid = t ? t->pid : 0; 138 __entry->pid = t ? t->pid : 0;
145 __entry->job = t ? t->rt_param.job_params.job_no : 0; 139 __entry->job = t ? t->rt_param.job_params.job_no : 0;
146 __entry->when = litmus_clock();
147 __entry->forced = forced; 140 __entry->forced = forced;
148 ), 141 ),
149 142
150 TP_printk("completed(job(%u, %u)): %Lu (forced: %lu)\n", 143 TP_printk("completed(job(%u, %u)): (forced: %lu)\n",
151 __entry->pid, __entry->job, 144 __entry->pid, __entry->job, __entry->forced)
152 __entry->when, __entry->forced)
153); 145);
154 146
155/* 147/*
@@ -164,17 +156,14 @@ TRACE_EVENT(litmus_task_block,
164 TP_STRUCT__entry( 156 TP_STRUCT__entry(
165 __field( pid_t, pid ) 157 __field( pid_t, pid )
166 __field( int, lid ) 158 __field( int, lid )
167 __field( unsigned long long, when )
168 ), 159 ),
169 160
170 TP_fast_assign( 161 TP_fast_assign(
171 __entry->pid = t ? t->pid : 0; 162 __entry->pid = t ? t->pid : 0;
172 __entry->lid = lid; 163 __entry->lid = lid;
173 __entry->when = litmus_clock();
174 ), 164 ),
175 165
176 TP_printk("(%u) blocks on %d: %Lu\n", __entry->pid, 166 TP_printk("(%u) blocks on %d\n", __entry->pid, __entry->lid)
177 __entry->lid, __entry->when)
178); 167);
179 168
180/* 169/*
@@ -189,17 +178,14 @@ TRACE_EVENT(litmus_resource_acquire,
189 TP_STRUCT__entry( 178 TP_STRUCT__entry(
190 __field( pid_t, pid ) 179 __field( pid_t, pid )
191 __field( int, lid ) 180 __field( int, lid )
192 __field( unsigned long long, when )
193 ), 181 ),
194 182
195 TP_fast_assign( 183 TP_fast_assign(
196 __entry->pid = t ? t->pid : 0; 184 __entry->pid = t ? t->pid : 0;
197 __entry->lid = lid; 185 __entry->lid = lid;
198 __entry->when = litmus_clock();
199 ), 186 ),
200 187
201 TP_printk("(%u) acquires %d: %Lu\n", __entry->pid, 188 TP_printk("(%u) acquires %d\n", __entry->pid, __entry->lid)
202 __entry->lid, __entry->when)
203); 189);
204 190
205TRACE_EVENT(litmus_resource_release, 191TRACE_EVENT(litmus_resource_release,
@@ -211,17 +197,39 @@ TRACE_EVENT(litmus_resource_release,
211 TP_STRUCT__entry( 197 TP_STRUCT__entry(
212 __field( pid_t, pid ) 198 __field( pid_t, pid )
213 __field( int, lid ) 199 __field( int, lid )
214 __field( unsigned long long, when )
215 ), 200 ),
216 201
217 TP_fast_assign( 202 TP_fast_assign(
218 __entry->pid = t ? t->pid : 0; 203 __entry->pid = t ? t->pid : 0;
219 __entry->lid = lid; 204 __entry->lid = lid;
220 __entry->when = litmus_clock();
221 ), 205 ),
222 206
223 TP_printk("(%u) releases %d: %Lu\n", __entry->pid, 207 TP_printk("(%u) releases %d\n", __entry->pid,
224 __entry->lid, __entry->when) 208 __entry->lid)
209);
210
211TRACE_EVENT(litmus_priority_donate,
212
213 TP_PROTO(struct task_struct *t, struct task_struct *donor, int lid),
214
215 TP_ARGS(t, donor, lid),
216
217 TP_STRUCT__entry(
218 __field( pid_t, t_pid )
219 __field( pid_t, d_pid )
220 __field( unsigned long long, prio)
221 __field( int, lid )
222 ),
223
224 TP_fast_assign(
225 __entry->t_pid = t ? t->pid : 0;
226 __entry->d_pid = donor ? donor->pid : 0;
227 __entry->prio = get_deadline(donor);
228 __entry->lid = lid;
229 ),
230
231	TP_printk("(%u) inherits %llu from (%u) on %d\n", __entry->t_pid,
232		__entry->prio, __entry->d_pid, __entry->lid)
225); 233);
226 234
227/* 235/*
@@ -237,19 +245,16 @@ TRACE_EVENT(litmus_task_resume,
237 __field( pid_t, pid ) 245 __field( pid_t, pid )
238 __field( int, lid ) 246 __field( int, lid )
239 __field( unsigned int, job ) 247 __field( unsigned int, job )
240 __field( unsigned long long, when )
241 ), 248 ),
242 249
243 TP_fast_assign( 250 TP_fast_assign(
244 __entry->pid = t ? t->pid : 0; 251 __entry->pid = t ? t->pid : 0;
245 __entry->job = t ? t->rt_param.job_params.job_no : 0; 252 __entry->job = t ? t->rt_param.job_params.job_no : 0;
246 __entry->when = litmus_clock();
247 __entry->lid = lid; 253 __entry->lid = lid;
248 ), 254 ),
249 255
250 TP_printk("resume(job(%u, %u)) on %d: %Lu\n", 256 TP_printk("resume(job(%u, %u)) on %d\n",
251 __entry->pid, __entry->job, 257 __entry->pid, __entry->job, __entry->lid)
252 __entry->lid, __entry->when)
253); 258);
254 259
255/* 260/*
@@ -263,15 +268,13 @@ TRACE_EVENT(litmus_sys_release,
263 268
264 TP_STRUCT__entry( 269 TP_STRUCT__entry(
265 __field( unsigned long long, rel ) 270 __field( unsigned long long, rel )
266 __field( unsigned long long, when )
267 ), 271 ),
268 272
269 TP_fast_assign( 273 TP_fast_assign(
270 __entry->rel = *start; 274 __entry->rel = *start;
271 __entry->when = litmus_clock();
272 ), 275 ),
273 276
274 TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when) 277 TP_printk("SynRelease(%Lu)\n", __entry->rel)
275); 278);
276 279
277/* 280/*
@@ -344,43 +347,50 @@ TRACE_EVENT(litmus_server_param,
344 347
345TRACE_EVENT(litmus_server_switch_to, 348TRACE_EVENT(litmus_server_switch_to,
346 349
347 TP_PROTO(int sid, unsigned int job, int tid), 350 TP_PROTO(int sid, unsigned int job, int tid, unsigned int tjob),
348 351
349 TP_ARGS(sid, job, tid), 352 TP_ARGS(sid, job, tid, tjob),
350 353
351 TP_STRUCT__entry( 354 TP_STRUCT__entry(
352 __field( int, sid) 355 __field( int, sid)
353 __field( unsigned int, job) 356 __field( unsigned int, job)
354 __field( int, tid) 357 __field( int, tid)
358 __field( unsigned int, tjob)
355 ), 359 ),
356 360
357 TP_fast_assign( 361 TP_fast_assign(
358 __entry->sid = sid; 362 __entry->sid = sid;
359 __entry->tid = tid; 363 __entry->tid = tid;
360 __entry->job = job; 364 __entry->job = job;
365 __entry->tjob = tjob;
361 ), 366 ),
362 367
363 TP_printk("switch_to(server(%d, %u)): %d\n", __entry->sid, __entry->job, __entry->tid) 368 TP_printk("switch_to(server(%d, %u)): (%d, %d)\n",
369 __entry->sid, __entry->job, __entry->tid, __entry->tjob)
364); 370);
365 371
366TRACE_EVENT(litmus_server_switch_away, 372TRACE_EVENT(litmus_server_switch_away,
367 373
368 TP_PROTO(int sid, unsigned int job, int tid), 374 TP_PROTO(int sid, unsigned int job, int tid, unsigned int tjob),
369 375
370 TP_ARGS(sid, job, tid), 376 TP_ARGS(sid, job, tid, tjob),
371 377
372 TP_STRUCT__entry( 378 TP_STRUCT__entry(
373 __field( int, sid) 379 __field( int, sid)
374 __field( unsigned int, job) 380 __field( unsigned int, job)
375 __field( int, tid) 381 __field( int, tid)
382 __field( unsigned int, tjob)
376 ), 383 ),
377 384
378 TP_fast_assign( 385 TP_fast_assign(
379 __entry->sid = sid; 386 __entry->sid = sid;
380 __entry->tid = tid; 387 __entry->tid = tid;
388 __entry->job = job;
389 __entry->tjob = tjob;
381 ), 390 ),
382 391
383 TP_printk("switch_away(server(%d, %u)): %d\n", __entry->sid, __entry->job, __entry->tid) 392 TP_printk("switch_away(server(%d, %u)): (%d, %d)\n",
393 __entry->sid, __entry->job, __entry->tid, __entry->tjob)
384); 394);
385 395
386TRACE_EVENT(litmus_server_release, 396TRACE_EVENT(litmus_server_release,
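
The new litmus_priority_donate event gives locking code a single place to record inheritance. A call site would look roughly like the line below, where owner currently holds the lock, donor lends its deadline, and lock_id is whatever identifier the protocol assigns (all three names are placeholders):

/* owner now runs with donor's deadline because of lock lock_id */
trace_litmus_priority_donate(owner, donor, lock_id);
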
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 11e896903828..6cf73d371203 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1037,6 +1037,7 @@ void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info)
1037{ 1037{
1038 memset(info, 0, sizeof(struct hrtimer_start_on_info)); 1038 memset(info, 0, sizeof(struct hrtimer_start_on_info));
1039 atomic_set(&info->state, HRTIMER_START_ON_INACTIVE); 1039 atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
1040 INIT_LIST_HEAD(&info->list);
1040} 1041}
1041 1042
1042/** 1043/**
@@ -1055,12 +1056,32 @@ void hrtimer_pull(void)
1055 list_for_each_safe(pos, safe, &list) { 1056 list_for_each_safe(pos, safe, &list) {
1056 info = list_entry(pos, struct hrtimer_start_on_info, list); 1057 info = list_entry(pos, struct hrtimer_start_on_info, list);
1057 TRACE("pulled timer 0x%x\n", info->timer); 1058 TRACE("pulled timer 0x%x\n", info->timer);
1058 list_del(pos); 1059 list_del_init(pos);
1059 hrtimer_start(info->timer, info->time, info->mode); 1060 if (!info->timer) continue;
1061 if (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE)
1062 hrtimer_start(info->timer, info->time, info->mode);
1063 if (atomic_read(&info->state) == HRTIMER_START_ON_INACTIVE)
1064 hrtimer_cancel(info->timer);
1060 } 1065 }
1061} 1066}
1062 1067
1063/** 1068/**
1069 * hrtimer_pull_cancel - Cancel a remote timer pull
1070 */
1071int hrtimer_pull_cancel(int cpu, struct hrtimer *timer,
1072 struct hrtimer_start_on_info *info)
1073{
1074 struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
1075
1076 raw_spin_lock(&base->lock);
1077 list_del_init(&info->list);
1078 raw_spin_unlock(&base->lock);
1079
1080 atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
1081 return hrtimer_try_to_cancel(timer);
1082}
1083
1084/**
1064 * hrtimer_start_on - trigger timer arming on remote cpu 1085 * hrtimer_start_on - trigger timer arming on remote cpu
1065 * @cpu: remote cpu 1086 * @cpu: remote cpu
1066 * @info: save timer information for enqueuing on remote cpu 1087 * @info: save timer information for enqueuing on remote cpu
@@ -1069,8 +1090,8 @@ void hrtimer_pull(void)
1069 * @mode: timer mode 1090 * @mode: timer mode
1070 */ 1091 */
1071int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info, 1092int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
1072 struct hrtimer *timer, ktime_t time, 1093 struct hrtimer *timer, ktime_t time,
1073 const enum hrtimer_mode mode) 1094 const enum hrtimer_mode mode)
1074{ 1095{
1075 unsigned long flags; 1096 unsigned long flags;
1076 struct hrtimer_cpu_base* base; 1097 struct hrtimer_cpu_base* base;
@@ -1102,7 +1123,8 @@ int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
1102 __hrtimer_start_range_ns(info->timer, info->time, 1123 __hrtimer_start_range_ns(info->timer, info->time,
1103 0, info->mode, 0); 1124 0, info->mode, 0);
1104 } else { 1125 } else {
1105 TRACE("hrtimer_start_on: pulling to remote CPU\n"); 1126 TRACE("hrtimer_start_on: pulling 0x%x to remote CPU\n",
1127 info->timer);
1106 base = &per_cpu(hrtimer_bases, cpu); 1128 base = &per_cpu(hrtimer_bases, cpu);
1107 raw_spin_lock_irqsave(&base->lock, flags); 1129 raw_spin_lock_irqsave(&base->lock, flags);
1108 was_empty = list_empty(&base->to_pull); 1130 was_empty = list_empty(&base->to_pull);
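
hrtimer_pull_cancel() closes the gap where a timer queued on a remote CPU's to_pull list becomes unnecessary before the pull happens: it unlinks the info entry under the remote base lock and then tries to cancel the timer itself. A minimal usage sketch, assuming the caller owns timer/info and cpu is the CPU previously passed to hrtimer_start_on(); my_timer_fired() is a hypothetical callback:

static int arm_then_cancel(int cpu, struct hrtimer *timer,
			   struct hrtimer_start_on_info *info, lt_t when)
{
	/* arm `timer` on remote CPU `cpu` to fire at absolute time `when` */
	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	timer->function = my_timer_fired;
	hrtimer_start_on_info_init(info);
	hrtimer_start_on(cpu, info, timer, ns_to_ktime(when), HRTIMER_MODE_ABS);

	/* later: the event is no longer wanted, so drop any pending pull
	 * request and cancel the timer if it was already armed */
	return hrtimer_pull_cancel(cpu, timer, info);
}
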
diff --git a/kernel/sched.c b/kernel/sched.c
index 65aba7ec564d..2739b3339ffb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4338,6 +4338,9 @@ need_resched:
4338 */ 4338 */
4339litmus_need_resched_nonpreemptible: 4339litmus_need_resched_nonpreemptible:
4340 TS_SCHED_START; 4340 TS_SCHED_START;
4341 TS_LVLA_SCHED_START;
4342 TS_LVLB_SCHED_START;
4343 TS_LVLC_SCHED_START;
4341 sched_trace_task_switch_away(prev); 4344 sched_trace_task_switch_away(prev);
4342 4345
4343 schedule_debug(prev); 4346 schedule_debug(prev);
@@ -4396,6 +4399,9 @@ litmus_need_resched_nonpreemptible:
4396 rq->curr = next; 4399 rq->curr = next;
4397 ++*switch_count; 4400 ++*switch_count;
4398 4401
4402 TS_LVLA_SCHED_END(next);
4403 TS_LVLB_SCHED_END(next);
4404 TS_LVLC_SCHED_END(next);
4399 TS_SCHED_END(next); 4405 TS_SCHED_END(next);
4400 TS_CXS_START(next); 4406 TS_CXS_START(next);
4401 context_switch(rq, prev, next); /* unlocks the rq */ 4407 context_switch(rq, prev, next); /* unlocks the rq */
diff --git a/litmus/Kconfig b/litmus/Kconfig
index f2dbfb396883..91bf81ea9fae 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -12,6 +12,19 @@ config PLUGIN_CEDF
12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF 12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
13 makes little sense since there aren't any shared caches. 13 makes little sense since there aren't any shared caches.
14 14
15config PLUGIN_COLOR
16 bool "Scheduling with Colors"
17 default y
18 help
19	  Include the scheduling-with-colors scheduler plugin.
20
21config PLUGIN_COLOR_UNCACHABLE
22 bool "Colored memory is not cachable"
23 depends on PLUGIN_COLOR && X86_PAT
24 default n
25 help
26 Any memory allocated to the color plugin is not CPU cached.
27
15config PLUGIN_PFAIR 28config PLUGIN_PFAIR
16 bool "PFAIR" 29 bool "PFAIR"
17 depends on HIGH_RES_TIMERS && !NO_HZ 30 depends on HIGH_RES_TIMERS && !NO_HZ
@@ -23,16 +36,67 @@ config PLUGIN_PFAIR
23 36
24 If unsure, say Yes. 37 If unsure, say Yes.
25 38
39config MERGE_TIMERS
40 bool "Timer-merging Support"
41 depends on HIGH_RES_TIMERS
42 default y
43 help
44 Include support for merging timers.
45
46config MERGE_TIMERS_WINDOW
47 int "Timer-merging Window (in nanoseconds)"
48 depends on MERGE_TIMERS
49 default 1000
50 help
51	  Window within which separate timers may be merged.
52
26config RELEASE_MASTER 53config RELEASE_MASTER
27 bool "Release-master Support" 54 bool "Release-master Support"
28 depends on ARCH_HAS_SEND_PULL_TIMERS 55 depends on ARCH_HAS_SEND_PULL_TIMERS
29 default n 56 default n
30 help 57 help
31 Allow one processor to act as a dedicated interrupt processor 58 In GSN-EDF, allow one processor to act as a dedicated interrupt
32 that services all timer interrupts, but that does not schedule 59 processor that services all timer interrupts, but that does not schedule
33 real-time tasks. See RTSS'09 paper for details 60 real-time tasks. See RTSS'09 paper for details
34 (http://www.cs.unc.edu/~anderson/papers.html). 61 (http://www.cs.unc.edu/~anderson/papers.html).
35 Currently only supported by GSN-EDF. 62
63menu "Mixed Criticality"
64
65config PLUGIN_MC
66 bool "Mixed Criticality Scheduler"
67 depends on X86 && SYSFS
68 default y
69 help
70 Include the mixed criticality scheduler. This plugin depends
71 on the global release-master processor for its _REDIRECT and
72 _RELEASE_MASTER options.
73
74 If unsure, say Yes.
75
76config PLUGIN_MC_LEVEL_A_MAX_TASKS
77 int "Maximum level A tasks"
78 depends on PLUGIN_MC
79 range 1 128
80 default 32
81 help
82	  The maximum number of level-A tasks allowed per CPU.
83
84config PLUGIN_MC_RELEASE_MASTER
85 bool "Release-master support for MC"
86 depends on PLUGIN_MC && RELEASE_MASTER
87 default y
88 help
89 Send all timer interrupts to the system-wide release-master CPU.
90
91config PLUGIN_MC_REDIRECT
92 bool "Redirect Work to Release-master"
93 depends on PLUGIN_MC && RELEASE_MASTER
94 default y
95 help
96	  Allow processors to send work involving global state to the
97	  release-master CPU in order to avoid excess overhead during
98	  partitioned scheduling decisions.
99endmenu
36 100
37endmenu 101endmenu
38 102
@@ -51,7 +115,6 @@ config NP_SECTION
51 115
52config LITMUS_LOCKING 116config LITMUS_LOCKING
53 bool "Support for real-time locking protocols" 117 bool "Support for real-time locking protocols"
54 depends on NP_SECTION
55 default n 118 default n
56 help 119 help
57 Enable LITMUS^RT's deterministic multiprocessor real-time 120 Enable LITMUS^RT's deterministic multiprocessor real-time
@@ -167,7 +230,7 @@ config SCHED_TASK_TRACE
167config SCHED_TASK_TRACE_SHIFT 230config SCHED_TASK_TRACE_SHIFT
168 int "Buffer size for sched_trace_xxx() events" 231 int "Buffer size for sched_trace_xxx() events"
169 depends on SCHED_TASK_TRACE 232 depends on SCHED_TASK_TRACE
170 range 8 13 233 range 8 20
171 default 9 234 default 9
172 help 235 help
173 236
@@ -226,7 +289,7 @@ config SCHED_DEBUG_TRACE
226config SCHED_DEBUG_TRACE_SHIFT 289config SCHED_DEBUG_TRACE_SHIFT
227 int "Buffer size for TRACE() buffer" 290 int "Buffer size for TRACE() buffer"
228 depends on SCHED_DEBUG_TRACE 291 depends on SCHED_DEBUG_TRACE
229 range 14 22 292 range 14 24
230 default 18 293 default 18
231 help 294 help
232 295
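
MERGE_TIMERS_WINDOW (above) is the slack, in nanoseconds, within which separate timers may share one firing. Purely to illustrate what the option controls, and not how litmus/event_group.c actually implements it, two events can be merged when their fire times fall into the same window-sized bucket:

/* illustration only: CONFIG_MERGE_TIMERS_WINDOW comes from Kconfig */
static inline lt_t merge_bucket(lt_t fire_time)
{
	return fire_time / CONFIG_MERGE_TIMERS_WINDOW;
}

static inline int can_merge(lt_t a, lt_t b)
{
	return merge_bucket(a) == merge_bucket(b);
}
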
diff --git a/litmus/Makefile b/litmus/Makefile
index d26ca7076b62..76a07e8531c6 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -3,30 +3,41 @@
3# 3#
4 4
5obj-y = sched_plugin.o litmus.o \ 5obj-y = sched_plugin.o litmus.o \
6 preempt.o \ 6 bheap.o \
7 litmus_proc.o \ 7 binheap.o \
8 budget.o \ 8 budget.o \
9 clustered.o \ 9 clustered.o \
10 jobs.o \ 10 color.o \
11 sync.o \ 11 color_dev.o \
12 rt_domain.o \ 12 color_proc.o \
13 ctrldev.o \
14 dgl.o \
15 domain.o \
13 edf_common.o \ 16 edf_common.o \
14 fp_common.o \
15 fdso.o \ 17 fdso.o \
18 fifo_common.o \
19 fp_common.o \
20 jobs.o \
21 litmus_proc.o \
16 locking.o \ 22 locking.o \
17 srp.o \ 23 preempt.o \
18 bheap.o \ 24 rm_common.o \
19 binheap.o \ 25 rt_domain.o \
20 ctrldev.o \ 26 rt_server.o \
21 sched_gsn_edf.o \ 27 sched_gsn_edf.o \
28 sched_pfp.o \
22 sched_psn_edf.o \ 29 sched_psn_edf.o \
23 sched_pfp.o 30 srp.o \
31 sync.o
24 32
25obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o 33obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
26obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o 34obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
35obj-$(CONFIG_PLUGIN_COLOR) += sched_color.o
27obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o 36obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
28 37obj-$(CONFIG_PLUGIN_MC) += sched_mc.o sched_mc_ce.o ce_domain.o
38obj-$(CONFIG_MERGE_TIMERS) += event_group.o
29obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 39obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
30obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 40obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
31obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 41obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
32obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o 42obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
43
diff --git a/litmus/bheap.c b/litmus/bheap.c
index 528af97f18a6..42122d86be4c 100644
--- a/litmus/bheap.c
+++ b/litmus/bheap.c
@@ -248,13 +248,14 @@ int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
248void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, 248void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
249 struct bheap_node* node) 249 struct bheap_node* node)
250{ 250{
251 struct bheap_node *parent, *prev, *pos; 251 struct bheap_node *parent, *prev, *pos, *old;
252 struct bheap_node** tmp_ref; 252 struct bheap_node** tmp_ref;
253 void* tmp; 253 void* tmp;
254 254
255 if (heap->min != node) { 255 if (heap->min != node) {
256 /* bubble up */ 256 /* bubble up */
257 parent = node->parent; 257 parent = node->parent;
258 old = node;
258 while (parent) { 259 while (parent) {
259 /* swap parent and node */ 260 /* swap parent and node */
260 tmp = parent->value; 261 tmp = parent->value;
diff --git a/litmus/budget.c b/litmus/budget.c
index f7712be29adb..f7505b0f86e5 100644
--- a/litmus/budget.c
+++ b/litmus/budget.c
@@ -4,19 +4,12 @@
4 4
5#include <litmus/litmus.h> 5#include <litmus/litmus.h>
6#include <litmus/preempt.h> 6#include <litmus/preempt.h>
7
8#include <litmus/budget.h> 7#include <litmus/budget.h>
9 8#include <litmus/sched_trace.h>
10struct enforcement_timer {
11 /* The enforcement timer is used to accurately police
12 * slice budgets. */
13 struct hrtimer timer;
14 int armed;
15};
16 9
17DEFINE_PER_CPU(struct enforcement_timer, budget_timer); 10DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
18 11
19static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer) 12enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
20{ 13{
21 struct enforcement_timer* et = container_of(timer, 14 struct enforcement_timer* et = container_of(timer,
22 struct enforcement_timer, 15 struct enforcement_timer,
@@ -34,7 +27,7 @@ static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
34} 27}
35 28
36/* assumes called with IRQs off */ 29/* assumes called with IRQs off */
37static void cancel_enforcement_timer(struct enforcement_timer* et) 30void cancel_enforcement_timer(struct enforcement_timer* et)
38{ 31{
39 int ret; 32 int ret;
40 33
@@ -56,11 +49,10 @@ static void cancel_enforcement_timer(struct enforcement_timer* et)
56} 49}
57 50
58/* assumes called with IRQs off */ 51/* assumes called with IRQs off */
59static void arm_enforcement_timer(struct enforcement_timer* et, 52void arm_enforcement_timer(struct enforcement_timer* et,
60 struct task_struct* t) 53 struct task_struct* t)
61{ 54{
62 lt_t when_to_fire; 55 lt_t when_to_fire;
63 TRACE_TASK(t, "arming enforcement timer.\n");
64 56
65 /* Calling this when there is no budget left for the task 57 /* Calling this when there is no budget left for the task
66 * makes no sense, unless the task is non-preemptive. */ 58 * makes no sense, unless the task is non-preemptive. */
@@ -69,8 +61,11 @@ static void arm_enforcement_timer(struct enforcement_timer* et,
69 /* __hrtimer_start_range_ns() cancels the timer 61 /* __hrtimer_start_range_ns() cancels the timer
70 * anyway, so we don't have to check whether it is still armed */ 62 * anyway, so we don't have to check whether it is still armed */
71 63
72 if (likely(!is_np(t))) { 64 if (likely(!is_user_np(t))) {
73 when_to_fire = litmus_clock() + budget_remaining(t); 65 when_to_fire = litmus_clock() + budget_remaining(t);
66 TRACE_TASK(t, "arming enforcement timer for %llu.\n",
67 when_to_fire);
68
74 __hrtimer_start_range_ns(&et->timer, 69 __hrtimer_start_range_ns(&et->timer,
75 ns_to_ktime(when_to_fire), 70 ns_to_ktime(when_to_fire),
76 0 /* delta */, 71 0 /* delta */,
@@ -96,6 +91,11 @@ void update_enforcement_timer(struct task_struct* t)
96 } 91 }
97} 92}
98 93
94void init_enforcement_timer(struct enforcement_timer *et)
95{
96 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
97 et->timer.function = on_enforcement_timeout;
98}
99 99
100static int __init init_budget_enforcement(void) 100static int __init init_budget_enforcement(void)
101{ 101{
@@ -104,10 +104,33 @@ static int __init init_budget_enforcement(void)
104 104
105 for (cpu = 0; cpu < NR_CPUS; cpu++) { 105 for (cpu = 0; cpu < NR_CPUS; cpu++) {
106 et = &per_cpu(budget_timer, cpu); 106 et = &per_cpu(budget_timer, cpu);
107 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 107 init_enforcement_timer(et);
108 et->timer.function = on_enforcement_timeout;
109 } 108 }
110 return 0; 109 return 0;
111} 110}
112 111
112void task_release(struct task_struct *t)
113{
114 /* Also wrong */
115 t->rt_param.job_params.real_release = t->rt_param.job_params.real_deadline;
116 t->rt_param.job_params.real_deadline += get_rt_period(t);
117 t->rt_param.job_params.job_no++;
118 sched_trace_task_release(t);
119}
120
121void server_release(struct task_struct *t)
122{
123 t->rt_param.job_params.exec_time = 0;
124 t->rt_param.job_params.release = t->rt_param.job_params.deadline;
125 t->rt_param.job_params.deadline += get_rt_period(t);
126 t->rt_param.job_params.fake_job_no++;
127
128 /* don't confuse linux */
129 t->rt.time_slice = 1;
130
131 sched_trace_server_release(-t->pid, get_server_job(t),
132 t->rt_param.job_params.release,
133 t->rt_param.job_params.deadline);
134}
135
113module_init(init_budget_enforcement); 136module_init(init_budget_enforcement);
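
Since init_enforcement_timer(), arm_enforcement_timer() and cancel_enforcement_timer() are now exported (and arming keys on is_user_np() instead of is_np()), a plugin can embed an enforcement timer of its own rather than relying solely on the per-CPU budget_timer. A sketch under that assumption, using the usual budget_enforced()/budget_exhausted() helpers from litmus.h; struct my_cpu_state and the my_*() hooks are hypothetical plugin-side names:

struct my_cpu_state {
	struct enforcement_timer budget;
	/* other per-CPU plugin state */
};

static void my_cpu_init(struct my_cpu_state *state)
{
	init_enforcement_timer(&state->budget);
}

/* called with IRQs off when a budget-enforced task is scheduled in */
static void my_schedule_in(struct my_cpu_state *state, struct task_struct *t)
{
	if (budget_enforced(t) && !budget_exhausted(t))
		arm_enforcement_timer(&state->budget, t);
}

/* called with IRQs off when the task is scheduled out again */
static void my_schedule_out(struct my_cpu_state *state)
{
	cancel_enforcement_timer(&state->budget);
}
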
diff --git a/litmus/ce_domain.c b/litmus/ce_domain.c
new file mode 100644
index 000000000000..b2c5d4e935a5
--- /dev/null
+++ b/litmus/ce_domain.c
@@ -0,0 +1,102 @@
1#include <linux/pid.h>
2#include <linux/sched.h>
3#include <linux/hrtimer.h>
4#include <linux/slab.h>
5
6#include <litmus/litmus.h>
7#include <litmus/debug_trace.h>
8#include <litmus/rt_param.h>
9#include <litmus/domain.h>
10#include <litmus/event_group.h>
11#include <litmus/sched_mc.h>
12#include <litmus/ce_domain.h>
13
14/*
15 * Called for:
16 * task_new
17 * job_completion
18 * wake_up
19 */
20void ce_requeue(domain_t *dom, struct task_struct *ts)
21{
22 const struct ce_dom_data *ce_data = dom->data;
23 const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
24 const unsigned int just_finished = tsk_rt(ts)->job_params.job_no;
25 const unsigned int expected_job =
26 mc_ce_get_expected_job(ce_data->cpu, idx);
27 const int asleep = RT_F_SLEEP == get_rt_flags(ts);
28
29 TRACE_MC_TASK(ts, "entered ce_requeue. asleep: %d just_finished: %3u "
30 "expected_job: %3u\n",
31 asleep, just_finished, expected_job);
32
33 tsk_mc_data(ts)->mc_task.lvl_a_eligible = 1;
34
35 /* When coming from job completion, the task will be asleep. */
36 if (asleep && just_finished < expected_job) {
37 TRACE_MC_TASK(ts, "appears behind\n");
38 } else if (asleep && expected_job < just_finished) {
39		TRACE_MC_TASK(ts, "job %u completed but the expected job is %u, "
40				"which seems too early\n", just_finished,
41				expected_job);
42 }
43}
44
45/*
46 *
47 */
48void ce_remove(domain_t *dom, struct task_struct *ts)
49{
50 tsk_mc_data(ts)->mc_task.lvl_a_eligible = 0;
51}
52
53/*
54 * ce_take_ready and ce_peek_ready
55 */
56struct task_struct* ce_peek_and_take_ready(domain_t *dom)
57{
58 const struct ce_dom_data *ce_data = dom->data;
59 struct task_struct *ret = NULL, *sched = ce_data->should_schedule;
60 const int exists = NULL != sched;
61 const int blocked = exists && !is_running(sched);
62 const int elig = exists && tsk_mc_data(sched) &&
63 tsk_mc_data(sched)->mc_task.lvl_a_eligible;
64
65	/* Return the task we should schedule if it is eligible and not blocked. */
66 if (exists && !blocked && elig)
67 ret = sched;
68 return ret;
69}
70
71int ce_higher_prio(struct task_struct *a, struct task_struct *b)
72{
73 const domain_t *dom = get_task_domain(a);
74 const struct ce_dom_data *ce_data = dom->data;
75 return (a != b && a == ce_data->should_schedule);
76}
77
78void ce_domain_init(domain_t *dom,
79 raw_spinlock_t *lock,
80 requeue_t requeue,
81 peek_ready_t peek_ready,
82 take_ready_t take_ready,
83 preempt_needed_t preempt_needed,
84 task_prio_t task_prio,
85 struct ce_dom_data *dom_data,
86 const int cpu,
87 ce_timer_callback_t ce_timer_callback)
88{
89 domain_init(dom, lock, requeue, peek_ready, take_ready, preempt_needed,
90 task_prio);
91 dom->data = dom_data;
92 dom->remove = ce_remove;
93 dom_data->cpu = cpu;
94#ifdef CONFIG_MERGE_TIMERS
95 init_event(&dom_data->event, CRIT_LEVEL_A, ce_timer_callback,
96 event_list_alloc(GFP_ATOMIC));
97#else
98 hrtimer_start_on_info_init(&dom_data->timer_info);
99 hrtimer_init(&dom_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
100 dom_data->timer.function = ce_timer_callback;
101#endif
102}
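
ce_domain_init() wires the CE callbacks into a generic domain and then sets up either an rt_event or a per-domain hrtimer for the cyclic-executive schedule. A hedged sketch of how the MC plugin might call it for one CPU; mc_preempt_needed() and mc_ce_timer_callback() are placeholder names, while the ce_*() callbacks are the ones defined above:

/* assuming: domain_t dom; raw_spinlock_t dom_lock;
 *           struct ce_dom_data *ce_data; int cpu; */
ce_domain_init(&dom, &dom_lock,
	       ce_requeue,                /* requeue */
	       ce_peek_and_take_ready,    /* peek_ready */
	       ce_peek_and_take_ready,    /* take_ready */
	       mc_preempt_needed,         /* placeholder preemption check */
	       ce_higher_prio,            /* task_prio */
	       ce_data, cpu,
	       mc_ce_timer_callback);     /* placeholder timer callback */
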
diff --git a/litmus/color.c b/litmus/color.c
new file mode 100644
index 000000000000..ecc191137137
--- /dev/null
+++ b/litmus/color.c
@@ -0,0 +1,357 @@
1#include <linux/spinlock.h>
2
3#include <linux/module.h>
4#include <linux/mm.h>
5#include <linux/slab.h>
6#include <linux/sysctl.h>
7#include <linux/lockdep.h>
8#include <linux/sched.h> /* required by litmus.h */
9#include <asm/io.h> /* page_to_phys on SPARC */
10
11#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
12#include <asm/cacheflush.h> /* set_memory_uc */
13#endif
14
15#include <litmus/color.h>
16#include <litmus/litmus.h> /* for in_list(...) */
17
18#define PAGES_PER_COLOR 3072
19
20/*
21 * This is used only to "trick" lockdep into permitting dynamically allocated
22 * locks of different classes that are initialized on the same line.
23 */
24#define LOCKDEP_MAX_NR_COLORS 512
25static struct lock_class_key color_lock_keys[LOCKDEP_MAX_NR_COLORS];
26
27struct color_group {
28 spinlock_t lock;
29 char _lock_name[LITMUS_LOCKDEP_NAME_MAX_LEN];
30 struct list_head list;
31 atomic_t nr_pages;
32};
33
34static unsigned long color_mask;
35static struct color_group *color_groups;
36
37
38/* non-static: extern'ed in various files */
39struct color_cache_info color_cache_info;
40int color_sysctl_add_pages_data;
41
42static inline unsigned long page_color(struct page *page)
43{
44 return ((page_to_phys(page) & color_mask) >> PAGE_SHIFT);
45}
46
47/*
48 * The page's count should be one and it should not be on any LRU list.
49 */
50void add_page_to_color_list(struct page *page)
51{
52 const unsigned long color = page_color(page);
53 struct color_group *cgroup = &color_groups[color];
54 BUG_ON(in_list(&page->lru) || PageLRU(page));
55 BUG_ON(page_count(page) > 1);
56 spin_lock(&cgroup->lock);
57 list_add_tail(&page->lru, &cgroup->list);
58 atomic_inc(&cgroup->nr_pages);
59 SetPageLRU(page);
60 spin_unlock(&cgroup->lock);
61}
62
63/*
64 * Increases the page's count to two.
65 */
66struct page* get_colored_page(unsigned long color)
67{
68 struct color_group *cgroup;
69 struct page *page = NULL;
70
71 if (color >= color_cache_info.nr_colors)
72 goto out;
73
74 cgroup = &color_groups[color];
75 spin_lock(&cgroup->lock);
76 if (unlikely(!atomic_read(&cgroup->nr_pages))) {
77 TRACE_CUR("No free %lu colored pages.\n", color);
78 printk(KERN_WARNING "no free %lu colored pages.\n", color);
79 goto out_unlock;
80 }
81 page = list_first_entry(&cgroup->list, struct page, lru);
82 BUG_ON(page_count(page) > 1);
83 get_page(page);
84 list_del(&page->lru);
85 atomic_dec(&cgroup->nr_pages);
86 ClearPageLRU(page);
87out_unlock:
88 spin_unlock(&cgroup->lock);
89out:
90 return page;
91}
92
93static unsigned long smallest_nr_pages(void)
94{
95 unsigned long i, min_pages = -1;
96 struct color_group *cgroup;
97 for (i = 0; i < color_cache_info.nr_colors; ++i) {
98 cgroup = &color_groups[i];
99 if (atomic_read(&cgroup->nr_pages) < min_pages)
100 min_pages = atomic_read(&cgroup->nr_pages);
101 }
102 return min_pages;
103}
104
105static int do_add_pages(void)
106{
107 struct page *page, *page_tmp;
108 LIST_HEAD(free_later);
109 unsigned long color;
110 int ret = 0;
111
112 while (smallest_nr_pages() < PAGES_PER_COLOR) {
113#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
114 unsigned long vaddr;
115#endif
116
117#if defined(CONFIG_X86)
118 page = alloc_page(GFP_HIGHUSER | __GFP_ZERO |
119 __GFP_MOVABLE);
120#elif defined(CONFIG_SPARC) /* X86 */
121 page = alloc_page(GFP_HIGHUSER | __GFP_MOVABLE);
122#else
123#error What architecture are you using?
124#endif
125 if (unlikely(!page)) {
126 printk(KERN_WARNING "Could not allocate pages.\n");
127 ret = -ENOMEM;
128 goto out;
129 }
130 color = page_color(page);
131 if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR) {
132 SetPageReserved(page);
133#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
134 vaddr = (unsigned long) pfn_to_kaddr(page_to_pfn(page));
135 if (set_memory_uc(vaddr, 1)) {
136 printk(KERN_ALERT "Could not set_memory_uc\n");
137 BUG();
138 }
139#endif
140 add_page_to_color_list(page);
141 } else
142 list_add_tail(&page->lru, &free_later);
143 }
144 list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
145 list_del(&page->lru);
146 __free_page(page);
147 }
148out:
149 return ret;
150}
151
152static struct alloced_pages {
153 spinlock_t lock;
154 struct list_head list;
155} alloced_pages;
156
157struct alloced_page {
158 struct page *page;
159 struct vm_area_struct *vma;
160 struct list_head list;
161};
162
163static struct alloced_page * new_alloced_page(struct page *page,
164 struct vm_area_struct *vma)
165{
166 struct alloced_page *ap = kmalloc(sizeof(*ap), GFP_KERNEL);
167 INIT_LIST_HEAD(&ap->list);
168 ap->page = page;
169 ap->vma = vma;
170 return ap;
171}
172
173/*
174 * The page's count should be two or more. It should not be on any LRU list.
175 */
176void add_page_to_alloced_list(struct page *page, struct vm_area_struct *vma)
177{
178 struct alloced_page *ap;
179
180 BUG_ON(page_count(page) < 2);
181 ap = new_alloced_page(page, vma);
182 spin_lock(&alloced_pages.lock);
183 list_add_tail(&ap->list, &alloced_pages.list);
184 spin_unlock(&alloced_pages.lock);
185}
186
187/*
188 * Reclaim pages.
189 */
190void reclaim_pages(struct vm_area_struct *vma)
191{
192 struct alloced_page *ap, *ap_tmp;
193 unsigned long nr_reclaimed = 0;
194 spin_lock(&alloced_pages.lock);
195 list_for_each_entry_safe(ap, ap_tmp, &alloced_pages.list, list) {
196 if (vma == ap->vma) {
197 list_del(&ap->list);
198 put_page(ap->page);
199 add_page_to_color_list(ap->page);
200 nr_reclaimed++;
201 TRACE_CUR("reclaiming page (pa:0x%10llx, pfn:%8lu, "
202 "color:%3lu)\n", page_to_phys(ap->page),
203 page_to_pfn(ap->page), page_color(ap->page));
204 kfree(ap);
205 }
206 }
207 spin_unlock(&alloced_pages.lock);
208 TRACE_CUR("Reclaimed %lu pages.\n", nr_reclaimed);
209}
210
211/***********************************************************
212 * Proc
213***********************************************************/
214
215int color_add_pages_handler(struct ctl_table *table, int write, void __user *buffer,
216 size_t *lenp, loff_t *ppos)
217{
218 int ret = 0;
219 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
220 if (ret)
221 goto out;
222 if (write && color_sysctl_add_pages_data)
223 ret = do_add_pages();
224out:
225 return ret;
226}
227
228
229int color_nr_pages_handler(struct ctl_table *table, int write, void __user *buffer,
230 size_t *lenp, loff_t *ppos)
231{
232 struct color_group *cgroup;
233 char *buf;
234 unsigned long i;
235 int used = 0, ret = 0;
236
237 if (write) {
238 ret = -EPERM;
239 goto out;
240 }
241 for (i = 0; i < color_cache_info.nr_colors; ++i) {
242 cgroup = &color_groups[i];
243 buf = ((char*)table->data) + used;
244 used += snprintf(buf, table->maxlen - used, ONE_COLOR_FMT,
245 i, atomic_read(&cgroup->nr_pages));
246 }
247 ret = proc_dostring(table, write, buffer, lenp, ppos);
248out:
249 return ret;
250}
251
252/***********************************************************
253 * Initialization
254***********************************************************/
255
256#if defined(CONFIG_X86)
257/* slowest possible way to find a log, but we only do this once on boot */
258static unsigned int __init slow_log(unsigned int v)
259{
260 unsigned int r = 0;
261 while (v >>= 1)
262 r++;
263 return r;
264}
265
266static int __init init_mask(void)
267{
268 unsigned int line_size_log = slow_log(color_cache_info.line_size);
269 int err = 0;
270
271 BUG_ON(color_cache_info.size <= 1048576 ||
272 color_cache_info.ways < 15 ||
273 color_cache_info.line_size != 64);
274
275 printk("Cache size: %lu line-size: %lu ways: %lu sets: %lu\n",
276 color_cache_info.size, color_cache_info.line_size,
277 color_cache_info.ways, color_cache_info.sets);
278 if (!color_cache_info.size) {
279 printk(KERN_WARNING "No cache information found.\n");
280 err = -EINVAL;
281 goto out;
282 }
283
284
285 BUG_ON(color_cache_info.size / color_cache_info.line_size /
286 color_cache_info.ways != color_cache_info.sets);
287 BUG_ON(PAGE_SIZE >= (color_cache_info.sets << line_size_log));
288 color_mask = ((color_cache_info.sets << line_size_log) - 1) ^
289 (PAGE_SIZE - 1);
290 color_cache_info.nr_colors = (color_mask >> PAGE_SHIFT) + 1;
291out:
292 return err;
293}
294#elif defined(CONFIG_SPARC) /* X86 */
295static int __init init_mask(void)
296{
297 /*
298 * Static assuming we are using Flare (our Niagara machine).
299 * This machine has weirdness with cache banks, and I don't want
300 * to waste time trying to auto-detect this.
301 */
302 color_mask = 0x3e000UL; /* bits 17:13 */
303 color_cache_info.size = 3 * 1024 * 1024; /* 3 MB */
304 color_cache_info.line_size = 64;
305 color_cache_info.ways = 12;
306 color_cache_info.sets = 1024 * 4;
307 color_cache_info.nr_colors = (1 << hweight_long(color_mask));
308 return 0;
309}
310#endif /* SPARC/X86 */
311
312
313
314static int __init init_color_groups(void)
315{
316 struct color_group *cgroup;
317 unsigned long i;
318 int err = 0;
319
320 color_groups = kmalloc(color_cache_info.nr_colors *
321 sizeof(struct color_group), GFP_KERNEL);
322 if (!color_groups) {
323 printk(KERN_WARNING "Could not allocate color groups.\n");
324 err = -ENOMEM;
325 goto out;
326 }
327
328 for (i = 0; i < color_cache_info.nr_colors; ++i) {
329 cgroup = &color_groups[i];
330 atomic_set(&cgroup->nr_pages, 0);
331 INIT_LIST_HEAD(&cgroup->list);
332 spin_lock_init(&cgroup->lock);
333 LOCKDEP_DYNAMIC_ALLOC(&cgroup->lock, &color_lock_keys[i],
334 cgroup->_lock_name, "color%lu", i);
335 }
336out:
337 return err;
338}
339
340static int __init init_color(void)
341{
342 int err = 0;
343 printk("Initializing LITMUS^RT cache coloring.\n");
344
345 INIT_LIST_HEAD(&alloced_pages.list);
346 spin_lock_init(&alloced_pages.lock);
347
348 err = init_mask();
349 printk("PAGE_SIZE: %lu Color mask: 0x%lx Total colors: %lu\n",
350 PAGE_SIZE, color_mask, color_cache_info.nr_colors);
351
352 BUG_ON(LOCKDEP_MAX_NR_COLORS < color_cache_info.nr_colors);
353 err = init_color_groups();
354 return err;
355}
356
357module_init(init_color);
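
page_color() just extracts the cache-set index bits that lie above the page offset. As a worked example of the x86 path in init_mask(), take a hypothetical 2 MiB, 16-way cache with 64 B lines and 4 KiB pages (numbers chosen only to satisfy the BUG_ON checks above):

/* sets       = 2 MiB / 64 B / 16 ways    = 2048
 * color_mask = ((2048 << 6) - 1) ^ 0xfff = 0x1f000
 * nr_colors  = (0x1f000 >> 12) + 1       = 32
 * so a page's color is bits 16:12 of its physical address: */
static inline unsigned long example_color(unsigned long long phys)
{
	return (phys & 0x1f000ULL) >> 12;
}
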
diff --git a/litmus/color_dev.c b/litmus/color_dev.c
new file mode 100644
index 000000000000..51760328418e
--- /dev/null
+++ b/litmus/color_dev.c
@@ -0,0 +1,351 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/spinlock.h>
6#include <linux/module.h>
7#include <linux/highmem.h>
8#include <asm/io.h> /* page_to_phys on SPARC */
9
10#include <litmus/litmus.h>
11#include <litmus/color.h>
12
13#define ALLOC_NAME "litmus/color_alloc"
14#define CTRL_NAME "litmus/color_ctrl"
15
16extern unsigned long nr_colors;
17
18/***********************************************************
19 * Control device
20***********************************************************/
21
22static void litmus_color_ctrl_vm_close(struct vm_area_struct *vma)
23{
24 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__,
25 vma->vm_flags, pgprot_val(vma->vm_page_prot));
26
27 TRACE_CUR(CTRL_NAME ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
28 (void*) vma->vm_start, (void*) vma->vm_end, vma,
29 vma->vm_private_data);
30}
31
32static int litmus_color_ctrl_vm_fault(struct vm_area_struct *vma,
33 struct vm_fault *vmf)
34{
35 /* This function should never be called, since
36 * all pages should have been mapped by mmap()
37 * already. */
38 TRACE_CUR("%s flags=0x%lx\n", __FUNCTION__, vma->vm_flags);
39 printk(KERN_WARNING "fault: %s flags=0x%lx\n", __FUNCTION__,
40 vma->vm_flags);
41
42 /* nope, you only get one page */
43 return VM_FAULT_SIGBUS;
44}
45
46static struct vm_operations_struct litmus_color_ctrl_vm_ops = {
47 .close = litmus_color_ctrl_vm_close,
48 .fault = litmus_color_ctrl_vm_fault,
49};
50
51static int mmap_common_checks(struct vm_area_struct *vma)
52{
53 /* you can only map the "first" page */
54 if (vma->vm_pgoff != 0)
55 return -EINVAL;
56
57#if 0
58 /* you can't share it with anyone */
59 /* well, maybe you can... */
60 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
61 return -EINVAL;
62#endif
63
64 return 0;
65}
66
67static int alloc_color_ctrl_page(void)
68{
69 struct task_struct *t;
70 int err = 0;
71
72 t = current;
73 /* only allocate if the task doesn't have one yet */
74 if (!tsk_rt(t)->color_ctrl_page) {
75 tsk_rt(t)->color_ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
76 if (!tsk_rt(t)->color_ctrl_page)
77 err = -ENOMEM;
78 /* will get de-allocated in task teardown */
79 TRACE_TASK(t, "%s color_ctrl_page = %p\n", __FUNCTION__,
80 tsk_rt(t)->color_ctrl_page);
81 }
82 return err;
83}
84
85static int map_color_ctrl_page(struct vm_area_struct *vma)
86{
87 int err;
88 unsigned long pfn;
89 struct task_struct *t = current;
90 struct page *color_ctrl = virt_to_page(tsk_rt(t)->color_ctrl_page);
91
92 t = current;
93 /* Increase ref count. Is decreased when vma is destroyed. */
94 get_page(color_ctrl);
95 pfn = page_to_pfn(color_ctrl);
96
97 TRACE_CUR(CTRL_NAME
98 ": mapping %p (pfn:%lx, %lx) to 0x%lx (flags:%lx prot:%lx)\n",
99 tsk_rt(t)->color_ctrl_page, pfn, page_to_pfn(color_ctrl),
100 vma->vm_start, vma->vm_flags, pgprot_val(vma->vm_page_prot));
101
102 /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise
103 * userspace actually gets a copy-on-write page. */
104 err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED);
105
106 if (err)
107 TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err);
108
109 return err;
110}
111
112static int litmus_color_ctrl_mmap(struct file *filp, struct vm_area_struct *vma)
113{
114 int err = 0;
115
116 /* you can only get one page */
117 if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
118		TRACE_CUR(CTRL_NAME ": must allocate exactly one page\n");
119 err = -EINVAL;
120 goto out;
121 }
122
123 err = mmap_common_checks(vma);
124 if (err) {
125 TRACE_CUR(CTRL_NAME ": failed common mmap checks.\n");
126 goto out;
127 }
128
129 vma->vm_ops = &litmus_color_ctrl_vm_ops;
130 /* this mapping should not be kept across forks,
131 * and cannot be expanded */
132 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
133
134 err = alloc_color_ctrl_page();
135 if (!err)
136 err = map_color_ctrl_page(vma);
137
138 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__, vma->vm_flags,
139 pgprot_val(vma->vm_page_prot));
140out:
141 return err;
142}
143
144
145/***********************************************************
146 * Allocation device
147***********************************************************/
148
149#define vma_nr_pages(vma) \
150 ({unsigned long v = ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); v;})
151
152static int do_map_colored_pages(struct vm_area_struct *vma)
153{
154 const unsigned long nr_pages = vma_nr_pages(vma);
155 struct color_ctrl_page *color_ctrl = tsk_rt(current)->color_ctrl_page;
156 unsigned long nr_mapped;
157 int i, err = 0;
158
159 TRACE_CUR(ALLOC_NAME ": allocating %lu pages (flags:%lx prot:%lx)\n",
160 nr_pages, vma->vm_flags, pgprot_val(vma->vm_page_prot));
161
162#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
163 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
164#endif
165
166 for (i = 0, nr_mapped = 0; nr_mapped < nr_pages; ++i) {
167 const unsigned long color_no = color_ctrl->colors[i];
168 unsigned int page_no = 0;
169
170 for (; page_no < color_ctrl->pages[i]; ++page_no, ++nr_mapped) {
171 const unsigned long addr = vma->vm_start +
172 (nr_mapped << PAGE_SHIFT);
173 struct page *page = get_colored_page(color_no);
174#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
175 const pgprot_t ins_prot = pgprot_noncached(PAGE_SHARED);
176#else
177 const pgprot_t ins_prot = PAGE_SHARED;
178#endif
179
180 if (!page) {
181 TRACE_CUR(ALLOC_NAME ": Could not get page with"
182 " color %lu.\n", color_no);
183 /* TODO unmap mapped pages */
184 err = -ENOMEM;
185 goto out;
186 }
187
188#ifdef CONFIG_SPARC
189 clear_user_highpage(page, addr);
190#endif
191
192 TRACE_CUR(ALLOC_NAME ": insert page (pa:0x%10llx, "
193 "pfn:%8lu, color:%3lu, prot:%lx) at 0x%lx "
194 "vma:(flags:%16lx prot:%16lx)\n",
195 page_to_phys(page),
196 page_to_pfn(page), color_no,
197 pgprot_val(ins_prot), addr,
198 vma->vm_flags,
199 pgprot_val(vma->vm_page_prot));
200
201 err = remap_pfn_range(vma, addr, page_to_pfn(page),
202 PAGE_SIZE, ins_prot);
203 if (err) {
204 TRACE_CUR(ALLOC_NAME ": remap_pfn_range() fail "
205 "(%d)\n", err);
206 /* TODO unmap mapped pages */
207 err = -EINVAL;
208 goto out;
209 }
210 add_page_to_alloced_list(page, vma);
211 }
212
213 if (!page_no) {
214 TRACE_CUR(ALLOC_NAME ": 0 pages given for color %lu\n",
215 color_no);
216 err = -EINVAL;
217 goto out;
218 }
219 }
220 out:
221 return err;
222}
223
224static int map_colored_pages(struct vm_area_struct *vma)
225{
226 int err = 0;
227
228 if (!tsk_rt(current)->color_ctrl_page) {
229 TRACE_CUR("Process has no color control page.\n");
230 err = -EINVAL;
231 goto out;
232 }
233
234 if (COLORS_PER_CONTROL_PAGE < vma_nr_pages(vma)) {
235		TRACE_CUR("Max pages per request is %lu but %lu were requested.\n",
236 COLORS_PER_CONTROL_PAGE, vma_nr_pages(vma));
237 err = -EINVAL;
238 goto out;
239 }
240 err = do_map_colored_pages(vma);
241out:
242 return err;
243}
244
245static void litmus_color_alloc_vm_close(struct vm_area_struct *vma)
246{
247 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__,
248 vma->vm_flags, pgprot_val(vma->vm_page_prot));
249
250 TRACE_CUR(ALLOC_NAME ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
251 (void*) vma->vm_start, (void*) vma->vm_end, vma,
252 vma->vm_private_data);
253 reclaim_pages(vma);
254}
255
256static int litmus_color_alloc_vm_fault(struct vm_area_struct *vma,
257 struct vm_fault *vmf)
258{
259 /* This function should never be called, since
260 * all pages should have been mapped by mmap()
261 * already. */
262 TRACE_CUR("%s flags=0x%lx\n", __FUNCTION__, vma->vm_flags);
263 printk(KERN_WARNING "fault: %s flags=0x%lx\n", __FUNCTION__,
264 vma->vm_flags);
265
266 /* nope, you only get one page */
267 return VM_FAULT_SIGBUS;
268}
269
270static struct vm_operations_struct litmus_color_alloc_vm_ops = {
271 .close = litmus_color_alloc_vm_close,
272 .fault = litmus_color_alloc_vm_fault,
273};
274
275static int litmus_color_alloc_mmap(struct file *filp, struct vm_area_struct *vma)
276{
277 int err = 0;
278
279 /* you may only request integer multiple of PAGE_SIZE */
280 if (offset_in_page(vma->vm_end - vma->vm_start)) {
281 err = -EINVAL;
282 goto out;
283 }
284
285 err = mmap_common_checks(vma);
286 if (err)
287 goto out;
288
289 vma->vm_ops = &litmus_color_alloc_vm_ops;
290 /* this mapping should not be kept across forks,
291 * and cannot be expanded */
292 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
293
294 err = map_colored_pages(vma);
295
296 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__, vma->vm_flags,
297 pgprot_val(vma->vm_page_prot));
298out:
299 return err;
300}
301
302/***********************************************************
303 * Initialization
304***********************************************************/
305
306static struct file_operations litmus_color_ctrl_fops = {
307 .owner = THIS_MODULE,
308 .mmap = litmus_color_ctrl_mmap,
309};
310
311static struct miscdevice litmus_color_ctrl_dev = {
312 .name = CTRL_NAME,
313 .minor = MISC_DYNAMIC_MINOR,
314 .fops = &litmus_color_ctrl_fops,
315};
316
317static struct file_operations litmus_color_alloc_fops = {
318 .owner = THIS_MODULE,
319 .mmap = litmus_color_alloc_mmap,
320};
321
322static struct miscdevice litmus_color_alloc_dev = {
323 .name = ALLOC_NAME,
324 .minor = MISC_DYNAMIC_MINOR,
325 .fops = &litmus_color_alloc_fops,
326};
327
328static int __init init_dev(const char* name, struct miscdevice *dev)
329{
330 int err;
331 err = misc_register(dev);
332 if (err)
333 printk(KERN_WARNING "Could not allocate %s device (%d).\n",
334 name, err);
335 return err;
336}
337
338static int __init init_color_devices(void)
339{
340 int err;
341
342 printk("Allocating LITMUS^RT color devices.\n");
343 err = init_dev(ALLOC_NAME, &litmus_color_alloc_dev);
344 if (err)
345 goto out;
346 err = init_dev(CTRL_NAME, &litmus_color_ctrl_dev);
347out:
348 return err;
349}
350
351module_init(init_color_devices);
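
From userspace the two devices work as a pair: a task mmap()s one page of the control device, fills in the colors[]/pages[] request arrays, and then mmap()s the allocation device to receive the colored pages. A sketch under the assumptions that the misc devices show up as /dev/litmus/color_ctrl and /dev/litmus/color_alloc and that struct color_ctrl_page exposes the colors[] and pages[] arrays used by do_map_colored_pages() above (its exact definition is not shown in this hunk; error handling omitted):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <litmus/color.h>   /* struct color_ctrl_page (assumed location) */

/* request `nr` pages of cache color `color` and map them contiguously */
static void *alloc_colored(unsigned long color, unsigned long nr)
{
	long psize = sysconf(_SC_PAGESIZE);
	int ctrl  = open("/dev/litmus/color_ctrl",  O_RDWR);
	int alloc = open("/dev/litmus/color_alloc", O_RDWR);
	struct color_ctrl_page *ctl;
	void *mem;

	ctl = mmap(NULL, psize, PROT_READ | PROT_WRITE, MAP_SHARED, ctrl, 0);
	ctl->colors[0] = color;   /* field names from do_map_colored_pages() */
	ctl->pages[0]  = nr;

	mem = mmap(NULL, nr * psize, PROT_READ | PROT_WRITE,
		   MAP_SHARED, alloc, 0);
	close(alloc);
	close(ctrl);
	return mem;
}
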
diff --git a/litmus/color_proc.c b/litmus/color_proc.c
new file mode 100644
index 000000000000..d770123c5f02
--- /dev/null
+++ b/litmus/color_proc.c
@@ -0,0 +1,220 @@
1#include <linux/module.h>
2#include <linux/sysctl.h>
3#include <linux/slab.h>
4
5#include <litmus/sched_trace.h>
6#include <litmus/color.h>
7
8extern int color_sysctl_add_pages_data; /* litmus/color.c */
9
10static int zero = 0;
11static int one = 1;
12/* used as names for server proc entries */
13static char *period_str = "period";
14static char *wcet_str = "wcet";
15
16/* servers have a WCET and period */
17#define NR_SERVER_PARAMS 2
18#define CPU_NAME_LEN 3
19struct color_cpu_server {
20 char name[CPU_NAME_LEN];
21 unsigned long wcet;
22 unsigned long period;
23 /* the + 1 is for the sentinel element */
24 struct ctl_table table[NR_SERVER_PARAMS + 1];
25};
26static struct color_cpu_server color_cpu_servers[NR_CPUS];
27
28/* the + 1 is for the sentinel element */
29static struct ctl_table color_cpu_tables[NR_CPUS + 1];
30
31unsigned long color_chunk;
32
33#define INFO_BUFFER_SIZE 100
34static char info_buffer[100];
35
36#define NR_PAGES_INDEX 0 /* location of nr_pages in the table below */
37static struct ctl_table color_table[] =
38{
39 {
40 /* you MUST update NR_PAGES_INDEX if you move this entry */
41 .procname = "nr_pages",
42 .mode = 0444,
43 .proc_handler = color_nr_pages_handler,
44 .data = NULL, /* dynamically set later */
45 .maxlen = 0, /* also set later */
46 },
47 {
48 .procname = "servers",
49 .mode = 0555,
50 .child = color_cpu_tables,
51 },
52 {
53 .procname = "add_pages",
54 .data = &color_sysctl_add_pages_data,
55 .maxlen = sizeof(int),
56 .mode = 0644,
57 .proc_handler = color_add_pages_handler,
58 .extra1 = &zero,
59 .extra2 = &one,
60 },
61 {
62 .procname = "cache_info",
63 .mode = 0444,
64 .proc_handler = proc_dostring,
65 .data = info_buffer,
66 .maxlen = INFO_BUFFER_SIZE,
67 },
68 {
69 .procname = "chunk_size",
70 .mode = 0666,
71 .proc_handler = proc_doulongvec_minmax,
72 .data = &color_chunk,
73 .maxlen = sizeof(color_chunk),
74 },
75 { }
76};
77
78static struct ctl_table litmus_table[] =
79{
80 {
81 .procname = "color",
82 .mode = 0555,
83 .child = color_table,
84 },
85 { }
86};
87
88static struct ctl_table litmus_dir_table[] = {
89 {
90 .procname = "litmus",
91 .mode = 0555,
92 .child = litmus_table,
93 },
94 { }
95};
96
97int color_server_params(int cpu, unsigned long *wcet, unsigned long *period)
98{
99 struct color_cpu_server *svr;
100
101 if (cpu >= num_online_cpus()) {
102 printk(KERN_WARNING "Cannot access illegal CPU: %d\n", cpu);
103 return -EFAULT;
104 }
105
106 svr = &color_cpu_servers[cpu];
107 if (svr->wcet == 0 || svr->period == 0) {
108 printk(KERN_WARNING "Server %d is uninitialized!\n", cpu);
109 return -EPERM;
110 }
111
112 *wcet = svr->wcet;
113 *period = svr->period;
114
115 TRACE("For %d: %lu, %lu\n", cpu, svr->wcet, svr->period);
116
117 return 0;
118}
119
120/* must be called AFTER nr_colors is set */
121static int __init init_sysctl_nr_colors(void)
122{
123 int ret = 0, maxlen = ONE_COLOR_LEN * color_cache_info.nr_colors;
124 color_table[NR_PAGES_INDEX].data = kmalloc(maxlen, GFP_KERNEL);
125 if (!color_table[NR_PAGES_INDEX].data) {
126 printk(KERN_WARNING "Could not allocate nr_pages buffer.\n");
127 ret = -ENOMEM;
128 goto out;
129 }
130 color_table[NR_PAGES_INDEX].maxlen = maxlen;
131out:
132 return ret;
133}
134
135static void __init init_server_entry(struct ctl_table *entry,
136 unsigned long *parameter,
137 char *name)
138{
139 entry->procname = name;
140 entry->mode = 0666;
141 entry->proc_handler = proc_doulongvec_minmax;
142 entry->data = parameter;
143 entry->maxlen = sizeof(*parameter);
144}
145
146static int __init init_cpu_entry(struct ctl_table *cpu_table,
147 struct color_cpu_server *svr, int cpu)
148{
149 struct ctl_table *entry = svr->table;
150
151 printk(KERN_INFO "Creating server sysctl entries for CPU %d\n", cpu);
152
153 init_server_entry(entry, &svr->wcet, wcet_str);
154 entry++;
155 init_server_entry(entry, &svr->period, period_str);
156
157 /* minus one for the null byte */
158 snprintf(svr->name, CPU_NAME_LEN - 1, "%d", cpu);
159 cpu_table->procname = svr->name;
160 cpu_table->mode = 0555;
161 cpu_table->child = svr->table;
162
163 return 0;
164}
165
166static int __init init_server_entries(void)
167{
168 int cpu, err = 0;
169 struct ctl_table *cpu_table;
170 struct color_cpu_server *svr;
171
172 for_each_online_cpu(cpu) {
173 cpu_table = &color_cpu_tables[cpu];
174 svr = &color_cpu_servers[cpu];
175 err = init_cpu_entry(cpu_table, svr, cpu);
176 if (err)
177 goto out;
178 }
179out:
180 return err;
181}
182
183
184static struct ctl_table_header *litmus_sysctls;
185
186static int __init litmus_sysctl_init(void)
187{
188 int ret = 0;
189
190 printk(KERN_INFO "Registering LITMUS^RT proc sysctl.\n");
191 litmus_sysctls = register_sysctl_table(litmus_dir_table);
192 if (!litmus_sysctls) {
193 printk(KERN_WARNING "Could not register LITMUS^RT sysctl.\n");
194 ret = -EFAULT;
195 goto out;
196 }
197 ret = init_sysctl_nr_colors();
198 if (ret)
199 goto out;
200
201 ret = init_server_entries();
202 if (ret)
203 goto out;
204
205 snprintf(info_buffer, INFO_BUFFER_SIZE,
206 "Cache size\t: %lu B\n"
207 "Line size\t: %lu B\n"
208 "Page size\t: %lu B\n"
209 "Ways\t\t: %lu\n"
210 "Sets\t\t: %lu\n"
211 "Colors\t\t: %lu",
212 color_cache_info.size, color_cache_info.line_size, PAGE_SIZE,
213 color_cache_info.ways, color_cache_info.sets,
214 color_cache_info.nr_colors);
215
216out:
217 return ret;
218}
219
220module_init(litmus_sysctl_init);
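A minimal consumer sketch (illustrative only, not part of the patch): how a plugin could pull the per-CPU server parameters configured through /proc/sys/litmus/color/servers/<cpu>/; the function name and printk text are placeholders.

static void example_dump_color_servers(void)
{
	unsigned long wcet, period;
	int cpu;

	for_each_online_cpu(cpu) {
		/* returns -EPERM until both wcet and period have been set */
		if (color_server_params(cpu, &wcet, &period))
			continue;
		printk(KERN_INFO "cpu %d: wcet %lu, period %lu\n",
		       cpu, wcet, period);
	}
}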
diff --git a/litmus/dgl.c b/litmus/dgl.c
new file mode 100644
index 000000000000..dd2a42cc9ca6
--- /dev/null
+++ b/litmus/dgl.c
@@ -0,0 +1,300 @@
1#include <linux/sched.h>
2#include <linux/slab.h>
3
4#include <litmus/litmus.h>
5#include <litmus/dgl.h>
6#include <litmus/sched_trace.h>
7
8#define MASK_SIZE (sizeof(unsigned long) * 8)
9
10/* Return number of MASK_SIZE fields needed to store a mask in d */
11#define WP(num, word) (num / word + (num % word != 0))
12#define MASK_WORDS(d) WP(d->num_resources, MASK_SIZE)
13
14/* Word, bit -> resource id */
15#define ri(w, b) (w * MASK_SIZE + b)
16
17 /* For loop, where @i iterates over each set bit in @bit_arr */
18#define for_each_resource(bit_arr, d, w, b, i) \
19 for(w = 0; w < MASK_WORDS(d); ++w) \
20 for(b = find_first_bit(&bit_arr[w],MASK_SIZE), i = ri(w, b); \
21 b < MASK_SIZE; \
22 b = find_next_bit(&bit_arr[w],MASK_SIZE,b+1), i = ri(w, b))
23
24/* Return resource id in dgl @d for resource @r */
25#define resource_id(d, r) ((((void*)r) - (void*)((d)->resources))/ sizeof(*r))
26
27/* Return request group of req @r for resource @i */
28#define req_group(r, i) (container_of(((void*)r) - sizeof(*r)*(i), \
29 struct dgl_group_req, requests))
30
31/* Resource id -> word, bit */
32static inline void mask_idx(int resource, int *word, int *bit)
33{
34 *word = resource / MASK_SIZE;
35 *bit = resource % MASK_SIZE;
36}
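/* Worked example (illustration only, not part of the patch): with 64-bit
 * longs MASK_SIZE is 64, so a dgl with 100 resources needs
 * MASK_WORDS(d) = WP(100, 64) = 100/64 + (100 % 64 != 0) = 2 words,
 * and resource 70 maps to word 1, bit 6 (ri(1, 6) = 1 * 64 + 6 = 70).
 */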
37
38
39static void print_waiting(struct dgl *dgl, struct dgl_resource *resource)
40{
41 struct dgl_req *pos;
42 struct dgl_group_req *greq;
43 unsigned long long last = 0;
44
45 TRACE("List for rid %d\n", resource_id(dgl, resource));
46 list_for_each_entry(pos, &resource->waiting, list) {
47 greq = pos->greq;
48 TRACE(" 0x%p with timestamp %llu\n", greq, greq->ts);
49 BUG_ON(greq->ts < last);
50 last = greq->ts;
51 }
52}
53
54void dgl_init(struct dgl *dgl, unsigned long num_resources,
55 unsigned long num_replicas)
56{
57 int i;
58 struct dgl_resource *resource;
59
60 dgl->num_replicas = num_replicas;
61 dgl->num_resources = num_resources;
62
63 dgl->resources = kmalloc(sizeof(*dgl->resources) * num_resources,
64 GFP_ATOMIC);
65 dgl->acquired = kmalloc(sizeof(*dgl->acquired) * num_online_cpus(),
66 GFP_ATOMIC);
67
68 for (i = 0; i < num_online_cpus(); ++i)
69 dgl->acquired[i] = NULL;
70
71 for (i = 0; i < num_resources; i++) {
72 resource = &dgl->resources[i];
73
74 INIT_LIST_HEAD(&resource->waiting);
75 resource->free_replicas = dgl->num_replicas;
76 }
77
78 dgl->requests = 0;
79 dgl->running = 0;
80 dgl->ts = 0;
81}
82
83void dgl_free(struct dgl *dgl)
84{
85 kfree(dgl->resources);
86 kfree(dgl->acquired);
87}
88
89void dgl_group_req_init(struct dgl *dgl, struct dgl_group_req *greq)
90{
91 int i;
92
93 greq->requested = kmalloc(sizeof(*greq->requested) * MASK_WORDS(dgl),
94 GFP_ATOMIC);
95 greq->waiting = kmalloc(sizeof(*greq->waiting) * MASK_WORDS(dgl),
96 GFP_ATOMIC);
97 greq->requests = kmalloc(sizeof(*greq->requests) * dgl->num_resources,
98 GFP_ATOMIC);
99
100 BUG_ON(!greq->requested);
101 BUG_ON(!greq->waiting);
102 BUG_ON(!greq->requests);
103
104 greq->cpu = NO_CPU;
105 for (i = 0; i < MASK_WORDS(dgl); ++i) {
106 greq->requested[i] = 0;
107 greq->waiting[i] = 0;
108 }
109}
110
111void dgl_group_req_free(struct dgl_group_req *greq)
112{
113 kfree(greq->requested);
114 kfree(greq->waiting);
115 kfree(greq->requests);
116}
117
118/**
119 * set_req - create request for @replicas of @resource.
120 */
121void set_req(struct dgl *dgl, struct dgl_group_req *greq,
122 int resource, int replicas)
123{
124 int word, bit;
125 struct dgl_req *req;
126
127 if (replicas > dgl->num_replicas)
128 replicas = dgl->num_replicas;
129
130 mask_idx(resource, &word, &bit);
131 __set_bit(bit, &greq->requested[word]);
132
133 TRACE("0x%p requesting %d of %d\n", greq, replicas, resource);
134
135 req = &greq->requests[resource];
136 req->greq = greq;
137 INIT_LIST_HEAD(&req->list);
138 req->replicas = replicas;
139}
140
141/*
142 * Attempt to fulfill request @req for @resource.
143 * Return 1 if successful. If the matching group request has acquired all of
144 * its needed resources, this will then set that req as dgl->acquired[cpu].
145 */
146static unsigned long try_acquire(struct dgl *dgl, struct dgl_resource *resource,
147 struct dgl_req *req)
148{
149 int word, bit, rid, head, empty, room;
150 unsigned long waiting;
151 struct dgl_group_req *greq;
152
153 rid = resource_id(dgl, resource);
154 greq = req->greq;
155
156 TRACE("0x%p greq\n", greq);
157
158 head = resource->waiting.next == &req->list;
159 empty = list_empty(&resource->waiting);
160 room = resource->free_replicas >= req->replicas;
161
162 if (! (room && (head || empty)) ) {
163 TRACE("0x%p cannot acquire %d replicas, %d free "
164 "(room=%d head=%d empty=%d)\n", greq, req->replicas,
165 resource->free_replicas, room, head, empty);
166 return 0;
167 }
168
169 resource->free_replicas -= req->replicas;
170
171 TRACE("0x%p acquired %d replicas of rid %d\n",
172 greq, req->replicas, rid);
173
174 mask_idx(rid, &word, &bit);
175
176
177 TRACE("waiting mask at 0x%p, word value 0x%lx at 0x%p\n",
178 greq->waiting, greq->waiting[word], &greq->waiting[word]);
179
180 clear_bit(bit, &greq->waiting[word]);
181
182 waiting = 0;
183 for (word = 0; word < MASK_WORDS(dgl); word++) {
184 waiting |= greq->waiting[word];
185 if (waiting)
186 break;
187 }
188
189 if (!waiting) {
190 TRACE("0x%p acquired all resources\n", greq);
191 BUG_ON(dgl->acquired[greq->cpu]);
192 dgl->acquired[greq->cpu] = greq;
193 litmus_reschedule(greq->cpu);
194 dgl->running++;
195 }
196
197 return 1;
198}
199
200/**
201 * add_group_req - initiate group request.
202 */
203void add_group_req(struct dgl *dgl, struct dgl_group_req *greq, int cpu)
204{
205 int b, w, i, succ, all_succ = 1;
206 struct dgl_req *req;
207 struct dgl_resource *resource;
208
209 greq->cpu = cpu;
210 greq->ts = dgl->ts++;
211
212 TRACE("0x%p group request added for CPU %d\n", greq, cpu);
213 BUG_ON(dgl->acquired[cpu] == greq);
214
215 ++dgl->requests;
216
217 for_each_resource(greq->requested, dgl, w, b, i) {
218 __set_bit(b, &greq->waiting[w]);
219 }
220
221 for_each_resource(greq->requested, dgl, w, b, i) {
222 req = &greq->requests[i];
223 resource = &dgl->resources[i];
224
225 succ = try_acquire(dgl, resource, req);
226 all_succ &= succ;
227
228 if (!succ) {
229 TRACE("0x%p waiting on rid %d\n", greq, i);
230 list_add_tail(&req->list, &resource->waiting);
231 }
232 }
233
234 /* Grant empty requests */
235 if (all_succ && !dgl->acquired[cpu]) {
236 TRACE("0x%p empty group request acquired cpu %d\n", greq, cpu);
237 dgl->acquired[cpu] = greq;
238 ++dgl->running;
239 }
240
241 BUG_ON(dgl->requests && !dgl->running);
242}
243
244/**
245 * remove_group_req - abandon group request.
246 *
247 * This will also progress the waiting queues of resources acquired by @greq.
248 */
249void remove_group_req(struct dgl *dgl, struct dgl_group_req *greq)
250{
251 int b, w, i;
252 struct dgl_req *req, *next;
253 struct dgl_resource *resource;
254
255 TRACE("0x%p removing group request for CPU %d\n", greq, greq->cpu);
256
257 --dgl->requests;
258
259 if (dgl->acquired[greq->cpu] == greq) {
260 TRACE("0x%p no longer acquired on CPU %d\n", greq, greq->cpu);
261 dgl->acquired[greq->cpu] = NULL;
262 --dgl->running;
263 }
264
265 for_each_resource(greq->requested, dgl, w, b, i) {
266 req = &greq->requests[i];
267 resource = &dgl->resources[i];
268
269 if (!list_empty(&req->list)) {
270 /* Waiting on resource */
271 clear_bit(b, &greq->waiting[w]);
272 list_del_init(&req->list);
273 TRACE("Quitting 0x%p from rid %d\n",
274 req, i);
275 } else {
276 /* Have resource */
277 resource->free_replicas += req->replicas;
278 BUG_ON(resource->free_replicas > dgl->num_replicas);
279 TRACE("0x%p releasing %d of %d replicas, rid %d\n",
280 greq, req->replicas, resource->free_replicas, i);
281
282 if (!list_empty(&resource->waiting)) {
283 /* Give it to the next guy */
284 next = list_first_entry(&resource->waiting,
285 struct dgl_req,
286 list);
287
288 BUG_ON(next->greq->ts < greq->ts);
289
290 if (try_acquire(dgl, resource, next)) {
291 list_del_init(&next->list);
292 print_waiting(dgl, resource);
293
294 }
295 }
296 }
297 }
298
299 BUG_ON(dgl->requests && !dgl->running);
300}
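A usage sketch for the API above (illustrative only, not part of the patch); it assumes four resources with two replicas each and glosses over the locking that real callers, such as the scheduler plugins, provide.

static struct dgl example_dgl;
static struct dgl_group_req example_greq;

static void example_dgl_usage(int cpu)
{
	dgl_init(&example_dgl, 4, 2);
	dgl_group_req_init(&example_dgl, &example_greq);

	/* ask for 1 replica of resource 0 and 2 replicas of resource 3 */
	set_req(&example_dgl, &example_greq, 0, 1);
	set_req(&example_dgl, &example_greq, 3, 2);

	/* issue the group request; once every requested replica is held,
	 * example_dgl.acquired[cpu] points at example_greq
	 */
	add_group_req(&example_dgl, &example_greq, cpu);

	/* ... critical section, entered once acquired ... */

	remove_group_req(&example_dgl, &example_greq);
	dgl_group_req_free(&example_greq);
	dgl_free(&example_dgl);
}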
diff --git a/litmus/domain.c b/litmus/domain.c
new file mode 100644
index 000000000000..4dc3649a0389
--- /dev/null
+++ b/litmus/domain.c
@@ -0,0 +1,21 @@
1#include <linux/list.h>
2#include <linux/spinlock_types.h>
3
4#include <litmus/domain.h>
5
6void domain_init(domain_t *dom,
7 raw_spinlock_t *lock,
8 requeue_t requeue,
9 peek_ready_t peek_ready,
10 take_ready_t take_ready,
11 preempt_needed_t preempt_needed,
12 task_prio_t priority)
13{
14 INIT_LIST_HEAD(&dom->list);
15 dom->lock = lock;
16 dom->requeue = requeue;
17 dom->peek_ready = peek_ready;
18 dom->take_ready = take_ready;
19 dom->preempt_needed = preempt_needed;
20 dom->higher_prio = priority;
21}
diff --git a/litmus/event_group.c b/litmus/event_group.c
new file mode 100644
index 000000000000..478698a6d17a
--- /dev/null
+++ b/litmus/event_group.c
@@ -0,0 +1,334 @@
1#include <linux/slab.h>
2#include <linux/sched.h>
3#include <linux/module.h>
4
5#include <litmus/litmus.h>
6#include <litmus/trace.h>
7#include <litmus/sched_trace.h>
8#include <litmus/event_group.h>
9
10#if 1
11#define VTRACE(fmt, args...) \
12sched_trace_log_message("%d P%d [%s@%s:%d]: " fmt, \
13 TRACE_ARGS, ## args)
14#else
15#define VTRACE(fmt, args...)
16#endif
17
18/*
19 * Return event_queue slot for the given time.
20 */
21static unsigned int time2slot(lt_t time)
22{
23 return (unsigned int) time2quanta(time, FLOOR) % EVENT_QUEUE_SLOTS;
24}
25
26/*
27 * Executes events from an event_list in priority order.
28 * Events can requeue themselves when they are called.
29 */
30static enum hrtimer_restart on_timer(struct hrtimer *timer)
31{
32 int prio, num;
33 unsigned long flags;
34 struct event_list *el;
35 struct rt_event *e;
36 struct list_head *pos, events[NUM_EVENT_PRIORITIES];
37 raw_spinlock_t *queue_lock;
38
39 el = container_of(timer, struct event_list, timer);
40 queue_lock = &el->group->queue_lock;
41
42 raw_spin_lock_irqsave(queue_lock, flags);
43
44 /* Remove event_list from hashtable so that no more events
45 * are added to it.
46 */
47 VTRACE("Removing event list 0x%x\n", el);
48 list_del_init(&el->queue_node);
49
50 /* Copy over events so that the event_list can re-used when the lock
51 * is released.
52 */
53 VTRACE("Emptying event list 0x%x\n", el);
54 for (prio = 0; prio < NUM_EVENT_PRIORITIES; prio++) {
55 list_replace_init(&el->events[prio], &events[prio]);
56 }
57
58 for (prio = 0; prio < NUM_EVENT_PRIORITIES; prio++) {
59 /* Fire events. Complicated loop is used so that events
60 * in the list can be canceled (removed) while other events are
61 * executing.
62 */
63 for (pos = events[prio].next, num = 0;
64 prefetch(pos->next), events[prio].next != &events[prio];
65 pos = events[prio].next, num++) {
66
67 e = list_entry(pos, struct rt_event, events_node);
68 list_del_init(pos);
69 raw_spin_unlock_irqrestore(queue_lock, flags);
70
71 VTRACE("Dequeueing event 0x%x with prio %d from 0x%x\n",
72 e, e->prio, el);
73 e->function(e);
74
75 raw_spin_lock_irqsave(queue_lock, flags);
76 }
77 }
78 raw_spin_unlock_irqrestore(queue_lock, flags);
79
80 VTRACE("Exhausted %d events from list 0x%x\n", num, el);
81
82 return HRTIMER_NORESTART;
83}
84
85/*
86 * Return event_list for the given event and time. If no event_list
87 * is being used yet and use_event_heap is 1, will create the list
88 * and return it. Otherwise it will return NULL.
89 */
90static struct event_list* get_event_list(struct event_group *group,
91 struct rt_event *e,
92 lt_t fire,
93 int use_event_list)
94{
95 struct list_head* pos;
96 struct event_list *el = NULL, *tmp;
97 unsigned int slot = time2slot(fire);
98 int remaining = 300;
99
100 VTRACE("Getting list for time %llu, event 0x%x\n", fire, e);
101
102 /* Initialize pos for the case that the list is empty */
103 pos = group->event_queue[slot].next;
104 list_for_each(pos, &group->event_queue[slot]) {
105 BUG_ON(remaining-- < 0);
106 tmp = list_entry(pos, struct event_list, queue_node);
107 if (lt_after_eq(fire, tmp->fire_time) &&
108 lt_before(fire, tmp->fire_time + group->res)) {
109 VTRACE("Found match 0x%x at time %llu\n",
110 tmp, tmp->fire_time);
111 el = tmp;
112 break;
113 } else if (lt_before(fire, tmp->fire_time)) {
114 /* We need to insert a new node since this entry
115 * is already in the future
116 */
117 VTRACE("Time %llu was before %llu\n",
118 fire, tmp->fire_time);
119 break;
120 } else {
121 VTRACE("Time %llu was after %llu\n",
122 fire, tmp->fire_time + group->res);
123 }
124 }
125 if (!el && use_event_list) {
126 /* Use pre-allocated list */
127 tmp = e->event_list;
128 tmp->fire_time = fire;
129 tmp->group = group;
130 /* Add to queue */
131 VTRACE("Using list 0x%x for priority %d and time %llu\n",
132 tmp, e->prio, fire);
133 BUG_ON(!list_empty(&tmp->queue_node));
134 list_add(&tmp->queue_node, pos->prev);
135 el = tmp;
136 }
137 return el;
138}
139
140/*
141 * Prepare a release list for a new set of events.
142 */
143static void reinit_event_list(struct event_group *group, struct rt_event *e)
144{
145 int prio, t_ret;
146 struct event_list *el = e->event_list;
147
148 VTRACE("Reinitting list 0x%x for event 0x%x\n", el, e);
149
150 /* Cancel timer */
151 t_ret = hrtimer_pull_cancel(group->cpu, &el->timer, &el->info);
152 BUG_ON(t_ret == 1);
153 if (t_ret == -1) {
154 /* The on_timer callback is running for this list */
155 VTRACE("Timer is running concurrently!\n");
156 }
157 /* Clear event lists */
158 for (prio = 0; prio < NUM_EVENT_PRIORITIES; prio++)
159 INIT_LIST_HEAD(&el->events[prio]);
160}
161
162/**
163 * add_event() - Add timer to event group.
164 */
165void add_event(struct event_group *group, struct rt_event *e, lt_t fire)
166{
167 struct event_list *el;
168 int in_use;
169
170 VTRACE("Adding event 0x%x with priority %d for time %llu\n",
171 e, e->prio, fire);
172
173 /* A NULL group means use the group of the currently executing CPU */
174 if (NULL == group)
175 group = get_event_group_for(NO_CPU);
176 /* Saving the group is important for cancellations */
177 e->_event_group = group;
178
179 raw_spin_lock(&group->queue_lock);
180 el = get_event_list(group, e, fire, 0);
181 if (!el) {
182 /* Use our own, but drop lock first */
183 raw_spin_unlock(&group->queue_lock);
184 reinit_event_list(group, e);
185 raw_spin_lock(&group->queue_lock);
186 el = get_event_list(group, e, fire, 1);
187 }
188
189 /* Add event to the list for its priority */
190 VTRACE("Inserting event 0x%x into event_list 0x%x\n", e, el);
191 list_add(&e->events_node, &el->events[e->prio]);
192 raw_spin_unlock(&group->queue_lock);
193
194 /* Arm timer if we are the owner */
195 if (el == e->event_list) {
196 VTRACE("Arming timer on event 0x%x for %llu\n", e, fire);
197 in_use = hrtimer_start_on(group->cpu, &el->info,
198 &el->timer, ns_to_ktime(el->fire_time),
199 HRTIMER_MODE_ABS_PINNED);
200 BUG_ON(in_use);
201 } else {
202 VTRACE("Not my timer @%llu\n", fire);
203 }
204}
205
206/**
207 * cancel_event() - Remove event from the group.
208 */
209void cancel_event(struct rt_event *e)
210{
211 int prio, cancel;
212 struct rt_event *swap, *entry;
213 struct event_list *tmp;
214 struct event_group *group;
215 struct list_head *list, *pos;
216
217 VTRACE("Canceling event 0x%x with priority %d\n", e, e->prio);
218 group = e->_event_group;
219 if (!group) return;
220
221 raw_spin_lock(&group->queue_lock);
222
223 /* Relies on the fact that an event_list's owner is ALWAYS present
224 * as one of the event_list's events.
225 */
226 for (prio = 0, cancel = 0, swap = NULL;
227 prio < NUM_EVENT_PRIORITIES && !swap;
228 prio++) {
229
230 list = &e->event_list->events[prio];
231 cancel |= !list_empty(list);
232
233 /* Find any element which is not the event_list's owner */
234 list_for_each(pos, list) {
235 entry = list_entry(pos, struct rt_event, events_node);
236 if (entry != e) {
237 swap = entry;
238 break;
239 }
240 }
241 }
242
243 if (swap) {
244 /* Give the other guy ownership of the event_list */
245 VTRACE("Swapping list 0x%x with event 0x%x event list 0x%x\n",
246 e->event_list, swap, swap->event_list);
247 tmp = swap->event_list;
248 swap->event_list = e->event_list;
249 BUG_ON(!tmp);
250 e->event_list = tmp;
251 } else if (cancel) {
252 /* Cancel the event_list we own */
253 hrtimer_pull_cancel(group->cpu,
254 &e->event_list->timer,
255 &e->event_list->info);
256 list_del_init(&e->event_list->queue_node);
257 }
258 /* Remove ourselves from any list we may be a part of */
259 list_del_init(&e->events_node);
260 e->_event_group = NULL;
261
262 raw_spin_unlock(&group->queue_lock);
263}
264
265struct kmem_cache *event_list_cache;
266
267struct event_list* event_list_alloc(int gfp_flags)
268{
269 int prio;
270 struct event_list *el = kmem_cache_alloc(event_list_cache, gfp_flags);
271 if (el) {
272 hrtimer_init(&el->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
273 INIT_LIST_HEAD(&el->queue_node);
274 el->timer.function = on_timer;
275 hrtimer_start_on_info_init(&el->info);
276 for (prio = 0; prio < NUM_EVENT_PRIORITIES; prio++)
277 INIT_LIST_HEAD(&el->events[prio]);
278 } else {
279 VTRACE("Failed to allocate event list!\n");
280 printk(KERN_CRIT "Failed to allocate event list.\n");
281 BUG();
282 }
283 return el;
284}
285
286void init_event(struct rt_event *e, int prio, fire_event_t function,
287 struct event_list *el)
288{
289 e->prio = prio;
290 e->function = function;
291 e->event_list = el;
292 e->_event_group = NULL;
293 INIT_LIST_HEAD(&e->events_node);
294}
295
296/**
297 * init_event_group() - Prepare group for events.
298 * @group Group to prepare
299 * @res Timer resolution. Two events within @res of each other are merged
300 * @cpu Cpu on which to fire timers
301 */
302static void init_event_group(struct event_group *group, lt_t res, int cpu)
303{
304 int i;
305 VTRACE("Creating group with resolution %llu on CPU %d\n", res, cpu);
306 group->res = res;
307 group->cpu = cpu;
308 for (i = 0; i < EVENT_QUEUE_SLOTS; i++)
309 INIT_LIST_HEAD(&group->event_queue[i]);
310 raw_spin_lock_init(&group->queue_lock);
311}
312
313
314DEFINE_PER_CPU(struct event_group, _event_groups);
315
316struct event_group *get_event_group_for(const int cpu)
317{
318 return &per_cpu(_event_groups,
319 (NO_CPU == cpu) ? smp_processor_id() : cpu);
320}
321
322static int __init _init_event_groups(void)
323{
324 int cpu;
325 printk("Initializing LITMUS^RT event groups.\n");
326
327 for_each_online_cpu(cpu) {
328 init_event_group(get_event_group_for(cpu),
329 CONFIG_MERGE_TIMERS_WINDOW, cpu);
330 }
331 return 0;
332}
333
334module_init(_init_event_groups);
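A sketch of the event-group API defined above (illustrative only, not part of the patch); the callback and priority are placeholders, and the IRQ/locking context that real callers run in is glossed over.

static void example_fire(struct rt_event *e)
{
	TRACE("event 0x%p fired\n", e);
}

static void example_event_usage(void)
{
	static struct rt_event ev;

	/* priority 0, backed by a freshly allocated event_list */
	init_event(&ev, 0, example_fire, event_list_alloc(GFP_ATOMIC));

	/* NULL group: use the event group of the local CPU;
	 * fire roughly 1 ms from now
	 */
	add_event(NULL, &ev, litmus_clock() + 1000000);

	/* drop the event again if it should no longer fire */
	cancel_event(&ev);
}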
diff --git a/litmus/fifo_common.c b/litmus/fifo_common.c
new file mode 100644
index 000000000000..84ae98e42ae4
--- /dev/null
+++ b/litmus/fifo_common.c
@@ -0,0 +1,58 @@
1/*
2 * litmus/fifo_common.c
3 *
4 * Common functions for FIFO based schedulers.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fifo_common.h>
16
17int fifo_higher_prio(struct task_struct* first,
18 struct task_struct* second)
19{
20 /* There is no point in comparing a task to itself. */
21 if (first && first == second) {
22 TRACE_TASK(first,
23 "WARNING: pointless fifo priority comparison.\n");
24 BUG_ON(1);
25 return 0;
26 }
27
28 if (!first || !second)
29 return first && !second;
30
31 /* Tiebreak by PID */
32 return (get_release(first) == get_release(second) &&
33 first->pid > second->pid) ||
34 (get_release(first) < get_release(second));
35
36
37}
38
39int fifo_ready_order(struct bheap_node* a, struct bheap_node* b)
40{
41 return fifo_higher_prio(bheap2task(a), bheap2task(b));
42}
43
44void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
45 release_jobs_t release)
46{
47 rt_domain_init(rt, fifo_ready_order, resched, release);
48}
49
50int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t)
51{
52 if (!__jobs_pending(rt))
53 return 0;
54 if (!t)
55 return 1;
56
57 return !is_realtime(t) || fifo_higher_prio(__next_ready(rt), t);
58}
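A wiring sketch (illustrative only, not part of the patch) for the FIFO domain helpers above. It assumes the check_resched/release_jobs callback signatures used elsewhere in this patch (rt->check_resched(rt) and release_jobs(rt, heap)); the callback bodies are placeholders.

static rt_domain_t example_fifo;

static int example_check_resched(rt_domain_t *dom)
{
	/* a real plugin would compare against its scheduled task first */
	litmus_reschedule(smp_processor_id());
	return 0;
}

static void example_release_jobs(rt_domain_t *dom, struct bheap *released)
{
	__merge_ready(dom, released);
}

static void example_fifo_setup(void)
{
	fifo_domain_init(&example_fifo, example_check_resched,
			 example_release_jobs);
}

static int example_should_preempt(struct task_struct *scheduled)
{
	/* caller must hold example_fifo.ready_lock with IRQs off */
	return fifo_preemption_needed(&example_fifo, scheduled);
}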
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
index 06fcf4cf77dc..999290fc8302 100644
--- a/litmus/ftdev.c
+++ b/litmus/ftdev.c
@@ -230,13 +230,20 @@ static ssize_t ftdev_read(struct file *filp,
230 * here with copied data because that data would get 230 * here with copied data because that data would get
231 * lost if the task is interrupted (e.g., killed). 231 * lost if the task is interrupted (e.g., killed).
232 */ 232 */
233 mutex_unlock(&ftdm->lock);
233 set_current_state(TASK_INTERRUPTIBLE); 234 set_current_state(TASK_INTERRUPTIBLE);
235
234 schedule_timeout(50); 236 schedule_timeout(50);
237
235 if (signal_pending(current)) { 238 if (signal_pending(current)) {
236 if (err == 0) 239 if (err == 0)
237 /* nothing read yet, signal problem */ 240 /* nothing read yet, signal problem */
238 err = -ERESTARTSYS; 241 err = -ERESTARTSYS;
239 break; 242 goto out;
243 }
244 if (mutex_lock_interruptible(&ftdm->lock)) {
245 err = -ERESTARTSYS;
246 goto out;
240 } 247 }
241 } else if (copied < 0) { 248 } else if (copied < 0) {
242 /* page fault */ 249 /* page fault */
@@ -303,7 +310,6 @@ struct file_operations ftdev_fops = {
303 .owner = THIS_MODULE, 310 .owner = THIS_MODULE,
304 .open = ftdev_open, 311 .open = ftdev_open,
305 .release = ftdev_release, 312 .release = ftdev_release,
306 .write = ftdev_write,
307 .read = ftdev_read, 313 .read = ftdev_read,
308 .unlocked_ioctl = ftdev_ioctl, 314 .unlocked_ioctl = ftdev_ioctl,
309}; 315};
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 8a0c889e2cb8..bd3175baefae 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -11,13 +11,17 @@ static inline void setup_release(struct task_struct *t, lt_t release)
11{ 11{
12 /* prepare next release */ 12 /* prepare next release */
13 tsk_rt(t)->job_params.release = release; 13 tsk_rt(t)->job_params.release = release;
14 tsk_rt(t)->job_params.deadline += release + get_rt_period(t); 14 tsk_rt(t)->job_params.deadline = release + get_rt_relative_deadline(t);
15 tsk_rt(t)->job_params.exec_time = 0; 15 tsk_rt(t)->job_params.exec_time = 0;
16
16 /* update job sequence number */ 17 /* update job sequence number */
17 tsk_rt(t)->job_params.job_no++; 18 tsk_rt(t)->job_params.job_no++;
18 19
19 /* don't confuse Linux */ 20 /* don't confuse Linux */
20 t->rt.time_slice = 1; 21 t->rt.time_slice = 1;
22
23 TRACE_TASK(t, "Releasing at %llu, deadline: %llu, period: %llu, now: %llu\n",
24 release, get_deadline(t), get_rt_period(t), litmus_clock());
21} 25}
22 26
23void prepare_for_next_period(struct task_struct *t) 27void prepare_for_next_period(struct task_struct *t)
@@ -41,9 +45,8 @@ void release_at(struct task_struct *t, lt_t start)
41 set_rt_flags(t, RT_F_RUNNING); 45 set_rt_flags(t, RT_F_RUNNING);
42} 46}
43 47
44
45/* 48/*
46 * Deactivate current task until the beginning of the next period. 49 * Deactivate current task until the beginning of the next period.
47 */ 50 */
48long complete_job(void) 51long complete_job(void)
49{ 52{
@@ -51,11 +54,14 @@ long complete_job(void)
51 lt_t now = litmus_clock(); 54 lt_t now = litmus_clock();
52 lt_t exec_time = tsk_rt(current)->job_params.exec_time; 55 lt_t exec_time = tsk_rt(current)->job_params.exec_time;
53 56
57 /* Task statistic summaries */
54 tsk_rt(current)->tot_exec_time += exec_time; 58 tsk_rt(current)->tot_exec_time += exec_time;
55 if (lt_before(tsk_rt(current)->max_exec_time, exec_time)) 59 if (lt_before(tsk_rt(current)->max_exec_time, exec_time))
56 tsk_rt(current)->max_exec_time = exec_time; 60 tsk_rt(current)->max_exec_time = exec_time;
57 61
58 if (is_tardy(current, now)) { 62 if (is_tardy(current, now)) {
63 TRACE_TASK(current, "is tardy, now: %llu, deadline: %llu\n",
64 now, get_deadline(current));
59 amount = now - get_deadline(current); 65 amount = now - get_deadline(current);
60 if (lt_after(amount, tsk_rt(current)->max_tardy)) 66 if (lt_after(amount, tsk_rt(current)->max_tardy))
61 tsk_rt(current)->max_tardy = amount; 67 tsk_rt(current)->max_tardy = amount;
@@ -63,8 +69,9 @@ long complete_job(void)
63 ++tsk_rt(current)->missed; 69 ++tsk_rt(current)->missed;
64 } 70 }
65 71
66 /* Mark that we do not excute anymore */ 72 /* Mark that we do not execute anymore */
67 set_rt_flags(current, RT_F_SLEEP); 73 set_rt_flags(current, RT_F_SLEEP);
74
68 /* call schedule, this will return when a new job arrives 75 /* call schedule, this will return when a new job arrives
69 * it also takes care of preparing for the next release 76 * it also takes care of preparing for the next release
70 */ 77 */
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 5e8221da35e9..cb41548d3e2d 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -13,10 +13,19 @@
13#include <litmus/litmus.h> 13#include <litmus/litmus.h>
14#include <litmus/bheap.h> 14#include <litmus/bheap.h>
15#include <litmus/trace.h> 15#include <litmus/trace.h>
16#include <litmus/event_group.h>
16#include <litmus/rt_domain.h> 17#include <litmus/rt_domain.h>
17#include <litmus/litmus_proc.h> 18#include <litmus/litmus_proc.h>
18#include <litmus/sched_trace.h> 19#include <litmus/sched_trace.h>
19 20
21#ifdef CONFIG_PLUGIN_MC
22#include <linux/pid.h>
23#include <linux/hrtimer.h>
24#include <litmus/sched_mc.h>
25#else
26struct mc_task;
27#endif
28
20#ifdef CONFIG_SCHED_CPU_AFFINITY 29#ifdef CONFIG_SCHED_CPU_AFFINITY
21#include <litmus/affinity.h> 30#include <litmus/affinity.h>
22#endif 31#endif
@@ -35,8 +44,16 @@ atomic_t __log_seq_no = ATOMIC_INIT(0);
35atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); 44atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
36#endif 45#endif
37 46
38static struct kmem_cache * bheap_node_cache; 47static struct kmem_cache *bheap_node_cache;
39extern struct kmem_cache * release_heap_cache; 48extern struct kmem_cache *release_heap_cache;
49
50#ifdef CONFIG_MERGE_TIMERS
51extern struct kmem_cache *event_list_cache;
52#endif
53
54#ifdef CONFIG_PLUGIN_MC
55static struct kmem_cache *mc_data_cache;
56#endif
40 57
41struct bheap_node* bheap_node_alloc(int gfp_flags) 58struct bheap_node* bheap_node_alloc(int gfp_flags)
42{ 59{
@@ -290,17 +307,92 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
290 return ret; 307 return ret;
291} 308}
292 309
310#ifdef CONFIG_PLUGIN_MC
311asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param)
312{
313 struct mc_task mc;
314 struct mc_data *mc_data;
315 struct task_struct *target;
316 int retval = -EINVAL;
317
318 printk("Setting up mixed-criticality task parameters for process %d.\n",
319 pid);
320
321 if (pid < 0 || param == 0) {
322 goto out;
323 }
324 if (copy_from_user(&mc, param, sizeof(mc))) {
325 retval = -EFAULT;
326 goto out;
327 }
328
329 /* Task search and manipulation must be protected */
330 read_lock_irq(&tasklist_lock);
331 if (!(target = find_task_by_vpid(pid))) {
332 retval = -ESRCH;
333 goto out_unlock;
334 }
335
336 if (is_realtime(target)) {
337 /* The task is already a real-time task.
338 * We cannot allow parameter changes at this point.
339 */
340 retval = -EBUSY;
341 goto out_unlock;
342 }
343
344 /* check parameters passed in are valid */
345 if (mc.crit < CRIT_LEVEL_A || mc.crit >= NUM_CRIT_LEVELS) {
346 printk(KERN_WARNING "litmus: real-time task %d rejected because "
347 "of invalid criticality level\n", pid);
348 goto out_unlock;
349 }
350 if (CRIT_LEVEL_A == mc.crit &&
351 (mc.lvl_a_id < 0 ||
352 mc.lvl_a_id >= CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS)) {
353 printk(KERN_WARNING "litmus: real-time task %d rejected because "
354 "of invalid level A id\n", pid);
355 goto out_unlock;
356 }
357
358 mc_data = tsk_rt(target)->mc_data;
359 if (!mc_data) {
360 mc_data = kmem_cache_alloc(mc_data_cache, GFP_ATOMIC);
361 if (!mc_data) {
362 retval = -ENOMEM;
363 goto out_unlock;
364 }
365 tsk_rt(target)->mc_data = mc_data;
366 }
367 mc_data->mc_task = mc;
368
369 retval = 0;
370out_unlock:
371 read_unlock_irq(&tasklist_lock);
372out:
373 return retval;
374}
375#else
376asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param)
377{
378 /* don't allow this syscall if the plugin is not enabled */
379 return -EINVAL;
380}
381#endif
382
293/* p is a real-time task. Re-init its state as a best-effort task. */ 383/* p is a real-time task. Re-init its state as a best-effort task. */
294static void reinit_litmus_state(struct task_struct* p, int restore) 384static void reinit_litmus_state(struct task_struct* p, int restore)
295{ 385{
296 struct rt_task user_config = {}; 386 struct rt_task user_config = {};
297 void* ctrl_page = NULL; 387 void* ctrl_page = NULL;
388 void* color_ctrl_page = NULL;
298 389
299 if (restore) { 390 if (restore) {
300 /* Safe user-space provided configuration data. 391 /* Safe user-space provided configuration data.
301 * and allocated page. */ 392 * and allocated page. */
302 user_config = p->rt_param.task_params; 393 user_config = p->rt_param.task_params;
303 ctrl_page = p->rt_param.ctrl_page; 394 ctrl_page = p->rt_param.ctrl_page;
395 color_ctrl_page = p->rt_param.color_ctrl_page;
304 } 396 }
305 397
306 /* We probably should not be inheriting any task's priority 398 /* We probably should not be inheriting any task's priority
@@ -313,8 +405,9 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
313 405
314 /* Restore preserved fields. */ 406 /* Restore preserved fields. */
315 if (restore) { 407 if (restore) {
316 p->rt_param.task_params = user_config; 408 p->rt_param.task_params = user_config;
317 p->rt_param.ctrl_page = ctrl_page; 409 p->rt_param.ctrl_page = ctrl_page;
410 p->rt_param.color_ctrl_page = color_ctrl_page;
318 } 411 }
319} 412}
320 413
@@ -460,9 +553,11 @@ void litmus_fork(struct task_struct* p)
460 reinit_litmus_state(p, 0); 553 reinit_litmus_state(p, 0);
461 /* Don't let the child be a real-time task. */ 554 /* Don't let the child be a real-time task. */
462 p->sched_reset_on_fork = 1; 555 p->sched_reset_on_fork = 1;
463 } else 556 } else {
464 /* non-rt tasks might have ctrl_page set */ 557 /* non-rt tasks might have ctrl_page set */
465 tsk_rt(p)->ctrl_page = NULL; 558 tsk_rt(p)->ctrl_page = NULL;
559 tsk_rt(p)->color_ctrl_page = NULL;
560 }
466 561
467 /* od tables are never inherited across a fork */ 562 /* od tables are never inherited across a fork */
468 p->od_table = NULL; 563 p->od_table = NULL;
@@ -482,6 +577,10 @@ void litmus_exec(void)
482 free_page((unsigned long) tsk_rt(p)->ctrl_page); 577 free_page((unsigned long) tsk_rt(p)->ctrl_page);
483 tsk_rt(p)->ctrl_page = NULL; 578 tsk_rt(p)->ctrl_page = NULL;
484 } 579 }
580 if (tsk_rt(p)->color_ctrl_page) {
581 free_page((unsigned long) tsk_rt(p)->color_ctrl_page);
582 tsk_rt(p)->color_ctrl_page = NULL;
583 }
485 } 584 }
486} 585}
487 586
@@ -499,6 +598,21 @@ void exit_litmus(struct task_struct *dead_tsk)
499 tsk_rt(dead_tsk)->ctrl_page); 598 tsk_rt(dead_tsk)->ctrl_page);
500 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); 599 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
501 } 600 }
601 if (tsk_rt(dead_tsk)->color_ctrl_page) {
602 TRACE_TASK(dead_tsk,
603 "freeing color_ctrl_page %p\n",
604 tsk_rt(dead_tsk)->color_ctrl_page);
605 free_page((unsigned long) tsk_rt(dead_tsk)->color_ctrl_page);
606 }
607
608#ifdef CONFIG_PLUGIN_MC
609 /* The MC-setup syscall might succeed and allocate mc_data, but the
610 * task may not exit in real-time mode, and that memory will leak.
611 * Check and free it here.
612 */
613 if (tsk_rt(dead_tsk)->mc_data)
614 kmem_cache_free(mc_data_cache, tsk_rt(dead_tsk)->mc_data);
615#endif
502 616
503 /* main cleanup only for RT tasks */ 617 /* main cleanup only for RT tasks */
504 if (is_realtime(dead_tsk)) 618 if (is_realtime(dead_tsk))
@@ -542,8 +656,14 @@ static int __init _init_litmus(void)
542 656
543 register_sched_plugin(&linux_sched_plugin); 657 register_sched_plugin(&linux_sched_plugin);
544 658
545 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); 659 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
546 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC); 660 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
661#ifdef CONFIG_MERGE_TIMERS
662 event_list_cache = KMEM_CACHE(event_list, SLAB_PANIC);
663#endif
664#ifdef CONFIG_PLUGIN_MC
665 mc_data_cache = KMEM_CACHE(mc_data, SLAB_PANIC);
666#endif
547 667
548#ifdef CONFIG_MAGIC_SYSRQ 668#ifdef CONFIG_MAGIC_SYSRQ
549 /* offer some debugging help */ 669 /* offer some debugging help */
@@ -567,6 +687,12 @@ static void _exit_litmus(void)
567 exit_litmus_proc(); 687 exit_litmus_proc();
568 kmem_cache_destroy(bheap_node_cache); 688 kmem_cache_destroy(bheap_node_cache);
569 kmem_cache_destroy(release_heap_cache); 689 kmem_cache_destroy(release_heap_cache);
690#ifdef CONFIG_MERGE_TIMERS
691 kmem_cache_destroy(event_list_cache);
692#endif
693#ifdef CONFIG_PLUGIN_MC
694 kmem_cache_destroy(mc_data_cache);
695#endif
570} 696}
571 697
572module_init(_init_litmus); 698module_init(_init_litmus);
diff --git a/litmus/locking.c b/litmus/locking.c
index ca5a073a989e..1d32dcd8e726 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -1,3 +1,5 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
1#include <litmus/fdso.h> 3#include <litmus/fdso.h>
2 4
3#ifdef CONFIG_LITMUS_LOCKING 5#ifdef CONFIG_LITMUS_LOCKING
@@ -29,14 +31,18 @@ static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
29 return (struct litmus_lock*) entry->obj->obj; 31 return (struct litmus_lock*) entry->obj->obj;
30} 32}
31 33
34atomic_t lock_id = ATOMIC_INIT(0);
35
32static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg) 36static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
33{ 37{
34 struct litmus_lock* lock; 38 struct litmus_lock* lock;
35 int err; 39 int err;
36 40
37 err = litmus->allocate_lock(&lock, type, arg); 41 err = litmus->allocate_lock(&lock, type, arg);
38 if (err == 0) 42 if (err == 0) {
43 lock->id = atomic_add_return(1, &lock_id);
39 *obj_ref = lock; 44 *obj_ref = lock;
45 }
40 return err; 46 return err;
41} 47}
42 48
diff --git a/litmus/preempt.c b/litmus/preempt.c
index 92c5d1b26a13..3606cd7ffae7 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -6,6 +6,8 @@
6/* The rescheduling state of each processor. 6/* The rescheduling state of each processor.
7 */ 7 */
8DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); 8DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
9#define TRACE_TASK(t, fmt, args...)
10#define TRACE(fmt, args...)
9 11
10void sched_state_will_schedule(struct task_struct* tsk) 12void sched_state_will_schedule(struct task_struct* tsk)
11{ 13{
@@ -30,6 +32,7 @@ void sched_state_will_schedule(struct task_struct* tsk)
30 /* /\* Litmus tasks should never be subject to a remote */ 32 /* /\* Litmus tasks should never be subject to a remote */
31 /* * set_tsk_need_resched(). *\/ */ 33 /* * set_tsk_need_resched(). *\/ */
32 /* BUG_ON(is_realtime(tsk)); */ 34 /* BUG_ON(is_realtime(tsk)); */
35
33#ifdef CONFIG_PREEMPT_STATE_TRACE 36#ifdef CONFIG_PREEMPT_STATE_TRACE
34 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", 37 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
35 __builtin_return_address(0)); 38 __builtin_return_address(0));
diff --git a/litmus/rm_common.c b/litmus/rm_common.c
new file mode 100644
index 000000000000..f608a084d3b8
--- /dev/null
+++ b/litmus/rm_common.c
@@ -0,0 +1,91 @@
1/*
2 * litmus/rm_common.c
3 *
4 * Common functions for RM based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/rm_common.h>
16
17/* rm_higher_prio - returns true if first has a higher RM priority
18 * than second. Period ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int rm_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (first && first == second) {
30 TRACE_TASK(first,
31 "WARNING: pointless rm priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40 return !is_realtime(second_task) ||
41
42 /* is the period of the first task shorter?
43 * Then it has higher priority.
44 */
45 lt_before(get_rt_period(first_task), get_rt_period(second_task)) ||
46
47 /* Do we have a period tie?
48 * Then break by PID.
49 */
50 (get_rt_period(first_task) == get_rt_period(second_task) &&
51 (first_task->pid < second_task->pid ||
52
53 /* If the PIDs are the same then the task with the inherited
54 * priority wins.
55 */
56 (first_task->pid == second_task->pid &&
57 !second->rt_param.inh_task)));
58}
59
60int rm_ready_order(struct bheap_node* a, struct bheap_node* b)
61{
62 return rm_higher_prio(bheap2task(a), bheap2task(b));
63}
64
65void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
66 release_jobs_t release)
67{
68 rt_domain_init(rt, rm_ready_order, resched, release);
69}
70
71/* need_to_preempt - check whether the task t needs to be preempted
72 * call only with irqs disabled and with ready_lock acquired
73 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
74 */
75int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
76{
77 /* we need the read lock for rm_ready_queue */
78 /* no need to preempt if there is nothing pending */
79 if (!__jobs_pending(rt))
80 return 0;
81 /* we need to reschedule if t doesn't exist */
82 if (!t)
83 return 1;
84
85 /* NOTE: We cannot check for non-preemptibility since we
86 * don't know what address space we're currently in.
87 */
88
89 /* make sure to get non-rt stuff out of the way */
90 return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t);
91}
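/* Illustrative example, not part of the patch: rate-monotonic priority is
 * fixed by period, so for tasks A (period 10 ms) and B (period 25 ms)
 *
 *	rm_higher_prio(A, B) == 1	(10 ms < 25 ms, releases are irrelevant)
 *	rm_higher_prio(B, A) == 0
 *
 * and two tasks with equal periods are ordered by PID, lower PID first.
 */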
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index d0b796611bea..c63bd0303916 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -12,17 +12,15 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13 13
14#include <litmus/litmus.h> 14#include <litmus/litmus.h>
15#include <litmus/event_group.h>
15#include <litmus/sched_plugin.h> 16#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h> 17#include <litmus/sched_trace.h>
17
18#include <litmus/rt_domain.h> 18#include <litmus/rt_domain.h>
19
20#include <litmus/trace.h> 19#include <litmus/trace.h>
21
22#include <litmus/bheap.h> 20#include <litmus/bheap.h>
23 21
24/* Uncomment when debugging timer races... */ 22/* Uncomment when debugging timer races... */
25#if 0 23#if 1
26#define VTRACE_TASK TRACE_TASK 24#define VTRACE_TASK TRACE_TASK
27#define VTRACE TRACE 25#define VTRACE TRACE
28#else 26#else
@@ -51,36 +49,51 @@ static unsigned int time2slot(lt_t time)
51 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS; 49 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
52} 50}
53 51
54static enum hrtimer_restart on_release_timer(struct hrtimer *timer) 52static void do_release(struct release_heap *rh)
55{ 53{
56 unsigned long flags; 54 unsigned long flags;
57 struct release_heap* rh; 55
58 rh = container_of(timer, struct release_heap, timer); 56 if (CRIT_LEVEL_B == rh->dom->level) {
57 TS_LVLB_RELEASE_START;
58 } else {
59 TS_LVLC_RELEASE_START;
60 }
59 61
60 TS_RELEASE_LATENCY(rh->release_time); 62 TS_RELEASE_LATENCY(rh->release_time);
61 63
62 VTRACE("on_release_timer(0x%p) starts.\n", timer); 64 VTRACE("on_release_timer starts.\n");
63 65
64 TS_RELEASE_START; 66 TS_RELEASE_START;
65 67
66
67 raw_spin_lock_irqsave(&rh->dom->release_lock, flags); 68 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
68 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); 69 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
69 /* remove from release queue */ 70 /* remove from release queue */
70 list_del(&rh->list); 71 list_del_init(&rh->list);
71 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags); 72 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
72 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock); 73 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
73 74
74 /* call release callback */ 75 /* call release callback */
75 rh->dom->release_jobs(rh->dom, &rh->heap); 76 rh->dom->release_jobs(rh->dom, &rh->heap);
76 /* WARNING: rh can be referenced from other CPUs from now on. */
77
78 TS_RELEASE_END;
79 77
80 VTRACE("on_release_timer(0x%p) ends.\n", timer); 78 if (CRIT_LEVEL_B == rh->dom->level) {
79 TS_LVLB_RELEASE_END;
80 } else {
81 TS_LVLC_RELEASE_END;
82 }
83}
81 84
82 return HRTIMER_NORESTART; 85#ifdef CONFIG_MERGE_TIMERS
86static void on_release(struct rt_event *e)
87{
88 do_release(container_of(e, struct release_heap, event));
83} 89}
90#else
91static enum hrtimer_restart on_release(struct hrtimer *timer)
92{
93 do_release(container_of(timer, struct release_heap, timer));
94 return HRTIMER_NORESTART;
95}
96#endif
84 97
85/* allocated in litmus.c */ 98/* allocated in litmus.c */
86struct kmem_cache * release_heap_cache; 99struct kmem_cache * release_heap_cache;
@@ -88,19 +101,35 @@ struct kmem_cache * release_heap_cache;
88struct release_heap* release_heap_alloc(int gfp_flags) 101struct release_heap* release_heap_alloc(int gfp_flags)
89{ 102{
90 struct release_heap* rh; 103 struct release_heap* rh;
91 rh= kmem_cache_alloc(release_heap_cache, gfp_flags); 104 rh = kmem_cache_alloc(release_heap_cache, gfp_flags);
92 if (rh) { 105 if (rh) {
106#ifdef CONFIG_MERGE_TIMERS
107 init_event(&rh->event, 0, on_release,
108 event_list_alloc(GFP_ATOMIC));
109#else
93 /* initialize timer */ 110 /* initialize timer */
94 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 111 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
95 rh->timer.function = on_release_timer; 112 rh->timer.function = on_release;
113#endif
96 } 114 }
97 return rh; 115 return rh;
98} 116}
99 117
118#ifdef CONFIG_MERGE_TIMERS
119extern struct kmem_cache *event_list_cache;
120#endif
121
100void release_heap_free(struct release_heap* rh) 122void release_heap_free(struct release_heap* rh)
101{ 123{
102 /* make sure timer is no longer in use */ 124 /* make sure timer is no longer in use */
125#ifdef CONFIG_MERGE_TIMERS
126 if (rh->dom) {
127 cancel_event(&rh->event);
128 kmem_cache_free(event_list_cache, rh->event.event_list);
129 }
130#else
103 hrtimer_cancel(&rh->timer); 131 hrtimer_cancel(&rh->timer);
132#endif
104 kmem_cache_free(release_heap_cache, rh); 133 kmem_cache_free(release_heap_cache, rh);
105} 134}
106 135
@@ -149,13 +178,17 @@ static struct release_heap* get_release_heap(rt_domain_t *rt,
149 return heap; 178 return heap;
150} 179}
151 180
152static void reinit_release_heap(struct task_struct* t) 181static void reinit_release_heap(rt_domain_t *rt, struct task_struct* t)
153{ 182{
154 struct release_heap* rh; 183 struct release_heap* rh;
155 184
156 /* use pre-allocated release heap */ 185 /* use pre-allocated release heap */
157 rh = tsk_rt(t)->rel_heap; 186 rh = tsk_rt(t)->rel_heap;
158 187
188#ifdef CONFIG_MERGE_TIMERS
189 rh->event.prio = rt->prio;
190 cancel_event(&rh->event);
191#else
159 /* Make sure it is safe to use. The timer callback could still 192 /* Make sure it is safe to use. The timer callback could still
160 * be executing on another CPU; hrtimer_cancel() will wait 193 * be executing on another CPU; hrtimer_cancel() will wait
161 * until the timer callback has completed. However, under no 194 * until the timer callback has completed. However, under no
@@ -167,13 +200,50 @@ static void reinit_release_heap(struct task_struct* t)
167 */ 200 */
168 BUG_ON(hrtimer_cancel(&rh->timer)); 201 BUG_ON(hrtimer_cancel(&rh->timer));
169 202
203#ifdef CONFIG_RELEASE_MASTER
204 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
205#endif
206#endif
170 /* initialize */ 207 /* initialize */
171 bheap_init(&rh->heap); 208 bheap_init(&rh->heap);
209
210}
211
172#ifdef CONFIG_RELEASE_MASTER 212#ifdef CONFIG_RELEASE_MASTER
173 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE); 213static void arm_release_timer_on(struct release_heap *rh, int target_cpu)
214#else
215static void arm_release_timer(struct release_heap *rh)
216#endif
217{
218#ifdef CONFIG_MERGE_TIMERS
219 add_event(rh->dom->event_group, &rh->event, rh->release_time);
220#else
221 VTRACE("arming timer 0x%p\n", &rh->timer);
222 /* we cannot arm the timer using hrtimer_start()
223 * as it may deadlock on rq->lock
224 * PINNED mode is ok on both local and remote CPU
225 */
226
227#ifdef CONFIG_RELEASE_MASTER
228 if (rh->dom->release_master == NO_CPU && target_cpu == NO_CPU)
229#endif
230 __hrtimer_start_range_ns(&rh->timer,
231 ns_to_ktime(rh->release_time),
232 0, HRTIMER_MODE_ABS_PINNED, 0);
233#ifdef CONFIG_RELEASE_MASTER
234 else
235 hrtimer_start_on(/* target_cpu overrides release master */
236 (target_cpu != NO_CPU ?
237 target_cpu : rh->dom->release_master),
238 &rh->info, &rh->timer,
239 ns_to_ktime(rh->release_time),
240 HRTIMER_MODE_ABS_PINNED);
241#endif
174#endif 242#endif
175} 243}
176/* arm_release_timer() - start local release timer or trigger 244
245
246/* setup_release() - start local release timer or trigger
177 * remote timer (pull timer) 247 * remote timer (pull timer)
178 * 248 *
179 * Called by add_release() with: 249 * Called by add_release() with:
@@ -181,10 +251,10 @@ static void reinit_release_heap(struct task_struct* t)
181 * - IRQ disabled 251 * - IRQ disabled
182 */ 252 */
183#ifdef CONFIG_RELEASE_MASTER 253#ifdef CONFIG_RELEASE_MASTER
184#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU) 254#define setup_release(t) setup_release_on((t), NO_CPU)
185static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu) 255static void setup_release_on(rt_domain_t *_rt , int target_cpu)
186#else 256#else
187static void arm_release_timer(rt_domain_t *_rt) 257static void setup_release(rt_domain_t *_rt)
188#endif 258#endif
189{ 259{
190 rt_domain_t *rt = _rt; 260 rt_domain_t *rt = _rt;
@@ -193,14 +263,14 @@ static void arm_release_timer(rt_domain_t *_rt)
193 struct task_struct* t; 263 struct task_struct* t;
194 struct release_heap* rh; 264 struct release_heap* rh;
195 265
196 VTRACE("arm_release_timer() at %llu\n", litmus_clock()); 266 VTRACE("setup_release() at %llu\n", litmus_clock());
197 list_replace_init(&rt->tobe_released, &list); 267 list_replace_init(&rt->tobe_released, &list);
198 268
199 list_for_each_safe(pos, safe, &list) { 269 list_for_each_safe(pos, safe, &list) {
200 /* pick task of work list */ 270 /* pick task of work list */
201 t = list_entry(pos, struct task_struct, rt_param.list); 271 t = list_entry(pos, struct task_struct, rt_param.list);
202 sched_trace_task_release(t); 272 sched_trace_task_release(t);
203 list_del(pos); 273 list_del_init(pos);
204 274
205 /* put into release heap while holding release_lock */ 275 /* put into release heap while holding release_lock */
206 raw_spin_lock(&rt->release_lock); 276 raw_spin_lock(&rt->release_lock);
@@ -213,7 +283,7 @@ static void arm_release_timer(rt_domain_t *_rt)
213 VTRACE_TASK(t, "Dropped release_lock 0x%p\n", 283 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
214 &rt->release_lock); 284 &rt->release_lock);
215 285
216 reinit_release_heap(t); 286 reinit_release_heap(rt, t);
217 VTRACE_TASK(t, "release_heap ready\n"); 287 VTRACE_TASK(t, "release_heap ready\n");
218 288
219 raw_spin_lock(&rt->release_lock); 289 raw_spin_lock(&rt->release_lock);
@@ -223,7 +293,7 @@ static void arm_release_timer(rt_domain_t *_rt)
223 rh = get_release_heap(rt, t, 1); 293 rh = get_release_heap(rt, t, 1);
224 } 294 }
225 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node); 295 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
226 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n"); 296 VTRACE_TASK(t, "setup_release(): added to release heap\n");
227 297
228 raw_spin_unlock(&rt->release_lock); 298 raw_spin_unlock(&rt->release_lock);
229 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock); 299 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
@@ -233,39 +303,19 @@ static void arm_release_timer(rt_domain_t *_rt)
233 * this release_heap anyway). 303 * this release_heap anyway).
234 */ 304 */
235 if (rh == tsk_rt(t)->rel_heap) { 305 if (rh == tsk_rt(t)->rel_heap) {
236 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
237 /* we cannot arm the timer using hrtimer_start()
238 * as it may deadlock on rq->lock
239 *
240 * PINNED mode is ok on both local and remote CPU
241 */
242#ifdef CONFIG_RELEASE_MASTER 306#ifdef CONFIG_RELEASE_MASTER
243 if (rt->release_master == NO_CPU && 307 arm_release_timer_on(rh, target_cpu);
244 target_cpu == NO_CPU) 308#else
245#endif 309 arm_release_timer(rh);
246 __hrtimer_start_range_ns(&rh->timer,
247 ns_to_ktime(rh->release_time),
248 0, HRTIMER_MODE_ABS_PINNED, 0);
249#ifdef CONFIG_RELEASE_MASTER
250 else
251 hrtimer_start_on(
252 /* target_cpu overrides release master */
253 (target_cpu != NO_CPU ?
254 target_cpu : rt->release_master),
255 &rh->info, &rh->timer,
256 ns_to_ktime(rh->release_time),
257 HRTIMER_MODE_ABS_PINNED);
258#endif 310#endif
259 } else 311 }
260 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
261 } 312 }
262} 313}
263 314
264void rt_domain_init(rt_domain_t *rt, 315void rt_domain_init(rt_domain_t *rt,
265 bheap_prio_t order, 316 bheap_prio_t order,
266 check_resched_needed_t check, 317 check_resched_needed_t check,
267 release_jobs_t release 318 release_jobs_t release)
268 )
269{ 319{
270 int i; 320 int i;
271 321
@@ -277,7 +327,7 @@ void rt_domain_init(rt_domain_t *rt,
277 if (!order) 327 if (!order)
278 order = dummy_order; 328 order = dummy_order;
279 329
280#ifdef CONFIG_RELEASE_MASTER 330#if defined(CONFIG_RELEASE_MASTER) && !defined(CONFIG_MERGE_TIMERS)
281 rt->release_master = NO_CPU; 331 rt->release_master = NO_CPU;
282#endif 332#endif
283 333
@@ -300,14 +350,13 @@ void rt_domain_init(rt_domain_t *rt,
300 */ 350 */
301void __add_ready(rt_domain_t* rt, struct task_struct *new) 351void __add_ready(rt_domain_t* rt, struct task_struct *new)
302{ 352{
303 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " 353 VTRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
304 "to ready queue at %llu\n", 354 new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
305 new->comm, new->pid, 355 get_release(new), litmus_clock());
306 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
307 get_release(new), litmus_clock());
308 356
309 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); 357 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
310 358
359 new->rt_param.domain = rt;
311 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node); 360 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
312 rt->check_resched(rt); 361 rt->check_resched(rt);
313} 362}
@@ -326,7 +375,7 @@ void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
326void __add_release_on(rt_domain_t* rt, struct task_struct *task, 375void __add_release_on(rt_domain_t* rt, struct task_struct *task,
327 int target_cpu) 376 int target_cpu)
328{ 377{
329 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n", 378 VTRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
330 get_release(task), target_cpu); 379 get_release(task), target_cpu);
331 list_add(&tsk_rt(task)->list, &rt->tobe_released); 380 list_add(&tsk_rt(task)->list, &rt->tobe_released);
332 task->rt_param.domain = rt; 381 task->rt_param.domain = rt;
@@ -334,7 +383,7 @@ void __add_release_on(rt_domain_t* rt, struct task_struct *task,
334 /* start release timer */ 383 /* start release timer */
335 TS_SCHED2_START(task); 384 TS_SCHED2_START(task);
336 385
337 arm_release_timer_on(rt, target_cpu); 386 setup_release_on(rt, target_cpu);
338 387
339 TS_SCHED2_END(task); 388 TS_SCHED2_END(task);
340} 389}
@@ -345,15 +394,88 @@ void __add_release_on(rt_domain_t* rt, struct task_struct *task,
345 */ 394 */
346void __add_release(rt_domain_t* rt, struct task_struct *task) 395void __add_release(rt_domain_t* rt, struct task_struct *task)
347{ 396{
348 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task)); 397 VTRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
349 list_add(&tsk_rt(task)->list, &rt->tobe_released); 398 list_add(&tsk_rt(task)->list, &rt->tobe_released);
350 task->rt_param.domain = rt; 399 task->rt_param.domain = rt;
351 400
352 /* start release timer */ 401 /* start release timer */
353 TS_SCHED2_START(task); 402 TS_SCHED2_START(task);
354 403
355 arm_release_timer(rt); 404 setup_release(rt);
356 405
357 TS_SCHED2_END(task); 406 TS_SCHED2_END(task);
358} 407}
359 408
409/******************************************************************************
410 * domain_t wrapper
411 ******************************************************************************/
412
413/* pd_requeue - calls underlying rt_domain add methods.
414 * If the task is not yet released, it is inserted into the rt_domain
415 * ready queue. Otherwise, it is queued for release.
416 *
417 * Assumes the caller already holds dom->lock.
418 */
419static void pd_requeue(domain_t *dom, struct task_struct *task)
420{
421 rt_domain_t *domain = (rt_domain_t*)dom->data;
422
423 TRACE_TASK(task, "Requeueing\n");
424 BUG_ON(!task || !is_realtime(task));
425 BUG_ON(is_queued(task));
426 BUG_ON(get_task_domain(task) != dom);
427
428 if (is_released(task, litmus_clock())) {
429 __add_ready(domain, task);
430 VTRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
431 task->comm, task->pid, get_exec_cost(task), get_rt_period(task),
432 get_release(task), litmus_clock());
433 } else {
434 /* task has to wait for next release */
435 VTRACE_TASK(task, "add release(), rel=%llu\n", get_release(task));
436 add_release(domain, task);
437 }
438
439}
440
441/* pd_take_ready - removes and returns the next ready task from the rt_domain
442 *
443 * Assumes the caller already holds dom->lock.
444 */
445static struct task_struct* pd_take_ready(domain_t *dom)
446{
447 return __take_ready((rt_domain_t*)dom->data);
448 }
449
450/* pd_peek_ready - returns the head of the rt_domain ready queue
451 *
452 * Assumes the caller already holds dom->lock.
453 */
454static struct task_struct* pd_peek_ready(domain_t *dom)
455{
456 return __next_ready((rt_domain_t*)dom->data);
457}
458
459static void pd_remove(domain_t *dom, struct task_struct *task)
460{
461 if (is_queued(task))
462 remove((rt_domain_t*)dom->data, task);
463}
464
465/* pd_domain_init - create a generic domain wrapper for an rt_domain
466 */
467void pd_domain_init(domain_t *dom,
468 rt_domain_t *domain,
469 bheap_prio_t order,
470 check_resched_needed_t check,
471 release_jobs_t release,
472 preempt_needed_t preempt_needed,
473 task_prio_t priority)
474{
475 rt_domain_init(domain, order, check, release);
476 domain_init(dom, &domain->ready_lock,
477 pd_requeue, pd_peek_ready, pd_take_ready,
478 preempt_needed, priority);
479 dom->remove = pd_remove;
480 dom->data = domain;
481}
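
For orientation, here is a minimal sketch (not part of this patch) of how a plugin might wrap a per-CPU rt_domain with the new pd_domain_init() interface. The my_* names are illustrative only, and it is assumed here that the EDF helpers from <litmus/edf_common.h> are compatible with the preempt_needed_t and task_prio_t typedefs declared in the new domain.h:

/* Illustrative sketch only -- not part of the patch. */
#include <litmus/rt_domain.h>
#include <litmus/domain.h>
#include <litmus/edf_common.h>

static rt_domain_t my_rt_dom;   /* underlying release/ready queues        */
static domain_t    my_dom;      /* generic wrapper handed to the plugin   */

static int my_check_resched(rt_domain_t *rt)
{
	/* A real plugin would compare the new ready head against the
	 * currently linked task and trigger a reschedule if needed. */
	return 0;
}

static void my_release_jobs(rt_domain_t *rt, struct bheap *tasks)
{
	__merge_ready(rt, tasks);  /* locking omitted in this sketch */
}

static void my_domain_setup(void)
{
	pd_domain_init(&my_dom, &my_rt_dom,
		       edf_ready_order,        /* bheap_prio_t ordering        */
		       my_check_resched,       /* check_resched_needed_t       */
		       my_release_jobs,        /* release_jobs_t               */
		       edf_preemption_needed,  /* preempt_needed_t (assumed)   */
		       edf_higher_prio);       /* task_prio_t (assumed)        */
}
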
diff --git a/litmus/rt_server.c b/litmus/rt_server.c
new file mode 100644
index 000000000000..74d7c7b0f81a
--- /dev/null
+++ b/litmus/rt_server.c
@@ -0,0 +1,23 @@
1#include <litmus/rt_server.h>
2
3static void default_server_update(struct rt_server *srv)
4{
5}
6
7void init_rt_server(struct rt_server *server,
8 int sid, int cpu, rt_domain_t *domain,
9 need_preempt_t need_preempt,
10 server_update_t update)
11{
12	BUG_ON(!need_preempt);
13
14
15 server->need_preempt = need_preempt;
16 server->update = (update) ? update : default_server_update;
17
18 server->sid = sid;
19 server->cpu = cpu;
20 server->linked = NULL;
21 server->domain = domain;
22 server->running = 0;
23}
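
Usage, condensed from init_color() in sched_color.c below (cpu and entry come from the per-CPU initialization loop there): each processor gets a CPU server backed by its partitioned RM domain plus a FIFO server backed by the shared best-effort domain.

struct cpu_entry *entry = remote_entry(cpu);

/* FIFO (best-effort) server: ids above the CPU-server id space,
 * scheduled out of the shared fifo_domain, updated by fifo_update(). */
init_rt_server(&entry->fifo_server.server,
	       cpu + num_online_cpus() + 1, cpu,
	       &fifo_domain,
	       fifo_preemption_needed, fifo_update);

/* CPU server: one per processor, backed by the partitioned RM domain;
 * passing NULL installs the default no-op update callback. */
init_rt_server(&entry->server, cpu + 1, cpu,
	       &entry->rm_domain, rm_preemption_needed, NULL);
entry->server.running = 1;
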
diff --git a/litmus/sched_color.c b/litmus/sched_color.c
new file mode 100644
index 000000000000..66ce40fd1b57
--- /dev/null
+++ b/litmus/sched_color.c
@@ -0,0 +1,889 @@
1#include <linux/percpu.h>
2#include <linux/sched.h>
3#include <linux/list.h>
4#include <linux/spinlock.h>
5#include <linux/module.h>
6#include <linux/slab.h>
7
8#include <litmus/litmus.h>
9#include <litmus/jobs.h>
10#include <litmus/preempt.h>
11#include <litmus/sched_plugin.h>
12#include <litmus/rm_common.h>
13#include <litmus/sched_trace.h>
14#include <litmus/color.h>
15#include <litmus/fifo_common.h>
16#include <litmus/budget.h>
17#include <litmus/rt_server.h>
18#include <litmus/dgl.h>
19
20/**
21 * @rt_server Common server functionality.
22 * @task Task used to schedule server.
23 * @timer Budget enforcement for @task
24 * @start_time If set, time at which server began running.
25 */
26struct fifo_server {
27 struct rt_server server;
28 struct task_struct* task;
29 struct enforcement_timer timer;
30 lt_t start_time;
31};
32
33/**
34 * @server Common server functionality.
35 * @rm_domain PRM domain.
36 * @scheduled Task physically running on CPU.
37 * @fifo_server Server partitioned to this CPU.
38 */
39struct cpu_entry {
40 struct rt_server server;
41 rt_domain_t rm_domain;
42 struct task_struct* scheduled;
43 struct fifo_server fifo_server;
44 struct hrtimer chunk_timer;
45};
46
47DEFINE_PER_CPU(struct cpu_entry, color_cpus);
48
49static rt_domain_t fifo_domain;
50static raw_spinlock_t fifo_lock;
51
52static struct dgl group_lock;
53static raw_spinlock_t dgl_lock;
54
55#define local_entry (&__get_cpu_var(color_cpus))
56#define remote_entry(cpu) (&per_cpu(color_cpus, cpu))
57#define task_entry(task) remote_entry(get_partition(task))
58#define task_fserver(task) (&task_entry(task)->fifo_server.server)
59#define entry_lock(entry) (&(entry)->rm_domain.ready_lock)
60
61
62#define task_dom(entry, task) (is_be(task) ? &fifo_domain : &entry->rm_domain)
63#define task_lock(entry, task) (is_be(task) ? &fifo_lock : entry_lock(entry))
64#define is_fifo_server(s) ((s)->sid > num_online_cpus())
65#define lock_if(lock, cond) do { if (cond) raw_spin_lock(lock);} while(0)
66#define unlock_if(lock, cond) do { if (cond) raw_spin_unlock(lock);} while(0)
67
68#ifdef CONFIG_NP_SECTION
69#define has_resources(t, c) (tsk_rt(t)->req == group_lock.acquired[c])
70#else
71#define has_resources(t, c) (1)
72#endif
73
74/*
75 * Requeue onto domain's release or ready queue based on task state.
76 */
77static void requeue(rt_domain_t *dom, struct task_struct* t)
78{
79 if (is_server(t) && !tsk_rt(t)->present)
80 /* Remove stopped server from the system */
81 return;
82
83 TRACE_TASK(t, "Requeueing\n");
84 if (is_queued(t)) {
85 TRACE_TASK(t, "Already queued!\n");
86 return;
87 }
88
89 set_rt_flags(t, RT_F_RUNNING);
90 if (is_released(t, litmus_clock()))
91 __add_ready(dom, t);
92 else
93 add_release(dom, t);
94}
95
96enum hrtimer_restart chunk_fire(struct hrtimer *timer)
97{
98 unsigned long flags;
99 local_irq_save(flags);
100 TRACE("Chunk timer fired.\n");
101 litmus_reschedule_local();
102 local_irq_restore(flags);
103 return HRTIMER_NORESTART;
104}
105
106void chunk_arm(struct cpu_entry *entry)
107{
108 unsigned long fire;
109 if (color_chunk) {
110 fire = litmus_clock() + color_chunk;
111 TRACE("Arming chunk timer for %llu\n", fire);
112 __hrtimer_start_range_ns(&entry->chunk_timer,
113 ns_to_ktime(fire), 0,
114 HRTIMER_MODE_ABS_PINNED, 0);
115 }
116}
117
118void chunk_cancel(struct cpu_entry *entry)
119{
120 TRACE("Cancelling chunk timer\n");
121 hrtimer_try_to_cancel(&entry->chunk_timer);
122}
123
124/*
125 * Relinquish resources held by @t (or its children).
126 */
127static void release_resources(struct task_struct *t)
128{
129 struct task_struct *sched;
130#ifdef CONFIG_NP_SECTION
131
132 TRACE_TASK(t, "Releasing resources\n");
133
134 if (is_server(t)) {
135 sched = task_fserver(t)->linked;
136 if (sched)
137 release_resources(sched);
138 } else if (is_kernel_np(t))
139 remove_group_req(&group_lock, tsk_rt(t)->req);
140 take_np(t);
141#endif
142}
143
144/*
145 * Put in requests for resources needed by @t. If @t is a server, this will
146 * set @t's np flag to reflect resources held by @t's children.
147 */
148static void acquire_resources(struct task_struct *t)
149{
150 int cpu;
151 struct rt_server *server;
152 struct task_struct *sched;
153
154#ifdef CONFIG_NP_SECTION
155 /* Can't acquire resources if t is not running */
156 BUG_ON(!get_task_server(t));
157
158 if (is_kernel_np(t)) {
159 TRACE_TASK(t, "Already contending for resources\n");
160 return;
161 }
162 cpu = get_task_server(t)->cpu;
163
164 if (is_server(t)) {
165 server = task_fserver(t);
166 sched = server->linked;
167
168 /* Happens when server is booted off on completion or
169 * has just completed executing a task.
170 */
171 if (sched && !is_kernel_np(sched))
172 acquire_resources(sched);
173
174 /* Become np if there is a running task */
175 if (sched && has_resources(sched, cpu)) {
176 TRACE_TASK(t, "Running task with resource\n");
177 make_np(t);
178 } else {
179 TRACE_TASK(t, "Running no resources\n");
180 take_np(t);
181 }
182 } else {
183 TRACE_TASK(t, "Acquiring resources\n");
184 if (!has_resources(t, cpu))
185 add_group_req(&group_lock, tsk_rt(t)->req, cpu);
186 make_np(t);
187 }
188#endif
189}
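
The group-lock (DGL) calls used above and in color_task_new()/color_task_exit() follow a simple request lifecycle. The sketch below is pieced together from those call sites only (dgl.h is not shown in this hunk), so the color, nr_pages, and cpu arguments are illustrative:

/* Illustrative DGL request lifecycle, inferred from the call sites here. */
struct dgl_group_req *req = kmalloc(sizeof(*req), GFP_ATOMIC);

dgl_group_req_init(&group_lock, req);         /* color_task_new: empty request */
set_req(&group_lock, req, color, nr_pages);   /* one entry per color in use    */

raw_spin_lock(&dgl_lock);
add_group_req(&group_lock, req, cpu);         /* contend (acquire_resources)   */
raw_spin_unlock(&dgl_lock);

/* ... the task runs only while has_resources(t, cpu) holds ... */

raw_spin_lock(&dgl_lock);
remove_group_req(&group_lock, req);           /* drop (release_resources)      */
raw_spin_unlock(&dgl_lock);

dgl_group_req_free(req);                      /* color_task_exit               */
kfree(req);
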
190
191/*
192 * Stop logically running the currently linked task.
193 */
194static void unlink(struct rt_server *server)
195{
196 BUG_ON(!server->linked);
197
198 if (is_server(server->linked))
199 task_fserver(server->linked)->running = 0;
200
201
202 sched_trace_server_switch_away(server->sid, 0,
203 server->linked->pid,
204 get_rt_job(server->linked));
205 TRACE_TASK(server->linked, "No longer run by server %d\n", server->sid);
206
207 raw_spin_lock(&dgl_lock);
208 release_resources(server->linked);
209 raw_spin_unlock(&dgl_lock);
210
211 get_task_server(server->linked) = NULL;
212 server->linked = NULL;
213}
214
215static struct task_struct* schedule_server(struct rt_server *server);
216
217/*
218 * Logically run @task.
219 */
220static void link(struct rt_server *server, struct task_struct *task)
221{
222 struct rt_server *tserv;
223
224 BUG_ON(server->linked);
225 BUG_ON(!server->running);
226 BUG_ON(is_kernel_np(task));
227
228 TRACE_TASK(task, "Run by server %d\n", server->sid);
229
230 if (is_server(task)) {
231 tserv = task_fserver(task);
232 tserv->running = 1;
233 schedule_server(tserv);
234 }
235
236 server->linked = task;
237 get_task_server(task) = server;
238
239 sched_trace_server_switch_to(server->sid, 0,
240 task->pid, get_rt_job(task));
241}
242
243/*
244 * Triggers preemption on first FIFO server which is running NULL.
245 */
246static void check_for_fifo_preempt(void)
247{
248 int ret = 0, cpu;
249 struct cpu_entry *entry;
250 struct rt_server *cpu_server, *fifo_server;
251
252 TRACE("Checking for FIFO preempt\n");
253
254 for_each_online_cpu(cpu) {
255 entry = remote_entry(cpu);
256 cpu_server = &entry->server;
257 fifo_server = &entry->fifo_server.server;
258
259 raw_spin_lock(entry_lock(entry));
260 raw_spin_lock(&fifo_lock);
261
262 if (cpu_server->linked && is_server(cpu_server->linked) &&
263 !fifo_server->linked) {
264 litmus_reschedule(cpu);
265 ret = 1;
266 }
267
268 raw_spin_unlock(&fifo_lock);
269 raw_spin_unlock(entry_lock(entry));
270
271 if (ret)
272 break;
273 }
274}
275
276/*
277 * Rejoin a task into the system.
278 */
279static void job_arrival(struct task_struct *t)
280{
281 rt_domain_t *dom = task_dom(task_entry(t), t);
282
283 lock_if(&fifo_lock, is_be(t));
284 requeue(dom, t);
285 unlock_if(&fifo_lock, is_be(t));
286}
287
288/*
289 * Complete job for task linked to @server.
290 */
291static void job_completion(struct rt_server *server)
292{
293 struct task_struct *t = server->linked;
294 lt_t et, now = litmus_clock();
295
296 TRACE_TASK(t, "Job completed\n");
297 if (is_server(t))
298 sched_trace_server_completion(t->pid, get_rt_job(t));
299 else
300 sched_trace_task_completion(t, 0);
301
302 if (1 < get_rt_job(t)) {
303 /* our releases happen at the second job */
304 et = get_exec_time(t);
305 if (et > tsk_rt(t)->max_exec_time)
306 tsk_rt(t)->max_exec_time = et;
307 }
308
309 if (is_tardy(t, now)) {
310 lt_t miss = now - get_deadline(t);
311 ++tsk_rt(t)->missed;
312 tsk_rt(t)->total_tardy += miss;
313 if (lt_before(tsk_rt(t)->max_tardy, miss)) {
314 tsk_rt(t)->max_tardy = miss;
315 }
316 }
317
318 unlink(server);
319 set_rt_flags(t, RT_F_SLEEP);
320 prepare_for_next_period(t);
321
322 if (is_server(t))
323 sched_trace_server_release(t->pid, get_rt_job(t),
324 get_release(t), get_deadline(t));
325 else
326 sched_trace_task_release(t);
327
328 if (is_running(t))
329 job_arrival(t);
330}
331
332/*
333 * Update @server state to reflect task's state.
334 */
335static void update_task(struct rt_server *server)
336{
337 int oot, sleep, block, np, chunked;
338 struct task_struct *t = server->linked;
339 lt_t last = tsk_rt(t)->last_exec_time;
340
341 block = !is_running(t);
342 oot = budget_enforced(t) && budget_exhausted(t);
343 np = is_kernel_np(t);
344 sleep = get_rt_flags(t) == RT_F_SLEEP;
345
346 chunked = color_chunk && last && (lt_after(litmus_clock() - last, color_chunk));
347
348 TRACE_TASK(t, "Updating task, block: %d, oot: %d, np: %d, sleep: %d, chunk: %d\n",
349 block, oot, np, sleep, chunked);
350
351 if (block)
352 unlink(server);
353 else if (oot || sleep)
354 job_completion(server);
355 else if (chunked) {
356 unlink(server);
357 job_arrival(t);
358 }
359}
360
361/*
362 * Link next task for @server.
363 */
364static struct task_struct* schedule_server(struct rt_server *server)
365{
366 struct task_struct *next;
367 struct rt_server *lserver;
368
369 TRACE("Scheduling server %d\n", server->sid);
370
371 if (server->linked) {
372 if (is_server(server->linked)) {
373 lserver = task_fserver(server->linked);
374 lserver->update(lserver);
375 }
376 update_task(server);
377 }
378
379 next = server->linked;
380 lock_if(&fifo_lock, is_fifo_server(server));
381 if ((!next || !is_np(next)) &&
382 server->need_preempt(server->domain, next)) {
383 if (next) {
384 TRACE_TASK(next, "Preempted\n");
385 unlink(server);
386 requeue(server->domain, next);
387 }
388 next = __take_ready(server->domain);
389 link(server, next);
390 }
391 unlock_if(&fifo_lock, is_fifo_server(server));
392
393 return next;
394}
395
396/*
397 * Update server state, including picking next running task and incrementing
398 * server execution time.
399 */
400static void fifo_update(struct rt_server *server)
401{
402 lt_t delta;
403 struct fifo_server *fserver;
404
405 fserver = container_of(server, struct fifo_server, server);
406 TRACE_TASK(fserver->task, "Updating FIFO server\n");
407
408 if (!server->linked || has_resources(server->linked, server->cpu)) {
409 /* Running here means linked to a parent server */
410 /* BUG_ON(!server->running); */
411
412 /* Stop executing */
413 if (fserver->start_time) {
414 delta = litmus_clock() - fserver->start_time;
415 tsk_rt(fserver->task)->job_params.exec_time += delta;
416 fserver->start_time = 0;
417 cancel_enforcement_timer(&fserver->timer);
418 } else {
419 /* Server is linked, but not executing */
420 /* BUG_ON(fserver->timer.armed); */
421 }
422
423 /* Calculate next task */
424 schedule_server(&fserver->server);
425
426 /* Reserve needed resources */
427 raw_spin_lock(&dgl_lock);
428 acquire_resources(fserver->task);
429 raw_spin_unlock(&dgl_lock);
430 }
431}
432
433/*
434 * Triggers preemption on rm-scheduled "linked" field only.
435 */
436static void color_rm_release(rt_domain_t *rm, struct bheap *tasks)
437{
438 unsigned long flags;
439 struct cpu_entry *entry;
440
441 TRACE_TASK(bheap2task(bheap_peek(rm->order, tasks)),
442 "Released set of RM tasks\n");
443
444 entry = container_of(rm, struct cpu_entry, rm_domain);
445 raw_spin_lock_irqsave(entry_lock(entry), flags);
446
447 __merge_ready(rm, tasks);
448
449 if (rm_preemption_needed(rm, entry->server.linked) &&
450 (!entry->server.linked || !is_kernel_np(entry->server.linked))) {
451 litmus_reschedule(entry->server.cpu);
452 }
453
454 raw_spin_unlock_irqrestore(entry_lock(entry), flags);
455}
456
457static void color_fifo_release(rt_domain_t *dom, struct bheap *tasks)
458{
459 unsigned long flags;
460
461 TRACE_TASK(bheap2task(bheap_peek(dom->order, tasks)),
462 "Released set of FIFO tasks\n");
463 local_irq_save(flags);
464
465 raw_spin_lock(&fifo_lock);
466 __merge_ready(dom, tasks);
467 raw_spin_unlock(&fifo_lock);
468
469 check_for_fifo_preempt();
470
471 local_irq_restore(flags);
472}
473
474#define cpu_empty(entry, run) \
475 (!(run) || (is_server(run) && !(entry)->fifo_server.server.linked))
476
477static struct task_struct* color_schedule(struct task_struct *prev)
478{
479 unsigned long flags;
480 int server_running;
481 struct cpu_entry *entry = local_entry;
482 struct task_struct *next, *plink = entry->server.linked;
483
484 TRACE("Reschedule on %d at %llu\n", entry->server.cpu, litmus_clock());
485 BUG_ON(entry->scheduled && entry->scheduled != prev);
486 BUG_ON(entry->scheduled && !is_realtime(prev));
487
488 raw_spin_lock_irqsave(entry_lock(entry), flags);
489
490 if (entry->scheduled && cpu_empty(entry, plink) && is_running(prev)) {
491 TRACE_TASK(prev, "Snuck in on new!\n");
492 job_arrival(entry->scheduled);
493 }
494
495 /* Pick next top-level task */
496 next = schedule_server(&entry->server);
497 /* Schedule hierarchically */
498 server_running = next && is_server(next);
499 if (server_running)
500 next = task_fserver(next)->linked;
501
502 /* Selected tasks must contend for group lock */
503 if (next) {
504 raw_spin_lock(&dgl_lock);
505 acquire_resources(next);
506 if (has_resources(next, entry->server.cpu)) {
507 TRACE_TASK(next, "Has group lock\n");
508 sched_trace_task_resume_on(next, 1);
509 } else {
510 TRACE_TASK(next, "Does not have lock, 0x%p does\n",
511 group_lock.acquired[entry->server.cpu]);
512 if (next != prev)
513 sched_trace_task_block_on(next, 1);
514 next = NULL;
515 server_running = 0;
516 }
517 raw_spin_unlock(&dgl_lock);
518 }
519
520 /* Server is blocked if its running task is blocked. Note that if the
521 * server has no running task, the server will now execute NULL.
522 */
523 if (server_running) {
524 TRACE_TASK(entry->server.linked, "Server running\n");
525 arm_enforcement_timer(&entry->fifo_server.timer,
526 entry->fifo_server.task);
527 entry->fifo_server.start_time = litmus_clock();
528 }
529
530 if (prev) {
531 tsk_rt(prev)->scheduled_on = NO_CPU;
532 tsk_rt(prev)->last_exec_time = 0;
533 chunk_cancel(entry);
534 }
535 if (next) {
536 tsk_rt(next)->scheduled_on = entry->server.cpu;
537 tsk_rt(next)->last_exec_time = litmus_clock();
538 chunk_arm(entry);
539 }
540
541 entry->scheduled = next;
542 sched_state_task_picked();
543
544 raw_spin_unlock_irqrestore(entry_lock(entry), flags);
545
546 return entry->scheduled;
547}
548
549static void color_task_new(struct task_struct *t, int on_rq, int running)
550{
551 int i;
552 unsigned long flags;
553 struct cpu_entry *entry;
554 struct dgl_group_req *req;
555 struct control_page *cp = tsk_rt(t)->ctrl_page;
556 struct color_ctrl_page *ccp = tsk_rt(t)->color_ctrl_page;
557
558
559 TRACE_TASK(t, "New colored task\n");
560 entry = (is_be(t)) ? local_entry : task_entry(t);
561
562 raw_spin_lock_irqsave(entry_lock(entry), flags);
563
564 req = kmalloc(sizeof(*req), GFP_ATOMIC);
565 tsk_rt(t)->req = req;
566 tsk_rt(t)->tot_exec_time = 0;
567 tsk_rt(t)->max_exec_time = 0;
568 tsk_rt(t)->max_tardy = 0;
569 tsk_rt(t)->missed = 0;
570 tsk_rt(t)->total_tardy = 0;
571 tsk_rt(t)->ctrl_page->colors_updated = 1;
572 tsk_rt(t)->last_exec_time = 0;
573
574 release_at(t, litmus_clock());
575
576 /* Fill request */
577 if (cp && ccp && cp->colors_updated) {
578 TRACE_TASK(t, "Initializing group request\n");
579 cp->colors_updated = 0;
580 dgl_group_req_init(&group_lock, req);
581 for (i = 0; ccp->pages[i]; ++i)
582 set_req(&group_lock, req, ccp->colors[i], ccp->pages[i]);
583 } else {
584 TRACE("Oh noz: %p %p %d\n", cp, ccp, ((cp) ? cp->colors_updated : -1));
585 }
586
587 if (running) {
588 /* No need to lock with irqs disabled */
589 TRACE_TASK(t, "Already scheduled on %d\n", entry->server.cpu);
590 BUG_ON(entry->scheduled);
591 entry->scheduled = t;
592 tsk_rt(t)->scheduled_on = entry->server.cpu;
593 } else {
594 job_arrival(t);
595 }
596
597 raw_spin_unlock(entry_lock(entry));
598
599 if (is_be(t))
600 check_for_fifo_preempt();
601 else
602 litmus_reschedule_local();
603
604 local_irq_restore(flags);
605}
606
607static void color_task_wake_up(struct task_struct *task)
608{
609 unsigned long flags;
610 struct cpu_entry* entry = local_entry;
611 int sched;
612 lt_t now = litmus_clock();
613
614 TRACE_TASK(task, "Wake up at %llu\n", now);
615
616 raw_spin_lock_irqsave(entry_lock(entry), flags);
617
618 /* Abuse sporadic model */
619 if (is_tardy(task, now)) {
620 release_at(task, now);
621 sched_trace_task_release(task);
622 }
623
624 sched = (entry->scheduled == task);
625
626 if (!sched)
627 job_arrival(task);
628 else
629 TRACE_TASK(task, "Is already scheduled on %d!\n",
630			entry->server.cpu);
631
632 raw_spin_unlock(entry_lock(entry));
633 if (is_be(task))
634 check_for_fifo_preempt();
635 else
636 litmus_reschedule_local();
637
638
639 local_irq_restore(flags);
640}
641
642static void color_task_block(struct task_struct *t)
643{
644 TRACE_TASK(t, "Block at %llu, state=%d\n", litmus_clock(), t->state);
645 BUG_ON(!is_realtime(t));
646 BUG_ON(is_queued(t));
647}
648
649static void color_task_exit(struct task_struct *t)
650{
651 unsigned long flags;
652 struct cpu_entry *entry = task_entry(t);
653 raw_spinlock_t *lock = task_lock(entry, t);
654
655 TRACE_TASK(t, "RIP, now reschedule\n");
656
657 local_irq_save(flags);
658
659 sched_trace_task_exit(t);
660 sched_trace_task_tardy(t);
661
662 /* Remove from scheduler consideration */
663 if (is_queued(t)) {
664 raw_spin_lock(lock);
665 remove(task_dom(entry, t), t);
666 raw_spin_unlock(lock);
667 }
668
669 /* Stop parent server */
670 if (get_task_server(t))
671 unlink(get_task_server(t));
672
673 /* Unschedule running task */
674 if (tsk_rt(t)->scheduled_on != NO_CPU) {
675 entry = remote_entry(tsk_rt(t)->scheduled_on);
676
677 raw_spin_lock(entry_lock(entry));
678
679 tsk_rt(t)->scheduled_on = NO_CPU;
680 entry->scheduled = NULL;
681 litmus_reschedule(entry->server.cpu);
682
683 raw_spin_unlock(entry_lock(entry));
684 }
685
686 /* Remove dgl request from system */
687 raw_spin_lock(&dgl_lock);
688 release_resources(t);
689 raw_spin_unlock(&dgl_lock);
690
691 dgl_group_req_free(tsk_rt(t)->req);
692 kfree(tsk_rt(t)->req);
693
694 local_irq_restore(flags);
695}
696
697/*
698 * Non-be tasks must have migrated to the right CPU.
699 */
700static long color_admit_task(struct task_struct* t)
701{
702	int ret = (is_be(t) || task_cpu(t) == get_partition(t)) ? 0 : -EINVAL;
703	if (ret) {
704 printk(KERN_WARNING "Task failed to migrate to CPU %d\n",
705 get_partition(t));
706 }
707 return ret;
708}
709
710/*
711 * Load server parameters.
712 */
713static long color_activate_plugin(void)
714{
715 int cpu, ret = 0;
716 struct rt_task tp;
717 struct task_struct *server_task;
718 struct cpu_entry *entry;
719
720 color_chunk = 0;
721
722 for_each_online_cpu(cpu) {
723 entry = remote_entry(cpu);
724 server_task = entry->fifo_server.task;
725
726 raw_spin_lock(entry_lock(entry));
727
728 ret = color_server_params(cpu, ((unsigned long*)&tp.exec_cost),
729 ((unsigned long*)&tp.period));
730 if (ret) {
731 printk(KERN_WARNING "Uninitialized server for CPU %d\n",
732 entry->server.cpu);
733 goto loop_end;
734 }
735
736 /* Fill rt parameters */
737 tp.phase = 0;
738 tp.cpu = cpu;
739 tp.cls = RT_CLASS_SOFT;
740 tp.budget_policy = PRECISE_ENFORCEMENT;
741 tsk_rt(server_task)->task_params = tp;
742 tsk_rt(server_task)->present = 1;
743
744 entry->scheduled = NULL;
745
746 TRACE_TASK(server_task, "Created server with wcet: %llu, "
747 "period: %llu\n", tp.exec_cost, tp.period);
748
749 loop_end:
750 raw_spin_unlock(entry_lock(entry));
751 }
752
753 return ret;
754}
755
756/*
757 * Mark servers as unused, making future calls to requeue fail.
758 */
759static long color_deactivate_plugin(void)
760{
761 int cpu;
762 struct cpu_entry *entry;
763
764 for_each_online_cpu(cpu) {
765 entry = remote_entry(cpu);
766 if (entry->fifo_server.task) {
767 tsk_rt(entry->fifo_server.task)->present = 0;
768 }
769 }
770 return 0;
771}
772
773/*
774 * Dump container and server parameters for tracing.
775 */
776static void color_release_ts(lt_t time)
777{
778 int cpu, fifo_cid;
779 char fifo_name[TASK_COMM_LEN], cpu_name[TASK_COMM_LEN];
780 struct cpu_entry *entry;
781 struct task_struct *stask;
782
783 strcpy(cpu_name, "CPU");
784 strcpy(fifo_name, "BE");
785
786 fifo_cid = num_online_cpus();
787 trace_litmus_container_param(fifo_cid, fifo_name);
788
789 for_each_online_cpu(cpu) {
790 entry = remote_entry(cpu);
791 trace_litmus_container_param(cpu, cpu_name);
792 trace_litmus_server_param(entry->server.sid, cpu, 0, 0);
793 stask = entry->fifo_server.task;
794 trace_litmus_server_param(stask->pid, fifo_cid,
795 get_exec_cost(stask),
796 get_rt_period(stask));
797
798 /* Make runnable */
799 release_at(stask, time);
800 entry->fifo_server.start_time = 0;
801
802 cancel_enforcement_timer(&entry->fifo_server.timer);
803
804 if (!is_queued(stask))
805 requeue(&entry->rm_domain, stask);
806 }
807}
808
809static struct sched_plugin color_plugin __cacheline_aligned_in_smp = {
810 .plugin_name = "COLOR",
811 .task_new = color_task_new,
812 .complete_job = complete_job,
813 .task_exit = color_task_exit,
814 .schedule = color_schedule,
815 .task_wake_up = color_task_wake_up,
816 .task_block = color_task_block,
817 .admit_task = color_admit_task,
818
819 .release_ts = color_release_ts,
820
821 .activate_plugin = color_activate_plugin,
822 .deactivate_plugin = color_deactivate_plugin,
823};
824
825static int __init init_color(void)
826{
827 int cpu;
828 struct cpu_entry *entry;
829 struct task_struct *server_task;
830 struct fifo_server *fifo_server;
831 struct rt_server *cpu_server;
832
833 for_each_online_cpu(cpu) {
834 entry = remote_entry(cpu);
835 rm_domain_init(&entry->rm_domain, NULL, color_rm_release);
836
837 entry->scheduled = NULL;
838
839 /* Create FIFO server */
840 fifo_server = &entry->fifo_server;
841 init_rt_server(&fifo_server->server,
842 cpu + num_online_cpus() + 1,
843 cpu,
844 &fifo_domain,
845 fifo_preemption_needed, fifo_update);
846
847
848 /* Create task struct for FIFO server */
849 server_task = kmalloc(sizeof(struct task_struct), GFP_ATOMIC);
850 memset(server_task, 0, sizeof(*server_task));
851 server_task->policy = SCHED_LITMUS;
852 strcpy(server_task->comm, "server");
853 server_task->pid = fifo_server->server.sid;
854 fifo_server->task = server_task;
855
856 /* Create rt_params for FIFO server */
857 tsk_rt(server_task)->heap_node = bheap_node_alloc(GFP_ATOMIC);
858 tsk_rt(server_task)->rel_heap = release_heap_alloc(GFP_ATOMIC);
859 bheap_node_init(&tsk_rt(server_task)->heap_node, server_task);
860 tsk_rt(server_task)->is_server = 1;
861
862 /* Create CPU server */
863 cpu_server = &entry->server;
864 init_rt_server(cpu_server, cpu + 1, cpu,
865 &entry->rm_domain, rm_preemption_needed, NULL);
866 cpu_server->running = 1;
867
868 init_enforcement_timer(&fifo_server->timer);
869 hrtimer_init(&entry->chunk_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
870 entry->chunk_timer.function = chunk_fire;
871 }
872
873 fifo_domain_init(&fifo_domain, NULL, color_fifo_release);
874 raw_spin_lock_init(&fifo_lock);
875
876 dgl_init(&group_lock, color_cache_info.nr_colors,
877 color_cache_info.ways);
878 raw_spin_lock_init(&dgl_lock);
879
880 return register_sched_plugin(&color_plugin);
881}
882
883static void exit_color(void)
884{
885 dgl_free(&group_lock);
886}
887
888module_init(init_color);
889module_exit(exit_color);
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index c3344b9d288f..4f93d16b4d52 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -796,8 +796,10 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
796 /* check if we need to activate priority inheritance */ 796 /* check if we need to activate priority inheritance */
797 if (edf_higher_prio(t, sem->hp_waiter)) { 797 if (edf_higher_prio(t, sem->hp_waiter)) {
798 sem->hp_waiter = t; 798 sem->hp_waiter = t;
799 if (edf_higher_prio(t, sem->owner)) 799 if (edf_higher_prio(t, sem->owner)) {
800 set_priority_inheritance(sem->owner, sem->hp_waiter); 800 set_priority_inheritance(sem->owner, sem->hp_waiter);
801
802 }
801 } 803 }
802 804
803 TS_LOCK_SUSPEND; 805 TS_LOCK_SUSPEND;
@@ -805,6 +807,8 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
805 /* release lock before sleeping */ 807 /* release lock before sleeping */
806 spin_unlock_irqrestore(&sem->wait.lock, flags); 808 spin_unlock_irqrestore(&sem->wait.lock, flags);
807 809
810 sched_trace_task_block(t);
811
808 /* We depend on the FIFO order. Thus, we don't need to recheck 812 /* We depend on the FIFO order. Thus, we don't need to recheck
809 * when we wake up; we are guaranteed to have the lock since 813 * when we wake up; we are guaranteed to have the lock since
810 * there is only one wake up per release. 814 * there is only one wake up per release.
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c
new file mode 100644
index 000000000000..2f4eb82b2220
--- /dev/null
+++ b/litmus/sched_mc.c
@@ -0,0 +1,1373 @@
1/**
2 * litmus/sched_mc.c
3 *
4 * Implementation of the Mixed Criticality scheduling algorithm.
5 *
6 * (Per Mollison, Erickson, Anderson, Baruah, Scoredos 2010)
7 *
8 * First and foremost: the relative time spent in the different parts of
9 * release and scheduling overhead needs to be measured and graphed.
10 *
11 * Domain locks should be more fine-grained. There is no reason to hold the
12 * ready-queue lock when adding a task to the release-queue.
13 *
14 * The levels should be converted to linked-lists so that they are more
15 * adaptable and need not be identical on all processors.
16 *
17 * The interaction between remove_from_all and other concurrent operations
18 * should be re-examined. If a job_completion and a preemption happen
19 * simultaneously, a task could be requeued, removed, then requeued again.
20 *
21 * Level-C tasks should be able to swap CPUs a-la GSN-EDF. They should also
22 * try and swap with the last CPU they were on. This could be complicated for
23 * ghost tasks.
24 *
25 * Locking for timer-merging could be infinitely more fine-grained. A second
26 * hash could select a lock to use based on queue slot. This approach might
27 * also help with add_release in rt_domains.
28 *
29 * It should be possible to reserve a CPU for ftdumping.
30 *
31 * The real_deadline business seems sloppy.
32 *
33 * The amount of data in the header file should be cut down. The use of the
34 * header file in general needs to be re-examined.
35 *
36 * The plugin needs to be modified so that it doesn't freeze when it is
37 * deactivated in a VM.
38 *
39 * The locking in check_for_preempt is not fine-grained enough.
40 *
41 * The size of the structures could be smaller. Debugging info might be
42 * excessive as things currently stand.
43 *
44 * The macro can_requeue has been expanded too much. Anything beyond
45 * scheduled_on is a hack!
46 *
47 * Domain names (rt_domain) are still clumsy.
48 *
49 * Should BE be moved into the kernel? This will require benchmarking.
50 */
51
52#include <linux/spinlock.h>
53#include <linux/percpu.h>
54#include <linux/sched.h>
55#include <linux/hrtimer.h>
56#include <linux/slab.h>
57#include <linux/module.h>
58#include <linux/poison.h>
59#include <linux/pid.h>
60
61#include <litmus/litmus.h>
62#include <litmus/trace.h>
63#include <litmus/jobs.h>
64#include <litmus/sched_plugin.h>
65#include <litmus/edf_common.h>
66#include <litmus/sched_trace.h>
67#include <litmus/domain.h>
68#include <litmus/bheap.h>
69#include <litmus/event_group.h>
70#include <litmus/budget.h>
71
72#include <litmus/sched_mc.h>
73#include <litmus/ce_domain.h>
74
75/**
76 * struct cpu_entry - State of a CPU for the entire MC system
77 * @cpu CPU id
78 * @scheduled Task that is physically running
79 * @linked Task that should be running / is logically running
80 * @lock For serialization
81 * @crit_entries Array of CPU state per criticality level
82 * @redir List of redirected work for this CPU.
83 * @redir_lock Lock for @redir.
84 * @event_group Event group for timer merging.
85 */
86struct cpu_entry {
87 int cpu;
88 struct task_struct* scheduled;
89 struct task_struct* will_schedule;
90 struct task_struct* linked;
91 raw_spinlock_t lock;
92 struct crit_entry crit_entries[NUM_CRIT_LEVELS];
93#ifdef CONFIG_PLUGIN_MC_REDIRECT
94 struct list_head redir;
95 raw_spinlock_t redir_lock;
96#endif
97#ifdef CONFIG_MERGE_TIMERS
98 struct event_group *event_group;
99#endif
100};
101
102DEFINE_PER_CPU(struct cpu_entry, cpus);
103#ifdef CONFIG_RELEASE_MASTER
104static int interrupt_cpu;
105#endif
106
107#define domain_data(dom) (container_of(dom, struct domain_data, domain))
108#define is_global(dom) (domain_data(dom)->heap)
109#define is_global_task(t) (is_global(get_task_domain(t)))
110#define can_use(ce) \
111	((ce)->state == CS_ACTIVE || (ce)->state == CS_ACTIVATE)
112#define can_requeue(t) \
113 ((t)->rt_param.linked_on == NO_CPU && /* Not linked anywhere */ \
114 !is_queued(t) && /* Not gonna be linked */ \
115 (!is_global_task(t) || (t)->rt_param.scheduled_on == NO_CPU))
116#define entry_level(e) \
117 (((e)->linked) ? tsk_mc_crit((e)->linked) : NUM_CRIT_LEVELS - 1)
118#define crit_cpu(ce) \
119 (container_of((void*)((ce) - (ce)->level), struct cpu_entry, crit_entries))
120#define get_crit_entry_for(cpu, level) (&per_cpu(cpus, cpu).crit_entries[level])
121#define TRACE_ENTRY(e, fmt, args...) \
122 STRACE("P%d, linked=" TS " " fmt, e->cpu, TA(e->linked), ##args)
123#define TRACE_CRIT_ENTRY(ce, fmt, args...) \
124 STRACE("%s P%d, linked=" TS " " fmt, \
125 (ce)->domain->name, crit_cpu(ce)->cpu, TA((ce)->linked), ##args)
126
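
Of the helpers above, crit_cpu() is the only non-obvious one: it recovers the owning cpu_entry purely from pointer arithmetic on the crit_entries array. The same trick in a standalone userspace sketch (stand-in types, not kernel code):

#include <stddef.h>

struct item  { int level; };                      /* stand-in for crit_entry */
struct owner { int cpu; struct item items[4]; };  /* stand-in for cpu_entry  */

/* Given &owner.items[level], step back 'level' slots to items[0], then
 * subtract the array's offset -- exactly what crit_cpu() does with
 * container_of() on (ce - ce->level). */
static struct owner *owner_of(struct item *it)
{
	struct item *first = it - it->level;
	return (struct owner *)((char *)first - offsetof(struct owner, items));
}
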
127static int sid(struct crit_entry *ce)
128{
129 int level = ce->level * num_online_cpus() + crit_cpu(ce)->cpu + 1;
130	BUG_ON(level <= 0);
131 return -level;
132}
133
134/*
135 * Sort CPUs within a global domain's heap.
136 */
137static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b)
138{
139 struct domain *domain;
140 struct crit_entry *first, *second;
141 struct task_struct *first_link, *second_link;
142
143 first = a->value;
144 second = b->value;
145 first_link = first->linked;
146 second_link = second->linked;
147
148 if (first->state == CS_REMOVED || second->state == CS_REMOVED) {
149 /* Removed entries go at the back of the heap */
150 return first->state != CS_REMOVED &&
151 second->state != CS_REMOVED;
152 } else if (!first_link || !second_link) {
153 /* Entry with nothing scheduled is lowest priority */
154 return second_link && !first_link;
155 } else {
156 /* Sort by deadlines of tasks */
157 domain = get_task_domain(first_link);
158 return domain->higher_prio(second_link, first_link);
159 }
160}
161
162/*
163 * Return true if the domain has a higher priority ready task. The @curr
164 * task must belong to the domain.
165 */
166static int mc_preempt_needed(struct domain *dom, struct task_struct* curr)
167{
168 struct task_struct *next = dom->peek_ready(dom);
169 if (!next || !curr) {
170 return next && !curr;
171 } else {
172 BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr));
173 return get_task_domain(next)->higher_prio(next, curr);
174 }
175}
176
177/*
178 * Update crit entry position in a global heap. Caller must hold
179 * @ce's domain lock.
180 */
181static inline void update_crit_position(struct crit_entry *ce)
182{
183 struct bheap *heap;
184 if (is_global(ce->domain)) {
185 heap = domain_data(ce->domain)->heap;
186 BUG_ON(!heap);
187 BUG_ON(!bheap_node_in_heap(ce->node));
188 bheap_delete(cpu_lower_prio, heap, ce->node);
189 bheap_insert(cpu_lower_prio, heap, ce->node);
190 }
191}
192
193/*
194 * Update crit entry position in a global heap if it has been marked
195 * for update. Caller must hold @ce's domain lock.
196 */
197static void fix_crit_position(struct crit_entry *ce)
198{
199 if (is_global(ce->domain)) {
200 if (CS_ACTIVATE == ce->state) {
201 ce->state = CS_ACTIVE;
202 update_crit_position(ce);
203 } else if (CS_REMOVE == ce->state) {
204 ce->state = CS_REMOVED;
205 update_crit_position(ce);
206 }
207 }
208}
209
210/*
211 * Return next CPU which should preempted or NULL if the domain has no
212 * preemptable CPUs. Caller must hold the @dom lock.
213 */
214static inline struct crit_entry* lowest_prio_cpu(struct domain *dom)
215{
216 struct bheap *heap = domain_data(dom)->heap;
217 struct bheap_node* hn;
218 struct crit_entry *ce, *res = NULL;
219 do {
220 hn = bheap_peek(cpu_lower_prio, heap);
221 ce = (hn) ? hn->value : NULL;
222 if (ce) {
223 if (ce->state == CS_ACTIVE)
224 res = ce;
225 else if (ce->state == CS_REMOVED)
226 ce = NULL;
227 else
228 fix_crit_position(ce);
229 }
230 } while (ce && !res);
231 return res;
232}
233
234/*
235 * Cancel ghost timer.
236 */
237static inline void cancel_ghost(struct crit_entry *ce)
238{
239#ifdef CONFIG_MERGE_TIMERS
240 cancel_event(&ce->event);
241#else
242 hrtimer_try_to_cancel(&ce->timer);
243#endif
244}
245
246/*
247 * Arm ghost timer. Will merge timers if the option is specified.
248 */
249static inline void arm_ghost(struct crit_entry *ce, lt_t fire)
250{
251#ifdef CONFIG_MERGE_TIMERS
252 add_event(crit_cpu(ce)->event_group, &ce->event, fire);
253#else
254 __hrtimer_start_range_ns(&ce->timer,
255 ns_to_ktime(fire),
256 0 /* delta */,
257 HRTIMER_MODE_ABS_PINNED,
258 0 /* no wakeup */);
259#endif
260}
261
262/*
263 * Time accounting for ghost tasks.
264 * Must be called before a decision is made involving the task's budget.
265 */
266static void update_ghost_time(struct task_struct *p)
267{
268 u64 clock = litmus_clock();
269 u64 delta = clock - p->se.exec_start;
270 BUG_ON(!is_ghost(p));
271 if (unlikely ((s64)delta < 0)) {
272 delta = 0;
273 TRACE_MC_TASK(p, "WARNING: negative time delta\n");
274 }
275 if (tsk_mc_data(p)->mc_job.ghost_budget <= delta) {
276 TRACE_MC_TASK(p, "Ghost job could have ended\n");
277 tsk_mc_data(p)->mc_job.ghost_budget = 0;
278 p->se.exec_start = clock;
279 } else {
280 TRACE_MC_TASK(p, "Ghost job updated, but didn't finish\n");
281 tsk_mc_data(p)->mc_job.ghost_budget -= delta;
282 p->se.exec_start = clock;
283 }
284}
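
In effect, the accounting above charges the elapsed wall-clock time against the remaining ghost budget, clamps at zero, and restarts the measurement window. The same arithmetic as a standalone sketch (plain C, not kernel code):

typedef unsigned long long lt_t;

/* Sketch of the update performed by update_ghost_time(). */
static lt_t ghost_charge(lt_t budget, lt_t *exec_start, lt_t now)
{
	/* Guard against a clock reading behind exec_start, as above. */
	lt_t delta = (now > *exec_start) ? now - *exec_start : 0;

	*exec_start = now;
	return (budget <= delta) ? 0 : budget - delta;
}
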
285
286/**
287 * link_task_to_crit() - Logically run a task at a criticality level.
288 * Caller must hold @ce's CPU lock.
289 */
290static void link_task_to_crit(struct crit_entry *ce,
291 struct task_struct *task)
292{
293 lt_t when_to_fire;
294
295 TRACE_CRIT_ENTRY(ce, "Linking " TS "\n", TA(task));
296 BUG_ON(!can_use(ce) && task);
297 BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
298 BUG_ON(task && is_global(ce->domain) &&
299 !bheap_node_in_heap(ce->node));
300
301 /* Unlink last task */
302 if (ce->linked) {
303 TRACE_MC_TASK(ce->linked, "Unlinking\n");
304 ce->linked->rt_param.linked_on = NO_CPU;
305 if (is_ghost(ce->linked)) {
306 cancel_ghost(ce);
307 if (tsk_mc_data(ce->linked)->mc_job.ghost_budget > 0) {
308 /* Job isn't finished, so do accounting */
309 update_ghost_time(ce->linked);
310 }
311 }
312 sched_trace_server_switch_away(sid(ce), 0, ce->linked->pid,
313 tsk_rt(ce->linked)->job_params.job_no);
314 }
315
316 /* Actually link task */
317 ce->linked = task;
318 if (task) {
319 task->rt_param.linked_on = crit_cpu(ce)->cpu;
320 if (is_ghost(task) && CRIT_LEVEL_A != tsk_mc_crit(task)) {
321 /* There is a level-A timer that will force a
322 * preemption, so we don't set this for level-A
323 * tasks. Otherwise reset the budget timer.
324 */
325 task->se.exec_start = litmus_clock();
326 when_to_fire = task->se.exec_start +
327 tsk_mc_data(task)->mc_job.ghost_budget;
328 arm_ghost(ce, when_to_fire);
329
330 sched_trace_server_switch_to(sid(ce), 0, 0, 0);
331 } else {
332 sched_trace_server_switch_to(sid(ce), 0, task->pid,
333 get_rt_job(ce->linked));
334 }
335 }
336}
337
338static void check_for_preempt(struct domain*);
339
340/**
341 * job_arrival() - Called when a task re-enters the system.
342 * Caller must hold no locks.
343 */
344static void job_arrival(struct task_struct *task)
345{
346 struct domain *dom = get_task_domain(task);
347
348 TRACE_MC_TASK(task, "Job arriving\n");
349 BUG_ON(!task);
350
351 raw_spin_lock(dom->lock);
352 if (can_requeue(task)) {
353 BUG_ON(task->rt_param.linked_on != NO_CPU);
354 dom->requeue(dom, task);
355 check_for_preempt(dom);
356 } else {
357 /* If a global task is scheduled on one cpu, it CANNOT
358 * be requeued into a global domain. Another cpu might
359 * dequeue the global task before it is descheduled,
360 * causing the system to crash when the task is scheduled
361 * in two places simultaneously.
362 */
363 TRACE_MC_TASK(task, "Delayed arrival of scheduled task\n");
364 }
365 raw_spin_unlock(dom->lock);
366}
367
368/**
369 * low_prio_arrival() - If CONFIG_PLUGIN_MC_REDIRECT is enabled, will
370 * redirect a lower priority job_arrival work to the interrupt_cpu.
371 */
372static void low_prio_arrival(struct task_struct *task)
373{
374 struct cpu_entry *entry;
375
376 /* Race conditions! */
377 if (!can_requeue(task)) return;
378
379#ifdef CONFIG_PLUGIN_MC_REDIRECT
380 if (!is_global_task(task))
381 goto arrive;
382 if (smp_processor_id() != interrupt_cpu) {
383 entry = &__get_cpu_var(cpus);
384 raw_spin_lock(&entry->redir_lock);
385 TRACE_MC_TASK(task, "Adding to redirect queue\n");
386 list_add(&tsk_rt(task)->list, &entry->redir);
387 raw_spin_unlock(&entry->redir_lock);
388 litmus_reschedule(interrupt_cpu);
389 } else
390#endif
391 {
392arrive:
393 job_arrival(task);
394 }
395}
396
397#ifdef CONFIG_PLUGIN_MC_REDIRECT
398/**
399 * fix_global_levels() - Execute redirected job arrivals on this cpu.
400 */
401static void fix_global_levels(void)
402{
403 int c;
404 struct cpu_entry *e;
405 struct list_head *pos, *safe;
406 struct task_struct *t;
407
408 STRACE("Fixing global levels\n");
409 for_each_online_cpu(c) {
410 e = &per_cpu(cpus, c);
411 raw_spin_lock(&e->redir_lock);
412 list_for_each_safe(pos, safe, &e->redir) {
413 t = list_entry(pos, struct task_struct, rt_param.list);
414 BUG_ON(!t);
415 TRACE_MC_TASK(t, "Dequeued redirected job\n");
416 list_del_init(pos);
417 job_arrival(t);
418 }
419 raw_spin_unlock(&e->redir_lock);
420 }
421}
422#endif
423
424/**
425 * link_task_to_cpu() - Logically run a task on a CPU.
426 * The task must first have been linked to one of the CPU's crit_entries.
427 * Caller must hold the entry lock.
428 */
429static void link_task_to_cpu(struct cpu_entry *entry, struct task_struct *task)
430{
431 int i = entry_level(entry);
432 struct crit_entry *ce;
433 TRACE_MC_TASK(task, "Linking to P%d\n", entry->cpu);
434 BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu);
435 BUG_ON(task && is_ghost(task));
436
437 if (entry->linked) {
438 sched_trace_server_switch_away(-entry->linked->pid,
439 get_server_job(entry->linked),
440 entry->linked->pid,
441 get_rt_job(entry->linked));
442 }
443
444 if (task){
445 set_rt_flags(task, RT_F_RUNNING);
446 sched_trace_server_switch_to(-task->pid,
447 get_server_job(task),
448 task->pid,
449 get_rt_job(task));
450 }
451 entry->linked = task;
452
453	/* Crit entries between the old and new level are now usable */
454 for (; i < entry_level(entry) + 1; i++) {
455 ce = &entry->crit_entries[i];
456 if (!can_use(ce)) {
457 ce->state = CS_ACTIVATE;
458 }
459 }
460}
461
462/**
463 * preempt() - Preempt a logically running task with a higher priority one.
464 * @dom Domain from which to draw higher priority task
465 * @ce CPU criticality level to preempt
466 *
467 * Caller must hold the lock for @dom and @ce's CPU lock.
468 */
469static void preempt(struct domain *dom, struct crit_entry *ce)
470{
471 struct task_struct *task = dom->take_ready(dom);
472 struct cpu_entry *entry = crit_cpu(ce);
473 struct task_struct *old = ce->linked;
474
475 BUG_ON(!task);
476 TRACE_CRIT_ENTRY(ce, "Preempted by " TS "\n", TA(task));
477
478 /* Per-domain preemption */
479 link_task_to_crit(ce, task);
480 if (old && can_requeue(old)) {
481 dom->requeue(dom, old);
482 }
483 update_crit_position(ce);
484
485 /* Preempt actual execution if this is a running task */
486 if (!is_ghost(task)) {
487 link_task_to_cpu(entry, task);
488 preempt_if_preemptable(entry->scheduled, entry->cpu);
489 } else if (old && old == entry->linked) {
490 /* Preempted a running task with a ghost job. Null needs to be
491 * running.
492 */
493 link_task_to_cpu(entry, NULL);
494 preempt_if_preemptable(entry->scheduled, entry->cpu);
495 }
496}
497
498/**
499 * update_crit_levels() - Update criticality entries for the new cpu state.
500 * This should be called after a new task has been linked to @entry.
501 * The caller must hold the @entry->lock, but this method will release it.
502 */
503static void update_crit_levels(struct cpu_entry *entry)
504{
505 int i, global_preempted;
506 struct crit_entry *ce;
507 struct task_struct *readmit[NUM_CRIT_LEVELS];
508 enum crit_level level = entry_level(entry);
509
510 /* Remove lower priority tasks from the entry */
511 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
512 ce = &entry->crit_entries[i];
513
514 global_preempted = ce->linked &&
515 /* This task is running on a cpu */
516 ce->linked->rt_param.scheduled_on == entry->cpu &&
517 /* But it was preempted */
518 ce->linked != entry->linked &&
519 /* And it is an eligible global task */
520 !is_ghost(ce->linked) && is_global(ce->domain);
521
522 /* Do not readmit global tasks which are preempted! These can't
523 * ever be re-admitted until they are descheduled for reasons
524 * explained in job_arrival.
525 */
526 readmit[i] = (!global_preempted) ? ce->linked : NULL;
527
528 ce->state = CS_REMOVE;
529 if (ce->linked)
530 link_task_to_crit(ce, NULL);
531 }
532 /* Need to unlock so we can access domains */
533 raw_spin_unlock(&entry->lock);
534
535 /* Re-admit tasks to the system */
536 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
537 ce = &entry->crit_entries[i];
538 if (readmit[i]) {
539 low_prio_arrival(readmit[i]);
540 }
541 }
542}
543
544/**
545 * check_for_preempt() - Causes a preemption if higher-priority tasks are ready.
546 * Caller must hold domain lock.
547 * Assumes (a large, nasty assumption) that there is exactly one global
548 * criticality level, and that it is the last one in each list, so it never calls update_crit_levels().
549 */
550static void check_for_preempt(struct domain *dom)
551{
552 int recheck = 1;
553 struct cpu_entry *entry;
554 struct crit_entry *ce;
555
556 if (is_global(dom)) {
557 /* Loop until we find a non-preemptable CPU */
558 while ((ce = lowest_prio_cpu(dom)) && recheck) {
559 entry = crit_cpu(ce);
560 recheck = 1;
561
562 /* Cache next task */
563 dom->peek_ready(dom);
564
565 raw_spin_lock(&entry->lock);
566 if (!can_use(ce))
567 /* CPU disabled while locking! */
568 fix_crit_position(ce);
569 else if (dom->preempt_needed(dom, ce->linked))
570 /* Success! Check for more preemptions */
571 preempt(dom, ce);
572 else {
573 /* Failure! */
574 recheck = 0;
575 TRACE_CRIT_ENTRY(ce, "Stopped global check\n");
576 }
577 raw_spin_unlock(&entry->lock);
578 }
579 } else /* Partitioned */ {
580 ce = domain_data(dom)->crit_entry;
581 entry = crit_cpu(ce);
582
583 /* Cache next task */
584 dom->peek_ready(dom);
585
586 raw_spin_lock(&entry->lock);
587 if (can_use(ce) && dom->preempt_needed(dom, ce->linked)) {
588 preempt(dom, ce);
589 update_crit_levels(entry);
590 } else {
591 raw_spin_unlock(&entry->lock);
592 }
593 }
594}
595
596/**
597 * remove_from_all() - Logically remove a task from all structures.
598 * Caller must hold no locks.
599 */
600static void remove_from_all(struct task_struct* task)
601{
602 int update = 0;
603 struct cpu_entry *entry;
604 struct crit_entry *ce;
605 struct domain *dom = get_task_domain(task);
606
607 TRACE_MC_TASK(task, "Removing from everything\n");
608 BUG_ON(!task);
609
610 raw_spin_lock(dom->lock);
611
612 /* Remove the task from any CPU state */
613 if (task->rt_param.linked_on != NO_CPU) {
614 entry = &per_cpu(cpus, task->rt_param.linked_on);
615 raw_spin_lock(&entry->lock);
616
617 /* Unlink only if task is still linked post lock */
618 ce = &entry->crit_entries[tsk_mc_crit(task)];
619 if (task->rt_param.linked_on != NO_CPU) {
620 BUG_ON(ce->linked != task);
621 link_task_to_crit(ce, NULL);
622 update_crit_position(ce);
623 if (!is_ghost(task) && entry->linked == task) {
624 update = 1;
625 link_task_to_cpu(entry, NULL);
626 }
627 } else {
628 TRACE_MC_TASK(task, "Unlinked before we got lock!\n");
629 }
630 if (update)
631 update_crit_levels(entry);
632 else
633 raw_spin_unlock(&entry->lock);
634 } else {
635 TRACE_MC_TASK(task, "Not linked to anything\n");
636 }
637
638 /* Ensure the task isn't returned by its domain */
639 dom->remove(dom, task);
640
641 raw_spin_unlock(dom->lock);
642}
643
644/**
645 * job_completion() - Update task state and re-enter it into the system.
646 * Converts tasks which have completed their execution early into ghost jobs.
647 * Caller must hold no locks.
648 */
649static void job_completion(struct task_struct *task, int forced)
650{
651 int behind;
652 TRACE_MC_TASK(task, "Completed\n");
653
654 /* Logically stop the task execution */
655 set_rt_flags(task, RT_F_SLEEP);
656 remove_from_all(task);
657
658 /* Level-A tasks cannot ever get behind */
659 behind = tsk_mc_crit(task) != CRIT_LEVEL_A && behind_server(task);
660
661 if (!forced && !is_ghost(task)) {
662 /* Task voluntarily ceased execution. Move on to next period */
663 task_release(task);
664 sched_trace_task_completion(task, forced);
665
666 /* Convert to ghost job */
667 tsk_mc_data(task)->mc_job.ghost_budget = budget_remaining(task);
668 tsk_mc_data(task)->mc_job.is_ghost = 1;
669 }
670
671 /* If the task has no ghost budget, convert back from ghost.
672 * If the task is behind, undo ghost conversion so that it
673 * can catch up.
674 */
675 if (behind || tsk_mc_data(task)->mc_job.ghost_budget == 0) {
676 TRACE_MC_TASK(task, "Not a ghost task\n");
677 tsk_mc_data(task)->mc_job.is_ghost = 0;
678 tsk_mc_data(task)->mc_job.ghost_budget = 0;
679 }
680
681 /* If server has run out of budget, wait until next release */
682 if (budget_exhausted(task)) {
683 sched_trace_server_completion(-task->pid,
684 get_server_job(task));
685 server_release(task);
686 }
687
688 /* Requeue non-blocking tasks */
689 if (is_running(task))
690 job_arrival(task);
691}
692
693/**
694 * mc_ghost_exhausted() - Complete logically running ghost task.
695 */
696#ifdef CONFIG_MERGE_TIMERS
697static void mc_ghost_exhausted(struct rt_event *e)
698{
699 struct crit_entry *ce = container_of(e, struct crit_entry, event);
700#else
701static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
702{
703 struct crit_entry *ce = container_of(timer, struct crit_entry, timer);
704#endif
705
706 unsigned long flags;
707 struct task_struct *tmp = NULL;
708
709 local_irq_save(flags);
710 TRACE("Ghost exhausted\n");
711 TRACE_CRIT_ENTRY(ce, "Firing here\n");
712
713 /* Due to race conditions, we cannot just set the linked
714 * task's budget to 0 as it may no longer be the task
715 * for which this timer was armed. Instead, update the running
716 * task time and see if this causes exhaustion.
717 */
718 raw_spin_lock(&crit_cpu(ce)->lock);
719 if (ce->linked && is_ghost(ce->linked)) {
720 update_ghost_time(ce->linked);
721 if (tsk_mc_data(ce->linked)->mc_job.ghost_budget == 0) {
722 tmp = ce->linked;
723 }
724 }
725 raw_spin_unlock(&crit_cpu(ce)->lock);
726
727 if (tmp)
728 job_completion(tmp, 0);
729
730 local_irq_restore(flags);
731#ifndef CONFIG_MERGE_TIMERS
732 return HRTIMER_NORESTART;
733#endif
734}
735
736/*
737 * The MC-CE common timer callback code for merged and non-merged timers.
738 * Returns the next time the timer should fire.
739 */
740static lt_t __ce_timer_function(struct ce_dom_data *ce_data)
741{
742 struct crit_entry *ce = get_crit_entry_for(ce_data->cpu, CRIT_LEVEL_A);
743 struct domain *dom = ce->domain;
744 struct task_struct *old_link = NULL;
745 lt_t next_timer_abs;
746
747 TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);
748
749 raw_spin_lock(dom->lock);
750
751 raw_spin_lock(&crit_cpu(ce)->lock);
752 if (ce->linked &&
753 ce->linked == ce_data->should_schedule &&
754 is_ghost(ce->linked))
755 {
756 old_link = ce->linked;
757 tsk_mc_data(ce->linked)->mc_job.ghost_budget = 0;
758 link_task_to_crit(ce, NULL);
759 }
760 raw_spin_unlock(&crit_cpu(ce)->lock);
761
762 next_timer_abs = mc_ce_timer_callback_common(dom);
763
764 /* Job completion will check for preemptions by means of calling job
765 * arrival if the task is not blocked */
766 if (NULL != old_link) {
767 STRACE("old_link " TS " so will call job completion\n", TA(old_link));
768 raw_spin_unlock(dom->lock);
769 job_completion(old_link, 0);
770 } else {
771 STRACE("old_link was null, so will call check for preempt\n");
772 raw_spin_unlock(dom->lock);
773 check_for_preempt(dom);
774 }
775 return next_timer_abs;
776}
777
778#ifdef CONFIG_MERGE_TIMERS
779static void ce_timer_function(struct rt_event *e)
780{
781 struct ce_dom_data *ce_data =
782 container_of(e, struct ce_dom_data, event);
783 unsigned long flags;
784 lt_t next_timer_abs;
785
786 TS_LVLA_RELEASE_START;
787
788 local_irq_save(flags);
789 next_timer_abs = __ce_timer_function(ce_data);
790 add_event(per_cpu(cpus, ce_data->cpu).event_group, e, next_timer_abs);
791 local_irq_restore(flags);
792
793 TS_LVLA_RELEASE_END;
794}
795#else /* else to CONFIG_MERGE_TIMERS */
796static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
797{
798 struct ce_dom_data *ce_data =
799 container_of(timer, struct ce_dom_data, timer);
800 unsigned long flags;
801 lt_t next_timer_abs;
802
803 TS_LVLA_RELEASE_START;
804
805 local_irq_save(flags);
806 next_timer_abs = __ce_timer_function(ce_data);
807 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
808 local_irq_restore(flags);
809
810 TS_LVLA_RELEASE_END;
811
812 return HRTIMER_RESTART;
813}
814#endif /* CONFIG_MERGE_TIMERS */
815
816
817/**
818 * mc_release_jobs() - Add heap of tasks to the system, check for preemptions.
819 */
820static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
821{
822 unsigned long flags;
823 struct task_struct *first = bheap_peek(rt->order, tasks)->value;
824 struct domain *dom = get_task_domain(first);
825
826 raw_spin_lock_irqsave(dom->lock, flags);
827 TRACE(TS "Jobs released\n", TA(first));
828 __merge_ready(rt, tasks);
829 check_for_preempt(dom);
830 raw_spin_unlock_irqrestore(dom->lock, flags);
831}
832
833/**
834 * mc_task_new() - Set up a new mixed-criticality task.
835 * Assumes that there are no partitioned domains after level B.
836 */
837static void mc_task_new(struct task_struct *t, int on_rq, int running)
838{
839 unsigned long flags;
840 struct cpu_entry* entry;
841 enum crit_level level = tsk_mc_crit(t);
842 char name[TASK_COMM_LEN];
843 strcpy(name, "rtspin");
844
845 local_irq_save(flags);
846 TRACE("New mixed criticality task %d\n", t->pid);
847
848 /* Assign domain */
849 if (level < CRIT_LEVEL_C)
850 entry = &per_cpu(cpus, get_partition(t));
851 else
852 entry = &per_cpu(cpus, task_cpu(t));
853 t->rt_param._domain = entry->crit_entries[level].domain;
854
855 sched_trace_container_param(t->pid, name);
856 sched_trace_server_param(-t->pid, t->pid,
857 get_exec_cost(t), get_rt_period(t));
858
859 /* Setup job params */
860 release_at(t, litmus_clock());
861 tsk_mc_data(t)->mc_job.ghost_budget = 0;
862 tsk_mc_data(t)->mc_job.is_ghost = 0;
863 if (running) {
864 BUG_ON(entry->scheduled);
865 entry->scheduled = t;
866 tsk_rt(t)->scheduled_on = entry->cpu;
867 } else {
868 t->rt_param.scheduled_on = NO_CPU;
869 }
870 t->rt_param.linked_on = NO_CPU;
871
872
873 job_arrival(t);
874
875 local_irq_restore(flags);
876}
877
878/**
879 * mc_task_wake_up() - Add the task back into its domain and check for preemptions.
880 */
881static void mc_task_wake_up(struct task_struct *task)
882{
883 unsigned long flags;
884 lt_t now = litmus_clock();
885 local_irq_save(flags);
886
887 TRACE(TS " wakes up\n", TA(task));
888 if (is_tardy(task, now)) {
889 /* Task missed its last release */
890 release_at(task, now);
891 sched_trace_task_release(task);
892 }
893 if (!is_ghost(task))
894 job_arrival(task);
895
896 local_irq_restore(flags);
897}
898
899/**
900 * mc_task_block() - Remove task from state to prevent it being run anywhere.
901 */
902static void mc_task_block(struct task_struct *task)
903{
904 unsigned long flags;
905 local_irq_save(flags);
906 TRACE(TS " blocks\n", TA(task));
907 remove_from_all(task);
908 local_irq_restore(flags);
909}
910
911/**
912 * mc_task_exit() - Remove task from the system.
913 */
914static void mc_task_exit(struct task_struct *task)
915{
916 unsigned long flags;
917 local_irq_save(flags);
918 BUG_ON(!is_realtime(task));
919 TRACE(TS " RIP\n", TA(task));
920
921 remove_from_all(task);
922 if (tsk_rt(task)->scheduled_on != NO_CPU) {
923 per_cpu(cpus, tsk_rt(task)->scheduled_on).scheduled = NULL;
924 tsk_rt(task)->scheduled_on = NO_CPU;
925 }
926
927 if (CRIT_LEVEL_A == tsk_mc_crit(task))
928 mc_ce_task_exit_common(task);
929
930 local_irq_restore(flags);
931}
932
933/**
934 * mc_admit_task() - Return true if the task is valid.
935 * Assumes there are no partitioned levels after level B.
936 */
937static long mc_admit_task(struct task_struct* task)
938{
939 const enum crit_level crit = tsk_mc_crit(task);
940 long ret;
941 if (!tsk_mc_data(task)) {
942 printk(KERN_WARNING "Tried to admit task with no criticality "
943 "level\n");
944 ret = -EINVAL;
945 goto out;
946 }
947 if (crit < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
948 printk(KERN_WARNING "Tried to admit partitioned task with no "
949 "partition\n");
950 ret = -EINVAL;
951 goto out;
952 }
953 if (crit == CRIT_LEVEL_A) {
954 ret = mc_ce_admit_task_common(task);
955 if (ret)
956 goto out;
957 }
958 printk(KERN_INFO "Admitted task with criticality level %d\n",
959 tsk_mc_crit(task));
960 ret = 0;
961out:
962 return ret;
963}
964
965/**
966 * mc_schedule() - Return next task which should be scheduled.
967 */
968static struct task_struct* mc_schedule(struct task_struct* prev)
969{
970 unsigned long flags;
971 struct domain *dom;
972 struct crit_entry *ce;
973 struct cpu_entry* entry = &__get_cpu_var(cpus);
974 int i, out_of_time, sleep, preempt, exists, blocks, global, lower;
975 struct task_struct *dtask = NULL, *ready_task = NULL, *next = NULL;
976
977 local_irq_save(flags);
978
979 /* Litmus gave up because it couldn't access the stack of the CPU
980 * on which will_schedule was migrating from. Requeue it.
981 * This really only happens in VMs.
982 */
983 if (entry->will_schedule && entry->will_schedule != prev) {
984 entry->will_schedule->rt_param.scheduled_on = NO_CPU;
985 low_prio_arrival(entry->will_schedule);
986 }
987
988 raw_spin_lock(&entry->lock);
989
990 /* Sanity checking */
991 BUG_ON(entry->scheduled && entry->scheduled != prev);
992 BUG_ON(entry->scheduled && !is_realtime(prev));
993 BUG_ON(is_realtime(prev) && !entry->scheduled);
994
995 /* Determine state */
996 exists = entry->scheduled != NULL;
997 blocks = exists && !is_running(entry->scheduled);
998 out_of_time = exists && budget_enforced(entry->scheduled) &&
999 budget_exhausted(entry->scheduled);
1000 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
1001 global = exists && is_global_task(entry->scheduled);
1002 preempt = entry->scheduled != entry->linked;
1003 lower = exists && preempt && entry->linked &&
1004 tsk_mc_crit(entry->scheduled) > tsk_mc_crit(entry->linked);
1005
1006 TRACE(TS " blocks:%d out_of_time:%d sleep:%d preempt:%d\n",
1007 TA(prev), blocks, out_of_time, sleep, preempt);
1008
1009 if (exists)
1010 prev->rt_param.scheduled_on = NO_CPU;
1011
1012 raw_spin_unlock(&entry->lock);
1013
1014
1015#ifdef CONFIG_PLUGIN_MC_REDIRECT
1016 if (smp_processor_id() == interrupt_cpu)
1017 fix_global_levels();
1018#endif
1019
1020 /* If a task blocks we have no choice but to reschedule */
1021 if (blocks)
1022 remove_from_all(entry->scheduled);
1023 /* Any task which exhausts its budget or sleeps waiting for its next
1024 * period completes unless its execution has been forcibly stopped.
1025 */
1026 if ((out_of_time || sleep) && !blocks)/* && !preempt)*/
1027 job_completion(entry->scheduled, !sleep);
1028 /* Global scheduled tasks must wait for a deschedule before they
1029 * can rejoin the global state. Rejoin them here.
1030 */
1031 else if (global && preempt && !blocks) {
1032 if (lower)
1033 low_prio_arrival(entry->scheduled);
1034 else
1035 job_arrival(entry->scheduled);
1036 }
1037
1038 /* Pick next task if none is linked */
1039 raw_spin_lock(&entry->lock);
1040 for (i = 0; i < NUM_CRIT_LEVELS && !entry->linked; i++) {
1041 ce = &entry->crit_entries[i];
1042 dom = ce->domain;
1043
1044 /* Swap locks. We cannot acquire a domain lock while
1045 * holding an entry lock or deadlocks will happen.
1046 */
1047 raw_spin_unlock(&entry->lock);
1048 raw_spin_lock(dom->lock);
1049
1050 /* Do domain stuff before grabbing CPU locks */
1051 dtask = dom->peek_ready(dom);
1052 fix_crit_position(ce);
1053
1054 raw_spin_lock(&entry->lock);
1055
1056 if (!entry->linked && !ce->linked && dtask && can_use(ce)) {
1057 dom->take_ready(dom);
1058 link_task_to_crit(ce, dtask);
1059 update_crit_position(ce);
1060 ready_task = (is_ghost(dtask)) ? NULL : dtask;
1061
1062 /* Task found! */
1063 if (ready_task) {
1064 link_task_to_cpu(entry, ready_task);
1065 raw_spin_unlock(dom->lock);
1066 update_crit_levels(entry);
1067 raw_spin_lock(&entry->lock);
1068 continue;
1069 }
1070 }
1071 raw_spin_unlock(dom->lock);
1072 }
1073
1074 /* Schedule next task */
1075 next = entry->linked;
1076 if (entry->linked)
1077 entry->linked->rt_param.scheduled_on = entry->cpu;
1078 entry->will_schedule = entry->linked;
1079 sched_state_task_picked();
1080
1081 raw_spin_unlock(&entry->lock);
1082 local_irq_restore(flags);
1083 if (next) {
1084 TRACE_MC_TASK(next, "Picked this task\n");
1085 } else if (exists && !next)
1086 TRACE_ENTRY(entry, "Becomes idle at %llu\n", litmus_clock());
1087 return next;
1088}
1089
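The scheduling loop above drops the per-CPU entry lock before taking a domain lock and only then re-acquires it, so the effective lock order is always domain-before-entry. A stand-alone POSIX-mutex sketch of that swap (purely illustrative; the names are invented and none of this is part of the patch):

#include <pthread.h>

/* invented stand-ins for a per-CPU entry lock and one domain lock */
static pthread_mutex_t entry_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t domain_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Caller holds entry_lock. To look at the domain we must not acquire
 * domain_lock on top of it, so drop entry_lock, take domain_lock, then
 * re-take entry_lock -- the same swap mc_schedule() performs per level.
 * On return both locks are held; the caller releases domain_lock once it
 * is done with the domain.
 */
static void lock_domain_then_entry(void)
{
        pthread_mutex_unlock(&entry_lock);
        pthread_mutex_lock(&domain_lock);
        /* "domain stuff" (e.g. peeking at the ready queue) would go here */
        pthread_mutex_lock(&entry_lock);
}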
1090void mc_finish_switch(struct task_struct *prev)
1091{
1092 struct cpu_entry* entry = &__get_cpu_var(cpus);
1093 entry->scheduled = is_realtime(current) ? current : NULL;
1094	TRACE_TASK(prev, "Switched away; now scheduled " TS "\n",
1095 TA(entry->scheduled));
1096}
1097
1098/*
1099 * This is the plugin's release_at callback, invoked by the task-set release
1100 * system call. Other places in this file use the generic LITMUS release_at(),
1101 * which is a different function.
1102 */
1103void mc_release_at(struct task_struct *ts, lt_t start)
1104{
1105 /* hack so that we can have CE timers start at the right time */
1106 if (CRIT_LEVEL_A == tsk_mc_crit(ts))
1107 mc_ce_release_at_common(ts, start);
1108 else
1109 release_at(ts, start);
1110}
1111
1112long mc_deactivate_plugin(void)
1113{
1114 return mc_ce_deactivate_plugin_common();
1115}
1116
1117/* **************************************************************************
1118 * Initialization
1119 * ************************************************************************** */
1120
1121/* Initialize values here so that they are allocated with the module
1122 * and destroyed when the module is unloaded.
1123 */
1124
1125/* LVL-A */
1126DEFINE_PER_CPU(struct domain_data, _mc_crit_a);
1127DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
1128DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
1129/* LVL-B */
1130DEFINE_PER_CPU(struct domain_data, _mc_crit_b);
1131DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
1132/* LVL-C */
1133static struct domain_data _mc_crit_c;
1134static rt_domain_t _mc_crit_c_rt;
1135struct bheap _mc_heap_c;
1136struct bheap_node _mc_nodes_c[NR_CPUS];
1137
1138static long mc_activate_plugin(void)
1139{
1140 struct domain_data *dom_data;
1141 struct domain *dom;
1142 struct domain_data *our_domains[NR_CPUS];
1143 int cpu, n = 0;
1144 long ret;
1145
1146#ifdef CONFIG_RELEASE_MASTER
1147 interrupt_cpu = atomic_read(&release_master_cpu);
1148#if defined(CONFIG_PLUGIN_MC_REDIRECT) || defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1149 if (NO_CPU == interrupt_cpu) {
1150 printk(KERN_ERR "LITMUS-MC: need a release master\n");
1151 ret = -EINVAL;
1152 goto out;
1153 }
1154#endif
1155#endif
1156
1157 for_each_online_cpu(cpu) {
1158 BUG_ON(NR_CPUS <= n);
1159 dom = per_cpu(cpus, cpu).crit_entries[CRIT_LEVEL_A].domain;
1160 dom_data = domain_data(dom);
1161 our_domains[cpu] = dom_data;
1162#if defined(CONFIG_MERGE_TIMERS) && defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1163 per_cpu(cpus, cpu).event_group =
1164 get_event_group_for(interrupt_cpu);
1165#elif defined(CONFIG_MERGE_TIMERS) && !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1166 per_cpu(cpus, cpu).event_group = get_event_group_for(cpu);
1167#endif
1168 n++;
1169 }
1170 ret = mc_ce_set_domains(n, our_domains);
1171 if (ret)
1172 goto out;
1173 ret = mc_ce_activate_plugin_common();
1174out:
1175 return ret;
1176}
1177
1178
1179static void mc_release_ts(lt_t time)
1180{
1181 int i, cpu, base_id = 0, cont_id = -1;
1182 char name[TASK_COMM_LEN];
1183 enum crit_level level;
1184 struct cpu_entry *entry;
1185 struct crit_entry *ce;
1186
1187 level = CRIT_LEVEL_A;
1188 strcpy(name, "LVL-A");
1189 for_each_online_cpu(cpu) {
1190 entry = &per_cpu(cpus, cpu);
1191 trace_litmus_container_param(++cont_id, (const char*)&name);
1192 ce = &entry->crit_entries[level];
1193 sched_trace_server_param(sid(ce), cont_id, 0, 0);
1194 }
1195
1196 level = CRIT_LEVEL_B;
1197 strcpy(name, "LVL-B");
1198 for_each_online_cpu(cpu) {
1199 entry = &per_cpu(cpus, cpu);
1200 trace_litmus_container_param(++cont_id, (const char*)&name);
1201 ce = &entry->crit_entries[level];
1202 sched_trace_server_param(sid(ce), cont_id, 0, 0);
1203 }
1204
1205 level = CRIT_LEVEL_C;
1206 strcpy(name, "LVL-C");
1207 trace_litmus_container_param(++cont_id, (const char*)&name);
1208 for_each_online_cpu(cpu) {
1209 entry = &per_cpu(cpus, cpu);
1210 ce = &entry->crit_entries[level];
1211 sched_trace_server_param(sid(ce), cont_id, 0, 0);
1212 }
1213
1214
1215
1216}
1217
1218static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
1219 .plugin_name = "MC",
1220 .task_new = mc_task_new,
1221 .complete_job = complete_job,
1222 .task_exit = mc_task_exit,
1223 .schedule = mc_schedule,
1224 .task_wake_up = mc_task_wake_up,
1225 .task_block = mc_task_block,
1226 .admit_task = mc_admit_task,
1227 .activate_plugin = mc_activate_plugin,
1228 .release_at = mc_release_at,
1229 .deactivate_plugin = mc_deactivate_plugin,
1230 .finish_switch = mc_finish_switch,
1231 .release_ts = mc_release_ts,
1232};
1233
1234static void init_crit_entry(struct crit_entry *ce, enum crit_level level,
1235 struct domain_data *dom_data,
1236 struct bheap_node *node)
1237{
1238 ce->level = level;
1239 ce->linked = NULL;
1240 ce->node = node;
1241 ce->domain = &dom_data->domain;
1242 ce->state = CS_ACTIVE;
1243#ifdef CONFIG_MERGE_TIMERS
1244 init_event(&ce->event, level, mc_ghost_exhausted,
1245 event_list_alloc(GFP_ATOMIC));
1246#else
1247 hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1248 ce->timer.function = mc_ghost_exhausted;
1249#endif
1250
1251}
1252
1253static void init_local_domain(struct cpu_entry *entry, struct domain_data *dom_data,
1254 enum crit_level level)
1255{
1256 dom_data->heap = NULL;
1257 dom_data->crit_entry = &entry->crit_entries[level];
1258 init_crit_entry(dom_data->crit_entry, level, dom_data, NULL);
1259}
1260
1261static void init_global_domain(struct domain_data *dom_data, enum crit_level level,
1262 struct bheap *heap, struct bheap_node *nodes)
1263{
1264 int cpu;
1265 struct cpu_entry *entry;
1266 struct crit_entry *ce;
1267 struct bheap_node *node;
1268
1269 dom_data->crit_entry = NULL;
1270 dom_data->heap = heap;
1271 bheap_init(heap);
1272
1273 for_each_online_cpu(cpu) {
1274 entry = &per_cpu(cpus, cpu);
1275 node = &nodes[cpu];
1276 ce = &entry->crit_entries[level];
1277 init_crit_entry(ce, level, dom_data, node);
1278 bheap_node_init(&ce->node, ce);
1279 bheap_insert(cpu_lower_prio, heap, node);
1280 }
1281}
1282
1283static inline void init_edf_domain(struct domain *dom, rt_domain_t *rt,
1284 enum crit_level prio, int is_partitioned, int cpu)
1285{
1286 pd_domain_init(dom, rt, edf_ready_order, NULL,
1287 mc_release_jobs, mc_preempt_needed,
1288 edf_higher_prio);
1289 rt->level = prio;
1290#if defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1291 /* All timers are on one CPU and release-master is using the event
1292 * merging interface as well. */
1293 BUG_ON(NO_CPU == interrupt_cpu);
1294 rt->event_group = get_event_group_for(interrupt_cpu);
1295 rt->prio = prio;
1296#elif defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && !defined(CONFIG_MERGE_TIMERS)
1297 /* Using release master, but not merging timers. */
1298 rt->release_master = interrupt_cpu;
1299#elif !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1300 /* Merge the timers, but don't move them to the release master. */
1301 if (is_partitioned) {
1302 rt->event_group = get_event_group_for(cpu);
1303 } else {
1304		/* Global timers will be added to the event group of whichever
1305		 * CPU is executing when add_event() is called.
1306 */
1307 rt->event_group = NULL;
1308 }
1309 rt->prio = prio;
1310#endif
1311}
1312
1313struct domain_data *ce_domain_for(int);
1314static int __init init_mc(void)
1315{
1316 int cpu;
1317 struct cpu_entry *entry;
1318 struct domain_data *dom_data;
1319 rt_domain_t *rt;
1320 raw_spinlock_t *a_dom_lock, *b_dom_lock, *c_dom_lock; /* For lock debugger */
1321 struct ce_dom_data *ce_data;
1322
1323 for_each_online_cpu(cpu) {
1324 entry = &per_cpu(cpus, cpu);
1325
1326 /* CPU */
1327 entry->cpu = cpu;
1328 entry->scheduled = NULL;
1329 entry->linked = NULL;
1330
1331 raw_spin_lock_init(&entry->lock);
1332
1333#ifdef CONFIG_PLUGIN_MC_REDIRECT
1334 raw_spin_lock_init(&entry->redir_lock);
1335 INIT_LIST_HEAD(&entry->redir);
1336#endif
1337
1338 /* CRIT_LEVEL_A */
1339 dom_data = &per_cpu(_mc_crit_a, cpu);
1340 ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
1341 a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
1342 raw_spin_lock_init(a_dom_lock);
1343 ce_domain_init(&dom_data->domain,
1344 a_dom_lock, ce_requeue, ce_peek_and_take_ready,
1345 ce_peek_and_take_ready, mc_preempt_needed,
1346 ce_higher_prio, ce_data, cpu,
1347 ce_timer_function);
1348 init_local_domain(entry, dom_data, CRIT_LEVEL_A);
1349 dom_data->domain.name = "LVL-A";
1350
1351 /* CRIT_LEVEL_B */
1352 dom_data = &per_cpu(_mc_crit_b, cpu);
1353 rt = &per_cpu(_mc_crit_b_rt, cpu);
1354 init_local_domain(entry, dom_data, CRIT_LEVEL_B);
1355 init_edf_domain(&dom_data->domain, rt, CRIT_LEVEL_B, 1, cpu);
1356 b_dom_lock = dom_data->domain.lock;
1357 raw_spin_lock_init(b_dom_lock);
1358 dom_data->domain.name = "LVL-B";
1359 }
1360
1361 /* CRIT_LEVEL_C */
1362 init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
1363 &_mc_heap_c, _mc_nodes_c);
1364 init_edf_domain(&_mc_crit_c.domain, &_mc_crit_c_rt, CRIT_LEVEL_C,
1365 0, NO_CPU);
1366 c_dom_lock = _mc_crit_c.domain.lock;
1367 raw_spin_lock_init(c_dom_lock);
1368 _mc_crit_c.domain.name = "LVL-C";
1369
1370 return register_sched_plugin(&mc_plugin);
1371}
1372
1373module_init(init_mc);
diff --git a/litmus/sched_mc_ce.c b/litmus/sched_mc_ce.c
new file mode 100644
index 000000000000..702b46da93d5
--- /dev/null
+++ b/litmus/sched_mc_ce.c
@@ -0,0 +1,1052 @@
1/**
2 * litmus/sched_mc_ce.c
3 *
4 * The Cyclic Executive (CE) scheduler used by the mixed criticality scheduling
5 * algorithm.
6 */
7
8#include <asm/atomic.h>
9#include <asm/uaccess.h>
10
11#include <linux/module.h>
12#include <linux/percpu.h>
13#include <linux/hrtimer.h>
14#include <linux/pid.h>
15#include <linux/sched.h>
16#include <linux/proc_fs.h>
17
18#include <litmus/litmus.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/rt_domain.h>
21#include <litmus/rt_param.h>
22#include <litmus/litmus_proc.h>
23#include <litmus/sched_trace.h>
24#include <litmus/jobs.h>
25#include <litmus/sched_mc.h>
26#include <litmus/ce_domain.h>
27
28static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp;
29
30#define using_linux_plugin() (litmus == &linux_sched_plugin)
31
32/* get a reference to struct domain for a CPU */
33#define get_domain_for(cpu) (&per_cpu(domains, cpu)->domain)
34
35#define get_pid_table(cpu) (&per_cpu(ce_pid_table, cpu))
36#define get_pid_entry(cpu, idx) (&(get_pid_table(cpu)->entries[idx]))
37
38static atomic_t start_time_set = ATOMIC_INIT(-1);
39static atomic64_t start_time = ATOMIC64_INIT(0);
40static struct proc_dir_entry *mc_ce_dir = NULL, *ce_file = NULL;
41
42/*
43 * Cache the budget along with the struct PID for a task so that we don't need
44 * to fetch its task_struct every time we check to see what should be
45 * scheduled.
46 */
47struct ce_pid_entry {
48 struct pid *pid;
49 lt_t budget;
50 /* accumulated (summed) budgets, including this one */
51 lt_t acc_time;
52 unsigned int expected_job;
53};
54
55/*
56 * Each CPU needs a mapping of level A ID (integer) to struct pid so that we
57 * can get its task struct.
58 */
59struct ce_pid_table {
60 struct ce_pid_entry entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
61 int num_pid_entries;
62 lt_t cycle_time;
63};
64
65DEFINE_PER_CPU(struct ce_pid_table, ce_pid_table);
66
67/*
 68 * Per-CPU lookup of the domain for a given CPU. Set with the
 69 * mc_ce_set_domains function; this must be done before activating plugins. Be
 70 * careful when using 'domains' as a variable name elsewhere in this file.
71 */
72
73DEFINE_PER_CPU(struct domain_data*, domains);
74
75/*
76 * The domains and other data used by the MC-CE plugin when it runs alone.
77 */
78DEFINE_PER_CPU(struct domain_data, _mc_ce_doms);
79DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data);
80DEFINE_PER_CPU(raw_spinlock_t, _mc_ce_dom_locks);
81
82#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
83static int interrupt_cpu;
84#endif
85
86long mc_ce_set_domains(const int n, struct domain_data *domains_in[])
87{
88 const int max = (NR_CPUS < n) ? NR_CPUS : n;
89 struct domain_data *new_dom = NULL;
90 int i, ret;
91 if (!using_linux_plugin()) {
92 printk(KERN_WARNING "can't set MC-CE domains when not using "
93 "Linux scheduler.\n");
94 ret = -EINVAL;
95 goto out;
96 }
97 for (i = 0; i < max; ++i) {
98 new_dom = domains_in[i];
99 per_cpu(domains, i) = new_dom;
100 }
101 ret = 0;
102out:
103 return ret;
104}
105
106unsigned int mc_ce_get_expected_job(const int cpu, const int idx)
107{
108 const struct ce_pid_table *pid_table = get_pid_table(cpu);
109 BUG_ON(0 > cpu);
110 BUG_ON(0 > idx);
111 BUG_ON(pid_table->num_pid_entries <= idx);
112 return pid_table->entries[idx].expected_job;
113}
114
115/*
116 * Get the offset into the cycle taking the start time into account.
117 */
118static inline lt_t get_cycle_offset(const lt_t when, const lt_t cycle_time)
119{
120 long long st = atomic64_read(&start_time);
121 lt_t offset = (when - st) % cycle_time;
122 TRACE("when: %llu cycle_time: %llu start_time: %lld offset %llu\n",
123 when, cycle_time, st, offset);
124 return offset;
125}
126
127/*
128 * The user-space job-completion call sets the RT_F_SLEEP flag and then
129 * calls schedule(). This function is used when schedule() puts such a task to sleep.
130 *
131 * Do not call prepare_for_next_period on Level-A tasks!
132 */
133static void mc_ce_job_completion(struct domain *dom, struct task_struct *ts)
134{
135 const int cpu = task_cpu(ts);
136 const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
137 const struct ce_pid_entry *pid_entry = get_pid_entry(cpu, idx);
138 unsigned int just_finished;
139
140 TRACE_TASK(ts, "Completed\n");
141
142 /* sched_trace_task_completion(ts, 0); */
143 /* post-increment is important here */
144 just_finished = (tsk_rt(ts)->job_params.job_no)++;
145
146 /* Job completes in expected window: everything is normal.
147 * Job completes in an earlier window: BUG(), that's wrong.
148 * Job completes in a later window: The job is behind.
149 */
150 if (just_finished < pid_entry->expected_job) {
151 /* this job is already released because it's running behind */
152 set_rt_flags(ts, RT_F_RUNNING);
153 TRACE_TASK(ts, "appears behind: the expected job is %u but "
154 "job %u just completed\n",
155 pid_entry->expected_job, just_finished);
156 } else if (pid_entry->expected_job < just_finished) {
157		printk(KERN_CRIT "job %u completed but the expected job is %u; "
158				"this is too early\n", just_finished,
159 pid_entry->expected_job);
160 BUG();
161 }
162}
163
164
165/*
166 * Return the index into the PID entries table of what to schedule next.
167 * Don't call if the table is empty. Assumes the caller has the domain lock.
168 * The offset parameter is the offset into the cycle.
169 *
170 * TODO Currently O(n) in the number of tasks on the CPU. Binary search?
171 */
172static int mc_ce_schedule_at(const struct domain *dom, lt_t offset)
173{
174 const struct ce_dom_data *ce_data = dom->data;
175 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
176 const struct ce_pid_entry *pid_entry = NULL;
177 int idx;
178
179 BUG_ON(pid_table->cycle_time < 1);
180 BUG_ON(pid_table->num_pid_entries < 1);
181
182 for (idx = 0; idx < pid_table->num_pid_entries; ++idx) {
183 pid_entry = &pid_table->entries[idx];
184 if (offset < pid_entry->acc_time) {
185 /* found task to schedule in this window */
186 break;
187 }
188 }
189 /* can only happen if cycle_time is not right */
190 BUG_ON(pid_entry->acc_time > pid_table->cycle_time);
191 TRACE("schedule at returning task %d for CPU %d\n", idx, ce_data->cpu);
192 return idx;
193}
194
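As the TODO above suggests, the linear scan can be replaced by a binary search, because acc_time is a non-decreasing prefix sum of the slot budgets. A stand-alone sketch of that lookup (illustrative only; the entry layout is simplified and this is not part of the patch):

#include <stdint.h>

typedef uint64_t lt_t;

/* simplified stand-in for struct ce_pid_entry: only the accumulated time */
struct slot { lt_t acc_time; };

/*
 * Return the index of the first slot whose acc_time is strictly greater
 * than 'offset' -- the same slot the linear scan in mc_ce_schedule_at()
 * picks. Assumes n >= 1 and offset < slots[n - 1].acc_time.
 */
static int schedule_at_bsearch(const struct slot *slots, int n, lt_t offset)
{
        int lo = 0, hi = n - 1;

        while (lo < hi) {
                int mid = lo + (hi - lo) / 2;
                if (offset < slots[mid].acc_time)
                        hi = mid;       /* answer is at mid or earlier */
                else
                        lo = mid + 1;   /* answer is after mid */
        }
        return lo;
}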
195static struct task_struct *mc_ce_schedule(struct task_struct *prev)
196{
197 struct domain *dom = get_domain_for(smp_processor_id());
198 struct ce_dom_data *ce_data = dom->data;
199 struct task_struct *next = NULL;
200 int exists, sleep, should_sched_exists, should_sched_blocked,
201 should_sched_asleep;
202
203 raw_spin_lock(dom->lock);
204
205 /* sanity checking */
206 BUG_ON(ce_data->scheduled && ce_data->scheduled != prev);
207 BUG_ON(ce_data->scheduled && !is_realtime(prev));
208 BUG_ON(is_realtime(prev) && !ce_data->scheduled);
209
210 exists = NULL != ce_data->scheduled;
211 sleep = exists && RT_F_SLEEP == get_rt_flags(ce_data->scheduled);
212
213 TRACE("exists: %d, sleep: %d\n", exists, sleep);
214
215 if (sleep)
216 mc_ce_job_completion(dom, ce_data->scheduled);
217
218 /* these checks must go after the call to mc_ce_job_completion in case
219	 * a late task needs to be scheduled again right away and it's the only
220 * task on a core
221 */
222 should_sched_exists = NULL != ce_data->should_schedule;
223 should_sched_blocked = should_sched_exists &&
224 !is_running(ce_data->should_schedule);
225 should_sched_asleep = should_sched_exists &&
226 RT_F_SLEEP == get_rt_flags(ce_data->should_schedule);
227
228 TRACE("should_sched_exists: %d, should_sched_blocked: %d, "
229 "should_sched_asleep: %d\n", should_sched_exists,
230 should_sched_blocked, should_sched_asleep);
231
232 if (should_sched_exists && !should_sched_blocked &&
233 !should_sched_asleep) {
234 /*
235 * schedule the task that should be executing in the cyclic
236 * schedule if it is not blocked and not sleeping
237 */
238 next = ce_data->should_schedule;
239 }
240 sched_state_task_picked();
241 raw_spin_unlock(dom->lock);
242 return next;
243}
244
245static void mc_ce_finish_switch(struct task_struct *prev)
246{
247 struct domain *dom = get_domain_for(smp_processor_id());
248 struct ce_dom_data *ce_data = dom->data;
249
250 TRACE("finish switch\n");
251
252 if (is_realtime(current) && CRIT_LEVEL_A == tsk_mc_crit(current))
253 ce_data->scheduled = current;
254 else
255 ce_data->scheduled = NULL;
256}
257
258/*
259 * The admit-task callback checks whether this task is permitted to enter the system.
260 * Here we look up the task's PID structure and save it in the proper slot on
261 * the CPU this task will run on.
262 */
263long mc_ce_admit_task_common(struct task_struct *ts)
264{
265 struct domain *dom = get_domain_for(get_partition(ts));
266 struct ce_dom_data *ce_data = dom->data;
267 struct mc_data *mcd = tsk_mc_data(ts);
268 struct pid *pid = NULL;
269 long retval = -EINVAL;
270 const int lvl_a_id = mcd->mc_task.lvl_a_id;
271 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
272
273 BUG_ON(get_partition(ts) != ce_data->cpu);
274
275 /* check the task has migrated to the right CPU (like in sched_cedf) */
276 if (task_cpu(ts) != get_partition(ts)) {
277		printk(KERN_INFO "litmus: %d admitted on CPU %d but wants %d\n",
278 ts->pid, task_cpu(ts), get_partition(ts));
279 goto out;
280 }
281
282 /* only level A tasks can be CE */
283 if (!mcd || CRIT_LEVEL_A != tsk_mc_crit(ts)) {
284 printk(KERN_INFO "litmus: non-MC or non level A task %d\n",
285 ts->pid);
286 goto out;
287 }
288
289 /* try and get the task's PID structure */
290 pid = get_task_pid(ts, PIDTYPE_PID);
291 if (IS_ERR_OR_NULL(pid)) {
292 printk(KERN_INFO "litmus: couldn't get pid struct for %d\n",
293 ts->pid);
294 goto out;
295 }
296
297 if (lvl_a_id >= pid_table->num_pid_entries) {
298		printk(KERN_INFO "litmus: level A id exceeds the expected "
299				"number of tasks %d for task %d on cpu %d\n",
300 pid_table->num_pid_entries, ts->pid,
301 get_partition(ts));
302 goto out_put_pid;
303 }
304 if (pid_table->entries[lvl_a_id].pid) {
305		printk(KERN_INFO "litmus: already have pid info saved for id: %d cpu: %d\n",
306 lvl_a_id, get_partition(ts));
307 goto out_put_pid;
308 }
309 if (get_exec_cost(ts) >= pid_table->entries[lvl_a_id].budget) {
310 printk(KERN_INFO "litmus: execution cost %llu is larger than "
311 "the budget %llu\n",
312 get_exec_cost(ts),
313 pid_table->entries[lvl_a_id].budget);
314 goto out_put_pid;
315 }
316 pid_table->entries[lvl_a_id].pid = pid;
317 retval = 0;
318 /* don't call put_pid if we are successful */
319 goto out;
320
321out_put_pid:
322 put_pid(pid);
323out:
324 return retval;
325}
326
327static long mc_ce_admit_task(struct task_struct *ts)
328{
329 struct domain *dom = get_domain_for(get_partition(ts));
330 unsigned long flags, retval;
331 raw_spin_lock_irqsave(dom->lock, flags);
332 retval = mc_ce_admit_task_common(ts);
333 raw_spin_unlock_irqrestore(dom->lock, flags);
334 return retval;
335}
336
337/*
338 * Called to set up a new real-time task (after the admit_task callback).
339 * At this point the task's struct PID is already hooked up on the destination
340 * CPU. The task may already be running.
341 */
342static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
343{
344 const int cpu = task_cpu(ts);
345 struct domain *dom = get_domain_for(cpu);
346 struct ce_dom_data *ce_data = dom->data;
347 struct ce_pid_table *pid_table = get_pid_table(cpu);
348 struct pid *pid_should_be_running;
349 struct ce_pid_entry *pid_entry;
350 unsigned long flags;
351 int idx, should_be_running;
352 lt_t offset;
353
354 raw_spin_lock_irqsave(dom->lock, flags);
355 pid_entry = get_pid_entry(cpu, tsk_mc_data(ts)->mc_task.lvl_a_id);
356 /* initialize some task state */
357 set_rt_flags(ts, RT_F_RUNNING);
358
359 /* have to call mc_ce_schedule_at because the task only gets a PID
360 * entry after calling admit_task */
361 offset = get_cycle_offset(litmus_clock(), pid_table->cycle_time);
362 idx = mc_ce_schedule_at(dom, offset);
363 pid_should_be_running = get_pid_entry(cpu, idx)->pid;
364 rcu_read_lock();
365 should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID));
366 rcu_read_unlock();
367 if (running) {
368 /* admit task checks that the task is not on the wrong CPU */
369 BUG_ON(task_cpu(ts) != get_partition(ts));
370 BUG_ON(ce_data->scheduled);
371 ce_data->scheduled = ts;
372
373 if (should_be_running)
374 ce_data->should_schedule = ts;
375 else
376 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
377 } else if (!running && should_be_running) {
378 ce_data->should_schedule = ts;
379 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
380 }
381 raw_spin_unlock_irqrestore(dom->lock, flags);
382}
383
384/*
385 * Called to re-introduce a task after blocking.
386 * Can potentially be called multiple times.
387 */
388static void mc_ce_task_wake_up(struct task_struct *ts)
389{
390 struct domain *dom = get_domain_for(get_partition(ts));
391 struct ce_dom_data *ce_data = dom->data;
392 unsigned long flags;
393
394 TRACE_TASK(ts, "wake up\n");
395
396 raw_spin_lock_irqsave(dom->lock, flags);
397 if (ts == ce_data->should_schedule && ts != ce_data->scheduled)
398 preempt_if_preemptable(ts, ce_data->cpu);
399 raw_spin_unlock_irqrestore(dom->lock, flags);
400}
401
402/*
403 * Called to notify the plugin of a blocking real-time task. Only called for
404 * real-time tasks and before schedule is called.
405 */
406static void mc_ce_task_block(struct task_struct *ts)
407{
408 /* nothing to do because it will be taken care of in schedule */
409 TRACE_TASK(ts, "blocked\n");
410}
411
412/*
413 * Called when a task switches from RT mode back to normal mode.
414 */
415void mc_ce_task_exit_common(struct task_struct *ts)
416{
417 struct domain *dom = get_domain_for(get_partition(ts));
418 struct ce_dom_data *ce_data = dom->data;
419 unsigned long flags;
420 struct pid *pid;
421 const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
422 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
423
424 BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
425 BUG_ON(lvl_a_id >= pid_table->num_pid_entries);
426
427 raw_spin_lock_irqsave(dom->lock, flags);
428 pid = pid_table->entries[lvl_a_id].pid;
429 BUG_ON(!pid);
430 put_pid(pid);
431 pid_table->entries[lvl_a_id].pid = NULL;
432 if (ce_data->scheduled == ts)
433 ce_data->scheduled = NULL;
434 if (ce_data->should_schedule == ts)
435 ce_data->should_schedule = NULL;
436 raw_spin_unlock_irqrestore(dom->lock, flags);
437}
438
439/***********************************************************
440 * Timer stuff
441 **********************************************************/
442
443/*
444 * Returns the next absolute time that the timer should fire.
445 */
446lt_t mc_ce_timer_callback_common(struct domain *dom)
447{
448 /* relative and absolute times for cycles */
449 lt_t now, offset_rel, cycle_start_abs, next_timer_abs;
450 struct task_struct *should_schedule;
451 struct ce_pid_table *pid_table;
452 struct ce_pid_entry *pid_entry;
453 struct ce_dom_data *ce_data;
454 int idx, budget_overrun;
455
456 ce_data = dom->data;
457 pid_table = get_pid_table(ce_data->cpu);
458
459	/* Based on the current time, figure out the offset into the cycle
460 * and the cycle's start time, and determine what should be scheduled.
461 */
462 now = litmus_clock();
463 offset_rel = get_cycle_offset(now, pid_table->cycle_time);
464 cycle_start_abs = now - offset_rel;
465 idx = mc_ce_schedule_at(dom, offset_rel);
466 pid_entry = get_pid_entry(ce_data->cpu, idx);
467 next_timer_abs = cycle_start_abs + pid_entry->acc_time;
468
469 STRACE("timer: now: %llu offset_rel: %llu cycle_start_abs: %llu "
470 "next_timer_abs: %llu\n", now, offset_rel,
471 cycle_start_abs, next_timer_abs);
472
473 /* get the task_struct (pid_task can accept a NULL) */
474 rcu_read_lock();
475 should_schedule = pid_task(pid_entry->pid, PIDTYPE_PID);
476 rcu_read_unlock();
477 ce_data->should_schedule = should_schedule;
478
479 if (should_schedule && 0 == atomic_read(&start_time_set)) {
480 /*
481 * If jobs are not overrunning their budgets, then this
482 * should not happen.
483 */
484 pid_entry->expected_job++;
485 budget_overrun = pid_entry->expected_job !=
486 tsk_rt(should_schedule)->job_params.job_no;
487 if (budget_overrun)
488 TRACE_MC_TASK(should_schedule,
489 "timer expected job number: %u "
490 "but current job: %u\n",
491 pid_entry->expected_job,
492 tsk_rt(should_schedule)->job_params.job_no);
493 }
494
495 if (ce_data->should_schedule) {
496 tsk_rt(should_schedule)->job_params.deadline =
497 cycle_start_abs + pid_entry->acc_time;
498 tsk_rt(should_schedule)->job_params.release =
499 tsk_rt(should_schedule)->job_params.deadline -
500 pid_entry->budget;
501 tsk_rt(should_schedule)->job_params.exec_time = 0;
502 /* sched_trace_task_release(should_schedule); */
503 set_rt_flags(ce_data->should_schedule, RT_F_RUNNING);
504 }
505 return next_timer_abs;
506}
507
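To make the timer arithmetic above concrete, here is a self-contained walk-through with hypothetical numbers: two level-A slots of 10 ms and 20 ms give a 30 ms cycle, and the callback runs 75 ms after start_time (not part of the patch, just an illustration):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t lt_t;

int main(void)
{
        /* hypothetical table: acc_time is the prefix sum of the budgets */
        lt_t acc_time[] = { 10000000ULL, 30000000ULL }; /* 10 ms, 10+20 ms */
        lt_t cycle_time = 30000000ULL;                  /* 30 ms cycle */
        lt_t start = 0, now = 75000000ULL;              /* 75 ms after start */

        lt_t offset_rel = (now - start) % cycle_time;   /* 15 ms into the cycle */
        lt_t cycle_start_abs = now - offset_rel;        /* this cycle began at 60 ms */

        int idx = 0;
        while (offset_rel >= acc_time[idx])             /* same pick as mc_ce_schedule_at() */
                idx++;
        lt_t next_timer_abs = cycle_start_abs + acc_time[idx];

        /* prints: idx=1 next=90000000 (the second slot ends at 90 ms) */
        printf("idx=%d next=%llu\n", idx, (unsigned long long)next_timer_abs);
        return 0;
}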
508/*
509 * What to do when a timer fires. The timer should only be armed if the number
510 * of PID entries is positive.
511 */
512#ifdef CONFIG_MERGE_TIMERS
513static void mc_ce_timer_callback(struct rt_event *e)
514#else
515static enum hrtimer_restart mc_ce_timer_callback(struct hrtimer *timer)
516#endif
517{
518 struct ce_dom_data *ce_data;
519 unsigned long flags;
520 struct domain *dom;
521 lt_t next_timer_abs;
522#ifdef CONFIG_MERGE_TIMERS
523 struct event_group *event_group;
524 ce_data = container_of(e, struct ce_dom_data, event);
525	/* use the same CPU the callback is executing on by passing NO_CPU */
526 event_group = get_event_group_for(NO_CPU);
527#else /* CONFIG_MERGE_TIMERS */
528 ce_data = container_of(timer, struct ce_dom_data, timer);
529#endif
530 dom = get_domain_for(ce_data->cpu);
531
532 TRACE("timer callback on CPU %d (before lock)\n", ce_data->cpu);
533
534 raw_spin_lock_irqsave(dom->lock, flags);
535 next_timer_abs = mc_ce_timer_callback_common(dom);
536
537 /* setup an event or timer for the next release in the CE schedule */
538#ifdef CONFIG_MERGE_TIMERS
539 add_event(event_group, e, next_timer_abs);
540#else
541 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
542#endif
543
544 if (ce_data->scheduled != ce_data->should_schedule)
545 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
546
547 raw_spin_unlock_irqrestore(dom->lock, flags);
548
549#ifndef CONFIG_MERGE_TIMERS
550 return HRTIMER_RESTART;
551#endif
552}
553
554/*
555 * Cancel timers on all CPUs. Returns 1 if any were active.
556 */
557static int cancel_all_timers(void)
558{
559 struct ce_dom_data *ce_data;
560 struct domain *dom;
561 int cpu, ret = 0;
562#ifndef CONFIG_MERGE_TIMERS
563 int cancel_res;
564#endif
565
566 TRACE("cancel all timers\n");
567
568 for_each_online_cpu(cpu) {
569 dom = get_domain_for(cpu);
570 ce_data = dom->data;
571 ce_data->should_schedule = NULL;
572#ifdef CONFIG_MERGE_TIMERS
573 cancel_event(&ce_data->event);
574#else
575 cancel_res = hrtimer_cancel(&ce_data->timer);
576 atomic_set(&ce_data->timer_info.state,
577 HRTIMER_START_ON_INACTIVE);
578 ret = ret || cancel_res;
579#endif
580 }
581 return ret;
582}
583
584/*
585 * Arm all timers so that they start at the new value of start time.
586 * Any CPU without CE PID entries won't have a timer armed.
587 * All timers should be canceled before calling this.
588 */
589static void arm_all_timers(void)
590{
591 struct domain *dom;
592 struct ce_dom_data *ce_data;
593 struct ce_pid_table *pid_table;
594 int cpu, idx, cpu_for_timer;
595 const lt_t start = atomic64_read(&start_time);
596
597 TRACE("arm all timers\n");
598
599 for_each_online_cpu(cpu) {
600 dom = get_domain_for(cpu);
601 ce_data = dom->data;
602 pid_table = get_pid_table(cpu);
603 if (0 == pid_table->num_pid_entries)
604 continue;
605 for (idx = 0; idx < pid_table->num_pid_entries; idx++) {
606 pid_table->entries[idx].expected_job = 0;
607 }
608#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
609 cpu_for_timer = interrupt_cpu;
610#else
611 cpu_for_timer = cpu;
612#endif
613
614#ifdef CONFIG_MERGE_TIMERS
615 add_event(get_event_group_for(cpu_for_timer),
616 &ce_data->event, start);
617#else
618 hrtimer_start_on(cpu_for_timer, &ce_data->timer_info,
619 &ce_data->timer, ns_to_ktime(start),
620 HRTIMER_MODE_ABS_PINNED);
621#endif
622 }
623}
624
625/*
626 * There are no real releases in the CE, but the task release syscall will
627 * call this. We can re-set our notion of the CE period start to make
628 * the schedule look pretty.
629 */
630void mc_ce_release_at_common(struct task_struct *ts, lt_t start)
631{
632 TRACE_TASK(ts, "release at\n");
633 if (atomic_inc_and_test(&start_time_set)) {
634 /* in this case, we won the race */
635 cancel_all_timers();
636 atomic64_set(&start_time, start);
637 arm_all_timers();
638 } else
639 atomic_dec(&start_time_set);
640}
641
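The atomic_inc_and_test() above works because start_time_set is initialized to -1: the first caller increments it to 0 and wins, while every later caller sees a positive value and undoes its increment. A user-space sketch of the same latch using C11 atomics (illustrative only, not part of the patch):

#include <stdatomic.h>
#include <stdbool.h>

/* -1 means "release time not set yet", mirroring start_time_set above */
static atomic_int start_time_set = ATOMIC_VAR_INIT(-1);

/* Returns true for exactly one caller until the latch is reset to -1. */
static bool claim_release(void)
{
        /* atomic_inc_and_test(): increment, then test the result for zero */
        if (atomic_fetch_add(&start_time_set, 1) + 1 == 0)
                return true;                    /* won the race: -1 -> 0 */
        atomic_fetch_sub(&start_time_set, 1);   /* lost: undo the increment */
        return false;
}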
642long mc_ce_activate_plugin_common(void)
643{
644 struct ce_dom_data *ce_data;
645 struct domain *dom;
646 long ret;
647 int cpu;
648
649#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
650 interrupt_cpu = atomic_read(&release_master_cpu);
651 if (NO_CPU == interrupt_cpu) {
652 printk(KERN_ERR "LITMUS: MC-CE needs a release master\n");
653 ret = -EINVAL;
654 goto out;
655 }
656#endif
657
658 for_each_online_cpu(cpu) {
659 dom = get_domain_for(cpu);
660 ce_data = dom->data;
661 ce_data->scheduled = NULL;
662 ce_data->should_schedule = NULL;
663 }
664
665 atomic_set(&start_time_set, -1);
666 atomic64_set(&start_time, litmus_clock());
667 /* may not want to arm timers on activation, just after release */
668 arm_all_timers();
669 ret = 0;
670out:
671 return ret;
672}
673
674static long mc_ce_activate_plugin(void)
675{
676 struct domain_data *our_domains[NR_CPUS];
677 int cpu, n = 0;
678 long ret;
679
680 for_each_online_cpu(cpu) {
681 BUG_ON(NR_CPUS <= n);
682 our_domains[cpu] = &per_cpu(_mc_ce_doms, cpu);
683 n++;
684 }
685 ret = mc_ce_set_domains(n, our_domains);
686 if (ret)
687 goto out;
688 ret = mc_ce_activate_plugin_common();
689out:
690 return ret;
691}
692
693static void clear_pid_entries(void)
694{
695 struct ce_pid_table *pid_table = NULL;
696 int cpu, entry;
697
698 for_each_online_cpu(cpu) {
699 pid_table = get_pid_table(cpu);
700 pid_table->num_pid_entries = 0;
701 pid_table->cycle_time = 0;
702 for (entry = 0; entry < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS;
703 ++entry) {
704 if (NULL != pid_table->entries[entry].pid) {
705 put_pid(pid_table->entries[entry].pid);
706 pid_table->entries[entry].pid = NULL;
707 }
708 pid_table->entries[entry].budget = 0;
709 pid_table->entries[entry].acc_time = 0;
710 pid_table->entries[entry].expected_job = 0;
711 }
712 }
713}
714
715long mc_ce_deactivate_plugin_common(void)
716{
717 int cpu;
718 cancel_all_timers();
719 for_each_online_cpu(cpu) {
720 per_cpu(domains, cpu) = NULL;
721 }
722 return 0;
723}
724
725/* Plugin object */
726static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp = {
727 .plugin_name = "MC-CE",
728 .admit_task = mc_ce_admit_task,
729 .task_new = mc_ce_task_new,
730 .complete_job = complete_job,
731 .release_at = mc_ce_release_at_common,
732 .task_exit = mc_ce_task_exit_common,
733 .schedule = mc_ce_schedule,
734 .finish_switch = mc_ce_finish_switch,
735 .task_wake_up = mc_ce_task_wake_up,
736 .task_block = mc_ce_task_block,
737 .activate_plugin = mc_ce_activate_plugin,
738 .deactivate_plugin = mc_ce_deactivate_plugin_common,
739};
740
741static int setup_proc(void);
742static int __init init_sched_mc_ce(void)
743{
744 raw_spinlock_t *ce_lock;
745 struct domain_data *dom_data;
746 struct domain *dom;
747 int cpu, err;
748
749 for_each_online_cpu(cpu) {
750 per_cpu(domains, cpu) = NULL;
751 ce_lock = &per_cpu(_mc_ce_dom_locks, cpu);
752 raw_spin_lock_init(ce_lock);
753 dom_data = &per_cpu(_mc_ce_doms, cpu);
754 dom = &dom_data->domain;
755 ce_domain_init(dom, ce_lock, NULL, NULL, NULL, NULL, NULL,
756 &per_cpu(_mc_ce_dom_data, cpu), cpu,
757 mc_ce_timer_callback);
758 }
759 clear_pid_entries();
760 err = setup_proc();
761 if (!err)
762 err = register_sched_plugin(&mc_ce_plugin);
763 return err;
764}
765
766#define BUF_SIZE PAGE_SIZE
767static int write_into_proc(char *proc_buf, const int proc_size, char *fmt, ...)
768{
769 static char buf[BUF_SIZE];
770 int n;
771 va_list args;
772
773 /* When writing to procfs, we don't care about the trailing null that
774	 * is not included in the count returned by vsnprintf.
775 */
776 va_start(args, fmt);
777 n = vsnprintf(buf, BUF_SIZE, fmt, args);
778 va_end(args);
779 if (BUF_SIZE <= n || proc_size <= n) {
780 /* too big for formatting buffer or proc (less null byte) */
781 n = -EINVAL;
782 goto out;
783 }
784 memcpy(proc_buf, buf, n);
785out:
786 return n;
787}
788#undef BUF_SIZE
789
790/*
791 * Writes a PID entry to the procfs.
792 *
793 * @page buffer to write into.
794 * @count bytes available in the buffer
795 */
796#define PID_SPACE 15
797#define TASK_INFO_BUF (PID_SPACE + TASK_COMM_LEN)
798static int write_pid_entry(char *page, const int count, const int cpu,
799 const int task, struct ce_pid_entry *pid_entry)
800{
801 static char task_info[TASK_INFO_BUF];
802 struct task_struct *ts;
803 int n = 0, err, ti_n;
804 char *ti_b;
805
806 if (pid_entry->pid) {
807 rcu_read_lock();
808 ts = pid_task(pid_entry->pid, PIDTYPE_PID);
809 rcu_read_unlock();
810
811 /* get some information about the task */
812 if (ts) {
813 ti_b = task_info;
814 ti_n = snprintf(ti_b, PID_SPACE, "%d", ts->pid);
815 if (PID_SPACE <= ti_n)
816 ti_n = PID_SPACE - 1;
817 ti_b += ti_n;
818 *ti_b = ' '; /* nuke the null byte */
819 ti_b++;
820 get_task_comm(ti_b, ts);
821 } else {
822 strncpy(task_info, "pid_task() failed :(",
823 TASK_INFO_BUF);
824 }
825
826 } else
827 strncpy(task_info, "no", TASK_INFO_BUF);
828 task_info[TASK_INFO_BUF - 1] = '\0'; /* just to be sure */
829
830 err = write_into_proc(page + n, count - n, "# task: %s\n", task_info);
831 if (err < 0) {
832 n = -ENOSPC;
833 goto out;
834 }
835 n += err;
836 err = write_into_proc(page + n, count - n, "%d, %d, %llu\n",
837 cpu, task, pid_entry->budget);
838 if (err < 0) {
839 n = -ENOSPC;
840 goto out;
841 }
842 n += err;
843out:
844 return n;
845}
846#undef PID_SPACE
847#undef TASK_INFO_BUF
848
849/*
850 * Called when user space reads from the proc file.
851 */
852static int proc_read_ce_file(char *page, char **start, off_t off, int count,
853 int *eof, void *data)
854{
855 int n = 0, err, cpu, t;
856 struct ce_pid_table *pid_table;
857
858 if (off > 0) {
859 printk(KERN_INFO "litmus: MC-CE called read with off > 0\n");
860 goto out;
861 }
862
863 for_each_online_cpu(cpu) {
864 pid_table = get_pid_table(cpu);
865 for (t = 0; t < pid_table->num_pid_entries; ++t) {
866 err = write_pid_entry(page + n, count - n,
867 cpu, t, get_pid_entry(cpu, t));
868 if (err < 0) {
869 n = -ENOSPC;
870 goto out;
871 }
872 n += err;
873 }
874 }
875out:
876 *eof = 1;
877 return n;
878}
879
880/*
881 * Skip a commented line.
882 */
883static int skip_comment(const char *buf, const unsigned long max)
884{
885 unsigned long i = 0;
886 const char *c = buf;
887 if (0 == max || !c || *c != '#')
888 return 0;
889 ++c; ++i;
890 for (; i < max; ++i) {
891 if (*c == '\n') {
892 ++c; ++i;
893 break;
894 }
895 ++c;
896 }
897 return i;
898}
899
900/* a budget of 5 milliseconds is probably reasonable */
901#define BUDGET_THRESHOLD 5000000ULL
902static int setup_pid_entry(const int cpu, const int task, const lt_t budget)
903{
904 struct ce_pid_table *pid_table = get_pid_table(cpu);
905 struct ce_pid_entry *new_entry = NULL;
906 int err = 0;
907
908 /* check the inputs */
909 if (cpu < 0 || NR_CPUS <= cpu || task < 0 ||
910 CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= task ||
911 budget < 1) {
912 printk(KERN_INFO "litmus: bad cpu, task ID, or budget sent to "
913 "MC-CE proc\n");
914 err = -EINVAL;
915 goto out;
916 }
917 /* check for small budgets */
918 if (BUDGET_THRESHOLD > budget) {
919 printk(KERN_CRIT "litmus: you gave a small budget for an "
920 "MC-CE task; that might be an issue.\n");
921 }
922 /* check that we have space for a new entry */
923 if (CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= pid_table->num_pid_entries) {
924 printk(KERN_INFO "litmus: too many MC-CE tasks for cpu "
925 "%d\n", cpu);
926 err = -EINVAL;
927 goto out;
928 }
929 /* add the new entry */
930 new_entry = get_pid_entry(cpu, pid_table->num_pid_entries);
931 BUG_ON(NULL != new_entry->pid);
932 new_entry->budget = budget;
933 new_entry->acc_time = pid_table->cycle_time + budget;
934 /* update the domain entry */
935 pid_table->cycle_time += budget;
936 pid_table->num_pid_entries++;
937out:
938 return err;
939}
940#undef BUDGET_THRESHOLD
941
942/*
943 * Called when user space writes to the proc file.
944 *
945 * Error checking is quite minimal. Format is:
946 * <cpu>, <level-A task ID>, <budget in ns>
947 */
948#define PROCFS_MAX_SIZE PAGE_SIZE
949static int proc_write_ce_file(struct file *file, const char __user *buffer,
950 unsigned long count, void *data)
951{
952 static char kbuf[PROCFS_MAX_SIZE];
953 char *c = kbuf, *c_skipped;
954 int cpu, task, cnt = 0, chars_read, converted, err;
955 lt_t budget;
956
957 if (!using_linux_plugin()) {
958 printk(KERN_INFO "litmus: can only edit MC-CE proc under Linux "
959 "plugin\n");
960 cnt = -EINVAL;
961 goto out;
962 }
963
964 if (count > PROCFS_MAX_SIZE) {
965 printk(KERN_INFO "litmus: MC-CE procfs got too many bytes "
966 "from user-space.\n");
967 cnt = -EINVAL;
968 goto out;
969 }
970
971 if (copy_from_user(kbuf, buffer, count)) {
972 printk(KERN_INFO "litmus: couldn't copy from user %s\n",
973 __FUNCTION__);
974 cnt = -EFAULT;
975 goto out;
976 }
977 clear_pid_entries();
978 while (cnt < count) {
979 c_skipped = skip_spaces(c);
980 if (c_skipped != c) {
981 chars_read = c_skipped - c;
982 cnt += chars_read;
983 c += chars_read;
984 continue;
985 }
986 if (*c == '#') {
987 chars_read = skip_comment(c, count - cnt);
988 cnt += chars_read;
989 c += chars_read;
990 continue;
991 }
992 converted = sscanf(c, "%d, %d, %llu%n", &cpu, &task, &budget,
993 &chars_read);
994 if (3 != converted) {
995 printk(KERN_INFO "litmus: MC-CE procfs expected three "
996 "arguments, but got %d.\n", converted);
997 cnt = -EINVAL;
998 goto out;
999 }
1000 cnt += chars_read;
1001 c += chars_read;
1002 err = setup_pid_entry(cpu, task, budget);
1003 if (err) {
1004 cnt = -EINVAL;
1005 goto out;
1006 }
1007 }
1008out:
1009 return cnt;
1010}
1011#undef PROCFS_MAX_SIZE
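For completeness, a user-space sketch of loading this table. The proc path is a guess (it depends on where make_plugin_proc_dir() places the MC-CE directory), the two entries are hypothetical, and writes are only accepted while the Linux plugin is active:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        /* Format per the comment above: <cpu>, <level-A task ID>, <budget in ns>.
         * Two hypothetical slots on CPU 0 (10 ms and 20 ms -> 30 ms cycle).
         * proc_write_ce_file() clears the table on every write, so the whole
         * table must arrive in a single write().
         */
        const char table[] =
                "# level-A table for CPU 0\n"
                "0, 0, 10000000\n"
                "0, 1, 20000000\n";
        /* assumed path; adjust to wherever the MC-CE plugin dir is mounted */
        int fd = open("/proc/litmus/plugins/MC-CE/ce_file", O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, table, strlen(table)) != (ssize_t)strlen(table)) {
                perror("write");
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}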
1012
1013#define CE_FILE_PROC_NAME "ce_file"
1014static void tear_down_proc(void)
1015{
1016 if (ce_file)
1017 remove_proc_entry(CE_FILE_PROC_NAME, mc_ce_dir);
1018 if (mc_ce_dir)
1019 remove_plugin_proc_dir(&mc_ce_plugin);
1020}
1021
1022static int setup_proc(void)
1023{
1024 int err;
1025 err = make_plugin_proc_dir(&mc_ce_plugin, &mc_ce_dir);
1026 if (err) {
1027 printk(KERN_ERR "could not create MC-CE procfs dir.\n");
1028 goto out;
1029 }
1030 ce_file = create_proc_entry(CE_FILE_PROC_NAME, 0644, mc_ce_dir);
1031 if (!ce_file) {
1032 printk(KERN_ERR "could not create MC-CE procfs file.\n");
1033 err = -EIO;
1034 goto out_remove_proc;
1035 }
1036 ce_file->read_proc = proc_read_ce_file;
1037 ce_file->write_proc = proc_write_ce_file;
1038 goto out;
1039out_remove_proc:
1040 tear_down_proc();
1041out:
1042 return err;
1043}
1044#undef CE_FILE_PROC_NAME
1045
1046static void clean_sched_mc_ce(void)
1047{
1048 tear_down_proc();
1049}
1050
1051module_init(init_sched_mc_ce);
1052module_exit(clean_sched_mc_ce);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 00a1900d6457..123c7516fb76 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -95,6 +95,10 @@ static void litmus_dummy_task_exit(struct task_struct *task)
95{ 95{
96} 96}
97 97
98static void litmus_dummy_release_ts(lt_t time)
99{
100}
101
98static long litmus_dummy_complete_job(void) 102static long litmus_dummy_complete_job(void)
99{ 103{
100 return -ENOSYS; 104 return -ENOSYS;
@@ -136,6 +140,7 @@ struct sched_plugin linux_sched_plugin = {
136 .finish_switch = litmus_dummy_finish_switch, 140 .finish_switch = litmus_dummy_finish_switch,
137 .activate_plugin = litmus_dummy_activate_plugin, 141 .activate_plugin = litmus_dummy_activate_plugin,
138 .deactivate_plugin = litmus_dummy_deactivate_plugin, 142 .deactivate_plugin = litmus_dummy_deactivate_plugin,
143 .release_ts = litmus_dummy_release_ts,
139#ifdef CONFIG_LITMUS_LOCKING 144#ifdef CONFIG_LITMUS_LOCKING
140 .allocate_lock = litmus_dummy_allocate_lock, 145 .allocate_lock = litmus_dummy_allocate_lock,
141#endif 146#endif
@@ -174,6 +179,7 @@ int register_sched_plugin(struct sched_plugin* plugin)
174 CHECK(complete_job); 179 CHECK(complete_job);
175 CHECK(activate_plugin); 180 CHECK(activate_plugin);
176 CHECK(deactivate_plugin); 181 CHECK(deactivate_plugin);
182 CHECK(release_ts);
177#ifdef CONFIG_LITMUS_LOCKING 183#ifdef CONFIG_LITMUS_LOCKING
178 CHECK(allocate_lock); 184 CHECK(allocate_lock);
179#endif 185#endif
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
index 8933e15605ae..4e117be9546b 100644
--- a/litmus/sched_psn_edf.c
+++ b/litmus/sched_psn_edf.c
@@ -290,6 +290,9 @@ static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
290 TRACE_TASK(t, "psn edf: task new, cpu = %d\n", 290 TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
291 t->rt_param.task_params.cpu); 291 t->rt_param.task_params.cpu);
292 292
293 trace_litmus_server_param(0 - t->pid, -1 - get_partition(t),
294 get_exec_time(t), get_rt_period(t));
295
293 /* setup job parameters */ 296 /* setup job parameters */
294 release_at(t, litmus_clock()); 297 release_at(t, litmus_clock());
295 298
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 7194d2fe6c6f..67b01c1dd51b 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -7,14 +7,18 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/percpu.h> 9#include <linux/percpu.h>
10#include <linux/math64.h>
10 11
11#include <litmus/ftdev.h> 12#include <litmus/ftdev.h>
12#include <litmus/litmus.h> 13#include <litmus/litmus.h>
13
14#include <litmus/sched_trace.h> 14#include <litmus/sched_trace.h>
15#include <litmus/feather_trace.h> 15#include <litmus/feather_trace.h>
16#include <litmus/ftdev.h> 16#include <litmus/ftdev.h>
17#include <litmus/rt_domain.h>
18#include <litmus/domain.h>
19#include <litmus/event_group.h>
17 20
21#include <litmus/sched_mc.h>
18 22
19#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) 23#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
20 24
@@ -132,6 +136,7 @@ feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long
132 rec->data.param.phase = get_rt_phase(t); 136 rec->data.param.phase = get_rt_phase(t);
133 rec->data.param.partition = get_partition(t); 137 rec->data.param.partition = get_partition(t);
134 rec->data.param.class = get_class(t); 138 rec->data.param.class = get_class(t);
139 rec->data.param.level = (tsk_mc_data(t) ? tsk_mc_crit(t) : -1);
135 put_record(rec); 140 put_record(rec);
136 } 141 }
137} 142}
@@ -141,8 +146,8 @@ feather_callback void do_sched_trace_task_release(unsigned long id, unsigned lon
141 struct task_struct *t = (struct task_struct*) _task; 146 struct task_struct *t = (struct task_struct*) _task;
142 struct st_event_record* rec = get_record(ST_RELEASE, t); 147 struct st_event_record* rec = get_record(ST_RELEASE, t);
143 if (rec) { 148 if (rec) {
144 rec->data.release.release = get_release(t); 149 rec->data.release.release = tsk_rt(t)->job_params.real_release;
145 rec->data.release.deadline = get_deadline(t); 150 rec->data.release.deadline = tsk_rt(t)->job_params.real_deadline;
146 put_record(rec); 151 put_record(rec);
147 } 152 }
148} 153}
@@ -231,7 +236,7 @@ feather_callback void do_sched_trace_task_exit(unsigned long id,
231{ 236{
232 struct task_struct *t = (struct task_struct*) _task; 237 struct task_struct *t = (struct task_struct*) _task;
233 const lt_t max_exec_time = tsk_rt(t)->max_exec_time; 238 const lt_t max_exec_time = tsk_rt(t)->max_exec_time;
234 const lt_t avg_exec_time = tsk_rt(t)->tot_exec_time / (get_job_no(t) - 1); 239 const lt_t avg_exec_time = div64_u64(tsk_rt(t)->tot_exec_time, (get_job_no(t) - 1));
235 240
236 struct st_event_record *rec = get_record(ST_TASK_EXIT, t); 241 struct st_event_record *rec = get_record(ST_TASK_EXIT, t);
237 if (rec) { 242 if (rec) {
diff --git a/litmus/sync.c b/litmus/sync.c
index bf75fde5450b..f3c9262f7022 100644
--- a/litmus/sync.c
+++ b/litmus/sync.c
@@ -73,6 +73,9 @@ static long do_release_ts(lt_t start)
73 73
74 complete_n(&ts_release, task_count); 74 complete_n(&ts_release, task_count);
75 75
76 /* TODO: remove this hack */
77 litmus->release_ts(start);
78
76 return task_count; 79 return task_count;
77} 80}
78 81
diff --git a/litmus/trace.c b/litmus/trace.c
index 3c35c527e805..4722ffa443c6 100644
--- a/litmus/trace.c
+++ b/litmus/trace.c
@@ -6,6 +6,10 @@
6#include <litmus/litmus.h> 6#include <litmus/litmus.h>
7#include <litmus/trace.h> 7#include <litmus/trace.h>
8 8
9#include <litmus/domain.h>
10#include <litmus/event_group.h>
11#include <litmus/sched_mc.h>
12
9/******************************************************************************/ 13/******************************************************************************/
10/* Allocation */ 14/* Allocation */
11/******************************************************************************/ 15/******************************************************************************/
@@ -83,6 +87,36 @@ static inline void __save_timestamp(unsigned long event,
83 __save_timestamp_cpu(event, type, raw_smp_processor_id()); 87 __save_timestamp_cpu(event, type, raw_smp_processor_id());
84} 88}
85 89
90/* hack: fake timestamp to user-reported time, and record parts of the PID */
91feather_callback void save_timestamp_time(unsigned long event, unsigned long ptr)
92{
93 uint64_t* time = (uint64_t*) ptr;
94 unsigned int seq_no;
95 struct timestamp *ts;
96 seq_no = fetch_and_inc((int *) &ts_seq_no);
97 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
98 ts->event = event;
99 ts->timestamp = *time;
100 ts->seq_no = seq_no;
101 /* type takes lowest byte of PID */
102 ts->task_type = (uint8_t) current->pid;
103 /* cpu takes second-lowest byte of PID*/
104 ts->cpu = (uint8_t) (current->pid >> 8);
105
106 ft_buffer_finish_write(trace_ts_buf, ts);
107 }
108}
109
110feather_callback void save_timestamp_pid(unsigned long event)
111{
112 /* Abuse existing fields to partially export PID. */
113 __save_timestamp_cpu(event,
114 /* type takes lowest byte of PID */
115 (uint8_t) current->pid,
116 /* cpu takes second-lowest byte of PID*/
117 (uint8_t) (current->pid >> 8));
118}
119
86feather_callback void save_timestamp(unsigned long event) 120feather_callback void save_timestamp(unsigned long event)
87{ 121{
88 __save_timestamp(event, TSK_UNKNOWN); 122 __save_timestamp(event, TSK_UNKNOWN);
@@ -97,8 +131,21 @@ feather_callback void save_timestamp_def(unsigned long event,
97feather_callback void save_timestamp_task(unsigned long event, 131feather_callback void save_timestamp_task(unsigned long event,
98 unsigned long t_ptr) 132 unsigned long t_ptr)
99{ 133{
100 int rt = is_realtime((struct task_struct *) t_ptr); 134 struct task_struct *ts = (struct task_struct*) t_ptr;
101 __save_timestamp(event, rt ? TSK_RT : TSK_BE); 135 int rt = is_realtime(ts);
136 uint8_t type = rt ? TSK_RT : TSK_BE;
137
138 if (TS_LVLA_SCHED_END_ID == event) {
139 if (rt && CRIT_LEVEL_A == tsk_mc_crit(ts))
140 type = TSK_LVLA;
141 } else if (TS_LVLB_SCHED_END_ID == event) {
142 if (rt && CRIT_LEVEL_B == tsk_mc_crit(ts))
143 type = TSK_LVLB;
144 } else if (TS_LVLC_SCHED_END_ID == event) {
145 if (rt && CRIT_LEVEL_C == tsk_mc_crit(ts))
146 type = TSK_LVLC;
147 }
148 __save_timestamp(event, type);
102} 149}
103 150
104feather_callback void save_timestamp_cpu(unsigned long event, 151feather_callback void save_timestamp_cpu(unsigned long event,
diff --git a/mm/memory.c b/mm/memory.c
index 9b8a01d941cb..36e889cca247 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3016,6 +3016,35 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
3016 return 0; 3016 return 0;
3017} 3017}
3018 3018
3019static inline void print_page_fault_info(struct mm_struct *mm,
3020 unsigned long address, struct page *page)
3021{
3022 struct task_struct *ts = current;
3023 char ts_name[TASK_COMM_LEN];
3024 char mismatch[4];
3025#if 0
3026 struct file *file = mm->exe_file;
3027 const unsigned char *name_null = "NULL";
3028 const unsigned char *name =
3029 (file) ?file->f_dentry->d_name.name : name_null;
3030#endif
3031
3032 get_task_comm(ts_name, ts);
3033 if (likely(ts->mm == mm))
3034 mismatch[0] = '\0';
3035 else
3036 snprintf(mismatch, 4, "(M)");
3037#if 0
3038 printk("%s: cur: %15s name: %15s user_addr: 0x%12lx pfn: %12lu "
3039 "addr: 0x%13llx\n",
3040 __FUNCTION__, ts_name, name, address,
3041 page_to_pfn(page), page_to_phys(page));
3042#endif
3043 printk("%s: %3s %15s laddr: 0x%12lx pfn: %19lu paddr: 0x%13llx\n",
3044 __FUNCTION__, mismatch, ts_name, address >> PAGE_SHIFT,
3045 page_to_pfn(page), page_to_phys(page) >> PAGE_SHIFT);
3046}
3047
3019/* 3048/*
3020 * We enter with non-exclusive mmap_sem (to exclude vma changes, 3049 * We enter with non-exclusive mmap_sem (to exclude vma changes,
3021 * but allow concurrent faults), and pte mapped but not yet locked. 3050 * but allow concurrent faults), and pte mapped but not yet locked.
@@ -3066,6 +3095,9 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
3066 3095
3067 inc_mm_counter_fast(mm, MM_ANONPAGES); 3096 inc_mm_counter_fast(mm, MM_ANONPAGES);
3068 page_add_new_anon_rmap(page, vma, address); 3097 page_add_new_anon_rmap(page, vma, address);
3098#if 0
3099 print_page_fault_info(mm, address, page);
3100#endif
3069setpte: 3101setpte:
3070 set_pte_at(mm, address, page_table, entry); 3102 set_pte_at(mm, address, page_table, entry);
3071 3103
@@ -3075,6 +3107,9 @@ unlock:
3075 pte_unmap_unlock(page_table, ptl); 3107 pte_unmap_unlock(page_table, ptl);
3076 return 0; 3108 return 0;
3077release: 3109release:
3110#if 0
3111 printk("%s: release label\n", __FUNCTION__);
3112#endif
3078 mem_cgroup_uncharge_page(page); 3113 mem_cgroup_uncharge_page(page);
3079 page_cache_release(page); 3114 page_cache_release(page);
3080 goto unlock; 3115 goto unlock;