From 4b38febbd59fd33542a343991262119eb9860f5e Mon Sep 17 00:00:00 2001 From: Andrea Bastoni Date: Thu, 17 Dec 2009 21:23:36 -0500 Subject: [ported from 2008.3] Core LITMUS^RT infrastructure Port 2008.3 Core LITMUS^RT infrastructure to Linux 2.6.32 litmus_sched_class implements 4 new methods: - prio_changed: void - switched_to: void - get_rr_interval: return infinity (i.e., 0) - select_task_rq: return current cpu --- Makefile | 4 +- include/linux/sched.h | 7 + include/litmus/feather_buffer.h | 94 ++++++ include/litmus/feather_trace.h | 36 +++ include/litmus/heap.h | 77 +++++ include/litmus/jobs.h | 9 + include/litmus/litmus.h | 177 +++++++++++ include/litmus/rt_param.h | 175 +++++++++++ include/litmus/sched_plugin.h | 159 ++++++++++ include/litmus/sched_trace.h | 191 ++++++++++++ include/litmus/trace.h | 113 +++++++ kernel/fork.c | 7 + kernel/sched.c | 92 +++++- kernel/sched_fair.c | 2 +- kernel/sched_rt.c | 2 +- litmus/Kconfig | 50 +++ litmus/Makefile | 12 + litmus/ft_event.c | 43 +++ litmus/heap.c | 314 +++++++++++++++++++ litmus/jobs.c | 43 +++ litmus/litmus.c | 654 ++++++++++++++++++++++++++++++++++++++++ litmus/sched_litmus.c | 275 +++++++++++++++++ litmus/sched_plugin.c | 199 ++++++++++++ 23 files changed, 2723 insertions(+), 12 deletions(-) create mode 100644 include/litmus/feather_buffer.h create mode 100644 include/litmus/feather_trace.h create mode 100644 include/litmus/heap.h create mode 100644 include/litmus/jobs.h create mode 100644 include/litmus/litmus.h create mode 100644 include/litmus/rt_param.h create mode 100644 include/litmus/sched_plugin.h create mode 100644 include/litmus/sched_trace.h create mode 100644 include/litmus/trace.h create mode 100644 litmus/Kconfig create mode 100644 litmus/Makefile create mode 100644 litmus/ft_event.c create mode 100644 litmus/heap.c create mode 100644 litmus/jobs.c create mode 100644 litmus/litmus.c create mode 100644 litmus/sched_litmus.c create mode 100644 litmus/sched_plugin.c diff --git a/Makefile b/Makefile index f5cdb72ba2ce..2603066a012d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = +EXTRAVERSION =-litmus2010 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* @@ -644,7 +644,7 @@ export mod_strip_cmd ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60bf583..bb046c0adf99 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -38,6 +38,7 @@ #define SCHED_BATCH 3 /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 +#define SCHED_LITMUS 6 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ #define SCHED_RESET_ON_FORK 0x40000000 @@ -94,6 +95,8 @@ struct sched_param { #include +#include + struct exec_domain; struct futex_pi_state; struct robust_list_head; @@ -1505,6 +1508,10 @@ struct task_struct { int make_it_fail; #endif struct prop_local_single dirties; + + /* LITMUS RT parameters and state */ + struct rt_param rt_param; + #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h new file mode 100644 index 000000000000..6c18277fdfc9 --- /dev/null +++ b/include/litmus/feather_buffer.h @@ -0,0 +1,94 @@ 
+#ifndef _FEATHER_BUFFER_H_ +#define _FEATHER_BUFFER_H_ + +/* requires UINT_MAX and memcpy */ + +#define SLOT_FREE 0 +#define SLOT_BUSY 1 +#define SLOT_READY 2 + +struct ft_buffer { + unsigned int slot_count; + unsigned int slot_size; + + int free_count; + unsigned int write_idx; + unsigned int read_idx; + + char* slots; + void* buffer_mem; + unsigned int failed_writes; +}; + +static inline int init_ft_buffer(struct ft_buffer* buf, + unsigned int slot_count, + unsigned int slot_size, + char* slots, + void* buffer_mem) +{ + int i = 0; + if (!slot_count || UINT_MAX % slot_count != slot_count - 1) { + /* The slot count must divide UNIT_MAX + 1 so that when it + * wraps around the index correctly points to 0. + */ + return 0; + } else { + buf->slot_count = slot_count; + buf->slot_size = slot_size; + buf->slots = slots; + buf->buffer_mem = buffer_mem; + buf->free_count = slot_count; + buf->write_idx = 0; + buf->read_idx = 0; + buf->failed_writes = 0; + for (i = 0; i < slot_count; i++) + buf->slots[i] = SLOT_FREE; + return 1; + } +} + +static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr) +{ + int free = fetch_and_dec(&buf->free_count); + unsigned int idx; + if (free <= 0) { + fetch_and_inc(&buf->free_count); + *ptr = 0; + fetch_and_inc(&buf->failed_writes); + return 0; + } else { + idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count; + buf->slots[idx] = SLOT_BUSY; + *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size; + return 1; + } +} + +static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr) +{ + unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size; + buf->slots[idx] = SLOT_READY; +} + + +/* exclusive reader access is assumed */ +static inline int ft_buffer_read(struct ft_buffer* buf, void* dest) +{ + unsigned int idx; + if (buf->free_count == buf->slot_count) + /* nothing available */ + return 0; + idx = buf->read_idx % buf->slot_count; + if (buf->slots[idx] == SLOT_READY) { + memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size, + buf->slot_size); + buf->slots[idx] = SLOT_FREE; + buf->read_idx++; + fetch_and_inc(&buf->free_count); + return 1; + } else + return 0; +} + + +#endif diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h new file mode 100644 index 000000000000..3ac1ee5e0277 --- /dev/null +++ b/include/litmus/feather_trace.h @@ -0,0 +1,36 @@ +#ifndef _FEATHER_TRACE_H_ +#define _FEATHER_TRACE_H_ + + +int ft_enable_event(unsigned long id); +int ft_disable_event(unsigned long id); +int ft_is_event_enabled(unsigned long id); +int ft_disable_all_events(void); + +#ifndef __ARCH_HAS_FEATHER_TRACE +/* provide default implementation */ + +#define feather_callback + +#define MAX_EVENTS 1024 + +extern int ft_events[MAX_EVENTS]; + +#define ft_event(id, callback) \ + if (ft_events[id]) callback(); + +#define ft_event0(id, callback) \ + if (ft_events[id]) callback(id); + +#define ft_event1(id, callback, param) \ + if (ft_events[id]) callback(id, param); + +#define ft_event2(id, callback, param, param2) \ + if (ft_events[id]) callback(id, param, param2); + +#define ft_event3(id, callback, p, p2, p3) \ + if (ft_events[id]) callback(id, p, p2, p3); +#endif + + +#endif diff --git a/include/litmus/heap.h b/include/litmus/heap.h new file mode 100644 index 000000000000..da959b0bec9c --- /dev/null +++ b/include/litmus/heap.h @@ -0,0 +1,77 @@ +/* heaps.h -- Binomial Heaps + * + * (c) 2008, 2009 Bjoern Brandenburg + */ + +#ifndef HEAP_H +#define HEAP_H + +#define NOT_IN_HEAP UINT_MAX + +struct 
heap_node { + struct heap_node* parent; + struct heap_node* next; + struct heap_node* child; + + unsigned int degree; + void* value; + struct heap_node** ref; +}; + +struct heap { + struct heap_node* head; + /* We cache the minimum of the heap. + * This speeds up repeated peek operations. + */ + struct heap_node* min; +}; + +typedef int (*heap_prio_t)(struct heap_node* a, struct heap_node* b); + +void heap_init(struct heap* heap); +void heap_node_init(struct heap_node** ref_to_heap_node_ptr, void* value); + +static inline int heap_node_in_heap(struct heap_node* h) +{ + return h->degree != NOT_IN_HEAP; +} + +static inline int heap_empty(struct heap* heap) +{ + return heap->head == NULL && heap->min == NULL; +} + +/* insert (and reinitialize) a node into the heap */ +void heap_insert(heap_prio_t higher_prio, + struct heap* heap, + struct heap_node* node); + +/* merge addition into target */ +void heap_union(heap_prio_t higher_prio, + struct heap* target, + struct heap* addition); + +struct heap_node* heap_peek(heap_prio_t higher_prio, + struct heap* heap); + +struct heap_node* heap_take(heap_prio_t higher_prio, + struct heap* heap); + +void heap_uncache_min(heap_prio_t higher_prio, struct heap* heap); +int heap_decrease(heap_prio_t higher_prio, struct heap_node* node); + +void heap_delete(heap_prio_t higher_prio, + struct heap* heap, + struct heap_node* node); + +/* allocate from memcache */ +struct heap_node* heap_node_alloc(int gfp_flags); +void heap_node_free(struct heap_node* hn); + +/* allocate a heap node for value and insert into the heap */ +int heap_add(heap_prio_t higher_prio, struct heap* heap, + void* value, int gfp_flags); + +void* heap_take_del(heap_prio_t higher_prio, + struct heap* heap); +#endif diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h new file mode 100644 index 000000000000..9bd361ef3943 --- /dev/null +++ b/include/litmus/jobs.h @@ -0,0 +1,9 @@ +#ifndef __LITMUS_JOBS_H__ +#define __LITMUS_JOBS_H__ + +void prepare_for_next_period(struct task_struct *t); +void release_at(struct task_struct *t, lt_t start); +long complete_job(void); + +#endif + diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h new file mode 100644 index 000000000000..380fcb8acb33 --- /dev/null +++ b/include/litmus/litmus.h @@ -0,0 +1,177 @@ +/* + * Constant definitions related to + * scheduling policy. + */ + +#ifndef _LINUX_LITMUS_H_ +#define _LINUX_LITMUS_H_ + +#include +#include + +extern atomic_t release_master_cpu; + +extern atomic_t __log_seq_no; + +#define TRACE(fmt, args...) \ + sched_trace_log_message("%d P%d: " fmt, atomic_add_return(1, &__log_seq_no), \ + raw_smp_processor_id(), ## args) + +#define TRACE_TASK(t, fmt, args...) \ + TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args) + +#define TRACE_CUR(fmt, args...) \ + TRACE_TASK(current, fmt, ## args) + +#define TRACE_BUG_ON(cond) \ + do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \ + "called from %p current=%s/%d state=%d " \ + "flags=%x partition=%d cpu=%d rtflags=%d"\ + " job=%u knp=%d timeslice=%u\n", \ + #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \ + current->pid, current->state, current->flags, \ + get_partition(current), smp_processor_id(), get_rt_flags(current), \ + current->rt_param.job_params.job_no, current->rt_param.kernel_np, \ + current->rt.time_slice\ + ); } while(0); + + +/* in_list - is a given list_head queued on some list? 
+ */ +static inline int in_list(struct list_head* list) +{ + return !( /* case 1: deleted */ + (list->next == LIST_POISON1 && + list->prev == LIST_POISON2) + || + /* case 2: initialized */ + (list->next == list && + list->prev == list) + ); +} + +#define NO_CPU 0xffffffff + +void litmus_fork(struct task_struct *tsk); +void litmus_exec(void); +/* clean up real-time state of a task */ +void exit_litmus(struct task_struct *dead_tsk); + +long litmus_admit_task(struct task_struct *tsk); +void litmus_exit_task(struct task_struct *tsk); + +#define is_realtime(t) ((t)->policy == SCHED_LITMUS) +#define rt_transition_pending(t) \ + ((t)->rt_param.transition_pending) + +#define tsk_rt(t) (&(t)->rt_param) + +/* Realtime utility macros */ +#define get_rt_flags(t) (tsk_rt(t)->flags) +#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f)) +#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost) +#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) +#define get_rt_period(t) (tsk_rt(t)->task_params.period) +#define get_rt_phase(t) (tsk_rt(t)->task_params.phase) +#define get_partition(t) (tsk_rt(t)->task_params.cpu) +#define get_deadline(t) (tsk_rt(t)->job_params.deadline) +#define get_release(t) (tsk_rt(t)->job_params.release) +#define get_class(t) (tsk_rt(t)->task_params.cls) + +inline static int budget_exhausted(struct task_struct* t) +{ + return get_exec_time(t) >= get_exec_cost(t); +} + + +#define is_hrt(t) \ + (tsk_rt(t)->task_params.class == RT_CLASS_HARD) +#define is_srt(t) \ + (tsk_rt(t)->task_params.class == RT_CLASS_SOFT) +#define is_be(t) \ + (tsk_rt(t)->task_params.class == RT_CLASS_BEST_EFFORT) + +/* Our notion of time within LITMUS: kernel monotonic time. */ +static inline lt_t litmus_clock(void) +{ + return ktime_to_ns(ktime_get()); +} + +/* A macro to convert from nanoseconds to ktime_t. */ +#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t) + +#define get_domain(t) (tsk_rt(t)->domain) + +/* Honor the flag in the preempt_count variable that is set + * when scheduling is in progress. + */ +#define is_running(t) \ + ((t)->state == TASK_RUNNING || \ + task_thread_info(t)->preempt_count & PREEMPT_ACTIVE) + +#define is_blocked(t) \ + (!is_running(t)) +#define is_released(t, now) \ + (lt_before_eq(get_release(t), now)) +#define is_tardy(t, now) \ + (lt_before_eq(tsk_rt(t)->job_params.deadline, now)) + +/* real-time comparison macros */ +#define earlier_deadline(a, b) (lt_before(\ + (a)->rt_param.job_params.deadline,\ + (b)->rt_param.job_params.deadline)) +#define earlier_release(a, b) (lt_before(\ + (a)->rt_param.job_params.release,\ + (b)->rt_param.job_params.release)) + +#define make_np(t) do {t->rt_param.kernel_np++;} while(0); +#define take_np(t) do {t->rt_param.kernel_np--;} while(0); + +#ifdef CONFIG_SRP +void srp_ceiling_block(void); +#else +#define srp_ceiling_block() /* nothing */ +#endif + +#define heap2task(hn) ((struct task_struct*) hn->value) + +static inline int is_np(struct task_struct *t) +{ + return tsk_rt(t)->kernel_np; +} + +#define request_exit_np(t) + +static inline int is_present(struct task_struct* t) +{ + return t && tsk_rt(t)->present; +} + + +/* make the unit explicit */ +typedef unsigned long quanta_t; + +enum round { + FLOOR, + CEIL +}; + + +/* Tick period is used to convert ns-specified execution + * costs and periods into tick-based equivalents. 
+ */ +extern ktime_t tick_period; + +static inline quanta_t time2quanta(lt_t time, enum round round) +{ + s64 quantum_length = ktime_to_ns(tick_period); + + if (do_div(time, quantum_length) && round == CEIL) + time++; + return (quanta_t) time; +} + +/* By how much is cpu staggered behind CPU 0? */ +u64 cpu_stagger_offset(int cpu); + +#endif diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h new file mode 100644 index 000000000000..c599f848d1ed --- /dev/null +++ b/include/litmus/rt_param.h @@ -0,0 +1,175 @@ +/* + * Definition of the scheduler plugin interface. + * + */ +#ifndef _LINUX_RT_PARAM_H_ +#define _LINUX_RT_PARAM_H_ + +/* Litmus time type. */ +typedef unsigned long long lt_t; + +static inline int lt_after(lt_t a, lt_t b) +{ + return ((long long) b) - ((long long) a) < 0; +} +#define lt_before(a, b) lt_after(b, a) + +static inline int lt_after_eq(lt_t a, lt_t b) +{ + return ((long long) a) - ((long long) b) >= 0; +} +#define lt_before_eq(a, b) lt_after_eq(b, a) + +/* different types of clients */ +typedef enum { + RT_CLASS_HARD, + RT_CLASS_SOFT, + RT_CLASS_BEST_EFFORT +} task_class_t; + +struct rt_task { + lt_t exec_cost; + lt_t period; + lt_t phase; + unsigned int cpu; + task_class_t cls; +}; + +/* don't export internal data structures to user space (liblitmus) */ +#ifdef __KERNEL__ + +struct _rt_domain; +struct heap_node; +struct release_heap; + +struct rt_job { + /* Time instant the the job was or will be released. */ + lt_t release; + /* What is the current deadline? */ + lt_t deadline; + + /* How much service has this job received so far? */ + lt_t exec_time; + + /* Which job is this. This is used to let user space + * specify which job to wait for, which is important if jobs + * overrun. If we just call sys_sleep_next_period() then we + * will unintentionally miss jobs after an overrun. + * + * Increase this sequence number when a job is released. + */ + unsigned int job_no; +}; + + +struct pfair_param; + +/* RT task parameters for scheduling extensions + * These parameters are inherited during clone and therefore must + * be explicitly set up before the task set is launched. + */ +struct rt_param { + /* is the task sleeping? */ + unsigned int flags:8; + + /* do we need to check for srp blocking? */ + unsigned int srp_non_recurse:1; + + /* is the task present? (true if it can be scheduled) */ + unsigned int present:1; + + /* user controlled parameters */ + struct rt_task task_params; + + /* timing parameters */ + struct rt_job job_params; + + /* task representing the current "inherited" task + * priority, assigned by inherit_priority and + * return priority in the scheduler plugins. + * could point to self if PI does not result in + * an increased task priority. + */ + struct task_struct* inh_task; + + /* Don't just dereference this pointer in kernel space! + * It might very well point to junk or nothing at all. + * NULL indicates that the task has not requested any non-preemptable + * section support. + * Not inherited upon fork. + */ + short* np_flag; + + /* re-use unused counter in plugins that don't need it */ + union { + /* For the FMLP under PSN-EDF, it is required to make the task + * non-preemptive from kernel space. In order not to interfere with + * user space, this counter indicates the kernel space np setting. + * kernel_np > 0 => task is non-preemptive + */ + unsigned int kernel_np; + + /* Used by GQ-EDF */ + unsigned int last_cpu; + }; + + /* This field can be used by plugins to store where the task + * is currently scheduled. 
It is the responsibility of the + * plugin to avoid race conditions. + * + * This used by GSN-EDF and PFAIR. + */ + volatile int scheduled_on; + + /* Is the stack of the task currently in use? This is updated by + * the LITMUS core. + * + * Be careful to avoid deadlocks! + */ + volatile int stack_in_use; + + /* This field can be used by plugins to store where the task + * is currently linked. It is the responsibility of the plugin + * to avoid race conditions. + * + * Used by GSN-EDF. + */ + volatile int linked_on; + + /* PFAIR/PD^2 state. Allocated on demand. */ + struct pfair_param* pfair; + + /* Fields saved before BE->RT transition. + */ + int old_policy; + int old_prio; + + /* ready queue for this task */ + struct _rt_domain* domain; + + /* heap element for this task + * + * Warning: Don't statically allocate this node. The heap + * implementation swaps these between tasks, thus after + * dequeuing from a heap you may end up with a different node + * then the one you had when enqueuing the task. For the same + * reason, don't obtain and store references to this node + * other than this pointer (which is updated by the heap + * implementation). + */ + struct heap_node* heap_node; + struct release_heap* rel_heap; + + /* Used by rt_domain to queue task in release list. + */ + struct list_head list; +}; + +/* Possible RT flags */ +#define RT_F_RUNNING 0x00000000 +#define RT_F_SLEEP 0x00000001 +#define RT_F_EXIT_SEM 0x00000008 + +#endif + +#endif diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h new file mode 100644 index 000000000000..94952f6ccbfa --- /dev/null +++ b/include/litmus/sched_plugin.h @@ -0,0 +1,159 @@ +/* + * Definition of the scheduler plugin interface. + * + */ +#ifndef _LINUX_SCHED_PLUGIN_H_ +#define _LINUX_SCHED_PLUGIN_H_ + +#include + +/* struct for semaphore with priority inheritance */ +struct pi_semaphore { + atomic_t count; + int sleepers; + wait_queue_head_t wait; + union { + /* highest-prio holder/waiter */ + struct task_struct *task; + struct task_struct* cpu_task[NR_CPUS]; + } hp; + /* current lock holder */ + struct task_struct *holder; +}; + +/************************ setup/tear down ********************/ + +typedef long (*activate_plugin_t) (void); +typedef long (*deactivate_plugin_t) (void); + + + +/********************* scheduler invocation ******************/ + +/* Plugin-specific realtime tick handler */ +typedef void (*scheduler_tick_t) (struct task_struct *cur); +/* Novell make sched decision function */ +typedef struct task_struct* (*schedule_t)(struct task_struct * prev); +/* Clean up after the task switch has occured. + * This function is called after every (even non-rt) task switch. + */ +typedef void (*finish_switch_t)(struct task_struct *prev); + + +/********************* task state changes ********************/ + +/* Called to setup a new real-time task. + * Release the first job, enqueue, etc. + * Task may already be running. + */ +typedef void (*task_new_t) (struct task_struct *task, + int on_rq, + int running); + +/* Called to re-introduce a task after blocking. + * Can potentially be called multiple times. + */ +typedef void (*task_wake_up_t) (struct task_struct *task); +/* called to notify the plugin of a blocking real-time task + * it will only be called for real-time tasks and before schedule is called */ +typedef void (*task_block_t) (struct task_struct *task); +/* Called when a real-time task exits or changes to a different scheduling + * class. 
+ * Free any allocated resources + */ +typedef void (*task_exit_t) (struct task_struct *); + +/* Called when the new_owner is released from the wait queue + * it should now inherit the priority from sem, _before_ it gets readded + * to any queue + */ +typedef long (*inherit_priority_t) (struct pi_semaphore *sem, + struct task_struct *new_owner); + +/* Called when the current task releases a semahpore where it might have + * inherited a piority from + */ +typedef long (*return_priority_t) (struct pi_semaphore *sem); + +/* Called when a task tries to acquire a semaphore and fails. Check if its + * priority is higher than that of the current holder. + */ +typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t); + + + + +/********************* sys call backends ********************/ +/* This function causes the caller to sleep until the next release */ +typedef long (*complete_job_t) (void); + +typedef long (*admit_task_t)(struct task_struct* tsk); + +typedef void (*release_at_t)(struct task_struct *t, lt_t start); + +struct sched_plugin { + struct list_head list; + /* basic info */ + char *plugin_name; + + /* setup */ + activate_plugin_t activate_plugin; + deactivate_plugin_t deactivate_plugin; + +#ifdef CONFIG_SRP + unsigned int srp_active; +#endif + + /* scheduler invocation */ + scheduler_tick_t tick; + schedule_t schedule; + finish_switch_t finish_switch; + + /* syscall backend */ + complete_job_t complete_job; + release_at_t release_at; + + /* task state changes */ + admit_task_t admit_task; + + task_new_t task_new; + task_wake_up_t task_wake_up; + task_block_t task_block; + task_exit_t task_exit; + +#ifdef CONFIG_FMLP + /* priority inheritance */ + unsigned int fmlp_active; + inherit_priority_t inherit_priority; + return_priority_t return_priority; + pi_block_t pi_block; +#endif +} __attribute__ ((__aligned__(SMP_CACHE_BYTES))); + + +extern struct sched_plugin *litmus; + +int register_sched_plugin(struct sched_plugin* plugin); +struct sched_plugin* find_sched_plugin(const char* name); +int print_sched_plugins(char* buf, int max); + +static inline int srp_active(void) +{ +#ifdef CONFIG_SRP + return litmus->srp_active; +#else + return 0; +#endif +} +static inline int fmlp_active(void) +{ +#ifdef CONFIG_FMLP + return litmus->fmlp_active; +#else + return 0; +#endif +} + +extern struct sched_plugin linux_sched_plugin; + +#endif diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h new file mode 100644 index 000000000000..afd0391d127b --- /dev/null +++ b/include/litmus/sched_trace.h @@ -0,0 +1,191 @@ +/* sched_trace.h -- record scheduler events to a byte stream for offline analysis. + */ +#ifndef _LINUX_SCHED_TRACE_H_ +#define _LINUX_SCHED_TRACE_H_ + +/* all times in nanoseconds */ + +struct st_trace_header { + u8 type; /* Of what type is this record? */ + u8 cpu; /* On which CPU was it recorded? */ + u16 pid; /* PID of the task. */ + u32 job; /* The job sequence number. */ +}; + +#define ST_NAME_LEN 16 +struct st_name_data { + char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ +}; + +struct st_param_data { /* regular params */ + u32 wcet; + u32 period; + u32 phase; + u8 partition; + u8 __unused[3]; +}; + +struct st_release_data { /* A job is was/is going to be released. */ + u64 release; /* What's the release time? */ + u64 deadline; /* By when must it finish? */ +}; + +struct st_assigned_data { /* A job was asigned to a CPU. */ + u64 when; + u8 target; /* Where should it execute? 
*/ + u8 __unused[3]; +}; + +struct st_switch_to_data { /* A process was switched to on a given CPU. */ + u64 when; /* When did this occur? */ + u32 exec_time; /* Time the current job has executed. */ + +}; + +struct st_switch_away_data { /* A process was switched away from on a given CPU. */ + u64 when; + u64 exec_time; +}; + +struct st_completion_data { /* A job completed. */ + u64 when; + u8 forced:1; /* Set to 1 if job overran and kernel advanced to the + * next task automatically; set to 0 otherwise. + */ + u8 __uflags:7; + u8 __unused[3]; +}; + +struct st_block_data { /* A task blocks. */ + u64 when; + u64 __unused; +}; + +struct st_resume_data { /* A task resumes. */ + u64 when; + u64 __unused; +}; + +struct st_sys_release_data { + u64 when; + u64 release; +}; + +#define DATA(x) struct st_ ## x ## _data x; + +typedef enum { + ST_NAME = 1, /* Start at one, so that we can spot + * uninitialized records. */ + ST_PARAM, + ST_RELEASE, + ST_ASSIGNED, + ST_SWITCH_TO, + ST_SWITCH_AWAY, + ST_COMPLETION, + ST_BLOCK, + ST_RESUME, + ST_SYS_RELEASE, +} st_event_record_type_t; + +struct st_event_record { + struct st_trace_header hdr; + union { + u64 raw[2]; + + DATA(name); + DATA(param); + DATA(release); + DATA(assigned); + DATA(switch_to); + DATA(switch_away); + DATA(completion); + DATA(block); + DATA(resume); + DATA(sys_release); + + } data; +}; + +#undef DATA + +#ifdef __KERNEL__ + +#include +#include + +#ifdef CONFIG_SCHED_TASK_TRACE + +#define SCHED_TRACE(id, callback, task) \ + ft_event1(id, callback, task) +#define SCHED_TRACE2(id, callback, task, xtra) \ + ft_event2(id, callback, task, xtra) + +/* provide prototypes; needed on sparc64 */ +#ifndef NO_TASK_TRACE_DECLS +feather_callback void do_sched_trace_task_name(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_param(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_release(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_switch_to(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_switch_away(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_completion(unsigned long id, + struct task_struct* task, + unsigned long forced); +feather_callback void do_sched_trace_task_block(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_resume(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_sys_release(unsigned long id, + lt_t* start); +#endif + +#else + +#define SCHED_TRACE(id, callback, task) /* no tracing */ +#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ + +#endif + + +#define SCHED_TRACE_BASE_ID 500 + + +#define sched_trace_task_name(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t) +#define sched_trace_task_param(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t) +#define sched_trace_task_release(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t) +#define sched_trace_task_switch_to(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t) +#define sched_trace_task_switch_away(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t) +#define sched_trace_task_completion(t, forced) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \ + forced) +#define sched_trace_task_block(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, 
do_sched_trace_task_block, t) +#define sched_trace_task_resume(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t) + +#define sched_trace_sys_release(when) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 9, do_sched_trace_sys_release, when) + +#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ + +#ifdef CONFIG_SCHED_DEBUG_TRACE +void sched_trace_log_message(const char* fmt, ...); +void dump_trace_buffer(int max); +#else + +#define sched_trace_log_message(fmt, ...) + +#endif + +#endif /* __KERNEL__ */ + +#endif diff --git a/include/litmus/trace.h b/include/litmus/trace.h new file mode 100644 index 000000000000..e8e0c7b6cc6a --- /dev/null +++ b/include/litmus/trace.h @@ -0,0 +1,113 @@ +#ifndef _SYS_TRACE_H_ +#define _SYS_TRACE_H_ + +#ifdef CONFIG_SCHED_OVERHEAD_TRACE + +#include +#include + + +/*********************** TIMESTAMPS ************************/ + +enum task_type_marker { + TSK_BE, + TSK_RT, + TSK_UNKNOWN +}; + +struct timestamp { + uint64_t timestamp; + uint32_t seq_no; + uint8_t cpu; + uint8_t event; + uint8_t task_type; +}; + +/* tracing callbacks */ +feather_callback void save_timestamp(unsigned long event); +feather_callback void save_timestamp_def(unsigned long event, unsigned long type); +feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr); +feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu); + + +#define TIMESTAMP(id) ft_event0(id, save_timestamp) + +#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, def) + +#define TTIMESTAMP(id, task) \ + ft_event1(id, save_timestamp_task, (unsigned long) task) + +#define CTIMESTAMP(id, cpu) \ + ft_event1(id, save_timestamp_cpu, cpu) + +#else /* !CONFIG_SCHED_OVERHEAD_TRACE */ + +#define TIMESTAMP(id) /* no tracing */ + +#define DTIMESTAMP(id, def) /* no tracing */ + +#define TTIMESTAMP(id, task) /* no tracing */ + +#define CTIMESTAMP(id, cpu) /* no tracing */ + +#endif + + +/* Convention for timestamps + * ========================= + * + * In order to process the trace files with a common tool, we use the following + * convention to measure execution times: The end time id of a code segment is + * always the next number after the start time event id. 
+ */ + +#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only + * care + * about + * next */ +#define TS_SCHED_END(t) TTIMESTAMP(101, t) +#define TS_SCHED2_START(t) TTIMESTAMP(102, t) +#define TS_SCHED2_END(t) TTIMESTAMP(103, t) + +#define TS_CXS_START(t) TTIMESTAMP(104, t) +#define TS_CXS_END(t) TTIMESTAMP(105, t) + +#define TS_RELEASE_START DTIMESTAMP(106, TSK_RT) +#define TS_RELEASE_END DTIMESTAMP(107, TSK_RT) + +#define TS_TICK_START(t) TTIMESTAMP(110, t) +#define TS_TICK_END(t) TTIMESTAMP(111, t) + + +#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ +#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ + +#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */ +#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */ + +#define TS_ENTER_NP_START TIMESTAMP(140) +#define TS_ENTER_NP_END TIMESTAMP(141) + +#define TS_EXIT_NP_START TIMESTAMP(150) +#define TS_EXIT_NP_END TIMESTAMP(151) + +#define TS_SRP_UP_START TIMESTAMP(160) +#define TS_SRP_UP_END TIMESTAMP(161) +#define TS_SRP_DOWN_START TIMESTAMP(162) +#define TS_SRP_DOWN_END TIMESTAMP(163) + +#define TS_PI_UP_START TIMESTAMP(170) +#define TS_PI_UP_END TIMESTAMP(171) +#define TS_PI_DOWN_START TIMESTAMP(172) +#define TS_PI_DOWN_END TIMESTAMP(173) + +#define TS_FIFO_UP_START TIMESTAMP(180) +#define TS_FIFO_UP_END TIMESTAMP(181) +#define TS_FIFO_DOWN_START TIMESTAMP(182) +#define TS_FIFO_DOWN_END TIMESTAMP(183) + +#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) +#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) + + +#endif /* !_SYS_TRACE_H_ */ diff --git a/kernel/fork.c b/kernel/fork.c index 166b8c49257c..889730cce3ad 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -74,6 +74,9 @@ #include +#include +#include + /* * Protected counters by write_lock_irq(&tasklist_lock) */ @@ -162,6 +165,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); + exit_litmus(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); @@ -244,6 +248,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->stack = ti; + /* Don't let the new task be a real-time task. 
*/ + memset(&tsk->rt_param, 0, sizeof(struct rt_task)); + err = prop_local_init_single(&tsk->dirties); if (err) goto out; diff --git a/kernel/sched.c b/kernel/sched.c index 3c11ae0a948d..fcaed6b96442 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -77,6 +77,9 @@ #include "sched_cpupri.h" +#include +#include + #define CREATE_TRACE_POINTS #include @@ -571,6 +574,8 @@ struct rq { atomic_t nr_iowait; + struct task_struct *litmus_next; + #ifdef CONFIG_SMP struct root_domain *rd; struct sched_domain *sd; @@ -1815,11 +1820,12 @@ static void calc_load_account_active(struct rq *this_rq); #include "sched_idletask.c" #include "sched_fair.c" #include "sched_rt.c" +#include "../litmus/sched_litmus.c" #ifdef CONFIG_SCHED_DEBUG # include "sched_debug.c" #endif -#define sched_class_highest (&rt_sched_class) +#define sched_class_highest (&litmus_sched_class) #define for_each_class(class) \ for (class = sched_class_highest; class; class = class->next) @@ -2343,6 +2349,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, unsigned long flags; struct rq *rq, *orig_rq; + if (is_realtime(p)) + TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); + if (!sched_feat(SYNC_WAKEUPS)) wake_flags &= ~WF_SYNC; @@ -2361,7 +2370,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, orig_cpu = cpu; #ifdef CONFIG_SMP - if (unlikely(task_running(rq, p))) + if (unlikely(task_running(rq, p)) || is_realtime(p)) goto out_activate; /* @@ -2442,6 +2451,8 @@ out_running: p->sched_class->task_wake_up(rq, p); #endif out: + if (is_realtime(p)) + TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state); task_rq_unlock(rq, &flags); put_cpu(); @@ -2750,6 +2761,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); + litmus->finish_switch(prev); + prev->rt_param.stack_in_use = NO_CPU; perf_event_task_sched_in(current, cpu_of(rq)); finish_lock_switch(rq, prev); @@ -5232,18 +5245,31 @@ void scheduler_tick(void) sched_clock_tick(); + TS_TICK_START(current); + spin_lock(&rq->lock); update_rq_clock(rq); update_cpu_load(rq); curr->sched_class->task_tick(rq, curr, 0); + + /* + * LITMUS_TODO: can we move litmus_tick inside task_tick + * or will deadlock ? + */ + TS_PLUGIN_TICK_START; + litmus_tick(rq, curr); + TS_PLUGIN_TICK_END; + spin_unlock(&rq->lock); perf_event_task_tick(curr, cpu); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); - trigger_load_balance(rq, cpu); + if (!is_realtime(current)) + trigger_load_balance(rq, cpu); #endif + TS_TICK_END(current); } notrace unsigned long get_parent_ip(unsigned long addr) @@ -5387,11 +5413,17 @@ pick_next_task(struct rq *rq) * Optimization: we know that if all tasks are in * the fair class we can call that function directly: */ + /* + * LITMUS_TODO: can we move processes out of fair class? + * i.e., create a litmus_rq + */ + /* Don't do this for LITMUS if (likely(rq->nr_running == rq->cfs.nr_running)) { p = fair_sched_class.pick_next_task(rq); if (likely(p)) return p; } + */ class = sched_class_highest; for ( ; ; ) { @@ -5426,6 +5458,8 @@ need_resched: release_kernel_lock(prev); need_resched_nonpreemptible: + TS_SCHED_START; + sched_trace_task_switch_away(prev); schedule_debug(prev); @@ -5436,6 +5470,14 @@ need_resched_nonpreemptible: update_rq_clock(rq); clear_tsk_need_resched(prev); + /* + * LITMUS_TODO: can we integrate litmus_schedule in + * pick_next_task? 
+ */ + TS_PLUGIN_SCHED_START; + litmus_schedule(rq, prev); + TS_PLUGIN_SCHED_END; + if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely(signal_pending_state(prev->state, prev))) prev->state = TASK_RUNNING; @@ -5460,22 +5502,35 @@ need_resched_nonpreemptible: rq->curr = next; ++*switch_count; + TS_SCHED_END(next); + TS_CXS_START(next); context_switch(rq, prev, next); /* unlocks the rq */ + TS_CXS_END(current); /* * the context switch might have flipped the stack from under * us, hence refresh the local variables. */ cpu = smp_processor_id(); rq = cpu_rq(cpu); - } else + } else { + TS_SCHED_END(prev); spin_unlock_irq(&rq->lock); + } + + TS_SCHED2_START(current); + sched_trace_task_switch_to(current); post_schedule(rq); - if (unlikely(reacquire_kernel_lock(current) < 0)) + if (unlikely(reacquire_kernel_lock(current) < 0)) { + TS_SCHED2_END(current); goto need_resched_nonpreemptible; + } preempt_enable_no_resched(); + + TS_SCHED2_END(current); + if (need_resched()) goto need_resched; } @@ -6185,6 +6240,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) case SCHED_RR: p->sched_class = &rt_sched_class; break; + case SCHED_LITMUS: + p->sched_class = &litmus_sched_class; + break; } p->rt_priority = prio; @@ -6232,7 +6290,7 @@ recheck: if (policy != SCHED_FIFO && policy != SCHED_RR && policy != SCHED_NORMAL && policy != SCHED_BATCH && - policy != SCHED_IDLE) + policy != SCHED_IDLE && policy != SCHED_LITMUS) return -EINVAL; } @@ -6247,6 +6305,8 @@ recheck: return -EINVAL; if (rt_policy(policy) != (param->sched_priority != 0)) return -EINVAL; + if (policy == SCHED_LITMUS && policy == p->policy) + return -EINVAL; /* * Allow unprivileged RT tasks to decrease priority: @@ -6301,6 +6361,12 @@ recheck: return retval; } + if (policy == SCHED_LITMUS) { + retval = litmus_admit_task(p); + if (retval) + return retval; + } + /* * make sure no PI-waiters arrive (or leave) while we are * changing the priority of the task: @@ -6328,9 +6394,18 @@ recheck: p->sched_reset_on_fork = reset_on_fork; + if (p->policy == SCHED_LITMUS) + litmus_exit_task(p); + oldprio = p->prio; __setscheduler(rq, p, policy, param->sched_priority); + if (policy == SCHED_LITMUS) { + p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU; + p->rt_param.present = running; + litmus->task_new(p, on_rq, running); + } + if (running) p->sched_class->set_curr_task(rq); if (on_rq) { @@ -6500,10 +6575,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) read_lock(&tasklist_lock); p = find_process_by_pid(pid); - if (!p) { + /* Don't set affinity if task not found and for LITMUS tasks */ + if (!p || is_realtime(p)) { read_unlock(&tasklist_lock); put_online_cpus(); - return -ESRCH; + return p ? 
-EPERM : -ESRCH; } /* diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 37087a7fac22..ef43ff95999d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1598,7 +1598,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ update_curr(cfs_rq); - if (unlikely(rt_prio(p->prio))) { + if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS) { resched_task(curr); return; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index a4d790cddb19..f622880e918f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1004,7 +1004,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) */ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) { - if (p->prio < rq->curr->prio) { + if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) { resched_task(rq->curr); return; } diff --git a/litmus/Kconfig b/litmus/Kconfig new file mode 100644 index 000000000000..f8c642658a2f --- /dev/null +++ b/litmus/Kconfig @@ -0,0 +1,50 @@ +menu "LITMUS^RT" + +menu "Tracing" + +config FEATHER_TRACE + bool "Feather-Trace Infrastructure" + default y + help + Feather-Trace basic tracing infrastructure. Includes device file + driver and instrumentation point support. + + +config SCHED_TASK_TRACE + bool "Trace real-time tasks" + depends on FEATHER_TRACE + default y + help + Include support for the sched_trace_XXX() tracing functions. This + allows the collection of real-time task events such as job + completions, job releases, early completions, etc. This results in a + small overhead in the scheduling code. Disable if the overhead is not + acceptable (e.g., benchmarking). + + Say Yes for debugging. + Say No for overhead tracing. + +config SCHED_OVERHEAD_TRACE + bool "Record timestamps for overhead measurements" + depends on FEATHER_TRACE + default n + help + Export event stream for overhead tracing. + Say Yes for overhead tracing. + +config SCHED_DEBUG_TRACE + bool "TRACE() debugging" + default y + help + Include support for sched_trace_log_messageg(), which is used to + implement TRACE(). If disabled, no TRACE() messages will be included + in the kernel, and no overheads due to debugging statements will be + incurred by the scheduler. Disable if the overhead is not acceptable + (e.g. benchmarking). + + Say Yes for debugging. + Say No for overhead tracing. 
+ +endmenu + +endmenu diff --git a/litmus/Makefile b/litmus/Makefile new file mode 100644 index 000000000000..f4c2d564cd0b --- /dev/null +++ b/litmus/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for LITMUS^RT +# + +obj-y = sched_plugin.o litmus.o \ + jobs.o \ + heap.o + +obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o +obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o +obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o +obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o diff --git a/litmus/ft_event.c b/litmus/ft_event.c new file mode 100644 index 000000000000..6084b6d6b364 --- /dev/null +++ b/litmus/ft_event.c @@ -0,0 +1,43 @@ +#include + +#include + +#ifndef __ARCH_HAS_FEATHER_TRACE +/* provide dummy implementation */ + +int ft_events[MAX_EVENTS]; + +int ft_enable_event(unsigned long id) +{ + if (id < MAX_EVENTS) { + ft_events[id]++; + return 1; + } else + return 0; +} + +int ft_disable_event(unsigned long id) +{ + if (id < MAX_EVENTS && ft_events[id]) { + ft_events[id]--; + return 1; + } else + return 0; +} + +int ft_disable_all_events(void) +{ + int i; + + for (i = 0; i < MAX_EVENTS; i++) + ft_events[i] = 0; + + return MAX_EVENTS; +} + +int ft_is_event_enabled(unsigned long id) +{ + return id < MAX_EVENTS && ft_events[id]; +} + +#endif diff --git a/litmus/heap.c b/litmus/heap.c new file mode 100644 index 000000000000..112d14da46c3 --- /dev/null +++ b/litmus/heap.c @@ -0,0 +1,314 @@ +#include "linux/kernel.h" +#include "litmus/heap.h" + +void heap_init(struct heap* heap) +{ + heap->head = NULL; + heap->min = NULL; +} + +void heap_node_init(struct heap_node** _h, void* value) +{ + struct heap_node* h = *_h; + h->parent = NULL; + h->next = NULL; + h->child = NULL; + h->degree = NOT_IN_HEAP; + h->value = value; + h->ref = _h; +} + + +/* make child a subtree of root */ +static void __heap_link(struct heap_node* root, + struct heap_node* child) +{ + child->parent = root; + child->next = root->child; + root->child = child; + root->degree++; +} + +/* merge root lists */ +static struct heap_node* __heap_merge(struct heap_node* a, + struct heap_node* b) +{ + struct heap_node* head = NULL; + struct heap_node** pos = &head; + + while (a && b) { + if (a->degree < b->degree) { + *pos = a; + a = a->next; + } else { + *pos = b; + b = b->next; + } + pos = &(*pos)->next; + } + if (a) + *pos = a; + else + *pos = b; + return head; +} + +/* reverse a linked list of nodes. 
also clears parent pointer */ +static struct heap_node* __heap_reverse(struct heap_node* h) +{ + struct heap_node* tail = NULL; + struct heap_node* next; + + if (!h) + return h; + + h->parent = NULL; + while (h->next) { + next = h->next; + h->next = tail; + tail = h; + h = next; + h->parent = NULL; + } + h->next = tail; + return h; +} + +static void __heap_min(heap_prio_t higher_prio, struct heap* heap, + struct heap_node** prev, struct heap_node** node) +{ + struct heap_node *_prev, *cur; + *prev = NULL; + + if (!heap->head) { + *node = NULL; + return; + } + + *node = heap->head; + _prev = heap->head; + cur = heap->head->next; + while (cur) { + if (higher_prio(cur, *node)) { + *node = cur; + *prev = _prev; + } + _prev = cur; + cur = cur->next; + } +} + +static void __heap_union(heap_prio_t higher_prio, struct heap* heap, + struct heap_node* h2) +{ + struct heap_node* h1; + struct heap_node *prev, *x, *next; + if (!h2) + return; + h1 = heap->head; + if (!h1) { + heap->head = h2; + return; + } + h1 = __heap_merge(h1, h2); + prev = NULL; + x = h1; + next = x->next; + while (next) { + if (x->degree != next->degree || + (next->next && next->next->degree == x->degree)) { + /* nothing to do, advance */ + prev = x; + x = next; + } else if (higher_prio(x, next)) { + /* x becomes the root of next */ + x->next = next->next; + __heap_link(x, next); + } else { + /* next becomes the root of x */ + if (prev) + prev->next = next; + else + h1 = next; + __heap_link(next, x); + x = next; + } + next = x->next; + } + heap->head = h1; +} + +static struct heap_node* __heap_extract_min(heap_prio_t higher_prio, + struct heap* heap) +{ + struct heap_node *prev, *node; + __heap_min(higher_prio, heap, &prev, &node); + if (!node) + return NULL; + if (prev) + prev->next = node->next; + else + heap->head = node->next; + __heap_union(higher_prio, heap, __heap_reverse(node->child)); + return node; +} + +/* insert (and reinitialize) a node into the heap */ +void heap_insert(heap_prio_t higher_prio, struct heap* heap, + struct heap_node* node) +{ + struct heap_node *min; + node->child = NULL; + node->parent = NULL; + node->next = NULL; + node->degree = 0; + if (heap->min && higher_prio(node, heap->min)) { + /* swap min cache */ + min = heap->min; + min->child = NULL; + min->parent = NULL; + min->next = NULL; + min->degree = 0; + __heap_union(higher_prio, heap, min); + heap->min = node; + } else + __heap_union(higher_prio, heap, node); +} + +void heap_uncache_min(heap_prio_t higher_prio, struct heap* heap) +{ + struct heap_node* min; + if (heap->min) { + min = heap->min; + heap->min = NULL; + heap_insert(higher_prio, heap, min); + } +} + +/* merge addition into target */ +void heap_union(heap_prio_t higher_prio, + struct heap* target, struct heap* addition) +{ + /* first insert any cached minima, if necessary */ + heap_uncache_min(higher_prio, target); + heap_uncache_min(higher_prio, addition); + __heap_union(higher_prio, target, addition->head); + /* this is a destructive merge */ + addition->head = NULL; +} + +struct heap_node* heap_peek(heap_prio_t higher_prio, + struct heap* heap) +{ + if (!heap->min) + heap->min = __heap_extract_min(higher_prio, heap); + return heap->min; +} + +struct heap_node* heap_take(heap_prio_t higher_prio, + struct heap* heap) +{ + struct heap_node *node; + if (!heap->min) + heap->min = __heap_extract_min(higher_prio, heap); + node = heap->min; + heap->min = NULL; + if (node) + node->degree = NOT_IN_HEAP; + return node; +} + +int heap_decrease(heap_prio_t higher_prio, struct heap_node* node) +{ 
+ struct heap_node *parent; + struct heap_node** tmp_ref; + void* tmp; + + /* bubble up */ + parent = node->parent; + while (parent && higher_prio(node, parent)) { + /* swap parent and node */ + tmp = parent->value; + parent->value = node->value; + node->value = tmp; + /* swap references */ + *(parent->ref) = node; + *(node->ref) = parent; + tmp_ref = parent->ref; + parent->ref = node->ref; + node->ref = tmp_ref; + /* step up */ + node = parent; + parent = node->parent; + } + + return parent != NULL; +} + +void heap_delete(heap_prio_t higher_prio, struct heap* heap, + struct heap_node* node) +{ + struct heap_node *parent, *prev, *pos; + struct heap_node** tmp_ref; + void* tmp; + + if (heap->min != node) { + /* bubble up */ + parent = node->parent; + while (parent) { + /* swap parent and node */ + tmp = parent->value; + parent->value = node->value; + node->value = tmp; + /* swap references */ + *(parent->ref) = node; + *(node->ref) = parent; + tmp_ref = parent->ref; + parent->ref = node->ref; + node->ref = tmp_ref; + /* step up */ + node = parent; + parent = node->parent; + } + /* now delete: + * first find prev */ + prev = NULL; + pos = heap->head; + while (pos != node) { + prev = pos; + pos = pos->next; + } + /* we have prev, now remove node */ + if (prev) + prev->next = node->next; + else + heap->head = node->next; + __heap_union(higher_prio, heap, __heap_reverse(node->child)); + } else + heap->min = NULL; + node->degree = NOT_IN_HEAP; +} + +/* allocate a heap node for value and insert into the heap */ +int heap_add(heap_prio_t higher_prio, struct heap* heap, + void* value, int gfp_flags) +{ + struct heap_node* hn = heap_node_alloc(gfp_flags); + if (likely(hn)) { + heap_node_init(&hn, value); + heap_insert(higher_prio, heap, hn); + } + return hn != NULL; +} + +void* heap_take_del(heap_prio_t higher_prio, + struct heap* heap) +{ + struct heap_node* hn = heap_take(higher_prio, heap); + void* ret = NULL; + if (hn) { + ret = hn->value; + heap_node_free(hn); + } + return ret; +} diff --git a/litmus/jobs.c b/litmus/jobs.c new file mode 100644 index 000000000000..36e314625d86 --- /dev/null +++ b/litmus/jobs.c @@ -0,0 +1,43 @@ +/* litmus/jobs.c - common job control code + */ + +#include + +#include +#include + +void prepare_for_next_period(struct task_struct *t) +{ + BUG_ON(!t); + /* prepare next release */ + t->rt_param.job_params.release = t->rt_param.job_params.deadline; + t->rt_param.job_params.deadline += get_rt_period(t); + t->rt_param.job_params.exec_time = 0; + /* update job sequence number */ + t->rt_param.job_params.job_no++; + + /* don't confuse Linux */ + t->rt.time_slice = 1; +} + +void release_at(struct task_struct *t, lt_t start) +{ + t->rt_param.job_params.deadline = start; + prepare_for_next_period(t); + set_rt_flags(t, RT_F_RUNNING); +} + + +/* + * Deactivate current task until the beginning of the next period. + */ +long complete_job(void) +{ + /* Mark that we do not excute anymore */ + set_rt_flags(current, RT_F_SLEEP); + /* call schedule, this will return when a new job arrives + * it also takes care of preparing for the next release + */ + schedule(); + return 0; +} diff --git a/litmus/litmus.c b/litmus/litmus.c new file mode 100644 index 000000000000..eb0d17e298d7 --- /dev/null +++ b/litmus/litmus.c @@ -0,0 +1,654 @@ +/* litmus.c -- Implementation of the LITMUS syscalls, the LITMUS intialization code, + * and the procfs interface.. 
+ */ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include + +/* Number of RT tasks that exist in the system */ +atomic_t rt_task_count = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(task_transition_lock); + +/* Give log messages sequential IDs. */ +atomic_t __log_seq_no = ATOMIC_INIT(0); + +/* current master CPU for handling timer IRQs */ +atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); + +static struct kmem_cache * heap_node_cache; + +struct heap_node* heap_node_alloc(int gfp_flags) +{ + return kmem_cache_alloc(heap_node_cache, gfp_flags); +} + +void heap_node_free(struct heap_node* hn) +{ + kmem_cache_free(heap_node_cache, hn); +} + +/* + * sys_set_task_rt_param + * @pid: Pid of the task which scheduling parameters must be changed + * @param: New real-time extension parameters such as the execution cost and + * period + * Syscall for manipulating with task rt extension params + * Returns EFAULT if param is NULL. + * ESRCH if pid is not corrsponding + * to a valid task. + * EINVAL if either period or execution cost is <=0 + * EPERM if pid is a real-time task + * 0 if success + * + * Only non-real-time tasks may be configured with this system call + * to avoid races with the scheduler. In practice, this means that a + * task's parameters must be set _before_ calling sys_prepare_rt_task() + * + * find_task_by_vpid() assumes that we are in the same namespace of the + * target. + */ +asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) +{ + struct rt_task tp; + struct task_struct *target; + int retval = -EINVAL; + + printk("Setting up rt task parameters for process %d.\n", pid); + + if (pid < 0 || param == 0) { + goto out; + } + if (copy_from_user(&tp, param, sizeof(tp))) { + retval = -EFAULT; + goto out; + } + + /* Task search and manipulation must be protected */ + read_lock_irq(&tasklist_lock); + if (!(target = find_task_by_vpid(pid))) { + retval = -ESRCH; + goto out_unlock; + } + + if (is_realtime(target)) { + /* The task is already a real-time task. + * We cannot not allow parameter changes at this point. + */ + retval = -EBUSY; + goto out_unlock; + } + + if (tp.exec_cost <= 0) + goto out_unlock; + if (tp.period <= 0) + goto out_unlock; + if (!cpu_online(tp.cpu)) + goto out_unlock; + if (tp.period < tp.exec_cost) + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because wcet > period\n", pid); + goto out_unlock; + } + + target->rt_param.task_params = tp; + + retval = 0; + out_unlock: + read_unlock_irq(&tasklist_lock); + out: + return retval; +} + +/* + * Getter of task's RT params + * returns EINVAL if param or pid is NULL + * returns ESRCH if pid does not correspond to a valid task + * returns EFAULT if copying of parameters has failed. + * + * find_task_by_vpid() assumes that we are in the same namespace of the + * target. + */ +asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) +{ + int retval = -EINVAL; + struct task_struct *source; + struct rt_task lp; + if (param == 0 || pid < 0) + goto out; + read_lock(&tasklist_lock); + if (!(source = find_task_by_vpid(pid))) { + retval = -ESRCH; + goto out_unlock; + } + lp = source->rt_param.task_params; + read_unlock(&tasklist_lock); + /* Do copying outside the lock */ + retval = + copy_to_user(param, &lp, sizeof(lp)) ? 
-EFAULT : 0; + return retval; + out_unlock: + read_unlock(&tasklist_lock); + out: + return retval; + +} + +/* + * This is the crucial function for periodic task implementation, + * It checks if a task is periodic, checks if such kind of sleep + * is permitted and calls plugin-specific sleep, which puts the + * task into a wait array. + * returns 0 on successful wakeup + * returns EPERM if current conditions do not permit such sleep + * returns EINVAL if current task is not able to go to sleep + */ +asmlinkage long sys_complete_job(void) +{ + int retval = -EPERM; + if (!is_realtime(current)) { + retval = -EINVAL; + goto out; + } + /* Task with negative or zero period cannot sleep */ + if (get_rt_period(current) <= 0) { + retval = -EINVAL; + goto out; + } + /* The plugin has to put the task into an + * appropriate queue and call schedule + */ + retval = litmus->complete_job(); + out: + return retval; +} + +/* This is an "improved" version of sys_complete_job that + * addresses the problem of unintentionally missing a job after + * an overrun. + * + * returns 0 on successful wakeup + * returns EPERM if current conditions do not permit such sleep + * returns EINVAL if current task is not able to go to sleep + */ +asmlinkage long sys_wait_for_job_release(unsigned int job) +{ + int retval = -EPERM; + if (!is_realtime(current)) { + retval = -EINVAL; + goto out; + } + + /* Task with negative or zero period cannot sleep */ + if (get_rt_period(current) <= 0) { + retval = -EINVAL; + goto out; + } + + retval = 0; + + /* first wait until we have "reached" the desired job + * + * This implementation has at least two problems: + * + * 1) It doesn't gracefully handle the wrap around of + * job_no. Since LITMUS is a prototype, this is not much + * of a problem right now. + * + * 2) It is theoretically racy if a job release occurs + * between checking job_no and calling sleep_next_period(). + * A proper solution would requiring adding another callback + * in the plugin structure and testing the condition with + * interrupts disabled. + * + * FIXME: At least problem 2 should be taken care of eventually. + */ + while (!retval && job > current->rt_param.job_params.job_no) + /* If the last job overran then job <= job_no and we + * don't send the task to sleep. + */ + retval = litmus->complete_job(); + out: + return retval; +} + +/* This is a helper syscall to query the current job sequence number. + * + * returns 0 on successful query + * returns EPERM if task is not a real-time task. + * returns EFAULT if &job is not a valid pointer. + */ +asmlinkage long sys_query_job_no(unsigned int __user *job) +{ + int retval = -EPERM; + if (is_realtime(current)) + retval = put_user(current->rt_param.job_params.job_no, job); + + return retval; +} + +/* sys_null_call() is only used for determining raw system call + * overheads (kernel entry, kernel exit). It has no useful side effects. + * If ts is non-NULL, then the current Feather-Trace time is recorded. + */ +asmlinkage long sys_null_call(cycles_t __user *ts) +{ + long ret = 0; + cycles_t now; + + if (ts) { + now = get_cycles(); + ret = put_user(now, ts); + } + + return ret; +} + +/* p is a real-time task. Re-init its state as a best-effort task. */ +static void reinit_litmus_state(struct task_struct* p, int restore) +{ + struct rt_task user_config = {}; + __user short *np_flag = NULL; + + if (restore) { + /* Safe user-space provided configuration data. 
*/ + user_config = p->rt_param.task_params; + np_flag = p->rt_param.np_flag; + } + + /* We probably should not be inheriting any task's priority + * at this point in time. + */ + WARN_ON(p->rt_param.inh_task); + + /* We need to restore the priority of the task. */ +// __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio); + + /* Cleanup everything else. */ + memset(&p->rt_param, 0, sizeof(user_config)); + + /* Restore preserved fields. */ + if (restore) { + p->rt_param.task_params = user_config; + p->rt_param.np_flag = np_flag; + } +} + +long litmus_admit_task(struct task_struct* tsk) +{ + long retval = 0; + unsigned long flags; + + BUG_ON(is_realtime(tsk)); + + if (get_rt_period(tsk) == 0 || + get_exec_cost(tsk) > get_rt_period(tsk)) { + TRACE_TASK(tsk, "litmus admit: invalid task parameters " + "(%lu, %lu)\n", + get_exec_cost(tsk), get_rt_period(tsk)); + return -EINVAL; + } + + if (!cpu_online(get_partition(tsk))) + { + TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", + get_partition(tsk)); + return -EINVAL; + } + + INIT_LIST_HEAD(&tsk_rt(tsk)->list); + + /* avoid scheduler plugin changing underneath us */ + spin_lock_irqsave(&task_transition_lock, flags); + + /* allocate heap node for this task */ + tsk_rt(tsk)->heap_node = heap_node_alloc(GFP_ATOMIC); + if (!tsk_rt(tsk)->heap_node || + !tsk_rt(tsk)->rel_heap) { + printk(KERN_WARNING "litmus: no more heap node memory!?\n"); + retval = -ENOMEM; + heap_node_free(tsk_rt(tsk)->heap_node); + } else + heap_node_init(&tsk_rt(tsk)->heap_node, tsk); + + if (!retval) + retval = litmus->admit_task(tsk); + + if (!retval) { + sched_trace_task_name(tsk); + sched_trace_task_param(tsk); + atomic_inc(&rt_task_count); + } + + spin_unlock_irqrestore(&task_transition_lock, flags); + + return retval; +} + +void litmus_exit_task(struct task_struct* tsk) +{ + if (is_realtime(tsk)) { + sched_trace_task_completion(tsk, 1); + litmus->task_exit(tsk); + BUG_ON(heap_node_in_heap(tsk_rt(tsk)->heap_node)); + heap_node_free(tsk_rt(tsk)->heap_node); + atomic_dec(&rt_task_count); + reinit_litmus_state(tsk, 1); + } +} + +/* Switching a plugin in use is tricky. + * We must watch out that no real-time tasks exists + * (and that none is created in parallel) and that the plugin is not + * currently in use on any processor (in theory). + * + * For now, we don't enforce the second part since it is unlikely to cause + * any trouble by itself as long as we don't unload modules. + */ +int switch_sched_plugin(struct sched_plugin* plugin) +{ + unsigned long flags; + int ret = 0; + + BUG_ON(!plugin); + + /* stop task transitions */ + spin_lock_irqsave(&task_transition_lock, flags); + + /* don't switch if there are active real-time tasks */ + if (atomic_read(&rt_task_count) == 0) { + ret = litmus->deactivate_plugin(); + if (0 != ret) + goto out; + ret = plugin->activate_plugin(); + if (0 != ret) { + printk(KERN_INFO "Can't activate %s (%d).\n", + plugin->plugin_name, ret); + plugin = &linux_sched_plugin; + } + printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); + litmus = plugin; + } else + ret = -EBUSY; +out: + spin_unlock_irqrestore(&task_transition_lock, flags); + return ret; +} + +/* Called upon fork. + * p is the newly forked task. + */ +void litmus_fork(struct task_struct* p) +{ + if (is_realtime(p)) + /* clean out any litmus related state, don't preserve anything*/ + reinit_litmus_state(p, 0); +} + +/* Called upon execve(). + * current is doing the exec. + * Don't let address space specific stuff leak. 
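+ * In particular, the np_flag pointer refers to memory in the old address
+ * space, so it is cleared below; an inherited priority would be a bug here.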
+ */ +void litmus_exec(void) +{ + struct task_struct* p = current; + + if (is_realtime(p)) { + WARN_ON(p->rt_param.inh_task); + p->rt_param.np_flag = NULL; + } +} + +void exit_litmus(struct task_struct *dead_tsk) +{ + if (is_realtime(dead_tsk)) + litmus_exit_task(dead_tsk); +} + + +#ifdef CONFIG_MAGIC_SYSRQ +int sys_kill(int pid, int sig); + +static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty) +{ + struct task_struct *t; + read_lock(&tasklist_lock); + for_each_process(t) { + if (is_realtime(t)) { + sys_kill(t->pid, SIGKILL); + } + } + read_unlock(&tasklist_lock); +} + +static struct sysrq_key_op sysrq_kill_rt_tasks_op = { + .handler = sysrq_handle_kill_rt_tasks, + .help_msg = "quit-rt-tasks(X)", + .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks", +}; + + +#endif + + +static int proc_read_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len; + + len = snprintf(page, PAGE_SIZE, + "real-time tasks = %d\n" + "ready for release = %d\n", + atomic_read(&rt_task_count), + 0); + return len; +} + +static int proc_read_plugins(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len; + + len = print_sched_plugins(page, PAGE_SIZE); + return len; +} + +static int proc_read_curr(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len; + + len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name); + return len; +} + +static int proc_write_curr(struct file *file, + const char *buffer, + unsigned long count, + void *data) +{ + int len, ret; + char name[65]; + struct sched_plugin* found; + + if(count > 64) + len = 64; + else + len = count; + + if(copy_from_user(name, buffer, len)) + return -EFAULT; + + name[len] = '\0'; + /* chomp name */ + if (len > 1 && name[len - 1] == '\n') + name[len - 1] = '\0'; + + found = find_sched_plugin(name); + + if (found) { + ret = switch_sched_plugin(found); + if (ret != 0) + printk(KERN_INFO "Could not switch plugin: %d\n", ret); + } else + printk(KERN_INFO "Plugin '%s' is unknown.\n", name); + + return len; +} + + +static int proc_read_release_master(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len, master; + master = atomic_read(&release_master_cpu); + if (master == NO_CPU) + len = snprintf(page, PAGE_SIZE, "NO_CPU\n"); + else + len = snprintf(page, PAGE_SIZE, "%d\n", master); + return len; +} + +static int proc_write_release_master(struct file *file, + const char *buffer, + unsigned long count, + void *data) +{ + int cpu, err, online = 0; + char msg[64]; + + if (count > 63) + return -EINVAL; + + if (copy_from_user(msg, buffer, count)) + return -EFAULT; + + /* terminate */ + msg[count] = '\0'; + /* chomp */ + if (count > 1 && msg[count - 1] == '\n') + msg[count - 1] = '\0'; + + if (strcmp(msg, "NO_CPU") == 0) { + atomic_set(&release_master_cpu, NO_CPU); + return count; + } else { + err = sscanf(msg, "%d", &cpu); + if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) { + atomic_set(&release_master_cpu, cpu); + return count; + } else { + TRACE("invalid release master: '%s' " + "(err:%d cpu:%d online:%d)\n", + msg, err, cpu, online); + return -EINVAL; + } + } +} + +static struct proc_dir_entry *litmus_dir = NULL, + *curr_file = NULL, + *stat_file = NULL, + *plugs_file = NULL, + *release_master_file = NULL; + +static int __init init_litmus_proc(void) +{ + litmus_dir = proc_mkdir("litmus", NULL); + if (!litmus_dir) { + printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); + return -ENOMEM; + 
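+		/* nothing to undo on this error path: no proc entries have
+		 * been created yet */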
} + + curr_file = create_proc_entry("active_plugin", + 0644, litmus_dir); + if (!curr_file) { + printk(KERN_ERR "Could not allocate active_plugin " + "procfs entry.\n"); + return -ENOMEM; + } + curr_file->read_proc = proc_read_curr; + curr_file->write_proc = proc_write_curr; + + release_master_file = create_proc_entry("release_master", + 0644, litmus_dir); + if (!release_master_file) { + printk(KERN_ERR "Could not allocate release_master " + "procfs entry.\n"); + return -ENOMEM; + } + release_master_file->read_proc = proc_read_release_master; + release_master_file->write_proc = proc_write_release_master; + + stat_file = create_proc_read_entry("stats", 0444, litmus_dir, + proc_read_stats, NULL); + + plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir, + proc_read_plugins, NULL); + + return 0; +} + +static void exit_litmus_proc(void) +{ + if (plugs_file) + remove_proc_entry("plugins", litmus_dir); + if (stat_file) + remove_proc_entry("stats", litmus_dir); + if (curr_file) + remove_proc_entry("active_plugin", litmus_dir); + if (litmus_dir) + remove_proc_entry("litmus", NULL); +} + +extern struct sched_plugin linux_sched_plugin; + +static int __init _init_litmus(void) +{ + /* Common initializers, + * mode change lock is used to enforce single mode change + * operation. + */ + printk("Starting LITMUS^RT kernel\n"); + + register_sched_plugin(&linux_sched_plugin); + + heap_node_cache = KMEM_CACHE(heap_node, SLAB_PANIC); + +#ifdef CONFIG_MAGIC_SYSRQ + /* offer some debugging help */ + if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op)) + printk("Registered kill rt tasks magic sysrq.\n"); + else + printk("Could not register kill rt tasks magic sysrq.\n"); +#endif + + init_litmus_proc(); + + return 0; +} + +static void _exit_litmus(void) +{ + exit_litmus_proc(); + kmem_cache_destroy(heap_node_cache); +} + +module_init(_init_litmus); +module_exit(_exit_litmus); diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c new file mode 100644 index 000000000000..ccedd3670ac5 --- /dev/null +++ b/litmus/sched_litmus.c @@ -0,0 +1,275 @@ +/* This file is included from kernel/sched.c */ + +#include +#include + +static void update_time_litmus(struct rq *rq, struct task_struct *p) +{ + u64 delta = rq->clock - p->se.exec_start; + if (unlikely((s64)delta < 0)) + delta = 0; + /* per job counter */ + p->rt_param.job_params.exec_time += delta; + /* task counter */ + p->se.sum_exec_runtime += delta; + /* sched_clock() */ + p->se.exec_start = rq->clock; + cpuacct_charge(p, delta); +} + +static void double_rq_lock(struct rq *rq1, struct rq *rq2); +static void double_rq_unlock(struct rq *rq1, struct rq *rq2); + +static void litmus_tick(struct rq *rq, struct task_struct *p) +{ + if (is_realtime(p)) + update_time_litmus(rq, p); + litmus->tick(p); +} + +static void litmus_schedule(struct rq *rq, struct task_struct *prev) +{ + struct rq* other_rq; + long was_running; + lt_t _maybe_deadlock = 0; + /* WARNING: rq is _not_ locked! 
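+	 * The plugin's pick is stored in rq->litmus_next and handed to the
+	 * scheduler core later via pick_next_task_litmus().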
*/ + if (is_realtime(prev)) { + update_time_litmus(rq, prev); + if (!is_running(prev)) + tsk_rt(prev)->present = 0; + } + + /* let the plugin schedule */ + rq->litmus_next = litmus->schedule(prev); + + /* check if a global plugin pulled a task from a different RQ */ + if (rq->litmus_next && task_rq(rq->litmus_next) != rq) { + /* we need to migrate the task */ + other_rq = task_rq(rq->litmus_next); + TRACE_TASK(rq->litmus_next, "migrate from %d\n", other_rq->cpu); + + /* while we drop the lock, the prev task could change its + * state + */ + was_running = is_running(prev); + mb(); + spin_unlock(&rq->lock); + + /* Don't race with a concurrent switch. This could deadlock in + * the case of cross or circular migrations. It's the job of + * the plugin to make sure that doesn't happen. + */ + TRACE_TASK(rq->litmus_next, "stack_in_use=%d\n", + rq->litmus_next->rt_param.stack_in_use); + if (rq->litmus_next->rt_param.stack_in_use != NO_CPU) { + TRACE_TASK(rq->litmus_next, "waiting to deschedule\n"); + _maybe_deadlock = litmus_clock(); + } + while (rq->litmus_next->rt_param.stack_in_use != NO_CPU) { + cpu_relax(); + mb(); + if (rq->litmus_next->rt_param.stack_in_use == NO_CPU) + TRACE_TASK(rq->litmus_next, + "descheduled. Proceeding.\n"); + if (lt_before(_maybe_deadlock + 10000000, + litmus_clock())) { + /* We've been spinning for 10ms. + * Something can't be right! + * Let's abandon the task and bail out; at least + * we will have debug info instead of a hard + * deadlock. + */ + TRACE_TASK(rq->litmus_next, + "stack too long in use. " + "Deadlock?\n"); + rq->litmus_next = NULL; + + /* bail out */ + spin_lock(&rq->lock); + return; + } + } +#ifdef __ARCH_WANT_UNLOCKED_CTXSW + if (rq->litmus_next->oncpu) + TRACE_TASK(rq->litmus_next, "waiting for !oncpu"); + while (rq->litmus_next->oncpu) { + cpu_relax(); + mb(); + } +#endif + double_rq_lock(rq, other_rq); + mb(); + if (is_realtime(prev) && is_running(prev) != was_running) { + TRACE_TASK(prev, + "state changed while we dropped" + " the lock: is_running=%d, was_running=%d\n", + is_running(prev), was_running); + if (is_running(prev) && !was_running) { + /* prev task became unblocked + * we need to simulate normal sequence of events + * to scheduler plugins. + */ + litmus->task_block(prev); + litmus->task_wake_up(prev); + } + } + + set_task_cpu(rq->litmus_next, smp_processor_id()); + + /* DEBUG: now that we have the lock we need to make sure a + * couple of things still hold: + * - it is still a real-time task + * - it is still runnable (could have been stopped) + * If either is violated, then the active plugin is + * doing something wrong. 
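+	 * In that case rq->litmus_next is reset to NULL below, so that
+	 * pick_next_task_litmus() returns no task and the scheduler core
+	 * falls back to the other scheduling classes.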
+ */ + if (!is_realtime(rq->litmus_next) || + !is_running(rq->litmus_next)) { + /* BAD BAD BAD */ + TRACE_TASK(rq->litmus_next, + "BAD: migration invariant FAILED: " + "rt=%d running=%d\n", + is_realtime(rq->litmus_next), + is_running(rq->litmus_next)); + /* drop the task */ + rq->litmus_next = NULL; + } + /* release the other CPU's runqueue, but keep ours */ + spin_unlock(&other_rq->lock); + } + if (rq->litmus_next) + rq->litmus_next->rt_param.stack_in_use = rq->cpu; +} + +static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, + int wakeup) +{ + if (wakeup) { + sched_trace_task_resume(p); + tsk_rt(p)->present = 1; + litmus->task_wake_up(p); + } else + TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); +} + +static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, int sleep) +{ + if (sleep) { + litmus->task_block(p); + tsk_rt(p)->present = 0; + sched_trace_task_block(p); + } else + TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n"); +} + +static void yield_task_litmus(struct rq *rq) +{ + BUG_ON(rq->curr != current); + litmus->complete_job(); +} + +/* Plugins are responsible for this. + */ +static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags) +{ +} + +/* has already been taken care of */ +static void put_prev_task_litmus(struct rq *rq, struct task_struct *p) +{ +} + +static struct task_struct *pick_next_task_litmus(struct rq *rq) +{ + struct task_struct* picked = rq->litmus_next; + rq->litmus_next = NULL; + if (picked) + picked->se.exec_start = rq->clock; + return picked; +} + +static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued) +{ +} + +static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running) +{ +} + +static void prio_changed_litmus(struct rq *rq, struct task_struct *p, + int oldprio, int running) +{ +} + +unsigned int get_rr_interval_litmus(struct task_struct *p) +{ + /* return infinity */ + return 0; +} + +/* This is called when a task became a real-time task, either due to a SCHED_* + * class transition or due to PI mutex inheritance. We don't handle Linux PI + * mutex inheritance yet (and probably never will). Use LITMUS provided + * synchronization primitives instead. + */ +static void set_curr_task_litmus(struct rq *rq) +{ + rq->curr->se.exec_start = rq->clock; +} + + +#ifdef CONFIG_SMP +/* execve tries to rebalance task in this scheduling domain */ +static int select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags) +{ + /* preemption is already disabled. 
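+	 * Placement of real-time tasks is left to the active plugin.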
+ * We don't want to change cpu here + */ + return smp_processor_id(); +} + +/* we don't repartition at runtime */ + +static unsigned long +load_balance_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest, + unsigned long max_load_move, + struct sched_domain *sd, enum cpu_idle_type idle, + int *all_pinned, int *this_best_prio) +{ + return 0; +} + +static int +move_one_task_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest, + struct sched_domain *sd, enum cpu_idle_type idle) +{ + return 0; +} +#endif + +const struct sched_class litmus_sched_class = { + .next = &rt_sched_class, + .enqueue_task = enqueue_task_litmus, + .dequeue_task = dequeue_task_litmus, + .yield_task = yield_task_litmus, + + .check_preempt_curr = check_preempt_curr_litmus, + + .pick_next_task = pick_next_task_litmus, + .put_prev_task = put_prev_task_litmus, + +#ifdef CONFIG_SMP + .select_task_rq = select_task_rq_litmus, + + .load_balance = load_balance_litmus, + .move_one_task = move_one_task_litmus, +#endif + + .set_curr_task = set_curr_task_litmus, + .task_tick = task_tick_litmus, + + .get_rr_interval = get_rr_interval_litmus, + + .prio_changed = prio_changed_litmus, + .switched_to = switched_to_litmus, +}; diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c new file mode 100644 index 000000000000..0be091ece569 --- /dev/null +++ b/litmus/sched_plugin.c @@ -0,0 +1,199 @@ +/* sched_plugin.c -- core infrastructure for the scheduler plugin system + * + * This file includes the initialization of the plugin system, the no-op Linux + * scheduler plugin and some dummy functions. + */ + +#include +#include + +#include +#include + +#include + +/************************************************************* + * Dummy plugin functions * + *************************************************************/ + +static void litmus_dummy_finish_switch(struct task_struct * prev) +{ +} + +static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) +{ + return NULL; +} + +static void litmus_dummy_tick(struct task_struct* tsk) +{ +} + +static long litmus_dummy_admit_task(struct task_struct* tsk) +{ + printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", + tsk->comm, tsk->pid); + return -EINVAL; +} + +static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) +{ +} + +static void litmus_dummy_task_wake_up(struct task_struct *task) +{ +} + +static void litmus_dummy_task_block(struct task_struct *task) +{ +} + +static void litmus_dummy_task_exit(struct task_struct *task) +{ +} + +static long litmus_dummy_complete_job(void) +{ + return -ENOSYS; +} + +static long litmus_dummy_activate_plugin(void) +{ + return 0; +} + +static long litmus_dummy_deactivate_plugin(void) +{ + return 0; +} + +#ifdef CONFIG_FMLP + +static long litmus_dummy_inherit_priority(struct pi_semaphore *sem, + struct task_struct *new_owner) +{ + return -ENOSYS; +} + +static long litmus_dummy_return_priority(struct pi_semaphore *sem) +{ + return -ENOSYS; +} + +static long litmus_dummy_pi_block(struct pi_semaphore *sem, + struct task_struct *new_waiter) +{ + return -ENOSYS; +} + +#endif + + +/* The default scheduler plugin. It doesn't do anything and lets Linux do its + * job. 
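+ * All of its callbacks are the litmus_dummy_* functions defined above;
+ * in particular admit_task rejects every task, so nothing can become a
+ * real-time task while this plugin is active.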
+ */ +struct sched_plugin linux_sched_plugin = { + .plugin_name = "Linux", + .tick = litmus_dummy_tick, + .task_new = litmus_dummy_task_new, + .task_exit = litmus_dummy_task_exit, + .task_wake_up = litmus_dummy_task_wake_up, + .task_block = litmus_dummy_task_block, + .complete_job = litmus_dummy_complete_job, + .schedule = litmus_dummy_schedule, + .finish_switch = litmus_dummy_finish_switch, + .activate_plugin = litmus_dummy_activate_plugin, + .deactivate_plugin = litmus_dummy_deactivate_plugin, +#ifdef CONFIG_FMLP + .inherit_priority = litmus_dummy_inherit_priority, + .return_priority = litmus_dummy_return_priority, + .pi_block = litmus_dummy_pi_block, +#endif + .admit_task = litmus_dummy_admit_task +}; + +/* + * The reference to current plugin that is used to schedule tasks within + * the system. It stores references to actual function implementations + * Should be initialized by calling "init_***_plugin()" + */ +struct sched_plugin *litmus = &linux_sched_plugin; + +/* the list of registered scheduling plugins */ +static LIST_HEAD(sched_plugins); +static DEFINE_SPINLOCK(sched_plugins_lock); + +#define CHECK(func) {\ + if (!plugin->func) \ + plugin->func = litmus_dummy_ ## func;} + +/* FIXME: get reference to module */ +int register_sched_plugin(struct sched_plugin* plugin) +{ + printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", + plugin->plugin_name); + + /* make sure we don't trip over null pointers later */ + CHECK(finish_switch); + CHECK(schedule); + CHECK(tick); + CHECK(task_wake_up); + CHECK(task_exit); + CHECK(task_block); + CHECK(task_new); + CHECK(complete_job); + CHECK(activate_plugin); + CHECK(deactivate_plugin); +#ifdef CONFIG_FMLP + CHECK(inherit_priority); + CHECK(return_priority); + CHECK(pi_block); +#endif + CHECK(admit_task); + + if (!plugin->release_at) + plugin->release_at = release_at; + + spin_lock(&sched_plugins_lock); + list_add(&plugin->list, &sched_plugins); + spin_unlock(&sched_plugins_lock); + + return 0; +} + + +/* FIXME: reference counting, etc. */ +struct sched_plugin* find_sched_plugin(const char* name) +{ + struct list_head *pos; + struct sched_plugin *plugin; + + spin_lock(&sched_plugins_lock); + list_for_each(pos, &sched_plugins) { + plugin = list_entry(pos, struct sched_plugin, list); + if (!strcmp(plugin->plugin_name, name)) + goto out_unlock; + } + plugin = NULL; + +out_unlock: + spin_unlock(&sched_plugins_lock); + return plugin; +} + +int print_sched_plugins(char* buf, int max) +{ + int count = 0; + struct list_head *pos; + struct sched_plugin *plugin; + + spin_lock(&sched_plugins_lock); + list_for_each(pos, &sched_plugins) { + plugin = list_entry(pos, struct sched_plugin, list); + count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name); + if (max - count <= 0) + break; + } + spin_unlock(&sched_plugins_lock); + return count; +} -- cgit v1.2.2
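
For reference, a minimal sketch of how a scheduler plugin might hook into the
plugin infrastructure added above (litmus/sched_plugin.c). The plugin name
"DEMO" and the demo_* functions are hypothetical placeholders; only
struct sched_plugin, its callback signatures, and register_sched_plugin()
are taken from this patch. Callbacks that are left unset are filled in with
the litmus_dummy_* defaults by register_sched_plugin():

#include <linux/module.h>
#include <linux/sched.h>
#include <litmus/sched_plugin.h>

/* Never pick a real-time task; the scheduler core then falls back to the
 * regular Linux scheduling classes. */
static struct task_struct* demo_schedule(struct task_struct *prev)
{
	return NULL;
}

/* Accept every task; a real plugin would run an admission or
 * partitioning test here. */
static long demo_admit_task(struct task_struct *tsk)
{
	return 0;
}

static struct sched_plugin demo_plugin = {
	.plugin_name	= "DEMO",	/* hypothetical name */
	.schedule	= demo_schedule,
	.admit_task	= demo_admit_task,
	/* all remaining callbacks are filled in with the litmus_dummy_*
	 * defaults by register_sched_plugin() */
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);

A plugin registered this way can then be selected at runtime by writing its
name to /proc/litmus/active_plugin (handled by proc_write_curr() above);
switch_sched_plugin() refuses the switch with -EBUSY while real-time tasks
exist.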