From 3d5537c160c1484e8d562b9828baf679cc53f67a Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 2 Jun 2011 16:06:05 -0400 Subject: Full patch for klitirqd with Nvidia GPU support. --- arch/x86/kernel/irq.c | 14 + arch/x86/kernel/syscall_table_32.S | 1 + include/linux/completion.h | 1 + include/linux/interrupt.h | 9 +- include/linux/mutex.h | 10 + include/linux/semaphore.h | 9 + include/linux/workqueue.h | 18 + include/litmus/affinity.h | 78 ++ include/litmus/fdso.h | 6 +- include/litmus/litmus.h | 1 + include/litmus/litmus_softirq.h | 199 +++++ include/litmus/nvidia_info.h | 37 + include/litmus/preempt.h | 1 + include/litmus/rt_param.h | 44 +- include/litmus/sched_plugin.h | 22 +- include/litmus/sched_trace.h | 174 +++- include/litmus/sched_trace_external.h | 42 + include/litmus/unistd_32.h | 3 +- include/litmus/unistd_64.h | 5 +- kernel/lockdep.c | 3 +- kernel/mutex.c | 141 +++ kernel/sched.c | 23 +- kernel/semaphore.c | 13 +- kernel/softirq.c | 278 +++++- kernel/workqueue.c | 70 +- litmus/Kconfig | 89 ++ litmus/Makefile | 4 + litmus/affinity.c | 49 + litmus/edf_common.c | 6 + litmus/fdso.c | 1 + litmus/litmus.c | 82 +- litmus/litmus_proc.c | 17 + litmus/litmus_softirq.c | 1579 +++++++++++++++++++++++++++++++++ litmus/locking.c | 1 - litmus/nvidia_info.c | 526 +++++++++++ litmus/preempt.c | 7 + litmus/sched_cedf.c | 852 +++++++++++++++++- litmus/sched_gsn_edf.c | 756 +++++++++++++++- litmus/sched_litmus.c | 2 + litmus/sched_plugin.c | 29 + litmus/sched_task_trace.c | 216 ++++- litmus/sched_trace_external.c | 45 + 42 files changed, 5325 insertions(+), 138 deletions(-) create mode 100644 include/litmus/affinity.h create mode 100644 include/litmus/litmus_softirq.h create mode 100644 include/litmus/nvidia_info.h create mode 100644 include/litmus/sched_trace_external.h create mode 100644 litmus/affinity.c create mode 100644 litmus/litmus_softirq.c create mode 100644 litmus/nvidia_info.c create mode 100644 litmus/sched_trace_external.c diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 91fd0c70a18a..50abbc6b7429 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -8,6 +8,10 @@ #include #include +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + #include #include #include @@ -244,7 +248,17 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) __func__, smp_processor_id(), vector, irq); } +//#ifndef CONFIG_LITMUS_NVIDIA irq_exit(); +//#else + /* skip softirqs if we're tracing an interrupt top-half */ + /* comment out if-statement if we want to trace with bh on. 
*/ + //if(!is_interrupt_tracing_active()) +// irq_exit(); + + +// sched_trace_nv_interrupt_end(); +//#endif set_irq_regs(old_regs); return 1; diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 37702905f658..b5ddae40cee2 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -352,3 +352,4 @@ ENTRY(sys_call_table) .long sys_wait_for_ts_release .long sys_release_ts .long sys_null_call + .long sys_register_nv_device diff --git a/include/linux/completion.h b/include/linux/completion.h index c63950e8a863..3ce20dd3086e 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x) init_waitqueue_head(&x->wait); } +extern void __wait_for_completion_locked(struct completion *); extern void wait_for_completion(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6f..5d22f5342376 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -459,6 +459,10 @@ struct tasklet_struct atomic_t count; void (*func)(unsigned long); unsigned long data; + +#ifdef CONFIG_LITMUS_SOFTIRQD + struct task_struct *owner; +#endif }; #define DECLARE_TASKLET(name, func, data) \ @@ -496,6 +500,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) #define tasklet_unlock(t) do { } while (0) #endif +extern void ___tasklet_schedule(struct tasklet_struct *t); extern void __tasklet_schedule(struct tasklet_struct *t); static inline void tasklet_schedule(struct tasklet_struct *t) @@ -504,6 +509,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t) __tasklet_schedule(t); } +extern void ___tasklet_hi_schedule(struct tasklet_struct *t); extern void __tasklet_hi_schedule(struct tasklet_struct *t); static inline void tasklet_hi_schedule(struct tasklet_struct *t) @@ -512,6 +518,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) __tasklet_hi_schedule(t); } +extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t); extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); /* @@ -541,7 +548,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) } static inline void tasklet_enable(struct tasklet_struct *t) -{ +{ smp_mb__before_atomic_dec(); atomic_dec(&t->count); } diff --git a/include/linux/mutex.h b/include/linux/mutex.h index f363bc8fdc74..9f3199571994 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock) return atomic_read(&lock->count) != 1; } +/* return non-zero to abort. only pre-side-effects may abort */ +typedef int (*side_effect_t)(unsigned long); +extern void mutex_lock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg); +extern void mutex_unlock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg); + /* * See kernel/mutex.c for detailed documentation of these APIs. * Also see Documentation/mutex-design.txt. 
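As a rough illustration of the side-effect lock API declared above (the demo_* identifiers below are placeholders for this sketch, not names introduced by this patch), a caller can use the pre/post hooks to update bookkeeping atomically with the lock transition. Both hooks run under the mutex's internal wait_lock, so they must be short and non-blocking; per the typedef comment, only the pre hook of mutex_lock_sfx may abort by returning non-zero.

	#include <linux/mutex.h>
	#include <linux/sched.h>

	static DEFINE_MUTEX(demo_mutex);
	static struct task_struct *demo_holder; /* only touched under demo_mutex's wait_lock */

	/* side_effect_t callback: record or clear the holder while wait_lock is held */
	static int demo_set_holder(unsigned long arg)
	{
		demo_holder = (struct task_struct *) arg;
		return 0; /* never abort */
	}

	static void demo_critical_section(void)
	{
		/* no pre hook; the post hook records the new holder atomically with acquisition */
		mutex_lock_sfx(&demo_mutex, NULL, 0,
		               demo_set_holder, (unsigned long) current);
		/* ... critical section ... */
		/* the pre hook clears the holder before any waiter is woken */
		mutex_unlock_sfx(&demo_mutex, demo_set_holder, 0,
		                 NULL, 0);
	}
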
@@ -145,6 +154,7 @@ extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); extern int __must_check mutex_lock_killable(struct mutex *lock); + # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 5310d27abd2a..69e3f57661ec 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -49,4 +49,13 @@ extern int __must_check down_trylock(struct semaphore *sem); extern int __must_check down_timeout(struct semaphore *sem, long jiffies); extern void up(struct semaphore *sem); +extern void __down(struct semaphore *sem); +extern void __up(struct semaphore *sem); + +struct semaphore_waiter { + struct list_head list; + struct task_struct *task; + int up; +}; + #endif /* __LINUX_SEMAPHORE_H */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 25e02c941bac..5fecfb375eeb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -83,6 +83,9 @@ struct work_struct { #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif +#ifdef CONFIG_LITMUS_SOFTIRQD + struct task_struct *owner; +#endif }; #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) @@ -115,11 +118,25 @@ struct execute_work { #define __WORK_INIT_LOCKDEP_MAP(n, k) #endif +#ifdef CONFIG_LITMUS_SOFTIRQD +#define __WORK_INIT_OWNER() \ + .owner = NULL, + +#define PREPARE_OWNER(_work, _owner) \ + do { \ + (_work)->owner = (_owner); \ + } while(0) +#else +#define __WORK_INIT_OWNER() +#define PREPARE_OWNER(_work, _owner) +#endif + #define __WORK_INITIALIZER(n, f) { \ .data = WORK_DATA_STATIC_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ + __WORK_INIT_OWNER() \ } #define __DELAYED_WORK_INITIALIZER(n, f) { \ @@ -327,6 +344,7 @@ extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); extern void flush_delayed_work(struct delayed_work *work); +extern int __schedule_work(struct work_struct *work); extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct work_struct *work); extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h new file mode 100644 index 000000000000..877b4099c6e2 --- /dev/null +++ b/include/litmus/affinity.h @@ -0,0 +1,78 @@ +#ifndef __LITMUS_AFFINITY_H +#define __LITMUS_AFFINITY_H + +#include + +/* + L1 (instr) = depth 0 + L1 (data) = depth 1 + L2 = depth 2 + L3 = depth 3 + */ +#define NUM_CACHE_LEVELS 4 + +struct neighborhood +{ + unsigned int size[NUM_CACHE_LEVELS]; + cpumask_var_t neighbors[NUM_CACHE_LEVELS]; +}; + +/* topology info is stored redundently in a big array for fast lookups */ +extern struct neighborhood neigh_info[NR_CPUS]; + +void init_topology(void); /* called by Litmus module's _init_litmus() */ + +/* Works like: +void get_nearest_available_cpu(cpu_entry_t* nearest, cpu_entry_t* start, cpu_entry_t* entries, int release_master) + +Set release_master = -1 for no RM. 
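A hypothetical call site (the names 'this_entry' and 'my_plugin_cpu_entries' are placeholders, not identifiers defined by this patch):

	cpu_entry_t* target;
	get_nearest_available_cpu(target, this_entry, my_plugin_cpu_entries, -1);
	if (target)
		prefer target->cpu when linking the job;
	else
		fall back to any available CPU;

Note that 'my_plugin_cpu_entries' must be the plugin's per-CPU cpu_entry_t variable, since the macro resolves neighbors through per_cpu().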
+ */ +#define get_nearest_available_cpu(nearest, start, entries, release_master) \ +{ \ + (nearest) = NULL; \ + if(!(start)->linked) \ + { \ + (nearest) = (start); \ + } \ + else \ + { \ + int __level; \ + int __cpu; \ + struct neighborhood* __neighbors = &neigh_info[(start)->cpu]; \ + \ + for(__level = 0; (__level < NUM_CACHE_LEVELS) && !(nearest); ++__level) \ + { \ + if(__neighbors->size[__level] > 1) \ + { \ + for_each_cpu(__cpu, __neighbors->neighbors[__level]) \ + { \ + if(__cpu != (release_master)) \ + { \ + cpu_entry_t* __entry = &per_cpu((entries), __cpu); \ + if(!__entry->linked) \ + { \ + (nearest) = __entry; \ + break; \ + } \ + } \ + } \ + } \ + else if(__neighbors->size[__level] == 0) \ + { \ + break; \ + } \ + } \ + } \ + \ + if((nearest)) \ + { \ + TRACE("P%d is closest available CPU to P%d\n", (nearest)->cpu, (start)->cpu); \ + } \ + else \ + { \ + TRACE("Could not find an available CPU close to P%d\n", \ + (start)->cpu); \ + } \ +} + +#endif diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h index caf2a1e6918c..c740e8fc3e88 100644 --- a/include/litmus/fdso.h +++ b/include/litmus/fdso.h @@ -18,9 +18,10 @@ typedef enum { MIN_OBJ_TYPE = 0, FMLP_SEM = 0, - SRP_SEM = 1, + KFMLP_SEM = 1, + SRP_SEM = 2, - MAX_OBJ_TYPE = 1 + MAX_OBJ_TYPE = SRP_SEM } obj_type_t; struct inode_obj_id { @@ -64,6 +65,7 @@ static inline void* od_lookup(int od, obj_type_t type) } #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) +#define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM)) #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index e7769ca36ec0..3df242bf272f 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list) ); } + struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); #define NO_CPU 0xffffffff diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h new file mode 100644 index 000000000000..34287f3cbb8d --- /dev/null +++ b/include/litmus/litmus_softirq.h @@ -0,0 +1,199 @@ +#ifndef __LITMUS_SOFTIRQ_H +#define __LITMUS_SOFTIRQ_H + +#include +#include + +/* + Threaded tasklet handling for Litmus. Tasklets + are scheduled with the priority of the tasklet's + owner---that is, the RT task on behalf the tasklet + runs. + + Tasklets are current scheduled in FIFO order with + NO priority inheritance for "blocked" tasklets. + + klitirqd assumes the priority of the owner of the + tasklet when the tasklet is next to execute. + + Currently, hi-tasklets are scheduled before + low-tasklets, regardless of priority of low-tasklets. + And likewise, low-tasklets are scheduled before work + queue objects. This priority inversion probably needs + to be fixed, though it is not an issue if our work with + GPUs as GPUs are owned (and associated klitirqds) for + exclusive time periods, thus no inversions can + occur. + */ + + + +#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD + +/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons. + Actual launch of threads is deffered to kworker's + workqueue, so daemons will likely not be immediately + running when this function returns, though the required + data will be initialized. + + @affinity_set: an array expressing the processor affinity + for each of the NR_LITMUS_SOFTIRQD daemons. May be set + to NULL for global scheduling. 
+ + - Examples - + 8-CPU system with two CPU clusters: + affinity[] = {0, 0, 0, 0, 3, 3, 3, 3} + NOTE: Daemons not actually bound to specified CPU, but rather + cluster in which the CPU resides. + + 8-CPU system, partitioned: + affinity[] = {0, 1, 2, 3, 4, 5, 6, 7} + + FIXME: change array to a CPU topology or array of cpumasks + + */ +void spawn_klitirqd(int* affinity); + + +/* Raises a flag to tell klitirqds to terminate. + Termination is async, so some threads may be running + after function return. */ +void kill_klitirqd(void); + + +/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqs are ready + to handle tasklets. 0, otherwise.*/ +int klitirqd_is_ready(void); + +/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqs are ready + to handle tasklets. 0, otherwise.*/ +int klitirqd_is_dead(void); + +/* Flushes all pending work out to the OS for regular + * tasklet/work processing of the specified 'owner' + * + * PRECOND: klitirqd_thread must have a clear entry + * in the GPU registry, otherwise this call will become + * a no-op as work will loop back to the klitirqd_thread. + * + * Pass NULL for owner to flush ALL pending items. + */ +void flush_pending(struct task_struct* klitirqd_thread, + struct task_struct* owner); + +struct task_struct* get_klitirqd(unsigned int k_id); + + +extern int __litmus_tasklet_schedule( + struct tasklet_struct *t, + unsigned int k_id); + +/* schedule a tasklet on klitirqd #k_id */ +static inline int litmus_tasklet_schedule( + struct tasklet_struct *t, + unsigned int k_id) +{ + int ret = 0; + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + ret = __litmus_tasklet_schedule(t, k_id); + return(ret); +} + +/* for use by __tasklet_schedule() */ +static inline int _litmus_tasklet_schedule( + struct tasklet_struct *t, + unsigned int k_id) +{ + return(__litmus_tasklet_schedule(t, k_id)); +} + + + + +extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, + unsigned int k_id); + +/* schedule a hi tasklet on klitirqd #k_id */ +static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t, + unsigned int k_id) +{ + int ret = 0; + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + ret = __litmus_tasklet_hi_schedule(t, k_id); + return(ret); +} + +/* for use by __tasklet_hi_schedule() */ +static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t, + unsigned int k_id) +{ + return(__litmus_tasklet_hi_schedule(t, k_id)); +} + + + + + +extern int __litmus_tasklet_hi_schedule_first( + struct tasklet_struct *t, + unsigned int k_id); + +/* schedule a hi tasklet on klitirqd #k_id on next go-around */ +/* PRECONDITION: Interrupts must be disabled. */ +static inline int litmus_tasklet_hi_schedule_first( + struct tasklet_struct *t, + unsigned int k_id) +{ + int ret = 0; + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + ret = __litmus_tasklet_hi_schedule_first(t, k_id); + return(ret); +} + +/* for use by __tasklet_hi_schedule_first() */ +static inline int _litmus_tasklet_hi_schedule_first( + struct tasklet_struct *t, + unsigned int k_id) +{ + return(__litmus_tasklet_hi_schedule_first(t, k_id)); +} + + + +////////////// + +extern int __litmus_schedule_work( + struct work_struct* w, + unsigned int k_id); + +static inline int litmus_schedule_work( + struct work_struct* w, + unsigned int k_id) +{ + return(__litmus_schedule_work(w, k_id)); +} + + + +///////////// mutex operations for client threads. 
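/* Hypothetical usage sketch for the tasklet dispatch API declared above
 * (illustration only: 'my_tasklet', 'my_handler', 'owner', and 'k' are
 * assumed names, not identifiers introduced by this patch):
 *
 *	DECLARE_TASKLET(my_tasklet, my_handler, 0);
 *	...
 *	my_tasklet.owner = owner;                // RT task the bottom half serves
 *	litmus_tasklet_schedule(&my_tasklet, k); // queue on klitirqd thread #k
 *
 * As with tasklet_schedule(), nothing is queued if the tasklet is already
 * pending; otherwise the handler later runs in klitirqd #k, which assumes
 * 'owner's priority instead of executing in softirq context.
 */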
+ +void down_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem); + +void __down_and_reset_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_reset, + enum klitirqd_sem_status to_set, + struct mutex* sem); + +void up_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem); + + + +void release_klitirqd_lock(struct task_struct* t); + +int reacquire_klitirqd_lock(struct task_struct* t); + +#endif diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h new file mode 100644 index 000000000000..579301d77cf5 --- /dev/null +++ b/include/litmus/nvidia_info.h @@ -0,0 +1,37 @@ +#ifndef __LITMUS_NVIDIA_H +#define __LITMUS_NVIDIA_H + +#include + + +#include + + +#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD + +int init_nvidia_info(void); + +int is_nvidia_func(void* func_addr); + +void dump_nvidia_info(const struct tasklet_struct *t); + + +// Returns the Nvidia device # associated with provided tasklet and work_struct. +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t); +u32 get_work_nv_device_num(const struct work_struct *t); + + +int init_nv_device_reg(void); +//int get_nv_device_id(struct task_struct* owner); + + +int reg_nv_device(int reg_device_id, int register_device); + +struct task_struct* get_nv_device_owner(u32 target_device_id); + +void lock_nv_registry(u32 reg_device_id, unsigned long* flags); +void unlock_nv_registry(u32 reg_device_id, unsigned long* flags); + +void increment_nv_int_count(u32 device); + +#endif diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h index 260c6fe17986..244924f93c48 100644 --- a/include/litmus/preempt.h +++ b/include/litmus/preempt.h @@ -26,6 +26,7 @@ const char* sched_state_name(int s); (x), #x, __FUNCTION__); \ } while (0); +//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */ #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \ TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \ cpu, (x), sched_state_name(x), \ diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 5de422c742f6..53af3ce1d955 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -69,6 +69,8 @@ struct control_page { /* don't export internal data structures to user space (liblitmus) */ #ifdef __KERNEL__ +#include + struct _rt_domain; struct bheap_node; struct release_heap; @@ -94,6 +96,14 @@ struct rt_job { struct pfair_param; +enum klitirqd_sem_status +{ + NEED_TO_REACQUIRE, + REACQUIRING, + NOT_HELD, + HELD +}; + /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must * be explicitly set up before the task set is launched. @@ -108,6 +118,38 @@ struct rt_param { /* is the task present? (true if it can be scheduled) */ unsigned int present:1; +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy threads have minimum priority by default */ + unsigned int is_proxy_thread:1; + + /* pointer to klitirqd currently working on this + task_struct's behalf. only set by the task pointed + to by klitirqd. + + ptr only valid if is_proxy_thread == 0 + */ + struct task_struct* cur_klitirqd; + + /* Used to implement mutual execution exclusion between + * job and klitirqd execution. Job must always hold + * it's klitirqd_sem to execute. klitirqd instance + * must hold the semaphore before executing on behalf + * of a job. 
+ */ + //struct semaphore klitirqd_sem; + struct mutex klitirqd_sem; + + /* status of held klitirqd_sem, even if the held klitirqd_sem is from + another task (only proxy threads do this though). + */ + atomic_t klitirqd_sem_stat; +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + /* number of top-half interrupts handled on behalf of current job */ + atomic_t nv_int_count; +#endif + #ifdef CONFIG_LITMUS_LOCKING /* Is the task being priority-boosted by a locking protocol? */ unsigned int priority_boosted:1; @@ -128,7 +170,7 @@ struct rt_param { * an increased task priority. */ struct task_struct* inh_task; - + #ifdef CONFIG_NP_SECTION /* For the FMLP under PSN-EDF, it is required to make the task * non-preemptive from kernel space. In order not to interfere with diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 6e7cabdddae8..df50930d14a0 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -29,7 +29,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev); */ typedef void (*finish_switch_t)(struct task_struct *prev); - /********************* task state changes ********************/ /* Called to setup a new real-time task. @@ -58,6 +57,17 @@ typedef void (*task_exit_t) (struct task_struct *); typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, void* __user config); +/* Called to change inheritance levels of given task */ +typedef void (*set_prio_inh_t)(struct task_struct* t, + struct task_struct* prio_inh); +typedef void (*clear_prio_inh_t)(struct task_struct* t); + + +typedef void (*set_prio_inh_klitirq_t)(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner); +typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd, + struct task_struct* old_owner); /********************* sys call backends ********************/ /* This function causes the caller to sleep until the next release */ @@ -88,7 +98,7 @@ struct sched_plugin { /* task state changes */ admit_task_t admit_task; - task_new_t task_new; + task_new_t task_new; task_wake_up_t task_wake_up; task_block_t task_block; task_exit_t task_exit; @@ -96,6 +106,14 @@ struct sched_plugin { #ifdef CONFIG_LITMUS_LOCKING /* locking protocols */ allocate_lock_t allocate_lock; + + set_prio_inh_t set_prio_inh; + clear_prio_inh_t clear_prio_inh; +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD + set_prio_inh_klitirq_t set_prio_inh_klitirqd; + clear_prio_inh_klitirqd_t clear_prio_inh_klitirqd; #endif } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 7ca34cb13881..1486c778aff8 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h @@ -11,12 +11,12 @@ struct st_trace_header { u8 cpu; /* On which CPU was it recorded? */ u16 pid; /* PID of the task. */ u32 job; /* The job sequence number. */ -}; +} __attribute__((packed)); #define ST_NAME_LEN 16 struct st_name_data { char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ -}; +} __attribute__((packed)); struct st_param_data { /* regular params */ u32 wcet; @@ -25,30 +25,29 @@ struct st_param_data { /* regular params */ u8 partition; u8 class; u8 __unused[2]; -}; +} __attribute__((packed)); struct st_release_data { /* A job is was/is going to be released. */ u64 release; /* What's the release time? */ u64 deadline; /* By when must it finish? */ -}; +} __attribute__((packed)); struct st_assigned_data { /* A job was asigned to a CPU. 
*/ u64 when; u8 target; /* Where should it execute? */ u8 __unused[7]; -}; +} __attribute__((packed)); struct st_switch_to_data { /* A process was switched to on a given CPU. */ u64 when; /* When did this occur? */ u32 exec_time; /* Time the current job has executed. */ u8 __unused[4]; - -}; +} __attribute__((packed)); struct st_switch_away_data { /* A process was switched away from on a given CPU. */ u64 when; u64 exec_time; -}; +} __attribute__((packed)); struct st_completion_data { /* A job completed. */ u64 when; @@ -56,35 +55,92 @@ struct st_completion_data { /* A job completed. */ * next task automatically; set to 0 otherwise. */ u8 __uflags:7; - u8 __unused[7]; -}; + u16 nv_int_count; + u8 __unused[5]; +} __attribute__((packed)); struct st_block_data { /* A task blocks. */ u64 when; u64 __unused; -}; +} __attribute__((packed)); struct st_resume_data { /* A task resumes. */ u64 when; u64 __unused; -}; +} __attribute__((packed)); struct st_action_data { u64 when; u8 action; u8 __unused[7]; -}; +} __attribute__((packed)); struct st_sys_release_data { u64 when; u64 release; -}; +} __attribute__((packed)); + + +struct st_tasklet_release_data { + u64 when; + u64 __unused; +} __attribute__((packed)); + +struct st_tasklet_begin_data { + u64 when; + u16 exe_pid; + u8 __unused[6]; +} __attribute__((packed)); + +struct st_tasklet_end_data { + u64 when; + u16 exe_pid; + u8 flushed; + u8 __unused[5]; +} __attribute__((packed)); + + +struct st_work_release_data { + u64 when; + u64 __unused; +} __attribute__((packed)); + +struct st_work_begin_data { + u64 when; + u16 exe_pid; + u8 __unused[6]; +} __attribute__((packed)); + +struct st_work_end_data { + u64 when; + u16 exe_pid; + u8 flushed; + u8 __unused[5]; +} __attribute__((packed)); + +struct st_effective_priority_change_data { + u64 when; + u16 inh_pid; + u8 __unused[6]; +} __attribute__((packed)); + +struct st_nv_interrupt_begin_data { + u64 when; + u32 device; + u8 __unused[4]; +} __attribute__((packed)); + +struct st_nv_interrupt_end_data { + u64 when; + u32 device; + u8 __unused[4]; +} __attribute__((packed)); #define DATA(x) struct st_ ## x ## _data x; typedef enum { - ST_NAME = 1, /* Start at one, so that we can spot - * uninitialized records. */ + ST_NAME = 1, /* Start at one, so that we can spot + * uninitialized records. 
*/ ST_PARAM, ST_RELEASE, ST_ASSIGNED, @@ -94,7 +150,16 @@ typedef enum { ST_BLOCK, ST_RESUME, ST_ACTION, - ST_SYS_RELEASE + ST_SYS_RELEASE, + ST_TASKLET_RELEASE, + ST_TASKLET_BEGIN, + ST_TASKLET_END, + ST_WORK_RELEASE, + ST_WORK_BEGIN, + ST_WORK_END, + ST_EFF_PRIO_CHANGE, + ST_NV_INTERRUPT_BEGIN, + ST_NV_INTERRUPT_END, } st_event_record_type_t; struct st_event_record { @@ -113,8 +178,17 @@ struct st_event_record { DATA(resume); DATA(action); DATA(sys_release); + DATA(tasklet_release); + DATA(tasklet_begin); + DATA(tasklet_end); + DATA(work_release); + DATA(work_begin); + DATA(work_end); + DATA(effective_priority_change); + DATA(nv_interrupt_begin); + DATA(nv_interrupt_end); } data; -}; +} __attribute__((packed)); #undef DATA @@ -129,6 +203,8 @@ struct st_event_record { ft_event1(id, callback, task) #define SCHED_TRACE2(id, callback, task, xtra) \ ft_event2(id, callback, task, xtra) +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \ + ft_event3(id, callback, task, xtra1, xtra2) /* provide prototypes; needed on sparc64 */ #ifndef NO_TASK_TRACE_DECLS @@ -155,12 +231,45 @@ feather_callback void do_sched_trace_action(unsigned long id, feather_callback void do_sched_trace_sys_release(unsigned long id, lt_t* start); + +feather_callback void do_sched_trace_tasklet_release(unsigned long id, + struct task_struct* owner); +feather_callback void do_sched_trace_tasklet_begin(unsigned long id, + struct task_struct* owner); +feather_callback void do_sched_trace_tasklet_end(unsigned long id, + struct task_struct* owner, + unsigned long flushed); + +feather_callback void do_sched_trace_work_release(unsigned long id, + struct task_struct* owner); +feather_callback void do_sched_trace_work_begin(unsigned long id, + struct task_struct* owner, + struct task_struct* exe); +feather_callback void do_sched_trace_work_end(unsigned long id, + struct task_struct* owner, + struct task_struct* exe, + unsigned long flushed); + +feather_callback void do_sched_trace_eff_prio_change(unsigned long id, + struct task_struct* task, + struct task_struct* inh); + +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, + u32 device); +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, + unsigned long unused); + + +/* returns true if we're tracing an interrupt on current CPU */ +/* int is_interrupt_tracing_active(void); */ + #endif #else #define SCHED_TRACE(id, callback, task) /* no tracing */ #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) #endif @@ -193,6 +302,35 @@ feather_callback void do_sched_trace_sys_release(unsigned long id, SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when) +#define sched_trace_tasklet_release(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t) + +#define sched_trace_tasklet_begin(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t) + +#define sched_trace_tasklet_end(t, flushed) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed) + + +#define sched_trace_work_release(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, do_sched_trace_work_release, t) + +#define sched_trace_work_begin(t, e) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e) + +#define sched_trace_work_end(t, e, flushed) \ + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed) + + +#define sched_trace_eff_prio_change(t, inh) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, 
do_sched_trace_eff_prio_change, t, inh) + + +#define sched_trace_nv_interrupt_begin(d) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d) +#define sched_trace_nv_interrupt_end() \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, 0ul) + #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ #endif /* __KERNEL__ */ diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h new file mode 100644 index 000000000000..c2c872639880 --- /dev/null +++ b/include/litmus/sched_trace_external.h @@ -0,0 +1,42 @@ +/* + * sched_trace.h -- record scheduler events to a byte stream for offline analysis. + */ +#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_ +#define _LINUX_SCHED_TRACE_EXTERNAL_H_ + +extern void __sched_trace_tasklet_begin_external(struct task_struct* t); +static inline void sched_trace_tasklet_begin_external(struct task_struct* t) +{ + __sched_trace_tasklet_begin_external(t); +} + +extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed); +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed) +{ + __sched_trace_tasklet_end_external(t, flushed); +} + +extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e); +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e) +{ + __sched_trace_work_begin_external(t, e); +} + +extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f); +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f) +{ + __sched_trace_work_end_external(t, e, f); +} + +extern void __sched_trace_nv_interrupt_begin_external(u32 device); +static inline void sched_trace_nv_interrupt_begin_external(u32 device) +{ + __sched_trace_nv_interrupt_begin_external(device); +} + +extern void __sched_trace_nv_interrupt_end_external(void); +static inline void sched_trace_nv_interrupt_end_external(void) +{ + __sched_trace_nv_interrupt_end_external(); +} +#endif diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 94264c27d9ac..c6efc4c40af2 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -17,5 +17,6 @@ #define __NR_wait_for_ts_release __LSC(9) #define __NR_release_ts __LSC(10) #define __NR_null_call __LSC(11) +#define __NR_register_nv_device __LSC(12) -#define NR_litmus_syscalls 12 +#define NR_litmus_syscalls 13 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index d5ced0d2642c..b44a7c33bdf8 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -29,5 +29,8 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) __SYSCALL(__NR_release_ts, sys_release_ts) #define __NR_null_call __LSC(11) __SYSCALL(__NR_null_call, sys_null_call) +#define __NR_register_nv_device __LSC(12) +__SYSCALL(__NR_register_nv_device, sys_register_nv_device) -#define NR_litmus_syscalls 12 + +#define NR_litmus_syscalls 13 diff --git a/kernel/lockdep.c b/kernel/lockdep.c index f2852a510232..ebff2cf715c5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -530,7 +530,7 @@ static void print_lock(struct held_lock *hlock) print_ip_sym(hlock->acquire_ip); } -static void lockdep_print_held_locks(struct task_struct *curr) +void lockdep_print_held_locks(struct task_struct *curr) { int i, depth = curr->lockdep_depth; @@ -546,6 +546,7 @@ static void lockdep_print_held_locks(struct task_struct *curr) 
print_lock(curr->held_locks + i); } } +EXPORT_SYMBOL(lockdep_print_held_locks); static void print_kernel_version(void) { diff --git a/kernel/mutex.c b/kernel/mutex.c index 200407c1502f..435685ecd068 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -496,3 +496,144 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) return 1; } EXPORT_SYMBOL(atomic_dec_and_mutex_lock); + + + + + + + + +//__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); + +void mutex_lock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg) +{ + long state = TASK_UNINTERRUPTIBLE; + unsigned int subclass = 0; + unsigned long ip = _RET_IP_; + + + struct task_struct *task = current; + struct mutex_waiter waiter; + unsigned long flags; + + preempt_disable(); + mutex_acquire(&lock->dep_map, subclass, 0, ip); + + spin_lock_mutex(&lock->wait_lock, flags); + + if(pre) + { + if(unlikely(pre(pre_arg))) + { + // this will fuck with lockdep's CONFIG_PROVE_LOCKING... + spin_unlock_mutex(&lock->wait_lock, flags); + preempt_enable(); + return; + } + } + + debug_mutex_lock_common(lock, &waiter); + debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); + + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + waiter.task = task; + + if (atomic_xchg(&lock->count, -1) == 1) + goto done; + + lock_contended(&lock->dep_map, ip); + + for (;;) { + /* + * Lets try to take the lock again - this is needed even if + * we get here for the first time (shortly after failing to + * acquire the lock), to make sure that we get a wakeup once + * it's unlocked. Later on, if we sleep, this is the + * operation that gives us the lock. We xchg it to -1, so + * that when we release the lock, we properly wake up the + * other waiters: + */ + if (atomic_xchg(&lock->count, -1) == 1) + break; + + __set_task_state(task, state); + + /* didnt get the lock, go to sleep: */ + spin_unlock_mutex(&lock->wait_lock, flags); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + spin_lock_mutex(&lock->wait_lock, flags); + } + +done: + lock_acquired(&lock->dep_map, ip); + /* got the lock - rejoice! */ + mutex_remove_waiter(lock, &waiter, current_thread_info()); + mutex_set_owner(lock); + + /* set it to 0 if there are no waiters left: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); + + if(post) + post(post_arg); + + spin_unlock_mutex(&lock->wait_lock, flags); + + debug_mutex_free_waiter(&waiter); + preempt_enable(); + + //return 0; +} +EXPORT_SYMBOL(mutex_lock_sfx); + + + +//__mutex_unlock_common_slowpath(lock_count, 1); + +void mutex_unlock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg) +{ + //struct mutex *lock = container_of(lock_count, struct mutex, count); + unsigned long flags; + + spin_lock_mutex(&lock->wait_lock, flags); + + if(pre) + pre(pre_arg); + + //mutex_release(&lock->dep_map, nested, _RET_IP_); + mutex_release(&lock->dep_map, 1, _RET_IP_); + debug_mutex_unlock(lock); + + /* + * some architectures leave the lock unlocked in the fastpath failure + * case, others need to leave it locked. 
In the later case we have to + * unlock it here + */ + if (__mutex_slowpath_needs_to_unlock()) + atomic_set(&lock->count, 1); + + if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter = + list_entry(lock->wait_list.next, + struct mutex_waiter, list); + + debug_mutex_wake_waiter(lock, waiter); + + wake_up_process(waiter->task); + } + + if(post) + post(post_arg); + + spin_unlock_mutex(&lock->wait_lock, flags); +} +EXPORT_SYMBOL(mutex_unlock_sfx); diff --git a/kernel/sched.c b/kernel/sched.c index c5d775079027..3162605ffc91 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -82,6 +82,10 @@ #include #include +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + static void litmus_tick(struct rq*, struct task_struct*); #define CREATE_TRACE_POINTS @@ -3789,6 +3793,7 @@ pick_next_task(struct rq *rq) } } + /* * schedule() is the main scheduler function. */ @@ -3807,6 +3812,10 @@ need_resched: rcu_note_context_switch(cpu); prev = rq->curr; +#ifdef CONFIG_LITMUS_SOFTIRQD + release_klitirqd_lock(prev); +#endif + release_kernel_lock(prev); need_resched_nonpreemptible: TS_SCHED_START; @@ -3882,15 +3891,20 @@ need_resched_nonpreemptible: if (sched_state_validate_switch() || unlikely(reacquire_kernel_lock(prev))) goto need_resched_nonpreemptible; - + preempt_enable_no_resched(); + if (need_resched()) goto need_resched; + reacquire_klitirqd_lock(prev); + srp_ceiling_block(); } EXPORT_SYMBOL(schedule); + + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* * Look out! "owner" is an entirely speculative pointer @@ -4051,6 +4065,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, } } + /** * __wake_up - wake up threads blocked on a waitqueue. * @q: the waitqueue @@ -4236,6 +4251,12 @@ void __sched wait_for_completion(struct completion *x) } EXPORT_SYMBOL(wait_for_completion); +void __sched __wait_for_completion_locked(struct completion *x) +{ + do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(__wait_for_completion_locked); + /** * wait_for_completion_timeout: - waits for completion of a task (w/timeout) * @x: holds the state of this particular completion diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 94a62c0d4ade..c947a046a6d7 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -33,11 +33,11 @@ #include #include -static noinline void __down(struct semaphore *sem); +noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); static noinline int __down_killable(struct semaphore *sem); static noinline int __down_timeout(struct semaphore *sem, long jiffies); -static noinline void __up(struct semaphore *sem); +noinline void __up(struct semaphore *sem); /** * down - acquire the semaphore @@ -190,11 +190,13 @@ EXPORT_SYMBOL(up); /* Functions for the contended case */ +/* struct semaphore_waiter { struct list_head list; struct task_struct *task; int up; }; + */ /* * Because this function is inlined, the 'state' parameter will be @@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state, return -EINTR; } -static noinline void __sched __down(struct semaphore *sem) +noinline void __sched __down(struct semaphore *sem) { __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } +EXPORT_SYMBOL(__down); + static noinline int __sched __down_interruptible(struct semaphore *sem) { @@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) return __down_common(sem, 
TASK_UNINTERRUPTIBLE, jiffies); } -static noinline void __sched __up(struct semaphore *sem) +noinline void __sched __up(struct semaphore *sem) { struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); @@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem) waiter->up = 1; wake_up_process(waiter->task); } +EXPORT_SYMBOL(__up); \ No newline at end of file diff --git a/kernel/softirq.c b/kernel/softirq.c index 07b4f1b1a73a..be4b8fab3637 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -29,6 +29,14 @@ #include #include + +#include +#include + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + /* - No shared variables, all the data are CPU local. - If a softirq needs serialization, let it serialize itself @@ -54,7 +62,7 @@ EXPORT_SYMBOL(irq_stat); static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; -static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); +static DEFINE_PER_CPU(struct task_struct *, ksoftirqd) = NULL; char *softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", @@ -177,6 +185,7 @@ void local_bh_enable_ip(unsigned long ip) } EXPORT_SYMBOL(local_bh_enable_ip); + /* * We restart softirq processing MAX_SOFTIRQ_RESTART times, * and we fall back to softirqd after that. @@ -187,34 +196,30 @@ EXPORT_SYMBOL(local_bh_enable_ip); * should not be able to lock up the box. */ #define MAX_SOFTIRQ_RESTART 10 - -asmlinkage void __do_softirq(void) +static void ____do_softirq(void) { - struct softirq_action *h; __u32 pending; - int max_restart = MAX_SOFTIRQ_RESTART; + + struct softirq_action *h; int cpu; - + pending = local_softirq_pending(); + account_system_vtime(current); - - __local_bh_disable((unsigned long)__builtin_return_address(0)); - lockdep_softirq_enter(); - + cpu = smp_processor_id(); -restart: - /* Reset the pending bitmask before enabling irqs */ - set_softirq_pending(0); + set_softirq_pending(0); + local_irq_enable(); - + h = softirq_vec; - + do { if (pending & 1) { int prev_count = preempt_count(); kstat_incr_softirqs_this_cpu(h - softirq_vec); - + trace_softirq_entry(h, softirq_vec); h->action(h); trace_softirq_exit(h, softirq_vec); @@ -226,26 +231,70 @@ restart: h->action, prev_count, preempt_count()); preempt_count() = prev_count; } - + rcu_bh_qs(cpu); } h++; pending >>= 1; } while (pending); - + local_irq_disable(); +} + +static void ___do_softirq(void) +{ + __u32 pending; + + //struct softirq_action *h; + int max_restart = MAX_SOFTIRQ_RESTART; + //int cpu; + + pending = local_softirq_pending(); + +restart: + ____do_softirq(); pending = local_softirq_pending(); if (pending && --max_restart) goto restart; if (pending) + { wakeup_softirqd(); + } +} +asmlinkage void __do_softirq(void) +{ +#ifdef LITMUS_THREAD_ALL_SOFTIRQ + /* Skip straight to wakeup_softirqd() if we're using + LITMUS_THREAD_ALL_SOFTIRQ (unless there's really high prio-stuff waiting.). 
*/ + struct task_struct *tsk = __get_cpu_var(ksoftirqd); + + if(tsk) + { + __u32 pending = local_softirq_pending(); + const __u32 high_prio_softirq = (1<func)) + { + u32 nvidia_device = get_tasklet_nv_device_num(t); + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", + // __FUNCTION__, nvidia_device,litmus_clock()); + + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidia_device, &flags); + + device_owner = get_nv_device_owner(nvidia_device); + + if(device_owner==NULL) + { + t->owner = NULL; + } + else + { + if(is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", + __FUNCTION__, nvidia_device,litmus_clock()); + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__,device_owner->pid,nvidia_device); + + t->owner = device_owner; + sched_trace_tasklet_release(t->owner); + if(likely(_litmus_tasklet_schedule(t,nvidia_device))) + { + unlock_nv_registry(nvidia_device, &flags); + return; + } + else + { + t->owner = NULL; /* fall through to normal scheduling */ + } + } + else + { + t->owner = NULL; + } + } + unlock_nv_registry(nvidia_device, &flags); + } +#endif + + ___tasklet_schedule(t); +} +EXPORT_SYMBOL(__tasklet_schedule); + + +void ___tasklet_schedule(struct tasklet_struct *t) { unsigned long flags; @@ -368,10 +473,64 @@ void __tasklet_schedule(struct tasklet_struct *t) raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_restore(flags); } +EXPORT_SYMBOL(___tasklet_schedule); -EXPORT_SYMBOL(__tasklet_schedule); void __tasklet_hi_schedule(struct tasklet_struct *t) +{ +#ifdef CONFIG_LITMUS_NVIDIA + if(is_nvidia_func(t->func)) + { + u32 nvidia_device = get_tasklet_nv_device_num(t); + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", + // __FUNCTION__, nvidia_device,litmus_clock()); + + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidia_device, &flags); + + device_owner = get_nv_device_owner(nvidia_device); + + if(device_owner==NULL) + { + t->owner = NULL; + } + else + { + if( is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n", + __FUNCTION__, nvidia_device,litmus_clock()); + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__,device_owner->pid,nvidia_device); + + t->owner = device_owner; + sched_trace_tasklet_release(t->owner); + if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device))) + { + unlock_nv_registry(nvidia_device, &flags); + return; + } + else + { + t->owner = NULL; /* fall through to normal scheduling */ + } + } + else + { + t->owner = NULL; + } + } + unlock_nv_registry(nvidia_device, &flags); + } +#endif + + ___tasklet_hi_schedule(t); +} +EXPORT_SYMBOL(__tasklet_hi_schedule); + +void ___tasklet_hi_schedule(struct tasklet_struct* t) { unsigned long flags; @@ -382,10 +541,64 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) raise_softirq_irqoff(HI_SOFTIRQ); local_irq_restore(flags); } - -EXPORT_SYMBOL(__tasklet_hi_schedule); +EXPORT_SYMBOL(___tasklet_hi_schedule); void __tasklet_hi_schedule_first(struct tasklet_struct *t) +{ + BUG_ON(!irqs_disabled()); +#ifdef CONFIG_LITMUS_NVIDIA + if(is_nvidia_func(t->func)) + { + u32 nvidia_device = get_tasklet_nv_device_num(t); + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", + // __FUNCTION__, nvidia_device,litmus_clock()); + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidia_device, &flags); + + device_owner = get_nv_device_owner(nvidia_device); + + 
if(device_owner==NULL) + { + t->owner = NULL; + } + else + { + if(is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", + __FUNCTION__, nvidia_device,litmus_clock()); + + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__,device_owner->pid,nvidia_device); + + t->owner = device_owner; + sched_trace_tasklet_release(t->owner); + if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device))) + { + unlock_nv_registry(nvidia_device, &flags); + return; + } + else + { + t->owner = NULL; /* fall through to normal scheduling */ + } + } + else + { + t->owner = NULL; + } + } + unlock_nv_registry(nvidia_device, &flags); + } +#endif + + ___tasklet_hi_schedule_first(t); +} +EXPORT_SYMBOL(__tasklet_hi_schedule_first); + +void ___tasklet_hi_schedule_first(struct tasklet_struct* t) { BUG_ON(!irqs_disabled()); @@ -393,8 +606,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) __get_cpu_var(tasklet_hi_vec).head = t; __raise_softirq_irqoff(HI_SOFTIRQ); } - -EXPORT_SYMBOL(__tasklet_hi_schedule_first); +EXPORT_SYMBOL(___tasklet_hi_schedule_first); static void tasklet_action(struct softirq_action *a) { @@ -450,6 +662,7 @@ static void tasklet_hi_action(struct softirq_action *a) if (!atomic_read(&t->count)) { if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) BUG(); + t->func(t->data); tasklet_unlock(t); continue; @@ -473,8 +686,13 @@ void tasklet_init(struct tasklet_struct *t, t->next = NULL; t->state = 0; atomic_set(&t->count, 0); + t->func = func; t->data = data; + +#ifdef CONFIG_LITMUS_SOFTIRQD + t->owner = NULL; +#endif } EXPORT_SYMBOL(tasklet_init); @@ -489,6 +707,7 @@ void tasklet_kill(struct tasklet_struct *t) yield(); } while (test_bit(TASKLET_STATE_SCHED, &t->state)); } + tasklet_unlock_wait(t); clear_bit(TASKLET_STATE_SCHED, &t->state); } @@ -694,6 +913,8 @@ void __init softirq_init(void) static int run_ksoftirqd(void * __bind_cpu) { + unsigned long flags; + set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { @@ -712,7 +933,11 @@ static int run_ksoftirqd(void * __bind_cpu) don't process */ if (cpu_is_offline((long)__bind_cpu)) goto wait_to_die; - do_softirq(); + + local_irq_save(flags); + ____do_softirq(); + local_irq_restore(flags); + preempt_enable_no_resched(); cond_resched(); preempt_disable(); @@ -760,6 +985,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { if (*i == t) { *i = t->next; + /* If this was the tail element, move the tail ptr */ if (*i == NULL) per_cpu(tasklet_vec, cpu).tail = i; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f77afd939229..8139208eaee1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -47,6 +47,13 @@ #include "workqueue_sched.h" +#ifdef CONFIG_LITMUS_NVIDIA +#include +#include +#include +#endif + + enum { /* global_cwq flags */ GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ @@ -1010,9 +1017,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, work_flags |= WORK_STRUCT_DELAYED; worklist = &cwq->delayed_works; } - insert_work(cwq, work, worklist, work_flags); - spin_unlock_irqrestore(&gcwq->lock, flags); } @@ -2526,10 +2531,69 @@ EXPORT_SYMBOL(cancel_delayed_work_sync); */ int schedule_work(struct work_struct *work) { - return queue_work(system_wq, work); +#ifdef CONFIG_LITMUS_NVIDIA + if(is_nvidia_func(work->func)) + { + u32 nvidiaDevice = get_work_nv_device_num(work); + + //1) Ask Litmus which task owns GPU . 
(API to be defined.) + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidiaDevice, &flags); + + device_owner = get_nv_device_owner(nvidiaDevice); + + //2) If there is an owner, set work->owner to the owner's task struct. + if(device_owner==NULL) + { + work->owner = NULL; + //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice); + } + else + { + if( is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n", + __FUNCTION__, nvidiaDevice,litmus_clock()); + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__, + device_owner->pid, + nvidiaDevice); + + //3) Call litmus_schedule_work() and return (don't execute the rest + // of schedule_schedule()). + work->owner = device_owner; + sched_trace_work_release(work->owner); + if(likely(litmus_schedule_work(work, nvidiaDevice))) + { + unlock_nv_registry(nvidiaDevice, &flags); + return 1; + } + else + { + work->owner = NULL; /* fall through to normal work scheduling */ + } + } + else + { + work->owner = NULL; + } + } + unlock_nv_registry(nvidiaDevice, &flags); + } +#endif + + return(__schedule_work(work)); } EXPORT_SYMBOL(schedule_work); +int __schedule_work(struct work_struct* work) +{ + return queue_work(system_wq, work); +} +EXPORT_SYMBOL(__schedule_work); + /* * schedule_work_on - put work task on a specific cpu * @cpu: cpu to put the work task on diff --git a/litmus/Kconfig b/litmus/Kconfig index ad8dc8308cf0..7e865d4dd703 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -62,6 +62,25 @@ config LITMUS_LOCKING endmenu +menu "Performance Enhancements" + +config SCHED_CPU_AFFINITY + bool "Local Migration Affinity" + default y + help + Rescheduled tasks prefer CPUs near to their previously used CPU. This + may improve performance through possible preservation of cache affinity. + + Warning: May make bugs ahrder to find since tasks may migrate less often. + + NOTES: + * Pfair/PD^2 does not support this option. + * Only x86 currently supported. + + Say Yes if unsure. + +endmenu + menu "Tracing" config FEATHER_TRACE @@ -182,4 +201,74 @@ config SCHED_DEBUG_TRACE_CALLER endmenu +menu "Interrupt Handling" + +config LITMUS_THREAD_ALL_SOFTIRQ + bool "Process all softirqs in ksoftirqd threads." + default n + help + (Experimental) Thread all softirqs to ksoftirqd + daemon threads, similar to PREEMPT_RT. I/O + throughput will will drop with this enabled, but + latencies due to interrupts will be reduced. + + WARNING: Timer responsiveness will likely be + decreased as timer callbacks are also threaded. + This is unlike PREEEMPT_RTs hardirqs. + + If unsure, say No. + +config LITMUS_SOFTIRQD + bool "Spawn klitirqd interrupt handling threads." + depends on LITMUS_LOCKING + default n + help + Create klitirqd interrupt handling threads. Work must be + specifically dispatched to these workers. (Softirqs for + Litmus tasks are not magically redirected to klitirqd.) + + G-EDF ONLY for now! + + If unsure, say No. + +config NR_LITMUS_SOFTIRQD + int "Number of klitirqd." + depends on LITMUS_SOFTIRQD + range 1 4096 + default "1" + help + Should be <= to the number of CPUs in your system. + +config LITMUS_NVIDIA + bool "Litmus handling of NVIDIA interrupts." + depends on LITMUS_SOFTIRQD + default n + help + Direct tasklets from NVIDIA devices to Litmus's klitirqd. + + If unsure, say No. + +choice + prompt "CUDA/Driver Version Support" + default CUDA_4_0 + depends on LITMUS_NVIDIA + help + Select the version of CUDA/driver to support. 
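	  As a rough illustration (not a requirement of this patch), a kernel
	  configuration that routes NVIDIA bottom halves through klitirqd could
	  combine LITMUS_LOCKING with the options added in this menu as follows;
	  the thread count of 4 is only an example value:

	  CONFIG_LITMUS_LOCKING=y
	  CONFIG_LITMUS_SOFTIRQD=y
	  CONFIG_NR_LITMUS_SOFTIRQD=4
	  CONFIG_LITMUS_NVIDIA=y
	  CONFIG_CUDA_4_0=y
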
+ +config CUDA_4_0 + bool "CUDA 4.0" + depends on LITMUS_NVIDIA + help + Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40) + +config CUDA_3_2 + bool "CUDA 3.2" + depends on LITMUS_NVIDIA + help + Support CUDA 3.2 (dev. driver version: x86_64-260.24) + +endchoice + +endmenu + endmenu diff --git a/litmus/Makefile b/litmus/Makefile index ad9936e07b83..892e01c2e1b3 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -21,8 +21,12 @@ obj-y = sched_plugin.o litmus.o \ obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o +obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o + +obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o +obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o diff --git a/litmus/affinity.c b/litmus/affinity.c new file mode 100644 index 000000000000..3b430d18885b --- /dev/null +++ b/litmus/affinity.c @@ -0,0 +1,49 @@ +#include + +#include + +struct neighborhood neigh_info[NR_CPUS]; + +/* called by _init_litmus() */ +void init_topology(void) +{ + int cpu; + int i; + int chk; + int depth = num_cache_leaves; + + if(depth > NUM_CACHE_LEVELS) + depth = NUM_CACHE_LEVELS; + + for_each_online_cpu(cpu) + { + for(i = 0; i < depth; ++i) + { + long unsigned int firstbits; + + chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i); + if(chk) /* failed */ + { + neigh_info[cpu].size[i] = 0; + } + else + { + /* size = num bits in mask */ + neigh_info[cpu].size[i] = cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); + } + firstbits = *neigh_info[cpu].neighbors[i]->bits; + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", + cpu, neigh_info[cpu].size[i], i, firstbits); + } + + /* set data for non-existent levels */ + for(; i < NUM_CACHE_LEVELS; ++i) + { + neigh_info[cpu].size[i] = 0; + + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", + cpu, neigh_info[cpu].size[i], i, 0lu); + } + } +} + diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 9b44dc2d8d1e..fbd67ab5f467 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -65,6 +65,12 @@ int edf_higher_prio(struct task_struct* first, return !is_realtime(second_task) || + +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy threads always lose w/o inheritance. */ + (first_task->rt_param.is_proxy_thread < + second_task->rt_param.is_proxy_thread) || +#endif /* is the deadline of the first task earlier? * Then it has higher priority. 
diff --git a/litmus/fdso.c b/litmus/fdso.c index aa7b384264e3..2b7f9ba85857 100644 --- a/litmus/fdso.c +++ b/litmus/fdso.c @@ -22,6 +22,7 @@ extern struct fdso_ops generic_lock_ops; static const struct fdso_ops* fdso_ops[] = { &generic_lock_ops, /* FMLP_SEM */ + &generic_lock_ops, /* KFMLP_SEM */ &generic_lock_ops, /* SRP_SEM */ }; diff --git a/litmus/litmus.c b/litmus/litmus.c index 26938acacafc..29363c6ad565 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -17,6 +17,14 @@ #include #include +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + /* Number of RT tasks that exist in the system */ atomic_t rt_task_count = ATOMIC_INIT(0); static DEFINE_RAW_SPINLOCK(task_transition_lock); @@ -47,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn) struct release_heap* release_heap_alloc(int gfp_flags); void release_heap_free(struct release_heap* rh); +#ifdef CONFIG_LITMUS_NVIDIA +/* + * sys_register_nv_device + * @nv_device_id: The Nvidia device id that the task want to register + * @reg_action: set to '1' to register the specified device. zero otherwise. + * Syscall for register task's designated nvidia device into NV_DEVICE_REG array + * Returns EFAULT if nv_device_id is out of range. + * 0 if success + */ +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) +{ + /* register the device to caller (aka 'current') */ + return(reg_nv_device(nv_device_id, reg_action)); +} +#else +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) +{ + return(-EINVAL); +} +#endif + + /* * sys_set_task_rt_param * @pid: Pid of the task which scheduling parameters must be changed @@ -115,7 +145,7 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) tp.cls != RT_CLASS_BEST_EFFORT) { printk(KERN_INFO "litmus: real-time task %d rejected " - "because its class is invalid\n"); + "because its class is invalid\n", pid); goto out_unlock; } if (tp.budget_policy != NO_ENFORCEMENT && @@ -131,6 +161,22 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) target->rt_param.task_params = tp; +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy thread off by default */ + target->rt_param.is_proxy_thread = 0; + target->rt_param.cur_klitirqd = NULL; + //init_MUTEX(&target->rt_param.klitirqd_sem); + mutex_init(&target->rt_param.klitirqd_sem); + //init_completion(&target->rt_param.klitirqd_sem); + //target->rt_param.klitirqd_sem_stat = NOT_HELD; + atomic_set(&target->rt_param.klitirqd_sem_stat, NOT_HELD); +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&target->rt_param.nv_int_count, 0); +#endif + + retval = 0; out_unlock: read_unlock_irq(&tasklist_lock); @@ -265,6 +311,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job) return retval; } + /* sys_null_call() is only used for determining raw system call * overheads (kernel entry, kernel exit). It has no useful side effects. * If ts is non-NULL, then the current Feather-Trace time is recorded. @@ -278,7 +325,7 @@ asmlinkage long sys_null_call(cycles_t __user *ts) now = get_cycles(); ret = put_user(now, ts); } - + return ret; } @@ -299,6 +346,20 @@ static void reinit_litmus_state(struct task_struct* p, int restore) * at this point in time. */ WARN_ON(p->rt_param.inh_task); + +#ifdef CONFIG_LITMUS_SOFTIRQD + /* We probably should not have any tasklets executing for + * us at this time. 
+ */ + WARN_ON(p->rt_param.cur_klitirqd); + WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD); + + if(p->rt_param.cur_klitirqd) + flush_pending(p->rt_param.cur_klitirqd, p); + + if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD) + up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem); +#endif /* Cleanup everything else. */ memset(&p->rt_param, 0, sizeof(p->rt_param)); @@ -399,7 +460,7 @@ static void synch_on_plugin_switch(void* info) */ int switch_sched_plugin(struct sched_plugin* plugin) { - unsigned long flags; + //unsigned long flags; int ret = 0; BUG_ON(!plugin); @@ -413,8 +474,15 @@ int switch_sched_plugin(struct sched_plugin* plugin) while (atomic_read(&cannot_use_plugin) < num_online_cpus()) cpu_relax(); +#ifdef CONFIG_LITMUS_SOFTIRQD + if(!klitirqd_is_dead()) + { + kill_klitirqd(); + } +#endif + /* stop task transitions */ - raw_spin_lock_irqsave(&task_transition_lock, flags); + //raw_spin_lock_irqsave(&task_transition_lock, flags); /* don't switch if there are active real-time tasks */ if (atomic_read(&rt_task_count) == 0) { @@ -432,7 +500,7 @@ int switch_sched_plugin(struct sched_plugin* plugin) } else ret = -EBUSY; out: - raw_spin_unlock_irqrestore(&task_transition_lock, flags); + //raw_spin_unlock_irqrestore(&task_transition_lock, flags); atomic_set(&cannot_use_plugin, 0); return ret; } @@ -540,6 +608,10 @@ static int __init _init_litmus(void) init_litmus_proc(); +#ifdef CONFIG_SCHED_CPU_AFFINITY + init_topology(); +#endif + return 0; } diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c index 4bf725a36c9c..381513366c7a 100644 --- a/litmus/litmus_proc.c +++ b/litmus/litmus_proc.c @@ -19,12 +19,19 @@ static struct proc_dir_entry *litmus_dir = NULL, *plugs_dir = NULL, #ifdef CONFIG_RELEASE_MASTER *release_master_file = NULL, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + *klitirqd_file = NULL, #endif *plugs_file = NULL; /* in litmus/sync.c */ int count_tasks_waiting_for_release(void); +extern int proc_read_klitirqd_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data); + static int proc_read_stats(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -161,6 +168,12 @@ int __init init_litmus_proc(void) release_master_file->write_proc = proc_write_release_master; #endif +#ifdef CONFIG_LITMUS_SOFTIRQD + klitirqd_file = + create_proc_read_entry("klitirqd_stats", 0444, litmus_dir, + proc_read_klitirqd_stats, NULL); +#endif + stat_file = create_proc_read_entry("stats", 0444, litmus_dir, proc_read_stats, NULL); @@ -187,6 +200,10 @@ void exit_litmus_proc(void) remove_proc_entry("stats", litmus_dir); if (curr_file) remove_proc_entry("active_plugin", litmus_dir); +#ifdef CONFIG_LITMUS_SOFTIRQD + if (klitirqd_file) + remove_proc_entry("klitirqd_stats", litmus_dir); +#endif #ifdef CONFIG_RELEASE_MASTER if (release_master_file) remove_proc_entry("release_master", litmus_dir); diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c new file mode 100644 index 000000000000..271e770dbaea --- /dev/null +++ b/litmus/litmus_softirq.c @@ -0,0 +1,1579 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +/* TODO: Remove unneeded mb() and other barriers. */ + + +/* counts number of daemons ready to handle litmus irqs. 
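+ * Each daemon increments this in run_klitirqd() once it has become a
+ * real-time proxy task and decrements it on exit; klitirqd_is_ready()
+ * and klitirqd_is_dead() test this count.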
*/ +static atomic_t num_ready_klitirqds = ATOMIC_INIT(0); + +enum pending_flags +{ + LIT_TASKLET_LOW = 0x1, + LIT_TASKLET_HI = LIT_TASKLET_LOW<<1, + LIT_WORK = LIT_TASKLET_HI<<1 +}; + +/* only support tasklet processing for now. */ +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; + +struct klitirqd_info +{ + struct task_struct* klitirqd; + struct task_struct* current_owner; + int terminating; + + + raw_spinlock_t lock; + + u32 pending; + atomic_t num_hi_pending; + atomic_t num_low_pending; + atomic_t num_work_pending; + + /* in order of priority */ + struct tasklet_head pending_tasklets_hi; + struct tasklet_head pending_tasklets; + struct list_head worklist; +}; + +/* one list for each klitirqd */ +static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD]; + + + + + +int proc_read_klitirqd_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len = snprintf(page, PAGE_SIZE, + "num ready klitirqds: %d\n\n", + atomic_read(&num_ready_klitirqds)); + + if(klitirqd_is_ready()) + { + int i; + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + len += + snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ + "klitirqd_th%d: %s/%d\n" + "\tcurrent_owner: %s/%d\n" + "\tpending: %x\n" + "\tnum hi: %d\n" + "\tnum low: %d\n" + "\tnum work: %d\n\n", + i, + klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid, + (klitirqds[i].current_owner != NULL) ? + klitirqds[i].current_owner->comm : "(null)", + (klitirqds[i].current_owner != NULL) ? + klitirqds[i].current_owner->pid : 0, + klitirqds[i].pending, + atomic_read(&klitirqds[i].num_hi_pending), + atomic_read(&klitirqds[i].num_low_pending), + atomic_read(&klitirqds[i].num_work_pending)); + } + } + + return(len); +} + + + + + +#if 0 +static atomic_t dump_id = ATOMIC_INIT(0); + +static void __dump_state(struct klitirqd_info* which, const char* caller) +{ + struct tasklet_struct* list; + + int id = atomic_inc_return(&dump_id); + + //if(in_interrupt()) + { + if(which->current_owner) + { + TRACE("(id: %d caller: %s)\n" + "klitirqd: %s/%d\n" + "current owner: %s/%d\n" + "pending: %x\n", + id, caller, + which->klitirqd->comm, which->klitirqd->pid, + which->current_owner->comm, which->current_owner->pid, + which->pending); + } + else + { + TRACE("(id: %d caller: %s)\n" + "klitirqd: %s/%d\n" + "current owner: %p\n" + "pending: %x\n", + id, caller, + which->klitirqd->comm, which->klitirqd->pid, + NULL, + which->pending); + } + + list = which->pending_tasklets.head; + while(list) + { + struct tasklet_struct *t = list; + list = list->next; /* advance */ + if(t->owner) + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid); + else + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL); + } + } +} + +static void dump_state(struct klitirqd_info* which, const char* caller) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&which->lock, flags); + __dump_state(which, caller); + raw_spin_unlock_irqrestore(&which->lock, flags); +} +#endif + + +/* forward declarations */ +static void ___litmus_tasklet_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup); +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup); +static void ___litmus_schedule_work(struct work_struct *w, + struct klitirqd_info *which, + int wakeup); + + + +inline unsigned int klitirqd_id(struct task_struct* tsk) +{ + int i; + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + 
if(klitirqds[i].klitirqd == tsk) + { + return i; + } + } + + BUG(); + + return 0; +} + + +inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_TASKLET_HI); +} + +inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_TASKLET_LOW); +} + +inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_WORK); +} + +inline static u32 litirq_pending_irqoff(struct klitirqd_info* which) +{ + return(which->pending); +} + + +inline static u32 litirq_pending(struct klitirqd_info* which) +{ + unsigned long flags; + u32 pending; + + raw_spin_lock_irqsave(&which->lock, flags); + pending = litirq_pending_irqoff(which); + raw_spin_unlock_irqrestore(&which->lock, flags); + + return pending; +}; + +inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner) +{ + unsigned long flags; + u32 pending; + + raw_spin_lock_irqsave(&which->lock, flags); + pending = litirq_pending_irqoff(which); + if(pending) + { + if(which->current_owner != owner) + { + pending = 0; // owner switch! + } + } + raw_spin_unlock_irqrestore(&which->lock, flags); + + return pending; +} + + +inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, + struct mutex** sem, + struct task_struct** t) +{ + unsigned long flags; + u32 pending; + + /* init values */ + *sem = NULL; + *t = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + pending = litirq_pending_irqoff(which); + if(pending) + { + if(which->current_owner != NULL) + { + *t = which->current_owner; + *sem = &tsk_rt(which->current_owner)->klitirqd_sem; + } + else + { + BUG(); + } + } + raw_spin_unlock_irqrestore(&which->lock, flags); + + if(likely(*sem)) + { + return pending; + } + else + { + return 0; + } +} + +/* returns true if the next piece of work to do is from a different owner. + */ +static int tasklet_ownership_change( + struct klitirqd_info* which, + enum pending_flags taskletQ) +{ + /* this function doesn't have to look at work objects since they have + priority below tasklets. */ + + unsigned long flags; + int ret = 0; + + raw_spin_lock_irqsave(&which->lock, flags); + + switch(taskletQ) + { + case LIT_TASKLET_HI: + if(litirq_pending_hi_irqoff(which)) + { + ret = (which->pending_tasklets_hi.head->owner != + which->current_owner); + } + break; + case LIT_TASKLET_LOW: + if(litirq_pending_low_irqoff(which)) + { + ret = (which->pending_tasklets.head->owner != + which->current_owner); + } + break; + default: + break; + } + + raw_spin_unlock_irqrestore(&which->lock, flags); + + TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret); + + return ret; +} + + +static void __reeval_prio(struct klitirqd_info* which) +{ + struct task_struct* next_owner = NULL; + struct task_struct* klitirqd = which->klitirqd; + + /* Check in prio-order */ + u32 pending = litirq_pending_irqoff(which); + + //__dump_state(which, "__reeval_prio: before"); + + if(pending) + { + if(pending & LIT_TASKLET_HI) + { + next_owner = which->pending_tasklets_hi.head->owner; + } + else if(pending & LIT_TASKLET_LOW) + { + next_owner = which->pending_tasklets.head->owner; + } + else if(pending & LIT_WORK) + { + struct work_struct* work = + list_first_entry(&which->worklist, struct work_struct, entry); + next_owner = work->owner; + } + } + + if(next_owner != which->current_owner) + { + struct task_struct* old_owner = which->current_owner; + + /* bind the next owner. 
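+		 * The daemon then inherits next_owner's priority via
+		 * litmus->set_prio_inh_klitirqd() below, or has inheritance
+		 * cleared when nothing is pending.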
*/ + which->current_owner = next_owner; + mb(); + + if(next_owner != NULL) + { + if(!in_interrupt()) + { + TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + next_owner->comm, next_owner->pid); + } + else + { + TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + next_owner->comm, next_owner->pid); + } + + litmus->set_prio_inh_klitirqd(klitirqd, old_owner, next_owner); + } + else + { + if(likely(!in_interrupt())) + { + TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n", + __FUNCTION__, klitirqd->comm, klitirqd->pid); + } + else + { + // is this a bug? + TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n", + __FUNCTION__, klitirqd->comm, klitirqd->pid); + } + + BUG_ON(pending != 0); + litmus->clear_prio_inh_klitirqd(klitirqd, old_owner); + } + } + + //__dump_state(which, "__reeval_prio: after"); +} + +static void reeval_prio(struct klitirqd_info* which) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&which->lock, flags); + __reeval_prio(which); + raw_spin_unlock_irqrestore(&which->lock, flags); +} + + +static void wakeup_litirqd_locked(struct klitirqd_info* which) +{ + /* Interrupts are disabled: no need to stop preemption */ + if (which && which->klitirqd) + { + __reeval_prio(which); /* configure the proper priority */ + + if(which->klitirqd->state != TASK_RUNNING) + { + TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__, + which->klitirqd->comm, which->klitirqd->pid); + + wake_up_process(which->klitirqd); + } + } +} + + +static void do_lit_tasklet(struct klitirqd_info* which, + struct tasklet_head* pending_tasklets) +{ + unsigned long flags; + struct tasklet_struct *list; + atomic_t* count; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "do_lit_tasklet: before steal"); + + /* copy out the tasklets for our private use. */ + list = pending_tasklets->head; + pending_tasklets->head = NULL; + pending_tasklets->tail = &pending_tasklets->head; + + /* remove pending flag */ + which->pending &= (pending_tasklets == &which->pending_tasklets) ? + ~LIT_TASKLET_LOW : + ~LIT_TASKLET_HI; + + count = (pending_tasklets == &which->pending_tasklets) ? + &which->num_low_pending: + &which->num_hi_pending; + + //__dump_state(which, "do_lit_tasklet: after steal"); + + raw_spin_unlock_irqrestore(&which->lock, flags); + + + while(list) + { + struct tasklet_struct *t = list; + + /* advance, lest we forget */ + list = list->next; + + /* execute tasklet if it has my priority and is free */ + if ((t->owner == which->current_owner) && tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + { + BUG(); + } + TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__); + t->func(t->data); + tasklet_unlock(t); + + atomic_dec(count); + + continue; /* process more tasklets */ + } + tasklet_unlock(t); + } + + TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__); + + /* couldn't process tasklet. put it back at the end of the queue. 
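+		 * (wakeup == 0: this daemon is already running, so the requeue
+		 * skips the wakeup/priority re-evaluation.)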
*/ + if(pending_tasklets == &which->pending_tasklets) + ___litmus_tasklet_schedule(t, which, 0); + else + ___litmus_tasklet_hi_schedule(t, which, 0); + } +} + + +// returns 1 if priorities need to be changed to continue processing +// pending tasklets. +static int do_litirq(struct klitirqd_info* which) +{ + u32 pending; + int resched = 0; + + if(in_interrupt()) + { + TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__); + return(0); + } + + if(which->klitirqd != current) + { + TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", + __FUNCTION__, current->comm, current->pid, + which->klitirqd->comm, which->klitirqd->pid); + return(0); + } + + if(!is_realtime(current)) + { + TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n", + __FUNCTION__, current->policy); + return(0); + } + + + /* We only handle tasklets & work objects, no need for RCU triggers? */ + + pending = litirq_pending(which); + if(pending) + { + /* extract the work to do and do it! */ + if(pending & LIT_TASKLET_HI) + { + TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__); + do_lit_tasklet(which, &which->pending_tasklets_hi); + resched = tasklet_ownership_change(which, LIT_TASKLET_HI); + + if(resched) + { + TRACE_CUR("%s: HI tasklets of another owner remain. " + "Skipping any LOW tasklets.\n", __FUNCTION__); + } + } + + if(!resched && (pending & LIT_TASKLET_LOW)) + { + TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__); + do_lit_tasklet(which, &which->pending_tasklets); + resched = tasklet_ownership_change(which, LIT_TASKLET_LOW); + + if(resched) + { + TRACE_CUR("%s: LOW tasklets of another owner remain. " + "Skipping any work objects.\n", __FUNCTION__); + } + } + } + + return(resched); +} + + +static void do_work(struct klitirqd_info* which) +{ + unsigned long flags; + work_func_t f; + struct work_struct* work; + + // only execute one work-queue item to yield to tasklets. + // ...is this a good idea, or should we just batch them? + raw_spin_lock_irqsave(&which->lock, flags); + + if(!litirq_pending_work_irqoff(which)) + { + raw_spin_unlock_irqrestore(&which->lock, flags); + goto no_work; + } + + work = list_first_entry(&which->worklist, struct work_struct, entry); + list_del_init(&work->entry); + + if(list_empty(&which->worklist)) + { + which->pending &= ~LIT_WORK; + } + + raw_spin_unlock_irqrestore(&which->lock, flags); + + + + /* safe to read current_owner outside of lock since only this thread + may write to the pointer. */ + if(work->owner == which->current_owner) + { + TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__); + // do the work! + work_clear_pending(work); + f = work->func; + f(work); /* can't touch 'work' after this point, + the user may have freed it. */ + + atomic_dec(&which->num_work_pending); + } + else + { + TRACE_CUR("%s: Could not invoke work object. Requeuing.\n", + __FUNCTION__); + ___litmus_schedule_work(work, which, 0); + } + +no_work: + return; +} + + +static int set_litmus_daemon_sched(void) +{ + /* set up a daemon job that will never complete. + it should only ever run on behalf of another + real-time task. 
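+	   The parameters below give the daemon zero execution cost and
+	   best-effort class, so it only runs at a priority inherited from
+	   the owner of its pending work.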
+ + TODO: Transition to a new job whenever a + new tasklet is handled */ + + int ret = 0; + + struct rt_task tp = { + .exec_cost = 0, + .period = 1000000000, /* dummy 1 second period */ + .phase = 0, + .cpu = task_cpu(current), + .budget_policy = NO_ENFORCEMENT, + .cls = RT_CLASS_BEST_EFFORT + }; + + struct sched_param param = { .sched_priority = 0}; + + + /* set task params, mark as proxy thread, and init other data */ + tsk_rt(current)->task_params = tp; + tsk_rt(current)->is_proxy_thread = 1; + tsk_rt(current)->cur_klitirqd = NULL; + //init_MUTEX(&tsk_rt(current)->klitirqd_sem); + mutex_init(&tsk_rt(current)->klitirqd_sem); + //init_completion(&tsk_rt(current)->klitirqd_sem); + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD); + + /* inform the OS we're SCHED_LITMUS -- + sched_setscheduler_nocheck() calls litmus_admit_task(). */ + sched_setscheduler_nocheck(current, SCHED_LITMUS, ¶m); + + return ret; +} + +static void enter_execution_phase(struct klitirqd_info* which, + struct mutex* sem, + struct task_struct* t) +{ + TRACE_CUR("%s: Trying to enter execution phase. " + "Acquiring semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + down_and_set_stat(current, HELD, sem); + TRACE_CUR("%s: Execution phase entered! " + "Acquired semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); +} + +static void exit_execution_phase(struct klitirqd_info* which, + struct mutex* sem, + struct task_struct* t) +{ + TRACE_CUR("%s: Exiting execution phase. " + "Releasing semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD) + { + up_and_set_stat(current, NOT_HELD, sem); + TRACE_CUR("%s: Execution phase exited! " + "Released semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + } + else + { + TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__); + } +} + +/* main loop for klitsoftirqd */ +static int run_klitirqd(void* unused) +{ + struct klitirqd_info* which = &klitirqds[klitirqd_id(current)]; + struct mutex* sem; + struct task_struct* owner; + + int rt_status = set_litmus_daemon_sched(); + + if(rt_status != 0) + { + TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); + goto rt_failed; + } + + atomic_inc(&num_ready_klitirqds); + + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) + { + preempt_disable(); + if (!litirq_pending(which)) + { + /* sleep for work */ + TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n", + __FUNCTION__); + preempt_enable_no_resched(); + schedule(); + + if(kthread_should_stop()) /* bail out */ + { + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); + continue; + } + + preempt_disable(); + } + + __set_current_state(TASK_RUNNING); + + while (litirq_pending_and_sem_and_owner(which, &sem, &owner)) + { + int needs_resched = 0; + + preempt_enable_no_resched(); + + BUG_ON(sem == NULL); + + // wait to enter execution phase; wait for 'current_owner' to block. + enter_execution_phase(which, sem, owner); + + if(kthread_should_stop()) + { + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); + break; + } + + preempt_disable(); + + /* Double check that there's still pending work and the owner hasn't + * changed. Pending items may have been flushed while we were sleeping. 
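+			 * (flush_pending() may have handed our items back to Linux
+			 * while we blocked on the owner's semaphore.)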
+ */ + if(litirq_pending_with_owner(which, owner)) + { + TRACE_CUR("%s: Executing tasklets and/or work objects.\n", + __FUNCTION__); + + needs_resched = do_litirq(which); + + preempt_enable_no_resched(); + + // work objects are preemptible. + if(!needs_resched) + { + do_work(which); + } + + // exit execution phase. + exit_execution_phase(which, sem, owner); + + TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__); + reeval_prio(which); /* check if we need to change priority here */ + } + else + { + TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n", + __FUNCTION__, + owner->comm, owner->pid); + preempt_enable_no_resched(); + + // exit execution phase. + exit_execution_phase(which, sem, owner); + } + + cond_resched(); + preempt_disable(); + } + preempt_enable(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + atomic_dec(&num_ready_klitirqds); + +rt_failed: + litmus_exit_task(current); + + return rt_status; +} + + +struct klitirqd_launch_data +{ + int* cpu_affinity; + struct work_struct work; +}; + +/* executed by a kworker from workqueues */ +static void launch_klitirqd(struct work_struct *work) +{ + int i; + + struct klitirqd_launch_data* launch_data = + container_of(work, struct klitirqd_launch_data, work); + + TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + /* create the daemon threads */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(launch_data->cpu_affinity) + { + klitirqds[i].klitirqd = + kthread_create( + run_klitirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)(long long)launch_data->cpu_affinity[i], + "klitirqd_th%d/%d", + i, + launch_data->cpu_affinity[i]); + + /* litmus will put is in the right cluster. */ + kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]); + } + else + { + klitirqds[i].klitirqd = + kthread_create( + run_klitirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)(long long)(-1), + "klitirqd_th%d", + i); + } + } + + TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + /* unleash the daemons */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + wake_up_process(klitirqds[i].klitirqd); + } + + if(launch_data->cpu_affinity) + kfree(launch_data->cpu_affinity); + kfree(launch_data); +} + + +void spawn_klitirqd(int* affinity) +{ + int i; + struct klitirqd_launch_data* delayed_launch; + + if(atomic_read(&num_ready_klitirqds) != 0) + { + TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n"); + return; + } + + /* init the tasklet & work queues */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + klitirqds[i].terminating = 0; + klitirqds[i].pending = 0; + + klitirqds[i].num_hi_pending.counter = 0; + klitirqds[i].num_low_pending.counter = 0; + klitirqds[i].num_work_pending.counter = 0; + + klitirqds[i].pending_tasklets_hi.head = NULL; + klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head; + + klitirqds[i].pending_tasklets.head = NULL; + klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head; + + INIT_LIST_HEAD(&klitirqds[i].worklist); + + raw_spin_lock_init(&klitirqds[i].lock); + } + + /* wait to flush the initializations to memory since other threads + will access it. */ + mb(); + + /* tell a work queue to launch the threads. we can't make scheduling + calls since we're in an atomic state. 
*/ + TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__); + delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC); + if(affinity) + { + delayed_launch->cpu_affinity = + kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC); + + memcpy(delayed_launch->cpu_affinity, affinity, + sizeof(int)*NR_LITMUS_SOFTIRQD); + } + else + { + delayed_launch->cpu_affinity = NULL; + } + INIT_WORK(&delayed_launch->work, launch_klitirqd); + schedule_work(&delayed_launch->work); +} + + +void kill_klitirqd(void) +{ + if(!klitirqd_is_dead()) + { + int i; + + TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(klitirqds[i].terminating != 1) + { + klitirqds[i].terminating = 1; + mb(); /* just to be sure? */ + flush_pending(klitirqds[i].klitirqd, NULL); + + /* signal termination */ + kthread_stop(klitirqds[i].klitirqd); + } + } + } +} + + +int klitirqd_is_ready(void) +{ + return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD); +} + +int klitirqd_is_dead(void) +{ + return(atomic_read(&num_ready_klitirqds) == 0); +} + + +struct task_struct* get_klitirqd(unsigned int k_id) +{ + return(klitirqds[k_id].klitirqd); +} + + +void flush_pending(struct task_struct* klitirqd_thread, + struct task_struct* owner) +{ + unsigned int k_id = klitirqd_id(klitirqd_thread); + struct klitirqd_info *which = &klitirqds[k_id]; + + unsigned long flags; + struct tasklet_struct *list; + + u32 work_flushed = 0; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "flush_pending: before"); + + // flush hi tasklets. + if(litirq_pending_hi_irqoff(which)) + { + which->pending &= ~LIT_TASKLET_HI; + + list = which->pending_tasklets_hi.head; + which->pending_tasklets_hi.head = NULL; + which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head; + + TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__); + + while(list) + { + struct tasklet_struct *t = list; + list = list->next; + + if(likely((t->owner == owner) || (owner == NULL))) + { + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) + { + BUG(); + } + + work_flushed |= LIT_TASKLET_HI; + + t->owner = NULL; + + // WTF? + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_hi_pending); + ___tasklet_hi_schedule(t); + } + else + { + TRACE("%s: dropped hi tasklet??\n", __FUNCTION__); + BUG(); + } + } + else + { + TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__); + // put back on queue. + ___litmus_tasklet_hi_schedule(t, which, 0); + } + } + } + + // flush low tasklets. 
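+	// (same procedure as the HI queue above: tasklets belonging to 'owner'
+	//  go back to Linux via ___tasklet_schedule(); the rest are requeued.)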
+ if(litirq_pending_low_irqoff(which)) + { + which->pending &= ~LIT_TASKLET_LOW; + + list = which->pending_tasklets.head; + which->pending_tasklets.head = NULL; + which->pending_tasklets.tail = &which->pending_tasklets.head; + + TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__); + + while(list) + { + struct tasklet_struct *t = list; + list = list->next; + + if(likely((t->owner == owner) || (owner == NULL))) + { + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) + { + BUG(); + } + + work_flushed |= LIT_TASKLET_LOW; + + t->owner = NULL; + sched_trace_tasklet_end(owner, 1ul); + + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_low_pending); + ___tasklet_schedule(t); + } + else + { + TRACE("%s: dropped tasklet??\n", __FUNCTION__); + BUG(); + } + } + else + { + TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__); + // put back on queue + ___litmus_tasklet_schedule(t, which, 0); + } + } + } + + // flush work objects + if(litirq_pending_work_irqoff(which)) + { + which->pending &= ~LIT_WORK; + + TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__); + + while(!list_empty(&which->worklist)) + { + struct work_struct* work = + list_first_entry(&which->worklist, struct work_struct, entry); + list_del_init(&work->entry); + + if(likely((work->owner == owner) || (owner == NULL))) + { + work_flushed |= LIT_WORK; + atomic_dec(&which->num_work_pending); + + work->owner = NULL; + sched_trace_work_end(owner, current, 1ul); + __schedule_work(work); + } + else + { + TRACE("%s: Could not flush a work object.\n", __FUNCTION__); + // put back on queue + ___litmus_schedule_work(work, which, 0); + } + } + } + + //__dump_state(which, "flush_pending: after (before reeval prio)"); + + + mb(); /* commit changes to pending flags */ + + /* reset the scheduling priority */ + if(work_flushed) + { + __reeval_prio(which); + + /* Try to offload flushed tasklets to Linux's ksoftirqd. */ + if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI)) + { + wakeup_softirqd(); + } + } + else + { + TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__); + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + + + + +static void ___litmus_tasklet_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + t->next = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "___litmus_tasklet_schedule: before queuing"); + + *(which->pending_tasklets.tail) = t; + which->pending_tasklets.tail = &t->next; + + old_pending = which->pending; + which->pending |= LIT_TASKLET_LOW; + + atomic_inc(&which->num_low_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wake up the klitirqd */ + } + + //__dump_state(which, "___litmus_tasklet_schedule: after queuing"); + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + /* Can't accept tasklets while we're processing a workqueue + because they're handled by the same thread. This case is + very RARE. 
+ + TODO: Use a separate thread for work objects!!!!!! + */ + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; + ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1); + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_schedule); + + +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + t->next = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + *(which->pending_tasklets_hi.tail) = t; + which->pending_tasklets_hi.tail = &t->next; + + old_pending = which->pending; + which->pending |= LIT_TASKLET_HI; + + atomic_inc(&which->num_hi_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wake up the klitirqd */ + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; + ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1); + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); + + +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + u32 old_pending; + + BUG_ON(!irqs_disabled()); + + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + raw_spin_lock(&klitirqds[k_id].lock); + + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; // success! 
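+			// insert at the head of the HI queue rather than appending
+			// at the tail.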
+ + t->next = klitirqds[k_id].pending_tasklets_hi.head; + klitirqds[k_id].pending_tasklets_hi.head = t; + + old_pending = klitirqds[k_id].pending; + klitirqds[k_id].pending |= LIT_TASKLET_HI; + + atomic_inc(&klitirqds[k_id].num_hi_pending); + + mb(); + + if(!old_pending) + wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */ + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + + raw_spin_unlock(&klitirqds[k_id].lock); + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); + + + +static void ___litmus_schedule_work(struct work_struct *w, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + raw_spin_lock_irqsave(&which->lock, flags); + + work_pending(w); + list_add_tail(&w->entry, &which->worklist); + + old_pending = which->pending; + which->pending |= LIT_WORK; + + atomic_inc(&which->num_work_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wakeup the klitirqd */ + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) +{ + int ret = 1; /* assume success */ + if(unlikely(w->owner == NULL) || !is_realtime(w->owner)) + { + TRACE("%s: No owner associated with this work object!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%u!\n", k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + ___litmus_schedule_work(w, &klitirqds[k_id], 1); + else + ret = 0; + return(ret); +} +EXPORT_SYMBOL(__litmus_schedule_work); + + +static int set_klitirqd_sem_status(unsigned long stat) +{ + TRACE_CUR("SETTING STATUS FROM %d TO %d\n", + atomic_read(&tsk_rt(current)->klitirqd_sem_stat), + stat); + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat); + //mb(); + + return(0); +} + +static int set_klitirqd_sem_status_if_not_held(unsigned long stat) +{ + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD) + { + return(set_klitirqd_sem_status(stat)); + } + return(-1); +} + + +void __down_and_reset_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_reset, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif + + mutex_lock_sfx(sem, + set_klitirqd_sem_status_if_not_held, to_reset, + set_klitirqd_sem_status, to_set); +#if 0 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif +} + +void down_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif + + mutex_lock_sfx(sem, + NULL, 0, + set_klitirqd_sem_status, to_set); + +#if 0 + TRACE_CUR("%s: exiting. 
Have semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif +} + + +void up_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n", + __FUNCTION__, + task->comm, task->pid); +#endif + + mutex_unlock_sfx(sem, NULL, 0, + set_klitirqd_sem_status, to_set); + +#if 0 + TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n", + __FUNCTION__, + task->comm, task->pid); +#endif +} + + + +void release_klitirqd_lock(struct task_struct* t) +{ + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD)) + { + struct mutex* sem; + struct task_struct* owner = t; + + if(t->state == TASK_RUNNING) + { + TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n"); + return; + } + + if(likely(!tsk_rt(t)->is_proxy_thread)) + { + sem = &tsk_rt(t)->klitirqd_sem; + } + else + { + unsigned int k_id = klitirqd_id(t); + owner = klitirqds[k_id].current_owner; + + BUG_ON(t != klitirqds[k_id].klitirqd); + + if(likely(owner)) + { + sem = &tsk_rt(owner)->klitirqd_sem; + } + else + { + BUG(); + + // We had the rug pulled out from under us. Abort attempt + // to reacquire the lock since our client no longer needs us. + TRACE_CUR("HUH?! How did this happen?\n"); + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + return; + } + } + + //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid); + up_and_set_stat(t, NEED_TO_REACQUIRE, sem); + //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid); + } + /* + else if(is_realtime(t)) + { + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + } + */ +} + +int reacquire_klitirqd_lock(struct task_struct* t) +{ + int ret = 0; + + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE)) + { + struct mutex* sem; + struct task_struct* owner = t; + + if(likely(!tsk_rt(t)->is_proxy_thread)) + { + sem = &tsk_rt(t)->klitirqd_sem; + } + else + { + unsigned int k_id = klitirqd_id(t); + //struct task_struct* owner = klitirqds[k_id].current_owner; + owner = klitirqds[k_id].current_owner; + + BUG_ON(t != klitirqds[k_id].klitirqd); + + if(likely(owner)) + { + sem = &tsk_rt(owner)->klitirqd_sem; + } + else + { + // We had the rug pulled out from under us. Abort attempt + // to reacquire the lock since our client no longer needs us. + TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n"); + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + return(0); + } + } + + //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid); + __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem); + //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid); + } + /* + else if(is_realtime(t)) + { + TRACE_CUR("%s: Nothing to do. 
Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + } + */ + + return(ret); +} + diff --git a/litmus/locking.c b/litmus/locking.c index 2693f1aca859..cfce98e7480d 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -121,7 +121,6 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) return(t); } - #else struct fdso_ops generic_lock_ops = {}; diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c new file mode 100644 index 000000000000..78f035244d21 --- /dev/null +++ b/litmus/nvidia_info.c @@ -0,0 +1,526 @@ +#include +#include +#include + +#include +#include +#include + +typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ +typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ +typedef unsigned char NvU8; /* 0 to 255 */ +typedef unsigned short NvU16; /* 0 to 65535 */ +typedef signed char NvS8; /* -128 to 127 */ +typedef signed short NvS16; /* -32768 to 32767 */ +typedef float NvF32; /* IEEE Single Precision (S1E8M23) */ +typedef double NvF64; /* IEEE Double Precision (S1E11M52) */ +typedef unsigned int NvV32; /* "void": enumerated or multiple fields */ +typedef unsigned int NvU32; /* 0 to 4294967295 */ +typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */ +typedef union +{ + volatile NvV8 Reg008[1]; + volatile NvV16 Reg016[1]; + volatile NvV32 Reg032[1]; +} litmus_nv_hwreg_t, * litmus_nv_phwreg_t; + +typedef struct +{ + NvU64 address; + NvU64 size; + NvU32 offset; + NvU32 *map; + litmus_nv_phwreg_t map_u; +} litmus_nv_aperture_t; + +typedef struct +{ + void *priv; /* private data */ + void *os_state; /* os-specific device state */ + + int rmInitialized; + int flags; + + /* PCI config info */ + NvU32 domain; + NvU16 bus; + NvU16 slot; + NvU16 vendor_id; + NvU16 device_id; + NvU16 subsystem_id; + NvU32 gpu_id; + void *handle; + + NvU32 pci_cfg_space[16]; + + /* physical characteristics */ + litmus_nv_aperture_t bars[3]; + litmus_nv_aperture_t *regs; + litmus_nv_aperture_t *fb, ud; + litmus_nv_aperture_t agp; + + NvU32 interrupt_line; + + NvU32 agp_config; + NvU32 agp_status; + + NvU32 primary_vga; + + NvU32 sim_env; + + NvU32 rc_timer_enabled; + + /* list of events allocated for this device */ + void *event_list; + + void *kern_mappings; + +} litmus_nv_state_t; + +typedef struct work_struct litmus_nv_task_t; + +typedef struct litmus_nv_work_s { + litmus_nv_task_t task; + void *data; +} litmus_nv_work_t; + +typedef struct litmus_nv_linux_state_s { + litmus_nv_state_t nv_state; + atomic_t usage_count; + + struct pci_dev *dev; + void *agp_bridge; + void *alloc_queue; + + void *timer_sp; + void *isr_sp; + void *pci_cfgchk_sp; + void *isr_bh_sp; + +#ifdef CONFIG_CUDA_4_0 + char registry_keys[512]; +#endif + + /* keep track of any pending bottom halfes */ + struct tasklet_struct tasklet; + litmus_nv_work_t work; + + /* get a timer callback every second */ + struct timer_list rc_timer; + + /* lock for linux-specific data, not used by core rm */ + struct semaphore ldata_lock; + + /* lock for linux-specific alloc queue */ + struct semaphore at_lock; + +#if 0 +#if defined(NV_USER_MAP) + /* list of user mappings */ + struct nv_usermap_s *usermap_list; + + /* lock for VMware-specific mapping list */ + struct semaphore mt_lock; +#endif /* defined(NV_USER_MAP) */ +#if defined(NV_PM_SUPPORT_OLD_STYLE_APM) + void *apm_nv_dev; +#endif +#endif + + NvU32 device_num; + struct litmus_nv_linux_state_s *next; +} litmus_nv_linux_state_t; + +void dump_nvidia_info(const struct tasklet_struct *t) +{ + litmus_nv_state_t* nvstate = NULL; + 
litmus_nv_linux_state_t* linuxstate = NULL; + struct pci_dev* pci = NULL; + + nvstate = (litmus_nv_state_t*)(t->data); + + if(nvstate) + { + TRACE("NV State:\n" + "\ttasklet ptr = %p\n" + "\tstate ptr = %p\n" + "\tprivate data ptr = %p\n" + "\tos state ptr = %p\n" + "\tdomain = %u\n" + "\tbus = %u\n" + "\tslot = %u\n" + "\tvender_id = %u\n" + "\tdevice_id = %u\n" + "\tsubsystem_id = %u\n" + "\tgpu_id = %u\n" + "\tinterrupt_line = %u\n", + t, + nvstate, + nvstate->priv, + nvstate->os_state, + nvstate->domain, + nvstate->bus, + nvstate->slot, + nvstate->vendor_id, + nvstate->device_id, + nvstate->subsystem_id, + nvstate->gpu_id, + nvstate->interrupt_line); + + linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); + } + else + { + TRACE("INVALID NVSTATE????\n"); + } + + if(linuxstate) + { + int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate); + int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state)); + int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); + + + TRACE("LINUX NV State:\n" + "\tlinux nv state ptr: %p\n" + "\taddress of tasklet: %p\n" + "\taddress of work: %p\n" + "\tusage_count: %d\n" + "\tdevice_num: %u\n" + "\ttasklet addr == this tasklet: %d\n" + "\tpci: %p\n", + linuxstate, + &(linuxstate->tasklet), + &(linuxstate->work), + atomic_read(&(linuxstate->usage_count)), + linuxstate->device_num, + (t == &(linuxstate->tasklet)), + linuxstate->dev); + + pci = linuxstate->dev; + + TRACE("Offsets:\n" + "\tOffset from LinuxState: %d, %x\n" + "\tOffset from NVState: %d, %x\n" + "\tOffset from parameter: %d, %x\n" + "\tdevice_num: %u\n", + ls_offset, ls_offset, + ns_offset_raw, ns_offset_raw, + ns_offset_desired, ns_offset_desired, + *((u32*)((void*)nvstate + ns_offset_desired))); + } + else + { + TRACE("INVALID LINUXNVSTATE?????\n"); + } + +#if 0 + if(pci) + { + TRACE("PCI DEV Info:\n" + "pci device ptr: %p\n" + "\tdevfn = %d\n" + "\tvendor = %d\n" + "\tdevice = %d\n" + "\tsubsystem_vendor = %d\n" + "\tsubsystem_device = %d\n" + "\tslot # = %d\n", + pci, + pci->devfn, + pci->vendor, + pci->device, + pci->subsystem_vendor, + pci->subsystem_device, + pci->slot->number); + } + else + { + TRACE("INVALID PCIDEV PTR?????\n"); + } +#endif +} + +static struct module* nvidia_mod = NULL; +int init_nvidia_info(void) +{ + mutex_lock(&module_mutex); + nvidia_mod = find_module("nvidia"); + mutex_unlock(&module_mutex); + if(nvidia_mod != NULL) + { + TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, + (void*)(nvidia_mod->module_core), + (void*)(nvidia_mod->module_core) + nvidia_mod->core_size); + init_nv_device_reg(); + return(0); + } + else + { + TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); + return(-1); + } +} + + +/* works with pointers to static data inside the module too. */ +int is_nvidia_func(void* func_addr) +{ + int ret = 0; + if(nvidia_mod) + { + ret = within_module_core((long unsigned int)func_addr, nvidia_mod); + /* + if(ret) + { + TRACE("%s : %p is in NVIDIA module: %d\n", + __FUNCTION__, func_addr, ret); + }*/ + } + + return(ret); +} + +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) +{ + // life is too short to use hard-coded offsets. update this later. 
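+	// t->data is the driver's nv state; container_of() recovers the
+	// enclosing litmus_nv_linux_state_t so device_num can be read without
+	// a driver-version-specific offset.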
+ litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data); + litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); + + BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); + + return(linuxstate->device_num); + + //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); + +#if 0 + // offset determined though observed behavior of the NV driver. + //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1 + //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2 + + void* state = (void*)(t->data); + void* device_num_ptr = state + DEVICE_NUM_OFFSET; + + //dump_nvidia_info(t); + return(*((u32*)device_num_ptr)); +#endif +} + +u32 get_work_nv_device_num(const struct work_struct *t) +{ + // offset determined though observed behavior of the NV driver. + const int DEVICE_NUM_OFFSET = sizeof(struct work_struct); + void* state = (void*)(t); + void** device_num_ptr = state + DEVICE_NUM_OFFSET; + return(*((u32*)(*device_num_ptr))); +} + + + +typedef struct { + raw_spinlock_t lock; + struct task_struct *device_owner; +}nv_device_registry_t; + +static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; + +int init_nv_device_reg(void) +{ + int i; + + //memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); + + for(i = 0; i < NV_DEVICE_NUM; ++i) + { + raw_spin_lock_init(&NV_DEVICE_REG[i].lock); + NV_DEVICE_REG[i].device_owner = NULL; + } + + return(1); +} + +/* use to get nv_device_id by given owner. + (if return -1, can't get the assocaite device id)*/ +/* +int get_nv_device_id(struct task_struct* owner) +{ + int i; + if(!owner) + { + return(-1); + } + for(i = 0; i < NV_DEVICE_NUM; ++i) + { + if(NV_DEVICE_REG[i].device_owner == owner) + return(i); + } + return(-1); +} +*/ + + + +static int __reg_nv_device(int reg_device_id) +{ + struct task_struct* old = + cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner, + NULL, + current); + + mb(); + + if(likely(old == NULL)) + { + down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); + TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); + return(0); + } + else + { + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); + return(-EBUSY); + } + +#if 0 + //unsigned long flags; + //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags); + //lock_nv_registry(reg_device_id, &flags); + + if(likely(NV_DEVICE_REG[reg_device_id].device_owner == NULL)) + { + NV_DEVICE_REG[reg_device_id].device_owner = current; + mb(); // needed? + + // release spin lock before chance of going to sleep. 
+ //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags); + //unlock_nv_registry(reg_device_id, &flags); + + down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); + TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); + return(0); + } + else + { + //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags); + //unlock_nv_registry(reg_device_id, &flags); + + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); + return(-EBUSY); + } +#endif +} + +static int __clear_reg_nv_device(int de_reg_device_id) +{ + int ret; + unsigned long flags; + struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); + struct task_struct* old; + + lock_nv_registry(de_reg_device_id, &flags); + + old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner, + current, + NULL); + + mb(); + + if(likely(old == current)) + { + flush_pending(klitirqd_th, current); + //unlock_nv_registry(de_reg_device_id, &flags); + + up_and_set_stat(current, NOT_HELD, &tsk_rt(current)->klitirqd_sem); + + unlock_nv_registry(de_reg_device_id, &flags); + ret = 0; + + TRACE_CUR("%s: semaphore released.\n",__FUNCTION__); + } + else + { + unlock_nv_registry(de_reg_device_id, &flags); + ret = -EINVAL; + + if(old) + TRACE_CUR("%s: device %d is not registered for this process's use! %s/%d is!\n", + __FUNCTION__, de_reg_device_id, old->comm, old->pid); + else + TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n", + __FUNCTION__, de_reg_device_id); + } + + return(ret); +} + + +int reg_nv_device(int reg_device_id, int reg_action) +{ + int ret; + + if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) + { + if(reg_action) + ret = __reg_nv_device(reg_device_id); + else + ret = __clear_reg_nv_device(reg_device_id); + } + else + { + ret = -ENODEV; + } + + return(ret); +} + +/* use to get the owner of nv_device_id. */ +struct task_struct* get_nv_device_owner(u32 target_device_id) +{ + struct task_struct* owner; + BUG_ON(target_device_id >= NV_DEVICE_NUM); + owner = NV_DEVICE_REG[target_device_id].device_owner; + return(owner); +} + +void lock_nv_registry(u32 target_device_id, unsigned long* flags) +{ + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + if(in_interrupt()) + TRACE("Locking registry for %d.\n", target_device_id); + else + TRACE_CUR("Locking registry for %d.\n", target_device_id); + + raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); +} + +void unlock_nv_registry(u32 target_device_id, unsigned long* flags) +{ + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + if(in_interrupt()) + TRACE("Unlocking registry for %d.\n", target_device_id); + else + TRACE_CUR("Unlocking registry for %d.\n", target_device_id); + + raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); +} + + +void increment_nv_int_count(u32 device) +{ + unsigned long flags; + struct task_struct* owner; + + lock_nv_registry(device, &flags); + + owner = NV_DEVICE_REG[device].device_owner; + if(owner) + { + atomic_inc(&tsk_rt(owner)->nv_int_count); + } + + unlock_nv_registry(device, &flags); +} +EXPORT_SYMBOL(increment_nv_int_count); + + diff --git a/litmus/preempt.c b/litmus/preempt.c index ebe2e3461895..08b98c3b57bf 100644 --- a/litmus/preempt.c +++ b/litmus/preempt.c @@ -30,8 +30,11 @@ void sched_state_will_schedule(struct task_struct* tsk) /* Litmus tasks should never be subject to a remote * set_tsk_need_resched(). 
*/ BUG_ON(is_realtime(tsk)); + +/* TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", __builtin_return_address(0)); +*/ } /* Called by the IPI handler after another CPU called smp_send_resched(). */ @@ -43,13 +46,17 @@ void sched_state_ipi(void) /* Cause scheduler to be invoked. * This will cause a transition to WILL_SCHEDULE. */ set_tsk_need_resched(current); + /* TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", current->comm, current->pid); + */ } else { /* ignore */ + /* TRACE_STATE("ignoring IPI in state %x (%s)\n", get_sched_state(), sched_state_name(get_sched_state())); + */ } } diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 73fe1c442a0d..9b0a8d3b624d 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -45,7 +46,18 @@ /* to configure the cluster size */ #include -#include + +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif /* Reference configuration variable. Determines which cache level is used to * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that @@ -95,7 +107,7 @@ typedef struct clusterdomain { struct bheap_node *heap_node; struct bheap cpu_heap; /* lock for this cluster */ -#define lock domain.ready_lock +#define cedf_lock domain.ready_lock } cedf_domain_t; /* a cedf_domain per cluster; allocation is done at init/activation time */ @@ -257,21 +269,50 @@ static noinline void requeue(struct task_struct* task) } } +#ifdef CONFIG_SCHED_CPU_AFFINITY +static cpu_entry_t* cedf_get_nearest_available_cpu( + cedf_domain_t *cluster, cpu_entry_t* start) +{ + cpu_entry_t* affinity; + + get_nearest_available_cpu(affinity, start, cedf_cpu_entries, -1); + + /* make sure CPU is in our cluster */ + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) + return(affinity); + else + return(NULL); +} +#endif + + /* check for any necessary preemptions */ static void check_for_preemptions(cedf_domain_t *cluster) { struct task_struct *task; - cpu_entry_t* last; + cpu_entry_t *last; for(last = lowest_prio_cpu(cluster); edf_preemption_needed(&cluster->domain, last->linked); last = lowest_prio_cpu(cluster)) { /* preemption necessary */ task = __take_ready(&cluster->domain); - TRACE("check_for_preemptions: attempting to link task %d to %d\n", - task->pid, last->cpu); +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = + cedf_get_nearest_available_cpu(cluster, + &per_cpu(cedf_cpu_entries, task_cpu(task))); + if(affinity) + last = affinity; + else if(last->linked) + requeue(last->linked); + } +#else if (last->linked) requeue(last->linked); +#endif + TRACE("check_for_preemptions: attempting to link task %d to %d\n", + task->pid, last->cpu); link_task_to_cpu(task, last); preempt(last); } @@ -292,12 +333,12 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); unsigned long flags; - raw_spin_lock_irqsave(&cluster->lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); __merge_ready(&cluster->domain, tasks); check_for_preemptions(cluster); - raw_spin_unlock_irqrestore(&cluster->lock, flags); + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); } /* caller holds cedf_lock */ @@ -307,6 +348,10 @@ static noinline void job_completion(struct task_struct *t, int forced) sched_trace_task_completion(t, forced); +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + 
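+	/* nv_int_count counts GPU interrupts handled on this task's behalf;
+	 * it is reset above so that each job starts at zero. */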
TRACE_TASK(t, "job_completion().\n"); /* set flags */ @@ -378,7 +423,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) int out_of_time, sleep, preempt, np, exists, blocks; struct task_struct* next = NULL; - raw_spin_lock(&cluster->lock); + raw_spin_lock(&cluster->cedf_lock); clear_will_schedule(); /* sanity checking */ @@ -462,7 +507,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) next = prev; sched_state_task_picked(); - raw_spin_unlock(&cluster->lock); + raw_spin_unlock(&cluster->cedf_lock); #ifdef WANT_ALL_SCHED_EVENTS TRACE("cedf_lock released, next=0x%p\n", next); @@ -504,7 +549,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) /* the cluster doesn't change even if t is running */ cluster = task_cpu_cluster(t); - raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); /* setup job params */ release_at(t, litmus_clock()); @@ -521,20 +566,22 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) t->rt_param.linked_on = NO_CPU; cedf_job_arrival(t); - raw_spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags); + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); } static void cedf_task_wake_up(struct task_struct *task) { unsigned long flags; - lt_t now; + //lt_t now; cedf_domain_t *cluster; TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); cluster = task_cpu_cluster(task); - raw_spin_lock_irqsave(&cluster->lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); + +#if 0 // sporadic task model /* We need to take suspensions because of semaphores into * account! If a job resumes after being suspended due to acquiring * a semaphore, it should never be treated as a new job release. @@ -556,8 +603,17 @@ static void cedf_task_wake_up(struct task_struct *task) } } } - cedf_job_arrival(task); - raw_spin_unlock_irqrestore(&cluster->lock, flags); +#endif + + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU); + set_rt_flags(task, RT_F_RUNNING); // periodic model + + if(tsk_rt(task)->linked_on == NO_CPU) + cedf_job_arrival(task); + else + TRACE("WTF, mate?!\n"); + + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); } static void cedf_task_block(struct task_struct *t) @@ -570,9 +626,9 @@ static void cedf_task_block(struct task_struct *t) cluster = task_cpu_cluster(t); /* unlink if necessary */ - raw_spin_lock_irqsave(&cluster->lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); unlink(t); - raw_spin_unlock_irqrestore(&cluster->lock, flags); + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); BUG_ON(!is_realtime(t)); } @@ -584,7 +640,7 @@ static void cedf_task_exit(struct task_struct * t) cedf_domain_t *cluster = task_cpu_cluster(t); /* unlink if necessary */ - raw_spin_lock_irqsave(&cluster->lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { cpu_entry_t *cpu; @@ -592,7 +648,7 @@ static void cedf_task_exit(struct task_struct * t) cpu->scheduled = NULL; tsk_rt(t)->scheduled_on = NO_CPU; } - raw_spin_unlock_irqrestore(&cluster->lock, flags); + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); @@ -603,6 +659,721 @@ static long cedf_admit_task(struct task_struct* tsk) return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 
0 : -EINVAL; } + + + + + + + + + + + + +#ifdef CONFIG_LITMUS_LOCKING + +#include + + +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + int linked_on; + int check_preempt = 0; + + cedf_domain_t* cluster = task_cpu_cluster(t); + + if(prio_inh != NULL) + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "inherits priority from %p\n", prio_inh); + + sched_trace_eff_prio_change(t, prio_inh); + + tsk_rt(t)->inh_task = prio_inh; + + linked_on = tsk_rt(t)->linked_on; + + /* If it is scheduled, then we need to reorder the CPU heap. */ + if (linked_on != NO_CPU) { + TRACE_TASK(t, "%s: linked on %d\n", + __FUNCTION__, linked_on); + /* Holder is scheduled; need to re-order CPUs. + * We can't use heap_decrease() here since + * the cpu_heap is ordered in reverse direction, so + * it is actually an increase. */ + bheap_delete(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(cedf_cpu_entries, linked_on).hn); + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(cedf_cpu_entries, linked_on).hn); + } else { + /* holder may be queued: first stop queue changes */ + raw_spin_lock(&cluster->domain.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); + + /* We need to update the position of holder in some + * heap. Note that this could be a release heap if we + * budget enforcement is used and this job overran. */ + check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node); + + } else { + /* Nothing to do: if it is not queued and not linked + * then it is either sleeping or currently being moved + * by other code (e.g., a timer interrupt handler) that + * will use the correct priority when enqueuing the + * task. */ + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); + } + raw_spin_unlock(&cluster->domain.release_lock); + + /* If holder was enqueued in a release heap, then the following + * preemption check is pointless, but we can't easily detect + * that case. If you want to fix this, then consider that + * simply adding a state flag requires O(n) time to update when + * releasing n tasks, which conflicts with the goal to have + * O(log n) merges. */ + if (check_preempt) { + /* heap_decrease() hit the top level of the heap: make + * sure preemption checks get the right task, not the + * potentially stale cache. */ + bheap_uncache_min(edf_ready_order, &cluster->domain.ready_queue); + check_for_preemptions(cluster); + } + } +} + +/* called with IRQs off */ +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + cedf_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cedf_lock); + + __set_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&cluster->cedf_lock); +} + + +/* called with IRQs off */ +static void __clear_priority_inheritance(struct task_struct* t) +{ + TRACE_TASK(t, "priority restored\n"); + + if(tsk_rt(t)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(t, NULL); + + tsk_rt(t)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. 
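+	 * Instead, unlink the task and re-run cedf_job_arrival() below, which
+	 * requeues it under its restored priority and re-checks for
+	 * preemptions.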
*/ + unlink(t); + cedf_job_arrival(t); + } + else + { + __set_priority_inheritance(t, NULL); + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); + + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(tsk_rt(t)->cur_klitirqd); + cedf_job_arrival(tsk_rt(t)->cur_klitirqd); + } + else + { + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); + } + } +#endif +} + +/* called with IRQs off */ +static void clear_priority_inheritance(struct task_struct* t) +{ + cedf_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cedf_lock); + __clear_priority_inheritance(t); + raw_spin_unlock(&cluster->cedf_lock); +} + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +/* called with IRQs off */ +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cedf_lock); + + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } + + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); + + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __set_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? + new_owner : + tsk_rt(new_owner)->inh_task); + + raw_spin_unlock(&cluster->cedf_lock); +} + +/* called with IRQs off */ +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ + cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cedf_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) + { + tsk_rt(klitirqd)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. 
*/ + unlink(klitirqd); + cedf_job_arrival(klitirqd); + } + else + { + __set_priority_inheritance(klitirqd, NULL); + } + + tsk_rt(old_owner)->cur_klitirqd = NULL; + + raw_spin_unlock(&cluster->cedf_lock); +} +#endif // CONFIG_LITMUS_SOFTIRQD + + +/* ******************** KFMLP support ********************** */ + +/* struct for semaphore with priority inheritance */ +struct kfmlp_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ +}; + +struct kfmlp_semaphore +{ + struct litmus_lock litmus_lock; + + spinlock_t lock; + + int num_resources; /* aka k */ + struct kfmlp_queue *queues; /* array */ + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ +}; + +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct kfmlp_semaphore, litmus_lock); +} + +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, + struct kfmlp_queue* queue) +{ + return (queue - &sem->queues[0]); +} + +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, + struct task_struct* holder) +{ + int i; + for(i = 0; i < sem->num_resources; ++i) + if(sem->queues[i].owner == holder) + return(&sem->queues[i]); + return(NULL); +} + +/* caller is responsible for locking */ +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + + /* Compare task prios, find high prio task. */ + if (queued != skip && edf_higher_prio(queued, found)) + found = queued; + } + return found; +} + +static inline struct kfmlp_queue* kfmlp_find_shortest( + struct kfmlp_semaphore* sem, + struct kfmlp_queue* search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct kfmlp_queue* step = search_start; + struct kfmlp_queue* shortest = sem->shortest_queue; + + do + { + step = (step+1 != &sem->queues[sem->num_resources]) ? 
+ step+1 : &sem->queues[0]; + if(step->count < shortest->count) + { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + }while(step != search_start); + + return(shortest); +} + +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) +{ + /* must hold sem->lock */ + + struct kfmlp_queue *my_queue = NULL; + struct task_struct *max_hp = NULL; + + + struct list_head *pos; + struct task_struct *queued; + int i; + + for(i = 0; i < sem->num_resources; ++i) + { + if( (sem->queues[i].count > 1) && + ((my_queue == NULL) || + (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + { + my_queue = &sem->queues[i]; + } + } + + if(my_queue) + { + cedf_domain_t* cluster; + + max_hp = my_queue->hp_waiter; + BUG_ON(!max_hp); + + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", + kfmlp_get_idx(sem, my_queue), + max_hp->comm, max_hp->pid, + kfmlp_get_idx(sem, my_queue)); + + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); + + /* + if(my_queue->hp_waiter) + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->hp_waiter->comm, + my_queue->hp_waiter->pid); + else + TRACE_CUR("queue %d: new hp_waiter is %p\n", + kfmlp_get_idx(sem, my_queue), NULL); + */ + + cluster = task_cpu_cluster(max_hp); + + raw_spin_lock(&cluster->cedf_lock); + + /* + if(my_queue->owner) + TRACE_CUR("queue %d: owner is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->owner->comm, + my_queue->owner->pid); + else + TRACE_CUR("queue %d: owner is %p\n", + kfmlp_get_idx(sem, my_queue), + NULL); + */ + + if(tsk_rt(my_queue->owner)->inh_task == max_hp) + { + __clear_priority_inheritance(my_queue->owner); + if(my_queue->hp_waiter != NULL) + { + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + raw_spin_unlock(&cluster->cedf_lock); + + list_for_each(pos, &my_queue->wait.task_list) + { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + /* Compare task prios, find high prio task. */ + if (queued == max_hp) + { + /* + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", + kfmlp_get_idx(sem, my_queue)); + */ + __remove_wait_queue(&my_queue->wait, + list_entry(pos, wait_queue_t, task_list)); + break; + } + } + --(my_queue->count); + } + + return(max_hp); +} + +int cedf_kfmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue* my_queue; + wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = sem->shortest_queue; + + if (my_queue->owner) { + /* resource is not free => must suspend and wait */ + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", + kfmlp_get_idx(sem, my_queue)); + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); + + /* check if we need to activate priority inheritance */ + if (edf_higher_prio(t, my_queue->hp_waiter)) + { + my_queue->hp_waiter = t; + if (edf_higher_prio(t, my_queue->owner)) + { + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->lock, flags); + + /* We depend on the FIFO order. 
Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release (or steal). + */ + schedule(); + + + if(my_queue->owner == t) + { + TRACE_CUR("queue %d: acquired through waiting\n", + kfmlp_get_idx(sem, my_queue)); + } + else + { + /* this case may happen if our wait entry was stolen + between queues. record where we went.*/ + my_queue = kfmlp_get_queue(sem, t); + BUG_ON(!my_queue); + TRACE_CUR("queue %d: acquired through stealing\n", + kfmlp_get_idx(sem, my_queue)); + } + } + else + { + TRACE_CUR("queue %d: acquired immediately\n", + kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = t; + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + spin_unlock_irqrestore(&sem->lock, flags); + } + + return kfmlp_get_idx(sem, my_queue); +} + +int cedf_kfmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + + if (!my_queue) { + err = -EINVAL; + goto out; + } + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&my_queue->wait); + if (next) { + /* + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + /* next becomes the resouce holder */ + my_queue->owner = next; + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", + kfmlp_get_idx(sem, my_queue), next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == my_queue->hp_waiter) { + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); + if (my_queue->hp_waiter) + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); + else + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); + } else { + /* Well, if next is not the highest-priority waiter, + * then it ought to inherit the highest-priority + * waiter's priority. 
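+			 * With CONFIG_LITMUS_SOFTIRQD, set_priority_inheritance()
+			 * below also forwards the inherited priority to any klitirqd
+			 * thread currently working on next's behalf.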
*/ + set_priority_inheritance(next, my_queue->hp_waiter); + } + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); + + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ + + /* + if(next) + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + + my_queue->owner = next; + + if(next) + { + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + } + } + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + clear_priority_inheritance(t); + +out: + spin_unlock_irqrestore(&sem->lock, flags); + + return err; +} + +int cedf_kfmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + owner = (my_queue) ? (my_queue->owner == t) : 0; + + spin_unlock_irqrestore(&sem->lock, flags); + + if (owner) + cedf_kfmlp_unlock(l); + + return 0; +} + +void cedf_kfmlp_free(struct litmus_lock* l) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + kfree(sem->queues); + kfree(sem); +} + +static struct litmus_lock_ops cedf_kfmlp_lock_ops = { + .close = cedf_kfmlp_close, + .lock = cedf_kfmlp_lock, + .unlock = cedf_kfmlp_unlock, + .deallocate = cedf_kfmlp_free, +}; + +static struct litmus_lock* cedf_new_kfmlp(void* __user arg, int* ret_code) +{ + struct kfmlp_semaphore* sem; + int num_resources = 0; + int i; + + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(num_resources < 1) + { + *ret_code = -EINVAL; + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + *ret_code = -ENOMEM; + return NULL; + } + + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); + if(!sem->queues) + { + kfree(sem); + *ret_code = -ENOMEM; + return NULL; + } + + sem->litmus_lock.ops = &cedf_kfmlp_lock_ops; + spin_lock_init(&sem->lock); + sem->num_resources = num_resources; + + for(i = 0; i < num_resources; ++i) + { + sem->queues[i].owner = NULL; + sem->queues[i].hp_waiter = NULL; + init_waitqueue_head(&sem->queues[i].wait); + sem->queues[i].count = 0; + } + + sem->shortest_queue = &sem->queues[0]; + + *ret_code = 0; + return &sem->litmus_lock; +} + + +/* **** lock constructor **** */ + +static long cedf_allocate_lock(struct litmus_lock **lock, int type, + void* __user arg) +{ + int err = -ENXIO; + + /* C-EDF currently only supports the FMLP for global resources + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! 
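+	 * Only KFMLP_SEM is recognized below; any other lock type leaves err
+	 * at its initial -ENXIO, which is returned to the caller.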
*/ + switch (type) { + case KFMLP_SEM: + *lock = cedf_new_kfmlp(arg, &err); + break; + }; + + return err; +} + +#endif // CONFIG_LITMUS_LOCKING + + + + + + /* total number of cluster */ static int num_clusters; /* we do not support cluster of different sizes */ @@ -746,6 +1517,40 @@ static long cedf_activate_plugin(void) break; } } + +#ifdef CONFIG_LITMUS_SOFTIRQD + { + /* distribute the daemons evenly across the clusters. */ + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; + int left_over = NR_LITMUS_SOFTIRQD % num_clusters; + + int daemon = 0; + for(i = 0; i < num_clusters; ++i) + { + int num_on_this_cluster = num_daemons_per_cluster; + if(left_over) + { + ++num_on_this_cluster; + --left_over; + } + + for(j = 0; j < num_on_this_cluster; ++j) + { + // first CPU of this cluster + affinity[daemon++] = i*cluster_size; + } + } + + spawn_klitirqd(affinity); + + kfree(affinity); + } +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif free_cpumask_var(mask); clusters_allocated = 1; @@ -765,6 +1570,15 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .task_block = cedf_task_block, .admit_task = cedf_admit_task, .activate_plugin = cedf_activate_plugin, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = cedf_allocate_lock, + .set_prio_inh = set_priority_inheritance, + .clear_prio_inh = clear_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, +#endif }; static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 3092797480f8..d04e0703c154 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -12,6 +12,8 @@ #include #include #include +#include + #include #include @@ -25,6 +27,19 @@ #include +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + + /* Overview of GSN-EDF operations. * * For a detailed explanation of GSN-EDF have a look at the FMLP paper. 
This @@ -253,21 +268,52 @@ static noinline void requeue(struct task_struct* task) } } +#ifdef CONFIG_SCHED_CPU_AFFINITY +static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t* start) +{ + cpu_entry_t* affinity; + + get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries, +#ifdef CONFIG_RELEASE_MASTER + gsnedf.release_master +#else + -1 +#endif + ); + + return(affinity); +} +#endif + /* check for any necessary preemptions */ static void check_for_preemptions(void) { struct task_struct *task; - cpu_entry_t* last; + cpu_entry_t *last; for(last = lowest_prio_cpu(); edf_preemption_needed(&gsnedf, last->linked); last = lowest_prio_cpu()) { /* preemption necessary */ task = __take_ready(&gsnedf); - TRACE("check_for_preemptions: attempting to link task %d to %d\n", - task->pid, last->cpu); + +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = gsnedf_get_nearest_available_cpu( + &per_cpu(gsnedf_cpu_entries, task_cpu(task))); + if(affinity) + last = affinity; + else if(last->linked) + requeue(last->linked); + } +#else if (last->linked) requeue(last->linked); +#endif + + TRACE("check_for_preemptions: attempting to link task %d to %d\n", + task->pid, last->cpu); + link_task_to_cpu(task, last); preempt(last); } @@ -277,7 +323,7 @@ static void check_for_preemptions(void) static noinline void gsnedf_job_arrival(struct task_struct* task) { BUG_ON(!task); - + requeue(task); check_for_preemptions(); } @@ -298,9 +344,13 @@ static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) static noinline void job_completion(struct task_struct *t, int forced) { BUG_ON(!t); - + sched_trace_task_completion(t, forced); +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + TRACE_TASK(t, "job_completion().\n"); /* set flags */ @@ -401,17 +451,19 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); #endif + /* if (exists) TRACE_TASK(prev, "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " "state:%d sig:%d\n", blocks, out_of_time, np, sleep, preempt, prev->state, signal_pending(prev)); + */ + if (entry->linked && preempt) TRACE_TASK(prev, "will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); - /* If a task blocks we have no choice but to reschedule. */ if (blocks) @@ -456,12 +508,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) entry->scheduled->rt_param.scheduled_on = NO_CPU; TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); } - } else + } + else + { /* Only override Linux scheduler if we have a real-time task * scheduled that needs to continue. */ if (exists) next = prev; + } sched_state_task_picked(); @@ -486,8 +541,9 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) static void gsnedf_finish_switch(struct task_struct *prev) { cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); - + entry->scheduled = is_realtime(current) ? current : NULL; + #ifdef WANT_ALL_SCHED_EVENTS TRACE_TASK(prev, "switched away from\n"); #endif @@ -536,11 +592,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) static void gsnedf_task_wake_up(struct task_struct *task) { unsigned long flags; - lt_t now; - + lt_t now; + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); raw_spin_lock_irqsave(&gsnedf_lock, flags); + + +#if 0 // sporadic task model /* We need to take suspensions because of semaphores into * account! 
If a job resumes after being suspended due to acquiring * a semaphore, it should never be treated as a new job release. @@ -562,19 +621,26 @@ static void gsnedf_task_wake_up(struct task_struct *task) } } } +#else // periodic task model + set_rt_flags(task, RT_F_RUNNING); +#endif + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } static void gsnedf_task_block(struct task_struct *t) { + // TODO: is this called on preemption?? unsigned long flags; TRACE_TASK(t, "block at %llu\n", litmus_clock()); /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); + unlink(t); + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -608,51 +674,53 @@ static long gsnedf_admit_task(struct task_struct* tsk) #include -/* called with IRQs off */ -static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) + +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int linked_on; - int check_preempt = 0; - - raw_spin_lock(&gsnedf_lock); - - TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); + int check_preempt = 0; + + if(prio_inh != NULL) + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "inherits priority from %p\n", prio_inh); + + sched_trace_eff_prio_change(t, prio_inh); + tsk_rt(t)->inh_task = prio_inh; - + linked_on = tsk_rt(t)->linked_on; - + /* If it is scheduled, then we need to reorder the CPU heap. */ if (linked_on != NO_CPU) { TRACE_TASK(t, "%s: linked on %d\n", - __FUNCTION__, linked_on); + __FUNCTION__, linked_on); /* Holder is scheduled; need to re-order CPUs. * We can't use heap_decrease() here since * the cpu_heap is ordered in reverse direction, so * it is actually an increase. */ bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, - gsnedf_cpus[linked_on]->hn); + gsnedf_cpus[linked_on]->hn); bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, - gsnedf_cpus[linked_on]->hn); + gsnedf_cpus[linked_on]->hn); } else { /* holder may be queued: first stop queue changes */ raw_spin_lock(&gsnedf.release_lock); if (is_queued(t)) { - TRACE_TASK(t, "%s: is queued\n", - __FUNCTION__); + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); + /* We need to update the position of holder in some * heap. Note that this could be a release heap if we * budget enforcement is used and this job overran. */ - check_preempt = - !bheap_decrease(edf_ready_order, - tsk_rt(t)->heap_node); + check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node); + } else { /* Nothing to do: if it is not queued and not linked * then it is either sleeping or currently being moved * by other code (e.g., a timer interrupt handler) that * will use the correct priority when enqueuing the * task. */ - TRACE_TASK(t, "%s: is NOT queued => Done.\n", - __FUNCTION__); + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); } raw_spin_unlock(&gsnedf.release_lock); @@ -666,34 +734,148 @@ static void set_priority_inheritance(struct task_struct* t, struct task_struct* /* heap_decrease() hit the top level of the heap: make * sure preemption checks get the right task, not the * potentially stale cache. 
*/ - bheap_uncache_min(edf_ready_order, - &gsnedf.ready_queue); + bheap_uncache_min(edf_ready_order, &gsnedf.ready_queue); check_for_preemptions(); } } +} +/* called with IRQs off */ +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + raw_spin_lock(&gsnedf_lock); + + __set_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + raw_spin_unlock(&gsnedf_lock); } + +/* called with IRQs off */ +static void __clear_priority_inheritance(struct task_struct* t) +{ + TRACE_TASK(t, "priority restored\n"); + + if(tsk_rt(t)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(t, NULL); + + tsk_rt(t)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(t); + gsnedf_job_arrival(t); + } + else + { + __set_priority_inheritance(t, NULL); + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); + + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(tsk_rt(t)->cur_klitirqd); + gsnedf_job_arrival(tsk_rt(t)->cur_klitirqd); + } + else + { + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); + } + } +#endif +} + /* called with IRQs off */ static void clear_priority_inheritance(struct task_struct* t) { raw_spin_lock(&gsnedf_lock); + __clear_priority_inheritance(t); + raw_spin_unlock(&gsnedf_lock); +} - /* A job only stops inheriting a priority when it releases a - * resource. Thus we can make the following assumption.*/ - BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); - - TRACE_TASK(t, "priority restored\n"); - tsk_rt(t)->inh_task = NULL; +#ifdef CONFIG_LITMUS_SOFTIRQD +/* called with IRQs off */ +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&gsnedf_lock); + + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } + + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); - /* Check if rescheduling is necessary. We can't use heap_decrease() - * since the priority was effectively lowered. */ - unlink(t); - gsnedf_job_arrival(t); + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __set_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? + new_owner : + tsk_rt(new_owner)->inh_task); + + raw_spin_unlock(&gsnedf_lock); +} +/* called with IRQs off */ +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&gsnedf_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) + { + tsk_rt(klitirqd)->inh_task = NULL; + + /* Check if rescheduling is necessary. 
We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(klitirqd); + gsnedf_job_arrival(klitirqd); + } + else + { + __set_priority_inheritance(klitirqd, NULL); + } + + tsk_rt(old_owner)->cur_klitirqd = NULL; + raw_spin_unlock(&gsnedf_lock); } +#endif /* ******************** FMLP support ********************** */ @@ -892,11 +1074,477 @@ static struct litmus_lock* gsnedf_new_fmlp(void) return &sem->litmus_lock; } + + + + + + +/* ******************** KFMLP support ********************** */ + +/* struct for semaphore with priority inheritance */ +struct kfmlp_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ +}; + +struct kfmlp_semaphore +{ + struct litmus_lock litmus_lock; + + spinlock_t lock; + + int num_resources; /* aka k */ + + struct kfmlp_queue *queues; /* array */ + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ +}; + +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct kfmlp_semaphore, litmus_lock); +} + +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, + struct kfmlp_queue* queue) +{ + return (queue - &sem->queues[0]); +} + +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, + struct task_struct* holder) +{ + int i; + for(i = 0; i < sem->num_resources; ++i) + if(sem->queues[i].owner == holder) + return(&sem->queues[i]); + return(NULL); +} + +/* caller is responsible for locking */ +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + + /* Compare task prios, find high prio task. */ + if (queued != skip && edf_higher_prio(queued, found)) + found = queued; + } + return found; +} + +static inline struct kfmlp_queue* kfmlp_find_shortest( + struct kfmlp_semaphore* sem, + struct kfmlp_queue* search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct kfmlp_queue* step = search_start; + struct kfmlp_queue* shortest = sem->shortest_queue; + + do + { + step = (step+1 != &sem->queues[sem->num_resources]) ? 
+ step+1 : &sem->queues[0]; + if(step->count < shortest->count) + { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + }while(step != search_start); + + return(shortest); +} + +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) +{ + /* must hold sem->lock */ + + struct kfmlp_queue *my_queue = NULL; + struct task_struct *max_hp = NULL; + + + struct list_head *pos; + struct task_struct *queued; + int i; + + for(i = 0; i < sem->num_resources; ++i) + { + if( (sem->queues[i].count > 1) && + ((my_queue == NULL) || + (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + { + my_queue = &sem->queues[i]; + } + } + + if(my_queue) + { + max_hp = my_queue->hp_waiter; + + BUG_ON(!max_hp); + + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", + kfmlp_get_idx(sem, my_queue), + max_hp->comm, max_hp->pid, + kfmlp_get_idx(sem, my_queue)); + + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); + + /* + if(my_queue->hp_waiter) + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->hp_waiter->comm, + my_queue->hp_waiter->pid); + else + TRACE_CUR("queue %d: new hp_waiter is %p\n", + kfmlp_get_idx(sem, my_queue), NULL); + */ + + raw_spin_lock(&gsnedf_lock); + + /* + if(my_queue->owner) + TRACE_CUR("queue %d: owner is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->owner->comm, + my_queue->owner->pid); + else + TRACE_CUR("queue %d: owner is %p\n", + kfmlp_get_idx(sem, my_queue), + NULL); + */ + + if(tsk_rt(my_queue->owner)->inh_task == max_hp) + { + __clear_priority_inheritance(my_queue->owner); + if(my_queue->hp_waiter != NULL) + { + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + raw_spin_unlock(&gsnedf_lock); + + list_for_each(pos, &my_queue->wait.task_list) + { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + /* Compare task prios, find high prio task. */ + if (queued == max_hp) + { + /* + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", + kfmlp_get_idx(sem, my_queue)); + */ + __remove_wait_queue(&my_queue->wait, + list_entry(pos, wait_queue_t, task_list)); + break; + } + } + --(my_queue->count); + } + + return(max_hp); +} + +int gsnedf_kfmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue* my_queue; + wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = sem->shortest_queue; + + if (my_queue->owner) { + /* resource is not free => must suspend and wait */ + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", + kfmlp_get_idx(sem, my_queue)); + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); + + /* check if we need to activate priority inheritance */ + if (edf_higher_prio(t, my_queue->hp_waiter)) + { + my_queue->hp_waiter = t; + if (edf_higher_prio(t, my_queue->owner)) + { + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->lock, flags); + + /* We depend on the FIFO order. 
Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release (or steal). + */ + schedule(); + + + if(my_queue->owner == t) + { + TRACE_CUR("queue %d: acquired through waiting\n", + kfmlp_get_idx(sem, my_queue)); + } + else + { + /* this case may happen if our wait entry was stolen + between queues. record where we went. */ + my_queue = kfmlp_get_queue(sem, t); + + BUG_ON(!my_queue); + TRACE_CUR("queue %d: acquired through stealing\n", + kfmlp_get_idx(sem, my_queue)); + } + } + else + { + TRACE_CUR("queue %d: acquired immediately\n", + kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = t; + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + spin_unlock_irqrestore(&sem->lock, flags); + } + + return kfmlp_get_idx(sem, my_queue); +} + +int gsnedf_kfmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + + if (!my_queue) { + err = -EINVAL; + goto out; + } + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&my_queue->wait); + if (next) { + /* + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + /* next becomes the resouce holder */ + my_queue->owner = next; + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", + kfmlp_get_idx(sem, my_queue), next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == my_queue->hp_waiter) { + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); + if (my_queue->hp_waiter) + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); + else + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); + } else { + /* Well, if next is not the highest-priority waiter, + * then it ought to inherit the highest-priority + * waiter's priority. 
*/ + set_priority_inheritance(next, my_queue->hp_waiter); + } + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); + + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ + + /* + if(next) + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + + my_queue->owner = next; + + if(next) + { + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + } + } + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + clear_priority_inheritance(t); + +out: + spin_unlock_irqrestore(&sem->lock, flags); + + return err; +} + +int gsnedf_kfmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + owner = (my_queue) ? (my_queue->owner == t) : 0; + + spin_unlock_irqrestore(&sem->lock, flags); + + if (owner) + gsnedf_kfmlp_unlock(l); + + return 0; +} + +void gsnedf_kfmlp_free(struct litmus_lock* l) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + kfree(sem->queues); + kfree(sem); +} + +static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = { + .close = gsnedf_kfmlp_close, + .lock = gsnedf_kfmlp_lock, + .unlock = gsnedf_kfmlp_unlock, + .deallocate = gsnedf_kfmlp_free, +}; + +static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg, int* ret_code) +{ + struct kfmlp_semaphore* sem; + int num_resources = 0; + int i; + + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(num_resources < 1) + { + *ret_code = -EINVAL; + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + *ret_code = -ENOMEM; + return NULL; + } + + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); + if(!sem->queues) + { + kfree(sem); + *ret_code = -ENOMEM; + return NULL; + } + + sem->litmus_lock.ops = &gsnedf_kfmlp_lock_ops; + spin_lock_init(&sem->lock); + sem->num_resources = num_resources; + + for(i = 0; i < num_resources; ++i) + { + sem->queues[i].owner = NULL; + sem->queues[i].hp_waiter = NULL; + init_waitqueue_head(&sem->queues[i].wait); + sem->queues[i].count = 0; + } + + sem->shortest_queue = &sem->queues[0]; + + *ret_code = 0; + return &sem->litmus_lock; +} + + + + + /* **** lock constructor **** */ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, - void* __user unused) + void* __user arg) { int err = -ENXIO; @@ -911,7 +1559,10 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, else err = -ENOMEM; break; - + + case KFMLP_SEM: + *lock = gsnedf_new_kfmlp(arg, &err); + break; }; return err; @@ -919,7 +1570,6 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, #endif - static long gsnedf_activate_plugin(void) { int cpu; @@ -946,6 +1596,15 @@ static long gsnedf_activate_plugin(void) } #endif 
} + +#ifdef CONFIG_LITMUS_SOFTIRQD + spawn_klitirqd(NULL); +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif + return 0; } @@ -963,8 +1622,15 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .admit_task = gsnedf_admit_task, .activate_plugin = gsnedf_activate_plugin, #ifdef CONFIG_LITMUS_LOCKING - .allocate_lock = gsnedf_allocate_lock, + .allocate_lock = gsnedf_allocate_lock, + .set_prio_inh = set_priority_inheritance, + .clear_prio_inh = clear_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, #endif + }; diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c index e6952896dc4b..1bca2e1a33cd 100644 --- a/litmus/sched_litmus.c +++ b/litmus/sched_litmus.c @@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) } #ifdef __ARCH_WANT_UNLOCKED_CTXSW if (next->oncpu) + { TRACE_TASK(next, "waiting for !oncpu"); + } while (next->oncpu) { cpu_relax(); mb(); diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index d54886df1f57..8802670a4b0b 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -129,6 +129,27 @@ static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, return -ENXIO; } +static void litmus_dummy_set_prio_inh(struct task_struct* a, struct task_struct* b) +{ +} + +static void litmus_dummy_clear_prio_inh(struct task_struct* t) +{ +} + +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +static void litmus_dummy_set_prio_inh_klitirq(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ +} + +static void litmus_dummy_clear_prio_inh_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ +} #endif @@ -149,6 +170,12 @@ struct sched_plugin linux_sched_plugin = { .deactivate_plugin = litmus_dummy_deactivate_plugin, #ifdef CONFIG_LITMUS_LOCKING .allocate_lock = litmus_dummy_allocate_lock, + .set_prio_inh = litmus_dummy_set_prio_inh, + .clear_prio_inh = litmus_dummy_clear_prio_inh, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirq, + .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd, #endif .admit_task = litmus_dummy_admit_task }; @@ -187,6 +214,8 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(deactivate_plugin); #ifdef CONFIG_LITMUS_LOCKING CHECK(allocate_lock); + CHECK(set_prio_inh); + CHECK(clear_prio_inh); #endif CHECK(admit_task); diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 5ef8d09ab41f..7aeb99b668d3 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -16,13 +17,13 @@ #include -#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) +#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11)) #define now() litmus_clock() struct local_buffer { - struct st_event_record record[NO_EVENTS]; - char flag[NO_EVENTS]; + struct st_event_record record[NUM_EVENTS]; + char flag[NUM_EVENTS]; struct ft_buffer ftbuf; }; @@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void) int i, ok = 0, err; printk("Allocated %u sched_trace_xxx() events per CPU " "(buffer size: %d bytes)\n", - NO_EVENTS, (int) sizeof(struct local_buffer)); + NUM_EVENTS, (int) sizeof(struct local_buffer)); err = ftdev_init(&st_dev, THIS_MODULE, num_online_cpus(), "sched_trace"); @@ -50,7 +51,7 @@ static int __init 
init_sched_task_trace(void) for (i = 0; i < st_dev.minor_cnt; i++) { buf = &per_cpu(st_event_buffer, i); - ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, + ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS, sizeof(struct st_event_record), buf->flag, buf->record); @@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id, { struct task_struct *t = (struct task_struct*) _task; struct st_event_record* rec; - if (is_realtime(t)) { + //if (is_realtime(t)) /* comment out to trace EVERYTHING */ + { rec = get_record(ST_SWITCH_TO, t); if (rec) { rec->data.switch_to.when = now(); @@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id, { struct task_struct *t = (struct task_struct*) _task; struct st_event_record* rec; - if (is_realtime(t)) { + //if (is_realtime(t)) /* comment out to trace EVERYTHING */ + { rec = get_record(ST_SWITCH_AWAY, t); if (rec) { rec->data.switch_away.when = now(); @@ -188,6 +191,7 @@ feather_callback void do_sched_trace_task_completion(unsigned long id, if (rec) { rec->data.completion.when = now(); rec->data.completion.forced = forced; + rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); put_record(rec); } } @@ -239,3 +243,201 @@ feather_callback void do_sched_trace_action(unsigned long id, put_record(rec); } } + + +feather_callback void do_sched_trace_tasklet_release(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t); + + if (rec) { + rec->data.tasklet_release.when = now(); + put_record(rec); + } +} + + +feather_callback void do_sched_trace_tasklet_begin(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t); + + if (rec) { + rec->data.tasklet_begin.when = now(); + + if(!in_interrupt()) + rec->data.tasklet_begin.exe_pid = current->pid; + else + rec->data.tasklet_begin.exe_pid = 0; + + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_tasklet_begin); + + +feather_callback void do_sched_trace_tasklet_end(unsigned long id, + unsigned long _owner, + unsigned long _flushed) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_END, t); + + if (rec) { + rec->data.tasklet_end.when = now(); + rec->data.tasklet_end.flushed = _flushed; + + if(!in_interrupt()) + rec->data.tasklet_end.exe_pid = current->pid; + else + rec->data.tasklet_end.exe_pid = 0; + + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_tasklet_end); + + +feather_callback void do_sched_trace_work_release(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_RELEASE, t); + + if (rec) { + rec->data.work_release.when = now(); + put_record(rec); + } +} + + +feather_callback void do_sched_trace_work_begin(unsigned long id, + unsigned long _owner, + unsigned long _exe) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_BEGIN, t); + + if (rec) { + struct task_struct *exe = (struct task_struct*) _exe; + rec->data.work_begin.exe_pid = exe->pid; + rec->data.work_begin.when = now(); + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_work_begin); + + +feather_callback void do_sched_trace_work_end(unsigned long id, + unsigned long _owner, + unsigned long _exe, + unsigned long _flushed) +{ + struct 
task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_END, t); + + if (rec) { + struct task_struct *exe = (struct task_struct*) _exe; + rec->data.work_end.exe_pid = exe->pid; + rec->data.work_end.flushed = _flushed; + rec->data.work_end.when = now(); + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_work_end); + + +feather_callback void do_sched_trace_eff_prio_change(unsigned long id, + unsigned long _task, + unsigned long _inh) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t); + + if (rec) { + struct task_struct *inh = (struct task_struct*) _inh; + rec->data.effective_priority_change.when = now(); + rec->data.effective_priority_change.inh_pid = (inh != NULL) ? + inh->pid : + 0xffff; + + put_record(rec); + } +} + + +/* pray for no nesting of nv interrupts on same CPU... */ +struct tracing_interrupt_map +{ + int active; + int count; + unsigned long data[128]; // assume nesting less than 128... +}; +DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing); + +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, + unsigned long _device) +{ + struct st_event_record *rec; + + { + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + { + int_map->count++; + } + else + { + int_map->active = 0xcafebabe; + int_map->count = 1; + } + int_map->data[int_map->count-1] = _device; + } + + rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL); + if(rec) { + u32 device = _device; + rec->data.nv_interrupt_begin.when = now(); + rec->data.nv_interrupt_begin.device = device; + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin); + +/* +int is_interrupt_tracing_active(void) +{ + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + return 1; + return 0; +} +*/ + +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long unused) +{ + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + { + struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL); + + int_map->count--; + if(int_map->count == 0) + int_map->active = 0; + + if(rec) { + rec->data.nv_interrupt_end.when = now(); + rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; + put_record(rec); + } + } +} +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end); + + + + + + diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c new file mode 100644 index 000000000000..d7d7d8bae298 --- /dev/null +++ b/litmus/sched_trace_external.c @@ -0,0 +1,45 @@ +#include + +#include +#include + +void __sched_trace_tasklet_begin_external(struct task_struct* t) +{ + sched_trace_tasklet_begin(t); +} +EXPORT_SYMBOL(__sched_trace_tasklet_begin_external); + +void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed) +{ + sched_trace_tasklet_end(t, flushed); +} +EXPORT_SYMBOL(__sched_trace_tasklet_end_external); + + + +void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e) +{ + sched_trace_work_begin(t, e); +} +EXPORT_SYMBOL(__sched_trace_work_begin_external); + +void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f) +{ + sched_trace_work_end(t, e, f); +} +EXPORT_SYMBOL(__sched_trace_work_end_external); + + + +void 
__sched_trace_nv_interrupt_begin_external(u32 device) +{ + unsigned long _device = device; + sched_trace_nv_interrupt_begin(_device); +} +EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); + +void __sched_trace_nv_interrupt_end_external(void) +{ + sched_trace_nv_interrupt_end(); +} +EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); -- cgit v1.2.2 From 5d7dcfa10ea0dd283773a301e3ce610a7797d582 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Wed, 11 Jan 2012 14:37:13 -0500 Subject: PAI implementation, C-RM, C-FIFO. --- include/linux/interrupt.h | 2 +- include/litmus/fifo_common.h | 25 + include/litmus/litmus.h | 4 +- include/litmus/nvidia_info.h | 3 +- include/litmus/rm_common.h | 25 + include/litmus/rm_srt_common.h | 25 + include/litmus/sched_plugin.h | 11 + include/litmus/sched_trace.h | 8 +- include/litmus/sched_trace_external.h | 22 +- include/litmus/trace.h | 14 + kernel/sched.c | 4 +- kernel/softirq.c | 4 + kernel/workqueue.c | 2 +- litmus/Kconfig | 52 +- litmus/Makefile | 3 +- litmus/edf_common.c | 39 + litmus/fifo_common.c | 124 +++ litmus/litmus_pai_softirq.c | 64 ++ litmus/litmus_softirq.c | 2 +- litmus/nvidia_info.c | 24 +- litmus/rm_common.c | 160 ++++ litmus/rm_srt_common.c | 167 ++++ litmus/sched_cedf.c | 229 ++++- litmus/sched_cfifo.c | 1611 +++++++++++++++++++++++++++++++++ litmus/sched_crm.c | 1611 +++++++++++++++++++++++++++++++++ litmus/sched_crm_srt.c | 1611 +++++++++++++++++++++++++++++++++ litmus/sched_gsn_edf.c | 10 +- litmus/sched_plugin.c | 11 + litmus/sched_task_trace.c | 26 +- litmus/sched_trace_external.c | 23 +- 30 files changed, 5874 insertions(+), 42 deletions(-) create mode 100644 include/litmus/fifo_common.h create mode 100644 include/litmus/rm_common.h create mode 100644 include/litmus/rm_srt_common.h create mode 100644 litmus/fifo_common.c create mode 100644 litmus/litmus_pai_softirq.c create mode 100644 litmus/rm_common.c create mode 100644 litmus/rm_srt_common.c create mode 100644 litmus/sched_cfifo.c create mode 100644 litmus/sched_crm.c create mode 100644 litmus/sched_crm_srt.c diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5d22f5342376..a2f2880d5517 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -460,7 +460,7 @@ struct tasklet_struct void (*func)(unsigned long); unsigned long data; -#ifdef CONFIG_LITMUS_SOFTIRQD +#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD) struct task_struct *owner; #endif }; diff --git a/include/litmus/fifo_common.h b/include/litmus/fifo_common.h new file mode 100644 index 000000000000..12cfbfea41ee --- /dev/null +++ b/include/litmus/fifo_common.h @@ -0,0 +1,25 @@ +/* + * EDF common data structures and utility functions shared by all EDF + * based scheduler plugins + */ + +/* CLEANUP: Add comments and make it less messy. 
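+ * These are the FIFO-queue counterparts of the EDF helpers declared in
+ * include/litmus/edf_common.h.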
+ * + */ + +#ifndef __UNC_FIFO_COMMON_H__ +#define __UNC_FIFO_COMMON_H__ + +#include + +void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release); + +int fifo_higher_prio(struct task_struct* first, + struct task_struct* second); + +int fifo_ready_order(struct bheap_node* a, struct bheap_node* b); + +int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t); + +#endif diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 3df242bf272f..829c1c5ab91f 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -118,7 +118,9 @@ static inline lt_t litmus_clock(void) #define earlier_release(a, b) (lt_before(\ (a)->rt_param.job_params.release,\ (b)->rt_param.job_params.release)) - +#define shorter_period(a, b) (lt_before(\ + (a)->rt_param.task_params.period,\ + (b)->rt_param.task_params.period)) void preempt_if_preemptable(struct task_struct* t, int on_cpu); #ifdef CONFIG_LITMUS_LOCKING diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 579301d77cf5..9e07a27fdee3 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h @@ -7,7 +7,8 @@ #include -#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD +//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD +#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM int init_nvidia_info(void); diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h new file mode 100644 index 000000000000..5991b0b4e758 --- /dev/null +++ b/include/litmus/rm_common.h @@ -0,0 +1,25 @@ +/* + * EDF common data structures and utility functions shared by all EDF + * based scheduler plugins + */ + +/* CLEANUP: Add comments and make it less messy. + * + */ + +#ifndef __UNC_RM_COMMON_H__ +#define __UNC_RM_COMMON_H__ + +#include + +void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release); + +int rm_higher_prio(struct task_struct* first, + struct task_struct* second); + +int rm_ready_order(struct bheap_node* a, struct bheap_node* b); + +int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t); + +#endif diff --git a/include/litmus/rm_srt_common.h b/include/litmus/rm_srt_common.h new file mode 100644 index 000000000000..78aa287327a2 --- /dev/null +++ b/include/litmus/rm_srt_common.h @@ -0,0 +1,25 @@ +/* + * EDF common data structures and utility functions shared by all EDF + * based scheduler plugins + */ + +/* CLEANUP: Add comments and make it less messy. 
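+ * Rate-monotonic (soft real-time) counterparts of the EDF helpers in
+ * include/litmus/edf_common.h; priority is determined by task period
+ * (cf. the shorter_period() macro added to include/litmus/litmus.h).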
+ * + */ + +#ifndef __UNC_RM_SRT_COMMON_H__ +#define __UNC_RM_SRT_COMMON_H__ + +#include + +void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release); + +int rm_srt_higher_prio(struct task_struct* first, + struct task_struct* second); + +int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b); + +int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t); + +#endif diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index df50930d14a0..12a9ab65a673 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -11,6 +11,10 @@ #include #endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#endif + /************************ setup/tear down ********************/ typedef long (*activate_plugin_t) (void); @@ -69,6 +73,9 @@ typedef void (*set_prio_inh_klitirq_t)(struct task_struct* klitirqd, typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd, struct task_struct* old_owner); + +typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet); + /********************* sys call backends ********************/ /* This function causes the caller to sleep until the next release */ typedef long (*complete_job_t) (void); @@ -115,6 +122,10 @@ struct sched_plugin { set_prio_inh_klitirq_t set_prio_inh_klitirqd; clear_prio_inh_klitirqd_t clear_prio_inh_klitirqd; #endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + enqueue_pai_tasklet_t enqueue_pai_tasklet; +#endif } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 1486c778aff8..232c7588d103 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h @@ -127,13 +127,13 @@ struct st_effective_priority_change_data { struct st_nv_interrupt_begin_data { u64 when; u32 device; - u8 __unused[4]; + u32 serialNumber; } __attribute__((packed)); struct st_nv_interrupt_end_data { u64 when; u32 device; - u8 __unused[4]; + u32 serialNumber; } __attribute__((packed)); #define DATA(x) struct st_ ## x ## _data x; @@ -328,8 +328,8 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, #define sched_trace_nv_interrupt_begin(d) \ SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d) -#define sched_trace_nv_interrupt_end() \ - SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, 0ul) +#define sched_trace_nv_interrupt_end(d) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h index c2c872639880..90424d5c564c 100644 --- a/include/litmus/sched_trace_external.h +++ b/include/litmus/sched_trace_external.h @@ -34,9 +34,25 @@ static inline void sched_trace_nv_interrupt_begin_external(u32 device) __sched_trace_nv_interrupt_begin_external(device); } -extern void __sched_trace_nv_interrupt_end_external(void); -static inline void sched_trace_nv_interrupt_end_external(void) +extern void __sched_trace_nv_interrupt_end_external(u32 device); +static inline void sched_trace_nv_interrupt_end_external(u32 device) { - __sched_trace_nv_interrupt_end_external(); + __sched_trace_nv_interrupt_end_external(device); } + +#ifdef CONFIG_LITMUS_NVIDIA + +#define EX_TS(evt) \ +extern void __##evt(void); \ +static inline void EX_##evt(void) { __##evt(); } + +EX_TS(TS_NV_TOPISR_START) +EX_TS(TS_NV_TOPISR_END) +EX_TS(TS_NV_BOTISR_START) +EX_TS(TS_NV_BOTISR_END) 
+EX_TS(TS_NV_RELEASE_BOTISR_START) +EX_TS(TS_NV_RELEASE_BOTISR_END) + +#endif + #endif diff --git a/include/litmus/trace.h b/include/litmus/trace.h index 05f487263f28..aa3ee4a6757b 100644 --- a/include/litmus/trace.h +++ b/include/litmus/trace.h @@ -100,4 +100,18 @@ feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu) #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) + +#ifdef CONFIG_LITMUS_NVIDIA + +#define TS_NV_TOPISR_START TIMESTAMP(200) +#define TS_NV_TOPISR_END TIMESTAMP(201) + +#define TS_NV_BOTISR_START TIMESTAMP(202) +#define TS_NV_BOTISR_END TIMESTAMP(203) + +#define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204) +#define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205) + +#endif + #endif /* !_SYS_TRACE_H_ */ diff --git a/kernel/sched.c b/kernel/sched.c index 3162605ffc91..3aa2be09122b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3897,8 +3897,10 @@ need_resched_nonpreemptible: if (need_resched()) goto need_resched; +#ifdef LITMUS_SOFTIRQD reacquire_klitirqd_lock(prev); - +#endif + srp_ceiling_block(); } EXPORT_SYMBOL(schedule); diff --git a/kernel/softirq.c b/kernel/softirq.c index be4b8fab3637..ae77c5c1d17e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -35,6 +35,7 @@ #ifdef CONFIG_LITMUS_NVIDIA #include +#include #endif /* @@ -441,6 +442,9 @@ void __tasklet_schedule(struct tasklet_struct *t) if(likely(_litmus_tasklet_schedule(t,nvidia_device))) { unlock_nv_registry(nvidia_device, &flags); + + TS_NV_RELEASE_BOTISR_END; + return; } else diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8139208eaee1..637cadac2627 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2531,7 +2531,7 @@ EXPORT_SYMBOL(cancel_delayed_work_sync); */ int schedule_work(struct work_struct *work) { -#ifdef CONFIG_LITMUS_NVIDIA +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) if(is_nvidia_func(work->func)) { u32 nvidiaDevice = get_work_nv_device_num(work); diff --git a/litmus/Kconfig b/litmus/Kconfig index 7e865d4dd703..5109cf7db7f6 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -218,18 +218,41 @@ config LITMUS_THREAD_ALL_SOFTIRQ If unsure, say No. + +choice + prompt "Scheduling of interrupt bottom-halves in Litmus." + default LITMUS_SOFTIRQD_NONE + depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ + help + Schedule tasklets with known priorities in Litmus. + +config LITMUS_SOFTIRQD_NONE + bool "No tasklet scheduling in Litmus." + help + Don't schedule tasklets in Litmus. Default. + config LITMUS_SOFTIRQD - bool "Spawn klitirqd interrupt handling threads." - depends on LITMUS_LOCKING - default n - help - Create klitirqd interrupt handling threads. Work must be - specifically dispatched to these workers. (Softirqs for - Litmus tasks are not magically redirected to klitirqd.) + bool "Spawn klitirqd interrupt handling threads." + help + Create klitirqd interrupt handling threads. Work must be + specifically dispatched to these workers. (Softirqs for + Litmus tasks are not magically redirected to klitirqd.) - G-EDF ONLY for now! + G-EDF/RM, C-EDF/RM ONLY for now! - If unsure, say No. + +config LITMUS_PAI_SOFTIRQD + bool "Defer tasklets to context switch points." + help + Only execute scheduled tasklet bottom halves at + scheduling points. Trades context switch overhead + at the cost of non-preemptive durations of bottom half + processing. + + G-EDF/RM, C-EDF/RM ONLY for now! + +endchoice + config NR_LITMUS_SOFTIRQD int "Number of klitirqd." 
@@ -241,13 +264,22 @@ config NR_LITMUS_SOFTIRQD config LITMUS_NVIDIA bool "Litmus handling of NVIDIA interrupts." - depends on LITMUS_SOFTIRQD + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD default n help Direct tasklets from NVIDIA devices to Litmus's klitirqd. If unsure, say No. +config NV_DEVICE_NUM + int "Number of NVIDIA GPUs." + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD + range 1 4096 + default "1" + help + Should be (<= to the number of CPUs) and + (<= to the number of GPUs) in your system. + choice prompt "CUDA/Driver Version Support" default CUDA_4_0 diff --git a/litmus/Makefile b/litmus/Makefile index 892e01c2e1b3..869939e2270c 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -19,7 +19,7 @@ obj-y = sched_plugin.o litmus.o \ sched_gsn_edf.o \ sched_psn_edf.o -obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o +obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o sched_cfifo.o fifo_common.o sched_crm.o rm_common.o sched_crm_srt.o rm_srt_common.o obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o @@ -29,4 +29,5 @@ obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o +obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o diff --git a/litmus/edf_common.c b/litmus/edf_common.c index fbd67ab5f467..0a06d7a26c00 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -63,7 +63,45 @@ int edf_higher_prio(struct task_struct* first, #endif + if (!is_realtime(second_task)) + return true; + + if (earlier_deadline(first_task, second_task)) + return true; + + if (get_deadline(first_task) == get_deadline(second_task)) + { + if (shorter_period(first_task, second_task)) + { + return true; + } + if (get_rt_period(first_task) == get_rt_period(second_task)) + { +#ifdef CONFIG_LITMUS_SOFTIRQD + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread) + { + return true; + } + if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) + { +#endif + if (first_task->pid < second_task->pid) + { + return true; + } + if (first_task->pid == second_task->pid) + { + return !second->rt_param.inh_task; + } +#ifdef CONFIG_LITMUS_SOFTIRQD + } +#endif + } + } + + return false; +#if 0 return !is_realtime(second_task) || #ifdef CONFIG_LITMUS_SOFTIRQD @@ -88,6 +126,7 @@ int edf_higher_prio(struct task_struct* first, */ (first_task->pid == second_task->pid && !second->rt_param.inh_task))); +#endif } int edf_ready_order(struct bheap_node* a, struct bheap_node* b) diff --git a/litmus/fifo_common.c b/litmus/fifo_common.c new file mode 100644 index 000000000000..c94510a171d9 --- /dev/null +++ b/litmus/fifo_common.c @@ -0,0 +1,124 @@ +/* + * kernel/fifo_common.c + * + * Common functions for EDF based scheduler. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +/* fifo_higher_prio - returns true if first has a higher EDF priority + * than second. Deadline ties are broken by PID. + * + * both first and second may be NULL + */ +int fifo_higher_prio(struct task_struct* first, + struct task_struct* second) +{ + struct task_struct *first_task = first; + struct task_struct *second_task = second; + + /* There is no point in comparing a task to itself. 
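+ * (Overall ordering: non-real-time tasks and, absent inheritance, proxy
+ * threads lose; otherwise the job with the earlier release time wins and
+ * release-time ties are broken by PID.)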
*/ + if (first && first == second) { + TRACE_TASK(first, + "WARNING: pointless edf priority comparison.\n"); + return 0; + } + + + /* check for NULL tasks */ + if (!first || !second) + return first && !second; + +#ifdef CONFIG_LITMUS_LOCKING + + /* Check for inherited priorities. Change task + * used for comparison in such a case. + */ + if (unlikely(first->rt_param.inh_task)) + first_task = first->rt_param.inh_task; + if (unlikely(second->rt_param.inh_task)) + second_task = second->rt_param.inh_task; + + /* Check for priority boosting. Tie-break by start of boosting. + */ + if (unlikely(is_priority_boosted(first_task))) { + /* first_task is boosted, how about second_task? */ + if (!is_priority_boosted(second_task) || + lt_before(get_boost_start(first_task), + get_boost_start(second_task))) + return 1; + else + return 0; + } else if (unlikely(is_priority_boosted(second_task))) + /* second_task is boosted, first is not*/ + return 0; + +#endif + + + return !is_realtime(second_task) || + +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy threads always lose w/o inheritance. */ + (first_task->rt_param.is_proxy_thread < + second_task->rt_param.is_proxy_thread) || +#endif + + /* is the deadline of the first task earlier? + * Then it has higher priority. + */ + earlier_release(first_task, second_task) || + + /* Do we have a deadline tie? + * Then break by PID. + */ + (get_release(first_task) == get_release(second_task) && + (first_task->pid < second_task->pid || + + /* If the PIDs are the same then the task with the inherited + * priority wins. + */ + (first_task->pid == second_task->pid && + !second->rt_param.inh_task))); +} + +int fifo_ready_order(struct bheap_node* a, struct bheap_node* b) +{ + return fifo_higher_prio(bheap2task(a), bheap2task(b)); +} + +void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release) +{ + rt_domain_init(rt, fifo_ready_order, resched, release); +} + +/* need_to_preempt - check whether the task t needs to be preempted + * call only with irqs disabled and with ready_lock acquired + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! + */ +int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t) +{ + /* we need the read lock for fifo_ready_queue */ + /* no need to preempt if there is nothing pending */ + if (!__jobs_pending(rt)) + return 0; + /* we need to reschedule if t doesn't exist */ + if (!t) + return 1; + + /* NOTE: We cannot check for non-preemptibility since we + * don't know what address space we're currently in. + */ + + /* make sure to get non-rt stuff out of the way */ + return !is_realtime(t) || fifo_higher_prio(__next_ready(rt), t); +} diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c new file mode 100644 index 000000000000..b31eeb8a2538 --- /dev/null +++ b/litmus/litmus_pai_softirq.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + + + +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + ret = litmus->enqueue_pai_tasklet(t); + + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_schedule); + + + +// failure causes default Linux handling. 
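+// (The PAI path only re-routes regular tasklets through the active plugin's
+// enqueue_pai_tasklet() callback above; the hi-priority tasklet and
+// work-queue hooks below return 0 so callers fall back to the stock Linux
+// softirq/workqueue handling.)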
+int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + return(ret); +} +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); + + +// failure causes default Linux handling. +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + return(ret); +} +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); + + +// failure causes default Linux handling. +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + return(ret); +} +EXPORT_SYMBOL(__litmus_schedule_work); + diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 271e770dbaea..f5cca964b6c6 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -1166,7 +1166,7 @@ int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); BUG(); } - + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) { TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 78f035244d21..d17152138c63 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -361,6 +361,7 @@ int get_nv_device_id(struct task_struct* owner) static int __reg_nv_device(int reg_device_id) { + int ret = 0; struct task_struct* old = cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner, NULL, @@ -370,16 +371,21 @@ static int __reg_nv_device(int reg_device_id) if(likely(old == NULL)) { +#ifdef CONFIG_LITMUS_SOFTIRQD down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); +#endif TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); - return(0); } else { TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); - return(-EBUSY); + ret = -EBUSY; } - + + return(ret); + + + #if 0 //unsigned long flags; //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags); @@ -411,19 +417,22 @@ static int __reg_nv_device(int reg_device_id) static int __clear_reg_nv_device(int de_reg_device_id) { - int ret; - unsigned long flags; - struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); + int ret = 0; struct task_struct* old; +#ifdef CONFIG_LITMUS_SOFTIRQD + unsigned long flags; + struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); lock_nv_registry(de_reg_device_id, &flags); +#endif old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner, current, NULL); mb(); - + +#ifdef CONFIG_LITMUS_SOFTIRQD if(likely(old == current)) { flush_pending(klitirqd_th, current); @@ -448,6 +457,7 @@ static int __clear_reg_nv_device(int de_reg_device_id) TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n", __FUNCTION__, de_reg_device_id); } +#endif return(ret); } diff --git a/litmus/rm_common.c b/litmus/rm_common.c new file mode 100644 index 000000000000..88f83bcbd9d8 --- /dev/null +++ b/litmus/rm_common.c @@ -0,0 +1,160 @@ +/* + * kernel/rm_common.c + * + * Common functions for EDF based scheduler. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +/* rm_higher_prio - returns true if first has a higher EDF priority + * than second. Deadline ties are broken by PID. + * + * both first and second may be NULL + */ +int rm_higher_prio(struct task_struct* first, + struct task_struct* second) +{ + struct task_struct *first_task = first; + struct task_struct *second_task = second; + + /* There is no point in comparing a task to itself. 
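+ * (Overall ordering is rate-monotonic: the task with the shorter period has
+ * higher priority; only on a period tie do the proxy-thread and PID
+ * comparisons below apply.)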
*/ + if (first && first == second) { + TRACE_TASK(first, + "WARNING: pointless edf priority comparison.\n"); + return 0; + } + + + /* check for NULL tasks */ + if (!first || !second) + return first && !second; + +#ifdef CONFIG_LITMUS_LOCKING + + /* Check for inherited priorities. Change task + * used for comparison in such a case. + */ + if (unlikely(first->rt_param.inh_task)) + first_task = first->rt_param.inh_task; + if (unlikely(second->rt_param.inh_task)) + second_task = second->rt_param.inh_task; + + /* Check for priority boosting. Tie-break by start of boosting. + */ + if (unlikely(is_priority_boosted(first_task))) { + /* first_task is boosted, how about second_task? */ + if (!is_priority_boosted(second_task) || + lt_before(get_boost_start(first_task), + get_boost_start(second_task))) + return 1; + else + return 0; + } else if (unlikely(is_priority_boosted(second_task))) + /* second_task is boosted, first is not*/ + return 0; + +#endif + + if (!is_realtime(second_task)) + return true; + + if (shorter_period(first_task, second_task)) + return true; + + if (get_rt_period(first_task) == get_rt_period(second_task)) + { +#ifdef CONFIG_LITMUS_SOFTIRQD + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread) + { + return true; + } + if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) + { +#endif + if (first_task->pid < second_task->pid) + { + return true; + } + if (first_task->pid == second_task->pid) + { + return !second->rt_param.inh_task; + } +#ifdef CONFIG_LITMUS_SOFTIRQD + } +#endif + } + + return false; + +#if 0 + return !is_realtime(second_task) || + shorter_period(first_task, second_task) || + ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task)) + +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy threads always lose w/o inheritance. */ + (first_task->rt_param.is_proxy_thread < + second_task->rt_param.is_proxy_thread) || +#endif + + /* is the period of the first task shorter? + * Then it has higher priority. + */ + shorter_period(first_task, second_task) || + + (earlier_deadline(first_task, second_task) || + + /* Do we have a deadline tie? + * Then break by PID. + */ + (get_rt_period(first_task) == get_rt_period(second_task) && + (first_task->pid < second_task->pid || + + /* If the PIDs are the same then the task with the inherited + * priority wins. + */ + (first_task->pid == second_task->pid && + !second->rt_param.inh_task))); +#endif +} + +int rm_ready_order(struct bheap_node* a, struct bheap_node* b) +{ + return rm_higher_prio(bheap2task(a), bheap2task(b)); +} + +void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release) +{ + rt_domain_init(rt, rm_ready_order, resched, release); +} + +/* need_to_preempt - check whether the task t needs to be preempted + * call only with irqs disabled and with ready_lock acquired + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! + */ +int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t) +{ + /* we need the read lock for rm_ready_queue */ + /* no need to preempt if there is nothing pending */ + if (!__jobs_pending(rt)) + return 0; + /* we need to reschedule if t doesn't exist */ + if (!t) + return 1; + + /* NOTE: We cannot check for non-preemptibility since we + * don't know what address space we're currently in. 
+ */ + + /* make sure to get non-rt stuff out of the way */ + return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t); +} diff --git a/litmus/rm_srt_common.c b/litmus/rm_srt_common.c new file mode 100644 index 000000000000..f58a8007678f --- /dev/null +++ b/litmus/rm_srt_common.c @@ -0,0 +1,167 @@ +/* + * kernel/rm_common.c + * + * Common functions for EDF based scheduler. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +/* rm_srt_higher_prio - returns true if first has a higher EDF priority + * than second. Deadline ties are broken by PID. + * + * both first and second may be NULL + */ +int rm_srt_higher_prio(struct task_struct* first, + struct task_struct* second) +{ + struct task_struct *first_task = first; + struct task_struct *second_task = second; + + /* There is no point in comparing a task to itself. */ + if (first && first == second) { + TRACE_TASK(first, + "WARNING: pointless edf priority comparison.\n"); + return 0; + } + + + /* check for NULL tasks */ + if (!first || !second) + return first && !second; + +#ifdef CONFIG_LITMUS_LOCKING + + /* Check for inherited priorities. Change task + * used for comparison in such a case. + */ + if (unlikely(first->rt_param.inh_task)) + first_task = first->rt_param.inh_task; + if (unlikely(second->rt_param.inh_task)) + second_task = second->rt_param.inh_task; + + /* Check for priority boosting. Tie-break by start of boosting. + */ + if (unlikely(is_priority_boosted(first_task))) { + /* first_task is boosted, how about second_task? */ + if (!is_priority_boosted(second_task) || + lt_before(get_boost_start(first_task), + get_boost_start(second_task))) + return 1; + else + return 0; + } else if (unlikely(is_priority_boosted(second_task))) + /* second_task is boosted, first is not*/ + return 0; + +#endif + + if (!is_realtime(second_task)) + return true; + + if (shorter_period(first_task, second_task)) + return true; + + if (get_rt_period(first_task) == get_rt_period(second_task)) + { + if (earlier_deadline(first_task, second_task)) + { + return true; + } + if(get_deadline(first_task) == get_deadline(second_task)) + { +#ifdef CONFIG_LITMUS_SOFTIRQD + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread) + { + return true; + } + if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) + { +#endif + if (first_task->pid < second_task->pid) + { + return true; + } + if (first_task->pid == second_task->pid) + { + return !second->rt_param.inh_task; + } +#ifdef CONFIG_LITMUS_SOFTIRQD + } +#endif + } + } + + return false; + +#if 0 + return !is_realtime(second_task) || + shorter_period(first_task, second_task) || + ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task)) + +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy threads always lose w/o inheritance. */ + (first_task->rt_param.is_proxy_thread < + second_task->rt_param.is_proxy_thread) || +#endif + + /* is the period of the first task shorter? + * Then it has higher priority. + */ + shorter_period(first_task, second_task) || + + (earlier_deadline(first_task, second_task) || + + /* Do we have a deadline tie? + * Then break by PID. + */ + (get_rt_period(first_task) == get_rt_period(second_task) && + (first_task->pid < second_task->pid || + + /* If the PIDs are the same then the task with the inherited + * priority wins. 
+ */ + (first_task->pid == second_task->pid && + !second->rt_param.inh_task))); +#endif +} + +int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b) +{ + return rm_srt_higher_prio(bheap2task(a), bheap2task(b)); +} + +void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release) +{ + rt_domain_init(rt, rm_srt_ready_order, resched, release); +} + +/* need_to_preempt - check whether the task t needs to be preempted + * call only with irqs disabled and with ready_lock acquired + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! + */ +int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t) +{ + /* we need the read lock for rm_ready_queue */ + /* no need to preempt if there is nothing pending */ + if (!__jobs_pending(rt)) + return 0; + /* we need to reschedule if t doesn't exist */ + if (!t) + return 1; + + /* NOTE: We cannot check for non-preemptibility since we + * don't know what address space we're currently in. + */ + + /* make sure to get non-rt stuff out of the way */ + return !is_realtime(t) || rm_srt_higher_prio(__next_ready(rt), t); +} diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 9b0a8d3b624d..f0356de60b2f 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -55,6 +55,10 @@ #include #endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#endif + #ifdef CONFIG_LITMUS_NVIDIA #include #endif @@ -91,6 +95,15 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); #define test_will_schedule(cpu) \ (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; +#endif + /* * In C-EDF there is a cedf domain _per_ cluster * The number of clusters is dynamically determined accordingly to the @@ -108,6 +121,12 @@ typedef struct clusterdomain { struct bheap cpu_heap; /* lock for this cluster */ #define cedf_lock domain.ready_lock + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + struct tasklet_head pending_tasklets; +#endif + } cedf_domain_t; /* a cedf_domain per cluster; allocation is done at init/activation time */ @@ -395,6 +414,198 @@ static void cedf_tick(struct task_struct* t) } } + + + + + + + + + + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + + +void __do_lit_tasklet(struct tasklet_struct* tasklet) +{ + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d.\n", __FUNCTION__, tasklet->owner->pid); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + +} + +void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* next) +{ + int work_to_do = 1; + struct tasklet_struct *tasklet = NULL; + + TRACE("%s: entered.\n", __FUNCTION__); + + while(work_to_do) { + // remove tasklet at head of list if it has higher priority. + raw_spin_lock(&cluster->cedf_lock); + // remove tasklet at head. 
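+			// (Only dequeue the head tasklet if its owner has higher priority than
+			// 'next', the task this CPU is about to schedule; otherwise leave it
+			// queued for a later scheduling point.)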
+ if(cluster->pending_tasklets.head != NULL) { + tasklet = cluster->pending_tasklets.head; + + if(edf_higher_prio(tasklet->owner, next)) { + // remove the tasklet from the queue + cluster->pending_tasklets.head = tasklet->next; + + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + } + else { + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); + tasklet = NULL; + } + } + else { + //TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + } + raw_spin_unlock(&cluster->cedf_lock); + + if(tasklet) { + __do_lit_tasklet(tasklet); + tasklet = NULL; + } + else { + work_to_do = 0; + } + } + + TRACE("%s: exited.\n", __FUNCTION__); +} + + +void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) +{ + struct tasklet_struct* step; + + step = cluster->pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n"); + while(step != NULL){ + TRACE("%s: %d\n", __FUNCTION__, step->owner); + step = step->next; + } + TRACE("%s: done.\n", __FUNCTION__); + + + step = cluster->pending_tasklets.head; + if(step == NULL) { + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); + // insert at tail. + tasklet->next = NULL; + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &tasklet->next; + } + else if((*cluster->pending_tasklets.tail != NULL) && + edf_higher_prio((*cluster->pending_tasklets.tail)->owner, tasklet->owner)) { + // insert at tail. + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); + + tasklet->next = NULL; + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &tasklet->next; + } + else { + // insert the tasklet somewhere in the middle. + + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { + step = step->next; + } + + // insert tasklet right before step->next. + + TRACE("%s: tasklet belongs at end. inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); + + tasklet->next = step->next; + step->next = tasklet; + + // patch up the head if needed. + if(cluster->pending_tasklets.head == step) + { + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.head = tasklet; + } + } + + + step = cluster->pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %d\n", __FUNCTION__, step->owner); + step = step->next; + } + TRACE("%s: done.\n", __FUNCTION__); + +// TODO: Maintain this list in priority order. 
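+// (The commented-out tail-append below is the original insertion path; the
+// logic above already keeps the list ordered by edf_higher_prio(), so the
+// TODO appears to be addressed.)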
+// tasklet->next = NULL; +// *(cluster->pending_tasklets.tail) = tasklet; +// cluster->pending_tasklets.tail = &tasklet->next; +} + +int enqueue_pai_tasklet(struct tasklet_struct* tasklet) +{ + cedf_domain_t* cluster = task_cpu_cluster(tasklet->owner); + cpu_entry_t *lowest; + unsigned long flags; + + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); + + lowest = lowest_prio_cpu(cluster); + if (edf_higher_prio(tasklet->owner, lowest->linked)) { + if (smp_processor_id() == lowest->cpu) { + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + // execute the tasklet now. + __do_lit_tasklet(tasklet); + } + else { + // preempt the lowest CPU + __add_pai_tasklet(tasklet, cluster); + + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, lowest->cpu); + + preempt(lowest); + } + } + + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); + + return(1); // success +} + + +#endif + + + + + + + + + + + + + + + + + /* Getting schedule() right is a bit tricky. schedule() may not make any * assumptions on the state of the current task since it may be called for a * number of reasons. The reasons include a scheduler_tick() determined that it @@ -507,8 +718,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) next = prev; sched_state_task_picked(); + raw_spin_unlock(&cluster->cedf_lock); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + do_lit_tasklets(cluster, next); +#endif + #ifdef WANT_ALL_SCHED_EVENTS TRACE("cedf_lock released, next=0x%p\n", next); @@ -518,7 +734,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) TRACE("becomes idle at %llu.\n", litmus_clock()); #endif - return next; } @@ -1467,6 +1682,13 @@ static long cedf_activate_plugin(void) bheap_init(&(cedf[i].cpu_heap)); edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + cedf[i].pending_tasklets.head = NULL; + cedf[i].pending_tasklets.tail = &cedf[i].pending_tasklets.head; +#endif + + if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) return -ENOMEM; } @@ -1578,7 +1800,10 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_SOFTIRQD .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, -#endif +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = enqueue_pai_tasklet, +#endif }; static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; diff --git a/litmus/sched_cfifo.c b/litmus/sched_cfifo.c new file mode 100644 index 000000000000..f515446f76ed --- /dev/null +++ b/litmus/sched_cfifo.c @@ -0,0 +1,1611 @@ +/* + * litmus/sched_cfifo.c + * + * Implementation of the C-FIFO scheduling algorithm. + * + * This implementation is based on G-EDF: + * - CPUs are clustered around L2 or L3 caches. + * - Clusters topology is automatically detected (this is arch dependent + * and is working only on x86 at the moment --- and only with modern + * cpus that exports cpuid4 information) + * - The plugins _does not_ attempt to put tasks in the right cluster i.e. + * the programmer needs to be aware of the topology to place tasks + * in the desired cluster + * - default clustering is around L2 cache (cache index = 2) + * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all + * online_cpus are placed in a single cluster). 
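+ * - Unlike C-EDF, jobs within a cluster are ordered by job release time
+ *   (FIFO) via fifo_higher_prio() in litmus/fifo_common.c, not by deadline.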
+ * + * For details on functions, take a look at sched_gsn_edf.c + * + * Currently, we do not support changes in the number of online cpus. + * If the num_online_cpus() dynamically changes, the plugin is broken. + * + * This version uses the simple approach and serializes all scheduling + * decisions by the use of a queue lock. This is probably not the + * best way to do it, but it should suffice for now. + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +/* to configure the cluster size */ +#include + +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + +/* Reference configuration variable. Determines which cache level is used to + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that + * all CPUs form a single cluster (just like GSN-EDF). + */ +static enum cache_level cluster_config = GLOBAL_CLUSTER; + +struct clusterdomain; + +/* cpu_entry_t - maintain the linked and scheduled state + * + * A cpu also contains a pointer to the cfifo_domain_t cluster + * that owns it (struct clusterdomain*) + */ +typedef struct { + int cpu; + struct clusterdomain* cluster; /* owning cluster */ + struct task_struct* linked; /* only RT tasks */ + struct task_struct* scheduled; /* only RT tasks */ + atomic_t will_schedule; /* prevent unneeded IPIs */ + struct bheap_node* hn; +} cpu_entry_t; + +/* one cpu_entry_t per CPU */ +DEFINE_PER_CPU(cpu_entry_t, cfifo_cpu_entries); + +#define set_will_schedule() \ + (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 1)) +#define clear_will_schedule() \ + (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 0)) +#define test_will_schedule(cpu) \ + (atomic_read(&per_cpu(cfifo_cpu_entries, cpu).will_schedule)) + +/* + * In C-FIFO there is a cfifo domain _per_ cluster + * The number of clusters is dynamically determined accordingly to the + * total cpu number and the cluster size + */ +typedef struct clusterdomain { + /* rt_domain for this cluster */ + rt_domain_t domain; + /* cpus in this cluster */ + cpu_entry_t* *cpus; + /* map of this cluster cpus */ + cpumask_var_t cpu_map; + /* the cpus queue themselves according to priority in here */ + struct bheap_node *heap_node; + struct bheap cpu_heap; + /* lock for this cluster */ +#define cfifo_lock domain.ready_lock +} cfifo_domain_t; + +/* a cfifo_domain per cluster; allocation is done at init/activation time */ +cfifo_domain_t *cfifo; + +#define remote_cluster(cpu) ((cfifo_domain_t *) per_cpu(cfifo_cpu_entries, cpu).cluster) +#define task_cpu_cluster(task) remote_cluster(get_partition(task)) + +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose + * information during the initialization of the plugin (e.g., topology) +#define WANT_ALL_SCHED_EVENTS + */ +#define VERBOSE_INIT + +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) +{ + cpu_entry_t *a, *b; + a = _a->value; + b = _b->value; + /* Note that a and b are inverted: we want the lowest-priority CPU at + * the top of the heap. + */ + return fifo_higher_prio(b->linked, a->linked); +} + +/* update_cpu_position - Move the cpu entry to the correct place to maintain + * order in the cpu queue. Caller must hold cfifo lock. 
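+ * (The heap keeps the lowest-priority CPU at the root, so lowest_prio_cpu()
+ * below is a simple bheap_peek().)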
+ */ +static void update_cpu_position(cpu_entry_t *entry) +{ + cfifo_domain_t *cluster = entry->cluster; + + if (likely(bheap_node_in_heap(entry->hn))) + bheap_delete(cpu_lower_prio, + &cluster->cpu_heap, + entry->hn); + + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); +} + +/* caller must hold cfifo lock */ +static cpu_entry_t* lowest_prio_cpu(cfifo_domain_t *cluster) +{ + struct bheap_node* hn; + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); + return hn->value; +} + + +/* link_task_to_cpu - Update the link of a CPU. + * Handles the case where the to-be-linked task is already + * scheduled on a different CPU. + */ +static noinline void link_task_to_cpu(struct task_struct* linked, + cpu_entry_t *entry) +{ + cpu_entry_t *sched; + struct task_struct* tmp; + int on_cpu; + + BUG_ON(linked && !is_realtime(linked)); + + /* Currently linked task is set to be unlinked. */ + if (entry->linked) { + entry->linked->rt_param.linked_on = NO_CPU; + } + + /* Link new task to CPU. */ + if (linked) { + set_rt_flags(linked, RT_F_RUNNING); + /* handle task is already scheduled somewhere! */ + on_cpu = linked->rt_param.scheduled_on; + if (on_cpu != NO_CPU) { + sched = &per_cpu(cfifo_cpu_entries, on_cpu); + /* this should only happen if not linked already */ + BUG_ON(sched->linked == linked); + + /* If we are already scheduled on the CPU to which we + * wanted to link, we don't need to do the swap -- + * we just link ourselves to the CPU and depend on + * the caller to get things right. + */ + if (entry != sched) { + TRACE_TASK(linked, + "already scheduled on %d, updating link.\n", + sched->cpu); + tmp = sched->linked; + linked->rt_param.linked_on = sched->cpu; + sched->linked = linked; + update_cpu_position(sched); + linked = tmp; + } + } + if (linked) /* might be NULL due to swap */ + linked->rt_param.linked_on = entry->cpu; + } + entry->linked = linked; +#ifdef WANT_ALL_SCHED_EVENTS + if (linked) + TRACE_TASK(linked, "linked to %d.\n", entry->cpu); + else + TRACE("NULL linked to %d.\n", entry->cpu); +#endif + update_cpu_position(entry); +} + +/* unlink - Make sure a task is not linked any longer to an entry + * where it was linked before. Must hold cfifo_lock. + */ +static noinline void unlink(struct task_struct* t) +{ + cpu_entry_t *entry; + + if (t->rt_param.linked_on != NO_CPU) { + /* unlink */ + entry = &per_cpu(cfifo_cpu_entries, t->rt_param.linked_on); + t->rt_param.linked_on = NO_CPU; + link_task_to_cpu(NULL, entry); + } else if (is_queued(t)) { + /* This is an interesting situation: t is scheduled, + * but was just recently unlinked. It cannot be + * linked anywhere else (because then it would have + * been relinked to this CPU), thus it must be in some + * queue. We must remove it from the list in this + * case. + * + * in C-FIFO case is should be somewhere in the queue for + * its domain, therefore and we can get the domain using + * task_cpu_cluster + */ + remove(&(task_cpu_cluster(t))->domain, t); + } +} + + +/* preempt - force a CPU to reschedule + */ +static void preempt(cpu_entry_t *entry) +{ + preempt_if_preemptable(entry->scheduled, entry->cpu); +} + +/* requeue - Put an unlinked task into gsn-edf domain. + * Caller must hold cfifo_lock. 
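+ * (The task is placed in its cluster's C-FIFO domain: onto the ready queue
+ * if its job has been released, otherwise onto the release queue.)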
+ */ +static noinline void requeue(struct task_struct* task) +{ + cfifo_domain_t *cluster = task_cpu_cluster(task); + BUG_ON(!task); + /* sanity check before insertion */ + BUG_ON(is_queued(task)); + + if (is_released(task, litmus_clock())) + __add_ready(&cluster->domain, task); + else { + /* it has got to wait */ + add_release(&cluster->domain, task); + } +} + +#ifdef CONFIG_SCHED_CPU_AFFINITY +static cpu_entry_t* cfifo_get_nearest_available_cpu( + cfifo_domain_t *cluster, cpu_entry_t* start) +{ + cpu_entry_t* affinity; + + get_nearest_available_cpu(affinity, start, cfifo_cpu_entries, -1); + + /* make sure CPU is in our cluster */ + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) + return(affinity); + else + return(NULL); +} +#endif + + +/* check for any necessary preemptions */ +static void check_for_preemptions(cfifo_domain_t *cluster) +{ + struct task_struct *task; + cpu_entry_t *last; + + for(last = lowest_prio_cpu(cluster); + fifo_preemption_needed(&cluster->domain, last->linked); + last = lowest_prio_cpu(cluster)) { + /* preemption necessary */ + task = __take_ready(&cluster->domain); +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = + cfifo_get_nearest_available_cpu(cluster, + &per_cpu(cfifo_cpu_entries, task_cpu(task))); + if(affinity) + last = affinity; + else if(last->linked) + requeue(last->linked); + } +#else + if (last->linked) + requeue(last->linked); +#endif + TRACE("check_for_preemptions: attempting to link task %d to %d\n", + task->pid, last->cpu); + link_task_to_cpu(task, last); + preempt(last); + } +} + +/* cfifo_job_arrival: task is either resumed or released */ +static noinline void cfifo_job_arrival(struct task_struct* task) +{ + cfifo_domain_t *cluster = task_cpu_cluster(task); + BUG_ON(!task); + + requeue(task); + check_for_preemptions(cluster); +} + +static void cfifo_release_jobs(rt_domain_t* rt, struct bheap* tasks) +{ + cfifo_domain_t* cluster = container_of(rt, cfifo_domain_t, domain); + unsigned long flags; + + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); + + __merge_ready(&cluster->domain, tasks); + check_for_preemptions(cluster); + + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); +} + +/* caller holds cfifo_lock */ +static noinline void job_completion(struct task_struct *t, int forced) +{ + BUG_ON(!t); + + sched_trace_task_completion(t, forced); + +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + + TRACE_TASK(t, "job_completion().\n"); + + /* set flags */ + set_rt_flags(t, RT_F_SLEEP); + /* prepare for next period */ + prepare_for_next_period(t); + if (is_released(t, litmus_clock())) + sched_trace_task_release(t); + /* unlink */ + unlink(t); + /* requeue + * But don't requeue a blocking task. */ + if (is_running(t)) + cfifo_job_arrival(t); +} + +/* cfifo_tick - this function is called for every local timer + * interrupt. 
+ * + * checks whether the current task has expired and checks + * whether we need to preempt it if it has not expired + */ +static void cfifo_tick(struct task_struct* t) +{ + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { + if (!is_np(t)) { + /* np tasks will be preempted when they become + * preemptable again + */ + litmus_reschedule_local(); + set_will_schedule(); + TRACE("cfifo_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("cfifo_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } + } +} + +/* Getting schedule() right is a bit tricky. schedule() may not make any + * assumptions on the state of the current task since it may be called for a + * number of reasons. The reasons include a scheduler_tick() determined that it + * was necessary, because sys_exit_np() was called, because some Linux + * subsystem determined so, or even (in the worst case) because there is a bug + * hidden somewhere. Thus, we must take extreme care to determine what the + * current state is. + * + * The CPU could currently be scheduling a task (or not), be linked (or not). + * + * The following assertions for the scheduled task could hold: + * + * - !is_running(scheduled) // the job blocks + * - scheduled->timeslice == 0 // the job completed (forcefully) + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) + * - linked != scheduled // we need to reschedule (for any reason) + * - is_np(scheduled) // rescheduling must be delayed, + * sys_exit_np must be requested + * + * Any of these can occur together. + */ +static struct task_struct* cfifo_schedule(struct task_struct * prev) +{ + cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries); + cfifo_domain_t *cluster = entry->cluster; + int out_of_time, sleep, preempt, np, exists, blocks; + struct task_struct* next = NULL; + + raw_spin_lock(&cluster->cfifo_lock); + clear_will_schedule(); + + /* sanity checking */ + BUG_ON(entry->scheduled && entry->scheduled != prev); + BUG_ON(entry->scheduled && !is_realtime(prev)); + BUG_ON(is_realtime(prev) && !entry->scheduled); + + /* (0) Determine state */ + exists = entry->scheduled != NULL; + blocks = exists && !is_running(entry->scheduled); + out_of_time = exists && + budget_enforced(entry->scheduled) && + budget_exhausted(entry->scheduled); + np = exists && is_np(entry->scheduled); + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; + preempt = entry->scheduled != entry->linked; + +#ifdef WANT_ALL_SCHED_EVENTS + TRACE_TASK(prev, "invoked cfifo_schedule.\n"); +#endif + + if (exists) + TRACE_TASK(prev, + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " + "state:%d sig:%d\n", + blocks, out_of_time, np, sleep, preempt, + prev->state, signal_pending(prev)); + if (entry->linked && preempt) + TRACE_TASK(prev, "will be preempted by %s/%d\n", + entry->linked->comm, entry->linked->pid); + + + /* If a task blocks we have no choice but to reschedule. + */ + if (blocks) + unlink(entry->scheduled); + + /* Request a sys_exit_np() call if we would like to preempt but cannot. + * We need to make sure to update the link structure anyway in case + * that we are still linked. Multiple calls to request_exit_np() don't + * hurt. + */ + if (np && (out_of_time || preempt || sleep)) { + unlink(entry->scheduled); + request_exit_np(entry->scheduled); + } + + /* Any task that is preemptable and either exhausts its execution + * budget or wants to sleep completes. 
We may have to reschedule after + * this. Don't do a job completion if we block (can't have timers running + * for blocked jobs). Preemption go first for the same reason. + */ + if (!np && (out_of_time || sleep) && !blocks && !preempt) + job_completion(entry->scheduled, !sleep); + + /* Link pending task if we became unlinked. + */ + if (!entry->linked) + link_task_to_cpu(__take_ready(&cluster->domain), entry); + + /* The final scheduling decision. Do we need to switch for some reason? + * If linked is different from scheduled, then select linked as next. + */ + if ((!np || blocks) && + entry->linked != entry->scheduled) { + /* Schedule a linked job? */ + if (entry->linked) { + entry->linked->rt_param.scheduled_on = entry->cpu; + next = entry->linked; + } + if (entry->scheduled) { + /* not gonna be scheduled soon */ + entry->scheduled->rt_param.scheduled_on = NO_CPU; + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); + } + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. + */ + if (exists) + next = prev; + + sched_state_task_picked(); + raw_spin_unlock(&cluster->cfifo_lock); + +#ifdef WANT_ALL_SCHED_EVENTS + TRACE("cfifo_lock released, next=0x%p\n", next); + + if (next) + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); + else if (exists && !next) + TRACE("becomes idle at %llu.\n", litmus_clock()); +#endif + + + return next; +} + + +/* _finish_switch - we just finished the switch away from prev + */ +static void cfifo_finish_switch(struct task_struct *prev) +{ + cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries); + + entry->scheduled = is_realtime(current) ? current : NULL; +#ifdef WANT_ALL_SCHED_EVENTS + TRACE_TASK(prev, "switched away from\n"); +#endif +} + + +/* Prepare a task for running in RT mode + */ +static void cfifo_task_new(struct task_struct * t, int on_rq, int running) +{ + unsigned long flags; + cpu_entry_t* entry; + cfifo_domain_t* cluster; + + TRACE("gsn edf: task new %d\n", t->pid); + + /* the cluster doesn't change even if t is running */ + cluster = task_cpu_cluster(t); + + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); + + /* setup job params */ + release_at(t, litmus_clock()); + + if (running) { + entry = &per_cpu(cfifo_cpu_entries, task_cpu(t)); + BUG_ON(entry->scheduled); + + entry->scheduled = t; + tsk_rt(t)->scheduled_on = task_cpu(t); + } else { + t->rt_param.scheduled_on = NO_CPU; + } + t->rt_param.linked_on = NO_CPU; + + cfifo_job_arrival(t); + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); +} + +static void cfifo_task_wake_up(struct task_struct *task) +{ + unsigned long flags; + //lt_t now; + cfifo_domain_t *cluster; + + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); + + cluster = task_cpu_cluster(task); + + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); + +#if 0 // sporadic task model + /* We need to take suspensions because of semaphores into + * account! If a job resumes after being suspended due to acquiring + * a semaphore, it should never be treated as a new job release. 
+ */ + if (get_rt_flags(task) == RT_F_EXIT_SEM) { + set_rt_flags(task, RT_F_RUNNING); + } else { + now = litmus_clock(); + if (is_tardy(task, now)) { + /* new sporadic release */ + release_at(task, now); + sched_trace_task_release(task); + } + else { + if (task->rt.time_slice) { + /* came back in time before deadline + */ + set_rt_flags(task, RT_F_RUNNING); + } + } + } +#endif + + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU); + set_rt_flags(task, RT_F_RUNNING); // periodic model + + if(tsk_rt(task)->linked_on == NO_CPU) + cfifo_job_arrival(task); + else + TRACE("WTF, mate?!\n"); + + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); +} + +static void cfifo_task_block(struct task_struct *t) +{ + unsigned long flags; + cfifo_domain_t *cluster; + + TRACE_TASK(t, "block at %llu\n", litmus_clock()); + + cluster = task_cpu_cluster(t); + + /* unlink if necessary */ + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); + unlink(t); + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); + + BUG_ON(!is_realtime(t)); +} + + +static void cfifo_task_exit(struct task_struct * t) +{ + unsigned long flags; + cfifo_domain_t *cluster = task_cpu_cluster(t); + + /* unlink if necessary */ + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); + unlink(t); + if (tsk_rt(t)->scheduled_on != NO_CPU) { + cpu_entry_t *cpu; + cpu = &per_cpu(cfifo_cpu_entries, tsk_rt(t)->scheduled_on); + cpu->scheduled = NULL; + tsk_rt(t)->scheduled_on = NO_CPU; + } + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); + + BUG_ON(!is_realtime(t)); + TRACE_TASK(t, "RIP\n"); +} + +static long cfifo_admit_task(struct task_struct* tsk) +{ + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; +} + + + + + + + + + + + + + +#ifdef CONFIG_LITMUS_LOCKING + +#include + + +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + int linked_on; + int check_preempt = 0; + + cfifo_domain_t* cluster = task_cpu_cluster(t); + + if(prio_inh != NULL) + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "inherits priority from %p\n", prio_inh); + + sched_trace_eff_prio_change(t, prio_inh); + + tsk_rt(t)->inh_task = prio_inh; + + linked_on = tsk_rt(t)->linked_on; + + /* If it is scheduled, then we need to reorder the CPU heap. */ + if (linked_on != NO_CPU) { + TRACE_TASK(t, "%s: linked on %d\n", + __FUNCTION__, linked_on); + /* Holder is scheduled; need to re-order CPUs. + * We can't use heap_decrease() here since + * the cpu_heap is ordered in reverse direction, so + * it is actually an increase. */ + bheap_delete(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(cfifo_cpu_entries, linked_on).hn); + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(cfifo_cpu_entries, linked_on).hn); + } else { + /* holder may be queued: first stop queue changes */ + raw_spin_lock(&cluster->domain.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); + + /* We need to update the position of holder in some + * heap. Note that this could be a release heap if we + * budget enforcement is used and this job overran. */ + check_preempt = !bheap_decrease(fifo_ready_order, tsk_rt(t)->heap_node); + + } else { + /* Nothing to do: if it is not queued and not linked + * then it is either sleeping or currently being moved + * by other code (e.g., a timer interrupt handler) that + * will use the correct priority when enqueuing the + * task. 
*/ + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); + } + raw_spin_unlock(&cluster->domain.release_lock); + + /* If holder was enqueued in a release heap, then the following + * preemption check is pointless, but we can't easily detect + * that case. If you want to fix this, then consider that + * simply adding a state flag requires O(n) time to update when + * releasing n tasks, which conflicts with the goal to have + * O(log n) merges. */ + if (check_preempt) { + /* heap_decrease() hit the top level of the heap: make + * sure preemption checks get the right task, not the + * potentially stale cache. */ + bheap_uncache_min(fifo_ready_order, &cluster->domain.ready_queue); + check_for_preemptions(cluster); + } + } +} + +/* called with IRQs off */ +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + cfifo_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cfifo_lock); + + __set_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&cluster->cfifo_lock); +} + + +/* called with IRQs off */ +static void __clear_priority_inheritance(struct task_struct* t) +{ + TRACE_TASK(t, "priority restored\n"); + + if(tsk_rt(t)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(t, NULL); + + tsk_rt(t)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(t); + cfifo_job_arrival(t); + } + else + { + __set_priority_inheritance(t, NULL); + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); + + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(tsk_rt(t)->cur_klitirqd); + cfifo_job_arrival(tsk_rt(t)->cur_klitirqd); + } + else + { + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); + } + } +#endif +} + +/* called with IRQs off */ +static void clear_priority_inheritance(struct task_struct* t) +{ + cfifo_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cfifo_lock); + __clear_priority_inheritance(t); + raw_spin_unlock(&cluster->cfifo_lock); +} + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +/* called with IRQs off */ +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + cfifo_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cfifo_lock); + + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } + + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); + + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __set_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? 
+ new_owner : + tsk_rt(new_owner)->inh_task); + + raw_spin_unlock(&cluster->cfifo_lock); +} + +/* called with IRQs off */ +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ + cfifo_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cfifo_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) + { + tsk_rt(klitirqd)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(klitirqd); + cfifo_job_arrival(klitirqd); + } + else + { + __set_priority_inheritance(klitirqd, NULL); + } + + tsk_rt(old_owner)->cur_klitirqd = NULL; + + raw_spin_unlock(&cluster->cfifo_lock); +} +#endif // CONFIG_LITMUS_SOFTIRQD + + +/* ******************** KFMLP support ********************** */ + +/* struct for semaphore with priority inheritance */ +struct kfmlp_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ +}; + +struct kfmlp_semaphore +{ + struct litmus_lock litmus_lock; + + spinlock_t lock; + + int num_resources; /* aka k */ + struct kfmlp_queue *queues; /* array */ + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ +}; + +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct kfmlp_semaphore, litmus_lock); +} + +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, + struct kfmlp_queue* queue) +{ + return (queue - &sem->queues[0]); +} + +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, + struct task_struct* holder) +{ + int i; + for(i = 0; i < sem->num_resources; ++i) + if(sem->queues[i].owner == holder) + return(&sem->queues[i]); + return(NULL); +} + +/* caller is responsible for locking */ +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + + /* Compare task prios, find high prio task. */ + if (queued != skip && fifo_higher_prio(queued, found)) + found = queued; + } + return found; +} + +static inline struct kfmlp_queue* kfmlp_find_shortest( + struct kfmlp_semaphore* sem, + struct kfmlp_queue* search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct kfmlp_queue* step = search_start; + struct kfmlp_queue* shortest = sem->shortest_queue; + + do + { + step = (step+1 != &sem->queues[sem->num_resources]) ? 
+ step+1 : &sem->queues[0]; + if(step->count < shortest->count) + { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + }while(step != search_start); + + return(shortest); +} + +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) +{ + /* must hold sem->lock */ + + struct kfmlp_queue *my_queue = NULL; + struct task_struct *max_hp = NULL; + + + struct list_head *pos; + struct task_struct *queued; + int i; + + for(i = 0; i < sem->num_resources; ++i) + { + if( (sem->queues[i].count > 1) && + ((my_queue == NULL) || + (fifo_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + { + my_queue = &sem->queues[i]; + } + } + + if(my_queue) + { + cfifo_domain_t* cluster; + + max_hp = my_queue->hp_waiter; + BUG_ON(!max_hp); + + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", + kfmlp_get_idx(sem, my_queue), + max_hp->comm, max_hp->pid, + kfmlp_get_idx(sem, my_queue)); + + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); + + /* + if(my_queue->hp_waiter) + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->hp_waiter->comm, + my_queue->hp_waiter->pid); + else + TRACE_CUR("queue %d: new hp_waiter is %p\n", + kfmlp_get_idx(sem, my_queue), NULL); + */ + + cluster = task_cpu_cluster(max_hp); + + raw_spin_lock(&cluster->cfifo_lock); + + /* + if(my_queue->owner) + TRACE_CUR("queue %d: owner is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->owner->comm, + my_queue->owner->pid); + else + TRACE_CUR("queue %d: owner is %p\n", + kfmlp_get_idx(sem, my_queue), + NULL); + */ + + if(tsk_rt(my_queue->owner)->inh_task == max_hp) + { + __clear_priority_inheritance(my_queue->owner); + if(my_queue->hp_waiter != NULL) + { + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + raw_spin_unlock(&cluster->cfifo_lock); + + list_for_each(pos, &my_queue->wait.task_list) + { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + /* Compare task prios, find high prio task. */ + if (queued == max_hp) + { + /* + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", + kfmlp_get_idx(sem, my_queue)); + */ + __remove_wait_queue(&my_queue->wait, + list_entry(pos, wait_queue_t, task_list)); + break; + } + } + --(my_queue->count); + } + + return(max_hp); +} + +int cfifo_kfmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue* my_queue; + wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = sem->shortest_queue; + + if (my_queue->owner) { + /* resource is not free => must suspend and wait */ + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", + kfmlp_get_idx(sem, my_queue)); + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); + + /* check if we need to activate priority inheritance */ + if (fifo_higher_prio(t, my_queue->hp_waiter)) + { + my_queue->hp_waiter = t; + if (fifo_higher_prio(t, my_queue->owner)) + { + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->lock, flags); + + /* We depend on the FIFO order. 
Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release (or steal). + */ + schedule(); + + + if(my_queue->owner == t) + { + TRACE_CUR("queue %d: acquired through waiting\n", + kfmlp_get_idx(sem, my_queue)); + } + else + { + /* this case may happen if our wait entry was stolen + between queues. record where we went.*/ + my_queue = kfmlp_get_queue(sem, t); + BUG_ON(!my_queue); + TRACE_CUR("queue %d: acquired through stealing\n", + kfmlp_get_idx(sem, my_queue)); + } + } + else + { + TRACE_CUR("queue %d: acquired immediately\n", + kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = t; + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + spin_unlock_irqrestore(&sem->lock, flags); + } + + return kfmlp_get_idx(sem, my_queue); +} + +int cfifo_kfmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + + if (!my_queue) { + err = -EINVAL; + goto out; + } + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&my_queue->wait); + if (next) { + /* + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + /* next becomes the resouce holder */ + my_queue->owner = next; + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", + kfmlp_get_idx(sem, my_queue), next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == my_queue->hp_waiter) { + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); + if (my_queue->hp_waiter) + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); + else + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); + } else { + /* Well, if next is not the highest-priority waiter, + * then it ought to inherit the highest-priority + * waiter's priority. 
*/ + set_priority_inheritance(next, my_queue->hp_waiter); + } + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); + + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ + + /* + if(next) + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + + my_queue->owner = next; + + if(next) + { + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + } + } + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + clear_priority_inheritance(t); + +out: + spin_unlock_irqrestore(&sem->lock, flags); + + return err; +} + +int cfifo_kfmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + owner = (my_queue) ? (my_queue->owner == t) : 0; + + spin_unlock_irqrestore(&sem->lock, flags); + + if (owner) + cfifo_kfmlp_unlock(l); + + return 0; +} + +void cfifo_kfmlp_free(struct litmus_lock* l) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + kfree(sem->queues); + kfree(sem); +} + +static struct litmus_lock_ops cfifo_kfmlp_lock_ops = { + .close = cfifo_kfmlp_close, + .lock = cfifo_kfmlp_lock, + .unlock = cfifo_kfmlp_unlock, + .deallocate = cfifo_kfmlp_free, +}; + +static struct litmus_lock* cfifo_new_kfmlp(void* __user arg, int* ret_code) +{ + struct kfmlp_semaphore* sem; + int num_resources = 0; + int i; + + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(num_resources < 1) + { + *ret_code = -EINVAL; + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + *ret_code = -ENOMEM; + return NULL; + } + + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); + if(!sem->queues) + { + kfree(sem); + *ret_code = -ENOMEM; + return NULL; + } + + sem->litmus_lock.ops = &cfifo_kfmlp_lock_ops; + spin_lock_init(&sem->lock); + sem->num_resources = num_resources; + + for(i = 0; i < num_resources; ++i) + { + sem->queues[i].owner = NULL; + sem->queues[i].hp_waiter = NULL; + init_waitqueue_head(&sem->queues[i].wait); + sem->queues[i].count = 0; + } + + sem->shortest_queue = &sem->queues[0]; + + *ret_code = 0; + return &sem->litmus_lock; +} + + +/* **** lock constructor **** */ + +static long cfifo_allocate_lock(struct litmus_lock **lock, int type, + void* __user arg) +{ + int err = -ENXIO; + + /* C-FIFO currently only supports the FMLP for global resources + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! 
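+
+	   A minimal usage sketch (hypothetical caller, not a call site added by
+	   this patch; assumes dispatch through the generic litmus_lock ops
+	   installed by cfifo_new_kfmlp() above):
+
+		struct litmus_lock *l;			// obtained from cfifo_new_kfmlp()
+		int replica = l->ops->lock(l);		// blocks; returns the index of the
+							// acquired replica, or -EPERM if the
+							// caller is not a real-time task
+		// ... access resource instance 'replica' ...
+		l->ops->unlock(l);			// passes ownership to the next FIFO
+							// waiter or steals one from elsewhere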
*/ + switch (type) { + case KFMLP_SEM: + *lock = cfifo_new_kfmlp(arg, &err); + break; + }; + + return err; +} + +#endif // CONFIG_LITMUS_LOCKING + + + + + + +/* total number of cluster */ +static int num_clusters; +/* we do not support cluster of different sizes */ +static unsigned int cluster_size; + +#ifdef VERBOSE_INIT +static void print_cluster_topology(cpumask_var_t mask, int cpu) +{ + int chk; + char buf[255]; + + chk = cpulist_scnprintf(buf, 254, mask); + buf[chk] = '\0'; + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); + +} +#endif + +static int clusters_allocated = 0; + +static void cleanup_cfifo(void) +{ + int i; + + if (clusters_allocated) { + for (i = 0; i < num_clusters; i++) { + kfree(cfifo[i].cpus); + kfree(cfifo[i].heap_node); + free_cpumask_var(cfifo[i].cpu_map); + } + + kfree(cfifo); + } +} + +static long cfifo_activate_plugin(void) +{ + int i, j, cpu, ccpu, cpu_count; + cpu_entry_t *entry; + + cpumask_var_t mask; + int chk = 0; + + /* de-allocate old clusters, if any */ + cleanup_cfifo(); + + printk(KERN_INFO "C-FIFO: Activate Plugin, cluster configuration = %d\n", + cluster_config); + + /* need to get cluster_size first */ + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) + return -ENOMEM; + + if (unlikely(cluster_config == GLOBAL_CLUSTER)) { + cluster_size = num_online_cpus(); + } else { + chk = get_shared_cpu_map(mask, 0, cluster_config); + if (chk) { + /* if chk != 0 then it is the max allowed index */ + printk(KERN_INFO "C-FIFO: Cluster configuration = %d " + "is not supported on this hardware.\n", + cluster_config); + /* User should notice that the configuration failed, so + * let's bail out. */ + return -EINVAL; + } + + cluster_size = cpumask_weight(mask); + } + + if ((num_online_cpus() % cluster_size) != 0) { + /* this can't be right, some cpus are left out */ + printk(KERN_ERR "C-FIFO: Trying to group %d cpus in %d!\n", + num_online_cpus(), cluster_size); + return -1; + } + + num_clusters = num_online_cpus() / cluster_size; + printk(KERN_INFO "C-FIFO: %d cluster(s) of size = %d\n", + num_clusters, cluster_size); + + /* initialize clusters */ + cfifo = kmalloc(num_clusters * sizeof(cfifo_domain_t), GFP_ATOMIC); + for (i = 0; i < num_clusters; i++) { + + cfifo[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), + GFP_ATOMIC); + cfifo[i].heap_node = kmalloc( + cluster_size * sizeof(struct bheap_node), + GFP_ATOMIC); + bheap_init(&(cfifo[i].cpu_heap)); + fifo_domain_init(&(cfifo[i].domain), NULL, cfifo_release_jobs); + + if(!zalloc_cpumask_var(&cfifo[i].cpu_map, GFP_ATOMIC)) + return -ENOMEM; + } + + /* cycle through cluster and add cpus to them */ + for (i = 0; i < num_clusters; i++) { + + for_each_online_cpu(cpu) { + /* check if the cpu is already in a cluster */ + for (j = 0; j < num_clusters; j++) + if (cpumask_test_cpu(cpu, cfifo[j].cpu_map)) + break; + /* if it is in a cluster go to next cpu */ + if (j < num_clusters && + cpumask_test_cpu(cpu, cfifo[j].cpu_map)) + continue; + + /* this cpu isn't in any cluster */ + /* get the shared cpus */ + if (unlikely(cluster_config == GLOBAL_CLUSTER)) + cpumask_copy(mask, cpu_online_mask); + else + get_shared_cpu_map(mask, cpu, cluster_config); + + cpumask_copy(cfifo[i].cpu_map, mask); +#ifdef VERBOSE_INIT + print_cluster_topology(mask, cpu); +#endif + /* add cpus to current cluster and init cpu_entry_t */ + cpu_count = 0; + for_each_cpu(ccpu, cfifo[i].cpu_map) { + + entry = &per_cpu(cfifo_cpu_entries, ccpu); + cfifo[i].cpus[cpu_count] = entry; + atomic_set(&entry->will_schedule, 0); + entry->cpu = ccpu; + 
entry->cluster = &cfifo[i]; + entry->hn = &(cfifo[i].heap_node[cpu_count]); + bheap_node_init(&entry->hn, entry); + + cpu_count++; + + entry->linked = NULL; + entry->scheduled = NULL; + update_cpu_position(entry); + } + /* done with this cluster */ + break; + } + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + { + /* distribute the daemons evenly across the clusters. */ + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; + int left_over = NR_LITMUS_SOFTIRQD % num_clusters; + + int daemon = 0; + for(i = 0; i < num_clusters; ++i) + { + int num_on_this_cluster = num_daemons_per_cluster; + if(left_over) + { + ++num_on_this_cluster; + --left_over; + } + + for(j = 0; j < num_on_this_cluster; ++j) + { + // first CPU of this cluster + affinity[daemon++] = i*cluster_size; + } + } + + spawn_klitirqd(affinity); + + kfree(affinity); + } +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif + + free_cpumask_var(mask); + clusters_allocated = 1; + return 0; +} + +/* Plugin object */ +static struct sched_plugin cfifo_plugin __cacheline_aligned_in_smp = { + .plugin_name = "C-FIFO", + .finish_switch = cfifo_finish_switch, + .tick = cfifo_tick, + .task_new = cfifo_task_new, + .complete_job = complete_job, + .task_exit = cfifo_task_exit, + .schedule = cfifo_schedule, + .task_wake_up = cfifo_task_wake_up, + .task_block = cfifo_task_block, + .admit_task = cfifo_admit_task, + .activate_plugin = cfifo_activate_plugin, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = cfifo_allocate_lock, + .set_prio_inh = set_priority_inheritance, + .clear_prio_inh = clear_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, +#endif +}; + +static struct proc_dir_entry *cluster_file = NULL, *cfifo_dir = NULL; + +static int __init init_cfifo(void) +{ + int err, fs; + + err = register_sched_plugin(&cfifo_plugin); + if (!err) { + fs = make_plugin_proc_dir(&cfifo_plugin, &cfifo_dir); + if (!fs) + cluster_file = create_cluster_file(cfifo_dir, &cluster_config); + else + printk(KERN_ERR "Could not allocate C-FIFO procfs dir.\n"); + } + return err; +} + +static void clean_cfifo(void) +{ + cleanup_cfifo(); + if (cluster_file) + remove_proc_entry("cluster", cfifo_dir); + if (cfifo_dir) + remove_plugin_proc_dir(&cfifo_plugin); +} + +module_init(init_cfifo); +module_exit(clean_cfifo); diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c new file mode 100644 index 000000000000..061b29eaff7e --- /dev/null +++ b/litmus/sched_crm.c @@ -0,0 +1,1611 @@ +/* + * litmus/sched_crm.c + * + * Implementation of the C-RM scheduling algorithm. + * + * This implementation is based on G-EDF: + * - CPUs are clustered around L2 or L3 caches. + * - Clusters topology is automatically detected (this is arch dependent + * and is working only on x86 at the moment --- and only with modern + * cpus that exports cpuid4 information) + * - The plugins _does not_ attempt to put tasks in the right cluster i.e. + * the programmer needs to be aware of the topology to place tasks + * in the desired cluster + * - default clustering is around L2 cache (cache index = 2) + * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all + * online_cpus are placed in a single cluster). + * + * For details on functions, take a look at sched_gsn_edf.c + * + * Currently, we do not support changes in the number of online cpus. 
+ * If the num_online_cpus() dynamically changes, the plugin is broken. + * + * This version uses the simple approach and serializes all scheduling + * decisions by the use of a queue lock. This is probably not the + * best way to do it, but it should suffice for now. + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +/* to configure the cluster size */ +#include + +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + +/* Reference configuration variable. Determines which cache level is used to + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that + * all CPUs form a single cluster (just like GSN-EDF). + */ +static enum cache_level cluster_config = GLOBAL_CLUSTER; + +struct clusterdomain; + +/* cpu_entry_t - maintain the linked and scheduled state + * + * A cpu also contains a pointer to the crm_domain_t cluster + * that owns it (struct clusterdomain*) + */ +typedef struct { + int cpu; + struct clusterdomain* cluster; /* owning cluster */ + struct task_struct* linked; /* only RT tasks */ + struct task_struct* scheduled; /* only RT tasks */ + atomic_t will_schedule; /* prevent unneeded IPIs */ + struct bheap_node* hn; +} cpu_entry_t; + +/* one cpu_entry_t per CPU */ +DEFINE_PER_CPU(cpu_entry_t, crm_cpu_entries); + +#define set_will_schedule() \ + (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 1)) +#define clear_will_schedule() \ + (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 0)) +#define test_will_schedule(cpu) \ + (atomic_read(&per_cpu(crm_cpu_entries, cpu).will_schedule)) + +/* + * In C-RM there is a crm domain _per_ cluster + * The number of clusters is dynamically determined accordingly to the + * total cpu number and the cluster size + */ +typedef struct clusterdomain { + /* rt_domain for this cluster */ + rt_domain_t domain; + /* cpus in this cluster */ + cpu_entry_t* *cpus; + /* map of this cluster cpus */ + cpumask_var_t cpu_map; + /* the cpus queue themselves according to priority in here */ + struct bheap_node *heap_node; + struct bheap cpu_heap; + /* lock for this cluster */ +#define crm_lock domain.ready_lock +} crm_domain_t; + +/* a crm_domain per cluster; allocation is done at init/activation time */ +crm_domain_t *crm; + +#define remote_cluster(cpu) ((crm_domain_t *) per_cpu(crm_cpu_entries, cpu).cluster) +#define task_cpu_cluster(task) remote_cluster(get_partition(task)) + +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose + * information during the initialization of the plugin (e.g., topology) +#define WANT_ALL_SCHED_EVENTS + */ +#define VERBOSE_INIT + +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) +{ + cpu_entry_t *a, *b; + a = _a->value; + b = _b->value; + /* Note that a and b are inverted: we want the lowest-priority CPU at + * the top of the heap. + */ + return rm_higher_prio(b->linked, a->linked); +} + +/* update_cpu_position - Move the cpu entry to the correct place to maintain + * order in the cpu queue. Caller must hold crm lock. 
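+ *
+ * Illustrative note: cpu_lower_prio() above inverts its arguments, so this
+ * heap keeps the CPU whose linked task has the lowest priority at the top.
+ * That lets preemption checks peek in O(1), as check_for_preemptions()
+ * below does:
+ *
+ *	cpu_entry_t *victim = lowest_prio_cpu(cluster);
+ *	if (rm_preemption_needed(&cluster->domain, victim->linked))
+ *		... link the next ready task to 'victim' and preempt it ...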
+ */ +static void update_cpu_position(cpu_entry_t *entry) +{ + crm_domain_t *cluster = entry->cluster; + + if (likely(bheap_node_in_heap(entry->hn))) + bheap_delete(cpu_lower_prio, + &cluster->cpu_heap, + entry->hn); + + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); +} + +/* caller must hold crm lock */ +static cpu_entry_t* lowest_prio_cpu(crm_domain_t *cluster) +{ + struct bheap_node* hn; + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); + return hn->value; +} + + +/* link_task_to_cpu - Update the link of a CPU. + * Handles the case where the to-be-linked task is already + * scheduled on a different CPU. + */ +static noinline void link_task_to_cpu(struct task_struct* linked, + cpu_entry_t *entry) +{ + cpu_entry_t *sched; + struct task_struct* tmp; + int on_cpu; + + BUG_ON(linked && !is_realtime(linked)); + + /* Currently linked task is set to be unlinked. */ + if (entry->linked) { + entry->linked->rt_param.linked_on = NO_CPU; + } + + /* Link new task to CPU. */ + if (linked) { + set_rt_flags(linked, RT_F_RUNNING); + /* handle task is already scheduled somewhere! */ + on_cpu = linked->rt_param.scheduled_on; + if (on_cpu != NO_CPU) { + sched = &per_cpu(crm_cpu_entries, on_cpu); + /* this should only happen if not linked already */ + BUG_ON(sched->linked == linked); + + /* If we are already scheduled on the CPU to which we + * wanted to link, we don't need to do the swap -- + * we just link ourselves to the CPU and depend on + * the caller to get things right. + */ + if (entry != sched) { + TRACE_TASK(linked, + "already scheduled on %d, updating link.\n", + sched->cpu); + tmp = sched->linked; + linked->rt_param.linked_on = sched->cpu; + sched->linked = linked; + update_cpu_position(sched); + linked = tmp; + } + } + if (linked) /* might be NULL due to swap */ + linked->rt_param.linked_on = entry->cpu; + } + entry->linked = linked; +#ifdef WANT_ALL_SCHED_EVENTS + if (linked) + TRACE_TASK(linked, "linked to %d.\n", entry->cpu); + else + TRACE("NULL linked to %d.\n", entry->cpu); +#endif + update_cpu_position(entry); +} + +/* unlink - Make sure a task is not linked any longer to an entry + * where it was linked before. Must hold crm_lock. + */ +static noinline void unlink(struct task_struct* t) +{ + cpu_entry_t *entry; + + if (t->rt_param.linked_on != NO_CPU) { + /* unlink */ + entry = &per_cpu(crm_cpu_entries, t->rt_param.linked_on); + t->rt_param.linked_on = NO_CPU; + link_task_to_cpu(NULL, entry); + } else if (is_queued(t)) { + /* This is an interesting situation: t is scheduled, + * but was just recently unlinked. It cannot be + * linked anywhere else (because then it would have + * been relinked to this CPU), thus it must be in some + * queue. We must remove it from the list in this + * case. + * + * in C-RM case is should be somewhere in the queue for + * its domain, therefore and we can get the domain using + * task_cpu_cluster + */ + remove(&(task_cpu_cluster(t))->domain, t); + } +} + + +/* preempt - force a CPU to reschedule + */ +static void preempt(cpu_entry_t *entry) +{ + preempt_if_preemptable(entry->scheduled, entry->cpu); +} + +/* requeue - Put an unlinked task into gsn-edf domain. + * Caller must hold crm_lock. 
+ */ +static noinline void requeue(struct task_struct* task) +{ + crm_domain_t *cluster = task_cpu_cluster(task); + BUG_ON(!task); + /* sanity check before insertion */ + BUG_ON(is_queued(task)); + + if (is_released(task, litmus_clock())) + __add_ready(&cluster->domain, task); + else { + /* it has got to wait */ + add_release(&cluster->domain, task); + } +} + +#ifdef CONFIG_SCHED_CPU_AFFINITY +static cpu_entry_t* crm_get_nearest_available_cpu( + crm_domain_t *cluster, cpu_entry_t* start) +{ + cpu_entry_t* affinity; + + get_nearest_available_cpu(affinity, start, crm_cpu_entries, -1); + + /* make sure CPU is in our cluster */ + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) + return(affinity); + else + return(NULL); +} +#endif + + +/* check for any necessary preemptions */ +static void check_for_preemptions(crm_domain_t *cluster) +{ + struct task_struct *task; + cpu_entry_t *last; + + for(last = lowest_prio_cpu(cluster); + rm_preemption_needed(&cluster->domain, last->linked); + last = lowest_prio_cpu(cluster)) { + /* preemption necessary */ + task = __take_ready(&cluster->domain); +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = + crm_get_nearest_available_cpu(cluster, + &per_cpu(crm_cpu_entries, task_cpu(task))); + if(affinity) + last = affinity; + else if(last->linked) + requeue(last->linked); + } +#else + if (last->linked) + requeue(last->linked); +#endif + TRACE("check_for_preemptions: attempting to link task %d to %d\n", + task->pid, last->cpu); + link_task_to_cpu(task, last); + preempt(last); + } +} + +/* crm_job_arrival: task is either resumed or released */ +static noinline void crm_job_arrival(struct task_struct* task) +{ + crm_domain_t *cluster = task_cpu_cluster(task); + BUG_ON(!task); + + requeue(task); + check_for_preemptions(cluster); +} + +static void crm_release_jobs(rt_domain_t* rt, struct bheap* tasks) +{ + crm_domain_t* cluster = container_of(rt, crm_domain_t, domain); + unsigned long flags; + + raw_spin_lock_irqsave(&cluster->crm_lock, flags); + + __merge_ready(&cluster->domain, tasks); + check_for_preemptions(cluster); + + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); +} + +/* caller holds crm_lock */ +static noinline void job_completion(struct task_struct *t, int forced) +{ + BUG_ON(!t); + + sched_trace_task_completion(t, forced); + +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + + TRACE_TASK(t, "job_completion().\n"); + + /* set flags */ + set_rt_flags(t, RT_F_SLEEP); + /* prepare for next period */ + prepare_for_next_period(t); + if (is_released(t, litmus_clock())) + sched_trace_task_release(t); + /* unlink */ + unlink(t); + /* requeue + * But don't requeue a blocking task. */ + if (is_running(t)) + crm_job_arrival(t); +} + +/* crm_tick - this function is called for every local timer + * interrupt. + * + * checks whether the current task has expired and checks + * whether we need to preempt it if it has not expired + */ +static void crm_tick(struct task_struct* t) +{ + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { + if (!is_np(t)) { + /* np tasks will be preempted when they become + * preemptable again + */ + litmus_reschedule_local(); + set_will_schedule(); + TRACE("crm_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("crm_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } + } +} + +/* Getting schedule() right is a bit tricky. 
schedule() may not make any + * assumptions on the state of the current task since it may be called for a + * number of reasons. The reasons include a scheduler_tick() determined that it + * was necessary, because sys_exit_np() was called, because some Linux + * subsystem determined so, or even (in the worst case) because there is a bug + * hidden somewhere. Thus, we must take extreme care to determine what the + * current state is. + * + * The CPU could currently be scheduling a task (or not), be linked (or not). + * + * The following assertions for the scheduled task could hold: + * + * - !is_running(scheduled) // the job blocks + * - scheduled->timeslice == 0 // the job completed (forcefully) + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) + * - linked != scheduled // we need to reschedule (for any reason) + * - is_np(scheduled) // rescheduling must be delayed, + * sys_exit_np must be requested + * + * Any of these can occur together. + */ +static struct task_struct* crm_schedule(struct task_struct * prev) +{ + cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries); + crm_domain_t *cluster = entry->cluster; + int out_of_time, sleep, preempt, np, exists, blocks; + struct task_struct* next = NULL; + + raw_spin_lock(&cluster->crm_lock); + clear_will_schedule(); + + /* sanity checking */ + BUG_ON(entry->scheduled && entry->scheduled != prev); + BUG_ON(entry->scheduled && !is_realtime(prev)); + BUG_ON(is_realtime(prev) && !entry->scheduled); + + /* (0) Determine state */ + exists = entry->scheduled != NULL; + blocks = exists && !is_running(entry->scheduled); + out_of_time = exists && + budget_enforced(entry->scheduled) && + budget_exhausted(entry->scheduled); + np = exists && is_np(entry->scheduled); + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; + preempt = entry->scheduled != entry->linked; + +#ifdef WANT_ALL_SCHED_EVENTS + TRACE_TASK(prev, "invoked crm_schedule.\n"); +#endif + + if (exists) + TRACE_TASK(prev, + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " + "state:%d sig:%d\n", + blocks, out_of_time, np, sleep, preempt, + prev->state, signal_pending(prev)); + if (entry->linked && preempt) + TRACE_TASK(prev, "will be preempted by %s/%d\n", + entry->linked->comm, entry->linked->pid); + + + /* If a task blocks we have no choice but to reschedule. + */ + if (blocks) + unlink(entry->scheduled); + + /* Request a sys_exit_np() call if we would like to preempt but cannot. + * We need to make sure to update the link structure anyway in case + * that we are still linked. Multiple calls to request_exit_np() don't + * hurt. + */ + if (np && (out_of_time || preempt || sleep)) { + unlink(entry->scheduled); + request_exit_np(entry->scheduled); + } + + /* Any task that is preemptable and either exhausts its execution + * budget or wants to sleep completes. We may have to reschedule after + * this. Don't do a job completion if we block (can't have timers running + * for blocked jobs). Preemption go first for the same reason. + */ + if (!np && (out_of_time || sleep) && !blocks && !preempt) + job_completion(entry->scheduled, !sleep); + + /* Link pending task if we became unlinked. + */ + if (!entry->linked) + link_task_to_cpu(__take_ready(&cluster->domain), entry); + + /* The final scheduling decision. Do we need to switch for some reason? + * If linked is different from scheduled, then select linked as next. + */ + if ((!np || blocks) && + entry->linked != entry->scheduled) { + /* Schedule a linked job? 
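+		 *
+		 * Branch summary (descriptive): if entry->linked is non-NULL it
+		 * becomes 'next' and records this CPU in scheduled_on; if it is
+		 * NULL, 'next' stays NULL and the CPU falls back to background
+		 * (non-real-time) work. Either way, a previously scheduled task
+		 * that is no longer linked here has its scheduled_on reset to
+		 * NO_CPU just below.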
*/ + if (entry->linked) { + entry->linked->rt_param.scheduled_on = entry->cpu; + next = entry->linked; + } + if (entry->scheduled) { + /* not gonna be scheduled soon */ + entry->scheduled->rt_param.scheduled_on = NO_CPU; + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); + } + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. + */ + if (exists) + next = prev; + + sched_state_task_picked(); + raw_spin_unlock(&cluster->crm_lock); + +#ifdef WANT_ALL_SCHED_EVENTS + TRACE("crm_lock released, next=0x%p\n", next); + + if (next) + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); + else if (exists && !next) + TRACE("becomes idle at %llu.\n", litmus_clock()); +#endif + + + return next; +} + + +/* _finish_switch - we just finished the switch away from prev + */ +static void crm_finish_switch(struct task_struct *prev) +{ + cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries); + + entry->scheduled = is_realtime(current) ? current : NULL; +#ifdef WANT_ALL_SCHED_EVENTS + TRACE_TASK(prev, "switched away from\n"); +#endif +} + + +/* Prepare a task for running in RT mode + */ +static void crm_task_new(struct task_struct * t, int on_rq, int running) +{ + unsigned long flags; + cpu_entry_t* entry; + crm_domain_t* cluster; + + TRACE("gsn edf: task new %d\n", t->pid); + + /* the cluster doesn't change even if t is running */ + cluster = task_cpu_cluster(t); + + raw_spin_lock_irqsave(&cluster->crm_lock, flags); + + /* setup job params */ + release_at(t, litmus_clock()); + + if (running) { + entry = &per_cpu(crm_cpu_entries, task_cpu(t)); + BUG_ON(entry->scheduled); + + entry->scheduled = t; + tsk_rt(t)->scheduled_on = task_cpu(t); + } else { + t->rt_param.scheduled_on = NO_CPU; + } + t->rt_param.linked_on = NO_CPU; + + crm_job_arrival(t); + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); +} + +static void crm_task_wake_up(struct task_struct *task) +{ + unsigned long flags; + //lt_t now; + crm_domain_t *cluster; + + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); + + cluster = task_cpu_cluster(task); + + raw_spin_lock_irqsave(&cluster->crm_lock, flags); + +#if 0 // sporadic task model + /* We need to take suspensions because of semaphores into + * account! If a job resumes after being suspended due to acquiring + * a semaphore, it should never be treated as a new job release. 
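+	 *
+	 * (Note: this sporadic-release path is disabled by the "#if 0" above;
+	 *  the active code below handles every wake-up under the periodic
+	 *  model and simply sets RT_F_RUNNING again.)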
+ */ + if (get_rt_flags(task) == RT_F_EXIT_SEM) { + set_rt_flags(task, RT_F_RUNNING); + } else { + now = litmus_clock(); + if (is_tardy(task, now)) { + /* new sporadic release */ + release_at(task, now); + sched_trace_task_release(task); + } + else { + if (task->rt.time_slice) { + /* came back in time before deadline + */ + set_rt_flags(task, RT_F_RUNNING); + } + } + } +#endif + + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU); + set_rt_flags(task, RT_F_RUNNING); // periodic model + + if(tsk_rt(task)->linked_on == NO_CPU) + crm_job_arrival(task); + else + TRACE("WTF, mate?!\n"); + + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); +} + +static void crm_task_block(struct task_struct *t) +{ + unsigned long flags; + crm_domain_t *cluster; + + TRACE_TASK(t, "block at %llu\n", litmus_clock()); + + cluster = task_cpu_cluster(t); + + /* unlink if necessary */ + raw_spin_lock_irqsave(&cluster->crm_lock, flags); + unlink(t); + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); + + BUG_ON(!is_realtime(t)); +} + + +static void crm_task_exit(struct task_struct * t) +{ + unsigned long flags; + crm_domain_t *cluster = task_cpu_cluster(t); + + /* unlink if necessary */ + raw_spin_lock_irqsave(&cluster->crm_lock, flags); + unlink(t); + if (tsk_rt(t)->scheduled_on != NO_CPU) { + cpu_entry_t *cpu; + cpu = &per_cpu(crm_cpu_entries, tsk_rt(t)->scheduled_on); + cpu->scheduled = NULL; + tsk_rt(t)->scheduled_on = NO_CPU; + } + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); + + BUG_ON(!is_realtime(t)); + TRACE_TASK(t, "RIP\n"); +} + +static long crm_admit_task(struct task_struct* tsk) +{ + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; +} + + + + + + + + + + + + + +#ifdef CONFIG_LITMUS_LOCKING + +#include + + +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + int linked_on; + int check_preempt = 0; + + crm_domain_t* cluster = task_cpu_cluster(t); + + if(prio_inh != NULL) + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "inherits priority from %p\n", prio_inh); + + sched_trace_eff_prio_change(t, prio_inh); + + tsk_rt(t)->inh_task = prio_inh; + + linked_on = tsk_rt(t)->linked_on; + + /* If it is scheduled, then we need to reorder the CPU heap. */ + if (linked_on != NO_CPU) { + TRACE_TASK(t, "%s: linked on %d\n", + __FUNCTION__, linked_on); + /* Holder is scheduled; need to re-order CPUs. + * We can't use heap_decrease() here since + * the cpu_heap is ordered in reverse direction, so + * it is actually an increase. */ + bheap_delete(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(crm_cpu_entries, linked_on).hn); + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(crm_cpu_entries, linked_on).hn); + } else { + /* holder may be queued: first stop queue changes */ + raw_spin_lock(&cluster->domain.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); + + /* We need to update the position of holder in some + * heap. Note that this could be a release heap if we + * budget enforcement is used and this job overran. */ + check_preempt = !bheap_decrease(rm_ready_order, tsk_rt(t)->heap_node); + + } else { + /* Nothing to do: if it is not queued and not linked + * then it is either sleeping or currently being moved + * by other code (e.g., a timer interrupt handler) that + * will use the correct priority when enqueuing the + * task. 
*/ + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); + } + raw_spin_unlock(&cluster->domain.release_lock); + + /* If holder was enqueued in a release heap, then the following + * preemption check is pointless, but we can't easily detect + * that case. If you want to fix this, then consider that + * simply adding a state flag requires O(n) time to update when + * releasing n tasks, which conflicts with the goal to have + * O(log n) merges. */ + if (check_preempt) { + /* heap_decrease() hit the top level of the heap: make + * sure preemption checks get the right task, not the + * potentially stale cache. */ + bheap_uncache_min(rm_ready_order, &cluster->domain.ready_queue); + check_for_preemptions(cluster); + } + } +} + +/* called with IRQs off */ +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + crm_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->crm_lock); + + __set_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&cluster->crm_lock); +} + + +/* called with IRQs off */ +static void __clear_priority_inheritance(struct task_struct* t) +{ + TRACE_TASK(t, "priority restored\n"); + + if(tsk_rt(t)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(t, NULL); + + tsk_rt(t)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(t); + crm_job_arrival(t); + } + else + { + __set_priority_inheritance(t, NULL); + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); + + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(tsk_rt(t)->cur_klitirqd); + crm_job_arrival(tsk_rt(t)->cur_klitirqd); + } + else + { + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); + } + } +#endif +} + +/* called with IRQs off */ +static void clear_priority_inheritance(struct task_struct* t) +{ + crm_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->crm_lock); + __clear_priority_inheritance(t); + raw_spin_unlock(&cluster->crm_lock); +} + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +/* called with IRQs off */ +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + crm_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->crm_lock); + + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } + + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); + + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __set_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? 
+ new_owner : + tsk_rt(new_owner)->inh_task); + + raw_spin_unlock(&cluster->crm_lock); +} + +/* called with IRQs off */ +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ + crm_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->crm_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) + { + tsk_rt(klitirqd)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(klitirqd); + crm_job_arrival(klitirqd); + } + else + { + __set_priority_inheritance(klitirqd, NULL); + } + + tsk_rt(old_owner)->cur_klitirqd = NULL; + + raw_spin_unlock(&cluster->crm_lock); +} +#endif // CONFIG_LITMUS_SOFTIRQD + + +/* ******************** KFMLP support ********************** */ + +/* struct for semaphore with priority inheritance */ +struct kfmlp_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ +}; + +struct kfmlp_semaphore +{ + struct litmus_lock litmus_lock; + + spinlock_t lock; + + int num_resources; /* aka k */ + struct kfmlp_queue *queues; /* array */ + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ +}; + +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct kfmlp_semaphore, litmus_lock); +} + +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, + struct kfmlp_queue* queue) +{ + return (queue - &sem->queues[0]); +} + +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, + struct task_struct* holder) +{ + int i; + for(i = 0; i < sem->num_resources; ++i) + if(sem->queues[i].owner == holder) + return(&sem->queues[i]); + return(NULL); +} + +/* caller is responsible for locking */ +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + + /* Compare task prios, find high prio task. */ + if (queued != skip && rm_higher_prio(queued, found)) + found = queued; + } + return found; +} + +static inline struct kfmlp_queue* kfmlp_find_shortest( + struct kfmlp_semaphore* sem, + struct kfmlp_queue* search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct kfmlp_queue* step = search_start; + struct kfmlp_queue* shortest = sem->shortest_queue; + + do + { + step = (step+1 != &sem->queues[sem->num_resources]) ? 
+ step+1 : &sem->queues[0]; + if(step->count < shortest->count) + { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + }while(step != search_start); + + return(shortest); +} + +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) +{ + /* must hold sem->lock */ + + struct kfmlp_queue *my_queue = NULL; + struct task_struct *max_hp = NULL; + + + struct list_head *pos; + struct task_struct *queued; + int i; + + for(i = 0; i < sem->num_resources; ++i) + { + if( (sem->queues[i].count > 1) && + ((my_queue == NULL) || + (rm_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + { + my_queue = &sem->queues[i]; + } + } + + if(my_queue) + { + crm_domain_t* cluster; + + max_hp = my_queue->hp_waiter; + BUG_ON(!max_hp); + + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", + kfmlp_get_idx(sem, my_queue), + max_hp->comm, max_hp->pid, + kfmlp_get_idx(sem, my_queue)); + + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); + + /* + if(my_queue->hp_waiter) + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->hp_waiter->comm, + my_queue->hp_waiter->pid); + else + TRACE_CUR("queue %d: new hp_waiter is %p\n", + kfmlp_get_idx(sem, my_queue), NULL); + */ + + cluster = task_cpu_cluster(max_hp); + + raw_spin_lock(&cluster->crm_lock); + + /* + if(my_queue->owner) + TRACE_CUR("queue %d: owner is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->owner->comm, + my_queue->owner->pid); + else + TRACE_CUR("queue %d: owner is %p\n", + kfmlp_get_idx(sem, my_queue), + NULL); + */ + + if(tsk_rt(my_queue->owner)->inh_task == max_hp) + { + __clear_priority_inheritance(my_queue->owner); + if(my_queue->hp_waiter != NULL) + { + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + raw_spin_unlock(&cluster->crm_lock); + + list_for_each(pos, &my_queue->wait.task_list) + { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + /* Compare task prios, find high prio task. */ + if (queued == max_hp) + { + /* + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", + kfmlp_get_idx(sem, my_queue)); + */ + __remove_wait_queue(&my_queue->wait, + list_entry(pos, wait_queue_t, task_list)); + break; + } + } + --(my_queue->count); + } + + return(max_hp); +} + +int crm_kfmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue* my_queue; + wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = sem->shortest_queue; + + if (my_queue->owner) { + /* resource is not free => must suspend and wait */ + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", + kfmlp_get_idx(sem, my_queue)); + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); + + /* check if we need to activate priority inheritance */ + if (rm_higher_prio(t, my_queue->hp_waiter)) + { + my_queue->hp_waiter = t; + if (rm_higher_prio(t, my_queue->owner)) + { + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->lock, flags); + + /* We depend on the FIFO order. 
Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release (or steal). + */ + schedule(); + + + if(my_queue->owner == t) + { + TRACE_CUR("queue %d: acquired through waiting\n", + kfmlp_get_idx(sem, my_queue)); + } + else + { + /* this case may happen if our wait entry was stolen + between queues. record where we went.*/ + my_queue = kfmlp_get_queue(sem, t); + BUG_ON(!my_queue); + TRACE_CUR("queue %d: acquired through stealing\n", + kfmlp_get_idx(sem, my_queue)); + } + } + else + { + TRACE_CUR("queue %d: acquired immediately\n", + kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = t; + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + spin_unlock_irqrestore(&sem->lock, flags); + } + + return kfmlp_get_idx(sem, my_queue); +} + +int crm_kfmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + + if (!my_queue) { + err = -EINVAL; + goto out; + } + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&my_queue->wait); + if (next) { + /* + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + /* next becomes the resouce holder */ + my_queue->owner = next; + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", + kfmlp_get_idx(sem, my_queue), next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == my_queue->hp_waiter) { + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); + if (my_queue->hp_waiter) + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); + else + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); + } else { + /* Well, if next is not the highest-priority waiter, + * then it ought to inherit the highest-priority + * waiter's priority. 
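+			 *
+			 * Worked example (illustrative; under RM, a shorter
+			 * period means higher priority): suppose 'next'
+			 * (period 50ms) enqueued first and a later waiter W
+			 * (period 10ms) is now hp_waiter. FIFO order hands
+			 * the replica to 'next', but the call below sets
+			 * tsk_rt(next)->inh_task = W, so 'next' is scheduled
+			 * with W's priority until it unlocks and
+			 * clear_priority_inheritance() restores its own.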
*/ + set_priority_inheritance(next, my_queue->hp_waiter); + } + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); + + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ + + /* + if(next) + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + + my_queue->owner = next; + + if(next) + { + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + } + } + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + clear_priority_inheritance(t); + +out: + spin_unlock_irqrestore(&sem->lock, flags); + + return err; +} + +int crm_kfmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + owner = (my_queue) ? (my_queue->owner == t) : 0; + + spin_unlock_irqrestore(&sem->lock, flags); + + if (owner) + crm_kfmlp_unlock(l); + + return 0; +} + +void crm_kfmlp_free(struct litmus_lock* l) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + kfree(sem->queues); + kfree(sem); +} + +static struct litmus_lock_ops crm_kfmlp_lock_ops = { + .close = crm_kfmlp_close, + .lock = crm_kfmlp_lock, + .unlock = crm_kfmlp_unlock, + .deallocate = crm_kfmlp_free, +}; + +static struct litmus_lock* crm_new_kfmlp(void* __user arg, int* ret_code) +{ + struct kfmlp_semaphore* sem; + int num_resources = 0; + int i; + + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(num_resources < 1) + { + *ret_code = -EINVAL; + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + *ret_code = -ENOMEM; + return NULL; + } + + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); + if(!sem->queues) + { + kfree(sem); + *ret_code = -ENOMEM; + return NULL; + } + + sem->litmus_lock.ops = &crm_kfmlp_lock_ops; + spin_lock_init(&sem->lock); + sem->num_resources = num_resources; + + for(i = 0; i < num_resources; ++i) + { + sem->queues[i].owner = NULL; + sem->queues[i].hp_waiter = NULL; + init_waitqueue_head(&sem->queues[i].wait); + sem->queues[i].count = 0; + } + + sem->shortest_queue = &sem->queues[0]; + + *ret_code = 0; + return &sem->litmus_lock; +} + + +/* **** lock constructor **** */ + +static long crm_allocate_lock(struct litmus_lock **lock, int type, + void* __user arg) +{ + int err = -ENXIO; + + /* C-RM currently only supports the FMLP for global resources + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! 
*/ + switch (type) { + case KFMLP_SEM: + *lock = crm_new_kfmlp(arg, &err); + break; + }; + + return err; +} + +#endif // CONFIG_LITMUS_LOCKING + + + + + + +/* total number of cluster */ +static int num_clusters; +/* we do not support cluster of different sizes */ +static unsigned int cluster_size; + +#ifdef VERBOSE_INIT +static void print_cluster_topology(cpumask_var_t mask, int cpu) +{ + int chk; + char buf[255]; + + chk = cpulist_scnprintf(buf, 254, mask); + buf[chk] = '\0'; + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); + +} +#endif + +static int clusters_allocated = 0; + +static void cleanup_crm(void) +{ + int i; + + if (clusters_allocated) { + for (i = 0; i < num_clusters; i++) { + kfree(crm[i].cpus); + kfree(crm[i].heap_node); + free_cpumask_var(crm[i].cpu_map); + } + + kfree(crm); + } +} + +static long crm_activate_plugin(void) +{ + int i, j, cpu, ccpu, cpu_count; + cpu_entry_t *entry; + + cpumask_var_t mask; + int chk = 0; + + /* de-allocate old clusters, if any */ + cleanup_crm(); + + printk(KERN_INFO "C-RM: Activate Plugin, cluster configuration = %d\n", + cluster_config); + + /* need to get cluster_size first */ + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) + return -ENOMEM; + + if (unlikely(cluster_config == GLOBAL_CLUSTER)) { + cluster_size = num_online_cpus(); + } else { + chk = get_shared_cpu_map(mask, 0, cluster_config); + if (chk) { + /* if chk != 0 then it is the max allowed index */ + printk(KERN_INFO "C-RM: Cluster configuration = %d " + "is not supported on this hardware.\n", + cluster_config); + /* User should notice that the configuration failed, so + * let's bail out. */ + return -EINVAL; + } + + cluster_size = cpumask_weight(mask); + } + + if ((num_online_cpus() % cluster_size) != 0) { + /* this can't be right, some cpus are left out */ + printk(KERN_ERR "C-RM: Trying to group %d cpus in %d!\n", + num_online_cpus(), cluster_size); + return -1; + } + + num_clusters = num_online_cpus() / cluster_size; + printk(KERN_INFO "C-RM: %d cluster(s) of size = %d\n", + num_clusters, cluster_size); + + /* initialize clusters */ + crm = kmalloc(num_clusters * sizeof(crm_domain_t), GFP_ATOMIC); + for (i = 0; i < num_clusters; i++) { + + crm[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), + GFP_ATOMIC); + crm[i].heap_node = kmalloc( + cluster_size * sizeof(struct bheap_node), + GFP_ATOMIC); + bheap_init(&(crm[i].cpu_heap)); + rm_domain_init(&(crm[i].domain), NULL, crm_release_jobs); + + if(!zalloc_cpumask_var(&crm[i].cpu_map, GFP_ATOMIC)) + return -ENOMEM; + } + + /* cycle through cluster and add cpus to them */ + for (i = 0; i < num_clusters; i++) { + + for_each_online_cpu(cpu) { + /* check if the cpu is already in a cluster */ + for (j = 0; j < num_clusters; j++) + if (cpumask_test_cpu(cpu, crm[j].cpu_map)) + break; + /* if it is in a cluster go to next cpu */ + if (j < num_clusters && + cpumask_test_cpu(cpu, crm[j].cpu_map)) + continue; + + /* this cpu isn't in any cluster */ + /* get the shared cpus */ + if (unlikely(cluster_config == GLOBAL_CLUSTER)) + cpumask_copy(mask, cpu_online_mask); + else + get_shared_cpu_map(mask, cpu, cluster_config); + + cpumask_copy(crm[i].cpu_map, mask); +#ifdef VERBOSE_INIT + print_cluster_topology(mask, cpu); +#endif + /* add cpus to current cluster and init cpu_entry_t */ + cpu_count = 0; + for_each_cpu(ccpu, crm[i].cpu_map) { + + entry = &per_cpu(crm_cpu_entries, ccpu); + crm[i].cpus[cpu_count] = entry; + atomic_set(&entry->will_schedule, 0); + entry->cpu = ccpu; + entry->cluster = &crm[i]; + entry->hn = 
&(crm[i].heap_node[cpu_count]); + bheap_node_init(&entry->hn, entry); + + cpu_count++; + + entry->linked = NULL; + entry->scheduled = NULL; + update_cpu_position(entry); + } + /* done with this cluster */ + break; + } + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + { + /* distribute the daemons evenly across the clusters. */ + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; + int left_over = NR_LITMUS_SOFTIRQD % num_clusters; + + int daemon = 0; + for(i = 0; i < num_clusters; ++i) + { + int num_on_this_cluster = num_daemons_per_cluster; + if(left_over) + { + ++num_on_this_cluster; + --left_over; + } + + for(j = 0; j < num_on_this_cluster; ++j) + { + // first CPU of this cluster + affinity[daemon++] = i*cluster_size; + } + } + + spawn_klitirqd(affinity); + + kfree(affinity); + } +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif + + free_cpumask_var(mask); + clusters_allocated = 1; + return 0; +} + +/* Plugin object */ +static struct sched_plugin crm_plugin __cacheline_aligned_in_smp = { + .plugin_name = "C-RM", + .finish_switch = crm_finish_switch, + .tick = crm_tick, + .task_new = crm_task_new, + .complete_job = complete_job, + .task_exit = crm_task_exit, + .schedule = crm_schedule, + .task_wake_up = crm_task_wake_up, + .task_block = crm_task_block, + .admit_task = crm_admit_task, + .activate_plugin = crm_activate_plugin, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = crm_allocate_lock, + .set_prio_inh = set_priority_inheritance, + .clear_prio_inh = clear_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, +#endif +}; + +static struct proc_dir_entry *cluster_file = NULL, *crm_dir = NULL; + +static int __init init_crm(void) +{ + int err, fs; + + err = register_sched_plugin(&crm_plugin); + if (!err) { + fs = make_plugin_proc_dir(&crm_plugin, &crm_dir); + if (!fs) + cluster_file = create_cluster_file(crm_dir, &cluster_config); + else + printk(KERN_ERR "Could not allocate C-RM procfs dir.\n"); + } + return err; +} + +static void clean_crm(void) +{ + cleanup_crm(); + if (cluster_file) + remove_proc_entry("cluster", crm_dir); + if (crm_dir) + remove_plugin_proc_dir(&crm_plugin); +} + +module_init(init_crm); +module_exit(clean_crm); diff --git a/litmus/sched_crm_srt.c b/litmus/sched_crm_srt.c new file mode 100644 index 000000000000..4473f35e64cd --- /dev/null +++ b/litmus/sched_crm_srt.c @@ -0,0 +1,1611 @@ +/* + * litmus/sched_crm_srt.c + * + * Implementation of the C-RM-SRT scheduling algorithm. + * + * This implementation is based on G-EDF: + * - CPUs are clustered around L2 or L3 caches. + * - Clusters topology is automatically detected (this is arch dependent + * and is working only on x86 at the moment --- and only with modern + * cpus that exports cpuid4 information) + * - The plugins _does not_ attempt to put tasks in the right cluster i.e. + * the programmer needs to be aware of the topology to place tasks + * in the desired cluster + * - default clustering is around L2 cache (cache index = 2) + * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all + * online_cpus are placed in a single cluster). + * + * For details on functions, take a look at sched_gsn_edf.c + * + * Currently, we do not support changes in the number of online cpus. + * If the num_online_cpus() dynamically changes, the plugin is broken. 
+ * + * This version uses the simple approach and serializes all scheduling + * decisions by the use of a queue lock. This is probably not the + * best way to do it, but it should suffice for now. + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +/* to configure the cluster size */ +#include + +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + +/* Reference configuration variable. Determines which cache level is used to + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that + * all CPUs form a single cluster (just like GSN-EDF). + */ +static enum cache_level cluster_config = GLOBAL_CLUSTER; + +struct clusterdomain; + +/* cpu_entry_t - maintain the linked and scheduled state + * + * A cpu also contains a pointer to the crm_srt_domain_t cluster + * that owns it (struct clusterdomain*) + */ +typedef struct { + int cpu; + struct clusterdomain* cluster; /* owning cluster */ + struct task_struct* linked; /* only RT tasks */ + struct task_struct* scheduled; /* only RT tasks */ + atomic_t will_schedule; /* prevent unneeded IPIs */ + struct bheap_node* hn; +} cpu_entry_t; + +/* one cpu_entry_t per CPU */ +DEFINE_PER_CPU(cpu_entry_t, crm_srt_cpu_entries); + +#define set_will_schedule() \ + (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 1)) +#define clear_will_schedule() \ + (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 0)) +#define test_will_schedule(cpu) \ + (atomic_read(&per_cpu(crm_srt_cpu_entries, cpu).will_schedule)) + +/* + * In C-RM-SRT there is a crm_srt domain _per_ cluster + * The number of clusters is dynamically determined accordingly to the + * total cpu number and the cluster size + */ +typedef struct clusterdomain { + /* rt_domain for this cluster */ + rt_domain_t domain; + /* cpus in this cluster */ + cpu_entry_t* *cpus; + /* map of this cluster cpus */ + cpumask_var_t cpu_map; + /* the cpus queue themselves according to priority in here */ + struct bheap_node *heap_node; + struct bheap cpu_heap; + /* lock for this cluster */ +#define crm_srt_lock domain.ready_lock +} crm_srt_domain_t; + +/* a crm_srt_domain per cluster; allocation is done at init/activation time */ +crm_srt_domain_t *crm_srt; + +#define remote_cluster(cpu) ((crm_srt_domain_t *) per_cpu(crm_srt_cpu_entries, cpu).cluster) +#define task_cpu_cluster(task) remote_cluster(get_partition(task)) + +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose + * information during the initialization of the plugin (e.g., topology) +#define WANT_ALL_SCHED_EVENTS + */ +#define VERBOSE_INIT + +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) +{ + cpu_entry_t *a, *b; + a = _a->value; + b = _b->value; + /* Note that a and b are inverted: we want the lowest-priority CPU at + * the top of the heap. + */ + return rm_srt_higher_prio(b->linked, a->linked); +} + +/* update_cpu_position - Move the cpu entry to the correct place to maintain + * order in the cpu queue. Caller must hold crm_srt lock. 
+ */ +static void update_cpu_position(cpu_entry_t *entry) +{ + crm_srt_domain_t *cluster = entry->cluster; + + if (likely(bheap_node_in_heap(entry->hn))) + bheap_delete(cpu_lower_prio, + &cluster->cpu_heap, + entry->hn); + + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); +} + +/* caller must hold crm_srt lock */ +static cpu_entry_t* lowest_prio_cpu(crm_srt_domain_t *cluster) +{ + struct bheap_node* hn; + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); + return hn->value; +} + + +/* link_task_to_cpu - Update the link of a CPU. + * Handles the case where the to-be-linked task is already + * scheduled on a different CPU. + */ +static noinline void link_task_to_cpu(struct task_struct* linked, + cpu_entry_t *entry) +{ + cpu_entry_t *sched; + struct task_struct* tmp; + int on_cpu; + + BUG_ON(linked && !is_realtime(linked)); + + /* Currently linked task is set to be unlinked. */ + if (entry->linked) { + entry->linked->rt_param.linked_on = NO_CPU; + } + + /* Link new task to CPU. */ + if (linked) { + set_rt_flags(linked, RT_F_RUNNING); + /* handle task is already scheduled somewhere! */ + on_cpu = linked->rt_param.scheduled_on; + if (on_cpu != NO_CPU) { + sched = &per_cpu(crm_srt_cpu_entries, on_cpu); + /* this should only happen if not linked already */ + BUG_ON(sched->linked == linked); + + /* If we are already scheduled on the CPU to which we + * wanted to link, we don't need to do the swap -- + * we just link ourselves to the CPU and depend on + * the caller to get things right. + */ + if (entry != sched) { + TRACE_TASK(linked, + "already scheduled on %d, updating link.\n", + sched->cpu); + tmp = sched->linked; + linked->rt_param.linked_on = sched->cpu; + sched->linked = linked; + update_cpu_position(sched); + linked = tmp; + } + } + if (linked) /* might be NULL due to swap */ + linked->rt_param.linked_on = entry->cpu; + } + entry->linked = linked; +#ifdef WANT_ALL_SCHED_EVENTS + if (linked) + TRACE_TASK(linked, "linked to %d.\n", entry->cpu); + else + TRACE("NULL linked to %d.\n", entry->cpu); +#endif + update_cpu_position(entry); +} + +/* unlink - Make sure a task is not linked any longer to an entry + * where it was linked before. Must hold crm_srt_lock. + */ +static noinline void unlink(struct task_struct* t) +{ + cpu_entry_t *entry; + + if (t->rt_param.linked_on != NO_CPU) { + /* unlink */ + entry = &per_cpu(crm_srt_cpu_entries, t->rt_param.linked_on); + t->rt_param.linked_on = NO_CPU; + link_task_to_cpu(NULL, entry); + } else if (is_queued(t)) { + /* This is an interesting situation: t is scheduled, + * but was just recently unlinked. It cannot be + * linked anywhere else (because then it would have + * been relinked to this CPU), thus it must be in some + * queue. We must remove it from the list in this + * case. + * + * in C-RM-SRT case is should be somewhere in the queue for + * its domain, therefore and we can get the domain using + * task_cpu_cluster + */ + remove(&(task_cpu_cluster(t))->domain, t); + } +} + + +/* preempt - force a CPU to reschedule + */ +static void preempt(cpu_entry_t *entry) +{ + preempt_if_preemptable(entry->scheduled, entry->cpu); +} + +/* requeue - Put an unlinked task into gsn-edf domain. + * Caller must hold crm_srt_lock. 
+ */ +static noinline void requeue(struct task_struct* task) +{ + crm_srt_domain_t *cluster = task_cpu_cluster(task); + BUG_ON(!task); + /* sanity check before insertion */ + BUG_ON(is_queued(task)); + + if (is_released(task, litmus_clock())) + __add_ready(&cluster->domain, task); + else { + /* it has got to wait */ + add_release(&cluster->domain, task); + } +} + +#ifdef CONFIG_SCHED_CPU_AFFINITY +static cpu_entry_t* crm_srt_get_nearest_available_cpu( + crm_srt_domain_t *cluster, cpu_entry_t* start) +{ + cpu_entry_t* affinity; + + get_nearest_available_cpu(affinity, start, crm_srt_cpu_entries, -1); + + /* make sure CPU is in our cluster */ + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) + return(affinity); + else + return(NULL); +} +#endif + + +/* check for any necessary preemptions */ +static void check_for_preemptions(crm_srt_domain_t *cluster) +{ + struct task_struct *task; + cpu_entry_t *last; + + for(last = lowest_prio_cpu(cluster); + rm_srt_preemption_needed(&cluster->domain, last->linked); + last = lowest_prio_cpu(cluster)) { + /* preemption necessary */ + task = __take_ready(&cluster->domain); +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = + crm_srt_get_nearest_available_cpu(cluster, + &per_cpu(crm_srt_cpu_entries, task_cpu(task))); + if(affinity) + last = affinity; + else if(last->linked) + requeue(last->linked); + } +#else + if (last->linked) + requeue(last->linked); +#endif + TRACE("check_for_preemptions: attempting to link task %d to %d\n", + task->pid, last->cpu); + link_task_to_cpu(task, last); + preempt(last); + } +} + +/* crm_srt_job_arrival: task is either resumed or released */ +static noinline void crm_srt_job_arrival(struct task_struct* task) +{ + crm_srt_domain_t *cluster = task_cpu_cluster(task); + BUG_ON(!task); + + requeue(task); + check_for_preemptions(cluster); +} + +static void crm_srt_release_jobs(rt_domain_t* rt, struct bheap* tasks) +{ + crm_srt_domain_t* cluster = container_of(rt, crm_srt_domain_t, domain); + unsigned long flags; + + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); + + __merge_ready(&cluster->domain, tasks); + check_for_preemptions(cluster); + + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); +} + +/* caller holds crm_srt_lock */ +static noinline void job_completion(struct task_struct *t, int forced) +{ + BUG_ON(!t); + + sched_trace_task_completion(t, forced); + +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + + TRACE_TASK(t, "job_completion().\n"); + + /* set flags */ + set_rt_flags(t, RT_F_SLEEP); + /* prepare for next period */ + prepare_for_next_period(t); + if (is_released(t, litmus_clock())) + sched_trace_task_release(t); + /* unlink */ + unlink(t); + /* requeue + * But don't requeue a blocking task. */ + if (is_running(t)) + crm_srt_job_arrival(t); +} + +/* crm_srt_tick - this function is called for every local timer + * interrupt. 
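check_for_preemptions() above keeps taking the highest-priority ready job as long as it beats the job linked to the cluster's lowest-priority CPU. A toy, self-contained version of that loop; integer priorities and a linear scan stand in for rm_srt_preemption_needed() and the per-cluster CPU bheap, and the requeue of the displaced job is only noted in a comment.

#include <stdio.h>

#define NCPUS	2
#define IDLE	0	/* "priority" of an idle CPU; larger number = higher priority */

static int cpu_prio[NCPUS] = { IDLE, IDLE };	/* priority of each CPU's linked job */

static int lowest_prio_cpu(void)
{
	int c, low = 0;
	for (c = 1; c < NCPUS; c++)
		if (cpu_prio[c] < cpu_prio[low])
			low = c;
	return low;
}

/* 'ready' holds job priorities, highest first; returns how many got linked */
static int check_for_preemptions_toy(int *ready, int nready)
{
	int linked = 0;

	while (nready) {
		int cpu = lowest_prio_cpu();
		if (ready[0] <= cpu_prio[cpu])
			break;			/* no CPU would be preempted */
		/* the real code requeues the displaced job (cpu_prio[cpu]) here */
		cpu_prio[cpu] = ready[0];	/* link the highest-priority job */
		ready++;
		nready--;
		linked++;
	}
	return linked;
}

int main(void)
{
	int ready[] = { 9, 7, 3 };
	printf("linked %d job(s)\n", check_for_preemptions_toy(ready, 3));	/* 2 */
	return 0;
}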
+ * + * checks whether the current task has expired and checks + * whether we need to preempt it if it has not expired + */ +static void crm_srt_tick(struct task_struct* t) +{ + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { + if (!is_np(t)) { + /* np tasks will be preempted when they become + * preemptable again + */ + litmus_reschedule_local(); + set_will_schedule(); + TRACE("crm_srt_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("crm_srt_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } + } +} + +/* Getting schedule() right is a bit tricky. schedule() may not make any + * assumptions on the state of the current task since it may be called for a + * number of reasons. The reasons include a scheduler_tick() determined that it + * was necessary, because sys_exit_np() was called, because some Linux + * subsystem determined so, or even (in the worst case) because there is a bug + * hidden somewhere. Thus, we must take extreme care to determine what the + * current state is. + * + * The CPU could currently be scheduling a task (or not), be linked (or not). + * + * The following assertions for the scheduled task could hold: + * + * - !is_running(scheduled) // the job blocks + * - scheduled->timeslice == 0 // the job completed (forcefully) + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) + * - linked != scheduled // we need to reschedule (for any reason) + * - is_np(scheduled) // rescheduling must be delayed, + * sys_exit_np must be requested + * + * Any of these can occur together. + */ +static struct task_struct* crm_srt_schedule(struct task_struct * prev) +{ + cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries); + crm_srt_domain_t *cluster = entry->cluster; + int out_of_time, sleep, preempt, np, exists, blocks; + struct task_struct* next = NULL; + + raw_spin_lock(&cluster->crm_srt_lock); + clear_will_schedule(); + + /* sanity checking */ + BUG_ON(entry->scheduled && entry->scheduled != prev); + BUG_ON(entry->scheduled && !is_realtime(prev)); + BUG_ON(is_realtime(prev) && !entry->scheduled); + + /* (0) Determine state */ + exists = entry->scheduled != NULL; + blocks = exists && !is_running(entry->scheduled); + out_of_time = exists && + budget_enforced(entry->scheduled) && + budget_exhausted(entry->scheduled); + np = exists && is_np(entry->scheduled); + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; + preempt = entry->scheduled != entry->linked; + +#ifdef WANT_ALL_SCHED_EVENTS + TRACE_TASK(prev, "invoked crm_srt_schedule.\n"); +#endif + + if (exists) + TRACE_TASK(prev, + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " + "state:%d sig:%d\n", + blocks, out_of_time, np, sleep, preempt, + prev->state, signal_pending(prev)); + if (entry->linked && preempt) + TRACE_TASK(prev, "will be preempted by %s/%d\n", + entry->linked->comm, entry->linked->pid); + + + /* If a task blocks we have no choice but to reschedule. + */ + if (blocks) + unlink(entry->scheduled); + + /* Request a sys_exit_np() call if we would like to preempt but cannot. + * We need to make sure to update the link structure anyway in case + * that we are still linked. Multiple calls to request_exit_np() don't + * hurt. + */ + if (np && (out_of_time || preempt || sleep)) { + unlink(entry->scheduled); + request_exit_np(entry->scheduled); + } + + /* Any task that is preemptable and either exhausts its execution + * budget or wants to sleep completes. 
We may have to reschedule after + * this. Don't do a job completion if we block (can't have timers running + * for blocked jobs). Preemption go first for the same reason. + */ + if (!np && (out_of_time || sleep) && !blocks && !preempt) + job_completion(entry->scheduled, !sleep); + + /* Link pending task if we became unlinked. + */ + if (!entry->linked) + link_task_to_cpu(__take_ready(&cluster->domain), entry); + + /* The final scheduling decision. Do we need to switch for some reason? + * If linked is different from scheduled, then select linked as next. + */ + if ((!np || blocks) && + entry->linked != entry->scheduled) { + /* Schedule a linked job? */ + if (entry->linked) { + entry->linked->rt_param.scheduled_on = entry->cpu; + next = entry->linked; + } + if (entry->scheduled) { + /* not gonna be scheduled soon */ + entry->scheduled->rt_param.scheduled_on = NO_CPU; + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); + } + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. + */ + if (exists) + next = prev; + + sched_state_task_picked(); + raw_spin_unlock(&cluster->crm_srt_lock); + +#ifdef WANT_ALL_SCHED_EVENTS + TRACE("crm_srt_lock released, next=0x%p\n", next); + + if (next) + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); + else if (exists && !next) + TRACE("becomes idle at %llu.\n", litmus_clock()); +#endif + + + return next; +} + + +/* _finish_switch - we just finished the switch away from prev + */ +static void crm_srt_finish_switch(struct task_struct *prev) +{ + cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries); + + entry->scheduled = is_realtime(current) ? current : NULL; +#ifdef WANT_ALL_SCHED_EVENTS + TRACE_TASK(prev, "switched away from\n"); +#endif +} + + +/* Prepare a task for running in RT mode + */ +static void crm_srt_task_new(struct task_struct * t, int on_rq, int running) +{ + unsigned long flags; + cpu_entry_t* entry; + crm_srt_domain_t* cluster; + + TRACE("gsn edf: task new %d\n", t->pid); + + /* the cluster doesn't change even if t is running */ + cluster = task_cpu_cluster(t); + + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); + + /* setup job params */ + release_at(t, litmus_clock()); + + if (running) { + entry = &per_cpu(crm_srt_cpu_entries, task_cpu(t)); + BUG_ON(entry->scheduled); + + entry->scheduled = t; + tsk_rt(t)->scheduled_on = task_cpu(t); + } else { + t->rt_param.scheduled_on = NO_CPU; + } + t->rt_param.linked_on = NO_CPU; + + crm_srt_job_arrival(t); + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); +} + +static void crm_srt_task_wake_up(struct task_struct *task) +{ + unsigned long flags; + //lt_t now; + crm_srt_domain_t *cluster; + + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); + + cluster = task_cpu_cluster(task); + + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); + +#if 0 // sporadic task model + /* We need to take suspensions because of semaphores into + * account! If a job resumes after being suspended due to acquiring + * a semaphore, it should never be treated as a new job release. 
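The #if 0 block around this wake-up path describes a sporadic policy: a job that resumes after its deadline has already passed gets a fresh release at the current time, otherwise it simply continues (the periodic view is what actually compiles in). A rough sketch of that policy, using illustrative types rather than the LITMUS^RT ones:

#include <stdio.h>

typedef unsigned long long lt_t;

struct toy_job {
	lt_t release;
	lt_t deadline;
	lt_t period;
};

static void sporadic_wake_up(struct toy_job *j, lt_t now)
{
	if (now >= j->deadline) {
		/* tardy: treat the resumption as a fresh sporadic release */
		j->release  = now;
		j->deadline = now + j->period;
	}
	/* otherwise: keep the current release/deadline (the periodic view) */
}

int main(void)
{
	struct toy_job j = { 0, 10, 10 };

	sporadic_wake_up(&j, 25);
	printf("release=%llu deadline=%llu\n", j.release, j.deadline);	/* 25, 35 */
	return 0;
}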
+ */ + if (get_rt_flags(task) == RT_F_EXIT_SEM) { + set_rt_flags(task, RT_F_RUNNING); + } else { + now = litmus_clock(); + if (is_tardy(task, now)) { + /* new sporadic release */ + release_at(task, now); + sched_trace_task_release(task); + } + else { + if (task->rt.time_slice) { + /* came back in time before deadline + */ + set_rt_flags(task, RT_F_RUNNING); + } + } + } +#endif + + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU); + set_rt_flags(task, RT_F_RUNNING); // periodic model + + if(tsk_rt(task)->linked_on == NO_CPU) + crm_srt_job_arrival(task); + else + TRACE("WTF, mate?!\n"); + + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); +} + +static void crm_srt_task_block(struct task_struct *t) +{ + unsigned long flags; + crm_srt_domain_t *cluster; + + TRACE_TASK(t, "block at %llu\n", litmus_clock()); + + cluster = task_cpu_cluster(t); + + /* unlink if necessary */ + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); + unlink(t); + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); + + BUG_ON(!is_realtime(t)); +} + + +static void crm_srt_task_exit(struct task_struct * t) +{ + unsigned long flags; + crm_srt_domain_t *cluster = task_cpu_cluster(t); + + /* unlink if necessary */ + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); + unlink(t); + if (tsk_rt(t)->scheduled_on != NO_CPU) { + cpu_entry_t *cpu; + cpu = &per_cpu(crm_srt_cpu_entries, tsk_rt(t)->scheduled_on); + cpu->scheduled = NULL; + tsk_rt(t)->scheduled_on = NO_CPU; + } + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); + + BUG_ON(!is_realtime(t)); + TRACE_TASK(t, "RIP\n"); +} + +static long crm_srt_admit_task(struct task_struct* tsk) +{ + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; +} + + + + + + + + + + + + + +#ifdef CONFIG_LITMUS_LOCKING + +#include + + +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + int linked_on; + int check_preempt = 0; + + crm_srt_domain_t* cluster = task_cpu_cluster(t); + + if(prio_inh != NULL) + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "inherits priority from %p\n", prio_inh); + + sched_trace_eff_prio_change(t, prio_inh); + + tsk_rt(t)->inh_task = prio_inh; + + linked_on = tsk_rt(t)->linked_on; + + /* If it is scheduled, then we need to reorder the CPU heap. */ + if (linked_on != NO_CPU) { + TRACE_TASK(t, "%s: linked on %d\n", + __FUNCTION__, linked_on); + /* Holder is scheduled; need to re-order CPUs. + * We can't use heap_decrease() here since + * the cpu_heap is ordered in reverse direction, so + * it is actually an increase. */ + bheap_delete(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(crm_srt_cpu_entries, linked_on).hn); + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(crm_srt_cpu_entries, linked_on).hn); + } else { + /* holder may be queued: first stop queue changes */ + raw_spin_lock(&cluster->domain.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); + + /* We need to update the position of holder in some + * heap. Note that this could be a release heap if we + * budget enforcement is used and this job overran. */ + check_preempt = !bheap_decrease(rm_srt_ready_order, tsk_rt(t)->heap_node); + + } else { + /* Nothing to do: if it is not queued and not linked + * then it is either sleeping or currently being moved + * by other code (e.g., a timer interrupt handler) that + * will use the correct priority when enqueuing the + * task. 
*/ + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); + } + raw_spin_unlock(&cluster->domain.release_lock); + + /* If holder was enqueued in a release heap, then the following + * preemption check is pointless, but we can't easily detect + * that case. If you want to fix this, then consider that + * simply adding a state flag requires O(n) time to update when + * releasing n tasks, which conflicts with the goal to have + * O(log n) merges. */ + if (check_preempt) { + /* heap_decrease() hit the top level of the heap: make + * sure preemption checks get the right task, not the + * potentially stale cache. */ + bheap_uncache_min(rm_srt_ready_order, &cluster->domain.ready_queue); + check_for_preemptions(cluster); + } + } +} + +/* called with IRQs off */ +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + crm_srt_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->crm_srt_lock); + + __set_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&cluster->crm_srt_lock); +} + + +/* called with IRQs off */ +static void __clear_priority_inheritance(struct task_struct* t) +{ + TRACE_TASK(t, "priority restored\n"); + + if(tsk_rt(t)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(t, NULL); + + tsk_rt(t)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(t); + crm_srt_job_arrival(t); + } + else + { + __set_priority_inheritance(t, NULL); + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); + + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(tsk_rt(t)->cur_klitirqd); + crm_srt_job_arrival(tsk_rt(t)->cur_klitirqd); + } + else + { + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); + } + } +#endif +} + +/* called with IRQs off */ +static void clear_priority_inheritance(struct task_struct* t) +{ + crm_srt_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->crm_srt_lock); + __clear_priority_inheritance(t); + raw_spin_unlock(&cluster->crm_srt_lock); +} + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +/* called with IRQs off */ +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->crm_srt_lock); + + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } + + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); + + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __set_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? 
+ new_owner : + tsk_rt(new_owner)->inh_task); + + raw_spin_unlock(&cluster->crm_srt_lock); +} + +/* called with IRQs off */ +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ + crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->crm_srt_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) + { + tsk_rt(klitirqd)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(klitirqd); + crm_srt_job_arrival(klitirqd); + } + else + { + __set_priority_inheritance(klitirqd, NULL); + } + + tsk_rt(old_owner)->cur_klitirqd = NULL; + + raw_spin_unlock(&cluster->crm_srt_lock); +} +#endif // CONFIG_LITMUS_SOFTIRQD + + +/* ******************** KFMLP support ********************** */ + +/* struct for semaphore with priority inheritance */ +struct kfmlp_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ +}; + +struct kfmlp_semaphore +{ + struct litmus_lock litmus_lock; + + spinlock_t lock; + + int num_resources; /* aka k */ + struct kfmlp_queue *queues; /* array */ + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ +}; + +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct kfmlp_semaphore, litmus_lock); +} + +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, + struct kfmlp_queue* queue) +{ + return (queue - &sem->queues[0]); +} + +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, + struct task_struct* holder) +{ + int i; + for(i = 0; i < sem->num_resources; ++i) + if(sem->queues[i].owner == holder) + return(&sem->queues[i]); + return(NULL); +} + +/* caller is responsible for locking */ +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + + /* Compare task prios, find high prio task. */ + if (queued != skip && rm_srt_higher_prio(queued, found)) + found = queued; + } + return found; +} + +static inline struct kfmlp_queue* kfmlp_find_shortest( + struct kfmlp_semaphore* sem, + struct kfmlp_queue* search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct kfmlp_queue* step = search_start; + struct kfmlp_queue* shortest = sem->shortest_queue; + + do + { + step = (step+1 != &sem->queues[sem->num_resources]) ? 
+ step+1 : &sem->queues[0]; + if(step->count < shortest->count) + { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + }while(step != search_start); + + return(shortest); +} + +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) +{ + /* must hold sem->lock */ + + struct kfmlp_queue *my_queue = NULL; + struct task_struct *max_hp = NULL; + + + struct list_head *pos; + struct task_struct *queued; + int i; + + for(i = 0; i < sem->num_resources; ++i) + { + if( (sem->queues[i].count > 1) && + ((my_queue == NULL) || + (rm_srt_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + { + my_queue = &sem->queues[i]; + } + } + + if(my_queue) + { + crm_srt_domain_t* cluster; + + max_hp = my_queue->hp_waiter; + BUG_ON(!max_hp); + + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", + kfmlp_get_idx(sem, my_queue), + max_hp->comm, max_hp->pid, + kfmlp_get_idx(sem, my_queue)); + + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); + + /* + if(my_queue->hp_waiter) + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->hp_waiter->comm, + my_queue->hp_waiter->pid); + else + TRACE_CUR("queue %d: new hp_waiter is %p\n", + kfmlp_get_idx(sem, my_queue), NULL); + */ + + cluster = task_cpu_cluster(max_hp); + + raw_spin_lock(&cluster->crm_srt_lock); + + /* + if(my_queue->owner) + TRACE_CUR("queue %d: owner is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->owner->comm, + my_queue->owner->pid); + else + TRACE_CUR("queue %d: owner is %p\n", + kfmlp_get_idx(sem, my_queue), + NULL); + */ + + if(tsk_rt(my_queue->owner)->inh_task == max_hp) + { + __clear_priority_inheritance(my_queue->owner); + if(my_queue->hp_waiter != NULL) + { + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + raw_spin_unlock(&cluster->crm_srt_lock); + + list_for_each(pos, &my_queue->wait.task_list) + { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + /* Compare task prios, find high prio task. */ + if (queued == max_hp) + { + /* + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", + kfmlp_get_idx(sem, my_queue)); + */ + __remove_wait_queue(&my_queue->wait, + list_entry(pos, wait_queue_t, task_list)); + break; + } + } + --(my_queue->count); + } + + return(max_hp); +} + +int crm_srt_kfmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue* my_queue; + wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = sem->shortest_queue; + + if (my_queue->owner) { + /* resource is not free => must suspend and wait */ + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", + kfmlp_get_idx(sem, my_queue)); + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); + + /* check if we need to activate priority inheritance */ + if (rm_srt_higher_prio(t, my_queue->hp_waiter)) + { + my_queue->hp_waiter = t; + if (rm_srt_higher_prio(t, my_queue->owner)) + { + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->lock, flags); + + /* We depend on the FIFO order. 
Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release (or steal). + */ + schedule(); + + + if(my_queue->owner == t) + { + TRACE_CUR("queue %d: acquired through waiting\n", + kfmlp_get_idx(sem, my_queue)); + } + else + { + /* this case may happen if our wait entry was stolen + between queues. record where we went.*/ + my_queue = kfmlp_get_queue(sem, t); + BUG_ON(!my_queue); + TRACE_CUR("queue %d: acquired through stealing\n", + kfmlp_get_idx(sem, my_queue)); + } + } + else + { + TRACE_CUR("queue %d: acquired immediately\n", + kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = t; + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + spin_unlock_irqrestore(&sem->lock, flags); + } + + return kfmlp_get_idx(sem, my_queue); +} + +int crm_srt_kfmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + + if (!my_queue) { + err = -EINVAL; + goto out; + } + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&my_queue->wait); + if (next) { + /* + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + /* next becomes the resouce holder */ + my_queue->owner = next; + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", + kfmlp_get_idx(sem, my_queue), next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == my_queue->hp_waiter) { + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); + if (my_queue->hp_waiter) + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); + else + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); + } else { + /* Well, if next is not the highest-priority waiter, + * then it ought to inherit the highest-priority + * waiter's priority. 
*/ + set_priority_inheritance(next, my_queue->hp_waiter); + } + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); + + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ + + /* + if(next) + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + + my_queue->owner = next; + + if(next) + { + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + } + } + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + clear_priority_inheritance(t); + +out: + spin_unlock_irqrestore(&sem->lock, flags); + + return err; +} + +int crm_srt_kfmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + owner = (my_queue) ? (my_queue->owner == t) : 0; + + spin_unlock_irqrestore(&sem->lock, flags); + + if (owner) + crm_srt_kfmlp_unlock(l); + + return 0; +} + +void crm_srt_kfmlp_free(struct litmus_lock* l) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + kfree(sem->queues); + kfree(sem); +} + +static struct litmus_lock_ops crm_srt_kfmlp_lock_ops = { + .close = crm_srt_kfmlp_close, + .lock = crm_srt_kfmlp_lock, + .unlock = crm_srt_kfmlp_unlock, + .deallocate = crm_srt_kfmlp_free, +}; + +static struct litmus_lock* crm_srt_new_kfmlp(void* __user arg, int* ret_code) +{ + struct kfmlp_semaphore* sem; + int num_resources = 0; + int i; + + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(num_resources < 1) + { + *ret_code = -EINVAL; + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + *ret_code = -ENOMEM; + return NULL; + } + + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); + if(!sem->queues) + { + kfree(sem); + *ret_code = -ENOMEM; + return NULL; + } + + sem->litmus_lock.ops = &crm_srt_kfmlp_lock_ops; + spin_lock_init(&sem->lock); + sem->num_resources = num_resources; + + for(i = 0; i < num_resources; ++i) + { + sem->queues[i].owner = NULL; + sem->queues[i].hp_waiter = NULL; + init_waitqueue_head(&sem->queues[i].wait); + sem->queues[i].count = 0; + } + + sem->shortest_queue = &sem->queues[0]; + + *ret_code = 0; + return &sem->litmus_lock; +} + + +/* **** lock constructor **** */ + +static long crm_srt_allocate_lock(struct litmus_lock **lock, int type, + void* __user arg) +{ + int err = -ENXIO; + + /* C-RM-SRT currently only supports the FMLP for global resources + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! 
*/ + switch (type) { + case KFMLP_SEM: + *lock = crm_srt_new_kfmlp(arg, &err); + break; + }; + + return err; +} + +#endif // CONFIG_LITMUS_LOCKING + + + + + + +/* total number of cluster */ +static int num_clusters; +/* we do not support cluster of different sizes */ +static unsigned int cluster_size; + +#ifdef VERBOSE_INIT +static void print_cluster_topology(cpumask_var_t mask, int cpu) +{ + int chk; + char buf[255]; + + chk = cpulist_scnprintf(buf, 254, mask); + buf[chk] = '\0'; + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); + +} +#endif + +static int clusters_allocated = 0; + +static void cleanup_crm_srt(void) +{ + int i; + + if (clusters_allocated) { + for (i = 0; i < num_clusters; i++) { + kfree(crm_srt[i].cpus); + kfree(crm_srt[i].heap_node); + free_cpumask_var(crm_srt[i].cpu_map); + } + + kfree(crm_srt); + } +} + +static long crm_srt_activate_plugin(void) +{ + int i, j, cpu, ccpu, cpu_count; + cpu_entry_t *entry; + + cpumask_var_t mask; + int chk = 0; + + /* de-allocate old clusters, if any */ + cleanup_crm_srt(); + + printk(KERN_INFO "C-RM-SRT: Activate Plugin, cluster configuration = %d\n", + cluster_config); + + /* need to get cluster_size first */ + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) + return -ENOMEM; + + if (unlikely(cluster_config == GLOBAL_CLUSTER)) { + cluster_size = num_online_cpus(); + } else { + chk = get_shared_cpu_map(mask, 0, cluster_config); + if (chk) { + /* if chk != 0 then it is the max allowed index */ + printk(KERN_INFO "C-RM-SRT: Cluster configuration = %d " + "is not supported on this hardware.\n", + cluster_config); + /* User should notice that the configuration failed, so + * let's bail out. */ + return -EINVAL; + } + + cluster_size = cpumask_weight(mask); + } + + if ((num_online_cpus() % cluster_size) != 0) { + /* this can't be right, some cpus are left out */ + printk(KERN_ERR "C-RM-SRT: Trying to group %d cpus in %d!\n", + num_online_cpus(), cluster_size); + return -1; + } + + num_clusters = num_online_cpus() / cluster_size; + printk(KERN_INFO "C-RM-SRT: %d cluster(s) of size = %d\n", + num_clusters, cluster_size); + + /* initialize clusters */ + crm_srt = kmalloc(num_clusters * sizeof(crm_srt_domain_t), GFP_ATOMIC); + for (i = 0; i < num_clusters; i++) { + + crm_srt[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), + GFP_ATOMIC); + crm_srt[i].heap_node = kmalloc( + cluster_size * sizeof(struct bheap_node), + GFP_ATOMIC); + bheap_init(&(crm_srt[i].cpu_heap)); + rm_srt_domain_init(&(crm_srt[i].domain), NULL, crm_srt_release_jobs); + + if(!zalloc_cpumask_var(&crm_srt[i].cpu_map, GFP_ATOMIC)) + return -ENOMEM; + } + + /* cycle through cluster and add cpus to them */ + for (i = 0; i < num_clusters; i++) { + + for_each_online_cpu(cpu) { + /* check if the cpu is already in a cluster */ + for (j = 0; j < num_clusters; j++) + if (cpumask_test_cpu(cpu, crm_srt[j].cpu_map)) + break; + /* if it is in a cluster go to next cpu */ + if (j < num_clusters && + cpumask_test_cpu(cpu, crm_srt[j].cpu_map)) + continue; + + /* this cpu isn't in any cluster */ + /* get the shared cpus */ + if (unlikely(cluster_config == GLOBAL_CLUSTER)) + cpumask_copy(mask, cpu_online_mask); + else + get_shared_cpu_map(mask, cpu, cluster_config); + + cpumask_copy(crm_srt[i].cpu_map, mask); +#ifdef VERBOSE_INIT + print_cluster_topology(mask, cpu); +#endif + /* add cpus to current cluster and init cpu_entry_t */ + cpu_count = 0; + for_each_cpu(ccpu, crm_srt[i].cpu_map) { + + entry = &per_cpu(crm_srt_cpu_entries, ccpu); + crm_srt[i].cpus[cpu_count] = entry; + 
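The cluster-formation loop in crm_srt_activate_plugin() above picks each online CPU that is not yet in any cluster and adopts that CPU's shared-cache mask as the next cluster's CPU map. A stand-alone sketch of the same scan, with plain bitmasks instead of cpumask_var_t and a made-up shared_mask() in place of get_shared_cpu_map(); unlike the kernel code, which precomputes num_clusters, the cluster count here simply falls out of the scan.

#include <stdio.h>

#define NCPUS	4

/* made-up topology: CPUs {0,1} share one cache, CPUs {2,3} share another */
static unsigned shared_mask(int cpu)
{
	return (cpu < 2) ? 0x3u : 0xcu;
}

int main(void)
{
	unsigned cluster_map[NCPUS] = { 0 };	/* at most one cluster per CPU */
	int num_clusters = 0, cpu, j, assigned;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		assigned = 0;
		for (j = 0; j < num_clusters; j++)
			if (cluster_map[j] & (1u << cpu))
				assigned = 1;
		if (assigned)
			continue;	/* already covered by an earlier cluster */
		cluster_map[num_clusters++] = shared_mask(cpu);
	}

	for (j = 0; j < num_clusters; j++)
		printf("cluster %d: cpu mask 0x%x\n", j, cluster_map[j]);
	return 0;
}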
atomic_set(&entry->will_schedule, 0); + entry->cpu = ccpu; + entry->cluster = &crm_srt[i]; + entry->hn = &(crm_srt[i].heap_node[cpu_count]); + bheap_node_init(&entry->hn, entry); + + cpu_count++; + + entry->linked = NULL; + entry->scheduled = NULL; + update_cpu_position(entry); + } + /* done with this cluster */ + break; + } + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + { + /* distribute the daemons evenly across the clusters. */ + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; + int left_over = NR_LITMUS_SOFTIRQD % num_clusters; + + int daemon = 0; + for(i = 0; i < num_clusters; ++i) + { + int num_on_this_cluster = num_daemons_per_cluster; + if(left_over) + { + ++num_on_this_cluster; + --left_over; + } + + for(j = 0; j < num_on_this_cluster; ++j) + { + // first CPU of this cluster + affinity[daemon++] = i*cluster_size; + } + } + + spawn_klitirqd(affinity); + + kfree(affinity); + } +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif + + free_cpumask_var(mask); + clusters_allocated = 1; + return 0; +} + +/* Plugin object */ +static struct sched_plugin crm_srt_plugin __cacheline_aligned_in_smp = { + .plugin_name = "C-RM-SRT", + .finish_switch = crm_srt_finish_switch, + .tick = crm_srt_tick, + .task_new = crm_srt_task_new, + .complete_job = complete_job, + .task_exit = crm_srt_task_exit, + .schedule = crm_srt_schedule, + .task_wake_up = crm_srt_task_wake_up, + .task_block = crm_srt_task_block, + .admit_task = crm_srt_admit_task, + .activate_plugin = crm_srt_activate_plugin, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = crm_srt_allocate_lock, + .set_prio_inh = set_priority_inheritance, + .clear_prio_inh = clear_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, +#endif +}; + +static struct proc_dir_entry *cluster_file = NULL, *crm_srt_dir = NULL; + +static int __init init_crm_srt(void) +{ + int err, fs; + + err = register_sched_plugin(&crm_srt_plugin); + if (!err) { + fs = make_plugin_proc_dir(&crm_srt_plugin, &crm_srt_dir); + if (!fs) + cluster_file = create_cluster_file(crm_srt_dir, &cluster_config); + else + printk(KERN_ERR "Could not allocate C-RM-SRT procfs dir.\n"); + } + return err; +} + +static void clean_crm_srt(void) +{ + cleanup_crm_srt(); + if (cluster_file) + remove_proc_entry("cluster", crm_srt_dir); + if (crm_srt_dir) + remove_plugin_proc_dir(&crm_srt_plugin); +} + +module_init(init_crm_srt); +module_exit(clean_crm_srt); diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index d04e0703c154..ac7685fe69f0 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1155,12 +1155,14 @@ static inline struct kfmlp_queue* kfmlp_find_shortest( { step = (step+1 != &sem->queues[sem->num_resources]) ? step+1 : &sem->queues[0]; + if(step->count < shortest->count) { shortest = step; if(step->count == 0) break; /* can't get any shorter */ } + }while(step != search_start); return(shortest); @@ -1369,7 +1371,9 @@ int gsnedf_kfmlp_unlock(struct litmus_lock* l) my_queue->owner = next; --(my_queue->count); - if(my_queue->count < sem->shortest_queue->count) + // the '=' of '<=' is a dumb method to attempt to build + // affinity until tasks can tell us where they ran last... 
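The KFMLP hunks above adjust the shortest-queue bookkeeping: the scan starts just past the queue that was last used and walks the k queues circularly, and relaxing the comparison to '<=' when a queue's count drops lets that queue reclaim "shortest" status on ties, the crude affinity the comment describes. A minimal sketch of the circular scan with plain integer counts instead of the kfmlp structs; note the kernel version seeds 'shortest' with the cached sem->shortest_queue rather than the start position.

#include <stdio.h>

#define K	3	/* number of replicas, i.e. queues */

static int find_shortest(const int count[K], int start)
{
	int shortest = start;
	int step = start;

	do {
		step = (step + 1) % K;
		if (count[step] < count[shortest]) {
			shortest = step;
			if (count[step] == 0)
				break;	/* cannot get any shorter */
		}
	} while (step != start);

	return shortest;
}

int main(void)
{
	int count[K] = { 2, 1, 1 };

	/* starting after queue 0 picks queue 1, spreading load over replicas */
	printf("shortest queue: %d\n", find_shortest(count, 0));
	return 0;
}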
+ if(my_queue->count <= sem->shortest_queue->count) { sem->shortest_queue = my_queue; } @@ -1428,7 +1432,9 @@ int gsnedf_kfmlp_unlock(struct litmus_lock* l) TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); --(my_queue->count); - if(my_queue->count < sem->shortest_queue->count) + // the '=' of '<=' is a dumb method to attempt to build + // affinity until tasks can tell us where they ran last... + if(my_queue->count <= sem->shortest_queue->count) { sem->shortest_queue = my_queue; } diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 8802670a4b0b..e393d749baf5 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -152,6 +152,14 @@ static void litmus_dummy_clear_prio_inh_klitirqd(struct task_struct* klitirqd, } #endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t) +{ + TRACE("PAI Tasklet unsupported in this plugin!!!!!!\n"); + return(0); // failure. +} +#endif + /* The default scheduler plugin. It doesn't do anything and lets Linux do its * job. @@ -176,6 +184,9 @@ struct sched_plugin linux_sched_plugin = { #ifdef CONFIG_LITMUS_SOFTIRQD .set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirq, .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, #endif .admit_task = litmus_dummy_admit_task }; diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 7aeb99b668d3..d079df2b292a 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c @@ -191,7 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id, if (rec) { rec->data.completion.when = now(); rec->data.completion.forced = forced; +#ifdef LITMUS_NVIDIA rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); +#endif put_record(rec); } } @@ -367,24 +369,29 @@ feather_callback void do_sched_trace_eff_prio_change(unsigned long id, } } - /* pray for no nesting of nv interrupts on same CPU... */ struct tracing_interrupt_map { int active; int count; unsigned long data[128]; // assume nesting less than 128... 
+ unsigned long serial[128]; }; DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing); + +DEFINE_PER_CPU(u32, intCounter); + feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, unsigned long _device) { struct st_event_record *rec; + u32 serialNum; { + u32* serial; struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); - if(int_map->active == 0xcafebabe) + if(!int_map->active == 0xcafebabe) { int_map->count++; } @@ -393,7 +400,12 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, int_map->active = 0xcafebabe; int_map->count = 1; } - int_map->data[int_map->count-1] = _device; + //int_map->data[int_map->count-1] = _device; + + serial = &per_cpu(intCounter, smp_processor_id()); + *serial += num_online_cpus(); + serialNum = *serial; + int_map->serial[int_map->count-1] = serialNum; } rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL); @@ -401,6 +413,7 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, u32 device = _device; rec->data.nv_interrupt_begin.when = now(); rec->data.nv_interrupt_begin.device = device; + rec->data.nv_interrupt_begin.serialNumber = serialNum; put_record(rec); } } @@ -416,7 +429,7 @@ int is_interrupt_tracing_active(void) } */ -feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long unused) +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device) { struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); if(int_map->active == 0xcafebabe) @@ -428,8 +441,11 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned int_map->active = 0; if(rec) { + u32 device = _device; rec->data.nv_interrupt_end.when = now(); - rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; + //rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; + rec->data.nv_interrupt_end.device = device; + rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count]; put_record(rec); } } diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c index d7d7d8bae298..5b7e6152416a 100644 --- a/litmus/sched_trace_external.c +++ b/litmus/sched_trace_external.c @@ -1,5 +1,6 @@ #include +#include #include #include @@ -38,8 +39,26 @@ void __sched_trace_nv_interrupt_begin_external(u32 device) } EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); -void __sched_trace_nv_interrupt_end_external(void) +void __sched_trace_nv_interrupt_end_external(u32 device) { - sched_trace_nv_interrupt_end(); + unsigned long _device = device; + sched_trace_nv_interrupt_end(_device); } EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); + + +#ifdef CONFIG_LITMUS_NVIDIA + +#define EXX_TS(evt) \ +void __##evt(void) { evt; } \ +EXPORT_SYMBOL(__##evt); + +EXX_TS(TS_NV_TOPISR_START) +EXX_TS(TS_NV_TOPISR_END) +EXX_TS(TS_NV_BOTISR_START) +EXX_TS(TS_NV_BOTISR_END) +EXX_TS(TS_NV_RELEASE_BOTISR_START) +EXX_TS(TS_NV_RELEASE_BOTISR_END) + +#endif + -- cgit v1.2.2 From 53a6dbb9f5337e77fce9c2672488c1c5e0621beb Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sat, 14 Jan 2012 14:20:07 -0500 Subject: Completed PAI for C-EDF. 
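The C-EDF changes below maintain a per-cluster, priority-ordered list of pending tasklets with a tail pointer (struct tasklet_head). A minimal, self-contained sketch of that kind of ordered insert; the integer 'prio' and higher_prio() are illustrative stand-ins for the owner task and edf_higher_prio(), and the pointer-to-pointer walk is just one way to keep the list sorted, not the exact code in the patch.

#include <stddef.h>
#include <stdio.h>

struct toy_tasklet {
	struct toy_tasklet *next;
	int prio;			/* larger = higher priority */
};

struct toy_head {
	struct toy_tasklet *head;
	struct toy_tasklet **tail;	/* points at the last 'next' field (or at head) */
};

static int higher_prio(const struct toy_tasklet *a, const struct toy_tasklet *b)
{
	return a->prio > b->prio;
}

/* insert t so the list stays sorted, highest priority first */
static void pai_insert(struct toy_head *q, struct toy_tasklet *t)
{
	struct toy_tasklet **link = &q->head;

	while (*link && higher_prio(*link, t))
		link = &(*link)->next;

	t->next = *link;
	*link = t;
	if (!t->next)
		q->tail = &t->next;	/* appended at the end: fix up the tail */
}

int main(void)
{
	struct toy_tasklet a = { NULL, 5 }, b = { NULL, 9 }, c = { NULL, 7 };
	struct toy_head q = { NULL, &q.head };
	struct toy_tasklet *it;

	pai_insert(&q, &a);
	pai_insert(&q, &b);
	pai_insert(&q, &c);
	for (it = q.head; it; it = it->next)
		printf("prio %d\n", it->prio);	/* prints 9, 7, 5 */
	return 0;
}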
--- include/litmus/sched_plugin.h | 2 + include/litmus/sched_trace_external.h | 20 +++ kernel/sched.c | 5 + kernel/softirq.c | 3 - litmus/litmus_softirq.c | 5 + litmus/sched_cedf.c | 319 +++++++++++++++++++++++++++++----- litmus/sched_plugin.c | 8 +- litmus/sched_trace_external.c | 8 +- 8 files changed, 316 insertions(+), 54 deletions(-) diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 12a9ab65a673..3fc64f832fef 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -75,6 +75,7 @@ typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd, typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet); +typedef void (*run_tasklets_t)(struct task_struct* next); /********************* sys call backends ********************/ /* This function causes the caller to sleep until the next release */ @@ -125,6 +126,7 @@ struct sched_plugin { #ifdef CONFIG_LITMUS_PAI_SOFTIRQD enqueue_pai_tasklet_t enqueue_pai_tasklet; + run_tasklets_t run_tasklets; #endif } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h index 90424d5c564c..e70e45e4cf51 100644 --- a/include/litmus/sched_trace_external.h +++ b/include/litmus/sched_trace_external.h @@ -4,6 +4,8 @@ #ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_ #define _LINUX_SCHED_TRACE_EXTERNAL_H_ + +#ifdef CONFIG_SCHED_TASK_TRACE extern void __sched_trace_tasklet_begin_external(struct task_struct* t); static inline void sched_trace_tasklet_begin_external(struct task_struct* t) { @@ -28,6 +30,7 @@ static inline void sched_trace_work_end_external(struct task_struct* t, struct t __sched_trace_work_end_external(t, e, f); } +#ifdef CONFIG_LITMUS_NVIDIA extern void __sched_trace_nv_interrupt_begin_external(u32 device); static inline void sched_trace_nv_interrupt_begin_external(u32 device) { @@ -39,6 +42,23 @@ static inline void sched_trace_nv_interrupt_end_external(u32 device) { __sched_trace_nv_interrupt_end_external(device); } +#endif + +#else + +// no tracing. 
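The #else branch just above provides no-op stubs when CONFIG_SCHED_TASK_TRACE is off, so the call sites compile away. The header pattern it follows, with illustrative names rather than the real hooks, looks roughly like this:

#ifdef CONFIG_TOY_TRACE
extern void __toy_trace_event(int id);		/* real, EXPORT_SYMBOL'd body elsewhere */
static inline void toy_trace_event(int id)
{
	__toy_trace_event(id);
}
#else
static inline void toy_trace_event(int id) { }	/* compiles away entirely */
#endif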
+static inline void sched_trace_tasklet_begin_external(struct task_struct* t){} +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){} +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){} +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){} + +#ifdef CONFIG_LITMUS_NVIDIA +static inline void sched_trace_nv_interrupt_begin_external(u32 device){} +static inline void sched_trace_nv_interrupt_end_external(u32 device){} +#endif + +#endif + #ifdef CONFIG_LITMUS_NVIDIA diff --git a/kernel/sched.c b/kernel/sched.c index 3aa2be09122b..08b725cd9182 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2883,6 +2883,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); + trace_sched_switch(prev, next); mm = next->mm; oldmm = prev->active_mm; @@ -3901,6 +3902,10 @@ need_resched_nonpreemptible: reacquire_klitirqd_lock(prev); #endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + litmus->run_tasklets(prev); +#endif + srp_ceiling_block(); } EXPORT_SYMBOL(schedule); diff --git a/kernel/softirq.c b/kernel/softirq.c index ae77c5c1d17e..d3217c54d2bf 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -442,9 +442,6 @@ void __tasklet_schedule(struct tasklet_struct *t) if(likely(_litmus_tasklet_schedule(t,nvidia_device))) { unlock_nv_registry(nvidia_device, &flags); - - TS_NV_RELEASE_BOTISR_END; - return; } else diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index f5cca964b6c6..c49676c6d3a7 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -470,6 +470,9 @@ static void do_lit_tasklet(struct klitirqd_info* which, /* execute tasklet if it has my priority and is free */ if ((t->owner == which->current_owner) && tasklet_trylock(t)) { if (!atomic_read(&t->count)) { + + sched_trace_tasklet_begin(t->owner); + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) { BUG(); @@ -480,6 +483,8 @@ static void do_lit_tasklet(struct klitirqd_info* which, atomic_dec(count); + sched_trace_tasklet_end(t->owner, 0ul); + continue; /* process more tasklets */ } tasklet_unlock(t); diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index f0356de60b2f..4924da21865e 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -124,6 +124,7 @@ typedef struct clusterdomain { #ifdef CONFIG_LITMUS_PAI_SOFTIRQD + raw_spinlock_t tasklet_lock; struct tasklet_head pending_tasklets; #endif @@ -429,36 +430,137 @@ static void cedf_tick(struct task_struct* t) #ifdef CONFIG_LITMUS_PAI_SOFTIRQD -void __do_lit_tasklet(struct tasklet_struct* tasklet) +void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) { - if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) - { + if (!atomic_read(&tasklet->count)) { + sched_trace_tasklet_begin(tasklet->owner); + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", __FUNCTION__, tasklet->owner->pid, flushed); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + + sched_trace_tasklet_end(tasklet->owner, flushed); + } + else { BUG(); } - TRACE("%s: Invoking tasklet with owner pid = %d.\n", __FUNCTION__, tasklet->owner->pid); - tasklet->func(tasklet->data); - tasklet_unlock(tasklet); +} + + +void __extract_tasklets(cedf_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets) +{ + struct 
tasklet_struct* step; + struct tasklet_struct* tasklet; + struct tasklet_struct* prev; + + task_tasklets->head = NULL; + task_tasklets->tail = &(task_tasklets->head); + + prev = NULL; + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) + { + if(step->owner == task) + { + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid); + + tasklet = step; + + if(prev) { + prev->next = tasklet->next; + } + else if(cluster->pending_tasklets.head == tasklet) { + // we're at the head. + cluster->pending_tasklets.head = tasklet->next; + } + + if(cluster->pending_tasklets.tail == &tasklet) { + // we're at the tail + if(prev) { + cluster->pending_tasklets.tail = &prev; + } + else { + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + } + + tasklet->next = NULL; + *(task_tasklets->tail) = tasklet; + task_tasklets->tail = &(tasklet->next); + } + else { + prev = step; + } + } +} + +void flush_tasklets(cedf_domain_t* cluster, struct task_struct* task) +{ + unsigned long flags; + struct tasklet_head task_tasklets; + struct tasklet_struct* step; + + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); + __extract_tasklets(cluster, task, &task_tasklets); + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); + + if(cluster->pending_tasklets.head != NULL) { + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid); + } + + // now execute any flushed tasklets. + for(step = cluster->pending_tasklets.head; step != NULL; /**/) + { + struct tasklet_struct* temp = step->next; + + step->next = NULL; + __do_lit_tasklet(step, 1ul); + step = temp; + } } -void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* next) + +void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) { int work_to_do = 1; struct tasklet_struct *tasklet = NULL; - - TRACE("%s: entered.\n", __FUNCTION__); + struct tasklet_struct *step; + unsigned long flags; while(work_to_do) { // remove tasklet at head of list if it has higher priority. - raw_spin_lock(&cluster->cedf_lock); - // remove tasklet at head. + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); + + + step = cluster->pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + + if(cluster->pending_tasklets.head != NULL) { + // remove tasklet at head. 
tasklet = cluster->pending_tasklets.head; - if(edf_higher_prio(tasklet->owner, next)) { + if(edf_higher_prio(tasklet->owner, sched_task)) { + + if(NULL == tasklet->next) { + // tasklet is at the head, list only has one element + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + // remove the tasklet from the queue cluster->pending_tasklets.head = tasklet->next; - + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); } else { @@ -467,12 +569,24 @@ void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* next) } } else { - //TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); } - raw_spin_unlock(&cluster->cedf_lock); + + + step = cluster->pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + + + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); if(tasklet) { - __do_lit_tasklet(tasklet); + __do_lit_tasklet(tasklet, 0ul); tasklet = NULL; } else { @@ -480,7 +594,50 @@ void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* next) } } - TRACE("%s: exited.\n", __FUNCTION__); + //TRACE("%s: exited.\n", __FUNCTION__); +} + + +void run_tasklets(struct task_struct* sched_task) +{ + cedf_domain_t* cluster; + +#if 0 + int task_is_rt = is_realtime(sched_task); + cedf_domain_t* cluster; + + if(is_realtime(sched_task)) { + cluster = task_cpu_cluster(sched_task); + } + else { + cluster = remote_cluster(get_cpu()); + } + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + + do_lit_tasklets(cluster, sched_task); + } + + if(!task_is_rt) { + put_cpu_no_resched(); + } +#else + + preempt_disable(); + + cluster = (is_realtime(sched_task)) ? + task_cpu_cluster(sched_task) : + remote_cluster(smp_processor_id()); + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + do_lit_tasklets(cluster, sched_task); + } + + preempt_enable_no_resched(); + +#endif } @@ -489,41 +646,47 @@ void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) struct tasklet_struct* step; step = cluster->pending_tasklets.head; - TRACE("%s: (BEFORE) dumping tasklet queue...\n"); + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); while(step != NULL){ - TRACE("%s: %d\n", __FUNCTION__, step->owner); + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); step = step->next; } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); TRACE("%s: done.\n", __FUNCTION__); + tasklet->next = NULL; // make sure there are no old values floating around + step = cluster->pending_tasklets.head; if(step == NULL) { TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); // insert at tail. 
- tasklet->next = NULL; *(cluster->pending_tasklets.tail) = tasklet; - cluster->pending_tasklets.tail = &tasklet->next; + cluster->pending_tasklets.tail = &(tasklet->next); } - else if((*cluster->pending_tasklets.tail != NULL) && - edf_higher_prio((*cluster->pending_tasklets.tail)->owner, tasklet->owner)) { + else if((*(cluster->pending_tasklets.tail) != NULL) && + edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) { // insert at tail. TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); - tasklet->next = NULL; *(cluster->pending_tasklets.tail) = tasklet; - cluster->pending_tasklets.tail = &tasklet->next; + cluster->pending_tasklets.tail = &(tasklet->next); } else { + + WARN_ON(1 == 1); + // insert the tasklet somewhere in the middle. - + + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { step = step->next; } // insert tasklet right before step->next. - TRACE("%s: tasklet belongs at end. inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); tasklet->next = step->next; step->next = tasklet; @@ -540,9 +703,10 @@ void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) step = cluster->pending_tasklets.head; TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); while(step != NULL){ - TRACE("%s: %d\n", __FUNCTION__, step->owner); + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); step = step->next; } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); TRACE("%s: done.\n", __FUNCTION__); // TODO: Maintain this list in priority order. @@ -553,37 +717,89 @@ void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) int enqueue_pai_tasklet(struct tasklet_struct* tasklet) { - cedf_domain_t* cluster = task_cpu_cluster(tasklet->owner); - cpu_entry_t *lowest; + cedf_domain_t *cluster = NULL; + cpu_entry_t *targetCPU = NULL; + int thisCPU; + int runLocal = 0; + int runNow = 0; unsigned long flags; if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) { TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); - BUG(); + return 0; } + cluster = task_cpu_cluster(tasklet->owner); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); - lowest = lowest_prio_cpu(cluster); - if (edf_higher_prio(tasklet->owner, lowest->linked)) { - if (smp_processor_id() == lowest->cpu) { - TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); - // execute the tasklet now. - __do_lit_tasklet(tasklet); + thisCPU = smp_processor_id(); + +#if 1 +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = NULL; + + // use this CPU if it is in our cluster and isn't running any RT work. + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) { + affinity = &(__get_cpu_var(cedf_cpu_entries)); } else { - // preempt the lowest CPU - __add_pai_tasklet(tasklet, cluster); - - TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, lowest->cpu); - - preempt(lowest); + // this CPU is busy or shouldn't run tasklet in this cluster. 
+ // look for available near by CPUs. + // NOTE: Affinity towards owner and not this CPU. Is this right? + affinity = + cedf_get_nearest_available_cpu(cluster, + &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner))); } + + targetCPU = affinity; + } +#endif +#endif + + if (targetCPU == NULL) { + targetCPU = lowest_prio_cpu(cluster); + } + + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) { + if (thisCPU == targetCPU->cpu) { + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); + runLocal = 1; + runNow = 1; + } + else { + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); + runLocal = 0; + runNow = 1; + } + } + else { + runLocal = 0; + runNow = 0; + } + + if(!runLocal) { + // enqueue the tasklet + __add_pai_tasklet(tasklet, cluster); } raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); + + if (runLocal /*&& runNow */) { // runNow == 1 is implied + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); + } + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); + preempt(targetCPU); // need to be protected by cedf_lock? + } + else { + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); + } + return(1); // success } @@ -721,9 +937,14 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) raw_spin_unlock(&cluster->cedf_lock); + /* #ifdef CONFIG_LITMUS_PAI_SOFTIRQD - do_lit_tasklets(cluster, next); -#endif + if(cluster->pending_tasklets.head != NULL) // peak at data. normally locked with cluster->cedf_lock + { + do_lit_tasklets(cluster, next); + } +#endif +*/ #ifdef WANT_ALL_SCHED_EVENTS TRACE("cedf_lock released, next=0x%p\n", next); @@ -865,6 +1086,10 @@ static void cedf_task_exit(struct task_struct * t) } raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(cluster, t); +#endif + BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); } @@ -1684,8 +1909,9 @@ static long cedf_activate_plugin(void) #ifdef CONFIG_LITMUS_PAI_SOFTIRQD + raw_spin_lock_init(&(cedf[i].tasklet_lock)); cedf[i].pending_tasklets.head = NULL; - cedf[i].pending_tasklets.tail = &cedf[i].pending_tasklets.head; + cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head); #endif @@ -1803,6 +2029,7 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = enqueue_pai_tasklet, + .run_tasklets = run_tasklets, #endif }; diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index e393d749baf5..d977e80aa32f 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -155,9 +155,14 @@ static void litmus_dummy_clear_prio_inh_klitirqd(struct task_struct* klitirqd, #ifdef CONFIG_LITMUS_PAI_SOFTIRQD static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t) { - TRACE("PAI Tasklet unsupported in this plugin!!!!!!\n"); + TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); return(0); // failure. 
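/*
 * enqueue_pai_tasklet() in the plugins above boils down to a three-way
 * decision once a target CPU has been chosen: run the tasklet immediately on
 * this CPU, enqueue it and preempt the remote target, or enqueue it and defer.
 * The sketch below captures just that decision; numeric deadlines (smaller is
 * higher priority) stand in for edf_higher_prio() against the target CPU's
 * linked task, and an idle target can be modeled as a very large deadline.
 */
#include <stdio.h>

enum pai_action { PAI_RUN_LOCAL, PAI_PREEMPT_REMOTE, PAI_DEFER };

/* Stand-in priority test: earlier deadline wins. */
static int higher_prio(long a_deadline, long b_deadline)
{
        return a_deadline < b_deadline;
}

/* PAI_RUN_LOCAL: run right here without enqueueing.
 * PAI_PREEMPT_REMOTE: enqueue, then preempt() the target CPU.
 * PAI_DEFER: enqueue only; a later scheduling point will drain it. */
static enum pai_action classify(long owner_deadline, long target_linked_deadline,
                                int this_cpu, int target_cpu)
{
        if (!higher_prio(owner_deadline, target_linked_deadline))
                return PAI_DEFER;               /* the owner loses: deferred */
        if (this_cpu == target_cpu)
                return PAI_RUN_LOCAL;           /* run the tasklet right here */
        return PAI_PREEMPT_REMOTE;              /* wake up the remote CPU */
}

int main(void)
{
        printf("%d\n", classify(10, 50, 0, 0)); /* PAI_RUN_LOCAL */
        printf("%d\n", classify(10, 50, 0, 3)); /* PAI_PREEMPT_REMOTE */
        printf("%d\n", classify(90, 50, 0, 3)); /* PAI_DEFER */
        return 0;
}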
} + +static void litmus_dummy_run_tasklets(struct task_struct* t) +{ + //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); +} #endif @@ -187,6 +192,7 @@ struct sched_plugin linux_sched_plugin = { #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, + .run_tasklets = litmus_dummy_run_tasklets, #endif .admit_task = litmus_dummy_admit_task }; diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c index 5b7e6152416a..cf8e1d78aa77 100644 --- a/litmus/sched_trace_external.c +++ b/litmus/sched_trace_external.c @@ -34,15 +34,15 @@ EXPORT_SYMBOL(__sched_trace_work_end_external); void __sched_trace_nv_interrupt_begin_external(u32 device) { - unsigned long _device = device; - sched_trace_nv_interrupt_begin(_device); + //unsigned long _device = device; + sched_trace_nv_interrupt_begin((unsigned long)device); } EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); void __sched_trace_nv_interrupt_end_external(u32 device) { - unsigned long _device = device; - sched_trace_nv_interrupt_end(_device); + //unsigned long _device = device; + sched_trace_nv_interrupt_end((unsigned long)device); } EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); -- cgit v1.2.2 From 1a582a2c5e361e01a4c64f185bb1a23c3f70701a Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sat, 14 Jan 2012 16:56:47 -0500 Subject: Port PAI interrupts to GSN-EDF, C-RM/RM-SRT/FIFO. --- litmus/sched_cedf.c | 32 ++-- litmus/sched_cfifo.c | 450 ++++++++++++++++++++++++++++++++++++++++++++++++- litmus/sched_crm.c | 448 +++++++++++++++++++++++++++++++++++++++++++++++- litmus/sched_crm_srt.c | 445 +++++++++++++++++++++++++++++++++++++++++++++++- litmus/sched_gsn_edf.c | 434 ++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 1787 insertions(+), 22 deletions(-) diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 4924da21865e..02106f455c0f 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -124,7 +124,6 @@ typedef struct clusterdomain { #ifdef CONFIG_LITMUS_PAI_SOFTIRQD - raw_spinlock_t tasklet_lock; struct tasklet_head pending_tasklets; #endif @@ -430,7 +429,7 @@ static void cedf_tick(struct task_struct* t) #ifdef CONFIG_LITMUS_PAI_SOFTIRQD -void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) { if (!atomic_read(&tasklet->count)) { sched_trace_tasklet_begin(tasklet->owner); @@ -451,7 +450,7 @@ void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) } -void __extract_tasklets(cedf_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets) +static void __extract_tasklets(cedf_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets) { struct tasklet_struct* step; struct tasklet_struct* tasklet; @@ -497,7 +496,7 @@ void __extract_tasklets(cedf_domain_t* cluster, struct task_struct* task, struct } } -void flush_tasklets(cedf_domain_t* cluster, struct task_struct* task) +static void flush_tasklets(cedf_domain_t* cluster, struct task_struct* task) { unsigned long flags; struct tasklet_head task_tasklets; @@ -524,18 +523,18 @@ void flush_tasklets(cedf_domain_t* cluster, struct task_struct* task) } -void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) +static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) { int work_to_do = 1; struct tasklet_struct *tasklet = NULL; - struct tasklet_struct *step; + //struct 
tasklet_struct *step; unsigned long flags; while(work_to_do) { // remove tasklet at head of list if it has higher priority. raw_spin_lock_irqsave(&cluster->cedf_lock, flags); - +/* step = cluster->pending_tasklets.head; TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); while(step != NULL){ @@ -544,6 +543,7 @@ void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) } TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); TRACE("%s: done.\n", __FUNCTION__); + */ if(cluster->pending_tasklets.head != NULL) { @@ -573,6 +573,7 @@ void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) } + /* step = cluster->pending_tasklets.head; TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); while(step != NULL){ @@ -581,6 +582,7 @@ void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) } TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); TRACE("%s: done.\n", __FUNCTION__); + */ raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); @@ -598,7 +600,7 @@ void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) } -void run_tasklets(struct task_struct* sched_task) +static void run_tasklets(struct task_struct* sched_task) { cedf_domain_t* cluster; @@ -641,10 +643,11 @@ void run_tasklets(struct task_struct* sched_task) } -void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) { struct tasklet_struct* step; + /* step = cluster->pending_tasklets.head; TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); while(step != NULL){ @@ -653,6 +656,7 @@ void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) } TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); TRACE("%s: done.\n", __FUNCTION__); + */ tasklet->next = NULL; // make sure there are no old values floating around @@ -674,7 +678,7 @@ void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) } else { - WARN_ON(1 == 1); + //WARN_ON(1 == 1); // insert the tasklet somewhere in the middle. @@ -699,7 +703,7 @@ void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) } } - + /* step = cluster->pending_tasklets.head; TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); while(step != NULL){ @@ -707,7 +711,8 @@ void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) step = step->next; } TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); - TRACE("%s: done.\n", __FUNCTION__); + TRACE("%s: done.\n", __FUNCTION__); + */ // TODO: Maintain this list in priority order. 
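/*
 * The commented-out TODO above concerns keeping the pending list in priority
 * order, which __add_pai_tasklet() handles with separate head, middle, and
 * tail cases.  The sketch below shows the same highest-priority-first ordering
 * with a comparator parameter, since the C-EDF, C-FIFO, C-RM, C-RM-SRT, and
 * GSN-EDF copies of this insertion logic differ essentially only in whether
 * edf_higher_prio(), fifo_higher_prio(), rm_higher_prio(), or
 * rm_srt_higher_prio() is used; ot_tasklet and edf_like() are stand-ins.
 */
#include <assert.h>
#include <stddef.h>

struct ot_tasklet {                      /* stand-in for struct tasklet_struct */
        struct ot_tasklet *next;
        long deadline;                   /* proxy for the owner's priority */
};

struct ot_head {
        struct ot_tasklet *head;
        struct ot_tasklet **tail;
};

/* Comparator slot for the per-plugin priority test. */
typedef int (*higher_prio_fn)(const struct ot_tasklet *a, const struct ot_tasklet *b);

static int edf_like(const struct ot_tasklet *a, const struct ot_tasklet *b)
{
        return a->deadline < b->deadline;        /* earlier deadline wins */
}

/* Keep the queue sorted highest-priority-first.  The pointer-to-pointer
 * cursor makes head, middle, and tail insertion one code path, so no
 * separate head/tail patch-up is needed. */
static void ordered_insert(struct ot_head *q, struct ot_tasklet *t, higher_prio_fn hp)
{
        struct ot_tasklet **cursor = &q->head;

        while (*cursor && hp(*cursor, t))
                cursor = &(*cursor)->next;       /* skip everything that outranks t */

        t->next = *cursor;
        *cursor = t;
        if (t->next == NULL)
                q->tail = &t->next;              /* t became the new last element */
}

int main(void)
{
        struct ot_head q = { .head = NULL, .tail = &q.head };
        struct ot_tasklet a = { .deadline = 30 }, b = { .deadline = 10 }, c = { .deadline = 20 };

        ordered_insert(&q, &a, edf_like);
        ordered_insert(&q, &b, edf_like);
        ordered_insert(&q, &c, edf_like);
        assert(q.head == &b && b.next == &c && c.next == &a && q.tail == &a.next);
        return 0;
}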
// tasklet->next = NULL; @@ -715,7 +720,7 @@ void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) // cluster->pending_tasklets.tail = &tasklet->next; } -int enqueue_pai_tasklet(struct tasklet_struct* tasklet) +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet) { cedf_domain_t *cluster = NULL; cpu_entry_t *targetCPU = NULL; @@ -1909,7 +1914,6 @@ static long cedf_activate_plugin(void) #ifdef CONFIG_LITMUS_PAI_SOFTIRQD - raw_spin_lock_init(&(cedf[i].tasklet_lock)); cedf[i].pending_tasklets.head = NULL; cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head); #endif diff --git a/litmus/sched_cfifo.c b/litmus/sched_cfifo.c index f515446f76ed..689b2dbe5fae 100644 --- a/litmus/sched_cfifo.c +++ b/litmus/sched_cfifo.c @@ -55,6 +55,10 @@ #include #endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#endif + #ifdef CONFIG_LITMUS_NVIDIA #include #endif @@ -91,6 +95,15 @@ DEFINE_PER_CPU(cpu_entry_t, cfifo_cpu_entries); #define test_will_schedule(cpu) \ (atomic_read(&per_cpu(cfifo_cpu_entries, cpu).will_schedule)) + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; +#endif + /* * In C-FIFO there is a cfifo domain _per_ cluster * The number of clusters is dynamically determined accordingly to the @@ -108,6 +121,12 @@ typedef struct clusterdomain { struct bheap cpu_heap; /* lock for this cluster */ #define cfifo_lock domain.ready_lock + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + struct tasklet_head pending_tasklets; +#endif + } cfifo_domain_t; /* a cfifo_domain per cluster; allocation is done at init/activation time */ @@ -251,7 +270,7 @@ static void preempt(cpu_entry_t *entry) preempt_if_preemptable(entry->scheduled, entry->cpu); } -/* requeue - Put an unlinked task into gsn-edf domain. +/* requeue - Put an unlinked task into c-fifo domain. * Caller must hold cfifo_lock. */ static noinline void requeue(struct task_struct* task) @@ -395,6 +414,419 @@ static void cfifo_tick(struct task_struct* t) } } + + + + + + + + + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + + +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) +{ + if (!atomic_read(&tasklet->count)) { + sched_trace_tasklet_begin(tasklet->owner); + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", __FUNCTION__, tasklet->owner->pid, flushed); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + + sched_trace_tasklet_end(tasklet->owner, flushed); + } + else { + BUG(); + } +} + + +static void __extract_tasklets(cfifo_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets) +{ + struct tasklet_struct* step; + struct tasklet_struct* tasklet; + struct tasklet_struct* prev; + + task_tasklets->head = NULL; + task_tasklets->tail = &(task_tasklets->head); + + prev = NULL; + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) + { + if(step->owner == task) + { + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid); + + tasklet = step; + + if(prev) { + prev->next = tasklet->next; + } + else if(cluster->pending_tasklets.head == tasklet) { + // we're at the head. 
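/*
 * __extract_tasklets() above splices every tasklet owned by an exiting task
 * out of the pending queue so flush_tasklets() can run them with the flushed
 * flag set.  Below is an equivalent splice written with a pointer-to-pointer
 * cursor, which makes the head, middle, and tail cases one code path;
 * pq_tasklet / pq_head are stand-in types and owner_pid replaces the
 * task_struct owner pointer.
 */
#include <assert.h>
#include <stddef.h>

struct pq_tasklet {                      /* stand-in for struct tasklet_struct */
        struct pq_tasklet *next;
        int owner_pid;
};

struct pq_head {                         /* mirrors struct tasklet_head */
        struct pq_tasklet *head;
        struct pq_tasklet **tail;
};

/* Move every tasklet owned by `pid` from `q` to `out`, preserving order.
 * Walking with a pointer-to-pointer cursor updates the head and interior
 * links uniformly; the tail is fixed up only if the last element moved. */
static void splice_out_owner(struct pq_head *q, struct pq_head *out, int pid)
{
        struct pq_tasklet **cursor = &q->head;

        out->head = NULL;
        out->tail = &out->head;

        while (*cursor) {
                struct pq_tasklet *t = *cursor;
                if (t->owner_pid == pid) {
                        *cursor = t->next;       /* unlink from the pending queue */
                        if (q->tail == &t->next) /* t was the last element */
                                q->tail = cursor;
                        t->next = NULL;
                        *out->tail = t;          /* append to the private list */
                        out->tail = &t->next;
                } else {
                        cursor = &t->next;       /* keep t, advance */
                }
        }
}

int main(void)
{
        struct pq_tasklet a = { .owner_pid = 1 }, b = { .owner_pid = 2 }, c = { .owner_pid = 1 };
        struct pq_head q = { .head = &a, .tail = &c.next }, mine;

        a.next = &b; b.next = &c; c.next = NULL;
        splice_out_owner(&q, &mine, 1);
        assert(q.head == &b && q.tail == &b.next);           /* only owner 2 remains */
        assert(mine.head == &a && a.next == &c && mine.tail == &c.next);
        return 0;
}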
+ cluster->pending_tasklets.head = tasklet->next; + } + + if(cluster->pending_tasklets.tail == &tasklet) { + // we're at the tail + if(prev) { + cluster->pending_tasklets.tail = &prev; + } + else { + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + } + + tasklet->next = NULL; + *(task_tasklets->tail) = tasklet; + task_tasklets->tail = &(tasklet->next); + } + else { + prev = step; + } + } +} + +static void flush_tasklets(cfifo_domain_t* cluster, struct task_struct* task) +{ + unsigned long flags; + struct tasklet_head task_tasklets; + struct tasklet_struct* step; + + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); + __extract_tasklets(cluster, task, &task_tasklets); + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); + + if(cluster->pending_tasklets.head != NULL) { + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid); + } + + // now execute any flushed tasklets. + for(step = cluster->pending_tasklets.head; step != NULL; /**/) + { + struct tasklet_struct* temp = step->next; + + step->next = NULL; + __do_lit_tasklet(step, 1ul); + + step = temp; + } +} + + +static void do_lit_tasklets(cfifo_domain_t* cluster, struct task_struct* sched_task) +{ + int work_to_do = 1; + struct tasklet_struct *tasklet = NULL; + //struct tasklet_struct *step; + unsigned long flags; + + while(work_to_do) { + // remove tasklet at head of list if it has higher priority. + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + + if(cluster->pending_tasklets.head != NULL) { + // remove tasklet at head. + tasklet = cluster->pending_tasklets.head; + + if(fifo_higher_prio(tasklet->owner, sched_task)) { + + if(NULL == tasklet->next) { + // tasklet is at the head, list only has one element + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + + // remove the tasklet from the queue + cluster->pending_tasklets.head = tasklet->next; + + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + } + else { + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); + tasklet = NULL; + } + } + else { + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + } + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? 
(*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); + + if(tasklet) { + __do_lit_tasklet(tasklet, 0ul); + tasklet = NULL; + } + else { + work_to_do = 0; + } + } + + //TRACE("%s: exited.\n", __FUNCTION__); +} + + +static void run_tasklets(struct task_struct* sched_task) +{ + cfifo_domain_t* cluster; + +#if 0 + int task_is_rt = is_realtime(sched_task); + cfifo_domain_t* cluster; + + if(is_realtime(sched_task)) { + cluster = task_cpu_cluster(sched_task); + } + else { + cluster = remote_cluster(get_cpu()); + } + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + + do_lit_tasklets(cluster, sched_task); + } + + if(!task_is_rt) { + put_cpu_no_resched(); + } +#else + + preempt_disable(); + + cluster = (is_realtime(sched_task)) ? + task_cpu_cluster(sched_task) : + remote_cluster(smp_processor_id()); + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + do_lit_tasklets(cluster, sched_task); + } + + preempt_enable_no_resched(); + +#endif +} + + +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cfifo_domain_t* cluster) +{ + struct tasklet_struct* step; + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + + tasklet->next = NULL; // make sure there are no old values floating around + + step = cluster->pending_tasklets.head; + if(step == NULL) { + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); + // insert at tail. + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else if((*(cluster->pending_tasklets.tail) != NULL) && + fifo_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) { + // insert at tail. + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); + + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else { + + //WARN_ON(1 == 1); + + // insert the tasklet somewhere in the middle. + + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); + + while(step->next && fifo_higher_prio(step->next->owner, tasklet->owner)) { + step = step->next; + } + + // insert tasklet right before step->next. + + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); + + tasklet->next = step->next; + step->next = tasklet; + + // patch up the head if needed. 
+ if(cluster->pending_tasklets.head == step) + { + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.head = tasklet; + } + } + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + // TODO: Maintain this list in priority order. + // tasklet->next = NULL; + // *(cluster->pending_tasklets.tail) = tasklet; + // cluster->pending_tasklets.tail = &tasklet->next; +} + +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet) +{ + cfifo_domain_t *cluster = NULL; + cpu_entry_t *targetCPU = NULL; + int thisCPU; + int runLocal = 0; + int runNow = 0; + unsigned long flags; + + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + return 0; + } + + cluster = task_cpu_cluster(tasklet->owner); + + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); + + thisCPU = smp_processor_id(); + +#if 1 +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = NULL; + + // use this CPU if it is in our cluster and isn't running any RT work. + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cfifo_cpu_entries).linked == NULL)) { + affinity = &(__get_cpu_var(cfifo_cpu_entries)); + } + else { + // this CPU is busy or shouldn't run tasklet in this cluster. + // look for available near by CPUs. + // NOTE: Affinity towards owner and not this CPU. Is this right? + affinity = + cfifo_get_nearest_available_cpu(cluster, + &per_cpu(cfifo_cpu_entries, task_cpu(tasklet->owner))); + } + + targetCPU = affinity; + } +#endif +#endif + + if (targetCPU == NULL) { + targetCPU = lowest_prio_cpu(cluster); + } + + if (fifo_higher_prio(tasklet->owner, targetCPU->linked)) { + if (thisCPU == targetCPU->cpu) { + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); + runLocal = 1; + runNow = 1; + } + else { + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); + runLocal = 0; + runNow = 1; + } + } + else { + runLocal = 0; + runNow = 0; + } + + if(!runLocal) { + // enqueue the tasklet + __add_pai_tasklet(tasklet, cluster); + } + + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); + + + if (runLocal /*&& runNow */) { // runNow == 1 is implied + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); + } + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); + preempt(targetCPU); // need to be protected by cfifo_lock? + } + else { + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); + } + + return(1); // success +} + + +#endif + + + + + + + + + + + + + + + + + + + + /* Getting schedule() right is a bit tricky. schedule() may not make any * assumptions on the state of the current task since it may be called for a * number of reasons. 
The reasons include a scheduler_tick() determined that it @@ -544,7 +976,7 @@ static void cfifo_task_new(struct task_struct * t, int on_rq, int running) cpu_entry_t* entry; cfifo_domain_t* cluster; - TRACE("gsn edf: task new %d\n", t->pid); + TRACE("cfifo: task new %d\n", t->pid); /* the cluster doesn't change even if t is running */ cluster = task_cpu_cluster(t); @@ -650,6 +1082,10 @@ static void cfifo_task_exit(struct task_struct * t) } raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(cluster, t); +#endif + BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); } @@ -1467,6 +1903,12 @@ static long cfifo_activate_plugin(void) bheap_init(&(cfifo[i].cpu_heap)); fifo_domain_init(&(cfifo[i].domain), NULL, cfifo_release_jobs); + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + cfifo[i].pending_tasklets.head = NULL; + cfifo[i].pending_tasklets.tail = &(cfifo[i].pending_tasklets.head); +#endif + if(!zalloc_cpumask_var(&cfifo[i].cpu_map, GFP_ATOMIC)) return -ENOMEM; } @@ -1578,6 +2020,10 @@ static struct sched_plugin cfifo_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_SOFTIRQD .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = enqueue_pai_tasklet, + .run_tasklets = run_tasklets, #endif }; diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c index 061b29eaff7e..fd7fab982998 100644 --- a/litmus/sched_crm.c +++ b/litmus/sched_crm.c @@ -55,6 +55,10 @@ #include #endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#endif + #ifdef CONFIG_LITMUS_NVIDIA #include #endif @@ -91,6 +95,14 @@ DEFINE_PER_CPU(cpu_entry_t, crm_cpu_entries); #define test_will_schedule(cpu) \ (atomic_read(&per_cpu(crm_cpu_entries, cpu).will_schedule)) +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; +#endif + /* * In C-RM there is a crm domain _per_ cluster * The number of clusters is dynamically determined accordingly to the @@ -108,6 +120,10 @@ typedef struct clusterdomain { struct bheap cpu_heap; /* lock for this cluster */ #define crm_lock domain.ready_lock + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + struct tasklet_head pending_tasklets; +#endif } crm_domain_t; /* a crm_domain per cluster; allocation is done at init/activation time */ @@ -251,7 +267,7 @@ static void preempt(cpu_entry_t *entry) preempt_if_preemptable(entry->scheduled, entry->cpu); } -/* requeue - Put an unlinked task into gsn-edf domain. +/* requeue - Put an unlinked task into c-rm domain. * Caller must hold crm_lock. 
*/ static noinline void requeue(struct task_struct* task) @@ -394,6 +410,421 @@ static void crm_tick(struct task_struct* t) } } } + + + + + + + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + + +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) +{ + if (!atomic_read(&tasklet->count)) { + sched_trace_tasklet_begin(tasklet->owner); + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", __FUNCTION__, tasklet->owner->pid, flushed); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + + sched_trace_tasklet_end(tasklet->owner, flushed); + } + else { + BUG(); + } +} + + +static void __extract_tasklets(crm_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets) +{ + struct tasklet_struct* step; + struct tasklet_struct* tasklet; + struct tasklet_struct* prev; + + task_tasklets->head = NULL; + task_tasklets->tail = &(task_tasklets->head); + + prev = NULL; + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) + { + if(step->owner == task) + { + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid); + + tasklet = step; + + if(prev) { + prev->next = tasklet->next; + } + else if(cluster->pending_tasklets.head == tasklet) { + // we're at the head. + cluster->pending_tasklets.head = tasklet->next; + } + + if(cluster->pending_tasklets.tail == &tasklet) { + // we're at the tail + if(prev) { + cluster->pending_tasklets.tail = &prev; + } + else { + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + } + + tasklet->next = NULL; + *(task_tasklets->tail) = tasklet; + task_tasklets->tail = &(tasklet->next); + } + else { + prev = step; + } + } +} + +static void flush_tasklets(crm_domain_t* cluster, struct task_struct* task) +{ + unsigned long flags; + struct tasklet_head task_tasklets; + struct tasklet_struct* step; + + raw_spin_lock_irqsave(&cluster->crm_lock, flags); + __extract_tasklets(cluster, task, &task_tasklets); + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); + + if(cluster->pending_tasklets.head != NULL) { + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid); + } + + // now execute any flushed tasklets. + for(step = cluster->pending_tasklets.head; step != NULL; /**/) + { + struct tasklet_struct* temp = step->next; + + step->next = NULL; + __do_lit_tasklet(step, 1ul); + + step = temp; + } +} + + +static void do_lit_tasklets(crm_domain_t* cluster, struct task_struct* sched_task) +{ + int work_to_do = 1; + struct tasklet_struct *tasklet = NULL; + //struct tasklet_struct *step; + unsigned long flags; + + while(work_to_do) { + // remove tasklet at head of list if it has higher priority. + raw_spin_lock_irqsave(&cluster->crm_lock, flags); + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + if(cluster->pending_tasklets.head != NULL) { + // remove tasklet at head. 
+ tasklet = cluster->pending_tasklets.head; + + if(rm_higher_prio(tasklet->owner, sched_task)) { + + if(NULL == tasklet->next) { + // tasklet is at the head, list only has one element + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + + // remove the tasklet from the queue + cluster->pending_tasklets.head = tasklet->next; + + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + } + else { + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); + tasklet = NULL; + } + } + else { + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + } + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); + + if(tasklet) { + __do_lit_tasklet(tasklet, 0ul); + tasklet = NULL; + } + else { + work_to_do = 0; + } + } + + //TRACE("%s: exited.\n", __FUNCTION__); +} + + +static void run_tasklets(struct task_struct* sched_task) +{ + crm_domain_t* cluster; + +#if 0 + int task_is_rt = is_realtime(sched_task); + crm_domain_t* cluster; + + if(is_realtime(sched_task)) { + cluster = task_cpu_cluster(sched_task); + } + else { + cluster = remote_cluster(get_cpu()); + } + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + + do_lit_tasklets(cluster, sched_task); + } + + if(!task_is_rt) { + put_cpu_no_resched(); + } +#else + + preempt_disable(); + + cluster = (is_realtime(sched_task)) ? + task_cpu_cluster(sched_task) : + remote_cluster(smp_processor_id()); + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + do_lit_tasklets(cluster, sched_task); + } + + preempt_enable_no_resched(); + +#endif +} + + +static void __add_pai_tasklet(struct tasklet_struct* tasklet, crm_domain_t* cluster) +{ + struct tasklet_struct* step; + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + tasklet->next = NULL; // make sure there are no old values floating around + + step = cluster->pending_tasklets.head; + if(step == NULL) { + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); + // insert at tail. + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else if((*(cluster->pending_tasklets.tail) != NULL) && + rm_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) { + // insert at tail. + TRACE("%s: tasklet belongs at end. 
inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); + + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else { + + //WARN_ON(1 == 1); + + // insert the tasklet somewhere in the middle. + + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); + + while(step->next && rm_higher_prio(step->next->owner, tasklet->owner)) { + step = step->next; + } + + // insert tasklet right before step->next. + + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); + + tasklet->next = step->next; + step->next = tasklet; + + // patch up the head if needed. + if(cluster->pending_tasklets.head == step) + { + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.head = tasklet; + } + } + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + // TODO: Maintain this list in priority order. + // tasklet->next = NULL; + // *(cluster->pending_tasklets.tail) = tasklet; + // cluster->pending_tasklets.tail = &tasklet->next; +} + +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet) +{ + crm_domain_t *cluster = NULL; + cpu_entry_t *targetCPU = NULL; + int thisCPU; + int runLocal = 0; + int runNow = 0; + unsigned long flags; + + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + return 0; + } + + cluster = task_cpu_cluster(tasklet->owner); + + raw_spin_lock_irqsave(&cluster->crm_lock, flags); + + thisCPU = smp_processor_id(); + +#if 1 +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = NULL; + + // use this CPU if it is in our cluster and isn't running any RT work. + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(crm_cpu_entries).linked == NULL)) { + affinity = &(__get_cpu_var(crm_cpu_entries)); + } + else { + // this CPU is busy or shouldn't run tasklet in this cluster. + // look for available near by CPUs. + // NOTE: Affinity towards owner and not this CPU. Is this right? 
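/*
 * When CONFIG_SCHED_CPU_AFFINITY is set, the code above prefers the local CPU
 * if it is idle and in the owner's cluster, then a CPU near the owner's CPU,
 * and finally the cluster's lowest-priority CPU.  The sketch below encodes
 * only that fallback chain; cpu_slot is a stand-in for cpu_entry_t, a NULL
 * `local` argument models "this CPU is outside the cluster", and `nearest`
 * stands in for the plugin's *_get_nearest_available_cpu() helper.
 */
#include <stdio.h>

#define NO_TASK (-1L)                    /* stand-in for a CPU with nothing linked */

struct cpu_slot {                        /* stand-in for cpu_entry_t */
        int  cpu;
        long linked_deadline;            /* NO_TASK if the CPU is idle */
};

/* Fallback chain for choosing the target CPU of a PAI tasklet. */
static struct cpu_slot *pick_target(struct cpu_slot *local,
                                    struct cpu_slot *nearest,
                                    struct cpu_slot *lowest_prio)
{
        if (local && local->linked_deadline == NO_TASK)
                return local;            /* idle local CPU: cheapest choice */
        if (nearest)
                return nearest;          /* cache-friendly CPU near the owner */
        return lowest_prio;              /* otherwise preempt the weakest CPU */
}

int main(void)
{
        struct cpu_slot busy_local = { 0, 100 }, near_owner = { 2, NO_TASK }, weakest = { 3, 900 };

        printf("target cpu = %d\n", pick_target(&busy_local, &near_owner, &weakest)->cpu); /* 2 */
        printf("target cpu = %d\n", pick_target(&busy_local, NULL, &weakest)->cpu);        /* 3 */
        return 0;
}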
+ affinity = + crm_get_nearest_available_cpu(cluster, + &per_cpu(crm_cpu_entries, task_cpu(tasklet->owner))); + } + + targetCPU = affinity; + } +#endif +#endif + + if (targetCPU == NULL) { + targetCPU = lowest_prio_cpu(cluster); + } + + if (rm_higher_prio(tasklet->owner, targetCPU->linked)) { + if (thisCPU == targetCPU->cpu) { + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); + runLocal = 1; + runNow = 1; + } + else { + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); + runLocal = 0; + runNow = 1; + } + } + else { + runLocal = 0; + runNow = 0; + } + + if(!runLocal) { + // enqueue the tasklet + __add_pai_tasklet(tasklet, cluster); + } + + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); + + + if (runLocal /*&& runNow */) { // runNow == 1 is implied + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); + } + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); + preempt(targetCPU); // need to be protected by crm_lock? + } + else { + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); + } + + return(1); // success +} + + +#endif + + + + + + + + + + + + + + + + + + + + + + + + + + /* Getting schedule() right is a bit tricky. schedule() may not make any * assumptions on the state of the current task since it may be called for a @@ -544,7 +975,7 @@ static void crm_task_new(struct task_struct * t, int on_rq, int running) cpu_entry_t* entry; crm_domain_t* cluster; - TRACE("gsn edf: task new %d\n", t->pid); + TRACE("crm: task new %d\n", t->pid); /* the cluster doesn't change even if t is running */ cluster = task_cpu_cluster(t); @@ -650,6 +1081,10 @@ static void crm_task_exit(struct task_struct * t) } raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(cluster, t); +#endif + BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); } @@ -1467,6 +1902,11 @@ static long crm_activate_plugin(void) bheap_init(&(crm[i].cpu_heap)); rm_domain_init(&(crm[i].domain), NULL, crm_release_jobs); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + crm[i].pending_tasklets.head = NULL; + crm[i].pending_tasklets.tail = &(crm[i].pending_tasklets.head); +#endif + if(!zalloc_cpumask_var(&crm[i].cpu_map, GFP_ATOMIC)) return -ENOMEM; } @@ -1578,6 +2018,10 @@ static struct sched_plugin crm_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_SOFTIRQD .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = enqueue_pai_tasklet, + .run_tasklets = run_tasklets, #endif }; diff --git a/litmus/sched_crm_srt.c b/litmus/sched_crm_srt.c index 4473f35e64cd..c0004354573d 100644 --- a/litmus/sched_crm_srt.c +++ b/litmus/sched_crm_srt.c @@ -55,6 +55,10 @@ #include #endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#endif + #ifdef CONFIG_LITMUS_NVIDIA #include #endif @@ -91,6 +95,15 @@ DEFINE_PER_CPU(cpu_entry_t, crm_srt_cpu_entries); #define test_will_schedule(cpu) \ (atomic_read(&per_cpu(crm_srt_cpu_entries, cpu).will_schedule)) + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; +#endif + /* * In C-RM-SRT there is a crm_srt domain _per_ cluster * The number of clusters is dynamically determined accordingly to the @@ -108,6 +121,12 @@ typedef struct clusterdomain { struct bheap cpu_heap; /* 
lock for this cluster */ #define crm_srt_lock domain.ready_lock + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + struct tasklet_head pending_tasklets; +#endif + } crm_srt_domain_t; /* a crm_srt_domain per cluster; allocation is done at init/activation time */ @@ -251,7 +270,7 @@ static void preempt(cpu_entry_t *entry) preempt_if_preemptable(entry->scheduled, entry->cpu); } -/* requeue - Put an unlinked task into gsn-edf domain. +/* requeue - Put an unlinked task into c-rm-srt domain. * Caller must hold crm_srt_lock. */ static noinline void requeue(struct task_struct* task) @@ -395,6 +414,415 @@ static void crm_srt_tick(struct task_struct* t) } } + + + + + + + + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + + +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) +{ + if (!atomic_read(&tasklet->count)) { + sched_trace_tasklet_begin(tasklet->owner); + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", __FUNCTION__, tasklet->owner->pid, flushed); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + + sched_trace_tasklet_end(tasklet->owner, flushed); + } + else { + BUG(); + } +} + + +static void __extract_tasklets(crm_srt_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets) +{ + struct tasklet_struct* step; + struct tasklet_struct* tasklet; + struct tasklet_struct* prev; + + task_tasklets->head = NULL; + task_tasklets->tail = &(task_tasklets->head); + + prev = NULL; + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) + { + if(step->owner == task) + { + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid); + + tasklet = step; + + if(prev) { + prev->next = tasklet->next; + } + else if(cluster->pending_tasklets.head == tasklet) { + // we're at the head. + cluster->pending_tasklets.head = tasklet->next; + } + + if(cluster->pending_tasklets.tail == &tasklet) { + // we're at the tail + if(prev) { + cluster->pending_tasklets.tail = &prev; + } + else { + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + } + + tasklet->next = NULL; + *(task_tasklets->tail) = tasklet; + task_tasklets->tail = &(tasklet->next); + } + else { + prev = step; + } + } +} + +static void flush_tasklets(crm_srt_domain_t* cluster, struct task_struct* task) +{ + unsigned long flags; + struct tasklet_head task_tasklets; + struct tasklet_struct* step; + + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); + __extract_tasklets(cluster, task, &task_tasklets); + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); + + if(cluster->pending_tasklets.head != NULL) { + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid); + } + + // now execute any flushed tasklets. + for(step = cluster->pending_tasklets.head; step != NULL; /**/) + { + struct tasklet_struct* temp = step->next; + + step->next = NULL; + __do_lit_tasklet(step, 1ul); + + step = temp; + } +} + + +static void do_lit_tasklets(crm_srt_domain_t* cluster, struct task_struct* sched_task) +{ + int work_to_do = 1; + struct tasklet_struct *tasklet = NULL; + //struct tasklet_struct *step; + unsigned long flags; + + while(work_to_do) { + // remove tasklet at head of list if it has higher priority. 
+ raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + if(cluster->pending_tasklets.head != NULL) { + // remove tasklet at head. + tasklet = cluster->pending_tasklets.head; + + if(rm_srt_higher_prio(tasklet->owner, sched_task)) { + + if(NULL == tasklet->next) { + // tasklet is at the head, list only has one element + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + + // remove the tasklet from the queue + cluster->pending_tasklets.head = tasklet->next; + + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + } + else { + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); + tasklet = NULL; + } + } + else { + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + } + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); + + if(tasklet) { + __do_lit_tasklet(tasklet, 0ul); + tasklet = NULL; + } + else { + work_to_do = 0; + } + } + + //TRACE("%s: exited.\n", __FUNCTION__); +} + + +static void run_tasklets(struct task_struct* sched_task) +{ + crm_srt_domain_t* cluster; + +#if 0 + int task_is_rt = is_realtime(sched_task); + crm_srt_domain_t* cluster; + + if(is_realtime(sched_task)) { + cluster = task_cpu_cluster(sched_task); + } + else { + cluster = remote_cluster(get_cpu()); + } + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + + do_lit_tasklets(cluster, sched_task); + } + + if(!task_is_rt) { + put_cpu_no_resched(); + } +#else + + preempt_disable(); + + cluster = (is_realtime(sched_task)) ? + task_cpu_cluster(sched_task) : + remote_cluster(smp_processor_id()); + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + do_lit_tasklets(cluster, sched_task); + } + + preempt_enable_no_resched(); + +#endif +} + + +static void __add_pai_tasklet(struct tasklet_struct* tasklet, crm_srt_domain_t* cluster) +{ + struct tasklet_struct* step; + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? 
(*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + tasklet->next = NULL; // make sure there are no old values floating around + + step = cluster->pending_tasklets.head; + if(step == NULL) { + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); + // insert at tail. + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else if((*(cluster->pending_tasklets.tail) != NULL) && + rm_srt_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) { + // insert at tail. + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); + + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else { + + //WARN_ON(1 == 1); + + // insert the tasklet somewhere in the middle. + + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); + + while(step->next && rm_srt_higher_prio(step->next->owner, tasklet->owner)) { + step = step->next; + } + + // insert tasklet right before step->next. + + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); + + tasklet->next = step->next; + step->next = tasklet; + + // patch up the head if needed. + if(cluster->pending_tasklets.head == step) + { + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.head = tasklet; + } + } + + /* + step = cluster->pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + // TODO: Maintain this list in priority order. + // tasklet->next = NULL; + // *(cluster->pending_tasklets.tail) = tasklet; + // cluster->pending_tasklets.tail = &tasklet->next; +} + +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet) +{ + crm_srt_domain_t *cluster = NULL; + cpu_entry_t *targetCPU = NULL; + int thisCPU; + int runLocal = 0; + int runNow = 0; + unsigned long flags; + + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + return 0; + } + + cluster = task_cpu_cluster(tasklet->owner); + + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); + + thisCPU = smp_processor_id(); + +#if 1 +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = NULL; + + // use this CPU if it is in our cluster and isn't running any RT work. + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(crm_srt_cpu_entries).linked == NULL)) { + affinity = &(__get_cpu_var(crm_srt_cpu_entries)); + } + else { + // this CPU is busy or shouldn't run tasklet in this cluster. + // look for available near by CPUs. + // NOTE: Affinity towards owner and not this CPU. Is this right? 
+ affinity = + crm_srt_get_nearest_available_cpu(cluster, + &per_cpu(crm_srt_cpu_entries, task_cpu(tasklet->owner))); + } + + targetCPU = affinity; + } +#endif +#endif + + if (targetCPU == NULL) { + targetCPU = lowest_prio_cpu(cluster); + } + + if (rm_srt_higher_prio(tasklet->owner, targetCPU->linked)) { + if (thisCPU == targetCPU->cpu) { + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); + runLocal = 1; + runNow = 1; + } + else { + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); + runLocal = 0; + runNow = 1; + } + } + else { + runLocal = 0; + runNow = 0; + } + + if(!runLocal) { + // enqueue the tasklet + __add_pai_tasklet(tasklet, cluster); + } + + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); + + + if (runLocal /*&& runNow */) { // runNow == 1 is implied + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); + } + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); + preempt(targetCPU); // need to be protected by crm_srt_lock? + } + else { + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); + } + + return(1); // success +} + + +#endif + + + + + + + + + + + + + + + + + + + /* Getting schedule() right is a bit tricky. schedule() may not make any * assumptions on the state of the current task since it may be called for a * number of reasons. The reasons include a scheduler_tick() determined that it @@ -544,7 +972,7 @@ static void crm_srt_task_new(struct task_struct * t, int on_rq, int running) cpu_entry_t* entry; crm_srt_domain_t* cluster; - TRACE("gsn edf: task new %d\n", t->pid); + TRACE("crm srt: task new %d\n", t->pid); /* the cluster doesn't change even if t is running */ cluster = task_cpu_cluster(t); @@ -650,6 +1078,10 @@ static void crm_srt_task_exit(struct task_struct * t) } raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(cluster, t); +#endif + BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); } @@ -1467,6 +1899,11 @@ static long crm_srt_activate_plugin(void) bheap_init(&(crm_srt[i].cpu_heap)); rm_srt_domain_init(&(crm_srt[i].domain), NULL, crm_srt_release_jobs); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + crm_srt[i].pending_tasklets.head = NULL; + crm_srt[i].pending_tasklets.tail = &(crm_srt[i].pending_tasklets.head); +#endif + if(!zalloc_cpumask_var(&crm_srt[i].cpu_map, GFP_ATOMIC)) return -ENOMEM; } @@ -1578,6 +2015,10 @@ static struct sched_plugin crm_srt_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_SOFTIRQD .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = enqueue_pai_tasklet, + .run_tasklets = run_tasklets, #endif }; diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index ac7685fe69f0..b40ff7ba4f0e 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -35,6 +35,10 @@ #include #endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#endif + #ifdef CONFIG_LITMUS_NVIDIA #include #endif @@ -126,6 +130,16 @@ static struct bheap gsnedf_cpu_heap; static rt_domain_t gsnedf; #define gsnedf_lock (gsnedf.ready_lock) +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; + +struct tasklet_head gsnedf_pending_tasklets; +#endif + /* Uncomment this if you want to see all scheduling 
decisions in the * TRACE() log. @@ -393,6 +407,410 @@ static void gsnedf_tick(struct task_struct* t) } } + + + + + + + + + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + + +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) +{ + if (!atomic_read(&tasklet->count)) { + sched_trace_tasklet_begin(tasklet->owner); + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", __FUNCTION__, tasklet->owner->pid, flushed); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + + sched_trace_tasklet_end(tasklet->owner, flushed); + } + else { + BUG(); + } +} + + +static void __extract_tasklets(struct task_struct* task, struct tasklet_head* task_tasklets) +{ + struct tasklet_struct* step; + struct tasklet_struct* tasklet; + struct tasklet_struct* prev; + + task_tasklets->head = NULL; + task_tasklets->tail = &(task_tasklets->head); + + prev = NULL; + for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) + { + if(step->owner == task) + { + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid); + + tasklet = step; + + if(prev) { + prev->next = tasklet->next; + } + else if(gsnedf_pending_tasklets.head == tasklet) { + // we're at the head. + gsnedf_pending_tasklets.head = tasklet->next; + } + + if(gsnedf_pending_tasklets.tail == &tasklet) { + // we're at the tail + if(prev) { + gsnedf_pending_tasklets.tail = &prev; + } + else { + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); + } + } + + tasklet->next = NULL; + *(task_tasklets->tail) = tasklet; + task_tasklets->tail = &(tasklet->next); + } + else { + prev = step; + } + } +} + +static void flush_tasklets(struct task_struct* task) +{ + unsigned long flags; + struct tasklet_head task_tasklets; + struct tasklet_struct* step; + + raw_spin_lock_irqsave(&gsnedf_lock, flags); + __extract_tasklets(task, &task_tasklets); + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + + if(gsnedf_pending_tasklets.head != NULL) { + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid); + } + + // now execute any flushed tasklets. + for(step = gsnedf_pending_tasklets.head; step != NULL; /**/) + { + struct tasklet_struct* temp = step->next; + + step->next = NULL; + __do_lit_tasklet(step, 1ul); + + step = temp; + } +} + + +static void do_lit_tasklets(struct task_struct* sched_task) +{ + int work_to_do = 1; + struct tasklet_struct *tasklet = NULL; + //struct tasklet_struct *step; + unsigned long flags; + + while(work_to_do) { + // remove tasklet at head of list if it has higher priority. + raw_spin_lock_irqsave(&gsnedf_lock, flags); + + /* + step = gsnedf_pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + + if(gsnedf_pending_tasklets.head != NULL) { + // remove tasklet at head. 
+ tasklet = gsnedf_pending_tasklets.head; + + if(edf_higher_prio(tasklet->owner, sched_task)) { + + if(NULL == tasklet->next) { + // tasklet is at the head, list only has one element + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); + } + + // remove the tasklet from the queue + gsnedf_pending_tasklets.head = tasklet->next; + + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + } + else { + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); + tasklet = NULL; + } + } + else { + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + } + + + /* + step = gsnedf_pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + + if(tasklet) { + __do_lit_tasklet(tasklet, 0ul); + tasklet = NULL; + } + else { + work_to_do = 0; + } + } + + //TRACE("%s: exited.\n", __FUNCTION__); +} + + +static void run_tasklets(struct task_struct* sched_task) +{ +#if 0 + int task_is_rt = is_realtime(sched_task); + cedf_domain_t* cluster; + + if(is_realtime(sched_task)) { + cluster = task_cpu_cluster(sched_task); + } + else { + cluster = remote_cluster(get_cpu()); + } + + if(cluster && gsnedf_pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + + do_lit_tasklets(cluster, sched_task); + } + + if(!task_is_rt) { + put_cpu_no_resched(); + } +#else + + preempt_disable(); + + if(gsnedf_pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + do_lit_tasklets(sched_task); + } + + preempt_enable_no_resched(); + +#endif +} + + +static void __add_pai_tasklet(struct tasklet_struct* tasklet) +{ + struct tasklet_struct* step; + + /* + step = gsnedf_pending_tasklets.head; + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + + tasklet->next = NULL; // make sure there are no old values floating around + + step = gsnedf_pending_tasklets.head; + if(step == NULL) { + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); + // insert at tail. + *(gsnedf_pending_tasklets.tail) = tasklet; + gsnedf_pending_tasklets.tail = &(tasklet->next); + } + else if((*(gsnedf_pending_tasklets.tail) != NULL) && + edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) { + // insert at tail. + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); + + *(gsnedf_pending_tasklets.tail) = tasklet; + gsnedf_pending_tasklets.tail = &(tasklet->next); + } + else { + + //WARN_ON(1 == 1); + + // insert the tasklet somewhere in the middle. 
+ + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); + + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { + step = step->next; + } + + // insert tasklet right before step->next. + + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); + + tasklet->next = step->next; + step->next = tasklet; + + // patch up the head if needed. + if(gsnedf_pending_tasklets.head == step) + { + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); + gsnedf_pending_tasklets.head = tasklet; + } + } + + /* + step = gsnedf_pending_tasklets.head; + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); + while(step != NULL){ + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid); + step = step->next; + } + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1); + TRACE("%s: done.\n", __FUNCTION__); + */ + + // TODO: Maintain this list in priority order. + // tasklet->next = NULL; + // *(gsnedf_pending_tasklets.tail) = tasklet; + // gsnedf_pending_tasklets.tail = &tasklet->next; +} + +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet) +{ + cpu_entry_t *targetCPU = NULL; + int thisCPU; + int runLocal = 0; + int runNow = 0; + unsigned long flags; + + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + return 0; + } + + + raw_spin_lock_irqsave(&gsnedf_lock, flags); + + thisCPU = smp_processor_id(); + +#if 1 +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = NULL; + + // use this CPU if it is in our cluster and isn't running any RT work. + if( +#ifdef CONFIG_RELEASE_MASTER + (thisCPU != gsnedf.release_master) && +#endif + (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) { + affinity = &(__get_cpu_var(gsnedf_cpu_entries)); + } + else { + // this CPU is busy or shouldn't run tasklet in this cluster. + // look for available near by CPUs. + // NOTE: Affinity towards owner and not this CPU. Is this right? + affinity = + gsnedf_get_nearest_available_cpu( + &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner))); + } + + targetCPU = affinity; + } +#endif +#endif + + if (targetCPU == NULL) { + targetCPU = lowest_prio_cpu(); + } + + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) { + if (thisCPU == targetCPU->cpu) { + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); + runLocal = 1; + runNow = 1; + } + else { + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); + runLocal = 0; + runNow = 1; + } + } + else { + runLocal = 0; + runNow = 0; + } + + if(!runLocal) { + // enqueue the tasklet + __add_pai_tasklet(tasklet); + } + + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + + + if (runLocal /*&& runNow */) { // runNow == 1 is implied + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); + } + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); + preempt(targetCPU); // need to be protected by cedf_lock? + } + else { + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); + } + + return(1); // success +} + + +#endif + + + + + + + + + + + + + /* Getting schedule() right is a bit tricky. 
schedule() may not make any * assumptions on the state of the current task since it may be called for a * number of reasons. The reasons include a scheduler_tick() determined that it @@ -592,7 +1010,7 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) static void gsnedf_task_wake_up(struct task_struct *task) { unsigned long flags; - lt_t now; + //lt_t now; TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); @@ -660,6 +1078,10 @@ static void gsnedf_task_exit(struct task_struct * t) } raw_spin_unlock_irqrestore(&gsnedf_lock, flags); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(t); +#endif + BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); } @@ -1602,6 +2024,11 @@ static long gsnedf_activate_plugin(void) } #endif } + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + gsnedf_pending_tasklets.head = NULL; + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); +#endif #ifdef CONFIG_LITMUS_SOFTIRQD spawn_klitirqd(NULL); @@ -1636,7 +2063,10 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, #endif - +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = enqueue_pai_tasklet, + .run_tasklets = run_tasklets, +#endif }; -- cgit v1.2.2 From f5264e2cb8213dad425cb2d2db564edbc443a51a Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 20 Jan 2012 11:09:15 -0500 Subject: Fix bugs in tracing and PAI handling --- include/litmus/sched_plugin.h | 2 -- include/litmus/trace.h | 6 ++++ litmus/sched_cedf.c | 71 +++++++++++++++++++++++++++++++++--------- litmus/sched_cfifo.c | 14 ++++++--- litmus/sched_crm.c | 72 ++++++++++++++++++++++++++++++++++--------- litmus/sched_crm_srt.c | 14 ++++++--- litmus/sched_gsn_edf.c | 15 ++++++--- 7 files changed, 151 insertions(+), 43 deletions(-) diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 3fc64f832fef..8fdf05dd7cd3 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -11,9 +11,7 @@ #include #endif -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include -#endif /************************ setup/tear down ********************/ diff --git a/include/litmus/trace.h b/include/litmus/trace.h index aa3ee4a6757b..09d409b60268 100644 --- a/include/litmus/trace.h +++ b/include/litmus/trace.h @@ -114,4 +114,10 @@ feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu) #endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#define TS_NV_SCHED_BOTISR_START TIMESTAMP(206) +#define TS_NV_SCHED_BOTISR_END TIMESTAMP(207) +#endif + + #endif /* !_SYS_TRACE_H_ */ diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 02106f455c0f..9aa5822c3834 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -57,6 +57,7 @@ #ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include +#include #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -432,17 +433,24 @@ static void cedf_tick(struct task_struct* t) static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) { if (!atomic_read(&tasklet->count)) { - sched_trace_tasklet_begin(tasklet->owner); + if(tasklet->owner) { + sched_trace_tasklet_begin(tasklet->owner); + } if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) { BUG(); } - TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", __FUNCTION__, tasklet->owner->pid, flushed); + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", + __FUNCTION__, + (tasklet->owner) ? 
tasklet->owner->pid : -1, + (tasklet->owner) ? 0 : 1); tasklet->func(tasklet->data); tasklet_unlock(tasklet); - - sched_trace_tasklet_end(tasklet->owner, flushed); + + if(tasklet->owner) { + sched_trace_tasklet_end(tasklet->owner, flushed); + } } else { BUG(); @@ -498,6 +506,7 @@ static void __extract_tasklets(cedf_domain_t* cluster, struct task_struct* task, static void flush_tasklets(cedf_domain_t* cluster, struct task_struct* task) { +#if 0 unsigned long flags; struct tasklet_head task_tasklets; struct tasklet_struct* step; @@ -520,6 +529,27 @@ static void flush_tasklets(cedf_domain_t* cluster, struct task_struct* task) step = temp; } +#endif + + // lazy flushing. + // just change ownership to NULL and let an idle processor + // take care of it. :P + + struct tasklet_struct* step; + unsigned long flags; + + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); + + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) + { + if(step->owner == task) + { + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid); + step->owner = NULL; + } + } + + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); } @@ -531,6 +561,9 @@ static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_ta unsigned long flags; while(work_to_do) { + + TS_NV_SCHED_BOTISR_START; + // remove tasklet at head of list if it has higher priority. raw_spin_lock_irqsave(&cluster->cedf_lock, flags); @@ -544,7 +577,6 @@ static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_ta TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1); TRACE("%s: done.\n", __FUNCTION__); */ - if(cluster->pending_tasklets.head != NULL) { // remove tasklet at head. @@ -554,17 +586,17 @@ static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_ta if(NULL == tasklet->next) { // tasklet is at the head, list only has one element - TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1); cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); } // remove the tasklet from the queue cluster->pending_tasklets.head = tasklet->next; - TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1); } else { - TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1, smp_processor_id()); tasklet = NULL; } } @@ -584,9 +616,11 @@ static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_ta TRACE("%s: done.\n", __FUNCTION__); */ - raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); + + TS_NV_SCHED_BOTISR_END; + if(tasklet) { __do_lit_tasklet(tasklet, 0ul); tasklet = NULL; @@ -690,7 +724,16 @@ static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* clu // insert tasklet right before step->next. - TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? 
step->next->owner->pid : -1); + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, + tasklet->owner->pid, + (step->owner) ? + step->owner->pid : + -1, + (step->next) ? + ((step->next->owner) ? + step->next->owner->pid : + -1) : + -1); tasklet->next = step->next; step->next = tasklet; @@ -1080,6 +1123,10 @@ static void cedf_task_exit(struct task_struct * t) unsigned long flags; cedf_domain_t *cluster = task_cpu_cluster(t); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(cluster, t); +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cedf_lock, flags); unlink(t); @@ -1091,10 +1138,6 @@ static void cedf_task_exit(struct task_struct * t) } raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD - flush_tasklets(cluster, t); -#endif - BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); } diff --git a/litmus/sched_cfifo.c b/litmus/sched_cfifo.c index 689b2dbe5fae..7fbdec3f1d15 100644 --- a/litmus/sched_cfifo.c +++ b/litmus/sched_cfifo.c @@ -57,6 +57,7 @@ #ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include +#include #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -530,6 +531,9 @@ static void do_lit_tasklets(cfifo_domain_t* cluster, struct task_struct* sched_t unsigned long flags; while(work_to_do) { + + TS_NV_SCHED_BOTISR_START; + // remove tasklet at head of list if it has higher priority. raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); @@ -584,6 +588,8 @@ static void do_lit_tasklets(cfifo_domain_t* cluster, struct task_struct* sched_t raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); + TS_NV_SCHED_BOTISR_END; + if(tasklet) { __do_lit_tasklet(tasklet, 0ul); tasklet = NULL; @@ -1071,6 +1077,10 @@ static void cfifo_task_exit(struct task_struct * t) unsigned long flags; cfifo_domain_t *cluster = task_cpu_cluster(t); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(cluster, t); +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); unlink(t); @@ -1081,10 +1091,6 @@ static void cfifo_task_exit(struct task_struct * t) tsk_rt(t)->scheduled_on = NO_CPU; } raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); - -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD - flush_tasklets(cluster, t); -#endif BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c index fd7fab982998..e51de10557f9 100644 --- a/litmus/sched_crm.c +++ b/litmus/sched_crm.c @@ -57,6 +57,7 @@ #ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include +#include #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -425,17 +426,24 @@ static void crm_tick(struct task_struct* t) static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) { if (!atomic_read(&tasklet->count)) { - sched_trace_tasklet_begin(tasklet->owner); + if(tasklet->owner) { + sched_trace_tasklet_begin(tasklet->owner); + } if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) { BUG(); } - TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", __FUNCTION__, tasklet->owner->pid, flushed); + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", + __FUNCTION__, + (tasklet->owner) ? tasklet->owner->pid : -1, + (tasklet->owner) ? 
0 : 1); tasklet->func(tasklet->data); tasklet_unlock(tasklet); - sched_trace_tasklet_end(tasklet->owner, flushed); + if(tasklet->owner) { + sched_trace_tasklet_end(tasklet->owner, flushed); + } } else { BUG(); @@ -491,6 +499,7 @@ static void __extract_tasklets(crm_domain_t* cluster, struct task_struct* task, static void flush_tasklets(crm_domain_t* cluster, struct task_struct* task) { +#if 0 unsigned long flags; struct tasklet_head task_tasklets; struct tasklet_struct* step; @@ -513,6 +522,27 @@ static void flush_tasklets(crm_domain_t* cluster, struct task_struct* task) step = temp; } +#endif + + // lazy flushing. + // just change ownership to NULL and let an idle processor + // take care of it. :P + + struct tasklet_struct* step; + unsigned long flags; + + raw_spin_lock_irqsave(&cluster->crm_lock, flags); + + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) + { + if(step->owner == task) + { + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid); + step->owner = NULL; + } + } + + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); } @@ -524,6 +554,9 @@ static void do_lit_tasklets(crm_domain_t* cluster, struct task_struct* sched_tas unsigned long flags; while(work_to_do) { + + TS_NV_SCHED_BOTISR_START; + // remove tasklet at head of list if it has higher priority. raw_spin_lock_irqsave(&cluster->crm_lock, flags); @@ -546,17 +579,17 @@ static void do_lit_tasklets(crm_domain_t* cluster, struct task_struct* sched_tas if(NULL == tasklet->next) { // tasklet is at the head, list only has one element - TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1); cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); } // remove the tasklet from the queue cluster->pending_tasklets.head = tasklet->next; - TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1); } else { - TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1, smp_processor_id()); tasklet = NULL; } } @@ -577,6 +610,8 @@ static void do_lit_tasklets(crm_domain_t* cluster, struct task_struct* sched_tas raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); + TS_NV_SCHED_BOTISR_END; + if(tasklet) { __do_lit_tasklet(tasklet, 0ul); tasklet = NULL; @@ -619,8 +654,8 @@ static void run_tasklets(struct task_struct* sched_task) preempt_disable(); cluster = (is_realtime(sched_task)) ? - task_cpu_cluster(sched_task) : - remote_cluster(smp_processor_id()); + task_cpu_cluster(sched_task) : + remote_cluster(smp_processor_id()); if(cluster && cluster->pending_tasklets.head != NULL) { TRACE("%s: There are tasklets to process.\n", __FUNCTION__); @@ -679,8 +714,17 @@ static void __add_pai_tasklet(struct tasklet_struct* tasklet, crm_domain_t* clus // insert tasklet right before step->next. - TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? 
step->next->owner->pid : -1); - + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, + tasklet->owner->pid, + (step->owner) ? + step->owner->pid : + -1, + (step->next) ? + ((step->next->owner) ? + step->next->owner->pid : + -1) : + -1); + tasklet->next = step->next; step->next = tasklet; @@ -1070,6 +1114,10 @@ static void crm_task_exit(struct task_struct * t) unsigned long flags; crm_domain_t *cluster = task_cpu_cluster(t); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(cluster, t); +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->crm_lock, flags); unlink(t); @@ -1080,10 +1128,6 @@ static void crm_task_exit(struct task_struct * t) tsk_rt(t)->scheduled_on = NO_CPU; } raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); - -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD - flush_tasklets(cluster, t); -#endif BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); diff --git a/litmus/sched_crm_srt.c b/litmus/sched_crm_srt.c index c0004354573d..f0064d486953 100644 --- a/litmus/sched_crm_srt.c +++ b/litmus/sched_crm_srt.c @@ -57,6 +57,7 @@ #ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include +#include #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -529,6 +530,9 @@ static void do_lit_tasklets(crm_srt_domain_t* cluster, struct task_struct* sched unsigned long flags; while(work_to_do) { + + TS_NV_SCHED_BOTISR_START; + // remove tasklet at head of list if it has higher priority. raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); @@ -582,6 +586,8 @@ static void do_lit_tasklets(crm_srt_domain_t* cluster, struct task_struct* sched raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); + TS_NV_SCHED_BOTISR_END; + if(tasklet) { __do_lit_tasklet(tasklet, 0ul); tasklet = NULL; @@ -1067,6 +1073,10 @@ static void crm_srt_task_exit(struct task_struct * t) unsigned long flags; crm_srt_domain_t *cluster = task_cpu_cluster(t); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(cluster, t); +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); unlink(t); @@ -1077,10 +1087,6 @@ static void crm_srt_task_exit(struct task_struct * t) tsk_rt(t)->scheduled_on = NO_CPU; } raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); - -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD - flush_tasklets(cluster, t); -#endif BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index b40ff7ba4f0e..30c745fe33a7 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -37,6 +37,7 @@ #ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include +#include #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -523,6 +524,9 @@ static void do_lit_tasklets(struct task_struct* sched_task) unsigned long flags; while(work_to_do) { + + TS_NV_SCHED_BOTISR_START; + // remove tasklet at head of list if it has higher priority. 
raw_spin_lock_irqsave(&gsnedf_lock, flags); @@ -576,9 +580,10 @@ static void do_lit_tasklets(struct task_struct* sched_task) TRACE("%s: done.\n", __FUNCTION__); */ - raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + TS_NV_SCHED_BOTISR_END; + if(tasklet) { __do_lit_tasklet(tasklet, 0ul); tasklet = NULL; @@ -1069,6 +1074,10 @@ static void gsnedf_task_exit(struct task_struct * t) { unsigned long flags; +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + flush_tasklets(t); +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); unlink(t); @@ -1077,10 +1086,6 @@ static void gsnedf_task_exit(struct task_struct * t) tsk_rt(t)->scheduled_on = NO_CPU; } raw_spin_unlock_irqrestore(&gsnedf_lock, flags); - -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD - flush_tasklets(t); -#endif BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); -- cgit v1.2.2 From 3d1c6d44d3f133909d1c594351c2b7c779b1d7d4 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 4 Mar 2012 16:09:04 -0500 Subject: Some cleanup of PAI --- arch/x86/kernel/irq.c | 10 ---------- kernel/softirq.c | 1 + kernel/workqueue.c | 3 ++- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 50abbc6b7429..433cd154333c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -248,17 +248,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) __func__, smp_processor_id(), vector, irq); } -//#ifndef CONFIG_LITMUS_NVIDIA irq_exit(); -//#else - /* skip softirqs if we're tracing an interrupt top-half */ - /* comment out if-statement if we want to trace with bh on. */ - //if(!is_interrupt_tracing_active()) -// irq_exit(); - - -// sched_trace_nv_interrupt_end(); -//#endif set_irq_regs(old_regs); return 1; diff --git a/kernel/softirq.c b/kernel/softirq.c index d3217c54d2bf..7a6f500570f1 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -439,6 +439,7 @@ void __tasklet_schedule(struct tasklet_struct *t) t->owner = device_owner; sched_trace_tasklet_release(t->owner); + if(likely(_litmus_tasklet_schedule(t,nvidia_device))) { unlock_nv_registry(nvidia_device, &flags); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 637cadac2627..2293aadbb1ab 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2531,6 +2531,7 @@ EXPORT_SYMBOL(cancel_delayed_work_sync); */ int schedule_work(struct work_struct *work) { +#if 0 #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) if(is_nvidia_func(work->func)) { @@ -2583,7 +2584,7 @@ int schedule_work(struct work_struct *work) unlock_nv_registry(nvidiaDevice, &flags); } #endif - +#endif return(__schedule_work(work)); } EXPORT_SYMBOL(schedule_work); -- cgit v1.2.2
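
Aside (not part of the patch series): the PAI hunks above all revolve around one data structure -- a pending-tasklet queue kept as a singly-linked list in priority order, addressed through a head pointer and a tail pointer-to-pointer, drained only when the head tasklet outranks the task scheduled on the local CPU. The following is a minimal user-space sketch of that idea for experimentation. Everything in it is a stand-in of mine: the integer prio field and higher_prio() replace edf_higher_prio() on the owning real-time task, there is no locking, and the insertion is written as a simple pointer-to-pointer walk rather than copying the patch's __add_pai_tasklet() verbatim.

/*
 * pai_queue_sketch.c -- user-space model of the PAI pending-tasklet
 * queue: head pointer plus tail pointer-to-pointer, kept sorted so
 * the highest-priority tasklet sits at the head.  Illustrative only.
 */
#include <stdio.h>
#include <stddef.h>

struct lit_tasklet {
	struct lit_tasklet *next;
	int owner_pid;
	int prio;                    /* smaller value = higher priority (stand-in) */
};

struct tasklet_head {
	struct lit_tasklet *head;
	struct lit_tasklet **tail;   /* points at the last ->next, or at head */
};

static int higher_prio(const struct lit_tasklet *a, const struct lit_tasklet *b)
{
	return a->prio < b->prio;
}

static void queue_init(struct tasklet_head *q)
{
	q->head = NULL;
	q->tail = &q->head;          /* same idiom as pending_tasklets.tail = &head */
}

/* Insert t so the queue stays sorted, highest priority at the head. */
static void pai_insert(struct tasklet_head *q, struct lit_tasklet *t)
{
	struct lit_tasklet **link = &q->head;

	while (*link && higher_prio(*link, t))
		link = &(*link)->next;   /* skip everything that outranks t */

	t->next = *link;
	*link = t;
	if (t->next == NULL)
		q->tail = &t->next;      /* t became the new last element */
}

/* Dequeue the head only if it outranks the task scheduled on this CPU,
 * mirroring the check done under the cluster lock in do_lit_tasklets(). */
static struct lit_tasklet *pai_pop_if_higher(struct tasklet_head *q, int sched_prio)
{
	struct lit_tasklet *t = q->head;

	if (t == NULL || t->prio >= sched_prio)
		return NULL;

	q->head = t->next;
	if (q->head == NULL)
		q->tail = &q->head;      /* queue is empty again */
	t->next = NULL;
	return t;
}

int main(void)
{
	struct tasklet_head q;
	struct lit_tasklet a = { NULL, 101, 30 };
	struct lit_tasklet b = { NULL, 102, 10 };
	struct lit_tasklet c = { NULL, 103, 20 };
	struct lit_tasklet *t;

	queue_init(&q);
	pai_insert(&q, &a);
	pai_insert(&q, &b);
	pai_insert(&q, &c);

	/* pretend the locally scheduled task has priority 25 */
	while ((t = pai_pop_if_higher(&q, 25)) != NULL)
		printf("run tasklet of pid %d (prio %d)\n", t->owner_pid, t->prio);

	/* pids 102 and 103 run; pid 101 (prio 30) stays deferred */
	return 0;
}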
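
The follow-up commit also switches task exit from eager extraction to a "lazy flush": the exiting task only clears the owner field of its queued tasklets, and whichever CPU later drains the queue still runs them, just without the sched_trace begin/end hooks. Below is a small, self-contained user-space illustration of that behavior; printf stands in for the trace points, a string pointer stands in for the task_struct pointer, and all names are mine rather than the kernel's.

/*
 * lazy_flush_sketch.c -- illustration of the lazy-flush idea: clear
 * ownership on exit, keep executing the bottom half, skip tracing
 * when no owner is recorded.
 */
#include <stdio.h>
#include <stddef.h>

struct lit_tasklet {
	struct lit_tasklet *next;
	const char *owner;                   /* NULL once the owning task exited */
	void (*func)(unsigned long);
	unsigned long data;
};

/* Walk the pending list and drop ownership for the exiting task. */
static void lazy_flush(struct lit_tasklet *head, const char *exiting_owner)
{
	for (; head != NULL; head = head->next)
		if (head->owner == exiting_owner)
			head->owner = NULL;          /* work is kept, tracing is not */
}

/* Execute one tasklet, tracing only if an owner is still recorded. */
static void run_one(struct lit_tasklet *t)
{
	if (t->owner)
		printf("trace: tasklet_begin for %s\n", t->owner);
	t->func(t->data);                    /* bottom half still executes */
	if (t->owner)
		printf("trace: tasklet_end for %s\n", t->owner);
}

static void bh(unsigned long d)
{
	printf("bottom half ran, data=%lu\n", d);
}

int main(void)
{
	const char *task_a = "task-A";
	const char *task_b = "task-B";
	struct lit_tasklet t2 = { NULL, task_b, bh, 2 };
	struct lit_tasklet t1 = { &t2,  task_a, bh, 1 };

	lazy_flush(&t1, task_a);             /* task-A exits */
	run_one(&t1);                        /* runs, but untraced */
	run_one(&t2);                        /* still traced for task-B */
	return 0;
}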