From a463f9a9e04385f0729f7435a0a6dff7d89b25de Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sat, 26 May 2012 17:29:58 -0400 Subject: GPUSync patch for Litmus 2012.1. --- arch/x86/kernel/irq.c | 4 + arch/x86/kernel/syscall_table_32.S | 1 + include/linux/completion.h | 1 + include/linux/interrupt.h | 10 +- include/linux/mutex.h | 10 + include/linux/semaphore.h | 9 + include/linux/workqueue.h | 18 + include/litmus/binheap.h | 207 +++ include/litmus/edf_common.h | 12 + include/litmus/fdso.h | 14 +- include/litmus/fpmath.h | 145 ++ include/litmus/gpu_affinity.h | 49 + include/litmus/ikglp_lock.h | 160 ++ include/litmus/kexclu_affinity.h | 35 + include/litmus/kfmlp_lock.h | 97 ++ include/litmus/litmus.h | 9 +- include/litmus/litmus_softirq.h | 199 +++ include/litmus/locking.h | 142 +- include/litmus/nvidia_info.h | 46 + include/litmus/preempt.h | 2 +- include/litmus/rsm_lock.h | 54 + include/litmus/rt_param.h | 100 +- include/litmus/sched_plugin.h | 76 +- include/litmus/sched_trace.h | 218 ++- include/litmus/sched_trace_external.h | 78 + include/litmus/trace.h | 34 +- include/litmus/unistd_32.h | 5 +- include/litmus/unistd_64.h | 9 +- kernel/lockdep.c | 7 +- kernel/mutex.c | 125 ++ kernel/sched.c | 27 + kernel/semaphore.c | 13 +- kernel/softirq.c | 322 +++- kernel/workqueue.c | 71 +- litmus/Kconfig | 148 +- litmus/Makefile | 11 +- litmus/affinity.c | 2 +- litmus/binheap.c | 443 +++++ litmus/edf_common.c | 147 +- litmus/fdso.c | 13 + litmus/gpu_affinity.c | 113 ++ litmus/ikglp_lock.c | 2838 +++++++++++++++++++++++++++++++++ litmus/jobs.c | 17 +- litmus/kexclu_affinity.c | 92 ++ litmus/kfmlp_lock.c | 1002 ++++++++++++ litmus/litmus.c | 126 +- litmus/litmus_pai_softirq.c | 64 + litmus/litmus_proc.c | 17 + litmus/litmus_softirq.c | 1582 ++++++++++++++++++ litmus/locking.c | 393 ++++- litmus/nvidia_info.c | 597 +++++++ litmus/preempt.c | 5 + litmus/rsm_lock.c | 796 +++++++++ litmus/sched_cedf.c | 1062 +++++++++++- litmus/sched_gsn_edf.c | 1032 ++++++++++-- litmus/sched_litmus.c | 2 + litmus/sched_plugin.c | 135 +- litmus/sched_task_trace.c | 282 +++- litmus/sched_trace_external.c | 64 + 59 files changed, 13012 insertions(+), 280 deletions(-) create mode 100644 include/litmus/binheap.h create mode 100644 include/litmus/fpmath.h create mode 100644 include/litmus/gpu_affinity.h create mode 100644 include/litmus/ikglp_lock.h create mode 100644 include/litmus/kexclu_affinity.h create mode 100644 include/litmus/kfmlp_lock.h create mode 100644 include/litmus/litmus_softirq.h create mode 100644 include/litmus/nvidia_info.h create mode 100644 include/litmus/rsm_lock.h create mode 100644 include/litmus/sched_trace_external.h create mode 100644 litmus/binheap.c create mode 100644 litmus/gpu_affinity.c create mode 100644 litmus/ikglp_lock.c create mode 100644 litmus/kexclu_affinity.c create mode 100644 litmus/kfmlp_lock.c create mode 100644 litmus/litmus_pai_softirq.c create mode 100644 litmus/litmus_softirq.c create mode 100644 litmus/nvidia_info.c create mode 100644 litmus/rsm_lock.c create mode 100644 litmus/sched_trace_external.c diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 6c0802eb2f7f..680a5cb4b585 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -10,6 +10,10 @@ #include #include +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + #include #include #include diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d0126222b394..0cb4373698e7 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ 
-358,3 +358,4 @@ ENTRY(sys_call_table) .long sys_wait_for_ts_release .long sys_release_ts /* +10 */ .long sys_null_call + .long sys_register_nv_device diff --git a/include/linux/completion.h b/include/linux/completion.h index 9d727271c9fe..cff405c4dd3a 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x) init_waitqueue_head(&x->wait); } +extern void __wait_for_completion_locked(struct completion *); extern void wait_for_completion(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f6efed0039ed..57a7bc8807be 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -445,6 +445,7 @@ static inline void __raise_softirq_irqoff(unsigned int nr) extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +extern void wakeup_softirqd(void); /* This is the worklist that queues up per-cpu softirq work. * @@ -500,6 +501,10 @@ struct tasklet_struct atomic_t count; void (*func)(unsigned long); unsigned long data; + +#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD) + struct task_struct *owner; +#endif }; #define DECLARE_TASKLET(name, func, data) \ @@ -537,6 +542,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) #define tasklet_unlock(t) do { } while (0) #endif +extern void ___tasklet_schedule(struct tasklet_struct *t); extern void __tasklet_schedule(struct tasklet_struct *t); static inline void tasklet_schedule(struct tasklet_struct *t) @@ -545,6 +551,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t) __tasklet_schedule(t); } +extern void ___tasklet_hi_schedule(struct tasklet_struct *t); extern void __tasklet_hi_schedule(struct tasklet_struct *t); static inline void tasklet_hi_schedule(struct tasklet_struct *t) @@ -553,6 +560,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) __tasklet_hi_schedule(t); } +extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t); extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); /* @@ -582,7 +590,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) } static inline void tasklet_enable(struct tasklet_struct *t) -{ +{ smp_mb__before_atomic_dec(); atomic_dec(&t->count); } diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a940fe435aca..cb47debbf24d 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock) return atomic_read(&lock->count) != 1; } +/* return non-zero to abort. only pre-side-effects may abort */ +typedef int (*side_effect_t)(unsigned long); +extern void mutex_lock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg); +extern void mutex_unlock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg); + /* * See kernel/mutex.c for detailed documentation of these APIs. * Also see Documentation/mutex-design.txt. 
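The mutex_lock_sfx()/mutex_unlock_sfx() entry points added above let a caller attach side effects that run together with acquiring or releasing the mutex, with a non-zero return from the pre-side-effect aborting the operation. The following is a minimal, hypothetical sketch of a caller; struct my_dev, my_pre, my_post, and my_dev_get are invented for illustration (the actual users in this patch are the klitirqd semaphore-status helpers in litmus/litmus_softirq.c).

#include <linux/mutex.h>

struct my_dev {
	struct mutex lock;
	int users;
};

/* pre-side-effect: a non-zero return aborts the acquisition
 * (per the comment on side_effect_t above) */
static int my_pre(unsigned long arg)
{
	struct my_dev *d = (struct my_dev *)arg;
	return (d->users < 0);	/* abort if the device is being torn down */
}

/* post-side-effect: runs once the mutex has been acquired */
static int my_post(unsigned long arg)
{
	struct my_dev *d = (struct my_dev *)arg;
	d->users++;
	return 0;
}

static void my_dev_get(struct my_dev *d)
{
	mutex_lock_sfx(&d->lock, my_pre, (unsigned long)d,
		       my_post, (unsigned long)d);
}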
@@ -153,6 +162,7 @@ extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); extern int __must_check mutex_lock_killable(struct mutex *lock); + # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 39fa04966aa8..c83fc2b65f01 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -43,4 +43,13 @@ extern int __must_check down_trylock(struct semaphore *sem); extern int __must_check down_timeout(struct semaphore *sem, long jiffies); extern void up(struct semaphore *sem); +extern void __down(struct semaphore *sem); +extern void __up(struct semaphore *sem); + +struct semaphore_waiter { + struct list_head list; + struct task_struct *task; + int up; +}; + #endif /* __LINUX_SEMAPHORE_H */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f584aba78ca9..1ec2ec7d4e3b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -83,6 +83,9 @@ struct work_struct { #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif +#ifdef CONFIG_LITMUS_SOFTIRQD + struct task_struct *owner; +#endif }; #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) @@ -115,11 +118,25 @@ struct execute_work { #define __WORK_INIT_LOCKDEP_MAP(n, k) #endif +#ifdef CONFIG_LITMUS_SOFTIRQD +#define __WORK_INIT_OWNER() \ + .owner = NULL, + +#define PREPARE_OWNER(_work, _owner) \ + do { \ + (_work)->owner = (_owner); \ + } while(0) +#else +#define __WORK_INIT_OWNER() +#define PREPARE_OWNER(_work, _owner) +#endif + #define __WORK_INITIALIZER(n, f) { \ .data = WORK_DATA_STATIC_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ + __WORK_INIT_OWNER() \ } #define __DELAYED_WORK_INITIALIZER(n, f) { \ @@ -357,6 +374,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); +extern int __schedule_work(struct work_struct *work); extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct work_struct *work); extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h new file mode 100644 index 000000000000..9e966e3886cb --- /dev/null +++ b/include/litmus/binheap.h @@ -0,0 +1,207 @@ +#ifndef LITMUS_BINARY_HEAP_H +#define LITMUS_BINARY_HEAP_H + +#include + +/** + * Simple binary heap with add, arbitrary delete, delete_root, and top + * operations. + * + * Style meant to conform with list.h. + * + * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial + * heap may be overkill (and perhaps not general enough) for some applications. + * + * Note: In order to make node swaps fast, a node inserted with a data pointer + * may not always hold said data pointer. This is similar to the binomial heap + * implementation. This does make node deletion tricky since we have to + * (1) locate the node that holds the data pointer to delete, and (2) the + * node that was originally inserted with said data pointer. These have to be + * coalesced into a single node before removal (see usage of + * __binheap_safe_swap()). We have to track node references to accomplish this. 
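To make the node/data-pointer scheme concrete, here is a minimal usage sketch of the binheap API declared below; struct my_job and its deadline field are invented for illustration and mirror how the plugins embed binheap_node in structures such as struct nested_info.

#include <litmus/binheap.h>

struct my_job {
	unsigned long long deadline;
	struct binheap_node heap_node;
};

/* 'less-than' comparator => min-heap ordered by deadline */
static int job_order(struct binheap_node *a, struct binheap_node *b)
{
	struct my_job *ja = binheap_entry(a, struct my_job, heap_node);
	struct my_job *jb = binheap_entry(b, struct my_job, heap_node);
	return ja->deadline < jb->deadline;
}

static struct binheap_handle my_heap;

static void my_heap_init(void)
{
	INIT_BINHEAP_HANDLE(&my_heap, job_order);
}

static void my_heap_add(struct my_job *j)
{
	INIT_BINHEAP_NODE(&j->heap_node);
	binheap_add(&j->heap_node, &my_heap, struct my_job, heap_node);
}

static struct my_job* my_heap_pop(void)
{
	struct my_job *j = NULL;
	if (!binheap_empty(&my_heap)) {
		j = binheap_top_entry(&my_heap, struct my_job, heap_node);
		binheap_delete_root(&my_heap, struct my_job, heap_node);
	}
	return j;
}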
+ */ + +struct binheap_node { + void *data; + struct binheap_node *parent; + struct binheap_node *left; + struct binheap_node *right; + + /* pointer to binheap_node that holds *data for which this binheap_node + * was originally inserted. (*data "owns" this node) + */ + struct binheap_node *ref; + struct binheap_node **ref_ptr; +}; + +/** + * Signature of compator function. Assumed 'less-than' (min-heap). + * Pass in 'greater-than' for max-heap. + * + * TODO: Consider macro-based implementation that allows comparator to be + * inlined (similar to Linux red/black tree) for greater efficiency. + */ +typedef int (*binheap_order_t)(struct binheap_node *a, + struct binheap_node *b); + + +struct binheap_handle { + struct binheap_node *root; + + /* pointer to node to take next inserted child */ + struct binheap_node *next; + + /* pointer to last node in complete binary tree */ + struct binheap_node *last; + + /* comparator function pointer */ + binheap_order_t compare; +}; + + +#define BINHEAP_POISON ((void*)(0xdeadbeef)) + + +/** + * binheap_entry - get the struct for this heap node. + * Only valid when called upon heap nodes other than the root handle. + * @ptr: the heap node. + * @type: the type of struct pointed to by binheap_node::data. + * @member: unused. + */ +#define binheap_entry(ptr, type, member) \ +((type *)((ptr)->data)) + +/** + * binheap_node_container - get the struct that contains this node. + * Only valid when called upon heap nodes other than the root handle. + * @ptr: the heap node. + * @type: the type of struct the node is embedded in. + * @member: the name of the binheap_struct within the (type) struct. + */ +#define binheap_node_container(ptr, type, member) \ +container_of((ptr), type, member) + +/** + * binheap_top_entry - get the struct for the node at the top of the heap. + * Only valid when called upon the heap handle node. + * @ptr: the special heap-handle node. + * @type: the type of the struct the head is embedded in. + * @member: the name of the binheap_struct within the (type) struct. + */ +#define binheap_top_entry(ptr, type, member) \ +binheap_entry((ptr)->root, type, member) + +/** + * binheap_delete_root - remove the root element from the heap. + * @handle: handle to the heap. + * @type: the type of the struct the head is embedded in. + * @member: the name of the binheap_struct within the (type) struct. + */ +#define binheap_delete_root(handle, type, member) \ +__binheap_delete_root((handle), &((type *)((handle)->root->data))->member) + +/** + * binheap_delete - remove an arbitrary element from the heap. + * @to_delete: pointer to node to be removed. + * @handle: handle to the heap. + */ +#define binheap_delete(to_delete, handle) \ +__binheap_delete((to_delete), (handle)) + +/** + * binheap_add - insert an element to the heap + * new_node: node to add. + * @handle: handle to the heap. + * @type: the type of the struct the head is embedded in. + * @member: the name of the binheap_struct within the (type) struct. + */ +#define binheap_add(new_node, handle, type, member) \ +__binheap_add((new_node), (handle), container_of((new_node), type, member)) + +/** + * binheap_decrease - re-eval the position of a node (based upon its + * original data pointer). + * @handle: handle to the heap. + * @orig_node: node that was associated with the data pointer + * (whose value has changed) when said pointer was + * added to the heap. 
+ */ +#define binheap_decrease(orig_node, handle) \ +__binheap_decrease((orig_node), (handle)) + +#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL , NULL, NULL} + +#define BINHEAP_NODE(name) \ + struct binheap_node name = BINHEAP_NODE_INIT() + + +static inline void INIT_BINHEAP_NODE(struct binheap_node *n) +{ + n->data = NULL; + n->parent = BINHEAP_POISON; + n->left = NULL; + n->right = NULL; + n->ref = NULL; + n->ref_ptr = NULL; +} + +static inline void INIT_BINHEAP_HANDLE( + struct binheap_handle *handle, + binheap_order_t compare) +{ + handle->root = NULL; + handle->next = NULL; + handle->last = NULL; + handle->compare = compare; +} + +/* Returns true (1) if binheap is empty. */ +static inline int binheap_empty(struct binheap_handle *handle) +{ + return(handle->root == NULL); +} + +/* Returns true (1) if binheap node is in a heap. */ +static inline int binheap_is_in_heap(struct binheap_node *node) +{ + return (node->parent != BINHEAP_POISON); +} + + +int binheap_is_in_this_heap(struct binheap_node *node, struct binheap_handle* heap); + + + +void __binheap_add(struct binheap_node *new_node, + struct binheap_handle *handle, + void *data); + + +/** + * Removes the root node from the heap. The node is removed after coalescing + * the binheap_node with its original data pointer at the root of the tree. + * + * The 'last' node in the tree is then swapped up to the root and bubbled + * down. + */ +void __binheap_delete_root(struct binheap_handle *handle, + struct binheap_node *container); + +/** + * Delete an arbitrary node. Bubble node to delete up to the root, + * and then delete to root. + */ +void __binheap_delete( + struct binheap_node *node_to_delete, + struct binheap_handle *handle); + +/** + * Bubble up a node whose pointer has decreased in value. 
+ */ +void __binheap_decrease(struct binheap_node *orig_node, + struct binheap_handle *handle); + + +#endif + diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h index bbaf22ea7f12..63dff7efe8fb 100644 --- a/include/litmus/edf_common.h +++ b/include/litmus/edf_common.h @@ -20,6 +20,18 @@ int edf_higher_prio(struct task_struct* first, int edf_ready_order(struct bheap_node* a, struct bheap_node* b); +#ifdef CONFIG_LITMUS_NESTED_LOCKING +/* binheap_nodes must be embedded within 'struct litmus_lock' */ +int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b); +int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b); +int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b); +int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b); + +int __edf_higher_prio(struct task_struct* first, comparison_mode_t first_mode, + struct task_struct* second, comparison_mode_t second_mode); + +#endif + int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t); #endif diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h index caf2a1e6918c..1f5d3bd1a1db 100644 --- a/include/litmus/fdso.h +++ b/include/litmus/fdso.h @@ -20,7 +20,16 @@ typedef enum { FMLP_SEM = 0, SRP_SEM = 1, - MAX_OBJ_TYPE = 1 + RSM_MUTEX = 2, + IKGLP_SEM = 3, + KFMLP_SEM = 4, + + IKGLP_SIMPLE_GPU_AFF_OBS = 5, + IKGLP_GPU_AFF_OBS = 6, + KFMLP_SIMPLE_GPU_AFF_OBS = 7, + KFMLP_GPU_AFF_OBS = 8, + + MAX_OBJ_TYPE = 8 } obj_type_t; struct inode_obj_id { @@ -64,8 +73,11 @@ static inline void* od_lookup(int od, obj_type_t type) } #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) +#define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM)) #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) +#define lookup_rsm_mutex(od)((struct litmus_lock*) od_lookup(od, FMLP_SEM)) + #endif diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h new file mode 100644 index 000000000000..04d4bcaeae96 --- /dev/null +++ b/include/litmus/fpmath.h @@ -0,0 +1,145 @@ +#ifndef __FP_MATH_H__ +#define __FP_MATH_H__ + +#ifndef __KERNEL__ +#include +#define abs(x) (((x) < 0) ? -(x) : x) +#endif + +// Use 64-bit because we want to track things at the nanosecond scale. +// This can lead to very large numbers. 
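A brief sketch of the fixed-point helpers defined below (Q10 format, so a raw value of 1024 represents 1.0); the observed/estimate values are made up for illustration and only show the round trip between integers and fp_t.

#include <litmus/fpmath.h>

static void fpmath_example(void)
{
	fp_t ratio, scaled;
	fpbuf_t observed = 1500000;   /* 1.5 ms observed, in ns */
	fpbuf_t estimate = 1000000;   /* 1.0 ms prior estimate, in ns */

	/* observed/estimate as a fixed-point value: 1536/1024 = 1.5 */
	ratio = _frac(observed, estimate);

	/* scale the estimate by that ratio (fixed-point multiply) */
	scaled = _mul(ratio, _integer_to_fp(estimate));

	/* back to an integer number of nanoseconds (truncates toward zero) */
	observed = _fp_to_integer(scaled);
}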
+typedef int64_t fpbuf_t; +typedef struct +{ + fpbuf_t val; +} fp_t; + +#define FP_SHIFT 10 +#define ROUND_BIT (FP_SHIFT - 1) + +#define _fp(x) ((fp_t) {x}) + +#ifdef __KERNEL__ +static const fp_t LITMUS_FP_ZERO = {.val = 0}; +static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)}; +#endif + +static inline fp_t FP(fpbuf_t x) +{ + return _fp(((fpbuf_t) x) << FP_SHIFT); +} + +/* divide two integers to obtain a fixed point value */ +static inline fp_t _frac(fpbuf_t a, fpbuf_t b) +{ + return _fp(FP(a).val / (b)); +} + +static inline fpbuf_t _point(fp_t x) +{ + return (x.val % (1 << FP_SHIFT)); + +} + +#define fp2str(x) x.val +/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */ +#define _FP_ "%ld/1024" + +static inline fpbuf_t _floor(fp_t x) +{ + return x.val >> FP_SHIFT; +} + +/* FIXME: negative rounding */ +static inline fpbuf_t _round(fp_t x) +{ + return _floor(x) + ((x.val >> ROUND_BIT) & 1); +} + +/* multiply two fixed point values */ +static inline fp_t _mul(fp_t a, fp_t b) +{ + return _fp((a.val * b.val) >> FP_SHIFT); +} + +static inline fp_t _div(fp_t a, fp_t b) +{ +#if !defined(__KERNEL__) && !defined(unlikely) +#define unlikely(x) (x) +#define DO_UNDEF_UNLIKELY +#endif + /* try not to overflow */ + if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) )) + return _fp((a.val / b.val) << FP_SHIFT); + else + return _fp((a.val << FP_SHIFT) / b.val); +#ifdef DO_UNDEF_UNLIKELY +#undef unlikely +#undef DO_UNDEF_UNLIKELY +#endif +} + +static inline fp_t _add(fp_t a, fp_t b) +{ + return _fp(a.val + b.val); +} + +static inline fp_t _sub(fp_t a, fp_t b) +{ + return _fp(a.val - b.val); +} + +static inline fp_t _neg(fp_t x) +{ + return _fp(-x.val); +} + +static inline fp_t _abs(fp_t x) +{ + return _fp(abs(x.val)); +} + +/* works the same as casting float/double to integer */ +static inline fpbuf_t _fp_to_integer(fp_t x) +{ + return _floor(_abs(x)) * ((x.val > 0) ? 
1 : -1); +} + +static inline fp_t _integer_to_fp(fpbuf_t x) +{ + return _frac(x,1); +} + +static inline int _leq(fp_t a, fp_t b) +{ + return a.val <= b.val; +} + +static inline int _geq(fp_t a, fp_t b) +{ + return a.val >= b.val; +} + +static inline int _lt(fp_t a, fp_t b) +{ + return a.val < b.val; +} + +static inline int _gt(fp_t a, fp_t b) +{ + return a.val > b.val; +} + +static inline int _eq(fp_t a, fp_t b) +{ + return a.val == b.val; +} + +static inline fp_t _max(fp_t a, fp_t b) +{ + if (a.val < b.val) + return b; + else + return a; +} +#endif diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h new file mode 100644 index 000000000000..6b3fb8b28745 --- /dev/null +++ b/include/litmus/gpu_affinity.h @@ -0,0 +1,49 @@ +#ifndef LITMUS_GPU_AFFINITY_H +#define LITMUS_GPU_AFFINITY_H + +#include +#include +#include + +void update_gpu_estimate(struct task_struct* t, lt_t observed); +gpu_migration_dist_t gpu_migration_distance(int a, int b); + +static inline void reset_gpu_tracker(struct task_struct* t) +{ + t->rt_param.accum_gpu_time = 0; +} + +static inline void start_gpu_tracker(struct task_struct* t) +{ + t->rt_param.gpu_time_stamp = litmus_clock(); +} + +static inline void stop_gpu_tracker(struct task_struct* t) +{ + lt_t now = litmus_clock(); + t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp); +} + +static inline lt_t get_gpu_time(struct task_struct* t) +{ + return t->rt_param.accum_gpu_time; +} + +static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) +{ + int i; + fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); + lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... + + WARN_ON(temp < 0); + + // lower-bound a distant migration to be at least equal to the level + // below it. + for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { + val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); + } + + return ((val > 0) ? 
val : dist+1); +} + +#endif diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h new file mode 100644 index 000000000000..af6f15178cb1 --- /dev/null +++ b/include/litmus/ikglp_lock.h @@ -0,0 +1,160 @@ +#ifndef LITMUS_IKGLP_H +#define LITMUS_IKGLP_H + +#include +#include +#include + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +#include + +struct ikglp_affinity; +#endif + +typedef struct ikglp_heap_node +{ + struct task_struct *task; + struct binheap_node node; +} ikglp_heap_node_t; + +struct fifo_queue; +struct ikglp_wait_state; + +typedef struct ikglp_donee_heap_node +{ + struct task_struct *task; + struct fifo_queue *fq; + struct ikglp_wait_state *donor_info; // cross-linked with ikglp_wait_state_t of donor + + struct binheap_node node; +} ikglp_donee_heap_node_t; + +// Maintains the state of a request as it goes through the IKGLP +typedef struct ikglp_wait_state { + struct task_struct *task; // pointer back to the requesting task + + // Data for while waiting in FIFO Queue + wait_queue_t fq_node; + ikglp_heap_node_t global_heap_node; + ikglp_donee_heap_node_t donee_heap_node; + + // Data for while waiting in PQ + ikglp_heap_node_t pq_node; + + // Data for while waiting as a donor + ikglp_donee_heap_node_t *donee_info; // cross-linked with donee's ikglp_donee_heap_node_t + struct nested_info prio_donation; + struct binheap_node node; +} ikglp_wait_state_t; + +/* struct for semaphore with priority inheritance */ +struct fifo_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + + // used for bookkeepping + ikglp_heap_node_t global_heap_node; + ikglp_donee_heap_node_t donee_heap_node; + + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ + + struct nested_info nest; +}; + +struct ikglp_semaphore +{ + struct litmus_lock litmus_lock; + + raw_spinlock_t lock; + raw_spinlock_t real_lock; + + int nr_replicas; // AKA k + int m; + + int max_fifo_len; // max len of a fifo queue + int nr_in_fifos; + + struct binheap_handle top_m; // min heap, base prio + int top_m_size; // number of nodes in top_m + + struct binheap_handle not_top_m; // max heap, base prio + + struct binheap_handle donees; // min-heap, base prio + struct fifo_queue *shortest_fifo_queue; // pointer to shortest fifo queue + + /* data structures for holding requests */ + struct fifo_queue *fifo_queues; // array nr_replicas in length + struct binheap_handle priority_queue; // max-heap, base prio + struct binheap_handle donors; // max-heap, base prio + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + struct ikglp_affinity *aff_obs; +#endif +}; + +static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct ikglp_semaphore, litmus_lock); +} + +int ikglp_lock(struct litmus_lock* l); +int ikglp_unlock(struct litmus_lock* l); +int ikglp_close(struct litmus_lock* l); +void ikglp_free(struct litmus_lock* l); +struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg); + + + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +struct ikglp_queue_info +{ + struct fifo_queue* q; + lt_t estimated_len; + int *nr_cur_users; +}; + +struct ikglp_affinity_ops +{ + struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO + ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select steal from FIFO + ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff, struct task_struct* t); // select a donee + 
ikglp_wait_state_t* (*advise_donor_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select a donor to move to PQ + + void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue + void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue + void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired + void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed + int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding) +}; + +struct ikglp_affinity +{ + struct affinity_observer obs; + struct ikglp_affinity_ops *ops; + struct ikglp_queue_info *q_info; + int *nr_cur_users_on_rsrc; + int offset; + int nr_simult; + int nr_rsrc; + int relax_max_fifo_len; +}; + +static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) +{ + return container_of(aff_obs, struct ikglp_affinity, obs); +} + +int ikglp_aff_obs_close(struct affinity_observer*); +void ikglp_aff_obs_free(struct affinity_observer*); +struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +#endif + + + +#endif diff --git a/include/litmus/kexclu_affinity.h b/include/litmus/kexclu_affinity.h new file mode 100644 index 000000000000..f6355de49074 --- /dev/null +++ b/include/litmus/kexclu_affinity.h @@ -0,0 +1,35 @@ +#ifndef LITMUS_AFF_OBS_H +#define LITMUS_AFF_OBS_H + +#include + +struct affinity_observer_ops; + +struct affinity_observer +{ + struct affinity_observer_ops* ops; + int type; + int ident; + + struct litmus_lock* lock; // the lock under observation +}; + +typedef int (*aff_obs_open_t)(struct affinity_observer* aff_obs, + void* __user arg); +typedef int (*aff_obs_close_t)(struct affinity_observer* aff_obs); +typedef void (*aff_obs_free_t)(struct affinity_observer* aff_obs); + +struct affinity_observer_ops +{ + aff_obs_open_t open; + aff_obs_close_t close; + aff_obs_free_t deallocate; +}; + +struct litmus_lock* get_lock_from_od(int od); + +void affinity_observer_new(struct affinity_observer* aff, + struct affinity_observer_ops* ops, + struct affinity_observer_args* args); + +#endif diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h new file mode 100644 index 000000000000..5f0aae6e6f42 --- /dev/null +++ b/include/litmus/kfmlp_lock.h @@ -0,0 +1,97 @@ +#ifndef LITMUS_KFMLP_H +#define LITMUS_KFMLP_H + +#include +#include + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +#include + +struct kfmlp_affinity; +#endif + +/* struct for semaphore with priority inheritance */ +struct kfmlp_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ +}; + +struct kfmlp_semaphore +{ + struct litmus_lock litmus_lock; + + spinlock_t lock; + + int num_resources; /* aka k */ + + struct kfmlp_queue *queues; /* array */ + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + struct kfmlp_affinity *aff_obs; +#endif +}; + +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct kfmlp_semaphore, litmus_lock); +} + +int 
kfmlp_lock(struct litmus_lock* l); +int kfmlp_unlock(struct litmus_lock* l); +int kfmlp_close(struct litmus_lock* l); +void kfmlp_free(struct litmus_lock* l); +struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +struct kfmlp_queue_info +{ + struct kfmlp_queue* q; + lt_t estimated_len; + int *nr_cur_users; +}; + +struct kfmlp_affinity_ops +{ + struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t); + struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from); + void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq); +}; + +struct kfmlp_affinity +{ + struct affinity_observer obs; + struct kfmlp_affinity_ops *ops; + struct kfmlp_queue_info *q_info; + int *nr_cur_users_on_rsrc; + int offset; + int nr_simult; + int nr_rsrc; +}; + +static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) +{ + return container_of(aff_obs, struct kfmlp_affinity, obs); +} + +int kfmlp_aff_obs_close(struct affinity_observer*); +void kfmlp_aff_obs_free(struct affinity_observer*); +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); + + +#endif + +#endif + + diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 0b071fd359f9..71df378236f5 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list) ); } + struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); #define NO_CPU 0xffffffff @@ -53,12 +54,16 @@ void litmus_exit_task(struct task_struct *tsk); #define get_rt_phase(t) (tsk_rt(t)->task_params.phase) #define get_partition(t) (tsk_rt(t)->task_params.cpu) #define get_deadline(t) (tsk_rt(t)->job_params.deadline) +#define get_period(t) (tsk_rt(t)->task_params.period) #define get_release(t) (tsk_rt(t)->job_params.release) #define get_class(t) (tsk_rt(t)->task_params.cls) #define is_priority_boosted(t) (tsk_rt(t)->priority_boosted) #define get_boost_start(t) (tsk_rt(t)->boost_start_time) +#define effective_priority(t) ((!(tsk_rt(t)->inh_task)) ? 
t : tsk_rt(t)->inh_task) +#define base_priority(t) (t) + inline static int budget_exhausted(struct task_struct* t) { return get_exec_time(t) >= get_exec_cost(t); @@ -114,10 +119,12 @@ static inline lt_t litmus_clock(void) #define earlier_deadline(a, b) (lt_before(\ (a)->rt_param.job_params.deadline,\ (b)->rt_param.job_params.deadline)) +#define shorter_period(a, b) (lt_before(\ + (a)->rt_param.task_params.period,\ + (b)->rt_param.task_params.period)) #define earlier_release(a, b) (lt_before(\ (a)->rt_param.job_params.release,\ (b)->rt_param.job_params.release)) - void preempt_if_preemptable(struct task_struct* t, int on_cpu); #ifdef CONFIG_LITMUS_LOCKING diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h new file mode 100644 index 000000000000..1eb5ea1a6c4b --- /dev/null +++ b/include/litmus/litmus_softirq.h @@ -0,0 +1,199 @@ +#ifndef __LITMUS_SOFTIRQ_H +#define __LITMUS_SOFTIRQ_H + +#include +#include + +/* + Threaded tasklet handling for Litmus. Tasklets + are scheduled with the priority of the tasklet's + owner---that is, the RT task on behalf the tasklet + runs. + + Tasklets are current scheduled in FIFO order with + NO priority inheritance for "blocked" tasklets. + + klitirqd assumes the priority of the owner of the + tasklet when the tasklet is next to execute. + + Currently, hi-tasklets are scheduled before + low-tasklets, regardless of priority of low-tasklets. + And likewise, low-tasklets are scheduled before work + queue objects. This priority inversion probably needs + to be fixed, though it is not an issue if our work with + GPUs as GPUs are owned (and associated klitirqds) for + exclusive time periods, thus no inversions can + occur. + */ + + + +#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD + +/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons. + Actual launch of threads is deffered to kworker's + workqueue, so daemons will likely not be immediately + running when this function returns, though the required + data will be initialized. + + @affinity_set: an array expressing the processor affinity + for each of the NR_LITMUS_SOFTIRQD daemons. May be set + to NULL for global scheduling. + + - Examples - + 8-CPU system with two CPU clusters: + affinity[] = {0, 0, 0, 0, 3, 3, 3, 3} + NOTE: Daemons not actually bound to specified CPU, but rather + cluster in which the CPU resides. + + 8-CPU system, partitioned: + affinity[] = {0, 1, 2, 3, 4, 5, 6, 7} + + FIXME: change array to a CPU topology or array of cpumasks + + */ +void spawn_klitirqd(int* affinity); + + +/* Raises a flag to tell klitirqds to terminate. + Termination is async, so some threads may be running + after function return. */ +void kill_klitirqd(void); + + +/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqs are ready + to handle tasklets. 0, otherwise.*/ +int klitirqd_is_ready(void); + +/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqs are ready + to handle tasklets. 0, otherwise.*/ +int klitirqd_is_dead(void); + +/* Flushes all pending work out to the OS for regular + * tasklet/work processing of the specified 'owner' + * + * PRECOND: klitirqd_thread must have a clear entry + * in the GPU registry, otherwise this call will become + * a no-op as work will loop back to the klitirqd_thread. + * + * Pass NULL for owner to flush ALL pending items. 
+ */ +void flush_pending(struct task_struct* klitirqd_thread, + struct task_struct* owner); + +struct task_struct* get_klitirqd(unsigned int k_id); + + +extern int __litmus_tasklet_schedule( + struct tasklet_struct *t, + unsigned int k_id); + +/* schedule a tasklet on klitirqd #k_id */ +static inline int litmus_tasklet_schedule( + struct tasklet_struct *t, + unsigned int k_id) +{ + int ret = 0; + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + ret = __litmus_tasklet_schedule(t, k_id); + return(ret); +} + +/* for use by __tasklet_schedule() */ +static inline int _litmus_tasklet_schedule( + struct tasklet_struct *t, + unsigned int k_id) +{ + return(__litmus_tasklet_schedule(t, k_id)); +} + + + + +extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, + unsigned int k_id); + +/* schedule a hi tasklet on klitirqd #k_id */ +static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t, + unsigned int k_id) +{ + int ret = 0; + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + ret = __litmus_tasklet_hi_schedule(t, k_id); + return(ret); +} + +/* for use by __tasklet_hi_schedule() */ +static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t, + unsigned int k_id) +{ + return(__litmus_tasklet_hi_schedule(t, k_id)); +} + + + + + +extern int __litmus_tasklet_hi_schedule_first( + struct tasklet_struct *t, + unsigned int k_id); + +/* schedule a hi tasklet on klitirqd #k_id on next go-around */ +/* PRECONDITION: Interrupts must be disabled. */ +static inline int litmus_tasklet_hi_schedule_first( + struct tasklet_struct *t, + unsigned int k_id) +{ + int ret = 0; + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + ret = __litmus_tasklet_hi_schedule_first(t, k_id); + return(ret); +} + +/* for use by __tasklet_hi_schedule_first() */ +static inline int _litmus_tasklet_hi_schedule_first( + struct tasklet_struct *t, + unsigned int k_id) +{ + return(__litmus_tasklet_hi_schedule_first(t, k_id)); +} + + + +////////////// + +extern int __litmus_schedule_work( + struct work_struct* w, + unsigned int k_id); + +static inline int litmus_schedule_work( + struct work_struct* w, + unsigned int k_id) +{ + return(__litmus_schedule_work(w, k_id)); +} + + + +///////////// mutex operations for client threads. 
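As a concrete (hypothetical) illustration of the klitirqd API declared above, the sketch below spawns the daemons with the clustered affinity layout described in the spawn_klitirqd() comment and then hands a tasklet, tagged with its owning real-time task, to klitirqd #0. Everything other than the litmus_softirq.h/interrupt.h symbols is invented, and the array initializer assumes CONFIG_NR_LITMUS_SOFTIRQD == 8.

#include <linux/interrupt.h>
#include <litmus/litmus_softirq.h>

static struct tasklet_struct my_tasklet;

static void my_func(unsigned long data)
{
	/* bottom-half work, executed by a klitirqd thread at the
	 * priority of my_tasklet.owner */
}

static void my_setup(struct task_struct *rt_owner)
{
	/* 8-CPU system, two clusters: daemons 0-3 near CPU 0, 4-7 near CPU 3 */
	int affinity[NR_LITMUS_SOFTIRQD] = {0, 0, 0, 0, 3, 3, 3, 3};

	spawn_klitirqd(affinity);

	tasklet_init(&my_tasklet, my_func, 0);
	my_tasklet.owner = rt_owner;	/* priority inherited from the owner */
}

static void my_raise(void)
{
	if (klitirqd_is_ready())
		litmus_tasklet_schedule(&my_tasklet, 0); /* hand to klitirqd #0 */
	else
		tasklet_schedule(&my_tasklet);	/* fall back to regular softirq */
}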
+ +void down_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem); + +void __down_and_reset_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_reset, + enum klitirqd_sem_status to_set, + struct mutex* sem); + +void up_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem); + + + +void release_klitirqd_lock(struct task_struct* t); + +int reacquire_klitirqd_lock(struct task_struct* t); + +#endif diff --git a/include/litmus/locking.h b/include/litmus/locking.h index 4d7b870cb443..36647fee03e4 100644 --- a/include/litmus/locking.h +++ b/include/litmus/locking.h @@ -1,28 +1,160 @@ #ifndef LITMUS_LOCKING_H #define LITMUS_LOCKING_H +#include + struct litmus_lock_ops; +#ifdef CONFIG_LITMUS_NESTED_LOCKING +struct nested_info +{ + struct litmus_lock *lock; + struct task_struct *hp_waiter_eff_prio; + struct task_struct **hp_waiter_ptr; + struct binheap_node hp_binheap_node; +}; + +static inline struct task_struct* top_priority(struct binheap_handle* handle) { + if(!binheap_empty(handle)) { + return (struct task_struct*)(binheap_top_entry(handle, struct nested_info, hp_binheap_node)->hp_waiter_eff_prio); + } + return NULL; +} + +void print_hp_waiters(struct binheap_node* n, int depth); +#endif + + /* Generic base struct for LITMUS^RT userspace semaphores. * This structure should be embedded in protocol-specific semaphores. */ struct litmus_lock { struct litmus_lock_ops *ops; int type; + + int ident; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + struct nested_info nest; +//#ifdef CONFIG_DEBUG_SPINLOCK + char cheat_lockdep[2]; + struct lock_class_key key; +//#endif +#endif }; +#ifdef CONFIG_LITMUS_DGL_SUPPORT + +#define MAX_DGL_SIZE CONFIG_LITMUS_MAX_DGL_SIZE + +typedef struct dgl_wait_state { + struct task_struct *task; /* task waiting on DGL */ + struct litmus_lock *locks[MAX_DGL_SIZE]; /* requested locks in DGL */ + int size; /* size of the DGL */ + int nr_remaining; /* nr locks remainging before DGL is complete */ + int last_primary; /* index lock in locks[] that has active priority */ + wait_queue_t wq_nodes[MAX_DGL_SIZE]; +} dgl_wait_state_t; + +void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait); +void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/); + +void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait); +int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key); +void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait, struct task_struct **task); +#endif + +typedef int (*lock_op_t)(struct litmus_lock *l); +typedef lock_op_t lock_close_t; +typedef lock_op_t lock_lock_t; +typedef lock_op_t lock_unlock_t; + +typedef int (*lock_open_t)(struct litmus_lock *l, void* __user arg); +typedef void (*lock_free_t)(struct litmus_lock *l); + struct litmus_lock_ops { /* Current task tries to obtain / drop a reference to a lock. * Optional methods, allowed by default. */ - int (*open)(struct litmus_lock*, void* __user); - int (*close)(struct litmus_lock*); + lock_open_t open; + lock_close_t close; /* Current tries to lock/unlock this lock (mandatory methods). */ - int (*lock)(struct litmus_lock*); - int (*unlock)(struct litmus_lock*); + lock_lock_t lock; + lock_unlock_t unlock; /* The lock is no longer being referenced (mandatory method). 
*/ - void (*deallocate)(struct litmus_lock*); + lock_free_t deallocate; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + void (*propagate_increase_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags); + void (*propagate_decrease_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags); +#endif + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t* (*get_dgl_spin_lock)(struct litmus_lock *l); + int (*dgl_lock)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node); + int (*is_owner)(struct litmus_lock *l, struct task_struct *t); + void (*enable_priority)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait); +#endif }; + +/* + Nested inheritance can be achieved with fine-grain locking when there is + no need for DGL support, presuming locks are acquired in a partial order + (no cycles!). However, DGLs allow locks to be acquired in any order. This + makes nested inheritance very difficult (we don't yet know a solution) to + realize with fine-grain locks, so we use a big lock instead. + + Code contains both fine-grain and coarse-grain methods together, side-by-side. + Each lock operation *IS NOT* surrounded by ifdef/endif to help make code more + readable. However, this leads to the odd situation where both code paths + appear together in code as if they were both active together. + + THIS IS NOT REALLY THE CASE! ONLY ONE CODE PATH IS ACTUALLY ACTIVE! + + Example: + lock_global_irqsave(coarseLock, flags); + lock_fine_irqsave(fineLock, flags); + + Reality (coarse): + lock_global_irqsave(coarseLock, flags); + //lock_fine_irqsave(fineLock, flags); + + Reality (fine): + //lock_global_irqsave(coarseLock, flags); + lock_fine_irqsave(fineLock, flags); + + Be careful when you read code involving nested inheritance. + */ +#if defined(CONFIG_LITMUS_DGL_SUPPORT) +/* DGL requires a big lock to implement nested inheritance */ +#define lock_global_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags)) +#define lock_global(lock) raw_spin_lock((lock)) +#define unlock_global_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags)) +#define unlock_global(lock) raw_spin_unlock((lock)) + +/* fine-grain locking are no-ops with DGL support */ +#define lock_fine_irqsave(lock, flags) +#define lock_fine(lock) +#define unlock_fine_irqrestore(lock, flags) +#define unlock_fine(lock) + +#elif defined(CONFIG_LITMUS_NESTED_LOCKING) +/* Use fine-grain locking when DGLs are disabled. 
*/ +/* global locking are no-ops without DGL support */ +#define lock_global_irqsave(lock, flags) +#define lock_global(lock) +#define unlock_global_irqrestore(lock, flags) +#define unlock_global(lock) + +#define lock_fine_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags)) +#define lock_fine(lock) raw_spin_lock((lock)) +#define unlock_fine_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags)) +#define unlock_fine(lock) raw_spin_unlock((lock)) + #endif + + +#endif + diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h new file mode 100644 index 000000000000..97c9577141db --- /dev/null +++ b/include/litmus/nvidia_info.h @@ -0,0 +1,46 @@ +#ifndef __LITMUS_NVIDIA_H +#define __LITMUS_NVIDIA_H + +#include + + +#include + + +//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD +#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM +#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS + +int init_nvidia_info(void); +void shutdown_nvidia_info(void); + +int is_nvidia_func(void* func_addr); + +void dump_nvidia_info(const struct tasklet_struct *t); + + +// Returns the Nvidia device # associated with provided tasklet and work_struct. +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t); +u32 get_work_nv_device_num(const struct work_struct *t); + + +int init_nv_device_reg(void); +//int get_nv_device_id(struct task_struct* owner); + + +int reg_nv_device(int reg_device_id, int register_device, struct task_struct *t); + +struct task_struct* get_nv_max_device_owner(u32 target_device_id); +//int is_nv_device_owner(u32 target_device_id); + +void lock_nv_registry(u32 reg_device_id, unsigned long* flags); +void unlock_nv_registry(u32 reg_device_id, unsigned long* flags); + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +void pai_check_priority_increase(struct task_struct *t, int reg_device_id); +void pai_check_priority_decrease(struct task_struct *t, int reg_device_id); +#endif + +//void increment_nv_int_count(u32 device); + +#endif diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h index 380b886d78ff..8f3a9ca2d4e3 100644 --- a/include/litmus/preempt.h +++ b/include/litmus/preempt.h @@ -26,12 +26,12 @@ const char* sched_state_name(int s); (x), #x, __FUNCTION__); \ } while (0); +//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */ #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \ TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \ cpu, (x), sched_state_name(x), \ (y), sched_state_name(y)) - typedef enum scheduling_state { TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that * should be scheduled, and the processor does not diff --git a/include/litmus/rsm_lock.h b/include/litmus/rsm_lock.h new file mode 100644 index 000000000000..a15189683de4 --- /dev/null +++ b/include/litmus/rsm_lock.h @@ -0,0 +1,54 @@ +#ifndef LITMUS_RSM_H +#define LITMUS_RSM_H + +#include +#include +#include + +/* struct for semaphore with priority inheritance */ +struct rsm_mutex { + struct litmus_lock litmus_lock; + + /* current resource holder */ + struct task_struct *owner; + + /* highest-priority waiter */ + struct task_struct *hp_waiter; + + /* FIFO queue of waiting tasks -- for now. time stamp in the future. */ + wait_queue_head_t wait; + + /* we do some nesting within spinlocks, so we can't use the normal + sleeplocks found in wait_queue_head_t. 
*/ + raw_spinlock_t lock; +}; + +static inline struct rsm_mutex* rsm_mutex_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct rsm_mutex, litmus_lock); +} + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t); +int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node); +void rsm_mutex_enable_priority(struct litmus_lock *l, dgl_wait_state_t* dgl_wait); +#endif + +void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l, + struct task_struct* t, + raw_spinlock_t* to_unlock, + unsigned long irqflags); + +void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l, + struct task_struct* t, + raw_spinlock_t* to_unlock, + unsigned long irqflags); + +int rsm_mutex_lock(struct litmus_lock* l); +int rsm_mutex_unlock(struct litmus_lock* l); +int rsm_mutex_close(struct litmus_lock* l); +void rsm_mutex_free(struct litmus_lock* l); +struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops*); + + +#endif \ No newline at end of file diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index d6d799174160..0198884eab86 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -5,6 +5,8 @@ #ifndef _LINUX_RT_PARAM_H_ #define _LINUX_RT_PARAM_H_ +#include + /* Litmus time type. */ typedef unsigned long long lt_t; @@ -24,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b) typedef enum { RT_CLASS_HARD, RT_CLASS_SOFT, + RT_CLASS_SOFT_W_SLIP, RT_CLASS_BEST_EFFORT } task_class_t; @@ -52,6 +55,19 @@ union np_flag { } np; }; +struct affinity_observer_args +{ + int lock_od; +}; + +struct gpu_affinity_observer_args +{ + struct affinity_observer_args obs; + int replica_to_gpu_offset; + int nr_simult_users; + int relaxed_rules; +}; + /* The definition of the data that is shared between the kernel and real-time * tasks via a shared page (see litmus/ctrldev.c). * @@ -75,6 +91,9 @@ struct control_page { /* don't export internal data structures to user space (liblitmus) */ #ifdef __KERNEL__ +#include +#include + struct _rt_domain; struct bheap_node; struct release_heap; @@ -100,6 +119,31 @@ struct rt_job { struct pfair_param; +enum klitirqd_sem_status +{ + NEED_TO_REACQUIRE, + REACQUIRING, + NOT_HELD, + HELD +}; + +typedef enum gpu_migration_dist +{ + // TODO: Make this variable against NR_NVIDIA_GPUS + MIG_LOCAL = 0, + MIG_NEAR = 1, + MIG_MED = 2, + MIG_FAR = 3, // 8 GPUs in a binary tree hierarchy + MIG_NONE = 4, + + MIG_LAST = MIG_NONE +} gpu_migration_dist_t; + +typedef struct feedback_est{ + fp_t est; + fp_t accum_err; +} feedback_est_t; + /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must * be explicitly set up before the task set is launched. @@ -114,6 +158,52 @@ struct rt_param { /* is the task present? (true if it can be scheduled) */ unsigned int present:1; +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy threads have minimum priority by default */ + unsigned int is_proxy_thread:1; + + /* pointer to klitirqd currently working on this + task_struct's behalf. only set by the task pointed + to by klitirqd. + + ptr only valid if is_proxy_thread == 0 + */ + struct task_struct* cur_klitirqd; + + /* Used to implement mutual execution exclusion between + * job and klitirqd execution. Job must always hold + * it's klitirqd_sem to execute. klitirqd instance + * must hold the semaphore before executing on behalf + * of a job. 
+ */ + struct mutex klitirqd_sem; + + /* status of held klitirqd_sem, even if the held klitirqd_sem is from + another task (only proxy threads do this though). + */ + atomic_t klitirqd_sem_stat; +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + /* number of top-half interrupts handled on behalf of current job */ + atomic_t nv_int_count; + long unsigned int held_gpus; // bitmap of held GPUs. + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + fp_t gpu_fb_param_a[MIG_LAST+1]; + fp_t gpu_fb_param_b[MIG_LAST+1]; + + gpu_migration_dist_t gpu_migration; + int last_gpu; + feedback_est_t gpu_migration_est[MIG_LAST+1]; // local, near, med, far + + lt_t accum_gpu_time; + lt_t gpu_time_stamp; + + unsigned int suspend_gpu_tracker_on_block:1; +#endif +#endif + #ifdef CONFIG_LITMUS_LOCKING /* Is the task being priority-boosted by a locking protocol? */ unsigned int priority_boosted:1; @@ -133,7 +223,15 @@ struct rt_param { * could point to self if PI does not result in * an increased task priority. */ - struct task_struct* inh_task; + struct task_struct* inh_task; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + raw_spinlock_t hp_blocked_tasks_lock; + struct binheap_handle hp_blocked_tasks; + + /* pointer to lock upon which is currently blocked */ + struct litmus_lock* blocked_lock; +#endif #ifdef CONFIG_NP_SECTION /* For the FMLP under PSN-EDF, it is required to make the task diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 6e7cabdddae8..24a6858b4b0b 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -11,6 +11,12 @@ #include #endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +#include +#endif + +#include + /************************ setup/tear down ********************/ typedef long (*activate_plugin_t) (void); @@ -29,7 +35,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev); */ typedef void (*finish_switch_t)(struct task_struct *prev); - /********************* task state changes ********************/ /* Called to setup a new real-time task. 
@@ -58,6 +63,47 @@ typedef void (*task_exit_t) (struct task_struct *); typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, void* __user config); +struct affinity_observer; +typedef long (*allocate_affinity_observer_t) ( + struct affinity_observer **aff_obs, int type, + void* __user config); + +typedef void (*increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh); +typedef void (*decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh); +typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, unsigned long irqflags); +typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, unsigned long irqflags); + +typedef void (*increase_prio_klitirq_t)(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner); +typedef void (*decrease_prio_klitirqd_t)(struct task_struct* klitirqd, + struct task_struct* old_owner); + + +typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet); +typedef void (*change_prio_pai_tasklet_t)(struct task_struct *old_prio, + struct task_struct *new_prio); +typedef void (*run_tasklets_t)(struct task_struct* next); + +typedef raw_spinlock_t* (*get_dgl_spinlock_t) (struct task_struct *t); + + +typedef int (*higher_prio_t)(struct task_struct* a, struct task_struct* b); + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + +typedef enum +{ + BASE, + EFFECTIVE +} comparison_mode_t; + +typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod, + struct task_struct* b, comparison_mode_t b_mod); +#endif + /********************* sys call backends ********************/ /* This function causes the caller to sleep until the next release */ @@ -88,14 +134,40 @@ struct sched_plugin { /* task state changes */ admit_task_t admit_task; - task_new_t task_new; + task_new_t task_new; task_wake_up_t task_wake_up; task_block_t task_block; task_exit_t task_exit; + higher_prio_t compare; + #ifdef CONFIG_LITMUS_LOCKING /* locking protocols */ allocate_lock_t allocate_lock; + increase_prio_t increase_prio; + decrease_prio_t decrease_prio; +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + nested_increase_prio_t nested_increase_prio; + nested_decrease_prio_t nested_decrease_prio; + __higher_prio_t __compare; +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + get_dgl_spinlock_t get_dgl_spinlock; +#endif + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + allocate_affinity_observer_t allocate_aff_obs; +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD + increase_prio_klitirq_t increase_prio_klitirqd; + decrease_prio_klitirqd_t decrease_prio_klitirqd; +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + enqueue_pai_tasklet_t enqueue_pai_tasklet; + change_prio_pai_tasklet_t change_prio_pai_tasklet; + run_tasklets_t run_tasklets; #endif } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 7ca34cb13881..b1b71f6c5f0c 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h @@ -10,13 +10,14 @@ struct st_trace_header { u8 type; /* Of what type is this record? */ u8 cpu; /* On which CPU was it recorded? */ u16 pid; /* PID of the task. */ - u32 job; /* The job sequence number. */ -}; + u32 job:24; /* The job sequence number. */ + u8 extra; +} __attribute__((packed)); #define ST_NAME_LEN 16 struct st_name_data { char cmd[ST_NAME_LEN];/* The name of the executable of this process. 
*/ -}; +} __attribute__((packed)); struct st_param_data { /* regular params */ u32 wcet; @@ -25,30 +26,29 @@ struct st_param_data { /* regular params */ u8 partition; u8 class; u8 __unused[2]; -}; +} __attribute__((packed)); struct st_release_data { /* A job is was/is going to be released. */ u64 release; /* What's the release time? */ u64 deadline; /* By when must it finish? */ -}; +} __attribute__((packed)); struct st_assigned_data { /* A job was asigned to a CPU. */ u64 when; u8 target; /* Where should it execute? */ u8 __unused[7]; -}; +} __attribute__((packed)); struct st_switch_to_data { /* A process was switched to on a given CPU. */ u64 when; /* When did this occur? */ u32 exec_time; /* Time the current job has executed. */ u8 __unused[4]; - -}; +} __attribute__((packed)); struct st_switch_away_data { /* A process was switched away from on a given CPU. */ u64 when; u64 exec_time; -}; +} __attribute__((packed)); struct st_completion_data { /* A job completed. */ u64 when; @@ -56,35 +56,108 @@ struct st_completion_data { /* A job completed. */ * next task automatically; set to 0 otherwise. */ u8 __uflags:7; - u8 __unused[7]; -}; + u16 nv_int_count; + u8 __unused[5]; +} __attribute__((packed)); struct st_block_data { /* A task blocks. */ u64 when; u64 __unused; -}; +} __attribute__((packed)); struct st_resume_data { /* A task resumes. */ u64 when; u64 __unused; -}; +} __attribute__((packed)); struct st_action_data { u64 when; u8 action; u8 __unused[7]; -}; +} __attribute__((packed)); struct st_sys_release_data { u64 when; u64 release; -}; +} __attribute__((packed)); + + +struct st_tasklet_release_data { + u64 when; + u64 __unused; +} __attribute__((packed)); + +struct st_tasklet_begin_data { + u64 when; + u16 exe_pid; + u8 __unused[6]; +} __attribute__((packed)); + +struct st_tasklet_end_data { + u64 when; + u16 exe_pid; + u8 flushed; + u8 __unused[5]; +} __attribute__((packed)); + + +struct st_work_release_data { + u64 when; + u64 __unused; +} __attribute__((packed)); + +struct st_work_begin_data { + u64 when; + u16 exe_pid; + u8 __unused[6]; +} __attribute__((packed)); + +struct st_work_end_data { + u64 when; + u16 exe_pid; + u8 flushed; + u8 __unused[5]; +} __attribute__((packed)); + +struct st_effective_priority_change_data { + u64 when; + u16 inh_pid; + u8 __unused[6]; +} __attribute__((packed)); + +struct st_nv_interrupt_begin_data { + u64 when; + u32 device; + u32 serialNumber; +} __attribute__((packed)); + +struct st_nv_interrupt_end_data { + u64 when; + u32 device; + u32 serialNumber; +} __attribute__((packed)); + +struct st_prediction_err_data { + u64 distance; + u64 rel_err; +} __attribute__((packed)); + +struct st_migration_data { + u64 observed; + u64 estimated; +} __attribute__((packed)); + +struct migration_info { + u64 observed; + u64 estimated; + u8 distance; +} __attribute__((packed)); #define DATA(x) struct st_ ## x ## _data x; typedef enum { - ST_NAME = 1, /* Start at one, so that we can spot - * uninitialized records. */ + ST_NAME = 1, /* Start at one, so that we can spot + * uninitialized records. 
*/ ST_PARAM, ST_RELEASE, ST_ASSIGNED, @@ -94,7 +167,19 @@ typedef enum { ST_BLOCK, ST_RESUME, ST_ACTION, - ST_SYS_RELEASE + ST_SYS_RELEASE, + ST_TASKLET_RELEASE, + ST_TASKLET_BEGIN, + ST_TASKLET_END, + ST_WORK_RELEASE, + ST_WORK_BEGIN, + ST_WORK_END, + ST_EFF_PRIO_CHANGE, + ST_NV_INTERRUPT_BEGIN, + ST_NV_INTERRUPT_END, + + ST_PREDICTION_ERR, + ST_MIGRATION, } st_event_record_type_t; struct st_event_record { @@ -113,8 +198,20 @@ struct st_event_record { DATA(resume); DATA(action); DATA(sys_release); + DATA(tasklet_release); + DATA(tasklet_begin); + DATA(tasklet_end); + DATA(work_release); + DATA(work_begin); + DATA(work_end); + DATA(effective_priority_change); + DATA(nv_interrupt_begin); + DATA(nv_interrupt_end); + + DATA(prediction_err); + DATA(migration); } data; -}; +} __attribute__((packed)); #undef DATA @@ -129,6 +226,8 @@ struct st_event_record { ft_event1(id, callback, task) #define SCHED_TRACE2(id, callback, task, xtra) \ ft_event2(id, callback, task, xtra) +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \ + ft_event3(id, callback, task, xtra1, xtra2) /* provide prototypes; needed on sparc64 */ #ifndef NO_TASK_TRACE_DECLS @@ -155,12 +254,58 @@ feather_callback void do_sched_trace_action(unsigned long id, feather_callback void do_sched_trace_sys_release(unsigned long id, lt_t* start); + +feather_callback void do_sched_trace_tasklet_release(unsigned long id, + struct task_struct* owner); +feather_callback void do_sched_trace_tasklet_begin(unsigned long id, + struct task_struct* owner); +feather_callback void do_sched_trace_tasklet_end(unsigned long id, + struct task_struct* owner, + unsigned long flushed); + +feather_callback void do_sched_trace_work_release(unsigned long id, + struct task_struct* owner); +feather_callback void do_sched_trace_work_begin(unsigned long id, + struct task_struct* owner, + struct task_struct* exe); +feather_callback void do_sched_trace_work_end(unsigned long id, + struct task_struct* owner, + struct task_struct* exe, + unsigned long flushed); + +feather_callback void do_sched_trace_eff_prio_change(unsigned long id, + struct task_struct* task, + struct task_struct* inh); + +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, + u32 device); +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, + unsigned long unused); + +feather_callback void do_sched_trace_prediction_err(unsigned long id, + struct task_struct* task, + gpu_migration_dist_t* distance, + fp_t* rel_err); + + + + + +feather_callback void do_sched_trace_migration(unsigned long id, + struct task_struct* task, + struct migration_info* mig_info); + + +/* returns true if we're tracing an interrupt on current CPU */ +/* int is_interrupt_tracing_active(void); */ + #endif #else #define SCHED_TRACE(id, callback, task) /* no tracing */ #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) #endif @@ -193,6 +338,41 @@ feather_callback void do_sched_trace_sys_release(unsigned long id, SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when) +#define sched_trace_tasklet_release(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t) + +#define sched_trace_tasklet_begin(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t) + +#define sched_trace_tasklet_end(t, flushed) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed) + + +#define sched_trace_work_release(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, 
do_sched_trace_work_release, t) + +#define sched_trace_work_begin(t, e) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e) + +#define sched_trace_work_end(t, e, flushed) \ + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed) + + +#define sched_trace_eff_prio_change(t, inh) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh) + + +#define sched_trace_nv_interrupt_begin(d) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d) +#define sched_trace_nv_interrupt_end(d) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) + +#define sched_trace_prediction_err(t, dist, rel_err) \ + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err) + +#define sched_trace_migration(t, mig_info) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info) + #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ #endif /* __KERNEL__ */ diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h new file mode 100644 index 000000000000..e70e45e4cf51 --- /dev/null +++ b/include/litmus/sched_trace_external.h @@ -0,0 +1,78 @@ +/* + * sched_trace.h -- record scheduler events to a byte stream for offline analysis. + */ +#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_ +#define _LINUX_SCHED_TRACE_EXTERNAL_H_ + + +#ifdef CONFIG_SCHED_TASK_TRACE +extern void __sched_trace_tasklet_begin_external(struct task_struct* t); +static inline void sched_trace_tasklet_begin_external(struct task_struct* t) +{ + __sched_trace_tasklet_begin_external(t); +} + +extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed); +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed) +{ + __sched_trace_tasklet_end_external(t, flushed); +} + +extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e); +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e) +{ + __sched_trace_work_begin_external(t, e); +} + +extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f); +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f) +{ + __sched_trace_work_end_external(t, e, f); +} + +#ifdef CONFIG_LITMUS_NVIDIA +extern void __sched_trace_nv_interrupt_begin_external(u32 device); +static inline void sched_trace_nv_interrupt_begin_external(u32 device) +{ + __sched_trace_nv_interrupt_begin_external(device); +} + +extern void __sched_trace_nv_interrupt_end_external(u32 device); +static inline void sched_trace_nv_interrupt_end_external(u32 device) +{ + __sched_trace_nv_interrupt_end_external(device); +} +#endif + +#else + +// no tracing. 
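// (With CONFIG_SCHED_TASK_TRACE disabled, the stubs below are empty static
//  inlines, so external callers can invoke these hooks unconditionally and
//  the calls compile away.)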
+static inline void sched_trace_tasklet_begin_external(struct task_struct* t){} +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){} +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){} +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){} + +#ifdef CONFIG_LITMUS_NVIDIA +static inline void sched_trace_nv_interrupt_begin_external(u32 device){} +static inline void sched_trace_nv_interrupt_end_external(u32 device){} +#endif + +#endif + + +#ifdef CONFIG_LITMUS_NVIDIA + +#define EX_TS(evt) \ +extern void __##evt(void); \ +static inline void EX_##evt(void) { __##evt(); } + +EX_TS(TS_NV_TOPISR_START) +EX_TS(TS_NV_TOPISR_END) +EX_TS(TS_NV_BOTISR_START) +EX_TS(TS_NV_BOTISR_END) +EX_TS(TS_NV_RELEASE_BOTISR_START) +EX_TS(TS_NV_RELEASE_BOTISR_END) + +#endif + +#endif diff --git a/include/litmus/trace.h b/include/litmus/trace.h index e809376d6487..e078aee4234d 100644 --- a/include/litmus/trace.h +++ b/include/litmus/trace.h @@ -103,14 +103,46 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_ #define TS_LOCK_START TIMESTAMP(170) #define TS_LOCK_SUSPEND TIMESTAMP(171) #define TS_LOCK_RESUME TIMESTAMP(172) -#define TS_LOCK_END TIMESTAMP(173) +#define TS_LOCK_END TIMESTAMP(173) + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +#define TS_DGL_LOCK_START TIMESTAMP(175) +#define TS_DGL_LOCK_SUSPEND TIMESTAMP(176) +#define TS_DGL_LOCK_RESUME TIMESTAMP(177) +#define TS_DGL_LOCK_END TIMESTAMP(178) +#endif #define TS_UNLOCK_START TIMESTAMP(180) #define TS_UNLOCK_END TIMESTAMP(181) +#ifdef CONFIG_LITMUS_DGL_SUPPORT +#define TS_DGL_UNLOCK_START TIMESTAMP(185) +#define TS_DGL_UNLOCK_END TIMESTAMP(186) +#endif + #define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) #define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when)) + +#ifdef CONFIG_LITMUS_NVIDIA + +#define TS_NV_TOPISR_START TIMESTAMP(200) +#define TS_NV_TOPISR_END TIMESTAMP(201) + +#define TS_NV_BOTISR_START TIMESTAMP(202) +#define TS_NV_BOTISR_END TIMESTAMP(203) + +#define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204) +#define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205) + +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#define TS_NV_SCHED_BOTISR_START TIMESTAMP(206) +#define TS_NV_SCHED_BOTISR_END TIMESTAMP(207) +#endif + + #endif /* !_SYS_TRACE_H_ */ diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 94264c27d9ac..4fa514c89605 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -17,5 +17,8 @@ #define __NR_wait_for_ts_release __LSC(9) #define __NR_release_ts __LSC(10) #define __NR_null_call __LSC(11) +#define __NR_litmus_dgl_lock __LSC(12) +#define __NR_litmus_dgl_unlock __LSC(13) +#define __NR_register_nv_device __LSC(14) -#define NR_litmus_syscalls 12 +#define NR_litmus_syscalls 15 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index d5ced0d2642c..f80dc45dc185 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -29,5 +29,12 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) __SYSCALL(__NR_release_ts, sys_release_ts) #define __NR_null_call __LSC(11) __SYSCALL(__NR_null_call, sys_null_call) +#define __NR_litmus_dgl_lock __LSC(12) +__SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock) +#define __NR_litmus_dgl_unlock __LSC(13) +__SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock) +#define __NR_register_nv_device __LSC(14) 
+__SYSCALL(__NR_register_nv_device, sys_register_nv_device) -#define NR_litmus_syscalls 12 + +#define NR_litmus_syscalls 15 diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 298c9276dfdb..2bdcdc3691e5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -542,7 +542,7 @@ static void print_lock(struct held_lock *hlock) print_ip_sym(hlock->acquire_ip); } -static void lockdep_print_held_locks(struct task_struct *curr) +void lockdep_print_held_locks(struct task_struct *curr) { int i, depth = curr->lockdep_depth; @@ -558,6 +558,7 @@ static void lockdep_print_held_locks(struct task_struct *curr) print_lock(curr->held_locks + i); } } +EXPORT_SYMBOL(lockdep_print_held_locks); static void print_kernel_version(void) { @@ -583,6 +584,10 @@ static int static_obj(void *obj) end = (unsigned long) &_end, addr = (unsigned long) obj; + // GLENN + return 1; + + /* * static variable? */ diff --git a/kernel/mutex.c b/kernel/mutex.c index d607ed5dd441..2f363b9bfc1f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -498,3 +498,128 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) return 1; } EXPORT_SYMBOL(atomic_dec_and_mutex_lock); + + + + +void mutex_lock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg) +{ + long state = TASK_UNINTERRUPTIBLE; + + struct task_struct *task = current; + struct mutex_waiter waiter; + unsigned long flags; + + preempt_disable(); + mutex_acquire(&lock->dep_map, subclass, 0, ip); + + spin_lock_mutex(&lock->wait_lock, flags); + + if(pre) + { + if(unlikely(pre(pre_arg))) + { + // this will fuck with lockdep's CONFIG_PROVE_LOCKING... + spin_unlock_mutex(&lock->wait_lock, flags); + preempt_enable(); + return; + } + } + + debug_mutex_lock_common(lock, &waiter); + debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); + + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + waiter.task = task; + + if (atomic_xchg(&lock->count, -1) == 1) + goto done; + + lock_contended(&lock->dep_map, ip); + + for (;;) { + /* + * Lets try to take the lock again - this is needed even if + * we get here for the first time (shortly after failing to + * acquire the lock), to make sure that we get a wakeup once + * it's unlocked. Later on, if we sleep, this is the + * operation that gives us the lock. We xchg it to -1, so + * that when we release the lock, we properly wake up the + * other waiters: + */ + if (atomic_xchg(&lock->count, -1) == 1) + break; + + __set_task_state(task, state); + + /* didnt get the lock, go to sleep: */ + spin_unlock_mutex(&lock->wait_lock, flags); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + spin_lock_mutex(&lock->wait_lock, flags); + } + +done: + lock_acquired(&lock->dep_map, ip); + /* got the lock - rejoice! 
*/ + mutex_remove_waiter(lock, &waiter, current_thread_info()); + mutex_set_owner(lock); + + /* set it to 0 if there are no waiters left: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); + + if(post) + post(post_arg); + + spin_unlock_mutex(&lock->wait_lock, flags); + + debug_mutex_free_waiter(&waiter); + preempt_enable(); +} +EXPORT_SYMBOL(mutex_lock_sfx); + +void mutex_unlock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg) +{ + unsigned long flags; + + spin_lock_mutex(&lock->wait_lock, flags); + + if(pre) + pre(pre_arg); + + //mutex_release(&lock->dep_map, nested, _RET_IP_); + mutex_release(&lock->dep_map, 1, _RET_IP_); + debug_mutex_unlock(lock); + + /* + * some architectures leave the lock unlocked in the fastpath failure + * case, others need to leave it locked. In the later case we have to + * unlock it here + */ + if (__mutex_slowpath_needs_to_unlock()) + atomic_set(&lock->count, 1); + + if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter = + list_entry(lock->wait_list.next, + struct mutex_waiter, list); + + debug_mutex_wake_waiter(lock, waiter); + + wake_up_process(waiter->task); + } + + if(post) + post(post_arg); + + spin_unlock_mutex(&lock->wait_lock, flags); +} +EXPORT_SYMBOL(mutex_unlock_sfx); diff --git a/kernel/sched.c b/kernel/sched.c index baaca61bc3a3..f3d9a69a3777 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -83,6 +83,10 @@ #include #include +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + static void litmus_tick(struct rq*, struct task_struct*); #define CREATE_TRACE_POINTS @@ -4305,6 +4309,7 @@ pick_next_task(struct rq *rq) BUG(); /* the idle class will always have a runnable task */ } + /* * schedule() is the main scheduler function. */ @@ -4323,6 +4328,10 @@ need_resched: rcu_note_context_switch(cpu); prev = rq->curr; +#ifdef CONFIG_LITMUS_SOFTIRQD + release_klitirqd_lock(prev); +#endif + /* LITMUS^RT: quickly re-evaluate the scheduling decision * if the previous one is no longer valid after CTX. */ @@ -4411,13 +4420,24 @@ litmus_need_resched_nonpreemptible: goto litmus_need_resched_nonpreemptible; preempt_enable_no_resched(); + if (need_resched()) goto need_resched; +#ifdef LITMUS_SOFTIRQD + reacquire_klitirqd_lock(prev); +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + litmus->run_tasklets(prev); +#endif + srp_ceiling_block(); } EXPORT_SYMBOL(schedule); + + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER static inline bool owner_running(struct mutex *lock, struct task_struct *owner) @@ -4561,6 +4581,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, } } + /** * __wake_up - wake up threads blocked on a waitqueue. 
* @q: the waitqueue @@ -4747,6 +4768,12 @@ void __sched wait_for_completion(struct completion *x) } EXPORT_SYMBOL(wait_for_completion); +void __sched __wait_for_completion_locked(struct completion *x) +{ + do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(__wait_for_completion_locked); + /** * wait_for_completion_timeout: - waits for completion of a task (w/timeout) * @x: holds the state of this particular completion diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 94a62c0d4ade..c947a046a6d7 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -33,11 +33,11 @@ #include #include -static noinline void __down(struct semaphore *sem); +noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); static noinline int __down_killable(struct semaphore *sem); static noinline int __down_timeout(struct semaphore *sem, long jiffies); -static noinline void __up(struct semaphore *sem); +noinline void __up(struct semaphore *sem); /** * down - acquire the semaphore @@ -190,11 +190,13 @@ EXPORT_SYMBOL(up); /* Functions for the contended case */ +/* struct semaphore_waiter { struct list_head list; struct task_struct *task; int up; }; + */ /* * Because this function is inlined, the 'state' parameter will be @@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state, return -EINTR; } -static noinline void __sched __down(struct semaphore *sem) +noinline void __sched __down(struct semaphore *sem) { __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } +EXPORT_SYMBOL(__down); + static noinline int __sched __down_interruptible(struct semaphore *sem) { @@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); } -static noinline void __sched __up(struct semaphore *sem) +noinline void __sched __up(struct semaphore *sem) { struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); @@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem) waiter->up = 1; wake_up_process(waiter->task); } +EXPORT_SYMBOL(__up); \ No newline at end of file diff --git a/kernel/softirq.c b/kernel/softirq.c index fca82c32042b..5ce271675662 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -29,6 +29,15 @@ #include #include + +#include +#include + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#include +#endif + /* - No shared variables, all the data are CPU local. - If a softirq needs serialization, let it serialize itself @@ -67,7 +76,7 @@ char *softirq_to_name[NR_SOFTIRQS] = { * to the pending events, so lets the scheduler to balance * the softirq load for us. */ -static void wakeup_softirqd(void) +void wakeup_softirqd(void) { /* Interrupts are disabled: no need to stop preemption */ struct task_struct *tsk = __this_cpu_read(ksoftirqd); @@ -193,6 +202,7 @@ void local_bh_enable_ip(unsigned long ip) } EXPORT_SYMBOL(local_bh_enable_ip); + /* * We restart softirq processing MAX_SOFTIRQ_RESTART times, * and we fall back to softirqd after that. 
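/*
 * Illustrative sketch (not part of the patch): __wait_for_completion_locked(),
 * added to kernel/sched.c above, is the "caller already holds x->wait.lock"
 * variant of wait_for_completion().  do_wait_for_common() releases and
 * re-acquires x->wait.lock around each sleep, so a caller is presumably
 * expected to look roughly like the hypothetical helper below.
 */
#include <linux/completion.h>
#include <linux/spinlock.h>

static void example_wait_locked(struct completion *done)
{
	spin_lock_irq(&done->wait.lock);
	/* ... publish state the waker will act on, under the wait lock ... */
	__wait_for_completion_locked(done); /* sleeps; wait.lock is dropped and re-taken internally */
	/* wait.lock is held again at this point */
	spin_unlock_irq(&done->wait.lock);
}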
@@ -206,65 +216,65 @@ EXPORT_SYMBOL(local_bh_enable_ip); asmlinkage void __do_softirq(void) { - struct softirq_action *h; - __u32 pending; - int max_restart = MAX_SOFTIRQ_RESTART; - int cpu; + struct softirq_action *h; + __u32 pending; + int max_restart = MAX_SOFTIRQ_RESTART; + int cpu; - pending = local_softirq_pending(); - account_system_vtime(current); + pending = local_softirq_pending(); + account_system_vtime(current); - __local_bh_disable((unsigned long)__builtin_return_address(0), - SOFTIRQ_OFFSET); - lockdep_softirq_enter(); + __local_bh_disable((unsigned long)__builtin_return_address(0), + SOFTIRQ_OFFSET); + lockdep_softirq_enter(); - cpu = smp_processor_id(); + cpu = smp_processor_id(); restart: - /* Reset the pending bitmask before enabling irqs */ - set_softirq_pending(0); + /* Reset the pending bitmask before enabling irqs */ + set_softirq_pending(0); - local_irq_enable(); + local_irq_enable(); - h = softirq_vec; - - do { - if (pending & 1) { - unsigned int vec_nr = h - softirq_vec; - int prev_count = preempt_count(); - - kstat_incr_softirqs_this_cpu(vec_nr); - - trace_softirq_entry(vec_nr); - h->action(h); - trace_softirq_exit(vec_nr); - if (unlikely(prev_count != preempt_count())) { - printk(KERN_ERR "huh, entered softirq %u %s %p" - "with preempt_count %08x," - " exited with %08x?\n", vec_nr, - softirq_to_name[vec_nr], h->action, - prev_count, preempt_count()); - preempt_count() = prev_count; - } + h = softirq_vec; - rcu_bh_qs(cpu); - } - h++; - pending >>= 1; - } while (pending); + do { + if (pending & 1) { + unsigned int vec_nr = h - softirq_vec; + int prev_count = preempt_count(); - local_irq_disable(); + kstat_incr_softirqs_this_cpu(vec_nr); - pending = local_softirq_pending(); - if (pending && --max_restart) - goto restart; + trace_softirq_entry(vec_nr); + h->action(h); + trace_softirq_exit(vec_nr); + if (unlikely(prev_count != preempt_count())) { + printk(KERN_ERR "huh, entered softirq %u %s %p" + "with preempt_count %08x," + " exited with %08x?\n", vec_nr, + softirq_to_name[vec_nr], h->action, + prev_count, preempt_count()); + preempt_count() = prev_count; + } - if (pending) - wakeup_softirqd(); + rcu_bh_qs(cpu); + } + h++; + pending >>= 1; + } while (pending); - lockdep_softirq_exit(); + local_irq_disable(); - account_system_vtime(current); - __local_bh_enable(SOFTIRQ_OFFSET); + pending = local_softirq_pending(); + if (pending && --max_restart) + goto restart; + + if (pending) + wakeup_softirqd(); + + lockdep_softirq_exit(); + + account_system_vtime(current); + __local_bh_enable(SOFTIRQ_OFFSET); } #ifndef __ARCH_HAS_DO_SOFTIRQ @@ -402,7 +412,98 @@ struct tasklet_head static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); +#ifdef CONFIG_LITMUS_NVIDIA +static int __do_nv_now(struct tasklet_struct* tasklet) +{ + int success = 1; + + if(tasklet_trylock(tasklet)) { + if (!atomic_read(&tasklet->count)) { + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) { + BUG(); + } + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + } + else { + success = 0; + } + + tasklet_unlock(tasklet); + } + else { + success = 0; + } + + return success; +} +#endif + + void __tasklet_schedule(struct tasklet_struct *t) +{ +#ifdef CONFIG_LITMUS_NVIDIA + if(is_nvidia_func(t->func)) + { +#if 0 + // do nvidia tasklets right away and return + if(__do_nv_now(t)) + return; +#else + u32 nvidia_device = get_tasklet_nv_device_num(t); + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", + // __FUNCTION__, 
nvidia_device,litmus_clock()); + + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidia_device, &flags); + + device_owner = get_nv_max_device_owner(nvidia_device); + + if(device_owner==NULL) + { + t->owner = NULL; + } + else + { + if(is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", + __FUNCTION__, nvidia_device,litmus_clock()); + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__,device_owner->pid,nvidia_device); + + t->owner = device_owner; + sched_trace_tasklet_release(t->owner); + + if(likely(_litmus_tasklet_schedule(t,nvidia_device))) + { + unlock_nv_registry(nvidia_device, &flags); + return; + } + else + { + t->owner = NULL; /* fall through to normal scheduling */ + } + } + else + { + t->owner = NULL; + } + } + unlock_nv_registry(nvidia_device, &flags); +#endif + } + +#endif + + ___tasklet_schedule(t); +} +EXPORT_SYMBOL(__tasklet_schedule); + + +void ___tasklet_schedule(struct tasklet_struct *t) { unsigned long flags; @@ -413,10 +514,64 @@ void __tasklet_schedule(struct tasklet_struct *t) raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_restore(flags); } +EXPORT_SYMBOL(___tasklet_schedule); -EXPORT_SYMBOL(__tasklet_schedule); void __tasklet_hi_schedule(struct tasklet_struct *t) +{ +#ifdef CONFIG_LITMUS_NVIDIA + if(is_nvidia_func(t->func)) + { + u32 nvidia_device = get_tasklet_nv_device_num(t); + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", + // __FUNCTION__, nvidia_device,litmus_clock()); + + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidia_device, &flags); + + device_owner = get_nv_max_device_owner(nvidia_device); + + if(device_owner==NULL) + { + t->owner = NULL; + } + else + { + if( is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n", + __FUNCTION__, nvidia_device,litmus_clock()); + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__,device_owner->pid,nvidia_device); + + t->owner = device_owner; + sched_trace_tasklet_release(t->owner); + if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device))) + { + unlock_nv_registry(nvidia_device, &flags); + return; + } + else + { + t->owner = NULL; /* fall through to normal scheduling */ + } + } + else + { + t->owner = NULL; + } + } + unlock_nv_registry(nvidia_device, &flags); + } +#endif + + ___tasklet_hi_schedule(t); +} +EXPORT_SYMBOL(__tasklet_hi_schedule); + +void ___tasklet_hi_schedule(struct tasklet_struct* t) { unsigned long flags; @@ -427,10 +582,64 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) raise_softirq_irqoff(HI_SOFTIRQ); local_irq_restore(flags); } - -EXPORT_SYMBOL(__tasklet_hi_schedule); +EXPORT_SYMBOL(___tasklet_hi_schedule); void __tasklet_hi_schedule_first(struct tasklet_struct *t) +{ + BUG_ON(!irqs_disabled()); +#ifdef CONFIG_LITMUS_NVIDIA + if(is_nvidia_func(t->func)) + { + u32 nvidia_device = get_tasklet_nv_device_num(t); + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", + // __FUNCTION__, nvidia_device,litmus_clock()); + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidia_device, &flags); + + device_owner = get_nv_max_device_owner(nvidia_device); + + if(device_owner==NULL) + { + t->owner = NULL; + } + else + { + if(is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", + __FUNCTION__, nvidia_device,litmus_clock()); + + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + 
__FUNCTION__,device_owner->pid,nvidia_device); + + t->owner = device_owner; + sched_trace_tasklet_release(t->owner); + if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device))) + { + unlock_nv_registry(nvidia_device, &flags); + return; + } + else + { + t->owner = NULL; /* fall through to normal scheduling */ + } + } + else + { + t->owner = NULL; + } + } + unlock_nv_registry(nvidia_device, &flags); + } +#endif + + ___tasklet_hi_schedule_first(t); +} +EXPORT_SYMBOL(__tasklet_hi_schedule_first); + +void ___tasklet_hi_schedule_first(struct tasklet_struct* t) { BUG_ON(!irqs_disabled()); @@ -438,8 +647,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) __this_cpu_write(tasklet_hi_vec.head, t); __raise_softirq_irqoff(HI_SOFTIRQ); } - -EXPORT_SYMBOL(__tasklet_hi_schedule_first); +EXPORT_SYMBOL(___tasklet_hi_schedule_first); static void tasklet_action(struct softirq_action *a) { @@ -495,6 +703,7 @@ static void tasklet_hi_action(struct softirq_action *a) if (!atomic_read(&t->count)) { if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) BUG(); + t->func(t->data); tasklet_unlock(t); continue; @@ -518,8 +727,13 @@ void tasklet_init(struct tasklet_struct *t, t->next = NULL; t->state = 0; atomic_set(&t->count, 0); + t->func = func; t->data = data; + +#ifdef CONFIG_LITMUS_SOFTIRQD + t->owner = NULL; +#endif } EXPORT_SYMBOL(tasklet_init); @@ -534,6 +748,7 @@ void tasklet_kill(struct tasklet_struct *t) yield(); } while (test_bit(TASKLET_STATE_SCHED, &t->state)); } + tasklet_unlock_wait(t); clear_bit(TASKLET_STATE_SCHED, &t->state); } @@ -808,6 +1023,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { if (*i == t) { *i = t->next; + /* If this was the tail element, move the tail ptr */ if (*i == NULL) per_cpu(tasklet_vec, cpu).tail = i; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0400553f0d04..6b59d59ce3cf 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -44,6 +44,13 @@ #include "workqueue_sched.h" +#ifdef CONFIG_LITMUS_NVIDIA +#include +#include +#include +#endif + + enum { /* global_cwq flags */ GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ @@ -1047,9 +1054,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, work_flags |= WORK_STRUCT_DELAYED; worklist = &cwq->delayed_works; } - insert_work(cwq, work, worklist, work_flags); - spin_unlock_irqrestore(&gcwq->lock, flags); } @@ -2687,10 +2692,70 @@ EXPORT_SYMBOL(cancel_delayed_work_sync); */ int schedule_work(struct work_struct *work) { - return queue_work(system_wq, work); +#if 0 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + if(is_nvidia_func(work->func)) + { + u32 nvidiaDevice = get_work_nv_device_num(work); + + //1) Ask Litmus which task owns GPU . (API to be defined.) + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidiaDevice, &flags); + + device_owner = get_nv_max_device_owner(nvidiaDevice); + + //2) If there is an owner, set work->owner to the owner's task struct. 
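		// (Descriptive note: this entire NVIDIA redirection path is disabled
		//  by the enclosing "#if 0", so schedule_work() currently always
		//  falls through to __schedule_work() below.)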
+ if(device_owner==NULL) + { + work->owner = NULL; + //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice); + } + else + { + if( is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n", + __FUNCTION__, nvidiaDevice,litmus_clock()); + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__, + device_owner->pid, + nvidiaDevice); + + //3) Call litmus_schedule_work() and return (don't execute the rest + // of schedule_schedule()). + work->owner = device_owner; + sched_trace_work_release(work->owner); + if(likely(litmus_schedule_work(work, nvidiaDevice))) + { + unlock_nv_registry(nvidiaDevice, &flags); + return 1; + } + else + { + work->owner = NULL; /* fall through to normal work scheduling */ + } + } + else + { + work->owner = NULL; + } + } + unlock_nv_registry(nvidiaDevice, &flags); + } +#endif +#endif + return(__schedule_work(work)); } EXPORT_SYMBOL(schedule_work); +int __schedule_work(struct work_struct* work) +{ + return queue_work(system_wq, work); +} +EXPORT_SYMBOL(__schedule_work); + /* * schedule_work_on - put work task on a specific cpu * @cpu: cpu to put the work task on diff --git a/litmus/Kconfig b/litmus/Kconfig index 94b48e199577..8c156e4da528 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -60,6 +60,42 @@ config LITMUS_LOCKING Say Yes if you want to include locking protocols such as the FMLP and Baker's SRP. +config LITMUS_AFFINITY_LOCKING + bool "Enable affinity infrastructure in k-exclusion locking protocols." + depends on LITMUS_LOCKING + default n + help + Enable affinity tracking infrastructure in k-exclusion locking protocols. + This only enabled the *infrastructure* not actual affinity algorithms. + + If unsure, say No. + +config LITMUS_NESTED_LOCKING + bool "Support for nested inheritance in locking protocols" + depends on LITMUS_LOCKING + default n + help + Enable nested priority inheritance. + +config LITMUS_DGL_SUPPORT + bool "Support for dynamic group locks" + depends on LITMUS_NESTED_LOCKING + default n + help + Enable dynamic group lock support. + +config LITMUS_MAX_DGL_SIZE + int "Maximum size of a dynamic group lock." + depends on LITMUS_DGL_SUPPORT + range 1 128 + default "10" + help + Dynamic group lock data structures are allocated on the process + stack when a group is requested. We set a maximum size of + locks in a dynamic group lock to avoid dynamic allocation. + + TODO: Batch DGL requests exceeding LITMUS_MAX_DGL_SIZE. + endmenu menu "Performance Enhancements" @@ -121,7 +157,7 @@ config SCHED_TASK_TRACE config SCHED_TASK_TRACE_SHIFT int "Buffer size for sched_trace_xxx() events" depends on SCHED_TASK_TRACE - range 8 13 + range 8 15 default 9 help @@ -215,4 +251,114 @@ config PREEMPT_STATE_TRACE endmenu +menu "Interrupt Handling" + +choice + prompt "Scheduling of interrupt bottom-halves in Litmus." + default LITMUS_SOFTIRQD_NONE + depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ + help + Schedule tasklets with known priorities in Litmus. + +config LITMUS_SOFTIRQD_NONE + bool "No tasklet scheduling in Litmus." + help + Don't schedule tasklets in Litmus. Default. + +config LITMUS_SOFTIRQD + bool "Spawn klitirqd interrupt handling threads." + help + Create klitirqd interrupt handling threads. Work must be + specifically dispatched to these workers. (Softirqs for + Litmus tasks are not magically redirected to klitirqd.) + + G-EDF/RM, C-EDF/RM ONLY for now! + + +config LITMUS_PAI_SOFTIRQD + bool "Defer tasklets to context switch points." 
+ help + Only execute scheduled tasklet bottom halves at + scheduling points. Trades context switch overhead + at the cost of non-preemptive durations of bottom half + processing. + + G-EDF/RM, C-EDF/RM ONLY for now! + +endchoice + + +config NR_LITMUS_SOFTIRQD + int "Number of klitirqd." + depends on LITMUS_SOFTIRQD + range 1 4096 + default "1" + help + Should be <= to the number of CPUs in your system. + +config LITMUS_NVIDIA + bool "Litmus handling of NVIDIA interrupts." + default n + help + Direct tasklets from NVIDIA devices to Litmus's klitirqd + or PAI interrupt handling routines. + + If unsure, say No. + +config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT + bool "Enable affinity-aware heuristics to improve GPU assignment." + depends on LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING + default n + help + Enable several heuristics to improve the assignment + of GPUs to real-time tasks to reduce the overheads + of memory migrations. + + If unsure, say No. + +config NV_DEVICE_NUM + int "Number of NVIDIA GPUs." + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD + range 1 4096 + default "1" + help + Should be (<= to the number of CPUs) and + (<= to the number of GPUs) in your system. + +config NV_MAX_SIMULT_USERS + int "Maximum number of threads sharing a GPU simultanously" + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD + range 1 3 + default "2" + help + Should be equal to the #copy_engines + #execution_engines + of the GPUs in your system. + + Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?) + Consumer Fermi/Kepler GPUs = 2 (GTX-4xx thru -6xx) + Older = 1 (ex. GTX-2xx) + +choice + prompt "CUDA/Driver Version Support" + default CUDA_4_0 + depends on LITMUS_NVIDIA + help + Select the version of CUDA/driver to support. + +config CUDA_4_0 + bool "CUDA 4.0" + depends on LITMUS_NVIDIA + help + Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40) + +config CUDA_3_2 + bool "CUDA 3.2" + depends on LITMUS_NVIDIA + help + Support CUDA 3.2 (dev. driver version: x86_64-260.24) + +endchoice + +endmenu + endmenu diff --git a/litmus/Makefile b/litmus/Makefile index 7338180f196f..080cbf694a41 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -15,9 +15,11 @@ obj-y = sched_plugin.o litmus.o \ locking.o \ srp.o \ bheap.o \ + binheap.o \ ctrldev.o \ sched_gsn_edf.o \ - sched_psn_edf.o + sched_psn_edf.o \ + kfmlp_lock.o obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o @@ -27,3 +29,10 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o + +obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o +obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o +obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o +obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o + +obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o diff --git a/litmus/affinity.c b/litmus/affinity.c index 3fa6dd789400..cd93249b5506 100644 --- a/litmus/affinity.c +++ b/litmus/affinity.c @@ -26,7 +26,7 @@ void init_topology(void) { cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); } printk("CPU %d has %d neighbors at level %d. 
(mask = %lx)\n", - cpu, neigh_info[cpu].size[i], i, + cpu, neigh_info[cpu].size[i], i, *cpumask_bits(neigh_info[cpu].neighbors[i])); } diff --git a/litmus/binheap.c b/litmus/binheap.c new file mode 100644 index 000000000000..8d42403ad52c --- /dev/null +++ b/litmus/binheap.c @@ -0,0 +1,443 @@ +#include + +//extern void dump_node_data(struct binheap_node* parent, struct binheap_node* child); +//extern void dump_node_data2(struct binheap_handle *handle, struct binheap_node* bad_node); + +int binheap_is_in_this_heap(struct binheap_node *node, + struct binheap_handle* heap) +{ + if(!binheap_is_in_heap(node)) { + return 0; + } + + while(node->parent != NULL) { + node = node->parent; + } + + return (node == heap->root); +} + +/* Update the node reference pointers. Same logic as Litmus binomial heap. */ +static void __update_ref(struct binheap_node *parent, + struct binheap_node *child) +{ + *(parent->ref_ptr) = child; + *(child->ref_ptr) = parent; + + swap(parent->ref_ptr, child->ref_ptr); +} + +/* Swaps data between two nodes. */ +static void __binheap_swap(struct binheap_node *parent, + struct binheap_node *child) +{ +// if(parent == BINHEAP_POISON || child == BINHEAP_POISON) { +// dump_node_data(parent, child); +// BUG(); +// } + + swap(parent->data, child->data); + __update_ref(parent, child); +} + + +/* Swaps memory and data between two nodes. Actual nodes swap instead of + * just data. Needed when we delete nodes from the heap. + */ +static void __binheap_swap_safe(struct binheap_handle *handle, + struct binheap_node *a, + struct binheap_node *b) +{ + swap(a->data, b->data); + __update_ref(a, b); + + if((a->parent != NULL) && (a->parent == b->parent)) { + /* special case: shared parent */ + swap(a->parent->left, a->parent->right); + } + else { + /* Update pointers to swap parents. */ + + if(a->parent) { + if(a == a->parent->left) { + a->parent->left = b; + } + else { + a->parent->right = b; + } + } + + if(b->parent) { + if(b == b->parent->left) { + b->parent->left = a; + } + else { + b->parent->right = a; + } + } + + swap(a->parent, b->parent); + } + + /* swap children */ + + if(a->left) { + a->left->parent = b; + + if(a->right) { + a->right->parent = b; + } + } + + if(b->left) { + b->left->parent = a; + + if(b->right) { + b->right->parent = a; + } + } + + swap(a->left, b->left); + swap(a->right, b->right); + + + /* update next/last/root pointers */ + + if(a == handle->next) { + handle->next = b; + } + else if(b == handle->next) { + handle->next = a; + } + + if(a == handle->last) { + handle->last = b; + } + else if(b == handle->last) { + handle->last = a; + } + + if(a == handle->root) { + handle->root = b; + } + else if(b == handle->root) { + handle->root = a; + } +} + + +/** + * Update the pointer to the last node in the complete binary tree. + * Called internally after the root node has been deleted. + */ +static void __binheap_update_last(struct binheap_handle *handle) +{ + struct binheap_node *temp = handle->last; + + /* find a "bend" in the tree. */ + while(temp->parent && (temp == temp->parent->left)) { + temp = temp->parent; + } + + /* step over to sibling if we're not at root */ + if(temp->parent != NULL) { + temp = temp->parent->left; + } + + /* now travel right as far as possible. */ + while(temp->right != NULL) { + temp = temp->right; + } + + /* take one step to the left if we're not at the bottom-most level. 
*/ + if(temp->left != NULL) { + temp = temp->left; + } + + //BUG_ON(!(temp->left == NULL && temp->right == NULL)); + + handle->last = temp; +} + +/** + * Update the pointer to the node that will take the next inserted node. + * Called internally after a node has been inserted. + */ +static void __binheap_update_next(struct binheap_handle *handle) +{ + struct binheap_node *temp = handle->next; + + /* find a "bend" in the tree. */ + while(temp->parent && (temp == temp->parent->right)) { + temp = temp->parent; + } + + /* step over to sibling if we're not at root */ + if(temp->parent != NULL) { + temp = temp->parent->right; + } + + /* now travel left as far as possible. */ + while(temp->left != NULL) { + temp = temp->left; + } + + handle->next = temp; +} + + + +/* bubble node up towards root */ +static void __binheap_bubble_up( + struct binheap_handle *handle, + struct binheap_node *node) +{ + //BUG_ON(!binheap_is_in_heap(node)); +// if(!binheap_is_in_heap(node)) +// { +// dump_node_data2(handle, node); +// BUG(); +// } + + while((node->parent != NULL) && + ((node->data == BINHEAP_POISON) /* let BINHEAP_POISON data bubble to the top */ || + handle->compare(node, node->parent))) { + __binheap_swap(node->parent, node); + node = node->parent; + +// if(!binheap_is_in_heap(node)) +// { +// dump_node_data2(handle, node); +// BUG(); +// } + } +} + + +/* bubble node down, swapping with min-child */ +static void __binheap_bubble_down(struct binheap_handle *handle) +{ + struct binheap_node *node = handle->root; + + while(node->left != NULL) { + if(node->right && handle->compare(node->right, node->left)) { + if(handle->compare(node->right, node)) { + __binheap_swap(node, node->right); + node = node->right; + } + else { + break; + } + } + else { + if(handle->compare(node->left, node)) { + __binheap_swap(node, node->left); + node = node->left; + } + else { + break; + } + } + } +} + + + +void __binheap_add(struct binheap_node *new_node, + struct binheap_handle *handle, + void *data) +{ +// if(binheap_is_in_heap(new_node)) +// { +// dump_node_data2(handle, new_node); +// BUG(); +// } + + new_node->data = data; + new_node->ref = new_node; + new_node->ref_ptr = &(new_node->ref); + + if(!binheap_empty(handle)) { + /* insert left side first */ + if(handle->next->left == NULL) { + handle->next->left = new_node; + new_node->parent = handle->next; + new_node->left = NULL; + new_node->right = NULL; + + handle->last = new_node; + + __binheap_bubble_up(handle, new_node); + } + else { + /* left occupied. insert right. */ + handle->next->right = new_node; + new_node->parent = handle->next; + new_node->left = NULL; + new_node->right = NULL; + + handle->last = new_node; + + __binheap_update_next(handle); + __binheap_bubble_up(handle, new_node); + } + } + else { + /* first node in heap */ + + new_node->parent = NULL; + new_node->left = NULL; + new_node->right = NULL; + + handle->root = new_node; + handle->next = new_node; + handle->last = new_node; + } +} + + + +/** + * Removes the root node from the heap. The node is removed after coalescing + * the binheap_node with its original data pointer at the root of the tree. + * + * The 'last' node in the tree is then swapped up to the root and bubbled + * down. 
+ */ +void __binheap_delete_root(struct binheap_handle *handle, + struct binheap_node *container) +{ + struct binheap_node *root = handle->root; + +// if(!binheap_is_in_heap(container)) +// { +// dump_node_data2(handle, container); +// BUG(); +// } + + if(root != container) { + /* coalesce */ + __binheap_swap_safe(handle, root, container); + root = container; + } + + if(handle->last != root) { + /* swap 'last' node up to root and bubble it down. */ + + struct binheap_node *to_move = handle->last; + + if(to_move->parent != root) { + handle->next = to_move->parent; + + if(handle->next->right == to_move) { + /* disconnect from parent */ + to_move->parent->right = NULL; + handle->last = handle->next->left; + } + else { + /* find new 'last' before we disconnect */ + __binheap_update_last(handle); + + /* disconnect from parent */ + to_move->parent->left = NULL; + } + } + else { + /* 'last' is direct child of root */ + + handle->next = to_move; + + if(to_move == to_move->parent->right) { + to_move->parent->right = NULL; + handle->last = to_move->parent->left; + } + else { + to_move->parent->left = NULL; + handle->last = to_move; + } + } + to_move->parent = NULL; + + /* reconnect as root. We can't just swap data ptrs since root node + * may be freed after this function returns. + */ + to_move->left = root->left; + to_move->right = root->right; + if(to_move->left != NULL) { + to_move->left->parent = to_move; + } + if(to_move->right != NULL) { + to_move->right->parent = to_move; + } + + handle->root = to_move; + + /* bubble down */ + __binheap_bubble_down(handle); + } + else { + /* removing last node in tree */ + handle->root = NULL; + handle->next = NULL; + handle->last = NULL; + } + + /* mark as removed */ + container->parent = BINHEAP_POISON; +} + + +/** + * Delete an arbitrary node. Bubble node to delete up to the root, + * and then delete to root. + */ +void __binheap_delete(struct binheap_node *node_to_delete, + struct binheap_handle *handle) +{ + struct binheap_node *target = node_to_delete->ref; + void *temp_data = target->data; + +// if(!binheap_is_in_heap(node_to_delete)) +// { +// dump_node_data2(handle, node_to_delete); +// BUG(); +// } +// +// if(!binheap_is_in_heap(target)) +// { +// dump_node_data2(handle, target); +// BUG(); +// } + + /* temporarily set data to null to allow node to bubble up to the top. */ + target->data = BINHEAP_POISON; + + __binheap_bubble_up(handle, target); + __binheap_delete_root(handle, node_to_delete); + + node_to_delete->data = temp_data; /* restore node data pointer */ + //node_to_delete->parent = BINHEAP_POISON; /* poison the node */ +} + +/** + * Bubble up a node whose pointer has decreased in value. + */ +void __binheap_decrease(struct binheap_node *orig_node, + struct binheap_handle *handle) +{ + struct binheap_node *target = orig_node->ref; + +// if(!binheap_is_in_heap(orig_node)) +// { +// dump_node_data2(handle, orig_node); +// BUG(); +// } +// +// if(!binheap_is_in_heap(target)) +// { +// dump_node_data2(handle, target); +// BUG(); +// } +// + __binheap_bubble_up(handle, target); +} diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 9b44dc2d8d1e..b346bdd65b3b 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -12,40 +12,61 @@ #include #include +#ifdef CONFIG_LITMUS_NESTED_LOCKING +#include +#endif + #include + + /* edf_higher_prio - returns true if first has a higher EDF priority * than second. Deadline ties are broken by PID. 
* * both first and second may be NULL */ -int edf_higher_prio(struct task_struct* first, - struct task_struct* second) +#ifdef CONFIG_LITMUS_NESTED_LOCKING +int __edf_higher_prio( + struct task_struct* first, comparison_mode_t first_mode, + struct task_struct* second, comparison_mode_t second_mode) +#else +int edf_higher_prio(struct task_struct* first, struct task_struct* second) +#endif { struct task_struct *first_task = first; struct task_struct *second_task = second; /* There is no point in comparing a task to itself. */ if (first && first == second) { - TRACE_TASK(first, - "WARNING: pointless edf priority comparison.\n"); + TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid); + WARN_ON(1); return 0; } /* check for NULL tasks */ - if (!first || !second) + if (!first || !second) { return first && !second; + } #ifdef CONFIG_LITMUS_LOCKING - - /* Check for inherited priorities. Change task + /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. */ - if (unlikely(first->rt_param.inh_task)) + if (unlikely(first->rt_param.inh_task) +#ifdef CONFIG_LITMUS_NESTED_LOCKING + && (first_mode == EFFECTIVE) +#endif + ) { first_task = first->rt_param.inh_task; - if (unlikely(second->rt_param.inh_task)) + } + if (unlikely(second->rt_param.inh_task) +#ifdef CONFIG_LITMUS_NESTED_LOCKING + && (second_mode == EFFECTIVE) +#endif + ) { second_task = second->rt_param.inh_task; + } /* Check for priority boosting. Tie-break by start of boosting. */ @@ -53,37 +74,109 @@ int edf_higher_prio(struct task_struct* first, /* first_task is boosted, how about second_task? */ if (!is_priority_boosted(second_task) || lt_before(get_boost_start(first_task), - get_boost_start(second_task))) + get_boost_start(second_task))) { return 1; - else + } + else { return 0; - } else if (unlikely(is_priority_boosted(second_task))) + } + } + else if (unlikely(is_priority_boosted(second_task))) { /* second_task is boosted, first is not*/ return 0; + } #endif +// // rate-monotonic for testing +// if (!is_realtime(second_task)) { +// return true; +// } +// +// if (shorter_period(first_task, second_task)) { +// return true; +// } +// +// if (get_period(first_task) == get_period(second_task)) { +// if (first_task->pid < second_task->pid) { +// return true; +// } +// else if (first_task->pid == second_task->pid) { +// return !second->rt_param.inh_task; +// } +// } + + if (!is_realtime(second_task)) { + return true; + } + + if (earlier_deadline(first_task, second_task)) { + return true; + } + if (get_deadline(first_task) == get_deadline(second_task)) { + + if (shorter_period(first_task, second_task)) { + return true; + } + if (get_rt_period(first_task) == get_rt_period(second_task)) { + if (first_task->pid < second_task->pid) { + return true; + } + if (first_task->pid == second_task->pid) { +#ifdef CONFIG_LITMUS_SOFTIRQD + if (first_task->rt_param.is_proxy_thread < + second_task->rt_param.is_proxy_thread) { + return true; + } + if(first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { + return !second->rt_param.inh_task; + } +#else + return !second->rt_param.inh_task; +#endif + } + + } + } + + return false; +} + + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +int edf_higher_prio(struct task_struct* first, struct task_struct* second) +{ + return __edf_higher_prio(first, EFFECTIVE, second, EFFECTIVE); +} + +int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b) +{ + struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct 
nested_info, hp_binheap_node); + struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); - return !is_realtime(second_task) || + return __edf_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE); +} - /* is the deadline of the first task earlier? - * Then it has higher priority. - */ - earlier_deadline(first_task, second_task) || +int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b) +{ + return edf_max_heap_order(b, a); // swap comparison +} - /* Do we have a deadline tie? - * Then break by PID. - */ - (get_deadline(first_task) == get_deadline(second_task) && - (first_task->pid < second_task->pid || +int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) +{ + struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node); + struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); - /* If the PIDs are the same then the task with the inherited - * priority wins. - */ - (first_task->pid == second_task->pid && - !second->rt_param.inh_task))); + return __edf_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE); } +int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) +{ + return edf_max_heap_base_priority_order(b, a); // swap comparison +} +#endif + + int edf_ready_order(struct bheap_node* a, struct bheap_node* b) { return edf_higher_prio(bheap2task(a), bheap2task(b)); diff --git a/litmus/fdso.c b/litmus/fdso.c index aa7b384264e3..18fc61b6414a 100644 --- a/litmus/fdso.c +++ b/litmus/fdso.c @@ -20,9 +20,22 @@ extern struct fdso_ops generic_lock_ops; +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +extern struct fdso_ops generic_affinity_ops; +#endif + static const struct fdso_ops* fdso_ops[] = { &generic_lock_ops, /* FMLP_SEM */ &generic_lock_ops, /* SRP_SEM */ + &generic_lock_ops, /* RSM_MUTEX */ + &generic_lock_ops, /* IKGLP_SEM */ + &generic_lock_ops, /* KFMLP_SEM */ +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */ + &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */ + &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */ + &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */ +#endif }; static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c new file mode 100644 index 000000000000..9762be1a085e --- /dev/null +++ b/litmus/gpu_affinity.c @@ -0,0 +1,113 @@ + +#ifdef CONFIG_LITMUS_NVIDIA + +#include +#include +#include + +#include + +#define OBSERVATION_CAP 2*1e9 + +static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) +{ + fp_t relative_err; + fp_t err, new; + fp_t actual = _integer_to_fp(observed); + + err = _sub(actual, fb->est); + new = _add(_mul(a, err), _mul(b, fb->accum_err)); + + relative_err = _div(err, actual); + + fb->est = new; + fb->accum_err = _add(fb->accum_err, err); + + return relative_err; +} + +void update_gpu_estimate(struct task_struct *t, lt_t observed) +{ + feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + + BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); + + if(unlikely(fb->est.val == 0)) { + // kludge-- cap observed values to prevent whacky estimations. + // whacky stuff happens during the first few jobs. 
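		// (Descriptive note: this branch seeds the estimator -- the first
		//  accepted observation becomes fb->est directly and accum_err is
		//  derived from it below, so an uncapped outlier here would skew
		//  the feedback filter in update_estimate() for later jobs.)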
+ if(unlikely(observed > OBSERVATION_CAP)) { + TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n", + observed, OBSERVATION_CAP); + observed = OBSERVATION_CAP; + } + + // take the first observation as our estimate + // (initial value of 0 was bogus anyhow) + fb->est = _integer_to_fp(observed); + fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. + } + else { + fp_t rel_err = update_estimate(fb, + tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration], + tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration], + observed); + + if(unlikely(_fp_to_integer(fb->est) <= 0)) { + TRACE_TASK(t, "Invalid estimate. Patching.\n"); + fb->est = _integer_to_fp(observed); + fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. + } + else { +// struct migration_info mig_info; + + sched_trace_prediction_err(t, + &(tsk_rt(t)->gpu_migration), + &rel_err); + +// mig_info.observed = observed; +// mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); +// mig_info.distance = tsk_rt(t)->gpu_migration; +// +// sched_trace_migration(t, &mig_info); + } + } + + TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", + tsk_rt(t)->gpu_migration, + observed, + _fp_to_integer(fb->est), + _point(fb->est)); +} + +gpu_migration_dist_t gpu_migration_distance(int a, int b) +{ + // GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs + int i; + int dist; + + if(likely(a >= 0 && b >= 0)) { + for(i = 0; i <= MIG_FAR; ++i) { + if(a>>i == b>>i) { + dist = i; + goto out; + } + } + dist = MIG_NONE; // hopefully never reached. + TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b); + } + else { + dist = MIG_NONE; + } + +out: + TRACE_CUR("Distance %d -> %d is %d\n", + a, b, dist); + + return dist; +} + + + + +#endif + diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c new file mode 100644 index 000000000000..83b708ab85cb --- /dev/null +++ b/litmus/ikglp_lock.c @@ -0,0 +1,2838 @@ +#include +#include + +#include +#include +#include + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#include +#endif + +#include + +// big signed value. 
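// (0x7FFFFFFF == INT_MAX; per the macro name, presumably an "invalid/unset
//  distance" sentinel used by the affinity-aware code.)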
+#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF + +int ikglp_max_heap_base_priority_order(struct binheap_node *a, + struct binheap_node *b) +{ + ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); + ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); + + BUG_ON(!d_a); + BUG_ON(!d_b); + + return litmus->__compare(d_a->task, BASE, d_b->task, BASE); +} + +int ikglp_min_heap_base_priority_order(struct binheap_node *a, + struct binheap_node *b) +{ + ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); + ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); + + return litmus->__compare(d_b->task, BASE, d_a->task, BASE); +} + +int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a, + struct binheap_node *b) +{ + ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node); + ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node); + + return litmus->__compare(d_a->task, BASE, d_b->task, BASE); +} + + +int ikglp_min_heap_donee_order(struct binheap_node *a, + struct binheap_node *b) +{ + struct task_struct *prio_a, *prio_b; + + ikglp_donee_heap_node_t *d_a = + binheap_entry(a, ikglp_donee_heap_node_t, node); + ikglp_donee_heap_node_t *d_b = + binheap_entry(b, ikglp_donee_heap_node_t, node); + + if(!d_a->donor_info) { + prio_a = d_a->task; + } + else { + prio_a = d_a->donor_info->task; + BUG_ON(d_a->task != d_a->donor_info->donee_info->task); + } + + if(!d_b->donor_info) { + prio_b = d_b->task; + } + else { + prio_b = d_b->donor_info->task; + BUG_ON(d_b->task != d_b->donor_info->donee_info->task); + } + + // note reversed order + return litmus->__compare(prio_b, BASE, prio_a, BASE); +} + + + +static inline int ikglp_get_idx(struct ikglp_semaphore *sem, + struct fifo_queue *queue) +{ + return (queue - &sem->fifo_queues[0]); +} + +static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem, + struct task_struct *holder) +{ + int i; + for(i = 0; i < sem->nr_replicas; ++i) + if(sem->fifo_queues[i].owner == holder) + return(&sem->fifo_queues[i]); + return(NULL); +} + + + +static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, + wait_queue_t, task_list)->private; + + /* Compare task prios, find high prio task. */ + if(queued != skip && litmus->compare(queued, found)) + found = queued; + } + return found; +} + +static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem, + struct fifo_queue *search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct fifo_queue* step = search_start; + struct fifo_queue* shortest = sem->shortest_fifo_queue; + + do { + step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ? 
+ step+1 : &sem->fifo_queues[0]; + + if(step->count < shortest->count) { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + + }while(step != search_start); + + return(shortest); +} + +static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem) +{ + return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task; +} + + + +#if 0 +static void print_global_list(struct binheap_node* n, int depth) +{ + ikglp_heap_node_t *global_heap_node; + char padding[81] = " "; + + if(n == NULL) { + TRACE_CUR("+-> %p\n", NULL); + return; + } + + global_heap_node = binheap_entry(n, ikglp_heap_node_t, node); + + if(depth*2 <= 80) + padding[depth*2] = '\0'; + + TRACE_CUR("%s+-> %s/%d\n", + padding, + global_heap_node->task->comm, + global_heap_node->task->pid); + + if(n->left) print_global_list(n->left, depth+1); + if(n->right) print_global_list(n->right, depth+1); +} + +static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth) +{ + ikglp_donee_heap_node_t *donee_node; + char padding[81] = " "; + struct task_struct* donor = NULL; + + if(n == NULL) { + TRACE_CUR("+-> %p\n", NULL); + return; + } + + donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node); + + if(depth*2 <= 80) + padding[depth*2] = '\0'; + + if(donee_node->donor_info) { + donor = donee_node->donor_info->task; + } + + TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n", + padding, + donee_node->task->comm, + donee_node->task->pid, + (donor) ? donor->comm : "nil", + (donor) ? donor->pid : -1, + ikglp_get_idx(sem, donee_node->fq)); + + if(n->left) print_donees(sem, n->left, depth+1); + if(n->right) print_donees(sem, n->right, depth+1); +} + +static void print_donors(struct binheap_node *n, int depth) +{ + ikglp_wait_state_t *donor_node; + char padding[81] = " "; + + if(n == NULL) { + TRACE_CUR("+-> %p\n", NULL); + return; + } + + donor_node = binheap_entry(n, ikglp_wait_state_t, node); + + if(depth*2 <= 80) + padding[depth*2] = '\0'; + + + TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n", + padding, + donor_node->task->comm, + donor_node->task->pid, + donor_node->donee_info->task->comm, + donor_node->donee_info->task->pid); + + if(n->left) print_donors(n->left, depth+1); + if(n->right) print_donors(n->right, depth+1); +} +#endif + +static void ikglp_add_global_list(struct ikglp_semaphore *sem, + struct task_struct *t, + ikglp_heap_node_t *node) +{ + + + node->task = t; + INIT_BINHEAP_NODE(&node->node); + + if(sem->top_m_size < sem->m) { + TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", + t->comm, t->pid); +// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); + + binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); + ++(sem->top_m_size); + +// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); + } + else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { + ikglp_heap_node_t *evicted = + binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); + + TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n", + t->comm, t->pid, + evicted->task->comm, evicted->task->pid); + +// TRACE_CUR("Not-Top-M Before:\n"); +// print_global_list(sem->not_top_m.root, 1); +// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); + + + binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); + INIT_BINHEAP_NODE(&evicted->node); + binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); + + 
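+ // The evicted request now sits in not_top_m; insert the new, higher-priority
+ // request into top_m. top_m is a min-heap on base priority, so its root is
+ // always the mth-highest (weakest top-m) request; see ikglp_mth_highest().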
binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); + +// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); +// TRACE_CUR("Not-Top-M After:\n"); +// print_global_list(sem->not_top_m.root, 1); + } + else { + TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", + t->comm, t->pid); +// TRACE_CUR("Not-Top-M Before:\n"); +// print_global_list(sem->not_top_m.root, 1); + + binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); + +// TRACE_CUR("Not-Top-M After:\n"); +// print_global_list(sem->not_top_m.root, 1); + } +} + + +static void ikglp_del_global_list(struct ikglp_semaphore *sem, + struct task_struct *t, + ikglp_heap_node_t *node) +{ + BUG_ON(!binheap_is_in_heap(&node->node)); + + TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid); + + if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { + TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); + +// TRACE_CUR("Not-Top-M Before:\n"); +// print_global_list(sem->not_top_m.root, 1); +// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); + + + binheap_delete(&node->node, &sem->top_m); + + if(!binheap_empty(&sem->not_top_m)) { + ikglp_heap_node_t *promoted = + binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node); + + TRACE_CUR("Promoting %s/%d to top-m\n", + promoted->task->comm, promoted->task->pid); + + binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node); + INIT_BINHEAP_NODE(&promoted->node); + + binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node); + } + else { + TRACE_CUR("No one to promote to top-m.\n"); + --(sem->top_m_size); + } + +// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); +// TRACE_CUR("Not-Top-M After:\n"); +// print_global_list(sem->not_top_m.root, 1); + } + else { + TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); +// TRACE_CUR("Not-Top-M Before:\n"); +// print_global_list(sem->not_top_m.root, 1); + + binheap_delete(&node->node, &sem->not_top_m); + +// TRACE_CUR("Not-Top-M After:\n"); +// print_global_list(sem->not_top_m.root, 1); + } +} + + +static void ikglp_add_donees(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + struct task_struct *t, + ikglp_donee_heap_node_t* node) +{ +// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid); +// TRACE_CUR("donees Before:\n"); +// print_donees(sem, sem->donees.root, 1); + + node->task = t; + node->donor_info = NULL; + node->fq = fq; + INIT_BINHEAP_NODE(&node->node); + + binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node); + +// TRACE_CUR("donees After:\n"); +// print_donees(sem, sem->donees.root, 1); +} + + +static void ikglp_refresh_owners_prio_increase(struct task_struct *t, + struct fifo_queue *fq, + struct ikglp_semaphore *sem, + unsigned long flags) +{ + // priority of 't' has increased (note: 't' might already be hp_waiter). 
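+ // If 't' now out-prioritizes this queue's hp_waiter, update hp_waiter,
+ // reposition the queue's node in the owner's hp_blocked_tasks heap, and,
+ // if the owner's effective priority changed as a result, propagate the
+ // increase via nested_increase_prio() (which also releases sem->lock).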
+ if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + struct task_struct *new_prio = NULL; + struct task_struct *owner = fq->owner; + + if(fq->hp_waiter) + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", + fq->hp_waiter->comm, fq->hp_waiter->pid); + else + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); + + if(owner) + { + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + +// TRACE_TASK(owner, "Heap Before:\n"); +// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + fq->hp_waiter = t; + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); + + binheap_decrease(&fq->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks); + +// TRACE_TASK(owner, "Heap After:\n"); +// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0); + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if(new_max_eff_prio != old_max_eff_prio) { + TRACE_TASK(t, "is new hp_waiter.\n"); + + if ((effective_priority(owner) == old_max_eff_prio) || + (litmus->__compare(new_max_eff_prio, BASE, + owner, EFFECTIVE))){ + new_prio = new_max_eff_prio; + } + } + else { + TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); + } + + if(new_prio) { + // set new inheritance and propagate + TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n", + owner->comm, owner->pid, + new_prio->comm, new_prio->pid); + litmus->nested_increase_prio(owner, new_prio, &sem->lock, + flags); // unlocks lock. + } + else { + TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n", + new_max_eff_prio->comm, new_max_eff_prio->pid); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&sem->lock, flags); + } + } + else { + fq->hp_waiter = t; + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); + + TRACE_TASK(t, "no owner.\n"); + unlock_fine_irqrestore(&sem->lock, flags); + } + } + else { + TRACE_TASK(t, "hp_waiter is unaffected.\n"); + unlock_fine_irqrestore(&sem->lock, flags); + } +} + +// hp_waiter has decreased +static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq, + struct ikglp_semaphore *sem, + unsigned long flags) +{ + struct task_struct *owner = fq->owner; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + if(!owner) { + TRACE_CUR("No owner. Returning.\n"); + unlock_fine_irqrestore(&sem->lock, flags); + return; + } + + TRACE_CUR("ikglp_refresh_owners_prio_decrease\n"); + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks); + fq->nest.hp_waiter_eff_prio = fq->hp_waiter; + binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if((old_max_eff_prio != new_max_eff_prio) && + (effective_priority(owner) == old_max_eff_prio)) + { + // Need to set new effective_priority for owner + struct task_struct *decreased_prio; + + TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", + ikglp_get_idx(sem, fq)); + + if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { + TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n", + (new_max_eff_prio) ? 
new_max_eff_prio->comm : "nil", + (new_max_eff_prio) ? new_max_eff_prio->pid : -1, + owner->comm, + owner->pid, + ikglp_get_idx(sem, fq)); + + decreased_prio = new_max_eff_prio; + } + else { + TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n", + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", + (new_max_eff_prio) ? new_max_eff_prio->pid : -1, + owner->comm, + owner->pid, + ikglp_get_idx(sem, fq)); + + decreased_prio = NULL; + } + + // beware: recursion + litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock + } + else { + TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&sem->lock, flags); + } +} + + +static void ikglp_remove_donation_from_owner(struct binheap_node *n, + struct fifo_queue *fq, + struct ikglp_semaphore *sem, + unsigned long flags) +{ + struct task_struct *owner = fq->owner; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + BUG_ON(!owner); + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks); + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if((old_max_eff_prio != new_max_eff_prio) && + (effective_priority(owner) == old_max_eff_prio)) + { + // Need to set new effective_priority for owner + struct task_struct *decreased_prio; + + TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", + ikglp_get_idx(sem, fq)); + + if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { + TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n", + ikglp_get_idx(sem, fq)); + decreased_prio = new_max_eff_prio; + } + else { + TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n", + ikglp_get_idx(sem, fq)); + decreased_prio = NULL; + } + + // beware: recursion + litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock + } + else { + TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&sem->lock, flags); + } +} + +static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t, + struct binheap_node *n) +{ + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); + + binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks); + + new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); + + if((old_max_eff_prio != new_max_eff_prio) && + (effective_priority(t) == old_max_eff_prio)) + { + // Need to set new effective_priority for owner + struct task_struct *decreased_prio; + + if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) { + decreased_prio = new_max_eff_prio; + } + else { + decreased_prio = NULL; + } + + tsk_rt(t)->inh_task = decreased_prio; + } + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); +} + +static void ikglp_get_immediate(struct task_struct* t, + struct fifo_queue *fq, + struct ikglp_semaphore *sem, + unsigned long flags) +{ + // resource available now + TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq)); + + fq->owner = t; + + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + binheap_add(&fq->nest.hp_binheap_node, 
&tsk_rt(t)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); + + ++(fq->count); + + ikglp_add_global_list(sem, t, &fq->global_heap_node); + ikglp_add_donees(sem, fq, t, &fq->donee_heap_node); + + sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); + sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t); + } +#endif + + unlock_fine_irqrestore(&sem->lock, flags); +} + + + + + +static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem, + struct fifo_queue* fq, + struct task_struct* t, + wait_queue_t *wait, + ikglp_heap_node_t *global_heap_node, + ikglp_donee_heap_node_t *donee_heap_node) +{ + /* resource is not free => must suspend and wait */ + TRACE_TASK(t, "Enqueuing on fq %d.\n", + ikglp_get_idx(sem, fq)); + + init_waitqueue_entry(wait, t); + + __add_wait_queue_tail_exclusive(&fq->wait, wait); + + ++(fq->count); + ++(sem->nr_in_fifos); + + // update global list. + if(likely(global_heap_node)) { + if(binheap_is_in_heap(&global_heap_node->node)) { + WARN_ON(1); + ikglp_del_global_list(sem, t, global_heap_node); + } + ikglp_add_global_list(sem, t, global_heap_node); + } + // update donor eligiblity list. + if(likely(donee_heap_node)) { +// if(binheap_is_in_heap(&donee_heap_node->node)) { +// WARN_ON(1); +// } + ikglp_add_donees(sem, fq, t, donee_heap_node); + } + + if(sem->shortest_fifo_queue == fq) { + sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); + } +#endif + + TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); +} + + +static void ikglp_enqueue_on_fq( + struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *wait, + unsigned long flags) +{ + /* resource is not free => must suspend and wait */ + TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n", + ikglp_get_idx(sem, fq)); + + INIT_BINHEAP_NODE(&wait->global_heap_node.node); + INIT_BINHEAP_NODE(&wait->donee_heap_node.node); + + __ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node, + &wait->global_heap_node, &wait->donee_heap_node); + + ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock +} + + +static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, + ikglp_wait_state_t *wait) +{ + TRACE_TASK(wait->task, "goes to PQ.\n"); + + wait->pq_node.task = wait->task; // copy over task (little redundant...) 
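+ // The PQ holds blocked requests that are in neither a FIFO queue nor the
+ // donor heap; it is a max-heap on base priority (see
+ // ikglp_max_heap_base_priority_order), so the strongest PQ request is the
+ // first promoted to a FIFO queue when a slot frees up in ikglp_unlock().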
+ + binheap_add(&wait->pq_node.node, &sem->priority_queue, + ikglp_heap_node_t, node); +} + +static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, + ikglp_wait_state_t *wait) +{ + INIT_BINHEAP_NODE(&wait->global_heap_node.node); + INIT_BINHEAP_NODE(&wait->donee_heap_node.node); + INIT_BINHEAP_NODE(&wait->pq_node.node); + + __ikglp_enqueue_on_pq(sem, wait); +} + +static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, + ikglp_wait_state_t* wait, + unsigned long flags) +{ + struct task_struct *t = wait->task; + ikglp_donee_heap_node_t *donee_node = NULL; + struct task_struct *donee; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + struct task_struct *new_prio = NULL; + + INIT_BINHEAP_NODE(&wait->global_heap_node.node); + INIT_BINHEAP_NODE(&wait->donee_heap_node.node); + INIT_BINHEAP_NODE(&wait->pq_node.node); + INIT_BINHEAP_NODE(&wait->node); + +// TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid); +// TRACE_CUR("donors Before:\n"); +// print_donors(sem->donors.root, 1); + + // Add donor to the global list. + ikglp_add_global_list(sem, t, &wait->global_heap_node); + + // Select a donee +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + donee_node = (sem->aff_obs) ? + sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) : + binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); +#else + donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); +#endif + + donee = donee_node->task; + + TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); + + TRACE_CUR("Temporarily removing %s/%d to donee list.\n", + donee->comm, donee->pid); +// TRACE_CUR("donees Before:\n"); +// print_donees(sem, sem->donees.root, 1); + + //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly + binheap_delete(&donee_node->node, &sem->donees); + +// TRACE_CUR("donees After:\n"); +// print_donees(sem, sem->donees.root, 1); + + + wait->donee_info = donee_node; + + // Add t to donor heap. + binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node); + + // Now adjust the donee's priority. + + // Lock the donee's inheritance heap. + raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); + + if(donee_node->donor_info) { + // Steal donation relation. Evict old donor to PQ. + + // Remove old donor from donor heap + ikglp_wait_state_t *old_wait = donee_node->donor_info; + struct task_struct *old_donor = old_wait->task; + + TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n", + donee->comm, donee->pid, old_donor->comm, old_donor->pid); + + binheap_delete(&old_wait->node, &sem->donors); + + // Remove donation from donee's inheritance heap. + binheap_delete(&old_wait->prio_donation.hp_binheap_node, + &tsk_rt(donee)->hp_blocked_tasks); + // WARNING: have not updated inh_prio! + + // Add old donor to PQ. + __ikglp_enqueue_on_pq(sem, old_wait); + + // Remove old donor from the global heap. 
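+ // (The old donor re-enters the global list later, via __ikglp_enqueue_on_fq(),
+ // if it is ever promoted from the PQ to a FIFO queue.)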
+ ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node); + } + + // Add back donee's node to the donees heap with increased prio + donee_node->donor_info = wait; + INIT_BINHEAP_NODE(&donee_node->node); + + + TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid); +// TRACE_CUR("donees Before:\n"); +// print_donees(sem, sem->donees.root, 1); + + binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node); + +// TRACE_CUR("donees After:\n"); +// print_donees(sem, sem->donees.root, 1); + + // Add an inheritance/donation to the donee's inheritance heap. + wait->prio_donation.lock = (struct litmus_lock*)sem; + wait->prio_donation.hp_waiter_eff_prio = t; + wait->prio_donation.hp_waiter_ptr = NULL; + INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node); + + binheap_add(&wait->prio_donation.hp_binheap_node, + &tsk_rt(donee)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + + new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); + + if(new_max_eff_prio != old_max_eff_prio) { + if ((effective_priority(donee) == old_max_eff_prio) || + (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ + TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", + donee->comm, donee->pid); + new_prio = new_max_eff_prio; + } +// else { +// // should be bug. donor would not be in top-m. +// TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid); +// WARN_ON(1); +// } +// } +// else { +// // should be bug. donor would not be in top-m. +// TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid); +// WARN_ON(1); + } + + if(new_prio) { + struct fifo_queue *donee_fq = donee_node->fq; + + if(donee != donee_fq->owner) { + TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n", + donee->comm, donee->pid, + donee_fq->owner->comm, donee_fq->owner->pid); + + raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); + ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock + } + else { + TRACE_TASK(t, "%s/%d is the owner. Progatating priority immediatly.\n", + donee->comm, donee->pid); + litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock + } + } + else { + TRACE_TASK(t, "No change in effective priority (it is %d/%s). BUG?\n", + new_max_eff_prio->comm, new_max_eff_prio->pid); + raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&sem->lock, flags); + } + + +// TRACE_CUR("donors After:\n"); +// print_donors(sem->donors.root, 1); +} + +int ikglp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct ikglp_semaphore *sem = ikglp_from_lock(l); + unsigned long flags = 0, real_flags; + struct fifo_queue *fq = NULL; + int replica = -EINVAL; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock; +#endif + + ikglp_wait_state_t wait; + + if (!is_realtime(t)) + return -EPERM; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + raw_spin_lock_irqsave(&sem->real_lock, real_flags); + + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&sem->lock, flags); + + if(sem->nr_in_fifos < sem->m) { + // enqueue somwhere +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + fq = (sem->aff_obs) ? 
+ sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : + sem->shortest_fifo_queue; +#else + fq = sem->shortest_fifo_queue; +#endif + if(fq->count == 0) { + // take available resource + replica = ikglp_get_idx(sem, fq); + + ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock + + unlock_global_irqrestore(dgl_lock, flags); + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); + goto acquired; + } + else { + wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! + + tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked + mb(); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock + } + } + else { + // donor! + wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! + + tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked + mb(); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { + // enqueue on PQ + ikglp_enqueue_on_pq(sem, &wait); + unlock_fine_irqrestore(&sem->lock, flags); + } + else { + // enqueue as donor + ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock + } + } + + unlock_global_irqrestore(dgl_lock, flags); + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); + + TS_LOCK_SUSPEND; + + schedule(); + + TS_LOCK_RESUME; + + fq = ikglp_get_queue(sem, t); + BUG_ON(!fq); + + replica = ikglp_get_idx(sem, fq); + +acquired: + TRACE_CUR("Acquired lock %d, queue %d\n", + l->ident, replica); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq); + } +#endif + + return replica; +} + +//int ikglp_lock(struct litmus_lock* l) +//{ +// struct task_struct* t = current; +// struct ikglp_semaphore *sem = ikglp_from_lock(l); +// unsigned long flags = 0, real_flags; +// struct fifo_queue *fq = NULL; +// int replica = -EINVAL; +// +//#ifdef CONFIG_LITMUS_DGL_SUPPORT +// raw_spinlock_t *dgl_lock; +//#endif +// +// ikglp_wait_state_t wait; +// +// if (!is_realtime(t)) +// return -EPERM; +// +//#ifdef CONFIG_LITMUS_DGL_SUPPORT +// dgl_lock = litmus->get_dgl_spinlock(t); +//#endif +// +// raw_spin_lock_irqsave(&sem->real_lock, real_flags); +// +// lock_global_irqsave(dgl_lock, flags); +// lock_fine_irqsave(&sem->lock, flags); +// +// +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +// fq = (sem->aff_obs) ? +// sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : +// sem->shortest_fifo_queue; +//#else +// fq = sem->shortest_fifo_queue; +//#endif +// +// if(fq->count == 0) { +// // take available resource +// replica = ikglp_get_idx(sem, fq); +// +// ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock +// +// unlock_global_irqrestore(dgl_lock, flags); +// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); +// } +// else +// { +// // we have to suspend. +// +// wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! +// +// tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked +// mb(); +// +// /* FIXME: interruptible would be nice some day */ +// set_task_state(t, TASK_UNINTERRUPTIBLE); +// +// if(fq->count < sem->max_fifo_len) { +// // enqueue on fq +// ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock +// } +// else { +// +// TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n"); +// +// // no room in fifos. Go to PQ or donors. 
+// +// if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { +// // enqueue on PQ +// ikglp_enqueue_on_pq(sem, &wait); +// unlock_fine_irqrestore(&sem->lock, flags); +// } +// else { +// // enqueue as donor +// ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock +// } +// } +// +// unlock_global_irqrestore(dgl_lock, flags); +// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); +// +// TS_LOCK_SUSPEND; +// +// schedule(); +// +// TS_LOCK_RESUME; +// +// fq = ikglp_get_queue(sem, t); +// BUG_ON(!fq); +// +// replica = ikglp_get_idx(sem, fq); +// } +// +// TRACE_CUR("Acquired lock %d, queue %d\n", +// l->ident, replica); +// +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +// if(sem->aff_obs) { +// return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq); +// } +//#endif +// +// return replica; +//} + +static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *donor_info) +{ + struct task_struct *t = donor_info->task; + + TRACE_CUR("Donor %s/%d being moved to fq %d\n", + t->comm, + t->pid, + ikglp_get_idx(sem, fq)); + + binheap_delete(&donor_info->node, &sem->donors); + + __ikglp_enqueue_on_fq(sem, fq, t, + &donor_info->fq_node, + NULL, // already in global_list, so pass null to prevent adding 2nd time. + &donor_info->donee_heap_node); + + // warning: + // ikglp_update_owners_prio(t, fq, sem, flags) has not been called. +} + +static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *wait) +{ + struct task_struct *t = wait->task; + + TRACE_CUR("PQ request %s/%d being moved to fq %d\n", + t->comm, + t->pid, + ikglp_get_idx(sem, fq)); + + binheap_delete(&wait->pq_node.node, &sem->priority_queue); + + __ikglp_enqueue_on_fq(sem, fq, t, + &wait->fq_node, + &wait->global_heap_node, + &wait->donee_heap_node); + // warning: + // ikglp_update_owners_prio(t, fq, sem, flags) has not been called. +} + +static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal( + struct ikglp_semaphore* sem) +{ + /* must hold sem->lock */ + + struct fifo_queue *fq = NULL; + struct list_head *pos; + struct task_struct *queued; + int i; + + for(i = 0; i < sem->nr_replicas; ++i) { + if( (sem->fifo_queues[i].count > 1) && + (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { + + TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n", + ikglp_get_idx(sem, &sem->fifo_queues[i]), + sem->fifo_queues[i].hp_waiter->comm, + sem->fifo_queues[i].hp_waiter->pid, + (fq) ? ikglp_get_idx(sem, fq) : -1, + (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX", + (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2); + + fq = &sem->fifo_queues[i]; + + WARN_ON(!(fq->hp_waiter)); + } + } + + if(fq) { + struct task_struct *max_hp = fq->hp_waiter; + ikglp_wait_state_t* ret = NULL; + + TRACE_CUR("Searching for %s/%d on fq %d\n", + max_hp->comm, + max_hp->pid, + ikglp_get_idx(sem, fq)); + + BUG_ON(!max_hp); + + list_for_each(pos, &fq->wait.task_list) { + wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list); + + queued = (struct task_struct*) wait->private; + + TRACE_CUR("fq %d entry: %s/%d\n", + ikglp_get_idx(sem, fq), + queued->comm, + queued->pid); + + /* Compare task prios, find high prio task. 
*/ + if (queued == max_hp) { + TRACE_CUR("Found it!\n"); + ret = container_of(wait, ikglp_wait_state_t, fq_node); + } + } + + WARN_ON(!ret); + return ret; + } + + return(NULL); +} + +static void ikglp_steal_to_fq(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *fq_wait) +{ + struct task_struct *t = fq_wait->task; + struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq; + + TRACE_CUR("FQ request %s/%d being moved to fq %d\n", + t->comm, + t->pid, + ikglp_get_idx(sem, fq)); + + fq_wait->donee_heap_node.fq = fq; // just to be safe + + + __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node); + --(fq_steal->count); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t); + } +#endif + + if(t == fq_steal->hp_waiter) { + fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL); + TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", + ikglp_get_idx(sem, fq_steal), + (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil", + (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1); + } + + + // Update shortest. + if(fq_steal->count < sem->shortest_fifo_queue->count) { + sem->shortest_fifo_queue = fq_steal; + } + + __ikglp_enqueue_on_fq(sem, fq, t, + &fq_wait->fq_node, + NULL, + NULL); + + // warning: We have not checked the priority inheritance of fq's owner yet. +} + + +static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *old_wait) +{ + struct task_struct *t = old_wait->task; + + BUG_ON(old_wait->donee_heap_node.fq != fq); + + TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n", + ikglp_get_idx(sem, fq)); + + // need to migrate global_heap_node and donee_heap_node off of the stack + // to the nodes allocated for the owner of this fq. + + // TODO: Enhance binheap() to perform this operation in place. + + ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove + fq->global_heap_node = old_wait->global_heap_node; // copy + ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add + + binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove + fq->donee_heap_node = old_wait->donee_heap_node; // copy + + if(fq->donee_heap_node.donor_info) { + // let donor know that our location has changed + BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link + fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node; + } + INIT_BINHEAP_NODE(&fq->donee_heap_node.node); + binheap_add(&fq->donee_heap_node.node, &sem->donees, + ikglp_donee_heap_node_t, node); // re-add +} + +int ikglp_unlock(struct litmus_lock* l) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(l); + struct task_struct *t = current; + struct task_struct *donee = NULL; + struct task_struct *next = NULL; + struct task_struct *new_on_fq = NULL; + struct fifo_queue *fq_of_new_on_fq = NULL; + + ikglp_wait_state_t *other_donor_info = NULL; + struct fifo_queue *to_steal = NULL; + int need_steal_prio_reeval = 0; + struct fifo_queue *fq; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock; +#endif + + unsigned long flags = 0, real_flags; + + int err = 0; + + fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner. 
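+ // Only the task currently holding a replica may release it; any other
+ // caller is rejected with -EINVAL below.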
+ + if (!fq) { + err = -EINVAL; + goto out; + } + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_lock = litmus->get_dgl_spinlock(t); +#endif + raw_spin_lock_irqsave(&sem->real_lock, real_flags); + + lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper + lock_fine_irqsave(&sem->lock, flags); + + TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); + + + // Remove 't' from the heaps, but data in nodes will still be good. + ikglp_del_global_list(sem, t, &fq->global_heap_node); + binheap_delete(&fq->donee_heap_node.node, &sem->donees); + + fq->owner = NULL; // no longer owned!! + --(fq->count); + if(fq->count < sem->shortest_fifo_queue->count) { + sem->shortest_fifo_queue = fq; + } + --(sem->nr_in_fifos); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); + sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t); + } +#endif + + // Move the next request into the FQ and update heaps as needed. + // We defer re-evaluation of priorities to later in the function. + if(fq->donee_heap_node.donor_info) { // move my donor to FQ + ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info; + + new_on_fq = donor_info->task; + + // donor moved to FQ + donee = t; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { + fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); + if(fq_of_new_on_fq->count == 0) { + // ignore it? +// fq_of_new_on_fq = fq; + } + } + else { + fq_of_new_on_fq = fq; + } +#else + fq_of_new_on_fq = fq; +#endif + + TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n", + new_on_fq->comm, new_on_fq->pid, + ikglp_get_idx(sem, fq_of_new_on_fq), + ikglp_get_idx(sem, fq)); + + + ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info); + } + else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ + // move other donor to FQ + // Select a donor +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + other_donor_info = (sem->aff_obs) ? + sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) : + binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); +#else + other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); +#endif + + new_on_fq = other_donor_info->task; + donee = other_donor_info->donee_info->task; + + // update the donee's heap position. + other_donor_info->donee_info->donor_info = NULL; // clear the cross-link + binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { + fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); + if(fq_of_new_on_fq->count == 0) { + // ignore it? 
+// fq_of_new_on_fq = fq; + } + } + else { + fq_of_new_on_fq = fq; + } +#else + fq_of_new_on_fq = fq; +#endif + + TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n", + new_on_fq->comm, new_on_fq->pid, + ikglp_get_idx(sem, fq_of_new_on_fq), + ikglp_get_idx(sem, fq)); + + ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info); + } + else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ + ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue, + ikglp_heap_node_t, node); + ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t, + pq_node); + + new_on_fq = pq_wait->task; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { + fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); + if(fq_of_new_on_fq->count == 0) { + // ignore it? +// fq_of_new_on_fq = fq; + } + } + else { + fq_of_new_on_fq = fq; + } +#else + fq_of_new_on_fq = fq; +#endif + + TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n", + new_on_fq->comm, new_on_fq->pid, + ikglp_get_idx(sem, fq_of_new_on_fq), + ikglp_get_idx(sem, fq)); + + ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait); + } + else if(fq->count == 0) { // No PQ and this queue is empty, so steal. + ikglp_wait_state_t *fq_wait; + + TRACE_TASK(t, "Looking to steal a request for fq %d...\n", + ikglp_get_idx(sem, fq)); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + fq_wait = (sem->aff_obs) ? + sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) : + ikglp_find_hp_waiter_to_steal(sem); +#else + fq_wait = ikglp_find_hp_waiter_to_steal(sem); +#endif + + if(fq_wait) { + to_steal = fq_wait->donee_heap_node.fq; + + new_on_fq = fq_wait->task; + fq_of_new_on_fq = fq; + need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter); + + TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n", + new_on_fq->comm, new_on_fq->pid, + ikglp_get_idx(sem, to_steal), + ikglp_get_idx(sem, fq)); + + ikglp_steal_to_fq(sem, fq, fq_wait); + } + else { + TRACE_TASK(t, "Found nothing to steal for fq %d.\n", + ikglp_get_idx(sem, fq)); + } + } + else { // move no one + } + + // 't' must drop all priority and clean up data structures before hand-off. + + // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + { + int count = 0; + while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) { + binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + ++count; + } + litmus->decrease_prio(t, NULL); + WARN_ON(count > 2); // should not be greater than 2. only local fq inh and donation can be possible. + } + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); + + + + // Now patch up other priorities. + // + // At most one of the following: + // if(donee && donee != t), decrease prio, propagate to owner, or onward + // if(to_steal), update owner's prio (hp_waiter has already been set) + // + + BUG_ON((other_donor_info != NULL) && (to_steal != NULL)); + + if(other_donor_info) { + struct fifo_queue *other_fq = other_donor_info->donee_info->fq; + + BUG_ON(!donee); + BUG_ON(donee == t); + + TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n", + other_donor_info->task->comm, other_donor_info->task->pid, + donee->comm, donee->pid); + + // need to terminate donation relation. 
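+ // Two cases: (a) the donee already holds a replica, so the donation is
+ // removed from that owner's inheritance heap (possibly propagating a
+ // priority decrease), or (b) the donee is still blocked in a FIFO queue,
+ // so the donation is stripped from the waiter and that queue's hp_waiter
+ // and owner inheritance are re-evaluated.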
+ if(donee == other_fq->owner) { + TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n", + donee->comm, donee->pid, + ikglp_get_idx(sem, other_fq)); + + ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags); + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! + } + else { + TRACE_TASK(t, "Donee %s/%d is an blocked in of fq %d.\n", + donee->comm, donee->pid, + ikglp_get_idx(sem, other_fq)); + + ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node); + if(donee == other_fq->hp_waiter) { + TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n", + donee->comm, donee->pid, + ikglp_get_idx(sem, other_fq)); + + other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL); + TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", + ikglp_get_idx(sem, other_fq), + (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil", + (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1); + + ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it. + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! + } + } + } + else if(to_steal) { + TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n", + ikglp_get_idx(sem, to_steal)); + + if(need_steal_prio_reeval) { + ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it. + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! + } + } + + // check for new HP waiter. + if(new_on_fq) { + if(fq == fq_of_new_on_fq) { + // fq->owner is null, so just update the hp_waiter without locking. + if(new_on_fq == fq->hp_waiter) { + TRACE_TASK(t, "new_on_fq is already hp_waiter.\n", + fq->hp_waiter->comm, fq->hp_waiter->pid); + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure... + } + else if(litmus->compare(new_on_fq, fq->hp_waiter)) { + if(fq->hp_waiter) + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", + fq->hp_waiter->comm, fq->hp_waiter->pid); + else + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); + + fq->hp_waiter = new_on_fq; + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); + + TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", + ikglp_get_idx(sem, fq), + (fq->hp_waiter) ? fq->hp_waiter->comm : "nil", + (fq->hp_waiter) ? fq->hp_waiter->pid : -1); + } + } + else { + ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock. reacquire it. + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! + } + } + +wake_kludge: + if(waitqueue_active(&fq->wait)) + { + wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); + ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node); + next = (struct task_struct*) wait->private; + + __remove_wait_queue(&fq->wait, wait); + + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", + ikglp_get_idx(sem, fq), + next->comm, next->pid); + + // migrate wait-state to fifo-memory. 
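+ // (The wait-state was allocated on the waiter's stack in ikglp_lock();
+ // its global/donee heap nodes are copied into this fifo_queue so they
+ // remain valid after 'next' wakes up and its stack frame goes away.)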
+ ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait); + + /* next becomes the resouce holder */ + fq->owner = next; + tsk_rt(next)->blocked_lock = NULL; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next); + } +#endif + + /* determine new hp_waiter if necessary */ + if (next == fq->hp_waiter) { + + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL); + TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n", + ikglp_get_idx(sem, fq), + (fq->hp_waiter) ? fq->hp_waiter->comm : "nil", + (fq->hp_waiter) ? fq->hp_waiter->pid : -1); + + fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ? + effective_priority(fq->hp_waiter) : NULL; + + if (fq->hp_waiter) + TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n"); + else + TRACE("no further waiters\n"); + + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); + +// TRACE_TASK(next, "Heap Before:\n"); +// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0); + + binheap_add(&fq->nest.hp_binheap_node, + &tsk_rt(next)->hp_blocked_tasks, + struct nested_info, + hp_binheap_node); + +// TRACE_TASK(next, "Heap After:\n"); +// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0); + + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); + } + else { + /* Well, if 'next' is not the highest-priority waiter, + * then it (probably) ought to inherit the highest-priority + * waiter's priority. */ + TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n", + ikglp_get_idx(sem, fq), + (fq->hp_waiter) ? fq->hp_waiter->comm : "nil", + (fq->hp_waiter) ? fq->hp_waiter->pid : -1); + + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); + + binheap_add(&fq->nest.hp_binheap_node, + &tsk_rt(next)->hp_blocked_tasks, + struct nested_info, + hp_binheap_node); + + /* It is possible that 'next' *should* be the hp_waiter, but isn't + * because that update hasn't yet executed (update operation is + * probably blocked on mutex->lock). So only inherit if the top of + * 'next's top heap node is indeed the effective prio. of hp_waiter. + * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter) + * since the effective priority of hp_waiter can change (and the + * update has not made it to this lock).) + */ + if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) == + fq->nest.hp_waiter_eff_prio)) + { + if(fq->nest.hp_waiter_eff_prio) + litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio); + else + WARN_ON(1); + } + + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); + } + + + // wake up the new resource holder! + wake_up_process(next); + } + if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) { + // The guy we promoted when to an empty FQ. (Why didn't stealing pick this up?) + // Wake up the new guy too. 
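+ // Jump back through the hand-off path above with 'fq' pointed at that
+ // empty queue so its newly enqueued waiter becomes owner and is woken too.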
+ + BUG_ON(fq_of_new_on_fq->owner != NULL); + + fq = fq_of_new_on_fq; + fq_of_new_on_fq = NULL; + goto wake_kludge; + } + + unlock_fine_irqrestore(&sem->lock, flags); + unlock_global_irqrestore(dgl_lock, flags); + + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); + +out: + return err; +} + + + +int ikglp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct ikglp_semaphore *sem = ikglp_from_lock(l); + unsigned long flags; + + int owner = 0; + int i; + + raw_spin_lock_irqsave(&sem->real_lock, flags); + + for(i = 0; i < sem->nr_replicas; ++i) { + if(sem->fifo_queues[i].owner == t) { + owner = 1; + break; + } + } + + raw_spin_unlock_irqrestore(&sem->real_lock, flags); + + if (owner) + ikglp_unlock(l); + + return 0; +} + +void ikglp_free(struct litmus_lock* l) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(l); + + kfree(sem->fifo_queues); + kfree(sem); +} + + + +struct litmus_lock* ikglp_new(int m, + struct litmus_lock_ops* ops, + void* __user arg) +{ + struct ikglp_semaphore* sem; + int nr_replicas = 0; + int i; + + if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas))) + { + return(NULL); + } + if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas))) + { + return(NULL); + } + if(nr_replicas < 1) + { + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + return NULL; + } + + sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL); + if(!sem->fifo_queues) + { + kfree(sem); + return NULL; + } + + sem->litmus_lock.ops = ops; + +#ifdef CONFIG_DEBUG_SPINLOCK + { + __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key); + } +#else + raw_spin_lock_init(&sem->lock); +#endif + + raw_spin_lock_init(&sem->real_lock); + + sem->nr_replicas = nr_replicas; + sem->m = m; + sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0); + sem->nr_in_fifos = 0; + + TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n", + sem->m, + sem->nr_replicas, + sem->max_fifo_len); + + for(i = 0; i < nr_replicas; ++i) + { + struct fifo_queue* q = &(sem->fifo_queues[i]); + + q->owner = NULL; + q->hp_waiter = NULL; + init_waitqueue_head(&q->wait); + q->count = 0; + + q->global_heap_node.task = NULL; + INIT_BINHEAP_NODE(&q->global_heap_node.node); + + q->donee_heap_node.task = NULL; + q->donee_heap_node.donor_info = NULL; + q->donee_heap_node.fq = NULL; + INIT_BINHEAP_NODE(&q->donee_heap_node.node); + + q->nest.lock = (struct litmus_lock*)sem; + q->nest.hp_waiter_eff_prio = NULL; + q->nest.hp_waiter_ptr = &q->hp_waiter; + INIT_BINHEAP_NODE(&q->nest.hp_binheap_node); + } + + sem->shortest_fifo_queue = &sem->fifo_queues[0]; + + sem->top_m_size = 0; + + // init heaps + INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order); + INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order); + INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order); + INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order); + INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + sem->aff_obs = NULL; +#endif + + return &sem->litmus_lock; +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) +{ + int gpu = replica % aff->nr_rsrc; + return gpu; +} + +static inline int replica_to_gpu(struct ikglp_affinity* aff, int 
replica) +{ + int gpu = __replica_to_gpu(aff, replica) + aff->offset; + return gpu; +} + +static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu) +{ + int replica = gpu - aff->offset; + return replica; +} + + +int ikglp_aff_obs_close(struct affinity_observer* obs) +{ + return 0; +} + +void ikglp_aff_obs_free(struct affinity_observer* obs) +{ + struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs); + kfree(ikglp_aff->nr_cur_users_on_rsrc); + kfree(ikglp_aff->q_info); + kfree(ikglp_aff); +} + +static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops, + struct ikglp_affinity_ops* ikglp_ops, + void* __user args) +{ + struct ikglp_affinity* ikglp_aff; + struct gpu_affinity_observer_args aff_args; + struct ikglp_semaphore* sem; + int i; + unsigned long flags; + + if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { + return(NULL); + } + if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { + return(NULL); + } + + sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); + + if(sem->litmus_lock.type != IKGLP_SEM) { + TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); + return(NULL); + } + + if((aff_args.nr_simult_users <= 0) || + (sem->nr_replicas%aff_args.nr_simult_users != 0)) { + TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " + "(%d) per replica. #replicas should be evenly divisible " + "by #simult_users.\n", + sem->litmus_lock.ident, + sem->nr_replicas, + aff_args.nr_simult_users); + return(NULL); + } + + if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { + TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", + NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); +// return(NULL); + } + + ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); + if(!ikglp_aff) { + return(NULL); + } + + ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL); + if(!ikglp_aff->q_info) { + kfree(ikglp_aff); + return(NULL); + } + + ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); + if(!ikglp_aff->nr_cur_users_on_rsrc) { + kfree(ikglp_aff->q_info); + kfree(ikglp_aff); + return(NULL); + } + + affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs); + + ikglp_aff->ops = ikglp_ops; + ikglp_aff->offset = aff_args.replica_to_gpu_offset; + ikglp_aff->nr_simult = aff_args.nr_simult_users; + ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; + ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 
1 : 0; + + TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, " + "nr_rsrc = %d, relaxed_fifo_len = %d\n", + ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc, + ikglp_aff->relax_max_fifo_len); + + memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); + + for(i = 0; i < sem->nr_replicas; ++i) { + ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; + ikglp_aff->q_info[i].estimated_len = 0; + + // multiple q_info's will point to the same resource (aka GPU) if + // aff_args.nr_simult_users > 1 + ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; + } + + // attach observer to the lock + raw_spin_lock_irqsave(&sem->real_lock, flags); + sem->aff_obs = ikglp_aff; + raw_spin_unlock_irqrestore(&sem->real_lock, flags); + + return &ikglp_aff->obs; +} + + + + +static int gpu_replica_to_resource(struct ikglp_affinity* aff, + struct fifo_queue* fq) { + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + return(replica_to_gpu(aff, ikglp_get_idx(sem, fq))); +} + + +// Smart IKGLP Affinity + +//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) +//{ +// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); +// struct ikglp_queue_info *shortest = &aff->q_info[0]; +// int i; +// +// for(i = 1; i < sem->nr_replicas; ++i) { +// if(aff->q_info[i].estimated_len < shortest->estimated_len) { +// shortest = &aff->q_info[i]; +// } +// } +// +// return(shortest); +//} + +struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) +{ + // advise_enqueue must be smart as not not break IKGLP rules: + // * No queue can be greater than ceil(m/k) in length. We may return + // such a queue, but IKGLP will be smart enough as to send requests + // to donors or PQ. + // * Cannot let a queue idle if there exist waiting PQ/donors + // -- needed to guarantee parallel progress of waiters. + // + // We may be able to relax some of these constraints, but this will have to + // be carefully evaluated. + // + // Huristic strategy: Find the shortest queue that is not full. + + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + lt_t min_len; + int min_nr_users; + struct ikglp_queue_info *shortest; + struct fifo_queue *to_enqueue; + int i; + int affinity_gpu; + + int max_fifo_len = (aff->relax_max_fifo_len) ? + sem->m : sem->max_fifo_len; + + // simply pick the shortest queue if, we have no affinity, or we have + // affinity with the shortest + if(unlikely(tsk_rt(t)->last_gpu < 0)) { + affinity_gpu = aff->offset; // first gpu + TRACE_CUR("no affinity\n"); + } + else { + affinity_gpu = tsk_rt(t)->last_gpu; + } + + // all things being equal, let's start with the queue with which we have + // affinity. 
this helps us maintain affinity even when we don't have + // an estiamte for local-affinity execution time (i.e., 2nd time on GPU) + shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; + + // if(shortest == aff->shortest_queue) { + // TRACE_CUR("special case: have affinity with shortest queue\n"); + // goto out; + // } + + min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); + min_nr_users = *(shortest->nr_cur_users); + + TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", + get_gpu_estimate(t, MIG_LOCAL), + ikglp_get_idx(sem, shortest->q), + shortest->q->count, + min_len); + + for(i = 0; i < sem->nr_replicas; ++i) { + if(&aff->q_info[i] != shortest) { + if(aff->q_info[i].q->count < max_fifo_len) { + + lt_t est_len = + aff->q_info[i].estimated_len + + get_gpu_estimate(t, + gpu_migration_distance(tsk_rt(t)->last_gpu, + replica_to_gpu(aff, i))); + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. + if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */ + (est_len < min_len) || /* i-th queue has shortest length */ + ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */ + (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + + shortest = &aff->q_info[i]; + min_len = est_len; + min_nr_users = *(aff->q_info[i].nr_cur_users); + } + + TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", + get_gpu_estimate(t, + gpu_migration_distance(tsk_rt(t)->last_gpu, + replica_to_gpu(aff, i))), + ikglp_get_idx(sem, aff->q_info[i].q), + aff->q_info[i].q->count, + est_len); + } + else { + TRACE_CUR("queue %d is too long. ineligible for enqueue.\n", + ikglp_get_idx(sem, aff->q_info[i].q)); + } + } + } + + if(shortest->q->count >= max_fifo_len) { + TRACE_CUR("selected fq %d is too long, but returning it anyway.\n", + ikglp_get_idx(sem, shortest->q)); + } + + to_enqueue = shortest->q; + TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n", + ikglp_get_idx(sem, to_enqueue), + to_enqueue->count, + ikglp_get_idx(sem, sem->shortest_fifo_queue)); + + return to_enqueue; + + //return(sem->shortest_fifo_queue); +} + + + + +static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff, + int dest_gpu, + struct fifo_queue* fq) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_wait_state_t *wait = NULL; + int max_improvement = -(MIG_NONE+1); + int replica = ikglp_get_idx(sem, fq); + + if(waitqueue_active(&fq->wait)) { + int this_gpu = replica_to_gpu(aff, replica); + struct list_head *pos; + + list_for_each(pos, &fq->wait.task_list) { + wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); + ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); + + int tmp_improvement = + gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) - + gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu); + + if(tmp_improvement > max_improvement) { + wait = tmp_wait; + max_improvement = tmp_improvement; + + if(max_improvement >= (MIG_NONE-1)) { + goto out; + } + } + } + + BUG_ON(!wait); + } + else { + TRACE_CUR("fq %d is empty!\n", replica); + } + +out: + + TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n", + replica, + (wait) ? wait->task->comm : "nil", + (wait) ? 
wait->task->pid : -1, + max_improvement); + + return wait; +} + + +ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff, + struct fifo_queue* dst) +{ + // Huristic strategy: Find task with greatest improvement in affinity. + // + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_wait_state_t *to_steal_state = NULL; +// ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem); + int max_improvement = -(MIG_NONE+1); + int replica, i; + int dest_gpu; + + replica = ikglp_get_idx(sem, dst); + dest_gpu = replica_to_gpu(aff, replica); + + for(i = 0; i < sem->nr_replicas; ++i) { + ikglp_wait_state_t *tmp_to_steal_state = + pick_steal(aff, dest_gpu, &sem->fifo_queues[i]); + + if(tmp_to_steal_state) { + int tmp_improvement = + gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) - + gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu); + + if(tmp_improvement > max_improvement) { + to_steal_state = tmp_to_steal_state; + max_improvement = tmp_improvement; + + if(max_improvement >= (MIG_NONE-1)) { + goto out; + } + } + } + } + +out: + if(!to_steal_state) { + TRACE_CUR("Could not find anyone to steal.\n"); + } + else { + TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n", + to_steal_state->task->comm, to_steal_state->task->pid, + ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq), + replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)), + ikglp_get_idx(sem, dst), + dest_gpu, + max_improvement); + +// TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n", +// default_to_steal_state->task->comm, default_to_steal_state->task->pid, +// ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq), +// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)), +// ikglp_get_idx(sem, dst), +// replica_to_gpu(aff, ikglp_get_idx(sem, dst)), +// +// gpu_migration_distance( +// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)), +// tsk_rt(default_to_steal_state->task)->last_gpu) - +// gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu)); + } + + return(to_steal_state); +} + + +static inline int has_donor(wait_queue_t* fq_wait) +{ + ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); + return(wait->donee_heap_node.donor_info != NULL); +} + +static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff, + struct fifo_queue* fq, + int* dist_from_head) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + struct task_struct *donee; + ikglp_donee_heap_node_t *donee_node; + struct task_struct *mth_highest = ikglp_mth_highest(sem); + +// lt_t now = litmus_clock(); +// +// TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? 
", +// ikglp_get_idx(sem, fq), +// mth_highest->comm, mth_highest->pid, +// (int)get_deadline(mth_highest) - now); + + if(fq->owner && + fq->donee_heap_node.donor_info == NULL && + mth_highest != fq->owner && + litmus->__compare(mth_highest, BASE, fq->owner, BASE)) { + donee = fq->owner; + donee_node = &(fq->donee_heap_node); + *dist_from_head = 0; + + BUG_ON(donee != donee_node->task); + + TRACE_CUR("picked owner of fq %d as donee\n", + ikglp_get_idx(sem, fq)); + + goto out; + } + else if(waitqueue_active(&fq->wait)) { + struct list_head *pos; + + +// TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d " +// "(mth_highest != fq->owner) = %d " +// "(mth_highest > fq->owner) = %d\n", +// ikglp_get_idx(sem, fq), +// (fq->owner) ? fq->owner->comm : "nil", +// (fq->owner) ? fq->owner->pid : -1, +// (fq->owner) ? (int)get_deadline(fq->owner) - now : -999, +// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil", +// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1, +// (mth_highest != fq->owner), +// (litmus->__compare(mth_highest, BASE, fq->owner, BASE))); + + + *dist_from_head = 1; + + // iterating from the start of the queue is nice since this means + // the donee will be closer to obtaining a resource. + list_for_each(pos, &fq->wait.task_list) { + wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); + ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); + +// TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d " +// "(mth_highest != wait->task) = %d " +// "(mth_highest > wait->task) = %d\n", +// ikglp_get_idx(sem, fq), +// dist_from_head, +// wait->task->comm, wait->task->pid, +// (int)get_deadline(wait->task) - now, +// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil", +// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1, +// (mth_highest != wait->task), +// (litmus->__compare(mth_highest, BASE, wait->task, BASE))); + + + if(!has_donor(fq_wait) && + mth_highest != wait->task && + litmus->__compare(mth_highest, BASE, wait->task, BASE)) { + donee = (struct task_struct*) fq_wait->private; + donee_node = &wait->donee_heap_node; + + BUG_ON(donee != donee_node->task); + + TRACE_CUR("picked waiter in fq %d as donee\n", + ikglp_get_idx(sem, fq)); + + goto out; + } + ++(*dist_from_head); + } + } + + donee = NULL; + donee_node = NULL; + //*dist_from_head = sem->max_fifo_len + 1; + *dist_from_head = IKGLP_INVAL_DISTANCE; + + TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); + +out: + + TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n", + ikglp_get_idx(sem, fq), + (donee) ? (donee)->comm : "nil", + (donee) ? (donee)->pid : -1, + *dist_from_head); + + return donee_node; +} + +ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection( + struct ikglp_affinity* aff, + struct task_struct* donor) +{ + // Huristic strategy: Find the highest-priority donee that is waiting on + // a queue closest to our affinity. (1) The donee CANNOT already have a + // donor (exception: donee is the lowest-prio task in the donee heap). + // (2) Requests in 'top_m' heap are ineligible. + // + // Further strategy: amongst elible donees waiting for the same GPU, pick + // the one closest to the head of the FIFO queue (including owners). 
+ // + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_donee_heap_node_t *donee_node; + gpu_migration_dist_t distance; + int start, i, j; + + ikglp_donee_heap_node_t *default_donee; + ikglp_wait_state_t *default_donee_donor_info; + + if(tsk_rt(donor)->last_gpu < 0) { + // no affinity. just return the min prio, like standard IKGLP + // TODO: Find something closer to the head of the queue?? + donee_node = binheap_top_entry(&sem->donees, + ikglp_donee_heap_node_t, + node); + goto out; + } + + + // Temporarily break any donation relation the default donee (the lowest + // prio task in the FIFO queues) to make it eligible for selection below. + // + // NOTE: The original donor relation *must* be restored, even if we select + // the default donee throug affinity-aware selection, before returning + // from this function so we don't screw up our heap ordering. + // The standard IKGLP algorithm will steal the donor relationship if needed. + default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); + default_donee_donor_info = default_donee->donor_info; // back-up donor relation + default_donee->donor_info = NULL; // temporarily break any donor relation. + + // initialize our search + donee_node = NULL; + distance = MIG_NONE; + + // TODO: The below search logic may work well for locating nodes to steal + // when an FQ goes idle. Validate this code and apply it to stealing. + + // begin search with affinity GPU. + start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu); + i = start; + do { // "for each gpu" / "for each aff->nr_rsrc" + gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i); + + // only interested in queues that will improve our distance + if(temp_distance < distance || donee_node == NULL) { + int dist_from_head = IKGLP_INVAL_DISTANCE; + + TRACE_CUR("searching for donor on GPU %d", i); + + // visit each queue and pick a donee. bail as soon as we find + // one for this class. + + for(j = 0; j < aff->nr_simult; ++j) { + int temp_dist_from_head; + ikglp_donee_heap_node_t *temp_donee_node; + struct fifo_queue *fq; + + fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]); + temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head); + + if(temp_dist_from_head < dist_from_head) + { + // we check all the FQs for this GPU to spread priorities + // out across the queues. does this decrease jitter? + donee_node = temp_donee_node; + dist_from_head = temp_dist_from_head; + } + } + + if(dist_from_head != IKGLP_INVAL_DISTANCE) { + TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n", + donee_node->task->comm, donee_node->task->pid, + dist_from_head); + } + else { + TRACE_CUR("found no eligible donors from GPU %d\n", i); + } + } + else { + TRACE_CUR("skipping GPU %d (distance = %d, best donor " + "distance = %d)\n", i, temp_distance, distance); + } + + i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around + } while (i != start); + + + // restore old donor info state. + default_donee->donor_info = default_donee_donor_info; + + if(!donee_node) { + donee_node = default_donee; + + TRACE_CUR("Could not find a donee. 
We have to steal one.\n"); + WARN_ON(default_donee->donor_info == NULL); + } + +out: + + TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n", + donee_node->task->comm, donee_node->task->pid, + ikglp_get_idx(sem, donee_node->fq), + replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)), + donor->comm, donor->pid, tsk_rt(donor)->last_gpu); + + return(donee_node); +} + + + +static void __find_closest_donor(int target_gpu, + struct binheap_node* donor_node, + ikglp_wait_state_t** cur_closest, + int* cur_dist) +{ + ikglp_wait_state_t *this_donor = + binheap_entry(donor_node, ikglp_wait_state_t, node); + + int this_dist = + gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu); + +// TRACE_CUR("%s/%d: dist from target = %d\n", +// this_donor->task->comm, +// this_donor->task->pid, +// this_dist); + + if(this_dist < *cur_dist) { + // take this donor + *cur_dist = this_dist; + *cur_closest = this_donor; + } + else if(this_dist == *cur_dist) { + // priority tie-break. Even though this is a pre-order traversal, + // this is a heap, not a binary tree, so we still need to do a priority + // comparision. + if(!(*cur_closest) || + litmus->compare(this_donor->task, (*cur_closest)->task)) { + *cur_dist = this_dist; + *cur_closest = this_donor; + } + } + + if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist); + if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist); +} + +ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) +{ + // Huristic strategy: Find donor with the closest affinity to fq. + // Tie-break on priority. + + // We need to iterate over all the donors to do this. Unfortunatly, + // our donors are organized in a heap. We'll visit each node with a + // recurisve call. This is realitively safe since there are only sem->m + // donors, at most. We won't recurse too deeply to have to worry about + // our stack. (even with 128 CPUs, our nest depth is at most 7 deep). + + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_wait_state_t *donor = NULL; + int distance = MIG_NONE; + int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq)); + ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); + + __find_closest_donor(gpu, sem->donors.root, &donor, &distance); + + TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d " + "(non-aff wanted %s/%d). 
differs = %d\n", + donor->task->comm, donor->task->pid, + distance, + ikglp_get_idx(sem, fq), + default_donor->task->comm, default_donor->task->pid, + (donor->task != default_donor->task) + ); + + return(donor); +} + + + +void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + struct ikglp_queue_info *info = &aff->q_info[replica]; + lt_t est_time; + lt_t est_len_before; + + if(current == t) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } + + est_len_before = info->estimated_len; + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + info->estimated_len += est_time; + + TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", + ikglp_get_idx(sem, info->q), + est_len_before, est_time, + info->estimated_len); + + // if(aff->shortest_queue == info) { + // // we may no longer be the shortest + // aff->shortest_queue = ikglp_aff_find_shortest(aff); + // + // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", + // ikglp_get_idx(sem, aff->shortest_queue->q), + // aff->shortest_queue->q->count, + // aff->shortest_queue->estimated_len); + // } +} + +void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + struct ikglp_queue_info *info = &aff->q_info[replica]; + lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + if(est_time > info->estimated_len) { + WARN_ON(1); + info->estimated_len = 0; + } + else { + info->estimated_len -= est_time; + } + + TRACE_CUR("fq %d est len is now %llu\n", + ikglp_get_idx(sem, info->q), + info->estimated_len); + + // check to see if we're the shortest queue now. + // if((aff->shortest_queue != info) && + // (aff->shortest_queue->estimated_len > info->estimated_len)) { + // + // aff->shortest_queue = info; + // + // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", + // ikglp_get_idx(sem, info->q), + // info->q->count, + // info->estimated_len); + // } +} + +void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, + struct fifo_queue* fq, + struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + + tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration + + TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n", + t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration); + + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 1, t); // register + + tsk_rt(t)->suspend_gpu_tracker_on_block = 0; + reset_gpu_tracker(t); + start_gpu_tracker(t); +} + +void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, + struct fifo_queue* fq, + struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + lt_t est_time; + + stop_gpu_tracker(t); // stop the tracker before we do anything else. 
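	// Ordering note: the tracker is stopped first so get_gpu_time(t)
	// covers only this critical section; est_time is computed against the
	// old last_gpu so the traced prediction matches the migration class
	// recorded in gpu_ikglp_notify_acquired(); last_gpu is only
	// overwritten at the end of this function.  The observed hold time is
	// fed back into the per-migration-class estimator by
	// update_gpu_estimate() (litmus/gpu_affinity.c), whose feedback
	// parameters (gpu_fb_param_a/b) are seeded in
	// init_gpu_affinity_state().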
+ + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 0, t); // unregister + + // update estimates + update_gpu_estimate(t, get_gpu_time(t)); + + TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. " + "estimated was %llu. diff is %d\n", + t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, + tsk_rt(t)->gpu_migration, + get_gpu_time(t), + est_time, + (long long)get_gpu_time(t) - (long long)est_time); + + tsk_rt(t)->last_gpu = gpu; +} + +struct ikglp_affinity_ops gpu_ikglp_affinity = +{ + .advise_enqueue = gpu_ikglp_advise_enqueue, + .advise_steal = gpu_ikglp_advise_steal, + .advise_donee_selection = gpu_ikglp_advise_donee_selection, + .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq, + + .notify_enqueue = gpu_ikglp_notify_enqueue, + .notify_dequeue = gpu_ikglp_notify_dequeue, + .notify_acquired = gpu_ikglp_notify_acquired, + .notify_freed = gpu_ikglp_notify_freed, + + .replica_to_resource = gpu_replica_to_resource, +}; + +struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args); +} + + + + + + + + +// Simple ikglp Affinity (standard ikglp with auto-gpu registration) + +struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int min_count; + int min_nr_users; + struct ikglp_queue_info *shortest; + struct fifo_queue *to_enqueue; + int i; + + // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); + + shortest = &aff->q_info[0]; + min_count = shortest->q->count; + min_nr_users = *(shortest->nr_cur_users); + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + ikglp_get_idx(sem, shortest->q), + shortest->q->count, + min_nr_users); + + for(i = 1; i < sem->nr_replicas; ++i) { + int len = aff->q_info[i].q->count; + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. 
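		// That is: prefer the strictly shorter queue; on a length tie,
		// prefer the replica whose backing GPU currently has the fewest
		// lock holders (nr_cur_users is shared by every replica of the
		// same GPU).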
+ if((len < min_count) || + ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + shortest = &aff->q_info[i]; + min_count = shortest->q->count; + min_nr_users = *(aff->q_info[i].nr_cur_users); + } + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + ikglp_get_idx(sem, aff->q_info[i].q), + aff->q_info[i].q->count, + *(aff->q_info[i].nr_cur_users)); + } + + to_enqueue = shortest->q; + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", + ikglp_get_idx(sem, to_enqueue), + ikglp_get_idx(sem, sem->shortest_fifo_queue)); + + return to_enqueue; +} + +ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff, + struct fifo_queue* dst) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n"); + return ikglp_find_hp_waiter_to_steal(sem); +} + +ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); + return(donee); +} + +ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); + return(donor); +} + +void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n"); +} + +void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n"); +} + +void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + + // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n"); + + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 1, t); // register +} + +void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + + // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n"); + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 0, t); // unregister +} + +struct ikglp_affinity_ops simple_gpu_ikglp_affinity = +{ + .advise_enqueue = simple_gpu_ikglp_advise_enqueue, + .advise_steal = simple_gpu_ikglp_advise_steal, + .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection, + .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq, + + .notify_enqueue = simple_gpu_ikglp_notify_enqueue, + .notify_dequeue = simple_gpu_ikglp_notify_dequeue, + .notify_acquired = simple_gpu_ikglp_notify_acquired, + .notify_freed = simple_gpu_ikglp_notify_freed, + + .replica_to_resource = gpu_replica_to_resource, +}; + +struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args); +} + +#endif + + + + + + + + + diff --git a/litmus/jobs.c 
b/litmus/jobs.c index 36e314625d86..1d97462cc128 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c @@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t) { BUG_ON(!t); /* prepare next release */ - t->rt_param.job_params.release = t->rt_param.job_params.deadline; - t->rt_param.job_params.deadline += get_rt_period(t); + + if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) { + /* allow the release point to slip if we've passed our deadline. */ + lt_t now = litmus_clock(); + t->rt_param.job_params.release = + (t->rt_param.job_params.deadline < now) ? + now : t->rt_param.job_params.deadline; + t->rt_param.job_params.deadline = + t->rt_param.job_params.release + get_rt_period(t); + } + else { + t->rt_param.job_params.release = t->rt_param.job_params.deadline; + t->rt_param.job_params.deadline += get_rt_period(t); + } + t->rt_param.job_params.exec_time = 0; /* update job sequence number */ t->rt_param.job_params.job_no++; diff --git a/litmus/kexclu_affinity.c b/litmus/kexclu_affinity.c new file mode 100644 index 000000000000..5ef5e54d600d --- /dev/null +++ b/litmus/kexclu_affinity.c @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include + +#include + +static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg); +static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg); +static int close_generic_aff_obs(struct od_table_entry* entry); +static void destroy_generic_aff_obs(obj_type_t type, void* sem); + +struct fdso_ops generic_affinity_ops = { + .create = create_generic_aff_obs, + .open = open_generic_aff_obs, + .close = close_generic_aff_obs, + .destroy = destroy_generic_aff_obs +}; + +static atomic_t aff_obs_id_gen = ATOMIC_INIT(0); + +static inline bool is_affinity_observer(struct od_table_entry *entry) +{ + return (entry->class == &generic_affinity_ops); +} + +static inline struct affinity_observer* get_affinity_observer(struct od_table_entry* entry) +{ + BUG_ON(!is_affinity_observer(entry)); + return (struct affinity_observer*) entry->obj->obj; +} + +static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg) +{ + struct affinity_observer* aff_obs; + int err; + + err = litmus->allocate_aff_obs(&aff_obs, type, arg); + if (err == 0) { + BUG_ON(!aff_obs->lock); + aff_obs->type = type; + *obj_ref = aff_obs; + } + return err; +} + +static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg) +{ + struct affinity_observer* aff_obs = get_affinity_observer(entry); + if (aff_obs->ops->open) + return aff_obs->ops->open(aff_obs, arg); + else + return 0; /* default: any task can open it */ +} + +static int close_generic_aff_obs(struct od_table_entry* entry) +{ + struct affinity_observer* aff_obs = get_affinity_observer(entry); + if (aff_obs->ops->close) + return aff_obs->ops->close(aff_obs); + else + return 0; /* default: closing succeeds */ +} + +static void destroy_generic_aff_obs(obj_type_t type, void* obj) +{ + struct affinity_observer* aff_obs = (struct affinity_observer*) obj; + aff_obs->ops->deallocate(aff_obs); +} + + +struct litmus_lock* get_lock_from_od(int od) +{ + extern struct fdso_ops generic_lock_ops; + + struct od_table_entry *entry = get_entry_for_od(od); + + if(entry && entry->class == &generic_lock_ops) { + return (struct litmus_lock*) entry->obj->obj; + } + return NULL; +} + +void affinity_observer_new(struct affinity_observer* aff, + struct affinity_observer_ops* ops, + struct affinity_observer_args* args) +{ + aff->ops = ops; + aff->lock = 
get_lock_from_od(args->lock_od); + aff->ident = atomic_inc_return(&aff_obs_id_gen); +} \ No newline at end of file diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c new file mode 100644 index 000000000000..bff857ed8d4e --- /dev/null +++ b/litmus/kfmlp_lock.c @@ -0,0 +1,1002 @@ +#include +#include + +#include +#include +#include + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#include +#endif + +#include + +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, + struct kfmlp_queue* queue) +{ + return (queue - &sem->queues[0]); +} + +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, + struct task_struct* holder) +{ + int i; + for(i = 0; i < sem->num_resources; ++i) + if(sem->queues[i].owner == holder) + return(&sem->queues[i]); + return(NULL); +} + +/* caller is responsible for locking */ +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + + /* Compare task prios, find high prio task. */ + //if (queued != skip && edf_higher_prio(queued, found)) + if (queued != skip && litmus->compare(queued, found)) + found = queued; + } + return found; +} + +static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem, + struct kfmlp_queue* search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct kfmlp_queue* step = search_start; + struct kfmlp_queue* shortest = sem->shortest_queue; + + do + { + step = (step+1 != &sem->queues[sem->num_resources]) ? + step+1 : &sem->queues[0]; + + if(step->count < shortest->count) + { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + + }while(step != search_start); + + return(shortest); +} + + +static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, + wait_queue_t** to_steal, + struct kfmlp_queue** to_steal_from) +{ + /* must hold sem->lock */ + + int i; + + *to_steal = NULL; + *to_steal_from = NULL; + + for(i = 0; i < sem->num_resources; ++i) + { + if( (sem->queues[i].count > 1) && + ((*to_steal_from == NULL) || + //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) ) + { + *to_steal_from = &sem->queues[i]; + } + } + + if(*to_steal_from) + { + struct list_head *pos; + struct task_struct *target = (*to_steal_from)->hp_waiter; + + TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n", + target->comm, + target->pid, + kfmlp_get_idx(sem, *to_steal_from)); + + list_for_each(pos, &(*to_steal_from)->wait.task_list) + { + wait_queue_t *node = list_entry(pos, wait_queue_t, task_list); + struct task_struct *queued = (struct task_struct*) node->private; + /* Compare task prios, find high prio task. */ + if (queued == target) + { + *to_steal = node; + + TRACE_CUR("steal: selected %s/%d from queue %d\n", + queued->comm, queued->pid, + kfmlp_get_idx(sem, *to_steal_from)); + + return queued; + } + } + + TRACE_CUR("Could not find %s/%d in queue %d!!! 
THIS IS A BUG!\n", + target->comm, + target->pid, + kfmlp_get_idx(sem, *to_steal_from)); + } + + return NULL; +} + +static void kfmlp_steal_node(struct kfmlp_semaphore *sem, + struct kfmlp_queue *dst, + wait_queue_t *wait, + struct kfmlp_queue *src) +{ + struct task_struct* t = (struct task_struct*) wait->private; + + __remove_wait_queue(&src->wait, wait); + --(src->count); + + if(t == src->hp_waiter) { + src->hp_waiter = kfmlp_find_hp_waiter(src, NULL); + + TRACE_CUR("queue %d: %s/%d is new hp_waiter\n", + kfmlp_get_idx(sem, src), + (src->hp_waiter) ? src->hp_waiter->comm : "nil", + (src->hp_waiter) ? src->hp_waiter->pid : -1); + + if(src->owner && tsk_rt(src->owner)->inh_task == t) { + litmus->decrease_prio(src->owner, src->hp_waiter); + } + } + + if(sem->shortest_queue->count > src->count) { + sem->shortest_queue = src; + TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue)); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t); + } +#endif + + init_waitqueue_entry(wait, t); + __add_wait_queue_tail_exclusive(&dst->wait, wait); + ++(dst->count); + + if(litmus->compare(t, dst->hp_waiter)) { + dst->hp_waiter = t; + + TRACE_CUR("queue %d: %s/%d is new hp_waiter\n", + kfmlp_get_idx(sem, dst), + t->comm, t->pid); + + if(dst->owner && litmus->compare(t, dst->owner)) + { + litmus->increase_prio(dst->owner, t); + } + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t); + } +#endif +} + + +int kfmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue* my_queue = NULL; + wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + spin_lock_irqsave(&sem->lock, flags); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t); + } + if(!my_queue) { + my_queue = sem->shortest_queue; + } +#else + my_queue = sem->shortest_queue; +#endif + + if (my_queue->owner) { + /* resource is not free => must suspend and wait */ + TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n", + kfmlp_get_idx(sem, my_queue), + my_queue->count); + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); + + TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n", + kfmlp_get_idx(sem, my_queue), + (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil", + (my_queue->hp_waiter) ? 
my_queue->hp_waiter->pid : -1); + + /* check if we need to activate priority inheritance */ + //if (edf_higher_prio(t, my_queue->hp_waiter)) + if (litmus->compare(t, my_queue->hp_waiter)) { + my_queue->hp_waiter = t; + TRACE_CUR("queue %d: %s/%d is new hp_waiter\n", + kfmlp_get_idx(sem, my_queue), + t->comm, t->pid); + + //if (edf_higher_prio(t, my_queue->owner)) + if (litmus->compare(t, my_queue->owner)) { + litmus->increase_prio(my_queue->owner, my_queue->hp_waiter); + } + } + + ++(my_queue->count); + + if(my_queue == sem->shortest_queue) { + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + TRACE_CUR("queue %d is the shortest\n", + kfmlp_get_idx(sem, sem->shortest_queue)); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); + } +#endif + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->lock, flags); + + /* We depend on the FIFO order. Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release (or steal). + */ + schedule(); + + + if(my_queue->owner == t) { + TRACE_CUR("queue %d: acquired through waiting\n", + kfmlp_get_idx(sem, my_queue)); + } + else { + /* this case may happen if our wait entry was stolen + between queues. record where we went. */ + my_queue = kfmlp_get_queue(sem, t); + + BUG_ON(!my_queue); + TRACE_CUR("queue %d: acquired through stealing\n", + kfmlp_get_idx(sem, my_queue)); + } + } + else { + TRACE_CUR("queue %d: acquired immediately\n", + kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = t; + + ++(my_queue->count); + + if(my_queue == sem->shortest_queue) { + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + TRACE_CUR("queue %d is the shortest\n", + kfmlp_get_idx(sem, sem->shortest_queue)); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t); + } +#endif + + spin_unlock_irqrestore(&sem->lock, flags); + } + + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue); + } +#endif + return kfmlp_get_idx(sem, my_queue); +} + + +int kfmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue, *to_steal_from; + unsigned long flags; + int err = 0; + + my_queue = kfmlp_get_queue(sem, t); + + if (!my_queue) { + err = -EINVAL; + goto out; + } + + spin_lock_irqsave(&sem->lock, flags); + + TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = NULL; // clear ownership + --(my_queue->count); + + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + TRACE_CUR("queue %d is the shortest\n", + kfmlp_get_idx(sem, sem->shortest_queue)); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t); + sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t); + } +#endif + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + litmus->decrease_prio(t, NULL); + + + /* check if there are jobs waiting for this resource */ +RETRY: + next = __waitqueue_remove_first(&my_queue->wait); + if (next) { + /* next becomes the resouce holder */ + my_queue->owner = next; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) 
{ + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next); + } +#endif + + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", + kfmlp_get_idx(sem, my_queue), next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == my_queue->hp_waiter) { + TRACE_TASK(next, "was highest-prio waiter\n"); + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); + if (my_queue->hp_waiter) + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); + else + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); + } else { + /* Well, if next is not the highest-priority waiter, + * then it ought to inherit the highest-priority + * waiter's priority. */ + litmus->increase_prio(next, my_queue->hp_waiter); + } + + /* wake up next */ + wake_up_process(next); + } + else { + // TODO: put this stealing logic before we attempt to release + // our resource. (simplifies code and gets rid of ugly goto RETRY. + wait_queue_t *wait; + + TRACE_CUR("queue %d: looking to steal someone...\n", + kfmlp_get_idx(sem, my_queue)); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + next = (sem->aff_obs) ? + sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) : + kfmlp_select_hp_steal(sem, &wait, &to_steal_from); +#else + next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from); +#endif + + if(next) { + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid, + kfmlp_get_idx(sem, to_steal_from)); + + kfmlp_steal_node(sem, my_queue, wait, to_steal_from); + + goto RETRY; // will succeed this time. + } + else { + TRACE_CUR("queue %d: no one to steal.\n", + kfmlp_get_idx(sem, my_queue)); + } + } + + spin_unlock_irqrestore(&sem->lock, flags); + +out: + return err; +} + +int kfmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + owner = (my_queue) ? 
(my_queue->owner == t) : 0; + + spin_unlock_irqrestore(&sem->lock, flags); + + if (owner) + kfmlp_unlock(l); + + return 0; +} + +void kfmlp_free(struct litmus_lock* l) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + kfree(sem->queues); + kfree(sem); +} + + + +struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) +{ + struct kfmlp_semaphore* sem; + int num_resources = 0; + int i; + + if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) + { + return(NULL); + } + if(__copy_from_user(&num_resources, args, sizeof(num_resources))) + { + return(NULL); + } + if(num_resources < 1) + { + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + return(NULL); + } + + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); + if(!sem->queues) + { + kfree(sem); + return(NULL); + } + + sem->litmus_lock.ops = ops; + spin_lock_init(&sem->lock); + sem->num_resources = num_resources; + + for(i = 0; i < num_resources; ++i) + { + sem->queues[i].owner = NULL; + sem->queues[i].hp_waiter = NULL; + init_waitqueue_head(&sem->queues[i].wait); + sem->queues[i].count = 0; + } + + sem->shortest_queue = &sem->queues[0]; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + sem->aff_obs = NULL; +#endif + + return &sem->litmus_lock; +} + + + + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica) +{ + int gpu = replica % aff->nr_rsrc; + return gpu; +} + +static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica) +{ + int gpu = __replica_to_gpu(aff, replica) + aff->offset; + return gpu; +} + +static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu) +{ + int replica = gpu - aff->offset; + return replica; +} + + +int kfmlp_aff_obs_close(struct affinity_observer* obs) +{ + return 0; +} + +void kfmlp_aff_obs_free(struct affinity_observer* obs) +{ + struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs); + kfree(kfmlp_aff->nr_cur_users_on_rsrc); + kfree(kfmlp_aff->q_info); + kfree(kfmlp_aff); +} + +static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops, + struct kfmlp_affinity_ops* kfmlp_ops, + void* __user args) +{ + struct kfmlp_affinity* kfmlp_aff; + struct gpu_affinity_observer_args aff_args; + struct kfmlp_semaphore* sem; + int i; + unsigned long flags; + + if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { + return(NULL); + } + if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { + return(NULL); + } + + sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); + + if(sem->litmus_lock.type != KFMLP_SEM) { + TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); + return(NULL); + } + + if((aff_args.nr_simult_users <= 0) || + (sem->num_resources%aff_args.nr_simult_users != 0)) { + TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " + "(%d) per replica. #replicas should be evenly divisible " + "by #simult_users.\n", + sem->litmus_lock.ident, + sem->num_resources, + aff_args.nr_simult_users); + return(NULL); + } + + if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { + TRACE_CUR("System does not support #simult_users > %d. 
%d requested.\n", + NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); +// return(NULL); + } + + kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); + if(!kfmlp_aff) { + return(NULL); + } + + kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL); + if(!kfmlp_aff->q_info) { + kfree(kfmlp_aff); + return(NULL); + } + + kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); + if(!kfmlp_aff->nr_cur_users_on_rsrc) { + kfree(kfmlp_aff->q_info); + kfree(kfmlp_aff); + return(NULL); + } + + affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs); + + kfmlp_aff->ops = kfmlp_ops; + kfmlp_aff->offset = aff_args.replica_to_gpu_offset; + kfmlp_aff->nr_simult = aff_args.nr_simult_users; + kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; + + memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); + + for(i = 0; i < sem->num_resources; ++i) { + kfmlp_aff->q_info[i].q = &sem->queues[i]; + kfmlp_aff->q_info[i].estimated_len = 0; + + // multiple q_info's will point to the same resource (aka GPU) if + // aff_args.nr_simult_users > 1 + kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)]; + } + + // attach observer to the lock + spin_lock_irqsave(&sem->lock, flags); + sem->aff_obs = kfmlp_aff; + spin_unlock_irqrestore(&sem->lock, flags); + + return &kfmlp_aff->obs; +} + + + + +static int gpu_replica_to_resource(struct kfmlp_affinity* aff, + struct kfmlp_queue* fq) { + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq))); +} + + +// Smart KFMLP Affinity + +//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) +//{ +// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); +// struct kfmlp_queue_info *shortest = &aff->q_info[0]; +// int i; +// +// for(i = 1; i < sem->num_resources; ++i) { +// if(aff->q_info[i].estimated_len < shortest->estimated_len) { +// shortest = &aff->q_info[i]; +// } +// } +// +// return(shortest); +//} + +struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + lt_t min_len; + int min_nr_users; + struct kfmlp_queue_info *shortest; + struct kfmlp_queue *to_enqueue; + int i; + int affinity_gpu; + + // simply pick the shortest queue if, we have no affinity, or we have + // affinity with the shortest + if(unlikely(tsk_rt(t)->last_gpu < 0)) { + affinity_gpu = aff->offset; // first gpu + TRACE_CUR("no affinity\n"); + } + else { + affinity_gpu = tsk_rt(t)->last_gpu; + } + + // all things being equal, let's start with the queue with which we have + // affinity. 
this helps us maintain affinity even when we don't have + // an estiamte for local-affinity execution time (i.e., 2nd time on GPU) + shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; + +// if(shortest == aff->shortest_queue) { +// TRACE_CUR("special case: have affinity with shortest queue\n"); +// goto out; +// } + + min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); + min_nr_users = *(shortest->nr_cur_users); + + TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", + get_gpu_estimate(t, MIG_LOCAL), + kfmlp_get_idx(sem, shortest->q), + min_len); + + for(i = 0; i < sem->num_resources; ++i) { + if(&aff->q_info[i] != shortest) { + + lt_t est_len = + aff->q_info[i].estimated_len + + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. + if((est_len < min_len) || + ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + shortest = &aff->q_info[i]; + min_len = est_len; + min_nr_users = *(aff->q_info[i].nr_cur_users); + } + + TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))), + kfmlp_get_idx(sem, aff->q_info[i].q), + est_len); + } + } + + to_enqueue = shortest->q; + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", + kfmlp_get_idx(sem, to_enqueue), + kfmlp_get_idx(sem, sem->shortest_queue)); + + return to_enqueue; +} + +struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + + // For now, just steal highest priority waiter + // TODO: Implement affinity-aware stealing. 
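	// One possible shape for that TODO, sketched under #if 0 (nothing
	// below is compiled): mirror pick_steal() from the IKGLP affinity
	// code earlier in this patch and take the waiter whose move to the
	// destination GPU gives the largest improvement in migration
	// distance, breaking ties on priority.  The function name, the
	// dest_gpu parameter, and the tie-break are assumptions of this
	// sketch; the helpers it calls (kfmlp_from_lock, replica_to_gpu,
	// gpu_migration_distance, litmus->compare) are the ones already used
	// in this file.
#if 0
static struct task_struct* gpu_kfmlp_pick_steal_sketch(
	struct kfmlp_affinity* aff,
	wait_queue_t** to_steal,
	struct kfmlp_queue** to_steal_from,
	int dest_gpu)
{
	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
	struct task_struct *victim = NULL;
	int max_improvement = -(MIG_NONE+1);
	int i;

	for(i = 0; i < sem->num_resources; ++i) {
		struct kfmlp_queue *src = &sem->queues[i];
		int src_gpu = replica_to_gpu(aff, i);
		struct list_head *pos;

		if(src->count <= 1)  // need a waiter beyond the owner
			continue;

		list_for_each(pos, &src->wait.task_list) {
			wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
			struct task_struct *queued = (struct task_struct*) node->private;
			int improvement =
				gpu_migration_distance(src_gpu, tsk_rt(queued)->last_gpu) -
				gpu_migration_distance(dest_gpu, tsk_rt(queued)->last_gpu);

			if((improvement > max_improvement) ||
			   ((improvement == max_improvement) &&
			    litmus->compare(queued, victim))) {
				victim = queued;
				max_improvement = improvement;
				*to_steal = node;
				*to_steal_from = src;
			}
		}
	}
	return victim;
}
#endif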
+ + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); +} + + +void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + struct kfmlp_queue_info *info = &aff->q_info[replica]; + lt_t est_time; + lt_t est_len_before; + + if(current == t) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } + + est_len_before = info->estimated_len; + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + info->estimated_len += est_time; + + TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", + kfmlp_get_idx(sem, info->q), + est_len_before, est_time, + info->estimated_len); + +// if(aff->shortest_queue == info) { +// // we may no longer be the shortest +// aff->shortest_queue = kfmlp_aff_find_shortest(aff); +// +// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", +// kfmlp_get_idx(sem, aff->shortest_queue->q), +// aff->shortest_queue->q->count, +// aff->shortest_queue->estimated_len); +// } +} + +void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + struct kfmlp_queue_info *info = &aff->q_info[replica]; + lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + if(est_time > info->estimated_len) { + WARN_ON(1); + info->estimated_len = 0; + } + else { + info->estimated_len -= est_time; + } + + TRACE_CUR("fq %d est len is now %llu\n", + kfmlp_get_idx(sem, info->q), + info->estimated_len); + + // check to see if we're the shortest queue now. +// if((aff->shortest_queue != info) && +// (aff->shortest_queue->estimated_len > info->estimated_len)) { +// +// aff->shortest_queue = info; +// +// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", +// kfmlp_get_idx(sem, info->q), +// info->q->count, +// info->estimated_len); +// } +} + +void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + + tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration + + TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", + t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); + + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 1, t); // register + + tsk_rt(t)->suspend_gpu_tracker_on_block = 0; + reset_gpu_tracker(t); + start_gpu_tracker(t); +} + +void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + lt_t est_time; + + stop_gpu_tracker(t); // stop the tracker before we do anything else. 
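	// As in gpu_ikglp_notify_freed() earlier in this patch: stop the
	// tracker before anything else so get_gpu_time(t) covers only this
	// critical section, and compute est_time from the old last_gpu before
	// it is overwritten a few lines below; the observed time then updates
	// the per-migration-class estimate via update_gpu_estimate().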
+ + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + tsk_rt(t)->last_gpu = gpu; + + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 0, t); // unregister + + // update estimates + update_gpu_estimate(t, get_gpu_time(t)); + + TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n", + t->comm, t->pid, gpu, + get_gpu_time(t), + est_time, + (long long)get_gpu_time(t) - (long long)est_time); +} + +struct kfmlp_affinity_ops gpu_kfmlp_affinity = +{ + .advise_enqueue = gpu_kfmlp_advise_enqueue, + .advise_steal = gpu_kfmlp_advise_steal, + .notify_enqueue = gpu_kfmlp_notify_enqueue, + .notify_dequeue = gpu_kfmlp_notify_dequeue, + .notify_acquired = gpu_kfmlp_notify_acquired, + .notify_freed = gpu_kfmlp_notify_freed, + .replica_to_resource = gpu_replica_to_resource, +}; + +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args); +} + + + + + + + + +// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration) + +struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int min_count; + int min_nr_users; + struct kfmlp_queue_info *shortest; + struct kfmlp_queue *to_enqueue; + int i; + +// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); + + shortest = &aff->q_info[0]; + min_count = shortest->q->count; + min_nr_users = *(shortest->nr_cur_users); + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + kfmlp_get_idx(sem, shortest->q), + shortest->q->count, + min_nr_users); + + for(i = 1; i < sem->num_resources; ++i) { + int len = aff->q_info[i].q->count; + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. 
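		// Same tie-break as the simple IKGLP variant: raw queue length
		// first, then fewest current holders on the backing GPU.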
+ if((len < min_count) || + ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + shortest = &aff->q_info[i]; + min_count = shortest->q->count; + min_nr_users = *(aff->q_info[i].nr_cur_users); + } + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + kfmlp_get_idx(sem, aff->q_info[i].q), + aff->q_info[i].q->count, + *(aff->q_info[i].nr_cur_users)); + } + + to_enqueue = shortest->q; + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", + kfmlp_get_idx(sem, to_enqueue), + kfmlp_get_idx(sem, sem->shortest_queue)); + + return to_enqueue; +} + +struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); +// TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n"); + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); +} + +void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ +// TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n"); +} + +void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ +// TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n"); +} + +void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + +// TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n"); + + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 1, t); // register +} + +void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + +// TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n"); + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 0, t); // unregister +} + +struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity = +{ + .advise_enqueue = simple_gpu_kfmlp_advise_enqueue, + .advise_steal = simple_gpu_kfmlp_advise_steal, + .notify_enqueue = simple_gpu_kfmlp_notify_enqueue, + .notify_dequeue = simple_gpu_kfmlp_notify_dequeue, + .notify_acquired = simple_gpu_kfmlp_notify_acquired, + .notify_freed = simple_gpu_kfmlp_notify_freed, + .replica_to_resource = gpu_replica_to_resource, +}; + +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args); +} + +#endif + diff --git a/litmus/litmus.c b/litmus/litmus.c index 301390148d02..d1f836c8af6e 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -21,6 +21,10 @@ #include #endif +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + /* Number of RT tasks that exist in the system */ atomic_t rt_task_count = ATOMIC_INIT(0); static DEFINE_RAW_SPINLOCK(task_transition_lock); @@ -51,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn) struct release_heap* release_heap_alloc(int gfp_flags); void release_heap_free(struct release_heap* rh); +#ifdef CONFIG_LITMUS_NVIDIA +/* + * sys_register_nv_device + * @nv_device_id: The Nvidia device id that the task want to register + * @reg_action: set to '1' to register the specified device. zero otherwise. 
+ * Syscall for register task's designated nvidia device into NV_DEVICE_REG array + * Returns EFAULT if nv_device_id is out of range. + * 0 if success + */ +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) +{ + /* register the device to caller (aka 'current') */ + return(reg_nv_device(nv_device_id, reg_action, current)); +} +#else +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) +{ + return(-EINVAL); +} +#endif + + /* * sys_set_task_rt_param * @pid: Pid of the task which scheduling parameters must be changed @@ -269,6 +295,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job) return retval; } + /* sys_null_call() is only used for determining raw system call * overheads (kernel entry, kernel exit). It has no useful side effects. * If ts is non-NULL, then the current Feather-Trace time is recorded. @@ -286,12 +313,42 @@ asmlinkage long sys_null_call(cycles_t __user *ts) return ret; } + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) +void init_gpu_affinity_state(struct task_struct* p) +{ + // under-damped + //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); + //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); + + // emperical; + p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); + p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); + + p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000); + p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000); + + p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000); + p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000); + + p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); + p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); + + p->rt_param.gpu_migration = MIG_NONE; + p->rt_param.last_gpu = -1; +} +#endif + /* p is a real-time task. Re-init its state as a best-effort task. */ static void reinit_litmus_state(struct task_struct* p, int restore) { struct rt_task user_config = {}; void* ctrl_page = NULL; +#ifdef CONFIG_LITMUS_NESTED_LOCKING + binheap_order_t prio_order = NULL; +#endif + if (restore) { /* Safe user-space provided configuration data. * and allocated page. */ @@ -299,11 +356,38 @@ static void reinit_litmus_state(struct task_struct* p, int restore) ctrl_page = p->rt_param.ctrl_page; } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + prio_order = p->rt_param.hp_blocked_tasks.compare; +#endif + /* We probably should not be inheriting any task's priority * at this point in time. */ WARN_ON(p->rt_param.inh_task); +#ifdef CONFIG_LITMUS_NESTED_LOCKING + WARN_ON(p->rt_param.blocked_lock); + WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD + /* We probably should not have any tasklets executing for + * us at this time. + */ + WARN_ON(p->rt_param.cur_klitirqd); + WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD); + + if(p->rt_param.cur_klitirqd) + flush_pending(p->rt_param.cur_klitirqd, p); + + if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD) + up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem); +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + WARN_ON(p->rt_param.held_gpus != 0); +#endif + /* Cleanup everything else. 
*/ memset(&p->rt_param, 0, sizeof(p->rt_param)); @@ -312,6 +396,15 @@ static void reinit_litmus_state(struct task_struct* p, int restore) p->rt_param.task_params = user_config; p->rt_param.ctrl_page = ctrl_page; } + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + init_gpu_affinity_state(p); +#endif + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order); + raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock); +#endif } long litmus_admit_task(struct task_struct* tsk) @@ -358,6 +451,26 @@ long litmus_admit_task(struct task_struct* tsk) bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); } + +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(tsk)->nv_int_count, 0); +#endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + init_gpu_affinity_state(tsk); +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + tsk_rt(tsk)->blocked_lock = NULL; + raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock); + //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy thread off by default */ + tsk_rt(tsk)is_proxy_thread = 0; + tsk_rt(tsk)cur_klitirqd = NULL; + mutex_init(&tsk_rt(tsk)->klitirqd_sem); + atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD); +#endif + retval = litmus->admit_task(tsk); if (!retval) { @@ -403,7 +516,7 @@ static void synch_on_plugin_switch(void* info) */ int switch_sched_plugin(struct sched_plugin* plugin) { - unsigned long flags; + //unsigned long flags; int ret = 0; BUG_ON(!plugin); @@ -417,8 +530,15 @@ int switch_sched_plugin(struct sched_plugin* plugin) while (atomic_read(&cannot_use_plugin) < num_online_cpus()) cpu_relax(); +#ifdef CONFIG_LITMUS_SOFTIRQD + if(!klitirqd_is_dead()) + { + kill_klitirqd(); + } +#endif + /* stop task transitions */ - raw_spin_lock_irqsave(&task_transition_lock, flags); + //raw_spin_lock_irqsave(&task_transition_lock, flags); /* don't switch if there are active real-time tasks */ if (atomic_read(&rt_task_count) == 0) { @@ -436,7 +556,7 @@ int switch_sched_plugin(struct sched_plugin* plugin) } else ret = -EBUSY; out: - raw_spin_unlock_irqrestore(&task_transition_lock, flags); + //raw_spin_unlock_irqrestore(&task_transition_lock, flags); atomic_set(&cannot_use_plugin, 0); return ret; } diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c new file mode 100644 index 000000000000..300571a81bbd --- /dev/null +++ b/litmus/litmus_pai_softirq.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + + + +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + ret = litmus->enqueue_pai_tasklet(t); + + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_schedule); + + + +// failure causes default Linux handling. +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + return(ret); +} +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); + + +// failure causes default Linux handling. 
+int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + return(ret); +} +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); + + +// failure causes default Linux handling. +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + return(ret); +} +EXPORT_SYMBOL(__litmus_schedule_work); + diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c index 4bf725a36c9c..9ab7e015a3c1 100644 --- a/litmus/litmus_proc.c +++ b/litmus/litmus_proc.c @@ -19,12 +19,19 @@ static struct proc_dir_entry *litmus_dir = NULL, *plugs_dir = NULL, #ifdef CONFIG_RELEASE_MASTER *release_master_file = NULL, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + *klitirqd_file = NULL, #endif *plugs_file = NULL; /* in litmus/sync.c */ int count_tasks_waiting_for_release(void); +extern int proc_read_klitirqd_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data); + static int proc_read_stats(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -161,6 +168,12 @@ int __init init_litmus_proc(void) release_master_file->write_proc = proc_write_release_master; #endif +#ifdef CONFIG_LITMUS_SOFTIRQD + klitirqd_file = + create_proc_read_entry("klitirqd_stats", 0444, litmus_dir, + proc_read_klitirqd_stats, NULL); +#endif + stat_file = create_proc_read_entry("stats", 0444, litmus_dir, proc_read_stats, NULL); @@ -187,6 +200,10 @@ void exit_litmus_proc(void) remove_proc_entry("stats", litmus_dir); if (curr_file) remove_proc_entry("active_plugin", litmus_dir); +#ifdef CONFIG_LITMUS_SOFTIRQD + if (klitirqd_file) + remove_proc_entry("klitirqd_stats", litmus_dir); +#endif #ifdef CONFIG_RELEASE_MASTER if (release_master_file) remove_proc_entry("release_master", litmus_dir); diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c new file mode 100644 index 000000000000..9f7d9da5facb --- /dev/null +++ b/litmus/litmus_softirq.c @@ -0,0 +1,1582 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +/* TODO: Remove unneeded mb() and other barriers. */ + + +/* counts number of daemons ready to handle litmus irqs. */ +static atomic_t num_ready_klitirqds = ATOMIC_INIT(0); + +enum pending_flags +{ + LIT_TASKLET_LOW = 0x1, + LIT_TASKLET_HI = LIT_TASKLET_LOW<<1, + LIT_WORK = LIT_TASKLET_HI<<1 +}; + +/* only support tasklet processing for now. 
*/ +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; + +struct klitirqd_info +{ + struct task_struct* klitirqd; + struct task_struct* current_owner; + int terminating; + + + raw_spinlock_t lock; + + u32 pending; + atomic_t num_hi_pending; + atomic_t num_low_pending; + atomic_t num_work_pending; + + /* in order of priority */ + struct tasklet_head pending_tasklets_hi; + struct tasklet_head pending_tasklets; + struct list_head worklist; +}; + +/* one list for each klitirqd */ +static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD]; + + + + + +int proc_read_klitirqd_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len = snprintf(page, PAGE_SIZE, + "num ready klitirqds: %d\n\n", + atomic_read(&num_ready_klitirqds)); + + if(klitirqd_is_ready()) + { + int i; + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + len += + snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ + "klitirqd_th%d: %s/%d\n" + "\tcurrent_owner: %s/%d\n" + "\tpending: %x\n" + "\tnum hi: %d\n" + "\tnum low: %d\n" + "\tnum work: %d\n\n", + i, + klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid, + (klitirqds[i].current_owner != NULL) ? + klitirqds[i].current_owner->comm : "(null)", + (klitirqds[i].current_owner != NULL) ? + klitirqds[i].current_owner->pid : 0, + klitirqds[i].pending, + atomic_read(&klitirqds[i].num_hi_pending), + atomic_read(&klitirqds[i].num_low_pending), + atomic_read(&klitirqds[i].num_work_pending)); + } + } + + return(len); +} + + + + + +#if 0 +static atomic_t dump_id = ATOMIC_INIT(0); + +static void __dump_state(struct klitirqd_info* which, const char* caller) +{ + struct tasklet_struct* list; + + int id = atomic_inc_return(&dump_id); + + //if(in_interrupt()) + { + if(which->current_owner) + { + TRACE("(id: %d caller: %s)\n" + "klitirqd: %s/%d\n" + "current owner: %s/%d\n" + "pending: %x\n", + id, caller, + which->klitirqd->comm, which->klitirqd->pid, + which->current_owner->comm, which->current_owner->pid, + which->pending); + } + else + { + TRACE("(id: %d caller: %s)\n" + "klitirqd: %s/%d\n" + "current owner: %p\n" + "pending: %x\n", + id, caller, + which->klitirqd->comm, which->klitirqd->pid, + NULL, + which->pending); + } + + list = which->pending_tasklets.head; + while(list) + { + struct tasklet_struct *t = list; + list = list->next; /* advance */ + if(t->owner) + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid); + else + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL); + } + } +} + +static void dump_state(struct klitirqd_info* which, const char* caller) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&which->lock, flags); + __dump_state(which, caller); + raw_spin_unlock_irqrestore(&which->lock, flags); +} +#endif + + +/* forward declarations */ +static void ___litmus_tasklet_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup); +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup); +static void ___litmus_schedule_work(struct work_struct *w, + struct klitirqd_info *which, + int wakeup); + + + +inline unsigned int klitirqd_id(struct task_struct* tsk) +{ + int i; + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(klitirqds[i].klitirqd == tsk) + { + return i; + } + } + + BUG(); + + return 0; +} + + +inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_TASKLET_HI); +} + +inline static u32 
litirq_pending_low_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_TASKLET_LOW); +} + +inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_WORK); +} + +inline static u32 litirq_pending_irqoff(struct klitirqd_info* which) +{ + return(which->pending); +} + + +inline static u32 litirq_pending(struct klitirqd_info* which) +{ + unsigned long flags; + u32 pending; + + raw_spin_lock_irqsave(&which->lock, flags); + pending = litirq_pending_irqoff(which); + raw_spin_unlock_irqrestore(&which->lock, flags); + + return pending; +}; + +inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner) +{ + unsigned long flags; + u32 pending; + + raw_spin_lock_irqsave(&which->lock, flags); + pending = litirq_pending_irqoff(which); + if(pending) + { + if(which->current_owner != owner) + { + pending = 0; // owner switch! + } + } + raw_spin_unlock_irqrestore(&which->lock, flags); + + return pending; +} + + +inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, + struct mutex** sem, + struct task_struct** t) +{ + unsigned long flags; + u32 pending; + + /* init values */ + *sem = NULL; + *t = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + pending = litirq_pending_irqoff(which); + if(pending) + { + if(which->current_owner != NULL) + { + *t = which->current_owner; + *sem = &tsk_rt(which->current_owner)->klitirqd_sem; + } + else + { + BUG(); + } + } + raw_spin_unlock_irqrestore(&which->lock, flags); + + if(likely(*sem)) + { + return pending; + } + else + { + return 0; + } +} + +/* returns true if the next piece of work to do is from a different owner. + */ +static int tasklet_ownership_change( + struct klitirqd_info* which, + enum pending_flags taskletQ) +{ + /* this function doesn't have to look at work objects since they have + priority below tasklets. */ + + unsigned long flags; + int ret = 0; + + raw_spin_lock_irqsave(&which->lock, flags); + + switch(taskletQ) + { + case LIT_TASKLET_HI: + if(litirq_pending_hi_irqoff(which)) + { + ret = (which->pending_tasklets_hi.head->owner != + which->current_owner); + } + break; + case LIT_TASKLET_LOW: + if(litirq_pending_low_irqoff(which)) + { + ret = (which->pending_tasklets.head->owner != + which->current_owner); + } + break; + default: + break; + } + + raw_spin_unlock_irqrestore(&which->lock, flags); + + TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret); + + return ret; +} + + +static void __reeval_prio(struct klitirqd_info* which) +{ + struct task_struct* next_owner = NULL; + struct task_struct* klitirqd = which->klitirqd; + + /* Check in prio-order */ + u32 pending = litirq_pending_irqoff(which); + + //__dump_state(which, "__reeval_prio: before"); + + if(pending) + { + if(pending & LIT_TASKLET_HI) + { + next_owner = which->pending_tasklets_hi.head->owner; + } + else if(pending & LIT_TASKLET_LOW) + { + next_owner = which->pending_tasklets.head->owner; + } + else if(pending & LIT_WORK) + { + struct work_struct* work = + list_first_entry(&which->worklist, struct work_struct, entry); + next_owner = work->owner; + } + } + + if(next_owner != which->current_owner) + { + struct task_struct* old_owner = which->current_owner; + + /* bind the next owner. */ + which->current_owner = next_owner; + mb(); + + if(next_owner != NULL) + { + if(!in_interrupt()) + { + TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, + ((tsk_rt(klitirqd)->inh_task) ? 
tsk_rt(klitirqd)->inh_task : klitirqd)->comm, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + next_owner->comm, next_owner->pid); + } + else + { + TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + next_owner->comm, next_owner->pid); + } + + litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner); + } + else + { + if(likely(!in_interrupt())) + { + TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n", + __FUNCTION__, klitirqd->comm, klitirqd->pid); + } + else + { + // is this a bug? + TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n", + __FUNCTION__, klitirqd->comm, klitirqd->pid); + } + + BUG_ON(pending != 0); + litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL); + } + } + + //__dump_state(which, "__reeval_prio: after"); +} + +static void reeval_prio(struct klitirqd_info* which) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&which->lock, flags); + __reeval_prio(which); + raw_spin_unlock_irqrestore(&which->lock, flags); +} + + +static void wakeup_litirqd_locked(struct klitirqd_info* which) +{ + /* Interrupts are disabled: no need to stop preemption */ + if (which && which->klitirqd) + { + __reeval_prio(which); /* configure the proper priority */ + + if(which->klitirqd->state != TASK_RUNNING) + { + TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__, + which->klitirqd->comm, which->klitirqd->pid); + + wake_up_process(which->klitirqd); + } + } +} + + +static void do_lit_tasklet(struct klitirqd_info* which, + struct tasklet_head* pending_tasklets) +{ + unsigned long flags; + struct tasklet_struct *list; + atomic_t* count; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "do_lit_tasklet: before steal"); + + /* copy out the tasklets for our private use. */ + list = pending_tasklets->head; + pending_tasklets->head = NULL; + pending_tasklets->tail = &pending_tasklets->head; + + /* remove pending flag */ + which->pending &= (pending_tasklets == &which->pending_tasklets) ? + ~LIT_TASKLET_LOW : + ~LIT_TASKLET_HI; + + count = (pending_tasklets == &which->pending_tasklets) ? + &which->num_low_pending: + &which->num_hi_pending; + + //__dump_state(which, "do_lit_tasklet: after steal"); + + raw_spin_unlock_irqrestore(&which->lock, flags); + + + while(list) + { + struct tasklet_struct *t = list; + + /* advance, lest we forget */ + list = list->next; + + /* execute tasklet if it has my priority and is free */ + if ((t->owner == which->current_owner) && tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { + + sched_trace_tasklet_begin(t->owner); + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + { + BUG(); + } + TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__); + t->func(t->data); + tasklet_unlock(t); + + atomic_dec(count); + + sched_trace_tasklet_end(t->owner, 0ul); + + continue; /* process more tasklets */ + } + tasklet_unlock(t); + } + + TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__); + + /* couldn't process tasklet. put it back at the end of the queue. */ + if(pending_tasklets == &which->pending_tasklets) + ___litmus_tasklet_schedule(t, which, 0); + else + ___litmus_tasklet_hi_schedule(t, which, 0); + } +} + + +// returns 1 if priorities need to be changed to continue processing +// pending tasklets. 
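/* Reviewer note -- illustrative sketch, not part of this patch.  The service
 * order implied by __reeval_prio() above and do_litirq() below (HI tasklets,
 * then LOW tasklets, then work objects) can be summarized by a helper like
 * the following; the name is hypothetical.
 */
static inline enum pending_flags example_next_class(u32 pending)
{
	/* caller must guarantee pending != 0 */
	if (pending & LIT_TASKLET_HI)
		return LIT_TASKLET_HI;
	if (pending & LIT_TASKLET_LOW)
		return LIT_TASKLET_LOW;
	return LIT_WORK;
}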
+static int do_litirq(struct klitirqd_info* which) +{ + u32 pending; + int resched = 0; + + if(in_interrupt()) + { + TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__); + return(0); + } + + if(which->klitirqd != current) + { + TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", + __FUNCTION__, current->comm, current->pid, + which->klitirqd->comm, which->klitirqd->pid); + return(0); + } + + if(!is_realtime(current)) + { + TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n", + __FUNCTION__, current->policy); + return(0); + } + + + /* We only handle tasklets & work objects, no need for RCU triggers? */ + + pending = litirq_pending(which); + if(pending) + { + /* extract the work to do and do it! */ + if(pending & LIT_TASKLET_HI) + { + TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__); + do_lit_tasklet(which, &which->pending_tasklets_hi); + resched = tasklet_ownership_change(which, LIT_TASKLET_HI); + + if(resched) + { + TRACE_CUR("%s: HI tasklets of another owner remain. " + "Skipping any LOW tasklets.\n", __FUNCTION__); + } + } + + if(!resched && (pending & LIT_TASKLET_LOW)) + { + TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__); + do_lit_tasklet(which, &which->pending_tasklets); + resched = tasklet_ownership_change(which, LIT_TASKLET_LOW); + + if(resched) + { + TRACE_CUR("%s: LOW tasklets of another owner remain. " + "Skipping any work objects.\n", __FUNCTION__); + } + } + } + + return(resched); +} + + +static void do_work(struct klitirqd_info* which) +{ + unsigned long flags; + work_func_t f; + struct work_struct* work; + + // only execute one work-queue item to yield to tasklets. + // ...is this a good idea, or should we just batch them? + raw_spin_lock_irqsave(&which->lock, flags); + + if(!litirq_pending_work_irqoff(which)) + { + raw_spin_unlock_irqrestore(&which->lock, flags); + goto no_work; + } + + work = list_first_entry(&which->worklist, struct work_struct, entry); + list_del_init(&work->entry); + + if(list_empty(&which->worklist)) + { + which->pending &= ~LIT_WORK; + } + + raw_spin_unlock_irqrestore(&which->lock, flags); + + + + /* safe to read current_owner outside of lock since only this thread + may write to the pointer. */ + if(work->owner == which->current_owner) + { + TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__); + // do the work! + work_clear_pending(work); + f = work->func; + f(work); /* can't touch 'work' after this point, + the user may have freed it. */ + + atomic_dec(&which->num_work_pending); + } + else + { + TRACE_CUR("%s: Could not invoke work object. Requeuing.\n", + __FUNCTION__); + ___litmus_schedule_work(work, which, 0); + } + +no_work: + return; +} + + +static int set_litmus_daemon_sched(void) +{ + /* set up a daemon job that will never complete. + it should only ever run on behalf of another + real-time task. 
+ + TODO: Transition to a new job whenever a + new tasklet is handled */ + + int ret = 0; + + struct rt_task tp = { + .exec_cost = 0, + .period = 1000000000, /* dummy 1 second period */ + .phase = 0, + .cpu = task_cpu(current), + .budget_policy = NO_ENFORCEMENT, + .cls = RT_CLASS_BEST_EFFORT + }; + + struct sched_param param = { .sched_priority = 0}; + + + /* set task params, mark as proxy thread, and init other data */ + tsk_rt(current)->task_params = tp; + tsk_rt(current)->is_proxy_thread = 1; + tsk_rt(current)->cur_klitirqd = NULL; + mutex_init(&tsk_rt(current)->klitirqd_sem); + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD); + + /* inform the OS we're SCHED_LITMUS -- + sched_setscheduler_nocheck() calls litmus_admit_task(). */ + sched_setscheduler_nocheck(current, SCHED_LITMUS, ¶m); + + return ret; +} + +static void enter_execution_phase(struct klitirqd_info* which, + struct mutex* sem, + struct task_struct* t) +{ + TRACE_CUR("%s: Trying to enter execution phase. " + "Acquiring semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + down_and_set_stat(current, HELD, sem); + TRACE_CUR("%s: Execution phase entered! " + "Acquired semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); +} + +static void exit_execution_phase(struct klitirqd_info* which, + struct mutex* sem, + struct task_struct* t) +{ + TRACE_CUR("%s: Exiting execution phase. " + "Releasing semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD) + { + up_and_set_stat(current, NOT_HELD, sem); + TRACE_CUR("%s: Execution phase exited! " + "Released semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + } + else + { + TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__); + } +} + +/* main loop for klitsoftirqd */ +static int run_klitirqd(void* unused) +{ + struct klitirqd_info* which = &klitirqds[klitirqd_id(current)]; + struct mutex* sem; + struct task_struct* owner; + + int rt_status = set_litmus_daemon_sched(); + + if(rt_status != 0) + { + TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); + goto rt_failed; + } + + atomic_inc(&num_ready_klitirqds); + + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) + { + preempt_disable(); + if (!litirq_pending(which)) + { + /* sleep for work */ + TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n", + __FUNCTION__); + preempt_enable_no_resched(); + schedule(); + + if(kthread_should_stop()) /* bail out */ + { + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); + continue; + } + + preempt_disable(); + } + + __set_current_state(TASK_RUNNING); + + while (litirq_pending_and_sem_and_owner(which, &sem, &owner)) + { + int needs_resched = 0; + + preempt_enable_no_resched(); + + BUG_ON(sem == NULL); + + // wait to enter execution phase; wait for 'current_owner' to block. + enter_execution_phase(which, sem, owner); + + if(kthread_should_stop()) + { + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); + break; + } + + preempt_disable(); + + /* Double check that there's still pending work and the owner hasn't + * changed. Pending items may have been flushed while we were sleeping. + */ + if(litirq_pending_with_owner(which, owner)) + { + TRACE_CUR("%s: Executing tasklets and/or work objects.\n", + __FUNCTION__); + + needs_resched = do_litirq(which); + + preempt_enable_no_resched(); + + // work objects are preemptible. + if(!needs_resched) + { + do_work(which); + } + + // exit execution phase. 
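/* Note: releasing the owner's klitirqd_sem below ends the "execution phase"
 * begun by enter_execution_phase() above -- the daemon processes tasklets
 * and work objects only while it holds the semaphore of the task it is
 * serving, apparently so that it runs on the owner's behalf only while the
 * owner itself is blocked. */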
+ exit_execution_phase(which, sem, owner); + + TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__); + reeval_prio(which); /* check if we need to change priority here */ + } + else + { + TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n", + __FUNCTION__, + owner->comm, owner->pid); + preempt_enable_no_resched(); + + // exit execution phase. + exit_execution_phase(which, sem, owner); + } + + cond_resched(); + preempt_disable(); + } + preempt_enable(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + atomic_dec(&num_ready_klitirqds); + +rt_failed: + litmus_exit_task(current); + + return rt_status; +} + + +struct klitirqd_launch_data +{ + int* cpu_affinity; + struct work_struct work; +}; + +/* executed by a kworker from workqueues */ +static void launch_klitirqd(struct work_struct *work) +{ + int i; + + struct klitirqd_launch_data* launch_data = + container_of(work, struct klitirqd_launch_data, work); + + TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + /* create the daemon threads */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(launch_data->cpu_affinity) + { + klitirqds[i].klitirqd = + kthread_create( + run_klitirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)(long long)launch_data->cpu_affinity[i], + "klitirqd_th%d/%d", + i, + launch_data->cpu_affinity[i]); + + /* litmus will put is in the right cluster. */ + kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]); + } + else + { + klitirqds[i].klitirqd = + kthread_create( + run_klitirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)(long long)(-1), + "klitirqd_th%d", + i); + } + } + + TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + /* unleash the daemons */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + wake_up_process(klitirqds[i].klitirqd); + } + + if(launch_data->cpu_affinity) + kfree(launch_data->cpu_affinity); + kfree(launch_data); +} + + +void spawn_klitirqd(int* affinity) +{ + int i; + struct klitirqd_launch_data* delayed_launch; + + if(atomic_read(&num_ready_klitirqds) != 0) + { + TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n"); + return; + } + + /* init the tasklet & work queues */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + klitirqds[i].terminating = 0; + klitirqds[i].pending = 0; + + klitirqds[i].num_hi_pending.counter = 0; + klitirqds[i].num_low_pending.counter = 0; + klitirqds[i].num_work_pending.counter = 0; + + klitirqds[i].pending_tasklets_hi.head = NULL; + klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head; + + klitirqds[i].pending_tasklets.head = NULL; + klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head; + + INIT_LIST_HEAD(&klitirqds[i].worklist); + + raw_spin_lock_init(&klitirqds[i].lock); + } + + /* wait to flush the initializations to memory since other threads + will access it. */ + mb(); + + /* tell a work queue to launch the threads. we can't make scheduling + calls since we're in an atomic state. 
*/ + TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__); + delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC); + if(affinity) + { + delayed_launch->cpu_affinity = + kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC); + + memcpy(delayed_launch->cpu_affinity, affinity, + sizeof(int)*NR_LITMUS_SOFTIRQD); + } + else + { + delayed_launch->cpu_affinity = NULL; + } + INIT_WORK(&delayed_launch->work, launch_klitirqd); + schedule_work(&delayed_launch->work); +} + + +void kill_klitirqd(void) +{ + if(!klitirqd_is_dead()) + { + int i; + + TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(klitirqds[i].terminating != 1) + { + klitirqds[i].terminating = 1; + mb(); /* just to be sure? */ + flush_pending(klitirqds[i].klitirqd, NULL); + + /* signal termination */ + kthread_stop(klitirqds[i].klitirqd); + } + } + } +} + + +int klitirqd_is_ready(void) +{ + return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD); +} + +int klitirqd_is_dead(void) +{ + return(atomic_read(&num_ready_klitirqds) == 0); +} + + +struct task_struct* get_klitirqd(unsigned int k_id) +{ + return(klitirqds[k_id].klitirqd); +} + + +void flush_pending(struct task_struct* klitirqd_thread, + struct task_struct* owner) +{ + unsigned int k_id = klitirqd_id(klitirqd_thread); + struct klitirqd_info *which = &klitirqds[k_id]; + + unsigned long flags; + struct tasklet_struct *list; + + u32 work_flushed = 0; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "flush_pending: before"); + + // flush hi tasklets. + if(litirq_pending_hi_irqoff(which)) + { + which->pending &= ~LIT_TASKLET_HI; + + list = which->pending_tasklets_hi.head; + which->pending_tasklets_hi.head = NULL; + which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head; + + TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__); + + while(list) + { + struct tasklet_struct *t = list; + list = list->next; + + if(likely((t->owner == owner) || (owner == NULL))) + { + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) + { + BUG(); + } + + work_flushed |= LIT_TASKLET_HI; + + t->owner = NULL; + + // WTF? + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_hi_pending); + ___tasklet_hi_schedule(t); + } + else + { + TRACE("%s: dropped hi tasklet??\n", __FUNCTION__); + BUG(); + } + } + else + { + TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__); + // put back on queue. + ___litmus_tasklet_hi_schedule(t, which, 0); + } + } + } + + // flush low tasklets. 
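/* Note: as with the HI queue above, flushing hands unprocessed tasklets back
 * to the stock Linux softirq machinery -- for a matching owner (or when
 * owner == NULL), the SCHED bit is re-set, t->owner is cleared, and
 * ___tasklet_schedule()/___tasklet_hi_schedule() requeue the tasklet for
 * ksoftirqd (woken via wakeup_softirqd() at the end of this function);
 * tasklets belonging to a different owner are re-queued on this klitirqd. */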
+ if(litirq_pending_low_irqoff(which)) + { + which->pending &= ~LIT_TASKLET_LOW; + + list = which->pending_tasklets.head; + which->pending_tasklets.head = NULL; + which->pending_tasklets.tail = &which->pending_tasklets.head; + + TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__); + + while(list) + { + struct tasklet_struct *t = list; + list = list->next; + + if(likely((t->owner == owner) || (owner == NULL))) + { + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) + { + BUG(); + } + + work_flushed |= LIT_TASKLET_LOW; + + t->owner = NULL; + sched_trace_tasklet_end(owner, 1ul); + + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_low_pending); + ___tasklet_schedule(t); + } + else + { + TRACE("%s: dropped tasklet??\n", __FUNCTION__); + BUG(); + } + } + else + { + TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__); + // put back on queue + ___litmus_tasklet_schedule(t, which, 0); + } + } + } + + // flush work objects + if(litirq_pending_work_irqoff(which)) + { + which->pending &= ~LIT_WORK; + + TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__); + + while(!list_empty(&which->worklist)) + { + struct work_struct* work = + list_first_entry(&which->worklist, struct work_struct, entry); + list_del_init(&work->entry); + + if(likely((work->owner == owner) || (owner == NULL))) + { + work_flushed |= LIT_WORK; + atomic_dec(&which->num_work_pending); + + work->owner = NULL; + sched_trace_work_end(owner, current, 1ul); + __schedule_work(work); + } + else + { + TRACE("%s: Could not flush a work object.\n", __FUNCTION__); + // put back on queue + ___litmus_schedule_work(work, which, 0); + } + } + } + + //__dump_state(which, "flush_pending: after (before reeval prio)"); + + + mb(); /* commit changes to pending flags */ + + /* reset the scheduling priority */ + if(work_flushed) + { + __reeval_prio(which); + + /* Try to offload flushed tasklets to Linux's ksoftirqd. */ + if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI)) + { + wakeup_softirqd(); + } + } + else + { + TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__); + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + + + + +static void ___litmus_tasklet_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + t->next = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "___litmus_tasklet_schedule: before queuing"); + + *(which->pending_tasklets.tail) = t; + which->pending_tasklets.tail = &t->next; + + old_pending = which->pending; + which->pending |= LIT_TASKLET_LOW; + + atomic_inc(&which->num_low_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wake up the klitirqd */ + } + + //__dump_state(which, "___litmus_tasklet_schedule: after queuing"); + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + /* Can't accept tasklets while we're processing a workqueue + because they're handled by the same thread. This case is + very RARE. 
+ + TODO: Use a separate thread for work objects!!!!!! + */ + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; + ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1); + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_schedule); + + +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + t->next = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + *(which->pending_tasklets_hi.tail) = t; + which->pending_tasklets_hi.tail = &t->next; + + old_pending = which->pending; + which->pending |= LIT_TASKLET_HI; + + atomic_inc(&which->num_hi_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wake up the klitirqd */ + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; + ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1); + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); + + +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + u32 old_pending; + + BUG_ON(!irqs_disabled()); + + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + raw_spin_lock(&klitirqds[k_id].lock); + + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; // success! 
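/* Note: "schedule_first" semantics -- the tasklet is pushed at the head of
 * the HI list (not the tail), the pending bits and counters are updated
 * while holding the per-daemon lock (taken without irqsave because the
 * BUG_ON(!irqs_disabled()) above requires interrupts to already be off),
 * and the daemon is woken only when pending goes from empty to non-empty. */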
+ + t->next = klitirqds[k_id].pending_tasklets_hi.head; + klitirqds[k_id].pending_tasklets_hi.head = t; + + old_pending = klitirqds[k_id].pending; + klitirqds[k_id].pending |= LIT_TASKLET_HI; + + atomic_inc(&klitirqds[k_id].num_hi_pending); + + mb(); + + if(!old_pending) + wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */ + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + + raw_spin_unlock(&klitirqds[k_id].lock); + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); + + + +static void ___litmus_schedule_work(struct work_struct *w, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + raw_spin_lock_irqsave(&which->lock, flags); + + work_pending(w); + list_add_tail(&w->entry, &which->worklist); + + old_pending = which->pending; + which->pending |= LIT_WORK; + + atomic_inc(&which->num_work_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wakeup the klitirqd */ + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) +{ + int ret = 1; /* assume success */ + if(unlikely(w->owner == NULL) || !is_realtime(w->owner)) + { + TRACE("%s: No owner associated with this work object!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%u!\n", k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + ___litmus_schedule_work(w, &klitirqds[k_id], 1); + else + ret = 0; + return(ret); +} +EXPORT_SYMBOL(__litmus_schedule_work); + + +static int set_klitirqd_sem_status(unsigned long stat) +{ + TRACE_CUR("SETTING STATUS FROM %d TO %d\n", + atomic_read(&tsk_rt(current)->klitirqd_sem_stat), + stat); + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat); + //mb(); + + return(0); +} + +static int set_klitirqd_sem_status_if_not_held(unsigned long stat) +{ + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD) + { + return(set_klitirqd_sem_status(stat)); + } + return(-1); +} + + +void __down_and_reset_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_reset, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif + + mutex_lock_sfx(sem, + set_klitirqd_sem_status_if_not_held, to_reset, + set_klitirqd_sem_status, to_set); +#if 0 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif +} + +void down_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif + + mutex_lock_sfx(sem, + NULL, 0, + set_klitirqd_sem_status, to_set); + +#if 0 + TRACE_CUR("%s: exiting. 
Have semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif +} + + +void up_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n", + __FUNCTION__, + task->comm, task->pid); +#endif + + mutex_unlock_sfx(sem, NULL, 0, + set_klitirqd_sem_status, to_set); + +#if 0 + TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n", + __FUNCTION__, + task->comm, task->pid); +#endif +} + + + +void release_klitirqd_lock(struct task_struct* t) +{ + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD)) + { + struct mutex* sem; + struct task_struct* owner = t; + + if(t->state == TASK_RUNNING) + { + TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n"); + return; + } + + if(likely(!tsk_rt(t)->is_proxy_thread)) + { + sem = &tsk_rt(t)->klitirqd_sem; + } + else + { + unsigned int k_id = klitirqd_id(t); + owner = klitirqds[k_id].current_owner; + + BUG_ON(t != klitirqds[k_id].klitirqd); + + if(likely(owner)) + { + sem = &tsk_rt(owner)->klitirqd_sem; + } + else + { + BUG(); + + // We had the rug pulled out from under us. Abort attempt + // to reacquire the lock since our client no longer needs us. + TRACE_CUR("HUH?! How did this happen?\n"); + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + return; + } + } + + //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid); + up_and_set_stat(t, NEED_TO_REACQUIRE, sem); + //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid); + } + /* + else if(is_realtime(t)) + { + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + } + */ +} + +int reacquire_klitirqd_lock(struct task_struct* t) +{ + int ret = 0; + + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE)) + { + struct mutex* sem; + struct task_struct* owner = t; + + if(likely(!tsk_rt(t)->is_proxy_thread)) + { + sem = &tsk_rt(t)->klitirqd_sem; + } + else + { + unsigned int k_id = klitirqd_id(t); + //struct task_struct* owner = klitirqds[k_id].current_owner; + owner = klitirqds[k_id].current_owner; + + BUG_ON(t != klitirqds[k_id].klitirqd); + + if(likely(owner)) + { + sem = &tsk_rt(owner)->klitirqd_sem; + } + else + { + // We had the rug pulled out from under us. Abort attempt + // to reacquire the lock since our client no longer needs us. + TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n"); + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + return(0); + } + } + + //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid); + __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem); + //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid); + } + /* + else if(is_realtime(t)) + { + TRACE_CUR("%s: Nothing to do. 
Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + } + */ + + return(ret); +} + diff --git a/litmus/locking.c b/litmus/locking.c index 0c1aa6aa40b7..718a5a3281d7 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -4,6 +4,15 @@ #include #include +#include + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +#include +#endif + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); static int open_generic_lock(struct od_table_entry* entry, void* __user arg); @@ -17,6 +26,9 @@ struct fdso_ops generic_lock_ops = { .destroy = destroy_generic_lock }; +static atomic_t lock_id_gen = ATOMIC_INIT(0); + + static inline bool is_lock(struct od_table_entry* entry) { return entry->class == &generic_lock_ops; @@ -34,8 +46,21 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar int err; err = litmus->allocate_lock(&lock, type, arg); - if (err == 0) + if (err == 0) { +#ifdef CONFIG_LITMUS_NESTED_LOCKING + lock->nest.lock = lock; + lock->nest.hp_waiter_eff_prio = NULL; + + INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node); + if(!lock->nest.hp_waiter_ptr) { + TRACE_CUR("BEWARE: hp_waiter_ptr should probably not be NULL in " + "most uses. (exception: IKGLP donors)\n"); + } +#endif + lock->type = type; + lock->ident = atomic_inc_return(&lock_id_gen); *obj_ref = lock; + } return err; } @@ -74,7 +99,8 @@ asmlinkage long sys_litmus_lock(int lock_od) entry = get_entry_for_od(lock_od); if (entry && is_lock(entry)) { l = get_lock(entry); - TRACE_CUR("attempts to lock 0x%p\n", l); + //TRACE_CUR("attempts to lock 0x%p\n", l); + TRACE_CUR("attempts to lock %d\n", l->ident); err = l->ops->lock(l); } @@ -96,7 +122,8 @@ asmlinkage long sys_litmus_unlock(int lock_od) entry = get_entry_for_od(lock_od); if (entry && is_lock(entry)) { l = get_lock(entry); - TRACE_CUR("attempts to unlock 0x%p\n", l); + //TRACE_CUR("attempts to unlock 0x%p\n", l); + TRACE_CUR("attempts to unlock %d\n", l->ident); err = l->ops->unlock(l); } @@ -121,8 +148,366 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) return(t); } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + +void print_hp_waiters(struct binheap_node* n, int depth) +{ + struct litmus_lock *l; + struct nested_info *nest; + char padding[81] = " "; + struct task_struct *hp = NULL; + struct task_struct *hp_eff = NULL; + struct task_struct *node_prio = NULL; + + + if(n == NULL) { + TRACE("+-> %p\n", NULL); + return; + } + + nest = binheap_entry(n, struct nested_info, hp_binheap_node); + l = nest->lock; + + if(depth*2 <= 80) + padding[depth*2] = '\0'; + + if(nest->hp_waiter_ptr && *(nest->hp_waiter_ptr)) { + hp = *(nest->hp_waiter_ptr); + + if(tsk_rt(hp)->inh_task) { + hp_eff = tsk_rt(hp)->inh_task; + } + } + + node_prio = nest->hp_waiter_eff_prio; + + TRACE("%s+-> %s/%d [waiter = %s/%d] [waiter's inh = %s/%d] (lock = %d)\n", + padding, + (node_prio) ? node_prio->comm : "nil", + (node_prio) ? node_prio->pid : -1, + (hp) ? hp->comm : "nil", + (hp) ? hp->pid : -1, + (hp_eff) ? hp_eff->comm : "nil", + (hp_eff) ? hp_eff->pid : -1, + l->ident); + + if(n->left) print_hp_waiters(n->left, depth+1); + if(n->right) print_hp_waiters(n->right, depth+1); +} +#endif + + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + +void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) +{ + /* + We pick the next lock in reverse order. 
This causes inheritance propagation + from locks received earlier to flow in the same direction as regular nested + locking. This might make fine-grain DGL easier in the future. + */ + + BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); + + //WARN_ON(dgl_wait->locks[dgl_wait->last_primary] != prev_lock); + + // note reverse order + for(dgl_wait->last_primary = dgl_wait->last_primary - 1; + dgl_wait->last_primary >= 0; + --(dgl_wait->last_primary)){ + if(!dgl_wait->locks[dgl_wait->last_primary]->ops->is_owner( + dgl_wait->locks[dgl_wait->last_primary], dgl_wait->task)) { + + tsk_rt(dgl_wait->task)->blocked_lock = + dgl_wait->locks[dgl_wait->last_primary]; + mb(); + + TRACE_CUR("New blocked lock is %d\n", + dgl_wait->locks[dgl_wait->last_primary]->ident); + + break; + } + } +} + +int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) +{ + // should never be called. + BUG(); + return 1; +} + +void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, + dgl_wait_state_t** dgl_wait, + struct task_struct **task) +{ + wait_queue_t *q; + + *dgl_wait = NULL; + *task = NULL; + + if (waitqueue_active(wq)) { + q = list_entry(wq->task_list.next, + wait_queue_t, task_list); + + if(q->func == dgl_wake_up) { + *dgl_wait = (dgl_wait_state_t*) q->private; + } + else { + *task = (struct task_struct*) q->private; + } + + __remove_wait_queue(wq, q); + } +} + +void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait) +{ + init_waitqueue_entry(wq_node, dgl_wait->task); + wq_node->private = dgl_wait; + wq_node->func = dgl_wake_up; +} + + +static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) +{ + int i; + unsigned long irqflags; //, dummyflags; + raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); + + BUG_ON(dgl_wait->task != current); + + raw_spin_lock_irqsave(dgl_lock, irqflags); + + + dgl_wait->nr_remaining = dgl_wait->size; + + TRACE_CUR("Locking DGL with size %d\n", dgl_wait->size); + + // try to acquire each lock. enqueue (non-blocking) if it is unavailable. + for(i = 0; i < dgl_wait->size; ++i) { + struct litmus_lock *l = dgl_wait->locks[i]; + + // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. + + if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) { + --(dgl_wait->nr_remaining); + TRACE_CUR("Acquired lock %d immediatly.\n", l->ident); + } + } + + if(dgl_wait->nr_remaining == 0) { + // acquired entire group immediatly + TRACE_CUR("Acquired all locks in DGL immediatly!\n"); + } + else { + + TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", + dgl_wait->nr_remaining); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // KLUDGE: don't count this suspension as time in the critical gpu + // critical section + if(tsk_rt(dgl_wait->task)->held_gpus) { + tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; + } +#endif + + // note reverse order. see comments in select_next_lock for reason. + for(i = dgl_wait->size - 1; i >= 0; --i) { + struct litmus_lock *l = dgl_wait->locks[i]; + if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe + + TRACE_CUR("Activating priority inheritance on lock %d\n", + l->ident); + + TS_DGL_LOCK_SUSPEND; + + l->ops->enable_priority(l, dgl_wait); + dgl_wait->last_primary = i; + + TRACE_CUR("Suspending for lock %d\n", l->ident); + + raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending + + schedule(); // suspend!!! 
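/* Note: by the time schedule() returns, releasing tasks have granted every
 * remaining lock in the group to this task -- select_next_lock() walks
 * last_primary backwards as each lock is handed over -- so all locks are
 * held and control jumps to all_acquired below. */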
+ + TS_DGL_LOCK_RESUME; + + TRACE_CUR("Woken up from DGL suspension.\n"); + + goto all_acquired; // we should hold all locks when we wake up. + } + } + + TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n"); + //BUG(); + } + + raw_spin_unlock_irqrestore(dgl_lock, irqflags); + +all_acquired: + + // FOR SANITY CHECK FOR TESTING +// for(i = 0; i < dgl_wait->size; ++i) { +// struct litmus_lock *l = dgl_wait->locks[i]; +// BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); +// } + + TRACE_CUR("Acquired entire DGL\n"); + + return 0; +} + +static int supports_dgl(struct litmus_lock *l) +{ + struct litmus_lock_ops* ops = l->ops; + + return (ops->dgl_lock && + ops->is_owner && + ops->enable_priority); +} + +asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) +{ + struct task_struct *t = current; + long err = -EINVAL; + int dgl_ods[MAX_DGL_SIZE]; + int i; + + dgl_wait_state_t dgl_wait_state; // lives on the stack until all resources in DGL are held. + + if(dgl_size > MAX_DGL_SIZE || dgl_size < 1) + goto out; + + if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int)))) + goto out; + + if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int)))) + goto out; + + if (!is_realtime(t)) { + err = -EPERM; + goto out; + } + + for(i = 0; i < dgl_size; ++i) { + struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); + if(entry && is_lock(entry)) { + dgl_wait_state.locks[i] = get_lock(entry); + if(!supports_dgl(dgl_wait_state.locks[i])) { + TRACE_CUR("Lock %d does not support all required DGL operations.\n", + dgl_wait_state.locks[i]->ident); + goto out; + } + } + else { + TRACE_CUR("Invalid lock identifier\n"); + goto out; + } + } + + dgl_wait_state.task = t; + dgl_wait_state.size = dgl_size; + + TS_DGL_LOCK_START; + err = do_litmus_dgl_lock(&dgl_wait_state); + + /* Note: task my have been suspended or preempted in between! Take + * this into account when computing overheads. */ + TS_DGL_LOCK_END; + +out: + return err; +} + +static long do_litmus_dgl_unlock(struct litmus_lock* dgl_locks[], int dgl_size) +{ + int i; + long err = 0; + + TRACE_CUR("Unlocking a DGL of %d size\n", dgl_size); + + for(i = dgl_size - 1; i >= 0; --i) { // unlock in reverse order + + struct litmus_lock *l = dgl_locks[i]; + long tmp_err; + + TRACE_CUR("Unlocking lock %d of DGL.\n", l->ident); + + tmp_err = l->ops->unlock(l); + + if(tmp_err) { + TRACE_CUR("There was an error unlocking %d: %d.\n", l->ident, tmp_err); + err = tmp_err; + } + } + + TRACE_CUR("DGL unlocked. err = %d\n", err); + + return err; +} + +asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size) +{ + long err = -EINVAL; + int dgl_ods[MAX_DGL_SIZE]; + struct od_table_entry* entry; + int i; + + struct litmus_lock* dgl_locks[MAX_DGL_SIZE]; + + if(dgl_size > MAX_DGL_SIZE || dgl_size < 1) + goto out; + + if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int)))) + goto out; + + if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int)))) + goto out; + + for(i = 0; i < dgl_size; ++i) { + entry = get_entry_for_od(dgl_ods[i]); + if(entry && is_lock(entry)) { + dgl_locks[i] = get_lock(entry); + if(!supports_dgl(dgl_locks[i])) { + TRACE_CUR("Lock %d does not support all required DGL operations.\n", + dgl_locks[i]->ident); + goto out; + } + } + else { + TRACE_CUR("Invalid lock identifier\n"); + goto out; + } + } + + TS_DGL_UNLOCK_START; + err = do_litmus_dgl_unlock(dgl_locks, dgl_size); + + /* Note: task my have been suspended or preempted in between! 
Take + * this into account when computing overheads. */ + TS_DGL_UNLOCK_END; + +out: + return err; +} + +#else // CONFIG_LITMUS_DGL_SUPPORT + +asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) +{ + return -ENOSYS; +} + +asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size) +{ + return -ENOSYS; +} + +#endif -#else +#else // CONFIG_LITMUS_LOCKING struct fdso_ops generic_lock_ops = {}; diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c new file mode 100644 index 000000000000..4b86a50d3bd1 --- /dev/null +++ b/litmus/nvidia_info.c @@ -0,0 +1,597 @@ +#include +#include +#include + +#include +#include +#include + +#include + +#include + +typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ +typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ +typedef unsigned char NvU8; /* 0 to 255 */ +typedef unsigned short NvU16; /* 0 to 65535 */ +typedef signed char NvS8; /* -128 to 127 */ +typedef signed short NvS16; /* -32768 to 32767 */ +typedef float NvF32; /* IEEE Single Precision (S1E8M23) */ +typedef double NvF64; /* IEEE Double Precision (S1E11M52) */ +typedef unsigned int NvV32; /* "void": enumerated or multiple fields */ +typedef unsigned int NvU32; /* 0 to 4294967295 */ +typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */ +typedef union +{ + volatile NvV8 Reg008[1]; + volatile NvV16 Reg016[1]; + volatile NvV32 Reg032[1]; +} litmus_nv_hwreg_t, * litmus_nv_phwreg_t; + +typedef struct +{ + NvU64 address; + NvU64 size; + NvU32 offset; + NvU32 *map; + litmus_nv_phwreg_t map_u; +} litmus_nv_aperture_t; + +typedef struct +{ + void *priv; /* private data */ + void *os_state; /* os-specific device state */ + + int rmInitialized; + int flags; + + /* PCI config info */ + NvU32 domain; + NvU16 bus; + NvU16 slot; + NvU16 vendor_id; + NvU16 device_id; + NvU16 subsystem_id; + NvU32 gpu_id; + void *handle; + + NvU32 pci_cfg_space[16]; + + /* physical characteristics */ + litmus_nv_aperture_t bars[3]; + litmus_nv_aperture_t *regs; + litmus_nv_aperture_t *fb, ud; + litmus_nv_aperture_t agp; + + NvU32 interrupt_line; + + NvU32 agp_config; + NvU32 agp_status; + + NvU32 primary_vga; + + NvU32 sim_env; + + NvU32 rc_timer_enabled; + + /* list of events allocated for this device */ + void *event_list; + + void *kern_mappings; + +} litmus_nv_state_t; + +typedef struct work_struct litmus_nv_task_t; + +typedef struct litmus_nv_work_s { + litmus_nv_task_t task; + void *data; +} litmus_nv_work_t; + +typedef struct litmus_nv_linux_state_s { + litmus_nv_state_t nv_state; + atomic_t usage_count; + + struct pci_dev *dev; + void *agp_bridge; + void *alloc_queue; + + void *timer_sp; + void *isr_sp; + void *pci_cfgchk_sp; + void *isr_bh_sp; + +#ifdef CONFIG_CUDA_4_0 + char registry_keys[512]; +#endif + + /* keep track of any pending bottom halfes */ + struct tasklet_struct tasklet; + litmus_nv_work_t work; + + /* get a timer callback every second */ + struct timer_list rc_timer; + + /* lock for linux-specific data, not used by core rm */ + struct semaphore ldata_lock; + + /* lock for linux-specific alloc queue */ + struct semaphore at_lock; + +#if 0 +#if defined(NV_USER_MAP) + /* list of user mappings */ + struct nv_usermap_s *usermap_list; + + /* lock for VMware-specific mapping list */ + struct semaphore mt_lock; +#endif /* defined(NV_USER_MAP) */ +#if defined(NV_PM_SUPPORT_OLD_STYLE_APM) + void *apm_nv_dev; +#endif +#endif + + NvU32 device_num; + struct litmus_nv_linux_state_s *next; +} litmus_nv_linux_state_t; + +void 
dump_nvidia_info(const struct tasklet_struct *t) +{ + litmus_nv_state_t* nvstate = NULL; + litmus_nv_linux_state_t* linuxstate = NULL; + struct pci_dev* pci = NULL; + + nvstate = (litmus_nv_state_t*)(t->data); + + if(nvstate) + { + TRACE("NV State:\n" + "\ttasklet ptr = %p\n" + "\tstate ptr = %p\n" + "\tprivate data ptr = %p\n" + "\tos state ptr = %p\n" + "\tdomain = %u\n" + "\tbus = %u\n" + "\tslot = %u\n" + "\tvender_id = %u\n" + "\tdevice_id = %u\n" + "\tsubsystem_id = %u\n" + "\tgpu_id = %u\n" + "\tinterrupt_line = %u\n", + t, + nvstate, + nvstate->priv, + nvstate->os_state, + nvstate->domain, + nvstate->bus, + nvstate->slot, + nvstate->vendor_id, + nvstate->device_id, + nvstate->subsystem_id, + nvstate->gpu_id, + nvstate->interrupt_line); + + linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); + } + else + { + TRACE("INVALID NVSTATE????\n"); + } + + if(linuxstate) + { + int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate); + int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state)); + int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); + + + TRACE("LINUX NV State:\n" + "\tlinux nv state ptr: %p\n" + "\taddress of tasklet: %p\n" + "\taddress of work: %p\n" + "\tusage_count: %d\n" + "\tdevice_num: %u\n" + "\ttasklet addr == this tasklet: %d\n" + "\tpci: %p\n", + linuxstate, + &(linuxstate->tasklet), + &(linuxstate->work), + atomic_read(&(linuxstate->usage_count)), + linuxstate->device_num, + (t == &(linuxstate->tasklet)), + linuxstate->dev); + + pci = linuxstate->dev; + + TRACE("Offsets:\n" + "\tOffset from LinuxState: %d, %x\n" + "\tOffset from NVState: %d, %x\n" + "\tOffset from parameter: %d, %x\n" + "\tdevice_num: %u\n", + ls_offset, ls_offset, + ns_offset_raw, ns_offset_raw, + ns_offset_desired, ns_offset_desired, + *((u32*)((void*)nvstate + ns_offset_desired))); + } + else + { + TRACE("INVALID LINUXNVSTATE?????\n"); + } + +#if 0 + if(pci) + { + TRACE("PCI DEV Info:\n" + "pci device ptr: %p\n" + "\tdevfn = %d\n" + "\tvendor = %d\n" + "\tdevice = %d\n" + "\tsubsystem_vendor = %d\n" + "\tsubsystem_device = %d\n" + "\tslot # = %d\n", + pci, + pci->devfn, + pci->vendor, + pci->device, + pci->subsystem_vendor, + pci->subsystem_device, + pci->slot->number); + } + else + { + TRACE("INVALID PCIDEV PTR?????\n"); + } +#endif +} + +static struct module* nvidia_mod = NULL; +int init_nvidia_info(void) +{ + mutex_lock(&module_mutex); + nvidia_mod = find_module("nvidia"); + mutex_unlock(&module_mutex); + if(nvidia_mod != NULL) + { + TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, + (void*)(nvidia_mod->module_core), + (void*)(nvidia_mod->module_core) + nvidia_mod->core_size); + init_nv_device_reg(); + return(0); + } + else + { + TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); + return(-1); + } +} + +void shutdown_nvidia_info(void) +{ + nvidia_mod = NULL; + mb(); +} + +/* works with pointers to static data inside the module too. */ +int is_nvidia_func(void* func_addr) +{ + int ret = 0; + if(nvidia_mod) + { + ret = within_module_core((long unsigned int)func_addr, nvidia_mod); + /* + if(ret) + { + TRACE("%s : %p is in NVIDIA module: %d\n", + __FUNCTION__, func_addr, ret); + }*/ + } + + return(ret); +} + +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) +{ + // life is too short to use hard-coded offsets. update this later. 
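/* Note: t->data points at the nv_state_t embedded in the driver's
 * litmus_nv_linux_state_t (mirrored above), so container_of() recovers the
 * enclosing structure and its device_num without the driver-version-specific
 * byte offsets kept for reference in the #if 0 block below. */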
+ litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data); + litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); + + BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); + + return(linuxstate->device_num); + + //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); + +#if 0 + // offset determined though observed behavior of the NV driver. + //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1 + //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2 + + void* state = (void*)(t->data); + void* device_num_ptr = state + DEVICE_NUM_OFFSET; + + //dump_nvidia_info(t); + return(*((u32*)device_num_ptr)); +#endif +} + +u32 get_work_nv_device_num(const struct work_struct *t) +{ + // offset determined though observed behavior of the NV driver. + const int DEVICE_NUM_OFFSET = sizeof(struct work_struct); + void* state = (void*)(t); + void** device_num_ptr = state + DEVICE_NUM_OFFSET; + return(*((u32*)(*device_num_ptr))); +} + + +typedef struct { + raw_spinlock_t lock; + int nr_owners; + struct task_struct* max_prio_owner; + struct task_struct* owners[NV_MAX_SIMULT_USERS]; +}nv_device_registry_t; + +static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; + +int init_nv_device_reg(void) +{ + int i; + + memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); + + for(i = 0; i < NV_DEVICE_NUM; ++i) + { + raw_spin_lock_init(&NV_DEVICE_REG[i].lock); + } + + return(1); +} + +/* use to get nv_device_id by given owner. + (if return -1, can't get the assocaite device id)*/ +/* +int get_nv_device_id(struct task_struct* owner) +{ + int i; + if(!owner) + { + return(-1); + } + for(i = 0; i < NV_DEVICE_NUM; ++i) + { + if(NV_DEVICE_REG[i].device_owner == owner) + return(i); + } + return(-1); +} +*/ + +static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) { + int i; + struct task_struct *found = NULL; + for(i = 0; i < reg->nr_owners; ++i) { + if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) { + found = reg->owners[i]; + } + } + return found; +} + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +void pai_check_priority_increase(struct task_struct *t, int reg_device_id) +{ + unsigned long flags; + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + + if(reg->max_prio_owner != t) { + + raw_spin_lock_irqsave(®->lock, flags); + + if(reg->max_prio_owner != t) { + if(litmus->compare(t, reg->max_prio_owner)) { + litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); + reg->max_prio_owner = t; + } + } + + raw_spin_unlock_irqrestore(®->lock, flags); + } +} + + +void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) +{ + unsigned long flags; + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + + if(reg->max_prio_owner == t) { + + raw_spin_lock_irqsave(®->lock, flags); + + if(reg->max_prio_owner == t) { + reg->max_prio_owner = find_hp_owner(reg, NULL); + if(reg->max_prio_owner != t) { + litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); + } + } + + raw_spin_unlock_irqrestore(®->lock, flags); + } +} +#endif + +static int __reg_nv_device(int reg_device_id, struct task_struct *t) +{ + int ret = 0; + int i; + struct task_struct *old_max = NULL; + unsigned long flags; + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + + if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) { + // TODO: check if taks is already registered. + return ret; // assume already registered. 
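/* Note: per-task GPU registration is tracked in the held_gpus bitmask, so
 * re-registering a device this task already holds is treated as a no-op
 * (returns 0); the TODO above points out that no explicit duplicate check
 * is made against the owners[] table. */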
+ } + + + raw_spin_lock_irqsave(®->lock, flags); + + if(reg->nr_owners < NV_MAX_SIMULT_USERS) { + TRACE_TASK(t, "registers GPU %d\n", reg_device_id); + for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { + if(reg->owners[i] == NULL) { + reg->owners[i] = t; + + //if(edf_higher_prio(t, reg->max_prio_owner)) { + if(litmus->compare(t, reg->max_prio_owner)) { + old_max = reg->max_prio_owner; + reg->max_prio_owner = t; + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + litmus->change_prio_pai_tasklet(old_max, t); +#endif + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem); +#endif + ++(reg->nr_owners); + + break; + } + } + } + else + { + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); + //ret = -EBUSY; + } + + raw_spin_unlock_irqrestore(®->lock, flags); + + __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); + + return(ret); +} + +static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) +{ + int ret = 0; + int i; + unsigned long flags; + nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; + +#ifdef CONFIG_LITMUS_SOFTIRQD + struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); +#endif + + if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { + return ret; + } + + raw_spin_lock_irqsave(®->lock, flags); + + TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); + + for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { + if(reg->owners[i] == t) { +#ifdef CONFIG_LITMUS_SOFTIRQD + flush_pending(klitirqd_th, t); +#endif + if(reg->max_prio_owner == t) { + reg->max_prio_owner = find_hp_owner(reg, t); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); +#endif + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem); +#endif + + reg->owners[i] = NULL; + --(reg->nr_owners); + + break; + } + } + + raw_spin_unlock_irqrestore(®->lock, flags); + + __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); + + return(ret); +} + + +int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) +{ + int ret; + + if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) + { + if(reg_action) + ret = __reg_nv_device(reg_device_id, t); + else + ret = __clear_reg_nv_device(reg_device_id, t); + } + else + { + ret = -ENODEV; + } + + return(ret); +} + +/* use to get the owner of nv_device_id. 
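/*
 * Registration in __reg_nv_device() boils down to: bail out if the task's
 * held_gpus bit is already set, claim a free owner slot under the registry
 * lock, possibly promote max_prio_owner, and finally set the bit. A
 * standalone sketch of that bookkeeping with locking omitted and integer
 * task ids; MAX_USERS is illustrative. Unlike the hunk above (which leaves
 * ret at 0 with -EBUSY commented out), the full-table case here returns an
 * error so the outcome is visible.
 */
#include <stdio.h>
#include <string.h>

#define MAX_USERS 3

struct gpu_reg {
	int nr_owners;
	int owners[MAX_USERS];               /* task ids; 0 marks a free slot */
};

static unsigned long held_gpus;              /* per-task bitmask in the patch */

static int reg_gpu(struct gpu_reg *reg, int gpu, int task)
{
	int i;

	if (held_gpus & (1UL << gpu))
		return 0;                    /* already registered: nothing to do */

	for (i = 0; i < MAX_USERS; i++) {
		if (!reg->owners[i]) {
			reg->owners[i] = task;       /* claim a free owner slot */
			reg->nr_owners++;
			held_gpus |= 1UL << gpu;     /* remember we hold this GPU */
			return 0;
		}
	}
	return -1;                           /* no free slot: device fully subscribed */
}

int main(void)
{
	struct gpu_reg reg;

	memset(&reg, 0, sizeof(reg));
	printf("register: %d, held mask: 0x%lx\n", reg_gpu(&reg, 0, 42), held_gpus);
	return 0;
}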
*/ +struct task_struct* get_nv_max_device_owner(u32 target_device_id) +{ + struct task_struct *owner = NULL; + BUG_ON(target_device_id >= NV_DEVICE_NUM); + owner = NV_DEVICE_REG[target_device_id].max_prio_owner; + return(owner); +} + +void lock_nv_registry(u32 target_device_id, unsigned long* flags) +{ + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + if(in_interrupt()) + TRACE("Locking registry for %d.\n", target_device_id); + else + TRACE_CUR("Locking registry for %d.\n", target_device_id); + + raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); +} + +void unlock_nv_registry(u32 target_device_id, unsigned long* flags) +{ + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + if(in_interrupt()) + TRACE("Unlocking registry for %d.\n", target_device_id); + else + TRACE_CUR("Unlocking registry for %d.\n", target_device_id); + + raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); +} + + +//void increment_nv_int_count(u32 device) +//{ +// unsigned long flags; +// struct task_struct* owner; +// +// lock_nv_registry(device, &flags); +// +// owner = NV_DEVICE_REG[device].device_owner; +// if(owner) +// { +// atomic_inc(&tsk_rt(owner)->nv_int_count); +// } +// +// unlock_nv_registry(device, &flags); +//} +//EXPORT_SYMBOL(increment_nv_int_count); + + diff --git a/litmus/preempt.c b/litmus/preempt.c index 5704d0bf4c0b..28368d5bc046 100644 --- a/litmus/preempt.c +++ b/litmus/preempt.c @@ -30,6 +30,7 @@ void sched_state_will_schedule(struct task_struct* tsk) /* Litmus tasks should never be subject to a remote * set_tsk_need_resched(). */ BUG_ON(is_realtime(tsk)); + #ifdef CONFIG_PREEMPT_STATE_TRACE TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", __builtin_return_address(0)); @@ -45,13 +46,17 @@ void sched_state_ipi(void) /* Cause scheduler to be invoked. * This will cause a transition to WILL_SCHEDULE. */ set_tsk_need_resched(current); + /* TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", current->comm, current->pid); + */ } else { /* ignore */ + /* TRACE_STATE("ignoring IPI in state %x (%s)\n", get_sched_state(), sched_state_name(get_sched_state())); + */ } } diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c new file mode 100644 index 000000000000..75ed87c5ed48 --- /dev/null +++ b/litmus/rsm_lock.c @@ -0,0 +1,796 @@ +#include +#include + +#include +#include +#include + +//#include + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif + + +/* caller is responsible for locking */ +static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex, + struct task_struct* skip) +{ + wait_queue_t *q; + struct list_head *pos; + struct task_struct *queued = NULL, *found = NULL; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_wait_state_t *dgl_wait = NULL; +#endif + + list_for_each(pos, &mutex->wait.task_list) { + q = list_entry(pos, wait_queue_t, task_list); + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + if(q->func == dgl_wake_up) { + dgl_wait = (dgl_wait_state_t*) q->private; + if(tsk_rt(dgl_wait->task)->blocked_lock == &mutex->litmus_lock) { + queued = dgl_wait->task; + } + else { + queued = NULL; // skip it. + } + } + else { + queued = (struct task_struct*) q->private; + } +#else + queued = (struct task_struct*) q->private; +#endif + + /* Compare task prios, find high prio task. 
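/*
 * lock_nv_registry()/unlock_nv_registry() take the flags word by pointer
 * because they wrap raw_spin_lock_irqsave()/raw_spin_unlock_irqrestore(),
 * which must store the saved IRQ state in memory owned by the caller.
 * A minimal user-space model of that calling convention; the pthread mutex
 * stands in for the registry spinlock and the flags value is a dummy.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

static void lock_registry(unsigned long *flags)
{
	*flags = 1;                          /* kernel: saved IRQ state goes here */
	pthread_mutex_lock(&registry_lock);
}

static void unlock_registry(unsigned long *flags)
{
	pthread_mutex_unlock(&registry_lock);
	(void)*flags;                        /* kernel: IRQ state restored from here */
}

int main(void)
{
	unsigned long flags;

	lock_registry(&flags);
	printf("registry held; flags = %lu\n", flags);
	unlock_registry(&flags);
	return 0;
}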
*/ + //if (queued && queued != skip && edf_higher_prio(queued, found)) { + if (queued && queued != skip && litmus->compare(queued, found)) { + found = queued; + } + } + return found; +} + + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + +int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + return(mutex->owner == t); +} + +// return 1 if resource was immediatly acquired. +// Assumes mutex->lock is held. +// Must set task state to TASK_UNINTERRUPTIBLE if task blocks. +int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, + wait_queue_t* wq_node) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + struct task_struct *t = dgl_wait->task; + + int acquired_immediatly = 0; + + BUG_ON(t != current); + + if (mutex->owner) { + TRACE_TASK(t, "Enqueuing on lock %d.\n", l->ident); + + init_dgl_waitqueue_entry(wq_node, dgl_wait); + + set_task_state(t, TASK_UNINTERRUPTIBLE); + __add_wait_queue_tail_exclusive(&mutex->wait, wq_node); + } else { + TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident); + + /* it's ours now */ + mutex->owner = t; + + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); + + acquired_immediatly = 1; + } + + return acquired_immediatly; +} + +void rsm_mutex_enable_priority(struct litmus_lock *l, + dgl_wait_state_t* dgl_wait) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + struct task_struct *t = dgl_wait->task; + struct task_struct *owner = mutex->owner; + unsigned long flags = 0; // these are unused under DGL coarse-grain locking + + BUG_ON(owner == t); + + tsk_rt(t)->blocked_lock = l; + mb(); + + //if (edf_higher_prio(t, mutex->hp_waiter)) { + if (litmus->compare(t, mutex->hp_waiter)) { + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + struct task_struct *new_prio = NULL; + + if(mutex->hp_waiter) + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", + mutex->hp_waiter->comm, mutex->hp_waiter->pid); + else + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + mutex->hp_waiter = t; + l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter); + binheap_decrease(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks); + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if(new_max_eff_prio != old_max_eff_prio) { + TRACE_TASK(t, "is new hp_waiter.\n"); + + if ((effective_priority(owner) == old_max_eff_prio) || + //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){ + (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){ + new_prio = new_max_eff_prio; + } + } + else { + TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); + } + + if(new_prio) { + litmus->nested_increase_prio(owner, new_prio, + &mutex->lock, flags); // unlocks lock. 
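/*
 * Both the DGL and non-DGL acquire paths use the same test to decide whether
 * the holder needs a new inherited priority: record the top of the owner's
 * hp_blocked_tasks heap, apply the heap update, and propagate only if the
 * top changed and the owner was running at the old top (or the new top beats
 * the owner's effective priority). A standalone sketch of that test, with a
 * linear scan standing in for the binheap and larger-int = higher priority.
 */
#include <stdio.h>

#define MAX_WAITERS 8

struct prio_heap { int n, prio[MAX_WAITERS]; };

static int top(const struct prio_heap *h)
{
	int i, best = -1;                    /* -1 means "heap is empty" */

	for (i = 0; i < h->n; i++)
		if (h->prio[i] > best)
			best = h->prio[i];
	return best;
}

/* Returns the priority the owner should now inherit, or -1 for "no change". */
static int add_waiter(struct prio_heap *h, int waiter_prio, int owner_eff_prio)
{
	int old_max = top(h);
	int new_max;

	h->prio[h->n++] = waiter_prio;       /* the heap update */
	new_max = top(h);

	if (new_max != old_max &&
	    (owner_eff_prio == old_max || new_max > owner_eff_prio))
		return new_max;              /* propagate new maximum to the owner */
	return -1;
}

int main(void)
{
	struct prio_heap h = { 0 };
	int inh = add_waiter(&h, 5, 3);      /* owner's base priority is 3 */

	printf("first waiter  -> owner inherits %d\n", inh);                    /* 5 */
	printf("second waiter -> owner inherits %d\n", add_waiter(&h, 9, inh)); /* 9 */
	return 0;
}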
+ } + else { + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, flags); + } + } + else { + TRACE_TASK(t, "no change in hp_waiter.\n"); + unlock_fine_irqrestore(&mutex->lock, flags); + } +} + +static void select_next_lock_if_primary(struct litmus_lock *l, + dgl_wait_state_t *dgl_wait) +{ + if(tsk_rt(dgl_wait->task)->blocked_lock == l) { + TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n", + l->ident, dgl_wait->task->comm, dgl_wait->task->pid); + tsk_rt(dgl_wait->task)->blocked_lock = NULL; + mb(); + select_next_lock(dgl_wait /*, l*/); // pick the next lock to be blocked on + } + else { + TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n", + l->ident, dgl_wait->task->comm, dgl_wait->task->pid); + } +} +#endif + + + + +int rsm_mutex_lock(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct task_struct *owner; + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + wait_queue_t wait; + unsigned long flags; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock; +#endif + + if (!is_realtime(t)) + return -EPERM; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&mutex->lock, flags); + + if (mutex->owner) { + TRACE_TASK(t, "Blocking on lock %d.\n", l->ident); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // KLUDGE: don't count this suspension as time in the critical gpu + // critical section + if(tsk_rt(t)->held_gpus) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } +#endif + + /* resource is not free => must suspend and wait */ + + owner = mutex->owner; + + init_waitqueue_entry(&wait, t); + + tsk_rt(t)->blocked_lock = l; /* record where we are blocked */ + mb(); // needed? + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&mutex->wait, &wait); + + /* check if we need to activate priority inheritance */ + //if (edf_higher_prio(t, mutex->hp_waiter)) { + if (litmus->compare(t, mutex->hp_waiter)) { + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + struct task_struct *new_prio = NULL; + + if(mutex->hp_waiter) + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", + mutex->hp_waiter->comm, mutex->hp_waiter->pid); + else + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + mutex->hp_waiter = t; + l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter); + binheap_decrease(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks); + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if(new_max_eff_prio != old_max_eff_prio) { + TRACE_TASK(t, "is new hp_waiter.\n"); + + if ((effective_priority(owner) == old_max_eff_prio) || + //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){ + (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){ + new_prio = new_max_eff_prio; + } + } + else { + TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); + } + + if(new_prio) { + litmus->nested_increase_prio(owner, new_prio, &mutex->lock, + flags); // unlocks lock. 
+ } + else { + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, flags); + } + } + else { + TRACE_TASK(t, "no change in hp_waiter.\n"); + + unlock_fine_irqrestore(&mutex->lock, flags); + } + + unlock_global_irqrestore(dgl_lock, flags); + + TS_LOCK_SUSPEND; + + /* We depend on the FIFO order. Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release. + */ + + schedule(); + + TS_LOCK_RESUME; + + /* Since we hold the lock, no other task will change + * ->owner. We can thus check it without acquiring the spin + * lock. */ + BUG_ON(mutex->owner != t); + + TRACE_TASK(t, "Acquired lock %d.\n", l->ident); + + } else { + TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident); + + /* it's ours now */ + mutex->owner = t; + + raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock); + binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock); + + + unlock_fine_irqrestore(&mutex->lock, flags); + unlock_global_irqrestore(dgl_lock, flags); + } + + return 0; +} + + + +int rsm_mutex_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next = NULL; + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + unsigned long flags; + + struct task_struct *old_max_eff_prio; + + int wake_up_task = 1; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_wait_state_t *dgl_wait = NULL; + raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + int err = 0; + + if (mutex->owner != t) { + err = -EINVAL; + return err; + } + + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&mutex->lock, flags); + + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + + TRACE_TASK(t, "Freeing lock %d\n", l->ident); + + old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); + binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks); + + if(tsk_rt(t)->inh_task){ + struct task_struct *new_max_eff_prio = + top_priority(&tsk_rt(t)->hp_blocked_tasks); + + if((new_max_eff_prio == NULL) || + /* there was a change in eff prio */ + ( (new_max_eff_prio != old_max_eff_prio) && + /* and owner had the old eff prio */ + (effective_priority(t) == old_max_eff_prio)) ) + { + // old_max_eff_prio > new_max_eff_prio + + //if(__edf_higher_prio(new_max_eff_prio, BASE, t, EFFECTIVE)) { + if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) { + TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d task: %s/%d [%s/%d]\n", + new_max_eff_prio->comm, new_max_eff_prio->pid, + t->comm, t->pid, tsk_rt(t)->inh_task->comm, + tsk_rt(t)->inh_task->pid); + WARN_ON(1); + } + + litmus->decrease_prio(t, new_max_eff_prio); + } + } + + if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) && + tsk_rt(t)->inh_task != NULL) + { + WARN_ON(tsk_rt(t)->inh_task != NULL); + TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n", + tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid); + } + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); + + + /* check if there are jobs waiting for this resource */ +#ifdef CONFIG_LITMUS_DGL_SUPPORT + __waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait, &next); + if(dgl_wait) { + next = dgl_wait->task; + //select_next_lock_if_primary(l, dgl_wait); + } +#else + next = __waitqueue_remove_first(&mutex->wait); +#endif + if (next) { + /* next becomes the resouce holder */ + mutex->owner = next; + TRACE_CUR("lock ownership passed to 
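/*
 * The "we depend on the FIFO order" comment above is the key invariant: the
 * releaser assigns mutex->owner to the head waiter before issuing the single
 * wake-up, so the woken task never re-contends; it only asserts ownership.
 * A two-thread user-space model of that hand-off, with a condition variable
 * standing in for the kernel wait queue.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int owner;                            /* 0 = free, 1 = the waiter thread */

static void *waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&m);
	while (owner != 1)                   /* "suspended" until handed the lock */
		pthread_cond_wait(&cv, &m);
	pthread_mutex_unlock(&m);

	/* Like the patch: ownership was assigned before the wake-up. */
	assert(owner == 1);
	printf("waiter: resource handed over, no re-check needed\n");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	pthread_mutex_lock(&m);
	owner = 1;                           /* pass ownership first... */
	pthread_cond_signal(&cv);            /* ...then the one wake-up per release */
	pthread_mutex_unlock(&m);
	pthread_join(t, NULL);
	return 0;
}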
%s/%d\n", next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == mutex->hp_waiter) { + + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, next); + l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ? + effective_priority(mutex->hp_waiter) : + NULL; + + if (mutex->hp_waiter) + TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n"); + else + TRACE("no further waiters\n"); + + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); + + binheap_add(&l->nest.hp_binheap_node, + &tsk_rt(next)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + if(dgl_wait) { + select_next_lock_if_primary(l, dgl_wait); + //wake_up_task = atomic_dec_and_test(&dgl_wait->nr_remaining); + --(dgl_wait->nr_remaining); + wake_up_task = (dgl_wait->nr_remaining == 0); + } +#endif + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); + } + else { + /* Well, if 'next' is not the highest-priority waiter, + * then it (probably) ought to inherit the highest-priority + * waiter's priority. */ + TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident); + + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); + + binheap_add(&l->nest.hp_binheap_node, + &tsk_rt(next)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + if(dgl_wait) { + select_next_lock_if_primary(l, dgl_wait); + --(dgl_wait->nr_remaining); + wake_up_task = (dgl_wait->nr_remaining == 0); + } +#endif + + /* It is possible that 'next' *should* be the hp_waiter, but isn't + * because that update hasn't yet executed (update operation is + * probably blocked on mutex->lock). So only inherit if the top of + * 'next's top heap node is indeed the effective prio. of hp_waiter. + * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter) + * since the effective priority of hp_waiter can change (and the + * update has not made it to this lock).) + */ +#ifdef CONFIG_LITMUS_DGL_SUPPORT + if((l->nest.hp_waiter_eff_prio != NULL) && + (top_priority(&tsk_rt(next)->hp_blocked_tasks) == + l->nest.hp_waiter_eff_prio)) + { + if(dgl_wait && tsk_rt(next)->blocked_lock) { + BUG_ON(wake_up_task); + //if(__edf_higher_prio(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) { + if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) { + litmus->nested_increase_prio(next, + l->nest.hp_waiter_eff_prio, &mutex->lock, flags); // unlocks lock && hp_blocked_tasks_lock. + goto out; // all spinlocks are released. bail out now. 
+ } + } + else { + litmus->increase_prio(next, l->nest.hp_waiter_eff_prio); + } + } + + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); +#else + if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) == + l->nest.hp_waiter_eff_prio)) + { + litmus->increase_prio(next, l->nest.hp_waiter_eff_prio); + } + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); +#endif + } + + if(wake_up_task) { + TRACE_TASK(next, "waking up since it is no longer blocked.\n"); + + tsk_rt(next)->blocked_lock = NULL; + mb(); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // re-enable tracking + if(tsk_rt(next)->held_gpus) { + tsk_rt(next)->suspend_gpu_tracker_on_block = 0; + } +#endif + + wake_up_process(next); + } + else { + TRACE_TASK(next, "is still blocked.\n"); + } + } + else { + /* becomes available */ + mutex->owner = NULL; + } + + unlock_fine_irqrestore(&mutex->lock, flags); + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +out: +#endif + unlock_global_irqrestore(dgl_lock, flags); + + return err; +} + + +void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l, + struct task_struct* t, + raw_spinlock_t* to_unlock, + unsigned long irqflags) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + + // relay-style locking + lock_fine(&mutex->lock); + unlock_fine(to_unlock); + + if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked + struct task_struct *owner = mutex->owner; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + //if((t != mutex->hp_waiter) && edf_higher_prio(t, mutex->hp_waiter)) { + if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) { + TRACE_TASK(t, "is new highest-prio waiter by propagation.\n"); + mutex->hp_waiter = t; + } + if(t == mutex->hp_waiter) { + // reflect the decreased priority in the heap node. + l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter); + + BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node)); + BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks)); + + binheap_decrease(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks); + } + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + + if(new_max_eff_prio != old_max_eff_prio) { + // new_max_eff_prio > old_max_eff_prio holds. + if ((effective_priority(owner) == old_max_eff_prio) || + //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))) { + (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) { + TRACE_CUR("Propagating inheritance to holder of lock %d.\n", + l->ident); + + // beware: recursion + litmus->nested_increase_prio(owner, new_max_eff_prio, + &mutex->lock, irqflags); // unlocks mutex->lock + } + else { + TRACE_CUR("Lower priority than holder %s/%d. 
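/*
 * The propagation paths use relay-style ("hand-over-hand") fine-grained
 * locking: each hop acquires the next lock in the blocking chain before
 * releasing the one it arrived with, so the chain cannot change underneath
 * the traversal. A user-space sketch of that discipline over a fixed chain
 * of three pthread mutexes standing in for the per-lock spinlocks.
 */
#include <pthread.h>
#include <stdio.h>

#define CHAIN_LEN 3

static pthread_mutex_t chain[CHAIN_LEN] = {
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER
};

static void propagate(int new_prio)
{
	int i;

	pthread_mutex_lock(&chain[0]);               /* the lock we start out holding */
	for (i = 1; i < CHAIN_LEN; i++) {
		pthread_mutex_lock(&chain[i]);       /* take the next hop first... */
		pthread_mutex_unlock(&chain[i - 1]); /* ...then drop the previous one */
		printf("propagated priority %d to lock %d\n", new_prio, i);
	}
	pthread_mutex_unlock(&chain[CHAIN_LEN - 1]);
}

int main(void)
{
	propagate(7);
	return 0;
}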
No propagation.\n", + owner->comm, owner->pid); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + TRACE_TASK(mutex->owner, "No change in maxiumum effective priority.\n"); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock; + + TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident); + if(still_blocked) { + TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n", + still_blocked->ident); + if(still_blocked->ops->propagate_increase_inheritance) { + /* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B) + we know that task 't' has not released any locks behind us in this + chain. Propagation just needs to catch up with task 't'. */ + still_blocked->ops->propagate_increase_inheritance(still_blocked, + t, + &mutex->lock, + irqflags); + } + else { + TRACE_TASK(t, + "Inheritor is blocked on lock (%p) that does not " + "support nesting!\n", + still_blocked); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } +} + + +void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l, + struct task_struct* t, + raw_spinlock_t* to_unlock, + unsigned long irqflags) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + + // relay-style locking + lock_fine(&mutex->lock); + unlock_fine(to_unlock); + + if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked + if(t == mutex->hp_waiter) { + struct task_struct *owner = mutex->owner; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks); + mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, NULL); + l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ? + effective_priority(mutex->hp_waiter) : NULL; + binheap_add(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if((old_max_eff_prio != new_max_eff_prio) && + (effective_priority(owner) == old_max_eff_prio)) + { + // Need to set new effective_priority for owner + + struct task_struct *decreased_prio; + + TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n", + l->ident); + + //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) { + if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { + TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n", + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", + (new_max_eff_prio) ? new_max_eff_prio->pid : -1, + owner->comm, + owner->pid, + l->ident); + + decreased_prio = new_max_eff_prio; + } + else { + TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n", + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", + (new_max_eff_prio) ? 
new_max_eff_prio->pid : -1, + owner->comm, + owner->pid, + l->ident); + + decreased_prio = NULL; + } + + // beware: recursion + litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags); // will unlock mutex->lock + } + else { + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + TRACE_TASK(t, "is not hp_waiter. No propagation.\n"); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock; + + TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident); + if(still_blocked) { + TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n", + still_blocked->ident); + if(still_blocked->ops->propagate_decrease_inheritance) { + /* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B) + we know that task 't' has not released any locks behind us in this + chain. propagation just needs to catch up with task 't' */ + still_blocked->ops->propagate_decrease_inheritance(still_blocked, + t, + &mutex->lock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", + still_blocked); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } +} + + +int rsm_mutex_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + unsigned long flags; + + int owner; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&mutex->lock, flags); + + owner = (mutex->owner == t); + + unlock_fine_irqrestore(&mutex->lock, flags); + unlock_global_irqrestore(dgl_lock, flags); + + if (owner) + rsm_mutex_unlock(l); + + return 0; +} + +void rsm_mutex_free(struct litmus_lock* lock) +{ + kfree(rsm_mutex_from_lock(lock)); +} + +struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops* ops) +{ + struct rsm_mutex* mutex; + + mutex = kmalloc(sizeof(*mutex), GFP_KERNEL); + if (!mutex) + return NULL; + + mutex->litmus_lock.ops = ops; + mutex->owner = NULL; + mutex->hp_waiter = NULL; + init_waitqueue_head(&mutex->wait); + + +#ifdef CONFIG_DEBUG_SPINLOCK + { + __raw_spin_lock_init(&mutex->lock, + ((struct litmus_lock*)mutex)->cheat_lockdep, + &((struct litmus_lock*)mutex)->key); + } +#else + raw_spin_lock_init(&mutex->lock); +#endif + + ((struct litmus_lock*)mutex)->nest.hp_waiter_ptr = &mutex->hp_waiter; + + return &mutex->litmus_lock; +} + diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 480c62bc895b..be14dbec6ed2 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -29,7 +29,7 @@ #include #include #include - +#include #include #include @@ -42,6 +42,16 @@ #include #include +#include + +#ifdef CONFIG_LITMUS_LOCKING +#include +#endif + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +#include +#include +#endif #ifdef CONFIG_SCHED_CPU_AFFINITY #include @@ -49,7 +59,27 @@ /* to configure the cluster size */ #include -#include + +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif /* Reference configuration variable. Determines which cache level is used to * group CPUs into clusters. 
GLOBAL_CLUSTER, which is the default, means that @@ -70,7 +100,7 @@ typedef struct { struct task_struct* linked; /* only RT tasks */ struct task_struct* scheduled; /* only RT tasks */ atomic_t will_schedule; /* prevent unneeded IPIs */ - struct bheap_node* hn; + struct binheap_node hn; } cpu_entry_t; /* one cpu_entry_t per CPU */ @@ -83,6 +113,14 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); #define test_will_schedule(cpu) \ (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; +#endif + /* * In C-EDF there is a cedf domain _per_ cluster * The number of clusters is dynamically determined accordingly to the @@ -96,10 +134,17 @@ typedef struct clusterdomain { /* map of this cluster cpus */ cpumask_var_t cpu_map; /* the cpus queue themselves according to priority in here */ - struct bheap_node *heap_node; - struct bheap cpu_heap; + struct binheap_handle cpu_heap; /* lock for this cluster */ #define cluster_lock domain.ready_lock + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + struct tasklet_head pending_tasklets; +#endif + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t dgl_lock; +#endif } cedf_domain_t; /* a cedf_domain per cluster; allocation is done at init/activation time */ @@ -108,6 +153,22 @@ cedf_domain_t *cedf; #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) #define task_cpu_cluster(task) remote_cluster(get_partition(task)) +/* total number of cluster */ +static int num_clusters; +/* we do not support cluster of different sizes */ +static unsigned int cluster_size; + +static int clusters_allocated = 0; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t) +{ + cedf_domain_t *cluster = task_cpu_cluster(t); + return(&cluster->dgl_lock); +} +#endif + + /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose * information during the initialization of the plugin (e.g., topology) @@ -115,11 +176,11 @@ cedf_domain_t *cedf; */ #define VERBOSE_INIT -static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) +static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) { - cpu_entry_t *a, *b; - a = _a->value; - b = _b->value; + cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn); + cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn); + /* Note that a and b are inverted: we want the lowest-priority CPU at * the top of the heap. */ @@ -133,20 +194,17 @@ static void update_cpu_position(cpu_entry_t *entry) { cedf_domain_t *cluster = entry->cluster; - if (likely(bheap_node_in_heap(entry->hn))) - bheap_delete(cpu_lower_prio, - &cluster->cpu_heap, - entry->hn); + if (likely(binheap_is_in_heap(&entry->hn))) { + binheap_delete(&entry->hn, &cluster->cpu_heap); + } - bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); + binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn); } /* caller must hold cedf lock */ static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) { - struct bheap_node* hn; - hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); - return hn->value; + return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn); } @@ -208,7 +266,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked, } /* unlink - Make sure a task is not linked any longer to an entry - * where it was linked before. Must hold cedf_lock. 
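/*
 * The switch from bheap to binheap keeps the same trick: the CPU heap is
 * ordered by an inverted comparator so the *lowest-priority* CPU sits at the
 * top, turning "which CPU should the next job preempt?" into a peek. A
 * standalone sketch of that ordering with a linear scan standing in for the
 * binheap; deadlines are plain integers (earlier = higher priority, 0 = idle).
 */
#include <stdio.h>

struct cpu { int id; long linked_deadline; };   /* 0 = no linked job (idle) */

/* Mirrors cpu_lower_prio(): true iff a is a lower-priority CPU than b. */
static int cpu_lower_prio(const struct cpu *a, const struct cpu *b)
{
	if (!b->linked_deadline)
		return 0;                           /* b is idle: b is lowest */
	if (!a->linked_deadline)
		return 1;                           /* a is idle: a is lowest */
	return b->linked_deadline < a->linked_deadline;
}

static const struct cpu *lowest_prio_cpu(const struct cpu *cpus, int n)
{
	const struct cpu *low = &cpus[0];
	int i;

	for (i = 1; i < n; i++)
		if (cpu_lower_prio(&cpus[i], low))
			low = &cpus[i];
	return low;                                 /* what binheap_top_entry() returns */
}

int main(void)
{
	struct cpu cpus[] = { { 0, 100 }, { 1, 0 }, { 2, 50 } };

	printf("preempt CPU %d\n", lowest_prio_cpu(cpus, 3)->id);   /* CPU 1 (idle) */
	return 0;
}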
+ * where it was linked before. Must hold cluster_lock. */ static noinline void unlink(struct task_struct* t) { @@ -244,7 +302,7 @@ static void preempt(cpu_entry_t *entry) } /* requeue - Put an unlinked task into gsn-edf domain. - * Caller must hold cedf_lock. + * Caller must hold cluster_lock. */ static noinline void requeue(struct task_struct* task) { @@ -339,13 +397,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); } -/* caller holds cedf_lock */ +/* caller holds cluster_lock */ static noinline void job_completion(struct task_struct *t, int forced) { BUG_ON(!t); sched_trace_task_completion(t, forced); +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + TRACE_TASK(t, "job_completion().\n"); /* set flags */ @@ -389,6 +451,314 @@ static void cedf_tick(struct task_struct* t) } } + + + + + + + + + + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + + +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) +{ + if (!atomic_read(&tasklet->count)) { + if(tasklet->owner) { + sched_trace_tasklet_begin(tasklet->owner); + } + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", + __FUNCTION__, + (tasklet->owner) ? tasklet->owner->pid : -1, + (tasklet->owner) ? 0 : 1); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + + if(tasklet->owner) { + sched_trace_tasklet_end(tasklet->owner, flushed); + } + } + else { + BUG(); + } +} + + +static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) +{ + int work_to_do = 1; + struct tasklet_struct *tasklet = NULL; + unsigned long flags; + + while(work_to_do) { + + TS_NV_SCHED_BOTISR_START; + + raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + + if(cluster->pending_tasklets.head != NULL) { + // remove tasklet at head. + struct tasklet_struct *prev = NULL; + tasklet = cluster->pending_tasklets.head; + + // find a tasklet with prio to execute; skip ones where + // sched_task has a higher priority. + // We use the '!edf' test instead of swaping function arguments since + // both sched_task and owner could be NULL. In this case, we want to + // still execute the tasklet. 
+ while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) { + prev = tasklet; + tasklet = tasklet->next; + } + + if(tasklet) { // found something to execuite + // remove the tasklet from the queue + if(prev) { + prev->next = tasklet->next; + if(prev->next == NULL) { + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.tail = &(prev); + } + } + else { + cluster->pending_tasklets.head = tasklet->next; + if(tasklet->next == NULL) { + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + } + } + else { + TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__); + } + } + else { + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + } + + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); + + if(tasklet) { + __do_lit_tasklet(tasklet, 0ul); + tasklet = NULL; + } + else { + work_to_do = 0; + } + + TS_NV_SCHED_BOTISR_END; + } +} + +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) +{ + struct tasklet_struct* step; + + tasklet->next = NULL; // make sure there are no old values floating around + + step = cluster->pending_tasklets.head; + if(step == NULL) { + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); + // insert at tail. + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else if((*(cluster->pending_tasklets.tail) != NULL) && + edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) { + // insert at tail. + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); + + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else { + + // insert the tasklet somewhere in the middle. + + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); + + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { + step = step->next; + } + + // insert tasklet right before step->next. + + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, + tasklet->owner->pid, + (step->owner) ? + step->owner->pid : + -1, + (step->next) ? + ((step->next->owner) ? + step->next->owner->pid : + -1) : + -1); + + tasklet->next = step->next; + step->next = tasklet; + + // patch up the head if needed. + if(cluster->pending_tasklets.head == step) + { + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.head = tasklet; + } + } +} + +static void cedf_run_tasklets(struct task_struct* sched_task) +{ + cedf_domain_t* cluster; + + preempt_disable(); + + cluster = (is_realtime(sched_task)) ? 
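/*
 * The pending-tasklet queue above is a singly linked list whose tail field
 * stores the address of the last node's next pointer, so appends and "we
 * removed the last element" fix-ups are uniform. A standalone model of that
 * representation using a pointer-to-pointer walk; priorities are integers
 * (bigger = higher). Note that with this representation the interior-removal
 * fix-up in the hunk above would be expected to store &(prev->next) rather
 * than &(prev) into the tail.
 */
#include <stdio.h>

struct tasklet { int owner_prio; struct tasklet *next; };

struct tasklet_head { struct tasklet *head; struct tasklet **tail; };

static void append(struct tasklet_head *q, struct tasklet *t)
{
	t->next = NULL;
	*q->tail = t;                            /* works for empty and non-empty lists */
	q->tail = &t->next;
}

/* Pop the first tasklet whose owner outranks sched_prio, as do_lit_tasklets()
 * does when deciding what may run ahead of the currently scheduled task. */
static struct tasklet *pop_eligible(struct tasklet_head *q, int sched_prio)
{
	struct tasklet **link = &q->head, *t;

	while ((t = *link) && t->owner_prio <= sched_prio)
		link = &t->next;                 /* skip: scheduled task outranks it */
	if (!t)
		return NULL;
	*link = t->next;                         /* unlink the winner */
	if (!t->next)
		q->tail = link;                  /* removed the last element: fix tail */
	return t;
}

int main(void)
{
	struct tasklet a = { 3, NULL }, b = { 9, NULL };
	struct tasklet_head q = { NULL, &q.head };

	append(&q, &a);
	append(&q, &b);
	printf("run tasklet owned at prio %d\n", pop_eligible(&q, 5)->owner_prio); /* 9 */
	return 0;
}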
+ task_cpu_cluster(sched_task) : + remote_cluster(smp_processor_id()); + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + do_lit_tasklets(cluster, sched_task); + } + + preempt_enable_no_resched(); +} + + + +static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet) +{ +#if 0 + cedf_domain_t *cluster = NULL; + cpu_entry_t *targetCPU = NULL; + int thisCPU; + int runLocal = 0; + int runNow = 0; + unsigned long flags; + + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + return 0; + } + + cluster = task_cpu_cluster(tasklet->owner); + + raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + + thisCPU = smp_processor_id(); + +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = NULL; + + // use this CPU if it is in our cluster and isn't running any RT work. + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) { + affinity = &(__get_cpu_var(cedf_cpu_entries)); + } + else { + // this CPU is busy or shouldn't run tasklet in this cluster. + // look for available near by CPUs. + // NOTE: Affinity towards owner and not this CPU. Is this right? + affinity = + cedf_get_nearest_available_cpu(cluster, + &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner))); + } + + targetCPU = affinity; + } +#endif + + if (targetCPU == NULL) { + targetCPU = lowest_prio_cpu(cluster); + } + + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) { + if (thisCPU == targetCPU->cpu) { + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); + runLocal = 1; + runNow = 1; + } + else { + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); + runLocal = 0; + runNow = 1; + } + } + else { + runLocal = 0; + runNow = 0; + } + + if(!runLocal) { + // enqueue the tasklet + __add_pai_tasklet(tasklet, cluster); + } + + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); + + + if (runLocal /*&& runNow */) { // runNow == 1 is implied + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); + } + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); + preempt(targetCPU); // need to be protected by cluster_lock? + } + else { + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); + } +#else + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); +#endif + return(1); // success +} + +static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio, + struct task_struct *new_prio) +{ + struct tasklet_struct* step; + unsigned long flags; + cedf_domain_t *cluster; + struct task_struct *probe; + + // identify the cluster by the assignment of these tasks. one should + // be non-NULL. + probe = (old_prio) ? 
old_prio : new_prio; + + if(probe) { + cluster = task_cpu_cluster(probe); + + if(cluster->pending_tasklets.head != NULL) { + raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) { + if(step->owner == old_prio) { + TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid); + step->owner = new_prio; + } + } + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); + } + } + else { + TRACE("%s: Both priorities were NULL\n"); + } +} + +#endif // PAI + /* Getting schedule() right is a bit tricky. schedule() may not make any * assumptions on the state of the current task since it may be called for a * number of reasons. The reasons include a scheduler_tick() determined that it @@ -465,6 +835,19 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) if (blocks) unlink(entry->scheduled); +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { + if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + // don't track preemptions or locking protocol suspensions. + TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n"); + stop_gpu_tracker(entry->scheduled); + } + else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n"); + } + } +#endif + /* Request a sys_exit_np() call if we would like to preempt but cannot. * We need to make sure to update the link structure anyway in case * that we are still linked. Multiple calls to request_exit_np() don't @@ -514,7 +897,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) raw_spin_unlock(&cluster->cluster_lock); #ifdef WANT_ALL_SCHED_EVENTS - TRACE("cedf_lock released, next=0x%p\n", next); + TRACE("cluster_lock released, next=0x%p\n", next); if (next) TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); @@ -522,7 +905,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) TRACE("becomes idle at %llu.\n", litmus_clock()); #endif - return next; } @@ -548,7 +930,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) cpu_entry_t* entry; cedf_domain_t* cluster; - TRACE("gsn edf: task new %d\n", t->pid); + TRACE("c-edf: task new %d\n", t->pid); /* the cluster doesn't change even if t is running */ cluster = task_cpu_cluster(t); @@ -586,7 +968,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) static void cedf_task_wake_up(struct task_struct *task) { unsigned long flags; - lt_t now; + //lt_t now; cedf_domain_t *cluster; TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); @@ -594,6 +976,8 @@ static void cedf_task_wake_up(struct task_struct *task) cluster = task_cpu_cluster(task); raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + +#if 0 // sproadic task model /* We need to take suspensions because of semaphores into * account! If a job resumes after being suspended due to acquiring * a semaphore, it should never be treated as a new job release. 
@@ -615,7 +999,13 @@ static void cedf_task_wake_up(struct task_struct *task) } } } - cedf_job_arrival(task); +#else + set_rt_flags(task, RT_F_RUNNING); // periodic model +#endif + + if(tsk_rt(task)->linked_on == NO_CPU) + cedf_job_arrival(task); + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); } @@ -642,6 +1032,10 @@ static void cedf_task_exit(struct task_struct * t) unsigned long flags; cedf_domain_t *cluster = task_cpu_cluster(t); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + cedf_change_prio_pai_tasklet(t, NULL); +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cluster_lock, flags); unlink(t); @@ -659,13 +1053,536 @@ static void cedf_task_exit(struct task_struct * t) static long cedf_admit_task(struct task_struct* tsk) { +#ifdef CONFIG_LITMUS_NESTED_LOCKING + INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, + edf_max_heap_base_priority_order); +#endif + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; } -/* total number of cluster */ -static int num_clusters; -/* we do not support cluster of different sizes */ -static unsigned int cluster_size; + + +#ifdef CONFIG_LITMUS_LOCKING + +#include + + + +/* called with IRQs off */ +static void __increase_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ + int linked_on; + int check_preempt = 0; + + cedf_domain_t* cluster = task_cpu_cluster(t); + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + /* this sanity check allows for weaker locking in protocols */ + /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ + if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { +#endif + TRACE_TASK(t, "inherits priority from %s/%d\n", + prio_inh->comm, prio_inh->pid); + tsk_rt(t)->inh_task = prio_inh; + + linked_on = tsk_rt(t)->linked_on; + + /* If it is scheduled, then we need to reorder the CPU heap. */ + if (linked_on != NO_CPU) { + TRACE_TASK(t, "%s: linked on %d\n", + __FUNCTION__, linked_on); + /* Holder is scheduled; need to re-order CPUs. + * We can't use heap_decrease() here since + * the cpu_heap is ordered in reverse direction, so + * it is actually an increase. */ + binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn, + &cluster->cpu_heap); + binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn, + &cluster->cpu_heap, cpu_entry_t, hn); + + } else { + /* holder may be queued: first stop queue changes */ + raw_spin_lock(&cluster->domain.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "%s: is queued\n", + __FUNCTION__); + /* We need to update the position of holder in some + * heap. Note that this could be a release heap if we + * budget enforcement is used and this job overran. */ + check_preempt = + !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node); + } else { + /* Nothing to do: if it is not queued and not linked + * then it is either sleeping or currently being moved + * by other code (e.g., a timer interrupt handler) that + * will use the correct priority when enqueuing the + * task. */ + TRACE_TASK(t, "%s: is NOT queued => Done.\n", + __FUNCTION__); + } + raw_spin_unlock(&cluster->domain.release_lock); + + /* If holder was enqueued in a release heap, then the following + * preemption check is pointless, but we can't easily detect + * that case. If you want to fix this, then consider that + * simply adding a state flag requires O(n) time to update when + * releasing n tasks, which conflicts with the goal to have + * O(log n) merges. 
*/ + if (check_preempt) { + /* heap_decrease() hit the top level of the heap: make + * sure preemption checks get the right task, not the + * potentially stale cache. */ + bheap_uncache_min(edf_ready_order, + &cluster->domain.ready_queue); + check_for_preemptions(cluster); + } + } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + } + else { + TRACE_TASK(t, "Spurious invalid priority increase. " + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", + t->comm, t->pid, + effective_priority(t)->comm, effective_priority(t)->pid, + (prio_inh) ? prio_inh->comm : "nil", + (prio_inh) ? prio_inh->pid : -1); + WARN_ON(!prio_inh); + } +#endif +} + +/* called with IRQs off */ +static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + cedf_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cluster_lock); + + __increase_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&cluster->cluster_lock); + +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) + if(tsk_rt(t)->held_gpus) { + int i; + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + i < NV_DEVICE_NUM; + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { + pai_check_priority_increase(t, i); + } + } +#endif +} + +/* called with IRQs off */ +static void __decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ +#ifdef CONFIG_LITMUS_NESTED_LOCKING + if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { +#endif + /* A job only stops inheriting a priority when it releases a + * resource. Thus we can make the following assumption.*/ + if(prio_inh) + TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n", + prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "base priority restored.\n"); + + tsk_rt(t)->inh_task = prio_inh; + + if(tsk_rt(t)->scheduled_on != NO_CPU) { + TRACE_TASK(t, "is scheduled.\n"); + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(t); + cedf_job_arrival(t); + } + else { + cedf_domain_t* cluster = task_cpu_cluster(t); + /* task is queued */ + raw_spin_lock(&cluster->domain.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "is queued.\n"); + + /* decrease in priority, so we have to re-add to binomial heap */ + unlink(t); + cedf_job_arrival(t); + } + else { + TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n"); + } + raw_spin_unlock(&cluster->domain.release_lock); + } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + } + else { + TRACE_TASK(t, "Spurious invalid priority decrease. " + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", + t->comm, t->pid, + effective_priority(t)->comm, effective_priority(t)->pid, + (prio_inh) ? prio_inh->comm : "nil", + (prio_inh) ? 
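/*
 * Both inheritance paths finish by walking the task's held_gpus bitmask and
 * re-running the PAI priority check for every GPU the task currently holds.
 * A standalone sketch of that walk using a plain shift-and-test loop; the
 * device count and task id are illustrative. One thing worth double-checking
 * in the hunk above: find_first_bit()/find_next_bit() take the bitmap size
 * in bits, while sizeof(tsk_rt(t)->held_gpus) yields a size in bytes.
 */
#include <stdio.h>

#define NV_DEVICE_NUM 8

/* Stand-in for pai_check_priority_increase()/..._decrease(). */
static void pai_check(int task_id, int gpu)
{
	printf("task %d: re-evaluate PAI tasklet priority on GPU %d\n", task_id, gpu);
}

static void check_all_held_gpus(int task_id, unsigned long held_gpus)
{
	int i;

	for (i = 0; i < NV_DEVICE_NUM; i++)
		if (held_gpus & (1UL << i))
			pai_check(task_id, i);
}

int main(void)
{
	check_all_held_gpus(42, (1UL << 1) | (1UL << 5));   /* holds GPUs 1 and 5 */
	return 0;
}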
prio_inh->pid : -1); + } +#endif +} + +static void decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ + cedf_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cluster_lock); + __decrease_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d decreases in priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&cluster->cluster_lock); + +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) + if(tsk_rt(t)->held_gpus) { + int i; + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + i < NV_DEVICE_NUM; + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { + pai_check_priority_decrease(t, i); + } + } +#endif +} + + + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +/* called with IRQs off */ +static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cluster_lock); + + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } + + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); + + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio. + + __increase_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? + new_owner : + tsk_rt(new_owner)->inh_task); + + raw_spin_unlock(&cluster->cluster_lock); +} + + +/* called with IRQs off */ +static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cluster_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + __decrease_priority_inheritance(klitirqd, new_owner); + + tsk_rt(old_owner)->cur_klitirqd = NULL; + + raw_spin_unlock(&cluster->cluster_lock); +} +#endif // CONFIG_LITMUS_SOFTIRQD + + + + + + + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + +/* called with IRQs off */ +/* preconditions: + (1) The 'hp_blocked_tasks_lock' of task 't' is held. + (2) The lock 'to_unlock' is held. + */ +static void nested_increase_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, + unsigned long irqflags) +{ + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; + + if(tsk_rt(t)->inh_task != prio_inh) { // shield redundent calls. + increase_priority_inheritance(t, prio_inh); // increase our prio. + } + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. + + + if(blocked_lock) { + if(blocked_lock->ops->propagate_increase_inheritance) { + TRACE_TASK(t, "Inheritor is blocked (...perhaps). 
Checking lock %d.\n", + blocked_lock->ident); + + // beware: recursion + blocked_lock->ops->propagate_increase_inheritance(blocked_lock, + t, to_unlock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n", + blocked_lock->ident); + unlock_fine_irqrestore(to_unlock, irqflags); + } + } + else { + TRACE_TASK(t, "is not blocked. No propagation.\n"); + unlock_fine_irqrestore(to_unlock, irqflags); + } +} + +/* called with IRQs off */ +/* preconditions: + (1) The 'hp_blocked_tasks_lock' of task 't' is held. + (2) The lock 'to_unlock' is held. + */ +static void nested_decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, + unsigned long irqflags) +{ + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; + decrease_priority_inheritance(t, prio_inh); + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. + + if(blocked_lock) { + if(blocked_lock->ops->propagate_decrease_inheritance) { + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", + blocked_lock->ident); + + // beware: recursion + blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t, + to_unlock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", + blocked_lock); + unlock_fine_irqrestore(to_unlock, irqflags); + } + } + else { + TRACE_TASK(t, "is not blocked. No propagation.\n"); + unlock_fine_irqrestore(to_unlock, irqflags); + } +} + + +/* ******************** RSM MUTEX ********************** */ + +static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = { + .lock = rsm_mutex_lock, + .unlock = rsm_mutex_unlock, + .close = rsm_mutex_close, + .deallocate = rsm_mutex_free, + + .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance, + .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance, + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .dgl_lock = rsm_mutex_dgl_lock, + .is_owner = rsm_mutex_is_owner, + .enable_priority = rsm_mutex_enable_priority, +#endif +}; + +static struct litmus_lock* cedf_new_rsm_mutex(void) +{ + return rsm_mutex_new(&cedf_rsm_mutex_lock_ops); +} + +/* ******************** IKGLP ********************** */ + +static struct litmus_lock_ops cedf_ikglp_lock_ops = { + .lock = ikglp_lock, + .unlock = ikglp_unlock, + .close = ikglp_close, + .deallocate = ikglp_free, + + // ikglp can only be an outer-most lock. + .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL, +}; + +static struct litmus_lock* cedf_new_ikglp(void* __user arg) +{ + // assumes clusters of uniform size. + return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg); +} + +#endif /* CONFIG_LITMUS_NESTED_LOCKING */ + + + + +/* ******************** KFMLP support ********************** */ + +static struct litmus_lock_ops cedf_kfmlp_lock_ops = { + .lock = kfmlp_lock, + .unlock = kfmlp_unlock, + .close = kfmlp_close, + .deallocate = kfmlp_free, + + // kfmlp can only be an outer-most lock. 
+ .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL, +}; + + +static struct litmus_lock* cedf_new_kfmlp(void* __user arg) +{ + return kfmlp_new(&cedf_kfmlp_lock_ops, arg); +} + + +/* **** lock constructor **** */ + +static long cedf_allocate_lock(struct litmus_lock **lock, int type, + void* __user args) +{ + int err; + + switch (type) { +#ifdef CONFIG_LITMUS_NESTED_LOCKING + case RSM_MUTEX: + *lock = cedf_new_rsm_mutex(); + break; + + case IKGLP_SEM: + *lock = cedf_new_ikglp(args); + break; +#endif + case KFMLP_SEM: + *lock = cedf_new_kfmlp(args); + break; + + default: + err = -ENXIO; + goto UNSUPPORTED_LOCK; + }; + + if (*lock) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_LOCK: + return err; +} + +#endif // CONFIG_LITMUS_LOCKING + + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +static struct affinity_observer_ops cedf_kfmlp_affinity_ops = { + .close = kfmlp_aff_obs_close, + .deallocate = kfmlp_aff_obs_free, +}; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +static struct affinity_observer_ops cedf_ikglp_affinity_ops = { + .close = ikglp_aff_obs_close, + .deallocate = ikglp_aff_obs_free, +}; +#endif + +static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs, + int type, + void* __user args) +{ + int err; + + switch (type) { + + case KFMLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args); + break; + + case KFMLP_GPU_AFF_OBS: + *aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args); + break; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + case IKGLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args); + break; + + case IKGLP_GPU_AFF_OBS: + *aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args); + break; +#endif + default: + err = -ENXIO; + goto UNSUPPORTED_AFF_OBS; + }; + + if (*aff_obs) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_AFF_OBS: + return err; +} +#endif + + + #ifdef VERBOSE_INIT static void print_cluster_topology(cpumask_var_t mask, int cpu) @@ -680,16 +1597,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu) } #endif -static int clusters_allocated = 0; - static void cleanup_cedf(void) { int i; +#ifdef CONFIG_LITMUS_NVIDIA + shutdown_nvidia_info(); +#endif + if (clusters_allocated) { for (i = 0; i < num_clusters; i++) { kfree(cedf[i].cpus); - kfree(cedf[i].heap_node); free_cpumask_var(cedf[i].cpu_map); } @@ -749,12 +1667,16 @@ static long cedf_activate_plugin(void) cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), GFP_ATOMIC); - cedf[i].heap_node = kmalloc( - cluster_size * sizeof(struct bheap_node), - GFP_ATOMIC); - bheap_init(&(cedf[i].cpu_heap)); + INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio); edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + cedf[i].pending_tasklets.head = NULL; + cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head); +#endif + + if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) return -ENOMEM; #ifdef CONFIG_RELEASE_MASTER @@ -765,6 +1687,10 @@ static long cedf_activate_plugin(void) /* cycle through cluster and add cpus to them */ for (i = 0; i < num_clusters; i++) { +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spin_lock_init(&cedf[i].dgl_lock); +#endif + for_each_online_cpu(cpu) { /* check if the cpu is already in a cluster */ for (j = 0; j < num_clusters; j++) @@ -795,8 +1721,8 @@ static long cedf_activate_plugin(void) atomic_set(&entry->will_schedule, 0); entry->cpu = ccpu; entry->cluster = &cedf[i]; - 
entry->hn = &(cedf[i].heap_node[cpu_count]); - bheap_node_init(&entry->hn, entry); + + INIT_BINHEAP_NODE(&entry->hn); cpu_count++; @@ -813,6 +1739,40 @@ static long cedf_activate_plugin(void) } } +#ifdef CONFIG_LITMUS_SOFTIRQD + { + /* distribute the daemons evenly across the clusters. */ + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; + int left_over = NR_LITMUS_SOFTIRQD % num_clusters; + + int daemon = 0; + for(i = 0; i < num_clusters; ++i) + { + int num_on_this_cluster = num_daemons_per_cluster; + if(left_over) + { + ++num_on_this_cluster; + --left_over; + } + + for(j = 0; j < num_on_this_cluster; ++j) + { + // first CPU of this cluster + affinity[daemon++] = i*cluster_size; + } + } + + spawn_klitirqd(affinity); + + kfree(affinity); + } +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif + free_cpumask_var(mask); clusters_allocated = 1; return 0; @@ -831,6 +1791,32 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .task_block = cedf_task_block, .admit_task = cedf_admit_task, .activate_plugin = cedf_activate_plugin, + .compare = edf_higher_prio, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = cedf_allocate_lock, + .increase_prio = increase_priority_inheritance, + .decrease_prio = decrease_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + .nested_increase_prio = nested_increase_priority_inheritance, + .nested_decrease_prio = nested_decrease_priority_inheritance, + .__compare = __edf_higher_prio, +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .get_dgl_spinlock = cedf_get_dgl_spinlock, +#endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + .allocate_aff_obs = cedf_allocate_affinity_observer, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, + .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet, + .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet, + .run_tasklets = cedf_run_tasklets, +#endif }; static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 6ed504f4750e..8c48757fa86c 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -12,23 +12,49 @@ #include #include #include +#include +#include #include #include #include #include #include -#include #include #include +#include + +#ifdef CONFIG_LITMUS_LOCKING +#include +#endif + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +#include +#include +#endif #ifdef CONFIG_SCHED_CPU_AFFINITY #include #endif -#include +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif /* Overview of GSN-EDF operations. 
* @@ -103,52 +129,70 @@ typedef struct { int cpu; struct task_struct* linked; /* only RT tasks */ struct task_struct* scheduled; /* only RT tasks */ - struct bheap_node* hn; + struct binheap_node hn; } cpu_entry_t; DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); cpu_entry_t* gsnedf_cpus[NR_CPUS]; /* the cpus queue themselves according to priority in here */ -static struct bheap_node gsnedf_heap_node[NR_CPUS]; -static struct bheap gsnedf_cpu_heap; +static struct binheap_handle gsnedf_cpu_heap; static rt_domain_t gsnedf; #define gsnedf_lock (gsnedf.ready_lock) +#ifdef CONFIG_LITMUS_DGL_SUPPORT +static raw_spinlock_t dgl_lock; + +static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t) +{ + return(&dgl_lock); +} +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; + +struct tasklet_head gsnedf_pending_tasklets; +#endif + /* Uncomment this if you want to see all scheduling decisions in the * TRACE() log. #define WANT_ALL_SCHED_EVENTS */ -static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) +static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) { - cpu_entry_t *a, *b; - a = _a->value; - b = _b->value; + cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn); + cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn); + /* Note that a and b are inverted: we want the lowest-priority CPU at * the top of the heap. */ return edf_higher_prio(b->linked, a->linked); } + /* update_cpu_position - Move the cpu entry to the correct place to maintain * order in the cpu queue. Caller must hold gsnedf lock. */ static void update_cpu_position(cpu_entry_t *entry) { - if (likely(bheap_node_in_heap(entry->hn))) - bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); - bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); + if (likely(binheap_is_in_heap(&entry->hn))) { + binheap_delete(&entry->hn, &gsnedf_cpu_heap); + } + binheap_add(&entry->hn, &gsnedf_cpu_heap, cpu_entry_t, hn); } /* caller must hold gsnedf lock */ static cpu_entry_t* lowest_prio_cpu(void) { - struct bheap_node* hn; - hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap); - return hn->value; + return binheap_top_entry(&gsnedf_cpu_heap, cpu_entry_t, hn); } @@ -337,6 +381,10 @@ static noinline void job_completion(struct task_struct *t, int forced) sched_trace_task_completion(t, forced); +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + TRACE_TASK(t, "job_completion().\n"); /* set flags */ @@ -379,6 +427,318 @@ static void gsnedf_tick(struct task_struct* t) } } + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + + +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) +{ + if (!atomic_read(&tasklet->count)) { + if(tasklet->owner) { + sched_trace_tasklet_begin(tasklet->owner); + } + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", + __FUNCTION__, + (tasklet->owner) ? tasklet->owner->pid : -1, + (tasklet->owner) ? 
0 : 1); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + + if(tasklet->owner) { + sched_trace_tasklet_end(tasklet->owner, flushed); + } + } + else { + BUG(); + } +} + +static void do_lit_tasklets(struct task_struct* sched_task) +{ + int work_to_do = 1; + struct tasklet_struct *tasklet = NULL; + unsigned long flags; + + while(work_to_do) { + + TS_NV_SCHED_BOTISR_START; + + // execute one tasklet that has higher priority + raw_spin_lock_irqsave(&gsnedf_lock, flags); + + if(gsnedf_pending_tasklets.head != NULL) { + struct tasklet_struct *prev = NULL; + tasklet = gsnedf_pending_tasklets.head; + + while(tasklet && edf_higher_prio(sched_task, tasklet->owner)) { + prev = tasklet; + tasklet = tasklet->next; + } + + // remove the tasklet from the queue + if(prev) { + prev->next = tasklet->next; + if(prev->next == NULL) { + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + gsnedf_pending_tasklets.tail = &(prev); + } + } + else { + gsnedf_pending_tasklets.head = tasklet->next; + if(tasklet->next == NULL) { + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); + } + } + } + else { + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + } + + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + + if(tasklet) { + __do_lit_tasklet(tasklet, 0ul); + tasklet = NULL; + } + else { + work_to_do = 0; + } + + TS_NV_SCHED_BOTISR_END; + } +} + +//static void do_lit_tasklets(struct task_struct* sched_task) +//{ +// int work_to_do = 1; +// struct tasklet_struct *tasklet = NULL; +// //struct tasklet_struct *step; +// unsigned long flags; +// +// while(work_to_do) { +// +// TS_NV_SCHED_BOTISR_START; +// +// // remove tasklet at head of list if it has higher priority. +// raw_spin_lock_irqsave(&gsnedf_lock, flags); +// +// if(gsnedf_pending_tasklets.head != NULL) { +// // remove tasklet at head. +// tasklet = gsnedf_pending_tasklets.head; +// +// if(edf_higher_prio(tasklet->owner, sched_task)) { +// +// if(NULL == tasklet->next) { +// // tasklet is at the head, list only has one element +// TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); +// gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); +// } +// +// // remove the tasklet from the queue +// gsnedf_pending_tasklets.head = tasklet->next; +// +// TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); +// } +// else { +// TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); +// tasklet = NULL; +// } +// } +// else { +// TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); +// } +// +// raw_spin_unlock_irqrestore(&gsnedf_lock, flags); +// +// TS_NV_SCHED_BOTISR_END; +// +// if(tasklet) { +// __do_lit_tasklet(tasklet, 0ul); +// tasklet = NULL; +// } +// else { +// work_to_do = 0; +// } +// } +// +// //TRACE("%s: exited.\n", __FUNCTION__); +//} + +static void __add_pai_tasklet(struct tasklet_struct* tasklet) +{ + struct tasklet_struct* step; + + tasklet->next = NULL; // make sure there are no old values floating around + + step = gsnedf_pending_tasklets.head; + if(step == NULL) { + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); + // insert at tail. 
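+		/* The queue uses the same head/indirect-tail idiom as the mainline
+		 * struct tasklet_head: 'tail' always points at the last ->next field
+		 * (or at 'head' while the list is empty), so an append never needs an
+		 * empty-list special case. For example, starting from
+		 *
+		 *	head == NULL, tail == &head
+		 *
+		 * appending tasklet A via "*tail = A; tail = &A->next;" leaves
+		 *
+		 *	head == A, A->next == NULL, tail == &A->next
+		 */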
+ *(gsnedf_pending_tasklets.tail) = tasklet; + gsnedf_pending_tasklets.tail = &(tasklet->next); + } + else if((*(gsnedf_pending_tasklets.tail) != NULL) && + edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) { + // insert at tail. + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); + + *(gsnedf_pending_tasklets.tail) = tasklet; + gsnedf_pending_tasklets.tail = &(tasklet->next); + } + else { + // insert the tasklet somewhere in the middle. + + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); + + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { + step = step->next; + } + + // insert tasklet right before step->next. + + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); + + tasklet->next = step->next; + step->next = tasklet; + + // patch up the head if needed. + if(gsnedf_pending_tasklets.head == step) + { + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); + gsnedf_pending_tasklets.head = tasklet; + } + } +} + +static void gsnedf_run_tasklets(struct task_struct* sched_task) +{ + preempt_disable(); + + if(gsnedf_pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + do_lit_tasklets(sched_task); + } + + preempt_enable_no_resched(); +} + +static int gsnedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet) +{ + cpu_entry_t *targetCPU = NULL; + int thisCPU; + int runLocal = 0; + int runNow = 0; + unsigned long flags; + + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + return 0; + } + + + raw_spin_lock_irqsave(&gsnedf_lock, flags); + + thisCPU = smp_processor_id(); + +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = NULL; + + // use this CPU if it is in our cluster and isn't running any RT work. + if( +#ifdef CONFIG_RELEASE_MASTER + (thisCPU != gsnedf.release_master) && +#endif + (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) { + affinity = &(__get_cpu_var(gsnedf_cpu_entries)); + } + else { + // this CPU is busy or shouldn't run tasklet in this cluster. + // look for available near by CPUs. + // NOTE: Affinity towards owner and not this CPU. Is this right? + affinity = + gsnedf_get_nearest_available_cpu( + &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner))); + } + + targetCPU = affinity; + } +#endif + + if (targetCPU == NULL) { + targetCPU = lowest_prio_cpu(); + } + + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) { + if (thisCPU == targetCPU->cpu) { + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); + runLocal = 1; + runNow = 1; + } + else { + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); + runLocal = 0; + runNow = 1; + } + } + else { + runLocal = 0; + runNow = 0; + } + + if(!runLocal) { + // enqueue the tasklet + __add_pai_tasklet(tasklet); + } + + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + + + if (runLocal /*&& runNow */) { // runNow == 1 is implied + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); + } + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); + preempt(targetCPU); // need to be protected by cedf_lock? 
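+		/* Dispatch summary: (runLocal, runNow) can only come out as (1,1) --
+		 * run the tasklet inline on this CPU; (0,1) -- the tasklet was queued
+		 * and the chosen remote CPU is preempted, which is expected to pick
+		 * the tasklet up when it reschedules; or (0,0) -- the tasklet stays
+		 * queued until a sufficiently low-priority task is scheduled.
+		 */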
+ } + else { + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); + } + + return(1); // success +} + +static void gsnedf_change_prio_pai_tasklet(struct task_struct *old_prio, + struct task_struct *new_prio) +{ + struct tasklet_struct* step; + unsigned long flags; + + if(gsnedf_pending_tasklets.head != NULL) { + raw_spin_lock_irqsave(&gsnedf_lock, flags); + for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) { + if(step->owner == old_prio) { + TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid); + step->owner = new_prio; + } + } + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + } +} + +#endif // end PAI + + /* Getting schedule() right is a bit tricky. schedule() may not make any * assumptions on the state of the current task since it may be called for a * number of reasons. The reasons include a scheduler_tick() determined that it @@ -437,21 +797,32 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); #endif + /* if (exists) TRACE_TASK(prev, "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " "state:%d sig:%d\n", blocks, out_of_time, np, sleep, preempt, prev->state, signal_pending(prev)); + */ + if (entry->linked && preempt) TRACE_TASK(prev, "will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); - /* If a task blocks we have no choice but to reschedule. */ - if (blocks) + if (blocks) { unlink(entry->scheduled); + } + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { + if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + stop_gpu_tracker(entry->scheduled); + } + } +#endif /* Request a sys_exit_np() call if we would like to preempt but cannot. * We need to make sure to update the link structure anyway in case @@ -492,12 +863,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) entry->scheduled->rt_param.scheduled_on = NO_CPU; TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); } - } else + } + else + { /* Only override Linux scheduler if we have a real-time task * scheduled that needs to continue. */ if (exists) next = prev; + } sched_state_task_picked(); @@ -524,6 +898,7 @@ static void gsnedf_finish_switch(struct task_struct *prev) cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); entry->scheduled = is_realtime(current) ? current : NULL; + #ifdef WANT_ALL_SCHED_EVENTS TRACE_TASK(prev, "switched away from\n"); #endif @@ -572,11 +947,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) static void gsnedf_task_wake_up(struct task_struct *task) { unsigned long flags; - lt_t now; + //lt_t now; TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); raw_spin_lock_irqsave(&gsnedf_lock, flags); + + +#if 0 // sporadic task model /* We need to take suspensions because of semaphores into * account! If a job resumes after being suspended due to acquiring * a semaphore, it should never be treated as a new job release. @@ -598,19 +976,26 @@ static void gsnedf_task_wake_up(struct task_struct *task) } } } +#else // periodic task model + set_rt_flags(task, RT_F_RUNNING); +#endif + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } static void gsnedf_task_block(struct task_struct *t) { + // TODO: is this called on preemption?? 
unsigned long flags; TRACE_TASK(t, "block at %llu\n", litmus_clock()); /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); + unlink(t); + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -621,6 +1006,10 @@ static void gsnedf_task_exit(struct task_struct * t) { unsigned long flags; +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + gsnedf_change_prio_pai_tasklet(t, NULL); +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); unlink(t); @@ -637,101 +1026,423 @@ static void gsnedf_task_exit(struct task_struct * t) static long gsnedf_admit_task(struct task_struct* tsk) { +#ifdef CONFIG_LITMUS_NESTED_LOCKING + INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, + edf_max_heap_base_priority_order); +#endif + return 0; } + + + + + #ifdef CONFIG_LITMUS_LOCKING #include /* called with IRQs off */ -static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +static void __increase_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) { int linked_on; int check_preempt = 0; +#ifdef CONFIG_LITMUS_NESTED_LOCKING + /* this sanity check allows for weaker locking in protocols */ + /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ + if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { +#endif + TRACE_TASK(t, "inherits priority from %s/%d\n", + prio_inh->comm, prio_inh->pid); + tsk_rt(t)->inh_task = prio_inh; + + linked_on = tsk_rt(t)->linked_on; + + /* If it is scheduled, then we need to reorder the CPU heap. */ + if (linked_on != NO_CPU) { + TRACE_TASK(t, "%s: linked on %d\n", + __FUNCTION__, linked_on); + /* Holder is scheduled; need to re-order CPUs. + * We can't use heap_decrease() here since + * the cpu_heap is ordered in reverse direction, so + * it is actually an increase. */ + binheap_delete(&gsnedf_cpus[linked_on]->hn, &gsnedf_cpu_heap); + binheap_add(&gsnedf_cpus[linked_on]->hn, + &gsnedf_cpu_heap, cpu_entry_t, hn); + } else { + /* holder may be queued: first stop queue changes */ + raw_spin_lock(&gsnedf.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "%s: is queued\n", + __FUNCTION__); + /* We need to update the position of holder in some + * heap. Note that this could be a release heap if we + * budget enforcement is used and this job overran. */ + check_preempt = + !bheap_decrease(edf_ready_order, + tsk_rt(t)->heap_node); + } else { + /* Nothing to do: if it is not queued and not linked + * then it is either sleeping or currently being moved + * by other code (e.g., a timer interrupt handler) that + * will use the correct priority when enqueuing the + * task. */ + TRACE_TASK(t, "%s: is NOT queued => Done.\n", + __FUNCTION__); + } + raw_spin_unlock(&gsnedf.release_lock); + + /* If holder was enqueued in a release heap, then the following + * preemption check is pointless, but we can't easily detect + * that case. If you want to fix this, then consider that + * simply adding a state flag requires O(n) time to update when + * releasing n tasks, which conflicts with the goal to have + * O(log n) merges. */ + if (check_preempt) { + /* heap_decrease() hit the top level of the heap: make + * sure preemption checks get the right task, not the + * potentially stale cache. */ + bheap_uncache_min(edf_ready_order, + &gsnedf.ready_queue); + check_for_preemptions(); + } + } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + } + else { + TRACE_TASK(t, "Spurious invalid priority increase. 
" + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", + t->comm, t->pid, + effective_priority(t)->comm, effective_priority(t)->pid, + (prio_inh) ? prio_inh->comm : "nil", + (prio_inh) ? prio_inh->pid : -1); + WARN_ON(!prio_inh); + } +#endif +} + +/* called with IRQs off */ +static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ raw_spin_lock(&gsnedf_lock); - TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); - tsk_rt(t)->inh_task = prio_inh; - - linked_on = tsk_rt(t)->linked_on; - - /* If it is scheduled, then we need to reorder the CPU heap. */ - if (linked_on != NO_CPU) { - TRACE_TASK(t, "%s: linked on %d\n", - __FUNCTION__, linked_on); - /* Holder is scheduled; need to re-order CPUs. - * We can't use heap_decrease() here since - * the cpu_heap is ordered in reverse direction, so - * it is actually an increase. */ - bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, - gsnedf_cpus[linked_on]->hn); - bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, - gsnedf_cpus[linked_on]->hn); - } else { - /* holder may be queued: first stop queue changes */ - raw_spin_lock(&gsnedf.release_lock); - if (is_queued(t)) { - TRACE_TASK(t, "%s: is queued\n", - __FUNCTION__); - /* We need to update the position of holder in some - * heap. Note that this could be a release heap if we - * budget enforcement is used and this job overran. */ - check_preempt = - !bheap_decrease(edf_ready_order, - tsk_rt(t)->heap_node); - } else { - /* Nothing to do: if it is not queued and not linked - * then it is either sleeping or currently being moved - * by other code (e.g., a timer interrupt handler) that - * will use the correct priority when enqueuing the - * task. */ - TRACE_TASK(t, "%s: is NOT queued => Done.\n", - __FUNCTION__); + __increase_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&gsnedf_lock); + +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) + if(tsk_rt(t)->held_gpus) { + int i; + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + i < NV_DEVICE_NUM; + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { + pai_check_priority_increase(t, i); + } + } +#endif +} + + +/* called with IRQs off */ +static void __decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ +#ifdef CONFIG_LITMUS_NESTED_LOCKING + if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { +#endif + /* A job only stops inheriting a priority when it releases a + * resource. Thus we can make the following assumption.*/ + if(prio_inh) + TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n", + prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "base priority restored.\n"); + + tsk_rt(t)->inh_task = prio_inh; + + if(tsk_rt(t)->scheduled_on != NO_CPU) { + TRACE_TASK(t, "is scheduled.\n"); + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. 
*/ + unlink(t); + gsnedf_job_arrival(t); } - raw_spin_unlock(&gsnedf.release_lock); - - /* If holder was enqueued in a release heap, then the following - * preemption check is pointless, but we can't easily detect - * that case. If you want to fix this, then consider that - * simply adding a state flag requires O(n) time to update when - * releasing n tasks, which conflicts with the goal to have - * O(log n) merges. */ - if (check_preempt) { - /* heap_decrease() hit the top level of the heap: make - * sure preemption checks get the right task, not the - * potentially stale cache. */ - bheap_uncache_min(edf_ready_order, - &gsnedf.ready_queue); - check_for_preemptions(); + else { + /* task is queued */ + raw_spin_lock(&gsnedf.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "is queued.\n"); + + /* decrease in priority, so we have to re-add to binomial heap */ + unlink(t); + gsnedf_job_arrival(t); + } + else { + TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n"); + } + raw_spin_unlock(&gsnedf.release_lock); } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + } + else { + TRACE_TASK(t, "Spurious invalid priority decrease. " + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", + t->comm, t->pid, + effective_priority(t)->comm, effective_priority(t)->pid, + (prio_inh) ? prio_inh->comm : "nil", + (prio_inh) ? prio_inh->pid : -1); } +#endif +} + +static void decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ + raw_spin_lock(&gsnedf_lock); + __decrease_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d decreases in priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif raw_spin_unlock(&gsnedf_lock); + +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) + if(tsk_rt(t)->held_gpus) { + int i; + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + i < NV_DEVICE_NUM; + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { + pai_check_priority_decrease(t, i); + } + } +#endif } + +#ifdef CONFIG_LITMUS_SOFTIRQD /* called with IRQs off */ -static void clear_priority_inheritance(struct task_struct* t) +static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) { + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + raw_spin_lock(&gsnedf_lock); - /* A job only stops inheriting a priority when it releases a - * resource. Thus we can make the following assumption.*/ - BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } - TRACE_TASK(t, "priority restored\n"); - tsk_rt(t)->inh_task = NULL; + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); - /* Check if rescheduling is necessary. We can't use heap_decrease() - * since the priority was effectively lowered. */ - unlink(t); - gsnedf_job_arrival(t); + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio. + + __increase_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? 
+ new_owner : + tsk_rt(new_owner)->inh_task); raw_spin_unlock(&gsnedf_lock); } +/* called with IRQs off */ +static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&gsnedf_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + __decrease_priority_inheritance(klitirqd, new_owner); + + tsk_rt(old_owner)->cur_klitirqd = NULL; + + raw_spin_unlock(&gsnedf_lock); +} +#endif + + + + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + +/* called with IRQs off */ +/* preconditions: + (1) The 'hp_blocked_tasks_lock' of task 't' is held. + (2) The lock 'to_unlock' is held. + */ +static void nested_increase_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, + unsigned long irqflags) +{ + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; + + if(tsk_rt(t)->inh_task != prio_inh) { // shield redundent calls. + increase_priority_inheritance(t, prio_inh); // increase our prio. + } + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. + + + if(blocked_lock) { + if(blocked_lock->ops->propagate_increase_inheritance) { + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", + blocked_lock->ident); + + // beware: recursion + blocked_lock->ops->propagate_increase_inheritance(blocked_lock, + t, to_unlock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n", + blocked_lock->ident); + unlock_fine_irqrestore(to_unlock, irqflags); + } + } + else { + TRACE_TASK(t, "is not blocked. No propagation.\n"); + unlock_fine_irqrestore(to_unlock, irqflags); + } +} + +/* called with IRQs off */ +/* preconditions: + (1) The 'hp_blocked_tasks_lock' of task 't' is held. + (2) The lock 'to_unlock' is held. + */ +static void nested_decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, + unsigned long irqflags) +{ + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; + decrease_priority_inheritance(t, prio_inh); + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. + + if(blocked_lock) { + if(blocked_lock->ops->propagate_decrease_inheritance) { + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", + blocked_lock->ident); + + // beware: recursion + blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t, + to_unlock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", + blocked_lock); + unlock_fine_irqrestore(to_unlock, irqflags); + } + } + else { + TRACE_TASK(t, "is not blocked. 
No propagation.\n"); + unlock_fine_irqrestore(to_unlock, irqflags); + } +} + + +/* ******************** RSM MUTEX ********************** */ + +static struct litmus_lock_ops gsnedf_rsm_mutex_lock_ops = { + .lock = rsm_mutex_lock, + .unlock = rsm_mutex_unlock, + .close = rsm_mutex_close, + .deallocate = rsm_mutex_free, + + .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance, + .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance, + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .dgl_lock = rsm_mutex_dgl_lock, + .is_owner = rsm_mutex_is_owner, + .enable_priority = rsm_mutex_enable_priority, +#endif +}; + +static struct litmus_lock* gsnedf_new_rsm_mutex(void) +{ + return rsm_mutex_new(&gsnedf_rsm_mutex_lock_ops); +} + +/* ******************** IKGLP ********************** */ + +static struct litmus_lock_ops gsnedf_ikglp_lock_ops = { + .lock = ikglp_lock, + .unlock = ikglp_unlock, + .close = ikglp_close, + .deallocate = ikglp_free, + + // ikglp can only be an outer-most lock. + .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL, +}; + +static struct litmus_lock* gsnedf_new_ikglp(void* __user arg) +{ + return ikglp_new(num_online_cpus(), &gsnedf_ikglp_lock_ops, arg); +} + +#endif /* CONFIG_LITMUS_NESTED_LOCKING */ + + +/* ******************** KFMLP support ********************** */ + +static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = { + .lock = kfmlp_lock, + .unlock = kfmlp_unlock, + .close = kfmlp_close, + .deallocate = kfmlp_free, + + // kfmlp can only be an outer-most lock. + .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL, +}; + + +static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg) +{ + return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg); +} + /* ******************** FMLP support ********************** */ /* struct for semaphore with priority inheritance */ @@ -797,7 +1508,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l) if (edf_higher_prio(t, sem->hp_waiter)) { sem->hp_waiter = t; if (edf_higher_prio(t, sem->owner)) - set_priority_inheritance(sem->owner, sem->hp_waiter); + increase_priority_inheritance(sem->owner, sem->hp_waiter); } TS_LOCK_SUSPEND; @@ -865,7 +1576,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l) /* Well, if next is not the highest-priority waiter, * then it ought to inherit the highest-priority * waiter's priority. */ - set_priority_inheritance(next, sem->hp_waiter); + increase_priority_inheritance(next, sem->hp_waiter); } /* wake up next */ @@ -876,7 +1587,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l) /* we lose the benefit of priority inheritance (if any) */ if (tsk_rt(t)->inh_task) - clear_priority_inheritance(t); + decrease_priority_inheritance(t, NULL); out: spin_unlock_irqrestore(&sem->wait.lock, flags); @@ -914,6 +1625,11 @@ static struct litmus_lock_ops gsnedf_fmlp_lock_ops = { .lock = gsnedf_fmlp_lock, .unlock = gsnedf_fmlp_unlock, .deallocate = gsnedf_fmlp_free, + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL +#endif }; static struct litmus_lock* gsnedf_new_fmlp(void) @@ -932,47 +1648,121 @@ static struct litmus_lock* gsnedf_new_fmlp(void) return &sem->litmus_lock; } -/* **** lock constructor **** */ - static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, - void* __user unused) + void* __user args) { - int err = -ENXIO; + int err; - /* GSN-EDF currently only supports the FMLP for global resources. 
*/ switch (type) { case FMLP_SEM: /* Flexible Multiprocessor Locking Protocol */ *lock = gsnedf_new_fmlp(); - if (*lock) - err = 0; - else - err = -ENOMEM; + break; +#ifdef CONFIG_LITMUS_NESTED_LOCKING + case RSM_MUTEX: + *lock = gsnedf_new_rsm_mutex(); break; + case IKGLP_SEM: + *lock = gsnedf_new_ikglp(args); + break; +#endif + case KFMLP_SEM: + *lock = gsnedf_new_kfmlp(args); + break; + default: + err = -ENXIO; + goto UNSUPPORTED_LOCK; }; + if (*lock) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_LOCK: return err; } +#endif // CONFIG_LITMUS_LOCKING + + + + + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = { + .close = kfmlp_aff_obs_close, + .deallocate = kfmlp_aff_obs_free, +}; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +static struct affinity_observer_ops gsnedf_ikglp_affinity_ops = { + .close = ikglp_aff_obs_close, + .deallocate = ikglp_aff_obs_free, +}; #endif +static long gsnedf_allocate_affinity_observer( + struct affinity_observer **aff_obs, + int type, + void* __user args) +{ + int err; + + switch (type) { + + case KFMLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); + break; + + case KFMLP_GPU_AFF_OBS: + *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); + break; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + case IKGLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = ikglp_simple_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args); + break; + + case IKGLP_GPU_AFF_OBS: + *aff_obs = ikglp_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args); + break; +#endif + default: + err = -ENXIO; + goto UNSUPPORTED_AFF_OBS; + }; + + if (*aff_obs) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_AFF_OBS: + return err; +} +#endif + + + + static long gsnedf_activate_plugin(void) { int cpu; cpu_entry_t *entry; - bheap_init(&gsnedf_cpu_heap); + INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio); #ifdef CONFIG_RELEASE_MASTER gsnedf.release_master = atomic_read(&release_master_cpu); #endif for_each_online_cpu(cpu) { entry = &per_cpu(gsnedf_cpu_entries, cpu); - bheap_node_init(&entry->hn, entry); + INIT_BINHEAP_NODE(&entry->hn); entry->linked = NULL; entry->scheduled = NULL; #ifdef CONFIG_RELEASE_MASTER @@ -986,6 +1776,20 @@ static long gsnedf_activate_plugin(void) } #endif } + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + gsnedf_pending_tasklets.head = NULL; + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD + spawn_klitirqd(NULL); +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif + return 0; } @@ -1002,8 +1806,31 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .task_block = gsnedf_task_block, .admit_task = gsnedf_admit_task, .activate_plugin = gsnedf_activate_plugin, + .compare = edf_higher_prio, #ifdef CONFIG_LITMUS_LOCKING .allocate_lock = gsnedf_allocate_lock, + .increase_prio = increase_priority_inheritance, + .decrease_prio = decrease_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + .nested_increase_prio = nested_increase_priority_inheritance, + .nested_decrease_prio = nested_decrease_priority_inheritance, + .__compare = __edf_higher_prio, +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .get_dgl_spinlock = gsnedf_get_dgl_spinlock, +#endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + .allocate_aff_obs = gsnedf_allocate_affinity_observer, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, + .decrease_prio_klitirqd = 
decrease_priority_inheritance_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet, + .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet, + .run_tasklets = gsnedf_run_tasklets, #endif }; @@ -1013,15 +1840,20 @@ static int __init init_gsn_edf(void) int cpu; cpu_entry_t *entry; - bheap_init(&gsnedf_cpu_heap); + INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio); /* initialize CPU state */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for (cpu = 0; cpu < NR_CPUS; ++cpu) { entry = &per_cpu(gsnedf_cpu_entries, cpu); gsnedf_cpus[cpu] = entry; entry->cpu = cpu; - entry->hn = &gsnedf_heap_node[cpu]; - bheap_node_init(&entry->hn, entry); + + INIT_BINHEAP_NODE(&entry->hn); } + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spin_lock_init(&dgl_lock); +#endif + edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); return register_sched_plugin(&gsn_edf_plugin); } diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c index 5a15ce938984..9a6fe487718e 100644 --- a/litmus/sched_litmus.c +++ b/litmus/sched_litmus.c @@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) } #ifdef __ARCH_WANT_UNLOCKED_CTXSW if (next->oncpu) + { TRACE_TASK(next, "waiting for !oncpu"); + } while (next->oncpu) { cpu_relax(); mb(); diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 00a1900d6457..245e41c25a5d 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -13,6 +13,10 @@ #include #include +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + /* * Generic function to trigger preemption on either local or remote cpu * from scheduler plugins. The key feature is that this function is @@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void) static long litmus_dummy_activate_plugin(void) { +#ifdef CONFIG_LITMUS_NVIDIA + shutdown_nvidia_info(); +#endif return 0; } @@ -110,14 +117,93 @@ static long litmus_dummy_deactivate_plugin(void) return 0; } -#ifdef CONFIG_LITMUS_LOCKING +static int litmus_dummy_compare(struct task_struct* a, struct task_struct* b) +{ + TRACE_CUR("WARNING: Dummy compare function called!\n"); + return 0; +} +#ifdef CONFIG_LITMUS_LOCKING static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, void* __user config) { return -ENXIO; } +static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct* prio_inh) +{ +} + +static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh) +{ +} +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +static void litmus_dummy_increase_prio_klitirq(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ +} + +static void litmus_dummy_decrease_prio_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ +} +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t) +{ + TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); + return(0); // failure. 
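+	/* A zero return is presumably read by the caller as "not queued here",
+	 * so the tasklet can still fall back to the regular softirq machinery
+	 * instead of being dropped. Plugins that implement PAI (e.g.
+	 * gsnedf_enqueue_pai_tasklet() in this patch) return 1 once they have
+	 * taken charge of the tasklet.
+	 */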
+} + +static void litmus_dummy_change_prio_pai_tasklet(struct task_struct *old_prio, + struct task_struct *new_prio) +{ + TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); +} + +static void litmus_dummy_run_tasklets(struct task_struct* t) +{ + //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); +} +#endif + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +static void litmus_dummy_nested_increase_prio(struct task_struct* t, struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, unsigned long irqflags) +{ +} + +static void litmus_dummy_nested_decrease_prio(struct task_struct* t, struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, unsigned long irqflags) +{ +} + +static int litmus_dummy___compare(struct task_struct* a, comparison_mode_t a_mod, + struct task_struct* b, comparison_mode_t b_mode) +{ + TRACE_CUR("WARNING: Dummy compare function called!\n"); + return 0; +} +#endif + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +static raw_spinlock_t* litmus_dummy_get_dgl_spinlock(struct task_struct *t) +{ + return NULL; +} +#endif + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs, + int type, + void* __user config) +{ + return -ENXIO; +} #endif @@ -136,9 +222,33 @@ struct sched_plugin linux_sched_plugin = { .finish_switch = litmus_dummy_finish_switch, .activate_plugin = litmus_dummy_activate_plugin, .deactivate_plugin = litmus_dummy_deactivate_plugin, + .compare = litmus_dummy_compare, #ifdef CONFIG_LITMUS_LOCKING .allocate_lock = litmus_dummy_allocate_lock, + .increase_prio = litmus_dummy_increase_prio, + .decrease_prio = litmus_dummy_decrease_prio, +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + .nested_increase_prio = litmus_dummy_nested_increase_prio, + .nested_decrease_prio = litmus_dummy_nested_decrease_prio, + .__compare = litmus_dummy___compare, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .increase_prio_klitirqd = litmus_dummy_increase_prio_klitirqd, + .decrease_prio_klitirqd = litmus_dummy_decrease_prio_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, + .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet, + .run_tasklets = litmus_dummy_run_tasklets, +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .get_dgl_spinlock = litmus_dummy_get_dgl_spinlock, #endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + .allocate_aff_obs = litmus_dummy_allocate_aff_obs, +#endif + .admit_task = litmus_dummy_admit_task }; @@ -174,8 +284,31 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(complete_job); CHECK(activate_plugin); CHECK(deactivate_plugin); + CHECK(compare); #ifdef CONFIG_LITMUS_LOCKING CHECK(allocate_lock); + CHECK(increase_prio); + CHECK(decrease_prio); +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + CHECK(nested_increase_prio); + CHECK(nested_decrease_prio); + CHECK(__compare); +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + CHECK(increase_prio_klitirqd); + CHECK(decrease_prio_klitirqd); +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + CHECK(enqueue_pai_tasklet); + CHECK(change_prio_pai_tasklet); + CHECK(run_tasklets); +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + CHECK(get_dgl_spinlock); +#endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + CHECK(allocate_aff_obs); #endif CHECK(admit_task); diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 5ef8d09ab41f..f7f575346b54 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ 
-16,13 +17,13 @@ #include -#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) +#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11)) #define now() litmus_clock() struct local_buffer { - struct st_event_record record[NO_EVENTS]; - char flag[NO_EVENTS]; + struct st_event_record record[NUM_EVENTS]; + char flag[NUM_EVENTS]; struct ft_buffer ftbuf; }; @@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void) int i, ok = 0, err; printk("Allocated %u sched_trace_xxx() events per CPU " "(buffer size: %d bytes)\n", - NO_EVENTS, (int) sizeof(struct local_buffer)); + NUM_EVENTS, (int) sizeof(struct local_buffer)); err = ftdev_init(&st_dev, THIS_MODULE, num_online_cpus(), "sched_trace"); @@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void) for (i = 0; i < st_dev.minor_cnt; i++) { buf = &per_cpu(st_event_buffer, i); - ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, + ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS, sizeof(struct st_event_record), buf->flag, buf->record); @@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id, { struct task_struct *t = (struct task_struct*) _task; struct st_event_record* rec; - if (is_realtime(t)) { + //if (is_realtime(t)) /* comment out to trace EVERYTHING */ + { rec = get_record(ST_SWITCH_TO, t); if (rec) { rec->data.switch_to.when = now(); @@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id, { struct task_struct *t = (struct task_struct*) _task; struct st_event_record* rec; - if (is_realtime(t)) { + //if (is_realtime(t)) /* comment out to trace EVERYTHING */ + { rec = get_record(ST_SWITCH_AWAY, t); if (rec) { rec->data.switch_away.when = now(); @@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id, if (rec) { rec->data.completion.when = now(); rec->data.completion.forced = forced; +#ifdef LITMUS_NVIDIA + rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); +#endif put_record(rec); } } @@ -239,3 +245,265 @@ feather_callback void do_sched_trace_action(unsigned long id, put_record(rec); } } + + + + +feather_callback void do_sched_trace_prediction_err(unsigned long id, + unsigned long _task, + unsigned long _distance, + unsigned long _rel_err) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t); + + if (rec) { + gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance; + fp_t* rel_err = (fp_t*) _rel_err; + + rec->data.prediction_err.distance = *distance; + rec->data.prediction_err.rel_err = rel_err->val; + put_record(rec); + } +} + + +feather_callback void do_sched_trace_migration(unsigned long id, + unsigned long _task, + unsigned long _mig_info) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_MIGRATION, t); + + if (rec) { + struct migration_info* mig_info = (struct migration_info*) _mig_info; + + rec->hdr.extra = mig_info->distance; + rec->data.migration.observed = mig_info->observed; + rec->data.migration.estimated = mig_info->estimated; + + put_record(rec); + } +} + + + + + + + + + +feather_callback void do_sched_trace_tasklet_release(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t); + + if (rec) { + rec->data.tasklet_release.when = now(); + put_record(rec); + } +} + + +feather_callback void do_sched_trace_tasklet_begin(unsigned long id, + unsigned long _owner) 
+{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t); + + if (rec) { + rec->data.tasklet_begin.when = now(); + + if(!in_interrupt()) + rec->data.tasklet_begin.exe_pid = current->pid; + else + rec->data.tasklet_begin.exe_pid = 0; + + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_tasklet_begin); + + +feather_callback void do_sched_trace_tasklet_end(unsigned long id, + unsigned long _owner, + unsigned long _flushed) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_END, t); + + if (rec) { + rec->data.tasklet_end.when = now(); + rec->data.tasklet_end.flushed = _flushed; + + if(!in_interrupt()) + rec->data.tasklet_end.exe_pid = current->pid; + else + rec->data.tasklet_end.exe_pid = 0; + + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_tasklet_end); + + +feather_callback void do_sched_trace_work_release(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_RELEASE, t); + + if (rec) { + rec->data.work_release.when = now(); + put_record(rec); + } +} + + +feather_callback void do_sched_trace_work_begin(unsigned long id, + unsigned long _owner, + unsigned long _exe) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_BEGIN, t); + + if (rec) { + struct task_struct *exe = (struct task_struct*) _exe; + rec->data.work_begin.exe_pid = exe->pid; + rec->data.work_begin.when = now(); + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_work_begin); + + +feather_callback void do_sched_trace_work_end(unsigned long id, + unsigned long _owner, + unsigned long _exe, + unsigned long _flushed) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_END, t); + + if (rec) { + struct task_struct *exe = (struct task_struct*) _exe; + rec->data.work_end.exe_pid = exe->pid; + rec->data.work_end.flushed = _flushed; + rec->data.work_end.when = now(); + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_work_end); + + +feather_callback void do_sched_trace_eff_prio_change(unsigned long id, + unsigned long _task, + unsigned long _inh) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t); + + if (rec) { + struct task_struct *inh = (struct task_struct*) _inh; + rec->data.effective_priority_change.when = now(); + rec->data.effective_priority_change.inh_pid = (inh != NULL) ? + inh->pid : + 0xffff; + + put_record(rec); + } +} + +/* pray for no nesting of nv interrupts on same CPU... */ +struct tracing_interrupt_map +{ + int active; + int count; + unsigned long data[128]; // assume nesting less than 128... 
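+	/* serial[i] records the serial number handed out at nesting level i by
+	 * do_sched_trace_nv_interrupt_begin(), so that the matching
+	 * do_sched_trace_nv_interrupt_end() can report the same number even if
+	 * interrupts nest.
+	 */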
+ unsigned long serial[128]; +}; +DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing); + + +DEFINE_PER_CPU(u32, intCounter); + +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, + unsigned long _device) +{ + struct st_event_record *rec; + u32 serialNum; + + { + u32* serial; + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(!int_map->active == 0xcafebabe) + { + int_map->count++; + } + else + { + int_map->active = 0xcafebabe; + int_map->count = 1; + } + //int_map->data[int_map->count-1] = _device; + + serial = &per_cpu(intCounter, smp_processor_id()); + *serial += num_online_cpus(); + serialNum = *serial; + int_map->serial[int_map->count-1] = serialNum; + } + + rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL); + if(rec) { + u32 device = _device; + rec->data.nv_interrupt_begin.when = now(); + rec->data.nv_interrupt_begin.device = device; + rec->data.nv_interrupt_begin.serialNumber = serialNum; + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin); + +/* +int is_interrupt_tracing_active(void) +{ + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + return 1; + return 0; +} +*/ + +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device) +{ + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + { + struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL); + + int_map->count--; + if(int_map->count == 0) + int_map->active = 0; + + if(rec) { + u32 device = _device; + rec->data.nv_interrupt_end.when = now(); + //rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; + rec->data.nv_interrupt_end.device = device; + rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count]; + put_record(rec); + } + } +} +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end); + + + + + + + + + diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c new file mode 100644 index 000000000000..cf8e1d78aa77 --- /dev/null +++ b/litmus/sched_trace_external.c @@ -0,0 +1,64 @@ +#include + +#include +#include +#include + +void __sched_trace_tasklet_begin_external(struct task_struct* t) +{ + sched_trace_tasklet_begin(t); +} +EXPORT_SYMBOL(__sched_trace_tasklet_begin_external); + +void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed) +{ + sched_trace_tasklet_end(t, flushed); +} +EXPORT_SYMBOL(__sched_trace_tasklet_end_external); + + + +void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e) +{ + sched_trace_work_begin(t, e); +} +EXPORT_SYMBOL(__sched_trace_work_begin_external); + +void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f) +{ + sched_trace_work_end(t, e, f); +} +EXPORT_SYMBOL(__sched_trace_work_end_external); + + + +void __sched_trace_nv_interrupt_begin_external(u32 device) +{ + //unsigned long _device = device; + sched_trace_nv_interrupt_begin((unsigned long)device); +} +EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); + +void __sched_trace_nv_interrupt_end_external(u32 device) +{ + //unsigned long _device = device; + sched_trace_nv_interrupt_end((unsigned long)device); +} +EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); + + +#ifdef CONFIG_LITMUS_NVIDIA + +#define EXX_TS(evt) \ +void __##evt(void) { evt; } \ +EXPORT_SYMBOL(__##evt); + 
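+/* Each EXX_TS() line below expands to a tiny exported wrapper around the
+ * corresponding timestamp macro (TS_*), e.g. EXX_TS(TS_NV_TOPISR_START)
+ * becomes:
+ *
+ *	void __TS_NV_TOPISR_START(void) { TS_NV_TOPISR_START; }
+ *	EXPORT_SYMBOL(__TS_NV_TOPISR_START);
+ *
+ * which lets code outside this translation unit (e.g. the NVIDIA interrupt
+ * glue) record these timestamps through an exported symbol.
+ */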
+EXX_TS(TS_NV_TOPISR_START) +EXX_TS(TS_NV_TOPISR_END) +EXX_TS(TS_NV_BOTISR_START) +EXX_TS(TS_NV_BOTISR_END) +EXX_TS(TS_NV_RELEASE_BOTISR_START) +EXX_TS(TS_NV_RELEASE_BOTISR_END) + +#endif + -- cgit v1.2.2 From 9a19f35c9c287cb8abd5bcf276ae8d1a3e876907 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 20 Aug 2012 16:49:50 -0400 Subject: Improve readability of EDF comparisons. Restructured the EDF task comparison code to improve readability. Recoded chained logical expression embedded in return statement into a series of if/else blocks. --- litmus/edf_common.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 9b44dc2d8d1e..668737f0fbf9 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -63,25 +63,35 @@ int edf_higher_prio(struct task_struct* first, #endif - - return !is_realtime(second_task) || - - /* is the deadline of the first task earlier? + /* Determine the task with earliest deadline, with + * tie-break logic. + */ + if (unlikely(!is_realtime(second_task))) { + return 1; + } + else if (earlier_deadline(first_task, second_task)) { + /* Is the deadline of the first task earlier? * Then it has higher priority. */ - earlier_deadline(first_task, second_task) || - - /* Do we have a deadline tie? - * Then break by PID. - */ - (get_deadline(first_task) == get_deadline(second_task) && - (first_task->pid < second_task->pid || + return 1; + } + else if (get_deadline(first_task) == get_deadline(second_task)) { + /* Need to tie break */ - /* If the PIDs are the same then the task with the inherited - * priority wins. - */ - (first_task->pid == second_task->pid && - !second->rt_param.inh_task))); + /* Tie break by pid */ + if (first_task->pid < second_task->pid) { + return 1; + } + else if (first_task->pid == second_task->pid) { + /* If the PIDs are the same then the task with the + * inherited priority wins. + */ + if (!second_task->rt_param.inh_task) { + return 1; + } + } + } + return 0; /* fall-through. prio(second_task) > prio(first_task) */ } int edf_ready_order(struct bheap_node* a, struct bheap_node* b) -- cgit v1.2.2 From 077aaecac31331b65442275843932314049a2ceb Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 20 Aug 2012 17:28:55 -0400 Subject: EDF priority tie-breaks. Instead of tie-breaking by PID (which is a static priority tie-break), we can tie-break by other job-level-unique parameters. This is desirable because tasks are equaly affected by tardiness since static priority tie-breaks cause tasks with greater PID values to experience the most tardiness. There are four tie-break methods: 1) Lateness. If two jobs, J_{1,i} and J_{2,j} of tasks T_1 and T_2, respectively, have equal deadlines, we favor the job of the task that had the worst lateness for jobs J_{1,i-1} and J_{2,j-1}. Note: Unlike tardiness, lateness may be less than zero. This occurs when a job finishes before its deadline. 2) Normalized Lateness. The same as #1, except lateness is first normalized by each task's relative deadline. This prevents tasks with short relative deadlines and small execution requirements from always losing tie-breaks. 3) Hash. The job tuple (PID, Job#) is used to generate a hash. Hash values are then compared. A job has ~50% chance of winning a tie-break with respect to another job. Note: Emperical testing shows that some jobs can have +/- ~1.5% advantage in tie-breaks. Linux's built-in hash function is not totally a uniform hash. 4) PIDs. 
PID-based tie-break used in prior versions of Litmus. Conflicts: litmus/edf_common.c --- include/litmus/fpmath.h | 145 ++++++++++++++++++++++++++++++++++++++++++++++ include/litmus/litmus.h | 2 +- include/litmus/rt_param.h | 6 ++ litmus/Kconfig | 46 +++++++++++++++ litmus/edf_common.c | 110 +++++++++++++++++++++++++++++------ litmus/jobs.c | 8 +++ 6 files changed, 297 insertions(+), 20 deletions(-) create mode 100644 include/litmus/fpmath.h diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h new file mode 100644 index 000000000000..04d4bcaeae96 --- /dev/null +++ b/include/litmus/fpmath.h @@ -0,0 +1,145 @@ +#ifndef __FP_MATH_H__ +#define __FP_MATH_H__ + +#ifndef __KERNEL__ +#include +#define abs(x) (((x) < 0) ? -(x) : x) +#endif + +// Use 64-bit because we want to track things at the nanosecond scale. +// This can lead to very large numbers. +typedef int64_t fpbuf_t; +typedef struct +{ + fpbuf_t val; +} fp_t; + +#define FP_SHIFT 10 +#define ROUND_BIT (FP_SHIFT - 1) + +#define _fp(x) ((fp_t) {x}) + +#ifdef __KERNEL__ +static const fp_t LITMUS_FP_ZERO = {.val = 0}; +static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)}; +#endif + +static inline fp_t FP(fpbuf_t x) +{ + return _fp(((fpbuf_t) x) << FP_SHIFT); +} + +/* divide two integers to obtain a fixed point value */ +static inline fp_t _frac(fpbuf_t a, fpbuf_t b) +{ + return _fp(FP(a).val / (b)); +} + +static inline fpbuf_t _point(fp_t x) +{ + return (x.val % (1 << FP_SHIFT)); + +} + +#define fp2str(x) x.val +/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */ +#define _FP_ "%ld/1024" + +static inline fpbuf_t _floor(fp_t x) +{ + return x.val >> FP_SHIFT; +} + +/* FIXME: negative rounding */ +static inline fpbuf_t _round(fp_t x) +{ + return _floor(x) + ((x.val >> ROUND_BIT) & 1); +} + +/* multiply two fixed point values */ +static inline fp_t _mul(fp_t a, fp_t b) +{ + return _fp((a.val * b.val) >> FP_SHIFT); +} + +static inline fp_t _div(fp_t a, fp_t b) +{ +#if !defined(__KERNEL__) && !defined(unlikely) +#define unlikely(x) (x) +#define DO_UNDEF_UNLIKELY +#endif + /* try not to overflow */ + if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) )) + return _fp((a.val / b.val) << FP_SHIFT); + else + return _fp((a.val << FP_SHIFT) / b.val); +#ifdef DO_UNDEF_UNLIKELY +#undef unlikely +#undef DO_UNDEF_UNLIKELY +#endif +} + +static inline fp_t _add(fp_t a, fp_t b) +{ + return _fp(a.val + b.val); +} + +static inline fp_t _sub(fp_t a, fp_t b) +{ + return _fp(a.val - b.val); +} + +static inline fp_t _neg(fp_t x) +{ + return _fp(-x.val); +} + +static inline fp_t _abs(fp_t x) +{ + return _fp(abs(x.val)); +} + +/* works the same as casting float/double to integer */ +static inline fpbuf_t _fp_to_integer(fp_t x) +{ + return _floor(_abs(x)) * ((x.val > 0) ? 
1 : -1); +} + +static inline fp_t _integer_to_fp(fpbuf_t x) +{ + return _frac(x,1); +} + +static inline int _leq(fp_t a, fp_t b) +{ + return a.val <= b.val; +} + +static inline int _geq(fp_t a, fp_t b) +{ + return a.val >= b.val; +} + +static inline int _lt(fp_t a, fp_t b) +{ + return a.val < b.val; +} + +static inline int _gt(fp_t a, fp_t b) +{ + return a.val > b.val; +} + +static inline int _eq(fp_t a, fp_t b) +{ + return a.val == b.val; +} + +static inline fp_t _max(fp_t a, fp_t b) +{ + if (a.val < b.val) + return b; + else + return a; +} +#endif diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 338245abd6ed..807b7888695a 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -63,7 +63,7 @@ void litmus_exit_task(struct task_struct *tsk); #define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) #define get_deadline(t) (tsk_rt(t)->job_params.deadline) #define get_release(t) (tsk_rt(t)->job_params.release) - +#define get_lateness(t) (tsk_rt(t)->job_params.lateness) #define is_hrt(t) \ (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 89ac0dda7d3d..fac939dbd33a 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -110,6 +110,12 @@ struct rt_job { /* How much service has this job received so far? */ lt_t exec_time; + /* By how much did the prior job miss its deadline by? + * Value differs from tardiness in that lateness may + * be negative (when job finishes before its deadline). + */ + long long lateness; + /* Which job is this. This is used to let user space * specify which job to wait for, which is important if jobs * overrun. If we just call sys_sleep_next_period() then we diff --git a/litmus/Kconfig b/litmus/Kconfig index 68459d4dca41..48ff3e3c657c 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -79,6 +79,52 @@ config SCHED_CPU_AFFINITY Say Yes if unsure. +choice + prompt "EDF Tie-Break Behavior" + default EDF_TIE_BREAK_LATENESS_NORM + help + Allows the configuration of tie-breaking behavior when the deadlines + of two EDF-scheduled tasks are equal. + + config EDF_TIE_BREAK_LATENESS + bool "Lateness-based Tie Break" + help + Break ties between to jobs, A and B, based upon the lateness of their + prior jobs. The job with the greatest lateness has priority. Note that + lateness has a negative value if the prior job finished before its + deadline. + + config EDF_TIE_BREAK_LATENESS_NORM + bool "Normalized Lateness-based Tie Break" + help + Break ties between to jobs, A and B, based upon the lateness, normalized + by relative deadline, their prior jobs. The job with the greatest + normalized lateness has priority. Note that lateness has a negative value + if the prior job finished before its deadline. + + Normalized lateness tie-breaks are likely desireable over non-normalized + tie-breaks if the execution times and/or relative deadlines of tasks in a + task set vary greatly. + + config EDF_TIE_BREAK_HASH + bool "Hash-based Tie Breaks" + help + Break ties between two jobs, A and B, with equal deadlines by using a + uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job + A has ~50% of winning a given tie-break. + + config EDF_PID_TIE_BREAK + bool "PID-based Tie Breaks" + help + Break ties based upon OS-assigned process IDs. Use this option if + required by algorithm's real-time analysis or per-task response-time + jitter must be minimized in overload conditions. + + NOTES: + * This tie-breaking method was default in Litmus 2012.2 and before. 
+ +endchoice + endmenu menu "Tracing" diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 668737f0fbf9..52205df3ea8b 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -14,6 +14,32 @@ #include +#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM +#include +#endif + +#ifdef CONFIG_EDF_TIE_BREAK_HASH +#include +static inline long edf_hash(struct task_struct *t) +{ + /* pid is 32 bits, so normally we would shove that into the + * upper 32-bits and and put the job number in the bottom + * and hash the 64-bit number with hash_64(). Sadly, + * in testing, hash_64() doesn't distribute keys were the + * upper bits are close together (as would be the case with + * pids) and job numbers are equal (as would be the case with + * synchronous task sets with all relative deadlines equal). + * + * A 2006 Linux patch proposed the following solution + * (but for some reason it wasn't accepted...). + * + * At least this workaround works for 32-bit systems as well. + */ + return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); +} +#endif + + /* edf_higher_prio - returns true if first has a higher EDF priority * than second. Deadline ties are broken by PID. * @@ -63,32 +89,78 @@ int edf_higher_prio(struct task_struct* first, #endif - /* Determine the task with earliest deadline, with - * tie-break logic. - */ - if (unlikely(!is_realtime(second_task))) { - return 1; - } - else if (earlier_deadline(first_task, second_task)) { - /* Is the deadline of the first task earlier? - * Then it has higher priority. - */ + if (earlier_deadline(first_task, second_task)) { return 1; } else if (get_deadline(first_task) == get_deadline(second_task)) { - /* Need to tie break */ - - /* Tie break by pid */ - if (first_task->pid < second_task->pid) { + /* Need to tie break. All methods must set pid_break to 0/1 if + * first_task does not have priority over second_task. + */ + int pid_break; + + +#if defined(CONFIG_EDF_TIE_BREAK_LATENESS) + /* Tie break by lateness. Jobs with greater lateness get + * priority. This should spread tardiness across all tasks, + * especially in task sets where all tasks have the same + * period and relative deadlines. + */ + if (get_lateness(first_task) > get_lateness(second_task)) { return 1; } - else if (first_task->pid == second_task->pid) { - /* If the PIDs are the same then the task with the - * inherited priority wins. - */ - if (!second_task->rt_param.inh_task) { + pid_break = (get_lateness(first_task) == get_lateness(second_task)); + + +#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM) + /* Tie break by lateness, normalized by relative deadline. Jobs with + * greater normalized lateness get priority. + * + * Note: Considered using the algebraically equivalent + * lateness(first)*relative_deadline(second) > + lateness(second)*relative_deadline(first) + * to avoid fixed-point math, but values are prone to overflow if inputs + * are on the order of several seconds, even in 64-bit. + */ + fp_t fnorm = _frac(get_lateness(first_task), + get_rt_relative_deadline(first_task)); + fp_t snorm = _frac(get_lateness(second_task), + get_rt_relative_deadline(second_task)); + if (_gt(fnorm, snorm)) { + return 1; + } + pid_break = _eq(fnorm, snorm); + + +#elif defined(CONFIG_EDF_TIE_BREAK_HASH) + /* Tie break by comparing hashs of (pid, job#) tuple. There should be + * a 50% chance that first_task has a higher priority than second_task. 
+ */ + long fhash = edf_hash(first_task); + long shash = edf_hash(second_task); + if (fhash < shash) { + return 1; + } + pid_break = (fhash == shash); +#else + + + /* CONFIG_EDF_PID_TIE_BREAK */ + pid_break = 1; // fall through to tie-break by pid; +#endif + + /* Tie break by pid */ + if(pid_break) { + if (first_task->pid < second_task->pid) { return 1; } + else if (first_task->pid == second_task->pid) { + /* If the PIDs are the same then the task with the + * inherited priority wins. + */ + if (!second_task->rt_param.inh_task) { + return 1; + } + } } } return 0; /* fall-through. prio(second_task) > prio(first_task) */ diff --git a/litmus/jobs.c b/litmus/jobs.c index bc8246572e54..fb093c03d53d 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c @@ -23,6 +23,14 @@ static inline void setup_release(struct task_struct *t, lt_t release) void prepare_for_next_period(struct task_struct *t) { BUG_ON(!t); + + /* Record lateness before we set up the next job's + * release and deadline. Lateness may be negative. + */ + t->rt_param.job_params.lateness = + (long long)litmus_clock() - + (long long)t->rt_param.job_params.deadline; + setup_release(t, get_release(t) + get_rt_period(t)); } -- cgit v1.2.2 From 00c173dc87b14b8422cea2aa129a2fc99689a05d Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 31 Aug 2012 11:10:33 -0400 Subject: enable migration tracing and short-cut interrupts --- kernel/softirq.c | 2 +- litmus/gpu_affinity.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 5ce271675662..b013046e8c36 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -445,7 +445,7 @@ void __tasklet_schedule(struct tasklet_struct *t) #ifdef CONFIG_LITMUS_NVIDIA if(is_nvidia_func(t->func)) { -#if 0 +#if 1 // do nvidia tasklets right away and return if(__do_nv_now(t)) return; diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 9762be1a085e..55bb5e1128ec 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -58,17 +58,17 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. } else { -// struct migration_info mig_info; + struct migration_info mig_info; sched_trace_prediction_err(t, &(tsk_rt(t)->gpu_migration), &rel_err); -// mig_info.observed = observed; -// mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); -// mig_info.distance = tsk_rt(t)->gpu_migration; -// -// sched_trace_migration(t, &mig_info); + mig_info.observed = observed; + mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); + mig_info.distance = tsk_rt(t)->gpu_migration; + + sched_trace_migration(t, &mig_info); } } -- cgit v1.2.2 From 6a225701acf7d79f292eeffcd99d6f00b02c180b Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 7 Sep 2012 23:25:01 -0400 Subject: Infrastructure for Litmus signals. Added signals to Litmus. Specifcally, SIG_BUDGET signals are delivered (when requested by real-time tasks) when a budget is exceeded. Note: pfair not currently supported (but it probably could be). 
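To make the delivery model concrete, below is a minimal user-space sketch of a
task that catches SIG_BUDGET. It assumes only what this patch defines
(SIG_BUDGET = SIGRTMAX - 0 and the PRECISE_SIGNALS budget signal policy); the
sigaction() plumbing is plain POSIX, and the steps that fill in the task's
struct rt_task and admit it as a real-time task are deliberately elided.

    #include <signal.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors include/litmus/signal.h; assumed visible to user space. */
    #define SIG_BUDGET (SIGRTMAX - 0)

    static volatile sig_atomic_t budget_overruns;

    static void on_budget_exhausted(int sig)
    {
            (void)sig;
            budget_overruns++;      /* async-signal-safe: just count */
    }

    int main(void)
    {
            struct sigaction sa;

            memset(&sa, 0, sizeof(sa));
            sa.sa_handler = on_budget_exhausted;
            sigemptyset(&sa.sa_mask);
            sigaction(SIG_BUDGET, &sa, NULL);

            /* ... set budget_signal_policy = PRECISE_SIGNALS in the task's
             * struct rt_task and become a real-time task here ... */

            while (!budget_overruns) {
                    /* job body; the kernel delivers SIG_BUDGET once per job
                     * when the budget is exhausted */
            }
            printf("budget exhausted %d time(s)\n", (int)budget_overruns);
            return 0;
    }

If no handler is installed, the default action of a real-time signal
terminates the task, which is why include/litmus/signal.h calls this case out.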
--- include/litmus/budget.h | 20 +++++++++++++++--- include/litmus/rt_param.h | 16 +++++++++++++- include/litmus/signal.h | 47 +++++++++++++++++++++++++++++++++++++++++ litmus/budget.c | 16 ++++++++++++-- litmus/jobs.c | 2 ++ litmus/litmus.c | 10 +++++++++ litmus/sched_cedf.c | 47 +++++++++++++++++++++++++++-------------- litmus/sched_gsn_edf.c | 54 ++++++++++++++++++++++++++++++----------------- litmus/sched_pfp.c | 40 ++++++++++++++++++++++++----------- litmus/sched_psn_edf.c | 41 +++++++++++++++++++++++------------ 10 files changed, 227 insertions(+), 66 deletions(-) create mode 100644 include/litmus/signal.h diff --git a/include/litmus/budget.h b/include/litmus/budget.h index 33344ee8d5f9..763b31c0e9f6 100644 --- a/include/litmus/budget.h +++ b/include/litmus/budget.h @@ -5,6 +5,9 @@ * the next task. */ void update_enforcement_timer(struct task_struct* t); +/* Send SIG_BUDGET to a real-time task. */ +void send_sigbudget(struct task_struct* t); + inline static int budget_exhausted(struct task_struct* t) { return get_exec_time(t) >= get_exec_cost(t); @@ -19,10 +22,21 @@ inline static lt_t budget_remaining(struct task_struct* t) return 0; } -#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) +#define budget_enforced(t) (\ + tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) + +#define budget_precisely_tracked(t) (\ + tsk_rt(t)->task_params.budget_policy == PRECISE_ENFORCEMENT || \ + tsk_rt(t)->task_params.budget_signal_policy == PRECISE_SIGNALS) + +#define budget_signalled(t) (\ + tsk_rt(t)->task_params.budget_signal_policy != NO_SIGNALS) + +#define budget_precisely_signalled(t) (\ + tsk_rt(t)->task_params.budget_policy == PRECISE_SIGNALS) -#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \ - == PRECISE_ENFORCEMENT) +#define sigbudget_sent(t) (\ + test_bit(RT_JOB_SIG_BUDGET_SENT, &tsk_rt(t)->job_params.flags)) static inline int requeue_preempted_job(struct task_struct* t) { diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 89ac0dda7d3d..637fe6b84f9d 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -30,9 +30,15 @@ typedef enum { typedef enum { NO_ENFORCEMENT, /* job may overrun unhindered */ QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */ - PRECISE_ENFORCEMENT /* budgets are enforced with hrtimers */ + PRECISE_ENFORCEMENT, /* budgets are enforced with hrtimers */ } budget_policy_t; +typedef enum { + NO_SIGNALS, /* job receives no signals when it exhausts its budget */ + QUANTUM_SIGNALS, /* budget signals are only sent on quantum boundaries */ + PRECISE_SIGNALS, /* budget signals are triggered with hrtimers */ +} budget_signal_policy_t; + /* We use the common priority interpretation "lower index == higher priority", * which is commonly used in fixed-priority schedulability analysis papers. * So, a numerically lower priority value implies higher scheduling priority, @@ -62,6 +68,7 @@ struct rt_task { unsigned int priority; task_class_t cls; budget_policy_t budget_policy; /* ignored by pfair */ + budget_signal_policy_t budget_signal_policy; /* currently ignored by pfair */ }; union np_flag { @@ -118,8 +125,15 @@ struct rt_job { * Increase this sequence number when a job is released. */ unsigned int job_no; + + /* bits: + * 0th: Set if a budget exhaustion signal has already been sent for + * the current job. 
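 *       The bit is cleared by setup_release() (see the litmus/jobs.c hunk
 *       further down in this patch), so at most one budget-exhaustion
 *       signal is delivered per job.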
*/ + unsigned long flags; }; +#define RT_JOB_SIG_BUDGET_SENT 0 + struct pfair_param; /* RT task parameters for scheduling extensions diff --git a/include/litmus/signal.h b/include/litmus/signal.h new file mode 100644 index 000000000000..b3d82b294984 --- /dev/null +++ b/include/litmus/signal.h @@ -0,0 +1,47 @@ +#ifndef LITMUS_SIGNAL_H +#define LITMUS_SIGNAL_H + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +/* Signals used by Litmus to asynchronously communicate events + * to real-time tasks. + * + * Signal values overlap with [SIGRTMIN, SIGRTMAX], so beware of + * application-level conflicts when dealing with COTS user-level + * code. + */ + +/* Sent to a Litmus task when all of the following conditions are true: + * (1) The task has exhausted its budget. + * (2) budget_signal_policy is QUANTUM_SIGNALS or PRECISE_SIGNALS. + * + * Note: If a task does not have a registered handler for SIG_BUDGET, + * the signal will cause the task to terminate (default action). + */ + +/* Assigned values start at SIGRTMAX and decrease, hopefully reducing + * likelihood of user-level conflicts. + */ +#define SIG_BUDGET (SIGRTMAX - 0) + +/* +Future signals could include: + +#define SIG_DEADLINE_MISS (SIGRTMAX - 1) +#define SIG_CRIT_LEVEL_CHANGE (SIGRTMAX - 2) +*/ + +#define SIGLITMUSMIN SIG_BUDGET + +#ifdef __KERNEL__ +#if (SIGLITMUSMIN < SIGRTMIN) +/* no compile-time check in user-space since SIGRTMIN may be a variable. */ +#error "Too many LITMUS^RT signals!" +#endif +#endif + +#endif diff --git a/litmus/budget.c b/litmus/budget.c index f7712be29adb..518174a37a3b 100644 --- a/litmus/budget.c +++ b/litmus/budget.c @@ -1,11 +1,13 @@ #include #include #include +#include #include #include #include +#include struct enforcement_timer { /* The enforcement timer is used to accurately police @@ -64,7 +66,7 @@ static void arm_enforcement_timer(struct enforcement_timer* et, /* Calling this when there is no budget left for the task * makes no sense, unless the task is non-preemptive. */ - BUG_ON(budget_exhausted(t) && (!is_np(t))); + BUG_ON(budget_exhausted(t) && !is_np(t)); /* __hrtimer_start_range_ns() cancels the timer * anyway, so we don't have to check whether it is still armed */ @@ -86,7 +88,7 @@ void update_enforcement_timer(struct task_struct* t) { struct enforcement_timer* et = &__get_cpu_var(budget_timer); - if (t && budget_precisely_enforced(t)) { + if (t && budget_precisely_tracked(t) && !sigbudget_sent(t)) { /* Make sure we call into the scheduler when this budget * expires. */ arm_enforcement_timer(et, t); @@ -96,6 +98,16 @@ void update_enforcement_timer(struct task_struct* t) } } +void send_sigbudget(struct task_struct* t) +{ + if (!test_and_set_bit(RT_JOB_SIG_BUDGET_SENT, &tsk_rt(t)->job_params.flags)) { + /* signal has not yet been sent and we are responsible for sending + * since we just set the sent-bit when it was previously 0. 
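	 * (test_and_set_bit() is atomic, so if the tick path and the
	 * schedule() path race to signal the same exhausted job, only the
	 * first caller sees the bit as zero and actually issues send_sig().)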
*/ + + TRACE_TASK(t, "SIG_BUDGET being sent!\n"); + send_sig(SIG_BUDGET, t, 1); /* '1' denotes signal sent from kernel */ + } +} static int __init init_budget_enforcement(void) { diff --git a/litmus/jobs.c b/litmus/jobs.c index bc8246572e54..4981665a37bf 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c @@ -13,6 +13,8 @@ static inline void setup_release(struct task_struct *t, lt_t release) t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t); t->rt_param.job_params.exec_time = 0; + clear_bit(RT_JOB_SIG_BUDGET_SENT, &t->rt_param.job_params.flags); + /* update job sequence number */ t->rt_param.job_params.job_no++; diff --git a/litmus/litmus.c b/litmus/litmus.c index 81384327e850..3526749852aa 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -136,6 +136,16 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) pid, tp.budget_policy); goto out_unlock; } + if (tp.budget_signal_policy != NO_SIGNALS && + tp.budget_signal_policy != QUANTUM_SIGNALS && + tp.budget_signal_policy != PRECISE_SIGNALS) + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because unsupported budget signalling policy " + "specified (%d)\n", + pid, tp.budget_signal_policy); + goto out_unlock; + } target->rt_param.task_params = tp; diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index b0c16e34d2c5..208f067934fc 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -371,21 +371,29 @@ static noinline void job_completion(struct task_struct *t, int forced) */ static void cedf_tick(struct task_struct* t) { - if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { - if (!is_np(t)) { - /* np tasks will be preempted when they become - * preemptable again - */ - litmus_reschedule_local(); - set_will_schedule(); - TRACE("cedf_scheduler_tick: " - "%d is preemptable " - " => FORCE_RESCHED\n", t->pid); - } else if (is_user_np(t)) { - TRACE("cedf_scheduler_tick: " - "%d is non-preemptable, " - "preemption delayed.\n", t->pid); - request_exit_np(t); + if (is_realtime(t) && budget_exhausted(t)) + { + if (budget_signalled(t) && !sigbudget_sent(t)) { + /* signal exhaustion */ + send_sigbudget(t); + } + + if (budget_enforced(t)) { + if (!is_np(t)) { + /* np tasks will be preempted when they become + * preemptable again + */ + litmus_reschedule_local(); + set_will_schedule(); + TRACE("cedf_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("cedf_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } } } } @@ -415,7 +423,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) { cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); cedf_domain_t *cluster = entry->cluster; - int out_of_time, sleep, preempt, np, exists, blocks; + int out_of_time, signal_budget, sleep, preempt, np, exists, blocks; struct task_struct* next = NULL; #ifdef CONFIG_RELEASE_MASTER @@ -442,6 +450,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) out_of_time = exists && budget_enforced(entry->scheduled) && budget_exhausted(entry->scheduled); + signal_budget = exists && + budget_signalled(entry->scheduled) && + budget_exhausted(entry->scheduled) && + !sigbudget_sent(entry->scheduled); np = exists && is_np(entry->scheduled); sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; preempt = entry->scheduled != entry->linked; @@ -460,6 +472,9 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) TRACE_TASK(prev, 
"will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); + /* Send the signal that the budget has been exhausted */ + if (signal_budget) + send_sigbudget(entry->scheduled); /* If a task blocks we have no choice but to reschedule. */ diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index c3344b9d288f..c1f25b56e51e 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -362,20 +362,28 @@ static noinline void job_completion(struct task_struct *t, int forced) */ static void gsnedf_tick(struct task_struct* t) { - if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { - if (!is_np(t)) { - /* np tasks will be preempted when they become - * preemptable again - */ - litmus_reschedule_local(); - TRACE("gsnedf_scheduler_tick: " - "%d is preemptable " - " => FORCE_RESCHED\n", t->pid); - } else if (is_user_np(t)) { - TRACE("gsnedf_scheduler_tick: " - "%d is non-preemptable, " - "preemption delayed.\n", t->pid); - request_exit_np(t); + if (is_realtime(t) && budget_exhausted(t)) + { + if (budget_signalled(t) && !sigbudget_sent(t)) { + /* signal exhaustion */ + send_sigbudget(t); + } + + if (budget_enforced(t)) { + if (!is_np(t)) { + /* np tasks will be preempted when they become + * preemptable again + */ + litmus_reschedule_local(); + TRACE("gsnedf_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("gsnedf_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } } } } @@ -404,7 +412,7 @@ static void gsnedf_tick(struct task_struct* t) static struct task_struct* gsnedf_schedule(struct task_struct * prev) { cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); - int out_of_time, sleep, preempt, np, exists, blocks; + int out_of_time, signal_budget, sleep, preempt, np, exists, blocks; struct task_struct* next = NULL; #ifdef CONFIG_RELEASE_MASTER @@ -427,8 +435,13 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) /* (0) Determine state */ exists = entry->scheduled != NULL; blocks = exists && !is_running(entry->scheduled); - out_of_time = exists && budget_enforced(entry->scheduled) - && budget_exhausted(entry->scheduled); + out_of_time = exists && + budget_enforced(entry->scheduled) && + budget_exhausted(entry->scheduled); + signal_budget = exists && + budget_signalled(entry->scheduled) && + budget_exhausted(entry->scheduled) && + !sigbudget_sent(entry->scheduled); np = exists && is_np(entry->scheduled); sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; preempt = entry->scheduled != entry->linked; @@ -439,14 +452,17 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) if (exists) TRACE_TASK(prev, - "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " + "blocks:%d out_of_time:%d signal_budget: %d np:%d sleep:%d preempt:%d " "state:%d sig:%d\n", - blocks, out_of_time, np, sleep, preempt, + blocks, out_of_time, signal_budget, np, sleep, preempt, prev->state, signal_pending(prev)); if (entry->linked && preempt) TRACE_TASK(prev, "will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); + /* Send the signal that the budget has been exhausted */ + if (signal_budget) + send_sigbudget(entry->scheduled); /* If a task blocks we have no choice but to reschedule. 
*/ diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c index 62be699629b1..6129eb94d3ea 100644 --- a/litmus/sched_pfp.c +++ b/litmus/sched_pfp.c @@ -135,17 +135,25 @@ static void pfp_tick(struct task_struct *t) */ BUG_ON(is_realtime(t) && t != pfp->scheduled); - if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { - if (!is_np(t)) { - litmus_reschedule_local(); - TRACE("pfp_scheduler_tick: " - "%d is preemptable " - " => FORCE_RESCHED\n", t->pid); - } else if (is_user_np(t)) { - TRACE("pfp_scheduler_tick: " - "%d is non-preemptable, " - "preemption delayed.\n", t->pid); - request_exit_np(t); + if (is_realtime(t) && budget_exhausted(t)) + { + if (budget_signalled(t) && !sigbudget_sent(t)) { + /* signal exhaustion */ + send_sigbudget(t); + } + + if (budget_enforced(t)) { + if (!is_np(t)) { + litmus_reschedule_local(); + TRACE("pfp_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("pfp_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } } } } @@ -155,7 +163,7 @@ static struct task_struct* pfp_schedule(struct task_struct * prev) pfp_domain_t* pfp = local_pfp; struct task_struct* next; - int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate; + int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched, migrate; raw_spin_lock(&pfp->slock); @@ -172,6 +180,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev) out_of_time = exists && budget_enforced(pfp->scheduled) && budget_exhausted(pfp->scheduled); + signal_budget = exists && + budget_signalled(pfp->scheduled) && + budget_exhausted(pfp->scheduled) && + !sigbudget_sent(pfp->scheduled); np = exists && is_np(pfp->scheduled); sleep = exists && get_rt_flags(pfp->scheduled) == RT_F_SLEEP; migrate = exists && get_partition(pfp->scheduled) != pfp->cpu; @@ -183,6 +195,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev) */ resched = preempt; + /* Send the signal that the budget has been exhausted */ + if (signal_budget) + send_sigbudget(pfp->scheduled); + /* If a task blocks we have no choice but to reschedule. 
*/ if (blocks) diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c index b0c8126bd44a..a5fda133bad9 100644 --- a/litmus/sched_psn_edf.c +++ b/litmus/sched_psn_edf.c @@ -169,17 +169,25 @@ static void psnedf_tick(struct task_struct *t) */ BUG_ON(is_realtime(t) && t != pedf->scheduled); - if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { - if (!is_np(t)) { - litmus_reschedule_local(); - TRACE("psnedf_scheduler_tick: " - "%d is preemptable " - " => FORCE_RESCHED\n", t->pid); - } else if (is_user_np(t)) { - TRACE("psnedf_scheduler_tick: " - "%d is non-preemptable, " - "preemption delayed.\n", t->pid); - request_exit_np(t); + if (is_realtime(t) && budget_exhausted(t)) + { + if (budget_signalled(t) && !sigbudget_sent(t)) { + /* signal exhaustion */ + send_sigbudget(t); + } + + if (budget_enforced(t)) { + if (!is_np(t)) { + litmus_reschedule_local(); + TRACE("psnedf_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("psnedf_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } } } } @@ -190,8 +198,7 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev) rt_domain_t* edf = &pedf->domain; struct task_struct* next; - int out_of_time, sleep, preempt, - np, exists, blocks, resched; + int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched; raw_spin_lock(&pedf->slock); @@ -208,6 +215,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev) out_of_time = exists && budget_enforced(pedf->scheduled) && budget_exhausted(pedf->scheduled); + signal_budget = exists && + budget_signalled(pedf->scheduled) && + budget_exhausted(pedf->scheduled) && + !sigbudget_sent(pedf->scheduled); np = exists && is_np(pedf->scheduled); sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP; preempt = edf_preemption_needed(edf, prev); @@ -218,6 +229,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev) */ resched = preempt; + /* Send the signal that the budget has been exhausted */ + if (signal_budget) + send_sigbudget(pedf->scheduled); + /* If a task blocks we have no choice but to reschedule. */ if (blocks) -- cgit v1.2.2 From d3c32e91e3fce2a57083a734efae6d9de06ec02f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sat, 8 Sep 2012 10:26:00 -0400 Subject: Fixed type-os and clarified text in litmus/Kconfig --- litmus/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/litmus/Kconfig b/litmus/Kconfig index 48ff3e3c657c..f2dbfb396883 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -89,7 +89,7 @@ choice config EDF_TIE_BREAK_LATENESS bool "Lateness-based Tie Break" help - Break ties between to jobs, A and B, based upon the lateness of their + Break ties between two jobs, A and B, based upon the lateness of their prior jobs. The job with the greatest lateness has priority. Note that lateness has a negative value if the prior job finished before its deadline. @@ -97,8 +97,8 @@ choice config EDF_TIE_BREAK_LATENESS_NORM bool "Normalized Lateness-based Tie Break" help - Break ties between to jobs, A and B, based upon the lateness, normalized - by relative deadline, their prior jobs. The job with the greatest + Break ties between two jobs, A and B, based upon the lateness, normalized + by relative deadline, of their prior jobs. The job with the greatest normalized lateness has priority. Note that lateness has a negative value if the prior job finished before its deadline. 
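For reference, the comparison this option selects can be reproduced in a few
lines of user-space C. The helpers below mirror include/litmus/fpmath.h
(FP_SHIFT = 10, i.e. values are stored scaled by 1024); the lateness and
relative-deadline values are made-up numbers, not measurements.

    #include <stdio.h>
    #include <stdint.h>

    typedef int64_t fpbuf_t;
    typedef struct { fpbuf_t val; } fp_t;
    #define FP_SHIFT 10

    /* divide two integers to obtain a fixed point value (as in fpmath.h) */
    static fp_t _frac(fpbuf_t a, fpbuf_t b) { return (fp_t){ (a << FP_SHIFT) / b }; }
    static int  _gt(fp_t a, fp_t b)         { return a.val > b.val; }
    static int  _eq(fp_t a, fp_t b)         { return a.val == b.val; }

    int main(void)
    {
            /* Task 1: 3 ms late with a 10 ms relative deadline.
             * Task 2: 5 ms late with a 50 ms relative deadline. */
            fp_t fnorm = _frac(3000000, 10000000);  /* 307/1024, ~0.30 */
            fp_t snorm = _frac(5000000, 50000000);  /* 102/1024, ~0.10 */

            printf("fnorm = %lld/1024, snorm = %lld/1024\n",
                   (long long)fnorm.val, (long long)snorm.val);
            printf("first wins tie-break: %d, pid tie-break needed: %d\n",
                   _gt(fnorm, snorm), _eq(fnorm, snorm));
            return 0;
    }

This is the fnorm/snorm comparison performed by the
CONFIG_EDF_TIE_BREAK_LATENESS_NORM branch of edf_higher_prio() in
litmus/edf_common.c, with equality falling through to the PID tie-break.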
@@ -116,9 +116,9 @@ choice config EDF_PID_TIE_BREAK bool "PID-based Tie Breaks" help - Break ties based upon OS-assigned process IDs. Use this option if + Break ties based upon OS-assigned thread IDs. Use this option if required by algorithm's real-time analysis or per-task response-time - jitter must be minimized in overload conditions. + jitter must be minimized. NOTES: * This tie-breaking method was default in Litmus 2012.2 and before. -- cgit v1.2.2 From 193a19c94a32f2e2a0e973f0a98cf4a098cefa15 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 9 Sep 2012 13:42:13 -0400 Subject: simple average tracking --- include/litmus/gpu_affinity.h | 15 +++++----- include/litmus/rt_param.h | 15 ++++++++-- litmus/gpu_affinity.c | 66 +++++++++++++++++++++++++++++++++++++++---- litmus/litmus.c | 3 +- 4 files changed, 83 insertions(+), 16 deletions(-) diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h index 6b3fb8b28745..d64a15cbf2a5 100644 --- a/include/litmus/gpu_affinity.h +++ b/include/litmus/gpu_affinity.h @@ -31,17 +31,18 @@ static inline lt_t get_gpu_time(struct task_struct* t) static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) { - int i; - fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); - lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... +// int i; +// fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); +// lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... + lt_t val = t->rt_param.gpu_migration_est[dist].avg; - WARN_ON(temp < 0); +// WARN_ON(temp < 0); // lower-bound a distant migration to be at least equal to the level // below it. - for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { - val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); - } +// for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { +// val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); +// } return ((val > 0) ? val : dist+1); } diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 0198884eab86..a441badd30cc 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -144,6 +144,17 @@ typedef struct feedback_est{ fp_t accum_err; } feedback_est_t; + +#define AVG_EST_WINDOW_SIZE 20 + +typedef struct avg_est{ + lt_t history[AVG_EST_WINDOW_SIZE]; + uint16_t count; + uint16_t idx; + lt_t sum; + lt_t avg; +} avg_est_t; + /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must * be explicitly set up before the task set is launched. @@ -190,12 +201,10 @@ struct rt_param { long unsigned int held_gpus; // bitmap of held GPUs. #ifdef CONFIG_LITMUS_AFFINITY_LOCKING - fp_t gpu_fb_param_a[MIG_LAST+1]; - fp_t gpu_fb_param_b[MIG_LAST+1]; + avg_est_t gpu_migration_est[MIG_LAST+1]; gpu_migration_dist_t gpu_migration; int last_gpu; - feedback_est_t gpu_migration_est[MIG_LAST+1]; // local, near, med, far lt_t accum_gpu_time; lt_t gpu_time_stamp; diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 55bb5e1128ec..2cdf18bc7dd6 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -7,7 +7,14 @@ #include -#define OBSERVATION_CAP 2*1e9 +#define OBSERVATION_CAP ((lt_t)(2e9)) + +// reason for skew: high outliers are less +// frequent and way out of bounds +#define HI_THRESHOLD 2 +#define LO_THRESHOLD 4 + +#define MIN(a, b) ((a < b) ? 
a : b) static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) { @@ -28,10 +35,59 @@ static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) void update_gpu_estimate(struct task_struct *t, lt_t observed) { - feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + //feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + avg_est_t *est; + struct migration_info mig_info; BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); + est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + + if (unlikely(observed > OBSERVATION_CAP)) { + TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n", + observed, + OBSERVATION_CAP); + return; + } + +#if 0 + // filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out + // of range of the average, but only filter if enough samples + // have been taken. + if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { + if (unlikely(observed < est->avg/LO_THRESHOLD)) { + TRACE_TASK(t, "Observation is too small: %llu\n", + observed); + return; + } + else if (unlikely(observed > est->avg*HI_THRESHOLD)) { + TRACE_TASK(t, "Observation is too large: %llu\n", + observed); + return; + } + } +#endif + + if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) { + ++est->count; + } + else { + est->sum -= est->history[est->idx]; + } + + mig_info.observed = observed; + mig_info.estimated = est->avg; + mig_info.distance = tsk_rt(t)->gpu_migration; + sched_trace_migration(t, &mig_info); + + + est->history[est->idx] = observed; + est->sum += observed; + est->avg = est->sum/est->count; + est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE; + + +#if 0 if(unlikely(fb->est.val == 0)) { // kludge-- cap observed values to prevent whacky estimations. // whacky stuff happens during the first few jobs. 
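The ring-buffer bookkeeping above is easier to follow in isolation. The sketch
below is a stand-alone, user-space model of the avg_est_t update
(AVG_EST_WINDOW_SIZE = 20 as in rt_param.h); the observation values are
invented and the kernel's outlier filtering is omitted.

    #include <stdio.h>
    #include <stdint.h>

    #define AVG_EST_WINDOW_SIZE 20
    typedef uint64_t lt_t;

    struct avg_est {
            lt_t history[AVG_EST_WINDOW_SIZE];
            uint16_t count, idx;
            lt_t sum, avg;
    };

    static void observe(struct avg_est *est, lt_t observed)
    {
            if (est->count < AVG_EST_WINDOW_SIZE)
                    ++est->count;
            else
                    est->sum -= est->history[est->idx]; /* evict oldest sample */

            est->history[est->idx] = observed;
            est->sum += observed;
            est->avg = est->sum / est->count;  /* mean of last <= 20 samples */
            est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
    }

    int main(void)
    {
            struct avg_est est = { {0} };
            lt_t samples[] = { 900000, 1100000, 1000000, 1050000 };
            unsigned i;

            for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++) {
                    observe(&est, samples[i]);
                    printf("obs=%llu avg=%llu\n",
                           (unsigned long long)samples[i],
                           (unsigned long long)est.avg);
            }
            return 0;
    }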
@@ -71,12 +127,12 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) sched_trace_migration(t, &mig_info); } } +#endif - TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", + TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n", tsk_rt(t)->gpu_migration, observed, - _fp_to_integer(fb->est), - _point(fb->est)); + est->avg); } gpu_migration_dist_t gpu_migration_distance(int a, int b) diff --git a/litmus/litmus.c b/litmus/litmus.c index d1f836c8af6e..91ec65894379 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -321,6 +321,7 @@ void init_gpu_affinity_state(struct task_struct* p) //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); +#if 0 // emperical; p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); @@ -333,7 +334,7 @@ void init_gpu_affinity_state(struct task_struct* p) p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); - +#endif p->rt_param.gpu_migration = MIG_NONE; p->rt_param.last_gpu = -1; } -- cgit v1.2.2 From 901fdd9c22790039a76c1d3ee01828a2f124f6f3 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 10 Sep 2012 11:27:08 -0400 Subject: standard devation-based gpu affinity predictor --- include/litmus/rt_param.h | 1 + litmus/gpu_affinity.c | 67 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index a441badd30cc..04239c747f06 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -152,6 +152,7 @@ typedef struct avg_est{ uint16_t count; uint16_t idx; lt_t sum; + lt_t std; lt_t avg; } avg_est_t; diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 2cdf18bc7dd6..896f3248b8a2 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -11,8 +11,10 @@ // reason for skew: high outliers are less // frequent and way out of bounds -#define HI_THRESHOLD 2 -#define LO_THRESHOLD 4 +//#define HI_THRESHOLD 2 +//#define LO_THRESHOLD 4 + +#define NUM_STDEV 2 #define MIN(a, b) ((a < b) ? a : b) @@ -33,6 +35,44 @@ static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) return relative_err; } +lt_t varience(lt_t nums[], const lt_t avg, const uint16_t count) +{ + /* brute force: takes about as much time as incremental running methods when + * count < 50 (on Bonham). Brute force also less prone to overflow. + */ + lt_t sqdeviations = 0; + uint16_t i; + for(i = 0; i < count; ++i) + { + lt_t temp = (int64_t)nums[i] - (int64_t)avg; + sqdeviations += temp * temp; + } + return sqdeviations/count; +} + +lt_t isqrt(lt_t n) +{ + /* integer square root using babylonian method + * (algo taken from wikipedia */ + lt_t res = 0; + lt_t bit = ((lt_t)1) << (sizeof(n)*8-2); + while (bit > n) { + bit >>= 2; + } + + while (bit != 0) { + if (n >= res + bit) { + n -= res + bit; + res = (res >> 1) + bit; + } + else { + res >>= 1; + } + bit >>= 2; + } + return res; +} + void update_gpu_estimate(struct task_struct *t, lt_t observed) { //feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); @@ -65,8 +105,28 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) observed); return; } - } #endif + // filter values outside NUM_STDEVx the standard deviation, + // but only filter if enough samples have been taken. 
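	// Worked example with illustrative numbers: if avg = 1,000,000 ns and
	// std = 150,000 ns, then with NUM_STDEV = 2 the admitted range is
	// [700,000 ns, 1,300,000 ns].  The MIN(range, est->avg) term below
	// clamps the lower bound at zero so the unsigned subtraction cannot
	// wrap around.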
+ if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { + lt_t lower, upper; + + lt_t range = est->std*NUM_STDEV; + lower = est->avg - MIN(range, est->avg); // no underflow. + + if (unlikely(observed < lower)) { + TRACE_TASK(t, "Observation is too small: %llu\n", observed); + return; + } + + upper = est->avg + range; + if (unlikely(observed > upper)) { + TRACE_TASK(t, "Observation is too large: %llu\n", observed); + return; + } + } + + if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) { ++est->count; @@ -84,6 +144,7 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) est->history[est->idx] = observed; est->sum += observed; est->avg = est->sum/est->count; + est->std = isqrt(varience(est->history, est->avg, est->count)); est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE; -- cgit v1.2.2 From a916d9b2feaeb5934e1f8ba30fde74193a60e8d1 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 11 Sep 2012 15:25:29 -0400 Subject: Fix hang from bug in edf_common.c --- include/litmus/fdso.h | 22 +++++++++++----------- litmus/edf_common.c | 36 +++++++++++++++++++++++++++++++----- litmus/fdso.c | 10 ++++++---- litmus/preempt.c | 5 +++-- 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h index 35be59b970ee..1469c0fd0460 100644 --- a/include/litmus/fdso.h +++ b/include/litmus/fdso.h @@ -20,20 +20,20 @@ typedef enum { FMLP_SEM = 0, SRP_SEM = 1, - RSM_MUTEX = 2, - IKGLP_SEM = 3, - KFMLP_SEM = 4, + MPCP_SEM = 2, + MPCP_VS_SEM = 3, + DPCP_SEM = 4, - IKGLP_SIMPLE_GPU_AFF_OBS = 5, - IKGLP_GPU_AFF_OBS = 6, - KFMLP_SIMPLE_GPU_AFF_OBS = 7, - KFMLP_GPU_AFF_OBS = 8, + PCP_SEM = 5, - MPCP_SEM = 9, - MPCP_VS_SEM = 10, - DPCP_SEM = 11, + RSM_MUTEX = 6, + IKGLP_SEM = 7, + KFMLP_SEM = 8, - PCP_SEM = 12, + IKGLP_SIMPLE_GPU_AFF_OBS = 9, + IKGLP_GPU_AFF_OBS = 10, + KFMLP_SIMPLE_GPU_AFF_OBS = 11, + KFMLP_GPU_AFF_OBS = 12, MAX_OBJ_TYPE = 12 } obj_type_t; diff --git a/litmus/edf_common.c b/litmus/edf_common.c index a1cdc10ea6f1..39ce1816ee04 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -185,11 +185,37 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } #endif - /* If the PIDs are the same then the task with the - * inherited priority wins. - */ - if (!second_task->rt_param.inh_task) { - return 1; + /* Something could be wrong if you get this far. */ + if (unlikely(first->rt_param.inh_task == + second->rt_param.inh_task)) { + /* Both tasks have the same inherited priority. + * Likely in a bug-condition. + */ + if (likely(first->pid < second->pid)) { + return 1; + } + else if (first->pid == second->pid) { + WARN_ON(1); + } + } + else { + /* At least one task must inherit */ + BUG_ON(!first->rt_param.inh_task && + !second->rt_param.inh_task); + + /* The task with the inherited priority wins. */ + if (!second->rt_param.inh_task) { + TRACE_CUR("unusual comparison: " + "first = %s/%d first_task = %s/%d " + "second = %s/%d second_task = %s/%d\n", + first->comm, first->pid, + (first->rt_param.inh_task) ? first->rt_param.inh_task->comm : "(nil)", + (first->rt_param.inh_task) ? first->rt_param.inh_task->pid : 0, + second->comm, second->pid, + (second->rt_param.inh_task) ? second->rt_param.inh_task->comm : "(nil)", + (second->rt_param.inh_task) ? 
second->rt_param.inh_task->pid : 0); + return 1; + } } } } diff --git a/litmus/fdso.c b/litmus/fdso.c index bac6a35fa17d..2411d16ba486 100644 --- a/litmus/fdso.c +++ b/litmus/fdso.c @@ -27,6 +27,12 @@ extern struct fdso_ops generic_affinity_ops; static const struct fdso_ops* fdso_ops[] = { &generic_lock_ops, /* FMLP_SEM */ &generic_lock_ops, /* SRP_SEM */ + + &generic_lock_ops, /* MPCP_SEM */ + &generic_lock_ops, /* MPCP_VS_SEM */ + &generic_lock_ops, /* DPCP_SEM */ + &generic_lock_ops, /* PCP_SEM */ + &generic_lock_ops, /* RSM_MUTEX */ &generic_lock_ops, /* IKGLP_SEM */ &generic_lock_ops, /* KFMLP_SEM */ @@ -36,10 +42,6 @@ static const struct fdso_ops* fdso_ops[] = { &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */ &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */ #endif - &generic_lock_ops, /* MPCP_SEM */ - &generic_lock_ops, /* MPCP_VS_SEM */ - &generic_lock_ops, /* DPCP_SEM */ - &generic_lock_ops, /* PCP_SEM */ }; static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) diff --git a/litmus/preempt.c b/litmus/preempt.c index 28368d5bc046..a2cae3648e15 100644 --- a/litmus/preempt.c +++ b/litmus/preempt.c @@ -26,10 +26,11 @@ void sched_state_will_schedule(struct task_struct* tsk) set_sched_state(PICKED_WRONG_TASK); else set_sched_state(WILL_SCHEDULE); - } else + } else { /* Litmus tasks should never be subject to a remote * set_tsk_need_resched(). */ - BUG_ON(is_realtime(tsk)); + //BUG_ON(is_realtime(tsk)); + } #ifdef CONFIG_PREEMPT_STATE_TRACE TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", -- cgit v1.2.2 From 55e04c94b925b0790c2ae0a79f16e939e9bb2846 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 11 Sep 2012 18:16:55 -0400 Subject: changed gpu filtering to 1.5 stdev. --- litmus/gpu_affinity.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 896f3248b8a2..7d73105b4181 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -14,7 +14,8 @@ //#define HI_THRESHOLD 2 //#define LO_THRESHOLD 4 -#define NUM_STDEV 2 +#define NUM_STDEV_NUM 1 +#define NUM_STDEV_DENOM 2 #define MIN(a, b) ((a < b) ? a : b) @@ -111,7 +112,7 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { lt_t lower, upper; - lt_t range = est->std*NUM_STDEV; + lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM; lower = est->avg - MIN(range, est->avg); // no underflow. if (unlikely(observed < lower)) { -- cgit v1.2.2 From fd3aa01f176cf12b1625f4f46ba01f3340bb57ed Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 11 Sep 2012 19:36:11 -0400 Subject: blarg --- include/litmus/rt_param.h | 5 +++ include/litmus/unistd_32.h | 4 +- include/litmus/unistd_64.h | 5 ++- litmus/litmus.c | 109 ++++++++++++++++++++++++++++++++++++--------- 4 files changed, 99 insertions(+), 24 deletions(-) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 89ac0dda7d3d..21430623a940 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -157,6 +157,11 @@ struct rt_param { */ struct task_struct* inh_task; + + struct task_struct* hp_group; + unsigned int is_slave:1; + + #ifdef CONFIG_NP_SECTION /* For the FMLP under PSN-EDF, it is required to make the task * non-preemptive from kernel space. 
In order not to interfere with diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 94264c27d9ac..bcb8f1183b4f 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -18,4 +18,6 @@ #define __NR_release_ts __LSC(10) #define __NR_null_call __LSC(11) -#define NR_litmus_syscalls 12 +#define __NR_slave_non_rt_threads _LSC(12) + +#define NR_litmus_syscalls 13 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index d5ced0d2642c..5f56d5947343 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -30,4 +30,7 @@ __SYSCALL(__NR_release_ts, sys_release_ts) #define __NR_null_call __LSC(11) __SYSCALL(__NR_null_call, sys_null_call) -#define NR_litmus_syscalls 12 +#define __NR_slave_non_rt_threads __LSC(12) +__SYSCALL(__NR_slave_non_rt_threads, sys_slave_non_rt_threads) + +#define NR_litmus_syscalls 13 diff --git a/litmus/litmus.c b/litmus/litmus.c index 81384327e850..2300281b6b30 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -290,6 +290,60 @@ asmlinkage long sys_null_call(cycles_t __user *ts) return ret; } + + + + + + + + + +long __litmus_admit_task(struct task_struct* tsk); + +asmlinkage long sys_slave_non_rt_threads(void) +{ + long retval = 0; + struct task_struct *leader = current->group_leader; + struct task_struct *t; + struct task_struct *hp = NULL; + + read_lock_irq(&tasklist_lock); + + is_realtime(target) + + t = leader; + do { + TRACE_CUR("threads in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); + + if (tsk_rt(t)->heap_node == NULL) { + retval = __litmus_admit_task(t); + + if (retval != 0) break; + + /* hasn't been admitted into rt. make it a slave. */ + tsk_rt(t)->slave = 1; + } + else if (is_realtime(t)) + if (litmus->compare(t, hp)) { + hp = t; + } + } + + t = next_thread(t); + } while(t != leader); + + if (hp) { + /* set up inheritance */ + + } + + read_unlock_irq(&tasklist_lock); + + return 0; +} + + /* p is a real-time task. Re-init its state as a best-effort task. 
*/ static void reinit_litmus_state(struct task_struct* p, int restore) { @@ -318,32 +372,11 @@ static void reinit_litmus_state(struct task_struct* p, int restore) } } -long litmus_admit_task(struct task_struct* tsk) +long __litmus_admit_task(struct task_struct* tsk) { long retval = 0; unsigned long flags; - BUG_ON(is_realtime(tsk)); - - if (get_rt_relative_deadline(tsk) == 0 || - get_exec_cost(tsk) > - min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { - TRACE_TASK(tsk, - "litmus admit: invalid task parameters " - "(e = %lu, p = %lu, d = %lu)\n", - get_exec_cost(tsk), get_rt_period(tsk), - get_rt_relative_deadline(tsk)); - retval = -EINVAL; - goto out; - } - - if (!cpu_online(get_partition(tsk))) { - TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", - get_partition(tsk)); - retval = -EINVAL; - goto out; - } - INIT_LIST_HEAD(&tsk_rt(tsk)->list); /* avoid scheduler plugin changing underneath us */ @@ -375,6 +408,38 @@ long litmus_admit_task(struct task_struct* tsk) out_unlock: raw_spin_unlock_irqrestore(&task_transition_lock, flags); + + return retval; +} + +long litmus_admit_task(struct task_struct* tsk) +{ + long retval = 0; + unsigned long flags; + + BUG_ON(is_realtime(tsk)); + + if (get_rt_relative_deadline(tsk) == 0 || + get_exec_cost(tsk) > + min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { + TRACE_TASK(tsk, + "litmus admit: invalid task parameters " + "(e = %lu, p = %lu, d = %lu)\n", + get_exec_cost(tsk), get_rt_period(tsk), + get_rt_relative_deadline(tsk)); + retval = -EINVAL; + goto out; + } + + if (!cpu_online(get_partition(tsk))) { + TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", + get_partition(tsk)); + retval = -EINVAL; + goto out; + } + + retval = __litmus_admit_task(tsk); + out: return retval; } -- cgit v1.2.2 From 4ad6ba08f0dab67bbd89a26b27f1cc86e3c45c13 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 14 Sep 2012 08:34:36 -0400 Subject: checkpoint for aux_tasks. can still deadlock --- include/linux/sched.h | 4 +- include/litmus/aux_tasks.h | 33 ++++ include/litmus/litmus.h | 1 + include/litmus/rt_param.h | 17 +- include/litmus/sched_plugin.h | 7 + litmus/Makefile | 2 +- litmus/aux_tasks.c | 387 ++++++++++++++++++++++++++++++++++++++++++ litmus/edf_common.c | 22 ++- litmus/litmus.c | 111 ++++++------ litmus/nvidia_info.c | 48 ++++++ litmus/rt_domain.c | 13 +- litmus/sched_cedf.c | 12 +- litmus/sched_gsn_edf.c | 161 ++++++++++++++++-- litmus/sched_plugin.c | 16 ++ 14 files changed, 746 insertions(+), 88 deletions(-) create mode 100644 include/litmus/aux_tasks.h create mode 100644 litmus/aux_tasks.c diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c990d13ae35..5d1c041be809 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1532,8 +1532,10 @@ struct task_struct { #endif struct prop_local_single dirties; - /* LITMUS RT parameters and state */ + /*** LITMUS RT parameters and state ***/ struct rt_param rt_param; + struct aux_data aux_data; + /*****/ /* references to PI semaphores, etc. 
*/ struct od_table_entry *od_table; diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h new file mode 100644 index 000000000000..8e50ac85b082 --- /dev/null +++ b/include/litmus/aux_tasks.h @@ -0,0 +1,33 @@ +#ifndef LITMUS_AUX_taskS +#define LITMUS_AUX_taskS + +struct task_struct; + +#define MAGIC_AUX_TASK_PERIOD ~((lt_t)0) + +/* admit an aux task with default parameters */ +//int admit_aux_task(struct task_struct *t); + +/* call on an aux task when it exits real-time */ +int exit_aux_task(struct task_struct *t); + +/* call when an aux_owner becomes real-time */ +long enable_aux_task_owner(struct task_struct *t); + +/* call when an aux_owner exits real-time */ +long disable_aux_task_owner(struct task_struct *t); + + +/* collectivelly make all aux tasks in the process of leader inherit from hp */ +//int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp); + +/* collectivelly make all aux tasks in the process of leader inherit from hp */ +//int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp); + +/* call when an aux_owner increases its priority */ +int aux_task_owner_increase_priority(struct task_struct *t); + +/* call when an aux_owner decreases its priority */ +int aux_task_owner_decrease_priority(struct task_struct *t); + +#endif \ No newline at end of file diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 1d70ab713571..f9829167294d 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -44,6 +44,7 @@ void litmus_exit_task(struct task_struct *tsk); ((t)->rt_param.transition_pending) #define tsk_rt(t) (&(t)->rt_param) +#define tsk_aux(t) (&(t)->aux_data) /* Realtime utility macros */ #define get_rt_flags(t) (tsk_rt(t)->flags) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 02b750a9570b..2a6c70f1dd37 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -285,9 +285,13 @@ struct rt_param { #endif - struct task_struct* hp_group; - unsigned int is_slave:1; - unsigned int has_slaves:1; +#ifdef CONFIG_LITMUS_LOCKING + unsigned int is_aux_task:1; + unsigned int has_aux_tasks:1; + + struct list_head aux_task_node; + struct binheap_node aux_task_owner_node; +#endif #ifdef CONFIG_NP_SECTION @@ -354,6 +358,13 @@ struct rt_param { struct control_page * ctrl_page; }; +struct aux_data +{ + struct list_head aux_tasks; + struct binheap aux_task_owners; + unsigned int initialized:1; +}; + /* Possible RT flags */ #define RT_F_RUNNING 0x00000000 #define RT_F_SLEEP 0x00000001 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 24a6858b4b0b..bd75e7c09a10 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -70,6 +70,10 @@ typedef long (*allocate_affinity_observer_t) ( typedef void (*increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh); typedef void (*decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh); + +typedef int (*__increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh); +typedef int (*__decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh); + typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh, raw_spinlock_t *to_unlock, unsigned long irqflags); typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh, @@ -146,6 +150,9 @@ struct sched_plugin { allocate_lock_t allocate_lock; increase_prio_t increase_prio; decrease_prio_t decrease_prio; + + __increase_prio_t 
__increase_prio; + __decrease_prio_t __decrease_prio; #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING nested_increase_prio_t nested_increase_prio; diff --git a/litmus/Makefile b/litmus/Makefile index 59c018560ee9..f2dd7be7ae4a 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o -obj-$(CONFIG_LITMUS_LOCKING) += kfmlp_lock.o +obj-$(CONFIG_LITMUS_LOCKING) += aux_tasks.o kfmlp_lock.o obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c new file mode 100644 index 000000000000..c197a95fc3a1 --- /dev/null +++ b/litmus/aux_tasks.c @@ -0,0 +1,387 @@ +#ifdef CONFIG_LITMUS_LOCKING + +#include +#include +#include +#include +#include + +static int admit_aux_task(struct task_struct *t) +{ + int retval = 0; + struct task_struct *leader = t->group_leader; + + struct rt_task tp = { + .exec_cost = 0, + .period = MAGIC_AUX_TASK_PERIOD, + .relative_deadline = MAGIC_AUX_TASK_PERIOD, + .phase = 0, + .cpu = task_cpu(leader), /* take CPU of group leader */ + .budget_policy = NO_ENFORCEMENT, + .cls = RT_CLASS_BEST_EFFORT + }; + + struct sched_param param = { .sched_priority = 0}; + + tsk_rt(t)->task_params = tp; + retval = sched_setscheduler_nocheck(t, SCHED_LITMUS, ¶m); + + return retval; +} + +int exit_aux_task(struct task_struct *t) +{ + int retval = 0; + struct task_struct *leader = t->group_leader; + + BUG_ON(!tsk_rt(t)->is_aux_task); + + TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + + list_del(&tsk_rt(t)->aux_task_node); + + tsk_rt(t)->is_aux_task = 0; + + if (tsk_rt(t)->inh_task) { + litmus->decrease_prio(t, NULL); + } + + return retval; +} + +static int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp) +{ + int retval = 0; + struct list_head *pos; + + TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); + + list_for_each(pos, &tsk_aux(leader)->aux_tasks) { + struct task_struct *aux = + container_of(list_entry(pos, struct rt_param, aux_task_node), + struct task_struct, rt_param); + + if (!is_realtime(aux)) { +#if 0 + /* currently can't do this here because of scheduler deadlock on itself */ + TRACE_CUR("aux_tasks_increase_priorityting aux task: %s/%d\n", aux->comm, aux->pid); + retval = admit_aux_task(aux); + + if (retval != 0) { + TRACE_CUR("failed to admit aux task %s/%d\n", aux->comm, aux->pid); + goto out; + } +#endif + TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); + } + + // aux tasks don't touch rt locks, so no nested call needed. 
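	// (Recovering each task above takes two steps: list_entry() maps the
	// list_head pointer to the rt_param that embeds aux_task_node, and
	// the outer container_of() maps that rt_param to its enclosing
	// task_struct.)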
+ TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); + retval = litmus->__increase_prio(aux, hp); + } + + //out: + return retval; +} + +static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp) +{ + int retval = 0; + struct list_head *pos; + + TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); + + list_for_each(pos, &tsk_aux(leader)->aux_tasks) { + struct task_struct *aux = + container_of(list_entry(pos, struct rt_param, aux_task_node), + struct task_struct, rt_param); + + if (!is_realtime(aux)) { +#if 0 + /* currently can't do this here because of scheduler deadlock on itself */ + TRACE_CUR("aux_tasks_increase_priorityting aux task: %s/%d\n", aux->comm, aux->pid); + retval = admit_aux_task(aux); + + if (retval != 0) + goto out; + + if (hp) { + // aux tasks don't touch rt locks, so no nested call needed. + TRACE_CUR("decreasing (actually increasing) %s/%d.\n", aux->comm, aux->pid); + retval = litmus->__increase_prio(aux, hp); + } +#endif + + TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); + } + else { + TRACE_CUR("decreasing %s/%d.\n", aux->comm, aux->pid); + retval = litmus->__decrease_prio(aux, hp); + } + } + + //out: + return retval; +} + +int aux_task_owner_increase_priority(struct task_struct *t) +{ + int retval = 0; + struct task_struct *leader; + struct task_struct *hp = NULL; + + BUG_ON(!tsk_rt(t)->has_aux_tasks); + BUG_ON(!is_realtime(t)); + BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); + + leader = t->group_leader; + + TRACE_CUR("task %s/%d in group %s/%d increasing priority.\n", t->comm, t->pid, leader->comm, leader->pid); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + if (hp == t) { + goto out; // already hp, nothing to do. + } + + binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + if (hp == t) { + TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + retval = aux_tasks_increase_priority(leader, + (tsk_rt(hp)->inh_task) ? tsk_rt(hp)->inh_task : hp); + } + +out: + return retval; +} + +int aux_task_owner_decrease_priority(struct task_struct *t) +{ + int retval = 0; + struct task_struct *leader; + struct task_struct *hp = NULL; + struct task_struct *new_hp = NULL; + + BUG_ON(!tsk_rt(t)->has_aux_tasks); + BUG_ON(!is_realtime(t)); + BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); + + leader = t->group_leader; + + TRACE_CUR("task %s/%d in group %s/%d decresing priority.\n", t->comm, t->pid, leader->comm, leader->pid); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); + binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, + struct rt_param, aux_task_owner_node); + new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + if (hp == t && new_hp != t) { + TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + retval = aux_tasks_decrease_priority(leader, + (tsk_rt(new_hp)->inh_task) ? 
tsk_rt(new_hp)->inh_task : new_hp); + } + + return retval; +} + + + +long enable_aux_task_owner(struct task_struct *t) +{ + long retval = 0; + struct task_struct *leader = t->group_leader; + struct task_struct *hp; + + if (!tsk_rt(t)->has_aux_tasks) { + TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); + return -1; + } + + BUG_ON(!is_realtime(t)); + + if (binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + TRACE_CUR("task %s/%d is already active\n", t->comm, t->pid); + goto out; + } + + binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, + struct rt_param, aux_task_owner_node); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + if (hp == t) { + /* we're the new hp */ + TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + + retval = aux_tasks_increase_priority(leader, + (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + } + + +out: + return retval; +} + +long disable_aux_task_owner(struct task_struct *t) +{ + long retval = 0; + struct task_struct *leader = t->group_leader; + struct task_struct *hp; + struct task_struct *new_hp = NULL; + + if (!tsk_rt(t)->has_aux_tasks) { + TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); + return -1; + } + + BUG_ON(!is_realtime(t)); + + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + TRACE_CUR("task %s/%d is already not active\n", t->comm, t->pid); + goto out; + } + + TRACE_CUR("task %s/%d exiting from group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); + + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { + new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + } + + if (hp == t && new_hp != t) { + struct task_struct *to_inh = NULL; + + TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + + if (new_hp) { + to_inh = (tsk_rt(new_hp)->inh_task) ? 
tsk_rt(new_hp)->inh_task : new_hp; + } + + retval = aux_tasks_decrease_priority(leader, to_inh); + } + +out: + return retval; +} + + +static int aux_task_owner_max_priority_order(struct binheap_node *a, + struct binheap_node *b) +{ + struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + BUG_ON(!d_a); + BUG_ON(!d_b); + + return litmus->compare(d_a, d_b); +} + + +asmlinkage long sys_slave_non_rt_threads(void) +{ + long retval = 0; + struct task_struct *leader; + struct task_struct *t; + + read_lock_irq(&tasklist_lock); + + leader = current->group_leader; + +#if 0 + t = leader; + do { + if (tsk_rt(t)->has_aux_tasks || tsk_rt(t)->is_aux_task) { + printk("slave_non_rt_tasks may only be called once per process.\n"); + retval = -EINVAL; + goto out_unlock; + } + } while (t != leader); +#endif + + if (!tsk_aux(leader)->initialized) { + INIT_LIST_HEAD(&tsk_aux(leader)->aux_tasks); + INIT_BINHEAP_HANDLE(&tsk_aux(leader)->aux_task_owners, aux_task_owner_max_priority_order); + tsk_aux(leader)->initialized = 1; + } + + t = leader; + do { + /* doesn't hurt to initialize them both */ + INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); + INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); + + TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", + leader->comm, leader->pid, t->comm, t->pid, + tsk_rt(t)->task_params.period); + + /* inspect heap_node to see if it is an rt task */ + if (tsk_rt(t)->task_params.period == 0 || + tsk_rt(t)->task_params.period == MAGIC_AUX_TASK_PERIOD) { + if (!tsk_rt(t)->is_aux_task) { + TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); + /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ + tsk_rt(t)->is_aux_task = 1; + list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); + + (void)admit_aux_task(t); + } + else { + TRACE_CUR("AUX task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); + } + } + else { + if (!tsk_rt(t)->has_aux_tasks) { + TRACE_CUR("task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); + tsk_rt(t)->has_aux_tasks = 1; + if (is_realtime(t)) { + binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, + struct rt_param, aux_task_owner_node); + } + } + else { + TRACE_CUR("task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); + } + } + + t = next_thread(t); + } while(t != leader); + + + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { + struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + TRACE_CUR("found hp in group: %s/%d\n", hp->comm, hp->pid); + retval = aux_tasks_increase_priority(leader, + (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + } + + //out_unlock: + read_unlock_irq(&tasklist_lock); + + return retval; +} + +#else + +asmlinkage long sys_slave_non_rt_tasks(void) +{ + printk("Unsupported. 
Recompile with CONFIG_LITMUS_LOCKING.\n"); + return -EINVAL; +} + +#endif diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 39ce1816ee04..9b439299e5fc 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -74,6 +74,23 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } #ifdef CONFIG_LITMUS_LOCKING + /* aux threads with no inheritance have lowest priority; however, do a PID + * tie break if both threads are aux threads with no inheritance. + */ + if (unlikely(first->rt_param.is_aux_task && !first->rt_param.inh_task)) { + if (second->rt_param.is_aux_task && !second->rt_param.inh_task) { + /* pid break */ + if (first->pid < second->pid) { + return 1; + } + } + return 0; + } + if (unlikely(second->rt_param.is_aux_task && !second->rt_param.inh_task)) { + /* no need for pid break -- case already tested */ + return 1; + } + /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. */ @@ -191,7 +208,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) /* Both tasks have the same inherited priority. * Likely in a bug-condition. */ - if (likely(first->pid < second->pid)) { + if (first->pid < second->pid) { return 1; } else if (first->pid == second->pid) { @@ -205,6 +222,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) /* The task with the inherited priority wins. */ if (!second->rt_param.inh_task) { + /* + * common with aux tasks. TRACE_CUR("unusual comparison: " "first = %s/%d first_task = %s/%d " "second = %s/%d second_task = %s/%d\n", @@ -214,6 +233,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) second->comm, second->pid, (second->rt_param.inh_task) ? second->rt_param.inh_task->comm : "(nil)", (second->rt_param.inh_task) ? second->rt_param.inh_task->pid : 0); + */ return 1; } } diff --git a/litmus/litmus.c b/litmus/litmus.c index 83e8ef3f42af..1b4182ac3337 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -25,6 +25,10 @@ #include #endif +#ifdef CONFIG_LITMUS_LOCKING +#include +#endif + /* Number of RT tasks that exist in the system */ atomic_t rt_task_count = ATOMIC_INIT(0); static DEFINE_RAW_SPINLOCK(task_transition_lock); @@ -327,60 +331,6 @@ asmlinkage long sys_null_call(cycles_t __user *ts) return ret; } - -long __litmus_admit_task(struct task_struct* tsk); - -asmlinkage long sys_slave_non_rt_threads(void) -{ - long retval = 0; - struct task_struct *leader = current->group_leader; - struct task_struct *t; - struct task_struct *hp = NULL; - - read_lock_irq(&tasklist_lock); - - t = leader; - do { - TRACE_CUR("threads in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); - - if (tsk_rt(t)->heap_node == NULL) { - retval = __litmus_admit_task(t); - - if (retval != 0) break; - - /* hasn't been admitted into rt. make it a slave. 
*/ - tsk_rt(t)->slave = 1; - } - else { - tsk_rt(t)->has_slaves = 1; - - if (is_realtime(t) && litmus->compare(t, hp)) { - hp = t; - } - } - - t = next_thread(t); - } while(t != leader); - - if (hp) { - TRACE_CUR("found hp in group: %s/%d\n", hp->comm, hp->pid); - - /* set up inheritance */ - leader->hp_group = hp; - - t = leader; - do { - if (tsk_rt(t)->slave) { - litmus->increase_prio(t); - } - } while(t != leader); - } - - read_unlock_irq(&tasklist_lock); - - return 0; -} - #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) void init_gpu_affinity_state(struct task_struct* p) { @@ -412,11 +362,13 @@ static void reinit_litmus_state(struct task_struct* p, int restore) { struct rt_task user_config = {}; void* ctrl_page = NULL; - + #ifdef CONFIG_LITMUS_NESTED_LOCKING binheap_order_t prio_order = NULL; #endif + TRACE_TASK(p, "reinit_litmus_state: restore = %d\n", restore); + if (restore) { /* Safe user-space provided configuration data. * and allocated page. */ @@ -428,10 +380,12 @@ static void reinit_litmus_state(struct task_struct* p, int restore) prio_order = p->rt_param.hp_blocked_tasks.compare; #endif +#ifdef CONFIG_LITMUS_LOCKING /* We probably should not be inheriting any task's priority * at this point in time. */ WARN_ON(p->rt_param.inh_task); +#endif #ifdef CONFIG_LITMUS_NESTED_LOCKING WARN_ON(p->rt_param.blocked_lock); @@ -459,6 +413,13 @@ static void reinit_litmus_state(struct task_struct* p, int restore) /* Cleanup everything else. */ memset(&p->rt_param, 0, sizeof(p->rt_param)); +#ifdef CONFIG_LITMUS_LOCKING + /* also clear out the aux_data. the !restore case is only called on + * fork (initial thread creation). */ + if (!restore) + memset(&p->aux_data, 0, sizeof(p->aux_data)); +#endif + /* Restore preserved fields. */ if (restore) { p->rt_param.task_params = user_config; @@ -475,7 +436,12 @@ static void reinit_litmus_state(struct task_struct* p, int restore) #endif } + +#ifdef CONFIG_LITMUS_LOCKING +long __litmus_admit_task(struct task_struct* tsk, int clear_aux) +#else long __litmus_admit_task(struct task_struct* tsk) +#endif { long retval = 0; unsigned long flags; @@ -520,6 +486,14 @@ long __litmus_admit_task(struct task_struct* tsk) atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD); #endif +#ifdef CONFIG_LITMUS_LOCKING + /* turns out our aux thread isn't really an aux thread. 
*/ + if (clear_aux && tsk_rt(tsk)->is_aux_task) { + exit_aux_task(tsk); + tsk_rt(tsk)->has_aux_tasks = 1; + } +#endif + retval = litmus->admit_task(tsk); if (!retval) { @@ -537,8 +511,7 @@ out_unlock: long litmus_admit_task(struct task_struct* tsk) { long retval = 0; - unsigned long flags; - + BUG_ON(is_realtime(tsk)); if (get_rt_relative_deadline(tsk) == 0 || @@ -560,8 +533,12 @@ long litmus_admit_task(struct task_struct* tsk) goto out; } +#ifdef CONFIG_LITMUS_LOCKING + retval = __litmus_admit_task(tsk, (tsk_rt(tsk)->task_params.period != MAGIC_AUX_TASK_PERIOD)); +#else retval = __litmus_admit_task(tsk); - +#endif + out: return retval; } @@ -574,7 +551,7 @@ void litmus_exit_task(struct task_struct* tsk) litmus->task_exit(tsk); BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); - bheap_node_free(tsk_rt(tsk)->heap_node); + bheap_node_free(tsk_rt(tsk)->heap_node); release_heap_free(tsk_rt(tsk)->rel_heap); atomic_dec(&rt_task_count); @@ -647,14 +624,22 @@ out: */ void litmus_fork(struct task_struct* p) { + reinit_litmus_state(p, 0); + if (is_realtime(p)) { + TRACE_TASK(p, "fork, is real-time\n"); /* clean out any litmus related state, don't preserve anything */ - reinit_litmus_state(p, 0); + //reinit_litmus_state(p, 0); /* Don't let the child be a real-time task. */ p->sched_reset_on_fork = 1; - } else + } else { /* non-rt tasks might have ctrl_page set */ tsk_rt(p)->ctrl_page = NULL; + + /* still don't inherit any parental parameters */ + //memset(&p->rt_param, 0, sizeof(p->rt_param)); + //memset(&p->aux_data, 0, sizeof(p->aux_data)); + } /* od tables are never inherited across a fork */ p->od_table = NULL; @@ -751,6 +736,10 @@ static int __init _init_litmus(void) init_topology(); #endif +#ifdef CONFIG_LITMUS_NVIDIA + //init_nvidia_info(); +#endif + return 0; } diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 4b86a50d3bd1..b6ead58802f6 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -244,9 +244,56 @@ void dump_nvidia_info(const struct tasklet_struct *t) #endif } + + static struct module* nvidia_mod = NULL; + + +#if 0 +static int nvidia_ready_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + mutex_lock(&module_mutex); + nvidia_mod = find_module("nvidia"); + mutex_unlock(&module_mutex); + + if(nvidia_mod != NULL) + { + TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, + (void*)(nvidia_mod->module_core), + (void*)(nvidia_mod->module_core) + nvidia_mod->core_size); + init_nv_device_reg(); + return(0); + } + else + { + TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); + } +} + +static int nvidia_going_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + nvidia_mod = NULL; + mb(); + + return 0; +} + +static struct notifier_block nvidia_ready = { + .notifier_call = nvidia_ready_module_notify, + .priority = 1, +}; + +static struct notifier_block nvidia_going = { + .notifier_call = nvidia_going_module_notify, + .priority = 1, +}; +#endif + int init_nvidia_info(void) { +#if 1 mutex_lock(&module_mutex); nvidia_mod = find_module("nvidia"); mutex_unlock(&module_mutex); @@ -263,6 +310,7 @@ int init_nvidia_info(void) TRACE("%s : Could not find NVIDIA module! 
Loaded?\n", __FUNCTION__); return(-1); } +#endif } void shutdown_nvidia_info(void) diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c index d0b796611bea..d4f030728d3c 100644 --- a/litmus/rt_domain.c +++ b/litmus/rt_domain.c @@ -300,10 +300,15 @@ void rt_domain_init(rt_domain_t *rt, */ void __add_ready(rt_domain_t* rt, struct task_struct *new) { - TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " - "to ready queue at %llu\n", - new->comm, new->pid, - get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), + TRACE("rt: adding %s/%d (%llu, %llu, %llu) " + "[inh_task: %s/%d (%llu, %llu %llu)] " + "rel=%llu to ready queue at %llu\n", + new->comm, new->pid, get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), + (tsk_rt(new)->inh_task) ? tsk_rt(new)->inh_task->comm : "(nil)", + (tsk_rt(new)->inh_task) ? tsk_rt(new)->inh_task->pid : 0, + (tsk_rt(new)->inh_task) ? get_exec_cost(tsk_rt(new)->inh_task) : 0, + (tsk_rt(new)->inh_task) ? get_rt_period(tsk_rt(new)->inh_task) : 0, + (tsk_rt(new)->inh_task) ? get_rt_relative_deadline(tsk_rt(new)->inh_task) : 0, get_release(new), litmus_clock()); BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index d98de4579394..f030f027b486 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1086,9 +1086,10 @@ static long cedf_admit_task(struct task_struct* tsk) /* called with IRQs off */ -static void __increase_priority_inheritance(struct task_struct* t, +static int __increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success = 1; int linked_on; int check_preempt = 0; @@ -1166,8 +1167,10 @@ static void __increase_priority_inheritance(struct task_struct* t, (prio_inh) ? prio_inh->comm : "nil", (prio_inh) ? prio_inh->pid : -1); WARN_ON(!prio_inh); + success = 0; } #endif + return success; } /* called with IRQs off */ @@ -1204,9 +1207,10 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str } /* called with IRQs off */ -static void __decrease_priority_inheritance(struct task_struct* t, +static int __decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success = 1; #ifdef CONFIG_LITMUS_NESTED_LOCKING if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { #endif @@ -1254,8 +1258,10 @@ static void __decrease_priority_inheritance(struct task_struct* t, effective_priority(t)->comm, effective_priority(t)->pid, (prio_inh) ? prio_inh->comm : "nil", (prio_inh) ? 
prio_inh->pid : -1); + success = 0; } #endif + return success; } static void decrease_priority_inheritance(struct task_struct* t, @@ -1812,6 +1818,8 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .allocate_lock = cedf_allocate_lock, .increase_prio = increase_priority_inheritance, .decrease_prio = decrease_priority_inheritance, + .__increase_prio = __increase_priority_inheritance, + .__decrease_prio = __decrease_priority_inheritance, #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING .nested_increase_prio = nested_increase_priority_inheritance, diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 83b2f04b1532..5b8ca6698423 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -29,6 +29,7 @@ #ifdef CONFIG_LITMUS_LOCKING #include +#include #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING @@ -295,11 +296,37 @@ static noinline void requeue(struct task_struct* task) /* sanity check before insertion */ BUG_ON(is_queued(task)); - if (is_released(task, litmus_clock())) - __add_ready(&gsnedf, task); + if (is_released(task, litmus_clock())) { + + if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { + /* aux_task probably transitioned to real-time while it was blocked */ + TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); + } + else { + __add_ready(&gsnedf, task); + +#if 0 + if (tsk_rt(task)->has_aux_tasks) { + + TRACE_CUR("%s/%d is ready and has aux tasks.\n", task->comm, task->pid); + /* allow it's prio inheritance to act on aux threads */ + enable_aux_task_owner(task); + } +#endif + } + } else { /* it has got to wait */ add_release(&gsnedf, task); + +#if 0 + if (tsk_rt(task)->has_aux_tasks) { + + TRACE_CUR("%s/%d is waiting for release and has aux tasks.\n", task->comm, task->pid); + /* prevent prio inheritance from acting while it's not ready */ + disable_aux_task_owner(task); + } +#endif } } @@ -366,10 +393,45 @@ static noinline void gsnedf_job_arrival(struct task_struct* task) static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) { unsigned long flags; + //struct bheap_node* node; raw_spin_lock_irqsave(&gsnedf_lock, flags); +#if 0 + node = tasks->head; + while(node) { + struct task_struct *task = bheap2task(node); + + if (tsk_rt(task)->has_aux_tasks) { + + TRACE_CUR("%s/%d is ready and has aux tasks.\n", task->comm, task->pid); + + /* allow it's prio inheritance to act on aux threads */ + enable_aux_task_owner(task); + } + + /* pre-order sub-tree traversal */ + if (node->child) { + /* go down */ + node = node->child; + } + else if(node->parent && node->parent->next) { + /* go up a level and across */ + node = node->parent->next; + } + else if(!node->parent && node->next) { + /* go to the next binomial tree */ + node = node->next; + } + else { + /* the end! 
*/ + node = NULL; + } + } +#endif + __merge_ready(rt, tasks); + check_for_preemptions(); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); @@ -387,11 +449,12 @@ static noinline void job_completion(struct task_struct *t, int forced) #endif TRACE_TASK(t, "job_completion().\n"); - + /* set flags */ set_rt_flags(t, RT_F_SLEEP); /* prepare for next period */ prepare_for_next_period(t); + if (is_released(t, litmus_clock())) sched_trace_task_release(t); /* unlink */ @@ -902,8 +965,7 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) else if (exists && !next) TRACE("becomes idle at %llu.\n", litmus_clock()); #endif - - + return next; } @@ -997,13 +1059,18 @@ static void gsnedf_task_wake_up(struct task_struct *task) set_rt_flags(task, RT_F_RUNNING); #endif + if (tsk_rt(task)->has_aux_tasks) { + + TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); + disable_aux_task_owner(task); + } + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } static void gsnedf_task_block(struct task_struct *t) { - // TODO: is this called on preemption?? unsigned long flags; TRACE_TASK(t, "block at %llu\n", litmus_clock()); @@ -1013,6 +1080,12 @@ static void gsnedf_task_block(struct task_struct *t) unlink(t); + if (tsk_rt(t)->has_aux_tasks) { + + TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); + enable_aux_task_owner(t); + } + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -1027,8 +1100,22 @@ static void gsnedf_task_exit(struct task_struct * t) gsnedf_change_prio_pai_tasklet(t, NULL); #endif +#ifdef CONFIG_LITMUS_LOCKING + if (tsk_rt(t)->is_aux_task) { + exit_aux_task(t); /* cannot be called with gsnedf_lock held */ + } +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); + +#ifdef CONFIG_LITMUS_LOCKING + /* make sure we clean up on our way out */ + if(tsk_rt(t)->has_aux_tasks) { + disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ + } +#endif + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; @@ -1037,7 +1124,7 @@ static void gsnedf_task_exit(struct task_struct * t) raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); - TRACE_TASK(t, "RIP\n"); + TRACE_TASK(t, "RIP\n"); } @@ -1061,12 +1148,20 @@ static long gsnedf_admit_task(struct task_struct* tsk) #include /* called with IRQs off */ -static void __increase_priority_inheritance(struct task_struct* t, +static int __increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success = 1; int linked_on; int check_preempt = 0; + if (prio_inh && prio_inh == effective_priority(t)) { + /* relationship already established. */ + TRACE_TASK(t, "already has effective priority of %s/%d\n", + prio_inh->comm, prio_inh->pid); + goto out; + } + #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ @@ -1126,28 +1221,40 @@ static void __increase_priority_inheritance(struct task_struct* t, &gsnedf.ready_queue); check_for_preemptions(); } + + + /* propagate to aux tasks */ + if (tsk_rt(t)->has_aux_tasks) { + aux_task_owner_increase_priority(t); + } } #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { TRACE_TASK(t, "Spurious invalid priority increase. 
" - "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", t->comm, t->pid, effective_priority(t)->comm, effective_priority(t)->pid, (prio_inh) ? prio_inh->comm : "nil", (prio_inh) ? prio_inh->pid : -1); WARN_ON(!prio_inh); + success = 0; } #endif + +out: + return success; } /* called with IRQs off */ static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success; + raw_spin_lock(&gsnedf_lock); - __increase_priority_inheritance(t, prio_inh); + success = __increase_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD if(tsk_rt(t)->cur_klitirqd != NULL) @@ -1160,7 +1267,7 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str #endif raw_spin_unlock(&gsnedf_lock); - + #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) if(tsk_rt(t)->held_gpus) { int i; @@ -1175,9 +1282,19 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str /* called with IRQs off */ -static void __decrease_priority_inheritance(struct task_struct* t, +static int __decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success = 1; + + if (prio_inh == tsk_rt(t)->inh_task) { + /* relationship already established. */ + TRACE_TASK(t, "already inherits priority from %s/%d\n", + (prio_inh) ? prio_inh->comm : "(nil)", + (prio_inh) ? prio_inh->pid : 0); + goto out; + } + #ifdef CONFIG_LITMUS_NESTED_LOCKING if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { #endif @@ -1214,6 +1331,11 @@ static void __decrease_priority_inheritance(struct task_struct* t, } raw_spin_unlock(&gsnedf.release_lock); } + + /* propagate to aux tasks */ + if (tsk_rt(t)->has_aux_tasks) { + aux_task_owner_decrease_priority(t); + } #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { @@ -1224,16 +1346,23 @@ static void __decrease_priority_inheritance(struct task_struct* t, effective_priority(t)->comm, effective_priority(t)->pid, (prio_inh) ? prio_inh->comm : "nil", (prio_inh) ? 
prio_inh->pid : -1); + success = 0; } #endif + +out: + return success; } static void decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success; + raw_spin_lock(&gsnedf_lock); - __decrease_priority_inheritance(t, prio_inh); - + + success = __decrease_priority_inheritance(t, prio_inh); + #ifdef CONFIG_LITMUS_SOFTIRQD if(tsk_rt(t)->cur_klitirqd != NULL) { @@ -1245,7 +1374,7 @@ static void decrease_priority_inheritance(struct task_struct* t, #endif raw_spin_unlock(&gsnedf_lock); - + #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) if(tsk_rt(t)->held_gpus) { int i; @@ -1828,6 +1957,8 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .allocate_lock = gsnedf_allocate_lock, .increase_prio = increase_priority_inheritance, .decrease_prio = decrease_priority_inheritance, + .__increase_prio = __increase_priority_inheritance, + .__decrease_prio = __decrease_priority_inheritance, #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING .nested_increase_prio = nested_increase_priority_inheritance, diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 245e41c25a5d..d24c9167cff8 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -137,6 +137,18 @@ static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh) { } + +static int litmus_dummy___increase_prio(struct task_struct* t, struct task_struct* prio_inh) +{ + TRACE_CUR("WARNING: Dummy litmus_dummy___increase_prio called!\n"); + return 0; +} + +static int litmus_dummy___decrease_prio(struct task_struct* t, struct task_struct* prio_inh) +{ + TRACE_CUR("WARNING: Dummy litmus_dummy___decrease_prio called!\n"); + return 0; +} #endif #ifdef CONFIG_LITMUS_SOFTIRQD @@ -227,6 +239,8 @@ struct sched_plugin linux_sched_plugin = { .allocate_lock = litmus_dummy_allocate_lock, .increase_prio = litmus_dummy_increase_prio, .decrease_prio = litmus_dummy_decrease_prio, + .__increase_prio = litmus_dummy___increase_prio, + .__decrease_prio = litmus_dummy___decrease_prio, #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING .nested_increase_prio = litmus_dummy_nested_increase_prio, @@ -289,6 +303,8 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(allocate_lock); CHECK(increase_prio); CHECK(decrease_prio); + CHECK(__increase_prio); + CHECK(__decrease_prio); #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING CHECK(nested_increase_prio); -- cgit v1.2.2 From 4e8f9b7c2e9134ca31feb91dee3609a95df6de56 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 16 Sep 2012 17:44:37 -0400 Subject: Implement real-time aux threads. G-EDF only. 
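The core scheduling change in this patch is the aux-thread ordering in
edf_common.c: an aux thread that is not inheriting a priority always loses to
any other real-time task; two such aux threads are ordered by a hash of
(pid, job#) so that quantum-driven job completions rotate which one wins; and
aux threads inheriting from the same owner are tie-broken the same way. The
sketch below is a user-space model of that ordering, not the kernel code; the
struct fields are simplified stand-ins and hash32() only approximates the
kernel's hash_32().

#include <stdint.h>
#include <stdio.h>

struct tsk {
    int      pid;
    uint32_t job_no;        /* bumped whenever the 1 ms aux budget expires  */
    int      is_aux;        /* aux helper thread?                           */
    const struct tsk *inh;  /* task whose priority is inherited, or NULL    */
    uint64_t deadline;      /* effective (possibly inherited) deadline      */
};

/* rough stand-in for the kernel's hash_32(): multiply by a golden-ratio prime */
static uint32_t hash32(uint32_t v) { return v * 0x9e370001u; }

static int aux_tie_break(const struct tsk *a, const struct tsk *b)
{
    uint32_t ha = hash32(hash32(a->job_no) ^ (uint32_t)a->pid);
    uint32_t hb = hash32(hash32(b->job_no) ^ (uint32_t)b->pid);
    if (ha != hb)
        return ha < hb;        /* pseudo-random but stable within a job */
    return a->pid < b->pid;    /* deterministic fallback                */
}

/* non-zero if a should be scheduled ahead of b */
static int edf_higher_prio(const struct tsk *a, const struct tsk *b)
{
    int a_lo = a->is_aux && !a->inh;   /* aux with nothing to inherit */
    int b_lo = b->is_aux && !b->inh;

    if (a_lo && b_lo) return aux_tie_break(a, b);  /* both are background aux */
    if (a_lo) return 0;                            /* a always loses          */
    if (b_lo) return 1;                            /* b always loses          */

    /* aux threads inheriting from the same owner: rotate between them too */
    if (a->is_aux && b->is_aux && a->inh && a->inh == b->inh)
        return aux_tie_break(a, b);

    return a->deadline < b->deadline;              /* ordinary EDF            */
}

int main(void)
{
    struct tsk owner = { .pid = 50, .deadline = 1000 };
    struct tsk aux1  = { .pid = 51, .is_aux = 1 };
    struct tsk aux2  = { .pid = 52, .is_aux = 1, .inh = &owner, .deadline = 1000 };

    printf("owner beats idle aux:      %d\n", edf_higher_prio(&owner, &aux1)); /* 1 */
    printf("inheriting aux beats idle: %d\n", edf_higher_prio(&aux2, &aux1));  /* 1 */
    return 0;
}
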
--- include/litmus/aux_tasks.h | 4 +- include/litmus/litmus.h | 4 +- include/litmus/rt_param.h | 4 +- include/litmus/sched_plugin.h | 2 +- kernel/sched.c | 24 ++++- litmus/aux_tasks.c | 243 +++++++++++++++++++++++------------------- litmus/edf_common.c | 83 +++++++++++---- litmus/litmus.c | 44 +++----- litmus/preempt.c | 25 ++++- litmus/sched_gsn_edf.c | 169 ++++++++++++++--------------- litmus/sched_litmus.c | 4 +- litmus/sched_plugin.c | 22 +++- 12 files changed, 357 insertions(+), 271 deletions(-) diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h index 8e50ac85b082..3bb6b26fef09 100644 --- a/include/litmus/aux_tasks.h +++ b/include/litmus/aux_tasks.h @@ -3,8 +3,6 @@ struct task_struct; -#define MAGIC_AUX_TASK_PERIOD ~((lt_t)0) - /* admit an aux task with default parameters */ //int admit_aux_task(struct task_struct *t); @@ -30,4 +28,4 @@ int aux_task_owner_increase_priority(struct task_struct *t); /* call when an aux_owner decreases its priority */ int aux_task_owner_decrease_priority(struct task_struct *t); -#endif \ No newline at end of file +#endif diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index f9829167294d..db2987a24686 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -184,8 +184,10 @@ static inline int request_exit_np_atomic(struct task_struct *t) * retry loop here since tasks might exploit that to * keep the kernel busy indefinitely. */ } - } else + } + else { return 0; + } } #else diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 2a6c70f1dd37..c45ba23d7650 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -284,11 +284,11 @@ struct rt_param { struct litmus_lock* blocked_lock; #endif - + #ifdef CONFIG_LITMUS_LOCKING unsigned int is_aux_task:1; unsigned int has_aux_tasks:1; - + struct list_head aux_task_node; struct binheap_node aux_task_owner_node; #endif diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index bd75e7c09a10..65736b2a9199 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -150,7 +150,7 @@ struct sched_plugin { allocate_lock_t allocate_lock; increase_prio_t increase_prio; decrease_prio_t decrease_prio; - + __increase_prio_t __increase_prio; __decrease_prio_t __decrease_prio; #endif diff --git a/kernel/sched.c b/kernel/sched.c index 9e8d8698323b..0e4b3d40cd29 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2703,8 +2703,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) unsigned long flags; int cpu, success = 0; - if (is_realtime(p)) + if (is_realtime(p)) { + //WARN_ON(1); TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); + } smp_wmb(); raw_spin_lock_irqsave(&p->pi_lock, flags); @@ -3169,6 +3171,12 @@ asmlinkage void schedule_tail(struct task_struct *prev) { struct rq *rq = this_rq(); + sched_trace_task_switch_to(current); + + if (sched_state_validate_switch()) { + WARN_ON(1); + } + finish_task_switch(rq, prev); /* @@ -4416,8 +4424,16 @@ litmus_need_resched_nonpreemptible: post_schedule(rq); - if (sched_state_validate_switch()) + if (sched_state_validate_switch()) { + TRACE_CUR("cpu %d: have to redo scheduling decision!\n", cpu); goto litmus_need_resched_nonpreemptible; + } + else if (current->policy == SCHED_LITMUS) { + TRACE_CUR("cpu %d: valid switch to rt task %s/%d.\n", cpu, current->comm, current->pid); + } + else { +// TRACE_CUR("cpu %d: switch: %s/%d\n", cpu, current->comm, current->pid); + } preempt_enable_no_resched(); @@ -4430,8 +4446,8 @@ 
litmus_need_resched_nonpreemptible: #ifdef CONFIG_LITMUS_PAI_SOFTIRQD litmus->run_tasklets(prev); -#endif - +#endif + srp_ceiling_block(); } EXPORT_SYMBOL(schedule); diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index c197a95fc3a1..5057137bbbea 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -10,22 +10,37 @@ static int admit_aux_task(struct task_struct *t) { int retval = 0; struct task_struct *leader = t->group_leader; - + + + /* budget enforcement increments job numbers. job numbers are used in + * tie-breaking of aux_tasks. method helps ensure: + * 1) aux threads with no inherited priority can starve another (they share + * the CPUs equally. + * 2) aux threads that inherit the same priority cannot starve each other. + * + * Assuming aux threads are well-behavied (they do very little work and + * suspend), risk of starvation should not be an issue, but this is a + * fail-safe. + */ struct rt_task tp = { - .exec_cost = 0, - .period = MAGIC_AUX_TASK_PERIOD, - .relative_deadline = MAGIC_AUX_TASK_PERIOD, + //.period = MAGIC_AUX_TASK_PERIOD, + //.relative_deadline = MAGIC_AUX_TASK_PERIOD, + .period = 1000000, /* has to wait 1 ms before it can run again once it has exhausted budget */ + .relative_deadline = 1000000, + .exec_cost = 1000000, /* allow full utilization */ .phase = 0, .cpu = task_cpu(leader), /* take CPU of group leader */ - .budget_policy = NO_ENFORCEMENT, + //.budget_policy = NO_ENFORCEMENT, + .budget_policy = QUANTUM_ENFORCEMENT, + .budget_signal_policy = NO_SIGNALS, .cls = RT_CLASS_BEST_EFFORT }; - + struct sched_param param = { .sched_priority = 0}; - + tsk_rt(t)->task_params = tp; retval = sched_setscheduler_nocheck(t, SCHED_LITMUS, ¶m); - + return retval; } @@ -33,19 +48,19 @@ int exit_aux_task(struct task_struct *t) { int retval = 0; struct task_struct *leader = t->group_leader; - + BUG_ON(!tsk_rt(t)->is_aux_task); - + TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - + list_del(&tsk_rt(t)->aux_task_node); - + tsk_rt(t)->is_aux_task = 0; - + if (tsk_rt(t)->inh_task) { litmus->decrease_prio(t, NULL); } - + return retval; } @@ -53,34 +68,23 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s { int retval = 0; struct list_head *pos; - + TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); - + list_for_each(pos, &tsk_aux(leader)->aux_tasks) { struct task_struct *aux = container_of(list_entry(pos, struct rt_param, aux_task_node), struct task_struct, rt_param); if (!is_realtime(aux)) { -#if 0 - /* currently can't do this here because of scheduler deadlock on itself */ - TRACE_CUR("aux_tasks_increase_priorityting aux task: %s/%d\n", aux->comm, aux->pid); - retval = admit_aux_task(aux); - - if (retval != 0) { - TRACE_CUR("failed to admit aux task %s/%d\n", aux->comm, aux->pid); - goto out; - } -#endif TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); } - + // aux tasks don't touch rt locks, so no nested call needed. 
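/*
 * For reference, the list walk above stores only the embedded
 * rt_param.aux_task_node of each aux thread on the leader's list, so the
 * owning task_struct has to be recovered with two container_of() steps.
 * A self-contained user-space model of that idiom follows; the struct
 * layouts are simplified stand-ins, not the kernel definitions.
 */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };     /* as in <linux/list.h> */
struct rt_param  { struct list_head aux_task_node; };     /* embedded list node   */
struct task_struct { int pid; struct rt_param rt_param; };

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

/* mirrors: container_of(list_entry(pos, struct rt_param, aux_task_node),
 *                       struct task_struct, rt_param)                     */
static struct task_struct *node_to_task(struct list_head *pos)
{
    struct rt_param *rp = container_of(pos, struct rt_param, aux_task_node);
    return container_of(rp, struct task_struct, rt_param);
}

int main(void)
{
    struct task_struct t = { .pid = 1234 };
    struct list_head *pos = &t.rt_param.aux_task_node;   /* what the walk sees */

    printf("recovered pid: %d\n", node_to_task(pos)->pid);   /* prints 1234 */
    return 0;
}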
TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); retval = litmus->__increase_prio(aux, hp); } - - //out: + return retval; } @@ -88,30 +92,15 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s { int retval = 0; struct list_head *pos; - + TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); - + list_for_each(pos, &tsk_aux(leader)->aux_tasks) { struct task_struct *aux = container_of(list_entry(pos, struct rt_param, aux_task_node), struct task_struct, rt_param); - + if (!is_realtime(aux)) { -#if 0 - /* currently can't do this here because of scheduler deadlock on itself */ - TRACE_CUR("aux_tasks_increase_priorityting aux task: %s/%d\n", aux->comm, aux->pid); - retval = admit_aux_task(aux); - - if (retval != 0) - goto out; - - if (hp) { - // aux tasks don't touch rt locks, so no nested call needed. - TRACE_CUR("decreasing (actually increasing) %s/%d.\n", aux->comm, aux->pid); - retval = litmus->__increase_prio(aux, hp); - } -#endif - TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); } else { @@ -119,8 +108,7 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s retval = litmus->__decrease_prio(aux, hp); } } - - //out: + return retval; } @@ -133,20 +121,20 @@ int aux_task_owner_increase_priority(struct task_struct *t) BUG_ON(!tsk_rt(t)->has_aux_tasks); BUG_ON(!is_realtime(t)); BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); - + leader = t->group_leader; - + TRACE_CUR("task %s/%d in group %s/%d increasing priority.\n", t->comm, t->pid, leader->comm, leader->pid); hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - + if (hp == t) { goto out; // already hp, nothing to do. } - + binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); - + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); @@ -155,7 +143,7 @@ int aux_task_owner_increase_priority(struct task_struct *t) retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task) ? 
tsk_rt(hp)->inh_task : hp); } - + out: return retval; } @@ -166,15 +154,15 @@ int aux_task_owner_decrease_priority(struct task_struct *t) struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *new_hp = NULL; - + BUG_ON(!tsk_rt(t)->has_aux_tasks); BUG_ON(!is_realtime(t)); BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); - + leader = t->group_leader; - + TRACE_CUR("task %s/%d in group %s/%d decresing priority.\n", t->comm, t->pid, leader->comm, leader->pid); - + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); @@ -182,7 +170,7 @@ int aux_task_owner_decrease_priority(struct task_struct *t) struct rt_param, aux_task_owner_node); new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - + if (hp == t && new_hp != t) { TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); retval = aux_tasks_decrease_priority(leader, @@ -204,28 +192,28 @@ long enable_aux_task_owner(struct task_struct *t) TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); return -1; } - + BUG_ON(!is_realtime(t)); - + if (binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { TRACE_CUR("task %s/%d is already active\n", t->comm, t->pid); goto out; } - + binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node); - + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); if (hp == t) { /* we're the new hp */ TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - + retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); } - + out: return retval; } @@ -236,42 +224,42 @@ long disable_aux_task_owner(struct task_struct *t) struct task_struct *leader = t->group_leader; struct task_struct *hp; struct task_struct *new_hp = NULL; - + if (!tsk_rt(t)->has_aux_tasks) { TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); return -1; } - + BUG_ON(!is_realtime(t)); - + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { TRACE_CUR("task %s/%d is already not active\n", t->comm, t->pid); goto out; } - + TRACE_CUR("task %s/%d exiting from group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); - + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); } - + if (hp == t && new_hp != t) { struct task_struct *to_inh = NULL; - + TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - + if (new_hp) { to_inh = (tsk_rt(new_hp)->inh_task) ? 
tsk_rt(new_hp)->inh_task : new_hp; } - + retval = aux_tasks_decrease_priority(leader, to_inh); } - + out: return retval; } @@ -284,60 +272,47 @@ static int aux_task_owner_max_priority_order(struct binheap_node *a, struct task_struct, rt_param); struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - + BUG_ON(!d_a); BUG_ON(!d_b); - + return litmus->compare(d_a, d_b); } -asmlinkage long sys_slave_non_rt_threads(void) +static long __do_enable_slave_non_rt_threads(void) { long retval = 0; struct task_struct *leader; struct task_struct *t; - read_lock_irq(&tasklist_lock); - leader = current->group_leader; - -#if 0 - t = leader; - do { - if (tsk_rt(t)->has_aux_tasks || tsk_rt(t)->is_aux_task) { - printk("slave_non_rt_tasks may only be called once per process.\n"); - retval = -EINVAL; - goto out_unlock; - } - } while (t != leader); -#endif - + if (!tsk_aux(leader)->initialized) { INIT_LIST_HEAD(&tsk_aux(leader)->aux_tasks); INIT_BINHEAP_HANDLE(&tsk_aux(leader)->aux_task_owners, aux_task_owner_max_priority_order); tsk_aux(leader)->initialized = 1; } - + t = leader; do { /* doesn't hurt to initialize them both */ INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); - + TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", leader->comm, leader->pid, t->comm, t->pid, tsk_rt(t)->task_params.period); - + /* inspect heap_node to see if it is an rt task */ - if (tsk_rt(t)->task_params.period == 0 || - tsk_rt(t)->task_params.period == MAGIC_AUX_TASK_PERIOD) { + if (tsk_rt(t)->task_params.period == 0) { //|| + // tsk_rt(t)->task_params.period == MAGIC_AUX_TASK_PERIOD) { if (!tsk_rt(t)->is_aux_task) { TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ tsk_rt(t)->is_aux_task = 1; list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); - + (void)admit_aux_task(t); } else { @@ -348,10 +323,6 @@ asmlinkage long sys_slave_non_rt_threads(void) if (!tsk_rt(t)->has_aux_tasks) { TRACE_CUR("task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); tsk_rt(t)->has_aux_tasks = 1; - if (is_realtime(t)) { - binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, - struct rt_param, aux_task_owner_node); - } } else { TRACE_CUR("task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); @@ -361,16 +332,72 @@ asmlinkage long sys_slave_non_rt_threads(void) t = next_thread(t); } while(t != leader); - + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); TRACE_CUR("found hp in group: %s/%d\n", hp->comm, hp->pid); retval = aux_tasks_increase_priority(leader, - (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + } + + return retval; +} + +static long __do_disable_slave_non_rt_threads(void) +{ + long retval = 0; + struct task_struct *leader; + struct task_struct *t; + + leader = current->group_leader; + + t = leader; + do { + if (tsk_rt(t)->is_aux_task) { + + TRACE_CUR("%s/%d is an aux task.\n", t->comm, t->pid); + + if (is_realtime(t)) { + long temp_retval; + struct sched_param param = { .sched_priority = 0}; + + TRACE_CUR("%s/%d is real-time. 
Changing policy to SCHED_NORMAL.\n", t->comm, t->pid); + + temp_retval = sched_setscheduler_nocheck(t, SCHED_NORMAL, ¶m); + + if (temp_retval != 0) { + TRACE_CUR("error changing policy of %s/%d to SCHED_NORMAL\n", t->comm, t->pid); + if (retval == 0) { + retval = temp_retval; + } + else { + TRACE_CUR("prior error (%d) masks new error (%d)\n", retval, temp_retval); + } + } + } + + tsk_rt(t)->is_aux_task = 0; + } + t = next_thread(t); + } while(t != leader); + + return retval; +} + +asmlinkage long sys_slave_non_rt_threads(int enable) +{ + long retval; + + read_lock_irq(&tasklist_lock); + + if (enable) { + retval = __do_enable_slave_non_rt_threads(); + } + else { + retval = __do_disable_slave_non_rt_threads(); } - //out_unlock: read_unlock_irq(&tasklist_lock); return retval; @@ -378,7 +405,7 @@ asmlinkage long sys_slave_non_rt_threads(void) #else -asmlinkage long sys_slave_non_rt_tasks(void) +asmlinkage long sys_slave_non_rt_tasks(int enable) { printk("Unsupported. Recompile with CONFIG_LITMUS_LOCKING.\n"); return -EINVAL; diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 9b439299e5fc..ca06f6ec103e 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -22,7 +22,7 @@ #include #endif -#ifdef CONFIG_EDF_TIE_BREAK_HASH +//#ifdef CONFIG_EDF_TIE_BREAK_HASH #include static inline long edf_hash(struct task_struct *t) { @@ -41,7 +41,22 @@ static inline long edf_hash(struct task_struct *t) */ return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); } -#endif +//#endif + +int aux_tie_break(struct task_struct *first, struct task_struct *second) +{ + long fhash = edf_hash(first); + long shash = edf_hash(second); + if (fhash < shash) { + TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, 1); + return 1; + } + else if(fhash == shash) { + TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, (first->pid < second->pid)); + return first->pid < second->pid; + } + return 0; +} /* edf_higher_prio - returns true if first has a higher EDF priority @@ -60,6 +75,11 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) struct task_struct *first_task = first; struct task_struct *second_task = second; + int first_lo_aux; + int second_lo_aux; + int first_hi_aux; + int second_hi_aux; + /* There is no point in comparing a task to itself. */ if (first && first == second) { TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid); @@ -74,23 +94,34 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } #ifdef CONFIG_LITMUS_LOCKING - /* aux threads with no inheritance have lowest priority; however, do a PID - * tie break if both threads are aux threads with no inheritance. 
- */ - if (unlikely(first->rt_param.is_aux_task && !first->rt_param.inh_task)) { - if (second->rt_param.is_aux_task && !second->rt_param.inh_task) { - /* pid break */ - if (first->pid < second->pid) { - return 1; - } - } + + first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; + second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task; + + if (first_lo_aux && !second_lo_aux) { + TRACE_CUR("%s/%d >> %s/%d --- 0\n", first->comm, first->pid, second->comm, second->pid); return 0; } - if (unlikely(second->rt_param.is_aux_task && !second->rt_param.inh_task)) { - /* no need for pid break -- case already tested */ + else if (second_lo_aux && !first_lo_aux) { + TRACE_CUR("%s/%d >> %s/%d --- 1\n", first->comm, first->pid, second->comm, second->pid); return 1; } - + else if (first_lo_aux && second_lo_aux) { + int aux_lo_tie_break = aux_tie_break(first, second); + TRACE_CUR("low aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_lo_tie_break); + return aux_lo_tie_break; + } + + first_hi_aux = first->rt_param.is_aux_task && first->rt_param.inh_task; + second_hi_aux = second->rt_param.is_aux_task && second->rt_param.inh_task; + + if (first_hi_aux && second_hi_aux && first->rt_param.inh_task == second->rt_param.inh_task) { + int aux_hi_tie_break = aux_tie_break(first, second); + TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); + return aux_hi_tie_break; + } + + /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. */ @@ -149,7 +180,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) */ if (get_lateness(first_task) > get_lateness(second_task)) { return 1; - } + } pid_break = (get_lateness(first_task) == get_lateness(second_task)); @@ -171,8 +202,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } pid_break = _eq(fnorm, snorm); - - + + #elif defined(CONFIG_EDF_TIE_BREAK_HASH) /* Tie break by comparing hashs of (pid, job#) tuple. There should be * a 50% chance that first_task has a higher priority than second_task. @@ -184,8 +215,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } pid_break = (fhash == shash); #else - - + + /* CONFIG_EDF_PID_TIE_BREAK */ pid_break = 1; // fall through to tie-break by pid; #endif @@ -197,11 +228,17 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } else if (first_task->pid == second_task->pid) { #ifdef CONFIG_LITMUS_SOFTIRQD - if (first_task->rt_param.is_proxy_thread < + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread) { return 1; } #endif + if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { + TRACE_CUR("AUX BREAK!\n"); + return 1; + } + + /* Something could be wrong if you get this far. */ if (unlikely(first->rt_param.inh_task == second->rt_param.inh_task)) { @@ -220,8 +257,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) BUG_ON(!first->rt_param.inh_task && !second->rt_param.inh_task); - /* The task with the inherited priority wins. */ - if (!second->rt_param.inh_task) { + /* The task withOUT the inherited priority wins. */ + if (second->rt_param.inh_task) { /* * common with aux tasks. 
TRACE_CUR("unusual comparison: " diff --git a/litmus/litmus.c b/litmus/litmus.c index 1b4182ac3337..e2bf2a7ad01b 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -338,7 +338,7 @@ void init_gpu_affinity_state(struct task_struct* p) //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); -#if 0 +#if 0 // emperical; p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); @@ -362,13 +362,13 @@ static void reinit_litmus_state(struct task_struct* p, int restore) { struct rt_task user_config = {}; void* ctrl_page = NULL; - + #ifdef CONFIG_LITMUS_NESTED_LOCKING binheap_order_t prio_order = NULL; #endif TRACE_TASK(p, "reinit_litmus_state: restore = %d\n", restore); - + if (restore) { /* Safe user-space provided configuration data. * and allocated page. */ @@ -419,7 +419,7 @@ static void reinit_litmus_state(struct task_struct* p, int restore) if (!restore) memset(&p->aux_data, 0, sizeof(p->aux_data)); #endif - + /* Restore preserved fields. */ if (restore) { p->rt_param.task_params = user_config; @@ -437,11 +437,8 @@ static void reinit_litmus_state(struct task_struct* p, int restore) } -#ifdef CONFIG_LITMUS_LOCKING -long __litmus_admit_task(struct task_struct* tsk, int clear_aux) -#else + long __litmus_admit_task(struct task_struct* tsk) -#endif { long retval = 0; unsigned long flags; @@ -486,14 +483,6 @@ long __litmus_admit_task(struct task_struct* tsk) atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD); #endif -#ifdef CONFIG_LITMUS_LOCKING - /* turns out our aux thread isn't really an aux thread. */ - if (clear_aux && tsk_rt(tsk)->is_aux_task) { - exit_aux_task(tsk); - tsk_rt(tsk)->has_aux_tasks = 1; - } -#endif - retval = litmus->admit_task(tsk); if (!retval) { @@ -511,7 +500,7 @@ out_unlock: long litmus_admit_task(struct task_struct* tsk) { long retval = 0; - + BUG_ON(is_realtime(tsk)); if (get_rt_relative_deadline(tsk) == 0 || @@ -533,12 +522,8 @@ long litmus_admit_task(struct task_struct* tsk) goto out; } -#ifdef CONFIG_LITMUS_LOCKING - retval = __litmus_admit_task(tsk, (tsk_rt(tsk)->task_params.period != MAGIC_AUX_TASK_PERIOD)); -#else retval = __litmus_admit_task(tsk); -#endif - + out: return retval; } @@ -624,18 +609,21 @@ out: */ void litmus_fork(struct task_struct* p) { - reinit_litmus_state(p, 0); - if (is_realtime(p)) { TRACE_TASK(p, "fork, is real-time\n"); + /* clean out any litmus related state, don't preserve anything */ - //reinit_litmus_state(p, 0); + reinit_litmus_state(p, 0); + /* Don't let the child be a real-time task. */ p->sched_reset_on_fork = 1; + } else { /* non-rt tasks might have ctrl_page set */ tsk_rt(p)->ctrl_page = NULL; - + + reinit_litmus_state(p, 0); + /* still don't inherit any parental parameters */ //memset(&p->rt_param, 0, sizeof(p->rt_param)); //memset(&p->aux_data, 0, sizeof(p->aux_data)); @@ -736,10 +724,6 @@ static int __init _init_litmus(void) init_topology(); #endif -#ifdef CONFIG_LITMUS_NVIDIA - //init_nvidia_info(); -#endif - return 0; } diff --git a/litmus/preempt.c b/litmus/preempt.c index a2cae3648e15..c9ccc80c1df9 100644 --- a/litmus/preempt.c +++ b/litmus/preempt.c @@ -74,25 +74,37 @@ void litmus_reschedule(int cpu) * is not aware of the need to reschedule at this point. */ /* is a context switch in progress? 
*/ - if (cpu_is_in_sched_state(cpu, TASK_PICKED)) + if (cpu_is_in_sched_state(cpu, TASK_PICKED)) { picked_transition_ok = sched_state_transition_on( cpu, TASK_PICKED, PICKED_WRONG_TASK); + TRACE_CUR("cpu %d: picked_transition_ok = %d\n", cpu, picked_transition_ok); + } + else { + TRACE_CUR("cpu %d: picked_transition_ok = 0 (static)\n", cpu); + } + if (!picked_transition_ok && cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) { /* We either raced with the end of the context switch, or the * CPU was in TASK_SCHEDULED anyway. */ scheduled_transition_ok = sched_state_transition_on( cpu, TASK_SCHEDULED, SHOULD_SCHEDULE); + TRACE_CUR("cpu %d: scheduled_transition_ok = %d\n", cpu, scheduled_transition_ok); + } + else { + TRACE_CUR("cpu %d: scheduled_transition_ok = 0 (static)\n", cpu); } /* If the CPU was in state TASK_SCHEDULED, then we need to cause the * scheduler to be invoked. */ if (scheduled_transition_ok) { - if (smp_processor_id() == cpu) + if (smp_processor_id() == cpu) { set_tsk_need_resched(current); - else + } + else { smp_send_reschedule(cpu); + } } TRACE_STATE("%s picked-ok:%d sched-ok:%d\n", @@ -103,11 +115,16 @@ void litmus_reschedule(int cpu) void litmus_reschedule_local(void) { - if (is_in_sched_state(TASK_PICKED)) + if (is_in_sched_state(TASK_PICKED)) { set_sched_state(PICKED_WRONG_TASK); + + TRACE_CUR("cpu %d: transitioned to PICKED_WRONG_TASK\n", smp_processor_id()); + } else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) { set_sched_state(WILL_SCHEDULE); set_tsk_need_resched(current); + + TRACE_CUR("cpu %d: transitioned to WILL_SCHEDULE\n", smp_processor_id()); } } diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 5b8ca6698423..270e06c20bbf 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -167,6 +167,7 @@ struct tasklet_head gsnedf_pending_tasklets; * TRACE() log. #define WANT_ALL_SCHED_EVENTS */ +//#define WANT_ALL_SCHED_EVENTS static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) { @@ -209,8 +210,17 @@ static noinline void link_task_to_cpu(struct task_struct* linked, struct task_struct* tmp; int on_cpu; + //int print = (linked != NULL || entry->linked != NULL); + BUG_ON(linked && !is_realtime(linked)); + /* + if (print) { + TRACE_CUR("linked = %s/%d\n", (linked) ? linked->comm : "(nil)", (linked)? linked->pid : 0); + TRACE_CUR("entry->linked = %s/%d\n", (entry->linked) ? entry->linked->comm : "(nil)", (entry->linked)? entry->linked->pid : 0); + } + */ + /* Currently linked task is set to be unlinked. */ if (entry->linked) { entry->linked->rt_param.linked_on = NO_CPU; @@ -246,12 +256,18 @@ static noinline void link_task_to_cpu(struct task_struct* linked, linked->rt_param.linked_on = entry->cpu; } entry->linked = linked; -#ifdef WANT_ALL_SCHED_EVENTS - if (linked) - TRACE_TASK(linked, "linked to %d.\n", entry->cpu); - else - TRACE("NULL linked to %d.\n", entry->cpu); -#endif + + /* + if (print) { + //#ifdef WANT_ALL_SCHED_EVENTS + if (linked) + TRACE_TASK(linked, "linked to %d.\n", entry->cpu); + else + TRACE("NULL linked to %d.\n", entry->cpu); + //#endif + } + */ + update_cpu_position(entry); } @@ -297,36 +313,19 @@ static noinline void requeue(struct task_struct* task) BUG_ON(is_queued(task)); if (is_released(task, litmus_clock())) { - + if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { /* aux_task probably transitioned to real-time while it was blocked */ TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); + unlink(task); /* really needed? 
*/ } else { __add_ready(&gsnedf, task); - -#if 0 - if (tsk_rt(task)->has_aux_tasks) { - - TRACE_CUR("%s/%d is ready and has aux tasks.\n", task->comm, task->pid); - /* allow it's prio inheritance to act on aux threads */ - enable_aux_task_owner(task); - } -#endif } } else { /* it has got to wait */ add_release(&gsnedf, task); - -#if 0 - if (tsk_rt(task)->has_aux_tasks) { - - TRACE_CUR("%s/%d is waiting for release and has aux tasks.\n", task->comm, task->pid); - /* prevent prio inheritance from acting while it's not ready */ - disable_aux_task_owner(task); - } -#endif } } @@ -368,7 +367,8 @@ static void check_for_preemptions(void) &per_cpu(gsnedf_cpu_entries, task_cpu(task))); if (affinity) last = affinity; - else if (requeue_preempted_job(last->linked)) + + if (requeue_preempted_job(last->linked)) requeue(last->linked); } #else @@ -393,45 +393,11 @@ static noinline void gsnedf_job_arrival(struct task_struct* task) static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) { unsigned long flags; - //struct bheap_node* node; raw_spin_lock_irqsave(&gsnedf_lock, flags); -#if 0 - node = tasks->head; - while(node) { - struct task_struct *task = bheap2task(node); - - if (tsk_rt(task)->has_aux_tasks) { - - TRACE_CUR("%s/%d is ready and has aux tasks.\n", task->comm, task->pid); - - /* allow it's prio inheritance to act on aux threads */ - enable_aux_task_owner(task); - } - - /* pre-order sub-tree traversal */ - if (node->child) { - /* go down */ - node = node->child; - } - else if(node->parent && node->parent->next) { - /* go up a level and across */ - node = node->parent->next; - } - else if(!node->parent && node->next) { - /* go to the next binomial tree */ - node = node->next; - } - else { - /* the end! */ - node = NULL; - } - } -#endif - __merge_ready(rt, tasks); - + check_for_preemptions(); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); @@ -449,12 +415,12 @@ static noinline void job_completion(struct task_struct *t, int forced) #endif TRACE_TASK(t, "job_completion().\n"); - + /* set flags */ set_rt_flags(t, RT_F_SLEEP); /* prepare for next period */ prepare_for_next_period(t); - + if (is_released(t, litmus_clock())) sched_trace_task_release(t); /* unlink */ @@ -497,6 +463,10 @@ static void gsnedf_tick(struct task_struct* t) } } } + + if(is_realtime(t)) { + TRACE_TASK(t, "tick %llu\n", litmus_clock()); + } } @@ -838,6 +808,8 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) int out_of_time, signal_budget, sleep, preempt, np, exists, blocks; struct task_struct* next = NULL; + //int completion = 0; + #ifdef CONFIG_RELEASE_MASTER /* Bail out early if we are the release master. * The release master never schedules any real-time tasks. @@ -873,22 +845,22 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); #endif - /* - if (exists) + if (exists) { TRACE_TASK(prev, "blocks:%d out_of_time:%d signal_budget: %d np:%d sleep:%d preempt:%d " "state:%d sig:%d\n", blocks, out_of_time, signal_budget, np, sleep, preempt, prev->state, signal_pending(prev)); - */ + } if (entry->linked && preempt) TRACE_TASK(prev, "will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); /* Send the signal that the budget has been exhausted */ - if (signal_budget) + if (signal_budget) { send_sigbudget(entry->scheduled); + } /* If a task blocks we have no choice but to reschedule. */ @@ -919,8 +891,10 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) * this. 
Don't do a job completion if we block (can't have timers running * for blocked jobs). */ - if (!np && (out_of_time || sleep) && !blocks) + if (!np && (out_of_time || sleep) && !blocks) { job_completion(entry->scheduled, !sleep); + //completion = 1; + } /* Link pending task if we became unlinked. */ @@ -953,8 +927,21 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) next = prev; } +#if 0 + if (completion) { + TRACE_CUR("switching away from a completion\n"); + } +#endif + sched_state_task_picked(); +#if 0 + if (next && is_realtime(next) && tsk_rt(next)->is_aux_task && !tsk_rt(next)->inh_task) { + TRACE_TASK(next, "is aux with no inheritance. preventing it from actually running.\n"); + next = NULL; + } +#endif + raw_spin_unlock(&gsnedf_lock); #ifdef WANT_ALL_SCHED_EVENTS @@ -965,7 +952,7 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) else if (exists && !next) TRACE("becomes idle at %llu.\n", litmus_clock()); #endif - + return next; } @@ -991,7 +978,7 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) unsigned long flags; cpu_entry_t* entry; - TRACE("gsn edf: task new %d\n", t->pid); + TRACE("gsn edf: task new = %d on_rq = %d running = %d\n", t->pid, on_rq, running); raw_spin_lock_irqsave(&gsnedf_lock, flags); @@ -1060,11 +1047,11 @@ static void gsnedf_task_wake_up(struct task_struct *task) #endif if (tsk_rt(task)->has_aux_tasks) { - + TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); disable_aux_task_owner(task); } - + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } @@ -1081,11 +1068,11 @@ static void gsnedf_task_block(struct task_struct *t) unlink(t); if (tsk_rt(t)->has_aux_tasks) { - + TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); enable_aux_task_owner(t); } - + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -1105,17 +1092,17 @@ static void gsnedf_task_exit(struct task_struct * t) exit_aux_task(t); /* cannot be called with gsnedf_lock held */ } #endif - + /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); - + #ifdef CONFIG_LITMUS_LOCKING /* make sure we clean up on our way out */ if(tsk_rt(t)->has_aux_tasks) { disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ } #endif - + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; @@ -1161,7 +1148,7 @@ static int __increase_priority_inheritance(struct task_struct* t, prio_inh->comm, prio_inh->pid); goto out; } - + #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) 
*/ @@ -1221,8 +1208,8 @@ static int __increase_priority_inheritance(struct task_struct* t, &gsnedf.ready_queue); check_for_preemptions(); } - - + + /* propagate to aux tasks */ if (tsk_rt(t)->has_aux_tasks) { aux_task_owner_increase_priority(t); @@ -1242,7 +1229,7 @@ static int __increase_priority_inheritance(struct task_struct* t, success = 0; } #endif - + out: return success; } @@ -1251,7 +1238,7 @@ out: static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int success; - + raw_spin_lock(&gsnedf_lock); success = __increase_priority_inheritance(t, prio_inh); @@ -1267,7 +1254,7 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str #endif raw_spin_unlock(&gsnedf_lock); - + #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) if(tsk_rt(t)->held_gpus) { int i; @@ -1286,7 +1273,7 @@ static int __decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int success = 1; - + if (prio_inh == tsk_rt(t)->inh_task) { /* relationship already established. */ TRACE_TASK(t, "already inherits priority from %s/%d\n", @@ -1294,7 +1281,7 @@ static int __decrease_priority_inheritance(struct task_struct* t, (prio_inh) ? prio_inh->pid : 0); goto out; } - + #ifdef CONFIG_LITMUS_NESTED_LOCKING if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { #endif @@ -1331,7 +1318,7 @@ static int __decrease_priority_inheritance(struct task_struct* t, } raw_spin_unlock(&gsnedf.release_lock); } - + /* propagate to aux tasks */ if (tsk_rt(t)->has_aux_tasks) { aux_task_owner_decrease_priority(t); @@ -1349,7 +1336,7 @@ static int __decrease_priority_inheritance(struct task_struct* t, success = 0; } #endif - + out: return success; } @@ -1358,11 +1345,11 @@ static void decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int success; - + raw_spin_lock(&gsnedf_lock); - + success = __decrease_priority_inheritance(t, prio_inh); - + #ifdef CONFIG_LITMUS_SOFTIRQD if(tsk_rt(t)->cur_klitirqd != NULL) { @@ -1374,7 +1361,7 @@ static void decrease_priority_inheritance(struct task_struct* t, #endif raw_spin_unlock(&gsnedf_lock); - + #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) if(tsk_rt(t)->held_gpus) { int i; diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c index 9a6fe487718e..62854b576796 100644 --- a/litmus/sched_litmus.c +++ b/litmus/sched_litmus.c @@ -177,8 +177,10 @@ static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, litmus->task_wake_up(p); rq->litmus.nr_running++; - } else + } else { TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); + //WARN_ON(1); + } } static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index d24c9167cff8..f9423861eb1f 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -31,11 +31,19 @@ void preempt_if_preemptable(struct task_struct* t, int cpu) int reschedule = 0; - if (!t) + TRACE_CUR("preempt_if_preemptable: %s/%d\n", + (t) ? t->comm : "(nil)", + (t) ? 
t->pid : 0); + + if (!t) { + TRACE_CUR("unconditionally reshcedule\n"); /* move non-real-time task out of the way */ reschedule = 1; + } else { if (smp_processor_id() == cpu) { + TRACE_CUR("preempt local cpu.\n"); + /* local CPU case */ /* check if we need to poke userspace */ if (is_user_np(t)) @@ -47,14 +55,22 @@ void preempt_if_preemptable(struct task_struct* t, int cpu) * currently-executing task */ reschedule = 1; } else { + int is_knp = is_kernel_np(t); + int reqexit = request_exit_np_atomic(t); + TRACE_CUR("preempt remote cpu: isknp = %d reqexit = %d\n", is_knp, reqexit); + /* Remote CPU case. Only notify if it's not a kernel * NP section and if we didn't set the userspace * flag. */ - reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); + //reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); + reschedule = !(is_knp || reqexit); } } - if (likely(reschedule)) + + if (likely(reschedule)) { + TRACE_CUR("calling litmus_reschedule()\n"); litmus_reschedule(cpu); + } } -- cgit v1.2.2 From c58a74c8ad2d2b1b01be12afb9bac58dfef0d16a Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 16 Sep 2012 18:29:36 -0400 Subject: Added CONFIG_REALTIME_AUX_TASKS option Auxillary task features were enabled by CONFIG_LITMUS_LOCKING. Made auxillary tasks a seperate feature that depends upon CONFIG_LITMUS_LOCKING. --- include/linux/sched.h | 2 ++ include/litmus/rt_param.h | 4 +++- include/litmus/unistd_32.h | 2 +- include/litmus/unistd_64.h | 6 +++--- litmus/Kconfig | 31 +++++++++++++++++++++++++++++++ litmus/Makefile | 3 ++- litmus/aux_tasks.c | 23 ++++++++++------------- litmus/edf_common.c | 29 ++++++++++++++++++----------- litmus/litmus.c | 8 ++------ litmus/sched_gsn_edf.c | 26 ++++++++++++++++++-------- 10 files changed, 90 insertions(+), 44 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5d1c041be809..d580959f9f5c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1534,7 +1534,9 @@ struct task_struct { /*** LITMUS RT parameters and state ***/ struct rt_param rt_param; +#ifdef CONFIG_REALTIME_AUX_TASKS struct aux_data aux_data; +#endif /*****/ /* references to PI semaphores, etc. 
*/ diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index c45ba23d7650..8b9e14c461dc 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -285,7 +285,7 @@ struct rt_param { #endif -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS unsigned int is_aux_task:1; unsigned int has_aux_tasks:1; @@ -358,12 +358,14 @@ struct rt_param { struct control_page * ctrl_page; }; +#ifdef CONFIG_REALTIME_AUX_TASKS struct aux_data { struct list_head aux_tasks; struct binheap aux_task_owners; unsigned int initialized:1; }; +#endif /* Possible RT flags */ #define RT_F_RUNNING 0x00000000 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 4fd92956d13f..c86b743408ed 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -21,6 +21,6 @@ #define __NR_litmus_dgl_unlock __LSC(13) #define __NR_register_nv_device __LSC(14) -#define __NR_slave_non_rt_threads _LSC(15) +#define __NR_set_aux_tasks _LSC(15) #define NR_litmus_syscalls 16 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index abb45c181e8e..3825bc129dbd 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -33,10 +33,10 @@ __SYSCALL(__NR_null_call, sys_null_call) __SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock) #define __NR_litmus_dgl_unlock __LSC(13) __SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock) -#define __NR_register_nv_device __LSC(14) +#define __NR_register_nv_device __LSC(14) __SYSCALL(__NR_register_nv_device, sys_register_nv_device) -#define __NR_slave_non_rt_threads __LSC(15) -__SYSCALL(__NR_slave_non_rt_threads, sys_slave_non_rt_threads) +#define __NR_set_aux_tasks __LSC(15) +__SYSCALL(__NR_set_aux_tasks, sys_set_aux_tasks) #define NR_litmus_syscalls 16 diff --git a/litmus/Kconfig b/litmus/Kconfig index 95e0671e2aec..c5dbc4a176ae 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -34,6 +34,37 @@ config RELEASE_MASTER (http://www.cs.unc.edu/~anderson/papers.html). Currently only supported by GSN-EDF. +config REALTIME_AUX_TASKS + bool "Real-Time Auxillary Tasks" + depends on LITMUS_LOCKING + default n + help + Adds a system call that forces all non-real-time threads in a process + to become auxillary real-time tasks. These tasks inherit the priority of + the highest-prio *BLOCKED* real-time task (non-auxillary) in the process. + This allows the integration of COTS code that has background helper threads + used primarily for message passing and synchronization. If these + background threads are NOT real-time scheduled, then unbounded priority + inversions may occur if a real-time task blocks on a non-real-time thread. + + Beware of the following pitfalls: + 1) Auxillary threads should not be CPU intensive. They should mostly + block on mutexes and condition variables. Violating this will + likely prevent meaningful analysis. + 2) Since there may be more than one auxillary thread per process, + priority inversions may occur with respect to single-threaded + task models if/when one of threads are scheduled simultanously + with another of the same identity. + 3) Busy-wait deadlock is likely between normal real-time tasks and + auxillary tasks synchronize using _preemptive_ spinlocks that do + not use priority inheritance. + + These pitfalls are mitgated by the fact that auxillary tasks only + inherit priorities from blocked tasks (Blocking signifies that the + blocked task _may_ be waiting on an auxillary task to perform some + work.). 
Futher, auxillary tasks without an inherited priority are + _always_ scheduled with a priority less than any normal real-time task!! + endmenu menu "Real-Time Synchronization" diff --git a/litmus/Makefile b/litmus/Makefile index f2dd7be7ae4a..67d8b8ee72bc 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -18,6 +18,7 @@ obj-y = sched_plugin.o litmus.o \ bheap.o \ binheap.o \ ctrldev.o \ + aux_tasks.o \ sched_gsn_edf.o \ sched_psn_edf.o \ sched_pfp.o @@ -31,7 +32,7 @@ obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o -obj-$(CONFIG_LITMUS_LOCKING) += aux_tasks.o kfmlp_lock.o +obj-$(CONFIG_LITMUS_LOCKING) += kfmlp_lock.o obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index 5057137bbbea..b0617accdf7f 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -1,8 +1,8 @@ -#ifdef CONFIG_LITMUS_LOCKING - #include #include #include + +#ifdef CONFIG_REALTIME_AUX_TASKS #include #include @@ -23,14 +23,11 @@ static int admit_aux_task(struct task_struct *t) * fail-safe. */ struct rt_task tp = { - //.period = MAGIC_AUX_TASK_PERIOD, - //.relative_deadline = MAGIC_AUX_TASK_PERIOD, - .period = 1000000, /* has to wait 1 ms before it can run again once it has exhausted budget */ + .period = 1000000, /* 1ms */ .relative_deadline = 1000000, .exec_cost = 1000000, /* allow full utilization */ .phase = 0, .cpu = task_cpu(leader), /* take CPU of group leader */ - //.budget_policy = NO_ENFORCEMENT, .budget_policy = QUANTUM_ENFORCEMENT, .budget_signal_policy = NO_SIGNALS, .cls = RT_CLASS_BEST_EFFORT @@ -280,7 +277,7 @@ static int aux_task_owner_max_priority_order(struct binheap_node *a, } -static long __do_enable_slave_non_rt_threads(void) +static long __do_enable_aux_tasks(void) { long retval = 0; struct task_struct *leader; @@ -344,7 +341,7 @@ static long __do_enable_slave_non_rt_threads(void) return retval; } -static long __do_disable_slave_non_rt_threads(void) +static long __do_disable_aux_tasks(void) { long retval = 0; struct task_struct *leader; @@ -385,17 +382,17 @@ static long __do_disable_slave_non_rt_threads(void) return retval; } -asmlinkage long sys_slave_non_rt_threads(int enable) +asmlinkage long sys_set_aux_tasks(int enable) { long retval; read_lock_irq(&tasklist_lock); if (enable) { - retval = __do_enable_slave_non_rt_threads(); + retval = __do_enable_aux_tasks(); } else { - retval = __do_disable_slave_non_rt_threads(); + retval = __do_disable_aux_tasks(); } read_unlock_irq(&tasklist_lock); @@ -405,9 +402,9 @@ asmlinkage long sys_slave_non_rt_threads(int enable) #else -asmlinkage long sys_slave_non_rt_tasks(int enable) +asmlinkage long sys_set_aux_tasks(int enable) { - printk("Unsupported. Recompile with CONFIG_LITMUS_LOCKING.\n"); + printk("Unsupported. 
Recompile with CONFIG_REALTIME_AUX_TASKS.\n"); return -EINVAL; } diff --git a/litmus/edf_common.c b/litmus/edf_common.c index ca06f6ec103e..7e0d3a5d0c4d 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -22,7 +22,7 @@ #include #endif -//#ifdef CONFIG_EDF_TIE_BREAK_HASH +#if defined(CONFIG_EDF_TIE_BREAK_HASH) || defined(CONFIG_REALTIME_AUX_TASKS) #include static inline long edf_hash(struct task_struct *t) { @@ -41,8 +41,9 @@ static inline long edf_hash(struct task_struct *t) */ return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); } -//#endif +#endif +#ifdef CONFIG_REALTIME_AUX_TASKS int aux_tie_break(struct task_struct *first, struct task_struct *second) { long fhash = edf_hash(first); @@ -57,6 +58,7 @@ int aux_tie_break(struct task_struct *first, struct task_struct *second) } return 0; } +#endif /* edf_higher_prio - returns true if first has a higher EDF priority @@ -75,11 +77,6 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) struct task_struct *first_task = first; struct task_struct *second_task = second; - int first_lo_aux; - int second_lo_aux; - int first_hi_aux; - int second_hi_aux; - /* There is no point in comparing a task to itself. */ if (first && first == second) { TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid); @@ -93,8 +90,14 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return first && !second; } -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS + { + /* statically prioritize all auxillary tasks that have no inheritance + * below all other regular real-time tasks. + */ + int first_lo_aux, second_lo_aux; + int first_hi_aux, second_hi_aux; first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task; @@ -120,8 +123,10 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); return aux_hi_tie_break; } + } +#endif - +#ifdef CONFIG_LITMUS_LOCKING /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. */ @@ -233,11 +238,13 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } #endif + +#ifdef CONFIG_REALTIME_AUX_TASKS + /* is this dead code? */ if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { - TRACE_CUR("AUX BREAK!\n"); return 1; } - +#endif /* Something could be wrong if you get this far. */ if (unlikely(first->rt_param.inh_task == diff --git a/litmus/litmus.c b/litmus/litmus.c index e2bf2a7ad01b..d368202ab8c3 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -25,7 +25,7 @@ #include #endif -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS #include #endif @@ -413,7 +413,7 @@ static void reinit_litmus_state(struct task_struct* p, int restore) /* Cleanup everything else. */ memset(&p->rt_param, 0, sizeof(p->rt_param)); -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS /* also clear out the aux_data. the !restore case is only called on * fork (initial thread creation). 
*/ if (!restore) @@ -623,10 +623,6 @@ void litmus_fork(struct task_struct* p) tsk_rt(p)->ctrl_page = NULL; reinit_litmus_state(p, 0); - - /* still don't inherit any parental parameters */ - //memset(&p->rt_param, 0, sizeof(p->rt_param)); - //memset(&p->aux_data, 0, sizeof(p->aux_data)); } /* od tables are never inherited across a fork */ diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 270e06c20bbf..5fc330f14a0e 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -29,7 +29,6 @@ #ifdef CONFIG_LITMUS_LOCKING #include -#include #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING @@ -41,6 +40,10 @@ #include #endif +#ifdef CONFIG_REALTIME_AUX_TASKS +#include +#endif + #ifdef CONFIG_LITMUS_SOFTIRQD #include #endif @@ -313,15 +316,15 @@ static noinline void requeue(struct task_struct* task) BUG_ON(is_queued(task)); if (is_released(task, litmus_clock())) { - +#ifdef CONFIG_REALTIME_AUX_TASKS if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { /* aux_task probably transitioned to real-time while it was blocked */ TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); unlink(task); /* really needed? */ } - else { + else +#endif __add_ready(&gsnedf, task); - } } else { /* it has got to wait */ add_release(&gsnedf, task); @@ -1046,11 +1049,12 @@ static void gsnedf_task_wake_up(struct task_struct *task) set_rt_flags(task, RT_F_RUNNING); #endif +#ifdef CONFIG_REALTIME_AUX_TASKS if (tsk_rt(task)->has_aux_tasks) { - TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); disable_aux_task_owner(task); } +#endif gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); @@ -1067,11 +1071,13 @@ static void gsnedf_task_block(struct task_struct *t) unlink(t); +#ifdef CONFIG_REALTIME_AUX_TASKS if (tsk_rt(t)->has_aux_tasks) { TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); enable_aux_task_owner(t); } +#endif raw_spin_unlock_irqrestore(&gsnedf_lock, flags); @@ -1087,7 +1093,7 @@ static void gsnedf_task_exit(struct task_struct * t) gsnedf_change_prio_pai_tasklet(t, NULL); #endif -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS if (tsk_rt(t)->is_aux_task) { exit_aux_task(t); /* cannot be called with gsnedf_lock held */ } @@ -1096,7 +1102,7 @@ static void gsnedf_task_exit(struct task_struct * t) /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS /* make sure we clean up on our way out */ if(tsk_rt(t)->has_aux_tasks) { disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ @@ -1209,11 +1215,12 @@ static int __increase_priority_inheritance(struct task_struct* t, check_for_preemptions(); } - +#ifdef CONFIG_REALTIME_AUX_TASKS /* propagate to aux tasks */ if (tsk_rt(t)->has_aux_tasks) { aux_task_owner_increase_priority(t); } +#endif } #ifdef CONFIG_LITMUS_NESTED_LOCKING } @@ -1319,10 +1326,13 @@ static int __decrease_priority_inheritance(struct task_struct* t, raw_spin_unlock(&gsnedf.release_lock); } +#ifdef CONFIG_REALTIME_AUX_TASKS /* propagate to aux tasks */ if (tsk_rt(t)->has_aux_tasks) { aux_task_owner_decrease_priority(t); } +#endif + #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { -- cgit v1.2.2 From 6b3b85da89aee11ed47369833470b9282dd5994f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 16 Sep 2012 18:45:05 -0400 Subject: C-EDF support for auxiliary tasks. Extended auxiliary task support to C-EDF. Modeled after G-EDF.
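For reference, the G-EDF pattern being ported to C-EDF reduces to the sketch below. The hook names are illustrative placeholders (not functions added by this patch); enable_aux_task_owner(), disable_aux_task_owner(), and tsk_rt() are the real helpers invoked from the plugin callbacks in the diff that follows:

    #ifdef CONFIG_REALTIME_AUX_TASKS
    /* from the plugin's task_block callback, with the domain lock held:
     * the task may now be waiting on one of its aux threads, so let the
     * aux threads inherit its priority */
    static void example_on_block(struct task_struct *t)
    {
            if (tsk_rt(t)->has_aux_tasks)
                    enable_aux_task_owner(t);
    }

    /* from the plugin's task_wake_up callback: the task is runnable
     * again, so its aux threads must stop inheriting from it */
    static void example_on_wake_up(struct task_struct *t)
    {
            if (tsk_rt(t)->has_aux_tasks)
                    disable_aux_task_owner(t);
    }
    #endif

The task_exit path follows the same idea: exit_aux_task() for an aux thread itself, disable_aux_task_owner() for an owner, as in the hunks below.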
--- litmus/sched_cedf.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++--- litmus/sched_gsn_edf.c | 6 ++-- 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index f030f027b486..f5c9807090a1 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -58,6 +58,10 @@ #include #endif +#ifdef CONFIG_REALTIME_AUX_TASKS +#include +#endif + /* to configure the cluster size */ #include @@ -313,7 +317,15 @@ static noinline void requeue(struct task_struct* task) BUG_ON(is_queued(task)); if (is_released(task, litmus_clock())) - __add_ready(&cluster->domain, task); +#ifdef CONFIG_REALTIME_AUX_TASKS + if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { + /* aux_task probably transitioned to real-time while it was blocked */ + TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); + unlink(task); /* really needed? */ + } + else +#endif + __add_ready(&cluster->domain, task); else { /* it has got to wait */ add_release(&cluster->domain, task); @@ -1019,9 +1031,14 @@ static void cedf_task_wake_up(struct task_struct *task) set_rt_flags(task, RT_F_RUNNING); // periodic model #endif - if(tsk_rt(task)->linked_on == NO_CPU) - cedf_job_arrival(task); +#ifdef CONFIG_REALTIME_AUX_TASKS + if (tsk_rt(task)->has_aux_tasks) { + TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); + disable_aux_task_owner(task); + } +#endif + cedf_job_arrival(task); raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); } @@ -1036,7 +1053,17 @@ static void cedf_task_block(struct task_struct *t) /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + unlink(t); + +#ifdef CONFIG_REALTIME_AUX_TASKS + if (tsk_rt(t)->has_aux_tasks) { + + TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); + enable_aux_task_owner(t); + } +#endif + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); BUG_ON(!is_realtime(t)); @@ -1052,8 +1079,22 @@ static void cedf_task_exit(struct task_struct * t) cedf_change_prio_pai_tasklet(t, NULL); #endif +#ifdef CONFIG_REALTIME_AUX_TASKS + if (tsk_rt(t)->is_aux_task) { + exit_aux_task(t); /* cannot be called with gsnedf_lock held */ + } +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + +#ifdef CONFIG_REALTIME_AUX_TASKS + /* make sure we clean up on our way out */ + if(tsk_rt(t)->has_aux_tasks) { + disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ + } +#endif + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { cpu_entry_t *cpu; @@ -1092,8 +1133,16 @@ static int __increase_priority_inheritance(struct task_struct* t, int success = 1; int linked_on; int check_preempt = 0; + cedf_domain_t* cluster; - cedf_domain_t* cluster = task_cpu_cluster(t); + if (prio_inh && prio_inh == effective_priority(t)) { + /* relationship already established. 
*/ + TRACE_TASK(t, "already has effective priority of %s/%d\n", + prio_inh->comm, prio_inh->pid); + goto out; + } + + cluster = task_cpu_cluster(t); #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ @@ -1155,6 +1204,13 @@ static int __increase_priority_inheritance(struct task_struct* t, &cluster->domain.ready_queue); check_for_preemptions(cluster); } + +#ifdef CONFIG_REALTIME_AUX_TASKS + /* propagate to aux tasks */ + if (tsk_rt(t)->has_aux_tasks) { + aux_task_owner_increase_priority(t); + } +#endif } #ifdef CONFIG_LITMUS_NESTED_LOCKING } @@ -1170,6 +1226,8 @@ static int __increase_priority_inheritance(struct task_struct* t, success = 0; } #endif + +out: return success; } @@ -1211,6 +1269,15 @@ static int __decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int success = 1; + + if (prio_inh == tsk_rt(t)->inh_task) { + /* relationship already established. */ + TRACE_TASK(t, "already inherits priority from %s/%d\n", + (prio_inh) ? prio_inh->comm : "(nil)", + (prio_inh) ? prio_inh->pid : 0); + goto out; + } + #ifdef CONFIG_LITMUS_NESTED_LOCKING if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { #endif @@ -1248,6 +1315,14 @@ static int __decrease_priority_inheritance(struct task_struct* t, } raw_spin_unlock(&cluster->domain.release_lock); } + +#ifdef CONFIG_REALTIME_AUX_TASKS + /* propagate to aux tasks */ + if (tsk_rt(t)->has_aux_tasks) { + aux_task_owner_decrease_priority(t); + } +#endif + #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { @@ -1261,6 +1336,8 @@ static int __decrease_priority_inheritance(struct task_struct* t, success = 0; } #endif + +out: return success; } diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 5fc330f14a0e..ed9b4697a5a2 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -170,7 +170,6 @@ struct tasklet_head gsnedf_pending_tasklets; * TRACE() log. #define WANT_ALL_SCHED_EVENTS */ -//#define WANT_ALL_SCHED_EVENTS static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) { @@ -370,8 +369,7 @@ static void check_for_preemptions(void) &per_cpu(gsnedf_cpu_entries, task_cpu(task))); if (affinity) last = affinity; - - if (requeue_preempted_job(last->linked)) + else if (requeue_preempted_job(last->linked)) requeue(last->linked); } #else @@ -467,9 +465,11 @@ static void gsnedf_tick(struct task_struct* t) } } + /* if(is_realtime(t)) { TRACE_TASK(t, "tick %llu\n", litmus_clock()); } + */ } -- cgit v1.2.2 From 2d1fe1a20a9f2784ec4172429f31c228274ed8ac Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 16 Sep 2012 20:05:32 -0400 Subject: Add support for CUDA 5.0 (release candidate) --- litmus/Kconfig | 10 ++++++++-- litmus/nvidia_info.c | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/litmus/Kconfig b/litmus/Kconfig index c5dbc4a176ae..b28fe2c09acd 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -435,11 +435,17 @@ config NV_MAX_SIMULT_USERS choice prompt "CUDA/Driver Version Support" - default CUDA_4_0 + default CUDA_5_0 depends on LITMUS_NVIDIA help Select the version of CUDA/driver to support. - + +config CUDA_5_0 + bool "CUDA 5.0" + depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS + help + Support CUDA 5.0 RCx (dev. 
driver version: x86_64-304.33) + config CUDA_4_0 bool "CUDA 4.0" depends on LITMUS_NVIDIA diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index b6ead58802f6..d04c6efa5f05 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -31,6 +31,9 @@ typedef union typedef struct { NvU64 address; +#ifdef CONFIG_CUDA_5_0 + NvU64 strapped_size; +#endif NvU64 size; NvU32 offset; NvU32 *map; @@ -42,7 +45,9 @@ typedef struct void *priv; /* private data */ void *os_state; /* os-specific device state */ +#ifndef CONFIG_CUDA_5_0 int rmInitialized; +#endif int flags; /* PCI config info */ @@ -101,7 +106,7 @@ typedef struct litmus_nv_linux_state_s { void *pci_cfgchk_sp; void *isr_bh_sp; -#ifdef CONFIG_CUDA_4_0 +#if defined(CONFIG_CUDA_4_0) || defined(CONFIG_CUDA_5_0) char registry_keys[512]; #endif -- cgit v1.2.2 From 0fb745065f08796fe4f17acb9b9edacc1e374842 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 17 Sep 2012 09:57:37 -0400 Subject: Do processor state transitions in schedule_tail(). Fixes a bug in Litmus where processor scheduling states could become corrupted. Corruption can occur when a just-forked thread is externally forced to be scheduled by SCHED_LITMUS before this just-forked thread can complete post-fork processing. Specifically, before schedule_tail() has completed. --- kernel/sched.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 2229d0deec4b..65aba7ec564d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3163,16 +3163,26 @@ static inline void post_schedule(struct rq *rq) asmlinkage void schedule_tail(struct task_struct *prev) __releases(rq->lock) { - struct rq *rq = this_rq(); - + struct rq *rq; + + preempt_disable(); + + rq = this_rq(); finish_task_switch(rq, prev); + sched_trace_task_switch_to(current); + /* * FIXME: do we need to worry about rq being invalidated by the * task_switch? */ post_schedule(rq); + if (sched_state_validate_switch()) + litmus_reschedule_local(); + + preempt_enable(); + #ifdef __ARCH_WANT_UNLOCKED_CTXSW /* In this case, finish_task_switch does not reenable preemption */ preempt_enable(); -- cgit v1.2.2 From ba54b1096870fba6e3bbb99aafc713e76b747353 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 17 Sep 2012 19:31:04 -0400 Subject: Fixed three bugs with aux threads and nested locks Fixes three bugs with nested locks: 1) List of aux threads could become corrupted. -- moved modifications to be within scheduler lock. 2) Fixed bad EDF comparison ordering that could lead to schedule thrashing in an infinite loop. 3) Prevent aux threads from inheriting a priority from a task that is blocked on a real-time litmus lock. (since the aux threads can't possibly hold these locks, we don't have to worry about inheritance.)
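Item 3 is implemented by having the Litmus lock slow paths block through the new suspend_for_lock() helper (added in litmus/locking.c) instead of calling schedule() directly. A minimal sketch of the intended call site is below; example_lock_slowpath_tail() is a placeholder, and the real call sites are the *_lock() functions touched by this patch:

    static void example_lock_slowpath_tail(void)
    {
            /* caller has already enqueued current on the lock's wait
             * queue and marked it blocked */

            TS_LOCK_SUSPEND;

            /* block without exposing our priority to our own aux
             * threads: while we are blocked on a litmus lock,
             * inheritance already flows to the lock holder, so
             * suspend_for_lock() temporarily sets
             * tsk_rt(current)->hide_from_aux_tasks around schedule() */
            suspend_for_lock();

            TS_LOCK_RESUME;

            /* on return, the previous holder has handed the lock to us */
    }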
--- include/litmus/locking.h | 3 +++ include/litmus/rt_param.h | 1 + litmus/aux_tasks.c | 58 ++++++++++++++++++++++++++++++----------------- litmus/edf_common.c | 10 ++++++++ litmus/ikglp_lock.c | 2 +- litmus/kfmlp_lock.c | 2 +- litmus/locking.c | 29 ++++++++++++++++++++++++ litmus/rsm_lock.c | 2 +- litmus/sched_cedf.c | 17 ++++++-------- litmus/sched_gsn_edf.c | 19 +++++++--------- 10 files changed, 98 insertions(+), 45 deletions(-) diff --git a/include/litmus/locking.h b/include/litmus/locking.h index 296bbf6f7af0..4a5f198a0407 100644 --- a/include/litmus/locking.h +++ b/include/litmus/locking.h @@ -156,5 +156,8 @@ struct litmus_lock_ops { #endif +void suspend_for_lock(void); + + #endif diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 8b9e14c461dc..44f85a366574 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -288,6 +288,7 @@ struct rt_param { #ifdef CONFIG_REALTIME_AUX_TASKS unsigned int is_aux_task:1; unsigned int has_aux_tasks:1; + unsigned int hide_from_aux_tasks:1; struct list_head aux_task_node; struct binheap_node aux_task_owner_node; diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index b0617accdf7f..efda7dc0bd76 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -55,7 +55,7 @@ int exit_aux_task(struct task_struct *t) tsk_rt(t)->is_aux_task = 0; if (tsk_rt(t)->inh_task) { - litmus->decrease_prio(t, NULL); + litmus->__decrease_prio(t, NULL); } return retval; @@ -114,31 +114,37 @@ int aux_task_owner_increase_priority(struct task_struct *t) int retval = 0; struct task_struct *leader; struct task_struct *hp = NULL; + struct task_struct *hp_eff = NULL; - BUG_ON(!tsk_rt(t)->has_aux_tasks); BUG_ON(!is_realtime(t)); - BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); + BUG_ON(!tsk_rt(t)->has_aux_tasks); leader = t->group_leader; + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + WARN_ON(!is_running(t)); + TRACE_CUR("aux tasks may not inherit from %s/%d in group %s/%d\n", + t->comm, t->pid, leader->comm, leader->pid); + goto out; + } + TRACE_CUR("task %s/%d in group %s/%d increasing priority.\n", t->comm, t->pid, leader->comm, leader->pid); hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); + hp_eff = effective_priority(hp); - if (hp == t) { - goto out; // already hp, nothing to do. + if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */ + binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); } - binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); - hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - if (hp == t) { + if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */ + hp_eff = effective_priority(hp); TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - retval = aux_tasks_increase_priority(leader, - (tsk_rt(hp)->inh_task) ? 
tsk_rt(hp)->inh_task : hp); + retval = aux_tasks_increase_priority(leader, hp_eff); } out: @@ -150,30 +156,41 @@ int aux_task_owner_decrease_priority(struct task_struct *t) int retval = 0; struct task_struct *leader; struct task_struct *hp = NULL; - struct task_struct *new_hp = NULL; + struct task_struct *hp_eff = NULL; - BUG_ON(!tsk_rt(t)->has_aux_tasks); BUG_ON(!is_realtime(t)); - BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); + BUG_ON(!tsk_rt(t)->has_aux_tasks); leader = t->group_leader; + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + WARN_ON(!is_running(t)); + TRACE_CUR("aux tasks may not inherit from %s/%d in group %s/%d\n", + t->comm, t->pid, leader->comm, leader->pid); + goto out; + } + TRACE_CUR("task %s/%d in group %s/%d decresing priority.\n", t->comm, t->pid, leader->comm, leader->pid); hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); + hp_eff = effective_priority(hp); binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node); - new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), - struct task_struct, rt_param); - if (hp == t && new_hp != t) { - TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - retval = aux_tasks_decrease_priority(leader, - (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp); + if (hp == t) { /* t was originally the hp */ + struct task_struct *new_hp = + container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + if (effective_priority(new_hp) != hp_eff) { /* eff prio. of hp has changed */ + hp_eff = effective_priority(new_hp); + TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + retval = aux_tasks_decrease_priority(leader, hp_eff); + } } +out: return retval; } @@ -302,8 +319,7 @@ static long __do_enable_aux_tasks(void) tsk_rt(t)->task_params.period); /* inspect heap_node to see if it is an rt task */ - if (tsk_rt(t)->task_params.period == 0) { //|| - // tsk_rt(t)->task_params.period == MAGIC_AUX_TASK_PERIOD) { + if (tsk_rt(t)->task_params.period == 0) { if (!tsk_rt(t)->is_aux_task) { TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 7e0d3a5d0c4d..f4881452373d 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -237,6 +237,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) second_task->rt_param.is_proxy_thread) { return 1; } + else if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { #endif #ifdef CONFIG_REALTIME_AUX_TASKS @@ -244,6 +245,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { return 1; } + else if (tsk_rt(first)->is_aux_task == tsk_rt(second)->is_aux_task) { #endif /* Something could be wrong if you get this far. 
*/ @@ -281,6 +283,14 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } } + +#ifdef CONFIG_REALTIME_AUX_TASKS + } +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + } +#endif + } } } diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index 83b708ab85cb..bd7bfc0f48ac 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -963,7 +963,7 @@ int ikglp_lock(struct litmus_lock* l) TS_LOCK_SUSPEND; - schedule(); + suspend_for_lock(); TS_LOCK_RESUME; diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c index bff857ed8d4e..ab472330095d 100644 --- a/litmus/kfmlp_lock.c +++ b/litmus/kfmlp_lock.c @@ -267,7 +267,7 @@ int kfmlp_lock(struct litmus_lock* l) * when we wake up; we are guaranteed to have the lock since * there is only one wake up per release (or steal). */ - schedule(); + suspend_for_lock(); if(my_queue->owner == t) { diff --git a/litmus/locking.c b/litmus/locking.c index 12a23eb715cc..16c936ba8139 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -540,6 +540,35 @@ out: return passed; } + +void suspend_for_lock(void) +{ +#ifdef CONFIG_REALTIME_AUX_TASKS + unsigned int restore = 0; + struct task_struct *t = current; + unsigned int hide; + + if (tsk_rt(t)->has_aux_tasks) { + /* hide from aux tasks so they can't inherit our priority when we block + * for a litmus lock. inheritance is already going to a litmus lock + * holder. */ + hide = tsk_rt(t)->hide_from_aux_tasks; + restore = 1; + tsk_rt(t)->hide_from_aux_tasks = 1; + } +#endif + + schedule(); + +#ifdef CONFIG_REALTIME_AUX_TASKS + if (restore) { + /* restore our state */ + tsk_rt(t)->hide_from_aux_tasks = hide; + } +#endif +} + + #else // CONFIG_LITMUS_LOCKING struct fdso_ops generic_lock_ops = {}; diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c index 75ed87c5ed48..3dfd8ae9d221 100644 --- a/litmus/rsm_lock.c +++ b/litmus/rsm_lock.c @@ -289,7 +289,7 @@ int rsm_mutex_lock(struct litmus_lock* l) * there is only one wake up per release. 
*/ - schedule(); + suspend_for_lock(); TS_LOCK_RESUME; diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index f5c9807090a1..6746d4d6033e 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1032,7 +1032,7 @@ static void cedf_task_wake_up(struct task_struct *task) #endif #ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(task)->has_aux_tasks) { + if (tsk_rt(task)->has_aux_tasks && !tsk_rt(task)->hide_from_aux_tasks) { TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); disable_aux_task_owner(task); } @@ -1057,7 +1057,7 @@ static void cedf_task_block(struct task_struct *t) unlink(t); #ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(t)->has_aux_tasks) { + if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); enable_aux_task_owner(t); @@ -1079,19 +1079,16 @@ static void cedf_task_exit(struct task_struct * t) cedf_change_prio_pai_tasklet(t, NULL); #endif -#ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(t)->is_aux_task) { - exit_aux_task(t); /* cannot be called with gsnedf_lock held */ - } -#endif - /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cluster_lock, flags); #ifdef CONFIG_REALTIME_AUX_TASKS /* make sure we clean up on our way out */ - if(tsk_rt(t)->has_aux_tasks) { - disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ + if (unlikely(tsk_rt(t)->is_aux_task)) { + exit_aux_task(t); + } + else if(tsk_rt(t)->has_aux_tasks) { + disable_aux_task_owner(t); } #endif diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index ed9b4697a5a2..04b189e54b03 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1050,7 +1050,7 @@ static void gsnedf_task_wake_up(struct task_struct *task) #endif #ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(task)->has_aux_tasks) { + if (tsk_rt(task)->has_aux_tasks && !tsk_rt(task)->hide_from_aux_tasks) { TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); disable_aux_task_owner(task); } @@ -1072,7 +1072,7 @@ static void gsnedf_task_block(struct task_struct *t) unlink(t); #ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(t)->has_aux_tasks) { + if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); enable_aux_task_owner(t); @@ -1093,19 +1093,16 @@ static void gsnedf_task_exit(struct task_struct * t) gsnedf_change_prio_pai_tasklet(t, NULL); #endif -#ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(t)->is_aux_task) { - exit_aux_task(t); /* cannot be called with gsnedf_lock held */ - } -#endif - /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); #ifdef CONFIG_REALTIME_AUX_TASKS /* make sure we clean up on our way out */ - if(tsk_rt(t)->has_aux_tasks) { - disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ + if (unlikely(tsk_rt(t)->is_aux_task)) { + exit_aux_task(t); + } + else if(tsk_rt(t)->has_aux_tasks) { + disable_aux_task_owner(t); } #endif @@ -1664,7 +1661,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l) * there is only one wake up per release. */ - schedule(); + suspend_for_lock(); TS_LOCK_RESUME; -- cgit v1.2.2 From 33cb64c787070d6b60a02ea40064d717d3b9dc07 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 17 Sep 2012 19:42:28 -0400 Subject: Description of refined aux task inheritance. 
--- litmus/Kconfig | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/litmus/Kconfig b/litmus/Kconfig index b28fe2c09acd..a32f42898148 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -41,11 +41,12 @@ config REALTIME_AUX_TASKS help Adds a system call that forces all non-real-time threads in a process to become auxillary real-time tasks. These tasks inherit the priority of - the highest-prio *BLOCKED* real-time task (non-auxillary) in the process. - This allows the integration of COTS code that has background helper threads - used primarily for message passing and synchronization. If these - background threads are NOT real-time scheduled, then unbounded priority - inversions may occur if a real-time task blocks on a non-real-time thread. + the highest-prio *BLOCKED* (but NOT blocked on a Litmus lock) real-time + task (non-auxillary) in the process. This allows the integration of COTS + code that has background helper threads used primarily for message passing + and synchronization. If these background threads are NOT real-time scheduled, + then unbounded priority inversions may occur if a real-time task blocks on + a non-real-time thread. Beware of the following pitfalls: 1) Auxillary threads should not be CPU intensive. They should mostly @@ -65,6 +66,14 @@ config REALTIME_AUX_TASKS work.). Futher, auxillary tasks without an inherited priority are _always_ scheduled with a priority less than any normal real-time task!! + NOTE: Aux tasks do not _directly_ inherit a priority from rt tasks that + are blocked on Litmus locks. Aux task should be COTS code that know nothing + of Litmus, so they won't hold Litmus locks. Nothing the aux task can do can + _directly_ unblock the rt task blocked on a Litmus lock. However, the lock + holder that blocks the rt task CAN block on I/O and contribute its priority + to the aux tasks. Aux tasks may still _indirectly_ inherit the priority of + the blocked rt task via the lock holder. 
+ endmenu menu "Real-Time Synchronization" -- cgit v1.2.2 From 5dbf603e3ca76a3903b83aacc6bb19e6aa0d924a Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 11 Nov 2012 11:43:03 -0500 Subject: boost aux tasks unconditionally --- litmus/aux_tasks.c | 26 +++++++++++++++++++++++++- litmus/edf_common.c | 24 ++++++++++++++++++++---- litmus/litmus.c | 4 ++-- litmus/locking.c | 4 ++++ 4 files changed, 51 insertions(+), 7 deletions(-) diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index efda7dc0bd76..9572d960b46a 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -50,6 +50,7 @@ int exit_aux_task(struct task_struct *t) TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); +#if 0 list_del(&tsk_rt(t)->aux_task_node); tsk_rt(t)->is_aux_task = 0; @@ -57,6 +58,7 @@ int exit_aux_task(struct task_struct *t) if (tsk_rt(t)->inh_task) { litmus->__decrease_prio(t, NULL); } +#endif return retval; } @@ -64,6 +66,8 @@ int exit_aux_task(struct task_struct *t) static int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp) { int retval = 0; + +#if 0 struct list_head *pos; TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); @@ -81,6 +85,7 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); retval = litmus->__increase_prio(aux, hp); } +#endif return retval; } @@ -88,6 +93,8 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp) { int retval = 0; + +#if 0 struct list_head *pos; TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); @@ -105,6 +112,7 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s retval = litmus->__decrease_prio(aux, hp); } } +#endif return retval; } @@ -112,6 +120,8 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s int aux_task_owner_increase_priority(struct task_struct *t) { int retval = 0; + +#if 0 struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; @@ -146,6 +156,7 @@ int aux_task_owner_increase_priority(struct task_struct *t) TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); retval = aux_tasks_increase_priority(leader, hp_eff); } +#endif out: return retval; @@ -154,6 +165,8 @@ out: int aux_task_owner_decrease_priority(struct task_struct *t) { int retval = 0; + +#if 0 struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; @@ -189,6 +202,7 @@ int aux_task_owner_decrease_priority(struct task_struct *t) retval = aux_tasks_decrease_priority(leader, hp_eff); } } +#endif out: return retval; @@ -199,6 +213,8 @@ out: long enable_aux_task_owner(struct task_struct *t) { long retval = 0; + +#if 0 struct task_struct *leader = t->group_leader; struct task_struct *hp; @@ -226,7 +242,7 @@ long enable_aux_task_owner(struct task_struct *t) retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? 
tsk_rt(hp)->inh_task : hp); } - +#endif out: return retval; @@ -235,6 +251,8 @@ out: long disable_aux_task_owner(struct task_struct *t) { long retval = 0; + +#if 0 struct task_struct *leader = t->group_leader; struct task_struct *hp; struct task_struct *new_hp = NULL; @@ -273,6 +291,7 @@ long disable_aux_task_owner(struct task_struct *t) retval = aux_tasks_decrease_priority(leader, to_inh); } +#endif out: return retval; @@ -324,7 +343,10 @@ static long __do_enable_aux_tasks(void) TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ tsk_rt(t)->is_aux_task = 1; + +#if 0 list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); +#endif (void)admit_aux_task(t); } @@ -346,6 +368,7 @@ static long __do_enable_aux_tasks(void) } while(t != leader); +#if 0 if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); @@ -353,6 +376,7 @@ static long __do_enable_aux_tasks(void) retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); } +#endif return retval; } diff --git a/litmus/edf_common.c b/litmus/edf_common.c index f4881452373d..916b1b4309b7 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -92,10 +92,20 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) #ifdef CONFIG_REALTIME_AUX_TASKS { - /* statically prioritize all auxillary tasks that have no inheritance - * below all other regular real-time tasks. - */ + /* run aux tasks at max priority */ + if (first->rt_param.is_aux_task != second->rt_param.is_aux_task) + { + return (first->rt_param.is_aux_task > second->rt_param.is_aux_task); + } + else if(first->rt_param.is_aux_task && second->rt_param.is_aux_task) + { + first = first->group_leader; + second = second->group_leader; + } + + +#if 0 int first_lo_aux, second_lo_aux; int first_hi_aux, second_hi_aux; first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; @@ -123,6 +133,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); return aux_hi_tie_break; } +#endif + } #endif @@ -240,12 +252,14 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) else if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { #endif +#if 0 #ifdef CONFIG_REALTIME_AUX_TASKS /* is this dead code? */ if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { return 1; } else if (tsk_rt(first)->is_aux_task == tsk_rt(second)->is_aux_task) { +#endif #endif /* Something could be wrong if you get this far. 
*/ @@ -283,10 +297,12 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } } - +#if 0 #ifdef CONFIG_REALTIME_AUX_TASKS } #endif +#endif + #ifdef CONFIG_LITMUS_SOFTIRQD } #endif diff --git a/litmus/litmus.c b/litmus/litmus.c index d368202ab8c3..953a591fad5f 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -388,8 +388,8 @@ static void reinit_litmus_state(struct task_struct* p, int restore) #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING - WARN_ON(p->rt_param.blocked_lock); - WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); +// WARN_ON(p->rt_param.blocked_lock); +// WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); #endif #ifdef CONFIG_LITMUS_SOFTIRQD diff --git a/litmus/locking.c b/litmus/locking.c index 16c936ba8139..22f46df4308a 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -544,6 +544,7 @@ out: void suspend_for_lock(void) { #ifdef CONFIG_REALTIME_AUX_TASKS +#if 0 unsigned int restore = 0; struct task_struct *t = current; unsigned int hide; @@ -556,16 +557,19 @@ void suspend_for_lock(void) restore = 1; tsk_rt(t)->hide_from_aux_tasks = 1; } +#endif #endif schedule(); #ifdef CONFIG_REALTIME_AUX_TASKS +#if 0 if (restore) { /* restore our state */ tsk_rt(t)->hide_from_aux_tasks = hide; } #endif +#endif } -- cgit v1.2.2 From dede6a6b8ce09f48295d8ba4635480c98ef85284 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 11 Nov 2012 13:10:43 -0500 Subject: improve ikglp heuristics --- include/litmus/ikglp_lock.h | 4 ++ include/litmus/rt_param.h | 5 ++ litmus/ikglp_lock.c | 117 +++++++++++++++++++++++++++++++++++++++++--- litmus/litmus.c | 5 ++ 4 files changed, 123 insertions(+), 8 deletions(-) diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h index 0b89c8135360..9d0cd3d1904e 100644 --- a/include/litmus/ikglp_lock.h +++ b/include/litmus/ikglp_lock.h @@ -114,6 +114,7 @@ struct ikglp_queue_info struct fifo_queue* q; lt_t estimated_len; int *nr_cur_users; + int *nr_aff_users; }; struct ikglp_affinity_ops @@ -128,6 +129,8 @@ struct ikglp_affinity_ops void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding) + + int (*notify_exit)(struct ikglp_affinity* aff, struct task_struct* t); }; struct ikglp_affinity @@ -136,6 +139,7 @@ struct ikglp_affinity struct ikglp_affinity_ops *ops; struct ikglp_queue_info *q_info; int *nr_cur_users_on_rsrc; + int *nr_aff_on_rsrc; int offset; int nr_simult; int nr_rsrc; diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 44f85a366574..cb7c3ac64339 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -188,6 +188,8 @@ typedef struct feedback_est{ #define AVG_EST_WINDOW_SIZE 20 +typedef int (*notify_rsrc_exit_t)(struct task_struct* tsk); + typedef struct avg_est{ lt_t history[AVG_EST_WINDOW_SIZE]; uint16_t count; @@ -248,6 +250,9 @@ struct rt_param { gpu_migration_dist_t gpu_migration; int last_gpu; + notify_rsrc_exit_t rsrc_exit_cb; + void* rsrc_exit_cb_args; + lt_t accum_gpu_time; lt_t gpu_time_stamp; diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index bd7bfc0f48ac..9c57bc24e8bd 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -1896,7 +1896,18 @@ int ikglp_aff_obs_close(struct affinity_observer* obs) void 
ikglp_aff_obs_free(struct affinity_observer* obs) { struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs); + + // make sure the thread destroying this semaphore will not + // call the exit callback on a destroyed lock. + struct task_struct *t = current; + if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff) + { + tsk_rt(t)->rsrc_exit_cb = NULL; + tsk_rt(t)->rsrc_exit_cb_args = NULL; + } + kfree(ikglp_aff->nr_cur_users_on_rsrc); + kfree(ikglp_aff->nr_aff_on_rsrc); kfree(ikglp_aff->q_info); kfree(ikglp_aff); } @@ -1960,6 +1971,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* return(NULL); } + ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); + if(!ikglp_aff->nr_aff_on_rsrc) { + kfree(ikglp_aff->nr_cur_users_on_rsrc); + kfree(ikglp_aff->q_info); + kfree(ikglp_aff); + return(NULL); + } + affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs); ikglp_aff->ops = ikglp_ops; @@ -1974,6 +1993,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ikglp_aff->relax_max_fifo_len); memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); + memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); for(i = 0; i < sem->nr_replicas; ++i) { ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; @@ -1982,6 +2002,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* // multiple q_info's will point to the same resource (aka GPU) if // aff_args.nr_simult_users > 1 ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; + ikglp_aff->q_info[i].nr_aff_users = &ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; } // attach observer to the lock @@ -2035,7 +2056,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); lt_t min_len; - int min_nr_users; + int min_nr_users, min_nr_aff_users; struct ikglp_queue_info *shortest; struct fifo_queue *to_enqueue; int i; @@ -2044,11 +2065,20 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t int max_fifo_len = (aff->relax_max_fifo_len) ? sem->m : sem->max_fifo_len; - // simply pick the shortest queue if, we have no affinity, or we have - // affinity with the shortest + // if we have no affinity, find the GPU with the least number of users + // with active affinity if(unlikely(tsk_rt(t)->last_gpu < 0)) { - affinity_gpu = aff->offset; // first gpu - TRACE_CUR("no affinity\n"); + int temp_min = aff->nr_aff_on_rsrc[0]; + affinity_gpu = aff->offset; + + for(i = 1; i < aff->nr_rsrc; ++i) { + if(aff->nr_aff_on_rsrc[i] < temp_min) { + affinity_gpu = aff->offset + i; + } + } + + TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n", + affinity_gpu, temp_min); } else { affinity_gpu = tsk_rt(t)->last_gpu; @@ -2066,6 +2096,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); min_nr_users = *(shortest->nr_cur_users); + min_nr_aff_users = *(shortest->nr_aff_users); + TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", get_gpu_estimate(t, MIG_LOCAL), @@ -2088,14 +2120,21 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t // // tie-break on the shortest number of simult users. this only kicks in // when there are more than 1 empty queues. 
+ + // TODO: Make "est_len < min_len" a fuzzy function that allows + // queues "close enough" in length to be considered equal. + if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */ (est_len < min_len) || /* i-th queue has shortest length */ ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */ - (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + ((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) || + ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && + (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) { shortest = &aff->q_info[i]; min_len = est_len; min_nr_users = *(aff->q_info[i].nr_cur_users); + min_nr_aff_users = *(aff->q_info[i].nr_aff_users); } TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", @@ -2612,6 +2651,51 @@ void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, // } } +int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + unsigned long flags = 0, real_flags; + int aff_rsrc; +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock; + + dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + if (tsk_rt(t)->last_gpu < 0) + return 0; + + raw_spin_lock_irqsave(&sem->real_lock, real_flags); + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&sem->lock, flags); + + // decrement affinity count on old GPU + aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; + --(aff->nr_aff_on_rsrc[aff_rsrc]); + + if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { + WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); + aff->nr_aff_on_rsrc[aff_rsrc] = 0; + } + + unlock_fine_irqrestore(&sem->lock, flags); + unlock_global_irqrestore(dgl_lock, flags); + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); + + return 0; +} + +int gpu_ikglp_notify_exit_trampoline(struct task_struct* t) +{ + struct ikglp_affinity* aff = (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args; + if(likely(aff)) { + return gpu_ikglp_notify_exit(aff, t); + } + else { + return -1; + } +} + void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) @@ -2619,15 +2703,28 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); int replica = ikglp_get_idx(sem, fq); int gpu = replica_to_gpu(aff, replica); + int last_gpu = tsk_rt(t)->last_gpu; - tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration + tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu); // record the type of migration TRACE_CUR("%s/%d acquired gpu %d (prev = %d). 
migration type = %d\n", - t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration); + t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration); // count the number or resource holders ++(*(aff->q_info[replica].nr_cur_users)); + if(gpu != last_gpu) { + if(last_gpu >= 0) { + int old_rsrc = last_gpu - aff->offset; + --(aff->nr_aff_on_rsrc[old_rsrc]); + } + + // increment affinity count on new GPU + ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); + tsk_rt(t)->rsrc_exit_cb_args = aff; + tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; + } + reg_nv_device(gpu, 1, t); // register tsk_rt(t)->suspend_gpu_tracker_on_block = 0; @@ -2679,6 +2776,8 @@ struct ikglp_affinity_ops gpu_ikglp_affinity = .notify_acquired = gpu_ikglp_notify_acquired, .notify_freed = gpu_ikglp_notify_freed, + .notify_exit = gpu_ikglp_notify_exit, + .replica_to_resource = gpu_replica_to_resource, }; @@ -2817,6 +2916,8 @@ struct ikglp_affinity_ops simple_gpu_ikglp_affinity = .notify_acquired = simple_gpu_ikglp_notify_acquired, .notify_freed = simple_gpu_ikglp_notify_freed, + .notify_exit = NULL, + .replica_to_resource = gpu_replica_to_resource, }; diff --git a/litmus/litmus.c b/litmus/litmus.c index d368202ab8c3..143c746c344e 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -533,6 +533,11 @@ void litmus_exit_task(struct task_struct* tsk) if (is_realtime(tsk)) { sched_trace_task_completion(tsk, 1); + if (tsk_rt(tsk)->rsrc_exit_cb) { + int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk); + WARN_ON(ret != 0); + } + litmus->task_exit(tsk); BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); -- cgit v1.2.2 From 3ee5f13b8213270ba30e4b3625dff46b1cc8326f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 30 Nov 2012 13:36:03 -0500 Subject: More improvements on affinity heuristics --- include/litmus/gpu_affinity.h | 16 ++++++++++ include/litmus/ikglp_lock.h | 2 +- litmus/edf_common.c | 2 +- litmus/ikglp_lock.c | 68 ++++++++++++++++++++++++++++++++----------- 4 files changed, 69 insertions(+), 19 deletions(-) diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h index d64a15cbf2a5..47da725717b0 100644 --- a/include/litmus/gpu_affinity.h +++ b/include/litmus/gpu_affinity.h @@ -31,6 +31,21 @@ static inline lt_t get_gpu_time(struct task_struct* t) static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) { + int i; + lt_t val; + + if(dist == MIG_NONE) { + dist = MIG_LOCAL; + } + + val = t->rt_param.gpu_migration_est[dist].avg; + for(i = dist-1; i >= 0; --i) { + if(t->rt_param.gpu_migration_est[i].avg > val) { + val = t->rt_param.gpu_migration_est[i].avg; + } + } + +#if 0 // int i; // fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); // lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... @@ -43,6 +58,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t // for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { // val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); // } +#endif return ((val > 0) ? 
val : dist+1); } diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h index 9d0cd3d1904e..89d9c37c7631 100644 --- a/include/litmus/ikglp_lock.h +++ b/include/litmus/ikglp_lock.h @@ -139,7 +139,7 @@ struct ikglp_affinity struct ikglp_affinity_ops *ops; struct ikglp_queue_info *q_info; int *nr_cur_users_on_rsrc; - int *nr_aff_on_rsrc; + int64_t *nr_aff_on_rsrc; int offset; int nr_simult; int nr_rsrc; diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 916b1b4309b7..a9bf0c08e125 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -272,7 +272,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } else if (first->pid == second->pid) { - WARN_ON(1); + //WARN_ON(1); } } else { diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index 9c57bc24e8bd..16ae621bbf75 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -1887,6 +1887,19 @@ static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu) return replica; } +static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b) +{ + return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b)); +} + +static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica) +{ + if(tsk_rt(t)->last_gpu >= 0) + { + return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica)); + } + return 0; +} int ikglp_aff_obs_close(struct affinity_observer* obs) { @@ -1971,7 +1984,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); + ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); if(!ikglp_aff->nr_aff_on_rsrc) { kfree(ikglp_aff->nr_cur_users_on_rsrc); kfree(ikglp_aff->q_info); @@ -1993,7 +2006,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ikglp_aff->relax_max_fifo_len); memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); - memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); + memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc)); for(i = 0; i < sem->nr_replicas; ++i) { ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; @@ -2057,7 +2070,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); lt_t min_len; int min_nr_users, min_nr_aff_users; - struct ikglp_queue_info *shortest; + struct ikglp_queue_info *shortest, *aff_queue; struct fifo_queue *to_enqueue; int i; int affinity_gpu; @@ -2087,7 +2100,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t // all things being equal, let's start with the queue with which we have // affinity. 
this helps us maintain affinity even when we don't have // an estiamte for local-affinity execution time (i.e., 2nd time on GPU) - shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; + aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; + shortest = aff_queue; // if(shortest == aff->shortest_queue) { // TRACE_CUR("special case: have affinity with shortest queue\n"); @@ -2108,29 +2122,46 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t for(i = 0; i < sem->nr_replicas; ++i) { if(&aff->q_info[i] != shortest) { if(aff->q_info[i].q->count < max_fifo_len) { + int want = 0; - lt_t est_len = - aff->q_info[i].estimated_len + + lt_t migration = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); + lt_t est_len = aff->q_info[i].estimated_len + migration; - // queue is smaller, or they're equal and the other has a smaller number - // of total users. - // - // tie-break on the shortest number of simult users. this only kicks in - // when there are more than 1 empty queues. + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. // TODO: Make "est_len < min_len" a fuzzy function that allows // queues "close enough" in length to be considered equal. - if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */ - (est_len < min_len) || /* i-th queue has shortest length */ - ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */ - ((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) || - ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && - (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) { + /* NOTE: 'shortest' starts out with affinity GPU */ + if(unlikely(shortest->q->count >= max_fifo_len)) { /* 'shortest' is full and i-th queue is not */ + want = 1; + } + else if(est_len < min_len) { + want = 1; /* i-th queue has shortest length */ + } + else if(unlikely(est_len == min_len)) { /* equal lengths */ + if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) { /* don't sacrifice affinity on tie */ + if(has_affinity(aff, t, i)) { + want = 1; /* switch to maintain affinity */ + } + else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) { /* favor one with less affinity load */ + want = 1; + } + else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && /* equal number of affinity */ + (*(aff->q_info[i].nr_cur_users) < min_nr_users)) { /* favor one with current fewer users */ + want = 1; + } + } + } + if(want) { shortest = &aff->q_info[i]; min_len = est_len; min_nr_users = *(aff->q_info[i].nr_cur_users); @@ -2672,6 +2703,7 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) // decrement affinity count on old GPU aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; --(aff->nr_aff_on_rsrc[aff_rsrc]); +// aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t); if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); @@ -2717,10 +2749,12 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, if(last_gpu >= 0) { int old_rsrc = last_gpu - aff->offset; --(aff->nr_aff_on_rsrc[old_rsrc]); +// aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t)); } // increment affinity count on new GPU ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); +// aff->nr_aff_on_rsrc[gpu - aff->offset] += 
((uint64_t)(1e9)/get_rt_period(t)); tsk_rt(t)->rsrc_exit_cb_args = aff; tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; } -- cgit v1.2.2 From 964297dd588ee6feab1aedecb2611bece2681973 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 30 Nov 2012 14:33:52 -0500 Subject: Fix compilation warnings. --- include/litmus/ikglp_lock.h | 2 +- litmus/Kconfig | 22 ++++++++++++++++++++++ litmus/aux_tasks.c | 18 +++++++++--------- litmus/edf_common.c | 21 +++++++-------------- litmus/ikglp_lock.c | 3 +++ 5 files changed, 42 insertions(+), 24 deletions(-) mode change 100644 => 100755 include/litmus/ikglp_lock.h mode change 100644 => 100755 litmus/Kconfig mode change 100644 => 100755 litmus/aux_tasks.c mode change 100644 => 100755 litmus/edf_common.c mode change 100644 => 100755 litmus/ikglp_lock.c diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h old mode 100644 new mode 100755 index 89d9c37c7631..af155eadbb35 --- a/include/litmus/ikglp_lock.h +++ b/include/litmus/ikglp_lock.h @@ -114,7 +114,7 @@ struct ikglp_queue_info struct fifo_queue* q; lt_t estimated_len; int *nr_cur_users; - int *nr_aff_users; + int64_t *nr_aff_users; }; struct ikglp_affinity_ops diff --git a/litmus/Kconfig b/litmus/Kconfig old mode 100644 new mode 100755 index a32f42898148..16087b9e4e81 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -56,6 +56,26 @@ config REALTIME_AUX_TASKS priority inversions may occur with respect to single-threaded task models if/when one of threads are scheduled simultanously with another of the same identity. + +choice + prompt "Scheduling prioritization of AUX tasks." + default REALTIME_AUX_TASK_PRIORITY_BOOSTED + help + Select the prioritization method for auxillary tasks. + +config REALTIME_AUX_TASK_PRIORITY_BOOSTED + bool "Boosted" + help + Run all auxillary task threads at a maximum priority. Useful for + temporarily working around bugs during development. + +config REALTIME_AUX_TASK_PRIORITY_INHERITANCE + bool "Inheritance" + help + Auxillary tasks inherit the maximum priority from blocked real-time + threads within the same process. + + Additional pitfall: 3) Busy-wait deadlock is likely between normal real-time tasks and auxillary tasks synchronize using _preemptive_ spinlocks that do not use priority inheritance. @@ -73,9 +93,11 @@ config REALTIME_AUX_TASKS holder that blocks the rt task CAN block on I/O and contribute its priority to the aux tasks. Aux tasks may still _indirectly_ inherit the priority of the blocked rt task via the lock holder. 
+endchoice endmenu + menu "Real-Time Synchronization" config NP_SECTION diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c old mode 100644 new mode 100755 index 9572d960b46a..bd7bcbed58fe --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -50,7 +50,7 @@ int exit_aux_task(struct task_struct *t) TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_del(&tsk_rt(t)->aux_task_node); tsk_rt(t)->is_aux_task = 0; @@ -67,7 +67,7 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s { int retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct list_head *pos; TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); @@ -94,7 +94,7 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s { int retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct list_head *pos; TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); @@ -121,7 +121,7 @@ int aux_task_owner_increase_priority(struct task_struct *t) { int retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; @@ -166,7 +166,7 @@ int aux_task_owner_decrease_priority(struct task_struct *t) { int retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; @@ -214,7 +214,7 @@ long enable_aux_task_owner(struct task_struct *t) { long retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct task_struct *leader = t->group_leader; struct task_struct *hp; @@ -252,7 +252,7 @@ long disable_aux_task_owner(struct task_struct *t) { long retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct task_struct *leader = t->group_leader; struct task_struct *hp; struct task_struct *new_hp = NULL; @@ -344,7 +344,7 @@ static long __do_enable_aux_tasks(void) /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. 
*/ tsk_rt(t)->is_aux_task = 1; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); #endif @@ -368,7 +368,7 @@ static long __do_enable_aux_tasks(void) } while(t != leader); -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); diff --git a/litmus/edf_common.c b/litmus/edf_common.c old mode 100644 new mode 100755 index a9bf0c08e125..5a3f5b417f73 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -43,7 +43,7 @@ static inline long edf_hash(struct task_struct *t) } #endif -#ifdef CONFIG_REALTIME_AUX_TASKS +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE int aux_tie_break(struct task_struct *first, struct task_struct *second) { long fhash = edf_hash(first); @@ -90,9 +90,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return first && !second; } -#ifdef CONFIG_REALTIME_AUX_TASKS - { +#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) /* run aux tasks at max priority */ if (first->rt_param.is_aux_task != second->rt_param.is_aux_task) { @@ -103,9 +102,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) first = first->group_leader; second = second->group_leader; } - - -#if 0 +#elif defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) + { int first_lo_aux, second_lo_aux; int first_hi_aux, second_hi_aux; first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; @@ -133,11 +131,10 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); return aux_hi_tie_break; } -#endif - } #endif + #ifdef CONFIG_LITMUS_LOCKING /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. @@ -252,14 +249,12 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) else if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { #endif -#if 0 -#ifdef CONFIG_REALTIME_AUX_TASKS +#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) /* is this dead code? */ if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { return 1; } else if (tsk_rt(first)->is_aux_task == tsk_rt(second)->is_aux_task) { -#endif #endif /* Something could be wrong if you get this far. 
*/ @@ -297,11 +292,9 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } } -#if 0 -#ifdef CONFIG_REALTIME_AUX_TASKS +#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) } #endif -#endif #ifdef CONFIG_LITMUS_SOFTIRQD } diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c old mode 100644 new mode 100755 index 16ae621bbf75..b29828344dd1 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -2598,7 +2598,10 @@ ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, str ikglp_wait_state_t *donor = NULL; int distance = MIG_NONE; int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq)); + +#ifdef CONFIG_SCHED_DEBUG_TRACE ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); +#endif __find_closest_donor(gpu, sem->donors.root, &donor, &distance); -- cgit v1.2.2 From 983773f990053cb0ced72afb4b69594e5d32c779 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 6 Dec 2012 20:37:20 -0500 Subject: AUX_FUTURE and revised inh-based aux tie break --- include/litmus/aux_tasks.h | 2 + include/litmus/litmus.h | 1 + include/litmus/rt_param.h | 7 ++ kernel/fork.c | 6 +- litmus/aux_tasks.c | 198 +++++++++++++++++++++++++++++++-------------- litmus/edf_common.c | 78 ++++++++---------- litmus/litmus.c | 8 ++ 7 files changed, 192 insertions(+), 108 deletions(-) mode change 100644 => 100755 include/litmus/aux_tasks.h mode change 100644 => 100755 include/litmus/litmus.h mode change 100644 => 100755 include/litmus/rt_param.h mode change 100644 => 100755 litmus/litmus.c diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h old mode 100644 new mode 100755 index 3bb6b26fef09..87745c1c0df0 --- a/include/litmus/aux_tasks.h +++ b/include/litmus/aux_tasks.h @@ -6,6 +6,8 @@ struct task_struct; /* admit an aux task with default parameters */ //int admit_aux_task(struct task_struct *t); +int make_aux_task_if_required(struct task_struct *t); + /* call on an aux task when it exits real-time */ int exit_aux_task(struct task_struct *t); diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h old mode 100644 new mode 100755 index db2987a24686..711b88e2b3d1 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -32,6 +32,7 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); #define NO_CPU 0xffffffff void litmus_fork(struct task_struct *tsk); +void litmus_post_fork_thread(struct task_struct *tsk); void litmus_exec(void); /* clean up real-time state of a task */ void exit_litmus(struct task_struct *dead_tsk); diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h old mode 100644 new mode 100755 index cb7c3ac64339..aca78a835529 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -41,6 +41,12 @@ typedef enum { PRECISE_SIGNALS, /* budget signals are triggered with hrtimers */ } budget_signal_policy_t; +typedef enum { + AUX_ENABLE = 0x1, + AUX_CURRENT = (AUX_ENABLE<<1), + AUX_FUTURE = (AUX_CURRENT<<2) +} aux_flags_t; + /* We use the common priority interpretation "lower index == higher priority", * which is commonly used in fixed-priority schedulability analysis papers. 
* So, a numerically lower priority value implies higher scheduling priority, @@ -370,6 +376,7 @@ struct aux_data struct list_head aux_tasks; struct binheap aux_task_owners; unsigned int initialized:1; + unsigned int aux_future:1; }; #endif diff --git a/kernel/fork.c b/kernel/fork.c index 25c6111fe3a6..7491c4f5e78c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1370,8 +1370,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); - if (clone_flags & CLONE_THREAD) + if (clone_flags & CLONE_THREAD) { threadgroup_fork_read_unlock(current); +#ifdef CONFIG_REALTIME_AUX_TASKS + litmus_post_fork_thread(p); +#endif + } perf_event_fork(p); return p; diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index bd7bcbed58fe..e5f3c82d32e9 100755 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -25,7 +25,7 @@ static int admit_aux_task(struct task_struct *t) struct rt_task tp = { .period = 1000000, /* 1ms */ .relative_deadline = 1000000, - .exec_cost = 1000000, /* allow full utilization */ + .exec_cost = 1000000, /* allow full utilization with buget tracking */ .phase = 0, .cpu = task_cpu(leader), /* take CPU of group leader */ .budget_policy = QUANTUM_ENFORCEMENT, @@ -44,17 +44,15 @@ static int admit_aux_task(struct task_struct *t) int exit_aux_task(struct task_struct *t) { int retval = 0; - struct task_struct *leader = t->group_leader; BUG_ON(!tsk_rt(t)->is_aux_task); - TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, t->group_leader->comm, t->group_leader->pid); + tsk_rt(t)->is_aux_task = 0; + #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_del(&tsk_rt(t)->aux_task_node); - - tsk_rt(t)->is_aux_task = 0; - if (tsk_rt(t)->inh_task) { litmus->__decrease_prio(t, NULL); } @@ -80,10 +78,14 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s if (!is_realtime(aux)) { TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); } - - // aux tasks don't touch rt locks, so no nested call needed. - TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); - retval = litmus->__increase_prio(aux, hp); + else if(tsk_rt(aux)->inh_task == hp) { + TRACE_CUR("skipping real-time aux task %s/%d that already inherits from %s/%d\n", aux->comm, aux->pid, hp->comm, hp->pid); + } + else { + // aux tasks don't touch rt locks, so no nested call needed. 
+ TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); + retval = litmus->__increase_prio(aux, hp); + } } #endif @@ -208,6 +210,54 @@ out: return retval; } +int make_aux_task_if_required(struct task_struct *t) +{ + struct task_struct *leader; + int retval = 0; + + read_lock_irq(&tasklist_lock); + + leader = t->group_leader; + + if(!tsk_aux(leader)->initialized || !tsk_aux(leader)->aux_future) { + goto out; + } + + TRACE_CUR("Making %s/%d in %s/%d an aux thread.\n", t->comm, t->pid, leader->comm, leader->pid); + + INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); + INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); + + retval = admit_aux_task(t); + if (retval == 0) { + tsk_rt(t)->is_aux_task = 1; + +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE + list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); + + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { + struct task_struct *hp = + container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid); + + retval = litmus->__increase_prio(t, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + + if (retval != 0) { + /* don't know how to recover from bugs with prio inheritance. better just crash. */ + read_unlock_irq(&tasklist_lock); + BUG(); + } + } +#endif + } + +out: + read_unlock_irq(&tasklist_lock); + + return retval; +} long enable_aux_task_owner(struct task_struct *t) @@ -313,11 +363,12 @@ static int aux_task_owner_max_priority_order(struct binheap_node *a, } -static long __do_enable_aux_tasks(void) +static long __do_enable_aux_tasks(int flags) { long retval = 0; struct task_struct *leader; struct task_struct *t; + int aux_tasks_added = 0; leader = current->group_leader; @@ -327,34 +378,52 @@ static long __do_enable_aux_tasks(void) tsk_aux(leader)->initialized = 1; } + if (flags & AUX_FUTURE) { + tsk_aux(leader)->aux_future = 1; + } + t = leader; do { - /* doesn't hurt to initialize them both */ - INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); - INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); + if (!tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->is_aux_task) { + /* This may harmlessly reinit unused nodes. TODO: Don't reinit already init nodes. */ + /* doesn't hurt to initialize both nodes */ + INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); + INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); + } TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", leader->comm, leader->pid, t->comm, t->pid, tsk_rt(t)->task_params.period); - - /* inspect heap_node to see if it is an rt task */ + + /* inspect period to see if it is an rt task */ if (tsk_rt(t)->task_params.period == 0) { - if (!tsk_rt(t)->is_aux_task) { - TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); - /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ - tsk_rt(t)->is_aux_task = 1; - + if (flags && AUX_CURRENT) { + if (!tsk_rt(t)->is_aux_task) { + int admit_ret; + + TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); + + admit_ret = admit_aux_task(t); + + if (admit_ret == 0) { + /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. 
*/ + tsk_rt(t)->is_aux_task = 1; + aux_tasks_added = 1; + #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE - list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); + list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); #endif - - (void)admit_aux_task(t); + } + } + else { + TRACE_CUR("AUX task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); + } } else { - TRACE_CUR("AUX task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); + TRACE_CUR("Not changing thread in %s/%d to AUX task: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); } } - else { + else if (!tsk_rt(t)->is_aux_task) { /* don't let aux tasks get aux tasks of their own */ if (!tsk_rt(t)->has_aux_tasks) { TRACE_CUR("task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); tsk_rt(t)->has_aux_tasks = 1; @@ -369,19 +438,18 @@ static long __do_enable_aux_tasks(void) #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE - if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { + if (aux_tasks_added && !binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - TRACE_CUR("found hp in group: %s/%d\n", hp->comm, hp->pid); - retval = aux_tasks_increase_priority(leader, - (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid); + retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); } #endif return retval; } -static long __do_disable_aux_tasks(void) +static long __do_disable_aux_tasks(int flags) { long retval = 0; struct task_struct *leader; @@ -389,50 +457,56 @@ static long __do_disable_aux_tasks(void) leader = current->group_leader; - t = leader; - do { - if (tsk_rt(t)->is_aux_task) { - - TRACE_CUR("%s/%d is an aux task.\n", t->comm, t->pid); - - if (is_realtime(t)) { - long temp_retval; - struct sched_param param = { .sched_priority = 0}; - - TRACE_CUR("%s/%d is real-time. Changing policy to SCHED_NORMAL.\n", t->comm, t->pid); - - temp_retval = sched_setscheduler_nocheck(t, SCHED_NORMAL, ¶m); - - if (temp_retval != 0) { - TRACE_CUR("error changing policy of %s/%d to SCHED_NORMAL\n", t->comm, t->pid); - if (retval == 0) { - retval = temp_retval; - } - else { - TRACE_CUR("prior error (%d) masks new error (%d)\n", retval, temp_retval); + if (flags & AUX_FUTURE) { + tsk_aux(leader)->aux_future = 0; + } + + if (flags & AUX_CURRENT) { + t = leader; + do { + if (tsk_rt(t)->is_aux_task) { + + TRACE_CUR("%s/%d is an aux task.\n", t->comm, t->pid); + + if (is_realtime(t)) { + long temp_retval; + struct sched_param param = { .sched_priority = 0}; + + TRACE_CUR("%s/%d is real-time. 
Changing policy to SCHED_NORMAL.\n", t->comm, t->pid); + + temp_retval = sched_setscheduler_nocheck(t, SCHED_NORMAL, ¶m); + + if (temp_retval != 0) { + TRACE_CUR("error changing policy of %s/%d to SCHED_NORMAL\n", t->comm, t->pid); + if (retval == 0) { + retval = temp_retval; + } + else { + TRACE_CUR("prior error (%d) masks new error (%d)\n", retval, temp_retval); + } } } - } - tsk_rt(t)->is_aux_task = 0; - } - t = next_thread(t); - } while(t != leader); + tsk_rt(t)->is_aux_task = 0; + } + t = next_thread(t); + } while(t != leader); + } return retval; } -asmlinkage long sys_set_aux_tasks(int enable) +asmlinkage long sys_set_aux_tasks(int flags) { long retval; read_lock_irq(&tasklist_lock); - if (enable) { - retval = __do_enable_aux_tasks(); + if (flags & AUX_ENABLE) { + retval = __do_enable_aux_tasks(flags); } else { - retval = __do_disable_aux_tasks(); + retval = __do_disable_aux_tasks(flags); } read_unlock_irq(&tasklist_lock); @@ -442,7 +516,7 @@ asmlinkage long sys_set_aux_tasks(int enable) #else -asmlinkage long sys_set_aux_tasks(int enable) +asmlinkage long sys_set_aux_tasks(int flags) { printk("Unsupported. Recompile with CONFIG_REALTIME_AUX_TASKS.\n"); return -EINVAL; diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 5a3f5b417f73..c279bf12a7f5 100755 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -22,7 +22,7 @@ #include #endif -#if defined(CONFIG_EDF_TIE_BREAK_HASH) || defined(CONFIG_REALTIME_AUX_TASKS) +#if defined(CONFIG_EDF_TIE_BREAK_HASH) #include static inline long edf_hash(struct task_struct *t) { @@ -43,23 +43,6 @@ static inline long edf_hash(struct task_struct *t) } #endif -#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE -int aux_tie_break(struct task_struct *first, struct task_struct *second) -{ - long fhash = edf_hash(first); - long shash = edf_hash(second); - if (fhash < shash) { - TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, 1); - return 1; - } - else if(fhash == shash) { - TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, (first->pid < second->pid)); - return first->pid < second->pid; - } - return 0; -} -#endif - /* edf_higher_prio - returns true if first has a higher EDF priority * than second. Deadline ties are broken by PID. @@ -93,44 +76,47 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) /* run aux tasks at max priority */ + /* TODO: Actually use prio-boosting. 
*/ if (first->rt_param.is_aux_task != second->rt_param.is_aux_task) { return (first->rt_param.is_aux_task > second->rt_param.is_aux_task); } else if(first->rt_param.is_aux_task && second->rt_param.is_aux_task) { + if(first->group_leader == second->group_leader) { + TRACE_CUR("aux tie break!\n"); // tie-break by BASE priority of the aux tasks + goto aux_tie_break; + } first = first->group_leader; second = second->group_leader; } #elif defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) { - int first_lo_aux, second_lo_aux; - int first_hi_aux, second_hi_aux; - first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; - second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task; - - if (first_lo_aux && !second_lo_aux) { - TRACE_CUR("%s/%d >> %s/%d --- 0\n", first->comm, first->pid, second->comm, second->pid); - return 0; - } - else if (second_lo_aux && !first_lo_aux) { - TRACE_CUR("%s/%d >> %s/%d --- 1\n", first->comm, first->pid, second->comm, second->pid); - return 1; - } - else if (first_lo_aux && second_lo_aux) { - int aux_lo_tie_break = aux_tie_break(first, second); - TRACE_CUR("low aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_lo_tie_break); - return aux_lo_tie_break; - } - - first_hi_aux = first->rt_param.is_aux_task && first->rt_param.inh_task; - second_hi_aux = second->rt_param.is_aux_task && second->rt_param.inh_task; - - if (first_hi_aux && second_hi_aux && first->rt_param.inh_task == second->rt_param.inh_task) { - int aux_hi_tie_break = aux_tie_break(first, second); - TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); - return aux_hi_tie_break; - } + int first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; + int second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task; + + /* prioritize aux tasks without inheritance below real-time tasks */ + if (first_lo_aux || second_lo_aux) { + // one of these is an aux task without inheritance. + if(first_lo_aux && second_lo_aux) { + TRACE_CUR("aux tie break!\n"); // tie-break by BASE priority of the aux tasks + goto aux_tie_break; + } + else { + // make the aux thread lowest priority real-time task + int temp = (first_lo_aux) ? !is_realtime(second) : !is_realtime(first); + TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); + return temp; + } + } + + if (first->rt_param.is_aux_task && second->rt_param.is_aux_task && + first->rt_param.inh_task == second->rt_param.inh_task) { // inh_task is !NULL for both tasks since neither was a lo_aux task + // Both aux tasks inherit from the same task, so tie-break + // by base priority of the aux tasks. + TRACE_CUR("aux tie break!\n"); + goto aux_tie_break; + } } #endif @@ -174,6 +160,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) #endif +aux_tie_break: + if (!is_realtime(second_task)) { return 1; } diff --git a/litmus/litmus.c b/litmus/litmus.c old mode 100644 new mode 100755 index cfd14852502b..8bc159b2fcce --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -634,6 +634,14 @@ void litmus_fork(struct task_struct* p) p->od_table = NULL; } +/* Called right before copy_process() returns a forked thread. */ +void litmus_post_fork_thread(struct task_struct* p) +{ +#ifdef CONFIG_REALTIME_AUX_TASKS + make_aux_task_if_required(p); +#endif +} + /* Called upon execve(). * current is doing the exec. * Don't let address space specific stuff leak. 
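/*
 * Usage sketch for the new flags-based interface of sys_set_aux_tasks().
 * The litmus_set_aux_tasks() wrapper and the <litmus.h> include are
 * assumptions standing in for whatever liblitmus exposes for this syscall;
 * the AUX_* flags are the ones defined in rt_param.h above.
 */
#include <litmus.h>	/* assumed user-space header; the raw syscall is sys_set_aux_tasks() */

/* Tag the process's existing non-real-time threads as aux threads and ask
 * the kernel to tag threads forked later as well (AUX_FUTURE is honored by
 * litmus_post_fork_thread() at thread creation). */
static int enable_all_aux_threads(void)
{
	return litmus_set_aux_tasks(AUX_ENABLE | AUX_CURRENT | AUX_FUTURE);
}

/* Stop tagging future threads; without AUX_ENABLE this takes the disable
 * path, and because AUX_CURRENT is not passed, already-tagged aux threads
 * are left alone. */
static int stop_tagging_future_threads(void)
{
	return litmus_set_aux_tasks(AUX_FUTURE);
}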
-- cgit v1.2.2 From 1caac0a4cd3027de123306ac7b12bf4c0393f3ed Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 7 Dec 2012 14:31:07 -0500 Subject: Send reboot notifications to Litmus This patch causes reboot notifications to be send to Litmus. With this patch, Litmus attempts to switch back to the Linux-plugin before the reboot proceeds. Any failures to switch back are reported via printk() (the reboot is not halted). --- litmus/litmus.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/litmus/litmus.c b/litmus/litmus.c index 8bc159b2fcce..065ef7d3192a 100755 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -704,6 +705,25 @@ static struct sysrq_key_op sysrq_kill_rt_tasks_op = { extern struct sched_plugin linux_sched_plugin; +static int litmus_shutdown_nb(struct notifier_block *unused1, + unsigned long unused2, void *unused3) +{ + /* Attempt to switch back to regular Linux scheduling. + * Forces the active plugin to clean up. + */ + if (litmus != &linux_sched_plugin) { + int ret = switch_sched_plugin(&linux_sched_plugin); + if (ret) { + printk("Auto-shutdown of active Litmus plugin failed.\n"); + } + } + return NOTIFY_DONE; +} + +static struct notifier_block shutdown_notifier = { + .notifier_call = litmus_shutdown_nb, +}; + static int __init _init_litmus(void) { /* Common initializers, @@ -733,11 +753,15 @@ static int __init _init_litmus(void) init_topology(); #endif + register_reboot_notifier(&shutdown_notifier); + return 0; } static void _exit_litmus(void) { + unregister_reboot_notifier(&shutdown_notifier); + exit_litmus_proc(); kmem_cache_destroy(bheap_node_cache); kmem_cache_destroy(release_heap_cache); -- cgit v1.2.2 From fccce270a5540021b544d439595fa0a736242ff0 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 9 Dec 2012 16:34:36 -0500 Subject: Specify aux thread granularity in jiffies --- litmus/aux_tasks.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index e5f3c82d32e9..20f477f6e3bc 100755 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -6,12 +6,16 @@ #include #include +#include + +#define AUX_SLICE_NR_JIFFIES 1 +#define AUX_SLICE_NS ((NSEC_PER_SEC / HZ) * AUX_SLICE_NR_JIFFIES) + static int admit_aux_task(struct task_struct *t) { int retval = 0; struct task_struct *leader = t->group_leader; - /* budget enforcement increments job numbers. job numbers are used in * tie-breaking of aux_tasks. method helps ensure: * 1) aux threads with no inherited priority can starve another (they share @@ -23,9 +27,9 @@ static int admit_aux_task(struct task_struct *t) * fail-safe. */ struct rt_task tp = { - .period = 1000000, /* 1ms */ - .relative_deadline = 1000000, - .exec_cost = 1000000, /* allow full utilization with buget tracking */ + .period = AUX_SLICE_NS, + .relative_deadline = AUX_SLICE_NS, + .exec_cost = AUX_SLICE_NS, /* allow full utilization with buget tracking */ .phase = 0, .cpu = task_cpu(leader), /* take CPU of group leader */ .budget_policy = QUANTUM_ENFORCEMENT, -- cgit v1.2.2 From fbd9574e298157b54c38f82f536e5cea8f766dff Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 9 Dec 2012 16:53:50 -0500 Subject: Rename klitirqd klmirqd. 
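(For the aux-thread granularity change in the preceding commit, the effective slice depends on the kernel's CONFIG_HZ; a quick worked example, assuming AUX_SLICE_NR_JIFFIES stays at 1:)

/* AUX_SLICE_NS = (NSEC_PER_SEC / HZ) * AUX_SLICE_NR_JIFFIES
 *   HZ = 1000  ->  1,000,000 ns =  1 ms   (matches the old hard-coded 1 ms)
 *   HZ =  250  ->  4,000,000 ns =  4 ms
 *   HZ =  100  -> 10,000,000 ns = 10 ms
 * Period, relative deadline, and budget of an aux thread therefore all
 * scale with the scheduling quantum rather than being fixed at 1 ms. */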
--- include/litmus/aux_tasks.h | 0 include/litmus/ikglp_lock.h | 0 include/litmus/litmus.h | 0 include/litmus/litmus_softirq.h | 42 ++--- include/litmus/rt_param.h | 18 +- include/litmus/sched_plugin.h | 8 +- litmus/Kconfig | 10 +- litmus/aux_tasks.c | 0 litmus/edf_common.c | 0 litmus/ikglp_lock.c | 0 litmus/litmus.c | 22 +-- litmus/litmus_proc.c | 14 +- litmus/litmus_softirq.c | 388 ++++++++++++++++++++-------------------- litmus/nvidia_info.c | 8 +- litmus/sched_cedf.c | 48 ++--- litmus/sched_gsn_edf.c | 44 ++--- litmus/sched_plugin.c | 12 +- 17 files changed, 307 insertions(+), 307 deletions(-) mode change 100755 => 100644 include/litmus/aux_tasks.h mode change 100755 => 100644 include/litmus/ikglp_lock.h mode change 100755 => 100644 include/litmus/litmus.h mode change 100755 => 100644 include/litmus/rt_param.h mode change 100755 => 100644 litmus/Kconfig mode change 100755 => 100644 litmus/aux_tasks.c mode change 100755 => 100644 litmus/edf_common.c mode change 100755 => 100644 litmus/ikglp_lock.c mode change 100755 => 100644 litmus/litmus.c diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h old mode 100755 new mode 100644 diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h old mode 100755 new mode 100644 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h old mode 100755 new mode 100644 diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h index 1eb5ea1a6c4b..46fe89148505 100644 --- a/include/litmus/litmus_softirq.h +++ b/include/litmus/litmus_softirq.h @@ -13,7 +13,7 @@ Tasklets are current scheduled in FIFO order with NO priority inheritance for "blocked" tasklets. - klitirqd assumes the priority of the owner of the + klmirqd assumes the priority of the owner of the tasklet when the tasklet is next to execute. Currently, hi-tasklets are scheduled before @@ -21,7 +21,7 @@ And likewise, low-tasklets are scheduled before work queue objects. This priority inversion probably needs to be fixed, though it is not an issue if our work with - GPUs as GPUs are owned (and associated klitirqds) for + GPUs as GPUs are owned (and associated klmirqds) for exclusive time periods, thus no inversions can occur. */ @@ -30,7 +30,7 @@ #define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD -/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons. +/* Spawns NR_LITMUS_SOFTIRQD klmirqd daemons. Actual launch of threads is deffered to kworker's workqueue, so daemons will likely not be immediately running when this function returns, though the required @@ -52,43 +52,43 @@ FIXME: change array to a CPU topology or array of cpumasks */ -void spawn_klitirqd(int* affinity); +void spawn_klmirqd(int* affinity); -/* Raises a flag to tell klitirqds to terminate. +/* Raises a flag to tell klmirqds to terminate. Termination is async, so some threads may be running after function return. */ -void kill_klitirqd(void); +void kill_klmirqd(void); /* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqs are ready to handle tasklets. 0, otherwise.*/ -int klitirqd_is_ready(void); +int klmirqd_is_ready(void); /* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqs are ready to handle tasklets. 0, otherwise.*/ -int klitirqd_is_dead(void); +int klmirqd_is_dead(void); /* Flushes all pending work out to the OS for regular * tasklet/work processing of the specified 'owner' * - * PRECOND: klitirqd_thread must have a clear entry + * PRECOND: klmirqd_thread must have a clear entry * in the GPU registry, otherwise this call will become - * a no-op as work will loop back to the klitirqd_thread. 
+ * a no-op as work will loop back to the klmirqd_thread. * * Pass NULL for owner to flush ALL pending items. */ -void flush_pending(struct task_struct* klitirqd_thread, +void flush_pending(struct task_struct* klmirqd_thread, struct task_struct* owner); -struct task_struct* get_klitirqd(unsigned int k_id); +struct task_struct* get_klmirqd(unsigned int k_id); extern int __litmus_tasklet_schedule( struct tasklet_struct *t, unsigned int k_id); -/* schedule a tasklet on klitirqd #k_id */ +/* schedule a tasklet on klmirqd #k_id */ static inline int litmus_tasklet_schedule( struct tasklet_struct *t, unsigned int k_id) @@ -113,7 +113,7 @@ static inline int _litmus_tasklet_schedule( extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id); -/* schedule a hi tasklet on klitirqd #k_id */ +/* schedule a hi tasklet on klmirqd #k_id */ static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) { @@ -138,7 +138,7 @@ extern int __litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, unsigned int k_id); -/* schedule a hi tasklet on klitirqd #k_id on next go-around */ +/* schedule a hi tasklet on klmirqd #k_id on next go-around */ /* PRECONDITION: Interrupts must be disabled. */ static inline int litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, @@ -178,22 +178,22 @@ static inline int litmus_schedule_work( ///////////// mutex operations for client threads. void down_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_set, struct mutex* sem); void __down_and_reset_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_reset, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_reset, + enum klmirqd_sem_status to_set, struct mutex* sem); void up_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_set, struct mutex* sem); -void release_klitirqd_lock(struct task_struct* t); +void release_klmirqd_lock(struct task_struct* t); -int reacquire_klitirqd_lock(struct task_struct* t); +int reacquire_klmirqd_lock(struct task_struct* t); #endif diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h old mode 100755 new mode 100644 index aca78a835529..47301c04d862 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -166,7 +166,7 @@ struct rt_job { struct pfair_param; -enum klitirqd_sem_status +enum klmirqd_sem_status { NEED_TO_REACQUIRE, REACQUIRING, @@ -223,26 +223,26 @@ struct rt_param { /* proxy threads have minimum priority by default */ unsigned int is_proxy_thread:1; - /* pointer to klitirqd currently working on this + /* pointer to klmirqd currently working on this task_struct's behalf. only set by the task pointed - to by klitirqd. + to by klmirqd. ptr only valid if is_proxy_thread == 0 */ - struct task_struct* cur_klitirqd; + struct task_struct* cur_klmirqd; /* Used to implement mutual execution exclusion between - * job and klitirqd execution. Job must always hold - * it's klitirqd_sem to execute. klitirqd instance + * job and klmirqd execution. Job must always hold + * it's klmirqd_sem to execute. klmirqd instance * must hold the semaphore before executing on behalf * of a job. */ - struct mutex klitirqd_sem; + struct mutex klmirqd_sem; - /* status of held klitirqd_sem, even if the held klitirqd_sem is from + /* status of held klmirqd_sem, even if the held klmirqd_sem is from another task (only proxy threads do this though). 
*/ - atomic_t klitirqd_sem_stat; + atomic_t klmirqd_sem_stat; #endif #ifdef CONFIG_LITMUS_NVIDIA diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 65736b2a9199..e8127f427d56 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -79,10 +79,10 @@ typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh, raw_spinlock_t *to_unlock, unsigned long irqflags); -typedef void (*increase_prio_klitirq_t)(struct task_struct* klitirqd, +typedef void (*increase_prio_klitirq_t)(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner); -typedef void (*decrease_prio_klitirqd_t)(struct task_struct* klitirqd, +typedef void (*decrease_prio_klmirqd_t)(struct task_struct* klmirqd, struct task_struct* old_owner); @@ -168,8 +168,8 @@ struct sched_plugin { #endif #ifdef CONFIG_LITMUS_SOFTIRQD - increase_prio_klitirq_t increase_prio_klitirqd; - decrease_prio_klitirqd_t decrease_prio_klitirqd; + increase_prio_klitirq_t increase_prio_klmirqd; + decrease_prio_klmirqd_t decrease_prio_klmirqd; #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD enqueue_pai_tasklet_t enqueue_pai_tasklet; diff --git a/litmus/Kconfig b/litmus/Kconfig old mode 100755 new mode 100644 index 16087b9e4e81..f2434b87239b --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -392,11 +392,11 @@ config LITMUS_SOFTIRQD_NONE Don't schedule tasklets in Litmus. Default. config LITMUS_SOFTIRQD - bool "Spawn klitirqd interrupt handling threads." + bool "Spawn klmirqd interrupt handling threads." help - Create klitirqd interrupt handling threads. Work must be + Create klmirqd interrupt handling threads. Work must be specifically dispatched to these workers. (Softirqs for - Litmus tasks are not magically redirected to klitirqd.) + Litmus tasks are not magically redirected to klmirqd.) G-EDF/RM, C-EDF/RM ONLY for now! @@ -415,7 +415,7 @@ endchoice config NR_LITMUS_SOFTIRQD - int "Number of klitirqd." + int "Number of klmirqd." depends on LITMUS_SOFTIRQD range 1 4096 default "1" @@ -426,7 +426,7 @@ config LITMUS_NVIDIA bool "Litmus handling of NVIDIA interrupts." default n help - Direct tasklets from NVIDIA devices to Litmus's klitirqd + Direct tasklets from NVIDIA devices to Litmus's klmirqd or PAI interrupt handling routines. If unsure, say No. diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c old mode 100755 new mode 100644 diff --git a/litmus/edf_common.c b/litmus/edf_common.c old mode 100755 new mode 100644 diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c old mode 100755 new mode 100644 diff --git a/litmus/litmus.c b/litmus/litmus.c old mode 100755 new mode 100644 index 065ef7d3192a..3b8017397e80 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -397,14 +397,14 @@ static void reinit_litmus_state(struct task_struct* p, int restore) /* We probably should not have any tasklets executing for * us at this time. 
*/ - WARN_ON(p->rt_param.cur_klitirqd); - WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD); + WARN_ON(p->rt_param.cur_klmirqd); + WARN_ON(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD); - if(p->rt_param.cur_klitirqd) - flush_pending(p->rt_param.cur_klitirqd, p); + if(p->rt_param.cur_klmirqd) + flush_pending(p->rt_param.cur_klmirqd, p); - if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD) - up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem); + if(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD) + up_and_set_stat(p, NOT_HELD, &p->rt_param.klmirqd_sem); #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -479,9 +479,9 @@ long __litmus_admit_task(struct task_struct* tsk) #ifdef CONFIG_LITMUS_SOFTIRQD /* proxy thread off by default */ tsk_rt(tsk)is_proxy_thread = 0; - tsk_rt(tsk)cur_klitirqd = NULL; - mutex_init(&tsk_rt(tsk)->klitirqd_sem); - atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD); + tsk_rt(tsk)cur_klmirqd = NULL; + mutex_init(&tsk_rt(tsk)->klmirqd_sem); + atomic_set(&tsk_rt(tsk)->klmirqd_sem_stat, NOT_HELD); #endif retval = litmus->admit_task(tsk); @@ -580,9 +580,9 @@ int switch_sched_plugin(struct sched_plugin* plugin) cpu_relax(); #ifdef CONFIG_LITMUS_SOFTIRQD - if(!klitirqd_is_dead()) + if(!klmirqd_is_dead()) { - kill_klitirqd(); + kill_klmirqd(); } #endif diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c index 9ab7e015a3c1..136fecfb0b8b 100644 --- a/litmus/litmus_proc.c +++ b/litmus/litmus_proc.c @@ -21,14 +21,14 @@ static struct proc_dir_entry *litmus_dir = NULL, *release_master_file = NULL, #endif #ifdef CONFIG_LITMUS_SOFTIRQD - *klitirqd_file = NULL, + *klmirqd_file = NULL, #endif *plugs_file = NULL; /* in litmus/sync.c */ int count_tasks_waiting_for_release(void); -extern int proc_read_klitirqd_stats(char *page, char **start, +extern int proc_read_klmirqd_stats(char *page, char **start, off_t off, int count, int *eof, void *data); @@ -169,9 +169,9 @@ int __init init_litmus_proc(void) #endif #ifdef CONFIG_LITMUS_SOFTIRQD - klitirqd_file = - create_proc_read_entry("klitirqd_stats", 0444, litmus_dir, - proc_read_klitirqd_stats, NULL); + klmirqd_file = + create_proc_read_entry("klmirqd_stats", 0444, litmus_dir, + proc_read_klmirqd_stats, NULL); #endif stat_file = create_proc_read_entry("stats", 0444, litmus_dir, @@ -201,8 +201,8 @@ void exit_litmus_proc(void) if (curr_file) remove_proc_entry("active_plugin", litmus_dir); #ifdef CONFIG_LITMUS_SOFTIRQD - if (klitirqd_file) - remove_proc_entry("klitirqd_stats", litmus_dir); + if (klmirqd_file) + remove_proc_entry("klmirqd_stats", litmus_dir); #endif #ifdef CONFIG_RELEASE_MASTER if (release_master_file) diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 9f7d9da5facb..73a3053e662b 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -20,7 +20,7 @@ /* counts number of daemons ready to handle litmus irqs. 
*/ -static atomic_t num_ready_klitirqds = ATOMIC_INIT(0); +static atomic_t num_ready_klmirqds = ATOMIC_INIT(0); enum pending_flags { @@ -36,9 +36,9 @@ struct tasklet_head struct tasklet_struct **tail; }; -struct klitirqd_info +struct klmirqd_info { - struct task_struct* klitirqd; + struct task_struct* klmirqd; struct task_struct* current_owner; int terminating; @@ -56,44 +56,44 @@ struct klitirqd_info struct list_head worklist; }; -/* one list for each klitirqd */ -static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD]; +/* one list for each klmirqd */ +static struct klmirqd_info klmirqds[NR_LITMUS_SOFTIRQD]; -int proc_read_klitirqd_stats(char *page, char **start, +int proc_read_klmirqd_stats(char *page, char **start, off_t off, int count, int *eof, void *data) { int len = snprintf(page, PAGE_SIZE, - "num ready klitirqds: %d\n\n", - atomic_read(&num_ready_klitirqds)); + "num ready klmirqds: %d\n\n", + atomic_read(&num_ready_klmirqds)); - if(klitirqd_is_ready()) + if(klmirqd_is_ready()) { int i; for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { len += snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ - "klitirqd_th%d: %s/%d\n" + "klmirqd_th%d: %s/%d\n" "\tcurrent_owner: %s/%d\n" "\tpending: %x\n" "\tnum hi: %d\n" "\tnum low: %d\n" "\tnum work: %d\n\n", i, - klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid, - (klitirqds[i].current_owner != NULL) ? - klitirqds[i].current_owner->comm : "(null)", - (klitirqds[i].current_owner != NULL) ? - klitirqds[i].current_owner->pid : 0, - klitirqds[i].pending, - atomic_read(&klitirqds[i].num_hi_pending), - atomic_read(&klitirqds[i].num_low_pending), - atomic_read(&klitirqds[i].num_work_pending)); + klmirqds[i].klmirqd->comm, klmirqds[i].klmirqd->pid, + (klmirqds[i].current_owner != NULL) ? + klmirqds[i].current_owner->comm : "(null)", + (klmirqds[i].current_owner != NULL) ? 
+ klmirqds[i].current_owner->pid : 0, + klmirqds[i].pending, + atomic_read(&klmirqds[i].num_hi_pending), + atomic_read(&klmirqds[i].num_low_pending), + atomic_read(&klmirqds[i].num_work_pending)); } } @@ -107,7 +107,7 @@ int proc_read_klitirqd_stats(char *page, char **start, #if 0 static atomic_t dump_id = ATOMIC_INIT(0); -static void __dump_state(struct klitirqd_info* which, const char* caller) +static void __dump_state(struct klmirqd_info* which, const char* caller) { struct tasklet_struct* list; @@ -118,22 +118,22 @@ static void __dump_state(struct klitirqd_info* which, const char* caller) if(which->current_owner) { TRACE("(id: %d caller: %s)\n" - "klitirqd: %s/%d\n" + "klmirqd: %s/%d\n" "current owner: %s/%d\n" "pending: %x\n", id, caller, - which->klitirqd->comm, which->klitirqd->pid, + which->klmirqd->comm, which->klmirqd->pid, which->current_owner->comm, which->current_owner->pid, which->pending); } else { TRACE("(id: %d caller: %s)\n" - "klitirqd: %s/%d\n" + "klmirqd: %s/%d\n" "current owner: %p\n" "pending: %x\n", id, caller, - which->klitirqd->comm, which->klitirqd->pid, + which->klmirqd->comm, which->klmirqd->pid, NULL, which->pending); } @@ -151,7 +151,7 @@ static void __dump_state(struct klitirqd_info* which, const char* caller) } } -static void dump_state(struct klitirqd_info* which, const char* caller) +static void dump_state(struct klmirqd_info* which, const char* caller) { unsigned long flags; @@ -164,23 +164,23 @@ static void dump_state(struct klitirqd_info* which, const char* caller) /* forward declarations */ static void ___litmus_tasklet_schedule(struct tasklet_struct *t, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup); static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup); static void ___litmus_schedule_work(struct work_struct *w, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup); -inline unsigned int klitirqd_id(struct task_struct* tsk) +inline unsigned int klmirqd_id(struct task_struct* tsk) { int i; for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { - if(klitirqds[i].klitirqd == tsk) + if(klmirqds[i].klmirqd == tsk) { return i; } @@ -192,28 +192,28 @@ inline unsigned int klitirqd_id(struct task_struct* tsk) } -inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which) +inline static u32 litirq_pending_hi_irqoff(struct klmirqd_info* which) { return (which->pending & LIT_TASKLET_HI); } -inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which) +inline static u32 litirq_pending_low_irqoff(struct klmirqd_info* which) { return (which->pending & LIT_TASKLET_LOW); } -inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which) +inline static u32 litirq_pending_work_irqoff(struct klmirqd_info* which) { return (which->pending & LIT_WORK); } -inline static u32 litirq_pending_irqoff(struct klitirqd_info* which) +inline static u32 litirq_pending_irqoff(struct klmirqd_info* which) { return(which->pending); } -inline static u32 litirq_pending(struct klitirqd_info* which) +inline static u32 litirq_pending(struct klmirqd_info* which) { unsigned long flags; u32 pending; @@ -225,7 +225,7 @@ inline static u32 litirq_pending(struct klitirqd_info* which) return pending; }; -inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner) +inline static u32 litirq_pending_with_owner(struct klmirqd_info* which, struct task_struct* owner) { unsigned long flags; u32 pending; @@ -245,7 +245,7 
@@ inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct } -inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, +inline static u32 litirq_pending_and_sem_and_owner(struct klmirqd_info* which, struct mutex** sem, struct task_struct** t) { @@ -264,7 +264,7 @@ inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, if(which->current_owner != NULL) { *t = which->current_owner; - *sem = &tsk_rt(which->current_owner)->klitirqd_sem; + *sem = &tsk_rt(which->current_owner)->klmirqd_sem; } else { @@ -286,7 +286,7 @@ inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, /* returns true if the next piece of work to do is from a different owner. */ static int tasklet_ownership_change( - struct klitirqd_info* which, + struct klmirqd_info* which, enum pending_flags taskletQ) { /* this function doesn't have to look at work objects since they have @@ -319,16 +319,16 @@ static int tasklet_ownership_change( raw_spin_unlock_irqrestore(&which->lock, flags); - TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret); + TRACE_TASK(which->klmirqd, "ownership change needed: %d\n", ret); return ret; } -static void __reeval_prio(struct klitirqd_info* which) +static void __reeval_prio(struct klmirqd_info* which) { struct task_struct* next_owner = NULL; - struct task_struct* klitirqd = which->klitirqd; + struct task_struct* klmirqd = which->klmirqd; /* Check in prio-order */ u32 pending = litirq_pending_irqoff(which); @@ -366,43 +366,43 @@ static void __reeval_prio(struct klitirqd_info* which) if(!in_interrupt()) { TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, - ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm, - ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm, + ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid, next_owner->comm, next_owner->pid); } else { TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, - ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm, - ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm, + ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid, next_owner->comm, next_owner->pid); } - litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner); + litmus->increase_prio_inheritance_klmirqd(klmirqd, old_owner, next_owner); } else { if(likely(!in_interrupt())) { TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n", - __FUNCTION__, klitirqd->comm, klitirqd->pid); + __FUNCTION__, klmirqd->comm, klmirqd->pid); } else { // is this a bug? 
TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n", - __FUNCTION__, klitirqd->comm, klitirqd->pid); + __FUNCTION__, klmirqd->comm, klmirqd->pid); } BUG_ON(pending != 0); - litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL); + litmus->decrease_prio_inheritance_klmirqd(klmirqd, old_owner, NULL); } } //__dump_state(which, "__reeval_prio: after"); } -static void reeval_prio(struct klitirqd_info* which) +static void reeval_prio(struct klmirqd_info* which) { unsigned long flags; @@ -412,25 +412,25 @@ static void reeval_prio(struct klitirqd_info* which) } -static void wakeup_litirqd_locked(struct klitirqd_info* which) +static void wakeup_litirqd_locked(struct klmirqd_info* which) { /* Interrupts are disabled: no need to stop preemption */ - if (which && which->klitirqd) + if (which && which->klmirqd) { __reeval_prio(which); /* configure the proper priority */ - if(which->klitirqd->state != TASK_RUNNING) - { - TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__, - which->klitirqd->comm, which->klitirqd->pid); + if(which->klmirqd->state != TASK_RUNNING) + { + TRACE("%s: Waking up klmirqd: %s/%d\n", __FUNCTION__, + which->klmirqd->comm, which->klmirqd->pid); - wake_up_process(which->klitirqd); + wake_up_process(which->klmirqd); } } } -static void do_lit_tasklet(struct klitirqd_info* which, +static void do_lit_tasklet(struct klmirqd_info* which, struct tasklet_head* pending_tasklets) { unsigned long flags; @@ -503,7 +503,7 @@ static void do_lit_tasklet(struct klitirqd_info* which, // returns 1 if priorities need to be changed to continue processing // pending tasklets. -static int do_litirq(struct klitirqd_info* which) +static int do_litirq(struct klmirqd_info* which) { u32 pending; int resched = 0; @@ -514,17 +514,17 @@ static int do_litirq(struct klitirqd_info* which) return(0); } - if(which->klitirqd != current) + if(which->klmirqd != current) { TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", __FUNCTION__, current->comm, current->pid, - which->klitirqd->comm, which->klitirqd->pid); + which->klmirqd->comm, which->klmirqd->pid); return(0); } if(!is_realtime(current)) { - TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n", + TRACE_CUR("%s: exiting early: klmirqd is not real-time. Sched Policy = %d\n", __FUNCTION__, current->policy); return(0); } @@ -567,7 +567,7 @@ static int do_litirq(struct klitirqd_info* which) } -static void do_work(struct klitirqd_info* which) +static void do_work(struct klmirqd_info* which) { unsigned long flags; work_func_t f; @@ -646,9 +646,9 @@ static int set_litmus_daemon_sched(void) /* set task params, mark as proxy thread, and init other data */ tsk_rt(current)->task_params = tp; tsk_rt(current)->is_proxy_thread = 1; - tsk_rt(current)->cur_klitirqd = NULL; - mutex_init(&tsk_rt(current)->klitirqd_sem); - atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD); + tsk_rt(current)->cur_klmirqd = NULL; + mutex_init(&tsk_rt(current)->klmirqd_sem); + atomic_set(&tsk_rt(current)->klmirqd_sem_stat, NOT_HELD); /* inform the OS we're SCHED_LITMUS -- sched_setscheduler_nocheck() calls litmus_admit_task(). 
*/ @@ -657,7 +657,7 @@ static int set_litmus_daemon_sched(void) return ret; } -static void enter_execution_phase(struct klitirqd_info* which, +static void enter_execution_phase(struct klmirqd_info* which, struct mutex* sem, struct task_struct* t) { @@ -670,14 +670,14 @@ static void enter_execution_phase(struct klitirqd_info* which, t->comm, t->pid); } -static void exit_execution_phase(struct klitirqd_info* which, +static void exit_execution_phase(struct klmirqd_info* which, struct mutex* sem, struct task_struct* t) { TRACE_CUR("%s: Exiting execution phase. " "Releasing semaphore of %s/%d\n", __FUNCTION__, t->comm, t->pid); - if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD) + if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) == HELD) { up_and_set_stat(current, NOT_HELD, sem); TRACE_CUR("%s: Execution phase exited! " @@ -691,9 +691,9 @@ static void exit_execution_phase(struct klitirqd_info* which, } /* main loop for klitsoftirqd */ -static int run_klitirqd(void* unused) +static int run_klmirqd(void* unused) { - struct klitirqd_info* which = &klitirqds[klitirqd_id(current)]; + struct klmirqd_info* which = &klmirqds[klmirqd_id(current)]; struct mutex* sem; struct task_struct* owner; @@ -705,7 +705,7 @@ static int run_klitirqd(void* unused) goto rt_failed; } - atomic_inc(&num_ready_klitirqds); + atomic_inc(&num_ready_klmirqds); set_current_state(TASK_INTERRUPTIBLE); @@ -793,7 +793,7 @@ static int run_klitirqd(void* unused) } __set_current_state(TASK_RUNNING); - atomic_dec(&num_ready_klitirqds); + atomic_dec(&num_ready_klmirqds); rt_failed: litmus_exit_task(current); @@ -802,57 +802,57 @@ rt_failed: } -struct klitirqd_launch_data +struct klmirqd_launch_data { int* cpu_affinity; struct work_struct work; }; /* executed by a kworker from workqueues */ -static void launch_klitirqd(struct work_struct *work) +static void launch_klmirqd(struct work_struct *work) { int i; - struct klitirqd_launch_data* launch_data = - container_of(work, struct klitirqd_launch_data, work); + struct klmirqd_launch_data* launch_data = + container_of(work, struct klmirqd_launch_data, work); - TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + TRACE("%s: Creating %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); /* create the daemon threads */ for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { if(launch_data->cpu_affinity) { - klitirqds[i].klitirqd = + klmirqds[i].klmirqd = kthread_create( - run_klitirqd, + run_klmirqd, /* treat the affinity as a pointer, we'll cast it back later */ (void*)(long long)launch_data->cpu_affinity[i], - "klitirqd_th%d/%d", + "klmirqd_th%d/%d", i, launch_data->cpu_affinity[i]); /* litmus will put is in the right cluster. 
*/ - kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]); + kthread_bind(klmirqds[i].klmirqd, launch_data->cpu_affinity[i]); } else { - klitirqds[i].klitirqd = + klmirqds[i].klmirqd = kthread_create( - run_klitirqd, + run_klmirqd, /* treat the affinity as a pointer, we'll cast it back later */ (void*)(long long)(-1), - "klitirqd_th%d", + "klmirqd_th%d", i); } } - TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + TRACE("%s: Launching %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); /* unleash the daemons */ for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { - wake_up_process(klitirqds[i].klitirqd); + wake_up_process(klmirqds[i].klmirqd); } if(launch_data->cpu_affinity) @@ -861,36 +861,36 @@ static void launch_klitirqd(struct work_struct *work) } -void spawn_klitirqd(int* affinity) +void spawn_klmirqd(int* affinity) { int i; - struct klitirqd_launch_data* delayed_launch; + struct klmirqd_launch_data* delayed_launch; - if(atomic_read(&num_ready_klitirqds) != 0) + if(atomic_read(&num_ready_klmirqds) != 0) { - TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n"); + TRACE("%s: At least one klmirqd is already running! Need to call kill_klmirqd()?\n"); return; } /* init the tasklet & work queues */ for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { - klitirqds[i].terminating = 0; - klitirqds[i].pending = 0; + klmirqds[i].terminating = 0; + klmirqds[i].pending = 0; - klitirqds[i].num_hi_pending.counter = 0; - klitirqds[i].num_low_pending.counter = 0; - klitirqds[i].num_work_pending.counter = 0; + klmirqds[i].num_hi_pending.counter = 0; + klmirqds[i].num_low_pending.counter = 0; + klmirqds[i].num_work_pending.counter = 0; - klitirqds[i].pending_tasklets_hi.head = NULL; - klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head; + klmirqds[i].pending_tasklets_hi.head = NULL; + klmirqds[i].pending_tasklets_hi.tail = &klmirqds[i].pending_tasklets_hi.head; - klitirqds[i].pending_tasklets.head = NULL; - klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head; + klmirqds[i].pending_tasklets.head = NULL; + klmirqds[i].pending_tasklets.tail = &klmirqds[i].pending_tasklets.head; - INIT_LIST_HEAD(&klitirqds[i].worklist); + INIT_LIST_HEAD(&klmirqds[i].worklist); - raw_spin_lock_init(&klitirqds[i].lock); + raw_spin_lock_init(&klmirqds[i].lock); } /* wait to flush the initializations to memory since other threads @@ -899,8 +899,8 @@ void spawn_klitirqd(int* affinity) /* tell a work queue to launch the threads. we can't make scheduling calls since we're in an atomic state. 
*/ - TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__); - delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC); + TRACE("%s: Setting callback up to launch klmirqds\n", __FUNCTION__); + delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC); if(affinity) { delayed_launch->cpu_affinity = @@ -913,57 +913,57 @@ void spawn_klitirqd(int* affinity) { delayed_launch->cpu_affinity = NULL; } - INIT_WORK(&delayed_launch->work, launch_klitirqd); + INIT_WORK(&delayed_launch->work, launch_klmirqd); schedule_work(&delayed_launch->work); } -void kill_klitirqd(void) +void kill_klmirqd(void) { - if(!klitirqd_is_dead()) + if(!klmirqd_is_dead()) { int i; - TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + TRACE("%s: Killing %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { - if(klitirqds[i].terminating != 1) + if(klmirqds[i].terminating != 1) { - klitirqds[i].terminating = 1; + klmirqds[i].terminating = 1; mb(); /* just to be sure? */ - flush_pending(klitirqds[i].klitirqd, NULL); + flush_pending(klmirqds[i].klmirqd, NULL); /* signal termination */ - kthread_stop(klitirqds[i].klitirqd); + kthread_stop(klmirqds[i].klmirqd); } } } } -int klitirqd_is_ready(void) +int klmirqd_is_ready(void) { - return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD); + return(atomic_read(&num_ready_klmirqds) == NR_LITMUS_SOFTIRQD); } -int klitirqd_is_dead(void) +int klmirqd_is_dead(void) { - return(atomic_read(&num_ready_klitirqds) == 0); + return(atomic_read(&num_ready_klmirqds) == 0); } -struct task_struct* get_klitirqd(unsigned int k_id) +struct task_struct* get_klmirqd(unsigned int k_id) { - return(klitirqds[k_id].klitirqd); + return(klmirqds[k_id].klmirqd); } -void flush_pending(struct task_struct* klitirqd_thread, +void flush_pending(struct task_struct* klmirqd_thread, struct task_struct* owner) { - unsigned int k_id = klitirqd_id(klitirqd_thread); - struct klitirqd_info *which = &klitirqds[k_id]; + unsigned int k_id = klmirqd_id(klmirqd_thread); + struct klmirqd_info *which = &klmirqds[k_id]; unsigned long flags; struct tasklet_struct *list; @@ -1129,7 +1129,7 @@ void flush_pending(struct task_struct* klitirqd_thread, static void ___litmus_tasklet_schedule(struct tasklet_struct *t, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup) { unsigned long flags; @@ -1153,7 +1153,7 @@ static void ___litmus_tasklet_schedule(struct tasklet_struct *t, if(!old_pending && wakeup) { - wakeup_litirqd_locked(which); /* wake up the klitirqd */ + wakeup_litirqd_locked(which); /* wake up the klmirqd */ } //__dump_state(which, "___litmus_tasklet_schedule: after queuing"); @@ -1172,11 +1172,11 @@ int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) { - TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); BUG(); } - if(likely(!klitirqds[k_id].terminating)) + if(likely(!klmirqds[k_id].terminating)) { /* Can't accept tasklets while we're processing a workqueue because they're handled by the same thread. This case is @@ -1184,10 +1184,10 @@ int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) TODO: Use a separate thread for work objects!!!!!! 
*/ - if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) { ret = 1; - ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1); + ___litmus_tasklet_schedule(t, &klmirqds[k_id], 1); } else { @@ -1202,7 +1202,7 @@ EXPORT_SYMBOL(__litmus_tasklet_schedule); static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup) { unsigned long flags; @@ -1224,7 +1224,7 @@ static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, if(!old_pending && wakeup) { - wakeup_litirqd_locked(which); /* wake up the klitirqd */ + wakeup_litirqd_locked(which); /* wake up the klmirqd */ } raw_spin_unlock_irqrestore(&which->lock, flags); @@ -1241,22 +1241,22 @@ int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) { - TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); BUG(); } - if(unlikely(!klitirqd_is_ready())) + if(unlikely(!klmirqd_is_ready())) { - TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); BUG(); } - if(likely(!klitirqds[k_id].terminating)) + if(likely(!klmirqds[k_id].terminating)) { - if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) { ret = 1; - ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1); + ___litmus_tasklet_hi_schedule(t, &klmirqds[k_id], 1); } else { @@ -1285,36 +1285,36 @@ int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_ if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) { - TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id); + TRACE("%s: No klmirqd_th%u!\n", __FUNCTION__, k_id); BUG(); } - if(unlikely(!klitirqd_is_ready())) + if(unlikely(!klmirqd_is_ready())) { - TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); BUG(); } - if(likely(!klitirqds[k_id].terminating)) + if(likely(!klmirqds[k_id].terminating)) { - raw_spin_lock(&klitirqds[k_id].lock); + raw_spin_lock(&klmirqds[k_id].lock); - if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) { ret = 1; // success! 
- t->next = klitirqds[k_id].pending_tasklets_hi.head; - klitirqds[k_id].pending_tasklets_hi.head = t; + t->next = klmirqds[k_id].pending_tasklets_hi.head; + klmirqds[k_id].pending_tasklets_hi.head = t; - old_pending = klitirqds[k_id].pending; - klitirqds[k_id].pending |= LIT_TASKLET_HI; + old_pending = klmirqds[k_id].pending; + klmirqds[k_id].pending |= LIT_TASKLET_HI; - atomic_inc(&klitirqds[k_id].num_hi_pending); + atomic_inc(&klmirqds[k_id].num_hi_pending); mb(); if(!old_pending) - wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */ + wakeup_litirqd_locked(&klmirqds[k_id]); /* wake up the klmirqd */ } else { @@ -1322,7 +1322,7 @@ int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_ __FUNCTION__); } - raw_spin_unlock(&klitirqds[k_id].lock); + raw_spin_unlock(&klmirqds[k_id].lock); } return(ret); } @@ -1332,7 +1332,7 @@ EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); static void ___litmus_schedule_work(struct work_struct *w, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup) { unsigned long flags; @@ -1352,7 +1352,7 @@ static void ___litmus_schedule_work(struct work_struct *w, if(!old_pending && wakeup) { - wakeup_litirqd_locked(which); /* wakeup the klitirqd */ + wakeup_litirqd_locked(which); /* wakeup the klmirqd */ } raw_spin_unlock_irqrestore(&which->lock, flags); @@ -1369,18 +1369,18 @@ int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) { - TRACE("%s: No klitirqd_th%u!\n", k_id); + TRACE("%s: No klmirqd_th%u!\n", k_id); BUG(); } - if(unlikely(!klitirqd_is_ready())) + if(unlikely(!klmirqd_is_ready())) { - TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); BUG(); } - if(likely(!klitirqds[k_id].terminating)) - ___litmus_schedule_work(w, &klitirqds[k_id], 1); + if(likely(!klmirqds[k_id].terminating)) + ___litmus_schedule_work(w, &klmirqds[k_id], 1); else ret = 0; return(ret); @@ -1388,34 +1388,34 @@ int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) EXPORT_SYMBOL(__litmus_schedule_work); -static int set_klitirqd_sem_status(unsigned long stat) +static int set_klmirqd_sem_status(unsigned long stat) { TRACE_CUR("SETTING STATUS FROM %d TO %d\n", - atomic_read(&tsk_rt(current)->klitirqd_sem_stat), + atomic_read(&tsk_rt(current)->klmirqd_sem_stat), stat); - atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat); + atomic_set(&tsk_rt(current)->klmirqd_sem_stat, stat); //mb(); return(0); } -static int set_klitirqd_sem_status_if_not_held(unsigned long stat) +static int set_klmirqd_sem_status_if_not_held(unsigned long stat) { - if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD) + if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) != HELD) { - return(set_klitirqd_sem_status(stat)); + return(set_klmirqd_sem_status(stat)); } return(-1); } void __down_and_reset_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_reset, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_reset, + enum klmirqd_sem_status to_set, struct mutex* sem) { #if 0 - struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); struct task_struct* task = container_of(param, struct task_struct, rt_param); TRACE_CUR("%s: entered. 
Locking semaphore of %s/%d\n", @@ -1423,8 +1423,8 @@ void __down_and_reset_and_set_stat(struct task_struct* t, #endif mutex_lock_sfx(sem, - set_klitirqd_sem_status_if_not_held, to_reset, - set_klitirqd_sem_status, to_set); + set_klmirqd_sem_status_if_not_held, to_reset, + set_klmirqd_sem_status, to_set); #if 0 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", __FUNCTION__, task->comm, task->pid); @@ -1432,11 +1432,11 @@ void __down_and_reset_and_set_stat(struct task_struct* t, } void down_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_set, struct mutex* sem) { #if 0 - struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); struct task_struct* task = container_of(param, struct task_struct, rt_param); TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", @@ -1445,7 +1445,7 @@ void down_and_set_stat(struct task_struct* t, mutex_lock_sfx(sem, NULL, 0, - set_klitirqd_sem_status, to_set); + set_klmirqd_sem_status, to_set); #if 0 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", @@ -1455,11 +1455,11 @@ void down_and_set_stat(struct task_struct* t, void up_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_set, struct mutex* sem) { #if 0 - struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); struct task_struct* task = container_of(param, struct task_struct, rt_param); TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n", @@ -1468,7 +1468,7 @@ void up_and_set_stat(struct task_struct* t, #endif mutex_unlock_sfx(sem, NULL, 0, - set_klitirqd_sem_status, to_set); + set_klmirqd_sem_status, to_set); #if 0 TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n", @@ -1479,33 +1479,33 @@ void up_and_set_stat(struct task_struct* t, -void release_klitirqd_lock(struct task_struct* t) +void release_klmirqd_lock(struct task_struct* t) { - if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD)) + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == HELD)) { struct mutex* sem; struct task_struct* owner = t; if(t->state == TASK_RUNNING) { - TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n"); + TRACE_TASK(t, "NOT giving up klmirqd_sem because we're not blocked!\n"); return; } if(likely(!tsk_rt(t)->is_proxy_thread)) { - sem = &tsk_rt(t)->klitirqd_sem; + sem = &tsk_rt(t)->klmirqd_sem; } else { - unsigned int k_id = klitirqd_id(t); - owner = klitirqds[k_id].current_owner; + unsigned int k_id = klmirqd_id(t); + owner = klmirqds[k_id].current_owner; - BUG_ON(t != klitirqds[k_id].klitirqd); + BUG_ON(t != klmirqds[k_id].klmirqd); if(likely(owner)) { - sem = &tsk_rt(owner)->klitirqd_sem; + sem = &tsk_rt(owner)->klmirqd_sem; } else { @@ -1514,7 +1514,7 @@ void release_klitirqd_lock(struct task_struct* t) // We had the rug pulled out from under us. Abort attempt // to reacquire the lock since our client no longer needs us. TRACE_CUR("HUH?! How did this happen?\n"); - atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD); return; } } @@ -1526,42 +1526,42 @@ void release_klitirqd_lock(struct task_struct* t) /* else if(is_realtime(t)) { - TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + TRACE_CUR("%s: Nothing to do. 
Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat); } */ } -int reacquire_klitirqd_lock(struct task_struct* t) +int reacquire_klmirqd_lock(struct task_struct* t) { int ret = 0; - if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE)) + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == NEED_TO_REACQUIRE)) { struct mutex* sem; struct task_struct* owner = t; if(likely(!tsk_rt(t)->is_proxy_thread)) { - sem = &tsk_rt(t)->klitirqd_sem; + sem = &tsk_rt(t)->klmirqd_sem; } else { - unsigned int k_id = klitirqd_id(t); - //struct task_struct* owner = klitirqds[k_id].current_owner; - owner = klitirqds[k_id].current_owner; + unsigned int k_id = klmirqd_id(t); + //struct task_struct* owner = klmirqds[k_id].current_owner; + owner = klmirqds[k_id].current_owner; - BUG_ON(t != klitirqds[k_id].klitirqd); + BUG_ON(t != klmirqds[k_id].klmirqd); if(likely(owner)) { - sem = &tsk_rt(owner)->klitirqd_sem; + sem = &tsk_rt(owner)->klmirqd_sem; } else { // We had the rug pulled out from under us. Abort attempt // to reacquire the lock since our client no longer needs us. - TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n"); - atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + TRACE_CUR("No longer needs to reacquire klmirqd_sem!\n"); + atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD); return(0); } } @@ -1573,7 +1573,7 @@ int reacquire_klitirqd_lock(struct task_struct* t) /* else if(is_realtime(t)) { - TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat); } */ diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index d04c6efa5f05..22586cde8255 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -506,7 +506,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) } #ifdef CONFIG_LITMUS_SOFTIRQD - down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem); + down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); #endif ++(reg->nr_owners); @@ -535,7 +535,7 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; #ifdef CONFIG_LITMUS_SOFTIRQD - struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); + struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); #endif if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { @@ -549,7 +549,7 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { if(reg->owners[i] == t) { #ifdef CONFIG_LITMUS_SOFTIRQD - flush_pending(klitirqd_th, t); + flush_pending(klmirqd_th, t); #endif if(reg->max_prio_owner == t) { reg->max_prio_owner = find_hp_owner(reg, t); @@ -559,7 +559,7 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) } #ifdef CONFIG_LITMUS_SOFTIRQD - up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem); + up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); #endif reg->owners[i] = NULL; diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 6746d4d6033e..44c8336c5061 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1143,7 +1143,7 @@ static int __increase_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ - /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ + /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) 
*/ if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { #endif TRACE_TASK(t, "inherits priority from %s/%d\n", @@ -1238,12 +1238,12 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str __increase_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klitirqd != NULL) + if(tsk_rt(t)->cur_klmirqd != NULL) { TRACE_TASK(t, "%s/%d inherits a new priority!\n", - tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); } #endif @@ -1347,12 +1347,12 @@ static void decrease_priority_inheritance(struct task_struct* t, __decrease_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klitirqd != NULL) + if(tsk_rt(t)->cur_klmirqd != NULL) { TRACE_TASK(t, "%s/%d decreases in priority!\n", - tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); } #endif @@ -1376,13 +1376,13 @@ static void decrease_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_SOFTIRQD /* called with IRQs off */ -static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, +static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { - cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + cedf_domain_t* cluster = task_cpu_cluster(klmirqd); - BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); raw_spin_lock(&cluster->cluster_lock); @@ -1391,18 +1391,18 @@ static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, if(old_owner) { // unreachable? - tsk_rt(old_owner)->cur_klitirqd = NULL; + tsk_rt(old_owner)->cur_klmirqd = NULL; } - TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n", new_owner->comm, new_owner->pid); - tsk_rt(new_owner)->cur_klitirqd = klitirqd; + tsk_rt(new_owner)->cur_klmirqd = klmirqd; } - __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio. + __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio. - __increase_priority_inheritance(klitirqd, + __increase_priority_inheritance(klmirqd, (tsk_rt(new_owner)->inh_task == NULL) ? 
new_owner : tsk_rt(new_owner)->inh_task); @@ -1412,21 +1412,21 @@ static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, /* called with IRQs off */ -static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd, +static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { - cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + cedf_domain_t* cluster = task_cpu_cluster(klmirqd); - BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); raw_spin_lock(&cluster->cluster_lock); - TRACE_TASK(klitirqd, "priority restored\n"); + TRACE_TASK(klmirqd, "priority restored\n"); - __decrease_priority_inheritance(klitirqd, new_owner); + __decrease_priority_inheritance(klmirqd, new_owner); - tsk_rt(old_owner)->cur_klitirqd = NULL; + tsk_rt(old_owner)->cur_klmirqd = NULL; raw_spin_unlock(&cluster->cluster_lock); } @@ -1859,7 +1859,7 @@ static long cedf_activate_plugin(void) } } - spawn_klitirqd(affinity); + spawn_klmirqd(affinity); kfree(affinity); } @@ -1907,8 +1907,8 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .allocate_aff_obs = cedf_allocate_affinity_observer, #endif #ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, - .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, + .increase_prio_klmirqd = increase_priority_inheritance_klmirqd, + .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd, #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet, diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 04b189e54b03..d52be9325044 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1154,7 +1154,7 @@ static int __increase_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ - /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ + /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) 
*/ if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { #endif TRACE_TASK(t, "inherits priority from %s/%d\n", @@ -1248,12 +1248,12 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str success = __increase_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klitirqd != NULL) + if(tsk_rt(t)->cur_klmirqd != NULL) { TRACE_TASK(t, "%s/%d inherits a new priority!\n", - tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); } #endif @@ -1358,12 +1358,12 @@ static void decrease_priority_inheritance(struct task_struct* t, success = __decrease_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klitirqd != NULL) + if(tsk_rt(t)->cur_klmirqd != NULL) { TRACE_TASK(t, "%s/%d decreases in priority!\n", - tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); } #endif @@ -1384,11 +1384,11 @@ static void decrease_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_SOFTIRQD /* called with IRQs off */ -static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, +static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { - BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); raw_spin_lock(&gsnedf_lock); @@ -1397,18 +1397,18 @@ static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, if(old_owner) { // unreachable? - tsk_rt(old_owner)->cur_klitirqd = NULL; + tsk_rt(old_owner)->cur_klmirqd = NULL; } - TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n", new_owner->comm, new_owner->pid); - tsk_rt(new_owner)->cur_klitirqd = klitirqd; + tsk_rt(new_owner)->cur_klmirqd = klmirqd; } - __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio. + __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio. - __increase_priority_inheritance(klitirqd, + __increase_priority_inheritance(klmirqd, (tsk_rt(new_owner)->inh_task == NULL) ? 
new_owner : tsk_rt(new_owner)->inh_task); @@ -1418,19 +1418,19 @@ static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, /* called with IRQs off */ -static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd, +static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { - BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); raw_spin_lock(&gsnedf_lock); - TRACE_TASK(klitirqd, "priority restored\n"); + TRACE_TASK(klmirqd, "priority restored\n"); - __decrease_priority_inheritance(klitirqd, new_owner); + __decrease_priority_inheritance(klmirqd, new_owner); - tsk_rt(old_owner)->cur_klitirqd = NULL; + tsk_rt(old_owner)->cur_klmirqd = NULL; raw_spin_unlock(&gsnedf_lock); } @@ -1923,7 +1923,7 @@ static long gsnedf_activate_plugin(void) #endif #ifdef CONFIG_LITMUS_SOFTIRQD - spawn_klitirqd(NULL); + spawn_klmirqd(NULL); #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -1966,8 +1966,8 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .allocate_aff_obs = gsnedf_allocate_affinity_observer, #endif #ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, - .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, + .increase_prio_klmirqd = increase_priority_inheritance_klmirqd, + .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd, #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet, diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index f9423861eb1f..cda67e0f6bc8 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -168,13 +168,13 @@ static int litmus_dummy___decrease_prio(struct task_struct* t, struct task_struc #endif #ifdef CONFIG_LITMUS_SOFTIRQD -static void litmus_dummy_increase_prio_klitirq(struct task_struct* klitirqd, +static void litmus_dummy_increase_prio_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { } -static void litmus_dummy_decrease_prio_klitirqd(struct task_struct* klitirqd, +static void litmus_dummy_decrease_prio_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner) { } @@ -264,8 +264,8 @@ struct sched_plugin linux_sched_plugin = { .__compare = litmus_dummy___compare, #endif #ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klitirqd = litmus_dummy_increase_prio_klitirqd, - .decrease_prio_klitirqd = litmus_dummy_decrease_prio_klitirqd, + .increase_prio_klmirqd = litmus_dummy_increase_prio_klmirqd, + .decrease_prio_klmirqd = litmus_dummy_decrease_prio_klmirqd, #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, @@ -328,8 +328,8 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(__compare); #endif #ifdef CONFIG_LITMUS_SOFTIRQD - CHECK(increase_prio_klitirqd); - CHECK(decrease_prio_klitirqd); + CHECK(increase_prio_klmirqd); + CHECK(decrease_prio_klmirqd); #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD CHECK(enqueue_pai_tasklet); -- cgit v1.2.2 From c8483ef0959672310bf4ebb72e1a308b00543f74 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 11 Dec 2012 22:01:01 -0500 Subject: make klmirqd work like aux tasks. checkpoint. this code is untested! 
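Before the diff: a rough sketch, for orientation only, of how the reworked interface below is meant to be used. The earlier patches address klmirqd daemons by a numeric k_id into a fixed, globally spawned pool; below, callers launch klmirqd threads on demand with launch_klmirqd_thread() and then address a specific thread by its task_struct pointer when scheduling tasklets or work. The sketch uses only the signatures declared in include/litmus/litmus_softirq.h further down; the callback semantics (running in the new thread's context and publishing its task_struct) and the 0-on-success return convention are assumptions, and every name prefixed with my_ is invented for illustration. It is not part of the commit.

/* Illustrative usage sketch; not part of the patch. */
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <litmus/litmus_softirq.h>

static struct task_struct *my_klmirqd; /* hypothetical handle to the spawned thread */

/* Assumed to run in the new klmirqd thread's context during its startup. */
static int my_klmirqd_init_cb(void *arg)
{
	my_klmirqd = current;
	return 0;
}

static klmirqd_callback_t my_cb = {
	.func = my_klmirqd_init_cb,
	.arg  = NULL,
};

static void my_route_bottom_half(struct tasklet_struct *t)
{
	/* cpu == -1 requests no CPU affinity; assume 0 is returned on success */
	if (!my_klmirqd && launch_klmirqd_thread(-1, &my_cb) != 0)
		return;

	/* once the callback has published the thread, aim tasklets at it */
	if (my_klmirqd)
		litmus_tasklet_schedule(t, my_klmirqd);
}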
--- include/linux/interrupt.h | 6 + include/litmus/aux_tasks.h | 10 - include/litmus/litmus_softirq.h | 164 ++--- include/litmus/nvidia_info.h | 35 +- include/litmus/rt_param.h | 70 +- include/litmus/sched_plugin.h | 11 - include/litmus/signal.h | 2 +- include/litmus/unistd_32.h | 5 +- include/litmus/unistd_64.h | 6 +- kernel/sched.c | 8 - kernel/softirq.c | 14 + litmus/Kconfig | 20 +- litmus/aux_tasks.c | 40 +- litmus/edf_common.c | 60 +- litmus/ikglp_lock.c | 12 +- litmus/jobs.c | 2 +- litmus/kfmlp_lock.c | 11 +- litmus/litmus.c | 48 +- litmus/litmus_softirq.c | 1460 ++++++++++++++------------------------- litmus/locking.c | 56 +- litmus/nvidia_info.c | 743 +++++++++++++++----- litmus/sched_cedf.c | 165 +---- litmus/sched_gsn_edf.c | 136 ++-- litmus/sched_plugin.c | 20 - 24 files changed, 1458 insertions(+), 1646 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 57a7bc8807be..9fc31289a1bb 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -507,6 +507,12 @@ struct tasklet_struct #endif }; +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; + #define DECLARE_TASKLET(name, func, data) \ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data } diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h index 87745c1c0df0..255bbafcc6b7 100644 --- a/include/litmus/aux_tasks.h +++ b/include/litmus/aux_tasks.h @@ -3,9 +3,6 @@ struct task_struct; -/* admit an aux task with default parameters */ -//int admit_aux_task(struct task_struct *t); - int make_aux_task_if_required(struct task_struct *t); /* call on an aux task when it exits real-time */ @@ -17,13 +14,6 @@ long enable_aux_task_owner(struct task_struct *t); /* call when an aux_owner exits real-time */ long disable_aux_task_owner(struct task_struct *t); - -/* collectivelly make all aux tasks in the process of leader inherit from hp */ -//int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp); - -/* collectivelly make all aux tasks in the process of leader inherit from hp */ -//int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp); - /* call when an aux_owner increases its priority */ int aux_task_owner_increase_priority(struct task_struct *t); diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h index 46fe89148505..52e3f7e74ab1 100644 --- a/include/litmus/litmus_softirq.h +++ b/include/litmus/litmus_softirq.h @@ -5,129 +5,113 @@ #include /* - Threaded tasklet handling for Litmus. Tasklets - are scheduled with the priority of the tasklet's - owner---that is, the RT task on behalf the tasklet - runs. - - Tasklets are current scheduled in FIFO order with - NO priority inheritance for "blocked" tasklets. + Threaded tasklet/workqueue handling for Litmus. + Items are scheduled in the following order: hi-tasklet, + lo-tasklet, workqueue. Items are scheduled in FIFO order + within each of these classes. klmirqd assumes the priority of the owner of the tasklet when the tasklet is next to execute. - Currently, hi-tasklets are scheduled before - low-tasklets, regardless of priority of low-tasklets. - And likewise, low-tasklets are scheduled before work - queue objects. This priority inversion probably needs - to be fixed, though it is not an issue if our work with - GPUs as GPUs are owned (and associated klmirqds) for - exclusive time periods, thus no inversions can - occur. 
+ The base-priority of a klmirqd thread is below all regular
+ real-time tasks, but above all other Linux scheduling
+ classes (klmirqd threads are within the SCHED_LITMUS class).
+ Regular real-time tasks may increase the priority of
+ a klmirqd thread, but klmirqd is unaware of this
+ (this was not the case in prior incarnations of klmirqd).
 */
-
-#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD
-
-/* Spawns NR_LITMUS_SOFTIRQD klmirqd daemons.
- Actual launch of threads is deffered to kworker's
- workqueue, so daemons will likely not be immediately
- running when this function returns, though the required
- data will be initialized.
-
- @affinity_set: an array expressing the processor affinity
- for each of the NR_LITMUS_SOFTIRQD daemons. May be set
- to NULL for global scheduling.
-
-
- Examples
-
- 8-CPU system with two CPU clusters:
- affinity[] = {0, 0, 0, 0, 3, 3, 3, 3}
- NOTE: Daemons not actually bound to specified CPU, but rather
- cluster in which the CPU resides.
-
- 8-CPU system, partitioned:
- affinity[] = {0, 1, 2, 3, 4, 5, 6, 7}
-
- FIXME: change array to a CPU topology or array of cpumasks
-
- */
-void spawn_klmirqd(int* affinity);
-
+/* Initialize klmirqd */
+void init_klmirqd(void);
 /* Raises a flag to tell klmirqds to terminate.
- Termination is async, so some threads may be running
- after function return. */
+ Termination is async, so some threads may be running
+ after function return. */
 void kill_klmirqd(void);
+void kill_klmirqd_thread(struct task_struct* klmirqd_thread);
 /* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqs are ready
- to handle tasklets. 0, otherwise.*/
+ to handle tasklets. 0, otherwise.*/
 int klmirqd_is_ready(void);
 /* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqs are ready
- to handle tasklets. 0, otherwise.*/
+ to handle tasklets. 0, otherwise.*/
 int klmirqd_is_dead(void);
-/* Flushes all pending work out to the OS for regular
- * tasklet/work processing of the specified 'owner'
- *
- * PRECOND: klmirqd_thread must have a clear entry
- * in the GPU registry, otherwise this call will become
- * a no-op as work will loop back to the klmirqd_thread.
- *
- * Pass NULL for owner to flush ALL pending items.
+
+typedef int (*klmirqd_cb_t) (void *arg);
+
+typedef struct
+{
+ klmirqd_cb_t func;
+ void* arg;
+} klmirqd_callback_t;
+
+/* Launches a klmirqd thread with the provided affinity.
+
+ Actual launch of threads is deferred to kworker's
+ workqueue, so daemons will likely not be immediately
+ running when this function returns, though the required
+ data will be initialized.
+
+ cpu == -1 for no affinity
 */
-void flush_pending(struct task_struct* klmirqd_thread,
- struct task_struct* owner);
+int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb);
-struct task_struct* get_klmirqd(unsigned int k_id);
+/* Flushes all pending work out to the OS for regular
+ * tasklet/work processing.
+ */ +void flush_pending(struct task_struct* klmirqd_thread); extern int __litmus_tasklet_schedule( struct tasklet_struct *t, - unsigned int k_id); + struct task_struct *klmirqd_thread); /* schedule a tasklet on klmirqd #k_id */ static inline int litmus_tasklet_schedule( struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { int ret = 0; - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - ret = __litmus_tasklet_schedule(t, k_id); + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + ret = __litmus_tasklet_schedule(t, klmirqd_thread); + } return(ret); } /* for use by __tasklet_schedule() */ static inline int _litmus_tasklet_schedule( struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { - return(__litmus_tasklet_schedule(t, k_id)); + return(__litmus_tasklet_schedule(t, klmirqd_thread)); } extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, - unsigned int k_id); + struct task_struct *klmirqd_thread); /* schedule a hi tasklet on klmirqd #k_id */ static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { int ret = 0; - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - ret = __litmus_tasklet_hi_schedule(t, k_id); + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + ret = __litmus_tasklet_hi_schedule(t, klmirqd_thread); + } return(ret); } /* for use by __tasklet_hi_schedule() */ static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { - return(__litmus_tasklet_hi_schedule(t, k_id)); + return(__litmus_tasklet_hi_schedule(t, klmirqd_thread)); } @@ -136,26 +120,27 @@ static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t, extern int __litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, - unsigned int k_id); + struct task_struct *klmirqd_thread); /* schedule a hi tasklet on klmirqd #k_id on next go-around */ /* PRECONDITION: Interrupts must be disabled. */ static inline int litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { int ret = 0; - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - ret = __litmus_tasklet_hi_schedule_first(t, k_id); + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + ret = __litmus_tasklet_hi_schedule_first(t, klmirqd_thread); + } return(ret); } /* for use by __tasklet_hi_schedule_first() */ static inline int _litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { - return(__litmus_tasklet_hi_schedule_first(t, k_id)); + return(__litmus_tasklet_hi_schedule_first(t, klmirqd_thread)); } @@ -164,36 +149,13 @@ static inline int _litmus_tasklet_hi_schedule_first( extern int __litmus_schedule_work( struct work_struct* w, - unsigned int k_id); + struct task_struct *klmirqd_thread); static inline int litmus_schedule_work( struct work_struct* w, - unsigned int k_id) + struct task_struct *klmirqd_thread) { - return(__litmus_schedule_work(w, k_id)); + return(__litmus_schedule_work(w, klmirqd_thread)); } - - -///////////// mutex operations for client threads. 
- -void down_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_set, - struct mutex* sem); - -void __down_and_reset_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_reset, - enum klmirqd_sem_status to_set, - struct mutex* sem); - -void up_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_set, - struct mutex* sem); - - - -void release_klmirqd_lock(struct task_struct* t); - -int reacquire_klmirqd_lock(struct task_struct* t); - #endif diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 97c9577141db..6f354c8b00ac 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h @@ -6,10 +6,9 @@ #include - -//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM -#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS + +/* Functions used for decoding NVIDIA blobs. */ int init_nvidia_info(void); void shutdown_nvidia_info(void); @@ -18,29 +17,33 @@ int is_nvidia_func(void* func_addr); void dump_nvidia_info(const struct tasklet_struct *t); - // Returns the Nvidia device # associated with provided tasklet and work_struct. u32 get_tasklet_nv_device_num(const struct tasklet_struct *t); u32 get_work_nv_device_num(const struct work_struct *t); -int init_nv_device_reg(void); -//int get_nv_device_id(struct task_struct* owner); - -int reg_nv_device(int reg_device_id, int register_device, struct task_struct *t); +/* Functions for figuring out the priority of GPU-using tasks */ struct task_struct* get_nv_max_device_owner(u32 target_device_id); -//int is_nv_device_owner(u32 target_device_id); - -void lock_nv_registry(u32 reg_device_id, unsigned long* flags); -void unlock_nv_registry(u32 reg_device_id, unsigned long* flags); -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD -void pai_check_priority_increase(struct task_struct *t, int reg_device_id); -void pai_check_priority_decrease(struct task_struct *t, int reg_device_id); +#ifdef CONFIG_LITMUS_SOFTIRQD +struct task_struct* get_nv_klmirqd_thread(u32 target_device_id); #endif -//void increment_nv_int_count(u32 device); +/* call when the GPU-holding task, t, blocks */ +long enable_gpu_owner(struct task_struct *t); + +/* call when the GPU-holding task, t, resumes */ +long disable_gpu_owner(struct task_struct *t); + +/* call when the GPU-holding task, t, increases its priority */ +int gpu_owner_increase_priority(struct task_struct *t); + +/* call when the GPU-holding task, t, decreases its priority */ +int gpu_owner_decrease_priority(struct task_struct *t); + + +int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t); #endif diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 47301c04d862..c8ee64569dbb 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -128,6 +128,10 @@ struct control_page { #include #include +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + struct _rt_domain; struct bheap_node; struct release_heap; @@ -205,6 +209,38 @@ typedef struct avg_est{ lt_t avg; } avg_est_t; + + +#ifdef CONFIG_LITMUS_SOFTIRQD +//struct tasklet_head +//{ +// struct tasklet_struct *head; +// struct tasklet_struct **tail; +//}; + +struct klmirqd_info +{ + struct task_struct* klmirqd; + struct task_struct* current_owner; + unsigned int terminating:1; + + raw_spinlock_t lock; + + u32 pending; + atomic_t num_hi_pending; + atomic_t num_low_pending; + atomic_t num_work_pending; + + /* in order of priority */ + struct tasklet_head pending_tasklets_hi; + struct tasklet_head pending_tasklets; + struct list_head worklist; 
+ + struct list_head klmirqd_reg; +}; +#endif + + /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must * be explicitly set up before the task set is launched. @@ -221,34 +257,21 @@ struct rt_param { #ifdef CONFIG_LITMUS_SOFTIRQD /* proxy threads have minimum priority by default */ - unsigned int is_proxy_thread:1; - - /* pointer to klmirqd currently working on this - task_struct's behalf. only set by the task pointed - to by klmirqd. + unsigned int is_interrupt_thread:1; - ptr only valid if is_proxy_thread == 0 - */ - struct task_struct* cur_klmirqd; - - /* Used to implement mutual execution exclusion between - * job and klmirqd execution. Job must always hold - * it's klmirqd_sem to execute. klmirqd instance - * must hold the semaphore before executing on behalf - * of a job. - */ - struct mutex klmirqd_sem; - - /* status of held klmirqd_sem, even if the held klmirqd_sem is from - another task (only proxy threads do this though). + /* pointer to data used by klmirqd thread. + * + * ptr only valid if is_interrupt_thread == 1 */ - atomic_t klmirqd_sem_stat; + struct klmirqd_info* klmirqd_info; #endif #ifdef CONFIG_LITMUS_NVIDIA /* number of top-half interrupts handled on behalf of current job */ atomic_t nv_int_count; long unsigned int held_gpus; // bitmap of held GPUs. + struct binheap_node gpu_owner_node; // just one GPU for now... + unsigned int hide_from_gpu:1; #ifdef CONFIG_LITMUS_AFFINITY_LOCKING avg_est_t gpu_migration_est[MIG_LAST+1]; @@ -370,6 +393,13 @@ struct rt_param { struct control_page * ctrl_page; }; +//#ifdef CONFIG_LITMUS_SOFTIRQD +//struct klmirqd_data +//{ +// struct binheap klmirqd_users; +//}; +//#endif + #ifdef CONFIG_REALTIME_AUX_TASKS struct aux_data { diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index e8127f427d56..a13d1a2992fe 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -79,12 +79,6 @@ typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh, raw_spinlock_t *to_unlock, unsigned long irqflags); -typedef void (*increase_prio_klitirq_t)(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner); -typedef void (*decrease_prio_klmirqd_t)(struct task_struct* klmirqd, - struct task_struct* old_owner); - typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet); typedef void (*change_prio_pai_tasklet_t)(struct task_struct *old_prio, @@ -166,11 +160,6 @@ struct sched_plugin { #ifdef CONFIG_LITMUS_AFFINITY_LOCKING allocate_affinity_observer_t allocate_aff_obs; #endif - -#ifdef CONFIG_LITMUS_SOFTIRQD - increase_prio_klitirq_t increase_prio_klmirqd; - decrease_prio_klmirqd_t decrease_prio_klmirqd; -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD enqueue_pai_tasklet_t enqueue_pai_tasklet; change_prio_pai_tasklet_t change_prio_pai_tasklet; diff --git a/include/litmus/signal.h b/include/litmus/signal.h index b3d82b294984..38c3207951e0 100644 --- a/include/litmus/signal.h +++ b/include/litmus/signal.h @@ -9,7 +9,7 @@ /* Signals used by Litmus to asynchronously communicate events * to real-time tasks. - * + * * Signal values overlap with [SIGRTMIN, SIGRTMAX], so beware of * application-level conflicts when dealing with COTS user-level * code. 
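For reference while reading the scheduler and edf_common changes that follow: with the rt_param fields added above, a klmirqd thread is identified by its is_interrupt_thread bit, keeps its queue state behind tsk_rt(t)->klmirqd_info, and competes at a real-time priority only while inh_task is set. A minimal sketch of the resulting tests, assuming CONFIG_LITMUS_SOFTIRQD is enabled; the helper names are invented, and the patch's own version of this classing logic appears in litmus/edf_common.c later in the diff.

/* Illustrative helpers; not part of the patch. */
#include <linux/sched.h>
#include <litmus/litmus.h>   /* tsk_rt() */
#include <litmus/rt_param.h>

/* A klmirqd thread at base priority sits below all regular LITMUS^RT tasks;
 * it rises only while inheriting from a client (inh_task != NULL). */
static inline int my_klmirqd_at_base_prio(struct task_struct *t)
{
	return tsk_rt(t)->is_interrupt_thread && !tsk_rt(t)->inh_task;
}

/* The klmirqd_info pointer is meaningful only for interrupt threads. */
static inline struct klmirqd_info *my_klmirqd_info_of(struct task_struct *t)
{
	return tsk_rt(t)->is_interrupt_thread ? tsk_rt(t)->klmirqd_info : NULL;
}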
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index c86b743408ed..7265ffadf555 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -19,8 +19,7 @@ #define __NR_null_call __LSC(11) #define __NR_litmus_dgl_lock __LSC(12) #define __NR_litmus_dgl_unlock __LSC(13) -#define __NR_register_nv_device __LSC(14) -#define __NR_set_aux_tasks _LSC(15) +#define __NR_set_aux_tasks _LSC(14) -#define NR_litmus_syscalls 16 +#define NR_litmus_syscalls 15 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index 3825bc129dbd..51e730124dde 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -33,10 +33,8 @@ __SYSCALL(__NR_null_call, sys_null_call) __SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock) #define __NR_litmus_dgl_unlock __LSC(13) __SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock) -#define __NR_register_nv_device __LSC(14) -__SYSCALL(__NR_register_nv_device, sys_register_nv_device) -#define __NR_set_aux_tasks __LSC(15) +#define __NR_set_aux_tasks __LSC(14) __SYSCALL(__NR_set_aux_tasks, sys_set_aux_tasks) -#define NR_litmus_syscalls 16 +#define NR_litmus_syscalls 15 diff --git a/kernel/sched.c b/kernel/sched.c index 251c89eaf24e..840f87bce097 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4340,10 +4340,6 @@ need_resched: rcu_note_context_switch(cpu); prev = rq->curr; -#ifdef CONFIG_LITMUS_SOFTIRQD - release_klitirqd_lock(prev); -#endif - /* LITMUS^RT: quickly re-evaluate the scheduling decision * if the previous one is no longer valid after CTX. */ @@ -4444,10 +4440,6 @@ litmus_need_resched_nonpreemptible: if (need_resched()) goto need_resched; -#ifdef LITMUS_SOFTIRQD - reacquire_klitirqd_lock(prev); -#endif - #ifdef CONFIG_LITMUS_PAI_SOFTIRQD litmus->run_tasklets(prev); #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index b013046e8c36..053aec196a50 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -403,11 +403,13 @@ void open_softirq(int nr, void (*action)(struct softirq_action *)) /* * Tasklets */ +/* struct tasklet_head { struct tasklet_struct *head; struct tasklet_struct **tail; }; +*/ static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); @@ -522,6 +524,11 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) #ifdef CONFIG_LITMUS_NVIDIA if(is_nvidia_func(t->func)) { +#if 1 + // do nvidia tasklets right away and return + if(__do_nv_now(t)) + return; +#else u32 nvidia_device = get_tasklet_nv_device_num(t); // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", // __FUNCTION__, nvidia_device,litmus_clock()); @@ -564,6 +571,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } } unlock_nv_registry(nvidia_device, &flags); +#endif } #endif @@ -590,6 +598,11 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) #ifdef CONFIG_LITMUS_NVIDIA if(is_nvidia_func(t->func)) { +#if 1 + // do nvidia tasklets right away and return + if(__do_nv_now(t)) + return; +#else u32 nvidia_device = get_tasklet_nv_device_num(t); // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", // __FUNCTION__, nvidia_device,litmus_clock()); @@ -632,6 +645,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) } } unlock_nv_registry(nvidia_device, &flags); +#endif } #endif diff --git a/litmus/Kconfig b/litmus/Kconfig index f2434b87239b..9aeae659ae32 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -382,7 +382,7 @@ menu "Interrupt Handling" choice prompt "Scheduling of interrupt bottom-halves in Litmus." 
default LITMUS_SOFTIRQD_NONE - depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ + depends on LITMUS_LOCKING help Schedule tasklets with known priorities in Litmus. @@ -398,7 +398,7 @@ config LITMUS_SOFTIRQD specifically dispatched to these workers. (Softirqs for Litmus tasks are not magically redirected to klmirqd.) - G-EDF/RM, C-EDF/RM ONLY for now! + G-EDF, C-EDF ONLY for now! config LITMUS_PAI_SOFTIRQD @@ -409,19 +409,11 @@ config LITMUS_PAI_SOFTIRQD at the cost of non-preemptive durations of bottom half processing. - G-EDF/RM, C-EDF/RM ONLY for now! + G-EDF, C-EDF ONLY for now! endchoice -config NR_LITMUS_SOFTIRQD - int "Number of klmirqd." - depends on LITMUS_SOFTIRQD - range 1 4096 - default "1" - help - Should be <= to the number of CPUs in your system. - config LITMUS_NVIDIA bool "Litmus handling of NVIDIA interrupts." default n @@ -445,7 +437,7 @@ config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT config NV_DEVICE_NUM int "Number of NVIDIA GPUs." depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD - range 1 4096 + range 1 16 default "1" help Should be (<= to the number of CPUs) and @@ -453,11 +445,11 @@ config NV_DEVICE_NUM config NV_MAX_SIMULT_USERS int "Maximum number of threads sharing a GPU simultanously" - depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD + depends on LITMUS_NVIDIA range 1 3 default "2" help - Should be equal to the #copy_engines + #execution_engines + Should be at least equal to the #copy_engines + #execution_engines of the GPUs in your system. Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?) diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index 20f477f6e3bc..ef26bba3be77 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -54,7 +54,7 @@ int exit_aux_task(struct task_struct *t) TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, t->group_leader->comm, t->group_leader->pid); tsk_rt(t)->is_aux_task = 0; - + #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_del(&tsk_rt(t)->aux_task_node); if (tsk_rt(t)->inh_task) { @@ -218,36 +218,36 @@ int make_aux_task_if_required(struct task_struct *t) { struct task_struct *leader; int retval = 0; - - read_lock_irq(&tasklist_lock); - + + read_lock_irq(&tasklist_lock); + leader = t->group_leader; if(!tsk_aux(leader)->initialized || !tsk_aux(leader)->aux_future) { goto out; } - + TRACE_CUR("Making %s/%d in %s/%d an aux thread.\n", t->comm, t->pid, leader->comm, leader->pid); - + INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); - + retval = admit_aux_task(t); if (retval == 0) { tsk_rt(t)->is_aux_task = 1; - -#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE + +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); - + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - + TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid); - + retval = litmus->__increase_prio(t, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); - + if (retval != 0) { /* don't know how to recover from bugs with prio inheritance. better just crash. 
*/ read_unlock_irq(&tasklist_lock); @@ -256,7 +256,7 @@ int make_aux_task_if_required(struct task_struct *t) } #endif } - + out: read_unlock_irq(&tasklist_lock); @@ -385,7 +385,7 @@ static long __do_enable_aux_tasks(int flags) if (flags & AUX_FUTURE) { tsk_aux(leader)->aux_future = 1; } - + t = leader; do { if (!tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->is_aux_task) { @@ -398,22 +398,22 @@ static long __do_enable_aux_tasks(int flags) TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", leader->comm, leader->pid, t->comm, t->pid, tsk_rt(t)->task_params.period); - + /* inspect period to see if it is an rt task */ if (tsk_rt(t)->task_params.period == 0) { if (flags && AUX_CURRENT) { if (!tsk_rt(t)->is_aux_task) { int admit_ret; - + TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); admit_ret = admit_aux_task(t); - + if (admit_ret == 0) { /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ tsk_rt(t)->is_aux_task = 1; aux_tasks_added = 1; - + #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); #endif @@ -464,7 +464,7 @@ static long __do_disable_aux_tasks(int flags) if (flags & AUX_FUTURE) { tsk_aux(leader)->aux_future = 0; } - + if (flags & AUX_CURRENT) { t = leader; do { diff --git a/litmus/edf_common.c b/litmus/edf_common.c index c279bf12a7f5..27b728a55669 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -73,6 +73,22 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return first && !second; } + /* There is some goofy stuff in this code here. There are three subclasses + * within the SCHED_LITMUS scheduling class: + * 1) Auxiliary tasks: COTS helper threads from the application level that + * are forced to be real-time. + * 2) klmirqd interrupt threads: Litmus threaded interrupt handlers. + * 3) Normal Litmus tasks. + * + * At their base priorities, #3 > #2 > #1. However, #1 and #2 threads might + * inherit a priority from a task of #3. + * + * The code proceeds in the following manner: + * 1) Make aux and klmirqd threads with base-priorities have low priorities. + * 2) Determine effective priorities. + * 3) Perform priority comparison. Favor #3 over #1 and #2 in case of tie. + */ + #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) /* run aux tasks at max priority */ @@ -109,7 +125,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return temp; } } - + if (first->rt_param.is_aux_task && second->rt_param.is_aux_task && first->rt_param.inh_task == second->rt_param.inh_task) { // inh_task is !NULL for both tasks since neither was a lo_aux task // Both aux tasks inherit from the same task, so tie-break @@ -120,6 +136,36 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } #endif +#ifdef CONFIG_LITMUS_SOFTIRQD + { + int first_lo_klmirqd = first->rt_param.is_interrupt_thread && !first->rt_param.inh_task; + int second_lo_klmirqd = second->rt_param.is_interrupt_thread && !second->rt_param.inh_task; + + /* prioritize aux tasks without inheritance below real-time tasks */ + if (first_lo_klmirqd || second_lo_klmirqd) { + // one of these is an klmirqd thread without inheritance. + if(first_lo_klmirqd && second_lo_klmirqd) { + TRACE_CUR("klmirqd tie break!\n"); // tie-break by BASE priority of the aux tasks + goto klmirqd_tie_break; + } + else { + // make the klmirqd thread (second) lowest priority real-time task + int temp = (first_lo_klmirqd) ? 
!is_realtime(second) : !is_realtime(first); + TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); + return temp; + } + } + + if (first->rt_param.is_interrupt_thread && second->rt_param.is_interrupt_thread && + first->rt_param.inh_task == second->rt_param.inh_task) { // inh_task is !NULL for both tasks since neither was a lo_klmirqd task + // Both klmirqd tasks inherit from the same task, so tie-break + // by base priority of the klmirqd tasks. + TRACE_CUR("klmirqd tie break!\n"); + goto klmirqd_tie_break; + } + } +#endif + #ifdef CONFIG_LITMUS_LOCKING /* Check for EFFECTIVE priorities. Change task @@ -161,7 +207,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) #endif aux_tie_break: - +klmirqd_tie_break: + if (!is_realtime(second_task)) { return 1; } @@ -230,15 +277,13 @@ aux_tie_break: } else if (first_task->pid == second_task->pid) { #ifdef CONFIG_LITMUS_SOFTIRQD - if (first_task->rt_param.is_proxy_thread < - second_task->rt_param.is_proxy_thread) { + if (first_task->rt_param.is_interrupt_thread < second_task->rt_param.is_interrupt_thread) { return 1; } - else if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { + else if (first_task->rt_param.is_interrupt_thread == second_task->rt_param.is_interrupt_thread) { #endif #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) - /* is this dead code? */ if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { return 1; } @@ -246,8 +291,7 @@ aux_tie_break: #endif /* Something could be wrong if you get this far. */ - if (unlikely(first->rt_param.inh_task == - second->rt_param.inh_task)) { + if (unlikely(first->rt_param.inh_task == second->rt_param.inh_task)) { /* Both tasks have the same inherited priority. * Likely in a bug-condition. */ diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index b29828344dd1..a4ae74331782 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -1960,11 +1960,11 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { - TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", - NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); -// return(NULL); - } +// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { +// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", +// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); +//// return(NULL); +// } ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); if(!ikglp_aff) { @@ -2124,7 +2124,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t if(aff->q_info[i].q->count < max_fifo_len) { int want = 0; - lt_t migration = + lt_t migration = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); diff --git a/litmus/jobs.c b/litmus/jobs.c index 9fe4eb1fa168..8593a8d2f107 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c @@ -30,7 +30,7 @@ void prepare_for_next_period(struct task_struct *t) * release and deadline. Lateness may be negative. 
*/ t->rt_param.job_params.lateness = - (long long)litmus_clock() - + (long long)litmus_clock() - (long long)t->rt_param.job_params.deadline; setup_release(t, get_release(t) + get_rt_period(t)); diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c index ab472330095d..785a095275e6 100644 --- a/litmus/kfmlp_lock.c +++ b/litmus/kfmlp_lock.c @@ -587,11 +587,11 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { - TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", - NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); -// return(NULL); - } +// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { +// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", +// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); +//// return(NULL); +// } kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); if(!kfmlp_aff) { @@ -829,6 +829,7 @@ void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* f reg_nv_device(gpu, 1, t); // register + tsk_rt(t)->suspend_gpu_tracker_on_block = 0; reset_gpu_tracker(t); start_gpu_tracker(t); diff --git a/litmus/litmus.c b/litmus/litmus.c index 3b8017397e80..fa244ba53e22 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -60,28 +60,6 @@ void bheap_node_free(struct bheap_node* hn) struct release_heap* release_heap_alloc(int gfp_flags); void release_heap_free(struct release_heap* rh); -#ifdef CONFIG_LITMUS_NVIDIA -/* - * sys_register_nv_device - * @nv_device_id: The Nvidia device id that the task want to register - * @reg_action: set to '1' to register the specified device. zero otherwise. - * Syscall for register task's designated nvidia device into NV_DEVICE_REG array - * Returns EFAULT if nv_device_id is out of range. - * 0 if success - */ -asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) -{ - /* register the device to caller (aka 'current') */ - return(reg_nv_device(nv_device_id, reg_action, current)); -} -#else -asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) -{ - return(-EINVAL); -} -#endif - - /* * sys_set_task_rt_param * @pid: Pid of the task which scheduling parameters must be changed @@ -393,22 +371,11 @@ static void reinit_litmus_state(struct task_struct* p, int restore) // WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - /* We probably should not have any tasklets executing for - * us at this time. - */ - WARN_ON(p->rt_param.cur_klmirqd); - WARN_ON(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD); - - if(p->rt_param.cur_klmirqd) - flush_pending(p->rt_param.cur_klmirqd, p); - - if(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD) - up_and_set_stat(p, NOT_HELD, &p->rt_param.klmirqd_sem); -#endif #ifdef CONFIG_LITMUS_NVIDIA WARN_ON(p->rt_param.held_gpus != 0); + + INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node); #endif /* Cleanup everything else. 
*/ @@ -477,11 +444,9 @@ long __litmus_admit_task(struct task_struct* tsk) //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler #endif #ifdef CONFIG_LITMUS_SOFTIRQD - /* proxy thread off by default */ - tsk_rt(tsk)->is_proxy_thread = 0; - tsk_rt(tsk)->cur_klmirqd = NULL; - mutex_init(&tsk_rt(tsk)->klmirqd_sem); - atomic_set(&tsk_rt(tsk)->klmirqd_sem_stat, NOT_HELD); + /* not an interrupt thread by default */ + tsk_rt(tsk)->is_interrupt_thread = 0; + tsk_rt(tsk)->klmirqd_info = NULL; #endif retval = litmus->admit_task(tsk); @@ -580,8 +545,7 @@ int switch_sched_plugin(struct sched_plugin* plugin) cpu_relax(); #ifdef CONFIG_LITMUS_SOFTIRQD - if(!klmirqd_is_dead()) - { + if (!klmirqd_is_dead()) { kill_klmirqd(); } #endif diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 73a3053e662b..44e2d38ad982 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -18,10 +18,6 @@ /* TODO: Remove unneeded mb() and other barriers. */ - -/* counts number of daemons ready to handle litmus irqs. */ -static atomic_t num_ready_klmirqds = ATOMIC_INIT(0); - enum pending_flags { LIT_TASKLET_LOW = 0x1, @@ -29,35 +25,313 @@ enum pending_flags LIT_WORK = LIT_TASKLET_HI<<1 }; -/* only support tasklet processing for now. */ -struct tasklet_head +struct klmirqd_registration { - struct tasklet_struct *head; - struct tasklet_struct **tail; + raw_spinlock_t lock; + u32 nr_threads; + unsigned int initialized:1; + unsigned int shuttingdown:1; + struct list_head threads; }; -struct klmirqd_info +static atomic_t klmirqd_id_gen = ATOMIC_INIT(0); + +static struct klmirqd_registration klmirqd_state; + + + +void init_klmirqd(void) +{ + raw_spin_lock_init(&klmirqd_state.lock); + + klmirqd_state.nr_threads = 0; + klmirqd_state.initialized = 1; + klmirqd_state.shuttingdown = 0; + INIT_LIST_HEAD(&klmirqd_state.threads); +} + +static int __klmirqd_is_ready(void) +{ + return (klmirqd_state.initialized == 1 && klmirqd_state.shuttingdown == 0); +} + +int klmirqd_is_ready(void) +{ + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + ret = __klmirqd_is_ready(); + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); + + return ret; +} + +int klmirqd_is_dead(void) +{ + return(!klmirqd_is_ready()); +} + + +void kill_klmirqd(void) { - struct task_struct* klmirqd; - struct task_struct* current_owner; - int terminating; + if(!klmirqd_is_dead()) + { + unsigned long flags; + struct list_head *pos; + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + TRACE("%s: Killing all klmirqd threads! (%d of them)\n", __FUNCTION__, klmirqd_state.nr_threads); + klmirqd_state.shuttingdown = 1; - raw_spinlock_t lock; + list_for_each(pos, &klmirqd_state.threads) { + struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg); - u32 pending; - atomic_t num_hi_pending; - atomic_t num_low_pending; - atomic_t num_work_pending; + if(info->terminating != 1) + { + info->terminating = 1; + mb(); /* just to be sure?
*/ + flush_pending(info->klmirqd); - /* in order of priority */ - struct tasklet_head pending_tasklets_hi; - struct tasklet_head pending_tasklets; - struct list_head worklist; + /* signal termination */ + kthread_stop(info->klmirqd); + } + } + + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); + } +} + + + +void kill_klmirqd_thread(struct task_struct* klmirqd_thread) +{ + unsigned long flags; + struct klmirqd_info* info; + + if (!tsk_rt(klmirqd_thread)->is_interrupt_thread) { + TRACE("%s/%d is not a klmirqd thread\n", klmirqd_thread->comm, klmirqd_thread->pid); + return; + } + + TRACE("%s: Killing klmirqd thread %s/%d\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + info = tsk_rt(klmirqd_thread)->klmirqd_info; + + if(info->terminating != 1) { + info->terminating = 1; + mb(); + + flush_pending(klmirqd_thread); + kthread_stop(klmirqd_thread); + } + + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); +} + + + +struct klmirqd_launch_data +{ + int cpu_affinity; + klmirqd_callback_t* cb; + struct work_struct work; }; -/* one list for each klmirqd */ -static struct klmirqd_info klmirqds[NR_LITMUS_SOFTIRQD]; +static int run_klmirqd(void* callback); + + +/* executed by a kworker from workqueues */ +static void __launch_klmirqd_thread(struct work_struct *work) +{ + int id; + struct task_struct* thread = NULL; + struct klmirqd_launch_data* launch_data = + container_of(work, struct klmirqd_launch_data, work); + + TRACE("%s: Creating klmirqd thread\n", __FUNCTION__); + + id = atomic_inc_return(&klmirqd_id_gen); + + if (launch_data->cpu_affinity != -1) { + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + "klmirqd_th%d/%d", + id, + launch_data->cpu_affinity); + + /* litmus will put is in the right cluster. */ + kthread_bind(thread, launch_data->cpu_affinity); + + TRACE("%s: Launching klmirqd_th%d/%d\n", __FUNCTION__, id, launch_data->cpu_affinity); + } + else { + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + "klmirqd_th%d", + id); + + TRACE("%s: Launching klmirqd_th%d\n", __FUNCTION__, id); + } + + if (thread) { + wake_up_process(thread); + } + else { + TRACE("Could not create klmirqd/%d thread!\n", id); + } + + kfree(launch_data); +} + + +int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb) +{ + struct klmirqd_launch_data* delayed_launch; + + if (!klmirqd_is_ready()) { + TRACE("klmirqd is not ready. Check that it was initialized!\n"); + return -1; + } + + /* tell a work queue to launch the threads. we can't make scheduling + calls since we're in an atomic state. 
*/ + delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC); + delayed_launch->cpu_affinity = cpu; + delayed_launch->cb = cb; + INIT_WORK(&delayed_launch->work, __launch_klmirqd_thread); + schedule_work(&delayed_launch->work); + + return 0; +} + + + + +#define KLMIRQD_SLICE_NR_JIFFIES 1 +#define KLMIRQD_SLICE_NS ((NSEC_PER_SEC / HZ) * KLMIRQD_SLICE_NR_JIFFIES) + +static int set_litmus_daemon_sched(struct task_struct* tsk) +{ + int ret = 0; + + struct rt_task tp = { + .period = KLMIRQD_SLICE_NS, /* dummy one-jiffy period */ + .relative_deadline = KLMIRQD_SLICE_NS, + .exec_cost = KLMIRQD_SLICE_NS, + .phase = 0, + .cpu = task_cpu(current), + .budget_policy = NO_ENFORCEMENT, + .budget_signal_policy = NO_SIGNALS, + .cls = RT_CLASS_BEST_EFFORT + }; + + struct sched_param param = { .sched_priority = 0}; + + TRACE_CUR("Setting %s/%d as daemon thread.\n", tsk->comm, tsk->pid); + + /* set task params */ + tsk_rt(tsk)->task_params = tp; + tsk_rt(tsk)->is_interrupt_thread = 1; + + /* inform the OS we're SCHED_LITMUS -- + sched_setscheduler_nocheck() calls litmus_admit_task(). */ + sched_setscheduler_nocheck(tsk, SCHED_LITMUS, &param); + + return ret; +} + +static int register_klmirqd(struct task_struct* tsk) +{ + int retval = 0; + unsigned long flags; + struct klmirqd_info *info = NULL; + + if (!tsk_rt(tsk)->is_interrupt_thread) { + TRACE("Only proxy threads already running in Litmus may become klmirqd threads!\n"); + WARN_ON(1); + retval = -1; + goto out; + } + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + if (!__klmirqd_is_ready()) { + TRACE("klmirqd is not ready! Did you forget to initialize it?\n"); + WARN_ON(1); + retval = -1; + goto out_unlock; + } + + /* allocate and initialize klmirqd data for the thread */ + info = kmalloc(sizeof(struct klmirqd_info), GFP_KERNEL); + if (!info) { + TRACE("Failed to allocate klmirqd_info struct!\n"); + retval = -1; /* todo: pick better code */ + goto out_unlock; + } + memset(info, 0, sizeof(struct klmirqd_info)); + info->klmirqd = tsk; + info->pending_tasklets_hi.tail = &info->pending_tasklets_hi.head; + info->pending_tasklets.tail = &info->pending_tasklets.head; + INIT_LIST_HEAD(&info->worklist); + INIT_LIST_HEAD(&info->klmirqd_reg); + raw_spin_lock_init(&info->lock); + + + /* now register with klmirqd */ + list_add_tail(&info->klmirqd_reg, &klmirqd_state.threads); + ++klmirqd_state.nr_threads; + + /* update the task struct to point to klmirqd info */ + tsk_rt(tsk)->klmirqd_info = info; + +out_unlock: + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); + +out: + return retval; +} + +static int unregister_klmirqd(struct task_struct* tsk) +{ + int retval = 0; + unsigned long flags; + struct klmirqd_info *info = tsk_rt(tsk)->klmirqd_info; + + if (!tsk_rt(tsk)->is_interrupt_thread || !info) { + TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid); + WARN_ON(1); + retval = -1; + goto out; + } + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + /* remove the entry in the klmirqd thread list */ + list_del(&info->klmirqd_reg); + --klmirqd_state.nr_threads; + + /* remove link to klmirqd info from thread */ + tsk_rt(tsk)->klmirqd_info = NULL; + + /* clean up memory */ + kfree(info); + + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); + +out: + return retval; +} + @@ -67,35 +341,50 @@ int proc_read_klmirqd_stats(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = snprintf(page, PAGE_SIZE, - "num ready klmirqds: %d\n\n", - atomic_read(&num_ready_klmirqds)); - - if(klmirqd_is_ready()) -
{ - int i; - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - len += - snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ - "klmirqd_th%d: %s/%d\n" - "\tcurrent_owner: %s/%d\n" - "\tpending: %x\n" - "\tnum hi: %d\n" - "\tnum low: %d\n" - "\tnum work: %d\n\n", - i, - klmirqds[i].klmirqd->comm, klmirqds[i].klmirqd->pid, - (klmirqds[i].current_owner != NULL) ? - klmirqds[i].current_owner->comm : "(null)", - (klmirqds[i].current_owner != NULL) ? - klmirqds[i].current_owner->pid : 0, - klmirqds[i].pending, - atomic_read(&klmirqds[i].num_hi_pending), - atomic_read(&klmirqds[i].num_low_pending), - atomic_read(&klmirqds[i].num_work_pending)); + unsigned long flags; + int len; + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + if (klmirqd_state.initialized) { + if (!klmirqd_state.shuttingdown) { + struct list_head *pos; + + len = snprintf(page, PAGE_SIZE, + "num ready klmirqds: %d\n\n", + klmirqd_state.nr_threads); + + list_for_each(pos, &klmirqd_state.threads) { + struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg); + + len += + snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ + "klmirqd_thread: %s/%d\n" + "\tcurrent_owner: %s/%d\n" + "\tpending: %x\n" + "\tnum hi: %d\n" + "\tnum low: %d\n" + "\tnum work: %d\n\n", + info->klmirqd->comm, info->klmirqd->pid, + (info->current_owner != NULL) ? + info->current_owner->comm : "(null)", + (info->current_owner != NULL) ? + info->current_owner->pid : 0, + info->pending, + atomic_read(&info->num_hi_pending), + atomic_read(&info->num_low_pending), + atomic_read(&info->num_work_pending)); + } + } + else { + len = snprintf(page, PAGE_SIZE, "klmirqd is shutting down\n"); } } + else { + len = snprintf(page, PAGE_SIZE, "klmirqd is not initialized!\n"); + } + + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); return(len); } @@ -162,6 +451,15 @@ static void dump_state(struct klmirqd_info* which, const char* caller) #endif + + + + + + + + + /* forward declarations */ static void ___litmus_tasklet_schedule(struct tasklet_struct *t, struct klmirqd_info *which, @@ -174,24 +472,6 @@ static void ___litmus_schedule_work(struct work_struct *w, int wakeup); - -inline unsigned int klmirqd_id(struct task_struct* tsk) -{ - int i; - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - if(klmirqds[i].klmirqd == tsk) - { - return i; - } - } - - BUG(); - - return 0; -} - - inline static u32 litirq_pending_hi_irqoff(struct klmirqd_info* which) { return (which->pending & LIT_TASKLET_HI); @@ -225,200 +505,11 @@ inline static u32 litirq_pending(struct klmirqd_info* which) return pending; }; -inline static u32 litirq_pending_with_owner(struct klmirqd_info* which, struct task_struct* owner) -{ - unsigned long flags; - u32 pending; - - raw_spin_lock_irqsave(&which->lock, flags); - pending = litirq_pending_irqoff(which); - if(pending) - { - if(which->current_owner != owner) - { - pending = 0; // owner switch! 
- } - } - raw_spin_unlock_irqrestore(&which->lock, flags); - - return pending; -} - - -inline static u32 litirq_pending_and_sem_and_owner(struct klmirqd_info* which, - struct mutex** sem, - struct task_struct** t) -{ - unsigned long flags; - u32 pending; - - /* init values */ - *sem = NULL; - *t = NULL; - - raw_spin_lock_irqsave(&which->lock, flags); - - pending = litirq_pending_irqoff(which); - if(pending) - { - if(which->current_owner != NULL) - { - *t = which->current_owner; - *sem = &tsk_rt(which->current_owner)->klmirqd_sem; - } - else - { - BUG(); - } - } - raw_spin_unlock_irqrestore(&which->lock, flags); - - if(likely(*sem)) - { - return pending; - } - else - { - return 0; - } -} - -/* returns true if the next piece of work to do is from a different owner. - */ -static int tasklet_ownership_change( - struct klmirqd_info* which, - enum pending_flags taskletQ) -{ - /* this function doesn't have to look at work objects since they have - priority below tasklets. */ - - unsigned long flags; - int ret = 0; - - raw_spin_lock_irqsave(&which->lock, flags); - - switch(taskletQ) - { - case LIT_TASKLET_HI: - if(litirq_pending_hi_irqoff(which)) - { - ret = (which->pending_tasklets_hi.head->owner != - which->current_owner); - } - break; - case LIT_TASKLET_LOW: - if(litirq_pending_low_irqoff(which)) - { - ret = (which->pending_tasklets.head->owner != - which->current_owner); - } - break; - default: - break; - } - - raw_spin_unlock_irqrestore(&which->lock, flags); - - TRACE_TASK(which->klmirqd, "ownership change needed: %d\n", ret); - - return ret; -} - - -static void __reeval_prio(struct klmirqd_info* which) -{ - struct task_struct* next_owner = NULL; - struct task_struct* klmirqd = which->klmirqd; - - /* Check in prio-order */ - u32 pending = litirq_pending_irqoff(which); - - //__dump_state(which, "__reeval_prio: before"); - - if(pending) - { - if(pending & LIT_TASKLET_HI) - { - next_owner = which->pending_tasklets_hi.head->owner; - } - else if(pending & LIT_TASKLET_LOW) - { - next_owner = which->pending_tasklets.head->owner; - } - else if(pending & LIT_WORK) - { - struct work_struct* work = - list_first_entry(&which->worklist, struct work_struct, entry); - next_owner = work->owner; - } - } - - if(next_owner != which->current_owner) - { - struct task_struct* old_owner = which->current_owner; - - /* bind the next owner. */ - which->current_owner = next_owner; - mb(); - - if(next_owner != NULL) - { - if(!in_interrupt()) - { - TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, - ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm, - ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid, - next_owner->comm, next_owner->pid); - } - else - { - TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, - ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm, - ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid, - next_owner->comm, next_owner->pid); - } - - litmus->increase_prio_inheritance_klmirqd(klmirqd, old_owner, next_owner); - } - else - { - if(likely(!in_interrupt())) - { - TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n", - __FUNCTION__, klmirqd->comm, klmirqd->pid); - } - else - { - // is this a bug? 
- TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n", - __FUNCTION__, klmirqd->comm, klmirqd->pid); - } - - BUG_ON(pending != 0); - litmus->decrease_prio_inheritance_klmirqd(klmirqd, old_owner, NULL); - } - } - - //__dump_state(which, "__reeval_prio: after"); -} - -static void reeval_prio(struct klmirqd_info* which) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&which->lock, flags); - __reeval_prio(which); - raw_spin_unlock_irqrestore(&which->lock, flags); -} - - static void wakeup_litirqd_locked(struct klmirqd_info* which) { /* Interrupts are disabled: no need to stop preemption */ if (which && which->klmirqd) { - __reeval_prio(which); /* configure the proper priority */ - if(which->klmirqd->state != TASK_RUNNING) { TRACE("%s: Waking up klmirqd: %s/%d\n", __FUNCTION__, @@ -468,7 +559,7 @@ static void do_lit_tasklet(struct klmirqd_info* which, list = list->next; /* execute tasklet if it has my priority and is free */ - if ((t->owner == which->current_owner) && tasklet_trylock(t)) { + if (tasklet_trylock(t)) { if (!atomic_read(&t->count)) { sched_trace_tasklet_begin(t->owner); @@ -503,15 +594,14 @@ static void do_lit_tasklet(struct klmirqd_info* which, // returns 1 if priorities need to be changed to continue processing // pending tasklets. -static int do_litirq(struct klmirqd_info* which) +static void do_litirq(struct klmirqd_info* which) { u32 pending; - int resched = 0; if(in_interrupt()) { TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__); - return(0); + return; } if(which->klmirqd != current) @@ -519,59 +609,40 @@ static int do_litirq(struct klmirqd_info* which) TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", __FUNCTION__, current->comm, current->pid, which->klmirqd->comm, which->klmirqd->pid); - return(0); + return; } if(!is_realtime(current)) { TRACE_CUR("%s: exiting early: klmirqd is not real-time. Sched Policy = %d\n", __FUNCTION__, current->policy); - return(0); + return; } /* We only handle tasklets & work objects, no need for RCU triggers? */ pending = litirq_pending(which); - if(pending) - { + if(pending) { /* extract the work to do and do it! */ - if(pending & LIT_TASKLET_HI) - { + if(pending & LIT_TASKLET_HI) { TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__); do_lit_tasklet(which, &which->pending_tasklets_hi); - resched = tasklet_ownership_change(which, LIT_TASKLET_HI); - - if(resched) - { - TRACE_CUR("%s: HI tasklets of another owner remain. " - "Skipping any LOW tasklets.\n", __FUNCTION__); - } } - if(!resched && (pending & LIT_TASKLET_LOW)) - { + if(pending & LIT_TASKLET_LOW) { TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__); do_lit_tasklet(which, &which->pending_tasklets); - resched = tasklet_ownership_change(which, LIT_TASKLET_LOW); - - if(resched) - { - TRACE_CUR("%s: LOW tasklets of another owner remain. " - "Skipping any work objects.\n", __FUNCTION__); - } } } - - return(resched); } static void do_work(struct klmirqd_info* which) { unsigned long flags; - work_func_t f; struct work_struct* work; + work_func_t f; // only execute one work-queue item to yield to tasklets. // ...is this a good idea, or should we just batch them? @@ -594,125 +665,58 @@ static void do_work(struct klmirqd_info* which) raw_spin_unlock_irqrestore(&which->lock, flags); + TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__); + // do the work! + work_clear_pending(work); + f = work->func; + f(work); /* can't touch 'work' after this point, + the user may have freed it. 
*/ - /* safe to read current_owner outside of lock since only this thread - may write to the pointer. */ - if(work->owner == which->current_owner) - { - TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__); - // do the work! - work_clear_pending(work); - f = work->func; - f(work); /* can't touch 'work' after this point, - the user may have freed it. */ - - atomic_dec(&which->num_work_pending); - } - else - { - TRACE_CUR("%s: Could not invoke work object. Requeuing.\n", - __FUNCTION__); - ___litmus_schedule_work(work, which, 0); - } + atomic_dec(&which->num_work_pending); no_work: return; } -static int set_litmus_daemon_sched(void) -{ - /* set up a daemon job that will never complete. - it should only ever run on behalf of another - real-time task. - - TODO: Transition to a new job whenever a - new tasklet is handled */ - - int ret = 0; - - struct rt_task tp = { - .exec_cost = 0, - .period = 1000000000, /* dummy 1 second period */ - .phase = 0, - .cpu = task_cpu(current), - .budget_policy = NO_ENFORCEMENT, - .cls = RT_CLASS_BEST_EFFORT - }; - - struct sched_param param = { .sched_priority = 0}; - - - /* set task params, mark as proxy thread, and init other data */ - tsk_rt(current)->task_params = tp; - tsk_rt(current)->is_proxy_thread = 1; - tsk_rt(current)->cur_klmirqd = NULL; - mutex_init(&tsk_rt(current)->klmirqd_sem); - atomic_set(&tsk_rt(current)->klmirqd_sem_stat, NOT_HELD); - - /* inform the OS we're SCHED_LITMUS -- - sched_setscheduler_nocheck() calls litmus_admit_task(). */ - sched_setscheduler_nocheck(current, SCHED_LITMUS, &param); - - return ret; -} - -static void enter_execution_phase(struct klmirqd_info* which, - struct mutex* sem, - struct task_struct* t) -{ - TRACE_CUR("%s: Trying to enter execution phase. " - "Acquiring semaphore of %s/%d\n", __FUNCTION__, - t->comm, t->pid); - down_and_set_stat(current, HELD, sem); - TRACE_CUR("%s: Execution phase entered! " - "Acquired semaphore of %s/%d\n", __FUNCTION__, - t->comm, t->pid); -} - -static void exit_execution_phase(struct klmirqd_info* which, - struct mutex* sem, - struct task_struct* t) -{ - TRACE_CUR("%s: Exiting execution phase. " - "Releasing semaphore of %s/%d\n", __FUNCTION__, - t->comm, t->pid); - if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) == HELD) - { - up_and_set_stat(current, NOT_HELD, sem); - TRACE_CUR("%s: Execution phase exited! " - "Released semaphore of %s/%d\n", __FUNCTION__, - t->comm, t->pid); - } - else - { - TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__); - } -} /* main loop for klitsoftirqd */ -static int run_klmirqd(void* unused) +static int run_klmirqd(void* callback) { - struct klmirqd_info* which = &klmirqds[klmirqd_id(current)]; - struct mutex* sem; - struct task_struct* owner; - - int rt_status = set_litmus_daemon_sched(); + int retval = 0; + struct klmirqd_info* info = NULL; + klmirqd_callback_t* cb = (klmirqd_callback_t*)(callback); - if(rt_status != 0) - { + retval = set_litmus_daemon_sched(current); + if (retval != 0) { TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); - goto rt_failed; + goto failed; } - atomic_inc(&num_ready_klmirqds); + retval = register_klmirqd(current); + if (retval != 0) { + TRACE_CUR("%s: Failed to become a klmirqd thread.\n", __FUNCTION__); + goto failed; + } + + if (cb && cb->func) { + retval = cb->func(cb->arg); + if (retval != 0) { + TRACE_CUR("%s: klmirqd callback reported failure.
retval = %d\n", __FUNCTION__, retval); + goto failed_unregister; + } + } + + /* enter the interrupt handling workloop */ + + info = tsk_rt(current)->klmirqd_info; set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { preempt_disable(); - if (!litirq_pending(which)) + if (!litirq_pending(info)) { /* sleep for work */ TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n", @@ -731,17 +735,10 @@ static int run_klmirqd(void* unused) __set_current_state(TASK_RUNNING); - while (litirq_pending_and_sem_and_owner(which, &sem, &owner)) + while (litirq_pending(info)) { - int needs_resched = 0; - preempt_enable_no_resched(); - BUG_ON(sem == NULL); - - // wait to enter execution phase; wait for 'current_owner' to block. - enter_execution_phase(which, sem, owner); - if(kthread_should_stop()) { TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); @@ -753,36 +750,23 @@ static int run_klmirqd(void* unused) /* Double check that there's still pending work and the owner hasn't * changed. Pending items may have been flushed while we were sleeping. */ - if(litirq_pending_with_owner(which, owner)) + if(litirq_pending(info)) { TRACE_CUR("%s: Executing tasklets and/or work objects.\n", __FUNCTION__); - needs_resched = do_litirq(which); + do_litirq(info); preempt_enable_no_resched(); // work objects are preemptible. - if(!needs_resched) - { - do_work(which); - } - - // exit execution phase. - exit_execution_phase(which, sem, owner); - - TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__); - reeval_prio(which); /* check if we need to change priority here */ + do_work(info); } else { - TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n", - __FUNCTION__, - owner->comm, owner->pid); - preempt_enable_no_resched(); + TRACE_CUR("%s: Pending work was flushed!\n", __FUNCTION__); - // exit execution phase. - exit_execution_phase(which, sem, owner); + preempt_enable_no_resched(); } cond_resched(); @@ -793,182 +777,38 @@ static int run_klmirqd(void* unused) } __set_current_state(TASK_RUNNING); - atomic_dec(&num_ready_klmirqds); +failed_unregister: + /* remove our registration from klmirqd */ + unregister_klmirqd(current); -rt_failed: +failed: litmus_exit_task(current); - return rt_status; + return retval; } -struct klmirqd_launch_data -{ - int* cpu_affinity; - struct work_struct work; -}; - -/* executed by a kworker from workqueues */ -static void launch_klmirqd(struct work_struct *work) +void flush_pending(struct task_struct* tsk) { - int i; - - struct klmirqd_launch_data* launch_data = - container_of(work, struct klmirqd_launch_data, work); - - TRACE("%s: Creating %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); - - /* create the daemon threads */ - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - if(launch_data->cpu_affinity) - { - klmirqds[i].klmirqd = - kthread_create( - run_klmirqd, - /* treat the affinity as a pointer, we'll cast it back later */ - (void*)(long long)launch_data->cpu_affinity[i], - "klmirqd_th%d/%d", - i, - launch_data->cpu_affinity[i]); - - /* litmus will put is in the right cluster. 
*/ - kthread_bind(klmirqds[i].klmirqd, launch_data->cpu_affinity[i]); - } - else - { - klmirqds[i].klmirqd = - kthread_create( - run_klmirqd, - /* treat the affinity as a pointer, we'll cast it back later */ - (void*)(long long)(-1), - "klmirqd_th%d", - i); - } - } - - TRACE("%s: Launching %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); - - /* unleash the daemons */ - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - wake_up_process(klmirqds[i].klmirqd); - } - - if(launch_data->cpu_affinity) - kfree(launch_data->cpu_affinity); - kfree(launch_data); -} - + unsigned long flags; + struct tasklet_struct *list; + u32 work_flushed = 0; -void spawn_klmirqd(int* affinity) -{ - int i; - struct klmirqd_launch_data* delayed_launch; + struct klmirqd_info *which; - if(atomic_read(&num_ready_klmirqds) != 0) - { - TRACE("%s: At least one klmirqd is already running! Need to call kill_klmirqd()?\n"); + if (!tsk_rt(tsk)->is_interrupt_thread) { + TRACE("%s/%d is not a proxy thread\n", tsk->comm, tsk->pid); + WARN_ON(1); return; } - /* init the tasklet & work queues */ - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - klmirqds[i].terminating = 0; - klmirqds[i].pending = 0; - - klmirqds[i].num_hi_pending.counter = 0; - klmirqds[i].num_low_pending.counter = 0; - klmirqds[i].num_work_pending.counter = 0; - - klmirqds[i].pending_tasklets_hi.head = NULL; - klmirqds[i].pending_tasklets_hi.tail = &klmirqds[i].pending_tasklets_hi.head; - - klmirqds[i].pending_tasklets.head = NULL; - klmirqds[i].pending_tasklets.tail = &klmirqds[i].pending_tasklets.head; - - INIT_LIST_HEAD(&klmirqds[i].worklist); - - raw_spin_lock_init(&klmirqds[i].lock); - } - - /* wait to flush the initializations to memory since other threads - will access it. */ - mb(); - - /* tell a work queue to launch the threads. we can't make scheduling - calls since we're in an atomic state. */ - TRACE("%s: Setting callback up to launch klmirqds\n", __FUNCTION__); - delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC); - if(affinity) - { - delayed_launch->cpu_affinity = - kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC); - - memcpy(delayed_launch->cpu_affinity, affinity, - sizeof(int)*NR_LITMUS_SOFTIRQD); - } - else - { - delayed_launch->cpu_affinity = NULL; - } - INIT_WORK(&delayed_launch->work, launch_klmirqd); - schedule_work(&delayed_launch->work); -} - - -void kill_klmirqd(void) -{ - if(!klmirqd_is_dead()) - { - int i; - - TRACE("%s: Killing %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); - - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - if(klmirqds[i].terminating != 1) - { - klmirqds[i].terminating = 1; - mb(); /* just to be sure? 
*/ - flush_pending(klmirqds[i].klmirqd, NULL); - - /* signal termination */ - kthread_stop(klmirqds[i].klmirqd); - } - } + which = tsk_rt(tsk)->klmirqd_info; + if (!which) { + TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid); + WARN_ON(1); + return; } -} - - -int klmirqd_is_ready(void) -{ - return(atomic_read(&num_ready_klmirqds) == NR_LITMUS_SOFTIRQD); -} - -int klmirqd_is_dead(void) -{ - return(atomic_read(&num_ready_klmirqds) == 0); -} - - -struct task_struct* get_klmirqd(unsigned int k_id) -{ - return(klmirqds[k_id].klmirqd); -} - - -void flush_pending(struct task_struct* klmirqd_thread, - struct task_struct* owner) -{ - unsigned int k_id = klmirqd_id(klmirqd_thread); - struct klmirqd_info *which = &klmirqds[k_id]; - - unsigned long flags; - struct tasklet_struct *list; - u32 work_flushed = 0; raw_spin_lock_irqsave(&which->lock, flags); @@ -990,35 +830,27 @@ void flush_pending(struct task_struct* klmirqd_thread, struct tasklet_struct *t = list; list = list->next; - if(likely((t->owner == owner) || (owner == NULL))) + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) { - if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) - { - BUG(); - } + BUG(); + } - work_flushed |= LIT_TASKLET_HI; + work_flushed |= LIT_TASKLET_HI; - t->owner = NULL; + t->owner = NULL; - // WTF? - if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - { - atomic_dec(&which->num_hi_pending); - ___tasklet_hi_schedule(t); - } - else - { - TRACE("%s: dropped hi tasklet??\n", __FUNCTION__); - BUG(); - } + // WTF? + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_hi_pending); + ___tasklet_hi_schedule(t); } else { - TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__); - // put back on queue. - ___litmus_tasklet_hi_schedule(t, which, 0); + TRACE("%s: dropped hi tasklet??\n", __FUNCTION__); + BUG(); } + } } @@ -1038,34 +870,25 @@ void flush_pending(struct task_struct* klmirqd_thread, struct tasklet_struct *t = list; list = list->next; - if(likely((t->owner == owner) || (owner == NULL))) + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) { - if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) - { - BUG(); - } + BUG(); + } - work_flushed |= LIT_TASKLET_LOW; + work_flushed |= LIT_TASKLET_LOW; - t->owner = NULL; - sched_trace_tasklet_end(owner, 1ul); + t->owner = NULL; +// sched_trace_tasklet_end(owner, 1ul); - if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - { - atomic_dec(&which->num_low_pending); - ___tasklet_schedule(t); - } - else - { - TRACE("%s: dropped tasklet??\n", __FUNCTION__); - BUG(); - } + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_low_pending); + ___tasklet_schedule(t); } else { - TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__); - // put back on queue - ___litmus_tasklet_schedule(t, which, 0); + TRACE("%s: dropped tasklet??\n", __FUNCTION__); + BUG(); } } } @@ -1083,21 +906,12 @@ void flush_pending(struct task_struct* klmirqd_thread, list_first_entry(&which->worklist, struct work_struct, entry); list_del_init(&work->entry); - if(likely((work->owner == owner) || (owner == NULL))) - { - work_flushed |= LIT_WORK; - atomic_dec(&which->num_work_pending); + work_flushed |= LIT_WORK; + atomic_dec(&which->num_work_pending); - work->owner = NULL; - sched_trace_work_end(owner, current, 1ul); - __schedule_work(work); - } - else - { - TRACE("%s: Could not flush a work object.\n", __FUNCTION__); - // put back on queue - ___litmus_schedule_work(work, 
which, 0); - } + work->owner = NULL; +// sched_trace_work_end(owner, current, 1ul); + __schedule_work(work); } } @@ -1106,22 +920,6 @@ void flush_pending(struct task_struct* klmirqd_thread, mb(); /* commit changes to pending flags */ - /* reset the scheduling priority */ - if(work_flushed) - { - __reeval_prio(which); - - /* Try to offload flushed tasklets to Linux's ksoftirqd. */ - if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI)) - { - wakeup_softirqd(); - } - } - else - { - TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__); - } - raw_spin_unlock_irqrestore(&which->lock, flags); } @@ -1161,39 +959,27 @@ static void ___litmus_tasklet_schedule(struct tasklet_struct *t, raw_spin_unlock_irqrestore(&which->lock, flags); } -int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) + +int __litmus_tasklet_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread) { int ret = 0; /* assume failure */ - if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) - { - TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); - BUG(); - } + struct klmirqd_info* info; - if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) - { - TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); - BUG(); - } + if (unlikely(!is_realtime(klmirqd_thread) || + !tsk_rt(klmirqd_thread)->is_interrupt_thread || + !tsk_rt(klmirqd_thread)->klmirqd_info)) { + TRACE("%s: %s/%d can't handle tasklets\n", klmirqd_thread->comm, klmirqd_thread->pid); + return ret; + } - if(likely(!klmirqds[k_id].terminating)) - { - /* Can't accept tasklets while we're processing a workqueue - because they're handled by the same thread. This case is - very RARE. + info = tsk_rt(klmirqd_thread)->klmirqd_info; - TODO: Use a separate thread for work objects!!!!!! 
- */ - if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) - { - ret = 1; - ___litmus_tasklet_schedule(t, &klmirqds[k_id], 1); - } - else - { - TRACE("%s: rejected tasklet because of pending work.\n", - __FUNCTION__); - } + if (likely(!info->terminating)) { + ret = 1; + ___litmus_tasklet_schedule(t, info, 1); + } + else { + TRACE("%s: Tasklet rejected because %s/%d is terminating\n", klmirqd_thread->comm, klmirqd_thread->pid); } return(ret); } @@ -1230,100 +1016,77 @@ static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, raw_spin_unlock_irqrestore(&which->lock, flags); } -int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread) { int ret = 0; /* assume failure */ - if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) - { - TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); - BUG(); - } + struct klmirqd_info* info; - if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) - { - TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); - BUG(); - } + if (unlikely(!is_realtime(klmirqd_thread) || + !tsk_rt(klmirqd_thread)->is_interrupt_thread || + !tsk_rt(klmirqd_thread)->klmirqd_info)) { + TRACE("%s: %s/%d can't handle tasklets\n", klmirqd_thread->comm, klmirqd_thread->pid); + return ret; + } - if(unlikely(!klmirqd_is_ready())) - { - TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); - BUG(); - } + info = tsk_rt(klmirqd_thread)->klmirqd_info; - if(likely(!klmirqds[k_id].terminating)) - { - if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) - { - ret = 1; - ___litmus_tasklet_hi_schedule(t, &klmirqds[k_id], 1); - } - else - { - TRACE("%s: rejected tasklet because of pending work.\n", - __FUNCTION__); - } + if (likely(!info->terminating)) { + ret = 1; + ___litmus_tasklet_hi_schedule(t, info, 1); + } + else { + TRACE("%s: Tasklet rejected because %s/%d is terminating\n", klmirqd_thread->comm, klmirqd_thread->pid); } + return(ret); } EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); -int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, struct task_struct* klmirqd_thread) { int ret = 0; /* assume failure */ u32 old_pending; + struct klmirqd_info* info; BUG_ON(!irqs_disabled()); - if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) - { - TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); - BUG(); - } + if (unlikely(!is_realtime(klmirqd_thread) || + !tsk_rt(klmirqd_thread)->is_interrupt_thread || + !tsk_rt(klmirqd_thread)->klmirqd_info)) { + TRACE("%s: %s/%d can't handle tasklets\n", klmirqd_thread->comm, klmirqd_thread->pid); + return ret; + } - if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) - { - TRACE("%s: No klmirqd_th%u!\n", __FUNCTION__, k_id); - BUG(); - } + info = tsk_rt(klmirqd_thread)->klmirqd_info; - if(unlikely(!klmirqd_is_ready())) - { - TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); - BUG(); - } + if (likely(!info->terminating)) { - if(likely(!klmirqds[k_id].terminating)) - { - raw_spin_lock(&klmirqds[k_id].lock); + raw_spin_lock(&info->lock); - if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) - { - ret = 1; // success! + ret = 1; // success! 
- t->next = klmirqds[k_id].pending_tasklets_hi.head; - klmirqds[k_id].pending_tasklets_hi.head = t; + t->next = info->pending_tasklets_hi.head; + info->pending_tasklets_hi.head = t; - old_pending = klmirqds[k_id].pending; - klmirqds[k_id].pending |= LIT_TASKLET_HI; + old_pending = info->pending; + info->pending |= LIT_TASKLET_HI; - atomic_inc(&klmirqds[k_id].num_hi_pending); + atomic_inc(&info->num_hi_pending); - mb(); + mb(); - if(!old_pending) - wakeup_litirqd_locked(&klmirqds[k_id]); /* wake up the klmirqd */ - } - else - { - TRACE("%s: rejected tasklet because of pending work.\n", - __FUNCTION__); + if(!old_pending) { + wakeup_litirqd_locked(info); /* wake up the klmirqd */ } - raw_spin_unlock(&klmirqds[k_id].lock); + raw_spin_unlock(&info->lock); } + else { + TRACE("%s: Tasklet rejected because %s/%d is terminating\n", klmirqd_thread->comm, klmirqd_thread->pid); + } + return(ret); } @@ -1358,225 +1121,30 @@ static void ___litmus_schedule_work(struct work_struct *w, raw_spin_unlock_irqrestore(&which->lock, flags); } -int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) +int __litmus_schedule_work(struct work_struct *w, struct task_struct* klmirqd_thread) { int ret = 1; /* assume success */ - if(unlikely(w->owner == NULL) || !is_realtime(w->owner)) - { - TRACE("%s: No owner associated with this work object!\n", __FUNCTION__); - BUG(); - } - - if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) - { - TRACE("%s: No klmirqd_th%u!\n", k_id); - BUG(); - } - - if(unlikely(!klmirqd_is_ready())) - { - TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); - BUG(); - } - - if(likely(!klmirqds[k_id].terminating)) - ___litmus_schedule_work(w, &klmirqds[k_id], 1); - else - ret = 0; - return(ret); -} -EXPORT_SYMBOL(__litmus_schedule_work); - - -static int set_klmirqd_sem_status(unsigned long stat) -{ - TRACE_CUR("SETTING STATUS FROM %d TO %d\n", - atomic_read(&tsk_rt(current)->klmirqd_sem_stat), - stat); - atomic_set(&tsk_rt(current)->klmirqd_sem_stat, stat); - //mb(); - - return(0); -} - -static int set_klmirqd_sem_status_if_not_held(unsigned long stat) -{ - if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) != HELD) - { - return(set_klmirqd_sem_status(stat)); - } - return(-1); -} - - -void __down_and_reset_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_reset, - enum klmirqd_sem_status to_set, - struct mutex* sem) -{ -#if 0 - struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); - struct task_struct* task = container_of(param, struct task_struct, rt_param); - - TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", - __FUNCTION__, task->comm, task->pid); -#endif - - mutex_lock_sfx(sem, - set_klmirqd_sem_status_if_not_held, to_reset, - set_klmirqd_sem_status, to_set); -#if 0 - TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", - __FUNCTION__, task->comm, task->pid); -#endif -} - -void down_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_set, - struct mutex* sem) -{ -#if 0 - struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); - struct task_struct* task = container_of(param, struct task_struct, rt_param); - - TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", - __FUNCTION__, task->comm, task->pid); -#endif - - mutex_lock_sfx(sem, - NULL, 0, - set_klmirqd_sem_status, to_set); - -#if 0 - TRACE_CUR("%s: exiting. 
Have semaphore of %s/%d\n", - __FUNCTION__, task->comm, task->pid); -#endif -} - - -void up_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_set, - struct mutex* sem) -{ -#if 0 - struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); - struct task_struct* task = container_of(param, struct task_struct, rt_param); - - TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n", - __FUNCTION__, - task->comm, task->pid); -#endif - - mutex_unlock_sfx(sem, NULL, 0, - set_klmirqd_sem_status, to_set); - -#if 0 - TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n", - __FUNCTION__, - task->comm, task->pid); -#endif -} - - - -void release_klmirqd_lock(struct task_struct* t) -{ - if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == HELD)) - { - struct mutex* sem; - struct task_struct* owner = t; - - if(t->state == TASK_RUNNING) - { - TRACE_TASK(t, "NOT giving up klmirqd_sem because we're not blocked!\n"); - return; - } - - if(likely(!tsk_rt(t)->is_proxy_thread)) - { - sem = &tsk_rt(t)->klmirqd_sem; - } - else - { - unsigned int k_id = klmirqd_id(t); - owner = klmirqds[k_id].current_owner; - - BUG_ON(t != klmirqds[k_id].klmirqd); - - if(likely(owner)) - { - sem = &tsk_rt(owner)->klmirqd_sem; - } - else - { - BUG(); + struct klmirqd_info* info; - // We had the rug pulled out from under us. Abort attempt - // to reacquire the lock since our client no longer needs us. - TRACE_CUR("HUH?! How did this happen?\n"); - atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD); - return; - } - } - - //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid); - up_and_set_stat(t, NEED_TO_REACQUIRE, sem); - //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid); - } - /* - else if(is_realtime(t)) - { - TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat); + if (unlikely(!is_realtime(klmirqd_thread) || + !tsk_rt(klmirqd_thread)->is_interrupt_thread || + !tsk_rt(klmirqd_thread)->klmirqd_info)) { + TRACE("%s: %s/%d can't handle work items\n", klmirqd_thread->comm, klmirqd_thread->pid); + return ret; } - */ -} - -int reacquire_klmirqd_lock(struct task_struct* t) -{ - int ret = 0; - if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == NEED_TO_REACQUIRE)) - { - struct mutex* sem; - struct task_struct* owner = t; - - if(likely(!tsk_rt(t)->is_proxy_thread)) - { - sem = &tsk_rt(t)->klmirqd_sem; - } - else - { - unsigned int k_id = klmirqd_id(t); - //struct task_struct* owner = klmirqds[k_id].current_owner; - owner = klmirqds[k_id].current_owner; + info = tsk_rt(klmirqd_thread)->klmirqd_info; - BUG_ON(t != klmirqds[k_id].klmirqd); - - if(likely(owner)) - { - sem = &tsk_rt(owner)->klmirqd_sem; - } - else - { - // We had the rug pulled out from under us. Abort attempt - // to reacquire the lock since our client no longer needs us. - TRACE_CUR("No longer needs to reacquire klmirqd_sem!\n"); - atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD); - return(0); - } - } - //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid); - __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem); - //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid); + if (likely(!info->terminating)) { + ___litmus_schedule_work(w, info, 1); } - /* - else if(is_realtime(t)) - { - TRACE_CUR("%s: Nothing to do. 
Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat); + else { + TRACE("%s: Work rejected because %s/%d is terminating\n", klmirqd_thread->comm, klmirqd_thread->pid); + ret = 0; } - */ return(ret); } +EXPORT_SYMBOL(__litmus_schedule_work); diff --git a/litmus/locking.c b/litmus/locking.c index 22f46df4308a..7af1dd69a079 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -543,32 +543,54 @@ out: void suspend_for_lock(void) { -#ifdef CONFIG_REALTIME_AUX_TASKS -#if 0 - unsigned int restore = 0; +#if defined(CONFIG_REALTIME_AUX_TASKS) || defined(CONFIG_LITMUS_NVIDIA) struct task_struct *t = current; - unsigned int hide; - - if (tsk_rt(t)->has_aux_tasks) { - /* hide from aux tasks so they can't inherit our priority when we block - * for a litmus lock. inheritance is already going to a litmus lock - * holder. */ - hide = tsk_rt(t)->hide_from_aux_tasks; - restore = 1; - tsk_rt(t)->hide_from_aux_tasks = 1; - } #endif + +#ifdef CONFIG_REALTIME_AUX_TASKS + unsigned int aux_restore = 0; + unsigned int aux_hide; +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + unsigned int gpu_restore = 0; + unsigned int gpu_hide; +#endif + + +//#ifdef CONFIG_REALTIME_AUX_TASKS +// if (tsk_rt(t)->has_aux_tasks) { +// /* hide from aux tasks so they can't inherit our priority when we block +// * for a litmus lock. inheritance is already going to a litmus lock +// * holder. */ +// aux_hide = tsk_rt(t)->hide_from_aux_tasks; +// aux_restore = 1; +// tsk_rt(t)->hide_from_aux_tasks = 1; +// } +//#endif + +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(t)->held_gpus) { + gpu_hide = tsk_rt(t)->hide_from_gpu; + gpu_restore = 1; + tsk_rt(t)->hide_from_gpu = 1; + } #endif schedule(); -#ifdef CONFIG_REALTIME_AUX_TASKS -#if 0 - if (restore) { +#ifdef CONFIG_LITMUS_NVIDIA + if (gpu_restore) { /* restore our state */ - tsk_rt(t)->hide_from_aux_tasks = hide; + tsk_rt(t)->hide_from_gpu = gpu_hide; } #endif + +#ifdef CONFIG_REALTIME_AUX_TASKS + if (aux_restore) { + /* restore our state */ + tsk_rt(t)->hide_from_aux_tasks = aux_hide; + } #endif } diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 22586cde8255..b29f4d3f0dac 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -10,6 +10,10 @@ #include +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ typedef unsigned char NvU8; /* 0 to 255 */ @@ -296,9 +300,14 @@ static struct notifier_block nvidia_going = { }; #endif + + +static int init_nv_device_reg(void); +static int shutdown_nv_device_reg(void); + + int init_nvidia_info(void) { -#if 1 mutex_lock(&module_mutex); nvidia_mod = find_module("nvidia"); mutex_unlock(&module_mutex); @@ -315,13 +324,14 @@ int init_nvidia_info(void) TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); return(-1); } -#endif } void shutdown_nvidia_info(void) { nvidia_mod = NULL; mb(); + + shutdown_nv_device_reg(); } /* works with pointers to static data inside the module too. */ @@ -351,20 +361,6 @@ u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); return(linuxstate->device_num); - - //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); - -#if 0 - // offset determined though observed behavior of the NV driver. 
- //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1 - //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2 - - void* state = (void*)(t->data); - void* device_num_ptr = state + DEVICE_NUM_OFFSET; - - //dump_nvidia_info(t); - return(*((u32*)device_num_ptr)); -#endif } u32 get_work_nv_device_num(const struct work_struct *t) @@ -377,203 +373,452 @@ u32 get_work_nv_device_num(const struct work_struct *t) } +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + + typedef struct { - raw_spinlock_t lock; - int nr_owners; - struct task_struct* max_prio_owner; - struct task_struct* owners[NV_MAX_SIMULT_USERS]; + raw_spinlock_t lock; /* not needed if GPU not shared between scheudling domains */ + struct binheap owners; + +#ifdef CONFIG_LITMUS_SOFTIRQD + klmirqd_callback_t callback; + struct task_struct* thread; + int ready:1; /* todo: make threads check for the ready flag */ +#endif }nv_device_registry_t; + static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; -int init_nv_device_reg(void) + + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +static int nvidia_klmirqd_cb(void *arg) { - int i; + unsigned long flags; + int reg_device_id = (int)(long long)(arg); + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; - memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); + TRACE("nv klmirqd callback for GPU %d\n", reg_device_id); - for(i = 0; i < NV_DEVICE_NUM; ++i) - { - raw_spin_lock_init(&NV_DEVICE_REG[i].lock); - } + raw_spin_lock_irqsave(®->lock, flags); + reg->thread = current; + reg->ready = 1; + raw_spin_unlock_irqrestore(®->lock, flags); - return(1); + return 0; } +#endif -/* use to get nv_device_id by given owner. - (if return -1, can't get the assocaite device id)*/ -/* -int get_nv_device_id(struct task_struct* owner) + +static int gpu_owner_max_priority_order(struct binheap_node *a, + struct binheap_node *b) { - int i; - if(!owner) - { - return(-1); - } - for(i = 0; i < NV_DEVICE_NUM; ++i) - { - if(NV_DEVICE_REG[i].device_owner == owner) - return(i); - } - return(-1); + struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + + BUG_ON(!d_a); + BUG_ON(!d_b); + + return litmus->compare(d_a, d_b); } -*/ -static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) { +static int init_nv_device_reg(void) +{ int i; - struct task_struct *found = NULL; - for(i = 0; i < reg->nr_owners; ++i) { - if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) { - found = reg->owners[i]; + +#ifdef CONFIG_LITMUS_SOFTIRQD + if (!klmirqd_is_ready()) { + TRACE("klmirqd is not ready!\n"); + return 0; + } +#endif + + memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); + mb(); + + + for(i = 0; i < NV_DEVICE_NUM; ++i) { + raw_spin_lock_init(&NV_DEVICE_REG[i].lock); + INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); + +#ifdef CONFIG_LITMUS_SOFTIRQD + // TODO: Make thread spawning this a litmus plugin call. 
+ NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; + NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); + mb(); + + if(launch_klmirqd_thread(0, &NV_DEVICE_REG[i].callback) != 0) { + TRACE("Failed to create klmirqd thread for GPU %d\n", i); } +#endif } - return found; + + return(1); } -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD -void pai_check_priority_increase(struct task_struct *t, int reg_device_id) + +/* The following code is full of nasty race conditions... */ +/* spawning of klimirqd threads can race with init_nv_device_reg()!!!! */ +static int shutdown_nv_device_reg(void) { - unsigned long flags; - nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + TRACE("Shutting down nv device registration.\n"); + +#ifdef CONFIG_LITMUS_SOFTIRQD + { + int i; + nv_device_registry_t *reg; - if(reg->max_prio_owner != t) { + for (i = 0; i < NV_DEVICE_NUM; ++i) { - raw_spin_lock_irqsave(®->lock, flags); + TRACE("Shutting down GPU %d.\n", i); - if(reg->max_prio_owner != t) { - if(litmus->compare(t, reg->max_prio_owner)) { - litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); - reg->max_prio_owner = t; + reg = &NV_DEVICE_REG[i]; + + if (reg->thread && reg->ready) { + kill_klmirqd_thread(reg->thread); + + /* assume that all goes according to plan... */ + reg->thread = NULL; + reg->ready = 0; } - } - raw_spin_unlock_irqrestore(®->lock, flags); + while (!binheap_empty(®->owners)) { + binheap_delete_root(®->owners, struct rt_param, gpu_owner_node); + } + } } +#endif + + return(1); } -void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) +/* use to get the owner of nv_device_id. */ +struct task_struct* get_nv_max_device_owner(u32 target_device_id) { - unsigned long flags; - nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + struct task_struct *owner = NULL; + nv_device_registry_t *reg; - if(reg->max_prio_owner == t) { + BUG_ON(target_device_id >= NV_DEVICE_NUM); - raw_spin_lock_irqsave(®->lock, flags); + reg = &NV_DEVICE_REG[target_device_id]; - if(reg->max_prio_owner == t) { - reg->max_prio_owner = find_hp_owner(reg, NULL); - if(reg->max_prio_owner != t) { - litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); - } - } + if (!binheap_empty(®->owners)) { + struct task_struct *hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + TRACE_CUR("hp: %s/%d\n", hp->comm, hp->pid); + } - raw_spin_unlock_irqrestore(®->lock, flags); + return(owner); +} + +#ifdef CONFIG_LITMUS_SOFTIRQD +struct task_struct* get_nv_klmirqd_thread(u32 target_device_id) +{ + struct task_struct *klmirqd = NULL; + nv_device_registry_t *reg; + + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + reg = &NV_DEVICE_REG[target_device_id]; + + if(likely(reg->ready)) { + klmirqd = reg->thread; } + + return klmirqd; } #endif -static int __reg_nv_device(int reg_device_id, struct task_struct *t) + + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd, struct task_struct *hp) { - int ret = 0; - int i; - struct task_struct *old_max = NULL; - unsigned long flags; - nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + int retval = 0; - if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) { - // TODO: check if taks is already registered. - return ret; // assume already registered. 
- } + TRACE_CUR("Increasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid); + /* the klmirqd thread should never attempt to hold a litmus-level real-time + * so nested support is not required */ + retval = litmus->__increase_prio(klmirqd, hp); - raw_spin_lock_irqsave(®->lock, flags); + return retval; +} + +static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct task_struct *hp) +{ + int retval = 0; - if(reg->nr_owners < NV_MAX_SIMULT_USERS) { - TRACE_TASK(t, "registers GPU %d\n", reg_device_id); - for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { - if(reg->owners[i] == NULL) { - reg->owners[i] = t; + TRACE_CUR("Decreasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid); - //if(edf_higher_prio(t, reg->max_prio_owner)) { - if(litmus->compare(t, reg->max_prio_owner)) { - old_max = reg->max_prio_owner; - reg->max_prio_owner = t; + /* the klmirqd thread should never attempt to hold a litmus-level real-time + * so nested support is not required */ + retval = litmus->__decrease_prio(klmirqd, hp); -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD - litmus->change_prio_pai_tasklet(old_max, t); + return retval; +} #endif - } + + + + +/* call when an aux_owner becomes real-time */ +long enable_gpu_owner(struct task_struct *t) +{ + long retval = 0; +// unsigned long flags; + int gpu; + nv_device_registry_t *reg; #ifdef CONFIG_LITMUS_SOFTIRQD - down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); + struct task_struct *hp; #endif - ++(reg->nr_owners); - break; - } - } + if (!tsk_rt(t)->held_gpus) { + TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); + return -1; } - else - { - TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); - //ret = -EBUSY; + + BUG_ON(!is_realtime(t)); + + gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + + if (binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { + TRACE_CUR("task %s/%d is already active on GPU %d\n", t->comm, t->pid, gpu); + goto out; } - raw_spin_unlock_irqrestore(®->lock, flags); + /* update the registration (and maybe klmirqd) */ + reg = &NV_DEVICE_REG[gpu]; - __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); +// raw_spin_lock_irqsave(®->lock, flags); - return(ret); + binheap_add(&tsk_rt(t)->gpu_owner_node, ®->owners, + struct rt_param, gpu_owner_node); + + +#ifdef CONFIG_LITMUS_SOFTIRQD + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + + if (hp == t) { + /* we're the new hp */ + TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu); + + retval = gpu_klmirqd_increase_priority(reg->thread, (tsk_rt(hp)->inh_task)? 
tsk_rt(hp)->inh_task : hp); + } +#endif + +// raw_spin_unlock_irqsave(®->lock, flags); + +out: + return retval; } -static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) +/* call when an aux_owner exits real-time */ +long disable_gpu_owner(struct task_struct *t) { - int ret = 0; - int i; - unsigned long flags; - nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; + long retval = 0; +// unsigned long flags; + int gpu; + nv_device_registry_t *reg; #ifdef CONFIG_LITMUS_SOFTIRQD - struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); + struct task_struct *hp; + struct task_struct *new_hp = NULL; #endif - if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { - return ret; + if (!tsk_rt(t)->held_gpus) { + TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); + return -1; } - raw_spin_lock_irqsave(®->lock, flags); + BUG_ON(!is_realtime(t)); - TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); + gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + + if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { + TRACE_CUR("task %s/%d is not active on GPU %d\n", t->comm, t->pid, gpu); + goto out; + } + + TRACE_CUR("task %s/%d exiting from GPU %d.\n", t->comm, t->pid, gpu); + + + reg = &NV_DEVICE_REG[gpu]; + +// raw_spin_lock_irqsave(®->lock, flags); - for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { - if(reg->owners[i] == t) { -#ifdef CONFIG_LITMUS_SOFTIRQD - flush_pending(klmirqd_th, t); -#endif - if(reg->max_prio_owner == t) { - reg->max_prio_owner = find_hp_owner(reg, t); -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD - litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); -#endif - } #ifdef CONFIG_LITMUS_SOFTIRQD - up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + + binheap_delete(&tsk_rt(t)->gpu_owner_node, ®->owners); + + + if (!binheap_empty(®->owners)) { + new_hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + } + + if (hp == t && new_hp != t) { + struct task_struct *to_inh = NULL; + + TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); + + if (new_hp) { + to_inh = (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp; + } + + retval = gpu_klmirqd_decrease_priority(reg->thread, to_inh); + } +#else + binheap_delete(&tsk_rt(t)->gpu_owner_node, ®->owners); #endif - reg->owners[i] = NULL; - --(reg->nr_owners); +// raw_spin_unlock_irqsave(®->lock, flags); + + +out: + return retval; +} + + + + + + + + + + +int gpu_owner_increase_priority(struct task_struct *t) +{ + int retval = 0; + int gpu; + nv_device_registry_t *reg; + + struct task_struct *hp = NULL; + struct task_struct *hp_eff = NULL; + + BUG_ON(!is_realtime(t)); + BUG_ON(!tsk_rt(t)->held_gpus); + + gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + + if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { + WARN_ON(!is_running(t)); + TRACE_CUR("gpu klmirqd may not inherit from %s/%d on GPU %d\n", + t->comm, t->pid, gpu); + goto out; + } + + + + + TRACE_CUR("task %s/%d on GPU %d increasing priority.\n", t->comm, t->pid, gpu); + reg = &NV_DEVICE_REG[gpu]; + + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + hp_eff = effective_priority(hp); + + if (hp != t) { /* our position in the heap may have changed. hp is already at the root. 
*/ + binheap_decrease(&tsk_rt(t)->gpu_owner_node, ®->owners); + } + + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + + if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */ + hp_eff = effective_priority(hp); + TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu); + + retval = gpu_klmirqd_increase_priority(reg->thread, hp_eff); + } + +out: + return retval; +} + + +int gpu_owner_decrease_priority(struct task_struct *t) +{ + int retval = 0; + int gpu; + nv_device_registry_t *reg; + + struct task_struct *hp = NULL; + struct task_struct *hp_eff = NULL; - break; + BUG_ON(!is_realtime(t)); + BUG_ON(!tsk_rt(t)->held_gpus); + + gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + WARN_ON(!is_running(t)); + TRACE_CUR("aux tasks may not inherit from %s/%d on GPU %d\n", + t->comm, t->pid, gpu); + goto out; + } + + TRACE_CUR("task %s/%d on GPU %d decresing priority.\n", t->comm, t->pid, gpu); + reg = &NV_DEVICE_REG[gpu]; + + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + hp_eff = effective_priority(hp); + binheap_delete(&tsk_rt(t)->gpu_owner_node, ®->owners); + binheap_add(&tsk_rt(t)->gpu_owner_node, ®->owners, + struct rt_param, gpu_owner_node); + + if (hp == t) { /* t was originally the hp */ + struct task_struct *new_hp = + container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + if (effective_priority(new_hp) != hp_eff) { /* eff prio. of hp has changed */ + hp_eff = effective_priority(new_hp); + TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); + retval = gpu_klmirqd_decrease_priority(reg->thread, hp_eff); } } - raw_spin_unlock_irqrestore(®->lock, flags); +out: + return retval; +} + + + + + + + + + +static int __reg_nv_device(int reg_device_id, struct task_struct *t) +{ + __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); + + return(0); +} +static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) +{ __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); - return(ret); + return(0); } @@ -596,55 +841,213 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) return(ret); } -/* use to get the owner of nv_device_id. 
*/ -struct task_struct* get_nv_max_device_owner(u32 target_device_id) -{ - struct task_struct *owner = NULL; - BUG_ON(target_device_id >= NV_DEVICE_NUM); - owner = NV_DEVICE_REG[target_device_id].max_prio_owner; - return(owner); -} -void lock_nv_registry(u32 target_device_id, unsigned long* flags) -{ - BUG_ON(target_device_id >= NV_DEVICE_NUM); - if(in_interrupt()) - TRACE("Locking registry for %d.\n", target_device_id); - else - TRACE_CUR("Locking registry for %d.\n", target_device_id); - raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); -} -void unlock_nv_registry(u32 target_device_id, unsigned long* flags) -{ - BUG_ON(target_device_id >= NV_DEVICE_NUM); - if(in_interrupt()) - TRACE("Unlocking registry for %d.\n", target_device_id); - else - TRACE_CUR("Unlocking registry for %d.\n", target_device_id); - raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); -} -//void increment_nv_int_count(u32 device) + + + + + + + + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +//void pai_check_priority_increase(struct task_struct *t, int reg_device_id) +//{ +// unsigned long flags; +// nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; +// +// +// +// if(reg->max_prio_owner != t) { +// +// raw_spin_lock_irqsave(®->lock, flags); +// +// if(reg->max_prio_owner != t) { +// if(litmus->compare(t, reg->max_prio_owner)) { +// litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); +// reg->max_prio_owner = t; +// } +// } +// +// raw_spin_unlock_irqrestore(®->lock, flags); +// } +//} +// +// +//void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) //{ // unsigned long flags; -// struct task_struct* owner; +// nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; // -// lock_nv_registry(device, &flags); +// if(reg->max_prio_owner == t) { // -// owner = NV_DEVICE_REG[device].device_owner; -// if(owner) +// raw_spin_lock_irqsave(®->lock, flags); +// +// if(reg->max_prio_owner == t) { +// reg->max_prio_owner = find_hp_owner(reg, NULL); +// if(reg->max_prio_owner != t) { +// litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); +// } +// } +// +// raw_spin_unlock_irqrestore(®->lock, flags); +// } +//} +#endif + + + + + +//static int __reg_nv_device(int reg_device_id, struct task_struct *t) +//{ +// int ret = 0; +// int i; +// struct task_struct *old_max = NULL; +// +// +// raw_spin_lock_irqsave(®->lock, flags); +// +// if(reg->nr_owners < NV_MAX_SIMULT_USERS) { +// TRACE_TASK(t, "registers GPU %d\n", reg_device_id); +// for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { +// if(reg->owners[i] == NULL) { +// reg->owners[i] = t; +// +// //if(edf_higher_prio(t, reg->max_prio_owner)) { +// if(litmus->compare(t, reg->max_prio_owner)) { +// old_max = reg->max_prio_owner; +// reg->max_prio_owner = t; +// +//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +// litmus->change_prio_pai_tasklet(old_max, t); +//#endif +// } +// +//#ifdef CONFIG_LITMUS_SOFTIRQD +// down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); +//#endif +// ++(reg->nr_owners); +// +// break; +// } +// } +// } +// else // { -// atomic_inc(&tsk_rt(owner)->nv_int_count); +// TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); +// //ret = -EBUSY; // } // -// unlock_nv_registry(device, &flags); +// raw_spin_unlock_irqrestore(®->lock, flags); +// +// __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); +// +// return(ret); +//} +// +//static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) +//{ +// int ret = 0; +// int i; +// unsigned long flags; +// 
nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; +// +//#ifdef CONFIG_LITMUS_SOFTIRQD +// struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); +//#endif +// +// if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { +// return ret; +// } +// +// raw_spin_lock_irqsave(®->lock, flags); +// +// TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); +// +// for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { +// if(reg->owners[i] == t) { +//#ifdef CONFIG_LITMUS_SOFTIRQD +// flush_pending(klmirqd_th, t); +//#endif +// if(reg->max_prio_owner == t) { +// reg->max_prio_owner = find_hp_owner(reg, t); +//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +// litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); +//#endif +// } +// +//#ifdef CONFIG_LITMUS_SOFTIRQD +// up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); +//#endif +// +// reg->owners[i] = NULL; +// --(reg->nr_owners); +// +// break; +// } +// } +// +// raw_spin_unlock_irqrestore(®->lock, flags); +// +// __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); +// +// return(ret); +//} +// +// +//int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) +//{ +// int ret; +// +// if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) +// { +// if(reg_action) +// ret = __reg_nv_device(reg_device_id, t); +// else +// ret = __clear_reg_nv_device(reg_device_id, t); +// } +// else +// { +// ret = -ENODEV; +// } +// +// return(ret); +//} + + + +//void lock_nv_registry(u32 target_device_id, unsigned long* flags) +//{ +// BUG_ON(target_device_id >= NV_DEVICE_NUM); +// +// if(in_interrupt()) +// TRACE("Locking registry for %d.\n", target_device_id); +// else +// TRACE_CUR("Locking registry for %d.\n", target_device_id); +// +// raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); +//} +// +//void unlock_nv_registry(u32 target_device_id, unsigned long* flags) +//{ +// BUG_ON(target_device_id >= NV_DEVICE_NUM); +// +// if(in_interrupt()) +// TRACE("Unlocking registry for %d.\n", target_device_id); +// else +// TRACE_CUR("Unlocking registry for %d.\n", target_device_id); +// +// raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); //} -//EXPORT_SYMBOL(increment_nv_int_count); diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 44c8336c5061..84aafca78cde 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -44,6 +44,7 @@ #include #include +#include #ifdef CONFIG_LITMUS_LOCKING #include @@ -75,7 +76,6 @@ #ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include -#include #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -118,14 +118,6 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); #define test_will_schedule(cpu) \ (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD -struct tasklet_head -{ - struct tasklet_struct *head; - struct tasklet_struct **tail; -}; -#endif - /* * In C-EDF there is a cedf domain _per_ cluster * The number of clusters is dynamically determined accordingly to the @@ -1038,6 +1030,13 @@ static void cedf_task_wake_up(struct task_struct *task) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) { + TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid); + disable_gpu_owner(task); + } +#endif + cedf_job_arrival(task); raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); } @@ -1064,6 +1063,14 @@ static void cedf_task_block(struct task_struct *t) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { + + 
TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); + enable_gpu_owner(t); + } +#endif + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); BUG_ON(!is_realtime(t)); @@ -1092,6 +1099,13 @@ static void cedf_task_exit(struct task_struct * t) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + /* make sure we clean up on our way out */ + if(tsk_rt(t)->held_gpus) { + disable_gpu_owner(t); + } +#endif + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { cpu_entry_t *cpu; @@ -1208,6 +1222,13 @@ static int __increase_priority_inheritance(struct task_struct* t, aux_task_owner_increase_priority(t); } #endif + +#ifdef CONFIG_LITMUS_NVIDIA + /* propagate to gpu klmirqd */ + if (tsk_rt(t)->held_gpus) { + gpu_owner_increase_priority(t); + } +#endif } #ifdef CONFIG_LITMUS_NESTED_LOCKING } @@ -1237,16 +1258,6 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str __increase_priority_inheritance(t, prio_inh); -#ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klmirqd != NULL) - { - TRACE_TASK(t, "%s/%d inherits a new priority!\n", - tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - - __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); - } -#endif - raw_spin_unlock(&cluster->cluster_lock); #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) @@ -1320,6 +1331,13 @@ static int __decrease_priority_inheritance(struct task_struct* t, } #endif +#ifdef CONFIG_LITMUS_NVIDIA + /* propagate to gpu */ + if (tsk_rt(t)->held_gpus) { + gpu_owner_decrease_priority(t); + } +#endif + #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { @@ -1346,16 +1364,6 @@ static void decrease_priority_inheritance(struct task_struct* t, raw_spin_lock(&cluster->cluster_lock); __decrease_priority_inheritance(t, prio_inh); -#ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klmirqd != NULL) - { - TRACE_TASK(t, "%s/%d decreases in priority!\n", - tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - - __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); - } -#endif - raw_spin_unlock(&cluster->cluster_lock); #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) @@ -1371,73 +1379,6 @@ static void decrease_priority_inheritance(struct task_struct* t, } - - - -#ifdef CONFIG_LITMUS_SOFTIRQD -/* called with IRQs off */ -static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ - cedf_domain_t* cluster = task_cpu_cluster(klmirqd); - - BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); - - raw_spin_lock(&cluster->cluster_lock); - - if(old_owner != new_owner) - { - if(old_owner) - { - // unreachable? - tsk_rt(old_owner)->cur_klmirqd = NULL; - } - - TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n", - new_owner->comm, new_owner->pid); - - tsk_rt(new_owner)->cur_klmirqd = klmirqd; - } - - __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio. - - __increase_priority_inheritance(klmirqd, - (tsk_rt(new_owner)->inh_task == NULL) ? 
- new_owner : - tsk_rt(new_owner)->inh_task); - - raw_spin_unlock(&cluster->cluster_lock); -} - - -/* called with IRQs off */ -static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ - cedf_domain_t* cluster = task_cpu_cluster(klmirqd); - - BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); - - raw_spin_lock(&cluster->cluster_lock); - - TRACE_TASK(klmirqd, "priority restored\n"); - - __decrease_priority_inheritance(klmirqd, new_owner); - - tsk_rt(old_owner)->cur_klmirqd = NULL; - - raw_spin_unlock(&cluster->cluster_lock); -} -#endif // CONFIG_LITMUS_SOFTIRQD - - - - - - - #ifdef CONFIG_LITMUS_NESTED_LOCKING /* called with IRQs off */ @@ -1836,33 +1777,7 @@ static long cedf_activate_plugin(void) } #ifdef CONFIG_LITMUS_SOFTIRQD - { - /* distribute the daemons evenly across the clusters. */ - int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); - int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; - int left_over = NR_LITMUS_SOFTIRQD % num_clusters; - - int daemon = 0; - for(i = 0; i < num_clusters; ++i) - { - int num_on_this_cluster = num_daemons_per_cluster; - if(left_over) - { - ++num_on_this_cluster; - --left_over; - } - - for(j = 0; j < num_on_this_cluster; ++j) - { - // first CPU of this cluster - affinity[daemon++] = i*cluster_size; - } - } - - spawn_klmirqd(affinity); - - kfree(affinity); - } + init_klmirqd(); #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -1906,10 +1821,6 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_AFFINITY_LOCKING .allocate_aff_obs = cedf_allocate_affinity_observer, #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klmirqd = increase_priority_inheritance_klmirqd, - .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd, -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet, .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet, diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index d52be9325044..f27c104ea027 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -26,6 +26,7 @@ #include #include +#include #ifdef CONFIG_LITMUS_LOCKING #include @@ -50,7 +51,6 @@ #ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include -#include #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -156,12 +156,6 @@ static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t) #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD -struct tasklet_head -{ - struct tasklet_struct *head; - struct tasklet_struct **tail; -}; - struct tasklet_head gsnedf_pending_tasklets; #endif @@ -938,13 +932,6 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) sched_state_task_picked(); -#if 0 - if (next && is_realtime(next) && tsk_rt(next)->is_aux_task && !tsk_rt(next)->inh_task) { - TRACE_TASK(next, "is aux with no inheritance. 
preventing it from actually running.\n"); - next = NULL; - } -#endif - raw_spin_unlock(&gsnedf_lock); #ifdef WANT_ALL_SCHED_EVENTS @@ -1056,6 +1043,13 @@ static void gsnedf_task_wake_up(struct task_struct *task) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) { + TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid); + disable_gpu_owner(task); + } +#endif + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } @@ -1079,6 +1073,14 @@ static void gsnedf_task_block(struct task_struct *t) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { + + TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); + enable_gpu_owner(t); + } +#endif + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -1106,6 +1108,13 @@ static void gsnedf_task_exit(struct task_struct * t) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + /* make sure we clean up on our way out */ + if(tsk_rt(t)->held_gpus) { + disable_gpu_owner(t); + } +#endif + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; @@ -1154,7 +1163,6 @@ static int __increase_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ - /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) */ if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { #endif TRACE_TASK(t, "inherits priority from %s/%d\n", @@ -1218,6 +1226,14 @@ static int __increase_priority_inheritance(struct task_struct* t, aux_task_owner_increase_priority(t); } #endif + +#ifdef CONFIG_LITMUS_NVIDIA + /* propagate to gpu klmirqd */ + if (tsk_rt(t)->held_gpus) { + gpu_owner_increase_priority(t); + } +#endif + } #ifdef CONFIG_LITMUS_NESTED_LOCKING } @@ -1247,16 +1263,6 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str success = __increase_priority_inheritance(t, prio_inh); -#ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klmirqd != NULL) - { - TRACE_TASK(t, "%s/%d inherits a new priority!\n", - tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - - __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); - } -#endif - raw_spin_unlock(&gsnedf_lock); #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) @@ -1330,6 +1336,14 @@ static int __decrease_priority_inheritance(struct task_struct* t, } #endif +#ifdef CONFIG_LITMUS_NVIDIA + /* propagate to gpu */ + if (tsk_rt(t)->held_gpus) { + gpu_owner_decrease_priority(t); + } +#endif + + #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { @@ -1357,16 +1371,6 @@ static void decrease_priority_inheritance(struct task_struct* t, success = __decrease_priority_inheritance(t, prio_inh); -#ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klmirqd != NULL) - { - TRACE_TASK(t, "%s/%d decreases in priority!\n", - tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - - __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); - } -#endif - raw_spin_unlock(&gsnedf_lock); #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) @@ -1382,62 +1386,6 @@ static void decrease_priority_inheritance(struct task_struct* t, } -#ifdef CONFIG_LITMUS_SOFTIRQD -/* called with IRQs off */ -static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ - 
BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); - - raw_spin_lock(&gsnedf_lock); - - if(old_owner != new_owner) - { - if(old_owner) - { - // unreachable? - tsk_rt(old_owner)->cur_klmirqd = NULL; - } - - TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n", - new_owner->comm, new_owner->pid); - - tsk_rt(new_owner)->cur_klmirqd = klmirqd; - } - - __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio. - - __increase_priority_inheritance(klmirqd, - (tsk_rt(new_owner)->inh_task == NULL) ? - new_owner : - tsk_rt(new_owner)->inh_task); - - raw_spin_unlock(&gsnedf_lock); -} - - -/* called with IRQs off */ -static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ - BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); - - raw_spin_lock(&gsnedf_lock); - - TRACE_TASK(klmirqd, "priority restored\n"); - - __decrease_priority_inheritance(klmirqd, new_owner); - - tsk_rt(old_owner)->cur_klmirqd = NULL; - - raw_spin_unlock(&gsnedf_lock); -} -#endif - - - #ifdef CONFIG_LITMUS_NESTED_LOCKING @@ -1923,7 +1871,7 @@ static long gsnedf_activate_plugin(void) #endif #ifdef CONFIG_LITMUS_SOFTIRQD - spawn_klmirqd(NULL); + init_klmirqd(); #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -1965,10 +1913,6 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_AFFINITY_LOCKING .allocate_aff_obs = gsnedf_allocate_affinity_observer, #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klmirqd = increase_priority_inheritance_klmirqd, - .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd, -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet, .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet, diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index cda67e0f6bc8..30c216fd6fdc 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -167,18 +167,6 @@ static int litmus_dummy___decrease_prio(struct task_struct* t, struct task_struc } #endif -#ifdef CONFIG_LITMUS_SOFTIRQD -static void litmus_dummy_increase_prio_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ -} - -static void litmus_dummy_decrease_prio_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner) -{ -} -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t) @@ -263,10 +251,6 @@ struct sched_plugin linux_sched_plugin = { .nested_decrease_prio = litmus_dummy_nested_decrease_prio, .__compare = litmus_dummy___compare, #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klmirqd = litmus_dummy_increase_prio_klmirqd, - .decrease_prio_klmirqd = litmus_dummy_decrease_prio_klmirqd, -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet, @@ -327,10 +311,6 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(nested_decrease_prio); CHECK(__compare); #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - CHECK(increase_prio_klmirqd); - CHECK(decrease_prio_klmirqd); -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD CHECK(enqueue_pai_tasklet); CHECK(change_prio_pai_tasklet); -- cgit v1.2.2 From 9207c7f874e7754391fdf184187fc763455466c5 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Wed, 12 Dec 2012 14:45:17 -0500 Subject: Cluster assignment of nv klmirqd threads. 
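This patch adds a per-plugin hook, map_gpu_to_cpu() (type default_cpu_for_gpu_t), that init_nv_device_reg() calls to choose the CPU on which each GPU's klmirqd interrupt thread is launched: C-EDF assigns GPUs to its CPU clusters in contiguous groups and returns the first CPU of the owning cluster, while GSN-EDF always returns CPU 0. For illustration only (not part of the applied diff), the minimal user-space sketch below mirrors the C-EDF mapping policy; the constants NUM_CPU_CLUSTERS, CPUS_PER_CLUSTER, and NUM_GPUS and the name example_map_gpu_to_cpu are hypothetical stand-ins for C-EDF's runtime cluster data, and it assumes CPUs are numbered contiguously by cluster.

#include <stdio.h>

/* Hypothetical stand-ins for C-EDF's runtime cluster configuration and
 * CONFIG_NV_DEVICE_NUM; the real values are computed at plugin activation. */
#define NUM_CPU_CLUSTERS 4
#define CPUS_PER_CLUSTER 2
#define NUM_GPUS         8

/* Mirrors the policy in this patch: GPUs are grouped onto CPU clusters in
 * contiguous ranges, and each GPU's interrupt thread defaults to the first
 * CPU of its cluster. */
static int example_map_gpu_to_cpu(int gpu)
{
	int gpu_cluster_size = NUM_GPUS / NUM_CPU_CLUSTERS; /* GPUs per cluster */
	int cpu_cluster = gpu / gpu_cluster_size;           /* owning cluster */
	return cpu_cluster * CPUS_PER_CLUSTER;              /* first CPU of that cluster */
}

int main(void)
{
	int gpu;
	for (gpu = 0; gpu < NUM_GPUS; gpu++)
		printf("GPU %d -> default CPU %d\n", gpu, example_map_gpu_to_cpu(gpu));
	return 0;
}

With 8 GPUs and 4 clusters of 2 CPUs, GPUs 0-1 map to CPU 0, GPUs 2-3 to CPU 2, and so on, matching the gpu / gpu_cluster_size computation in cedf_map_gpu_to_cpu() in the diff that follows.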
--- include/litmus/sched_plugin.h | 8 ++++++++ litmus/nvidia_info.c | 15 +++++++++------ litmus/sched_cedf.c | 22 ++++++++++++++++++++++ litmus/sched_gsn_edf.c | 10 +++++++++- litmus/sched_plugin.c | 14 ++++++++++++++ 5 files changed, 62 insertions(+), 7 deletions(-) diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index a13d1a2992fe..cfa218504d75 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -102,6 +102,10 @@ typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod, struct task_struct* b, comparison_mode_t b_mod); #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +typedef int (*default_cpu_for_gpu_t)(int gpu, int num_gpus); +#endif + /********************* sys call backends ********************/ /* This function causes the caller to sleep until the next release */ @@ -165,6 +169,10 @@ struct sched_plugin { change_prio_pai_tasklet_t change_prio_pai_tasklet; run_tasklets_t run_tasklets; #endif + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + default_cpu_for_gpu_t map_gpu_to_cpu; +#endif } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index b29f4d3f0dac..0b39dcc84115 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -449,13 +449,16 @@ static int init_nv_device_reg(void) INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); #ifdef CONFIG_LITMUS_SOFTIRQD - // TODO: Make thread spawning this a litmus plugin call. - NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; - NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); - mb(); + { + int default_cpu = litmus->map_gpu_to_cpu(i, NV_DEVICE_NUM); + + NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; + NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); + mb(); - if(launch_klmirqd_thread(0, &NV_DEVICE_REG[i].callback) != 0) { - TRACE("Failed to create klmirqd thread for GPU %d\n", i); + if(launch_klmirqd_thread(default_cpu, &NV_DEVICE_REG[i].callback) != 0) { + TRACE("Failed to create klmirqd thread for GPU %d\n", i); + } } #endif } diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 84aafca78cde..35ea1544ce69 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1652,6 +1652,25 @@ static void cleanup_cedf(void) } } +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +static int cedf_map_gpu_to_cpu(int gpu, int num_gpus) +{ + /* TODO: Some sort of smart clustering on the PCIe bus topology */ + int num_gpu_clusters = num_clusters; + unsigned int gpu_cluster_size = num_gpus / num_gpu_clusters; + int cpu_cluster = gpu / gpu_cluster_size; + int default_cpu = cedf[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster + + if(num_gpus % num_gpu_clusters != 0) { + TRACE("GPU clusters are of non-uniform size!\n"); + } + + TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu); + + return default_cpu; +} +#endif + static long cedf_activate_plugin(void) { int i, j, cpu, ccpu, cpu_count; @@ -1826,6 +1845,9 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet, .run_tasklets = cedf_run_tasklets, #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + .map_gpu_to_cpu = cedf_map_gpu_to_cpu, +#endif }; static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index f27c104ea027..1b5d8d73dc16 100644 --- 
a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1835,7 +1835,12 @@ UNSUPPORTED_AFF_OBS: #endif - +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +static int gsnedf_map_gpu_to_cpu(int gpu, int num_gpus) +{ + return 0; // CPU_0 is default in all cases. +} +#endif static long gsnedf_activate_plugin(void) @@ -1918,6 +1923,9 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet, .run_tasklets = gsnedf_run_tasklets, #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + .map_gpu_to_cpu = gsnedf_map_gpu_to_cpu, +#endif }; diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 30c216fd6fdc..ea89f5fedcab 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -222,6 +222,13 @@ static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs, } #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +static int litmus_dummy_map_gpu_to_cpu(int gpu, int num_gpus) +{ + return 0; +} +#endif + /* The default scheduler plugin. It doesn't do anything and lets Linux do its * job. @@ -262,6 +269,9 @@ struct sched_plugin linux_sched_plugin = { #ifdef CONFIG_LITMUS_AFFINITY_LOCKING .allocate_aff_obs = litmus_dummy_allocate_aff_obs, #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + .map_gpu_to_cpu = litmus_dummy_map_gpu_to_cpu, +#endif .admit_task = litmus_dummy_admit_task }; @@ -322,6 +332,10 @@ int register_sched_plugin(struct sched_plugin* plugin) #ifdef CONFIG_LITMUS_AFFINITY_LOCKING CHECK(allocate_aff_obs); #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + CHECK(map_gpu_to_cpu); +#endif + CHECK(admit_task); if (!plugin->release_at) -- cgit v1.2.2 From 2ccc2c4cc981a68e703082e6e32f5483ad87b61c Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Wed, 12 Dec 2012 16:38:55 -0500 Subject: Use num_online_gpus() Note that num_online_gpus() merely reports the staticly configured maximum number of available GPUs. Will make dynamic in the future. --- include/litmus/nvidia_info.h | 6 ++++-- include/litmus/sched_plugin.h | 2 +- litmus/nvidia_info.c | 8 ++++---- litmus/sched_cedf.c | 27 +++++++++++++++++++-------- litmus/sched_gsn_edf.c | 2 +- litmus/sched_plugin.c | 2 +- 6 files changed, 30 insertions(+), 17 deletions(-) diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 6f354c8b00ac..8c2a5524512e 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h @@ -8,6 +8,10 @@ #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM +/* TODO: Make this a function that checks the PCIe bus or maybe proc settings */ +#define num_online_gpus() (NV_DEVICE_NUM) + + /* Functions used for decoding NVIDIA blobs. 
*/ int init_nvidia_info(void); @@ -21,8 +25,6 @@ void dump_nvidia_info(const struct tasklet_struct *t); u32 get_tasklet_nv_device_num(const struct tasklet_struct *t); u32 get_work_nv_device_num(const struct work_struct *t); - - /* Functions for figuring out the priority of GPU-using tasks */ struct task_struct* get_nv_max_device_owner(u32 target_device_id); diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index cfa218504d75..78004381a6cc 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -103,7 +103,7 @@ typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod, #endif #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) -typedef int (*default_cpu_for_gpu_t)(int gpu, int num_gpus); +typedef int (*default_cpu_for_gpu_t)(int gpu); #endif diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 0b39dcc84115..7883296a7a18 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -444,13 +444,13 @@ static int init_nv_device_reg(void) mb(); - for(i = 0; i < NV_DEVICE_NUM; ++i) { + for(i = 0; i < num_online_gpus(); ++i) { raw_spin_lock_init(&NV_DEVICE_REG[i].lock); INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); #ifdef CONFIG_LITMUS_SOFTIRQD { - int default_cpu = litmus->map_gpu_to_cpu(i, NV_DEVICE_NUM); + int default_cpu = litmus->map_gpu_to_cpu(i); NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); @@ -478,7 +478,7 @@ static int shutdown_nv_device_reg(void) int i; nv_device_registry_t *reg; - for (i = 0; i < NV_DEVICE_NUM; ++i) { + for (i = 0; i < num_online_gpus(); ++i) { TRACE("Shutting down GPU %d.\n", i); @@ -829,7 +829,7 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) { int ret; - if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) + if((reg_device_id < num_online_gpus()) && (reg_device_id >= 0)) { if(reg_action) ret = __reg_nv_device(reg_device_id, t); diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 35ea1544ce69..46de8041cf59 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -157,6 +157,13 @@ static unsigned int cluster_size; static int clusters_allocated = 0; + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +static int num_gpu_clusters; +static unsigned int gpu_cluster_size; +#endif + + #ifdef CONFIG_LITMUS_DGL_SUPPORT static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t) { @@ -1653,18 +1660,11 @@ static void cleanup_cedf(void) } #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) -static int cedf_map_gpu_to_cpu(int gpu, int num_gpus) +static int cedf_map_gpu_to_cpu(int gpu) { - /* TODO: Some sort of smart clustering on the PCIe bus topology */ - int num_gpu_clusters = num_clusters; - unsigned int gpu_cluster_size = num_gpus / num_gpu_clusters; int cpu_cluster = gpu / gpu_cluster_size; int default_cpu = cedf[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster - if(num_gpus % num_gpu_clusters != 0) { - TRACE("GPU clusters are of non-uniform size!\n"); - } - TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu); return default_cpu; @@ -1717,6 +1717,17 @@ static long cedf_activate_plugin(void) printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", num_clusters, cluster_size); + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + num_gpu_clusters = min(num_clusters, num_online_gpus()); + gpu_cluster_size = num_online_gpus() / 
num_gpu_clusters; + + if (((num_online_gpus() % gpu_cluster_size) != 0) || + (num_gpu_clusters != num_clusters)) { + printk(KERN_WARNING "C-EDF: GPUs not uniformly distributed among CPU clusters.\n"); + } +#endif + /* initialize clusters */ cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); for (i = 0; i < num_clusters; i++) { diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 1b5d8d73dc16..4ac573a6f0f7 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1836,7 +1836,7 @@ UNSUPPORTED_AFF_OBS: #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) -static int gsnedf_map_gpu_to_cpu(int gpu, int num_gpus) +static int gsnedf_map_gpu_to_cpu(int gpu) { return 0; // CPU_0 is default in all cases. } diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index ea89f5fedcab..76ff892122aa 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -223,7 +223,7 @@ static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs, #endif #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) -static int litmus_dummy_map_gpu_to_cpu(int gpu, int num_gpus) +static int litmus_dummy_map_gpu_to_cpu(int gpu) { return 0; } -- cgit v1.2.2 From 4ea2c9490eaf9df55ccbfe6f4c56518fc4bdce8f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 11:44:23 -0500 Subject: Fix klmirqd plugin switching to not panic. --- litmus/Kconfig | 13 ------------- litmus/litmus.c | 23 ++++++++++------------- litmus/litmus_softirq.c | 27 +++++++++++++++++++++------ litmus/nvidia_info.c | 5 ++++- 4 files changed, 35 insertions(+), 33 deletions(-) diff --git a/litmus/Kconfig b/litmus/Kconfig index 9aeae659ae32..8ca66b4d687c 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -443,19 +443,6 @@ config NV_DEVICE_NUM Should be (<= to the number of CPUs) and (<= to the number of GPUs) in your system. -config NV_MAX_SIMULT_USERS - int "Maximum number of threads sharing a GPU simultanously" - depends on LITMUS_NVIDIA - range 1 3 - default "2" - help - Should be at least equal to the #copy_engines + #execution_engines - of the GPUs in your system. - - Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?) - Consumer Fermi/Kepler GPUs = 2 (GTX-4xx thru -6xx) - Older = 1 (ex. GTX-2xx) - choice prompt "CUDA/Driver Version Support" default CUDA_5_0 diff --git a/litmus/litmus.c b/litmus/litmus.c index fa244ba53e22..f98aa9d778a2 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -445,8 +445,8 @@ long __litmus_admit_task(struct task_struct* tsk) #endif #ifdef CONFIG_LITMUS_SOFTIRQD /* not an interrupt thread by default */ - tsk_rt(tsk)->is_interrupt_thread = 0; - tsk_rt(tsk)->klmirqd_info = NULL; + //tsk_rt(tsk)->is_interrupt_thread = 0; + //tsk_rt(tsk)->klmirqd_info = NULL; #endif retval = litmus->admit_task(tsk); @@ -523,11 +523,6 @@ static void synch_on_plugin_switch(void* info) cpu_relax(); } -/* Switching a plugin in use is tricky. - * We must watch out that no real-time tasks exists - * (and that none is created in parallel) and that the plugin is not - * currently in use on any processor (in theory). 
- */ int switch_sched_plugin(struct sched_plugin* plugin) { //unsigned long flags; @@ -535,20 +530,21 @@ int switch_sched_plugin(struct sched_plugin* plugin) BUG_ON(!plugin); +#ifdef CONFIG_LITMUS_SOFTIRQD + if (!klmirqd_is_dead()) { + kill_klmirqd(); + } +#endif + /* forbid other cpus to use the plugin */ atomic_set(&cannot_use_plugin, 1); /* send IPI to force other CPUs to synch with us */ smp_call_function(synch_on_plugin_switch, NULL, 0); /* wait until all other CPUs have started synch */ - while (atomic_read(&cannot_use_plugin) < num_online_cpus()) + while (atomic_read(&cannot_use_plugin) < num_online_cpus()) { cpu_relax(); - -#ifdef CONFIG_LITMUS_SOFTIRQD - if (!klmirqd_is_dead()) { - kill_klmirqd(); } -#endif /* stop task transitions */ //raw_spin_lock_irqsave(&task_transition_lock, flags); @@ -571,6 +567,7 @@ int switch_sched_plugin(struct sched_plugin* plugin) out: //raw_spin_unlock_irqrestore(&task_transition_lock, flags); atomic_set(&cannot_use_plugin, 0); + return ret; } diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 44e2d38ad982..9c5ecab5e8d9 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -79,6 +79,7 @@ void kill_klmirqd(void) { unsigned long flags; struct list_head *pos; + struct list_head *q; raw_spin_lock_irqsave(&klmirqd_state.lock, flags); @@ -86,7 +87,7 @@ void kill_klmirqd(void) klmirqd_state.shuttingdown = 1; - list_for_each(pos, &klmirqd_state.threads) { + list_for_each_safe(pos, q, &klmirqd_state.threads) { struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg); if(info->terminating != 1) @@ -96,7 +97,9 @@ void kill_klmirqd(void) flush_pending(info->klmirqd); /* signal termination */ + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); kthread_stop(info->klmirqd); + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); } } @@ -219,7 +222,7 @@ int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb) #define KLMIRQD_SLICE_NR_JIFFIES 1 #define KLMIRQD_SLICE_NS ((NSEC_PER_SEC / HZ) * KLMIRQD_SLICE_NR_JIFFIES) -static int set_litmus_daemon_sched(struct task_struct* tsk) +static int become_litmus_daemon(struct task_struct* tsk) { int ret = 0; @@ -249,6 +252,16 @@ static int set_litmus_daemon_sched(struct task_struct* tsk) return ret; } +static int become_normal_daemon(struct task_struct* tsk) +{ + int ret = 0; + + struct sched_param param = { .sched_priority = 0}; + sched_setscheduler_nocheck(tsk, SCHED_NORMAL, ¶m); + + return ret; +} + static int register_klmirqd(struct task_struct* tsk) { int retval = 0; @@ -318,6 +331,7 @@ static int unregister_klmirqd(struct task_struct* tsk) /* remove the entry in the klmirqd thread list */ list_del(&info->klmirqd_reg); + mb(); --klmirqd_state.nr_threads; /* remove link to klmirqd info from thread */ @@ -687,7 +701,7 @@ static int run_klmirqd(void* callback) struct klmirqd_info* info = NULL; klmirqd_callback_t* cb = (klmirqd_callback_t*)(callback); - retval = set_litmus_daemon_sched(current); + retval = become_litmus_daemon(current); if (retval != 0) { TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); goto failed; @@ -696,7 +710,7 @@ static int run_klmirqd(void* callback) retval = register_klmirqd(current); if (retval != 0) { TRACE_CUR("%s: Failed to become a klmirqd thread.\n", __FUNCTION__); - goto failed; + goto failed_sched_normal; } if (cb && cb->func) { @@ -781,9 +795,10 @@ failed_unregister: /* remove our registration from klmirqd */ unregister_klmirqd(current); -failed: - litmus_exit_task(current); +failed_sched_normal: + 
become_normal_daemon(current); +failed: return retval; } diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 7883296a7a18..3d38b168d9ba 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -322,7 +322,10 @@ int init_nvidia_info(void) else { TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); - return(-1); + + init_nv_device_reg(); + return(0); +// return(-1); } } -- cgit v1.2.2 From a3e1d14976fbb0859ad91afdbea13786255648da Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 13:12:59 -0500 Subject: blah --- litmus/litmus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litmus/litmus.c b/litmus/litmus.c index f98aa9d778a2..4ee1c6ca7801 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -647,7 +647,7 @@ int sys_kill(int pid, int sig); static void sysrq_handle_kill_rt_tasks(int key) { - struct task_struct *t; + struct task_struct *t; // test read_lock(&tasklist_lock); for_each_process(t) { if (is_realtime(t)) { -- cgit v1.2.2 From bb9b9d2075a717ea77cb83c30d55aed366bececf Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 13:13:21 -0500 Subject: test2 --- litmus/litmus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litmus/litmus.c b/litmus/litmus.c index 4ee1c6ca7801..f98aa9d778a2 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -647,7 +647,7 @@ int sys_kill(int pid, int sig); static void sysrq_handle_kill_rt_tasks(int key) { - struct task_struct *t; // test + struct task_struct *t; read_lock(&tasklist_lock); for_each_process(t) { if (is_realtime(t)) { -- cgit v1.2.2 From 8f4bc19471bd49f4dcf6ab20254b7c71ec12e4e2 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 17:15:17 -0500 Subject: Fix several klmirqd bugs. 1) Deadlock in litmus_task_exit()-- added litmus_pre_task_exit() to be called without the Linux runqueue lock held. 2) Prioritization of base-prio klmirqd/aux threads vs. normal real-time tasks. 3) Initialization of gpu owner binheap node moved to *after* memset(0) of rt_params. 4) Exit path of klmirqd threads. --- include/litmus/litmus.h | 2 ++ kernel/sched.c | 3 ++ litmus/Kconfig | 8 +++++ litmus/edf_common.c | 20 ++++++++++-- litmus/litmus.c | 36 +++++++++++--------- litmus/litmus_softirq.c | 4 +++ litmus/nvidia_info.c | 87 +++++++++++++++++++++++++++++++++++++++++++++---- litmus/sched_cedf.c | 6 ++-- 8 files changed, 138 insertions(+), 28 deletions(-) diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 711b88e2b3d1..54f33e835682 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -38,6 +38,8 @@ void litmus_exec(void); void exit_litmus(struct task_struct *dead_tsk); long litmus_admit_task(struct task_struct *tsk); + +void litmus_pre_exit_task(struct task_struct *tsk); // called before litmus_exit_task, but without run queue locks held void litmus_exit_task(struct task_struct *tsk); #define is_realtime(t) ((t)->policy == SCHED_LITMUS) diff --git a/kernel/sched.c b/kernel/sched.c index 840f87bce097..a1f10984adb3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5288,6 +5288,9 @@ recheck: if (retval) return retval; } + else if (p->policy == SCHED_LITMUS) { + litmus_pre_exit_task(p); + } /* * make sure no PI-waiters arrive (or leave) while we are diff --git a/litmus/Kconfig b/litmus/Kconfig index 8ca66b4d687c..b704e893e9be 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -470,6 +470,14 @@ config CUDA_3_2 endchoice +config LITMUS_NV_KLMIRQD_DEBUG + bool "Raise fake sporadic tasklets to test nv klimirqd threads." 
+ depends on LITMUS_NVIDIA && LITMUS_SOFTIRQD + default n + help + Causes tasklets to be sporadically dispatched to waiting klmirqd + threads. + endmenu endmenu diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 27b728a55669..255e4f36e413 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -119,8 +119,15 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) goto aux_tie_break; } else { + // make the aux thread lowest priority real-time task - int temp = (first_lo_aux) ? !is_realtime(second) : !is_realtime(first); + int temp = 0; + if (first_lo_aux && is_realtime(second)) { +// temp = 0; + } + else if(second_lo_aux && is_realtime(first)) { + temp = 1; + } TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); return temp; } @@ -149,8 +156,15 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) goto klmirqd_tie_break; } else { - // make the klmirqd thread (second) lowest priority real-time task - int temp = (first_lo_klmirqd) ? !is_realtime(second) : !is_realtime(first); + // make the klmirqd thread the lowest-priority real-time task + // but (above low-prio aux tasks and Linux tasks) + int temp = 0; + if (first_lo_klmirqd && is_realtime(second)) { +// temp = 0; + } + else if(second_lo_klmirqd && is_realtime(first)) { + temp = 1; + } TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); return temp; } diff --git a/litmus/litmus.c b/litmus/litmus.c index f98aa9d778a2..1aada57176de 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -355,8 +355,8 @@ static void reinit_litmus_state(struct task_struct* p, int restore) ctrl_page = p->rt_param.ctrl_page; } -#ifdef CONFIG_LITMUS_NESTED_LOCKING - prio_order = p->rt_param.hp_blocked_tasks.compare; +#ifdef CONFIG_LITMUS_NVIDIA + WARN_ON(p->rt_param.held_gpus != 0); #endif #ifdef CONFIG_LITMUS_LOCKING @@ -367,15 +367,7 @@ static void reinit_litmus_state(struct task_struct* p, int restore) #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING -// WARN_ON(p->rt_param.blocked_lock); -// WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); -#endif - - -#ifdef CONFIG_LITMUS_NVIDIA - WARN_ON(p->rt_param.held_gpus != 0); - - INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node); + prio_order = p->rt_param.hp_blocked_tasks.compare; #endif /* Cleanup everything else. */ @@ -384,8 +376,9 @@ static void reinit_litmus_state(struct task_struct* p, int restore) #ifdef CONFIG_REALTIME_AUX_TASKS /* also clear out the aux_data. the !restore case is only called on * fork (initial thread creation). */ - if (!restore) + if (!restore) { memset(&p->aux_data, 0, sizeof(p->aux_data)); + } #endif /* Restore preserved fields. 
*/ @@ -394,6 +387,10 @@ static void reinit_litmus_state(struct task_struct* p, int restore) p->rt_param.ctrl_page = ctrl_page; } +#ifdef CONFIG_LITMUS_NVIDIA + INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node); +#endif + #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) init_gpu_affinity_state(p); #endif @@ -494,15 +491,20 @@ out: return retval; } -void litmus_exit_task(struct task_struct* tsk) +void litmus_pre_exit_task(struct task_struct* tsk) { if (is_realtime(tsk)) { - sched_trace_task_completion(tsk, 1); - if (tsk_rt(tsk)->rsrc_exit_cb) { int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk); WARN_ON(ret != 0); } + } +} + +void litmus_exit_task(struct task_struct* tsk) +{ + if (is_realtime(tsk)) { + sched_trace_task_completion(tsk, 1); litmus->task_exit(tsk); @@ -637,8 +639,10 @@ void exit_litmus(struct task_struct *dead_tsk) } /* main cleanup only for RT tasks */ - if (is_realtime(dead_tsk)) + if (is_realtime(dead_tsk)) { + litmus_pre_exit_task(dead_tsk); /* todo: double check that no Linux rq lock is held */ litmus_exit_task(dead_tsk); + } } diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 9c5ecab5e8d9..be06405021c5 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -1163,3 +1163,7 @@ int __litmus_schedule_work(struct work_struct *w, struct task_struct* klmirqd_th } EXPORT_SYMBOL(__litmus_schedule_work); + + + + diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 3d38b168d9ba..059a7e7ac715 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -258,6 +258,8 @@ void dump_nvidia_info(const struct tasklet_struct *t) static struct module* nvidia_mod = NULL; + + #if 0 static int nvidia_ready_module_notify(struct notifier_block *self, unsigned long val, void *data) @@ -390,6 +392,10 @@ typedef struct { struct task_struct* thread; int ready:1; /* todo: make threads check for the ready flag */ #endif + +#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG + struct tasklet_struct nv_klmirqd_dbg_tasklet; +#endif }nv_device_registry_t; @@ -397,8 +403,6 @@ static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; - - #ifdef CONFIG_LITMUS_SOFTIRQD static int nvidia_klmirqd_cb(void *arg) { @@ -417,6 +421,63 @@ static int nvidia_klmirqd_cb(void *arg) } #endif +#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG +struct nv_klmirqd_dbg_timer_struct +{ + struct hrtimer timer; +}; + +static struct nv_klmirqd_dbg_timer_struct nv_klmirqd_dbg_timer; + +static void nv_klmirqd_arm_dbg_timer(lt_t relative_time) +{ + lt_t when_to_fire = litmus_clock() + relative_time; + + TRACE("next nv tasklet in %d ns\n", relative_time); + + __hrtimer_start_range_ns(&nv_klmirqd_dbg_timer.timer, + ns_to_ktime(when_to_fire), + 0, + HRTIMER_MODE_ABS_PINNED, + 0); +} + +static void nv_klmirqd_dbg_tasklet_func(unsigned long arg) +{ + lt_t now = litmus_clock(); + nv_device_registry_t *reg = (nv_device_registry_t*)arg; + int gpunum = reg - &NV_DEVICE_REG[0]; + + TRACE("nv klmirqd routine invoked for GPU %d!\n", gpunum); + + /* set up the next timer */ + nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. 
+} + + +static enum hrtimer_restart nvklmirqd_timer_func(struct hrtimer *timer) +{ + lt_t now = litmus_clock(); + int gpu = (int)(now % num_online_gpus()); + nv_device_registry_t *reg; + + TRACE("nvklmirqd_timer invoked!\n"); + + reg = &NV_DEVICE_REG[gpu]; + + if (reg->thread && reg->ready) { + TRACE("Adding a tasklet for GPU %d\n", gpu); + litmus_tasklet_schedule(®->nv_klmirqd_dbg_tasklet, reg->thread); + } + else { + TRACE("nv klmirqd is not ready!\n"); + nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. + } + + return HRTIMER_NORESTART; +} +#endif + static int gpu_owner_max_priority_order(struct binheap_node *a, struct binheap_node *b) @@ -451,6 +512,10 @@ static int init_nv_device_reg(void) raw_spin_lock_init(&NV_DEVICE_REG[i].lock); INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); +#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG + tasklet_init(&NV_DEVICE_REG[i].nv_klmirqd_dbg_tasklet, nv_klmirqd_dbg_tasklet_func, (unsigned long)&NV_DEVICE_REG[i]); +#endif + #ifdef CONFIG_LITMUS_SOFTIRQD { int default_cpu = litmus->map_gpu_to_cpu(i); @@ -466,6 +531,12 @@ static int init_nv_device_reg(void) #endif } +#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG + hrtimer_init(&nv_klmirqd_dbg_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + nv_klmirqd_dbg_timer.timer.function = nvklmirqd_timer_func; + nv_klmirqd_arm_dbg_timer(NSEC_PER_MSEC * 1000); +#endif + return(1); } @@ -578,7 +649,7 @@ static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct tas -/* call when an aux_owner becomes real-time */ +/* call when an gpu owner becomes real-time */ long enable_gpu_owner(struct task_struct *t) { long retval = 0; @@ -631,7 +702,7 @@ out: return retval; } -/* call when an aux_owner exits real-time */ +/* call when an gpu owner exits real-time */ long disable_gpu_owner(struct task_struct *t) { long retval = 0; @@ -773,9 +844,9 @@ int gpu_owner_decrease_priority(struct task_struct *t) gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); - if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { WARN_ON(!is_running(t)); - TRACE_CUR("aux tasks may not inherit from %s/%d on GPU %d\n", + TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n", t->comm, t->pid, gpu); goto out; } @@ -862,6 +933,10 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) + + + + diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 46de8041cf59..a454832b2aa8 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1717,17 +1717,17 @@ static long cedf_activate_plugin(void) printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", num_clusters, cluster_size); - + #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) num_gpu_clusters = min(num_clusters, num_online_gpus()); gpu_cluster_size = num_online_gpus() / num_gpu_clusters; - + if (((num_online_gpus() % gpu_cluster_size) != 0) || (num_gpu_clusters != num_clusters)) { printk(KERN_WARNING "C-EDF: GPUs not uniformly distributed among CPU clusters.\n"); } #endif - + /* initialize clusters */ cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); for (i = 0; i < num_clusters; i++) { -- cgit v1.2.2 From fa1229f9776c7ecc99baa187e0b485ebdbfdd78c Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 18:39:27 -0500 Subject: Allow klmirqd threads to be given names. 
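
For illustration, a minimal caller of the renamed interface might look like the
sketch below (the callback and CPU arguments are placeholders; the real
equivalent is the nvidia_info.c hunk in this patch). Passing name == NULL keeps
the old auto-generated "klmirqd_th%d" names, and cpu == -1 requests no affinity:

    #include <linux/kernel.h>           /* snprintf() */
    #include <litmus/litmus_softirq.h>  /* launch_klmirqd_thread(), MAX_KLMIRQD_NAME_LEN */

    /* Callback assumed to be initialized elsewhere (illustrative only). */
    static klmirqd_callback_t my_callback;

    static int start_named_klmirqd(int gpu, int cpu)
    {
        char name[MAX_KLMIRQD_NAME_LEN + 1];

        /* At most 31 characters; when cpu != -1 the launch path appends
         * "/<cpu>" to the thread name. */
        snprintf(name, sizeof(name), "nvklmirqd%d", gpu);

        return launch_klmirqd_thread(name, cpu, &my_callback);
    }
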
--- include/litmus/litmus_softirq.h | 7 +++- litmus/Kconfig | 3 +- litmus/litmus.c | 12 +++--- litmus/litmus_softirq.c | 84 +++++++++++++++++++++++++++++------------ litmus/nvidia_info.c | 5 ++- litmus/sched_gsn_edf.c | 2 +- 6 files changed, 80 insertions(+), 33 deletions(-) diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h index 52e3f7e74ab1..cfef08187464 100644 --- a/include/litmus/litmus_softirq.h +++ b/include/litmus/litmus_softirq.h @@ -57,8 +57,13 @@ typedef struct data will be initialized. cpu == -1 for no affinity + + provide a name at most 31 (32, + null terminator) characters long. + name == NULL for a default name. (all names are appended with + base-CPU affinity) */ -int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb); +#define MAX_KLMIRQD_NAME_LEN 31 +int launch_klmirqd_thread(char* name, int cpu, klmirqd_callback_t* cb); /* Flushes all pending work out to the OS for regular diff --git a/litmus/Kconfig b/litmus/Kconfig index b704e893e9be..c05405094ea4 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -476,7 +476,8 @@ config LITMUS_NV_KLMIRQD_DEBUG default n help Causes tasklets to be sporadically dispatched to waiting klmirqd - threads. + threads. WARNING! Kernel panic may occur if you switch between + LITMUS plugins! endmenu diff --git a/litmus/litmus.c b/litmus/litmus.c index 1aada57176de..1b4b9d25dbdc 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -556,14 +556,16 @@ int switch_sched_plugin(struct sched_plugin* plugin) ret = litmus->deactivate_plugin(); if (0 != ret) goto out; - ret = plugin->activate_plugin(); + + litmus = plugin; // switch + mb(); // make sure it's seen everywhere. + ret = litmus->activate_plugin(); if (0 != ret) { printk(KERN_INFO "Can't activate %s (%d).\n", - plugin->plugin_name, ret); - plugin = &linux_sched_plugin; + litmus->plugin_name, ret); + litmus = &linux_sched_plugin; } - printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); - litmus = plugin; + printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", litmus->plugin_name); } else ret = -EBUSY; out: diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index be06405021c5..464a78d780ad 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -34,7 +34,7 @@ struct klmirqd_registration struct list_head threads; }; -static atomic_t klmirqd_id_gen = ATOMIC_INIT(0); +static atomic_t klmirqd_id_gen = ATOMIC_INIT(-1); static struct klmirqd_registration klmirqd_state; @@ -136,12 +136,11 @@ void kill_klmirqd_thread(struct task_struct* klmirqd_thread) raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); } - - struct klmirqd_launch_data { int cpu_affinity; klmirqd_callback_t* cb; + char name[MAX_KLMIRQD_NAME_LEN+1]; struct work_struct work; }; @@ -156,47 +155,76 @@ static void __launch_klmirqd_thread(struct work_struct *work) struct klmirqd_launch_data* launch_data = container_of(work, struct klmirqd_launch_data, work); - TRACE("%s: Creating klmirqd thread\n", __FUNCTION__); + TRACE("Creating klmirqd thread\n"); + - id = atomic_inc_return(&klmirqd_id_gen); if (launch_data->cpu_affinity != -1) { - thread = kthread_create( - run_klmirqd, - /* treat the affinity as a pointer, we'll cast it back later */ - (void*)launch_data->cb, - "klmirqd_th%d/%d", - id, - launch_data->cpu_affinity); + if (launch_data->name[0] == '\0') { + id = atomic_inc_return(&klmirqd_id_gen); + TRACE("Launching klmirqd_th%d/%d\n", id, launch_data->cpu_affinity); + + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, 
we'll cast it back later */ + (void*)launch_data->cb, + "klmirqd_th%d/%d", + id, + launch_data->cpu_affinity); + } + else { + TRACE("Launching %s/%d\n", launch_data->name, launch_data->cpu_affinity); + + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + "%s/%d", + launch_data->name, + launch_data->cpu_affinity); + } /* litmus will put is in the right cluster. */ kthread_bind(thread, launch_data->cpu_affinity); - - TRACE("%s: Launching klmirqd_th%d/%d\n", __FUNCTION__, id, launch_data->cpu_affinity); } else { - thread = kthread_create( - run_klmirqd, - /* treat the affinity as a pointer, we'll cast it back later */ - (void*)launch_data->cb, - "klmirqd_th%d", - id); - - TRACE("%s: Launching klmirqd_th%d\n", __FUNCTION__, id); + if (launch_data->name[0] == '\0') { + id = atomic_inc_return(&klmirqd_id_gen); + TRACE("Launching klmirqd_th%d\n", id); + + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + "klmirqd_th%d", + id); + + } + else { + TRACE("Launching %s\n", launch_data->name); + + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + launch_data->name); + } + + } if (thread) { wake_up_process(thread); } else { - TRACE("Could not create klmirqd/%d thread!\n", id); + TRACE("Could not create thread!\n"); } kfree(launch_data); } -int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb) +int launch_klmirqd_thread(char* name, int cpu, klmirqd_callback_t* cb) { struct klmirqd_launch_data* delayed_launch; @@ -211,6 +239,14 @@ int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb) delayed_launch->cpu_affinity = cpu; delayed_launch->cb = cb; INIT_WORK(&delayed_launch->work, __launch_klmirqd_thread); + + if(name) { + snprintf(delayed_launch->name, MAX_KLMIRQD_NAME_LEN+1, "%s", name); + } + else { + delayed_launch->name[0] = '\0'; + } + schedule_work(&delayed_launch->work); return 0; diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 059a7e7ac715..5a63fb732e8b 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -496,6 +496,7 @@ static int gpu_owner_max_priority_order(struct binheap_node *a, static int init_nv_device_reg(void) { int i; + char name[MAX_KLMIRQD_NAME_LEN+1]; #ifdef CONFIG_LITMUS_SOFTIRQD if (!klmirqd_is_ready()) { @@ -520,11 +521,13 @@ static int init_nv_device_reg(void) { int default_cpu = litmus->map_gpu_to_cpu(i); + snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmirqd%d", i); + NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); mb(); - if(launch_klmirqd_thread(default_cpu, &NV_DEVICE_REG[i].callback) != 0) { + if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].callback) != 0) { TRACE("Failed to create klmirqd thread for GPU %d\n", i); } } diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 4ac573a6f0f7..7eb44fee1861 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1838,7 +1838,7 @@ UNSUPPORTED_AFF_OBS: #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) static int gsnedf_map_gpu_to_cpu(int gpu) { - return 0; // CPU_0 is default in all cases. + return -1; // No CPU affinity needed. } #endif -- cgit v1.2.2 From 642eadd6b82daaeeb3247c2417bf58d113639a1c Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 8 Jan 2013 18:19:43 -0500 Subject: Extend non-rt support to sync-releases. 
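
With this change, sys_wait_for_ts_release() no longer rejects non-real-time
callers, so a plain SCHED_NORMAL helper can block on the same synchronous-release
barrier as the real-time task set. A hypothetical userspace sketch (assuming the
liblitmus wrapper wait_for_ts_release() around this syscall; the helper thread
itself is illustrative, not part of this patch):

    #include <litmus.h>   /* liblitmus: wait_for_ts_release() */

    /* A SCHED_NORMAL measurement/logging helper that starts in sync with
     * the real-time task set. */
    void *trace_helper(void *arg)
    {
        /* Previously this returned -EPERM for non-real-time callers; now
         * the helper blocks here and is woken at the synchronous release. */
        wait_for_ts_release();

        /* ... begin logging alongside the newly released real-time tasks ... */
        return arg;
    }
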
Note that non-rt tasks are released immediately. That is, the 'delay' provided to the release_ts() system-call is ignored for non-rt tasks. --- litmus/sync.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/litmus/sync.c b/litmus/sync.c index bf75fde5450b..9fb6366f002f 100644 --- a/litmus/sync.c +++ b/litmus/sync.c @@ -65,7 +65,11 @@ static long do_release_ts(lt_t start) struct __wait_queue, task_list)->private; task_count++; - litmus->release_at(t, start + t->rt_param.task_params.phase); + /* RT tasks can be delayed. Non-RT tasks are released + immediately. */ + if (is_realtime(t)) { + litmus->release_at(t, start + t->rt_param.task_params.phase); + } sched_trace_task_release(t); } @@ -80,10 +84,8 @@ static long do_release_ts(lt_t start) asmlinkage long sys_wait_for_ts_release(void) { long ret = -EPERM; - struct task_struct *t = current; - if (is_realtime(t)) - ret = do_wait_for_ts_release(); + ret = do_wait_for_ts_release(); return ret; } @@ -97,8 +99,11 @@ asmlinkage long sys_release_ts(lt_t __user *__delay) /* FIXME: check capabilities... */ ret = copy_from_user(&delay, __delay, sizeof(delay)); - if (ret == 0) + if (ret == 0) { + /* Note: Non-rt tasks that participate in a sync release cannot be + delayed. They will be released immediately. */ ret = do_release_ts(litmus_clock() + delay); + } return ret; } -- cgit v1.2.2 From 1235a665a5e00dc762e6646c01381b3ed5019d86 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Wed, 9 Jan 2013 17:00:54 -0500 Subject: Enable sched_trace log injection from userspace --- include/litmus/rt_param.h | 22 ++++++++++++++ include/litmus/unistd_32.h | 6 ++-- include/litmus/unistd_64.h | 5 +++- litmus/litmus.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 3 deletions(-) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index c8ee64569dbb..43daaf84101d 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -47,6 +47,28 @@ typedef enum { AUX_FUTURE = (AUX_CURRENT<<2) } aux_flags_t; +/* mirror of st_event_record_type_t + * Assume all are UNsupported, unless otherwise stated. */ +typedef enum { + ST_INJECT_NAME = 1, /* supported */ + ST_INJECT_PARAM, /* supported */ + ST_INJECT_RELEASE, /* supported */ + ST_INJECT_ASSIGNED, + ST_INJECT_SWITCH_TO, + ST_INJECT_SWITCH_AWAY, + ST_INJECT_COMPLETION, /* supported */ + ST_INJECT_BLOCK, + ST_INJECT_RESUME, + ST_INJECT_ACTION, + ST_INJECT_SYS_RELEASE, /* supported */ +} sched_trace_injection_events_t; + +struct st_inject_args { + lt_t release; + lt_t deadline; + unsigned int job_no; +}; + /* We use the common priority interpretation "lower index == higher priority", * which is commonly used in fixed-priority schedulability analysis papers. 
* So, a numerically lower priority value implies higher scheduling priority, diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 7265ffadf555..d1fe84a5d574 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -20,6 +20,8 @@ #define __NR_litmus_dgl_lock __LSC(12) #define __NR_litmus_dgl_unlock __LSC(13) -#define __NR_set_aux_tasks _LSC(14) +#define __NR_set_aux_tasks __LSC(14) -#define NR_litmus_syscalls 15 +#define __NR_sched_trace_event __LSC(15) + +#define NR_litmus_syscalls 16 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index 51e730124dde..75f9fcb897f5 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -37,4 +37,7 @@ __SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock) #define __NR_set_aux_tasks __LSC(14) __SYSCALL(__NR_set_aux_tasks, sys_set_aux_tasks) -#define NR_litmus_syscalls 15 +#define __NR_sched_trace_event __LSC(15) +__SYSCALL(__NR_sched_trace_event, sys_sched_trace_event) + +#define NR_litmus_syscalls 16 diff --git a/litmus/litmus.c b/litmus/litmus.c index 1b4b9d25dbdc..6a1095aa7725 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -310,6 +310,79 @@ asmlinkage long sys_null_call(cycles_t __user *ts) return ret; } + +asmlinkage long sys_sched_trace_event(int event, struct st_inject_args __user *__args) +{ + long retval = 0; + struct task_struct* t = current; + + struct st_inject_args args; + + if (is_realtime(t)) { + printk(KERN_WARNING "Only non-real-time tasks may inject sched_trace events.\n"); + retval = -EINVAL; + goto out; + } + + if (__args && copy_from_user(&args, __args, sizeof(args))) { + retval = -EFAULT; + goto out; + } + + switch(event) { + /*************************************/ + /* events that don't need parameters */ + /*************************************/ + case ST_INJECT_NAME: + sched_trace_task_name(t); + break; + case ST_INJECT_PARAM: + /* presumes sporadic_task_ns() has already been called + * and valid data has been initialized even if the calling + * task is SCHED_NORMAL. */ + sched_trace_task_param(t); + break; + + /*******************************/ + /* events that need parameters */ + /*******************************/ + case ST_INJECT_COMPLETION: + if (!__args) { + retval = -EINVAL; + goto out; + } + + /* slam in the data */ + t->rt_param.job_params.job_no = args.job_no; + + sched_trace_task_completion(t, 0); + break; + case ST_INJECT_RELEASE: + if (!__args) { + retval = -EINVAL; + goto out; + } + + /* slam in the data */ + tsk_rt(t)->job_params.release = args.release; + tsk_rt(t)->job_params.deadline = args.deadline; + + sched_trace_task_release(t); + break; + + /**********************/ + /* unsupported events */ + /**********************/ + default: + retval = -EINVAL; + break; + } + +out: + return retval; +} + + #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) void init_gpu_affinity_state(struct task_struct* p) { -- cgit v1.2.2
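For reference, a userspace caller of the new injection syscall might look like
the following sketch. The mirrored definitions are taken from the hunks above;
the syscall number is an assumption the build must supply, since
__NR_sched_trace_event is __LSC(15) in litmus/unistd_{32,64}.h and is not
exported by glibc, and lt_t is treated here as a 64-bit nanosecond timestamp.

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_sched_trace_event
    #error "define __NR_sched_trace_event to match litmus/unistd_{32,64}.h"
    #endif

    /* Userspace mirrors of struct st_inject_args and the supported
     * ST_INJECT_* codes from include/litmus/rt_param.h. */
    struct st_inject_args {
        uint64_t release;
        uint64_t deadline;
        unsigned int job_no;
    };

    enum {
        ST_INJECT_NAME = 1,
        ST_INJECT_PARAM = 2,
        ST_INJECT_RELEASE = 3,
        ST_INJECT_COMPLETION = 7,
    };

    /* Inject a release and a completion record for the current job.  The
     * caller must be a non-real-time task; real-time callers get -EINVAL. */
    int inject_job(uint64_t release_ns, uint64_t deadline_ns, unsigned int job_no)
    {
        struct st_inject_args args = {
            .release  = release_ns,
            .deadline = deadline_ns,
            .job_no   = job_no,
        };

        if (syscall(__NR_sched_trace_event, ST_INJECT_RELEASE, &args))
            return -1;
        return syscall(__NR_sched_trace_event, ST_INJECT_COMPLETION, &args);
    }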