From 3d5537c160c1484e8d562b9828baf679cc53f67a Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 2 Jun 2011 16:06:05 -0400 Subject: Full patch for klitirqd with Nvidia GPU support. --- litmus/Kconfig | 89 +++ litmus/Makefile | 4 + litmus/affinity.c | 49 ++ litmus/edf_common.c | 6 + litmus/fdso.c | 1 + litmus/litmus.c | 82 ++- litmus/litmus_proc.c | 17 + litmus/litmus_softirq.c | 1579 +++++++++++++++++++++++++++++++++++++++++ litmus/locking.c | 1 - litmus/nvidia_info.c | 526 ++++++++++++++ litmus/preempt.c | 7 + litmus/sched_cedf.c | 852 +++++++++++++++++++++- litmus/sched_gsn_edf.c | 756 ++++++++++++++++++-- litmus/sched_litmus.c | 2 + litmus/sched_plugin.c | 29 + litmus/sched_task_trace.c | 216 +++++- litmus/sched_trace_external.c | 45 ++ 17 files changed, 4184 insertions(+), 77 deletions(-) create mode 100644 litmus/affinity.c create mode 100644 litmus/litmus_softirq.c create mode 100644 litmus/nvidia_info.c create mode 100644 litmus/sched_trace_external.c diff --git a/litmus/Kconfig b/litmus/Kconfig index ad8dc8308cf0..7e865d4dd703 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -62,6 +62,25 @@ config LITMUS_LOCKING endmenu +menu "Performance Enhancements" + +config SCHED_CPU_AFFINITY + bool "Local Migration Affinity" + default y + help + Rescheduled tasks prefer CPUs close to their previously used CPU. This + may improve performance by preserving cache affinity. + + Warning: May make bugs harder to find since tasks may migrate less often. + + NOTES: + * Pfair/PD^2 does not support this option. + * Only x86 is currently supported. + + If unsure, say Yes. + +endmenu + menu "Tracing" config FEATHER_TRACE @@ -182,4 +201,74 @@ config SCHED_DEBUG_TRACE_CALLER endmenu +menu "Interrupt Handling" + +config LITMUS_THREAD_ALL_SOFTIRQ + bool "Process all softirqs in ksoftirqd threads." + default n + help + (Experimental) Thread all softirqs to ksoftirqd + daemon threads, similar to PREEMPT_RT. I/O + throughput will drop with this enabled, but + latencies due to interrupts will be reduced. + + WARNING: Timer responsiveness will likely be + decreased as timer callbacks are also threaded. + This is unlike PREEMPT_RT's hardirqs. + + If unsure, say No. + +config LITMUS_SOFTIRQD + bool "Spawn klitirqd interrupt handling threads." + depends on LITMUS_LOCKING + default n + help + Create klitirqd interrupt handling threads. Work must be + specifically dispatched to these workers. (Softirqs for + Litmus tasks are not magically redirected to klitirqd.) + + G-EDF ONLY for now! + + If unsure, say No. + +config NR_LITMUS_SOFTIRQD + int "Number of klitirqd threads." + depends on LITMUS_SOFTIRQD + range 1 4096 + default "1" + help + Should be <= the number of CPUs in your system. + +config LITMUS_NVIDIA + bool "Litmus handling of NVIDIA interrupts." + depends on LITMUS_SOFTIRQD + default n + help + Direct tasklets from NVIDIA devices to Litmus's klitirqd. + + If unsure, say No. + +choice + prompt "CUDA/Driver Version Support" + default CUDA_4_0 + depends on LITMUS_NVIDIA + help + Select the version of CUDA/driver to support. + +config CUDA_4_0 + bool "CUDA 4.0" + depends on LITMUS_NVIDIA + help + Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40) + +config CUDA_3_2 + bool "CUDA 3.2" + depends on LITMUS_NVIDIA + help + Support CUDA 3.2 (dev.
driver version: x86_64-260.24) + +endchoice + +endmenu + endmenu diff --git a/litmus/Makefile b/litmus/Makefile index ad9936e07b83..892e01c2e1b3 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -21,8 +21,12 @@ obj-y = sched_plugin.o litmus.o \ obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o +obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o + +obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o +obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o diff --git a/litmus/affinity.c b/litmus/affinity.c new file mode 100644 index 000000000000..3b430d18885b --- /dev/null +++ b/litmus/affinity.c @@ -0,0 +1,49 @@ +#include + +#include + +struct neighborhood neigh_info[NR_CPUS]; + +/* called by _init_litmus() */ +void init_topology(void) +{ + int cpu; + int i; + int chk; + int depth = num_cache_leaves; + + if(depth > NUM_CACHE_LEVELS) + depth = NUM_CACHE_LEVELS; + + for_each_online_cpu(cpu) + { + for(i = 0; i < depth; ++i) + { + long unsigned int firstbits; + + chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i); + if(chk) /* failed */ + { + neigh_info[cpu].size[i] = 0; + } + else + { + /* size = num bits in mask */ + neigh_info[cpu].size[i] = cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); + } + firstbits = *neigh_info[cpu].neighbors[i]->bits; + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", + cpu, neigh_info[cpu].size[i], i, firstbits); + } + + /* set data for non-existent levels */ + for(; i < NUM_CACHE_LEVELS; ++i) + { + neigh_info[cpu].size[i] = 0; + + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", + cpu, neigh_info[cpu].size[i], i, 0lu); + } + } +} + diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 9b44dc2d8d1e..fbd67ab5f467 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -65,6 +65,12 @@ int edf_higher_prio(struct task_struct* first, return !is_realtime(second_task) || + +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy threads always lose w/o inheritance. */ + (first_task->rt_param.is_proxy_thread < + second_task->rt_param.is_proxy_thread) || +#endif /* is the deadline of the first task earlier? * Then it has higher priority. diff --git a/litmus/fdso.c b/litmus/fdso.c index aa7b384264e3..2b7f9ba85857 100644 --- a/litmus/fdso.c +++ b/litmus/fdso.c @@ -22,6 +22,7 @@ extern struct fdso_ops generic_lock_ops; static const struct fdso_ops* fdso_ops[] = { &generic_lock_ops, /* FMLP_SEM */ + &generic_lock_ops, /* KFMLP_SEM */ &generic_lock_ops, /* SRP_SEM */ }; diff --git a/litmus/litmus.c b/litmus/litmus.c index 26938acacafc..29363c6ad565 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -17,6 +17,14 @@ #include #include +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + /* Number of RT tasks that exist in the system */ atomic_t rt_task_count = ATOMIC_INIT(0); static DEFINE_RAW_SPINLOCK(task_transition_lock); @@ -47,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn) struct release_heap* release_heap_alloc(int gfp_flags); void release_heap_free(struct release_heap* rh); +#ifdef CONFIG_LITMUS_NVIDIA +/* + * sys_register_nv_device + * @nv_device_id: The Nvidia device id that the task want to register + * @reg_action: set to '1' to register the specified device. zero otherwise. 
+ * Syscall for register task's designated nvidia device into NV_DEVICE_REG array + * Returns EFAULT if nv_device_id is out of range. + * 0 if success + */ +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) +{ + /* register the device to caller (aka 'current') */ + return(reg_nv_device(nv_device_id, reg_action)); +} +#else +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) +{ + return(-EINVAL); +} +#endif + + /* * sys_set_task_rt_param * @pid: Pid of the task which scheduling parameters must be changed @@ -115,7 +145,7 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) tp.cls != RT_CLASS_BEST_EFFORT) { printk(KERN_INFO "litmus: real-time task %d rejected " - "because its class is invalid\n"); + "because its class is invalid\n", pid); goto out_unlock; } if (tp.budget_policy != NO_ENFORCEMENT && @@ -131,6 +161,22 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) target->rt_param.task_params = tp; +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy thread off by default */ + target->rt_param.is_proxy_thread = 0; + target->rt_param.cur_klitirqd = NULL; + //init_MUTEX(&target->rt_param.klitirqd_sem); + mutex_init(&target->rt_param.klitirqd_sem); + //init_completion(&target->rt_param.klitirqd_sem); + //target->rt_param.klitirqd_sem_stat = NOT_HELD; + atomic_set(&target->rt_param.klitirqd_sem_stat, NOT_HELD); +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&target->rt_param.nv_int_count, 0); +#endif + + retval = 0; out_unlock: read_unlock_irq(&tasklist_lock); @@ -265,6 +311,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job) return retval; } + /* sys_null_call() is only used for determining raw system call * overheads (kernel entry, kernel exit). It has no useful side effects. * If ts is non-NULL, then the current Feather-Trace time is recorded. @@ -278,7 +325,7 @@ asmlinkage long sys_null_call(cycles_t __user *ts) now = get_cycles(); ret = put_user(now, ts); } - + return ret; } @@ -299,6 +346,20 @@ static void reinit_litmus_state(struct task_struct* p, int restore) * at this point in time. */ WARN_ON(p->rt_param.inh_task); + +#ifdef CONFIG_LITMUS_SOFTIRQD + /* We probably should not have any tasklets executing for + * us at this time. + */ + WARN_ON(p->rt_param.cur_klitirqd); + WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD); + + if(p->rt_param.cur_klitirqd) + flush_pending(p->rt_param.cur_klitirqd, p); + + if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD) + up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem); +#endif /* Cleanup everything else. 
*/ memset(&p->rt_param, 0, sizeof(p->rt_param)); @@ -399,7 +460,7 @@ static void synch_on_plugin_switch(void* info) */ int switch_sched_plugin(struct sched_plugin* plugin) { - unsigned long flags; + //unsigned long flags; int ret = 0; BUG_ON(!plugin); @@ -413,8 +474,15 @@ int switch_sched_plugin(struct sched_plugin* plugin) while (atomic_read(&cannot_use_plugin) < num_online_cpus()) cpu_relax(); +#ifdef CONFIG_LITMUS_SOFTIRQD + if(!klitirqd_is_dead()) + { + kill_klitirqd(); + } +#endif + /* stop task transitions */ - raw_spin_lock_irqsave(&task_transition_lock, flags); + //raw_spin_lock_irqsave(&task_transition_lock, flags); /* don't switch if there are active real-time tasks */ if (atomic_read(&rt_task_count) == 0) { @@ -432,7 +500,7 @@ int switch_sched_plugin(struct sched_plugin* plugin) } else ret = -EBUSY; out: - raw_spin_unlock_irqrestore(&task_transition_lock, flags); + //raw_spin_unlock_irqrestore(&task_transition_lock, flags); atomic_set(&cannot_use_plugin, 0); return ret; } @@ -540,6 +608,10 @@ static int __init _init_litmus(void) init_litmus_proc(); +#ifdef CONFIG_SCHED_CPU_AFFINITY + init_topology(); +#endif + return 0; } diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c index 4bf725a36c9c..381513366c7a 100644 --- a/litmus/litmus_proc.c +++ b/litmus/litmus_proc.c @@ -19,12 +19,19 @@ static struct proc_dir_entry *litmus_dir = NULL, *plugs_dir = NULL, #ifdef CONFIG_RELEASE_MASTER *release_master_file = NULL, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + *klitirqd_file = NULL, #endif *plugs_file = NULL; /* in litmus/sync.c */ int count_tasks_waiting_for_release(void); +extern int proc_read_klitirqd_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data); + static int proc_read_stats(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -161,6 +168,12 @@ int __init init_litmus_proc(void) release_master_file->write_proc = proc_write_release_master; #endif +#ifdef CONFIG_LITMUS_SOFTIRQD + klitirqd_file = + create_proc_read_entry("klitirqd_stats", 0444, litmus_dir, + proc_read_klitirqd_stats, NULL); +#endif + stat_file = create_proc_read_entry("stats", 0444, litmus_dir, proc_read_stats, NULL); @@ -187,6 +200,10 @@ void exit_litmus_proc(void) remove_proc_entry("stats", litmus_dir); if (curr_file) remove_proc_entry("active_plugin", litmus_dir); +#ifdef CONFIG_LITMUS_SOFTIRQD + if (klitirqd_file) + remove_proc_entry("klitirqd_stats", litmus_dir); +#endif #ifdef CONFIG_RELEASE_MASTER if (release_master_file) remove_proc_entry("release_master", litmus_dir); diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c new file mode 100644 index 000000000000..271e770dbaea --- /dev/null +++ b/litmus/litmus_softirq.c @@ -0,0 +1,1579 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +/* TODO: Remove unneeded mb() and other barriers. */ + + +/* counts number of daemons ready to handle litmus irqs. */ +static atomic_t num_ready_klitirqds = ATOMIC_INIT(0); + +enum pending_flags +{ + LIT_TASKLET_LOW = 0x1, + LIT_TASKLET_HI = LIT_TASKLET_LOW<<1, + LIT_WORK = LIT_TASKLET_HI<<1 +}; + +/* only support tasklet processing for now. 
*/ +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; + +struct klitirqd_info +{ + struct task_struct* klitirqd; + struct task_struct* current_owner; + int terminating; + + + raw_spinlock_t lock; + + u32 pending; + atomic_t num_hi_pending; + atomic_t num_low_pending; + atomic_t num_work_pending; + + /* in order of priority */ + struct tasklet_head pending_tasklets_hi; + struct tasklet_head pending_tasklets; + struct list_head worklist; +}; + +/* one list for each klitirqd */ +static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD]; + + + + + +int proc_read_klitirqd_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len = snprintf(page, PAGE_SIZE, + "num ready klitirqds: %d\n\n", + atomic_read(&num_ready_klitirqds)); + + if(klitirqd_is_ready()) + { + int i; + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + len += + snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ + "klitirqd_th%d: %s/%d\n" + "\tcurrent_owner: %s/%d\n" + "\tpending: %x\n" + "\tnum hi: %d\n" + "\tnum low: %d\n" + "\tnum work: %d\n\n", + i, + klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid, + (klitirqds[i].current_owner != NULL) ? + klitirqds[i].current_owner->comm : "(null)", + (klitirqds[i].current_owner != NULL) ? + klitirqds[i].current_owner->pid : 0, + klitirqds[i].pending, + atomic_read(&klitirqds[i].num_hi_pending), + atomic_read(&klitirqds[i].num_low_pending), + atomic_read(&klitirqds[i].num_work_pending)); + } + } + + return(len); +} + + + + + +#if 0 +static atomic_t dump_id = ATOMIC_INIT(0); + +static void __dump_state(struct klitirqd_info* which, const char* caller) +{ + struct tasklet_struct* list; + + int id = atomic_inc_return(&dump_id); + + //if(in_interrupt()) + { + if(which->current_owner) + { + TRACE("(id: %d caller: %s)\n" + "klitirqd: %s/%d\n" + "current owner: %s/%d\n" + "pending: %x\n", + id, caller, + which->klitirqd->comm, which->klitirqd->pid, + which->current_owner->comm, which->current_owner->pid, + which->pending); + } + else + { + TRACE("(id: %d caller: %s)\n" + "klitirqd: %s/%d\n" + "current owner: %p\n" + "pending: %x\n", + id, caller, + which->klitirqd->comm, which->klitirqd->pid, + NULL, + which->pending); + } + + list = which->pending_tasklets.head; + while(list) + { + struct tasklet_struct *t = list; + list = list->next; /* advance */ + if(t->owner) + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid); + else + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL); + } + } +} + +static void dump_state(struct klitirqd_info* which, const char* caller) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&which->lock, flags); + __dump_state(which, caller); + raw_spin_unlock_irqrestore(&which->lock, flags); +} +#endif + + +/* forward declarations */ +static void ___litmus_tasklet_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup); +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup); +static void ___litmus_schedule_work(struct work_struct *w, + struct klitirqd_info *which, + int wakeup); + + + +inline unsigned int klitirqd_id(struct task_struct* tsk) +{ + int i; + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(klitirqds[i].klitirqd == tsk) + { + return i; + } + } + + BUG(); + + return 0; +} + + +inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_TASKLET_HI); +} + +inline static u32 
litirq_pending_low_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_TASKLET_LOW); +} + +inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_WORK); +} + +inline static u32 litirq_pending_irqoff(struct klitirqd_info* which) +{ + return(which->pending); +} + + +inline static u32 litirq_pending(struct klitirqd_info* which) +{ + unsigned long flags; + u32 pending; + + raw_spin_lock_irqsave(&which->lock, flags); + pending = litirq_pending_irqoff(which); + raw_spin_unlock_irqrestore(&which->lock, flags); + + return pending; +}; + +inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner) +{ + unsigned long flags; + u32 pending; + + raw_spin_lock_irqsave(&which->lock, flags); + pending = litirq_pending_irqoff(which); + if(pending) + { + if(which->current_owner != owner) + { + pending = 0; // owner switch! + } + } + raw_spin_unlock_irqrestore(&which->lock, flags); + + return pending; +} + + +inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, + struct mutex** sem, + struct task_struct** t) +{ + unsigned long flags; + u32 pending; + + /* init values */ + *sem = NULL; + *t = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + pending = litirq_pending_irqoff(which); + if(pending) + { + if(which->current_owner != NULL) + { + *t = which->current_owner; + *sem = &tsk_rt(which->current_owner)->klitirqd_sem; + } + else + { + BUG(); + } + } + raw_spin_unlock_irqrestore(&which->lock, flags); + + if(likely(*sem)) + { + return pending; + } + else + { + return 0; + } +} + +/* returns true if the next piece of work to do is from a different owner. + */ +static int tasklet_ownership_change( + struct klitirqd_info* which, + enum pending_flags taskletQ) +{ + /* this function doesn't have to look at work objects since they have + priority below tasklets. */ + + unsigned long flags; + int ret = 0; + + raw_spin_lock_irqsave(&which->lock, flags); + + switch(taskletQ) + { + case LIT_TASKLET_HI: + if(litirq_pending_hi_irqoff(which)) + { + ret = (which->pending_tasklets_hi.head->owner != + which->current_owner); + } + break; + case LIT_TASKLET_LOW: + if(litirq_pending_low_irqoff(which)) + { + ret = (which->pending_tasklets.head->owner != + which->current_owner); + } + break; + default: + break; + } + + raw_spin_unlock_irqrestore(&which->lock, flags); + + TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret); + + return ret; +} + + +static void __reeval_prio(struct klitirqd_info* which) +{ + struct task_struct* next_owner = NULL; + struct task_struct* klitirqd = which->klitirqd; + + /* Check in prio-order */ + u32 pending = litirq_pending_irqoff(which); + + //__dump_state(which, "__reeval_prio: before"); + + if(pending) + { + if(pending & LIT_TASKLET_HI) + { + next_owner = which->pending_tasklets_hi.head->owner; + } + else if(pending & LIT_TASKLET_LOW) + { + next_owner = which->pending_tasklets.head->owner; + } + else if(pending & LIT_WORK) + { + struct work_struct* work = + list_first_entry(&which->worklist, struct work_struct, entry); + next_owner = work->owner; + } + } + + if(next_owner != which->current_owner) + { + struct task_struct* old_owner = which->current_owner; + + /* bind the next owner. */ + which->current_owner = next_owner; + mb(); + + if(next_owner != NULL) + { + if(!in_interrupt()) + { + TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, + ((tsk_rt(klitirqd)->inh_task) ? 
tsk_rt(klitirqd)->inh_task : klitirqd)->comm, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + next_owner->comm, next_owner->pid); + } + else + { + TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + next_owner->comm, next_owner->pid); + } + + litmus->set_prio_inh_klitirqd(klitirqd, old_owner, next_owner); + } + else + { + if(likely(!in_interrupt())) + { + TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n", + __FUNCTION__, klitirqd->comm, klitirqd->pid); + } + else + { + // is this a bug? + TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n", + __FUNCTION__, klitirqd->comm, klitirqd->pid); + } + + BUG_ON(pending != 0); + litmus->clear_prio_inh_klitirqd(klitirqd, old_owner); + } + } + + //__dump_state(which, "__reeval_prio: after"); +} + +static void reeval_prio(struct klitirqd_info* which) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&which->lock, flags); + __reeval_prio(which); + raw_spin_unlock_irqrestore(&which->lock, flags); +} + + +static void wakeup_litirqd_locked(struct klitirqd_info* which) +{ + /* Interrupts are disabled: no need to stop preemption */ + if (which && which->klitirqd) + { + __reeval_prio(which); /* configure the proper priority */ + + if(which->klitirqd->state != TASK_RUNNING) + { + TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__, + which->klitirqd->comm, which->klitirqd->pid); + + wake_up_process(which->klitirqd); + } + } +} + + +static void do_lit_tasklet(struct klitirqd_info* which, + struct tasklet_head* pending_tasklets) +{ + unsigned long flags; + struct tasklet_struct *list; + atomic_t* count; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "do_lit_tasklet: before steal"); + + /* copy out the tasklets for our private use. */ + list = pending_tasklets->head; + pending_tasklets->head = NULL; + pending_tasklets->tail = &pending_tasklets->head; + + /* remove pending flag */ + which->pending &= (pending_tasklets == &which->pending_tasklets) ? + ~LIT_TASKLET_LOW : + ~LIT_TASKLET_HI; + + count = (pending_tasklets == &which->pending_tasklets) ? + &which->num_low_pending: + &which->num_hi_pending; + + //__dump_state(which, "do_lit_tasklet: after steal"); + + raw_spin_unlock_irqrestore(&which->lock, flags); + + + while(list) + { + struct tasklet_struct *t = list; + + /* advance, lest we forget */ + list = list->next; + + /* execute tasklet if it has my priority and is free */ + if ((t->owner == which->current_owner) && tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + { + BUG(); + } + TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__); + t->func(t->data); + tasklet_unlock(t); + + atomic_dec(count); + + continue; /* process more tasklets */ + } + tasklet_unlock(t); + } + + TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__); + + /* couldn't process tasklet. put it back at the end of the queue. */ + if(pending_tasklets == &which->pending_tasklets) + ___litmus_tasklet_schedule(t, which, 0); + else + ___litmus_tasklet_hi_schedule(t, which, 0); + } +} + + +// returns 1 if priorities need to be changed to continue processing +// pending tasklets. 
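// (run_klitirqd() consumes this return value: when it is nonzero, pending work objects are skipped for this pass and reeval_prio() switches the daemon's inherited priority to the next owner before the remaining items are handled.)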
+static int do_litirq(struct klitirqd_info* which) +{ + u32 pending; + int resched = 0; + + if(in_interrupt()) + { + TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__); + return(0); + } + + if(which->klitirqd != current) + { + TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", + __FUNCTION__, current->comm, current->pid, + which->klitirqd->comm, which->klitirqd->pid); + return(0); + } + + if(!is_realtime(current)) + { + TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n", + __FUNCTION__, current->policy); + return(0); + } + + + /* We only handle tasklets & work objects, no need for RCU triggers? */ + + pending = litirq_pending(which); + if(pending) + { + /* extract the work to do and do it! */ + if(pending & LIT_TASKLET_HI) + { + TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__); + do_lit_tasklet(which, &which->pending_tasklets_hi); + resched = tasklet_ownership_change(which, LIT_TASKLET_HI); + + if(resched) + { + TRACE_CUR("%s: HI tasklets of another owner remain. " + "Skipping any LOW tasklets.\n", __FUNCTION__); + } + } + + if(!resched && (pending & LIT_TASKLET_LOW)) + { + TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__); + do_lit_tasklet(which, &which->pending_tasklets); + resched = tasklet_ownership_change(which, LIT_TASKLET_LOW); + + if(resched) + { + TRACE_CUR("%s: LOW tasklets of another owner remain. " + "Skipping any work objects.\n", __FUNCTION__); + } + } + } + + return(resched); +} + + +static void do_work(struct klitirqd_info* which) +{ + unsigned long flags; + work_func_t f; + struct work_struct* work; + + // only execute one work-queue item to yield to tasklets. + // ...is this a good idea, or should we just batch them? + raw_spin_lock_irqsave(&which->lock, flags); + + if(!litirq_pending_work_irqoff(which)) + { + raw_spin_unlock_irqrestore(&which->lock, flags); + goto no_work; + } + + work = list_first_entry(&which->worklist, struct work_struct, entry); + list_del_init(&work->entry); + + if(list_empty(&which->worklist)) + { + which->pending &= ~LIT_WORK; + } + + raw_spin_unlock_irqrestore(&which->lock, flags); + + + + /* safe to read current_owner outside of lock since only this thread + may write to the pointer. */ + if(work->owner == which->current_owner) + { + TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__); + // do the work! + work_clear_pending(work); + f = work->func; + f(work); /* can't touch 'work' after this point, + the user may have freed it. */ + + atomic_dec(&which->num_work_pending); + } + else + { + TRACE_CUR("%s: Could not invoke work object. Requeuing.\n", + __FUNCTION__); + ___litmus_schedule_work(work, which, 0); + } + +no_work: + return; +} + + +static int set_litmus_daemon_sched(void) +{ + /* set up a daemon job that will never complete. + it should only ever run on behalf of another + real-time task. 
+ + TODO: Transition to a new job whenever a + new tasklet is handled */ + + int ret = 0; + + struct rt_task tp = { + .exec_cost = 0, + .period = 1000000000, /* dummy 1 second period */ + .phase = 0, + .cpu = task_cpu(current), + .budget_policy = NO_ENFORCEMENT, + .cls = RT_CLASS_BEST_EFFORT + }; + + struct sched_param param = { .sched_priority = 0}; + + + /* set task params, mark as proxy thread, and init other data */ + tsk_rt(current)->task_params = tp; + tsk_rt(current)->is_proxy_thread = 1; + tsk_rt(current)->cur_klitirqd = NULL; + //init_MUTEX(&tsk_rt(current)->klitirqd_sem); + mutex_init(&tsk_rt(current)->klitirqd_sem); + //init_completion(&tsk_rt(current)->klitirqd_sem); + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD); + + /* inform the OS we're SCHED_LITMUS -- + sched_setscheduler_nocheck() calls litmus_admit_task(). */ + sched_setscheduler_nocheck(current, SCHED_LITMUS, ¶m); + + return ret; +} + +static void enter_execution_phase(struct klitirqd_info* which, + struct mutex* sem, + struct task_struct* t) +{ + TRACE_CUR("%s: Trying to enter execution phase. " + "Acquiring semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + down_and_set_stat(current, HELD, sem); + TRACE_CUR("%s: Execution phase entered! " + "Acquired semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); +} + +static void exit_execution_phase(struct klitirqd_info* which, + struct mutex* sem, + struct task_struct* t) +{ + TRACE_CUR("%s: Exiting execution phase. " + "Releasing semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD) + { + up_and_set_stat(current, NOT_HELD, sem); + TRACE_CUR("%s: Execution phase exited! " + "Released semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + } + else + { + TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__); + } +} + +/* main loop for klitsoftirqd */ +static int run_klitirqd(void* unused) +{ + struct klitirqd_info* which = &klitirqds[klitirqd_id(current)]; + struct mutex* sem; + struct task_struct* owner; + + int rt_status = set_litmus_daemon_sched(); + + if(rt_status != 0) + { + TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); + goto rt_failed; + } + + atomic_inc(&num_ready_klitirqds); + + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) + { + preempt_disable(); + if (!litirq_pending(which)) + { + /* sleep for work */ + TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n", + __FUNCTION__); + preempt_enable_no_resched(); + schedule(); + + if(kthread_should_stop()) /* bail out */ + { + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); + continue; + } + + preempt_disable(); + } + + __set_current_state(TASK_RUNNING); + + while (litirq_pending_and_sem_and_owner(which, &sem, &owner)) + { + int needs_resched = 0; + + preempt_enable_no_resched(); + + BUG_ON(sem == NULL); + + // wait to enter execution phase; wait for 'current_owner' to block. + enter_execution_phase(which, sem, owner); + + if(kthread_should_stop()) + { + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); + break; + } + + preempt_disable(); + + /* Double check that there's still pending work and the owner hasn't + * changed. Pending items may have been flushed while we were sleeping. 
+ */ + if(litirq_pending_with_owner(which, owner)) + { + TRACE_CUR("%s: Executing tasklets and/or work objects.\n", + __FUNCTION__); + + needs_resched = do_litirq(which); + + preempt_enable_no_resched(); + + // work objects are preemptible. + if(!needs_resched) + { + do_work(which); + } + + // exit execution phase. + exit_execution_phase(which, sem, owner); + + TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__); + reeval_prio(which); /* check if we need to change priority here */ + } + else + { + TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n", + __FUNCTION__, + owner->comm, owner->pid); + preempt_enable_no_resched(); + + // exit execution phase. + exit_execution_phase(which, sem, owner); + } + + cond_resched(); + preempt_disable(); + } + preempt_enable(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + atomic_dec(&num_ready_klitirqds); + +rt_failed: + litmus_exit_task(current); + + return rt_status; +} + + +struct klitirqd_launch_data +{ + int* cpu_affinity; + struct work_struct work; +}; + +/* executed by a kworker from workqueues */ +static void launch_klitirqd(struct work_struct *work) +{ + int i; + + struct klitirqd_launch_data* launch_data = + container_of(work, struct klitirqd_launch_data, work); + + TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + /* create the daemon threads */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(launch_data->cpu_affinity) + { + klitirqds[i].klitirqd = + kthread_create( + run_klitirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)(long long)launch_data->cpu_affinity[i], + "klitirqd_th%d/%d", + i, + launch_data->cpu_affinity[i]); + + /* litmus will put is in the right cluster. */ + kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]); + } + else + { + klitirqds[i].klitirqd = + kthread_create( + run_klitirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)(long long)(-1), + "klitirqd_th%d", + i); + } + } + + TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + /* unleash the daemons */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + wake_up_process(klitirqds[i].klitirqd); + } + + if(launch_data->cpu_affinity) + kfree(launch_data->cpu_affinity); + kfree(launch_data); +} + + +void spawn_klitirqd(int* affinity) +{ + int i; + struct klitirqd_launch_data* delayed_launch; + + if(atomic_read(&num_ready_klitirqds) != 0) + { + TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n"); + return; + } + + /* init the tasklet & work queues */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + klitirqds[i].terminating = 0; + klitirqds[i].pending = 0; + + klitirqds[i].num_hi_pending.counter = 0; + klitirqds[i].num_low_pending.counter = 0; + klitirqds[i].num_work_pending.counter = 0; + + klitirqds[i].pending_tasklets_hi.head = NULL; + klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head; + + klitirqds[i].pending_tasklets.head = NULL; + klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head; + + INIT_LIST_HEAD(&klitirqds[i].worklist); + + raw_spin_lock_init(&klitirqds[i].lock); + } + + /* wait to flush the initializations to memory since other threads + will access it. */ + mb(); + + /* tell a work queue to launch the threads. we can't make scheduling + calls since we're in an atomic state. 
*/ + TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__); + delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC); + if(affinity) + { + delayed_launch->cpu_affinity = + kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC); + + memcpy(delayed_launch->cpu_affinity, affinity, + sizeof(int)*NR_LITMUS_SOFTIRQD); + } + else + { + delayed_launch->cpu_affinity = NULL; + } + INIT_WORK(&delayed_launch->work, launch_klitirqd); + schedule_work(&delayed_launch->work); +} + + +void kill_klitirqd(void) +{ + if(!klitirqd_is_dead()) + { + int i; + + TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(klitirqds[i].terminating != 1) + { + klitirqds[i].terminating = 1; + mb(); /* just to be sure? */ + flush_pending(klitirqds[i].klitirqd, NULL); + + /* signal termination */ + kthread_stop(klitirqds[i].klitirqd); + } + } + } +} + + +int klitirqd_is_ready(void) +{ + return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD); +} + +int klitirqd_is_dead(void) +{ + return(atomic_read(&num_ready_klitirqds) == 0); +} + + +struct task_struct* get_klitirqd(unsigned int k_id) +{ + return(klitirqds[k_id].klitirqd); +} + + +void flush_pending(struct task_struct* klitirqd_thread, + struct task_struct* owner) +{ + unsigned int k_id = klitirqd_id(klitirqd_thread); + struct klitirqd_info *which = &klitirqds[k_id]; + + unsigned long flags; + struct tasklet_struct *list; + + u32 work_flushed = 0; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "flush_pending: before"); + + // flush hi tasklets. + if(litirq_pending_hi_irqoff(which)) + { + which->pending &= ~LIT_TASKLET_HI; + + list = which->pending_tasklets_hi.head; + which->pending_tasklets_hi.head = NULL; + which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head; + + TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__); + + while(list) + { + struct tasklet_struct *t = list; + list = list->next; + + if(likely((t->owner == owner) || (owner == NULL))) + { + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) + { + BUG(); + } + + work_flushed |= LIT_TASKLET_HI; + + t->owner = NULL; + + // WTF? + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_hi_pending); + ___tasklet_hi_schedule(t); + } + else + { + TRACE("%s: dropped hi tasklet??\n", __FUNCTION__); + BUG(); + } + } + else + { + TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__); + // put back on queue. + ___litmus_tasklet_hi_schedule(t, which, 0); + } + } + } + + // flush low tasklets. 
+ if(litirq_pending_low_irqoff(which)) + { + which->pending &= ~LIT_TASKLET_LOW; + + list = which->pending_tasklets.head; + which->pending_tasklets.head = NULL; + which->pending_tasklets.tail = &which->pending_tasklets.head; + + TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__); + + while(list) + { + struct tasklet_struct *t = list; + list = list->next; + + if(likely((t->owner == owner) || (owner == NULL))) + { + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) + { + BUG(); + } + + work_flushed |= LIT_TASKLET_LOW; + + t->owner = NULL; + sched_trace_tasklet_end(owner, 1ul); + + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_low_pending); + ___tasklet_schedule(t); + } + else + { + TRACE("%s: dropped tasklet??\n", __FUNCTION__); + BUG(); + } + } + else + { + TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__); + // put back on queue + ___litmus_tasklet_schedule(t, which, 0); + } + } + } + + // flush work objects + if(litirq_pending_work_irqoff(which)) + { + which->pending &= ~LIT_WORK; + + TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__); + + while(!list_empty(&which->worklist)) + { + struct work_struct* work = + list_first_entry(&which->worklist, struct work_struct, entry); + list_del_init(&work->entry); + + if(likely((work->owner == owner) || (owner == NULL))) + { + work_flushed |= LIT_WORK; + atomic_dec(&which->num_work_pending); + + work->owner = NULL; + sched_trace_work_end(owner, current, 1ul); + __schedule_work(work); + } + else + { + TRACE("%s: Could not flush a work object.\n", __FUNCTION__); + // put back on queue + ___litmus_schedule_work(work, which, 0); + } + } + } + + //__dump_state(which, "flush_pending: after (before reeval prio)"); + + + mb(); /* commit changes to pending flags */ + + /* reset the scheduling priority */ + if(work_flushed) + { + __reeval_prio(which); + + /* Try to offload flushed tasklets to Linux's ksoftirqd. */ + if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI)) + { + wakeup_softirqd(); + } + } + else + { + TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__); + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + + + + +static void ___litmus_tasklet_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + t->next = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "___litmus_tasklet_schedule: before queuing"); + + *(which->pending_tasklets.tail) = t; + which->pending_tasklets.tail = &t->next; + + old_pending = which->pending; + which->pending |= LIT_TASKLET_LOW; + + atomic_inc(&which->num_low_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wake up the klitirqd */ + } + + //__dump_state(which, "___litmus_tasklet_schedule: after queuing"); + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + /* Can't accept tasklets while we're processing a workqueue + because they're handled by the same thread. This case is + very RARE. 
+ + TODO: Use a separate thread for work objects!!!!!! + */ + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; + ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1); + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_schedule); + + +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + t->next = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + *(which->pending_tasklets_hi.tail) = t; + which->pending_tasklets_hi.tail = &t->next; + + old_pending = which->pending; + which->pending |= LIT_TASKLET_HI; + + atomic_inc(&which->num_hi_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wake up the klitirqd */ + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; + ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1); + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); + + +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + u32 old_pending; + + BUG_ON(!irqs_disabled()); + + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + raw_spin_lock(&klitirqds[k_id].lock); + + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; // success! 
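/* Push onto the head of the HI queue rather than appending at the tail as the other schedule paths do, so this tasklet is the next one the daemon executes (analogous to Linux's tasklet_hi_schedule_first()); the klitirqd lock is already held here and interrupts are disabled. */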
+ + t->next = klitirqds[k_id].pending_tasklets_hi.head; + klitirqds[k_id].pending_tasklets_hi.head = t; + + old_pending = klitirqds[k_id].pending; + klitirqds[k_id].pending |= LIT_TASKLET_HI; + + atomic_inc(&klitirqds[k_id].num_hi_pending); + + mb(); + + if(!old_pending) + wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */ + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + + raw_spin_unlock(&klitirqds[k_id].lock); + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); + + + +static void ___litmus_schedule_work(struct work_struct *w, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + raw_spin_lock_irqsave(&which->lock, flags); + + work_pending(w); + list_add_tail(&w->entry, &which->worklist); + + old_pending = which->pending; + which->pending |= LIT_WORK; + + atomic_inc(&which->num_work_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wakeup the klitirqd */ + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) +{ + int ret = 1; /* assume success */ + if(unlikely(w->owner == NULL) || !is_realtime(w->owner)) + { + TRACE("%s: No owner associated with this work object!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%u!\n", k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + ___litmus_schedule_work(w, &klitirqds[k_id], 1); + else + ret = 0; + return(ret); +} +EXPORT_SYMBOL(__litmus_schedule_work); + + +static int set_klitirqd_sem_status(unsigned long stat) +{ + TRACE_CUR("SETTING STATUS FROM %d TO %d\n", + atomic_read(&tsk_rt(current)->klitirqd_sem_stat), + stat); + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat); + //mb(); + + return(0); +} + +static int set_klitirqd_sem_status_if_not_held(unsigned long stat) +{ + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD) + { + return(set_klitirqd_sem_status(stat)); + } + return(-1); +} + + +void __down_and_reset_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_reset, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif + + mutex_lock_sfx(sem, + set_klitirqd_sem_status_if_not_held, to_reset, + set_klitirqd_sem_status, to_set); +#if 0 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif +} + +void down_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif + + mutex_lock_sfx(sem, + NULL, 0, + set_klitirqd_sem_status, to_set); + +#if 0 + TRACE_CUR("%s: exiting. 
Have semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif +} + + +void up_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n", + __FUNCTION__, + task->comm, task->pid); +#endif + + mutex_unlock_sfx(sem, NULL, 0, + set_klitirqd_sem_status, to_set); + +#if 0 + TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n", + __FUNCTION__, + task->comm, task->pid); +#endif +} + + + +void release_klitirqd_lock(struct task_struct* t) +{ + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD)) + { + struct mutex* sem; + struct task_struct* owner = t; + + if(t->state == TASK_RUNNING) + { + TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n"); + return; + } + + if(likely(!tsk_rt(t)->is_proxy_thread)) + { + sem = &tsk_rt(t)->klitirqd_sem; + } + else + { + unsigned int k_id = klitirqd_id(t); + owner = klitirqds[k_id].current_owner; + + BUG_ON(t != klitirqds[k_id].klitirqd); + + if(likely(owner)) + { + sem = &tsk_rt(owner)->klitirqd_sem; + } + else + { + BUG(); + + // We had the rug pulled out from under us. Abort attempt + // to reacquire the lock since our client no longer needs us. + TRACE_CUR("HUH?! How did this happen?\n"); + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + return; + } + } + + //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid); + up_and_set_stat(t, NEED_TO_REACQUIRE, sem); + //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid); + } + /* + else if(is_realtime(t)) + { + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + } + */ +} + +int reacquire_klitirqd_lock(struct task_struct* t) +{ + int ret = 0; + + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE)) + { + struct mutex* sem; + struct task_struct* owner = t; + + if(likely(!tsk_rt(t)->is_proxy_thread)) + { + sem = &tsk_rt(t)->klitirqd_sem; + } + else + { + unsigned int k_id = klitirqd_id(t); + //struct task_struct* owner = klitirqds[k_id].current_owner; + owner = klitirqds[k_id].current_owner; + + BUG_ON(t != klitirqds[k_id].klitirqd); + + if(likely(owner)) + { + sem = &tsk_rt(owner)->klitirqd_sem; + } + else + { + // We had the rug pulled out from under us. Abort attempt + // to reacquire the lock since our client no longer needs us. + TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n"); + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + return(0); + } + } + + //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid); + __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem); + //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid); + } + /* + else if(is_realtime(t)) + { + TRACE_CUR("%s: Nothing to do. 
Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + } + */ + + return(ret); +} + diff --git a/litmus/locking.c b/litmus/locking.c index 2693f1aca859..cfce98e7480d 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -121,7 +121,6 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) return(t); } - #else struct fdso_ops generic_lock_ops = {}; diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c new file mode 100644 index 000000000000..78f035244d21 --- /dev/null +++ b/litmus/nvidia_info.c @@ -0,0 +1,526 @@ +#include +#include +#include + +#include +#include +#include + +typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ +typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ +typedef unsigned char NvU8; /* 0 to 255 */ +typedef unsigned short NvU16; /* 0 to 65535 */ +typedef signed char NvS8; /* -128 to 127 */ +typedef signed short NvS16; /* -32768 to 32767 */ +typedef float NvF32; /* IEEE Single Precision (S1E8M23) */ +typedef double NvF64; /* IEEE Double Precision (S1E11M52) */ +typedef unsigned int NvV32; /* "void": enumerated or multiple fields */ +typedef unsigned int NvU32; /* 0 to 4294967295 */ +typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */ +typedef union +{ + volatile NvV8 Reg008[1]; + volatile NvV16 Reg016[1]; + volatile NvV32 Reg032[1]; +} litmus_nv_hwreg_t, * litmus_nv_phwreg_t; + +typedef struct +{ + NvU64 address; + NvU64 size; + NvU32 offset; + NvU32 *map; + litmus_nv_phwreg_t map_u; +} litmus_nv_aperture_t; + +typedef struct +{ + void *priv; /* private data */ + void *os_state; /* os-specific device state */ + + int rmInitialized; + int flags; + + /* PCI config info */ + NvU32 domain; + NvU16 bus; + NvU16 slot; + NvU16 vendor_id; + NvU16 device_id; + NvU16 subsystem_id; + NvU32 gpu_id; + void *handle; + + NvU32 pci_cfg_space[16]; + + /* physical characteristics */ + litmus_nv_aperture_t bars[3]; + litmus_nv_aperture_t *regs; + litmus_nv_aperture_t *fb, ud; + litmus_nv_aperture_t agp; + + NvU32 interrupt_line; + + NvU32 agp_config; + NvU32 agp_status; + + NvU32 primary_vga; + + NvU32 sim_env; + + NvU32 rc_timer_enabled; + + /* list of events allocated for this device */ + void *event_list; + + void *kern_mappings; + +} litmus_nv_state_t; + +typedef struct work_struct litmus_nv_task_t; + +typedef struct litmus_nv_work_s { + litmus_nv_task_t task; + void *data; +} litmus_nv_work_t; + +typedef struct litmus_nv_linux_state_s { + litmus_nv_state_t nv_state; + atomic_t usage_count; + + struct pci_dev *dev; + void *agp_bridge; + void *alloc_queue; + + void *timer_sp; + void *isr_sp; + void *pci_cfgchk_sp; + void *isr_bh_sp; + +#ifdef CONFIG_CUDA_4_0 + char registry_keys[512]; +#endif + + /* keep track of any pending bottom halfes */ + struct tasklet_struct tasklet; + litmus_nv_work_t work; + + /* get a timer callback every second */ + struct timer_list rc_timer; + + /* lock for linux-specific data, not used by core rm */ + struct semaphore ldata_lock; + + /* lock for linux-specific alloc queue */ + struct semaphore at_lock; + +#if 0 +#if defined(NV_USER_MAP) + /* list of user mappings */ + struct nv_usermap_s *usermap_list; + + /* lock for VMware-specific mapping list */ + struct semaphore mt_lock; +#endif /* defined(NV_USER_MAP) */ +#if defined(NV_PM_SUPPORT_OLD_STYLE_APM) + void *apm_nv_dev; +#endif +#endif + + NvU32 device_num; + struct litmus_nv_linux_state_s *next; +} litmus_nv_linux_state_t; + +void dump_nvidia_info(const struct tasklet_struct *t) +{ + litmus_nv_state_t* nvstate = NULL; + 
litmus_nv_linux_state_t* linuxstate = NULL; + struct pci_dev* pci = NULL; + + nvstate = (litmus_nv_state_t*)(t->data); + + if(nvstate) + { + TRACE("NV State:\n" + "\ttasklet ptr = %p\n" + "\tstate ptr = %p\n" + "\tprivate data ptr = %p\n" + "\tos state ptr = %p\n" + "\tdomain = %u\n" + "\tbus = %u\n" + "\tslot = %u\n" + "\tvender_id = %u\n" + "\tdevice_id = %u\n" + "\tsubsystem_id = %u\n" + "\tgpu_id = %u\n" + "\tinterrupt_line = %u\n", + t, + nvstate, + nvstate->priv, + nvstate->os_state, + nvstate->domain, + nvstate->bus, + nvstate->slot, + nvstate->vendor_id, + nvstate->device_id, + nvstate->subsystem_id, + nvstate->gpu_id, + nvstate->interrupt_line); + + linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); + } + else + { + TRACE("INVALID NVSTATE????\n"); + } + + if(linuxstate) + { + int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate); + int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state)); + int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); + + + TRACE("LINUX NV State:\n" + "\tlinux nv state ptr: %p\n" + "\taddress of tasklet: %p\n" + "\taddress of work: %p\n" + "\tusage_count: %d\n" + "\tdevice_num: %u\n" + "\ttasklet addr == this tasklet: %d\n" + "\tpci: %p\n", + linuxstate, + &(linuxstate->tasklet), + &(linuxstate->work), + atomic_read(&(linuxstate->usage_count)), + linuxstate->device_num, + (t == &(linuxstate->tasklet)), + linuxstate->dev); + + pci = linuxstate->dev; + + TRACE("Offsets:\n" + "\tOffset from LinuxState: %d, %x\n" + "\tOffset from NVState: %d, %x\n" + "\tOffset from parameter: %d, %x\n" + "\tdevice_num: %u\n", + ls_offset, ls_offset, + ns_offset_raw, ns_offset_raw, + ns_offset_desired, ns_offset_desired, + *((u32*)((void*)nvstate + ns_offset_desired))); + } + else + { + TRACE("INVALID LINUXNVSTATE?????\n"); + } + +#if 0 + if(pci) + { + TRACE("PCI DEV Info:\n" + "pci device ptr: %p\n" + "\tdevfn = %d\n" + "\tvendor = %d\n" + "\tdevice = %d\n" + "\tsubsystem_vendor = %d\n" + "\tsubsystem_device = %d\n" + "\tslot # = %d\n", + pci, + pci->devfn, + pci->vendor, + pci->device, + pci->subsystem_vendor, + pci->subsystem_device, + pci->slot->number); + } + else + { + TRACE("INVALID PCIDEV PTR?????\n"); + } +#endif +} + +static struct module* nvidia_mod = NULL; +int init_nvidia_info(void) +{ + mutex_lock(&module_mutex); + nvidia_mod = find_module("nvidia"); + mutex_unlock(&module_mutex); + if(nvidia_mod != NULL) + { + TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, + (void*)(nvidia_mod->module_core), + (void*)(nvidia_mod->module_core) + nvidia_mod->core_size); + init_nv_device_reg(); + return(0); + } + else + { + TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); + return(-1); + } +} + + +/* works with pointers to static data inside the module too. */ +int is_nvidia_func(void* func_addr) +{ + int ret = 0; + if(nvidia_mod) + { + ret = within_module_core((long unsigned int)func_addr, nvidia_mod); + /* + if(ret) + { + TRACE("%s : %p is in NVIDIA module: %d\n", + __FUNCTION__, func_addr, ret); + }*/ + } + + return(ret); +} + +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) +{ + // life is too short to use hard-coded offsets. update this later. 
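/* Recover device_num through the mirrored struct layout: t->data is the embedded nv_state_t, and container_of() locates the enclosing litmus_nv_linux_state_t, replacing the hard-coded driver offsets kept in the #if 0 block below. */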
+ litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data); + litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); + + BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); + + return(linuxstate->device_num); + + //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); + +#if 0 + // offset determined though observed behavior of the NV driver. + //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1 + //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2 + + void* state = (void*)(t->data); + void* device_num_ptr = state + DEVICE_NUM_OFFSET; + + //dump_nvidia_info(t); + return(*((u32*)device_num_ptr)); +#endif +} + +u32 get_work_nv_device_num(const struct work_struct *t) +{ + // offset determined though observed behavior of the NV driver. + const int DEVICE_NUM_OFFSET = sizeof(struct work_struct); + void* state = (void*)(t); + void** device_num_ptr = state + DEVICE_NUM_OFFSET; + return(*((u32*)(*device_num_ptr))); +} + + + +typedef struct { + raw_spinlock_t lock; + struct task_struct *device_owner; +}nv_device_registry_t; + +static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; + +int init_nv_device_reg(void) +{ + int i; + + //memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); + + for(i = 0; i < NV_DEVICE_NUM; ++i) + { + raw_spin_lock_init(&NV_DEVICE_REG[i].lock); + NV_DEVICE_REG[i].device_owner = NULL; + } + + return(1); +} + +/* use to get nv_device_id by given owner. + (if return -1, can't get the assocaite device id)*/ +/* +int get_nv_device_id(struct task_struct* owner) +{ + int i; + if(!owner) + { + return(-1); + } + for(i = 0; i < NV_DEVICE_NUM; ++i) + { + if(NV_DEVICE_REG[i].device_owner == owner) + return(i); + } + return(-1); +} +*/ + + + +static int __reg_nv_device(int reg_device_id) +{ + struct task_struct* old = + cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner, + NULL, + current); + + mb(); + + if(likely(old == NULL)) + { + down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); + TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); + return(0); + } + else + { + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); + return(-EBUSY); + } + +#if 0 + //unsigned long flags; + //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags); + //lock_nv_registry(reg_device_id, &flags); + + if(likely(NV_DEVICE_REG[reg_device_id].device_owner == NULL)) + { + NV_DEVICE_REG[reg_device_id].device_owner = current; + mb(); // needed? + + // release spin lock before chance of going to sleep. 
+ //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags); + //unlock_nv_registry(reg_device_id, &flags); + + down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); + TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); + return(0); + } + else + { + //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags); + //unlock_nv_registry(reg_device_id, &flags); + + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); + return(-EBUSY); + } +#endif +} + +static int __clear_reg_nv_device(int de_reg_device_id) +{ + int ret; + unsigned long flags; + struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); + struct task_struct* old; + + lock_nv_registry(de_reg_device_id, &flags); + + old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner, + current, + NULL); + + mb(); + + if(likely(old == current)) + { + flush_pending(klitirqd_th, current); + //unlock_nv_registry(de_reg_device_id, &flags); + + up_and_set_stat(current, NOT_HELD, &tsk_rt(current)->klitirqd_sem); + + unlock_nv_registry(de_reg_device_id, &flags); + ret = 0; + + TRACE_CUR("%s: semaphore released.\n",__FUNCTION__); + } + else + { + unlock_nv_registry(de_reg_device_id, &flags); + ret = -EINVAL; + + if(old) + TRACE_CUR("%s: device %d is not registered for this process's use! %s/%d is!\n", + __FUNCTION__, de_reg_device_id, old->comm, old->pid); + else + TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n", + __FUNCTION__, de_reg_device_id); + } + + return(ret); +} + + +int reg_nv_device(int reg_device_id, int reg_action) +{ + int ret; + + if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) + { + if(reg_action) + ret = __reg_nv_device(reg_device_id); + else + ret = __clear_reg_nv_device(reg_device_id); + } + else + { + ret = -ENODEV; + } + + return(ret); +} + +/* use to get the owner of nv_device_id. */ +struct task_struct* get_nv_device_owner(u32 target_device_id) +{ + struct task_struct* owner; + BUG_ON(target_device_id >= NV_DEVICE_NUM); + owner = NV_DEVICE_REG[target_device_id].device_owner; + return(owner); +} + +void lock_nv_registry(u32 target_device_id, unsigned long* flags) +{ + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + if(in_interrupt()) + TRACE("Locking registry for %d.\n", target_device_id); + else + TRACE_CUR("Locking registry for %d.\n", target_device_id); + + raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); +} + +void unlock_nv_registry(u32 target_device_id, unsigned long* flags) +{ + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + if(in_interrupt()) + TRACE("Unlocking registry for %d.\n", target_device_id); + else + TRACE_CUR("Unlocking registry for %d.\n", target_device_id); + + raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); +} + + +void increment_nv_int_count(u32 device) +{ + unsigned long flags; + struct task_struct* owner; + + lock_nv_registry(device, &flags); + + owner = NV_DEVICE_REG[device].device_owner; + if(owner) + { + atomic_inc(&tsk_rt(owner)->nv_int_count); + } + + unlock_nv_registry(device, &flags); +} +EXPORT_SYMBOL(increment_nv_int_count); + + diff --git a/litmus/preempt.c b/litmus/preempt.c index ebe2e3461895..08b98c3b57bf 100644 --- a/litmus/preempt.c +++ b/litmus/preempt.c @@ -30,8 +30,11 @@ void sched_state_will_schedule(struct task_struct* tsk) /* Litmus tasks should never be subject to a remote * set_tsk_need_resched(). 
*/ BUG_ON(is_realtime(tsk)); + +/* TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", __builtin_return_address(0)); +*/ } /* Called by the IPI handler after another CPU called smp_send_resched(). */ @@ -43,13 +46,17 @@ void sched_state_ipi(void) /* Cause scheduler to be invoked. * This will cause a transition to WILL_SCHEDULE. */ set_tsk_need_resched(current); + /* TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", current->comm, current->pid); + */ } else { /* ignore */ + /* TRACE_STATE("ignoring IPI in state %x (%s)\n", get_sched_state(), sched_state_name(get_sched_state())); + */ } } diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 73fe1c442a0d..9b0a8d3b624d 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -45,7 +46,18 @@ /* to configure the cluster size */ #include -#include + +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif /* Reference configuration variable. Determines which cache level is used to * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that @@ -95,7 +107,7 @@ typedef struct clusterdomain { struct bheap_node *heap_node; struct bheap cpu_heap; /* lock for this cluster */ -#define lock domain.ready_lock +#define cedf_lock domain.ready_lock } cedf_domain_t; /* a cedf_domain per cluster; allocation is done at init/activation time */ @@ -257,21 +269,50 @@ static noinline void requeue(struct task_struct* task) } } +#ifdef CONFIG_SCHED_CPU_AFFINITY +static cpu_entry_t* cedf_get_nearest_available_cpu( + cedf_domain_t *cluster, cpu_entry_t* start) +{ + cpu_entry_t* affinity; + + get_nearest_available_cpu(affinity, start, cedf_cpu_entries, -1); + + /* make sure CPU is in our cluster */ + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) + return(affinity); + else + return(NULL); +} +#endif + + /* check for any necessary preemptions */ static void check_for_preemptions(cedf_domain_t *cluster) { struct task_struct *task; - cpu_entry_t* last; + cpu_entry_t *last; for(last = lowest_prio_cpu(cluster); edf_preemption_needed(&cluster->domain, last->linked); last = lowest_prio_cpu(cluster)) { /* preemption necessary */ task = __take_ready(&cluster->domain); - TRACE("check_for_preemptions: attempting to link task %d to %d\n", - task->pid, last->cpu); +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = + cedf_get_nearest_available_cpu(cluster, + &per_cpu(cedf_cpu_entries, task_cpu(task))); + if(affinity) + last = affinity; + else if(last->linked) + requeue(last->linked); + } +#else if (last->linked) requeue(last->linked); +#endif + TRACE("check_for_preemptions: attempting to link task %d to %d\n", + task->pid, last->cpu); link_task_to_cpu(task, last); preempt(last); } @@ -292,12 +333,12 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); unsigned long flags; - raw_spin_lock_irqsave(&cluster->lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); __merge_ready(&cluster->domain, tasks); check_for_preemptions(cluster); - raw_spin_unlock_irqrestore(&cluster->lock, flags); + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); } /* caller holds cedf_lock */ @@ -307,6 +348,10 @@ static noinline void job_completion(struct task_struct *t, int forced) sched_trace_task_completion(t, forced); +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + 
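+	/* nv_int_count was just sampled by the completion record above and is
+	 * reset here, so each record reports only the GPU interrupts charged
+	 * to the job that finished. */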
TRACE_TASK(t, "job_completion().\n"); /* set flags */ @@ -378,7 +423,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) int out_of_time, sleep, preempt, np, exists, blocks; struct task_struct* next = NULL; - raw_spin_lock(&cluster->lock); + raw_spin_lock(&cluster->cedf_lock); clear_will_schedule(); /* sanity checking */ @@ -462,7 +507,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) next = prev; sched_state_task_picked(); - raw_spin_unlock(&cluster->lock); + raw_spin_unlock(&cluster->cedf_lock); #ifdef WANT_ALL_SCHED_EVENTS TRACE("cedf_lock released, next=0x%p\n", next); @@ -504,7 +549,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) /* the cluster doesn't change even if t is running */ cluster = task_cpu_cluster(t); - raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); /* setup job params */ release_at(t, litmus_clock()); @@ -521,20 +566,22 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) t->rt_param.linked_on = NO_CPU; cedf_job_arrival(t); - raw_spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags); + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); } static void cedf_task_wake_up(struct task_struct *task) { unsigned long flags; - lt_t now; + //lt_t now; cedf_domain_t *cluster; TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); cluster = task_cpu_cluster(task); - raw_spin_lock_irqsave(&cluster->lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); + +#if 0 // sporadic task model /* We need to take suspensions because of semaphores into * account! If a job resumes after being suspended due to acquiring * a semaphore, it should never be treated as a new job release. @@ -556,8 +603,17 @@ static void cedf_task_wake_up(struct task_struct *task) } } } - cedf_job_arrival(task); - raw_spin_unlock_irqrestore(&cluster->lock, flags); +#endif + + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU); + set_rt_flags(task, RT_F_RUNNING); // periodic model + + if(tsk_rt(task)->linked_on == NO_CPU) + cedf_job_arrival(task); + else + TRACE("WTF, mate?!\n"); + + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); } static void cedf_task_block(struct task_struct *t) @@ -570,9 +626,9 @@ static void cedf_task_block(struct task_struct *t) cluster = task_cpu_cluster(t); /* unlink if necessary */ - raw_spin_lock_irqsave(&cluster->lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); unlink(t); - raw_spin_unlock_irqrestore(&cluster->lock, flags); + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); BUG_ON(!is_realtime(t)); } @@ -584,7 +640,7 @@ static void cedf_task_exit(struct task_struct * t) cedf_domain_t *cluster = task_cpu_cluster(t); /* unlink if necessary */ - raw_spin_lock_irqsave(&cluster->lock, flags); + raw_spin_lock_irqsave(&cluster->cedf_lock, flags); unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { cpu_entry_t *cpu; @@ -592,7 +648,7 @@ static void cedf_task_exit(struct task_struct * t) cpu->scheduled = NULL; tsk_rt(t)->scheduled_on = NO_CPU; } - raw_spin_unlock_irqrestore(&cluster->lock, flags); + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); BUG_ON(!is_realtime(t)); TRACE_TASK(t, "RIP\n"); @@ -603,6 +659,721 @@ static long cedf_admit_task(struct task_struct* tsk) return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 
0 : -EINVAL; } + + + + + + + + + + + + +#ifdef CONFIG_LITMUS_LOCKING + +#include + + +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + int linked_on; + int check_preempt = 0; + + cedf_domain_t* cluster = task_cpu_cluster(t); + + if(prio_inh != NULL) + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "inherits priority from %p\n", prio_inh); + + sched_trace_eff_prio_change(t, prio_inh); + + tsk_rt(t)->inh_task = prio_inh; + + linked_on = tsk_rt(t)->linked_on; + + /* If it is scheduled, then we need to reorder the CPU heap. */ + if (linked_on != NO_CPU) { + TRACE_TASK(t, "%s: linked on %d\n", + __FUNCTION__, linked_on); + /* Holder is scheduled; need to re-order CPUs. + * We can't use heap_decrease() here since + * the cpu_heap is ordered in reverse direction, so + * it is actually an increase. */ + bheap_delete(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(cedf_cpu_entries, linked_on).hn); + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, + per_cpu(cedf_cpu_entries, linked_on).hn); + } else { + /* holder may be queued: first stop queue changes */ + raw_spin_lock(&cluster->domain.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); + + /* We need to update the position of holder in some + * heap. Note that this could be a release heap if we + * budget enforcement is used and this job overran. */ + check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node); + + } else { + /* Nothing to do: if it is not queued and not linked + * then it is either sleeping or currently being moved + * by other code (e.g., a timer interrupt handler) that + * will use the correct priority when enqueuing the + * task. */ + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); + } + raw_spin_unlock(&cluster->domain.release_lock); + + /* If holder was enqueued in a release heap, then the following + * preemption check is pointless, but we can't easily detect + * that case. If you want to fix this, then consider that + * simply adding a state flag requires O(n) time to update when + * releasing n tasks, which conflicts with the goal to have + * O(log n) merges. */ + if (check_preempt) { + /* heap_decrease() hit the top level of the heap: make + * sure preemption checks get the right task, not the + * potentially stale cache. */ + bheap_uncache_min(edf_ready_order, &cluster->domain.ready_queue); + check_for_preemptions(cluster); + } + } +} + +/* called with IRQs off */ +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + cedf_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cedf_lock); + + __set_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&cluster->cedf_lock); +} + + +/* called with IRQs off */ +static void __clear_priority_inheritance(struct task_struct* t) +{ + TRACE_TASK(t, "priority restored\n"); + + if(tsk_rt(t)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(t, NULL); + + tsk_rt(t)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. 
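+	 * Instead, unlink() and cedf_job_arrival() below requeue the task at
+	 * its restored base priority and re-run the preemption check.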
*/ + unlink(t); + cedf_job_arrival(t); + } + else + { + __set_priority_inheritance(t, NULL); + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); + + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(tsk_rt(t)->cur_klitirqd); + cedf_job_arrival(tsk_rt(t)->cur_klitirqd); + } + else + { + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); + } + } +#endif +} + +/* called with IRQs off */ +static void clear_priority_inheritance(struct task_struct* t) +{ + cedf_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cedf_lock); + __clear_priority_inheritance(t); + raw_spin_unlock(&cluster->cedf_lock); +} + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +/* called with IRQs off */ +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cedf_lock); + + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } + + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); + + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __set_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? + new_owner : + tsk_rt(new_owner)->inh_task); + + raw_spin_unlock(&cluster->cedf_lock); +} + +/* called with IRQs off */ +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ + cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cedf_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) + { + tsk_rt(klitirqd)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. 
*/ + unlink(klitirqd); + cedf_job_arrival(klitirqd); + } + else + { + __set_priority_inheritance(klitirqd, NULL); + } + + tsk_rt(old_owner)->cur_klitirqd = NULL; + + raw_spin_unlock(&cluster->cedf_lock); +} +#endif // CONFIG_LITMUS_SOFTIRQD + + +/* ******************** KFMLP support ********************** */ + +/* struct for semaphore with priority inheritance */ +struct kfmlp_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ +}; + +struct kfmlp_semaphore +{ + struct litmus_lock litmus_lock; + + spinlock_t lock; + + int num_resources; /* aka k */ + struct kfmlp_queue *queues; /* array */ + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ +}; + +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct kfmlp_semaphore, litmus_lock); +} + +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, + struct kfmlp_queue* queue) +{ + return (queue - &sem->queues[0]); +} + +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, + struct task_struct* holder) +{ + int i; + for(i = 0; i < sem->num_resources; ++i) + if(sem->queues[i].owner == holder) + return(&sem->queues[i]); + return(NULL); +} + +/* caller is responsible for locking */ +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + + /* Compare task prios, find high prio task. */ + if (queued != skip && edf_higher_prio(queued, found)) + found = queued; + } + return found; +} + +static inline struct kfmlp_queue* kfmlp_find_shortest( + struct kfmlp_semaphore* sem, + struct kfmlp_queue* search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct kfmlp_queue* step = search_start; + struct kfmlp_queue* shortest = sem->shortest_queue; + + do + { + step = (step+1 != &sem->queues[sem->num_resources]) ? 
+ step+1 : &sem->queues[0]; + if(step->count < shortest->count) + { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + }while(step != search_start); + + return(shortest); +} + +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) +{ + /* must hold sem->lock */ + + struct kfmlp_queue *my_queue = NULL; + struct task_struct *max_hp = NULL; + + + struct list_head *pos; + struct task_struct *queued; + int i; + + for(i = 0; i < sem->num_resources; ++i) + { + if( (sem->queues[i].count > 1) && + ((my_queue == NULL) || + (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + { + my_queue = &sem->queues[i]; + } + } + + if(my_queue) + { + cedf_domain_t* cluster; + + max_hp = my_queue->hp_waiter; + BUG_ON(!max_hp); + + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", + kfmlp_get_idx(sem, my_queue), + max_hp->comm, max_hp->pid, + kfmlp_get_idx(sem, my_queue)); + + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); + + /* + if(my_queue->hp_waiter) + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->hp_waiter->comm, + my_queue->hp_waiter->pid); + else + TRACE_CUR("queue %d: new hp_waiter is %p\n", + kfmlp_get_idx(sem, my_queue), NULL); + */ + + cluster = task_cpu_cluster(max_hp); + + raw_spin_lock(&cluster->cedf_lock); + + /* + if(my_queue->owner) + TRACE_CUR("queue %d: owner is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->owner->comm, + my_queue->owner->pid); + else + TRACE_CUR("queue %d: owner is %p\n", + kfmlp_get_idx(sem, my_queue), + NULL); + */ + + if(tsk_rt(my_queue->owner)->inh_task == max_hp) + { + __clear_priority_inheritance(my_queue->owner); + if(my_queue->hp_waiter != NULL) + { + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + raw_spin_unlock(&cluster->cedf_lock); + + list_for_each(pos, &my_queue->wait.task_list) + { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + /* Compare task prios, find high prio task. */ + if (queued == max_hp) + { + /* + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", + kfmlp_get_idx(sem, my_queue)); + */ + __remove_wait_queue(&my_queue->wait, + list_entry(pos, wait_queue_t, task_list)); + break; + } + } + --(my_queue->count); + } + + return(max_hp); +} + +int cedf_kfmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue* my_queue; + wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = sem->shortest_queue; + + if (my_queue->owner) { + /* resource is not free => must suspend and wait */ + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", + kfmlp_get_idx(sem, my_queue)); + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); + + /* check if we need to activate priority inheritance */ + if (edf_higher_prio(t, my_queue->hp_waiter)) + { + my_queue->hp_waiter = t; + if (edf_higher_prio(t, my_queue->owner)) + { + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->lock, flags); + + /* We depend on the FIFO order. 
Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release (or steal). + */ + schedule(); + + + if(my_queue->owner == t) + { + TRACE_CUR("queue %d: acquired through waiting\n", + kfmlp_get_idx(sem, my_queue)); + } + else + { + /* this case may happen if our wait entry was stolen + between queues. record where we went.*/ + my_queue = kfmlp_get_queue(sem, t); + BUG_ON(!my_queue); + TRACE_CUR("queue %d: acquired through stealing\n", + kfmlp_get_idx(sem, my_queue)); + } + } + else + { + TRACE_CUR("queue %d: acquired immediately\n", + kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = t; + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + spin_unlock_irqrestore(&sem->lock, flags); + } + + return kfmlp_get_idx(sem, my_queue); +} + +int cedf_kfmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + + if (!my_queue) { + err = -EINVAL; + goto out; + } + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&my_queue->wait); + if (next) { + /* + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + /* next becomes the resouce holder */ + my_queue->owner = next; + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", + kfmlp_get_idx(sem, my_queue), next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == my_queue->hp_waiter) { + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); + if (my_queue->hp_waiter) + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); + else + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); + } else { + /* Well, if next is not the highest-priority waiter, + * then it ought to inherit the highest-priority + * waiter's priority. 
*/ + set_priority_inheritance(next, my_queue->hp_waiter); + } + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); + + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ + + /* + if(next) + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + + my_queue->owner = next; + + if(next) + { + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + } + } + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + clear_priority_inheritance(t); + +out: + spin_unlock_irqrestore(&sem->lock, flags); + + return err; +} + +int cedf_kfmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + owner = (my_queue) ? (my_queue->owner == t) : 0; + + spin_unlock_irqrestore(&sem->lock, flags); + + if (owner) + cedf_kfmlp_unlock(l); + + return 0; +} + +void cedf_kfmlp_free(struct litmus_lock* l) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + kfree(sem->queues); + kfree(sem); +} + +static struct litmus_lock_ops cedf_kfmlp_lock_ops = { + .close = cedf_kfmlp_close, + .lock = cedf_kfmlp_lock, + .unlock = cedf_kfmlp_unlock, + .deallocate = cedf_kfmlp_free, +}; + +static struct litmus_lock* cedf_new_kfmlp(void* __user arg, int* ret_code) +{ + struct kfmlp_semaphore* sem; + int num_resources = 0; + int i; + + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(num_resources < 1) + { + *ret_code = -EINVAL; + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + *ret_code = -ENOMEM; + return NULL; + } + + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); + if(!sem->queues) + { + kfree(sem); + *ret_code = -ENOMEM; + return NULL; + } + + sem->litmus_lock.ops = &cedf_kfmlp_lock_ops; + spin_lock_init(&sem->lock); + sem->num_resources = num_resources; + + for(i = 0; i < num_resources; ++i) + { + sem->queues[i].owner = NULL; + sem->queues[i].hp_waiter = NULL; + init_waitqueue_head(&sem->queues[i].wait); + sem->queues[i].count = 0; + } + + sem->shortest_queue = &sem->queues[0]; + + *ret_code = 0; + return &sem->litmus_lock; +} + + +/* **** lock constructor **** */ + +static long cedf_allocate_lock(struct litmus_lock **lock, int type, + void* __user arg) +{ + int err = -ENXIO; + + /* C-EDF currently only supports the FMLP for global resources + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! 
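+	   Only KFMLP_SEM is handled below; cedf_new_kfmlp() reads the number
+	   of replicas (k) from the user-space argument.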
*/ + switch (type) { + case KFMLP_SEM: + *lock = cedf_new_kfmlp(arg, &err); + break; + }; + + return err; +} + +#endif // CONFIG_LITMUS_LOCKING + + + + + + /* total number of cluster */ static int num_clusters; /* we do not support cluster of different sizes */ @@ -746,6 +1517,40 @@ static long cedf_activate_plugin(void) break; } } + +#ifdef CONFIG_LITMUS_SOFTIRQD + { + /* distribute the daemons evenly across the clusters. */ + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; + int left_over = NR_LITMUS_SOFTIRQD % num_clusters; + + int daemon = 0; + for(i = 0; i < num_clusters; ++i) + { + int num_on_this_cluster = num_daemons_per_cluster; + if(left_over) + { + ++num_on_this_cluster; + --left_over; + } + + for(j = 0; j < num_on_this_cluster; ++j) + { + // first CPU of this cluster + affinity[daemon++] = i*cluster_size; + } + } + + spawn_klitirqd(affinity); + + kfree(affinity); + } +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif free_cpumask_var(mask); clusters_allocated = 1; @@ -765,6 +1570,15 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .task_block = cedf_task_block, .admit_task = cedf_admit_task, .activate_plugin = cedf_activate_plugin, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = cedf_allocate_lock, + .set_prio_inh = set_priority_inheritance, + .clear_prio_inh = clear_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, +#endif }; static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 3092797480f8..d04e0703c154 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -12,6 +12,8 @@ #include #include #include +#include + #include #include @@ -25,6 +27,19 @@ #include +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + + /* Overview of GSN-EDF operations. * * For a detailed explanation of GSN-EDF have a look at the FMLP paper. 
This @@ -253,21 +268,52 @@ static noinline void requeue(struct task_struct* task) } } +#ifdef CONFIG_SCHED_CPU_AFFINITY +static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t* start) +{ + cpu_entry_t* affinity; + + get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries, +#ifdef CONFIG_RELEASE_MASTER + gsnedf.release_master +#else + -1 +#endif + ); + + return(affinity); +} +#endif + /* check for any necessary preemptions */ static void check_for_preemptions(void) { struct task_struct *task; - cpu_entry_t* last; + cpu_entry_t *last; for(last = lowest_prio_cpu(); edf_preemption_needed(&gsnedf, last->linked); last = lowest_prio_cpu()) { /* preemption necessary */ task = __take_ready(&gsnedf); - TRACE("check_for_preemptions: attempting to link task %d to %d\n", - task->pid, last->cpu); + +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = gsnedf_get_nearest_available_cpu( + &per_cpu(gsnedf_cpu_entries, task_cpu(task))); + if(affinity) + last = affinity; + else if(last->linked) + requeue(last->linked); + } +#else if (last->linked) requeue(last->linked); +#endif + + TRACE("check_for_preemptions: attempting to link task %d to %d\n", + task->pid, last->cpu); + link_task_to_cpu(task, last); preempt(last); } @@ -277,7 +323,7 @@ static void check_for_preemptions(void) static noinline void gsnedf_job_arrival(struct task_struct* task) { BUG_ON(!task); - + requeue(task); check_for_preemptions(); } @@ -298,9 +344,13 @@ static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) static noinline void job_completion(struct task_struct *t, int forced) { BUG_ON(!t); - + sched_trace_task_completion(t, forced); +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + TRACE_TASK(t, "job_completion().\n"); /* set flags */ @@ -401,17 +451,19 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); #endif + /* if (exists) TRACE_TASK(prev, "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " "state:%d sig:%d\n", blocks, out_of_time, np, sleep, preempt, prev->state, signal_pending(prev)); + */ + if (entry->linked && preempt) TRACE_TASK(prev, "will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); - /* If a task blocks we have no choice but to reschedule. */ if (blocks) @@ -456,12 +508,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) entry->scheduled->rt_param.scheduled_on = NO_CPU; TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); } - } else + } + else + { /* Only override Linux scheduler if we have a real-time task * scheduled that needs to continue. */ if (exists) next = prev; + } sched_state_task_picked(); @@ -486,8 +541,9 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) static void gsnedf_finish_switch(struct task_struct *prev) { cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); - + entry->scheduled = is_realtime(current) ? current : NULL; + #ifdef WANT_ALL_SCHED_EVENTS TRACE_TASK(prev, "switched away from\n"); #endif @@ -536,11 +592,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) static void gsnedf_task_wake_up(struct task_struct *task) { unsigned long flags; - lt_t now; - + lt_t now; + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); raw_spin_lock_irqsave(&gsnedf_lock, flags); + + +#if 0 // sporadic task model /* We need to take suspensions because of semaphores into * account! 
If a job resumes after being suspended due to acquiring * a semaphore, it should never be treated as a new job release. @@ -562,19 +621,26 @@ static void gsnedf_task_wake_up(struct task_struct *task) } } } +#else // periodic task model + set_rt_flags(task, RT_F_RUNNING); +#endif + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } static void gsnedf_task_block(struct task_struct *t) { + // TODO: is this called on preemption?? unsigned long flags; TRACE_TASK(t, "block at %llu\n", litmus_clock()); /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); + unlink(t); + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -608,51 +674,53 @@ static long gsnedf_admit_task(struct task_struct* tsk) #include -/* called with IRQs off */ -static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) + +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int linked_on; - int check_preempt = 0; - - raw_spin_lock(&gsnedf_lock); - - TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); + int check_preempt = 0; + + if(prio_inh != NULL) + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "inherits priority from %p\n", prio_inh); + + sched_trace_eff_prio_change(t, prio_inh); + tsk_rt(t)->inh_task = prio_inh; - + linked_on = tsk_rt(t)->linked_on; - + /* If it is scheduled, then we need to reorder the CPU heap. */ if (linked_on != NO_CPU) { TRACE_TASK(t, "%s: linked on %d\n", - __FUNCTION__, linked_on); + __FUNCTION__, linked_on); /* Holder is scheduled; need to re-order CPUs. * We can't use heap_decrease() here since * the cpu_heap is ordered in reverse direction, so * it is actually an increase. */ bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, - gsnedf_cpus[linked_on]->hn); + gsnedf_cpus[linked_on]->hn); bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, - gsnedf_cpus[linked_on]->hn); + gsnedf_cpus[linked_on]->hn); } else { /* holder may be queued: first stop queue changes */ raw_spin_lock(&gsnedf.release_lock); if (is_queued(t)) { - TRACE_TASK(t, "%s: is queued\n", - __FUNCTION__); + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); + /* We need to update the position of holder in some * heap. Note that this could be a release heap if we * budget enforcement is used and this job overran. */ - check_preempt = - !bheap_decrease(edf_ready_order, - tsk_rt(t)->heap_node); + check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node); + } else { /* Nothing to do: if it is not queued and not linked * then it is either sleeping or currently being moved * by other code (e.g., a timer interrupt handler) that * will use the correct priority when enqueuing the * task. */ - TRACE_TASK(t, "%s: is NOT queued => Done.\n", - __FUNCTION__); + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); } raw_spin_unlock(&gsnedf.release_lock); @@ -666,34 +734,148 @@ static void set_priority_inheritance(struct task_struct* t, struct task_struct* /* heap_decrease() hit the top level of the heap: make * sure preemption checks get the right task, not the * potentially stale cache. 
*/ - bheap_uncache_min(edf_ready_order, - &gsnedf.ready_queue); + bheap_uncache_min(edf_ready_order, &gsnedf.ready_queue); check_for_preemptions(); } } +} +/* called with IRQs off */ +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + raw_spin_lock(&gsnedf_lock); + + __set_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + raw_spin_unlock(&gsnedf_lock); } + +/* called with IRQs off */ +static void __clear_priority_inheritance(struct task_struct* t) +{ + TRACE_TASK(t, "priority restored\n"); + + if(tsk_rt(t)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(t, NULL); + + tsk_rt(t)->inh_task = NULL; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(t); + gsnedf_job_arrival(t); + } + else + { + __set_priority_inheritance(t, NULL); + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) + { + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); + + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(tsk_rt(t)->cur_klitirqd); + gsnedf_job_arrival(tsk_rt(t)->cur_klitirqd); + } + else + { + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); + } + } +#endif +} + /* called with IRQs off */ static void clear_priority_inheritance(struct task_struct* t) { raw_spin_lock(&gsnedf_lock); + __clear_priority_inheritance(t); + raw_spin_unlock(&gsnedf_lock); +} - /* A job only stops inheriting a priority when it releases a - * resource. Thus we can make the following assumption.*/ - BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); - - TRACE_TASK(t, "priority restored\n"); - tsk_rt(t)->inh_task = NULL; +#ifdef CONFIG_LITMUS_SOFTIRQD +/* called with IRQs off */ +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&gsnedf_lock); + + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } + + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); - /* Check if rescheduling is necessary. We can't use heap_decrease() - * since the priority was effectively lowered. */ - unlink(t); - gsnedf_job_arrival(t); + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __set_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? + new_owner : + tsk_rt(new_owner)->inh_task); + + raw_spin_unlock(&gsnedf_lock); +} +/* called with IRQs off */ +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&gsnedf_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) + { + tsk_rt(klitirqd)->inh_task = NULL; + + /* Check if rescheduling is necessary. 
We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(klitirqd); + gsnedf_job_arrival(klitirqd); + } + else + { + __set_priority_inheritance(klitirqd, NULL); + } + + tsk_rt(old_owner)->cur_klitirqd = NULL; + raw_spin_unlock(&gsnedf_lock); } +#endif /* ******************** FMLP support ********************** */ @@ -892,11 +1074,477 @@ static struct litmus_lock* gsnedf_new_fmlp(void) return &sem->litmus_lock; } + + + + + + +/* ******************** KFMLP support ********************** */ + +/* struct for semaphore with priority inheritance */ +struct kfmlp_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ +}; + +struct kfmlp_semaphore +{ + struct litmus_lock litmus_lock; + + spinlock_t lock; + + int num_resources; /* aka k */ + + struct kfmlp_queue *queues; /* array */ + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ +}; + +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct kfmlp_semaphore, litmus_lock); +} + +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, + struct kfmlp_queue* queue) +{ + return (queue - &sem->queues[0]); +} + +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, + struct task_struct* holder) +{ + int i; + for(i = 0; i < sem->num_resources; ++i) + if(sem->queues[i].owner == holder) + return(&sem->queues[i]); + return(NULL); +} + +/* caller is responsible for locking */ +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + + /* Compare task prios, find high prio task. */ + if (queued != skip && edf_higher_prio(queued, found)) + found = queued; + } + return found; +} + +static inline struct kfmlp_queue* kfmlp_find_shortest( + struct kfmlp_semaphore* sem, + struct kfmlp_queue* search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct kfmlp_queue* step = search_start; + struct kfmlp_queue* shortest = sem->shortest_queue; + + do + { + step = (step+1 != &sem->queues[sem->num_resources]) ? 
+ step+1 : &sem->queues[0]; + if(step->count < shortest->count) + { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + }while(step != search_start); + + return(shortest); +} + +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) +{ + /* must hold sem->lock */ + + struct kfmlp_queue *my_queue = NULL; + struct task_struct *max_hp = NULL; + + + struct list_head *pos; + struct task_struct *queued; + int i; + + for(i = 0; i < sem->num_resources; ++i) + { + if( (sem->queues[i].count > 1) && + ((my_queue == NULL) || + (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + { + my_queue = &sem->queues[i]; + } + } + + if(my_queue) + { + max_hp = my_queue->hp_waiter; + + BUG_ON(!max_hp); + + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", + kfmlp_get_idx(sem, my_queue), + max_hp->comm, max_hp->pid, + kfmlp_get_idx(sem, my_queue)); + + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); + + /* + if(my_queue->hp_waiter) + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->hp_waiter->comm, + my_queue->hp_waiter->pid); + else + TRACE_CUR("queue %d: new hp_waiter is %p\n", + kfmlp_get_idx(sem, my_queue), NULL); + */ + + raw_spin_lock(&gsnedf_lock); + + /* + if(my_queue->owner) + TRACE_CUR("queue %d: owner is %s/%d\n", + kfmlp_get_idx(sem, my_queue), + my_queue->owner->comm, + my_queue->owner->pid); + else + TRACE_CUR("queue %d: owner is %p\n", + kfmlp_get_idx(sem, my_queue), + NULL); + */ + + if(tsk_rt(my_queue->owner)->inh_task == max_hp) + { + __clear_priority_inheritance(my_queue->owner); + if(my_queue->hp_waiter != NULL) + { + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + raw_spin_unlock(&gsnedf_lock); + + list_for_each(pos, &my_queue->wait.task_list) + { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + /* Compare task prios, find high prio task. */ + if (queued == max_hp) + { + /* + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", + kfmlp_get_idx(sem, my_queue)); + */ + __remove_wait_queue(&my_queue->wait, + list_entry(pos, wait_queue_t, task_list)); + break; + } + } + --(my_queue->count); + } + + return(max_hp); +} + +int gsnedf_kfmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue* my_queue; + wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = sem->shortest_queue; + + if (my_queue->owner) { + /* resource is not free => must suspend and wait */ + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", + kfmlp_get_idx(sem, my_queue)); + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); + + /* check if we need to activate priority inheritance */ + if (edf_higher_prio(t, my_queue->hp_waiter)) + { + my_queue->hp_waiter = t; + if (edf_higher_prio(t, my_queue->owner)) + { + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); + } + } + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->lock, flags); + + /* We depend on the FIFO order. 
Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release (or steal). + */ + schedule(); + + + if(my_queue->owner == t) + { + TRACE_CUR("queue %d: acquired through waiting\n", + kfmlp_get_idx(sem, my_queue)); + } + else + { + /* this case may happen if our wait entry was stolen + between queues. record where we went. */ + my_queue = kfmlp_get_queue(sem, t); + + BUG_ON(!my_queue); + TRACE_CUR("queue %d: acquired through stealing\n", + kfmlp_get_idx(sem, my_queue)); + } + } + else + { + TRACE_CUR("queue %d: acquired immediately\n", + kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = t; + + ++(my_queue->count); + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + + spin_unlock_irqrestore(&sem->lock, flags); + } + + return kfmlp_get_idx(sem, my_queue); +} + +int gsnedf_kfmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + + if (!my_queue) { + err = -EINVAL; + goto out; + } + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&my_queue->wait); + if (next) { + /* + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + /* next becomes the resouce holder */ + my_queue->owner = next; + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", + kfmlp_get_idx(sem, my_queue), next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == my_queue->hp_waiter) { + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); + if (my_queue->hp_waiter) + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); + else + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); + } else { + /* Well, if next is not the highest-priority waiter, + * then it ought to inherit the highest-priority + * waiter's priority. 
*/ + set_priority_inheritance(next, my_queue->hp_waiter); + } + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); + + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ + + /* + if(next) + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + */ + + my_queue->owner = next; + + if(next) + { + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid); + + /* wake up next */ + wake_up_process(next); + } + else + { + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); + + --(my_queue->count); + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + } + } + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + clear_priority_inheritance(t); + +out: + spin_unlock_irqrestore(&sem->lock, flags); + + return err; +} + +int gsnedf_kfmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + owner = (my_queue) ? (my_queue->owner == t) : 0; + + spin_unlock_irqrestore(&sem->lock, flags); + + if (owner) + gsnedf_kfmlp_unlock(l); + + return 0; +} + +void gsnedf_kfmlp_free(struct litmus_lock* l) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + kfree(sem->queues); + kfree(sem); +} + +static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = { + .close = gsnedf_kfmlp_close, + .lock = gsnedf_kfmlp_lock, + .unlock = gsnedf_kfmlp_unlock, + .deallocate = gsnedf_kfmlp_free, +}; + +static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg, int* ret_code) +{ + struct kfmlp_semaphore* sem; + int num_resources = 0; + int i; + + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) + { + *ret_code = -EINVAL; + return(NULL); + } + if(num_resources < 1) + { + *ret_code = -EINVAL; + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + *ret_code = -ENOMEM; + return NULL; + } + + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); + if(!sem->queues) + { + kfree(sem); + *ret_code = -ENOMEM; + return NULL; + } + + sem->litmus_lock.ops = &gsnedf_kfmlp_lock_ops; + spin_lock_init(&sem->lock); + sem->num_resources = num_resources; + + for(i = 0; i < num_resources; ++i) + { + sem->queues[i].owner = NULL; + sem->queues[i].hp_waiter = NULL; + init_waitqueue_head(&sem->queues[i].wait); + sem->queues[i].count = 0; + } + + sem->shortest_queue = &sem->queues[0]; + + *ret_code = 0; + return &sem->litmus_lock; +} + + + + + /* **** lock constructor **** */ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, - void* __user unused) + void* __user arg) { int err = -ENXIO; @@ -911,7 +1559,10 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, else err = -ENOMEM; break; - + + case KFMLP_SEM: + *lock = gsnedf_new_kfmlp(arg, &err); + break; }; return err; @@ -919,7 +1570,6 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, #endif - static long gsnedf_activate_plugin(void) { int cpu; @@ -946,6 +1596,15 @@ static long gsnedf_activate_plugin(void) } #endif 
} + +#ifdef CONFIG_LITMUS_SOFTIRQD + spawn_klitirqd(NULL); +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif + return 0; } @@ -963,8 +1622,15 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .admit_task = gsnedf_admit_task, .activate_plugin = gsnedf_activate_plugin, #ifdef CONFIG_LITMUS_LOCKING - .allocate_lock = gsnedf_allocate_lock, + .allocate_lock = gsnedf_allocate_lock, + .set_prio_inh = set_priority_inheritance, + .clear_prio_inh = clear_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, #endif + }; diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c index e6952896dc4b..1bca2e1a33cd 100644 --- a/litmus/sched_litmus.c +++ b/litmus/sched_litmus.c @@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) } #ifdef __ARCH_WANT_UNLOCKED_CTXSW if (next->oncpu) + { TRACE_TASK(next, "waiting for !oncpu"); + } while (next->oncpu) { cpu_relax(); mb(); diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index d54886df1f57..8802670a4b0b 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -129,6 +129,27 @@ static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, return -ENXIO; } +static void litmus_dummy_set_prio_inh(struct task_struct* a, struct task_struct* b) +{ +} + +static void litmus_dummy_clear_prio_inh(struct task_struct* t) +{ +} + +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +static void litmus_dummy_set_prio_inh_klitirq(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ +} + +static void litmus_dummy_clear_prio_inh_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ +} #endif @@ -149,6 +170,12 @@ struct sched_plugin linux_sched_plugin = { .deactivate_plugin = litmus_dummy_deactivate_plugin, #ifdef CONFIG_LITMUS_LOCKING .allocate_lock = litmus_dummy_allocate_lock, + .set_prio_inh = litmus_dummy_set_prio_inh, + .clear_prio_inh = litmus_dummy_clear_prio_inh, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirq, + .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd, #endif .admit_task = litmus_dummy_admit_task }; @@ -187,6 +214,8 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(deactivate_plugin); #ifdef CONFIG_LITMUS_LOCKING CHECK(allocate_lock); + CHECK(set_prio_inh); + CHECK(clear_prio_inh); #endif CHECK(admit_task); diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 5ef8d09ab41f..7aeb99b668d3 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -16,13 +17,13 @@ #include -#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) +#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11)) #define now() litmus_clock() struct local_buffer { - struct st_event_record record[NO_EVENTS]; - char flag[NO_EVENTS]; + struct st_event_record record[NUM_EVENTS]; + char flag[NUM_EVENTS]; struct ft_buffer ftbuf; }; @@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void) int i, ok = 0, err; printk("Allocated %u sched_trace_xxx() events per CPU " "(buffer size: %d bytes)\n", - NO_EVENTS, (int) sizeof(struct local_buffer)); + NUM_EVENTS, (int) sizeof(struct local_buffer)); err = ftdev_init(&st_dev, THIS_MODULE, num_online_cpus(), "sched_trace"); @@ -50,7 +51,7 @@ static int __init 
init_sched_task_trace(void) for (i = 0; i < st_dev.minor_cnt; i++) { buf = &per_cpu(st_event_buffer, i); - ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, + ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS, sizeof(struct st_event_record), buf->flag, buf->record); @@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id, { struct task_struct *t = (struct task_struct*) _task; struct st_event_record* rec; - if (is_realtime(t)) { + //if (is_realtime(t)) /* comment out to trace EVERYTHING */ + { rec = get_record(ST_SWITCH_TO, t); if (rec) { rec->data.switch_to.when = now(); @@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id, { struct task_struct *t = (struct task_struct*) _task; struct st_event_record* rec; - if (is_realtime(t)) { + //if (is_realtime(t)) /* comment out to trace EVERYTHING */ + { rec = get_record(ST_SWITCH_AWAY, t); if (rec) { rec->data.switch_away.when = now(); @@ -188,6 +191,7 @@ feather_callback void do_sched_trace_task_completion(unsigned long id, if (rec) { rec->data.completion.when = now(); rec->data.completion.forced = forced; + rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); put_record(rec); } } @@ -239,3 +243,201 @@ feather_callback void do_sched_trace_action(unsigned long id, put_record(rec); } } + + +feather_callback void do_sched_trace_tasklet_release(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t); + + if (rec) { + rec->data.tasklet_release.when = now(); + put_record(rec); + } +} + + +feather_callback void do_sched_trace_tasklet_begin(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t); + + if (rec) { + rec->data.tasklet_begin.when = now(); + + if(!in_interrupt()) + rec->data.tasklet_begin.exe_pid = current->pid; + else + rec->data.tasklet_begin.exe_pid = 0; + + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_tasklet_begin); + + +feather_callback void do_sched_trace_tasklet_end(unsigned long id, + unsigned long _owner, + unsigned long _flushed) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_END, t); + + if (rec) { + rec->data.tasklet_end.when = now(); + rec->data.tasklet_end.flushed = _flushed; + + if(!in_interrupt()) + rec->data.tasklet_end.exe_pid = current->pid; + else + rec->data.tasklet_end.exe_pid = 0; + + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_tasklet_end); + + +feather_callback void do_sched_trace_work_release(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_RELEASE, t); + + if (rec) { + rec->data.work_release.when = now(); + put_record(rec); + } +} + + +feather_callback void do_sched_trace_work_begin(unsigned long id, + unsigned long _owner, + unsigned long _exe) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_BEGIN, t); + + if (rec) { + struct task_struct *exe = (struct task_struct*) _exe; + rec->data.work_begin.exe_pid = exe->pid; + rec->data.work_begin.when = now(); + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_work_begin); + + +feather_callback void do_sched_trace_work_end(unsigned long id, + unsigned long _owner, + unsigned long _exe, + unsigned long _flushed) +{ + struct 
task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_END, t); + + if (rec) { + struct task_struct *exe = (struct task_struct*) _exe; + rec->data.work_end.exe_pid = exe->pid; + rec->data.work_end.flushed = _flushed; + rec->data.work_end.when = now(); + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_work_end); + + +feather_callback void do_sched_trace_eff_prio_change(unsigned long id, + unsigned long _task, + unsigned long _inh) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t); + + if (rec) { + struct task_struct *inh = (struct task_struct*) _inh; + rec->data.effective_priority_change.when = now(); + rec->data.effective_priority_change.inh_pid = (inh != NULL) ? + inh->pid : + 0xffff; + + put_record(rec); + } +} + + +/* pray for no nesting of nv interrupts on same CPU... */ +struct tracing_interrupt_map +{ + int active; + int count; + unsigned long data[128]; // assume nesting less than 128... +}; +DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing); + +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, + unsigned long _device) +{ + struct st_event_record *rec; + + { + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + { + int_map->count++; + } + else + { + int_map->active = 0xcafebabe; + int_map->count = 1; + } + int_map->data[int_map->count-1] = _device; + } + + rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL); + if(rec) { + u32 device = _device; + rec->data.nv_interrupt_begin.when = now(); + rec->data.nv_interrupt_begin.device = device; + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin); + +/* +int is_interrupt_tracing_active(void) +{ + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + return 1; + return 0; +} +*/ + +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long unused) +{ + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + { + struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL); + + int_map->count--; + if(int_map->count == 0) + int_map->active = 0; + + if(rec) { + rec->data.nv_interrupt_end.when = now(); + rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; + put_record(rec); + } + } +} +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end); + + + + + + diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c new file mode 100644 index 000000000000..d7d7d8bae298 --- /dev/null +++ b/litmus/sched_trace_external.c @@ -0,0 +1,45 @@ +#include + +#include +#include + +void __sched_trace_tasklet_begin_external(struct task_struct* t) +{ + sched_trace_tasklet_begin(t); +} +EXPORT_SYMBOL(__sched_trace_tasklet_begin_external); + +void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed) +{ + sched_trace_tasklet_end(t, flushed); +} +EXPORT_SYMBOL(__sched_trace_tasklet_end_external); + + + +void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e) +{ + sched_trace_work_begin(t, e); +} +EXPORT_SYMBOL(__sched_trace_work_begin_external); + +void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f) +{ + sched_trace_work_end(t, e, f); +} +EXPORT_SYMBOL(__sched_trace_work_end_external); + + + +void 
__sched_trace_nv_interrupt_begin_external(u32 device) +{ + unsigned long _device = device; + sched_trace_nv_interrupt_begin(_device); +} +EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); + +void __sched_trace_nv_interrupt_end_external(void) +{ + sched_trace_nv_interrupt_end(); +} +EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); -- cgit v1.2.2
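
Editor's note on the wrappers above: sched_trace_external.c appears to exist so that separately built code (presumably the NVIDIA interrupt glue added elsewhere in this patch) can emit the new tasklet/work/NV-interrupt trace events without access to the sched_trace_*() hooks used inside the LITMUS^RT core. The following is an illustrative sketch only, not part of the patch: nv_bottom_half_example(), device_id, and owner are made-up names, and the extern prototypes are copied from the wrapper definitions above rather than from the patch's (unshown) header.

/*
 * Hypothetical call site -- a sketch, not code from this patch.
 * It brackets a GPU interrupt bottom half and its tasklet with the
 * exported trace wrappers, so the per-CPU nesting bookkeeping in
 * do_sched_trace_nv_interrupt_begin()/_end() pairs the events.
 */
#include <linux/types.h>	/* u32 */
#include <linux/sched.h>	/* struct task_struct */

/* prototypes as defined in sched_trace_external.c (header name not shown) */
extern void __sched_trace_nv_interrupt_begin_external(u32 device);
extern void __sched_trace_nv_interrupt_end_external(void);
extern void __sched_trace_tasklet_begin_external(struct task_struct *t);
extern void __sched_trace_tasklet_end_external(struct task_struct *t,
					       unsigned long flushed);

static void nv_bottom_half_example(u32 device_id, struct task_struct *owner)
{
	/* record that NV interrupt handling for device_id starts on this CPU */
	__sched_trace_nv_interrupt_begin_external(device_id);

	/* trace the tasklet executed on behalf of the owning real-time task */
	__sched_trace_tasklet_begin_external(owner);
	/* ... device-specific tasklet body would run here ... */
	__sched_trace_tasklet_end_external(owner, 0 /* not flushed */);

	/* matching end event; decrements the per-CPU nesting count */
	__sched_trace_nv_interrupt_end_external();
}
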