diff --git a/include/litmus/cheap.h b/include/litmus/cheap.h
new file mode 100644
index 0000000..39c29fb
--- /dev/null
+++ b/include/litmus/cheap.h
@@ -0,0 +1,63 @@
+/* cheap.h -- A concurrent heap implementation.
+ *
+ * (c) 2009 Bjoern B. Brandenburg,
+ *
+ * Based on:
+ *
+ * G. Hunt, M. Michael, S. Parthasarathy, and M. Scott
+ * "An efficient algorithm for concurrent priority queue heaps."
+ * Information Processing Letters, 60(3): 151-157, November 1996
+ */
+
+#ifndef CHEAP_H
+#define CHEAP_H
+
+#include "linux/spinlock.h"
+
+#define CHEAP_EMPTY 0xffffffff
+#define CHEAP_READY 0xffffff00
+#define CHEAP_ROOT  0
+
+struct cheap_node {
+	spinlock_t lock;
+
+	unsigned int tag;
+	void*        value;
+};
+
+struct cheap {
+	spinlock_t lock;
+
+	unsigned int next;
+	unsigned int size;
+
+	struct cheap_node* heap;
+};
+
+typedef int (*cheap_prio_t)(void* a, void* b);
+
+void cheap_init(struct cheap* ch,
+		unsigned int size,
+		struct cheap_node* nodes);
+
+int cheap_insert(cheap_prio_t higher_prio,
+		 struct cheap* ch,
+		 void* item,
+		 int pid);
+
+void* cheap_peek(struct cheap* ch);
+
+typedef int (*cheap_take_predicate_t)(void* ctx, void* value);
+
+void* cheap_take_if(cheap_take_predicate_t pred,
+		    void* pred_ctx,
+		    cheap_prio_t higher_prio,
+		    struct cheap* ch);
+
+static inline void* cheap_take(cheap_prio_t higher_prio,
+			       struct cheap* ch)
+{
+	return cheap_take_if(NULL, NULL, higher_prio, ch);
+}
+
+#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 6c7a4c5..1b99049 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -86,7 +86,6 @@ void litmus_exit_task(struct task_struct *tsk);
 #define get_rt_phase(t)		(tsk_rt(t)->task_params.phase)
 #define get_partition(t)	(tsk_rt(t)->task_params.cpu)
 #define get_deadline(t)		(tsk_rt(t)->job_params.deadline)
-#define get_release(t)		(tsk_rt(t)->job_params.release)
 #define get_class(t)		(tsk_rt(t)->task_params.cls)
 
 inline static int budget_exhausted(struct task_struct* t)
@@ -102,6 +101,8 @@ inline static int budget_exhausted(struct task_struct* t)
 #define is_be(t)		\
 	(tsk_rt(t)->task_params.class == RT_CLASS_BEST_EFFORT)
 
+#define get_release(t)		(tsk_rt(t)->job_params.release)
+
 /* Our notion of time within LITMUS: kernel monotonic time. */
 static inline lt_t litmus_clock(void)
 {
diff --git a/litmus/Kconfig b/litmus/Kconfig
index f73a454..c2a7756 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -19,7 +19,7 @@ config SRP
 	bool "Stack Resource Policy (SRP)"
 	default n
 	help
-	  Include support for Baker's Stack Resource Policy. 
+	  Include support for Baker's Stack Resource Policy.
 
 	  Say Yes if you want FMLP local long critical section
 	  synchronization support.
@@ -43,7 +43,7 @@ config FEATHER_TRACE
 	help
	  Feather-Trace basic tracing infrastructure. Includes device file
	  driver and instrumentation point support.
- + config SCHED_TASK_TRACE bool "Trace real-time tasks" diff --git a/litmus/Makefile b/litmus/Makefile index 837f697..d9e8dc0 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -6,11 +6,14 @@ obj-y = sched_plugin.o litmus.o \ edf_common.o jobs.o \ rt_domain.o fdso.o sync.o \ fmlp.o srp.o norqlock.o \ - heap.o \ + cheap.o heap.o \ sched_gsn_edf.o \ sched_psn_edf.o \ sched_cedf.o \ - sched_pfair.o + sched_pfair.o \ + sched_gq_edf.o \ + sched_gedf.o \ + sched_ghq_edf.o obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o diff --git a/litmus/cheap.c b/litmus/cheap.c new file mode 100644 index 0000000..9fc68fd --- /dev/null +++ b/litmus/cheap.c @@ -0,0 +1,249 @@ +#include "litmus/cheap.h" + +static unsigned int __cheap_parent(unsigned int child) +{ + return (child - 1) / 2; +} + +static unsigned int __cheap_left_child(unsigned int parent) +{ + return parent * 2 + 1; +} + +static unsigned int __cheap_right_child(unsigned int parent) +{ + return parent * 2 + 2; +} + +static void __cheap_swap(struct cheap_node* a, struct cheap_node* b) +{ + unsigned int tag; + void* val; + tag = a->tag; + val = a->value; + a->tag = b->tag; + a->value = b->value; + b->tag = tag; + b->value = val; +} + +void cheap_init(struct cheap* ch, unsigned int size, + struct cheap_node* nodes) +{ + unsigned int i; + spin_lock_init(&ch->lock); + ch->next = 0; + ch->size = size; + ch->heap = nodes; + + for (i = 0; i < size; i++) { + spin_lock_init(&ch->heap[i].lock); + ch->heap[i].tag = CHEAP_EMPTY; + ch->heap[i].value = NULL; + } +} + +void* cheap_peek(struct cheap* ch) +{ + void* val; + spin_lock(&ch->heap[CHEAP_ROOT].lock); + val = ch->heap[CHEAP_ROOT].tag != CHEAP_EMPTY ? + ch->heap[CHEAP_ROOT].value : NULL; + spin_unlock(&ch->heap[CHEAP_ROOT].lock); + return val; +} + +int cheap_insert(cheap_prio_t higher_prio, + struct cheap* ch, + void* item, + int pid) +{ + int stop = 0; + unsigned int child, parent, locked; + unsigned int wait_for_parent_state; + + lockdep_off(); /* generates false positives */ + + spin_lock(&ch->lock); + if (ch->next < ch->size) { + /* ok, node allocated */ + child = ch->next++; + spin_lock(&ch->heap[child].lock); + ch->heap[child].tag = pid; + ch->heap[child].value = item; + spin_unlock(&ch->lock); + } else { + /* out of space! */ + spin_unlock(&ch->lock); + lockdep_on(); + return -1; + } + + spin_unlock(&ch->heap[child].lock); + + /* bubble up */ + while (!stop && child > CHEAP_ROOT) { + parent = __cheap_parent(child); + spin_lock(&ch->heap[parent].lock); + spin_lock(&ch->heap[child].lock); + locked = child; + wait_for_parent_state = CHEAP_EMPTY; + if (ch->heap[parent].tag == CHEAP_READY && + ch->heap[child].tag == pid) { + /* no interference */ + if (higher_prio(ch->heap[child].value, + ch->heap[parent].value)) { + /* out of order; swap and move up */ + __cheap_swap(ch->heap + child, + ch->heap + parent); + child = parent; + } else { + /* In order; we are done. */ + ch->heap[child].tag = CHEAP_READY; + stop = 1; + } + } else if (ch->heap[parent].tag == CHEAP_EMPTY) { + /* Concurrent extract moved child to root; + * we are done. + */ + stop = 1; + } else if (ch->heap[child].tag != pid) { + /* Concurrent extract moved child up; + * we go after it. + */ + child = parent; + } else { + /* Some other process needs to act first. + * We need to back off a little in order + * to give the others a chance to acquire the + * parent's lock. 
+ */ + wait_for_parent_state = ch->heap[parent].tag; + } + + spin_unlock(&ch->heap[locked].lock); + spin_unlock(&ch->heap[parent].lock); + + while (wait_for_parent_state != CHEAP_EMPTY && + ((volatile unsigned int) ch->heap[parent].tag) == + wait_for_parent_state) + cpu_relax(); + + } + if (!stop && child == CHEAP_ROOT) { + spin_lock(&ch->heap[child].lock); + if (ch->heap[child].tag == pid) + ch->heap[child].tag = CHEAP_READY; + spin_unlock(&ch->heap[child].lock); + } + + lockdep_on(); + return 0; +} + +void* cheap_take_if(cheap_take_predicate_t pred, + void* pred_ctx, + cheap_prio_t higher_prio, + struct cheap* ch) +{ + void *val, *cval; + unsigned int ctag; + unsigned int left, right, child, parent; + + lockdep_off(); + spin_lock(&ch->lock); + if (ch->next > CHEAP_ROOT) { + child = ch->next - 1; + spin_lock(&ch->heap[child].lock); + /* see if callback wants this item + */ + if (!pred || pred(pred_ctx, ch->heap[child].value)) + /* yes, proceed */ + ch->next--; + else { + /* no, cleanup and return */ + spin_unlock(&ch->heap[child].lock); + child = ch->size; + } + } else + child = ch->size; + spin_unlock(&ch->lock); + + if (child == ch->size) { + lockdep_on(); + /* empty heap */ + return NULL; + } + + /* take value from last leaf */ + cval = ch->heap[child].value; + ctag = ch->heap[child].tag; + /* free last leaf */ + ch->heap[child].tag = CHEAP_EMPTY; + ch->heap[child].value = NULL; + + /* unlock before locking root to maintain locking order */ + spin_unlock(&ch->heap[child].lock); + + spin_lock(&ch->heap[CHEAP_ROOT].lock); + if (ch->heap[CHEAP_ROOT].tag == CHEAP_EMPTY) { + /* heap became empty, we got the last one */ + spin_unlock(&ch->heap[CHEAP_ROOT].lock); + lockdep_on(); + return cval; + } else { + /* grab value of root (=min), replace with + * what we got from the last leaf + */ + val = ch->heap[CHEAP_ROOT].value; + ch->heap[CHEAP_ROOT].value = cval; + ch->heap[CHEAP_ROOT].tag = CHEAP_READY; + } + + /* Bubble down. We are still holding the ROOT (=parent) lock. */ + child = 0; + parent = CHEAP_ROOT; + while (parent < __cheap_parent(ch->size)) { + left = __cheap_left_child(parent); + right = __cheap_right_child(parent); + spin_lock(&ch->heap[left].lock); + if (ch->heap[left].tag == CHEAP_EMPTY) { + /* end of the heap, done */ + spin_unlock(&ch->heap[left].lock); + break; + } else if (right < ch->size) { + /* right child node exists */ + spin_lock(&ch->heap[right].lock); + if (ch->heap[right].tag == CHEAP_EMPTY || + higher_prio(ch->heap[left].value, + ch->heap[right].value)) { + /* left child node has higher priority */ + spin_unlock(&ch->heap[right].lock); + child = left; + } else { + /* right child node has higher priority */ + spin_unlock(&ch->heap[left].lock); + child = right; + } + } else { + /* right child node does not exist */ + child = left; + } + if (higher_prio(ch->heap[child].value, + ch->heap[parent].value)) { + /* parent and child out of order */ + __cheap_swap(ch->heap + child, + ch->heap + parent); + spin_unlock(&ch->heap[parent].lock); + /* move down */ + parent = child; + } else { + /* in order; we are done. 
*/ + spin_unlock(&ch->heap[child].lock); + break; + } + } + spin_unlock(&ch->heap[parent].lock); + lockdep_on(); + return val; +} diff --git a/litmus/fdso.c b/litmus/fdso.c index 7a16f64..bdc0466 100644 --- a/litmus/fdso.c +++ b/litmus/fdso.c @@ -134,7 +134,7 @@ static struct od_table_entry* get_od_entry(struct task_struct* t) table = t->od_table; if (!table) { - table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS, + table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS, GFP_KERNEL); t->od_table = table; } diff --git a/litmus/litmus.c b/litmus/litmus.c index 3562322..b286f8f 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -490,11 +490,11 @@ asmlinkage long sys_exit_np(void) /* sys_null_call() is only used for determining raw system call * overheads (kernel entry, kernel exit). It has no useful side effects. - * If ts is non-NULL, then the current Feather-Trace time is recorded. + * If ts is non-NULL, then the current Feather-Trace time is recorded. */ asmlinkage long sys_null_call(cycles_t __user *ts) { - long ret = 0; + long ret = 0; cycles_t now; if (ts) { @@ -789,7 +789,7 @@ static int proc_write_release_master(struct file *file, if (count > 63) return -EINVAL; - + if (copy_from_user(msg, buffer, count)) return -EFAULT; @@ -799,7 +799,7 @@ static int proc_write_release_master(struct file *file, if (count > 1 && msg[count - 1] == '\n') msg[count - 1] = '\0'; - if (strcmp(msg, "NO_CPU") == 0) { + if (strcmp(msg, "NO_CPU") == 0) { atomic_set(&release_master_cpu, NO_CPU); return count; } else { diff --git a/litmus/norqlock.c b/litmus/norqlock.c index 1a17d6c..b812f34 100644 --- a/litmus/norqlock.c +++ b/litmus/norqlock.c @@ -50,7 +50,7 @@ void tick_no_rqlock(void) local_irq_save(flags); - wl = &__get_cpu_var(norq_worklist); + wl = &__get_cpu_var(norq_worklist); if (wl->hrtimer_hack) { /* bail out! */ diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c index 2d0920c..dabf196 100644 --- a/litmus/rt_domain.c +++ b/litmus/rt_domain.c @@ -163,7 +163,7 @@ static void reinit_release_heap(struct task_struct* t) /* initialize */ heap_init(&rh->heap); - + atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE); } diff --git a/litmus/sched_gedf.c b/litmus/sched_gedf.c new file mode 100644 index 0000000..9d07b1b --- /dev/null +++ b/litmus/sched_gedf.c @@ -0,0 +1,621 @@ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#define GEDF_MAX_TASKS 1000 + +/* cpu_entry_t - maintain the linked and scheduled state + */ +typedef struct { + int cpu; + struct task_struct* linked; /* only RT tasks */ + int picked; /* linked was seen */ + struct task_struct* scheduled; /* only RT tasks */ + struct heap_node* hn; +} cpu_entry_t; +DEFINE_PER_CPU(cpu_entry_t, gedf_cpu_entries); + +cpu_entry_t* gedf_cpus[NR_CPUS]; + +/* the cpus queue themselves according to priority in here */ +static struct heap_node gedf_heap_node[NR_CPUS]; +static struct heap gedf_cpu_heap; + +DEFINE_SPINLOCK(gedf_cpu_lock); /* synchronize access to cpu heap */ + +static struct cheap_node gedf_cheap_nodes[GEDF_MAX_TASKS]; +static struct cheap gedf_ready_queue; + +static rt_domain_t gedf; /* used only for the release queue */ + +static int cpu_lower_prio(struct heap_node *_a, struct heap_node *_b) +{ + cpu_entry_t *a, *b; + a = _a->value; + b = _b->value; + /* Note that a and b are inverted: we want the lowest-priority CPU at + * the top of the heap. 
+ */ + return edf_higher_prio(b->linked, a->linked); +} + +static void remove_from_cpu_heap(cpu_entry_t* entry) +{ + if (likely(heap_node_in_heap(entry->hn))) + heap_delete(cpu_lower_prio, &gedf_cpu_heap, entry->hn); +} + +/* update_cpu_position - Move the cpu entry to the correct place to maintain + * order in the cpu queue. Caller must hold gedf lock. + */ +static void update_cpu_position(cpu_entry_t *entry) +{ + remove_from_cpu_heap(entry); + heap_insert(cpu_lower_prio, &gedf_cpu_heap, entry->hn); +} + +/* caller must hold gedf lock */ +static cpu_entry_t* lowest_prio_cpu(int take) +{ + struct heap_node* hn; + if (take) + hn = heap_take(cpu_lower_prio, &gedf_cpu_heap); + else + hn = heap_peek(cpu_lower_prio, &gedf_cpu_heap); + return hn ? hn->value : NULL; +} + + +/* link_task_to_cpu - Update the link of a CPU. + * Handles the case where the to-be-linked task is already + * scheduled on a different CPU. + */ +static noinline void link_task_to_cpu(struct task_struct* linked, + cpu_entry_t *entry) +{ + cpu_entry_t *sched = NULL; + struct task_struct* tmp; + int on_cpu; + + BUG_ON(linked && !is_realtime(linked)); + + /* Currently linked task is set to be unlinked. */ + if (entry->linked) { + entry->linked->rt_param.linked_on = NO_CPU; + } + + /* Link new task to CPU. */ + if (linked) { + set_rt_flags(linked, RT_F_RUNNING); + /* handle task is already scheduled somewhere! */ + on_cpu = linked->rt_param.scheduled_on; + if (on_cpu != NO_CPU) { + sched = &per_cpu(gedf_cpu_entries, on_cpu); + /* this should only happen if not linked already */ + BUG_ON(sched->linked == linked); + + /* If we are already scheduled on the CPU to which we + * wanted to link, we don't need to do the swap -- + * we just link ourselves to the CPU and depend on + * the caller to get things right. + * + * But only swap if the other node is in the queue. + * If it is not, then it is being updated + * concurrently and some other task was already + * picked for it. + */ + if (entry != sched && heap_node_in_heap(sched->hn)) { + TRACE_TASK(linked, + "already scheduled on %d, " + "updating link.\n", + sched->cpu); + tmp = sched->linked; + linked->rt_param.linked_on = sched->cpu; + sched->linked = linked; + sched->picked = 1; + update_cpu_position(sched); + linked = tmp; + } + } + if (linked) /* might be NULL due to swap */ + linked->rt_param.linked_on = entry->cpu; + } + entry->linked = linked; + entry->picked = entry == sched; /* set to one if we linked to the + * the CPU that the task is + * executing on + */ + if (linked) + TRACE_TASK(linked, "linked to %d.\n", entry->cpu); + else + TRACE("NULL linked to %d.\n", entry->cpu); + update_cpu_position(entry); +} + +/* unlink - Make sure a task is not linked any longer to an entry + * where it was linked before. Must hold gedf_lock. 
+ */ +static noinline void unlink(struct task_struct* t) +{ + cpu_entry_t *entry; + + if (t->rt_param.linked_on != NO_CPU) { + /* unlink */ + entry = &per_cpu(gedf_cpu_entries, t->rt_param.linked_on); + t->rt_param.linked_on = NO_CPU; + link_task_to_cpu(NULL, entry); + } +} + + +/* preempt - force a CPU to reschedule + */ +static noinline void preempt(cpu_entry_t *entry) +{ + if (smp_processor_id() == entry->cpu) + set_tsk_need_resched(current); + else + smp_send_reschedule(entry->cpu); +} + + +static void add_to_ready_queue(struct task_struct* task) +{ + TRACE_TASK(task, "adding to ready queue\n"); + cheap_insert((cheap_prio_t) edf_higher_prio, + &gedf_ready_queue, + task, + smp_processor_id()); +} + +/* requeue - Put an unlinked task into gsn-edf domain. + * Caller must hold gedf_lock. + * + * call unlocked, but with preemptions disabled! + */ +static noinline void requeue(struct task_struct* task) +{ + if (is_released(task, litmus_clock())) + add_to_ready_queue(task); + else + /* it has got to wait */ + add_release(&gedf, task); +} + +static int preemption_required(cpu_entry_t* last, + struct task_struct* task) +{ + if (edf_higher_prio(task, last->linked)) { + /* yes, drop lock before dequeuing task + * and dequeue cpu state + */ + last = lowest_prio_cpu(1); + lockdep_on(); /* let lockdep see we actually released it */ + spin_unlock(&gedf_cpu_lock); + lockdep_off(); + return 1; + } else + return 0; +} + +/* check for any necessary preemptions */ +static void check_for_preemptions(void) +{ + int done = 0; + unsigned long flags; + struct task_struct *task, *unlinked; + cpu_entry_t* last; + + + local_irq_save(flags); + while (!done) { + unlinked = NULL; + spin_lock(&gedf_cpu_lock); + last = lowest_prio_cpu(0); + if (likely(last)) { + task = cheap_take_if( + (cheap_take_predicate_t) preemption_required, + last, + (cheap_prio_t) edf_higher_prio, + &gedf_ready_queue); + if (task) { + TRACE_TASK(task, "removed from ready Q\n"); + /* cpu lock was dropped, reacquire */ + spin_lock(&gedf_cpu_lock); + if (last->linked && !last->picked) + /* can be requeued by us */ + unlinked = last->linked; + TRACE("check_for_preemptions: " + "attempting to link task %d to %d\n", + task->pid, last->cpu); + link_task_to_cpu(task, last); + update_cpu_position(last); + } else + /* no preemption required */ + done = 1; + } else + /* all gone, being checked elsewhere? */ + done = 1; + spin_unlock(&gedf_cpu_lock); + if (unlinked) + /* stick it back into the queue */ + requeue(unlinked); + if (last && !done) + /* we have a preemption, send IPI */ + preempt(last); + } + local_irq_restore(flags); +} + +/* gedf_job_arrival: task is either resumed or released + * call only unlocked! 
+ */ +static noinline void gedf_job_arrival(struct task_struct* task) +{ + requeue(task); + check_for_preemptions(); +} + +static void gedf_release_jobs(rt_domain_t* rt, struct heap* tasks) +{ + struct heap_node* hn; + struct task_struct* t; + unsigned long flags; + + + local_irq_save(flags); + /* insert unlocked */ + while ((hn = heap_take(edf_ready_order, tasks))) { + t = (struct task_struct*) hn->value; + TRACE_TASK(t, "to be merged into ready queue " + "(is_released:%d, is_running:%d)\n", + is_released(t, litmus_clock()), + is_running(t)); + add_to_ready_queue(t); + } + + local_irq_restore(flags); + check_for_preemptions(); +} + +/* caller holds gedf_lock */ +static noinline int job_completion(cpu_entry_t* entry, int forced) +{ + + struct task_struct *t = entry->scheduled; + + sched_trace_task_completion(t, forced); + + TRACE_TASK(t, "job_completion().\n"); + + /* set flags */ + set_rt_flags(t, RT_F_SLEEP); + /* prepare for next period */ + prepare_for_next_period(t); + if (is_released(t, litmus_clock())) + sched_trace_task_release(t); + + + if (is_released(t, litmus_clock())){ + /* we changed the priority, see if we need to preempt */ + set_rt_flags(t, RT_F_RUNNING); + update_cpu_position(entry); + return 1; + } + else { + /* it has got to wait */ + unlink(t); + add_release(&gedf, t); + return 0; + } +} + +/* gedf_tick - this function is called for every local timer + * interrupt. + * + * checks whether the current task has expired and checks + * whether we need to preempt it if it has not expired + */ +static void gedf_tick(struct task_struct* t) +{ + if (is_realtime(t) && budget_exhausted(t)) + set_tsk_need_resched(t); +} + +static struct task_struct* gedf_schedule(struct task_struct * prev) +{ + cpu_entry_t* entry = &__get_cpu_var(gedf_cpu_entries); + int out_of_time, sleep, preempt, exists, blocks; + struct task_struct* next = NULL; + + /* Bail out early if we are the release master. + * The release master never schedules any real-time tasks. + */ + if (gedf.release_master == entry->cpu) + return NULL; + + TRACE_TASK(prev, "invoked gedf_schedule.\n"); + + /* sanity checking */ + BUG_ON(entry->scheduled && entry->scheduled != prev); + BUG_ON(entry->scheduled && !is_realtime(prev)); + BUG_ON(is_realtime(prev) && !entry->scheduled); + + /* (0) Determine state */ + exists = entry->scheduled != NULL; + blocks = exists && !is_running(entry->scheduled); + out_of_time = exists && budget_exhausted(entry->scheduled); + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; + + spin_lock(&gedf_cpu_lock); + + preempt = entry->scheduled != entry->linked; + + if (exists) + TRACE_TASK(prev, + "blocks:%d out_of_time:%d sleep:%d preempt:%d " + "state:%d sig:%d\n", + blocks, out_of_time, sleep, preempt, + prev->state, signal_pending(prev)); + if (preempt && entry->linked) + TRACE_TASK(prev, "will be preempted by %s/%d\n", + entry->linked->comm, entry->linked->pid); + + /* If a task blocks we have no choice but to reschedule. + */ + if (blocks) + unlink(entry->scheduled); + + + /* Any task that is preemptable and either exhausts its execution + * budget or wants to sleep completes. We may have to reschedule after + * this. Don't do a job completion if we block (can't have timers + * running for blocked jobs). Preemptions go first for the same reason. + */ + if ((out_of_time || sleep) && !blocks && !preempt) { + if (job_completion(entry, !sleep)) { + /* Task might stay with us. + * Drop locks and check for preemptions. + */ + spin_unlock(&gedf_cpu_lock); + /* anything to update ? 
*/ + check_for_preemptions(); + spin_lock(&gedf_cpu_lock); + /* if something higher priority got linked, + * then we need to add the task into the + * ready queue (since it wasn't added by + * check_for_preemptions b/c picked==1. + */ + if (entry->linked != prev) + add_to_ready_queue(prev); + } + } + + /* Link pending task if we became unlinked. + * NOTE: Do not hold locks while performing ready queue updates + * since we want concurrent access to the queue. + */ + if (!entry->linked) { + if (exists) + /* We are committed to descheduling; erase marker + * before we drop the lock. + */ + tsk_rt(prev)->scheduled_on = NO_CPU; + spin_unlock(&gedf_cpu_lock); + check_for_preemptions(); /* update links */ + spin_lock(&gedf_cpu_lock); + } + + /* The final scheduling decision. Do we need to switch for some reason? + * If linked is different from scheduled, then select linked as next. + */ + if (entry->linked != entry->scheduled) { + /* Schedule a linked job? */ + if (entry->linked) { + entry->linked->rt_param.scheduled_on = entry->cpu; + next = entry->linked; + } + if (entry->scheduled) + entry->scheduled->rt_param.scheduled_on = NO_CPU; + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. + */ + if (exists) + next = prev; + + /* Mark entry->linked as being ours. Do this unconditionally since + * entry->linked might have become reassigned to us while we dropped + * the lock even though we never descheduled it. In this case, + * entry->picked became reset. + */ + entry->picked = 1; + if (next) + tsk_rt(next)->scheduled_on = entry->cpu; + spin_unlock(&gedf_cpu_lock); + if (exists && preempt && !blocks) + /* stick preempted task back into the ready queue */ + gedf_job_arrival(prev); + + if (next) + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); + else if (exists && !next) + TRACE("becomes idle at %llu.\n", litmus_clock()); + + return next; +} + + +/* _finish_switch - we just finished the switch away from prev + */ +static void gedf_finish_switch(struct task_struct *prev) +{ + cpu_entry_t* entry = &__get_cpu_var(gedf_cpu_entries); + + entry->scheduled = is_realtime(current) ? 
current : NULL; + TRACE_TASK(prev, "switched away from\n"); +} + + +/* Prepare a task for running in RT mode + */ +static void gedf_task_new(struct task_struct * t, int on_rq, int running) +{ + unsigned long flags; + cpu_entry_t* entry; + + TRACE("gedf: task new %d\n", t->pid); + + spin_lock_irqsave(&gedf_cpu_lock, flags); + + /* setup job params */ + release_at(t, litmus_clock()); + + if (running) { + entry = &per_cpu(gedf_cpu_entries, task_cpu(t)); + BUG_ON(entry->scheduled); + if (entry->cpu != gedf.release_master) { + entry->scheduled = t; + t->rt_param.scheduled_on = task_cpu(t); + } else { + preempt(entry); + tsk_rt(t)->scheduled_on = NO_CPU; + } + } else { + tsk_rt(t)->scheduled_on = NO_CPU; + } + tsk_rt(t)->linked_on = NO_CPU; + + spin_unlock_irqrestore(&gedf_cpu_lock, flags); + + if (!running || entry->cpu == gedf.release_master) + /* schedule() will not insert task into ready_queue */ + gedf_job_arrival(t); +} + +static void gedf_task_wake_up(struct task_struct *task) +{ + unsigned long flags; + lt_t now; + + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); + + spin_lock_irqsave(&gedf_cpu_lock, flags); + now = litmus_clock(); + if (is_tardy(task, now)) { + /* new sporadic release */ + release_at(task, now); + sched_trace_task_release(task); + } + spin_unlock_irqrestore(&gedf_cpu_lock, flags); + gedf_job_arrival(task); +} + +static void gedf_task_block(struct task_struct *t) +{ + TRACE_TASK(t, "block at %llu\n", litmus_clock()); +} + +static void gedf_task_exit(struct task_struct * t) +{ + unsigned long flags; + + /* unlink if necessary */ + spin_lock_irqsave(&gedf_cpu_lock, flags); + /* remove from CPU state, if necessary */ + unlink(t); + if (tsk_rt(t)->scheduled_on != NO_CPU) { + gedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; + tsk_rt(t)->scheduled_on = NO_CPU; + } else { + /* FIXME: If t is currently queued, then we need to + * dequeue it now; otherwise it will probably + * cause a crash once it is dequeued. 
+ */ + TRACE_TASK(t, "called gedf_task_exit(), " + "but is not scheduled!\n"); + } + spin_unlock_irqrestore(&gedf_cpu_lock, flags); + + TRACE_TASK(t, "RIP\n"); +} + +static long gedf_admit_task(struct task_struct* tsk) +{ + return 0; +} + + +static long gedf_activate_plugin(void) +{ + int cpu; + cpu_entry_t *entry; + + heap_init(&gedf_cpu_heap); + gedf.release_master = atomic_read(&release_master_cpu); + + for_each_online_cpu(cpu) { + entry = &per_cpu(gedf_cpu_entries, cpu); + heap_node_init(&entry->hn, entry); + entry->linked = NULL; + entry->scheduled = NULL; + entry->picked = 0; + if (cpu != gedf.release_master) { + TRACE("G-EDF: Initializing CPU #%d.\n", cpu); + update_cpu_position(entry); + } else { + TRACE("G-EDF: CPU %d is release master.\n", cpu); + } + } + return 0; +} + + +/* Plugin object */ +static struct sched_plugin gedf_plugin __cacheline_aligned_in_smp = { + .plugin_name = "G-EDF", + .finish_switch = gedf_finish_switch, + .tick = gedf_tick, + .task_new = gedf_task_new, + .complete_job = complete_job, + .task_exit = gedf_task_exit, + .schedule = gedf_schedule, + .task_wake_up = gedf_task_wake_up, + .task_block = gedf_task_block, + .admit_task = gedf_admit_task, + .activate_plugin = gedf_activate_plugin, +}; + + +static int __init init_gedf(void) +{ + int cpu; + cpu_entry_t *entry; + + cheap_init(&gedf_ready_queue, GEDF_MAX_TASKS, gedf_cheap_nodes); + /* initialize CPU state */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + entry = &per_cpu(gedf_cpu_entries, cpu); + gedf_cpus[cpu] = entry; + entry->cpu = cpu; + entry->hn = &gedf_heap_node[cpu]; + heap_node_init(&entry->hn, entry); + } + edf_domain_init(&gedf, NULL, gedf_release_jobs); + return register_sched_plugin(&gedf_plugin); +} + + +module_init(init_gedf); diff --git a/litmus/sched_ghq_edf.c b/litmus/sched_ghq_edf.c new file mode 100644 index 0000000..a9b1d06 --- /dev/null +++ b/litmus/sched_ghq_edf.c @@ -0,0 +1,720 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +/* cpu_entry_t - maintain the linked and scheduled state + */ +typedef struct { + int cpu; + struct task_struct* linked; /* only RT tasks */ + int picked; /* linked was seen */ + struct task_struct* scheduled; /* only RT tasks */ + struct heap_node* hn; +} cpu_entry_t; +DEFINE_PER_CPU(cpu_entry_t, ghqedf_cpu_entries); + +DEFINE_SPINLOCK(ghqedf_cpu_lock); /* synchronize access to cpu heap */ + +cpu_entry_t* ghqedf_cpus[NR_CPUS]; + +/* the cpus queue themselves according to priority in here */ +static struct heap_node ghqedf_heap_node[NR_CPUS]; +static struct heap ghqedf_cpu_heap; + +static rt_domain_t ghqedf; /* used only for the release queue */ + +struct subqueue { + struct heap queue; + struct task_struct* top; + struct heap_node* hn; + spinlock_t lock; +}; + +/* per-cpu sub queue */ +//DEFINE_PER_CPU(struct subqueue, ghqedf_subqueue); + +struct subqueue ghqedf_subqueue[NR_CPUS]; + +/* heap nodes for subqueue::hn field */ +static struct heap_node ghqedf_subqueue_heap_node[NR_CPUS]; + +/* queue of sub queues */ +struct heap master_queue; + +/* re-use ready queue lock */ +#define master_lock (ghqedf.ready_lock) + +static int subqueue_higher_prio(struct heap_node *_a, struct heap_node *_b) +{ + struct subqueue *a, *b; + a = _a->value; + b = _b->value; + return edf_higher_prio(a->top, b->top); +} + +static void subqueues_init(void) +{ + int cpu; + struct subqueue *q; + + heap_init(&master_queue); + + for_each_online_cpu(cpu) { +// q = &per_cpu(ghqedf_subqueue, cpu); + q = ghqedf_subqueue + cpu; + 
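+		/* Per-CPU subqueue: jobs released on this CPU are merged
+		 * into q->queue under q->lock; the global master_queue,
+		 * which orders subqueues by their cached top element (see
+		 * subqueue_higher_prio()), is only locked on the release
+		 * path when the local top changes.
+		 */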
heap_init(&q->queue); + q->top = NULL; + q->hn = ghqedf_subqueue_heap_node + cpu; + heap_node_init(&q->hn, q); + spin_lock_init(&q->lock); + heap_insert(subqueue_higher_prio, &master_queue, q->hn); + } +} + +static void __update_top(struct subqueue* q) +{ + struct heap_node *tmp; + + tmp = heap_peek(edf_ready_order, &q->queue); + q->top = tmp ? tmp->value : NULL; +} + +static void update_top(struct subqueue* q) +{ + spin_lock(&q->lock); + __update_top(q); + spin_unlock(&q->lock); +} + +static void merge_into_ready_queue(struct heap *h) +{ +// struct subqueue *q = &__get_cpu_var(ghqedf_subqueue); + struct subqueue *q = ghqedf_subqueue + smp_processor_id(); + struct heap_node *tmp; + void *old_top; + int changed_top = 0; + + spin_lock(&q->lock); + tmp = heap_peek(edf_ready_order, &q->queue); + old_top = tmp ? tmp->value : NULL; + heap_union(edf_ready_order, &q->queue, h); + /* is the new min the task that we just inserted? */ + changed_top = !old_top || + heap_peek(edf_ready_order, &q->queue)->value != old_top; + spin_unlock(&q->lock); + if (changed_top) { + /* need to update master queue */ + spin_lock(&master_lock); + /* If it is not in the heap then it is already + * being updated concurrently, so we skip it. + */ + if (likely(heap_node_in_heap(q->hn))) { + heap_delete(subqueue_higher_prio, &master_queue, q->hn); + update_top(q); + heap_insert(subqueue_higher_prio, &master_queue, q->hn); + } else + TRACE("not updating subqueue top\n"); + spin_unlock(&master_lock); + } +} + +static void add_to_ready_queue(struct task_struct *t) +{ + struct heap tmp; + + TRACE_TASK(t, "adding to ready queue\n"); + heap_init(&tmp); + heap_insert(edf_ready_order, &tmp, tsk_rt(t)->heap_node); + merge_into_ready_queue(&tmp); +} + + +static int cpu_lower_prio(struct heap_node *_a, struct heap_node *_b) +{ + cpu_entry_t *a, *b; + a = _a->value; + b = _b->value; + /* Note that a and b are inverted: we want the lowest-priority CPU at + * the top of the heap. + */ + return edf_higher_prio(b->linked, a->linked); +} + +static void remove_from_cpu_heap(cpu_entry_t* entry) +{ + if (likely(heap_node_in_heap(entry->hn))) + heap_delete(cpu_lower_prio, &ghqedf_cpu_heap, entry->hn); +} + +/* update_cpu_position - Move the cpu entry to the correct place to maintain + * order in the cpu queue. Caller must hold ghqedf lock. + */ +static void update_cpu_position(cpu_entry_t *entry) +{ + remove_from_cpu_heap(entry); + heap_insert(cpu_lower_prio, &ghqedf_cpu_heap, entry->hn); +} + +/* caller must hold ghqedf lock */ +static cpu_entry_t* lowest_prio_cpu(int take) +{ + struct heap_node* hn; + if (take) + hn = heap_take(cpu_lower_prio, &ghqedf_cpu_heap); + else + hn = heap_peek(cpu_lower_prio, &ghqedf_cpu_heap); + return hn ? hn->value : NULL; +} + + +/* link_task_to_cpu - Update the link of a CPU. + * Handles the case where the to-be-linked task is already + * scheduled on a different CPU. + */ +static noinline void link_task_to_cpu(struct task_struct* linked, + cpu_entry_t *entry) +{ + cpu_entry_t *sched = NULL; + struct task_struct* tmp; + int on_cpu; + + BUG_ON(linked && !is_realtime(linked)); + + /* Currently linked task is set to be unlinked. */ + if (entry->linked) { + entry->linked->rt_param.linked_on = NO_CPU; + } + + /* Link new task to CPU. */ + if (linked) { + set_rt_flags(linked, RT_F_RUNNING); + /* handle task is already scheduled somewhere! 
*/ + on_cpu = linked->rt_param.scheduled_on; + if (on_cpu != NO_CPU) { + sched = &per_cpu(ghqedf_cpu_entries, on_cpu); + /* this should only happen if not linked already */ + BUG_ON(sched->linked == linked); + + /* If we are already scheduled on the CPU to which we + * wanted to link, we don't need to do the swap -- + * we just link ourselves to the CPU and depend on + * the caller to get things right. + * + * But only swap if the other node is in the queue. + * If it is not, then it is being updated + * concurrently and some other task was already + * picked for it. + */ + if (entry != sched && heap_node_in_heap(sched->hn)) { + TRACE_TASK(linked, + "already scheduled on %d, " + "updating link.\n", + sched->cpu); + tmp = sched->linked; + linked->rt_param.linked_on = sched->cpu; + sched->linked = linked; + sched->picked = 1; + update_cpu_position(sched); + linked = tmp; + } + } + if (linked) /* might be NULL due to swap */ + linked->rt_param.linked_on = entry->cpu; + } + entry->linked = linked; + entry->picked = entry == sched; /* set to one if we linked to the + * the CPU that the task is + * executing on + */ + if (linked) + TRACE_TASK(linked, "linked to %d.\n", entry->cpu); + else + TRACE("NULL linked to %d.\n", entry->cpu); + update_cpu_position(entry); +} + +/* unlink - Make sure a task is not linked any longer to an entry + * where it was linked before. Must hold ghqedf_lock. + */ +static noinline void unlink(struct task_struct* t) +{ + cpu_entry_t *entry; + + if (t->rt_param.linked_on != NO_CPU) { + /* unlink */ + entry = &per_cpu(ghqedf_cpu_entries, t->rt_param.linked_on); + t->rt_param.linked_on = NO_CPU; + link_task_to_cpu(NULL, entry); + } +} + + +/* preempt - force a CPU to reschedule + */ +static noinline void preempt(cpu_entry_t *entry) +{ + if (smp_processor_id() == entry->cpu) + set_tsk_need_resched(current); + else + smp_send_reschedule(entry->cpu); +} + +/* requeue - Put an unlinked task into gsn-edf domain. + * Caller must hold ghqedf_lock. + * + * call unlocked, but with preemptions disabled! + */ +static noinline void requeue(struct task_struct* task) +{ + if (is_released(task, litmus_clock())) + add_to_ready_queue(task); + else + /* it has got to wait */ + add_release(&ghqedf, task); +} + + +static struct task_struct* take_if_preempt_required(cpu_entry_t* last) +{ + struct heap_node* tmp; + struct subqueue* q; + struct task_struct* t; + int preempt_required = 0; + + spin_lock(&master_lock); + tmp = heap_peek(subqueue_higher_prio, &master_queue); + BUG_ON(!tmp); /* should be there */ + q = tmp->value; + + spin_lock(&q->lock); + tmp = heap_peek(edf_ready_order, &q->queue); + t = tmp ? 
tmp->value : NULL; + preempt_required = edf_higher_prio(t, last->linked); + if (preempt_required) { + /* take it out */ + last = lowest_prio_cpu(1); + spin_unlock(&ghqedf_cpu_lock); + heap_delete(subqueue_higher_prio, &master_queue, q->hn); + } + /* drop lock master lock while we update subqueue */ + spin_unlock(&master_lock); + + if (preempt_required) { + heap_delete(edf_ready_order, &q->queue, tmp); + /* precompute, so that next lookup is O(1) */ + __update_top(q); + spin_unlock(&q->lock); + + /* re-insert with new priority */ + spin_lock(&master_lock); + /* update, with right locking order */ + update_top(q); + heap_insert(subqueue_higher_prio, &master_queue, q->hn); + spin_unlock(&master_lock); + + return t; + } else { + spin_unlock(&q->lock); + return NULL; + } +} + + +/* check for any necessary preemptions */ +static void check_for_preemptions(void) +{ + int done = 0; + unsigned long flags; + struct task_struct *task, *unlinked; + cpu_entry_t* last; + + + local_irq_save(flags); + while (!done) { + unlinked = NULL; + spin_lock(&ghqedf_cpu_lock); + last = lowest_prio_cpu(0); + if (likely(last)) { + task = take_if_preempt_required(last); + if (task) { + TRACE_TASK(task, "removed from ready Q\n"); + /* cpu lock was dropped, reacquire */ + spin_lock(&ghqedf_cpu_lock); + if (last->linked && !last->picked) + /* can be requeued by us */ + unlinked = last->linked; + TRACE("check_for_preemptions: " + "attempting to link task %d to %d\n", + task->pid, last->cpu); + link_task_to_cpu(task, last); + update_cpu_position(last); + } else + /* no preemption required */ + done = 1; + } else + /* all gone, being checked elsewhere? */ + done = 1; + spin_unlock(&ghqedf_cpu_lock); + if (unlinked) + /* stick it back into the queue */ + requeue(unlinked); + if (last && !done) + /* we have a preemption, send IPI */ + preempt(last); + } + TRACE("done with preemption checking\n"); + local_irq_restore(flags); +} + +/* ghqedf_job_arrival: task is either resumed or released + * call only unlocked! + */ +static noinline void ghqedf_job_arrival(struct task_struct* task) +{ + requeue(task); + check_for_preemptions(); +} + +static void ghqedf_release_jobs(rt_domain_t* rt, struct heap* tasks) +{ + unsigned long flags; + + TRACE("release_jobs() invoked\n"); + local_irq_save(flags); + /* insert unlocked */ + merge_into_ready_queue(tasks); + local_irq_restore(flags); + check_for_preemptions(); +} + +/* caller holds ghqedf_lock */ +static noinline int job_completion(cpu_entry_t* entry, int forced) +{ + + struct task_struct *t = entry->scheduled; + + sched_trace_task_completion(t, forced); + + TRACE_TASK(t, "job_completion().\n"); + + /* set flags */ + set_rt_flags(t, RT_F_SLEEP); + /* prepare for next period */ + prepare_for_next_period(t); + if (is_released(t, litmus_clock())) + sched_trace_task_release(t); + + + if (is_released(t, litmus_clock())){ + /* we changed the priority, see if we need to preempt */ + set_rt_flags(t, RT_F_RUNNING); + update_cpu_position(entry); + return 1; + } + else { + /* it has got to wait */ + unlink(t); + add_release(&ghqedf, t); + return 0; + } +} + +/* ghqedf_tick - this function is called for every local timer + * interrupt. 
+ * + * checks whether the current task has expired and checks + * whether we need to preempt it if it has not expired + */ +static void ghqedf_tick(struct task_struct* t) +{ + if (is_realtime(t) && budget_exhausted(t)) + set_tsk_need_resched(t); +} + +static struct task_struct* ghqedf_schedule(struct task_struct * prev) +{ + cpu_entry_t* entry = &__get_cpu_var(ghqedf_cpu_entries); + int out_of_time, sleep, preempt, exists, blocks; + struct task_struct* next = NULL; + + /* Bail out early if we are the release master. + * The release master never schedules any real-time tasks. + */ + if (ghqedf.release_master == entry->cpu) + return NULL; + +// TRACE_TASK(prev, "invoked ghqedf_schedule.\n"); + + /* sanity checking */ + BUG_ON(entry->scheduled && entry->scheduled != prev); + BUG_ON(entry->scheduled && !is_realtime(prev)); + BUG_ON(is_realtime(prev) && !entry->scheduled); + + /* (0) Determine state */ + exists = entry->scheduled != NULL; + blocks = exists && !is_running(entry->scheduled); + out_of_time = exists && budget_exhausted(entry->scheduled); + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; + + spin_lock(&ghqedf_cpu_lock); + + preempt = entry->scheduled != entry->linked; + + if (exists) + TRACE_TASK(prev, + "blocks:%d out_of_time:%d sleep:%d preempt:%d " + "state:%d sig:%d\n", + blocks, out_of_time, sleep, preempt, + prev->state, signal_pending(prev)); + if (preempt && entry->linked) + TRACE_TASK(prev, "will be preempted by %s/%d\n", + entry->linked->comm, entry->linked->pid); + + /* If a task blocks we have no choice but to reschedule. + */ + if (blocks) + unlink(entry->scheduled); + + + /* Any task that is preemptable and either exhausts its execution + * budget or wants to sleep completes. We may have to reschedule after + * this. Don't do a job completion if we block (can't have timers + * running for blocked jobs). Preemptions go first for the same reason. + */ + if ((out_of_time || sleep) && !blocks && !preempt) { + if (job_completion(entry, !sleep)) { + /* Task might stay with us. + * Drop locks and check for preemptions. + */ + spin_unlock(&ghqedf_cpu_lock); + /* anything to update ? */ + check_for_preemptions(); + spin_lock(&ghqedf_cpu_lock); + /* if something higher priority got linked, + * then we need to add the task into the + * ready queue (since it wasn't added by + * check_for_preemptions b/c picked==1. + */ + if (entry->linked != prev) + add_to_ready_queue(prev); + } + } + + /* Link pending task if we became unlinked. + * NOTE: Do not hold locks while performing ready queue updates + * since we want concurrent access to the queue. + */ + if (!entry->linked) { + if (exists) + /* We are committed to descheduling; erase marker + * before we drop the lock. + */ + tsk_rt(prev)->scheduled_on = NO_CPU; + spin_unlock(&ghqedf_cpu_lock); + check_for_preemptions(); /* update links */ + spin_lock(&ghqedf_cpu_lock); + } + + /* The final scheduling decision. Do we need to switch for some reason? + * If linked is different from scheduled, then select linked as next. + */ + if (entry->linked != entry->scheduled) { + /* Schedule a linked job? */ + if (entry->linked) { + entry->linked->rt_param.scheduled_on = entry->cpu; + entry->picked = 1; + next = entry->linked; + } + if (entry->scheduled) + entry->scheduled->rt_param.scheduled_on = NO_CPU; + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. 
+ */ + if (exists) + next = prev; + + spin_unlock(&ghqedf_cpu_lock); + if (exists && preempt && !blocks) + /* stick preempted task back into the ready queue */ + ghqedf_job_arrival(prev); + + if (next) + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); + else if (exists && !next) + TRACE("becomes idle at %llu.\n", litmus_clock()); + + return next; +} + + +/* _finish_switch - we just finished the switch away from prev + */ +static void ghqedf_finish_switch(struct task_struct *prev) +{ + cpu_entry_t* entry = &__get_cpu_var(ghqedf_cpu_entries); + + entry->scheduled = is_realtime(current) ? current : NULL; + TRACE_TASK(prev, "switched away from\n"); +} + + +/* Prepare a task for running in RT mode + */ +static void ghqedf_task_new(struct task_struct * t, int on_rq, int running) +{ + unsigned long flags; + cpu_entry_t* entry; + + TRACE("ghqedf: task new %d\n", t->pid); + + spin_lock_irqsave(&ghqedf_cpu_lock, flags); + + /* setup job params */ + release_at(t, litmus_clock()); + + if (running) { + entry = &per_cpu(ghqedf_cpu_entries, task_cpu(t)); + BUG_ON(entry->scheduled); + if (entry->cpu != ghqedf.release_master) { + entry->scheduled = t; + t->rt_param.scheduled_on = task_cpu(t); + } else { + preempt(entry); + tsk_rt(t)->scheduled_on = NO_CPU; + } + } else { + tsk_rt(t)->scheduled_on = NO_CPU; + } + tsk_rt(t)->linked_on = NO_CPU; + + spin_unlock_irqrestore(&ghqedf_cpu_lock, flags); + + if (!running || entry->cpu == ghqedf.release_master) + ghqedf_job_arrival(t); +} + +static void ghqedf_task_wake_up(struct task_struct *task) +{ + unsigned long flags; + lt_t now; + + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); + + spin_lock_irqsave(&ghqedf_cpu_lock, flags); + now = litmus_clock(); + if (is_tardy(task, now)) { + /* new sporadic release */ + release_at(task, now); + sched_trace_task_release(task); + } + spin_unlock_irqrestore(&ghqedf_cpu_lock, flags); + ghqedf_job_arrival(task); +} + +static void ghqedf_task_block(struct task_struct *t) +{ + TRACE_TASK(t, "block at %llu\n", litmus_clock()); +} + +static void ghqedf_task_exit(struct task_struct * t) +{ + unsigned long flags; + + /* unlink if necessary */ + spin_lock_irqsave(&ghqedf_cpu_lock, flags); + /* remove from CPU state, if necessary */ + unlink(t); + if (tsk_rt(t)->scheduled_on != NO_CPU) { + ghqedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; + tsk_rt(t)->scheduled_on = NO_CPU; + } else { + /* FIXME: If t is currently queued, then we need to + * dequeue it now; otherwise it will probably + * cause a crash once it is dequeued. 
+ */ + TRACE_TASK(t, "called ghqedf_task_exit(), " + "but is not scheduled!\n"); + } + spin_unlock_irqrestore(&ghqedf_cpu_lock, flags); + + TRACE_TASK(t, "RIP\n"); +} + +static long ghqedf_admit_task(struct task_struct* tsk) +{ + return 0; +} + + +static long ghqedf_activate_plugin(void) +{ + int cpu; + cpu_entry_t *entry; + + heap_init(&ghqedf_cpu_heap); + subqueues_init(); + ghqedf.release_master = atomic_read(&release_master_cpu); + + for_each_online_cpu(cpu) { + entry = &per_cpu(ghqedf_cpu_entries, cpu); + heap_node_init(&entry->hn, entry); + entry->linked = NULL; + entry->scheduled = NULL; + entry->picked = 0; + if (cpu != ghqedf.release_master) { + TRACE("G-EDF: Initializing CPU #%d.\n", cpu); + update_cpu_position(entry); + } else { + TRACE("G-EDF: CPU %d is release master.\n", cpu); + } + } + return 0; +} + + +/* Plugin object */ +static struct sched_plugin ghqedf_plugin __cacheline_aligned_in_smp = { + .plugin_name = "GHQ-EDF", + .finish_switch = ghqedf_finish_switch, + .tick = ghqedf_tick, + .task_new = ghqedf_task_new, + .complete_job = complete_job, + .task_exit = ghqedf_task_exit, + .schedule = ghqedf_schedule, + .task_wake_up = ghqedf_task_wake_up, + .task_block = ghqedf_task_block, + .admit_task = ghqedf_admit_task, + .activate_plugin = ghqedf_activate_plugin, +}; + + +static int __init init_ghqedf(void) +{ + int cpu; + cpu_entry_t *entry; + + /* initialize CPU state */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + entry = &per_cpu(ghqedf_cpu_entries, cpu); + ghqedf_cpus[cpu] = entry; + entry->cpu = cpu; + entry->hn = &ghqedf_heap_node[cpu]; + heap_node_init(&entry->hn, entry); + } + edf_domain_init(&ghqedf, NULL, ghqedf_release_jobs); + return register_sched_plugin(&ghqedf_plugin); +} + + +module_init(init_ghqedf); diff --git a/litmus/sched_gq_edf.c b/litmus/sched_gq_edf.c new file mode 100644 index 0000000..7b6e8dd --- /dev/null +++ b/litmus/sched_gq_edf.c @@ -0,0 +1,606 @@ +/* A quantum-based implementation of G-EDF. + * + * Based on GSN-EDF. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +/* cpu_state_t - maintain the linked and scheduled state + */ +typedef struct { + int cpu; + struct task_struct* linked; /* only RT tasks */ + struct task_struct* scheduled; /* only RT tasks */ + struct task_struct* absentee; /* blocked quantum owner */ + struct heap_node* hn; +} cpu_state_t; +DEFINE_PER_CPU(cpu_state_t, gq_cpu_entries); + +cpu_state_t* gq_cpus[NR_CPUS]; + +/* the cpus queue themselves according to priority in here */ +static struct heap_node gq_heap_node[NR_CPUS]; +static struct heap gq_cpu_heap; +/* jobs to be merged at the beginning of the next quantum */ +static struct heap gq_released_heap; + + +static rt_domain_t gqedf; +#define gq_lock (gqedf.ready_lock) + +DEFINE_SPINLOCK(gq_release_lock); + +static void preempt(cpu_state_t *entry) +{ + if (smp_processor_id() == entry->cpu) + set_tsk_need_resched(current); + else + smp_send_reschedule(entry->cpu); +} + +static int cpu_lower_prio(struct heap_node *_a, struct heap_node *_b) +{ + cpu_state_t *a, *b; + a = _a->value; + b = _b->value; + /* Note that a and b are inverted: we want the lowest-priority CPU at + * the top of the heap. + */ + return edf_higher_prio(b->linked, a->linked); +} + +/* update_cpu_position - Move the cpu entry to the correct place to maintain + * order in the cpu queue. Caller must hold gqedf lock. 
+ */ +static void update_cpu_position(cpu_state_t *entry) +{ + if (likely(heap_node_in_heap(entry->hn))) + heap_delete(cpu_lower_prio, &gq_cpu_heap, entry->hn); + heap_insert(cpu_lower_prio, &gq_cpu_heap, entry->hn); +} + +/* caller must hold gqedf lock */ +static cpu_state_t* lowest_prio_cpu(void) +{ + struct heap_node* hn; + hn = heap_peek(cpu_lower_prio, &gq_cpu_heap); + return hn->value; //hn ? hn->value : NULL; +} + +/* link_task_to_cpu - Update the link of a CPU. + * Handles the case where the to-be-linked task is already + * scheduled on a different CPU. + */ +static noinline void link_task_to_cpu(struct task_struct* linked, + cpu_state_t *entry) +{ + cpu_state_t *sched; + struct task_struct* tmp; + int on_cpu; + + BUG_ON(linked && !is_realtime(linked)); + /* don't relink tasks that are already linked */ + BUG_ON(linked && tsk_rt(linked)->linked_on != NO_CPU); + + /* Currently linked task is set to be unlinked. */ + if (entry->linked) { + entry->linked->rt_param.linked_on = NO_CPU; + } + + /* Link new task to CPU. */ + if (linked) { + set_rt_flags(linked, RT_F_RUNNING); + /* handle task is already scheduled somewhere! */ + on_cpu = linked->rt_param.scheduled_on; + if (on_cpu != NO_CPU) { + sched = &per_cpu(gq_cpu_entries, on_cpu); + /* this should only happen if not linked already */ + BUG_ON(sched->linked == linked); + + /* If we are already scheduled on the CPU to which we + * wanted to link, we don't need to do the swap -- + * we just link ourselves to the CPU and depend on + * the caller to get things right. + */ + if (entry != sched) { + TRACE_TASK(linked, + "already scheduled on %d, updating link.\n", + sched->cpu); + tmp = sched->linked; + linked->rt_param.linked_on = sched->cpu; + sched->linked = linked; + update_cpu_position(sched); + linked = tmp; + } + } + if (linked) /* might be NULL due to swap */ + linked->rt_param.linked_on = entry->cpu; + } + entry->linked = linked; + if (linked) + TRACE_TASK(linked, "linked to %d.\n", entry->cpu); + else + TRACE("NULL linked to %d.\n", entry->cpu); + update_cpu_position(entry); +} + +/* unlink - Make sure a task is not linked any longer to an entry + * where it was linked before. Must hold gq_lock. + */ +static noinline void unlink(struct task_struct* t) +{ + cpu_state_t *entry; + + if (unlikely(!t)) { + TRACE_BUG_ON(!t); + return; + } + + if (t->rt_param.linked_on != NO_CPU) { + /* unlink */ + entry = &per_cpu(gq_cpu_entries, t->rt_param.linked_on); + t->rt_param.linked_on = NO_CPU; + link_task_to_cpu(NULL, entry); + } else if (is_queued(t)) { + /* This is an interesting situation: t is scheduled, + * but was just recently unlinked. It cannot be + * linked anywhere else (because then it would have + * been relinked to this CPU), thus it must be in some + * queue. We must remove it from the list in this + * case. + */ + TRACE_TASK(t, "unlink() -> remove()\n"); + remove(&gqedf, t); + } +} + + +/* requeue - Put an unlinked task into gsn-edf domain. + * Caller must hold gq_lock. 
+ */ +static noinline void requeue(struct task_struct* task) +{ + BUG_ON(!task); + /* sanity check before insertion */ + BUG_ON(is_queued(task)); + + if (is_released(task, litmus_clock())) + __add_ready(&gqedf, task); + else + /* it has got to wait */ + add_release(&gqedf, task); +} + +/* check for any necessary preemptions */ +static void link_highest_priority_jobs(void) +{ + struct task_struct *task; + cpu_state_t* last; + + for(last = lowest_prio_cpu(); +// last && + edf_preemption_needed(&gqedf, last->linked); + last = lowest_prio_cpu()) { + TRACE("last cpu:%d linked:%s/%d preempt:%d\n", + last->cpu, + last->linked ? last->linked->comm : "---", + last->linked ? last->linked->pid : 0, + edf_preemption_needed(&gqedf, last->linked)); + /* preemption necessary */ + task = __take_ready(&gqedf); + TRACE("attempting to link task %d to %d at %llu\n", + task->pid, last->cpu, litmus_clock()); + if (last->linked) { + TRACE_TASK(last->linked, "requeued.\n"); + requeue(last->linked); + } + link_task_to_cpu(task, last); + } +} + +/* caller holds gq_lock */ +static void job_completion(struct task_struct *t, int forced) +{ + + sched_trace_task_completion(t, forced); + + TRACE_TASK(t, "job_completion(forced=%d), state:%d\n", forced, + t->state); + + /* prepare for next period */ + prepare_for_next_period(t); + if (is_released(t, litmus_clock())) + sched_trace_task_release(t); + /* unlink */ + unlink(t); + /* requeue + * But don't requeue a blocking task. */ + if (is_running(t)) + requeue(t); + else + TRACE_TASK(t, "job_completion(): not requeued, is not running. " + "state:%d\n", t->state); +} + + +static void gq_add_released_queue(struct task_struct *t) +{ + spin_lock(&gq_release_lock); + heap_insert(edf_ready_order, &gq_released_heap, tsk_rt(t)->heap_node); + spin_unlock(&gq_release_lock); +} + +/* caller holds gq_lock, irqs are disabled */ +static void merge_released_queue(void) +{ +#ifdef CONFIG_SCHED_DEBUG_TRACE + struct heap_node* hn; + struct task_struct* t; +#endif + + spin_lock(&gq_release_lock); + +#ifdef CONFIG_SCHED_DEBUG_TRACE + /* do it individually (= slooow) + * so that we can trace each merge + */ + + + while ((hn = heap_take(edf_ready_order, &gq_released_heap))) { + t = (struct task_struct*) hn->value; + TRACE_TASK(t, "merged into ready queue (is_released:%d)\n", + is_released(t, litmus_clock())); + __add_ready(&gqedf, t); + } +#else + __merge_ready(&gqedf, &gq_released_heap); +#endif + + spin_unlock(&gq_release_lock); +} + +/* gq_tick - this function is called for every local timer + * interrupt. + * + * checks whether the current task has expired and checks + * whether we need to preempt it if it has not expired + */ +static void gq_tick(struct task_struct* t) +{ + unsigned long flags; + cpu_state_t* entry; + + spin_lock_irqsave(&gq_lock, flags); + entry = &__get_cpu_var(gq_cpu_entries); + entry->absentee = NULL; + + merge_released_queue(); + /* update linked if required */ + link_highest_priority_jobs(); + + if (entry->linked != entry->scheduled || + (is_realtime(t) && budget_exhausted(t))) + /* let's reschedule */ + set_tsk_need_resched(t); + + spin_unlock_irqrestore(&gq_lock, flags); +} + +static struct task_struct* gq_schedule(struct task_struct * prev) +{ + cpu_state_t* entry = &__get_cpu_var(gq_cpu_entries); + int sleep, preempt, exists, blocks, out_of_time; + struct task_struct* next = NULL; + + /* Bail out early if we are the release master. + * The release master never schedules any real-time tasks. 
+ */ + if (gqedf.release_master == entry->cpu) + return NULL; + + spin_lock(&gq_lock); + + /* sanity checking */ + BUG_ON(entry->scheduled && entry->scheduled != prev); + BUG_ON(entry->scheduled && !is_realtime(prev)); + BUG_ON(is_realtime(prev) && !entry->scheduled); + BUG_ON(entry->scheduled && tsk_rt(entry->scheduled)->scheduled_on != entry->cpu); + BUG_ON(entry->scheduled && tsk_rt(entry->scheduled)->scheduled_on != entry->cpu); + + /* Determine state */ + exists = entry->scheduled != NULL; + blocks = exists && !is_running(entry->scheduled); + out_of_time = exists && budget_exhausted(entry->scheduled); + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; + preempt = entry->scheduled != entry->linked; + + BUG_ON(blocks && sleep); + + TRACE_TASK(prev, "invoked gq_schedule.\n"); + + if (exists) + TRACE_TASK(prev, + "blocks:%d sleep:%d preempt:%d " + "state:%d sig:%d out_of_time:%d\n", + blocks, sleep, preempt, + prev->state, signal_pending(prev), + out_of_time); + if (entry->linked && preempt) + TRACE_TASK(prev, "will be preempted by %s/%d\n", + entry->linked->comm, entry->linked->pid); + + + if (blocks) { + /* Record task identity so that the task + * can be rescheduled when it resumes, + * but only do so when prev has not been + * preempted anyway. + */ + if (!preempt) { + entry->absentee = prev; + tsk_rt(prev)->last_cpu = entry->cpu; + } + unlink(entry->scheduled); + } + + if (sleep || out_of_time) + job_completion(entry->scheduled, !sleep); + + /* The final scheduling decision. Do we need to switch for some reason? + * If linked is different from scheduled, then select linked as next. + */ + TRACE("gq: linked=%p scheduled=%p\n", entry->linked, entry->scheduled); + if (entry->linked != entry->scheduled) { + /* Schedule a linked job? */ + if (entry->linked) { + entry->linked->rt_param.scheduled_on = entry->cpu; + next = entry->linked; + } + if (entry->scheduled) { + /* kick this task off the CPU */ + entry->scheduled->rt_param.scheduled_on = NO_CPU; + TRACE_TASK(entry->scheduled, "scheduled_on <- NO_CPU\n"); + } + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. + */ + if (exists) + next = prev; + + spin_unlock(&gq_lock); + + TRACE("gq_lock released, next=0x%p\n", next); + + + if (next) + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); + else if (exists && !next) + TRACE("becomes idle at %llu.\n", litmus_clock()); + + + return next; +} + + +/* _finish_switch - we just finished the switch away from prev + */ +static void gq_finish_switch(struct task_struct *prev) +{ + cpu_state_t* entry = &__get_cpu_var(gq_cpu_entries); + + entry->scheduled = is_realtime(current) ? 
current : NULL; + TRACE_TASK(prev, "switched away from\n"); +} + + +/* Prepare a task for running in RT mode + */ +static void gq_task_new(struct task_struct * t, int on_rq, int running) +{ + unsigned long flags; + cpu_state_t* entry = NULL; + int on_rm = 0; + + spin_lock_irqsave(&gq_lock, flags); + + /* setup job params */ + release_at(t, litmus_clock()); + + if (running) { + entry = &per_cpu(gq_cpu_entries, task_cpu(t)); + BUG_ON(entry->scheduled); + on_rm = entry->cpu == gqedf.release_master; + } + + TRACE_TASK(t, "gq edf: task new (running:%d on_rm:%d)\n", + running, on_rm); + + if (running && on_rm) + preempt(entry); + + if (running && !on_rm) { + /* just leave it where it is, CPU was real-time idle */ + tsk_rt(t)->scheduled_on = task_cpu(t); + tsk_rt(t)->linked_on = task_cpu(t); + entry->scheduled = t; + if (entry->linked != NULL) { + /* Something raced and got assigned here. + * Kick it back into the queue, since t is + * already executing. + */ + tsk_rt(entry->linked)->linked_on = NO_CPU; + __add_ready(&gqedf, entry->linked); + } + entry->linked = t; + } + + if (!running || on_rm) { + /* should be released properly */ + tsk_rt(t)->scheduled_on = NO_CPU; + tsk_rt(t)->linked_on = NO_CPU; + gq_add_released_queue(t); + } + + spin_unlock_irqrestore(&gq_lock, flags); +} + +static void gq_task_wake_up(struct task_struct *task) +{ + unsigned long flags; + cpu_state_t* entry; + lt_t now; + + spin_lock_irqsave(&gq_lock, flags); + + now = litmus_clock(); + if (is_tardy(task, now)) { + TRACE_TASK(task, "wake_up => new release\n"); + /* Since task came back after its deadline, we + * assume that resuming indidates a new job release. + */ + release_at(task, now); + sched_trace_task_release(task); + gq_add_released_queue(task); + } else if (is_released(task, now)) { + set_rt_flags(task, RT_F_RUNNING); + entry = &per_cpu(gq_cpu_entries, tsk_rt(task)->last_cpu); + /* check if task is still the quantum owner on its CPU */ + if (entry->linked == NULL && entry->absentee == task) { + TRACE_TASK(task, "wake_up => is quantum owner\n"); + /* can resume right away */ + link_task_to_cpu(task, entry); + if (entry->scheduled != task) + preempt(entry); + } else { + /* becomes eligible at next quantum */ + TRACE_TASK(task, "wake_up => released_queue\n"); + gq_add_released_queue(task); + } + } else { + /* last suspension occurred together with a + * job completion + */ + TRACE_TASK(task, "wake_up => not yet released!\n"); + requeue(task); + } + spin_unlock_irqrestore(&gq_lock, flags); +} + +static void gq_task_block(struct task_struct *t) +{ + TRACE_TASK(t, "block at %llu\n", litmus_clock()); +} + + +static void gq_task_exit(struct task_struct * t) +{ + unsigned long flags; + + /* unlink if necessary */ + spin_lock_irqsave(&gq_lock, flags); + unlink(t); + if (tsk_rt(t)->scheduled_on != NO_CPU) { + gq_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; + tsk_rt(t)->scheduled_on = NO_CPU; + } + spin_unlock_irqrestore(&gq_lock, flags); + + BUG_ON(!is_realtime(t)); + TRACE_TASK(t, "RIP\n"); +} + + + +static void gq_release_jobs(rt_domain_t* rt, struct heap* tasks) +{ + unsigned long flags; + + spin_lock_irqsave(&gq_release_lock, flags); + TRACE("gq_release_jobs() at %llu\n", litmus_clock()); + + /* save tasks to queue so that they can be merged on next quantum */ + heap_union(edf_ready_order, &gq_released_heap, tasks); + + spin_unlock_irqrestore(&gq_release_lock, flags); +} + +static long gq_admit_task(struct task_struct* tsk) +{ + return 0; +} + + +static long gq_activate_plugin(void) +{ + int cpu; + cpu_state_t 
*entry; + + heap_init(&gq_cpu_heap); + heap_init(&gq_released_heap); + gqedf.release_master = atomic_read(&release_master_cpu); + + + for_each_online_cpu(cpu) { + entry = &per_cpu(gq_cpu_entries, cpu); + heap_node_init(&entry->hn, entry); + entry->linked = NULL; + entry->scheduled = NULL; + entry->absentee = NULL; + if (cpu != gqedf.release_master) { + TRACE("GQ-EDF: Initializing CPU #%d.\n", cpu); + update_cpu_position(entry); + } else { + TRACE("GQ-EDF: CPU %d is release master.\n", cpu); + } + } + return 0; +} + +/* Plugin object */ +static struct sched_plugin gq_edf_plugin __cacheline_aligned_in_smp = { + .plugin_name = "GQ-EDF", + .finish_switch = gq_finish_switch, + .tick = gq_tick, + .task_new = gq_task_new, + .complete_job = complete_job, + .task_exit = gq_task_exit, + .schedule = gq_schedule, + .task_wake_up = gq_task_wake_up, + .task_block = gq_task_block, + .admit_task = gq_admit_task, + .activate_plugin = gq_activate_plugin, +}; + + +static int __init init_gq_edf(void) +{ + int cpu; + cpu_state_t *entry; + + /* initialize CPU state */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + entry = &per_cpu(gq_cpu_entries, cpu); + gq_cpus[cpu] = entry; + entry->cpu = cpu; + entry->hn = &gq_heap_node[cpu]; + heap_node_init(&entry->hn, entry); + } + edf_domain_init(&gqedf, NULL, gq_release_jobs); + return register_sched_plugin(&gq_edf_plugin); +} + + +module_init(init_gq_edf);
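
[Note: the following is an illustrative sketch, not part of the patch. It shows, in plain user-space C, the callback pattern that sched_gedf.c builds on top of cheap.h: an EDF-style comparator passed as the priority callback, and a take-predicate in the spirit of preemption_required() that lets the caller veto a dequeue when no preemption is needed. All names here (demo_heap, demo_job, edf_higher_prio_demo, preempt_needed) are made up for illustration. The sketch is single-threaded and array-backed, so it does not model the fine-grained per-node locking of cheap.c, and unlike cheap_take_if() it evaluates the predicate on the highest-priority element rather than on the last heap leaf.]

#include <stdio.h>

struct demo_job {
	int pid;
	unsigned long long deadline;
};

typedef int (*demo_prio_t)(void *a, void *b);
typedef int (*demo_take_pred_t)(void *ctx, void *value);

/* EDF: earlier absolute deadline means higher priority; NULL loses. */
static int edf_higher_prio_demo(void *_a, void *_b)
{
	struct demo_job *a = _a, *b = _b;
	if (!a)
		return 0;
	if (!b)
		return 1;
	return a->deadline < b->deadline;
}

/* Minimal array-backed binary heap ordered by the comparator. */
struct demo_heap {
	void *slot[64];
	unsigned int next;
};

static void demo_swap(struct demo_heap *h, unsigned int i, unsigned int j)
{
	void *tmp = h->slot[i];
	h->slot[i] = h->slot[j];
	h->slot[j] = tmp;
}

static void demo_insert(demo_prio_t hp, struct demo_heap *h, void *item)
{
	unsigned int c = h->next++;
	h->slot[c] = item;
	/* bubble up while the new item beats its parent */
	while (c > 0 && hp(h->slot[c], h->slot[(c - 1) / 2])) {
		demo_swap(h, c, (c - 1) / 2);
		c = (c - 1) / 2;
	}
}

static void *demo_take_if(demo_take_pred_t pred, void *ctx,
			  demo_prio_t hp, struct demo_heap *h)
{
	void *top;
	unsigned int p = 0, l, r, child;

	if (h->next == 0)
		return NULL;
	top = h->slot[0];
	/* the predicate may veto the dequeue (e.g., no preemption needed) */
	if (pred && !pred(ctx, top))
		return NULL;
	h->slot[0] = h->slot[--h->next];
	/* bubble the replacement down */
	while ((l = 2 * p + 1) < h->next) {
		r = l + 1;
		child = (r < h->next && hp(h->slot[r], h->slot[l])) ? r : l;
		if (!hp(h->slot[child], h->slot[p]))
			break;
		demo_swap(h, child, p);
		p = child;
	}
	return top;
}

/* In the spirit of preemption_required(): only take the candidate if it
 * has higher priority than the job linked to the lowest-priority CPU. */
static int preempt_needed(void *ctx, void *candidate)
{
	return edf_higher_prio_demo(candidate, ctx);
}

int main(void)
{
	struct demo_heap h = { .next = 0 };
	struct demo_job j1 = { 1, 100 }, j2 = { 2, 50 }, linked = { 3, 75 };
	struct demo_job *winner;

	demo_insert(edf_higher_prio_demo, &h, &j1);
	demo_insert(edf_higher_prio_demo, &h, &j2);

	winner = demo_take_if(preempt_needed, &linked,
			      edf_higher_prio_demo, &h);
	printf("preempting job: %d\n", winner ? winner->pid : -1);
	return 0;
}

Under these assumptions the program prints "preempting job: 2", since job 2's deadline (50) is earlier than both job 1's (100) and the linked job's (75); if the linked job had the earliest deadline, the predicate would veto the dequeue and NULL would be returned instead.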