#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sort.h>

#include <litmus/litmus.h>
#include <litmus/debug_trace.h>
#include <litmus/jobs.h>
#include <litmus/budget.h>
#include <litmus/preempt.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>
#include <litmus/sched_plugin.h>
#include <litmus/sched_trace.h>
#include <litmus/trace.h>

/* to set up domain/cpu mappings */
#include <litmus/litmus_proc.h>

typedef struct cont_domain {
	rt_domain_t domain;
	struct task_struct *container;
	struct task_struct *scheduled; // fixed task currently running in this container
	lt_t scheduled_last_exec_time; // time when the scheduled task last started executing or had its budget updated
	u64 f_util;
	struct bheap_node *hn;
	struct hrtimer idle_enforcement_timer;
	int timer_armed;
} cont_domain_t;

typedef struct {
	int cpu;
	struct task_struct *linked;
	struct task_struct *scheduled; // container or migrating task
	atomic_t will_schedule;
	/* This heap node should never be NULL. This will be in edfsc_cpu_heap when
	 * extra capacity is available on this CPU, i.e. when this CPU's container
	 * is not fully provisioned.
	 */
	struct bheap_node *hn;
} cpu_entry_t;

struct list_head pending_adds;
struct list_head migrating_tasks;
struct list_head pending_removes;
struct hrtimer container_release_timer;

DEFINE_PER_CPU(cpu_entry_t, edfsc_cpu_entries);

struct task_struct* container_tasks;
static cont_domain_t* container_domains;
static cont_domain_t** container_list;
static rt_domain_t gsched_domain;
#define g_lock (gsched_domain.ready_lock)

#define CONTAINER_PERIOD 50000000 // 50 ms in ns

u64 m_util;
u64 sys_util;

// only true when container_boundary needs to perform stabilization
int sys_changed;

#define is_container(task) ((task) && tsk_rt(task)->edfsc_params.domain != NULL && tsk_rt(task)->domain == &gsched_domain)
#define is_fixed(task) ((task) && tsk_rt(task)->domain && tsk_rt(task)->domain != &gsched_domain)
#define is_migrating(task) ((task) && tsk_rt(task)->edfsc_params.domain == NULL && tsk_rt(task)->domain == &gsched_domain)

#define FP_SHIFT 20
#define to_fp(a) ((a) << FP_SHIFT)
#define from_fp(a) ((a) >> FP_SHIFT)
#define fp_div(a, b) (to_fp((a)) / (b))

// We need these from litmus.c for partially initializing our container tasks
struct release_heap* release_heap_alloc(int gfp_flags);
void release_heap_free(struct release_heap* rh);
struct bheap_node* bheap_node_alloc(int gfp_flags);
void bheap_node_free(struct bheap_node* hn);

int count_migrating_tasks(void)
{
	int task_count = 0;
	struct list_head *pos;
	unsigned long flags;

	raw_spin_lock_irqsave(&g_lock, flags);
	list_for_each(pos, &migrating_tasks) {
		task_count++;
	}
	raw_spin_unlock_irqrestore(&g_lock, flags);
	return task_count;
}

/* Do a backwards comparison based on f_util so that heavier containers
 * will come first
 */
// Used for best-fit
static int container_lower_prio(const void *_a, const void *_b)
{
	const cont_domain_t* a = *(const cont_domain_t**)(_a);
	const cont_domain_t* b = *(const cont_domain_t**)(_b);
	return (b->f_util - a->f_util);
}

// Used for worst-fit
static int container_higher_prio(const void *_a, const void *_b)
{
	const cont_domain_t* a = *(const cont_domain_t**)(_a);
	const cont_domain_t* b = *(const cont_domain_t**)(_b);
	return (a->f_util - b->f_util);
}

/* Finds the task_struct of a list node */
static struct task_struct* task_of_list_node(struct list_head *node)
{
	return container_of(node, struct task_struct, edfsc_qnode);
}

/* Requeues task in the domain recorded in its edfsc_params */
static noinline void requeue(struct task_struct* task)
{
	BUG_ON(!task);
	/* sanity check before insertion */
	BUG_ON(is_queued(task));
	BUG_ON(is_migrating(task) && task->rt_param.edfsc_params.container_task != NULL);
	//BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
	BUG_ON(budget_enforced(task) &&
	       budget_exhausted(task));
	//BUG_ON(is_container(task) && ((cont_domain_t*)task->rt_param.edfsc_params.domain)->timer_armed);
	//BUG_ON(task && is_completed(task));

	if (is_early_releasing(task) || is_released(task, litmus_clock())) {
		__add_ready((rt_domain_t *) tsk_rt(task)->domain, task);
	} else {
		/* it has got to wait */
		add_release((rt_domain_t *) tsk_rt(task)->domain, task);
	}
}

/**
 * Preempt and litmus_reschedule() according to our understanding of the CPU state.
 *
 * @note Correctly preempts fixed task if entry->scheduled is a container
 * @param entry CPU state per EDF-SC. entry->scheduled may be NULL.
 */
static void preempt(cpu_entry_t *entry)
{
	BUG_ON(!entry);
	if (is_container(entry->scheduled)) {
		preempt_if_preemptable(tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled,
		                       entry->cpu);
	} else {
		preempt_if_preemptable(entry->scheduled, entry->cpu);
	}
}

/////////////////////////////////////////////////////////////////////////////////////

/*
 *
 * CPU ORDERING
 *
 */

static struct bheap_node* edfsc_cpu_heap_node; // Array of cpu heap nodes
static struct bheap edfsc_cpu_heap;            // Cpu heap

static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
{
	cpu_entry_t *a, *b;
	a = _a->value;
	b = _b->value;
	/* Note that a and b are inverted: we want the lowest-priority CPU at
	 * the top of the heap.
	 */
	return edf_higher_prio(b->linked, a->linked);
}

/* caller must hold g_lock */
static cpu_entry_t* lowest_prio_cpu(void)
{
	struct bheap_node* hn;
	BUG_ON(!raw_spin_is_locked(&g_lock));
	hn = bheap_peek(cpu_lower_prio, &edfsc_cpu_heap);
	return hn->value;
}

static void remove_cpu_from_global(cpu_entry_t *entry)
{
	BUG_ON(!raw_spin_is_locked(&g_lock));
	// This disconnects the node and sets entry->hn->degree = NOT_IN_HEAP
	bheap_delete(cpu_lower_prio, &edfsc_cpu_heap, entry->hn);
}

static void add_cpu_to_global(cpu_entry_t *entry)
{
	BUG_ON(!raw_spin_is_locked(&g_lock));
	BUG_ON(bheap_node_in_heap(entry->hn));
	bheap_insert(cpu_lower_prio, &edfsc_cpu_heap, entry->hn);
}

/* update_cpu_position - Move the cpu entry to the correct place to maintain
 * order in the cpu queue. Caller must hold g_lock.
 */
static void update_cpu_position(cpu_entry_t *entry)
{
	if (likely(bheap_node_in_heap(entry->hn))) {
		remove_cpu_from_global(entry);
		add_cpu_to_global(entry);
	}
}

///////////////////////////////////////////////////////////////////////////////////////

/*
 *
 * IDLE CONTAINER BUDGET ENFORCEMENT
 *
 */

// updates exec_time for container budget tracking
static void update_container_budget(struct task_struct* t)
{
	lt_t now = litmus_clock();
	tsk_rt(t)->job_params.exec_time += now - tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time;
	tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = now;
}

// timeout for timer enforcing budget of empty container
static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer)
{
	cont_domain_t* domain = container_of(timer, cont_domain_t, idle_enforcement_timer);
	unsigned long flags;

	local_irq_save(flags);
	BUG_ON(tsk_rt(domain->container)->edfsc_params.id != this_cpu_ptr(&edfsc_cpu_entries)->cpu);
	domain->timer_armed = 0;
	tsk_rt(domain->container)->completed = 1;
	litmus_reschedule_local();
	local_irq_restore(flags);
	return HRTIMER_NORESTART;
}

void manage_idle_enforcement_timer(struct task_struct* t)
{
	lt_t now;
	cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain;

	now = litmus_clock();
	BUG_ON(is_completed(t));
	BUG_ON(budget_exhausted(t) && !is_np(t));
	if (!domain->timer_armed) {
		domain->scheduled_last_exec_time = now;
		// hrtimer_start cancels the timer so we don't have to check
		// if it is already armed
		hrtimer_start(&(domain->idle_enforcement_timer),
		              ns_to_ktime(now + budget_remaining(t)),
		              HRTIMER_MODE_ABS_PINNED);
		domain->timer_armed = 1;
	}
}

void cancel_idle_enforcement_timer(struct task_struct* t)
{
	cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain;
	hrtimer_try_to_cancel(&(domain->idle_enforcement_timer));
	domain->timer_armed = 0;
}

/* link_task_to_cpu - Links a migrating task or container to a CPU
 * Update the link of a CPU.
 */
static noinline void link_task_to_cpu(struct task_struct* linked, cpu_entry_t *entry)
{
	BUG_ON(is_fixed(linked));
	BUG_ON(is_container(linked) && tsk_rt(linked)->edfsc_params.id != entry->cpu);
	BUG_ON(linked && is_queued(linked));
	//BUG_ON(linked && ((budget_enforced(linked) && budget_exhausted(linked)) || is_completed(linked)));
	BUG_ON(linked && !is_released(linked, litmus_clock()));
	//BUG_ON(is_container(linked) && linked->rt_param.edfsc_params.domain->timer_armed);

	/* Currently linked task is set to be unlinked. */
	if (entry->linked && entry->linked->rt_param.linked_on == entry->cpu)
		entry->linked->rt_param.linked_on = NO_CPU;

	/* Link new task to CPU. */
	if (linked)
		linked->rt_param.linked_on = entry->cpu;

	entry->linked = linked;
	BUG_ON(entry->linked && entry->linked->rt_param.linked_on != entry->cpu);

#ifdef WANT_ALL_SCHED_EVENTS
	if (linked)
		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
	else
		TRACE("NULL linked to %d.\n", entry->cpu);
#endif

	update_cpu_position(entry);
}

/* unlink - Make sure a task is not linked any longer to an entry
 * where it was linked before. Must hold g_lock.
 */
static noinline void unlink(struct task_struct* t)
{
	cpu_entry_t *entry;
	BUG_ON(!t);

	if (t->rt_param.linked_on != NO_CPU) {
		/* unlink */
		entry = &per_cpu(edfsc_cpu_entries, t->rt_param.linked_on);
		BUG_ON(entry->cpu != t->rt_param.linked_on);
		t->rt_param.linked_on = NO_CPU;
		link_task_to_cpu(NULL, entry);
		BUG_ON(entry->linked || t->rt_param.linked_on != NO_CPU);
	} else if (is_queued(t)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked.
		 * It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 */
		remove(&gsched_domain, t);
	}
}

//TODO change local linking
/* g_preempt_check - walk CPUs in priority order and link any higher-priority
 * ready global work (containers or migrating tasks) to them, preempting as
 * needed. Caller must hold g_lock.
 */
static void g_preempt_check(void)
{
	struct task_struct *task, *temp;
	cpu_entry_t *last, *target;

	if (!bheap_peek(cpu_lower_prio, &edfsc_cpu_heap))
		return;

	// Loop through CPUs in priority order, checking if anything needs preemption
	for (last = lowest_prio_cpu();
	     edf_preemption_needed(&gsched_domain, last->linked);
	     last = lowest_prio_cpu()) {
		target = last;
		/* preemption necessary */
		task = __take_ready(&gsched_domain);
		// g_preempt_check can be called before gschedule, and therefore g_job_completion.
		// So, a task can be temporarily added to the ready queue, but will quickly be rectified
		// by either this, or g_job_completion
		if (requeue_preempted_job(task)) {
			// Update container budget tracking
			if (is_container(task)) {
				last = &per_cpu(edfsc_cpu_entries, tsk_rt(task)->edfsc_params.id);
			} else if (is_container(last->linked)) {
				if (tsk_rt(last->linked)->edfsc_params.domain->timer_armed) {
					update_container_budget(last->linked);
				}
			}
			if (requeue_preempted_job(last->linked)) {
				requeue(last->linked);
			}
			TRACE("g_preempt_check: attempting to link task %d to %d\n",
			      task->pid, target->cpu);
			link_task_to_cpu(task, last);
			preempt(last);
		}
	}
}

static int c_preempt_check(cont_domain_t *container)
{
	if ((is_migrating(container->scheduled) && __peek_ready(&container->domain)) ||
	    edf_preemption_needed(&container->domain, container->scheduled)) {
		preempt(&per_cpu(edfsc_cpu_entries, tsk_rt(container->container)->edfsc_params.id));
		return 1;
	} else {
		return 0;
	}
}

// Callback for new global job release
static void g_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&g_lock, flags);
	__merge_ready(rt, tasks);
	g_preempt_check();
	raw_spin_unlock_irqrestore(&g_lock, flags);
}

// Callback for new container release
static int c_check_resched(rt_domain_t *edf)
{
	cont_domain_t *cont_dom = container_of(edf, cont_domain_t, domain);
	/* because this is a callback from rt_domain_t we already hold
	 * the necessary lock for the ready queue
	 */
	return c_preempt_check(cont_dom);
}

static void g_remove_task(struct task_struct *t)
{
	BUG_ON(is_container(t));
	//BUG_ON(get_rt_utilization(t) > m_util);
	m_util -= get_rt_utilization(t);
	sys_util -= get_rt_utilization(t);
	sys_changed = 1;
}

static void c_remove_task(struct task_struct *t)
{
	//BUG_ON(get_rt_utilization(t) > container_domains[tsk_rt(t)->task_params.cpu].f_util);
	container_domains[tsk_rt(t)->task_params.cpu].f_util -= get_rt_utilization(t);
	sys_util -= get_rt_utilization(t);
	sys_changed = 1;
}

/**
 * Remove a task from its current domain and put it in a different domain.
 * Must be called at the later of job completion and deadline to respect
 * EDF-sc invariants. Can only go from migrating to fixed task.
 */
static void migrate_task(struct task_struct *t)
{
	BUG_ON(!t);
	BUG_ON(is_container(t) || is_fixed(t));
	BUG_ON(!tsk_rt(t)->edfsc_params.move_to);

	if (is_queued(t))
		remove(tsk_rt(t)->domain, t);
	// Remove the util of the "fake reservation task" (specified by the paper) from the system
	sys_util -= get_rt_utilization(t);
	m_util -= get_rt_utilization(t);
	tsk_rt(t)->domain = (rt_domain_t*)tsk_rt(t)->edfsc_params.move_to;
	tsk_rt(t)->edfsc_params.container_task = tsk_rt(t)->edfsc_params.move_to->container;
	requeue(t);
	c_preempt_check((cont_domain_t*)tsk_rt(t)->domain);
	tsk_rt(t)->edfsc_params.move_to = NULL;
	sys_changed = 1;
}

/**
 * Release a container and take its core out of availability if it's a fully
 * provisioned container.
 * Note: This is shared by container_boundary() and g_job_completion().
 */
static void c_release(struct task_struct *t)
{
	cpu_entry_t* entry;

	BUG_ON(!is_container(t));
	BUG_ON(t->rt_param.edfsc_params.domain->timer_armed);

	entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
	tsk_rt(t)->task_params.exec_cost = from_fp(get_rt_utilization(t) * get_rt_period(t));
	prepare_for_next_period(t);
	if (is_early_releasing(t) || is_released(t, litmus_clock()))
		sched_trace_task_release(t);
	/* If this container is fully provisioned, remove it from gsched_domain,
	 * edfsc_cpu_heap, and disable the idle enforcement timer. If not, restore.
	 */
	if (get_rt_utilization(t) == to_fp(1)) {
		// Make this cpu unavailable to the global scheduler
		if (bheap_node_in_heap(entry->hn))
			remove_cpu_from_global(entry);
		// Note that we no longer need the global scheduler to schedule us
		if (is_queued(t)) {
			remove(&gsched_domain, t);
		}
		// Fully provisioned containers always run, so just set this here
		if (entry->linked != t) {
			BUG_ON(is_container(entry->linked));
			if (requeue_preempted_job(entry->linked)) {
				requeue(entry->linked);
			}
			link_task_to_cpu(t, entry);
		}
		tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = litmus_clock();
		// Run schedule again to make sure that we're run
		preempt(entry);
	} else {
		// Make our cpu available again
		if (!bheap_node_in_heap(entry->hn))
			add_cpu_to_global(entry);
		// Note that containers aren't real tasks and thus can't block
		unlink(t);
		// Request to be scheduled globally again
		requeue(t);
	}
}

// migrating or container task job_completion, called from edfsc_gschedule
// g_lock must already be held
static noinline void g_job_completion(struct task_struct* t, int forced)
{
	BUG_ON(!t);
	sched_trace_task_completion(t, forced);
	TRACE_TASK(t, "g_job_completion(forced=%d).\n", forced);

	unlink(t);
	tsk_rt(t)->completed = 0;

	// When a migrating task is being turned into a fixed task
	if (is_migrating(t) && tsk_rt(t)->edfsc_params.move_to) {
		prepare_for_next_period(t);
		if (is_early_releasing(t) || is_released(t, litmus_clock()))
			sched_trace_task_release(t);
		if (tsk_rt(t)->job_params.lateness > 0) {
			// Don't wait if prev job was tardy
			migrate_task(t);
		} else {
			list_add(&t->edfsc_qnode, &pending_removes);
			hrtimer_start(&t->edfsc_deadline_timer,
			              ns_to_ktime(get_deadline(t)),
			              HRTIMER_MODE_ABS_PINNED);
		}
	// When a migrating job finishes
	} else if (is_migrating(t)) {
		/* prepare for next period */
		prepare_for_next_period(t);
		if (is_early_releasing(t) || is_released(t, litmus_clock()))
			sched_trace_task_release(t);
		// requeue, but don't requeue a blocking task
		if (is_current_running()) {
			requeue(t);
			g_preempt_check();
		}
	} else if (is_container(t)) {
		// When a container job finishes late, release it immediately
		if (get_deadline(t) < litmus_clock()) {
			c_release(t);
			g_preempt_check();
			if (get_rt_utilization(t) == to_fp(1))
				manage_idle_enforcement_timer(t);
		} else {
			tsk_rt(t)->completed = 1;
		}
	}
}

// fixed task job_completion, called from edfsc_cschedule
static void c_job_completion(struct task_struct* t, int forced)
{
	sched_trace_task_completion(t, forced);
	TRACE_TASK(t, "c_job_completion(forced=%d).\n", forced);

	tsk_rt(t)->completed = 0;
	prepare_for_next_period(t);
	requeue(t);
}

// need to update cpu entries after global scheduling
// As long as this only touches CPU-local state, it shouldn't need g_lock:
static void g_finish_switch(struct task_struct *prev)
{
	unsigned long flags;
	cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
	struct task_struct* container = &container_tasks[entry->cpu];

	raw_spin_lock_irqsave(&g_lock, flags);
	entry->scheduled = is_realtime(current) ? current : NULL;
	// If we're scheduling a task in a container, set entry->scheduled to the container
	if (entry->scheduled) {
		if (entry->scheduled->rt_param.edfsc_params.container_task) {
			entry->scheduled = entry->scheduled->rt_param.edfsc_params.container_task;
		}
	}
	// occurs when current is non-rt, and linked is a container
	// this happens when an empty container "task" is supposed to be current
	// but because it's not a real task, a non-rt task is current instead
	else if (tsk_rt(container)->scheduled_on != NO_CPU) {
		entry->scheduled = container;
	}
	raw_spin_unlock_irqrestore(&g_lock, flags);

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE_TASK(prev, "switched away from\n");
#endif
}

static int fifo_prio(struct bheap_node* _a, struct bheap_node* _b)
{
	return 0;
}

/**
 * Schedule inside of a container domain
 * Called with g_lock already held
 * @param cedf Pointer to tsk_rt(container)->edfsc_params->domain
 * @param prev Previous task running on this processor before schedule was called
 */
static noinline void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
{
	rt_domain_t *edf = &cedf->domain;

	struct task_struct* next;
	struct task_struct* other_t;
	struct bheap temp;
	cpu_entry_t *this_entry, *other_entry;
	int out_of_time, sleep, preempt, np, exists, blocks, resched;

	// XXX: The scheduler we copied this from also used `cont_out_of_time`. Is
	// there some logic that we should have left that needs this?

	/* sanity checking
	 * differently from gedf, when a task exits (dead)
	 * cedf->scheduled may be null and prev _is_ realtime
	 */
	//BUG_ON(cedf->scheduled && cedf->scheduled != prev && is_realtime(prev));
	BUG_ON(cedf->scheduled && !is_realtime(cedf->scheduled));

	/* (0) Determine state */
	exists = cedf->scheduled != NULL;
	blocks = exists && current == cedf->scheduled && !is_current_running();
	out_of_time = exists && budget_enforced(cedf->scheduled)
	                     && budget_exhausted(cedf->scheduled);
	np = exists && is_np(cedf->scheduled);
	sleep = exists && is_completed(cedf->scheduled);
	preempt = (is_migrating(cedf->scheduled) && __peek_ready(edf)) ||
	          edf_preemption_needed(edf, cedf->scheduled);

	/* If we need to preempt do so.
	 * The following checks set resched to 1 in case of special
	 * circumstances.
	 */
	resched = preempt;

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * Multiple calls to request_exit_np() don't hurt.
	 */
	if (np && (out_of_time || preempt || sleep))
		request_exit_np(cedf->scheduled);

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this.
	 */
	if (!np && (out_of_time || sleep)) {
		if (is_fixed(cedf->scheduled))
			c_job_completion(cedf->scheduled, !sleep);
		else {
			tsk_rt(cedf->scheduled)->edfsc_params.container_task = NULL;
			g_job_completion(cedf->scheduled, !sleep);
		}
		resched = 1;
	}
	// Deschedule any background jobs if a fixed task is ready
	else if (!np && preempt) {
		if (!blocks && cedf->scheduled && !is_queued(cedf->scheduled)) {
			if (is_migrating(cedf->scheduled))
				tsk_rt(cedf->scheduled)->edfsc_params.container_task = NULL;
			requeue(cedf->scheduled);
		}
		resched = 1;
	}

	/* The final scheduling decision. Do we need to switch for some reason?
	 * Switch if we are in RT mode and have no task or if we need to
	 * resched.
	 */
	next = NULL;
	if (blocks || !exists || (!np && resched)) {
		BUG_ON(cedf->scheduled && !blocks && !out_of_time && !sleep &&
		       !is_migrating(cedf->scheduled) && !is_queued(cedf->scheduled));
		next = __take_ready(edf);
		BUG_ON(next && budget_enforced(next) && budget_exhausted(next));
	} else if (exists) {
		// This is safe when background scheduling, as we can only get here if
		// there were no other fixed tasks ready to run.
		BUG_ON(is_queued(cedf->scheduled));
		BUG_ON(budget_enforced(cedf->scheduled) && budget_exhausted(cedf->scheduled));
		next = cedf->scheduled;
	}

	this_entry = this_cpu_ptr(&edfsc_cpu_entries);
	if (next) {
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	// Give the container a little breathing room, otherwise the core will be pounded with work.
	// Will often trigger the watchdog due to continuous execution.
	} else if (!list_empty(&migrating_tasks)) {
		// Find a task in gsched_domain that isn't a container to background schedule
		bheap_init(&temp);
		next = __take_ready(&gsched_domain);
		while (is_container(next) || (is_migrating(next) && next->cpu != this_entry->cpu)) {
			bheap_insert(fifo_prio, &temp, tsk_rt(next)->heap_node);
			next = __take_ready(&gsched_domain);
		}
		if (next) {
			tsk_rt(next)->edfsc_params.container_task = cedf->container;
			TRACE_TASK(next, "background scheduling at %llu\n", litmus_clock());
		} else {
			TRACE("container becomes idle at %llu\n", litmus_clock());
		}
		while (bheap_peek(fifo_prio, &temp)) {
			requeue(bheap_take(fifo_prio, &temp)->value);
		}
	}

	if (next && next->cpu != this_entry->cpu) {
		other_entry = &per_cpu(edfsc_cpu_entries, next->cpu);
		other_t = is_container(other_entry->linked) ?
		          other_entry->linked->rt_param.edfsc_params.domain->scheduled :
		          other_entry->linked;
		// If we detect a direct swap, and the other task has already gone through gschedule,
		// then to prevent a deadlock we let them go first and reschedule
		if (other_t && other_t->cpu == this_entry->cpu) {
			if (is_migrating(other_t) ||
			    other_entry->linked->rt_param.scheduled_on == other_entry->cpu) {
				if (is_migrating(next))
					next->rt_param.edfsc_params.container_task = NULL;
				requeue(next);
				next = NULL;
			}
		}
	}

	cedf->scheduled = next;
}

// assuming prev is the previous task running on the processor before calling schedule
static struct task_struct *edfsc_gschedule(struct task_struct *prev)
{
	cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
	int out_of_time, sleep, preempted, np, exists, blocks, is_cont;
	unsigned long flags;
	struct task_struct* next = NULL;
	struct task_struct* temp = NULL;

	raw_spin_lock_irqsave(&g_lock, flags);

	/* sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev && !is_container(entry->scheduled));
	// It's okay for the previously scheduled task to not be rt if we think a
	// container task is scheduled and the container doesn't have any pending
	// jobs of fixed tasks.
	BUG_ON(entry->scheduled && !is_container(entry->scheduled) && !is_realtime(prev));
	// Bug if we didn't think anything was scheduled, but a realtime task was running on our CPU
	//BUG_ON(is_realtime(prev) && tsk_rt(prev)->linked_on != NO_CPU && !entry->scheduled);

	/* (0) Determine state */
	exists = entry->scheduled != NULL;
	is_cont = is_container(entry->scheduled);
	blocks = exists && !is_cont && !is_current_running();
	np = exists && !is_cont && is_np(entry->scheduled);
	sleep = exists && is_completed(entry->scheduled);
	preempted = entry->scheduled != entry->linked;

	/* Manually track container budget */
	if (is_cont && (tsk_rt(entry->scheduled)->edfsc_params.domain->timer_armed || sleep)) {
		update_container_budget(entry->scheduled);
		out_of_time = exists && budget_enforced(entry->scheduled)
		                     && budget_exhausted(entry->scheduled);
		/* Cancel container enforcement timer if container is fully provisioned and out of sync with
		 * container_boundary, or if it is currently being scheduled in gedf
		 */
		if (bheap_node_in_heap(entry->hn) || (!bheap_node_in_heap(entry->hn) && out_of_time))
			cancel_idle_enforcement_timer(entry->scheduled);
	} else {
		out_of_time = exists && budget_enforced(entry->scheduled)
		                     && budget_exhausted(entry->scheduled);
	}

	if (exists)
		TRACE_TASK(prev,
		           "blocks:%d out_of_time:%d np:%d sleep:%d preempted:%d "
		           "state:%d sig:%d is_cont:%d\n",
		           blocks, out_of_time, np, sleep, preempted,
		           prev->state, signal_pending(prev), is_cont);
	if (entry->linked && preempted)
		TRACE_TASK(prev, "will be preempted by %s/%d\n",
		           entry->linked->comm, entry->linked->pid);

	// If a task blocks we have no choice but to reschedule.
	if (blocks)
		unlink(prev);

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * We need to make sure to update the link structure anyway in case
	 * that we are still linked. Multiple calls to request_exit_np() don't
	 * hurt.
	 */
	if (np && (out_of_time || preempted || sleep)) {
		unlink(entry->scheduled);
		request_exit_np(entry->scheduled);
	}

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this. Don't do a job completion if we block (can't have timers running
	 * for blocked jobs).
	 */
	if (!np && (out_of_time || sleep)) {
		g_job_completion(entry->scheduled, !sleep);
	}

	BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked && !is_container(entry->linked));

	if (!entry->linked && bheap_node_in_heap(entry->hn)) {
		g_preempt_check();
	}

	BUG_ON(entry->linked && is_queued(entry->linked));
	BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked &&
	       tsk_rt(entry->linked)->edfsc_params.id != entry->cpu);
	BUG_ON(is_container(entry->linked) && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu);

	/* The final scheduling decision. Do we need to switch for some reason?
	 * If linked is different from scheduled, then select linked as next.
	 */
	if ((!np || blocks) && entry->linked != entry->scheduled) {
		// Set the newly linked job to be scheduled
		if (entry->linked) {
			next = entry->linked;
			tsk_rt(entry->linked)->scheduled_on = entry->cpu;
			BUG_ON(is_queued(entry->linked));
			TRACE_TASK(next, "scheduled on P%d\n", smp_processor_id());
		}
		// Set the previously linked task to be unscheduled
		if (entry->scheduled) {
			/* When a scheduled task is linked to another cpu from this cpu, there's no guarantee
			 * on the order in which gschedule is called on both cpus.
			 * If it already has scheduled_on set to the other
			 * cpu, then we have to preserve it and can't just set it to NO_CPU.
			 */
			if (tsk_rt(entry->scheduled)->scheduled_on == entry->cpu) {
				tsk_rt(entry->scheduled)->scheduled_on = NO_CPU;
			}
			TRACE_TASK(entry->scheduled, "descheduled\n");
		}
	} else if (entry->scheduled) {
		next = entry->scheduled;
		tsk_rt(next)->scheduled_on = entry->cpu;
	}

	BUG_ON(next && get_exec_time(next) > get_exec_cost(next));

	// If next is a container, then perform cschedule to determine the fixed task to schedule
	if (is_container(next)) {
		edfsc_cschedule(tsk_rt(next)->edfsc_params.domain, prev);
		if (bheap_node_in_heap(entry->hn))
			manage_idle_enforcement_timer(next);
		next = tsk_rt(next)->edfsc_params.domain->scheduled;
	}
	// When next is migrating, but the previously scheduled realtime task is a container,
	// we must properly restore the background-scheduled task (if any) to its correct queue/heap
	else if (is_container(entry->scheduled) && next != entry->scheduled) {
		struct task_struct** child = &tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled;
		// No need to handle fixed tasks, cschedule will do that when it runs next
		if (*child && is_migrating(*child)) {
			int background_out_of_time = budget_enforced(*child) && budget_exhausted(*child);
			BUG_ON(is_queued(*child));
			BUG_ON(tsk_rt(*child)->linked_on != NO_CPU);
			tsk_rt(*child)->edfsc_params.container_task = NULL;
			// If migrating and done
			if (is_completed(*child) || background_out_of_time) {
				g_job_completion(*child, background_out_of_time);
			// If migrating and not blocked
			} else if (is_current_running()) {
				requeue(*child);
			}
			// Regardless, we never "freeze" a migrating task in a container
			*child = NULL;
		}
	}

	// Tell LITMUS^RT that we've chosen a task and are done scheduling after return
	sched_state_task_picked();
	raw_spin_unlock_irqrestore(&g_lock, flags);

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE("g_lock released, next=0x%p\n", next);
	if (next)
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	else if (exists && !next)
		TRACE("becomes idle at %llu.\n", litmus_clock());
#endif

	return next;
}

/*
 * Task addition, stabilization, and container task reweighting heuristic to
 * be run every container task period.
 */
static enum hrtimer_restart container_boundary(struct hrtimer *timer)
{
	int i;
	struct list_head *it;
	struct list_head *temp;
	u64 u_extra;
	cont_domain_t *container;
	struct task_struct *t;
	int num_cpus = num_online_cpus();
	unsigned long flags;

	TS_SCHED_TIMER_START

	raw_spin_lock_irqsave(&g_lock, flags);

	t = NULL;

	// Try to add tasks from the queue
	list_for_each_safe(it, temp, &pending_adds) {
		container = NULL;
		t = task_of_list_node(it);
		list_del_init(it);
		//sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); // Best fit
		sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_higher_prio, NULL); // Worst fit
		if (to_fp(num_cpus) > get_rt_utilization(t) + sys_util) {
			for (i = 0; i < num_cpus; i++) {
				if (to_fp(1) > get_rt_utilization(t) + container_list[i]->f_util) {
				//if (to_fp(1) > get_rt_utilization(t) + container_domains[i].f_util) {
					//container = &(container_domains[i]);
					container = container_list[i]; // Used for best/worst fit
					break;
				}
			}
			if (container) {
				tsk_rt(t)->domain = (rt_domain_t*)container;
				tsk_rt(t)->edfsc_params.container_task = container->container;
				container->f_util += get_rt_utilization(t);
			} else {
				tsk_rt(t)->domain = &gsched_domain;
				tsk_rt(t)->edfsc_params.container_task = NULL;
				m_util += get_rt_utilization(t);
				list_add(&t->edfsc_qnode, &migrating_tasks);
			}
			sys_util += get_rt_utilization(t);
			sys_changed = 1;
		}
		/* Unblock the task waiting on our admission decision. It will detect
		 * whether it has been admitted by examining if tsk_rt(t)->domain != NULL.
		 * This sets the state to TASK_RUNNING, adds the task to the run
		 * queue, and runs edfsc_task_new(). That function will then invoke the
		 * scheduler once the task is setup and our state is consistent.
		 * XXX: It's unclear when we return from wake_up_new_task(), thus we
		 * may be deferring other container boundary computations for far
		 * longer than we should.
		 */
		raw_spin_unlock_irqrestore(&g_lock, flags);
		BUG_ON(!wake_up_process(t));
		raw_spin_lock_irqsave(&g_lock, flags);
	}

	// Attempt to move migrating tasks into containers
	// TODO optimize this so we don't actually have to iterate over all the
	// migrating tasks and potentially all the containers every period for a
	// best-case Omega(m) and worst-case O(m^2) work---only once the scheduler
	// is actually working
	// Done, only does stabilization when stuff changes in the system
	// According to the paper, when we migrate, we must reserve space in the container.
	// We do this by adding a fake task that ultimately doesn't release any jobs.
	// This is represented here by adding the utilization to sys_util,
	// which will be subtracted when the migrating task is actually changed to fixed.
	if (sys_changed) { // change this to false to disable stabilization
		list_for_each_safe(it, temp, &migrating_tasks) {
			struct task_struct* t = task_of_list_node(it);
			// Although selecting a migrating task to be moved into a container
			// doesn't change m_util and the container's f_util until after the move,
			// the move is guaranteed to happen before the next container_boundary
			// where we check all the utilization, so it's fine to account for it now.
			if (!(tsk_rt(t)->edfsc_params.move_to)) {
				tsk_rt(t)->edfsc_params.move_to = NULL;
				container = NULL;
				//sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); // Best fit
				sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_higher_prio, NULL); // Worst fit
				for (i = 0; i < num_cpus; i++) {
					u64 leftover = to_fp(1) - container_domains[i].f_util;
					if (to_fp(1) > get_rt_utilization(t) + container_list[i]->f_util &&
					//if (to_fp(1) > get_rt_utilization(t) + container_domains[i].f_util &&
					    to_fp(num_cpus) > get_rt_utilization(t) + sys_util) {
						//container = &(container_domains[i]);
						container = container_list[i]; // Used for best/worst fit
						break;
					}
				}
				if (container) {
					list_del_init(it);
					container->f_util += get_rt_utilization(t);
					sys_util += get_rt_utilization(t);
					tsk_rt(t)->edfsc_params.move_to = container;
					sys_changed = 1;
				}
			}
		}
	}

	// If needed, reweight containers using EqualOver heuristic
	if (sys_changed) {
		int remaining;
		// Sort containers by the utilization of their fixed tasks
		sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL);
		u_extra = to_fp(num_cpus) - sys_util;
		// Fully provision all the container tasks we can
		for (i = 0; i < num_cpus && u_extra >= to_fp(1) - container_list[i]->f_util; i++) {
			struct task_struct* t = container_list[i]->container;
			tsk_rt(t)->task_params.utilization = to_fp(1);
			u_extra -= to_fp(1) - container_list[i]->f_util;
		}
		// Split the extra capacity between the remaining container tasks
		// XXX this is actually dangerous as hell, right? Since overheads are
		// non-zero, this will make tardiness grow unboundedly for migrating
		// tasks unless we're saved by slack stealing. MinOrFull is also bad
		// because it will cause tardiness to grow unboundedly for fixed tasks
		// when overheads are considered. Oh noooooooooooo---
		// ---Here's a bogus idea that might just work: split the difference.
		// Basically act like migrating tasks are another processor (or two or
		// three or .. or m) and split the extra capacity evenly among
		// containers and the migrating tasks. In reality we'll need something
		// like that anyway, and it should at least be less dangerous.
		u_extra = u_extra / 2;
		remaining = num_cpus - i;
		for (; i < num_cpus; i++) {
			struct task_struct* t = container_list[i]->container;
			u64 temp_val = container_list[i]->f_util + u_extra / remaining;
			tsk_rt(t)->task_params.utilization = (temp_val < to_fp(1)) ? temp_val : to_fp(1);
			BUG_ON(tsk_rt(t)->task_params.utilization > to_fp(1));
		}
	}
	sys_changed = 0;

	INIT_LIST_HEAD(&pending_adds);

	// Re-release container tasks, or tell them they can if they're tardy
	for (i = 0; i < num_cpus; i++) {
		int armed = container_list[i]->timer_armed;
		t = container_list[i]->container;
		// If the container tasks are currently scheduled, update their budget
		if (armed) {
			update_container_budget(t);
		}
		/* Either the container has completed, or it is fully provisioned and in sync
		 * (thus not requiring a budget enforcement timer).
		 */
		if ((!armed && get_rt_period(t) == get_exec_cost(t)) ||
		    budget_exhausted(t) || is_completed(t)) {
			BUG_ON(is_queued(t));
			sched_trace_task_completion(t, 0);
			if (armed)
				cancel_idle_enforcement_timer(t);
			tsk_rt(t)->completed = 0;
			c_release(t);
		}
	}

	g_preempt_check();

	raw_spin_unlock_irqrestore(&g_lock, flags);

	TS_SCHED_TIMER_END

	hrtimer_add_expires_ns(timer, CONTAINER_PERIOD);
	return HRTIMER_RESTART;
}

/*
 * When preempt check scheduled a task to multiple cores (due to swapping and
 * multiple invocations of g_preempt_check), we should not wait for the stack
 * and should reschedule instead.
 */
static bool edfsc_should_wait_for_stack(struct task_struct* t)
{
	cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
	struct task_struct* tsk = tsk_rt(t)->edfsc_params.container_task;
	tsk = tsk ? tsk : t;
	return tsk_rt(tsk)->linked_on == tsk_rt(tsk)->scheduled_on &&
	       tsk_rt(tsk)->linked_on == entry->cpu;
}

/**
 * Fired when a task reaches its deadline and is pending deletion or migration
 */
static enum hrtimer_restart task_deadline_callback(struct hrtimer* timer)
{
	unsigned long flags;
	struct task_struct *t = container_of(timer, struct task_struct, edfsc_deadline_timer);

	raw_spin_lock_irqsave(&g_lock, flags);
	BUG_ON(is_container(t));
	// This is true only if set to be migrating from container_boundary
	if (tsk_rt(t)->edfsc_params.move_to) {
		// Can only be here when called from g_job_completion
		migrate_task(t);
	} else {
		// In the else case, only task_params is guaranteed to be valid.
		// However, in task_exit we stored information in task_params.cpu
		// to help us do remove operations.
		// A move_to of NULL means deletion
		if (tsk_rt(t)->task_params.cpu == NO_CPU)
			g_remove_task(t);
		else
			c_remove_task(t);
		// Release our reference to the task struct
		put_task_struct(t);
	}
	list_del_init(&t->edfsc_qnode);
	raw_spin_unlock_irqrestore(&g_lock, flags);
	return HRTIMER_NORESTART;
}

/**
 * This /always/ runs after admission succeeds, so we can rely on
 * edfsc_admit_task() handling most of the initialization.
 * g_lock is not yet held
 */
static void edfsc_task_new(struct task_struct* t, int on_rq, int is_scheduled)
{
	unsigned long flags;
	cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(t));

	tsk_rt(t)->sporadic_release = 0;
	TRACE("EDF-sc: task new %d\n", t->pid);

	// Create a timer that we'll use to delay accounting during migrations
	hrtimer_init(&t->edfsc_deadline_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	t->edfsc_deadline_timer.function = task_deadline_callback;

	raw_spin_lock_irqsave(&g_lock, flags);
	release_at(t, litmus_clock());
	sched_trace_task_release(t);
	tsk_rt(t)->linked_on = NO_CPU;
	tsk_rt(t)->scheduled_on = NO_CPU;
	// Queue this task and request a reschedule
	if (on_rq || is_scheduled) {
		requeue(t);
		if (is_migrating(t)) {
			g_preempt_check();
		} else if (is_fixed(t)) {
			c_preempt_check((cont_domain_t*)tsk_rt(t)->domain);
		}
		preempt(entry);
	}
	raw_spin_unlock_irqrestore(&g_lock, flags);
}

/**
 * This is called by LITMUS when our task is being woken up after having
 * previously blocked on something like console or disk I/O. This is a pair to
 * the `if (blocks) unlink(prev);` in edfsc_gschedule().
 */
static void edfsc_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	lt_t now = litmus_clock();
	cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(task));

	TRACE_TASK(task, "wake_up at %llu\n", now);
	raw_spin_lock_irqsave(&g_lock, flags);
	now = litmus_clock();
	if (is_sporadic(task) && is_tardy(task, now)) {
		inferred_sporadic_job_release_at(task, now);
	}
	if (!is_queued(task) && tsk_rt(task)->domain)
		requeue(task);
	if (is_migrating(task))
		g_preempt_check();
	else if (is_fixed(task))
		c_preempt_check((cont_domain_t*)tsk_rt(task)->domain);
	preempt(entry);
	raw_spin_unlock_irqrestore(&g_lock, flags);
}

static void edfsc_task_block(struct task_struct *t)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&g_lock, flags);
	if (is_migrating(t) && tsk_rt(t)->edfsc_params.container_task) {
		tsk_rt(t)->edfsc_params.container_task = NULL;
	}
	raw_spin_unlock_irqrestore(&g_lock, flags);
}

/**
 * This is called by LITMUS before our task is switched to another scheduler.
 * During task termination (do_exit()), LITMUS first switches the scheduler
 * to SCHED_FIFO before running the normal Linux task termination procedure.
 * After we return from this, `t` may or may not still exist. So we should have
 * no outstanding handles to any part of the task struct after this point.
 */
static void edfsc_task_exit(struct task_struct* t)
{
	unsigned long flags;
	lt_t now, unaccount_time = 0;
	cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(t));

	BUG_ON(is_container(t));
	raw_spin_lock_irqsave(&g_lock, flags);
	TRACE_TASK(t, "called edfsc_task_exit\n");

	// Remove this task from all members of its scheduling domain
	if (is_fixed(t)) {
		tsk_rt(t)->task_params.cpu =
			((cont_domain_t*)tsk_rt(t)->domain)->container->rt_param.edfsc_params.id;
		if (is_queued(t))
			remove(tsk_rt(t)->domain, t);
		else {
			// If we're fixed and not on the ready queues, we should be currently running
			BUG_ON(((cont_domain_t*)tsk_rt(t)->domain)->scheduled != t);
			BUG_ON(t != current);
			((cont_domain_t*)tsk_rt(t)->domain)->scheduled = NULL;
		}
	} else {
		tsk_rt(t)->task_params.cpu = NO_CPU;
		list_del_init(&t->edfsc_qnode);
		if (tsk_rt(t)->edfsc_params.container_task != NULL) {
			BUG_ON(tsk_rt(t)->edfsc_params.container_task->rt_param.edfsc_params.domain->scheduled != t);
			tsk_rt(t)->edfsc_params.container_task->rt_param.edfsc_params.domain->scheduled = NULL;
		} else {
			unlink(t);
			entry->scheduled = NULL;
		}
	}
	tsk_rt(t)->domain = NULL;
	BUG_ON(is_queued(t));

	/* To preserve EDF-sc scheduling invariants, we can only release a task's
	 * utilization at the greater of completion or deadline boundary. Thus, here
	 * we schedule a timer to handle this unaccounting of utilization.
	 */
	now = litmus_clock();
	if (is_released(t, now)) {
		/* If a task has already been released, no future jobs are pending and we can
		 * just unaccount at the current deadline.
		 */
		unaccount_time = get_deadline(t);
	} else {
		/* If the task has yet to be released, but we still haven't reached the
		 * deadline of its last-finished job, wait for that deadline. Otherwise
		 * we're after a deadline and before a release, so just remove now.
		 */
		if (lt_after(tsk_rt(t)->edfsc_params.prev_deadline, now))
			unaccount_time = tsk_rt(t)->edfsc_params.prev_deadline;
		else
			unaccount_time = 0;
	}

	/* Take out an extra reference on the task struct so that it's not freed until
	 * the deadline boundary timer fires and we finish with it
	 */
	get_task_struct(t);
	// Make it clear that this task is going away
	tsk_rt(t)->edfsc_params.move_to = NULL;

	if (unaccount_time == 0) {
		raw_spin_unlock_irqrestore(&g_lock, flags);
		// Don't bother setting a zero-length timer - just skip straight to the callback
		task_deadline_callback(&t->edfsc_deadline_timer);
	} else {
		list_add(&t->edfsc_qnode, &pending_removes);
		raw_spin_unlock_irqrestore(&g_lock, flags);
		hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(unaccount_time),
		              HRTIMER_MODE_ABS_PINNED);
	}
}

static struct domain_proc_info edfsc_domain_proc_info;

static long edfsc_get_domain_proc_info(struct domain_proc_info **ret)
{
	*ret = &edfsc_domain_proc_info;
	return 0;
}

static void edfsc_setup_domain_proc(void)
{
	int cpu;
	// We don't support release master
	int num_rt_cpus = num_online_cpus();
	struct cd_mapping *cpu_map, *domain_map;

	memset(&edfsc_domain_proc_info, 0, sizeof(edfsc_domain_proc_info));
	init_domain_proc_info(&edfsc_domain_proc_info, num_rt_cpus, num_rt_cpus + 1);
	edfsc_domain_proc_info.num_cpus = num_rt_cpus;
	edfsc_domain_proc_info.num_domains = num_rt_cpus + 1;

	for (cpu = 0; cpu < num_online_cpus(); ++cpu) {
		/* add one-to-one relation for the container domains */
		cpu_map = &edfsc_domain_proc_info.cpu_to_domains[cpu];
		domain_map = &edfsc_domain_proc_info.domain_to_cpus[cpu];
		cpu_map->id = cpu;
		domain_map->id = cpu;
		cpumask_set_cpu(cpu, cpu_map->mask);
		cpumask_set_cpu(cpu, domain_map->mask);

		/* add all-to-one relation for the global domain */
		cpu_map = &edfsc_domain_proc_info.cpu_to_domains[cpu];
		domain_map = &edfsc_domain_proc_info.domain_to_cpus[num_rt_cpus];
		cpu_map->id = cpu;
		domain_map->id = num_rt_cpus;
		cpumask_set_cpu(num_rt_cpus, cpu_map->mask);
		cpumask_set_cpu(cpu, domain_map->mask);
	}
}

static long edfsc_activate_plugin(void)
{
	/* TODO This will need to:
	 * - Initialize the containers and container tasks
	 *   (or can that be done at least partially in the module init function?
	 *   First releases have to be here, but setting up data structures might
	 *   be reusable if we don't destroy them when the plugin is deactivated)
	 * - ...
	 */
	int i;
	lt_t now;
	cpu_entry_t* entry;
	struct task_struct* t;

	edfsc_setup_domain_proc();

	INIT_LIST_HEAD(&pending_adds);
	INIT_LIST_HEAD(&migrating_tasks);
	INIT_LIST_HEAD(&pending_removes);

	bheap_init(&edfsc_cpu_heap);

	// Set up the container boundary timer
	hrtimer_init(&container_release_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	container_release_timer.function = container_boundary;

	edf_domain_init(&gsched_domain, NULL, g_release_jobs);

	container_tasks = kmalloc(sizeof(struct task_struct) * num_online_cpus(), GFP_KERNEL);
	container_domains = kmalloc(sizeof(cont_domain_t) * num_online_cpus(), GFP_KERNEL);
	container_list = kmalloc(sizeof(cont_domain_t*) * num_online_cpus(), GFP_KERNEL);
	edfsc_cpu_heap_node = kmalloc(sizeof(struct bheap_node) * num_online_cpus(), GFP_KERNEL);

	sys_util = to_fp(0);
	m_util = to_fp(0);
	sys_changed = 1;

	memset(container_tasks, 0, sizeof(struct task_struct) * num_online_cpus());
	memset(container_domains, 0, sizeof(cont_domain_t) * num_online_cpus());

	// Initialize container domains
	for (i = 0; i < num_online_cpus(); i++) {
		edf_domain_init(&container_domains[i].domain, c_check_resched, NULL);
		container_domains[i].scheduled = NULL;
		container_domains[i].container = &container_tasks[i];
		container_domains[i].f_util = to_fp(0);
		hrtimer_init(&(container_domains[i].idle_enforcement_timer), CLOCK_MONOTONIC,
		             HRTIMER_MODE_ABS);
		container_domains[i].idle_enforcement_timer.function = on_idle_enforcement_timeout;

		// Name the task its container ID mapped to ASCII
		snprintf(container_tasks[i].comm, TASK_COMM_LEN, "%d", i);
		container_tasks[i].pid = -i;
		tsk_rt(&container_tasks[i])->task_params.exec_cost = CONTAINER_PERIOD;
		tsk_rt(&container_tasks[i])->task_params.period = CONTAINER_PERIOD;
		tsk_rt(&container_tasks[i])->task_params.utilization = to_fp(1);
		tsk_rt(&container_tasks[i])->task_params.relative_deadline = CONTAINER_PERIOD;
		tsk_rt(&container_tasks[i])->task_params.budget_policy = PRECISE_ENFORCEMENT;
		tsk_rt(&container_tasks[i])->edfsc_params.container_task = NULL;
		tsk_rt(&container_tasks[i])->domain = &gsched_domain;
		tsk_rt(&container_tasks[i])->edfsc_params.domain = &container_domains[i];
		tsk_rt(&container_tasks[i])->sporadic_release = 0;
		tsk_rt(&container_tasks[i])->edfsc_params.id = i;
		tsk_rt(&container_tasks[i])->heap_node = bheap_node_alloc(GFP_ATOMIC);
		tsk_rt(&container_tasks[i])->rel_heap = release_heap_alloc(GFP_ATOMIC);
		tsk_rt(&container_tasks[i])->linked_on = NO_CPU;
		tsk_rt(&container_tasks[i])->scheduled_on = NO_CPU;

		if (!tsk_rt(&container_tasks[i])->heap_node || !tsk_rt(&container_tasks[i])->rel_heap) {
			printk(KERN_WARNING "litmus: no more heap node memory!?\n");
			return -ENOMEM;
		} else {
			bheap_node_init(&tsk_rt(&container_tasks[i])->heap_node, &container_tasks[i]);
		}

		container_tasks[i].policy = SCHED_LITMUS;

		// Populate the container_list while we're at it.
		container_list[i] = &container_domains[i];

		// Link heap nodes to CPU structures
		entry = &per_cpu(edfsc_cpu_entries, i);
		entry->cpu = i;
		entry->scheduled = NULL;
		entry->linked = NULL;
		entry->hn = &edfsc_cpu_heap_node[i];
		bheap_node_init(&entry->hn, entry);
	}

	now = litmus_clock();
	for (i = 0; i < num_online_cpus(); i++) {
		t = &container_tasks[i];
		entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
		((cont_domain_t*)tsk_rt(t)->edfsc_params.domain)->scheduled_last_exec_time = now;
		release_at(t, now);
		link_task_to_cpu(t, entry);
	}

	// Start the container boundary timer
	hrtimer_start(&container_release_timer, ns_to_ktime(now + CONTAINER_PERIOD),
	              HRTIMER_MODE_ABS_PINNED);

	return 0;
}

static long edfsc_deactivate_plugin(void)
{
	int i;
	struct list_head *l, *temp;
	struct task_struct* t;

	// Stop the container boundary timer
	hrtimer_try_to_cancel(&container_release_timer);

	list_for_each_safe(l, temp, &pending_removes) {
		t = task_of_list_node(l);
		list_del_init(l);
		hrtimer_try_to_cancel(&t->edfsc_deadline_timer);
	}

	for (i = 0; i < num_online_cpus(); i++) {
		bheap_node_free(tsk_rt(&container_tasks[i])->heap_node);
		release_heap_free(tsk_rt(&container_tasks[i])->rel_heap);
		hrtimer_try_to_cancel(&container_domains[i].idle_enforcement_timer);
	}

	kfree(container_tasks);
	kfree(container_domains);
	kfree(container_list);
	kfree(edfsc_cpu_heap_node);

	destroy_domain_proc_info(&edfsc_domain_proc_info);
	return 0;
}

/**
 * This is called before is_realtime(tsk) and before edfsc_task_new().
 * We should be inside the context of the process attempting to become realtime.
 * Called with preemption disabled and g_lock /not/ held.
 */
static long edfsc_admit_task(struct task_struct* tsk)
{
	unsigned long flags;

	// We assume that we're running in the context of `tsk`
	BUG_ON(tsk != current);

	raw_spin_lock_irqsave(&g_lock, flags);
	// Make sure that edfsc_params doesn't contain garbage
	// Note that edfsc_params->domain will always be NULL for non-container tasks
	memset(&tsk_rt(tsk)->edfsc_params, 0, sizeof(struct edfsc_params));
	// This is how we tell if we've been admitted, so make sure it's unset first
	// Note that this represents the domain we're being scheduled in
	tsk_rt(tsk)->domain = NULL;
	// The admission test needs to know our utilization
	tsk_rt(tsk)->task_params.utilization = fp_div(get_exec_cost(tsk), get_rt_period(tsk));
	// Add us to the queue of tasks waiting on admission
	list_add_tail(&tsk->edfsc_qnode, &pending_adds);
	raw_spin_unlock_irqrestore(&g_lock, flags);

	// We don't know if we can be admitted until a container job boundary is reached,
	// so block until the scheduler can make that decision
	set_current_state(TASK_INTERRUPTIBLE); // Changed from TASK_RUNNING
	preempt_enable_no_resched();
	schedule();
	// LITMUS^RT expects preemption to still be disabled after we return
	preempt_disable();

	// We only resume execution here after admission tests complete OR if we
	// were interrupted by a signal.
	if (tsk_rt(tsk)->domain != NULL) {
		return 0; // Successfully admitted
	} else {
		// We'll still be on pending_adds if interrupted by a signal
		struct list_head* l;
		struct list_head* temp;

		raw_spin_lock_irqsave(&g_lock, flags);
		list_for_each_safe(l, temp, &pending_adds) {
			if (task_of_list_node(l) == tsk) {
				list_del_init(l);
				raw_spin_unlock_irqrestore(&g_lock, flags);
				return -EINTR; // Interrupted
			}
		}
		raw_spin_unlock_irqrestore(&g_lock, flags);
		return -ENOSPC; // Rejected
	}
}

/* Plugin object */
static struct sched_plugin edfsc_plugin __cacheline_aligned_in_smp = {
	.plugin_name           = "EDF-sc",
	.finish_switch         = g_finish_switch,
	.task_new              = edfsc_task_new,
	.complete_job          = complete_job,
	.task_exit             = edfsc_task_exit,
	.schedule              = edfsc_gschedule,
	.task_wake_up          = edfsc_task_wake_up,
	.task_block            = edfsc_task_block,
	.admit_task            = edfsc_admit_task,
	.activate_plugin       = edfsc_activate_plugin,
	.deactivate_plugin     = edfsc_deactivate_plugin,
	.should_wait_for_stack = edfsc_should_wait_for_stack,
	.get_domain_proc_info  = edfsc_get_domain_proc_info,
};

static int __init init_edfsc(void)
{
	return register_sched_plugin(&edfsc_plugin);
}

module_init(init_edfsc);