/* Assumed include set: the original header names were not preserved, so the
 * list below is inferred from the identifiers used in this file. */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/smp.h>
#include <linux/wait.h>
#include <linux/sched.h>

#include <litmus/litmus.h>
#include <litmus/preempt.h>
#include <litmus/np.h>
#include <litmus/budget.h>
#include <litmus/jobs.h>
#include <litmus/bheap.h>
#include <litmus/rt_domain.h>
#include <litmus/sched_trace.h>
#include <litmus/trace.h>
#include <litmus/debug_trace.h>
#include <litmus/reservations/ext_reservation.h>
#include <litmus/reservations/gedf_reservation.h>

// Needed to store context during cross-CPU function calls
struct csd_wrapper {
	struct call_single_data csd;
	struct gedf_reservation_environment* gedf_env;
};

/* ******************************************************************************* */

/* returns 1 if res of a has earlier deadline than res of b */
static int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
{
	return higher_res_prio(bheap2res(a), bheap2res(b));
}

/* Functions used to maintain a heap of cpu entries in edf order
 * cpu_lower_prio is the comparator function used to enforce edf order
 *
 * The next two functions must be called under domain.ready_lock of the reservation
 * update_cpu_position is called when cpu->linked changes
 * lowest_prio_cpu returns the lowest prio cpu
 */
static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
{
	struct gedf_cpu_entry *a, *b;

	a = _a->value;
	b = _b->value;

	return higher_res_prio(&b->linked->res, &a->linked->res);
}

static void update_cpu_position(struct gedf_cpu_entry* entry, struct bheap* cpu_heap)
{
	if (likely(bheap_node_in_heap(entry->hn)))
		bheap_delete(cpu_lower_prio, cpu_heap, entry->hn);
	bheap_insert(cpu_lower_prio, cpu_heap, entry->hn);
}

static struct gedf_cpu_entry* lowest_prio_cpu(struct bheap* cpu_heap)
{
	struct bheap_node* hn;

	hn = bheap_peek(cpu_lower_prio, cpu_heap);
	return hn->value;
}

static int edf_preemption_needed(
	struct gedf_reservation_environment* gedf_env,
	struct gedf_reservation* gedf_res)
{
	/* we need the read lock for edf_ready_queue */
	/* no need to preempt if there is nothing pending */
	if (bheap_empty(&gedf_env->domain.ready_queue))
		return 0;
	/* we need to reschedule if res doesn't exist */
	if (!gedf_res)
		return 1;

	/* NOTE: We cannot check for non-preemptibility since we
	 * don't know what address space we're currently in.
	 */
	return higher_res_prio(__next_ready_res(&gedf_env->domain), &gedf_res->res);
}

/* ******************************************************************************** */
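/* The functions below manipulate two distinct per-CPU fields:
 *   entry->linked    - the reservation that the G-EDF logic has decided
 *                      should run on this CPU (updated under the ready lock,
 *                      possibly from a remote CPU)
 *   entry->scheduled - the reservation actually dispatched on this CPU
 *                      (synchronized with ->linked in gedf_env_dispatch())
 * Keeping the two separate lets linking decisions be made remotely while the
 * actual switch happens locally after litmus_reschedule().
 */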
//TODO: add support for checking non-preemptivity
static void preempt(struct gedf_cpu_entry* entry)
{
	if (!entry->scheduled ||
	    entry->scheduled->res.ops->is_np(&entry->scheduled->res, entry->id))
		litmus_reschedule(entry->id);
}

static void requeue(
	struct gedf_reservation_environment* gedf_env,
	struct gedf_reservation* gedf_res)
{
	BUG_ON(!gedf_res);
	BUG_ON(is_queued_res(&gedf_res->res));

	if (gedf_res->allow_early_release ||
	    lt_before_eq(gedf_res->res.replenishment_time, litmus_clock()))
		__add_ready_res(&gedf_env->domain, &gedf_res->res);
	else
		__add_release_res(&gedf_env->domain, &gedf_res->res);
}

static void link_task_to_cpu(
	struct gedf_reservation_environment* gedf_env,
	struct gedf_reservation* linked,
	struct gedf_cpu_entry* entry)
{
	struct gedf_cpu_entry* on_cpu;
	struct gedf_reservation* tmp;

	if (entry->linked)
		entry->linked->linked_on = NULL;

	if (linked) {
		on_cpu = linked->scheduled_on;
		if (on_cpu) {
			BUG_ON(on_cpu->linked == linked);

			if (entry != on_cpu) {
				tmp = on_cpu->linked;
				linked->linked_on = on_cpu;
				on_cpu->linked = linked;
				update_cpu_position(on_cpu, &gedf_env->cpu_heap);
				linked = tmp;
			}
		}
		if (linked)
			linked->linked_on = entry;
	}
	entry->linked = linked;
	update_cpu_position(entry, &gedf_env->cpu_heap);
}

static void unlink(
	struct gedf_reservation_environment* gedf_env,
	struct gedf_reservation* gedf_res)
{
	if (gedf_res->linked_on) {
		link_task_to_cpu(gedf_env, NULL, gedf_res->linked_on);
		gedf_res->linked_on = NULL;
	} else if (is_queued_res(&gedf_res->res)) {
		remove_res(&gedf_env->domain, &gedf_res->res);
	}
}

static void check_for_preemptions(struct gedf_reservation_environment* gedf_env)
{
	struct gedf_reservation* gedf_res;
	struct gedf_cpu_entry* last;

	if (bheap_empty(&gedf_env->cpu_heap))
		return;

	for (last = lowest_prio_cpu(&gedf_env->cpu_heap);
	     edf_preemption_needed(gedf_env, last->linked);
	     last = lowest_prio_cpu(&gedf_env->cpu_heap)) {
		gedf_res = (struct gedf_reservation*)__take_ready_res(&gedf_env->domain);
		if (last->linked && last->linked->res.cur_budget)
			requeue(gedf_env, last->linked);
		link_task_to_cpu(gedf_env, gedf_res, last);
		preempt(last);
	}
}

/* ******************************************************************************* */

static void gedf_shutdown(
	struct reservation *res)
{
	res->env->ops->shutdown(res->env);
	clean_up_ext_reservation(res);
	kfree(res);
}

static int gedf_is_np(
	struct reservation *res,
	int cpu)
{
	return res->env->ops->is_np(res->env, cpu);
}

static int gedf_task_is_np(
	struct reservation *res,
	int cpu)
{
	struct task_struct* t = ((struct gedf_task_reservation*)res)->task;

	if (is_user_np(t)) {
		request_exit_np(t);
		return 1;
	} else if (is_kernel_np(t))
		return 1;
	return 0;
}

static void gedf_task_shutdown(
	struct reservation *res)
{
	clean_up_ext_reservation(res);
	kfree(res);
}

static void gedf_on_preempt(
	struct reservation *res,
	int cpu)
{
	res->env->ops->suspend(res->env, cpu);
}

static void gedf_on_schedule(
	struct reservation *res,
	int cpu)
{
	res->env->ops->resume(res->env, cpu);
}

static struct task_struct* gedf_dispatch_client(
	struct reservation* res,
	lt_t* time_slice,
	int cpu)
{
	return res->env->ops->dispatch(res->env, time_slice, cpu);
}

static struct task_struct* gedf_task_dispatch_client(
	struct reservation* res,
	lt_t* time_slice,
	int cpu)
{
	struct gedf_task_reservation* tmp = (struct gedf_task_reservation*)res;
	return tmp->task;
}
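/* Priority encoding used throughout this file: a reservation's priority is
 * stored as ULLONG_MAX minus its absolute deadline (replenishment time plus
 * relative deadline), so a numerically larger value means an earlier deadline
 * and hence a higher priority for higher_res_prio(). The OMLP code below
 * recovers the deadline again as ULLONG_MAX - priority.
 */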
static void gedf_replenish_budget(
	struct reservation* res,
	int cpu)
{
	struct gedf_container_reservation* gedf_cont_res =
		(struct gedf_container_reservation*)res;

	res->budget_consumed = 0;
	res->cur_budget = gedf_cont_res->max_budget;
	res->replenishment_time += gedf_cont_res->period;
	res->priority = ULLONG_MAX - res->replenishment_time -
			gedf_cont_res->relative_deadline;
}

static void gedf_task_replenish_budget(
	struct reservation* res,
	int cpu)
{
	struct task_struct* t = ((struct gedf_task_reservation*)res)->task;

	if (is_completed(t)) {
		sched_trace_task_completion(t, 0);
		prepare_for_next_period(t);
		tsk_rt(t)->completed = 0;
		sched_trace_task_release(t);
		res->priority = ULLONG_MAX - get_deadline(t);
		res->replenishment_time = get_release(t);
	} else {
		sched_trace_task_completion(t, 1);
		res->replenishment_time += get_rt_period(t);
		res->priority = ULLONG_MAX - res->replenishment_time -
				get_rt_relative_deadline(t);
		TRACE_TASK(t, "overrun budget!\n");
	}
	//TODO: hacky. Reset exec_time so arm_enforcement_timer doesn't
	// yell at us for trying to set a timer while the budget is overrun.
	tsk_rt(t)->job_params.exec_time = 0;
	res->budget_consumed = 0;
	res->cur_budget = get_exec_cost(t);
}

static void gedf_drain_budget(
	struct reservation* res,
	lt_t how_much,
	int cpu)
{
	if (how_much > res->cur_budget)
		res->cur_budget = 0;
	else
		res->cur_budget -= how_much;

	res->budget_consumed += how_much;
	res->budget_consumed_total += how_much;

	res->env->ops->update_time(res->env, how_much, cpu);
}

static void gedf_task_drain_budget(
	struct reservation* res,
	lt_t how_much,
	int cpu)
{
	struct task_struct* t = ((struct gedf_task_reservation*)res)->task;

	if (how_much > res->cur_budget || is_completed(t) ||
	    (budget_enforced(t) && budget_exhausted(t)))
		res->cur_budget = 0;
	else
		res->cur_budget -= how_much;

	res->budget_consumed += how_much;
	res->budget_consumed_total += how_much;
}

static struct reservation_ops gedf_cont_ops = {
	.drain_budget = gedf_drain_budget,
	.replenish_budget = gedf_replenish_budget,
	.dispatch_client = gedf_dispatch_client,
	.on_schedule = gedf_on_schedule,
	.on_preempt = gedf_on_preempt,
	.is_np = gedf_is_np,
	.shutdown = gedf_shutdown
};

static struct reservation_ops gedf_task_ops = {
	.drain_budget = gedf_task_drain_budget,
	.replenish_budget = gedf_task_replenish_budget,
	.dispatch_client = gedf_task_dispatch_client,
	.is_np = gedf_task_is_np,
	.shutdown = gedf_task_shutdown
};

long alloc_gedf_task_reservation(
	struct gedf_task_reservation** _res,
	struct task_struct* task)
{
	struct gedf_task_reservation* gedf_task_res;

	gedf_task_res = kzalloc(sizeof(*gedf_task_res), GFP_KERNEL);
	if (!gedf_task_res)
		return -ENOMEM;

	init_ext_reservation(&gedf_task_res->gedf_res.res, task->pid, &gedf_task_ops);

	gedf_task_res->task = task;
	gedf_task_res->gedf_res.res.priority =
		ULLONG_MAX - get_rt_relative_deadline(task);
	gedf_task_res->gedf_res.res.cur_budget = get_exec_cost(task);
	gedf_task_res->gedf_res.allow_early_release = is_early_releasing(task);

	*_res = gedf_task_res;
	return 0;
}

long alloc_gedf_container_reservation(
	struct gedf_container_reservation** _res,
	int id,
	lt_t max_budget,
	lt_t period,
	lt_t relative_deadline)
{
	struct gedf_container_reservation* gedf_cont_res;

	gedf_cont_res = kzalloc(sizeof(*gedf_cont_res), GFP_KERNEL);
	if (!gedf_cont_res)
		return -ENOMEM;

	init_ext_reservation(&gedf_cont_res->gedf_res.res, id, &gedf_cont_ops);

	gedf_cont_res->max_budget = max_budget;
	gedf_cont_res->period = period;
	gedf_cont_res->relative_deadline = relative_deadline;
	gedf_cont_res->gedf_res.allow_early_release = 0;

	*_res = gedf_cont_res;
	return 0;
}
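/* Typical use of the two constructors above (a sketch; the plugin code that
 * performs these steps is not part of this file): a container reservation is
 * created with alloc_gedf_container_reservation(), its res.env is pointed at
 * an environment obtained from alloc_gedf_reservation_environment(), and
 * per-task reservations from alloc_gedf_task_reservation() are then inserted
 * into that environment via env->ops->add_res().
 */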
/* ******************************************************************************** */

static void gedf_env_shutdown(
	struct reservation_environment* env)
{
	struct gedf_reservation_environment* gedf_env;
	struct reservation* res;
	unsigned long flags;

	gedf_env = container_of(env, struct gedf_reservation_environment, env);

	domain_suspend_releases(&gedf_env->domain);

	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);

	/* call shutdown on all scheduled reservations */
	while (!list_empty(&env->all_reservations)) {
		res = list_first_entry(&env->all_reservations, struct reservation, all_list);
		list_del(&res->all_list);
		res->ops->shutdown(res);
	}

	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);

	/* free memory */
	kfree(gedf_env->cpu_entries);
	kfree(gedf_env->cpu_node);
	kfree(gedf_env);
}

static int gedf_env_is_np(
	struct reservation_environment* env,
	int cpu)
{
	struct gedf_reservation_environment* gedf_env;
	struct gedf_reservation* scheduled;

	if (cpu == -1)
		return 0;

	gedf_env = container_of(env, struct gedf_reservation_environment, env);
	scheduled = gedf_env->cpu_entries[cpu].scheduled;
	return scheduled && scheduled->res.ops->is_np(&scheduled->res, cpu);
}

static struct reservation* gedf_find_res_by_id(
	struct reservation_environment* env,
	int id)
{
	struct reservation* res;
	struct gedf_reservation_environment* gedf_env;
	unsigned long flags;

	gedf_env = container_of(env, struct gedf_reservation_environment, env);

	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
	list_for_each_entry(res, &env->all_reservations, all_list) {
		if (res->id == id) {
			raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
			return res;
		}
	}
	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
	return NULL;
}

/* This assumes that it is only called from res itself requesting to be removed.
 * This WILL cause the rt task to become lost if res is a scheduling entity.
 */
static void gedf_env_remove_res(
	struct reservation_environment* env,
	struct reservation* res,
	int complete,
	int cpu)
{
	struct gedf_reservation_environment* gedf_env;
	struct gedf_reservation* gedf_res;
	unsigned long flags;

	gedf_env = container_of(env, struct gedf_reservation_environment, env);
	gedf_res = container_of(res, struct gedf_reservation, res);

	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
	gedf_res->will_remove = complete;
	gedf_res->blocked = !complete;
	list_del_init(&gedf_res->res.all_list);
	unlink(gedf_env, gedf_res);
	check_for_preemptions(gedf_env);
	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);

	/* After preempt is called, schedule will update budget tracking.
	 * In update_time, the environment will detect that res (which is scheduled)
	 * wants to be removed.
	 * If the reservation is flagged for removal, the shutdown callback is called.
	 * If the reservation is flagged as blocked, then it will not be requeued back
	 * into the domain, and will invoke the on_preempt callback in env_dispatch.
	 * Because we unlinked it, after env_dispatch, res is essentially gone.
	 */
}
static void gedf_env_add_res(
	struct reservation_environment* env,
	struct reservation* res,
	int cpu)
{
	struct gedf_reservation_environment* gedf_env;
	struct gedf_reservation* gedf_res;
	unsigned long flags;

	gedf_env = container_of(env, struct gedf_reservation_environment, env);
	gedf_res = container_of(res, struct gedf_reservation, res);

	res->par_env = env;

	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
	gedf_res->will_remove = 0;
	gedf_res->blocked = 0;
	list_add_tail(&gedf_res->res.all_list, &env->all_reservations);
	requeue(gedf_env, gedf_res);
	check_for_preemptions(gedf_env);
	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
}

/* try_resume_timer: Attempt to resume the release timer locally.
 * @param csd_info Pointer to `info` field of struct call_single_data
 * @note Used as IPI callback, do not call directly. Lockless.
 */
static void try_resume_timer(void *csd_info)
{
	struct csd_wrapper* csd_wrapper = csd_info;
	struct gedf_reservation_environment* gedf_env = csd_wrapper->gedf_env;
	int cpu = smp_processor_id();
	struct gedf_cpu_entry* entry = &gedf_env->cpu_entries[cpu];

	// Abort if this CPU was suspended before we could process the IPI
	if (!bheap_node_in_heap(entry->hn))
		goto out;
	domain_resume_releases(&gedf_env->domain);
out:
	kfree(csd_wrapper);
}

/* gedf_env_suspend: Remove the specified core from scheduling consideration
 * @param env Environment to modify
 * @param cpu CPU to remove if present.
 * @note Safe to call if core already removed. Skips lock in that case.
 */
static void gedf_env_suspend(
	struct reservation_environment* env,
	int cpu)
{
	struct gedf_reservation_environment* gedf_env;
	struct gedf_cpu_entry* entry;
	struct gedf_reservation* tmp;
	unsigned long flags;

	gedf_env = container_of(env, struct gedf_reservation_environment, env);
	entry = &gedf_env->cpu_entries[cpu];

	/* Ignore suspension requests on inactive cores.
	 * This will not errantly fail, as the first thing resume() does is re-add the node.
	 * This will only errantly pass if another core is simultaneously inside
	 * our critical section. The second check catches that.
	 * In all cases this will avoid taking the lock if we were never part of the container.
	 */
	if (!bheap_node_in_heap(entry->hn))
		return;

	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
	// Do not remove! See above comment.
	if (!bheap_node_in_heap(entry->hn))
		goto unlock;

	//TODO: More graceful way to handle a forbidden zone violation?
	// BUG_ON(env->ops->is_np(env, cpu));

	gedf_env->num_cpus--;

	/* on env suspension, we need to preempt scheduled tasks, and unlink linked tasks */
	if (entry->linked) {
		tmp = entry->linked;
		unlink(gedf_env, entry->linked);
		requeue(gedf_env, tmp);
	}

	if (entry->scheduled) {
		if (entry->scheduled->res.ops->on_preempt)
			entry->scheduled->res.ops->on_preempt(&entry->scheduled->res, cpu);
		if (entry->scheduled->scheduled_on == entry)
			entry->scheduled->scheduled_on = NULL;
	}
	entry->scheduled = NULL;

	/* this essentially removes the cpu from scheduling consideration */
	bheap_delete(cpu_lower_prio, &gedf_env->cpu_heap, entry->hn);

	check_for_preemptions(gedf_env);

	/* suspend rt_domain releases when the last core of the env is preempted,
	 * OR re-arm the release timer on a different CPU
	 */
	if (!gedf_env->num_cpus)
		domain_suspend_releases(&gedf_env->domain);
	else {
		struct csd_wrapper* csd_wrapper =
			kzalloc(sizeof(struct csd_wrapper), GFP_ATOMIC);
		csd_wrapper->gedf_env = gedf_env;
		csd_wrapper->csd.func = &try_resume_timer;
		csd_wrapper->csd.info = csd_wrapper;
		smp_call_function_single_async(
			lowest_prio_cpu(&gedf_env->cpu_heap)->id,
			&csd_wrapper->csd);
	}

unlock:
	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
}
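/* gedf_env_suspend() above and gedf_env_resume() below are reached through
 * the on_preempt/on_schedule callbacks of the enclosing reservation whenever
 * the parent environment takes a CPU away from, or hands a CPU back to, this
 * component. Membership of entry->hn in cpu_heap doubles as the "is this CPU
 * currently active in this environment" flag, which is what allows both
 * functions to bail out early without taking the ready lock.
 */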
/* gedf_env_resume: Add the specified core to scheduling consideration
 * @param env Environment to modify
 * @param cpu CPU to add if not yet added.
 * @note Safe to call if core already added.
 */
static void gedf_env_resume(
	struct reservation_environment* env,
	int cpu)
{
	struct gedf_reservation_environment* gedf_env;
	struct gedf_cpu_entry* entry;
	unsigned long flags;
	// Needs to be volatile or it may be optimized to gedf_env->num_cpus
	volatile int tmp_cpus;
	// For waking up forbidden-zone waiters
	int i;
	struct task_struct *next;
	struct reservation *next_res;

	gedf_env = container_of(env, struct gedf_reservation_environment, env);
	entry = &gedf_env->cpu_entries[cpu];

	// If we've already been resumed, do nothing
	if (bheap_node_in_heap(entry->hn))
		return;

	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
	// Check again. Our earlier check may have raced with this critical section
	if (bheap_node_in_heap(entry->hn)) {
		raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
		return;
	}

	// Save how many cpus were resumed before us (if none, we need to restart the timer)
	tmp_cpus = gedf_env->num_cpus;

	/* adds cpu back to scheduling consideration */
	bheap_insert(cpu_lower_prio, &gedf_env->cpu_heap, entry->hn);
	gedf_env->num_cpus++;

	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);

	// Keep this outside the lock. Resuming the timer may have side-effects.
	if (!tmp_cpus)
		domain_resume_releases(&gedf_env->domain);

#ifdef CONFIG_LITMUS_LOCKING
	// Wake up any forbidden-zone waiters
	for (i = 0; i < gedf_env->num_waiter_queues; i++) {
		if (i == cpu)
			continue;
		spin_lock_irqsave(&gedf_env->fz_waiters[i].lock, flags);
		while ((next = __waitqueue_remove_first(&gedf_env->fz_waiters[i]))) {
			next_res = (struct reservation *) tsk_rt(next)->plugin_state;
			wake_up_process(next);
		}
		spin_unlock_irqrestore(&gedf_env->fz_waiters[i].lock, flags);
	}
#endif
}
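/* gedf_env_dispatch() is the per-CPU scheduling entry point of the
 * environment: it synchronizes entry->scheduled with entry->linked (invoking
 * on_preempt/on_schedule as needed), then asks the scheduled reservation's
 * dispatch_client() for the task to run and caps *time_slice at that
 * reservation's remaining budget. With nothing scheduled it reports an
 * effectively unbounded time slice.
 */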
static struct task_struct* gedf_env_dispatch(
	struct reservation_environment* env,
	lt_t* time_slice,
	int cpu)
{
	struct gedf_reservation_environment* gedf_env;
	struct gedf_cpu_entry* entry;
	struct task_struct* next = NULL;
	unsigned long flags;
	int np = 0;
	int block = 0;

	gedf_env = container_of(env, struct gedf_reservation_environment, env);
	entry = &gedf_env->cpu_entries[cpu];
	BUG_ON(entry->id != cpu);

	if (entry->scheduled)
		np = entry->scheduled->res.ops->is_np(&entry->scheduled->res, cpu);

	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
	BUG_ON(!bheap_node_in_heap(entry->hn));
	BUG_ON(bheap_empty(&gedf_env->cpu_heap));

	if (entry->scheduled)
		block = entry->scheduled->blocked;

	/* update linked if linked for this cpu is empty */
	if (!entry->linked)
		check_for_preemptions(gedf_env);

	BUG_ON(!entry->linked && __peek_ready_res(&gedf_env->domain));

	/* if linked and scheduled differ, preempt and schedule accordingly */
	if ((!np || block) && entry->scheduled != entry->linked) {
		if (entry->scheduled) {
			if (entry->scheduled->res.ops->on_preempt)
				entry->scheduled->res.ops->on_preempt(&entry->scheduled->res, cpu);
			if (entry->scheduled->scheduled_on == entry)
				entry->scheduled->scheduled_on = NULL;
		}
		if (entry->linked) {
			entry->linked->scheduled_on = entry;
			if (entry->linked->res.ops->on_schedule)
				entry->linked->res.ops->on_schedule(&entry->linked->res, cpu);
		}
		entry->scheduled = entry->linked;
	}
	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);

	if (entry->scheduled) {
		/* let scheduled reservation decide what runs next */
		next = entry->scheduled->res.ops->dispatch_client(
			&entry->scheduled->res, time_slice, cpu);
		*time_slice = (*time_slice > entry->scheduled->res.cur_budget) ?
			entry->scheduled->res.cur_budget : *time_slice;
	} else {
		*time_slice = ULLONG_MAX;
	}

	return next;
}

static void gedf_env_update_time(
	struct reservation_environment* env,
	lt_t how_much,
	int cpu)
{
	struct gedf_reservation_environment* gedf_env;
	struct gedf_cpu_entry* entry;
	unsigned long flags;

	gedf_env = container_of(env, struct gedf_reservation_environment, env);
	entry = &gedf_env->cpu_entries[cpu];
	BUG_ON(entry->id != cpu);

	if (!entry->scheduled)
		return;

	/* Tell the scheduled res to drain its budget.
	 * In the situation of two cores having the same scheduled reservation
	 * (detailed in the comment below), the task will be out of budget. This
	 * means drain_budget just atomically sets cur_budget to 0 on drain.
	 * Therefore, no lock is needed for this operation.
	 */
	entry->scheduled->res.ops->drain_budget(&entry->scheduled->res, how_much, cpu);

	/* if flagged for removal from environment, invoke shutdown callback */
	if (entry->scheduled->will_remove) {
		/* assumed to have already been unlinked by whatever set will_remove */
		entry->scheduled->res.ops->shutdown(&entry->scheduled->res);
		entry->scheduled = NULL;
	}

	/* We need to lock this whole section due to how budget draining works.
	 * check_for_preemptions can be called before the budget is properly
	 * updated, which, through multiple parallel calls to
	 * check_for_preemptions, may end up linking a task that is out of budget
	 * (though it was not when check_for_preemptions examined it) to a core
	 * other than this one.
	 * That core can then experience multiple reschedule calls due to the
	 * multiple calls to check_for_preemptions, which will turn the linked
	 * out-of-budget task into the scheduled one.
	 * Now we have an interesting dilemma: this core and the other core both
	 * see that they are scheduling the same out-of-budget task. So we need a
	 * way to break symmetry and let one core do nothing. By checking for
	 * !cur_budget and replenishing the budget under a lock, we achieve this.
	 */
	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
	if (entry->scheduled && !entry->scheduled->res.cur_budget) {
		entry->scheduled->res.ops->replenish_budget(&entry->scheduled->res, cpu);
		/* unlink and requeue if not blocked and not np */
		if (!entry->scheduled->blocked &&
		    !entry->scheduled->res.ops->is_np(&entry->scheduled->res, cpu)) {
			unlink(gedf_env, entry->scheduled);
			requeue(gedf_env, entry->scheduled);
			check_for_preemptions(gedf_env);
		}
	}
	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
}
/* callback for how the domain will release jobs */
static void gedf_env_release_jobs(rt_domain_t* rt, struct bheap* res)
{
	unsigned long flags;
	struct gedf_reservation_environment* gedf_env =
		container_of(rt, struct gedf_reservation_environment, domain);

	raw_spin_lock_irqsave(&rt->ready_lock, flags);
	__merge_ready(rt, res);
	check_for_preemptions(gedf_env);
	raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
}

#ifdef CONFIG_LITMUS_LOCKING

/* Assumed headers for the locking support below (fdso lock types and the
 * LITMUS^RT prio wait-queue helpers). */
#include <litmus/fdso.h>
#include <litmus/wait.h>

/* called with IRQs off */
static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
{
	struct gedf_cpu_entry* linked_on;
	int check_preempt = 0;
	struct reservation *t_res, *prio_inh_res;
	struct gedf_reservation_environment *gedf_env;
	struct gedf_reservation *gedf_res;

	t_res = (struct reservation *) tsk_rt(t)->plugin_state;
	gedf_res = container_of(t_res, struct gedf_reservation, res);
	prio_inh_res = (struct reservation *) tsk_rt(prio_inh)->plugin_state;
	gedf_env = container_of(t_res->par_env, struct gedf_reservation_environment, env);

	raw_spin_lock(&gedf_env->domain.ready_lock);

	TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
	t_res->inh_res = prio_inh_res;

	linked_on = gedf_res->linked_on;

	/* If it is scheduled, then we need to reorder the CPU heap. */
	if (linked_on) {
		TRACE_TASK(t, "%s: linked on %d\n", __FUNCTION__, linked_on->id);
		/* holder is scheduled; need to re-order CPUs. */
		update_cpu_position(gedf_res->linked_on, &gedf_env->cpu_heap);
	} else {
		/* holder may be queued: first stop queue changes */
		raw_spin_lock(&gedf_env->domain.release_lock);
		if (is_queued_res(t_res)) {
			TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
			/* We need to update the position of holder in some
			 * heap. Note that this could be a release heap if
			 * budget enforcement is used and this job overran.
			 */
			check_preempt = !bheap_decrease(edf_ready_order, t_res->heap_node);
		} else {
			/* Nothing to do: if it is not queued and not linked
			 * then it is either sleeping or currently being moved
			 * by other code (e.g., a timer interrupt handler) that
			 * will use the correct priority when enqueuing the
			 * task.
			 */
			TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
		}
		raw_spin_unlock(&gedf_env->domain.release_lock);

		/* If holder was enqueued in a release heap, then the following
		 * preemption check is pointless, but we can't easily detect
		 * that case. If you want to fix this, then consider that
		 * simply adding a state flag requires O(n) time to update when
		 * releasing n tasks, which conflicts with the goal to have
		 * O(log n) merges.
		 */
		if (check_preempt) {
			/* heap_decrease() hit the top level of the heap: make
			 * sure preemption checks get the right task, not the
			 * potentially stale cache.
			 */
			bheap_uncache_min(edf_ready_order, &gedf_env->domain.ready_queue);
			check_for_preemptions(gedf_env);
		}
	}

	raw_spin_unlock(&gedf_env->domain.ready_lock);
}
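/* Note on the mechanism: set_priority_inheritance() above records the lender
 * in t_res->inh_res and then fixes up whatever heap currently holds the
 * reservation, so that subsequent higher_res_prio() comparisons (which are
 * expected to prefer inh_res when it is set) see the boosted priority;
 * clear_priority_inheritance() below undoes both steps.
 */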
/* called with IRQs off */
static void clear_priority_inheritance(struct task_struct* t)
{
	struct reservation *t_res;
	struct gedf_reservation *gedf_res;
	struct reservation_environment *env;
	struct gedf_reservation_environment *gedf_env;
	struct gedf_cpu_entry *entry;

	t_res = (struct reservation *) tsk_rt(t)->plugin_state;
	gedf_res = container_of(t_res, struct gedf_reservation, res);
	env = t_res->par_env;
	gedf_env = container_of(env, struct gedf_reservation_environment, env);

	raw_spin_lock(&gedf_env->domain.ready_lock);

	/* A job only stops inheriting a priority when it releases a
	 * resource. Thus we can make the following assumption. */
	entry = &gedf_env->cpu_entries[smp_processor_id()];
	BUG_ON(entry->scheduled != gedf_res);

	TRACE_TASK(t, "priority restored\n");
	t_res->inh_res = NULL;

	BUG_ON(!gedf_res->linked_on && !bheap_node_in_heap(t_res->heap_node));

	/* Check if rescheduling is necessary. We can't use heap_decrease()
	 * since the priority was effectively lowered. Instead, we
	 * update the position of the CPU on which it is linked, or remove
	 * and re-add it to the appropriate heap if it is not linked.
	 */
	if (gedf_res->linked_on) {
		update_cpu_position(gedf_res->linked_on, &gedf_env->cpu_heap);
	} else {
		struct bheap *heap;
		if (t_res->replenishment_time > litmus_clock()) {
			raw_spin_lock(&gedf_env->domain.release_lock);
			heap = &(get_release_heap_res(&gedf_env->domain, t_res, 0)->heap);
			raw_spin_unlock(&gedf_env->domain.release_lock);
		} else {
			heap = &(gedf_env->domain.ready_queue);
		}
		bheap_delete(edf_ready_order, heap, t_res->heap_node);
		bheap_insert(edf_ready_order, heap, t_res->heap_node);
	}

	check_for_preemptions(gedf_env);

	raw_spin_unlock(&gedf_env->domain.ready_lock);
}

/* ******************** OMLP support ********************** */

/* struct for semaphore with priority inheritance */
struct omlp_semaphore {
	struct litmus_lock litmus_lock;

	/* current resource holder */
	struct task_struct *owner;

	/* highest-priority waiter */
	struct task_struct *hp_waiter;
	struct reservation *hp_waiter_res;

	/* FIFO queue of waiting tasks */
	wait_queue_head_t fifo_wait;
	/* Priority queue of waiting tasks */
	wait_queue_head_t prio_wait;

	/* How many slots remaining in FIFO queue? */
	unsigned int num_free;
};
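/* Queue discipline: the semaphore follows the O(m) Locking Protocol shape.
 * The FIFO queue holds at most as many tasks as the component has CPUs
 * (gedf_env_new_omlp() initializes num_free to num_cpus - 1, with the owner
 * occupying the remaining slot); everyone else waits in the priority queue
 * ordered by deadline, and omlp_move() promotes its head into the FIFO queue
 * whenever a FIFO slot frees up.
 */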
static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock)
{
	return container_of(lock, struct omlp_semaphore, litmus_lock);
}

/* already locked */
static void omlp_enqueue(struct omlp_semaphore *sem, prio_wait_queue_t* wait)
{
	if (sem->num_free) {
		/* there is space in the FIFO queue */
		sem->num_free--;
		__add_wait_queue_tail_exclusive(&sem->fifo_wait, &wait->wq);
	} else {
		/* nope, gotta go to the priority queue */
		__add_wait_queue_prio_exclusive(&sem->prio_wait, wait);
	}
}

/* already locked */
static int omlp_move(struct omlp_semaphore *sem)
{
	struct list_head* first;

	if (waitqueue_active(&sem->prio_wait)) {
		first = sem->prio_wait.task_list.next;
		list_move_tail(first, &sem->fifo_wait.task_list);
		return 1;
	} else
		return 0;
}

static struct task_struct* omlp_dequeue(struct omlp_semaphore *sem)
{
	struct task_struct* first = __waitqueue_remove_first(&sem->fifo_wait);

	if (first && !omlp_move(sem))
		sem->num_free++;

	return first;
}

/* caller is responsible for locking */
static struct task_struct* omlp_find_hp_waiter(struct omlp_semaphore *sem,
					       struct task_struct* skip)
{
	struct list_head *pos;
	struct task_struct *queued, *found = NULL;
	struct reservation *q_res, *f_res = NULL;

	/* check FIFO queue first */
	list_for_each(pos, &sem->fifo_wait.task_list) {
		queued = (struct task_struct*) list_entry(pos, wait_queue_t, task_list)->private;

		/* Compare task prios, find high prio task. */
		q_res = (struct reservation *) tsk_rt(queued)->plugin_state;
		if (queued != skip && higher_res_prio(q_res, f_res)) {
			f_res = q_res;
			found = queued;
		}
	}

	/* check priority queue next */
	if (waitqueue_active(&sem->prio_wait)) {
		/* first has highest priority */
		pos = sem->prio_wait.task_list.next;
		queued = (struct task_struct*) list_entry(pos, wait_queue_t, task_list)->private;
		q_res = (struct reservation *) tsk_rt(queued)->plugin_state;
		if (higher_res_prio(q_res, f_res)) {
			f_res = q_res;
			found = queued;
		}
	}

	return found;
}

int gedf_env_omlp_lock(struct litmus_lock* l)
{
	// The task and the semaphore
	struct task_struct* t = current;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	// Various scheduler entities
	struct reservation *t_res, *o_res;
	struct gedf_reservation *gedf_res;
	// Waiting
	unsigned long flags;
	prio_wait_queue_t wait;

	if (!is_realtime(t))
		return -EPERM;

	/* prevent nested lock acquisition --- not supported by global OMLP by default */
	if (tsk_rt(t)->num_locks_held)
		return -EBUSY;

	t_res = (struct reservation *) tsk_rt(t)->plugin_state;
	gedf_res = container_of(t_res, struct gedf_reservation, res);

	spin_lock_irqsave(&sem->fifo_wait.lock, flags);

	if (sem->owner) {
		/* resource is not free => must suspend and wait */

		/* the priority queue needs the deadline, not the "priority" */
		init_prio_waitqueue_entry(&wait, t, ULLONG_MAX - t_res->priority);

		set_task_state(t, TASK_UNINTERRUPTIBLE);

		omlp_enqueue(sem, &wait);

		/* check if we need to activate priority inheritance */
		if (higher_res_prio(t_res, sem->hp_waiter_res)) {
			sem->hp_waiter = t;
			sem->hp_waiter_res = t_res;
			o_res = (struct reservation *) tsk_rt(sem->owner)->plugin_state;
			if (higher_res_prio(t_res, o_res)) {
				set_priority_inheritance(sem->owner, sem->hp_waiter);
			}
		}

		TS_LOCK_SUSPEND;

		/* release lock before sleeping */
		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);

		BUG_ON(!gedf_res->linked_on && !bheap_node_in_heap(t_res->heap_node));
		schedule();	// will have issues if the reservation
				// is not linked or on the ready queue

		TS_LOCK_RESUME;

		/* Since we hold the lock, no other task will change
		 * ->owner. We can thus check it without acquiring the spin
		 * lock.
		 */
		BUG_ON(sem->owner != t);
	} else {
		/* it's ours now */
		sem->owner = t;

		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
	}

	tsk_rt(t)->num_locks_held++;

	return 0;
}
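/* Forbidden-zone handling: gedf_env_omlp_access_fz_check() below is called by
 * a lock holder that is about to enter a critical section of length fz_len.
 * If less than fz_len remains in the parent MTD reservation's current time
 * slice, the task is parked on the per-CPU fz_waiters queue (after being
 * marked non-preemptive via its control page) and is woken by
 * gedf_env_resume() once the component is scheduled again.
 */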
static int gedf_env_omlp_access_fz_check(struct litmus_lock* l, lt_t fz_len)
{
	// The task and the semaphore
	struct task_struct* t = current;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	// Various scheduler entities
	struct reservation *t_res;
	struct gedf_reservation_environment *gedf_env;
	struct gedf_reservation *gedf_res;
	// Forbidden zones
	unsigned int cpu;
	lt_t timeslice_end, remaining_component_budget;
	struct mtd_reservation *mtd_res;
	// Waiting
	unsigned long flags;
	wait_queue_t wait;

	if (!is_realtime(t))
		return -EPERM;

	if (sem->owner != t) { // does this need a lock around it?
		return -EINVAL;
	}

	t_res = (struct reservation *) tsk_rt(t)->plugin_state;
	gedf_res = container_of(t_res, struct gedf_reservation, res);

	/* make task non-preemptive, even if it must suspend because
	 * of a forbidden zone; will be cleared by a user-space call */
	if (likely(tsk_rt(t)->ctrl_page))
		tsk_rt(t)->ctrl_page->sched.np.flag = 1;
	else
		TRACE_TASK(t, "Can't make task user-space np: no ctrl page\n");

	/* if in a forbidden zone, wait until the next time slice */
	/* note that we can't use t_res->par_env->res->cur_budget
	 * because it is only updated on a call to schedule(), and
	 * thus might not reflect the current remaining budget
	 * for the component */
	gedf_env = container_of(t_res->par_env, struct gedf_reservation_environment, env);

	spin_lock_irqsave(&gedf_env->fz_waiters[0].lock, flags);
	cpu = smp_processor_id();
	spin_unlock_irqrestore(&gedf_env->fz_waiters[0].lock, flags);

	spin_lock_irqsave(&gedf_env->fz_waiters[cpu].lock, flags);

	mtd_res = (struct mtd_reservation*)t_res->par_env->res;
	timeslice_end = mtd_res->cur_interval[cpu].end + mtd_res->major_cycle_start[cpu];
	remaining_component_budget = timeslice_end - litmus_clock();

	if (remaining_component_budget < fz_len) {
		/* go on a wait queue to be woken up when the parent reservation
		 * is next scheduled */
		init_waitqueue_entry(&wait, t);

		set_task_state(t, TASK_UNINTERRUPTIBLE);

		__add_wait_queue_tail_exclusive(&gedf_env->fz_waiters[cpu], &wait);

		TS_LOCK_SUSPEND;

		spin_unlock_irqrestore(&gedf_env->fz_waiters[cpu].lock, flags);

		BUG_ON(!gedf_res->linked_on && !bheap_node_in_heap(t_res->heap_node));
		schedule();

		TS_LOCK_RESUME;
	} else {
		spin_unlock_irqrestore(&gedf_env->fz_waiters[cpu].lock, flags);
	}

	return 0;
}

static int gedf_env_omlp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	unsigned long flags;
	int err = 0;
	struct reservation *n_res;

	spin_lock_irqsave(&sem->fifo_wait.lock, flags);

	if (sem->owner != t) {
		err = -EINVAL;
		goto out;
	}

	tsk_rt(t)->num_locks_held--;

	/* check if there are jobs waiting for this resource */
	next = omlp_dequeue(sem);
	if (next) {
		/* next becomes the resource holder */
		sem->owner = next;
		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);

		n_res = (struct reservation *) tsk_rt(next)->plugin_state;

		/* determine new hp_waiter if necessary */
		if (next == sem->hp_waiter) {
			TRACE_TASK(next, "was highest-prio waiter\n");
			/* next has the highest priority --- it doesn't need to
			 * inherit. However, we need to make sure that the
			 * next-highest priority in the queue is reflected in
			 * hp_waiter.
			 */
			sem->hp_waiter = omlp_find_hp_waiter(sem, next);
			if (sem->hp_waiter) {
				TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
				sem->hp_waiter_res =
					(struct reservation *) tsk_rt(sem->hp_waiter)->plugin_state;
			} else {
				TRACE("no further waiters\n");
				sem->hp_waiter_res = NULL;
			}
		} else {
			/* Well, if next is not the highest-priority waiter,
			 * then it ought to inherit the highest-priority
			 * waiter's priority. */
			set_priority_inheritance(next, sem->hp_waiter);
		}

		/* wake up next */
		wake_up_process(next);
	} else
		/* becomes available */
		sem->owner = NULL;

	/* we lose the benefit of priority inheritance (if any) */
	if (((struct reservation *)tsk_rt(t)->plugin_state)->inh_res)
		clear_priority_inheritance(t);

out:
	if (likely(tsk_rt(t)->ctrl_page))
		tsk_rt(t)->ctrl_page->sched.np.flag = 0;
	spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);

	return err;
}
static int gedf_env_omlp_close(struct litmus_lock* l)
{
	struct task_struct *t = current;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	unsigned long flags;
	int owner;

	spin_lock_irqsave(&sem->fifo_wait.lock, flags);
	owner = sem->owner == t;
	spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);

	if (owner)
		gedf_env_omlp_unlock(l);

	return 0;
}

static void gedf_env_omlp_free(struct litmus_lock* lock)
{
	kfree(omlp_from_lock(lock));
}

static struct litmus_lock_ops gedf_env_omlp_lock_ops = {
	.close = gedf_env_omlp_close,
	.lock = gedf_env_omlp_lock,
	.access_forbidden_zone_check = gedf_env_omlp_access_fz_check,
	.unlock = gedf_env_omlp_unlock,
	.deallocate = gedf_env_omlp_free,
};

static struct litmus_lock* gedf_env_new_omlp(void)
{
	struct omlp_semaphore* sem;
	struct reservation *t_res;
	struct gedf_reservation_environment *gedf_env;

	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
	if (!sem)
		return NULL;

	t_res = (struct reservation *) tsk_rt(current)->plugin_state;
	gedf_env = container_of(t_res->par_env, struct gedf_reservation_environment, env);

	sem->owner = NULL;
	sem->hp_waiter = NULL;
	sem->hp_waiter_res = NULL;
	init_waitqueue_head(&sem->fifo_wait);
	init_waitqueue_head(&sem->prio_wait);
	sem->litmus_lock.ops = &gedf_env_omlp_lock_ops;
	/* free = cpus-1 since ->owner is the head and also counted */
	sem->num_free = gedf_env->num_cpus - 1;

	return &sem->litmus_lock;
}

/* **** lock constructor **** */

static long gedf_env_allocate_lock(
	struct reservation_environment* env,
	struct litmus_lock **lock,
	int type,
	void* __user unused)
{
	int err = -ENXIO;

	/* EXT-RES currently only supports the OMLP within components
	 * for global resources. */
	switch (type) {
	case OMLP_SEM:
		/* O(m) Multiprocessor Locking Protocol */
		*lock = gedf_env_new_omlp();
		if (*lock)
			err = 0;
		else
			err = -ENOMEM;
		break;
	};

	return err;
}
#endif

static struct reservation_environment_ops gedf_env_ops = {
	.update_time = gedf_env_update_time,
	.dispatch = gedf_env_dispatch,
	.resume = gedf_env_resume,
	.suspend = gedf_env_suspend,
	.add_res = gedf_env_add_res,
	.remove_res = gedf_env_remove_res,
	.find_res_by_id = gedf_find_res_by_id,
	.is_np = gedf_env_is_np,
	.shutdown = gedf_env_shutdown,
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock = gedf_env_allocate_lock,
#endif
};

long alloc_gedf_reservation_environment(
	struct gedf_reservation_environment** _env,
	int max_cpus)
{
	struct gedf_reservation_environment* gedf_env;
	int i;
	int total_cpus = num_online_cpus();

	gedf_env = kzalloc(sizeof(struct gedf_reservation_environment), GFP_ATOMIC);
	if (!gedf_env)
		return -ENOMEM;

	/* We don't know which subset of CPUs we'll run on, so we must keep state
	 * for all of them */
	gedf_env->cpu_entries = kzalloc(sizeof(struct gedf_cpu_entry)*total_cpus, GFP_ATOMIC);
	if (!gedf_env->cpu_entries) {
		kfree(gedf_env);
		return -ENOMEM;
	}
	gedf_env->cpu_node = kzalloc(sizeof(struct bheap_node)*total_cpus, GFP_ATOMIC);
	if (!gedf_env->cpu_node) {
		kfree(gedf_env->cpu_entries);
		kfree(gedf_env);
		return -ENOMEM;
	}
#ifdef CONFIG_LITMUS_LOCKING
	gedf_env->fz_waiters = kzalloc(sizeof(wait_queue_head_t)*total_cpus, GFP_ATOMIC);
	if (!gedf_env->fz_waiters) {
		kfree(gedf_env->cpu_entries);
		kfree(gedf_env->cpu_node);
		kfree(gedf_env);
		return -ENOMEM;
	}
#endif

	/* set environment callback actions */
	gedf_env->env.ops = &gedf_env_ops;

	INIT_LIST_HEAD(&gedf_env->env.all_reservations);

	gedf_env->num_cpus = 0;
	bheap_init(&gedf_env->cpu_heap);
	for (i = 0; i < max_cpus; i++) {
		gedf_env->cpu_entries[i].id = i;
		/* initialize cpu heap node */
		gedf_env->cpu_entries[i].hn = &gedf_env->cpu_node[i];
		bheap_node_init(&gedf_env->cpu_entries[i].hn, &gedf_env->cpu_entries[i]);
	}

	/* initialize environment domain */
	rt_domain_init(&gedf_env->domain, edf_ready_order, NULL, gedf_env_release_jobs);

#ifdef CONFIG_LITMUS_LOCKING
	gedf_env->num_waiter_queues = max_cpus;
	for (i = 0; i < max_cpus; i++) {
		init_waitqueue_head(&gedf_env->fz_waiters[i]);
	}
#endif

	*_env = gedf_env;

	return 0;
}