From 28cef80c0b9da0184ef736ae131b6146c5976422 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Thu, 11 Feb 2016 20:31:16 -0500 Subject: Manually patched mc^2 related code --- include/litmus/mc2_common.h | 31 + include/litmus/rt_param.h | 4 + include/litmus/unistd_32.h | 3 +- include/litmus/unistd_64.h | 5 +- litmus/Makefile | 3 +- litmus/mc2_common.c | 78 +++ litmus/reservation.c | 6 +- litmus/sched_mc2.c | 1634 +++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 1757 insertions(+), 7 deletions(-) create mode 100644 include/litmus/mc2_common.h create mode 100644 litmus/mc2_common.c create mode 100644 litmus/sched_mc2.c diff --git a/include/litmus/mc2_common.h b/include/litmus/mc2_common.h new file mode 100644 index 000000000000..e3c0af28f1b9 --- /dev/null +++ b/include/litmus/mc2_common.h @@ -0,0 +1,31 @@ +/* + * MC^2 common data structures + */ + +#ifndef __UNC_MC2_COMMON_H__ +#define __UNC_MC2_COMMON_H__ + +enum crit_level { + CRIT_LEVEL_A = 0, + CRIT_LEVEL_B = 1, + CRIT_LEVEL_C = 2, + NUM_CRIT_LEVELS = 3, +}; + +struct mc2_task { + enum crit_level crit; + unsigned int res_id; +}; + +#ifdef __KERNEL__ + +#include + +#define tsk_mc2_data(t) (tsk_rt(t)->mc2_data) + +long mc2_task_client_init(struct task_client *tc, struct mc2_task *mc2_param, struct task_struct *tsk, + struct reservation *res); + +#endif /* __KERNEL__ */ + +#endif \ No newline at end of file diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index e626bbbe60d5..26dfa33c1e5e 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -206,6 +206,7 @@ struct rt_job { }; struct pfair_param; +struct mc2_task; /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must @@ -322,6 +323,9 @@ struct rt_param { /* Pointer to the page shared between userspace and kernel. */ struct control_page * ctrl_page; + + /* Mixed-criticality specific data */ + struct mc2_task* mc2_data; }; #endif diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 5f6a2749c6a7..202f439a62ae 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -19,5 +19,6 @@ #define __NR_null_call __LSC(11) #define __NR_reservation_create __LSC(12) #define __NR_reservation_destroy __LSC(13) +#define __NR_set_mc2_task_param __LSC(14) -#define NR_litmus_syscalls 14 +#define NR_litmus_syscalls 15 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index 3e6b1d330336..ba2c91c5bf8c 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -33,6 +33,7 @@ __SYSCALL(__NR_null_call, sys_null_call) __SYSCALL(__NR_reservation_create, sys_reservation_create) #define __NR_reservation_destroy __LSC(13) __SYSCALL(__NR_reservation_destroy, sys_reservation_destroy) +#define __NR_set_mc2_task_param __LSC(14) +__SYSCALL(__NR_set_mc2_task_param, sys_set_mc2_task_param) - -#define NR_litmus_syscalls 14 +#define NR_litmus_syscalls 15 diff --git a/litmus/Makefile b/litmus/Makefile index 05021f553eda..70c77b3e9b53 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -35,4 +35,5 @@ obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o obj-y += reservation.o polling_reservations.o -obj-y += sched_pres.o \ No newline at end of file +obj-y += sched_pres.o +obj-y += mc2_common.o sched_mc2.o diff --git a/litmus/mc2_common.c b/litmus/mc2_common.c new file mode 100644 index 000000000000..a8ea5d9889f3 --- /dev/null +++ b/litmus/mc2_common.c @@ -0,0 +1,78 @@ +/* + * litmus/mc2_common.c + * + * Common functions for MC2 plugin. 
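As a usage illustration (not part of the patch): before a process enters real-time mode, it registers its criticality level and reservation id through the new set_mc2_task_param syscall declared above. The sketch below is hedged: the syscall number is only a placeholder for the architecture-specific value derived from __LSC(14), and a real application would normally go through a liblitmus wrapper rather than raw syscall(2).

/* Minimal userspace sketch: attach the calling process to MC^2 reservation 5
 * at criticality level B.  NR_SET_MC2_TASK_PARAM is a placeholder; the real
 * number is __LSC(14) resolved against the LITMUS^RT syscall base. */
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

#define NR_SET_MC2_TASK_PARAM (-1)	/* placeholder, resolve for your arch */

enum crit_level { CRIT_LEVEL_A = 0, CRIT_LEVEL_B = 1, CRIT_LEVEL_C = 2 };

struct mc2_task {
	enum crit_level crit;
	unsigned int res_id;
};

int main(void)
{
	struct mc2_task mp = { .crit = CRIT_LEVEL_B, .res_id = 5 };

	if (syscall(NR_SET_MC2_TASK_PARAM, getpid(), &mp) < 0) {
		perror("set_mc2_task_param");
		return 1;
	}
	return 0;
}

The kernel side (sys_set_mc2_task_param in litmus/mc2_common.c below) rejects the call with -EBUSY if the target is already a real-time task, so this step has to happen before the task switches to LITMUS^RT scheduling.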
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +long mc2_task_client_init(struct task_client *tc, struct mc2_task *mc2_param, struct task_struct *tsk, struct reservation *res) +{ + task_client_init(tc, tsk, res); + if ((mc2_param->crit < CRIT_LEVEL_A) || + (mc2_param->crit > CRIT_LEVEL_C)) + return -EINVAL; + + TRACE_TASK(tsk, "mc2_task_client_init: crit_level = %d\n", mc2_param->crit); + + return 0; +} + +asmlinkage long sys_set_mc2_task_param(pid_t pid, struct mc2_task __user * param) +{ + struct task_struct *target; + int retval = -EINVAL; + struct mc2_task *mp = kzalloc(sizeof(*mp), GFP_KERNEL); + + if (!mp) + return -ENOMEM; + + printk("Setting up mc^2 task parameters for process %d.\n", pid); + + if (pid < 0 || param == 0) { + goto out; + } + if (copy_from_user(mp, param, sizeof(*mp))) { + retval = -EFAULT; + goto out; + } + + /* Task search and manipulation must be protected */ + read_lock_irq(&tasklist_lock); + if (!(target = find_task_by_vpid(pid))) { + retval = -ESRCH; + goto out_unlock; + } + + if (is_realtime(target)) { + /* The task is already a real-time task. + * We cannot not allow parameter changes at this point. + */ + retval = -EBUSY; + goto out_unlock; + } + if (mp->crit < CRIT_LEVEL_A || mp->crit >= NUM_CRIT_LEVELS) { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because of invalid criticality level\n", pid); + goto out_unlock; + } + + //target->rt_param.plugin_state = mp; + target->rt_param.mc2_data = mp; + + retval = 0; +out_unlock: + read_unlock_irq(&tasklist_lock); +out: + return retval; +} \ No newline at end of file diff --git a/litmus/reservation.c b/litmus/reservation.c index 08c74f9005b3..d11003af279a 100644 --- a/litmus/reservation.c +++ b/litmus/reservation.c @@ -217,7 +217,7 @@ static void sup_charge_budget( /* stop at the first ACTIVE reservation */ //break; } - TRACE("finished charging budgets\n"); + //TRACE("finished charging budgets\n"); } static void sup_replenish_budgets(struct sup_reservation_environment* sup_env) @@ -234,7 +234,7 @@ static void sup_replenish_budgets(struct sup_reservation_environment* sup_env) break; } } - TRACE("finished replenishing budgets\n"); + //TRACE("finished replenishing budgets\n"); /* request a scheduler update at the next replenishment instant */ res = list_first_entry_or_null(&sup_env->depleted_reservations, @@ -252,7 +252,7 @@ void sup_update_time( /* If the time didn't advance, there is nothing to do. * This check makes it safe to call sup_advance_time() potentially * multiple times (e.g., via different code paths. */ - TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time); + //TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time); if (unlikely(now <= sup_env->env.current_time)) return; diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c new file mode 100644 index 000000000000..09b5ebed2be5 --- /dev/null +++ b/litmus/sched_mc2.c @@ -0,0 +1,1634 @@ +/* + * litmus/sched_mc2.c + * + * Implementation of the Mixed-Criticality on MultiCore scheduler + * + * Thus plugin implements a scheduling algorithm proposed in + * "Mixed-Criticality Real-Time Scheduling for Multicore System" paper. 
+ */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +/* _global_env - reservation container for level-C tasks*/ +struct gmp_reservation_environment _global_env; + +/* cpu_entry - keep track of a running task on a cpu + * This state is used to decide the lowest priority cpu + */ +struct cpu_entry { + struct task_struct *scheduled; + lt_t deadline; + int cpu; + enum crit_level lv; + /* if will_schedule is true, this cpu is already selected and + call mc2_schedule() soon. */ + bool will_schedule; +}; + +/* cpu_priority - a global state for choosing the lowest priority CPU */ +struct cpu_priority { + raw_spinlock_t lock; + struct cpu_entry cpu_entries[NR_CPUS]; +}; + +struct cpu_priority _lowest_prio_cpu; + +/* mc2_task_state - a task state structure */ +struct mc2_task_state { + struct task_client res_info; + /* if cpu == -1, this task is a global task (level C) */ + int cpu; + bool has_departed; + struct mc2_task mc2_param; +}; + +/* crit_entry - maintain the logically running job (ghost job) */ +struct crit_entry { + enum crit_level level; + struct task_struct *running; +}; + +/* mc2_cpu_state - maintain the scheduled state and ghost jobs + * timer : timer for partitioned tasks (level A and B) + * g_timer : timer for global tasks (level C) + */ +struct mc2_cpu_state { + raw_spinlock_t lock; + + struct sup_reservation_environment sup_env; + struct hrtimer timer; + + int cpu; + struct task_struct* scheduled; + struct crit_entry crit_entries[NUM_CRIT_LEVELS]; +}; + +static DEFINE_PER_CPU(struct mc2_cpu_state, mc2_cpu_state); + +#define cpu_state_for(cpu_id) (&per_cpu(mc2_cpu_state, cpu_id)) +#define local_cpu_state() (&__get_cpu_var(mc2_cpu_state)) + +/* get_mc2_state - get the task's state */ +static struct mc2_task_state* get_mc2_state(struct task_struct *tsk) +{ + struct mc2_task_state* tinfo; + + tinfo = (struct mc2_task_state*)tsk_rt(tsk)->plugin_state; + + if (tinfo) + return tinfo; + else + return NULL; +} + +/* get_task_crit_level - return the criticaility level of a task */ +static enum crit_level get_task_crit_level(struct task_struct *tsk) +{ + struct mc2_task *mp; + + if (!tsk || !is_realtime(tsk)) + return NUM_CRIT_LEVELS; + + mp = tsk_rt(tsk)->mc2_data; + + if (!mp) + return NUM_CRIT_LEVELS; + else + return mp->crit; +} + +/* res_find_by_id - find reservation by id */ +static struct reservation* res_find_by_id(struct mc2_cpu_state *state, + unsigned int id) +{ + struct reservation *res; + + res = sup_find_by_id(&state->sup_env, id); + if (!res) + res = gmp_find_by_id(&_global_env, id); + + return res; +} + +/* mc2_update_time - update time for a given criticality level. 
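The structures above encode the central split of the design: each CPU owns a sup_reservation_environment that serves its partitioned level-A and level-B reservations, while the single gmp_reservation_environment _global_env, guarded by its own lock, serves all level-C reservations. A task's mc2_task_state records which side it belongs to (cpu == -1 marks a global, level-C task). A minimal sketch of that mapping, where env_of is a hypothetical helper and not part of the patch:

/* Sketch only: which reservation environment backs a given task. */
static struct reservation_environment *env_of(struct mc2_task_state *tinfo,
					      struct mc2_cpu_state *state)
{
	if (tinfo->cpu != -1)
		return &state->sup_env.env;	/* level A/B: per-CPU environment */
	return &_global_env.env;		/* level C: shared global environment */
}

res_find_by_id() above and mc2_update_time() below apply the same rule: consult the per-CPU environment for levels A and B, and the global environment for level C.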
+ * caller must hold a proper lock + * (cpu_state lock or global lock) + */ +static void mc2_update_time(enum crit_level lv, + struct mc2_cpu_state *state, lt_t time) +{ + if (lv < CRIT_LEVEL_C) + sup_update_time(&state->sup_env, time); + else if (lv == CRIT_LEVEL_C) + gmp_update_time(&_global_env, time); + else + TRACE("update_time(): Criticality level error!!!!\n"); +} + +/* task_depart - remove a task from its reservation + * If the job has remaining budget, convert it to a ghost job + * and update crit_entries[] + * + * @job_complete indicate whether job completes or not + */ +static void task_departs(struct task_struct *tsk, int job_complete) +{ + struct mc2_task_state* tinfo = get_mc2_state(tsk); + struct mc2_cpu_state* state = local_cpu_state(); + struct reservation* res; + struct reservation_client *client; + + BUG_ON(!is_realtime(tsk)); + + res = tinfo->res_info.client.reservation; + client = &tinfo->res_info.client; + + res->ops->client_departs(res, client, job_complete); + tinfo->has_departed = true; + TRACE_TASK(tsk, "CLIENT DEPART with budget %llu\n", res->cur_budget); + + if (job_complete && res->cur_budget) { + struct crit_entry* ce; + enum crit_level lv = tinfo->mc2_param.crit; + + ce = &state->crit_entries[lv]; + ce->running = tsk; + res->is_ghost = 1; + TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock()); + + } +} + +/* task_arrive - put a task into its reservation + * If the job was a ghost job, remove it from crit_entries[] + */ +static void task_arrives(struct mc2_cpu_state *state, struct task_struct *tsk) +{ + struct mc2_task_state* tinfo = get_mc2_state(tsk); + struct reservation* res; + struct reservation_client *client; + enum crit_level lv = get_task_crit_level(tsk); + + res = tinfo->res_info.client.reservation; + client = &tinfo->res_info.client; + + tinfo->has_departed = false; + res->ops->client_arrives(res, client); + + sched_trace_task_release(tsk); + + if (lv != NUM_CRIT_LEVELS) { + struct crit_entry *ce; + ce = &state->crit_entries[lv]; + /* if the currrent task is a ghost job, remove it */ + if (ce->running == tsk) + ce->running = NULL; + } +} + +/* get_lowest_prio_cpu - return the lowest priority cpu + * This will be used for scheduling level-C tasks. + * If all CPUs are running tasks which has + * higher priority than level C, return NO_CPU. + */ +static int get_lowest_prio_cpu(lt_t priority) +{ + struct cpu_entry *ce; + int cpu, ret = NO_CPU; + lt_t latest_deadline = 0; + + raw_spin_lock(&_lowest_prio_cpu.lock); + ce = &_lowest_prio_cpu.cpu_entries[local_cpu_state()->cpu]; + if (!ce->will_schedule && !ce->scheduled) { + raw_spin_unlock(&_lowest_prio_cpu.lock); + TRACE("CPU %d (local) is the lowest!\n", ce->cpu); + return ce->cpu; + } else { + TRACE("Local CPU will_schedule=%d, scheduled=(%s/%d)\n", ce->will_schedule, ce->scheduled ? (ce->scheduled)->comm : "null", ce->scheduled ? (ce->scheduled)->pid : 0); + } + + for_each_online_cpu(cpu) { + ce = &_lowest_prio_cpu.cpu_entries[cpu]; + /* If a CPU will call schedule() in the near future, we don't + return that CPU. */ + TRACE("CPU %d will_schedule=%d, scheduled=(%s/%d:%d)\n", cpu, ce->will_schedule, + ce->scheduled ? (ce->scheduled)->comm : "null", + ce->scheduled ? (ce->scheduled)->pid : 0, + ce->scheduled ? (ce->scheduled)->rt_param.job_params.job_no : 0); + if (!ce->will_schedule) { + if (!ce->scheduled) { + /* Idle cpu, return this. 
*/ + raw_spin_unlock(&_lowest_prio_cpu.lock); + TRACE("CPU %d is the lowest!\n", ce->cpu); + return ce->cpu; + } else if (ce->lv == CRIT_LEVEL_C && + ce->deadline > latest_deadline) { + latest_deadline = ce->deadline; + ret = ce->cpu; + } + } + } + + raw_spin_unlock(&_lowest_prio_cpu.lock); + + if (priority >= latest_deadline) + ret = NO_CPU; + + TRACE("CPU %d is the lowest!\n", ret); + + return ret; +} + +/* NOTE: drops state->lock */ +/* mc2_update_timer_and_unlock - set a timer and g_timer and unlock + * Whenever res_env.current_time is updated, + * we check next_scheduler_update and set + * a timer. + * If there exist a global event which is + * not armed on any CPU and g_timer is not + * active, set a g_timer for that event. + */ +static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state) +{ + int local; + lt_t update, now; + enum crit_level lv = get_task_crit_level(state->scheduled); + struct next_timer_event *event, *next; + + //TRACE_TASK(state->scheduled, "update_timer!\n"); + if (lv != NUM_CRIT_LEVELS) + TRACE_TASK(state->scheduled, "UPDATE_TIMER LV = %d\n", lv); + + update = state->sup_env.next_scheduler_update; + now = state->sup_env.env.current_time; + + /* Be sure we're actually running on the right core, + * as pres_update_timer() is also called from pres_task_resume(), + * which might be called on any CPU when a thread resumes. + */ + local = local_cpu_state() == state; + + list_for_each_entry_safe(event, next, &_global_env.next_events, list) { + /* If the event time is already passed, we call schedule() on + the lowest priority cpu */ + if (event->next_update >= update) { + break; + } + + if (event->next_update < litmus_clock()) { + if (event->timer_armed_on == NO_CPU) { + struct reservation *res = gmp_find_by_id(&_global_env, event->id); + int cpu = get_lowest_prio_cpu(res?res->priority:0); + TRACE("GLOBAL EVENT PASSED!! poking CPU %d to reschedule\n", cpu); + list_del(&event->list); + kfree(event); + if (cpu != NO_CPU) { + raw_spin_lock(&_lowest_prio_cpu.lock); + _lowest_prio_cpu.cpu_entries[cpu].will_schedule = true; + raw_spin_unlock(&_lowest_prio_cpu.lock); + litmus_reschedule(cpu); + } + } + } else if (event->next_update < update && event->timer_armed_on == NO_CPU) { + event->timer_armed_on = state->cpu; + update = event->next_update; + break; + } + } + + /* Must drop state lock before calling into hrtimer_start(), which + * may raise a softirq, which in turn may wake ksoftirqd. */ + raw_spin_unlock(&state->lock); + raw_spin_unlock(&_global_env.lock); + + if (update <= now) { + litmus_reschedule(state->cpu); + } else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) { + /* Reprogram only if not already set correctly. */ + if (!hrtimer_active(&state->timer) || + ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) { + TRACE("canceling timer...at %llu\n", + ktime_to_ns(hrtimer_get_expires(&state->timer))); + hrtimer_cancel(&state->timer); + TRACE("setting scheduler timer for %llu\n", update); + /* We cannot use hrtimer_start() here because the + * wakeup flag must be set to zero. */ + __hrtimer_start_range_ns(&state->timer, + ns_to_ktime(update), + 0 /* timer coalescing slack */, + HRTIMER_MODE_ABS_PINNED, + 0 /* wakeup */); + } + } else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) { + /* Poke remote core only if timer needs to be set earlier than + * it is currently set. 
+ */ + TRACE("mc2_update_timer for remote CPU %d (update=%llu, " + "active:%d, set:%llu)\n", + state->cpu, + update, + hrtimer_active(&state->timer), + ktime_to_ns(hrtimer_get_expires(&state->timer))); + if (!hrtimer_active(&state->timer) || + ktime_to_ns(hrtimer_get_expires(&state->timer)) > update) { + TRACE("poking CPU %d so that it can update its " + "scheduling timer (active:%d, set:%llu)\n", + state->cpu, + hrtimer_active(&state->timer), + ktime_to_ns(hrtimer_get_expires(&state->timer))); + litmus_reschedule(state->cpu); + } + } +} + +/* mc2_update_ghost_state - Update crit_entries[] to track ghost jobs + * If the budget of a ghost is exhausted, + * clear is_ghost and reschedule + */ +static lt_t mc2_update_ghost_state(struct mc2_cpu_state *state) +{ + int lv = 0; + struct crit_entry* ce; + struct reservation *res; + struct mc2_task_state *tinfo; + lt_t ret = ULLONG_MAX; + + BUG_ON(!state); + + for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) { + ce = &state->crit_entries[lv]; + if (ce->running != NULL) { +//printk(KERN_ALERT "P%d ce->running : %s/%d\n", state->cpu, ce->running ? (ce->running)->comm : "null", ce->running ? (ce->running)->pid : 0); + tinfo = get_mc2_state(ce->running); + if (!tinfo) + continue; + + res = res_find_by_id(state, tinfo->mc2_param.res_id); + BUG_ON(!res); +//printk(KERN_ALERT "R%d found!\n", res->id); + TRACE("LV %d running id %d budget %llu\n", + lv, tinfo->mc2_param.res_id, res->cur_budget); + /* If the budget is exhausted, clear is_ghost and reschedule */ + if (!res->cur_budget) { + struct sup_reservation_environment* sup_env = &state->sup_env; + + TRACE("GHOST FINISH id %d at %llu\n", + tinfo->mc2_param.res_id, litmus_clock()); + ce->running = NULL; + res->is_ghost = 0; + + if (lv < CRIT_LEVEL_C) { + res = list_first_entry_or_null( + &sup_env->active_reservations, + struct reservation, list); + if (res) + litmus_reschedule_local(); + } else if (lv == CRIT_LEVEL_C) { + res = list_first_entry_or_null( + &_global_env.active_reservations, + struct reservation, list); + if (res) + litmus_reschedule(state->cpu); + } + } else { + //TRACE("GHOST NOT FINISH id %d budget %llu\n", res->id, res->cur_budget); + //gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN); + if (ret > res->cur_budget) { + ret = res->cur_budget; + } + } + } + } + + return ret; +} + +/* update_cpu_prio - Update cpu's priority + * When a cpu picks a new task, call this function + * to update cpu priorities. + */ +static void update_cpu_prio(struct mc2_cpu_state *state) +{ + struct cpu_entry *ce = &_lowest_prio_cpu.cpu_entries[state->cpu]; + enum crit_level lv = get_task_crit_level(state->scheduled); + + if (!state->scheduled) { + /* cpu is idle. 
*/ + ce->scheduled = NULL; + ce->deadline = ULLONG_MAX; + ce->lv = NUM_CRIT_LEVELS; + } else if (lv == CRIT_LEVEL_C) { + ce->scheduled = state->scheduled; + ce->deadline = get_deadline(state->scheduled); + ce->lv = lv; + } else if (lv < CRIT_LEVEL_C) { + /* If cpu is running level A or B tasks, it is not eligible + to run level-C tasks */ + ce->scheduled = state->scheduled; + ce->deadline = 0; + ce->lv = lv; + } +}; + +/* on_scheduling_timer - timer event for partitioned tasks + */ +static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer) +{ + unsigned long flags; + enum hrtimer_restart restart = HRTIMER_NORESTART; + struct mc2_cpu_state *state; + lt_t update, now; + int global_schedule_now; + lt_t remain_budget; + + state = container_of(timer, struct mc2_cpu_state, timer); + + /* The scheduling timer should only fire on the local CPU, because + * otherwise deadlocks via timer_cancel() are possible. + * Note: this does not interfere with dedicated interrupt handling, as + * even under dedicated interrupt handling scheduling timers for + * budget enforcement must occur locally on each CPU. + */ + BUG_ON(state->cpu != raw_smp_processor_id()); + + TRACE("TIMER FIRED at %llu\n", litmus_clock()); + raw_spin_lock_irqsave(&_global_env.lock, flags); + raw_spin_lock(&state->lock); +//printk(KERN_ALERT "P%d on_scheduling_timer() hold lock %s/%d\n", state->cpu, current ? (current)->comm : "null", current ? (current)->pid : 0); + now = litmus_clock(); + sup_update_time(&state->sup_env, now); + global_schedule_now = gmp_update_time(&_global_env, now); +//printk(KERN_ALERT "P%d update_time in timer() %s/%d\n", state->cpu, current ? (current)->comm : "null", current ? (current)->pid : 0); + remain_budget = mc2_update_ghost_state(state); + + update = state->sup_env.next_scheduler_update; + now = state->sup_env.env.current_time; + + if (remain_budget != ULLONG_MAX && update > now + remain_budget) { + update = now + remain_budget; + } + + //TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now); +//printk(KERN_ALERT "on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now); + if (update <= now) { + litmus_reschedule_local(); + } else if (update != SUP_NO_SCHEDULER_UPDATE) { + hrtimer_set_expires(timer, ns_to_ktime(update)); + restart = HRTIMER_RESTART; + } + + BUG_ON(global_schedule_now < 0 || global_schedule_now > 4); + + /* Find the lowest cpu, and call reschedule */ + while (global_schedule_now--) { + int cpu = get_lowest_prio_cpu(0); + if (cpu != NO_CPU) { + raw_spin_lock(&_lowest_prio_cpu.lock); + _lowest_prio_cpu.cpu_entries[cpu].will_schedule = true; + raw_spin_unlock(&_lowest_prio_cpu.lock); + //TRACE("LOWEST CPU = P%d\n", cpu); + litmus_reschedule(cpu); + } + } + + raw_spin_unlock(&state->lock); + raw_spin_unlock_irqrestore(&_global_env.lock, flags); +//printk(KERN_ALERT "P%d on_scheduling_timer() release lock %s/%d\n", state->cpu, current ? (current)->comm : "null", current ? (current)->pid : 0); + return restart; +} + +/* mc2_dispatch - Select the next task to schedule. 
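Before the dispatch code, a note on ghost jobs: when a job completes while its reservation still holds budget, task_departs() leaves it registered in crit_entries[] as a ghost so the leftover budget keeps draining, and on_scheduling_timer() above rearms the timer at the earlier of the next scheduler update and the smallest remaining ghost budget. mc2_dispatch() below refuses to hand out another task of the same criticality level until the ghost's budget reaches zero, and flags the reservation blocked_by_ghost so its own budget is not charged while it waits. A condensed sketch of that per-reservation check, with locking and the level-C path omitted (may_run_now is a hypothetical name):

/* Condensed sketch of the ghost check performed in mc2_dispatch(). */
static bool may_run_now(struct mc2_cpu_state *state, struct reservation *res,
			enum crit_level lv)
{
	struct crit_entry *ce = &state->crit_entries[lv];

	if (ce->running) {
		/* a ghost job of the same level still owns the budget */
		res->blocked_by_ghost = 1;
		return false;
	}
	res->blocked_by_ghost = 0;
	res->is_ghost = 0;
	return true;
}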
+ */ +struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, struct mc2_cpu_state* state) +{ + struct reservation *res, *next; + struct task_struct *tsk = NULL; + struct crit_entry *ce; + enum crit_level lv; + lt_t time_slice, cur_priority; + + list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) { + if (res->state == RESERVATION_ACTIVE) { + tsk = res->ops->dispatch_client(res, &time_slice); + if (likely(tsk)) { + lv = get_task_crit_level(tsk); + if (lv == NUM_CRIT_LEVELS) { + sup_scheduler_update_after(sup_env, res->cur_budget); + return tsk; + } else { + ce = &state->crit_entries[lv]; + if (likely(!ce->running)) { + /* If we found the next task, clear all flags */ + sup_scheduler_update_after(sup_env, res->cur_budget); + res->blocked_by_ghost = 0; + res->is_ghost = 0; + return tsk; + } else { + /* We cannot schedule the same criticality task + because the ghost job exists. Set blocked_by_ghost + flag not to charge budget */ + res->blocked_by_ghost = 1; + TRACE_TASK(ce->running, " is GHOST\n"); + } + } + } + } + } + + /* no eligible level A or B tasks exists */ + /* check the ghost job */ + ce = &state->crit_entries[CRIT_LEVEL_C]; + if (ce->running) { + TRACE_TASK(ce->running," is GHOST\n"); + return NULL; + } + + cur_priority = _lowest_prio_cpu.cpu_entries[state->cpu].deadline; + + TRACE("****** ACTIVE LIST ******\n"); + TRACE_TASK(_lowest_prio_cpu.cpu_entries[state->cpu].scheduled, "** CURRENT JOB deadline %llu **\n", cur_priority); + list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) { + TRACE("R%d deadline=%llu, scheduled_on=%d\n", res->id, res->priority, res->scheduled_on); + if (res->state == RESERVATION_ACTIVE && res->scheduled_on == NO_CPU) { + tsk = res->ops->dispatch_client(res, &time_slice); + if (likely(tsk)) { + lv = get_task_crit_level(tsk); + if (lv == NUM_CRIT_LEVELS) { + gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN); + //res->event_added = 1; + return tsk; + } else if (lv == CRIT_LEVEL_C) { + //ce = &state->crit_entries[lv]; + //if (likely(!ce->running)) { + gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN); + res->event_added = 1; + res->blocked_by_ghost = 0; + res->is_ghost = 0; + res->scheduled_on = state->cpu; + return tsk; + //} else { + // res->blocked_by_ghost = 1; + // TRACE_TASK(ce->running, " is GHOST\n"); + // return NULL; + //} + } else { + BUG(); + } + } + } + } + + return NULL; +} + +/* not used now */ +static void pre_schedule(struct task_struct *prev) +{ + enum crit_level lv; + if (!is_realtime(prev) || !prev) + return; + + lv = get_task_crit_level(prev); +} + +/* not used now */ +static void post_schedule(struct task_struct *next) +{ + enum crit_level lv; + if (!is_realtime(next) || !next) + return; + + lv = get_task_crit_level(next); +} + +/* mc2_schedule - main scheduler function. pick the next task to run + */ +static struct task_struct* mc2_schedule(struct task_struct * prev) +{ + /* next == NULL means "schedule background work". */ + lt_t now; + struct mc2_cpu_state *state = local_cpu_state(); + + pre_schedule(prev); + + raw_spin_lock(&_global_env.lock); + raw_spin_lock(&state->lock); + + //BUG_ON(state->scheduled && state->scheduled != prev); + //BUG_ON(state->scheduled && !is_realtime(prev)); + if (state->scheduled && state->scheduled != prev) + printk(KERN_ALERT "BUG1!!!!!!!! %s %s\n", state->scheduled ? (state->scheduled)->comm : "null", prev ? 
(prev)->comm : "null"); + if (state->scheduled && !is_realtime(prev)) + printk(KERN_ALERT "BUG2!!!!!!!! \n"); + + /* update time */ + state->sup_env.will_schedule = true; + + now = litmus_clock(); + sup_update_time(&state->sup_env, now); + gmp_update_time(&_global_env, now); + + mc2_update_ghost_state(state); + + /* remove task from reservation if it blocks */ + if (is_realtime(prev) && !is_running(prev)) + task_departs(prev, is_completed(prev)); + + raw_spin_lock(&_lowest_prio_cpu.lock); + _lowest_prio_cpu.cpu_entries[state->cpu].will_schedule = false; + + /* figure out what to schedule next */ + state->scheduled = mc2_dispatch(&state->sup_env, state); + if (state->scheduled && is_realtime(state->scheduled)) + TRACE_TASK(state->scheduled, "mc2_dispatch picked me!\n"); + + update_cpu_prio(state); + raw_spin_unlock(&_lowest_prio_cpu.lock); + + /* Notify LITMUS^RT core that we've arrived at a scheduling decision. */ + sched_state_task_picked(); + + /* program scheduler timer */ + state->sup_env.will_schedule = false; + + /* NOTE: drops state->lock */ + mc2_update_timer_and_unlock(state); + + + + if (prev != state->scheduled && is_realtime(prev)) { + struct mc2_task_state* tinfo = get_mc2_state(prev); + struct reservation* res = tinfo->res_info.client.reservation; + TRACE_TASK(prev, "PREV JOB scheduled_on = P%d\n", res->scheduled_on); + res->scheduled_on = NO_CPU; + TRACE_TASK(prev, "descheduled.\n"); + /* if prev is preempted and a global task, find the lowest cpu and reschedule */ + if (tinfo->has_departed == false && get_task_crit_level(prev) == CRIT_LEVEL_C) { + int cpu; + raw_spin_lock(&_global_env.lock); + cpu = get_lowest_prio_cpu(res?res->priority:0); + //TRACE("LEVEL-C TASK PREEMPTED!! poking CPU %d to reschedule\n", cpu); + if (cpu != NO_CPU) { + raw_spin_lock(&_lowest_prio_cpu.lock); + _lowest_prio_cpu.cpu_entries[cpu].will_schedule = true; + raw_spin_unlock(&_lowest_prio_cpu.lock); + litmus_reschedule(cpu); + } + raw_spin_unlock(&_global_env.lock); + } + } + if (state->scheduled) { + TRACE_TASK(state->scheduled, "scheduled.\n"); + } + + post_schedule(state->scheduled); + + return state->scheduled; +} + +static void resume_legacy_task_model_updates(struct task_struct *tsk) +{ + lt_t now; + if (is_sporadic(tsk)) { + /* If this sporadic task was gone for a "long" time and woke up past + * its deadline, then give it a new budget by triggering a job + * release. This is purely cosmetic and has no effect on the + * P-RES scheduler. */ + + now = litmus_clock(); + if (is_tardy(tsk, now)) { + release_at(tsk, now); + sched_trace_task_release(tsk); + } + } +} + +/* mc2_task_resume - Called when the state of tsk changes back to + * TASK_RUNNING. We need to requeue the task. + */ +static void mc2_task_resume(struct task_struct *tsk) +{ + unsigned long flags; + struct mc2_task_state* tinfo = get_mc2_state(tsk); + struct mc2_cpu_state *state; + + TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock()); + + local_irq_save(flags); + if (tinfo->cpu != -1) + state = cpu_state_for(tinfo->cpu); + else + state = local_cpu_state(); + + raw_spin_lock(&_global_env.lock); +//printk(KERN_ALERT "P%d resume() hold lock\n", state->cpu); + /* Requeue only if self-suspension was already processed. */ + if (tinfo->has_departed) + { + raw_spin_lock(&state->lock); + /* Assumption: litmus_clock() is synchronized across cores, + * since we might not actually be executing on tinfo->cpu + * at the moment. 
*/ + if (tinfo->cpu != -1) { + sup_update_time(&state->sup_env, litmus_clock()); + } else { + //TRACE("RESUME UPDATE ####\n"); + gmp_update_time(&_global_env, litmus_clock()); + //TRACE("RESUME UPDATE $$$$\n"); + } + + mc2_update_ghost_state(state); + task_arrives(state, tsk); + /* NOTE: drops state->lock */ + TRACE_TASK(tsk, "mc2_resume()\n"); + mc2_update_timer_and_unlock(state); +//printk(KERN_ALERT "P%d resume() dropped lock\n", state->cpu); + } else { + TRACE_TASK(tsk, "resume event ignored, still scheduled\n"); + raw_spin_unlock(&_global_env.lock); +//printk(KERN_ALERT "P%d resume() release lock\n", state->cpu); + } + + local_irq_restore(flags); + + resume_legacy_task_model_updates(tsk); +} + +/* mc2_complete_job - syscall backend for job completions + */ +static long mc2_complete_job(void) +{ + ktime_t next_release; + long err; + + TRACE_CUR("mc2_complete_job at %llu (deadline: %llu)\n", litmus_clock(), + get_deadline(current)); + + tsk_rt(current)->completed = 1; + + /* If this the first job instance, we need to reset replenish + time to the next release time */ + if (tsk_rt(current)->sporadic_release) { + struct mc2_cpu_state *state; + struct reservation_environment *env; + struct mc2_task_state *tinfo; + struct reservation *res; + unsigned long flags; + + preempt_disable(); + local_irq_save(flags); + + tinfo = get_mc2_state(current); + + if (get_task_crit_level(current) < CRIT_LEVEL_C) + state = cpu_state_for(tinfo->cpu); + else + state = local_cpu_state(); + + raw_spin_lock(&_global_env.lock); + raw_spin_lock(&state->lock); +//printk(KERN_ALERT "P%d complete() hold lock\n", state->cpu); + env = &(state->sup_env.env); + + res = res_find_by_id(state, tinfo->mc2_param.res_id); + + if (get_task_crit_level(current) < CRIT_LEVEL_C) { + env->time_zero = tsk_rt(current)->sporadic_release_time; + } else { + _global_env.env.time_zero = tsk_rt(current)->sporadic_release_time; + } + + /* set next_replenishtime to synchronous release time */ + res->next_replenishment = tsk_rt(current)->sporadic_release_time; + + if (get_task_crit_level(current) == CRIT_LEVEL_A) { + struct table_driven_reservation *tdres; + tdres = container_of(res, struct table_driven_reservation, res); + tdres->next_interval = 0; + tdres->major_cycle_start = tsk_rt(current)->sporadic_release_time; + res->next_replenishment += tdres->intervals[0].start; + } + res->cur_budget = 0; + res->env->change_state(res->env, res, RESERVATION_DEPLETED); + + //TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update); + + raw_spin_unlock(&state->lock); + raw_spin_unlock(&_global_env.lock); +//printk(KERN_ALERT "P%d complete() release lock\n", state->cpu); + local_irq_restore(flags); + preempt_enable(); + } + sched_trace_task_completion(current, 0); + + /* update the next release time and deadline */ + prepare_for_next_period(current); + + next_release = ns_to_ktime(get_release(current)); + preempt_disable(); + TRACE_CUR("next_release=%llu\n", get_release(current)); + if (get_release(current) > litmus_clock()) { + /* sleep until next_release */ + set_current_state(TASK_INTERRUPTIBLE); + preempt_enable_no_resched(); + err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS); + if (get_task_crit_level(current) == CRIT_LEVEL_A) + sched_trace_task_release(current); + } else { + /* release the next job immediately */ + err = 0; + TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(current), litmus_clock()); + preempt_enable(); + if (get_task_crit_level(current) == 
CRIT_LEVEL_A) + sched_trace_task_release(current); + } + + TRACE_CUR("mc2_complete_job returns at %llu\n", litmus_clock()); + + return err; +} + +/* mc2_admit_task - Setup mc2 task parameters + */ +static long mc2_admit_task(struct task_struct *tsk) +{ + long err = -ESRCH; + unsigned long flags; + struct reservation *res; + struct mc2_cpu_state *state; + struct mc2_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC); + struct mc2_task *mp = tsk_rt(tsk)->mc2_data; + enum crit_level lv; + + if (!tinfo) + return -ENOMEM; + + if (!mp) { + printk(KERN_ERR "mc2_admit_task: criticality level has not been set\n"); + return err; + } + + lv = mp->crit; + preempt_disable(); + + if (lv < CRIT_LEVEL_C) { + state = cpu_state_for(task_cpu(tsk)); + raw_spin_lock_irqsave(&state->lock, flags); + + res = sup_find_by_id(&state->sup_env, mp->res_id); + + /* found the appropriate reservation */ + if (res) { + TRACE_TASK(tsk, "SUP FOUND RES ID\n"); + tinfo->mc2_param.crit = mp->crit; + tinfo->mc2_param.res_id = mp->res_id; + + /* initial values */ + err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res); + tinfo->cpu = task_cpu(tsk); + tinfo->has_departed = true; + tsk_rt(tsk)->plugin_state = tinfo; + + /* disable LITMUS^RT's per-thread budget enforcement */ + tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT; + } + + raw_spin_unlock_irqrestore(&state->lock, flags); + } else if (lv == CRIT_LEVEL_C) { + raw_spin_lock_irqsave(&_global_env.lock, flags); +//printk(KERN_ALERT "admit() hold lock\n"); + state = local_cpu_state(); + + raw_spin_lock(&state->lock); + + res = gmp_find_by_id(&_global_env, mp->res_id); + + /* found the appropriate reservation (or vCPU) */ + if (res) { + TRACE_TASK(tsk, "GMP FOUND RES ID\n"); + tinfo->mc2_param.crit = mp->crit; + tinfo->mc2_param.res_id = mp->res_id; + + /* initial values */ + err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res); + tinfo->cpu = -1; + tinfo->has_departed = true; + tsk_rt(tsk)->plugin_state = tinfo; + + /* disable LITMUS^RT's per-thread budget enforcement */ + tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT; + } + + raw_spin_unlock(&state->lock); + raw_spin_unlock_irqrestore(&_global_env.lock, flags); +//printk(KERN_ALERT "admit() release lock\n"); + } + + preempt_enable(); + + if (err) + kfree(tinfo); + + return err; +} + +/* mc2_task_new - A new real-time job is arrived. 
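The admission path just shown implies a strict setup order on the userspace side: the reservation must exist, the MC^2 parameters must be registered, and only then may the task switch to real-time mode, at which point mc2_admit_task() binds it to the reservation and disables per-thread budget enforcement. The sketch below spells out that ordering; the three helper functions are hypothetical placeholders for whatever issues the corresponding syscalls, not an existing API.

#include <sys/types.h>

/* Hypothetical helpers standing in for sys_reservation_create,
 * sys_set_mc2_task_param, and the switch into real-time mode. */
int create_reservation_for(unsigned int res_id, int cpu);
int set_mc2_params_for(pid_t pid, int crit, unsigned int res_id);
int make_task_realtime(pid_t pid);

int setup_level_b_task(pid_t pid, unsigned int res_id, int cpu)
{
	/* 1. The reservation must exist before anything refers to it. */
	if (create_reservation_for(res_id, cpu))
		return -1;
	/* 2. MC^2 parameters must be set while the task is still non-RT;
	 *    sys_set_mc2_task_param() returns -EBUSY otherwise. */
	if (set_mc2_params_for(pid, 1 /* CRIT_LEVEL_B */, res_id))
		return -1;
	/* 3. Only now enter real-time mode: mc2_admit_task() looks up the
	 *    reservation by res_id (sup_find_by_id for levels A/B,
	 *    gmp_find_by_id for level C) and fails if it does not exist. */
	return make_task_realtime(pid);
}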
Release the next job + * at the next reservation replenish time + */ +static void mc2_task_new(struct task_struct *tsk, int on_runqueue, + int is_running) +{ + unsigned long flags; + struct mc2_task_state* tinfo = get_mc2_state(tsk); + struct mc2_cpu_state *state; // = cpu_state_for(tinfo->cpu); + struct reservation *res; + enum crit_level lv = get_task_crit_level(tsk); + lt_t release = 0; + + TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n", + litmus_clock(), on_runqueue, is_running); + + local_irq_save(flags); + if (tinfo->cpu == -1) + state = local_cpu_state(); + else + state = cpu_state_for(tinfo->cpu); + + /* acquire the lock protecting the state and disable interrupts */ + raw_spin_lock(&_global_env.lock); + raw_spin_lock(&state->lock); +//printk(KERN_ALERT "new() hold lock R%d\n", tinfo->mc2_param.res_id); + if (is_running) { + state->scheduled = tsk; + /* make sure this task should actually be running */ + litmus_reschedule_local(); + } + + res = res_find_by_id(state, tinfo->mc2_param.res_id); + release = res->next_replenishment; + + if (on_runqueue || is_running) { + /* Assumption: litmus_clock() is synchronized across cores + * [see comment in pres_task_resume()] */ + mc2_update_time(lv, state, litmus_clock()); + mc2_update_ghost_state(state); + task_arrives(state, tsk); + /* NOTE: drops state->lock */ + TRACE("mc2_new()\n"); + + mc2_update_timer_and_unlock(state); +//printk(KERN_ALERT "new() dropped lock R%d\n",tinfo->mc2_param.res_id); + } else { + raw_spin_unlock(&state->lock); + raw_spin_unlock(&_global_env.lock); +//printk(KERN_ALERT "new() release lock R%d\n",tinfo->mc2_param.res_id); + } + local_irq_restore(flags); + + if (!release) { + TRACE_TASK(tsk, "mc2_task_new() next_release = %llu\n", release); + release_at(tsk, release); + } + else + TRACE_TASK(tsk, "mc2_task_new() next_release = NULL\n"); +} + +/* mc2_reservation_destroy - reservation_destroy system call backend + */ +static long mc2_reservation_destroy(unsigned int reservation_id, int cpu) +{ + long ret = -EINVAL; + struct mc2_cpu_state *state; + struct reservation *res = NULL, *next; + struct sup_reservation_environment *sup_env; + int found = 0; + enum crit_level lv = get_task_crit_level(current); + unsigned long flags; + + if (cpu == -1) { + /* if the reservation is global reservation */ + local_irq_save(flags); + state = local_cpu_state(); + raw_spin_lock(&_global_env.lock); + raw_spin_lock(&state->lock); + + list_for_each_entry_safe(res, next, &_global_env.depleted_reservations, list) { + if (res->id == reservation_id) { + TRACE("DESTROY RES FOUND!!!\n"); + list_del(&res->list); + kfree(res); + found = 1; + ret = 0; + } + } + if (!found) { + list_for_each_entry_safe(res, next, &_global_env.inactive_reservations, list) { + if (res->id == reservation_id) { + TRACE("DESTROY RES FOUND!!!\n"); + list_del(&res->list); + kfree(res); + found = 1; + ret = 0; + } + } + } + if (!found) { + list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) { + if (res->id == reservation_id) { + TRACE("DESTROY RES FOUND!!!\n"); + list_del(&res->list); + kfree(res); + found = 1; + ret = 0; + } + } + } + + raw_spin_unlock(&state->lock); + raw_spin_unlock(&_global_env.lock); + local_irq_restore(flags); + } else { + /* if the reservation is partitioned reservation */ + state = cpu_state_for(cpu); + raw_spin_lock_irqsave(&state->lock, flags); + + // res = sup_find_by_id(&state->sup_env, reservation_id); + sup_env = &state->sup_env; + list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, 
list) { + if (res->id == reservation_id) { + if (lv == CRIT_LEVEL_A) { + struct table_driven_reservation *tdres; + tdres = container_of(res, struct table_driven_reservation, res); + kfree(tdres->intervals); + } + list_del(&res->list); + kfree(res); + found = 1; + ret = 0; + } + } + if (!found) { + list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) { + if (res->id == reservation_id) { + if (lv == CRIT_LEVEL_A) { + struct table_driven_reservation *tdres; + tdres = container_of(res, struct table_driven_reservation, res); + kfree(tdres->intervals); + } + list_del(&res->list); + kfree(res); + found = 1; + ret = 0; + } + } + } + if (!found) { + list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) { + if (res->id == reservation_id) { + if (lv == CRIT_LEVEL_A) { + struct table_driven_reservation *tdres; + tdres = container_of(res, struct table_driven_reservation, res); + kfree(tdres->intervals); + } + list_del(&res->list); + kfree(res); + found = 1; + ret = 0; + } + } + } + + raw_spin_unlock_irqrestore(&state->lock, flags); + } + + TRACE("RESERVATION_DESTROY ret = %d\n", ret); + return ret; +} + +/* mc2_task_exit - Task became a normal task (not real-time task) + */ +static void mc2_task_exit(struct task_struct *tsk) +{ + unsigned long flags; + struct mc2_task_state* tinfo = get_mc2_state(tsk); + struct mc2_cpu_state *state; + enum crit_level lv = tinfo->mc2_param.crit; + struct crit_entry* ce; + int cpu; + + local_irq_save(flags); + if (tinfo->cpu != -1) + state = cpu_state_for(tinfo->cpu); + else + state = local_cpu_state(); + + raw_spin_lock(&_global_env.lock); + raw_spin_lock(&state->lock); + + if (state->scheduled == tsk) + state->scheduled = NULL; + + ce = &state->crit_entries[lv]; + if (ce->running == tsk) + ce->running = NULL; + + /* remove from queues */ + if (is_running(tsk)) { + /* Assumption: litmus_clock() is synchronized across cores + * [see comment in pres_task_resume()] */ + + /* update both global and partitioned */ + mc2_update_time(lv, state, litmus_clock()); + mc2_update_ghost_state(state); + task_departs(tsk, 0); + + /* NOTE: drops state->lock */ + TRACE("mc2_exit()\n"); + + mc2_update_timer_and_unlock(state); + } else { + raw_spin_unlock(&state->lock); + raw_spin_unlock(&_global_env.lock); + } + + for_each_online_cpu(cpu) { + state = cpu_state_for(cpu); + if (state == local_cpu_state()) + continue; + raw_spin_lock(&state->lock); + + if (state->scheduled == tsk) + state->scheduled = NULL; + + ce = &state->crit_entries[lv]; + if (ce->running == tsk) + ce->running = NULL; + + raw_spin_unlock(&state->lock); + } + + local_irq_restore(flags); + + kfree(tsk_rt(tsk)->plugin_state); + tsk_rt(tsk)->plugin_state = NULL; + kfree(tsk_rt(tsk)->mc2_data); + tsk_rt(tsk)->mc2_data = NULL; +} + +/* create_polling_reservation - create a new polling reservation + */ +static long create_polling_reservation( + int res_type, + struct reservation_config *config) +{ + struct mc2_cpu_state *state; + struct reservation* res; + struct polling_reservation *pres; + unsigned long flags; + int use_edf = config->priority == LITMUS_NO_PRIORITY; + int periodic = res_type == PERIODIC_POLLING; + long err = -EINVAL; + + /* sanity checks */ + if (config->polling_params.budget > + config->polling_params.period) { + printk(KERN_ERR "invalid polling reservation (%u): " + "budget > period\n", config->id); + return -EINVAL; + } + if (config->polling_params.budget > + config->polling_params.relative_deadline + && config->polling_params.relative_deadline) { + printk(KERN_ERR 
"invalid polling reservation (%u): " + "budget > deadline\n", config->id); + return -EINVAL; + } + if (config->polling_params.offset > + config->polling_params.period) { + printk(KERN_ERR "invalid polling reservation (%u): " + "offset > period\n", config->id); + return -EINVAL; + } + + /* Allocate before we grab a spin lock. + * Todo: would be nice to use a core-local allocation. + */ + pres = kzalloc(sizeof(*pres), GFP_KERNEL); + if (!pres) + return -ENOMEM; + + if (config->cpu != -1) { + + raw_spin_lock_irqsave(&_global_env.lock, flags); + state = cpu_state_for(config->cpu); + raw_spin_lock(&state->lock); + + res = sup_find_by_id(&state->sup_env, config->id); + if (!res) { + polling_reservation_init(pres, use_edf, periodic, + config->polling_params.budget, + config->polling_params.period, + config->polling_params.relative_deadline, + config->polling_params.offset); + pres->res.id = config->id; + pres->res.blocked_by_ghost = 0; + pres->res.is_ghost = 0; + if (!use_edf) + pres->res.priority = config->priority; + sup_add_new_reservation(&state->sup_env, &pres->res); + err = config->id; + } else { + err = -EEXIST; + } + + raw_spin_unlock(&state->lock); + raw_spin_unlock_irqrestore(&_global_env.lock, flags); + + } else { + raw_spin_lock_irqsave(&_global_env.lock, flags); + + res = gmp_find_by_id(&_global_env, config->id); + if (!res) { + polling_reservation_init(pres, use_edf, periodic, + config->polling_params.budget, + config->polling_params.period, + config->polling_params.relative_deadline, + config->polling_params.offset); + pres->res.id = config->id; + pres->res.blocked_by_ghost = 0; + pres->res.scheduled_on = NO_CPU; + pres->res.is_ghost = 0; + if (!use_edf) + pres->res.priority = config->priority; + gmp_add_new_reservation(&_global_env, &pres->res); + TRACE("GMP_ADD_NEW_RESERVATION R%d\n", pres->res.id); + err = config->id; + } else { + err = -EEXIST; + } + raw_spin_unlock_irqrestore(&_global_env.lock, flags); + } + + if (err < 0) + kfree(pres); + + return err; +} + +#define MAX_INTERVALS 1024 + +/* create_table_driven_reservation - create a table_driven reservation + */ +static long create_table_driven_reservation( + struct reservation_config *config) +{ + struct mc2_cpu_state *state; + struct reservation* res; + struct table_driven_reservation *td_res = NULL; + struct lt_interval *slots = NULL; + size_t slots_size; + unsigned int i, num_slots; + unsigned long flags; + long err = -EINVAL; + + + if (!config->table_driven_params.num_intervals) { + printk(KERN_ERR "invalid table-driven reservation (%u): " + "no intervals\n", config->id); + return -EINVAL; + } + + if (config->table_driven_params.num_intervals > MAX_INTERVALS) { + printk(KERN_ERR "invalid table-driven reservation (%u): " + "too many intervals (max: %d)\n", config->id, MAX_INTERVALS); + return -EINVAL; + } + + num_slots = config->table_driven_params.num_intervals; + slots_size = sizeof(slots[0]) * num_slots; + slots = kzalloc(slots_size, GFP_KERNEL); + if (!slots) + return -ENOMEM; + + td_res = kzalloc(sizeof(*td_res), GFP_KERNEL); + if (!td_res) + err = -ENOMEM; + else + err = copy_from_user(slots, + config->table_driven_params.intervals, slots_size); + + if (!err) { + /* sanity checks */ + for (i = 0; !err && i < num_slots; i++) + if (slots[i].end <= slots[i].start) { + printk(KERN_ERR + "invalid table-driven reservation (%u): " + "invalid interval %u => [%llu, %llu]\n", + config->id, i, + slots[i].start, slots[i].end); + err = -EINVAL; + } + + for (i = 0; !err && i + 1 < num_slots; i++) + if (slots[i + 1].start <= 
slots[i].end) { + printk(KERN_ERR + "invalid table-driven reservation (%u): " + "overlapping intervals %u, %u\n", + config->id, i, i + 1); + err = -EINVAL; + } + + if (slots[num_slots - 1].end > + config->table_driven_params.major_cycle_length) { + printk(KERN_ERR + "invalid table-driven reservation (%u): last " + "interval ends past major cycle %llu > %llu\n", + config->id, + slots[num_slots - 1].end, + config->table_driven_params.major_cycle_length); + err = -EINVAL; + } + } + + if (!err) { + state = cpu_state_for(config->cpu); + raw_spin_lock_irqsave(&state->lock, flags); + + res = sup_find_by_id(&state->sup_env, config->id); + if (!res) { + table_driven_reservation_init(td_res, + config->table_driven_params.major_cycle_length, + slots, num_slots); + td_res->res.id = config->id; + td_res->res.priority = config->priority; + td_res->res.blocked_by_ghost = 0; + sup_add_new_reservation(&state->sup_env, &td_res->res); + err = config->id; + } else { + err = -EEXIST; + } + + raw_spin_unlock_irqrestore(&state->lock, flags); + } + + if (err < 0) { + kfree(slots); + kfree(td_res); + } + + return err; +} + +/* mc2_reservation_create - reservation_create system call backend + */ +static long mc2_reservation_create(int res_type, void* __user _config) +{ + long ret = -EINVAL; + struct reservation_config config; + + TRACE("Attempt to create reservation (%d)\n", res_type); + + if (copy_from_user(&config, _config, sizeof(config))) + return -EFAULT; + + if (config.cpu != -1) { + if (config.cpu < 0 || !cpu_online(config.cpu)) { + printk(KERN_ERR "invalid polling reservation (%u): " + "CPU %d offline\n", config.id, config.cpu); + return -EINVAL; + } + } + + switch (res_type) { + case PERIODIC_POLLING: + case SPORADIC_POLLING: + ret = create_polling_reservation(res_type, &config); + break; + + case TABLE_DRIVEN: + ret = create_table_driven_reservation(&config); + break; + + default: + return -EINVAL; + }; + + return ret; +} + +static struct domain_proc_info mc2_domain_proc_info; + +static long mc2_get_domain_proc_info(struct domain_proc_info **ret) +{ + *ret = &mc2_domain_proc_info; + return 0; +} + +static void mc2_setup_domain_proc(void) +{ + int i, cpu; + int num_rt_cpus = num_online_cpus(); + + struct cd_mapping *cpu_map, *domain_map; + + memset(&mc2_domain_proc_info, sizeof(mc2_domain_proc_info), 0); + init_domain_proc_info(&mc2_domain_proc_info, num_rt_cpus, num_rt_cpus); + mc2_domain_proc_info.num_cpus = num_rt_cpus; + mc2_domain_proc_info.num_domains = num_rt_cpus; + + i = 0; + for_each_online_cpu(cpu) { + cpu_map = &mc2_domain_proc_info.cpu_to_domains[i]; + domain_map = &mc2_domain_proc_info.domain_to_cpus[i]; + + cpu_map->id = cpu; + domain_map->id = i; + cpumask_set_cpu(i, cpu_map->mask); + cpumask_set_cpu(cpu, domain_map->mask); + ++i; + } +} + +static long mc2_activate_plugin(void) +{ + int cpu, lv; + struct mc2_cpu_state *state; + struct cpu_entry *ce; + + gmp_init(&_global_env); + raw_spin_lock_init(&_lowest_prio_cpu.lock); + + for_each_online_cpu(cpu) { + TRACE("Initializing CPU%d...\n", cpu); + + state = cpu_state_for(cpu); + ce = &_lowest_prio_cpu.cpu_entries[cpu]; + + ce->cpu = cpu; + ce->scheduled = NULL; + ce->deadline = ULLONG_MAX; + ce->lv = NUM_CRIT_LEVELS; + ce->will_schedule = false; + + raw_spin_lock_init(&state->lock); + state->cpu = cpu; + state->scheduled = NULL; + for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) { + struct crit_entry *cr_entry = &state->crit_entries[lv]; + cr_entry->level = lv; + cr_entry->running = NULL; + } + sup_init(&state->sup_env); + + 
hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + state->timer.function = on_scheduling_timer; + } + + mc2_setup_domain_proc(); + + return 0; +} + +static void mc2_finish_switch(struct task_struct *prev) +{ + struct mc2_cpu_state *state = local_cpu_state(); + + state->scheduled = is_realtime(current) ? current : NULL; + TRACE("FINISH CXS! from %s/%d to %s/%d\n", prev ? (prev)->comm : "null", prev ? (prev)->pid : 0, current ? (current)->comm : "null", current ? (current)->pid : 0); +} + +static long mc2_deactivate_plugin(void) +{ + int cpu; + struct mc2_cpu_state *state; + struct reservation *res; + struct next_timer_event *event; + struct cpu_entry *ce; + + raw_spin_lock(&_global_env.lock); + + for_each_online_cpu(cpu) { + state = cpu_state_for(cpu); + raw_spin_lock(&state->lock); + + hrtimer_cancel(&state->timer); + + ce = &_lowest_prio_cpu.cpu_entries[cpu]; + + ce->cpu = cpu; + ce->scheduled = NULL; + ce->deadline = ULLONG_MAX; + ce->lv = NUM_CRIT_LEVELS; + ce->will_schedule = false; + + /* Delete all reservations --- assumes struct reservation + * is prefix of containing struct. */ + + while (!list_empty(&state->sup_env.active_reservations)) { + res = list_first_entry( + &state->sup_env.active_reservations, + struct reservation, list); + list_del(&res->list); + kfree(res); + } + + while (!list_empty(&state->sup_env.inactive_reservations)) { + res = list_first_entry( + &state->sup_env.inactive_reservations, + struct reservation, list); + list_del(&res->list); + kfree(res); + } + + while (!list_empty(&state->sup_env.depleted_reservations)) { + res = list_first_entry( + &state->sup_env.depleted_reservations, + struct reservation, list); + list_del(&res->list); + kfree(res); + } + + raw_spin_unlock(&state->lock); + } + + + while (!list_empty(&_global_env.active_reservations)) { + TRACE("RES FOUND!!!\n"); + res = list_first_entry( + &_global_env.active_reservations, + struct reservation, list); + list_del(&res->list); + kfree(res); + } + + while (!list_empty(&_global_env.inactive_reservations)) { + TRACE("RES FOUND!!!\n"); + res = list_first_entry( + &_global_env.inactive_reservations, + struct reservation, list); + list_del(&res->list); + kfree(res); + } + + while (!list_empty(&_global_env.depleted_reservations)) { + TRACE("RES FOUND!!!\n"); + res = list_first_entry( + &_global_env.depleted_reservations, + struct reservation, list); + list_del(&res->list); + kfree(res); + } + + while (!list_empty(&_global_env.next_events)) { + TRACE("EVENT FOUND!!!\n"); + event = list_first_entry( + &_global_env.next_events, + struct next_timer_event, list); + list_del(&event->list); + kfree(event); + } + + raw_spin_unlock(&_global_env.lock); + + destroy_domain_proc_info(&mc2_domain_proc_info); + return 0; +} + +static struct sched_plugin mc2_plugin = { + .plugin_name = "MC2", + .schedule = mc2_schedule, + .finish_switch = mc2_finish_switch, + .task_wake_up = mc2_task_resume, + .admit_task = mc2_admit_task, + .task_new = mc2_task_new, + .task_exit = mc2_task_exit, + .complete_job = mc2_complete_job, + .get_domain_proc_info = mc2_get_domain_proc_info, + .activate_plugin = mc2_activate_plugin, + .deactivate_plugin = mc2_deactivate_plugin, + .reservation_create = mc2_reservation_create, + .reservation_destroy = mc2_reservation_destroy, +}; + +static int __init init_mc2(void) +{ + return register_sched_plugin(&mc2_plugin); +} + +module_init(init_mc2); -- cgit v1.2.2
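As a closing illustration of the configuration constraints enforced by create_polling_reservation() above: the budget may not exceed the period, may not exceed the relative deadline when one is specified (a zero deadline skips that check, as in the patch), and the release offset must stay within the period. A standalone restatement of those checks, using an illustrative stand-in for the real polling parameters structure:

#include <stdbool.h>

/* Illustrative stand-in for the polling parameters; in the patch these are
 * lt_t time values inside struct reservation_config. */
struct polling_params_sketch {
	unsigned long long budget;
	unsigned long long period;
	unsigned long long relative_deadline;	/* 0: check skipped, as in the patch */
	unsigned long long offset;
};

static bool polling_config_is_sane(const struct polling_params_sketch *p)
{
	if (p->budget > p->period)
		return false;		/* budget larger than the period */
	if (p->relative_deadline && p->budget > p->relative_deadline)
		return false;		/* budget larger than the deadline */
	if (p->offset > p->period)
		return false;		/* release offset outside the period */
	return true;
}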