#include <linux/slab.h>
#include <linux/uaccess.h>

#include <litmus/trace.h>
#include <litmus/sched_plugin.h>
#include <litmus/prioq_lock.h>

#include <litmus/litmus_proc.h>


#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
#include <litmus/gpu_affinity.h>
#endif

/*
 * Debug aid: trace the owner, the highest-priority waiter and every queued
 * waiter (front to back) of 'mutex', together with the task each of them
 * currently inherits from.  The body is compiled out entirely unless
 * CONFIG_SCHED_DEBUG_TRACE is defined.
 */
void __attribute__((unused))
__dump_prioq_lock_info(struct prioq_mutex *mutex)
{
#ifdef CONFIG_SCHED_DEBUG_TRACE
	struct task_struct *owner = mutex->owner;
	struct task_struct *hp = mutex->hp_waiter;
	struct task_struct *owner_inh = (owner) ? tsk_rt(owner)->inh_task : NULL;
	struct task_struct *hp_inh = (hp) ? tsk_rt(hp)->inh_task : NULL;
	struct list_head *cursor;

	TRACE_CUR("%s (mutex: %p):\n", mutex->litmus_lock.name, mutex);
	TRACE_CUR("owner: %s/%d (inh: %s/%d)\n",
			  (owner) ? owner->comm : "null",
			  (owner) ? owner->pid : 0,
			  (owner_inh) ? owner_inh->comm : "null",
			  (owner_inh) ? owner_inh->pid : 0);
	TRACE_CUR("hp waiter: %s/%d (inh: %s/%d)\n",
			  (hp) ? hp->comm : "null",
			  (hp) ? hp->pid : 0,
			  (hp_inh) ? hp_inh->comm : "null",
			  (hp_inh) ? hp_inh->pid : 0);
	TRACE_CUR("blocked tasks, front to back:\n");

	/* nothing queued: say so and stop */
	if (!waitqueue_active(&mutex->wait)) {
		TRACE_CUR("\t<NONE>\n");
		return;
	}

	list_for_each(cursor, &mutex->wait.task_list) {
		wait_queue_t *entry = list_entry(cursor, wait_queue_t, task_list);
		struct task_struct *blocked_task;
		struct task_struct *inh;
#ifdef CONFIG_LITMUS_DGL_SUPPORT
		/* a DGL request only counts as "enabled" on the lock that the
		 * task has recorded as its blocked_lock */
		int enabled = 1;

		if (entry->func == dgl_wake_up) {
			dgl_wait_state_t *dgl_wait = (dgl_wait_state_t*) entry->private;

			blocked_task = dgl_wait->task;
			if (tsk_rt(blocked_task)->blocked_lock != &mutex->litmus_lock)
				enabled = 0;
		}
		else {
			blocked_task = (struct task_struct*) entry->private;
		}
#else
		blocked_task = (struct task_struct*) entry->private;
#endif
		inh = tsk_rt(blocked_task)->inh_task;

		TRACE_CUR("\t%s/%d (inh: %s/%d)"
#ifdef CONFIG_LITMUS_DGL_SUPPORT
				  " DGL enabled: %d"
#endif
				  "\n",
				  blocked_task->comm, blocked_task->pid,
				  (inh) ? inh->comm : "null",
				  (inh) ? inh->pid : 0
#ifdef CONFIG_LITMUS_DGL_SUPPORT
				  , enabled
#endif
				  );
	}
#endif
}

/*
 * Insert 'add_node' into the wait queue 'q', keeping the queue ordered as
 * decided by litmus->compare() (entries that compare higher sit closer to
 * the front).  A node that does not compare higher than any queued entry,
 * yet is not outranked by the tail either, triggers a WARN and is appended.
 */
static void __add_wait_queue_sorted(wait_queue_head_t *q, wait_queue_t *add_node)
{
	struct list_head *head = &(q->task_list);
	struct list_head *cursor;
	struct task_struct *incoming;
	wait_queue_t *tail;

	/* empty queue: the new node is trivially in order */
	if (list_empty(head))
		goto append;

	incoming = get_queued_task(add_node);

	/* fast path: the current tail outranks the newcomer */
	tail = list_entry(head->prev, wait_queue_t, task_list);
	if (litmus->compare(get_queued_task(tail), incoming))
		goto append;

	/* otherwise place it in front of the first entry it outranks */
	list_for_each(cursor, head) {
		wait_queue_t *entry = list_entry(cursor, wait_queue_t, task_list);

		if (litmus->compare(incoming, get_queued_task(entry))) {
			/* list_add_tail() links the node directly before 'cursor' */
			list_add_tail(&add_node->task_list, cursor);
			return;
		}
	}

	/* no slot found although the tail did not outrank us: unexpected */
	WARN_ON(1);

append:
	list_add_tail(&add_node->task_list, head);
}

/*
 * Flag 'wait' as an exclusive waiter (WQ_FLAG_EXCLUSIVE) and insert it into
 * 'q' at its sorted position.
 */
static inline void __add_wait_queue_sorted_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
	wait->flags = wait->flags | WQ_FLAG_EXCLUSIVE;

	__add_wait_queue_sorted(q, wait);
}

static void __prioq_increase_pos(struct prioq_mutex *mutex, struct task_struct *t)
{
	wait_queue_t		*q;
	struct list_head	*pos;
	struct task_struct  *queued;

	/* TODO: Make this efficient instead of remove/add */
	list_for_each(pos, &mutex->wait.task_list) {
		q = list_entry(pos, wait_queue_t, task_list);
		queued = get_queued_task(q);
		if (queued == t) {
			__remove_wait_queue(&mutex->wait, q);
			__add_wait_queue_sorted(&mutex->wait, q);
			return;
		}
	}

	BUG();
}


static void __prioq_decrease_pos(struct prioq_mutex *mutex, struct task_struct *t)
{
	wait_queue_t		*q;
	struct list_head	*pos;
	struct task_struct  *queued;

	/* TODO: Make this efficient instead of remove/add */
	list_for_each(pos, &mutex->wait.task_list) {
		q = list_entry(pos, wait_queue_t, task_list);
		queued = get_queued_task(q);
		if (queued == t) {
			__remove_wait_queue(&mutex->wait, q);
			__add_wait_queue_sorted(&mutex->wait, q);
			return;
		}
	}

	BUG();
}

#ifdef CONFIG_LITMUS_DGL_SUPPORT
/*
 * DGL-aware variant of __prioq_increase_pos().
 *
 * Finds the wait-queue entry of 't' on 'mutex' and re-sorts it.  If the
 * entry is a DGL request, 't' is first re-sorted on every other lock of
 * the request, and then an atomic acquisition of the whole DGL is tried;
 * on success 't' is woken up.
 *
 * Returns 1 if 't' acquired its DGL and was woken, 0 otherwise (including
 * the case where 't' is not queued on 'mutex').
 */
static int __prioq_dgl_increase_pos(struct prioq_mutex *mutex, struct task_struct *t)
{
	struct list_head *pos;

	list_for_each(pos, &mutex->wait.task_list) {
		wait_queue_t *entry = list_entry(pos, wait_queue_t, task_list);
		dgl_wait_state_t *dgl_wait;
		int i;

		if (entry->func != dgl_wake_up) {
			/* plain (non-DGL) request: private is the task itself */
			if ((struct task_struct*) entry->private != t)
				continue;

			__remove_wait_queue(&mutex->wait, entry);
			__add_wait_queue_sorted(&mutex->wait, entry);
			return 0;
		}

		/* DGL request: private is the DGL wait state */
		dgl_wait = (dgl_wait_state_t*) entry->private;
		if (dgl_wait->task != t)
			continue;

		/* reposition on the other mutexes of the request ... */
		for (i = 0; i < dgl_wait->size; ++i) {
			struct prioq_mutex *pm = (struct prioq_mutex *) dgl_wait->locks[i];

			if (pm != mutex)
				__prioq_increase_pos(pm, t);
		}
		/* ... and on this one */
		__remove_wait_queue(&mutex->wait, entry);
		__add_wait_queue_sorted(&mutex->wait, entry);

		/* non-zero means the DGL cannot be taken yet: nothing more to do */
		if (__attempt_atomic_dgl_acquire(NULL, dgl_wait))
			return 0;

		TRACE_CUR("%s/%d can take its entire DGL atomically via inheritance!\n",
				  dgl_wait->task->comm, dgl_wait->task->pid);

		/* we took the lock!  we've already been removed from mutex->wait.task_list */

		TRACE_TASK(t, "waking up since it is no longer blocked.\n");

		tsk_rt(t)->blocked_lock = NULL;
		mb();

#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
		/* re-enable tracking */
		if (tsk_rt(t)->held_gpus)
			tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
#endif
		wake_up_process(t);
		return 1;
	}

	/* 't' was not found on this mutex */
	return 0;
}

static void __prioq_dgl_decrease_pos(struct prioq_mutex *mutex, struct task_struct *t)
{
	// TODO:
	//	(1) Increase position for 't' in all of it's DGLs.
	//  (2) Check to see if 't' can take the DGLs atomically
	//  (3) If it can take the DGLs, do so.
	//  (4) Cleanup?
	//  (5) Wake up 't'


	wait_queue_t		*q;
	struct list_head	*pos;
	struct task_struct  *queued;
	int i;

	list_for_each(pos, &mutex->wait.task_list) {
		q = list_entry(pos, wait_queue_t, task_list);
		if(q->func == dgl_wake_up) {
			// we're looking at a dgl request
			dgl_wait_state_t *dgl_wait = (dgl_wait_state_t*) q->private;
			queued = dgl_wait->task;

			if (queued == t)  // is it the one we're looking for?
			{
				// reposition on the other mutexes
				for(i = 0; i < dgl_wait->size; ++i) {
					struct prioq_mutex *pm = (struct prioq_mutex *)dgl_wait->locks[i];
					if (pm != mutex)
						__prioq_decrease_pos(pm, t);
				}
				// reposition on this mutex
				__remove_wait_queue(&mutex->wait, q);
				__add_wait_queue_sorted(&mutex->wait, q);
				return;
			}
		}
		else {
			// not dgl request.
			queued = (struct task_struct*) q->private;
			if (queued == t) {  // is this the one we're looking for?
								// if so, reposition it.
				__remove_wait_queue(&mutex->wait, q);
				__add_wait_queue_sorted(&mutex->wait, q);
				return;
			}
		}
	}

	BUG();
}
#endif


/* caller is responsible for locking */
static struct task_struct* __prioq_mutex_find_hp_waiter(struct prioq_mutex *mutex,
											 struct task_struct* skip)
{
	wait_queue_t		*q;
	struct list_head	*pos;
	struct task_struct  *queued = NULL, *found = NULL;

	/* list in sorted order.  higher-prio tasks likely at the front. */
	list_for_each(pos, &mutex->wait.task_list) {
		q = list_entry(pos, wait_queue_t, task_list);
		queued = get_queued_task(q);

		/* Compare task prios, find high prio task. */
		if (queued &&
			(queued != skip) &&
			(tsk_rt(queued)->blocked_lock == &mutex->litmus_lock) &&
			litmus->compare(queued, found)) {
			found = queued;
		}
	}
	return found;
}


#ifdef CONFIG_LITMUS_DGL_SUPPORT

/* Return non-zero iff task 't' is the current owner of lock 'l'. */
int prioq_mutex_is_owner(struct litmus_lock *l, struct task_struct *t)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);
	struct task_struct *holder = mutex->owner;

	return holder == t;
}

/* Return the task recorded as owner of lock 'l' (NULL if there is none). */
struct task_struct* prioq_mutex_get_owner(struct litmus_lock *l)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);
	struct task_struct *holder = mutex->owner;

	return holder;
}

// Enqueue the DGL request 'dgl_wait' on lock 'l' using 'wq_node'.
// The return value means "1 if the resource was immediately acquired";
// this implementation always enqueues the request and returns 0.
// Assumes mutex->lock is held.
// Sets the task state to TASK_UNINTERRUPTIBLE before enqueueing.
int prioq_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait,
					   wait_queue_t* wq_node)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);
	struct task_struct *requester = dgl_wait->task;

	/* only the requesting task itself may enqueue its DGL request */
	BUG_ON(requester != current);

	init_dgl_waitqueue_entry(wq_node, dgl_wait);

	set_task_state(requester, TASK_UNINTERRUPTIBLE);
	__add_wait_queue_sorted_exclusive(&mutex->wait, wq_node);

	/* never acquired on the spot */
	return 0;
}


/*
 * Make lock 'l' the lock that the task of DGL request 'dgl_wait' is recorded
 * as blocked on.  If that task outranks the current hp_waiter and the lock
 * has an owner, the task becomes hp_waiter, the lock's node in the owner's
 * hp_blocked_tasks heap is updated, and the owner's priority is raised via
 * litmus->nested_increase_prio() when the heap's top changed accordingly.
 *
 * Every path ends by releasing mutex->lock through the fine-grain unlock
 * helper (or by handing it to nested_increase_prio()); the function never
 * takes that lock itself -- see the caveat below.
 */
void prioq_mutex_enable_priority(struct litmus_lock *l,
							   dgl_wait_state_t* dgl_wait)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);
	struct task_struct *t = dgl_wait->task;
	struct task_struct *owner = mutex->owner;
	unsigned long flags = 0;  // these are unused under DGL coarse-grain locking

	/**************************************
	* This code looks like it supports fine-grain locking, but it does not!
	* TODO: Guarantee that mutex->lock is held by the caller to support fine-grain locking.
	**************************************/

	/* a task cannot wait on a lock it already owns */
	BUG_ON(owner == t);

	/* record where 't' is blocked before anyone can observe the change */
	tsk_rt(t)->blocked_lock = l;
	mb();

	/* NOTE(review): the (t) ? ... checks below are redundant; 't' has
	 * already been dereferenced above. */
	TRACE_TASK(t, "Enabling prio on lock %d. I am %s/%d  :  cur hp_waiter is %s/%d.\n",
			   l->ident,
			   (t) ? t->comm : "null",
			   (t) ? t->pid : 0,
			   (mutex->hp_waiter) ? mutex->hp_waiter->comm : "null",
			   (mutex->hp_waiter) ? mutex->hp_waiter->pid : 0);

	if (litmus->compare(t, mutex->hp_waiter)) {
		struct task_struct *old_max_eff_prio;
		struct task_struct *new_max_eff_prio;
		struct task_struct *new_prio = NULL;

		if(mutex->hp_waiter)
			TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
					   mutex->hp_waiter->comm, mutex->hp_waiter->pid);
		else
			TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");


		/* no owner: nobody to inherit; hp_waiter is left unchanged here */
		if (!owner) {
			TRACE_TASK(t, "Enabling priority, but this lock %d is idle.\n", l->ident);
			goto out;
		}

		raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);

		/* snapshot the heap top, install 't' as hp_waiter, re-sort this
		 * lock's heap node, then look at the heap top again */
		old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
		mutex->hp_waiter = t;
		l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
		binheap_decrease(&l->nest.hp_binheap_node,
						 &tsk_rt(owner)->hp_blocked_tasks);
		new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);

		if(new_max_eff_prio != old_max_eff_prio) {
			TRACE_TASK(t, "is new hp_waiter.\n");

			/* raise the owner only if it was running at the old top, or
			 * if the new top outranks the owner's effective priority */
			if ((effective_priority(owner) == old_max_eff_prio) ||
				(litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
				new_prio = new_max_eff_prio;
			}
		}
		else {
			TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
		}

		if(new_prio) {
			litmus->nested_increase_prio(owner, new_prio,
										 &mutex->lock, flags);  // unlocks lock.
		}
		else {
			raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
			unlock_fine_irqrestore(&mutex->lock, flags);
		}

		return;
	}

	TRACE_TASK(t, "no change in hp_waiter.\n");

out:
	unlock_fine_irqrestore(&mutex->lock, flags);
}

/*
 * If 'l' is the lock that the task of 'dgl_wait' is currently recorded as
 * blocked on (its "primary" lock), clear that record, hand the hp_waiter
 * role of 'l' to the next suitable waiter if the task held it, and let
 * select_next_lock() pick the next lock of the DGL to block on.
 * Otherwise only a trace message is emitted.
 */
static void select_next_lock_if_primary(struct litmus_lock *l,
										dgl_wait_state_t *dgl_wait)
{
	struct task_struct *t = dgl_wait->task;
	struct prioq_mutex *mutex;

	if (tsk_rt(t)->blocked_lock != l) {
		TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n",
				  l->ident, t->comm, t->pid);
		return;
	}

	mutex = prioq_mutex_from_lock(l);

	TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n",
			  l->ident, t->comm, t->pid);

	tsk_rt(t)->blocked_lock = NULL;
	mb();

	/* determine new hp_waiter if necessary */
	if (t == mutex->hp_waiter) {
		TRACE_TASK(t, "Deciding to not be hp waiter on lock %d any more.\n", l->ident);

		/* 't' steps down, so the next-highest priority in the queue
		 * must be reflected in hp_waiter and in the cached effective
		 * priority of the lock. */
		mutex->hp_waiter = __prioq_mutex_find_hp_waiter(mutex, t);

		if (mutex->hp_waiter) {
			l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
			TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n");
		}
		else {
			l->nest.hp_waiter_eff_prio = NULL;
			TRACE("no further waiters\n");
		}
	}

	select_next_lock(dgl_wait /*, l*/);  // pick the next lock to be blocked on
}
#endif


#ifdef CONFIG_LITMUS_DGL_SUPPORT

/*
 * Report whether task 't' could take lock 'l' right now as part of an atomic
 * DGL acquisition: the lock must have no owner and 't' must be the task at
 * the front of the wait queue.
 *
 * Returns 1 if both conditions hold, 0 otherwise.
 */
int prioq_mutex_dgl_can_quick_lock(struct litmus_lock *l, struct task_struct *t)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);
	wait_queue_t *front;

	if (mutex->owner)
		return 0;

	/* With an empty queue, task_list.next points back at the list head;
	 * list_entry() on it would not yield a real wait_queue_t, so it must
	 * not be handed to get_queued_task().  Nobody is at the front. */
	if (!waitqueue_active(&mutex->wait))
		return 0;

	front = list_entry(mutex->wait.task_list.next, wait_queue_t, task_list);

	return (get_queued_task(front) == t) ? 1 : 0;
}

/*
 * Give lock 'l' to task 't' as part of an atomic DGL acquisition.  'l' must
 * be unowned and 't' must be at the front of its wait queue (both enforced
 * with BUG_ON).
 *
 * When 'l' is not 'cur_lock', this function also does the bookkeeping of the
 * acquisition: it removes 'q' from the wait queue, recomputes hp_waiter, and
 * adds the lock's node to t's hp_blocked_tasks heap.  When 'l' is
 * 'cur_lock', only the owner is set; per the original note, the unlock call
 * that triggered the quick lock handles the rest.
 */
void prioq_mutex_dgl_quick_lock(struct litmus_lock *l, struct litmus_lock *cur_lock,
								struct task_struct* t, wait_queue_t *q)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);

	BUG_ON(mutex->owner);
	BUG_ON(t != get_queued_task(list_entry(mutex->wait.task_list.next, wait_queue_t, task_list)));

	mutex->owner = t;

	/* the unlock call that triggered the quick_lock call will handle
	 * the acquire of cur_lock. */
	if (l == cur_lock)
		return;

	/* we have to update the state of the other lock for it */
	__remove_wait_queue(&mutex->wait, q);

	mutex->hp_waiter = __prioq_mutex_find_hp_waiter(mutex, t);
	if (mutex->hp_waiter) {
		l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
		TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n");
	}
	else {
		l->nest.hp_waiter_eff_prio = NULL;
		TRACE("no further waiters\n");
	}

	/* the lock is now one of the locks 't' holds */
	raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
	binheap_add(&l->nest.hp_binheap_node,
				&tsk_rt(t)->hp_blocked_tasks,
				struct nested_info, hp_binheap_node);
	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
}
#endif


/*
 * Acquire the priority-queue mutex 'l' on behalf of the calling task.
 *
 * If the lock has an owner, or has waiters whose hp_waiter outranks the
 * caller, the caller is enqueued in priority order, possibly becomes the
 * lock's hp_waiter (pushing its priority to the owner), and suspends until
 * it has been made owner.  Otherwise the caller takes the lock at once and
 * the lock's node is added to the caller's hp_blocked_tasks heap.
 *
 * Returns 0 on success, -EPERM if the caller is not a real-time task.
 */
int prioq_mutex_lock(struct litmus_lock* l)
{
	struct task_struct *t = current;
	struct task_struct *owner;
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);
	wait_queue_t wait;
	unsigned long flags;

#ifdef CONFIG_LITMUS_DGL_SUPPORT
	raw_spinlock_t *dgl_lock;
#endif

	if (!is_realtime(t))
		return -EPERM;

#ifdef CONFIG_LITMUS_DGL_SUPPORT
	dgl_lock = litmus->get_dgl_spinlock(t);
#endif

	/* NOTE(review): 'dgl_lock' is only declared under
	 * CONFIG_LITMUS_DGL_SUPPORT; presumably the *_global_* helpers expand to
	 * nothing otherwise -- confirm against their definition. */
	lock_global_irqsave(dgl_lock, flags);
	lock_fine_irqsave(&mutex->lock, flags);

	/* block if there is an owner, or if hp_waiter is blocked for DGL and
	 * prio(t) < prio(hp_waiter) */
	if (mutex->owner ||
		(waitqueue_active(&mutex->wait) && litmus->compare(mutex->hp_waiter, t))) {
		TRACE_TASK(t, "Blocking on lock %d (held by %s/%d).\n",
					l->ident,
				   (mutex->owner) ? mutex->owner->comm : "null",
				   (mutex->owner) ? mutex->owner->pid : 0);

#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
		// KLUDGE: don't count this suspension as time in the critical gpu
		// critical section
		if(tsk_rt(t)->held_gpus) {
			tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
		}
#endif

		/* resource is not free => must suspend and wait */

		owner = mutex->owner;

		/* 'wait' lives on this stack frame; it stays valid because we do
		 * not return before suspend_for_lock() has come back */
		init_waitqueue_entry(&wait, t);

		tsk_rt(t)->blocked_lock = l;  /* record where we are blocked */
		mb();  // needed?

		/* FIXME: interruptible would be nice some day */
		set_task_state(t, TASK_UNINTERRUPTIBLE);

		__add_wait_queue_sorted_exclusive(&mutex->wait, &wait);

		/* check if we need to activate priority inheritance */
		/* We can't be the hp waiter if there is no owner - task waiting for
		 * the full DGL must be the hp_waiter. */
		if (owner && litmus->compare(t, mutex->hp_waiter)) {

			struct task_struct *old_max_eff_prio;
			struct task_struct *new_max_eff_prio;
			struct task_struct *new_prio = NULL;

			if(mutex->hp_waiter)
				TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
						   mutex->hp_waiter->comm, mutex->hp_waiter->pid);
			else
				TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");

			raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);

			/* snapshot the owner's heap top before we become hp_waiter */
			old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
			mutex->hp_waiter = t;

			TRACE_TASK(t, "prioq_mutex %d state after enqeue in priority queue\n", l->ident);
			__dump_prioq_lock_info(mutex);

			/* publish our effective priority and re-sort this lock's
			 * node in the owner's heap */
			l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
			binheap_decrease(&l->nest.hp_binheap_node,
							 &tsk_rt(owner)->hp_blocked_tasks);
			new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);

			if(new_max_eff_prio != old_max_eff_prio) {
				TRACE_TASK(t, "is new hp_waiter.\n");

				/* raise the owner only if it ran at the old top, or if
				 * the new top outranks its effective priority */
				if ((effective_priority(owner) == old_max_eff_prio) ||
					(litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
					new_prio = new_max_eff_prio;
				}
			}
			else {
				TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
			}

			if(new_prio) {
				litmus->nested_increase_prio(owner, new_prio, &mutex->lock,
											 flags);  // unlocks lock.
			}
			else {
				raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
				unlock_fine_irqrestore(&mutex->lock, flags);
			}
		}
		else {
			TRACE_TASK(t, "no change in hp_waiter.\n");

			TRACE_TASK(t, "prioq_mutex %d state after enqeue in priority queue\n", l->ident);
			__dump_prioq_lock_info(mutex);

			unlock_fine_irqrestore(&mutex->lock, flags);
		}

		unlock_global_irqrestore(dgl_lock, flags);

		TS_LOCK_SUSPEND;

		/* The wait queue here is priority-ordered, not FIFO.  We still do
		 * not recheck anything after waking: prioq_mutex_unlock() sets
		 * mutex->owner to the task it wakes before waking it, which is
		 * what the BUG_ON below verifies.
		 */

		suspend_for_lock();

		TS_LOCK_RESUME;

		/* Since we hold the lock, no other task will change
		 * ->owner. We can thus check it without acquiring the spin
		 * lock. */
		BUG_ON(mutex->owner != t);

		TRACE_TASK(t, "Acquired lock %d.\n", l->ident);

	} else {
		TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);

		/* it's ours now */
		mutex->owner = t;

		/* mutex->owner == t here, so lock and heap refer to the same task */
		raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
		binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
					struct nested_info, hp_binheap_node);
		raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);


		unlock_fine_irqrestore(&mutex->lock, flags);
		unlock_global_irqrestore(dgl_lock, flags);
	}

	return 0;
}


/*
 * Release the priority-queue mutex 'l', which must be owned by the caller.
 *
 * Steps, in order:
 *   1. remove the lock's node from the caller's hp_blocked_tasks heap and
 *      lower the caller's inherited priority if this lock was its source;
 *   2. clear the owner;
 *   3. pick the waiter at the front of the queue (under DGL support, only
 *      if a DGL waiter at the front can take all of its locks atomically),
 *      make it owner, recompute hp_waiter / inheritance, and wake it.
 *
 * Returns 0 on success, -EINVAL if the caller does not own the lock.
 */
int prioq_mutex_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next = NULL;
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);
	unsigned long flags;

	struct task_struct *old_max_eff_prio;

#ifdef CONFIG_LITMUS_DGL_SUPPORT
	dgl_wait_state_t *dgl_wait = NULL;
	raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
#endif

	int err = 0;

	/* ownership is checked before any lock is taken */
	if (mutex->owner != t) {
		err = -EINVAL;
		return err;
	}

	lock_global_irqsave(dgl_lock, flags);
	lock_fine_irqsave(&mutex->lock, flags);

	raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);

	TRACE_TASK(t, "Freeing lock %d\n", l->ident);

	/* snapshot the heap top, then drop this lock from the set of locks
	 * whose waiters 't' may inherit from */
	old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
	binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks);

	if(tsk_rt(t)->inh_task){
		struct task_struct *new_max_eff_prio =
			top_priority(&tsk_rt(t)->hp_blocked_tasks);

		if((new_max_eff_prio == NULL) ||
			  /* there was a change in eff prio */
		   (  (new_max_eff_prio != old_max_eff_prio) &&
			/* and owner had the old eff prio */
			  (effective_priority(t) == old_max_eff_prio))  )
		{
			// old_max_eff_prio > new_max_eff_prio

			/* sanity check: releasing a lock must not leave a heap top
			 * that outranks t's current effective priority */
			if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) {
				TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d   task: %s/%d [%s/%d]\n",
						   new_max_eff_prio->comm, new_max_eff_prio->pid,
						   t->comm, t->pid, tsk_rt(t)->inh_task->comm,
						   tsk_rt(t)->inh_task->pid);
				WARN_ON(1);
			}

			litmus->decrease_prio(t, new_max_eff_prio);
		}
	}

	/* holding no locks while still inheriting is unexpected; the WARN_ON
	 * condition repeats the enclosing test, so it always fires here */
	if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) &&
	   tsk_rt(t)->inh_task != NULL)
	{
		WARN_ON(tsk_rt(t)->inh_task != NULL);
		TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n",
				   tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid);
	}

	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);


	mutex->owner = NULL;

#ifdef CONFIG_LITMUS_DGL_SUPPORT
	if(waitqueue_active(&mutex->wait)) {
		/* Priority queue-based locks must be _acquired_ atomically under DGLs
		 * in order to avoid deadlock.  We leave this lock idle momentarily if
		 * the DGL waiter can't acquire all locks at once.
		 */
		wait_queue_t *q = list_entry(mutex->wait.task_list.next, wait_queue_t, task_list);
		if(q->func == dgl_wake_up) {
			dgl_wait = (dgl_wait_state_t*) q->private;

			TRACE_CUR("Checking to see if DGL waiter %s/%d can take its locks\n",
					  dgl_wait->task->comm, dgl_wait->task->pid);

			if(__attempt_atomic_dgl_acquire(l, dgl_wait)) {
				/* failed. can't take this lock yet. we remain at head of prioq
				 * allow hp requests in the future to go ahead of us. */
				select_next_lock_if_primary(l, dgl_wait);
				/* NOTE(review): 'out' sits after the
				 * unlock_fine_irqrestore(&mutex->lock, ...) call below, so
				 * this path skips it -- confirm that is harmless in the
				 * DGL configuration. */
				goto out;
			}
			else {
				TRACE_CUR("%s/%d can take its entire DGL atomically.\n",
						  dgl_wait->task->comm, dgl_wait->task->pid);
			}
		}

		/* remove the first */
		next = __waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait);
	}
#else
	/* check if there are jobs waiting for this resource */
	next = __waitqueue_remove_first(&mutex->wait);
#endif
	if (next) {
		/* next becomes the resource holder */
		mutex->owner = next;
		TRACE_CUR("lock %d ownership passed to %s/%d\n", l->ident, next->comm, next->pid);

		/* determine new hp_waiter if necessary */
		if (next == mutex->hp_waiter) {

			TRACE_TASK(next, "was highest-prio waiter\n");
			/* next has the highest priority --- it doesn't need to
			 * inherit.  However, we need to make sure that the
			 * next-highest priority in the queue is reflected in
			 * hp_waiter. */
			mutex->hp_waiter = __prioq_mutex_find_hp_waiter(mutex, next);
			l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
				effective_priority(mutex->hp_waiter) :
				NULL;

			if (mutex->hp_waiter)
				TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n");
			else
				TRACE("no further waiters\n");

			raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);

			binheap_add(&l->nest.hp_binheap_node,
						&tsk_rt(next)->hp_blocked_tasks,
						struct nested_info, hp_binheap_node);

			raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
		}
		else {
			/* Well, if 'next' is not the highest-priority waiter,
			 * then it (probably) ought to inherit the highest-priority
			 * waiter's priority. */
			TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident);

			raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);

			binheap_add(&l->nest.hp_binheap_node,
						&tsk_rt(next)->hp_blocked_tasks,
						struct nested_info, hp_binheap_node);

			/* It is possible that 'next' *should* be the hp_waiter, but isn't
			 * because that update hasn't yet executed (update operation is
			 * probably blocked on mutex->lock). So only inherit if the top of
			 * 'next's top heap node is indeed the effective prio. of hp_waiter.
			 * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
			 * since the effective priority of hp_waiter can change (and the
			 * update has not made it to this lock).)
			 */
#ifdef CONFIG_LITMUS_DGL_SUPPORT
			if((l->nest.hp_waiter_eff_prio != NULL) &&
			   (top_priority(&tsk_rt(next)->hp_blocked_tasks) == l->nest.hp_waiter_eff_prio))
			{
				if(dgl_wait && tsk_rt(next)->blocked_lock) {
					if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
						litmus->nested_increase_prio(next, l->nest.hp_waiter_eff_prio, &mutex->lock, flags);  // unlocks lock && hp_blocked_tasks_lock.
						/* NOTE(review): jumping to 'out' also skips the
						 * blocked_lock reset and wake_up_process(next)
						 * below -- confirm this is intended. */
						goto out;  // all spinlocks are released.  bail out now.
					}
				}
				else {
					litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
				}
			}

			raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
#else
			if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) == l->nest.hp_waiter_eff_prio))
			{
				litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
			}
			raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
#endif
		}

		TRACE_TASK(next, "waking up since it is no longer blocked.\n");

		/* clear the blocked record before the task can run again */
		tsk_rt(next)->blocked_lock = NULL;
		mb();

#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
		// re-enable tracking
		if(tsk_rt(next)->held_gpus) {
			tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
		}
#endif

		wake_up_process(next);
	}

	unlock_fine_irqrestore(&mutex->lock, flags);

#ifdef CONFIG_LITMUS_DGL_SUPPORT
out:
#endif
	unlock_global_irqrestore(dgl_lock, flags);

	TRACE_TASK(t, "-- Freed lock %d --\n", l->ident);

	return err;
}


/*
 * Propagate a priority increase of task 't', which is queued on lock 'l',
 * along the blocking chain.
 *
 * @l:         lock on whose wait queue 't' sits
 * @t:         task whose effective priority has increased
 * @to_unlock: lock held by the caller; released here once mutex->lock has
 *             been taken (relay-style hand-over)
 * @irqflags:  saved IRQ flags, restored when the last lock of the chain is
 *             released
 *
 * 't' is first re-sorted in the wait queue.  Under DGL support this may let
 * 't' acquire its whole DGL, in which case nothing is left to propagate.
 * Otherwise, if 't' is blocked on 'l', hp_waiter and the owner's heap are
 * updated and the increase is forwarded to the owner when required; if 't'
 * is blocked on a different lock, propagation continues there.
 *
 * Every exit path releases mutex->lock, either directly or by handing it
 * to the next propagation step.
 */
void prioq_mutex_propagate_increase_inheritance(struct litmus_lock* l,
											struct task_struct* t,
											raw_spinlock_t* to_unlock,
											unsigned long irqflags)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);

	// relay-style locking
	lock_fine(&mutex->lock);
	unlock_fine(to_unlock);

#ifdef CONFIG_LITMUS_DGL_SUPPORT
	{
		int woke_up = __prioq_dgl_increase_pos(mutex, t);
		if (woke_up) {
			/* t got the DGL. it is not blocked anywhere. just return. */
			unlock_fine_irqrestore(&mutex->lock, irqflags);
			return;
		}
	}
#else
	__prioq_increase_pos(mutex, t);
#endif

	if(tsk_rt(t)->blocked_lock == l) {  // prevent race on tsk_rt(t)->blocked
		struct task_struct *owner = mutex->owner;

		struct task_struct *old_max_eff_prio;
		struct task_struct *new_max_eff_prio;

		if (!owner) {
			TRACE_TASK(t, "Owner on PRIOQ lock %d is null. Don't propagate.\n", l->ident);
			if(t == mutex->hp_waiter) {
				// reflect the changed prio.
				l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
			}
			/* Release mutex->lock like every other exit path does; the
			 * original returned here with the lock still held. */
			unlock_fine_irqrestore(&mutex->lock, irqflags);
			return;
		}

		raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);

		/* snapshot the owner's heap top before any update */
		old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);

		if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) {
			TRACE_TASK(t, "is new highest-prio waiter by propagation.\n");
			mutex->hp_waiter = t;

			TRACE_TASK(t, "prioq_mutex %d state after prio increase in priority queue\n", l->ident);
			__dump_prioq_lock_info(mutex);
		}
		else {
			TRACE_TASK(t, "prioq_mutex %d state after prio increase in priority queue\n", l->ident);
			__dump_prioq_lock_info(mutex);
		}

		if(t == mutex->hp_waiter) {
			// reflect the increased priority in the heap node.
			l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);

			BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node));
			BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node,
											&tsk_rt(owner)->hp_blocked_tasks));

			binheap_decrease(&l->nest.hp_binheap_node,
							 &tsk_rt(owner)->hp_blocked_tasks);
		}

		new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);


		if(new_max_eff_prio != old_max_eff_prio) {
			// new_max_eff_prio > old_max_eff_prio holds.
			if ((effective_priority(owner) == old_max_eff_prio) ||
				(litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
				TRACE_CUR("Propagating inheritance to holder of lock %d.\n",
						  l->ident);

				// beware: recursion
				litmus->nested_increase_prio(owner, new_max_eff_prio,
											 &mutex->lock, irqflags);  // unlocks mutex->lock
			}
			else {
				TRACE_CUR("Lower priority than holder %s/%d.  No propagation.\n",
						  owner->comm, owner->pid);
				raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
				unlock_fine_irqrestore(&mutex->lock, irqflags);
			}
		}
		else {
			TRACE_TASK(mutex->owner, "No change in maxiumum effective priority.\n");
			raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
			unlock_fine_irqrestore(&mutex->lock, irqflags);
		}
	}
	else {
		struct litmus_lock *still_blocked;

		TRACE_TASK(t, "prioq_mutex %d state after prio increase in priority queue\n", l->ident);
		__dump_prioq_lock_info(mutex);

		still_blocked = tsk_rt(t)->blocked_lock;

		TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
		if(still_blocked) {
			TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
					   still_blocked->ident);
			if(still_blocked->ops->propagate_increase_inheritance) {
				/* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B)
				 we know that task 't' has not released any locks behind us in this
				 chain.  Propagation just needs to catch up with task 't'. */
				still_blocked->ops->propagate_increase_inheritance(still_blocked,
																   t,
																   &mutex->lock,
																   irqflags);
			}
			else {
				TRACE_TASK(t,
						   "Inheritor is blocked on lock (%p) that does not "
						   "support nesting!\n",
						   still_blocked);
				unlock_fine_irqrestore(&mutex->lock, irqflags);
			}
		}
		else {
			unlock_fine_irqrestore(&mutex->lock, irqflags);
		}
	}
}


/*
 * Propagate a decrease in the effective priority of task 't' through this
 * prioq mutex.
 *
 * l:         lock that 't' is believed to be blocked on
 * t:         task whose effective priority decreased
 * to_unlock: fine-grained lock held on entry; released here right after
 *            mutex->lock has been taken (relay-style locking)
 * irqflags:  saved IRQ state, handed to whichever call finally drops the
 *            last fine-grained lock of the chain
 *
 * Locking contract: mutex->lock is acquired here.  Every exit path either
 * releases it with unlock_fine_irqrestore(), or hands it to a callee
 * (litmus->nested_decrease_prio() or the propagate_decrease_inheritance op
 * of the lock 't' is still blocked on) which is expected to release it.
 */
void prioq_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
											 struct task_struct* t,
											 raw_spinlock_t* to_unlock,
											 unsigned long irqflags)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);

	// relay-style locking
	lock_fine(&mutex->lock);
	unlock_fine(to_unlock);

	/* presumably re-positions 't' in the wait queue to match its lowered
	   priority -- helper is defined outside this view, confirm there */
#ifdef CONFIG_LITMUS_DGL_SUPPORT
	__prioq_dgl_decrease_pos(mutex, t);
#else
	__prioq_decrease_pos(mutex, t);
#endif

	if(tsk_rt(t)->blocked_lock == l) {  // prevent race on tsk_rt(t)->blocked
		if(t == mutex->hp_waiter) {
			struct task_struct *owner = mutex->owner;

			struct task_struct *old_max_eff_prio;
			struct task_struct *new_max_eff_prio;

			if (!owner) {
				TRACE_TASK(t, "Owner on PRIOQ lock %d is null. Don't propagate.\n", l->ident);
				// reflect the changed prio.
				l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
				/* nobody to propagate to, but mutex->lock taken above must
				   still be dropped and the saved IRQ state restored */
				unlock_fine_irqrestore(&mutex->lock, irqflags);
				return;
			}

			raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);

			old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);

			/* take this lock's node out of the owner's heap, recompute the
			   highest-priority waiter, then re-insert with the new key */
			binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
			mutex->hp_waiter = __prioq_mutex_find_hp_waiter(mutex, NULL);

			TRACE_TASK(t, "prioq_mutex %d state after prio decrease in priority queue\n", l->ident);
			__dump_prioq_lock_info(mutex);

			l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
				effective_priority(mutex->hp_waiter) : NULL;
			binheap_add(&l->nest.hp_binheap_node,
						&tsk_rt(owner)->hp_blocked_tasks,
						struct nested_info, hp_binheap_node);

			new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);

			/* only act if the top of the owner's heap changed AND the owner
			   was actually running at the old top priority */
			if((old_max_eff_prio != new_max_eff_prio) &&
			   (effective_priority(owner) == old_max_eff_prio))
			{
				// Need to set new effective_priority for owner

				struct task_struct *decreased_prio;

				TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n",
						  l->ident);

				if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
					TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n",
							  (new_max_eff_prio) ? new_max_eff_prio->comm : "null",
							  (new_max_eff_prio) ? new_max_eff_prio->pid : 0,
							  owner->comm,
							  owner->pid,
							  l->ident);

					decreased_prio = new_max_eff_prio;
				}
				else {
					TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n",
							  (new_max_eff_prio) ? new_max_eff_prio->comm : "null",
							  (new_max_eff_prio) ? new_max_eff_prio->pid : 0,
							  owner->comm,
							  owner->pid,
							  l->ident);

					/* NULL: owner inherits from nobody */
					decreased_prio = NULL;
				}

				// beware: recursion
				litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags); // will unlock mutex->lock
			}
			else {
				raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
				unlock_fine_irqrestore(&mutex->lock, irqflags);
			}
		}
		else {
			TRACE_TASK(t, "prioq_mutex %d state after prio decrease in priority queue\n", l->ident);
			__dump_prioq_lock_info(mutex);

			TRACE_TASK(t, "is not hp_waiter.  No propagation.\n");
			unlock_fine_irqrestore(&mutex->lock, irqflags);
		}
	}
	else {
		struct litmus_lock *still_blocked;

		TRACE_TASK(t, "prioq_mutex %d state after prio decrease in priority queue\n", l->ident);
		__dump_prioq_lock_info(mutex);

		still_blocked = tsk_rt(t)->blocked_lock;

		TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
		if(still_blocked) {
			TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
					   still_blocked->ident);
			if(still_blocked->ops->propagate_decrease_inheritance) {
				/* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B)
				 we know that task 't' has not released any locks behind us in this
				 chain.  propagation just needs to catch up with task 't' */
				still_blocked->ops->propagate_decrease_inheritance(still_blocked,
																   t,
																   &mutex->lock,
																   irqflags);
			}
			else {
				TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
						   still_blocked);
				unlock_fine_irqrestore(&mutex->lock, irqflags);
			}
		}
		else {
			unlock_fine_irqrestore(&mutex->lock, irqflags);
		}
	}
}


int prioq_mutex_close(struct litmus_lock* l)
{
	struct task_struct *t = current;
	struct prioq_mutex *mutex = prioq_mutex_from_lock(l);
	unsigned long flags;

	int owner;

#ifdef CONFIG_LITMUS_DGL_SUPPORT
	raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
#endif

	lock_global_irqsave(dgl_lock, flags);
	lock_fine_irqsave(&mutex->lock, flags);

	owner = (mutex->owner == t);

	unlock_fine_irqrestore(&mutex->lock, flags);
	unlock_global_irqrestore(dgl_lock, flags);

	/*
	 TODO: Currently panic.  FIX THIS!
	if (owner)
		prioq_mutex_unlock(l);
	*/

	return 0;
}

/*
 * Release the memory of the prioq mutex that embeds 'lock'.
 */
void prioq_mutex_free(struct litmus_lock* lock)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock(lock);

	kfree(mutex);
}


/*
 * /proc read handler: writes a text dump of a prioq mutex (owner, highest
 * priority waiter, and the wait queue front to back) into 'page'.
 *
 * page:  output buffer
 * count: capacity of 'page' in bytes
 * data:  the lock to dump; cast to struct litmus_lock*
 * start, off, eof: not used by this handler
 *
 * Returns the number of bytes written into 'page'.
 *
 * mutex->lock is try-locked at most max_attempts times.  If it cannot be
 * taken, only a "<name> is busy." line is produced.
 *
 * The following may race if DGLs are enabled: only mutex->lock is taken
 * here, not the DGL lock.  Only examine /proc if things appear to be locked
 * up.  TODO: FIX THIS!  Must find an elegant way to transmit the DGL lock to
 * this function.
 */
static int prioq_proc_print(char *page, char **start, off_t off, int count, int *eof, void *data)
{
	struct prioq_mutex *mutex = prioq_mutex_from_lock((struct litmus_lock*)data);

	int attempts = 0;
	const int max_attempts = 10;
	int locked = 0;
	unsigned long flags;

	int size = count;	/* bytes still free in 'page' */
	char *next = page;	/* current write position */
	int w;

	/* bounded spinning: never block a /proc reader on a stuck lock */
	while(attempts < max_attempts)
	{
		locked = raw_spin_trylock_irqsave(&mutex->lock, flags);
		if (likely(locked))
			break;

		++attempts;
		cpu_relax();
	}

	if (locked) {
		w = scnprintf(next, size, "%s (mutex: %p, data: %p):\n", mutex->litmus_lock.name, mutex, data);
		size -= w;
		next += w;

		w = scnprintf(next, size,
						"owner: %s/%d (inh: %s/%d)\n",
							(mutex->owner) ?
								mutex->owner->comm : "null",
							(mutex->owner) ?
								mutex->owner->pid : 0,
							(mutex->owner && tsk_rt(mutex->owner)->inh_task) ?
								tsk_rt(mutex->owner)->inh_task->comm : "null",
							(mutex->owner && tsk_rt(mutex->owner)->inh_task) ?
								tsk_rt(mutex->owner)->inh_task->pid : 0);
		size -= w;
		next += w;

		w = scnprintf(next, size,
						"hp waiter: %s/%d (inh: %s/%d)\n",
							(mutex->hp_waiter) ?
								mutex->hp_waiter->comm : "null",
							(mutex->hp_waiter) ?
								mutex->hp_waiter->pid : 0,
							(mutex->hp_waiter && tsk_rt(mutex->hp_waiter)->inh_task) ?
								tsk_rt(mutex->hp_waiter)->inh_task->comm : "null",
							(mutex->hp_waiter && tsk_rt(mutex->hp_waiter)->inh_task) ?
								tsk_rt(mutex->hp_waiter)->inh_task->pid : 0);
		size -= w;
		next += w;

		w = scnprintf(next, size, "\nblocked tasks, front to back:\n");
		size -= w;
		next += w;

		if (waitqueue_active(&mutex->wait)) {
			wait_queue_t *q;
			struct list_head *pos;
#ifdef CONFIG_LITMUS_DGL_SUPPORT
			dgl_wait_state_t	*dgl_wait = NULL;
#endif
			list_for_each(pos, &mutex->wait.task_list) {
				struct task_struct *blocked_task;
#ifdef CONFIG_LITMUS_DGL_SUPPORT
				int enabled = 1;
#endif
				q = list_entry(pos, wait_queue_t, task_list);

#ifdef CONFIG_LITMUS_DGL_SUPPORT
				if(q->func == dgl_wake_up) {
					/* DGL entries keep a dgl_wait_state_t in ->private */
					dgl_wait = (dgl_wait_state_t*) q->private;
					blocked_task = dgl_wait->task;

					/* queued here, but currently blocked on another lock */
					if(tsk_rt(blocked_task)->blocked_lock != &mutex->litmus_lock)
						enabled = 0;
				}
				else {
					blocked_task = (struct task_struct*) q->private;
				}
#else
				blocked_task = (struct task_struct*) q->private;
#endif

				w = scnprintf(next, size,
						"\t%s/%d (inh: %s/%d)"
#ifdef CONFIG_LITMUS_DGL_SUPPORT
						" DGL enabled: %d"
#endif
						"\n",
						blocked_task->comm, blocked_task->pid,
						(tsk_rt(blocked_task)->inh_task) ?
							tsk_rt(blocked_task)->inh_task->comm : "null",
						(tsk_rt(blocked_task)->inh_task) ?
							tsk_rt(blocked_task)->inh_task->pid : 0
#ifdef CONFIG_LITMUS_DGL_SUPPORT
						, enabled
#endif
						);
				size -= w;
				next += w;
			}
		}
		else {
			w = scnprintf(next, size, "\t<NONE>\n");
			size -= w;
			next += w;
		}

		raw_spin_unlock_irqrestore(&mutex->lock, flags);
	}
	else {
		w = scnprintf(next, size, "%s is busy.\n", mutex->litmus_lock.name);
		size -= w;
		next += w;
	}

	return count - size;
}

/*
 * proc 'add' hook: names the lock "prioq-<ident>" and registers a /proc
 * entry for it whose read handler is prioq_proc_print.
 */
static void prioq_proc_add(struct litmus_lock* l)
{
	/* set the name before registering -- presumably litmus_add_proc_lock()
	   uses l->name for the entry; confirm against its definition */
	snprintf(l->name, LOCK_NAME_LEN, "prioq-%d", l->ident);

	l->proc_entry = litmus_add_proc_lock(l, prioq_proc_print);
}

/*
 * proc 'remove' hook: forwards to litmus_remove_proc_lock() for this lock.
 */
static void prioq_proc_remove(struct litmus_lock* l)
{
	litmus_remove_proc_lock(l);
}

/* /proc hooks attached to every prioq mutex by prioq_mutex_new(). */
static struct litmus_lock_proc_ops prioq_proc_ops =
{
	.add    = prioq_proc_add,
	.remove = prioq_proc_remove,
};


/*
 * Allocate and initialise a new prioq mutex.
 *
 * ops: lock operations table stored in the embedded litmus_lock
 *
 * Returns a pointer to the embedded struct litmus_lock, or NULL if the
 * allocation fails.  The new mutex has no owner, no highest-priority
 * waiter, and an empty wait queue.
 */
struct litmus_lock* prioq_mutex_new(struct litmus_lock_ops* ops)
{
	struct prioq_mutex *mutex;
	struct litmus_lock *lock;

	/* zeroed allocation: owner and hp_waiter start out NULL */
	mutex = kzalloc(sizeof(*mutex), GFP_KERNEL);
	if (!mutex)
		return NULL;

	/* address the embedded base lock by member name rather than by
	   casting 'mutex', so nothing here depends on struct layout */
	lock = &mutex->litmus_lock;

	lock->ops = ops;
	init_waitqueue_head(&mutex->wait);

#ifdef CONFIG_DEBUG_SPINLOCK
	__raw_spin_lock_init(&mutex->lock, lock->cheat_lockdep, &lock->key);
#else
	raw_spin_lock_init(&mutex->lock);
#endif

	lock->nest.hp_waiter_ptr = &mutex->hp_waiter;

	lock->proc = &prioq_proc_ops;

	return lock;
}