#include <linux/module.h>
#include <linux/semaphore.h>
#include <linux/pci.h>

#include <litmus/sched_trace.h>
#include <litmus/nvidia_info.h>
#include <litmus/litmus.h>

#include <litmus/sched_plugin.h>

#include <litmus/binheap.h>

#ifdef CONFIG_LITMUS_SOFTIRQD
#include <litmus/litmus_softirq.h>
#endif

typedef unsigned char      NvV8;  /* "void": enumerated or multiple fields */
typedef unsigned short     NvV16; /* "void": enumerated or multiple fields */
typedef unsigned char      NvU8;  /* 0 to 255 */
typedef unsigned short     NvU16; /* 0 to 65535 */
typedef signed char        NvS8;  /* -128 to 127 */
typedef signed short       NvS16; /* -32768 to 32767 */
typedef float              NvF32; /* IEEE Single Precision (S1E8M23) */
typedef double             NvF64; /* IEEE Double Precision (S1E11M52) */
typedef unsigned int       NvV32; /* "void": enumerated or multiple fields */
typedef unsigned int       NvU32; /* 0 to 4294967295 */
typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */

typedef union
{
    volatile NvV8  Reg008[1];
    volatile NvV16 Reg016[1];
    volatile NvV32 Reg032[1];
} litmus_nv_hwreg_t, *litmus_nv_phwreg_t;

typedef struct
{
    NvU64 address;
#ifdef CONFIG_CUDA_5_0
    NvU64 strapped_size;
#endif
    NvU64 size;
    NvU32 offset;
    NvU32 *map;
    litmus_nv_phwreg_t map_u;
} litmus_nv_aperture_t;

typedef struct
{
    void *priv;     /* private data */
    void *os_state; /* os-specific device state */

#ifndef CONFIG_CUDA_5_0
    int rmInitialized;
#endif
    int flags;

    /* PCI config info */
    NvU32 domain;
    NvU16 bus;
    NvU16 slot;
    NvU16 vendor_id;
    NvU16 device_id;
    NvU16 subsystem_id;
    NvU32 gpu_id;
    void *handle;

    NvU32 pci_cfg_space[16];

    /* physical characteristics */
    litmus_nv_aperture_t bars[3];
    litmus_nv_aperture_t *regs;
    litmus_nv_aperture_t *fb, ud;
    litmus_nv_aperture_t agp;

    NvU32 interrupt_line;

    NvU32 agp_config;
    NvU32 agp_status;

    NvU32 primary_vga;

    NvU32 sim_env;

    NvU32 rc_timer_enabled;

    /* list of events allocated for this device */
    void *event_list;

    void *kern_mappings;
} litmus_nv_state_t;

typedef struct work_struct litmus_nv_task_t;

typedef struct litmus_nv_work_s {
    litmus_nv_task_t task;
    void *data;
} litmus_nv_work_t;

typedef struct litmus_nv_linux_state_s {
    litmus_nv_state_t nv_state;
    atomic_t usage_count;

    struct pci_dev *dev;
    void *agp_bridge;
    void *alloc_queue;

    void *timer_sp;
    void *isr_sp;
    void *pci_cfgchk_sp;
    void *isr_bh_sp;

#if defined(CONFIG_CUDA_4_0) || defined(CONFIG_CUDA_5_0)
    char registry_keys[512];
#endif

    /* keep track of any pending bottom halves */
    struct tasklet_struct tasklet;
    litmus_nv_work_t work;

    /* get a timer callback every second */
    struct timer_list rc_timer;

    /* lock for linux-specific data, not used by core rm */
    struct semaphore ldata_lock;

    /* lock for linux-specific alloc queue */
    struct semaphore at_lock;

#if 0
#if defined(NV_USER_MAP)
    /* list of user mappings */
    struct nv_usermap_s *usermap_list;

    /* lock for VMware-specific mapping list */
    struct semaphore mt_lock;
#endif /* defined(NV_USER_MAP) */

#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
    void *apm_nv_dev;
#endif
#endif

    NvU32 device_num;
    struct litmus_nv_linux_state_s *next;
} litmus_nv_linux_state_t;
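/* NOTE: The typedefs and structs above shadow the NVIDIA binary driver's
 * internal device-state layout (its nv_state_t/nv_linux_state_t).  LITMUS
 * does not get these definitions from a driver header; the layout is kept
 * in sync with the driver version by hand, which is why some fields are
 * guarded by CONFIG_CUDA_4_0/CONFIG_CUDA_5_0.  Only the fields LITMUS
 * actually reads (e.g., device_num and the embedded tasklet and work items)
 * must line up exactly; dump_nvidia_info() below exists to sanity-check the
 * computed offsets against a live driver.
 */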
void dump_nvidia_info(const struct tasklet_struct *t)
{
    litmus_nv_state_t *nvstate = NULL;
    litmus_nv_linux_state_t *linuxstate = NULL;
    struct pci_dev *pci = NULL;

    nvstate = (litmus_nv_state_t *)(t->data);

    if (nvstate) {
        TRACE("NV State:\n"
              "\ttasklet ptr = %p\n"
              "\tstate ptr = %p\n"
              "\tprivate data ptr = %p\n"
              "\tos state ptr = %p\n"
              "\tdomain = %u\n"
              "\tbus = %u\n"
              "\tslot = %u\n"
              "\tvendor_id = %u\n"
              "\tdevice_id = %u\n"
              "\tsubsystem_id = %u\n"
              "\tgpu_id = %u\n"
              "\tinterrupt_line = %u\n",
              t,
              nvstate,
              nvstate->priv,
              nvstate->os_state,
              nvstate->domain,
              nvstate->bus,
              nvstate->slot,
              nvstate->vendor_id,
              nvstate->device_id,
              nvstate->subsystem_id,
              nvstate->gpu_id,
              nvstate->interrupt_line);

        linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
    }
    else {
        TRACE("INVALID NVSTATE????\n");
    }

    if (linuxstate) {
        int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
        int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
        int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);

        TRACE("LINUX NV State:\n"
              "\tlinux nv state ptr: %p\n"
              "\taddress of tasklet: %p\n"
              "\taddress of work: %p\n"
              "\tusage_count: %d\n"
              "\tdevice_num: %u\n"
              "\ttasklet addr == this tasklet: %d\n"
              "\tpci: %p\n",
              linuxstate,
              &(linuxstate->tasklet),
              &(linuxstate->work),
              atomic_read(&(linuxstate->usage_count)),
              linuxstate->device_num,
              (t == &(linuxstate->tasklet)),
              linuxstate->dev);

        pci = linuxstate->dev;

        TRACE("Offsets:\n"
              "\tOffset from LinuxState: %d, %x\n"
              "\tOffset from NVState: %d, %x\n"
              "\tOffset from parameter: %d, %x\n"
              "\tdevice_num: %u\n",
              ls_offset, ls_offset,
              ns_offset_raw, ns_offset_raw,
              ns_offset_desired, ns_offset_desired,
              *((u32*)((void*)nvstate + ns_offset_desired)));
    }
    else {
        TRACE("INVALID LINUXNVSTATE?????\n");
    }

#if 0
    if (pci) {
        TRACE("PCI DEV Info:\n"
              "pci device ptr: %p\n"
              "\tdevfn = %d\n"
              "\tvendor = %d\n"
              "\tdevice = %d\n"
              "\tsubsystem_vendor = %d\n"
              "\tsubsystem_device = %d\n"
              "\tslot # = %d\n",
              pci, pci->devfn, pci->vendor, pci->device,
              pci->subsystem_vendor, pci->subsystem_device,
              pci->slot->number);
    }
    else {
        TRACE("INVALID PCIDEV PTR?????\n");
    }
#endif
}

static struct module *nvidia_mod = NULL;

#if 0
static int nvidia_ready_module_notify(struct notifier_block *self,
                                      unsigned long val, void *data)
{
    mutex_lock(&module_mutex);
    nvidia_mod = find_module("nvidia");
    mutex_unlock(&module_mutex);

    if (nvidia_mod != NULL) {
        TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n",
              __FUNCTION__,
              (void*)(nvidia_mod->module_core),
              (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
        init_nv_device_reg();
        return(0);
    }
    else {
        TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
    }
}

static int nvidia_going_module_notify(struct notifier_block *self,
                                      unsigned long val, void *data)
{
    nvidia_mod = NULL;
    mb();

    return 0;
}

static struct notifier_block nvidia_ready = {
    .notifier_call = nvidia_ready_module_notify,
    .priority = 1,
};

static struct notifier_block nvidia_going = {
    .notifier_call = nvidia_going_module_notify,
    .priority = 1,
};
#endif

static int init_nv_device_reg(void);
static int shutdown_nv_device_reg(void);

int init_nvidia_info(void)
{
    mutex_lock(&module_mutex);
    nvidia_mod = find_module("nvidia");
    mutex_unlock(&module_mutex);

    if (nvidia_mod != NULL) {
        TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n",
              __FUNCTION__,
              (void*)(nvidia_mod->module_core),
              (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
        init_nv_device_reg();
        return(0);
    }
    else {
        TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);

        init_nv_device_reg();
        return(0);
        // return(-1);
    }
}

void shutdown_nvidia_info(void)
{
    nvidia_mod = NULL;
    mb();

    shutdown_nv_device_reg();
}

/* works with pointers to static data inside the module too. */
int is_nvidia_func(void *func_addr)
{
    int ret = 0;

    if (nvidia_mod) {
        ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
        /*
        if (ret) {
            TRACE("%s : %p is in NVIDIA module: %d\n",
                  __FUNCTION__, func_addr, ret);
        }
        */
    }

    return(ret);
}
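/* Recover the GPU (device) number from an NVIDIA tasklet.  The driver sets
 * tasklet->data to point at the embedded litmus_nv_state_t, so container_of()
 * walks back to the enclosing litmus_nv_linux_state_s, where device_num
 * lives.  This only works if the shadow struct layout above matches the
 * loaded driver.
 */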
u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
{
    // life is too short to use hard-coded offsets.  update this later.
    litmus_nv_state_t *nvstate = (litmus_nv_state_t *)(t->data);
    litmus_nv_linux_state_t *linuxstate =
        container_of(nvstate, litmus_nv_linux_state_t, nv_state);

    BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);

    return(linuxstate->device_num);
}

u32 get_work_nv_device_num(const struct work_struct *t)
{
    // offset determined through observed behavior of the NV driver.
    const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
    void *state = (void *)(t);
    void **device_num_ptr = state + DEVICE_NUM_OFFSET;

    return(*((u32 *)(*device_num_ptr)));
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

typedef struct {
    raw_spinlock_t lock; /* not needed if GPU is not shared between scheduling domains */
    struct binheap owners;

#ifdef CONFIG_LITMUS_SOFTIRQD
    klmirqd_callback_t callback;
    struct task_struct *thread;
    int ready:1; /* todo: make threads check for the ready flag */
#endif

#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
    struct tasklet_struct nv_klmirqd_dbg_tasklet;
#endif
} nv_device_registry_t;

static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];

#ifdef CONFIG_LITMUS_SOFTIRQD
static int nvidia_klmirqd_cb(void *arg)
{
    unsigned long flags;
    int reg_device_id = (int)(long long)(arg);
    nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];

    TRACE("nv klmirqd callback for GPU %d\n", reg_device_id);

    raw_spin_lock_irqsave(&reg->lock, flags);
    reg->thread = current;
    reg->ready = 1;
    raw_spin_unlock_irqrestore(&reg->lock, flags);

    return 0;
}
#endif

#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
struct nv_klmirqd_dbg_timer_struct {
    struct hrtimer timer;
};

static struct nv_klmirqd_dbg_timer_struct nv_klmirqd_dbg_timer;

static void nv_klmirqd_arm_dbg_timer(lt_t relative_time)
{
    lt_t when_to_fire = litmus_clock() + relative_time;

    TRACE("next nv tasklet in %llu ns\n", relative_time);

    __hrtimer_start_range_ns(&nv_klmirqd_dbg_timer.timer,
                             ns_to_ktime(when_to_fire),
                             0,
                             HRTIMER_MODE_ABS_PINNED,
                             0);
}

static void nv_klmirqd_dbg_tasklet_func(unsigned long arg)
{
    lt_t now = litmus_clock();
    nv_device_registry_t *reg = (nv_device_registry_t *)arg;
    int gpunum = reg - &NV_DEVICE_REG[0];

    TRACE("nv klmirqd routine invoked for GPU %d!\n", gpunum);

    /* set up the next timer */
    nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms.
}

static enum hrtimer_restart nvklmirqd_timer_func(struct hrtimer *timer)
{
    lt_t now = litmus_clock();
    int gpu = (int)(now % num_online_gpus());
    nv_device_registry_t *reg;

    TRACE("nvklmirqd_timer invoked!\n");

    reg = &NV_DEVICE_REG[gpu];

    if (reg->thread && reg->ready) {
        TRACE("Adding a tasklet for GPU %d\n", gpu);
        litmus_tasklet_schedule(&reg->nv_klmirqd_dbg_tasklet, reg->thread);
    }
    else {
        TRACE("nv klmirqd is not ready!\n");
        nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms.
    }

    return HRTIMER_NORESTART;
}
#endif
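/* Each GPU's registry entry keeps a max-heap ("owners") of the real-time
 * tasks currently registered for that GPU, ordered by the active plugin's
 * priority comparator (litmus->compare).  The heap's top entry determines
 * which priority the GPU's klmirqd interrupt thread inherits.
 */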
static int gpu_owner_max_priority_order(struct binheap_node *a,
                                        struct binheap_node *b)
{
    struct task_struct *d_a =
        container_of(binheap_entry(a, struct rt_param, gpu_owner_node),
                     struct task_struct, rt_param);
    struct task_struct *d_b =
        container_of(binheap_entry(b, struct rt_param, gpu_owner_node),
                     struct task_struct, rt_param);

    BUG_ON(!d_a);
    BUG_ON(!d_b);

    return litmus->compare(d_a, d_b);
}

static int init_nv_device_reg(void)
{
    int i;
    char name[MAX_KLMIRQD_NAME_LEN + 1];

#ifdef CONFIG_LITMUS_SOFTIRQD
    if (!klmirqd_is_ready()) {
        TRACE("klmirqd is not ready!\n");
        return 0;
    }
#endif

    memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
    mb();

    for (i = 0; i < num_online_gpus(); ++i) {
        raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
        INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners,
                            gpu_owner_max_priority_order);

#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
        tasklet_init(&NV_DEVICE_REG[i].nv_klmirqd_dbg_tasklet,
                     nv_klmirqd_dbg_tasklet_func,
                     (unsigned long)&NV_DEVICE_REG[i]);
#endif

#ifdef CONFIG_LITMUS_SOFTIRQD
        {
            int default_cpu = litmus->map_gpu_to_cpu(i);

            snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmirqd%d", i);

            NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb;
            NV_DEVICE_REG[i].callback.arg = (void *)(long long)(i);
            mb();

            if (launch_klmirqd_thread(name, default_cpu,
                                      &NV_DEVICE_REG[i].callback) != 0) {
                TRACE("Failed to create klmirqd thread for GPU %d\n", i);
            }
        }
#endif
    }

#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
    hrtimer_init(&nv_klmirqd_dbg_timer.timer, CLOCK_MONOTONIC,
                 HRTIMER_MODE_ABS);
    nv_klmirqd_dbg_timer.timer.function = nvklmirqd_timer_func;
    nv_klmirqd_arm_dbg_timer(NSEC_PER_MSEC * 1000);
#endif

    return(1);
}

/* The following code is full of nasty race conditions... */
/* spawning of klmirqd threads can race with init_nv_device_reg()!!!! */
static int shutdown_nv_device_reg(void)
{
    TRACE("Shutting down nv device registration.\n");

#ifdef CONFIG_LITMUS_SOFTIRQD
    {
        int i;
        nv_device_registry_t *reg;

        for (i = 0; i < num_online_gpus(); ++i) {
            TRACE("Shutting down GPU %d.\n", i);

            reg = &NV_DEVICE_REG[i];

            if (reg->thread && reg->ready) {
                kill_klmirqd_thread(reg->thread);

                /* assume that all goes according to plan... */
                reg->thread = NULL;
                reg->ready = 0;
            }

            while (!binheap_empty(&reg->owners)) {
                binheap_delete_root(&reg->owners,
                                    struct rt_param, gpu_owner_node);
            }
        }
    }
#endif

    return(1);
}
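/* Lifecycle note: init_nv_device_reg() runs once klmirqd is ready (when
 * LITMUS locates the nvidia module), and shutdown_nv_device_reg() tears the
 * per-GPU threads down again.  As the comments above warn, the two are not
 * synchronized against a concurrent klmirqd-thread spawn; presumably the
 * caller is expected to avoid overlapping init and shutdown (e.g., by
 * serializing plugin switches).
 */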
/* Get the highest-priority registered owner of device nv_device_id. */
struct task_struct* get_nv_max_device_owner(u32 target_device_id)
{
    struct task_struct *owner = NULL;
    nv_device_registry_t *reg;

    BUG_ON(target_device_id >= NV_DEVICE_NUM);

    reg = &NV_DEVICE_REG[target_device_id];

    if (!binheap_empty(&reg->owners)) {
        struct task_struct *hp =
            container_of(binheap_top_entry(&reg->owners,
                                           struct rt_param, gpu_owner_node),
                         struct task_struct, rt_param);
        TRACE_CUR("hp: %s/%d\n", hp->comm, hp->pid);
        owner = hp;
    }

    return(owner);
}

#ifdef CONFIG_LITMUS_SOFTIRQD
struct task_struct* get_nv_klmirqd_thread(u32 target_device_id)
{
    struct task_struct *klmirqd = NULL;
    nv_device_registry_t *reg;

    BUG_ON(target_device_id >= NV_DEVICE_NUM);

    reg = &NV_DEVICE_REG[target_device_id];

    if (likely(reg->ready)) {
        klmirqd = reg->thread;
    }

    return klmirqd;
}
#endif

#ifdef CONFIG_LITMUS_SOFTIRQD
static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd,
                                         struct task_struct *hp)
{
    int retval = 0;

    TRACE_CUR("Increasing priority of nv klmirqd: %s/%d.\n",
              klmirqd->comm, klmirqd->pid);

    /* the klmirqd thread should never attempt to hold a litmus-level
     * real-time lock, so nested support is not required */
    retval = litmus->__increase_prio(klmirqd, hp);

    return retval;
}

static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd,
                                         struct task_struct *hp)
{
    int retval = 0;

    TRACE_CUR("Decreasing priority of nv klmirqd: %s/%d.\n",
              klmirqd->comm, klmirqd->pid);

    /* the klmirqd thread should never attempt to hold a litmus-level
     * real-time lock, so nested support is not required */
    retval = litmus->__decrease_prio(klmirqd, hp);

    return retval;
}
#endif

/* call when a gpu owner becomes real-time */
long enable_gpu_owner(struct task_struct *t)
{
    long retval = 0;
    // unsigned long flags;
    int gpu;
    nv_device_registry_t *reg;

#ifdef CONFIG_LITMUS_SOFTIRQD
    struct task_struct *hp;
#endif

    if (!tsk_rt(t)->held_gpus) {
        TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid);
        return -1;
    }

    BUG_ON(!is_realtime(t));

    gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));

    if (binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
        TRACE_CUR("task %s/%d is already active on GPU %d\n",
                  t->comm, t->pid, gpu);
        goto out;
    }

    /* update the registration (and maybe klmirqd) */
    reg = &NV_DEVICE_REG[gpu];

    // raw_spin_lock_irqsave(&reg->lock, flags);

    binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners,
                struct rt_param, gpu_owner_node);

#ifdef CONFIG_LITMUS_SOFTIRQD
    hp = container_of(binheap_top_entry(&reg->owners,
                                        struct rt_param, gpu_owner_node),
                      struct task_struct, rt_param);

    if (hp == t) {
        /* we're the new hp */
        TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu);

        retval = gpu_klmirqd_increase_priority(reg->thread,
                                               (tsk_rt(hp)->inh_task) ?
                                                   tsk_rt(hp)->inh_task : hp);
    }
#endif

    // raw_spin_unlock_irqrestore(&reg->lock, flags);

out:
    return retval;
}
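/* enable_gpu_owner()/disable_gpu_owner() bracket a task's real-time use of a
 * GPU: enable adds the task to the per-GPU owner heap and, if it becomes the
 * top entry, pushes its effective priority (inh_task if it is inheriting)
 * into the GPU's klmirqd thread; disable removes it and hands inheritance to
 * the next highest-priority owner, if any.
 */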
/* call when a gpu owner exits real-time */
long disable_gpu_owner(struct task_struct *t)
{
    long retval = 0;
    // unsigned long flags;
    int gpu;
    nv_device_registry_t *reg;

#ifdef CONFIG_LITMUS_SOFTIRQD
    struct task_struct *hp;
    struct task_struct *new_hp = NULL;
#endif

    if (!tsk_rt(t)->held_gpus) {
        TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid);
        return -1;
    }

    BUG_ON(!is_realtime(t));

    gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));

    if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
        TRACE_CUR("task %s/%d is not active on GPU %d\n", t->comm, t->pid, gpu);
        goto out;
    }

    TRACE_CUR("task %s/%d exiting from GPU %d.\n", t->comm, t->pid, gpu);

    reg = &NV_DEVICE_REG[gpu];

    // raw_spin_lock_irqsave(&reg->lock, flags);

#ifdef CONFIG_LITMUS_SOFTIRQD
    hp = container_of(binheap_top_entry(&reg->owners,
                                        struct rt_param, gpu_owner_node),
                      struct task_struct, rt_param);

    binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);

    if (!binheap_empty(&reg->owners)) {
        new_hp = container_of(binheap_top_entry(&reg->owners,
                                                struct rt_param,
                                                gpu_owner_node),
                              struct task_struct, rt_param);
    }

    if (hp == t && new_hp != t) {
        struct task_struct *to_inh = NULL;

        TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu);

        if (new_hp) {
            to_inh = (tsk_rt(new_hp)->inh_task) ?
                         tsk_rt(new_hp)->inh_task : new_hp;
        }

        retval = gpu_klmirqd_decrease_priority(reg->thread, to_inh);
    }
#else
    binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
#endif

    // raw_spin_unlock_irqrestore(&reg->lock, flags);

out:
    return retval;
}

int gpu_owner_increase_priority(struct task_struct *t)
{
    int retval = 0;
    int gpu;
    nv_device_registry_t *reg;

    struct task_struct *hp = NULL;
    struct task_struct *hp_eff = NULL;

    BUG_ON(!is_realtime(t));
    BUG_ON(!tsk_rt(t)->held_gpus);

    gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));

    if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
        WARN_ON(!is_running(t));
        TRACE_CUR("gpu klmirqd may not inherit from %s/%d on GPU %d\n",
                  t->comm, t->pid, gpu);
        goto out;
    }

    TRACE_CUR("task %s/%d on GPU %d increasing priority.\n",
              t->comm, t->pid, gpu);

    reg = &NV_DEVICE_REG[gpu];

    hp = container_of(binheap_top_entry(&reg->owners,
                                        struct rt_param, gpu_owner_node),
                      struct task_struct, rt_param);
    hp_eff = effective_priority(hp);

    if (hp != t) {
        /* our position in the heap may have changed.
         * hp is already at the root. */
        binheap_decrease(&tsk_rt(t)->gpu_owner_node, &reg->owners);
    }

    hp = container_of(binheap_top_entry(&reg->owners,
                                        struct rt_param, gpu_owner_node),
                      struct task_struct, rt_param);

    if (effective_priority(hp) != hp_eff) {
        /* the eff. prio. of hp has changed */
        hp_eff = effective_priority(hp);
        TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu);

        retval = gpu_klmirqd_increase_priority(reg->thread, hp_eff);
    }

out:
    return retval;
}
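/* gpu_owner_decrease_priority() mirrors the increase path above, but since a
 * priority loss can move a node down the heap, the owner is re-inserted via
 * delete + add rather than a targeted decrease-key, and the klmirqd thread's
 * inherited priority is only lowered if t was the top entry and the top's
 * effective priority actually changed.
 */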
int gpu_owner_decrease_priority(struct task_struct *t)
{
    int retval = 0;
    int gpu;
    nv_device_registry_t *reg;

    struct task_struct *hp = NULL;
    struct task_struct *hp_eff = NULL;

    BUG_ON(!is_realtime(t));
    BUG_ON(!tsk_rt(t)->held_gpus);

    gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));

    if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
        WARN_ON(!is_running(t));
        TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n",
                  t->comm, t->pid, gpu);
        goto out;
    }

    TRACE_CUR("task %s/%d on GPU %d decreasing priority.\n",
              t->comm, t->pid, gpu);

    reg = &NV_DEVICE_REG[gpu];

    hp = container_of(binheap_top_entry(&reg->owners,
                                        struct rt_param, gpu_owner_node),
                      struct task_struct, rt_param);
    hp_eff = effective_priority(hp);

    binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
    binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners,
                struct rt_param, gpu_owner_node);

    if (hp == t) {
        /* t was originally the hp */
        struct task_struct *new_hp =
            container_of(binheap_top_entry(&reg->owners,
                                           struct rt_param, gpu_owner_node),
                         struct task_struct, rt_param);

        if (effective_priority(new_hp) != hp_eff) {
            /* eff. prio. of hp has changed */
            hp_eff = effective_priority(new_hp);
            TRACE_CUR("%s/%d is no longer hp on GPU %d.\n",
                      t->comm, t->pid, gpu);
            retval = gpu_klmirqd_decrease_priority(reg->thread, hp_eff);
        }
    }

out:
    return retval;
}

static int __reg_nv_device(int reg_device_id, struct task_struct *t)
{
    __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);

    return(0);
}

static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
{
    __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);

    return(0);
}

int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
{
    int ret;

    if ((reg_device_id < num_online_gpus()) && (reg_device_id >= 0)) {
        if (reg_action)
            ret = __reg_nv_device(reg_device_id, t);
        else
            ret = __clear_reg_nv_device(reg_device_id, t);
    }
    else {
        ret = -ENODEV;
    }

    return(ret);
}
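/* Usage example (a minimal sketch, not code from this file): a syscall or
 * plugin layer would pair registration with the owner-heap updates roughly
 * like so, assuming it runs in task context for task t:
 *
 *     int gpu = 0;                  // device chosen by user space
 *     reg_nv_device(gpu, 1, t);     // mark GPU held (sets held_gpus bit)
 *     if (is_realtime(t))
 *         enable_gpu_owner(t);      // join owner heap; may boost klmirqd
 *     ...
 *     if (is_realtime(t))
 *         disable_gpu_owner(t);     // leave heap; may lower klmirqd
 *     reg_nv_device(gpu, 0, t);     // release the GPU
 */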
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
//void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
//{
//    unsigned long flags;
//    nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
//
//    if (reg->max_prio_owner != t) {
//
//        raw_spin_lock_irqsave(&reg->lock, flags);
//
//        if (reg->max_prio_owner != t) {
//            if (litmus->compare(t, reg->max_prio_owner)) {
//                litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
//                reg->max_prio_owner = t;
//            }
//        }
//
//        raw_spin_unlock_irqrestore(&reg->lock, flags);
//    }
//}
//
//
//void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
//{
//    unsigned long flags;
//    nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
//
//    if (reg->max_prio_owner == t) {
//
//        raw_spin_lock_irqsave(&reg->lock, flags);
//
//        if (reg->max_prio_owner == t) {
//            reg->max_prio_owner = find_hp_owner(reg, NULL);
//            if (reg->max_prio_owner != t) {
//                litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
//            }
//        }
//
//        raw_spin_unlock_irqrestore(&reg->lock, flags);
//    }
//}
#endif

//static int __reg_nv_device(int reg_device_id, struct task_struct *t)
//{
//    int ret = 0;
//    int i;
//    struct task_struct *old_max = NULL;
//
//    raw_spin_lock_irqsave(&reg->lock, flags);
//
//    if (reg->nr_owners < NV_MAX_SIMULT_USERS) {
//        TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
//        for (i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
//            if (reg->owners[i] == NULL) {
//                reg->owners[i] = t;
//
//                //if (edf_higher_prio(t, reg->max_prio_owner)) {
//                if (litmus->compare(t, reg->max_prio_owner)) {
//                    old_max = reg->max_prio_owner;
//                    reg->max_prio_owner = t;
//
//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
//                    litmus->change_prio_pai_tasklet(old_max, t);
//#endif
//                }
//
//#ifdef CONFIG_LITMUS_SOFTIRQD
//                down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem);
//#endif
//                ++(reg->nr_owners);
//
//                break;
//            }
//        }
//    }
//    else {
//        TRACE_CUR("%s: device %d is already in use!\n",
//                  __FUNCTION__, reg_device_id);
//        //ret = -EBUSY;
//    }
//
//    raw_spin_unlock_irqrestore(&reg->lock, flags);
//
//    __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
//
//    return(ret);
//}
//
//static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
//{
//    int ret = 0;
//    int i;
//    unsigned long flags;
//    nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
//
//#ifdef CONFIG_LITMUS_SOFTIRQD
//    struct task_struct *klmirqd_th = get_klmirqd(de_reg_device_id);
//#endif
//
//    if (!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
//        return ret;
//    }
//
//    raw_spin_lock_irqsave(&reg->lock, flags);
//
//    TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
//
//    for (i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
//        if (reg->owners[i] == t) {
//#ifdef CONFIG_LITMUS_SOFTIRQD
//            flush_pending(klmirqd_th, t);
//#endif
//            if (reg->max_prio_owner == t) {
//                reg->max_prio_owner = find_hp_owner(reg, t);
//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
//                litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
//#endif
//            }
//
//#ifdef CONFIG_LITMUS_SOFTIRQD
//            up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem);
//#endif
//
//            reg->owners[i] = NULL;
//            --(reg->nr_owners);
//
//            break;
//        }
//    }
//
//    raw_spin_unlock_irqrestore(&reg->lock, flags);
//
//    __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
//
//    return(ret);
//}
//
//
//int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
//{
//    int ret;
//
//    if ((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) {
//        if (reg_action)
//            ret = __reg_nv_device(reg_device_id, t);
//        else
//            ret = __clear_reg_nv_device(reg_device_id, t);
//    }
//    else {
//        ret = -ENODEV;
//    }
//
//    return(ret);
//}

//void lock_nv_registry(u32 target_device_id, unsigned long *flags)
//{
//    BUG_ON(target_device_id >= NV_DEVICE_NUM);
//
//    if (in_interrupt())
//        TRACE("Locking registry for %d.\n", target_device_id);
//    else
//        TRACE_CUR("Locking registry for %d.\n", target_device_id);
//
//    raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
//}
//
//void unlock_nv_registry(u32 target_device_id, unsigned long *flags)
//{
//    BUG_ON(target_device_id >= NV_DEVICE_NUM);
//
//    if (in_interrupt())
//        TRACE("Unlocking registry for %d.\n", target_device_id);
//    else
//        TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
//
//    raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
//}