#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/semaphore.h>
#include <linux/pci.h>

#include <litmus/litmus.h>
#include <litmus/sched_trace.h>
#include <litmus/nvidia_info.h>
#include <litmus/litmus_softirq.h>

typedef unsigned char      NvV8;  /* "void": enumerated or multiple fields */
typedef unsigned short     NvV16; /* "void": enumerated or multiple fields */
typedef unsigned char      NvU8;  /* 0 to 255 */
typedef unsigned short     NvU16; /* 0 to 65535 */
typedef signed char        NvS8;  /* -128 to 127 */
typedef signed short       NvS16; /* -32768 to 32767 */
typedef float              NvF32; /* IEEE Single Precision (S1E8M23) */
typedef double             NvF64; /* IEEE Double Precision (S1E11M52) */
typedef unsigned int       NvV32; /* "void": enumerated or multiple fields */
typedef unsigned int       NvU32; /* 0 to 4294967295 */
typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */

/* The structures below shadow the layout of the NVIDIA driver's internal
 * per-device state just closely enough to locate fields such as device_num
 * from the pointers the driver hands to its tasklets and work items. */

typedef union {
	volatile NvV8 Reg008[1];
	volatile NvV16 Reg016[1];
	volatile NvV32 Reg032[1];
} litmus_nv_hwreg_t, *litmus_nv_phwreg_t;

typedef struct {
	NvU64 address;
	NvU64 size;
	NvU32 offset;
	NvU32 *map;
	litmus_nv_phwreg_t map_u;
} litmus_nv_aperture_t;

typedef struct {
	void *priv;     /* private data */
	void *os_state; /* os-specific device state */

	int rmInitialized;
	int flags;

	/* PCI config info */
	NvU32 domain;
	NvU16 bus;
	NvU16 slot;
	NvU16 vendor_id;
	NvU16 device_id;
	NvU16 subsystem_id;
	NvU32 gpu_id;
	void *handle;

	NvU32 pci_cfg_space[16];

	/* physical characteristics */
	litmus_nv_aperture_t bars[3];
	litmus_nv_aperture_t *regs;
	litmus_nv_aperture_t *fb, ud;
	litmus_nv_aperture_t agp;

	NvU32 interrupt_line;

	NvU32 agp_config;
	NvU32 agp_status;

	NvU32 primary_vga;

	NvU32 sim_env;

	NvU32 rc_timer_enabled;

	/* list of events allocated for this device */
	void *event_list;

	void *kern_mappings;
} litmus_nv_state_t;

typedef struct work_struct litmus_nv_task_t;

typedef struct litmus_nv_work_s {
	litmus_nv_task_t task;
	void *data;
} litmus_nv_work_t;

typedef struct litmus_nv_linux_state_s {
	litmus_nv_state_t nv_state;
	atomic_t usage_count;

	struct pci_dev *dev;
	void *agp_bridge;
	void *alloc_queue;

	void *timer_sp;
	void *isr_sp;
	void *pci_cfgchk_sp;
	void *isr_bh_sp;

#ifdef CONFIG_CUDA_4_0
	char registry_keys[512];
#endif

	/* keep track of any pending bottom halves */
	struct tasklet_struct tasklet;
	litmus_nv_work_t work;

	/* get a timer callback every second */
	struct timer_list rc_timer;

	/* lock for linux-specific data, not used by core rm */
	struct semaphore ldata_lock;

	/* lock for linux-specific alloc queue */
	struct semaphore at_lock;

#if 0
#if defined(NV_USER_MAP)
	/* list of user mappings */
	struct nv_usermap_s *usermap_list;

	/* lock for VMware-specific mapping list */
	struct semaphore mt_lock;
#endif /* defined(NV_USER_MAP) */

#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
	void *apm_nv_dev;
#endif
#endif

	NvU32 device_num;
	struct litmus_nv_linux_state_s *next;
} litmus_nv_linux_state_t;

void dump_nvidia_info(const struct tasklet_struct *t)
{
	litmus_nv_state_t* nvstate = NULL;
	litmus_nv_linux_state_t* linuxstate = NULL;
	struct pci_dev* pci = NULL;

	nvstate = (litmus_nv_state_t*)(t->data);

	if(nvstate) {
		TRACE("NV State:\n"
			  "\ttasklet ptr = %p\n"
			  "\tstate ptr = %p\n"
			  "\tprivate data ptr = %p\n"
			  "\tos state ptr = %p\n"
			  "\tdomain = %u\n"
			  "\tbus = %u\n"
			  "\tslot = %u\n"
			  "\tvendor_id = %u\n"
			  "\tdevice_id = %u\n"
			  "\tsubsystem_id = %u\n"
			  "\tgpu_id = %u\n"
			  "\tinterrupt_line = %u\n",
			  t,
			  nvstate,
			  nvstate->priv,
			  nvstate->os_state,
			  nvstate->domain,
			  nvstate->bus,
			  nvstate->slot,
			  nvstate->vendor_id,
			  nvstate->device_id,
			  nvstate->subsystem_id,
			  nvstate->gpu_id,
			  nvstate->interrupt_line);

		linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
	}
	else {
		TRACE("INVALID NVSTATE????\n");
	}

	if(linuxstate) {
		int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
		int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
		int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);

		TRACE("LINUX NV State:\n"
			  "\tlinux nv state ptr: %p\n"
			  "\taddress of tasklet: %p\n"
			  "\taddress of work: %p\n"
			  "\tusage_count: %d\n"
			  "\tdevice_num: %u\n"
			  "\ttasklet addr == this tasklet: %d\n"
			  "\tpci: %p\n",
			  linuxstate,
			  &(linuxstate->tasklet),
			  &(linuxstate->work),
			  atomic_read(&(linuxstate->usage_count)),
			  linuxstate->device_num,
			  (t == &(linuxstate->tasklet)),
			  linuxstate->dev);

		pci = linuxstate->dev;

		TRACE("Offsets:\n"
			  "\tOffset from LinuxState: %d, %x\n"
			  "\tOffset from NVState: %d, %x\n"
			  "\tOffset from parameter: %d, %x\n"
			  "\tdevice_num: %u\n",
			  ls_offset, ls_offset,
			  ns_offset_raw, ns_offset_raw,
			  ns_offset_desired, ns_offset_desired,
			  *((u32*)((void*)nvstate + ns_offset_desired)));
	}
	else {
		TRACE("INVALID LINUXNVSTATE?????\n");
	}

#if 0
	if(pci) {
		TRACE("PCI DEV Info:\n"
			  "pci device ptr: %p\n"
			  "\tdevfn = %d\n"
			  "\tvendor = %d\n"
			  "\tdevice = %d\n"
			  "\tsubsystem_vendor = %d\n"
			  "\tsubsystem_device = %d\n"
			  "\tslot # = %d\n",
			  pci,
			  pci->devfn,
			  pci->vendor,
			  pci->device,
			  pci->subsystem_vendor,
			  pci->subsystem_device,
			  pci->slot->number);
	}
	else {
		TRACE("INVALID PCIDEV PTR?????\n");
	}
#endif
}

static struct module* nvidia_mod = NULL;

int init_nvidia_info(void)
{
	mutex_lock(&module_mutex);
	nvidia_mod = find_module("nvidia");
	mutex_unlock(&module_mutex);

	if(nvidia_mod != NULL) {
		TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
			  (void*)(nvidia_mod->module_core),
			  (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
		init_nv_device_reg();
		return(0);
	}
	else {
		TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
		return(-1);
	}
}

/* works with pointers to static data inside the module too. */
int is_nvidia_func(void* func_addr)
{
	int ret = 0;
	if(nvidia_mod) {
		ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
		/*
		if(ret) {
			TRACE("%s : %p is in NVIDIA module: %d\n",
				  __FUNCTION__, func_addr, ret);
		}
		*/
	}
	return(ret);
}

u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
{
	// life is too short to use hard-coded offsets.  update this later.
	litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
	litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);

	BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);

	return(linuxstate->device_num);

	//int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);

#if 0
	// offset determined through observed behavior of the NV driver.
	//const int DEVICE_NUM_OFFSET = 0x480;  // CUDA 4.0 RC1
	//const int DEVICE_NUM_OFFSET = 0x510;  // CUDA 4.0 RC2

	void* state = (void*)(t->data);
	void* device_num_ptr = state + DEVICE_NUM_OFFSET;

	//dump_nvidia_info(t);
	return(*((u32*)device_num_ptr));
#endif
}
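/*
 * Illustrative sketch only (not part of the original file): how a caller,
 * e.g. interrupt-interception code along the lines of klitirqd, might combine
 * the helpers above to decide whether a tasklet belongs to the GPU driver.
 * The function name and call site are hypothetical.
 */
#if 0
static void example_classify_tasklet(struct tasklet_struct *t)
{
	/* only tasklets whose callback lives in the proprietary module
	 * are treated as GPU work */
	if (is_nvidia_func((void*)t->func)) {
		u32 device = get_tasklet_nv_device_num(t);
		TRACE("tasklet %p targets GPU %u\n", t, device);
	}
}
#endif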
u32 get_work_nv_device_num(const struct work_struct *t)
{
	// offset determined through observed behavior of the NV driver.
	const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
	void* state = (void*)(t);
	void** device_num_ptr = state + DEVICE_NUM_OFFSET;
	return(*((u32*)(*device_num_ptr)));
}

/* per-GPU registry mapping each device to the task that currently owns it */
typedef struct {
	raw_spinlock_t lock;
	struct task_struct *device_owner;
} nv_device_registry_t;

static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];

int init_nv_device_reg(void)
{
	int i;

	//memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));

	for(i = 0; i < NV_DEVICE_NUM; ++i) {
		raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
		NV_DEVICE_REG[i].device_owner = NULL;
	}

	return(1);
}

/* used to get the nv_device_id for a given owner.
   (returns -1 if no associated device id can be found) */
/*
int get_nv_device_id(struct task_struct* owner)
{
	int i;
	if(!owner) {
		return(-1);
	}
	for(i = 0; i < NV_DEVICE_NUM; ++i) {
		if(NV_DEVICE_REG[i].device_owner == owner)
			return(i);
	}
	return(-1);
}
*/

static int __reg_nv_device(int reg_device_id)
{
	struct task_struct* old =
		cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner,
				NULL,
				current);

	mb();

	if(likely(old == NULL)) {
		down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
		TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
		return(0);
	}
	else {
		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
		return(-EBUSY);
	}

#if 0
	//unsigned long flags;
	//raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags);
	//lock_nv_registry(reg_device_id, &flags);

	if(likely(NV_DEVICE_REG[reg_device_id].device_owner == NULL)) {
		NV_DEVICE_REG[reg_device_id].device_owner = current;
		mb(); // needed?

		// release spin lock before chance of going to sleep.
		//raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
		//unlock_nv_registry(reg_device_id, &flags);

		down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
		TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
		return(0);
	}
	else {
		//raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
		//unlock_nv_registry(reg_device_id, &flags);

		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
		return(-EBUSY);
	}
#endif
}

static int __clear_reg_nv_device(int de_reg_device_id)
{
	int ret;
	unsigned long flags;
	struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
	struct task_struct* old;

	lock_nv_registry(de_reg_device_id, &flags);

	old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner,
				  current,
				  NULL);

	mb();

	if(likely(old == current)) {
		flush_pending(klitirqd_th, current);
		//unlock_nv_registry(de_reg_device_id, &flags);

		up_and_set_stat(current, NOT_HELD, &tsk_rt(current)->klitirqd_sem);

		unlock_nv_registry(de_reg_device_id, &flags);
		ret = 0;

		TRACE_CUR("%s: semaphore released.\n", __FUNCTION__);
	}
	else {
		unlock_nv_registry(de_reg_device_id, &flags);
		ret = -EINVAL;

		if(old)
			TRACE_CUR("%s: device %d is not registered for this process's use! %s/%d is!\n",
					  __FUNCTION__, de_reg_device_id, old->comm, old->pid);
		else
			TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n",
					  __FUNCTION__, de_reg_device_id);
	}

	return(ret);
}

int reg_nv_device(int reg_device_id, int reg_action)
{
	int ret;

	if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) {
		if(reg_action)
			ret = __reg_nv_device(reg_device_id);
		else
			ret = __clear_reg_nv_device(reg_device_id);
	}
	else {
		ret = -ENODEV;
	}

	return(ret);
}
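/*
 * Illustrative usage sketch only (hypothetical caller; the system-call
 * plumbing that actually exposes reg_nv_device() to real-time tasks lives
 * elsewhere in LITMUS^RT): a task registers a GPU before issuing work to it
 * and clears the registration when done.
 */
#if 0
static int example_use_gpu(int gpu)
{
	int err = reg_nv_device(gpu, 1);  /* claim ownership */
	if (err)
		return err;  /* -EBUSY if another task owns it, -ENODEV if out of range */

	/* ... issue work to the GPU ... */

	return reg_nv_device(gpu, 0);  /* release ownership */
}
#endif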
/* used to get the owner of a given nv_device_id. */
struct task_struct* get_nv_device_owner(u32 target_device_id)
{
	struct task_struct* owner;
	BUG_ON(target_device_id >= NV_DEVICE_NUM);
	owner = NV_DEVICE_REG[target_device_id].device_owner;
	return(owner);
}

void lock_nv_registry(u32 target_device_id, unsigned long* flags)
{
	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	if(in_interrupt())
		TRACE("Locking registry for %d.\n", target_device_id);
	else
		TRACE_CUR("Locking registry for %d.\n", target_device_id);

	raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
}

void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
{
	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	if(in_interrupt())
		TRACE("Unlocking registry for %d.\n", target_device_id);
	else
		TRACE_CUR("Unlocking registry for %d.\n", target_device_id);

	raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
}

void increment_nv_int_count(u32 device)
{
	unsigned long flags;
	struct task_struct* owner;

	lock_nv_registry(device, &flags);

	owner = NV_DEVICE_REG[device].device_owner;
	if(owner) {
		atomic_inc(&tsk_rt(owner)->nv_int_count);
	}

	unlock_nv_registry(device, &flags);
}
EXPORT_SYMBOL(increment_nv_int_count);
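/*
 * Illustrative sketch only (hypothetical helper, mirroring the locking
 * pattern of increment_nv_int_count() above): code that needs to inspect the
 * owner of a device should do so under the per-device registry lock.
 */
#if 0
static pid_t example_owner_pid(u32 device)
{
	unsigned long flags;
	struct task_struct* owner;
	pid_t pid = -1;

	lock_nv_registry(device, &flags);

	owner = get_nv_device_owner(device);
	if (owner)
		pid = owner->pid;

	unlock_nv_registry(device, &flags);

	return pid;
}
#endif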