#include <linux/module.h>
#include <linux/semaphore.h>
#include <linux/pci.h>
#include <litmus/sched_trace.h>
#include <litmus/nvidia_info.h>
#include <litmus/litmus.h>
/*
 * Fixed-width scalar aliases used by the state structures below.
 * The "V" variants are "void" quantities (enumerations or packed bit
 * fields), "U" is unsigned, "S" is signed and "F" is floating point.
 */

/* 8-bit */
typedef unsigned char      NvV8;  /* "void": enumerated or multiple fields */
typedef unsigned char      NvU8;  /* 0 to 255 */
typedef signed char        NvS8;  /* -128 to 127 */

/* 16-bit */
typedef unsigned short     NvV16; /* "void": enumerated or multiple fields */
typedef unsigned short     NvU16; /* 0 to 65535 */
typedef signed short       NvS16; /* -32768 to 32767 */

/* 32-bit */
typedef unsigned int       NvV32; /* "void": enumerated or multiple fields */
typedef unsigned int       NvU32; /* 0 to 4294967295 */
typedef float              NvF32; /* IEEE Single Precision (S1E8M23) */

/* 64-bit */
typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
typedef double             NvF64; /* IEEE Double Precision (S1E11M52) */
/*
 * Overlay of the same storage as 8-, 16- or 32-bit volatile elements.
 * litmus_nv_phwreg_t is a pointer to this union.
 * NOTE(review): presumably a view onto memory-mapped device registers,
 * copied from the NVIDIA driver headers -- confirm before changing.
 */
typedef union
{
volatile NvV8 Reg008[1];
volatile NvV16 Reg016[1];
volatile NvV32 Reg032[1];
} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
/*
 * Description of one aperture: a 64-bit address and size, a 32-bit offset
 * and two views of its mapping (raw 32-bit words and the register union).
 * Used for the bars[], regs, fb, ud and agp members of litmus_nv_state_t.
 * NOTE(review): field order and sizes presumably have to match the NVIDIA
 * driver's own definition -- confirm before changing.
 */
typedef struct
{
NvU64 address;
NvU64 size;
NvU32 offset;
NvU32 *map;
litmus_nv_phwreg_t map_u;
} litmus_nv_aperture_t;
/*
 * Per-device state. The data field of the driver's tasklets is cast to a
 * pointer to this type (see dump_nvidia_info() and
 * get_tasklet_nv_device_num()), and it is embedded as the nv_state member
 * of litmus_nv_linux_state_t.
 * NOTE(review): presumably a local copy of the NVIDIA driver's state
 * structure; the member order and sizes would then have to match the
 * installed driver version exactly -- confirm before editing.
 */
typedef struct
{
void *priv; /* private data */
void *os_state; /* os-specific device state */
int rmInitialized;
int flags;
/* PCI config info */
NvU32 domain;
NvU16 bus;
NvU16 slot;
NvU16 vendor_id;
NvU16 device_id;
NvU16 subsystem_id;
NvU32 gpu_id;
void *handle;
NvU32 pci_cfg_space[16];
/* physical characteristics */
litmus_nv_aperture_t bars[3];
litmus_nv_aperture_t *regs;
/* Note: only fb is a pointer here; ud is an embedded aperture struct. */
litmus_nv_aperture_t *fb, ud;
litmus_nv_aperture_t agp;
NvU32 interrupt_line;
NvU32 agp_config;
NvU32 agp_status;
NvU32 primary_vga;
NvU32 sim_env;
NvU32 rc_timer_enabled;
/* list of events allocated for this device */
void *event_list;
void *kern_mappings;
} litmus_nv_state_t;
/* A deferred-work task is a plain kernel work_struct. */
typedef struct work_struct litmus_nv_task_t;
/*
 * Work item: the work_struct followed by an opaque data pointer.
 * get_work_nv_device_num() reads the pointer stored directly behind the
 * work_struct, so 'data' must stay immediately after 'task'.
 */
typedef struct litmus_nv_work_s {
litmus_nv_task_t task;
void *data;
} litmus_nv_work_t;
/*
 * Linux-specific per-device state wrapping litmus_nv_state_t.
 * Code in this file recovers a pointer to this structure from a
 * litmus_nv_state_t pointer with container_of(..., nv_state) and then reads
 * device_num, so the position of every member before device_num matters.
 * NOTE(review): presumably mirrors the NVIDIA driver's Linux state
 * structure for a specific driver release -- confirm the layout whenever
 * the driver is updated.
 */
typedef struct litmus_nv_linux_state_s {
litmus_nv_state_t nv_state;
atomic_t usage_count;
struct pci_dev *dev;
void *agp_bridge;
void *alloc_queue;
void *timer_sp;
void *isr_sp;
void *pci_cfgchk_sp;
void *isr_bh_sp;
/* Only present when CONFIG_CUDA_4_0 is set; shifts all later offsets. */
#ifdef CONFIG_CUDA_4_0
char registry_keys[512];
#endif
/* keep track of any pending bottom halves */
struct tasklet_struct tasklet;
litmus_nv_work_t work;
/* get a timer callback every second */
struct timer_list rc_timer;
/* lock for linux-specific data, not used by core rm */
struct semaphore ldata_lock;
/* lock for linux-specific alloc queue */
struct semaphore at_lock;
/* The members below are compiled out unconditionally. */
#if 0
#if defined(NV_USER_MAP)
/* list of user mappings */
struct nv_usermap_s *usermap_list;
/* lock for VMware-specific mapping list */
struct semaphore mt_lock;
#endif /* defined(NV_USER_MAP) */
#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
void *apm_nv_dev;
#endif
#endif
/* Device index returned by get_tasklet_nv_device_num(). */
NvU32 device_num;
struct litmus_nv_linux_state_s *next;
} litmus_nv_linux_state_t;
/*
 * dump_nvidia_info - trace the driver state reachable from a tasklet.
 * @t: tasklet whose data field is interpreted as a litmus_nv_state_t pointer.
 *
 * Debugging aid only. Traces the litmus_nv_state_t fields, then the
 * enclosing litmus_nv_linux_state_t (recovered with container_of()), and
 * finally the byte offsets of device_num measured three ways, so the locally
 * declared layout can be compared with what is actually found in memory.
 *
 * When the tasklet carries no state pointer, both "INVALID" messages are
 * traced and nothing is dereferenced.
 */
void dump_nvidia_info(const struct tasklet_struct *t)
{
	litmus_nv_state_t *nvstate = (litmus_nv_state_t *)(t->data);
	litmus_nv_linux_state_t *linuxstate;
	int ls_offset;
	int ns_offset_raw;
	int ns_offset_desired;

	if (!nvstate) {
		TRACE("INVALID NVSTATE????\n");
		TRACE("INVALID LINUXNVSTATE?????\n");
		return;
	}

	TRACE("NV State:\n"
	      "\ttasklet ptr = %p\n"
	      "\tstate ptr = %p\n"
	      "\tprivate data ptr = %p\n"
	      "\tos state ptr = %p\n"
	      "\tdomain = %u\n"
	      "\tbus = %u\n"
	      "\tslot = %u\n"
	      "\tvendor_id = %u\n"
	      "\tdevice_id = %u\n"
	      "\tsubsystem_id = %u\n"
	      "\tgpu_id = %u\n"
	      "\tinterrupt_line = %u\n",
	      t,
	      nvstate,
	      nvstate->priv,
	      nvstate->os_state,
	      nvstate->domain,
	      nvstate->bus,
	      nvstate->slot,
	      nvstate->vendor_id,
	      nvstate->device_id,
	      nvstate->subsystem_id,
	      nvstate->gpu_id,
	      nvstate->interrupt_line);

	/* nv_state is embedded in the Linux state; step back to the container. */
	linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);

	/* Distance of device_num from: the container, its nv_state member,
	 * and the pointer that was actually handed to us in the tasklet. */
	ls_offset = (int)((char *)&linuxstate->device_num - (char *)linuxstate);
	ns_offset_raw = (int)((char *)&linuxstate->device_num -
			      (char *)&linuxstate->nv_state);
	ns_offset_desired = (int)((char *)&linuxstate->device_num -
				  (char *)nvstate);

	TRACE("LINUX NV State:\n"
	      "\tlinux nv state ptr: %p\n"
	      "\taddress of tasklet: %p\n"
	      "\taddress of work: %p\n"
	      "\tusage_count: %d\n"
	      "\tdevice_num: %u\n"
	      "\ttasklet addr == this tasklet: %d\n"
	      "\tpci: %p\n",
	      linuxstate,
	      &(linuxstate->tasklet),
	      &(linuxstate->work),
	      atomic_read(&(linuxstate->usage_count)),
	      linuxstate->device_num,
	      (t == &(linuxstate->tasklet)),
	      linuxstate->dev);

	/* The last value re-reads device_num through the raw offset, which is
	 * how a hard-coded-offset lookup would see it. */
	TRACE("Offsets:\n"
	      "\tOffset from LinuxState: %d, %x\n"
	      "\tOffset from NVState: %d, %x\n"
	      "\tOffset from parameter: %d, %x\n"
	      "\tdevice_num: %u\n",
	      ls_offset, ls_offset,
	      ns_offset_raw, ns_offset_raw,
	      ns_offset_desired, ns_offset_desired,
	      *((u32 *)((char *)nvstate + ns_offset_desired)));
}
/* The loaded "nvidia" kernel module, or NULL if it has not been found. */
static struct module *nvidia_mod = NULL;

/*
 * init_nvidia_info - locate the NVIDIA kernel module and set up the registry.
 *
 * Looks the module up by name while holding module_mutex and remembers it in
 * nvidia_mod. On success the device registry is initialised as well.
 *
 * Returns 0 when the module was found, -1 otherwise.
 *
 * NOTE(review): no module reference is taken here, so nvidia_mod presumably
 * becomes stale if the module is unloaded later -- confirm whether that can
 * happen in practice.
 */
int init_nvidia_info(void)
{
	mutex_lock(&module_mutex);
	nvidia_mod = find_module("nvidia");
	mutex_unlock(&module_mutex);

	if (nvidia_mod == NULL) {
		TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
		return -1;
	}

	TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
	      (void *)(nvidia_mod->module_core),
	      (void *)(nvidia_mod->module_core) + nvidia_mod->core_size);

	init_nv_device_reg();
	return 0;
}
/*
 * is_nvidia_func - test whether an address lies in the NVIDIA module's core.
 * @func_addr: address to classify.
 *
 * Works with pointers to static data inside the module too.
 *
 * Returns the result of within_module_core() for the module found by
 * init_nvidia_info(), or 0 if no module has been found.
 */
int is_nvidia_func(void* func_addr)
{
	if (!nvidia_mod)
		return 0;

	return within_module_core((unsigned long)func_addr, nvidia_mod);
}
u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
{
// life is too short to use hard-coded offsets. update this later.
litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
return(linuxstate->device_num);
//int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
#if 0
// offset determined though observed behavior of the NV driver.
//const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
//const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
void* state = (void*)(t->data);
void* device_num_ptr = state + DEVICE_NUM_OFFSET;
//dump_nvidia_info(t);
return(*((u32*)device_num_ptr));
#endif
}
u32 get_work_nv_device_num(const struct work_struct *t)
{
// offset determined though observed behavior of the NV driver.
const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
void* state = (void*)(t);
void** device_num_ptr = state + DEVICE_NUM_OFFSET;
return(*((u32*)(*device_num_ptr)));
}
/*
 * One registry slot per GPU: the task that currently has the device
 * registered (NULL when free) and a raw spinlock taken by
 * lock_nv_registry()/unlock_nv_registry().
 */
typedef struct {
raw_spinlock_t lock;
struct task_struct *device_owner;
}nv_device_registry_t;
/* Registry table, indexed by device number (0 .. NV_DEVICE_NUM-1). */
static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
/*
 * init_nv_device_reg - reset every registry slot.
 *
 * Marks each device as unowned and initialises its lock.
 * Always returns 1.
 */
int init_nv_device_reg(void)
{
	int i = 0;

	while (i < NV_DEVICE_NUM) {
		NV_DEVICE_REG[i].device_owner = NULL;
		raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
		++i;
	}

	return 1;
}
/* Looks up the nv_device_id registered to the given owner.
(Returns -1 if no associated device id can be found.) */
/*
int get_nv_device_id(struct task_struct* owner)
{
int i;
if(!owner)
{
return(-1);
}
for(i = 0; i < NV_DEVICE_NUM; ++i)
{
if(NV_DEVICE_REG[i].device_owner == owner)
return(i);
}
return(-1);
}
*/
/*
 * __reg_nv_device - claim a GPU for the current task.
 * @reg_device_id: registry index; the caller has already range-checked it.
 *
 * Ownership is taken with a single compare-and-exchange on the slot's
 * device_owner (NULL -> current), so no registry lock is needed here.
 * With CONFIG_LITMUS_SOFTIRQD the task's klitirqd_sem is then acquired via
 * down_and_set_stat().
 *
 * Returns 0 on success, -EBUSY if another task already owns the device.
 */
static int __reg_nv_device(int reg_device_id)
{
	struct task_struct *prev_owner;

	prev_owner = cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner,
			     NULL,
			     current);
	mb();

	if (unlikely(prev_owner != NULL)) {
		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
		return -EBUSY;
	}

#ifdef CONFIG_LITMUS_SOFTIRQD
	down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
#endif
	TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);

	return 0;
}
static int __clear_reg_nv_device(int de_reg_device_id)
{
int ret = 0;
struct task_struct* old;
#ifdef CONFIG_LITMUS_SOFTIRQD
unsigned long flags;
struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
lock_nv_registry(de_reg_device_id, &flags);
#endif
old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner,
current,
NULL);
mb();
#ifdef CONFIG_LITMUS_SOFTIRQD
if(likely(old == current))
{
flush_pending(klitirqd_th, current);
//unlock_nv_registry(de_reg_device_id, &flags);
up_and_set_stat(current, NOT_HELD, &tsk_rt(current)->klitirqd_sem);
unlock_nv_registry(de_reg_device_id, &flags);
ret = 0;
TRACE_CUR("%s: semaphore released.\n",__FUNCTION__);
}
else
{
unlock_nv_registry(de_reg_device_id, &flags);
ret = -EINVAL;
if(old)
TRACE_CUR("%s: device %d is not registered for this process's use! %s/%d is!\n",
__FUNCTION__, de_reg_device_id, old->comm, old->pid);
else
TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n",
__FUNCTION__, de_reg_device_id);
}
#endif
return(ret);
}
/*
 * reg_nv_device - register or unregister a GPU for the current task.
 * @reg_device_id: device index, valid in [0, NV_DEVICE_NUM).
 * @reg_action: non-zero to register, zero to unregister.
 *
 * Returns -ENODEV for an out-of-range device id, otherwise the result of
 * __reg_nv_device() or __clear_reg_nv_device().
 */
int reg_nv_device(int reg_device_id, int reg_action)
{
	if (reg_device_id < 0 || reg_device_id >= NV_DEVICE_NUM)
		return -ENODEV;

	return reg_action ? __reg_nv_device(reg_device_id)
			  : __clear_reg_nv_device(reg_device_id);
}
/*
 * get_nv_device_owner - task currently registered for a device.
 * @target_device_id: device index; BUG_ON() fires if it is not below
 *                    NV_DEVICE_NUM.
 *
 * Returns the slot's device_owner, which is NULL when nobody holds the
 * device. The registry lock is not taken here.
 */
struct task_struct* get_nv_device_owner(u32 target_device_id)
{
	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	return NV_DEVICE_REG[target_device_id].device_owner;
}
/*
 * lock_nv_registry - take a device's registry lock with interrupts disabled.
 * @target_device_id: device index; BUG_ON() fires if it is not below
 *                    NV_DEVICE_NUM.
 * @flags: receives the saved interrupt state; pass the same storage to
 *         unlock_nv_registry().
 *
 * TRACE_CUR() is used for the log line only outside interrupt context;
 * in interrupt context plain TRACE() is used.
 */
void lock_nv_registry(u32 target_device_id, unsigned long* flags)
{
	raw_spinlock_t *reg_lock;

	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	if (!in_interrupt()) {
		TRACE_CUR("Locking registry for %d.\n", target_device_id);
	} else {
		TRACE("Locking registry for %d.\n", target_device_id);
	}

	reg_lock = &NV_DEVICE_REG[target_device_id].lock;
	raw_spin_lock_irqsave(reg_lock, *flags);
}
/*
 * unlock_nv_registry - drop a device's registry lock, restoring interrupts.
 * @target_device_id: device index; BUG_ON() fires if it is not below
 *                    NV_DEVICE_NUM.
 * @flags: interrupt state saved by the matching lock_nv_registry() call.
 *
 * TRACE_CUR() is used for the log line only outside interrupt context;
 * in interrupt context plain TRACE() is used.
 */
void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
{
	raw_spinlock_t *reg_lock;

	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	if (!in_interrupt()) {
		TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
	} else {
		TRACE("Unlocking registry for %d.\n", target_device_id);
	}

	reg_lock = &NV_DEVICE_REG[target_device_id].lock;
	raw_spin_unlock_irqrestore(reg_lock, *flags);
}
/*
 * increment_nv_int_count - count one interrupt against a device's owner.
 * @device: device index (range-checked inside lock_nv_registry()).
 *
 * Under the device's registry lock, bumps nv_int_count of the task that
 * currently owns the device. Does nothing if the device is unowned.
 */
void increment_nv_int_count(u32 device)
{
	unsigned long irq_flags;
	struct task_struct *holder;

	lock_nv_registry(device, &irq_flags);

	holder = NV_DEVICE_REG[device].device_owner;
	if (holder != NULL)
		atomic_inc(&tsk_rt(holder)->nv_int_count);

	unlock_nv_registry(device, &irq_flags);
}
EXPORT_SYMBOL(increment_nv_int_count);