#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/semaphore.h>
#include <linux/pci.h>

#include <litmus/litmus.h>
#include <litmus/sched_trace.h>
#include <litmus/nvidia_info.h>
#include <litmus/litmus_softirq.h>

typedef unsigned char      NvV8;  /* "void": enumerated or multiple fields */
typedef unsigned short     NvV16; /* "void": enumerated or multiple fields */
typedef unsigned char      NvU8;  /* 0 to 255 */
typedef unsigned short     NvU16; /* 0 to 65535 */
typedef signed char        NvS8;  /* -128 to 127 */
typedef signed short       NvS16; /* -32768 to 32767 */
typedef float              NvF32; /* IEEE Single Precision (S1E8M23) */
typedef double             NvF64; /* IEEE Double Precision (S1E11M52) */
typedef unsigned int       NvV32; /* "void": enumerated or multiple fields */
typedef unsigned int       NvU32; /* 0 to 4294967295 */
typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */

/* The structures below shadow the layout of the NVIDIA driver's internal
 * per-device state just closely enough to locate fields such as device_num
 * from the pointers the driver hands to its tasklets and work items. */

typedef union {
	volatile NvV8 Reg008[1];
	volatile NvV16 Reg016[1];
	volatile NvV32 Reg032[1];
} litmus_nv_hwreg_t, *litmus_nv_phwreg_t;

typedef struct {
	NvU64 address;
	NvU64 size;
	NvU32 offset;
	NvU32 *map;
	litmus_nv_phwreg_t map_u;
} litmus_nv_aperture_t;

typedef struct {
	void *priv;     /* private data */
	void *os_state; /* os-specific device state */

	int rmInitialized;
	int flags;

	/* PCI config info */
	NvU32 domain;
	NvU16 bus;
	NvU16 slot;
	NvU16 vendor_id;
	NvU16 device_id;
	NvU16 subsystem_id;
	NvU32 gpu_id;
	void *handle;

	NvU32 pci_cfg_space[16];

	/* physical characteristics */
	litmus_nv_aperture_t bars[3];
	litmus_nv_aperture_t *regs;
	litmus_nv_aperture_t *fb, ud;
	litmus_nv_aperture_t agp;

	NvU32 interrupt_line;

	NvU32 agp_config;
	NvU32 agp_status;

	NvU32 primary_vga;

	NvU32 sim_env;

	NvU32 rc_timer_enabled;

	/* list of events allocated for this device */
	void *event_list;

	void *kern_mappings;
} litmus_nv_state_t;

typedef struct work_struct litmus_nv_task_t;

typedef struct litmus_nv_work_s {
	litmus_nv_task_t task;
	void *data;
} litmus_nv_work_t;

typedef struct litmus_nv_linux_state_s {
	litmus_nv_state_t nv_state;
	atomic_t usage_count;

	struct pci_dev *dev;
	void *agp_bridge;
	void *alloc_queue;

	void *timer_sp;
	void *isr_sp;
	void *pci_cfgchk_sp;
	void *isr_bh_sp;

#ifdef CONFIG_CUDA_4_0
	char registry_keys[512];
#endif

	/* keep track of any pending bottom halves */
	struct tasklet_struct tasklet;
	litmus_nv_work_t work;

	/* get a timer callback every second */
	struct timer_list rc_timer;

	/* lock for linux-specific data, not used by core rm */
	struct semaphore ldata_lock;

	/* lock for linux-specific alloc queue */
	struct semaphore at_lock;

#if 0
#if defined(NV_USER_MAP)
	/* list of user mappings */
	struct nv_usermap_s *usermap_list;

	/* lock for VMware-specific mapping list */
	struct semaphore mt_lock;
#endif /* defined(NV_USER_MAP) */

#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
	void *apm_nv_dev;
#endif
#endif

	NvU32 device_num;
	struct litmus_nv_linux_state_s *next;
} litmus_nv_linux_state_t;

void dump_nvidia_info(const struct tasklet_struct *t)
{
	litmus_nv_state_t* nvstate = NULL;
	litmus_nv_linux_state_t* linuxstate = NULL;
	struct pci_dev* pci = NULL;

	nvstate = (litmus_nv_state_t*)(t->data);

	if(nvstate) {
		TRACE("NV State:\n"
			  "\ttasklet ptr = %p\n"
			  "\tstate ptr = %p\n"
			  "\tprivate data ptr = %p\n"
			  "\tos state ptr = %p\n"
			  "\tdomain = %u\n"
			  "\tbus = %u\n"
			  "\tslot = %u\n"
			  "\tvendor_id = %u\n"
			  "\tdevice_id = %u\n"
			  "\tsubsystem_id = %u\n"
			  "\tgpu_id = %u\n"
			  "\tinterrupt_line = %u\n",
			  t,
			  nvstate,
			  nvstate->priv,
			  nvstate->os_state,
			  nvstate->domain,
			  nvstate->bus,
			  nvstate->slot,
			  nvstate->vendor_id,
			  nvstate->device_id,
			  nvstate->subsystem_id,
			  nvstate->gpu_id,
			  nvstate->interrupt_line);

		linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
	}
	else {
		TRACE("INVALID NVSTATE????\n");
	}

	if(linuxstate) {
		int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
		int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
		int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);

		TRACE("LINUX NV State:\n"
			  "\tlinux nv state ptr: %p\n"
			  "\taddress of tasklet: %p\n"
			  "\taddress of work: %p\n"
			  "\tusage_count: %d\n"
			  "\tdevice_num: %u\n"
			  "\ttasklet addr == this tasklet: %d\n"
			  "\tpci: %p\n",
			  linuxstate,
			  &(linuxstate->tasklet),
			  &(linuxstate->work),
			  atomic_read(&(linuxstate->usage_count)),
			  linuxstate->device_num,
			  (t == &(linuxstate->tasklet)),
			  linuxstate->dev);

		pci = linuxstate->dev;

		TRACE("Offsets:\n"
			  "\tOffset from LinuxState: %d, %x\n"
			  "\tOffset from NVState: %d, %x\n"
			  "\tOffset from parameter: %d, %x\n"
			  "\tdevice_num: %u\n",
			  ls_offset, ls_offset,
			  ns_offset_raw, ns_offset_raw,
			  ns_offset_desired, ns_offset_desired,
			  *((u32*)((void*)nvstate + ns_offset_desired)));
	}
	else {
		TRACE("INVALID LINUXNVSTATE?????\n");
	}

#if 0
	if(pci) {
		TRACE("PCI DEV Info:\n"
			  "pci device ptr: %p\n"
			  "\tdevfn = %d\n"
			  "\tvendor = %d\n"
			  "\tdevice = %d\n"
			  "\tsubsystem_vendor = %d\n"
			  "\tsubsystem_device = %d\n"
			  "\tslot # = %d\n",
			  pci,
			  pci->devfn,
			  pci->vendor,
			  pci->device,
			  pci->subsystem_vendor,
			  pci->subsystem_device,
			  pci->slot->number);
	}
	else {
		TRACE("INVALID PCIDEV PTR?????\n");
	}
#endif
}

static struct module* nvidia_mod = NULL;

int init_nvidia_info(void)
{
	mutex_lock(&module_mutex);
	nvidia_mod = find_module("nvidia");
	mutex_unlock(&module_mutex);

	if(nvidia_mod != NULL) {
		TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
			  (void*)(nvidia_mod->module_core),
			  (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
		init_nv_device_reg();
		return(0);
	}
	else {
		TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
		return(-1);
	}
}

/* works with pointers to static data inside the module too. */
int is_nvidia_func(void* func_addr)
{
	int ret = 0;
	if(nvidia_mod) {
		ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
		/*
		if(ret) {
			TRACE("%s : %p is in NVIDIA module: %d\n",
				  __FUNCTION__, func_addr, ret);
		}
		*/
	}
	return(ret);
}

u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
{
	// life is too short to use hard-coded offsets.  update this later.
	litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
	litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);

	BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);

	return(linuxstate->device_num);

	//int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);

#if 0
	// offset determined through observed behavior of the NV driver.
	//const int DEVICE_NUM_OFFSET = 0x480;  // CUDA 4.0 RC1
	//const int DEVICE_NUM_OFFSET = 0x510;  // CUDA 4.0 RC2

	void* state = (void*)(t->data);
	void* device_num_ptr = state + DEVICE_NUM_OFFSET;

	//dump_nvidia_info(t);
	return(*((u32*)device_num_ptr));
#endif
}
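/*
 * Illustrative sketch only (not part of the original file): how a caller,
 * e.g. interrupt-interception code along the lines of klitirqd, might combine
 * the helpers above to decide whether a tasklet belongs to the GPU driver.
 * The function name and call site are hypothetical.
 */
#if 0
static void example_classify_tasklet(struct tasklet_struct *t)
{
	/* only tasklets whose callback lives in the proprietary module
	 * are treated as GPU work */
	if (is_nvidia_func((void*)t->func)) {
		u32 device = get_tasklet_nv_device_num(t);
		TRACE("tasklet %p targets GPU %u\n", t, device);
	}
}
#endif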
u32 get_work_nv_device_num(const struct work_struct *t)
{
	// offset determined through observed behavior of the NV driver.
	const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
	void* state = (void*)(t);
	void** device_num_ptr = state + DEVICE_NUM_OFFSET;
	return(*((u32*)(*device_num_ptr)));
}

/* per-GPU registry mapping each device to the task that currently owns it */
typedef struct {
	raw_spinlock_t lock;
	struct task_struct *device_owner;
} nv_device_registry_t;

static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];

int init_nv_device_reg(void)
{
	int i;

	//memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));

	for(i = 0; i < NV_DEVICE_NUM; ++i) {
		raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
		NV_DEVICE_REG[i].device_owner = NULL;
	}

	return(1);
}

/* used to get the nv_device_id for a given owner.
   (returns -1 if no associated device id can be found) */
/*
int get_nv_device_id(struct task_struct* owner)
{
	int i;
	if(!owner) {
		return(-1);
	}
	for(i = 0; i < NV_DEVICE_NUM; ++i) {
		if(NV_DEVICE_REG[i].device_owner == owner)
			return(i);
	}
	return(-1);
}
*/

static int __reg_nv_device(int reg_device_id)
{
	struct task_struct* old =
		cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner,
				NULL,
				current);

	mb();

	if(likely(old == NULL)) {
		down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
		TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
		return(0);
	}
	else {
		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
		return(-EBUSY);
	}

#if 0
	//unsigned long flags;
	//raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags);
	//lock_nv_registry(reg_device_id, &flags);

	if(likely(NV_DEVICE_REG[reg_device_id].device_owner == NULL)) {
		NV_DEVICE_REG[reg_device_id].device_owner = current;
		mb(); // needed?

		// release spin lock before chance of going to sleep.
		//raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
		//unlock_nv_registry(reg_device_id, &flags);

		down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
		TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
		return(0);
	}
	else {
		//raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
		//unlock_nv_registry(reg_device_id, &flags);

		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
		return(-EBUSY);
	}
#endif
}

static int __clear_reg_nv_device(int de_reg_device_id)
{
	int ret;
	unsigned long flags;
	struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
	struct task_struct* old;

	lock_nv_registry(de_reg_device_id, &flags);

	old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner,
				  current,
				  NULL);

	mb();

	if(likely(old == current)) {
		flush_pending(klitirqd_th, current);
		//unlock_nv_registry(de_reg_device_id, &flags);

		up_and_set_stat(current, NOT_HELD, &tsk_rt(current)->klitirqd_sem);

		unlock_nv_registry(de_reg_device_id, &flags);
		ret = 0;

		TRACE_CUR("%s: semaphore released.\n", __FUNCTION__);
	}
	else {
		unlock_nv_registry(de_reg_device_id, &flags);
		ret = -EINVAL;

		if(old)
			TRACE_CUR("%s: device %d is not registered for this process's use! %s/%d is!\n",
					  __FUNCTION__, de_reg_device_id, old->comm, old->pid);
		else
			TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n",
					  __FUNCTION__, de_reg_device_id);
	}

	return(ret);
}

int reg_nv_device(int reg_device_id, int reg_action)
{
	int ret;

	if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) {
		if(reg_action)
			ret = __reg_nv_device(reg_device_id);
		else
			ret = __clear_reg_nv_device(reg_device_id);
	}
	else {
		ret = -ENODEV;
	}

	return(ret);
}
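/*
 * Illustrative usage sketch only (hypothetical caller; the system-call
 * plumbing that actually exposes reg_nv_device() to real-time tasks lives
 * elsewhere in LITMUS^RT): a task registers a GPU before issuing work to it
 * and clears the registration when done.
 */
#if 0
static int example_use_gpu(int gpu)
{
	int err = reg_nv_device(gpu, 1);  /* claim ownership */
	if (err)
		return err;  /* -EBUSY if another task owns it, -ENODEV if out of range */

	/* ... issue work to the GPU ... */

	return reg_nv_device(gpu, 0);  /* release ownership */
}
#endif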
/* used to get the owner of a given nv_device_id. */
struct task_struct* get_nv_device_owner(u32 target_device_id)
{
	struct task_struct* owner;
	BUG_ON(target_device_id >= NV_DEVICE_NUM);
	owner = NV_DEVICE_REG[target_device_id].device_owner;
	return(owner);
}

void lock_nv_registry(u32 target_device_id, unsigned long* flags)
{
	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	if(in_interrupt())
		TRACE("Locking registry for %d.\n", target_device_id);
	else
		TRACE_CUR("Locking registry for %d.\n", target_device_id);

	raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
}

void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
{
	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	if(in_interrupt())
		TRACE("Unlocking registry for %d.\n", target_device_id);
	else
		TRACE_CUR("Unlocking registry for %d.\n", target_device_id);

	raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
}

void increment_nv_int_count(u32 device)
{
	unsigned long flags;
	struct task_struct* owner;

	lock_nv_registry(device, &flags);

	owner = NV_DEVICE_REG[device].device_owner;
	if(owner) {
		atomic_inc(&tsk_rt(owner)->nv_int_count);
	}

	unlock_nv_registry(device, &flags);
}
EXPORT_SYMBOL(increment_nv_int_count);
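/*
 * Illustrative sketch only (hypothetical helper, mirroring the locking
 * pattern of increment_nv_int_count() above): code that needs to inspect the
 * owner of a device should do so under the per-device registry lock.
 */
#if 0
static pid_t example_owner_pid(u32 device)
{
	unsigned long flags;
	struct task_struct* owner;
	pid_t pid = -1;

	lock_nv_registry(device, &flags);

	owner = get_nv_device_owner(device);
	if (owner)
		pid = owner->pid;

	unlock_nv_registry(device, &flags);

	return pid;
}
#endif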