#include <linux/module.h>
#include <linux/semaphore.h>
#include <linux/pci.h>
#include <litmus/sched_trace.h>
#include <litmus/nvidia_info.h>
#include <litmus/litmus.h>
#include <litmus/sched_plugin.h>
#include <litmus/binheap.h>
/*
 * Scalar type aliases used by the litmus_nv_* structure definitions below.
 * Each trailing comment gives the value range (or floating-point format)
 * the alias is expected to have.
 * NOTE(review): these presumably mirror the NVIDIA driver's own typedefs so
 * that the structures below match the driver's layout -- confirm against the
 * driver headers for the targeted driver version.
 */
typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
typedef unsigned char NvU8; /* 0 to 255 */
typedef unsigned short NvU16; /* 0 to 65535 */
typedef signed char NvS8; /* -128 to 127 */
typedef signed short NvS16; /* -32768 to 32767 */
typedef float NvF32; /* IEEE Single Precision (S1E8M23) */
typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
typedef unsigned int NvU32; /* 0 to 4294967295 */
typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
/*
 * Overlay giving 8-, 16- and 32-bit views of the same storage.
 * Every member is a volatile one-element array; being a union, all three
 * views start at the same address. litmus_nv_phwreg_t is the pointer form.
 */
typedef union
{
volatile NvV8 Reg008[1];
volatile NvV16 Reg016[1];
volatile NvV32 Reg032[1];
} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
/*
 * Description of one aperture: a 64-bit address and size, a 32-bit offset,
 * and two mapping pointers (a plain NvU32 pointer and a register-overlay
 * pointer).
 * NOTE(review): presumably address/size describe a physical region and the
 * map pointers its kernel mapping -- nothing in this file reads these
 * fields, so confirm against the driver.
 */
typedef struct
{
NvU64 address;
NvU64 size;
NvU32 offset;
NvU32 *map;
litmus_nv_phwreg_t map_u;
} litmus_nv_aperture_t;
/*
 * Per-device NVIDIA state record.
 *
 * A pointer to one of these is what this file expects to find in a
 * tasklet's ->data field (see dump_nvidia_info() and
 * get_tasklet_nv_device_num()). It is embedded as the 'nv_state' member of
 * litmus_nv_linux_state_t, which is how the enclosing structure is
 * recovered via container_of().
 *
 * NOTE(review): field order and types presumably must match the NVIDIA
 * driver's own state structure exactly -- do not reorder or resize without
 * checking the driver version in use.
 */
typedef struct
{
void *priv; /* private data */
void *os_state; /* os-specific device state */
int rmInitialized;
int flags;
/* PCI config info */
NvU32 domain;
NvU16 bus;
NvU16 slot;
NvU16 vendor_id;
NvU16 device_id;
NvU16 subsystem_id;
NvU32 gpu_id;
void *handle;
NvU32 pci_cfg_space[16];
/* physical characteristics */
litmus_nv_aperture_t bars[3];
litmus_nv_aperture_t *regs;
/* note: '*' binds to 'fb' only; 'ud' is an embedded aperture, not a pointer */
litmus_nv_aperture_t *fb, ud;
litmus_nv_aperture_t agp;
NvU32 interrupt_line;
NvU32 agp_config;
NvU32 agp_status;
NvU32 primary_vga;
NvU32 sim_env;
NvU32 rc_timer_enabled;
/* list of events allocated for this device */
void *event_list;
void *kern_mappings;
} litmus_nv_state_t;
/* Alias for the kernel work item type. */
typedef struct work_struct litmus_nv_task_t;
/*
 * A work item followed by an opaque data pointer.
 * get_work_nv_device_num() depends on 'data' being located
 * sizeof(struct work_struct) bytes after the start of 'task', so the member
 * order here must not change.
 */
typedef struct litmus_nv_work_s {
litmus_nv_task_t task;
void *data;
} litmus_nv_work_t;
/*
 * Linux-specific per-device state that embeds litmus_nv_state_t.
 *
 * dump_nvidia_info() and get_tasklet_nv_device_num() take a pointer to the
 * embedded 'nv_state' member and use container_of() to get back to this
 * structure, then read 'device_num'. The position of 'device_num' therefore
 * depends on every member declared before it, including the
 * CONFIG_CUDA_4_0-only 'registry_keys' buffer.
 *
 * NOTE(review): the layout presumably has to match the NVIDIA driver's
 * private Linux state structure for the driver version in use -- confirm
 * before changing any member.
 */
typedef struct litmus_nv_linux_state_s {
litmus_nv_state_t nv_state;
atomic_t usage_count;
struct pci_dev *dev;
void *agp_bridge;
void *alloc_queue;
void *timer_sp;
void *isr_sp;
void *pci_cfgchk_sp;
void *isr_bh_sp;
#ifdef CONFIG_CUDA_4_0
/* only present when built with CONFIG_CUDA_4_0; shifts all later members */
char registry_keys[512];
#endif
/* keep track of any pending bottom halfes */
struct tasklet_struct tasklet;
litmus_nv_work_t work;
/* get a timer callback every second */
struct timer_list rc_timer;
/* lock for linux-specific data, not used by core rm */
struct semaphore ldata_lock;
/* lock for linux-specific alloc queue */
struct semaphore at_lock;
/* the members below are compiled out unconditionally */
#if 0
#if defined(NV_USER_MAP)
/* list of user mappings */
struct nv_usermap_s *usermap_list;
/* lock for VMware-specific mapping list */
struct semaphore mt_lock;
#endif /* defined(NV_USER_MAP) */
#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
void *apm_nv_dev;
#endif
#endif
/* device index; bounds-checked against NV_DEVICE_NUM by get_tasklet_nv_device_num() */
NvU32 device_num;
struct litmus_nv_linux_state_s *next;
} litmus_nv_linux_state_t;
void dump_nvidia_info(const struct tasklet_struct *t)
{
litmus_nv_state_t* nvstate = NULL;
litmus_nv_linux_state_t* linuxstate = NULL;
struct pci_dev* pci = NULL;
nvstate = (litmus_nv_state_t*)(t->data);
if(nvstate)
{
TRACE("NV State:\n"
"\ttasklet ptr = %p\n"
"\tstate ptr = %p\n"
"\tprivate data ptr = %p\n"
"\tos state ptr = %p\n"
"\tdomain = %u\n"
"\tbus = %u\n"
"\tslot = %u\n"
"\tvender_id = %u\n"
"\tdevice_id = %u\n"
"\tsubsystem_id = %u\n"
"\tgpu_id = %u\n"
"\tinterrupt_line = %u\n",
t,
nvstate,
nvstate->priv,
nvstate->os_state,
nvstate->domain,
nvstate->bus,
nvstate->slot,
nvstate->vendor_id,
nvstate->device_id,
nvstate->subsystem_id,
nvstate->gpu_id,
nvstate->interrupt_line);
linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
}
else
{
TRACE("INVALID NVSTATE????\n");
}
if(linuxstate)
{
int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
TRACE("LINUX NV State:\n"
"\tlinux nv state ptr: %p\n"
"\taddress of tasklet: %p\n"
"\taddress of work: %p\n"
"\tusage_count: %d\n"
"\tdevice_num: %u\n"
"\ttasklet addr == this tasklet: %d\n"
"\tpci: %p\n",
linuxstate,
&(linuxstate->tasklet),
&(linuxstate->work),
atomic_read(&(linuxstate->usage_count)),
linuxstate->device_num,
(t == &(linuxstate->tasklet)),
linuxstate->dev);
pci = linuxstate->dev;
TRACE("Offsets:\n"
"\tOffset from LinuxState: %d, %x\n"
"\tOffset from NVState: %d, %x\n"
"\tOffset from parameter: %d, %x\n"
"\tdevice_num: %u\n",
ls_offset, ls_offset,
ns_offset_raw, ns_offset_raw,
ns_offset_desired, ns_offset_desired,
*((u32*)((void*)nvstate + ns_offset_desired)));
}
else
{
TRACE("INVALID LINUXNVSTATE?????\n");
}
#if 0
if(pci)
{
TRACE("PCI DEV Info:\n"
"pci device ptr: %p\n"
"\tdevfn = %d\n"
"\tvendor = %d\n"
"\tdevice = %d\n"
"\tsubsystem_vendor = %d\n"
"\tsubsystem_device = %d\n"
"\tslot # = %d\n",
pci,
pci->devfn,
pci->vendor,
pci->device,
pci->subsystem_vendor,
pci->subsystem_device,
pci->slot->number);
}
else
{
TRACE("INVALID PCIDEV PTR?????\n");
}
#endif
}
/*
 * Handle to the loaded "nvidia" kernel module.
 * Set by init_nvidia_info() from find_module("nvidia"), reset to NULL by
 * shutdown_nvidia_info(), and consulted by is_nvidia_func(). NULL means the
 * module has not been found (or has been released).
 */
static struct module* nvidia_mod = NULL;
/*
 * Disabled (compiled out) module-notifier variant of the NVIDIA module
 * lookup: one callback that looks the module up and initializes the device
 * registry, one that clears the cached handle, and the two notifier_block
 * descriptors that reference them. init_nvidia_info() performs the lookup
 * directly instead.
 * NOTE(review): nvidia_ready_module_notify() has no return statement on its
 * "module not found" path; fix that before re-enabling this code.
 */
#if 0
static int nvidia_ready_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
mutex_lock(&module_mutex);
nvidia_mod = find_module("nvidia");
mutex_unlock(&module_mutex);
if(nvidia_mod != NULL)
{
TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
(void*)(nvidia_mod->module_core),
(void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
init_nv_device_reg();
return(0);
}
else
{
TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
}
}
static int nvidia_going_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
nvidia_mod = NULL;
mb();
return 0;
}
static struct notifier_block nvidia_ready = {
.notifier_call = nvidia_ready_module_notify,
.priority = 1,
};
static struct notifier_block nvidia_going = {
.notifier_call = nvidia_going_module_notify,
.priority = 1,
};
#endif
/*
 * init_nvidia_info - locate the "nvidia" module and set up the registry.
 *
 * Looks the module up by name while holding module_mutex and caches the
 * result in nvidia_mod. On success the module's core address range is
 * traced and the per-device registry is initialized.
 *
 * Return: 0 if the module was found, -1 otherwise.
 */
int init_nvidia_info(void)
{
	mutex_lock(&module_mutex);
	nvidia_mod = find_module("nvidia");
	mutex_unlock(&module_mutex);

	/* handle the failure case first */
	if (nvidia_mod == NULL) {
		TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
		return -1;
	}

	TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
	      (void*)(nvidia_mod->module_core),
	      (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
	init_nv_device_reg();

	return 0;
}
/*
 * shutdown_nvidia_info - forget the cached NVIDIA module handle.
 *
 * After this, is_nvidia_func() reports 0 for every address until
 * init_nvidia_info() succeeds again.
 */
void shutdown_nvidia_info(void)
{
	nvidia_mod = NULL;
	/* memory barrier issued right after clearing the handle */
	mb();
}
/*
 * is_nvidia_func - test whether an address lies in the NVIDIA module's core.
 * @func_addr: address to test; works with pointers to static data inside
 *             the module too.
 *
 * Return: the result of within_module_core() for @func_addr, or 0 when no
 * NVIDIA module handle is currently cached.
 */
int is_nvidia_func(void* func_addr)
{
	if (!nvidia_mod)
		return 0;

	return within_module_core((unsigned long)func_addr, nvidia_mod);
}
u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
{
// life is too short to use hard-coded offsets. update this later.
litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
return(linuxstate->device_num);
//int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
#if 0
// offset determined though observed behavior of the NV driver.
//const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
//const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
void* state = (void*)(t->data);
void* device_num_ptr = state + DEVICE_NUM_OFFSET;
//dump_nvidia_info(t);
return(*((u32*)device_num_ptr));
#endif
}
/*
 * get_work_nv_device_num - device index for an NVIDIA work item.
 * @t: work item that is immediately followed in memory by a pointer (the
 *     layout of litmus_nv_work_t).
 *
 * Reads the pointer stored sizeof(struct work_struct) bytes past @t and
 * returns the u32 that pointer refers to. The offset was determined through
 * observed behavior of the NV driver.
 */
u32 get_work_nv_device_num(const struct work_struct *t)
{
	/* address of the pointer slot that trails the work item */
	const char *past_work = (const char *)t + sizeof(struct work_struct);
	void *const *data_slot = (void *const *)past_work;

	return *(u32 *)(*data_slot);
}
/*
 * Per-GPU ownership record.
 *
 * 'owners' is a fixed table of NV_MAX_SIMULT_USERS slots. Registration
 * stores a task in the first NULL slot and unregistration sets the task's
 * slot back to NULL, so occupied slots are not necessarily contiguous.
 * 'nr_owners' counts the occupied slots, and 'max_prio_owner' caches the
 * owner selected by litmus->compare(). Updates in this file are made with
 * 'lock' held.
 */
typedef struct {
raw_spinlock_t lock;
int nr_owners;
struct task_struct* max_prio_owner;
struct task_struct* owners[NV_MAX_SIMULT_USERS];
}nv_device_registry_t;
/* One registry entry per GPU, indexed by device number. */
static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
/*
 * init_nv_device_reg - reset the per-GPU registry.
 *
 * Zeroes every registry entry (no owners, no max-priority owner) and then
 * initializes each entry's spinlock.
 *
 * Return: always 1.
 */
int init_nv_device_reg(void)
{
	int dev = 0;

	memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));

	while (dev < NV_DEVICE_NUM) {
		raw_spin_lock_init(&NV_DEVICE_REG[dev].lock);
		++dev;
	}

	return 1;
}
/* Used to look up the nv_device_id for a given owner.
(Returns -1 if the associated device id cannot be found.) */
/*
int get_nv_device_id(struct task_struct* owner)
{
int i;
if(!owner)
{
return(-1);
}
for(i = 0; i < NV_DEVICE_NUM; ++i)
{
if(NV_DEVICE_REG[i].device_owner == owner)
return(i);
}
return(-1);
}
*/
/*
 * find_hp_owner - pick the highest-priority registered owner of a device.
 * @reg:  registry entry to search.
 * @skip: owner to ignore (may be NULL to consider every owner).
 *
 * Every slot of reg->owners is examined, not just the first reg->nr_owners.
 * The table is sparse: registration fills the first NULL slot and
 * unregistration NULLs a slot in place, so an occupied slot can sit at an
 * index >= nr_owners. Bounding the scan by nr_owners would miss such owners.
 *
 * Candidates are ranked with litmus->compare(candidate, best-so-far), where
 * best-so-far starts out NULL.
 *
 * Return: the selected owner, or NULL if no slot other than @skip is
 * occupied. The caller is expected to hold reg->lock.
 */
static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip)
{
	/* slot count taken from the array itself so it cannot drift */
	const int nr_slots = sizeof(reg->owners) / sizeof(reg->owners[0]);
	struct task_struct *best = NULL;
	int i;

	for (i = 0; i < nr_slots; ++i) {
		struct task_struct *cand = reg->owners[i];

		if (cand && cand != skip && litmus->compare(cand, best))
			best = cand;
	}

	return best;
}
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
{
unsigned long flags;
nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
if(reg->max_prio_owner != t) {
raw_spin_lock_irqsave(®->lock, flags);
if(reg->max_prio_owner != t) {
if(litmus->compare(t, reg->max_prio_owner)) {
litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
reg->max_prio_owner = t;
}
}
raw_spin_unlock_irqrestore(®->lock, flags);
}
}
void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
{
unsigned long flags;
nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
if(reg->max_prio_owner == t) {
raw_spin_lock_irqsave(®->lock, flags);
if(reg->max_prio_owner == t) {
reg->max_prio_owner = find_hp_owner(reg, NULL);
if(reg->max_prio_owner != t) {
litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
}
}
raw_spin_unlock_irqrestore(®->lock, flags);
}
}
#endif
static int __reg_nv_device(int reg_device_id, struct task_struct *t)
{
int ret = 0;
int i;
struct task_struct *old_max = NULL;
unsigned long flags;
nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) {
// TODO: check if taks is already registered.
return ret; // assume already registered.
}
raw_spin_lock_irqsave(®->lock, flags);
if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
if(reg->owners[i] == NULL) {
reg->owners[i] = t;
//if(edf_higher_prio(t, reg->max_prio_owner)) {
if(litmus->compare(t, reg->max_prio_owner)) {
old_max = reg->max_prio_owner;
reg->max_prio_owner = t;
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
litmus->change_prio_pai_tasklet(old_max, t);
#endif
}
#ifdef CONFIG_LITMUS_SOFTIRQD
down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem);
#endif
++(reg->nr_owners);
break;
}
}
}
else
{
TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
//ret = -EBUSY;
}
raw_spin_unlock_irqrestore(®->lock, flags);
__set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
return(ret);
}
static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
{
int ret = 0;
int i;
unsigned long flags;
nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
#ifdef CONFIG_LITMUS_SOFTIRQD
struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
#endif
if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
return ret;
}
raw_spin_lock_irqsave(®->lock, flags);
TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
if(reg->owners[i] == t) {
#ifdef CONFIG_LITMUS_SOFTIRQD
flush_pending(klitirqd_th, t);
#endif
if(reg->max_prio_owner == t) {
reg->max_prio_owner = find_hp_owner(reg, t);
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
#endif
}
#ifdef CONFIG_LITMUS_SOFTIRQD
up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem);
#endif
reg->owners[i] = NULL;
--(reg->nr_owners);
break;
}
}
raw_spin_unlock_irqrestore(®->lock, flags);
__clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
return(ret);
}
/*
 * reg_nv_device - register or unregister a task as a GPU owner.
 * @reg_device_id: device index; must satisfy 0 <= id < NV_DEVICE_NUM.
 * @reg_action:    non-zero to register, zero to unregister.
 * @t:             task being (un)registered.
 *
 * Return: -ENODEV for an out-of-range device index, otherwise the result of
 * __reg_nv_device() or __clear_reg_nv_device().
 */
int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
{
	if (reg_device_id < 0 || reg_device_id >= NV_DEVICE_NUM)
		return -ENODEV;

	return reg_action ? __reg_nv_device(reg_device_id, t)
			  : __clear_reg_nv_device(reg_device_id, t);
}
/*
 * get_nv_max_device_owner - recorded max-priority owner of a GPU.
 * @target_device_id: device index; BUG()s if not below NV_DEVICE_NUM.
 *
 * The field is read without taking the registry lock; callers needing a
 * stable value can bracket the call with lock_nv_registry() /
 * unlock_nv_registry().
 *
 * Return: the max-priority owner, or NULL if none is recorded.
 */
struct task_struct* get_nv_max_device_owner(u32 target_device_id)
{
	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	return NV_DEVICE_REG[target_device_id].max_prio_owner;
}
/*
 * lock_nv_registry - take a GPU's registry lock with interrupts disabled.
 * @target_device_id: device index; BUG()s if not below NV_DEVICE_NUM.
 * @flags:            where the saved interrupt state is stored; pass the
 *                    same location to unlock_nv_registry().
 *
 * Traces the acquisition first, using TRACE_CUR outside interrupt context
 * and plain TRACE inside it.
 */
void lock_nv_registry(u32 target_device_id, unsigned long* flags)
{
	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	if (!in_interrupt()) {
		TRACE_CUR("Locking registry for %d.\n", target_device_id);
	} else {
		TRACE("Locking registry for %d.\n", target_device_id);
	}

	raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
}
/*
 * unlock_nv_registry - release a GPU's registry lock.
 * @target_device_id: device index; BUG()s if not below NV_DEVICE_NUM.
 * @flags:            interrupt state saved by lock_nv_registry(), restored
 *                    on release.
 *
 * Traces the release first, using TRACE_CUR outside interrupt context and
 * plain TRACE inside it.
 */
void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
{
	BUG_ON(target_device_id >= NV_DEVICE_NUM);

	if (!in_interrupt()) {
		TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
	} else {
		TRACE("Unlocking registry for %d.\n", target_device_id);
	}

	raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
}
//void increment_nv_int_count(u32 device)
//{
// unsigned long flags;
// struct task_struct* owner;
//
// lock_nv_registry(device, &flags);
//
// owner = NV_DEVICE_REG[device].device_owner;
// if(owner)
// {
// atomic_inc(&tsk_rt(owner)->nv_int_count);
// }
//
// unlock_nv_registry(device, &flags);
//}
//EXPORT_SYMBOL(increment_nv_int_count);