diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2011-06-02 16:06:05 -0400 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2011-06-02 16:06:05 -0400 |
commit | 3d5537c160c1484e8d562b9828baf679cc53f67a (patch) | |
tree | b595364f1b0f94ac2426c8315bc5967debc7bbb0 /litmus/nvidia_info.c | |
parent | 7d754596756240fa918b94cd0c3011c77a638987 (diff) |
Full patch for klitirqd with Nvidia GPU support.
Diffstat (limited to 'litmus/nvidia_info.c')
-rw-r--r-- | litmus/nvidia_info.c | 526 |
1 files changed, 526 insertions, 0 deletions
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c new file mode 100644 index 000000000000..78f035244d21 --- /dev/null +++ b/litmus/nvidia_info.c | |||
@@ -0,0 +1,526 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/semaphore.h> | ||
3 | #include <linux/pci.h> | ||
4 | |||
5 | #include <litmus/sched_trace.h> | ||
6 | #include <litmus/nvidia_info.h> | ||
7 | #include <litmus/litmus.h> | ||
8 | |||
/* Scalar aliases used by the litmus_nv_* structure declarations in this file. */
typedef unsigned char		NvV8;	/* "void": enumerated or multiple fields */
typedef unsigned short		NvV16;	/* "void": enumerated or multiple fields */
typedef unsigned char		NvU8;	/* 0 to 255 */
typedef unsigned short		NvU16;	/* 0 to 65535 */
typedef signed char		NvS8;	/* -128 to 127 */
typedef signed short		NvS16;	/* -32768 to 32767 */
typedef float			NvF32;	/* IEEE Single Precision (S1E8M23) */
typedef double			NvF64;	/* IEEE Double Precision (S1E11M52) */
typedef unsigned int		NvV32;	/* "void": enumerated or multiple fields */
typedef unsigned int		NvU32;	/* 0 to 4294967295 */
typedef unsigned long long	NvU64;	/* 0 to 18446744073709551615 */
20 | typedef union | ||
21 | { | ||
22 | volatile NvV8 Reg008[1]; | ||
23 | volatile NvV16 Reg016[1]; | ||
24 | volatile NvV32 Reg032[1]; | ||
25 | } litmus_nv_hwreg_t, * litmus_nv_phwreg_t; | ||
26 | |||
27 | typedef struct | ||
28 | { | ||
29 | NvU64 address; | ||
30 | NvU64 size; | ||
31 | NvU32 offset; | ||
32 | NvU32 *map; | ||
33 | litmus_nv_phwreg_t map_u; | ||
34 | } litmus_nv_aperture_t; | ||
35 | |||
36 | typedef struct | ||
37 | { | ||
38 | void *priv; /* private data */ | ||
39 | void *os_state; /* os-specific device state */ | ||
40 | |||
41 | int rmInitialized; | ||
42 | int flags; | ||
43 | |||
44 | /* PCI config info */ | ||
45 | NvU32 domain; | ||
46 | NvU16 bus; | ||
47 | NvU16 slot; | ||
48 | NvU16 vendor_id; | ||
49 | NvU16 device_id; | ||
50 | NvU16 subsystem_id; | ||
51 | NvU32 gpu_id; | ||
52 | void *handle; | ||
53 | |||
54 | NvU32 pci_cfg_space[16]; | ||
55 | |||
56 | /* physical characteristics */ | ||
57 | litmus_nv_aperture_t bars[3]; | ||
58 | litmus_nv_aperture_t *regs; | ||
59 | litmus_nv_aperture_t *fb, ud; | ||
60 | litmus_nv_aperture_t agp; | ||
61 | |||
62 | NvU32 interrupt_line; | ||
63 | |||
64 | NvU32 agp_config; | ||
65 | NvU32 agp_status; | ||
66 | |||
67 | NvU32 primary_vga; | ||
68 | |||
69 | NvU32 sim_env; | ||
70 | |||
71 | NvU32 rc_timer_enabled; | ||
72 | |||
73 | /* list of events allocated for this device */ | ||
74 | void *event_list; | ||
75 | |||
76 | void *kern_mappings; | ||
77 | |||
78 | } litmus_nv_state_t; | ||
79 | |||
80 | typedef struct work_struct litmus_nv_task_t; | ||
81 | |||
82 | typedef struct litmus_nv_work_s { | ||
83 | litmus_nv_task_t task; | ||
84 | void *data; | ||
85 | } litmus_nv_work_t; | ||
86 | |||
87 | typedef struct litmus_nv_linux_state_s { | ||
88 | litmus_nv_state_t nv_state; | ||
89 | atomic_t usage_count; | ||
90 | |||
91 | struct pci_dev *dev; | ||
92 | void *agp_bridge; | ||
93 | void *alloc_queue; | ||
94 | |||
95 | void *timer_sp; | ||
96 | void *isr_sp; | ||
97 | void *pci_cfgchk_sp; | ||
98 | void *isr_bh_sp; | ||
99 | |||
100 | #ifdef CONFIG_CUDA_4_0 | ||
101 | char registry_keys[512]; | ||
102 | #endif | ||
103 | |||
104 | /* keep track of any pending bottom halfes */ | ||
105 | struct tasklet_struct tasklet; | ||
106 | litmus_nv_work_t work; | ||
107 | |||
108 | /* get a timer callback every second */ | ||
109 | struct timer_list rc_timer; | ||
110 | |||
111 | /* lock for linux-specific data, not used by core rm */ | ||
112 | struct semaphore ldata_lock; | ||
113 | |||
114 | /* lock for linux-specific alloc queue */ | ||
115 | struct semaphore at_lock; | ||
116 | |||
117 | #if 0 | ||
118 | #if defined(NV_USER_MAP) | ||
119 | /* list of user mappings */ | ||
120 | struct nv_usermap_s *usermap_list; | ||
121 | |||
122 | /* lock for VMware-specific mapping list */ | ||
123 | struct semaphore mt_lock; | ||
124 | #endif /* defined(NV_USER_MAP) */ | ||
125 | #if defined(NV_PM_SUPPORT_OLD_STYLE_APM) | ||
126 | void *apm_nv_dev; | ||
127 | #endif | ||
128 | #endif | ||
129 | |||
130 | NvU32 device_num; | ||
131 | struct litmus_nv_linux_state_s *next; | ||
132 | } litmus_nv_linux_state_t; | ||
133 | |||
134 | void dump_nvidia_info(const struct tasklet_struct *t) | ||
135 | { | ||
136 | litmus_nv_state_t* nvstate = NULL; | ||
137 | litmus_nv_linux_state_t* linuxstate = NULL; | ||
138 | struct pci_dev* pci = NULL; | ||
139 | |||
140 | nvstate = (litmus_nv_state_t*)(t->data); | ||
141 | |||
142 | if(nvstate) | ||
143 | { | ||
144 | TRACE("NV State:\n" | ||
145 | "\ttasklet ptr = %p\n" | ||
146 | "\tstate ptr = %p\n" | ||
147 | "\tprivate data ptr = %p\n" | ||
148 | "\tos state ptr = %p\n" | ||
149 | "\tdomain = %u\n" | ||
150 | "\tbus = %u\n" | ||
151 | "\tslot = %u\n" | ||
152 | "\tvender_id = %u\n" | ||
153 | "\tdevice_id = %u\n" | ||
154 | "\tsubsystem_id = %u\n" | ||
155 | "\tgpu_id = %u\n" | ||
156 | "\tinterrupt_line = %u\n", | ||
157 | t, | ||
158 | nvstate, | ||
159 | nvstate->priv, | ||
160 | nvstate->os_state, | ||
161 | nvstate->domain, | ||
162 | nvstate->bus, | ||
163 | nvstate->slot, | ||
164 | nvstate->vendor_id, | ||
165 | nvstate->device_id, | ||
166 | nvstate->subsystem_id, | ||
167 | nvstate->gpu_id, | ||
168 | nvstate->interrupt_line); | ||
169 | |||
170 | linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); | ||
171 | } | ||
172 | else | ||
173 | { | ||
174 | TRACE("INVALID NVSTATE????\n"); | ||
175 | } | ||
176 | |||
177 | if(linuxstate) | ||
178 | { | ||
179 | int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate); | ||
180 | int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state)); | ||
181 | int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); | ||
182 | |||
183 | |||
184 | TRACE("LINUX NV State:\n" | ||
185 | "\tlinux nv state ptr: %p\n" | ||
186 | "\taddress of tasklet: %p\n" | ||
187 | "\taddress of work: %p\n" | ||
188 | "\tusage_count: %d\n" | ||
189 | "\tdevice_num: %u\n" | ||
190 | "\ttasklet addr == this tasklet: %d\n" | ||
191 | "\tpci: %p\n", | ||
192 | linuxstate, | ||
193 | &(linuxstate->tasklet), | ||
194 | &(linuxstate->work), | ||
195 | atomic_read(&(linuxstate->usage_count)), | ||
196 | linuxstate->device_num, | ||
197 | (t == &(linuxstate->tasklet)), | ||
198 | linuxstate->dev); | ||
199 | |||
200 | pci = linuxstate->dev; | ||
201 | |||
202 | TRACE("Offsets:\n" | ||
203 | "\tOffset from LinuxState: %d, %x\n" | ||
204 | "\tOffset from NVState: %d, %x\n" | ||
205 | "\tOffset from parameter: %d, %x\n" | ||
206 | "\tdevice_num: %u\n", | ||
207 | ls_offset, ls_offset, | ||
208 | ns_offset_raw, ns_offset_raw, | ||
209 | ns_offset_desired, ns_offset_desired, | ||
210 | *((u32*)((void*)nvstate + ns_offset_desired))); | ||
211 | } | ||
212 | else | ||
213 | { | ||
214 | TRACE("INVALID LINUXNVSTATE?????\n"); | ||
215 | } | ||
216 | |||
217 | #if 0 | ||
218 | if(pci) | ||
219 | { | ||
220 | TRACE("PCI DEV Info:\n" | ||
221 | "pci device ptr: %p\n" | ||
222 | "\tdevfn = %d\n" | ||
223 | "\tvendor = %d\n" | ||
224 | "\tdevice = %d\n" | ||
225 | "\tsubsystem_vendor = %d\n" | ||
226 | "\tsubsystem_device = %d\n" | ||
227 | "\tslot # = %d\n", | ||
228 | pci, | ||
229 | pci->devfn, | ||
230 | pci->vendor, | ||
231 | pci->device, | ||
232 | pci->subsystem_vendor, | ||
233 | pci->subsystem_device, | ||
234 | pci->slot->number); | ||
235 | } | ||
236 | else | ||
237 | { | ||
238 | TRACE("INVALID PCIDEV PTR?????\n"); | ||
239 | } | ||
240 | #endif | ||
241 | } | ||
242 | |||
243 | static struct module* nvidia_mod = NULL; | ||
244 | int init_nvidia_info(void) | ||
245 | { | ||
246 | mutex_lock(&module_mutex); | ||
247 | nvidia_mod = find_module("nvidia"); | ||
248 | mutex_unlock(&module_mutex); | ||
249 | if(nvidia_mod != NULL) | ||
250 | { | ||
251 | TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, | ||
252 | (void*)(nvidia_mod->module_core), | ||
253 | (void*)(nvidia_mod->module_core) + nvidia_mod->core_size); | ||
254 | init_nv_device_reg(); | ||
255 | return(0); | ||
256 | } | ||
257 | else | ||
258 | { | ||
259 | TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); | ||
260 | return(-1); | ||
261 | } | ||
262 | } | ||
263 | |||
264 | |||
265 | /* works with pointers to static data inside the module too. */ | ||
266 | int is_nvidia_func(void* func_addr) | ||
267 | { | ||
268 | int ret = 0; | ||
269 | if(nvidia_mod) | ||
270 | { | ||
271 | ret = within_module_core((long unsigned int)func_addr, nvidia_mod); | ||
272 | /* | ||
273 | if(ret) | ||
274 | { | ||
275 | TRACE("%s : %p is in NVIDIA module: %d\n", | ||
276 | __FUNCTION__, func_addr, ret); | ||
277 | }*/ | ||
278 | } | ||
279 | |||
280 | return(ret); | ||
281 | } | ||
282 | |||
283 | u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) | ||
284 | { | ||
285 | // life is too short to use hard-coded offsets. update this later. | ||
286 | litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data); | ||
287 | litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); | ||
288 | |||
289 | BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); | ||
290 | |||
291 | return(linuxstate->device_num); | ||
292 | |||
293 | //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); | ||
294 | |||
295 | #if 0 | ||
296 | // offset determined though observed behavior of the NV driver. | ||
297 | //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1 | ||
298 | //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2 | ||
299 | |||
300 | void* state = (void*)(t->data); | ||
301 | void* device_num_ptr = state + DEVICE_NUM_OFFSET; | ||
302 | |||
303 | //dump_nvidia_info(t); | ||
304 | return(*((u32*)device_num_ptr)); | ||
305 | #endif | ||
306 | } | ||
307 | |||
308 | u32 get_work_nv_device_num(const struct work_struct *t) | ||
309 | { | ||
310 | // offset determined though observed behavior of the NV driver. | ||
311 | const int DEVICE_NUM_OFFSET = sizeof(struct work_struct); | ||
312 | void* state = (void*)(t); | ||
313 | void** device_num_ptr = state + DEVICE_NUM_OFFSET; | ||
314 | return(*((u32*)(*device_num_ptr))); | ||
315 | } | ||
316 | |||
317 | |||
318 | |||
319 | typedef struct { | ||
320 | raw_spinlock_t lock; | ||
321 | struct task_struct *device_owner; | ||
322 | }nv_device_registry_t; | ||
323 | |||
324 | static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; | ||
325 | |||
326 | int init_nv_device_reg(void) | ||
327 | { | ||
328 | int i; | ||
329 | |||
330 | //memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); | ||
331 | |||
332 | for(i = 0; i < NV_DEVICE_NUM; ++i) | ||
333 | { | ||
334 | raw_spin_lock_init(&NV_DEVICE_REG[i].lock); | ||
335 | NV_DEVICE_REG[i].device_owner = NULL; | ||
336 | } | ||
337 | |||
338 | return(1); | ||
339 | } | ||
340 | |||
341 | /* Used to get the nv_device_id registered to a given owner. | |
342 | (A return value of -1 means no associated device id was found.) */ | |
343 | /* | ||
344 | int get_nv_device_id(struct task_struct* owner) | ||
345 | { | ||
346 | int i; | ||
347 | if(!owner) | ||
348 | { | ||
349 | return(-1); | ||
350 | } | ||
351 | for(i = 0; i < NV_DEVICE_NUM; ++i) | ||
352 | { | ||
353 | if(NV_DEVICE_REG[i].device_owner == owner) | ||
354 | return(i); | ||
355 | } | ||
356 | return(-1); | ||
357 | } | ||
358 | */ | ||
359 | |||
360 | |||
361 | |||
362 | static int __reg_nv_device(int reg_device_id) | ||
363 | { | ||
364 | struct task_struct* old = | ||
365 | cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner, | ||
366 | NULL, | ||
367 | current); | ||
368 | |||
369 | mb(); | ||
370 | |||
371 | if(likely(old == NULL)) | ||
372 | { | ||
373 | down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); | ||
374 | TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); | ||
375 | return(0); | ||
376 | } | ||
377 | else | ||
378 | { | ||
379 | TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); | ||
380 | return(-EBUSY); | ||
381 | } | ||
382 | |||
383 | #if 0 | ||
384 | //unsigned long flags; | ||
385 | //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags); | ||
386 | //lock_nv_registry(reg_device_id, &flags); | ||
387 | |||
388 | if(likely(NV_DEVICE_REG[reg_device_id].device_owner == NULL)) | ||
389 | { | ||
390 | NV_DEVICE_REG[reg_device_id].device_owner = current; | ||
391 | mb(); // needed? | ||
392 | |||
393 | // release spin lock before chance of going to sleep. | ||
394 | //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags); | ||
395 | //unlock_nv_registry(reg_device_id, &flags); | ||
396 | |||
397 | down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); | ||
398 | TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); | ||
399 | return(0); | ||
400 | } | ||
401 | else | ||
402 | { | ||
403 | //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags); | ||
404 | //unlock_nv_registry(reg_device_id, &flags); | ||
405 | |||
406 | TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); | ||
407 | return(-EBUSY); | ||
408 | } | ||
409 | #endif | ||
410 | } | ||
411 | |||
412 | static int __clear_reg_nv_device(int de_reg_device_id) | ||
413 | { | ||
414 | int ret; | ||
415 | unsigned long flags; | ||
416 | struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); | ||
417 | struct task_struct* old; | ||
418 | |||
419 | lock_nv_registry(de_reg_device_id, &flags); | ||
420 | |||
421 | old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner, | ||
422 | current, | ||
423 | NULL); | ||
424 | |||
425 | mb(); | ||
426 | |||
427 | if(likely(old == current)) | ||
428 | { | ||
429 | flush_pending(klitirqd_th, current); | ||
430 | //unlock_nv_registry(de_reg_device_id, &flags); | ||
431 | |||
432 | up_and_set_stat(current, NOT_HELD, &tsk_rt(current)->klitirqd_sem); | ||
433 | |||
434 | unlock_nv_registry(de_reg_device_id, &flags); | ||
435 | ret = 0; | ||
436 | |||
437 | TRACE_CUR("%s: semaphore released.\n",__FUNCTION__); | ||
438 | } | ||
439 | else | ||
440 | { | ||
441 | unlock_nv_registry(de_reg_device_id, &flags); | ||
442 | ret = -EINVAL; | ||
443 | |||
444 | if(old) | ||
445 | TRACE_CUR("%s: device %d is not registered for this process's use! %s/%d is!\n", | ||
446 | __FUNCTION__, de_reg_device_id, old->comm, old->pid); | ||
447 | else | ||
448 | TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n", | ||
449 | __FUNCTION__, de_reg_device_id); | ||
450 | } | ||
451 | |||
452 | return(ret); | ||
453 | } | ||
454 | |||
455 | |||
456 | int reg_nv_device(int reg_device_id, int reg_action) | ||
457 | { | ||
458 | int ret; | ||
459 | |||
460 | if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) | ||
461 | { | ||
462 | if(reg_action) | ||
463 | ret = __reg_nv_device(reg_device_id); | ||
464 | else | ||
465 | ret = __clear_reg_nv_device(reg_device_id); | ||
466 | } | ||
467 | else | ||
468 | { | ||
469 | ret = -ENODEV; | ||
470 | } | ||
471 | |||
472 | return(ret); | ||
473 | } | ||
474 | |||
475 | /* use to get the owner of nv_device_id. */ | ||
476 | struct task_struct* get_nv_device_owner(u32 target_device_id) | ||
477 | { | ||
478 | struct task_struct* owner; | ||
479 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | ||
480 | owner = NV_DEVICE_REG[target_device_id].device_owner; | ||
481 | return(owner); | ||
482 | } | ||
483 | |||
484 | void lock_nv_registry(u32 target_device_id, unsigned long* flags) | ||
485 | { | ||
486 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | ||
487 | |||
488 | if(in_interrupt()) | ||
489 | TRACE("Locking registry for %d.\n", target_device_id); | ||
490 | else | ||
491 | TRACE_CUR("Locking registry for %d.\n", target_device_id); | ||
492 | |||
493 | raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); | ||
494 | } | ||
495 | |||
496 | void unlock_nv_registry(u32 target_device_id, unsigned long* flags) | ||
497 | { | ||
498 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | ||
499 | |||
500 | if(in_interrupt()) | ||
501 | TRACE("Unlocking registry for %d.\n", target_device_id); | ||
502 | else | ||
503 | TRACE_CUR("Unlocking registry for %d.\n", target_device_id); | ||
504 | |||
505 | raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); | ||
506 | } | ||
507 | |||
508 | |||
509 | void increment_nv_int_count(u32 device) | ||
510 | { | ||
511 | unsigned long flags; | ||
512 | struct task_struct* owner; | ||
513 | |||
514 | lock_nv_registry(device, &flags); | ||
515 | |||
516 | owner = NV_DEVICE_REG[device].device_owner; | ||
517 | if(owner) | ||
518 | { | ||
519 | atomic_inc(&tsk_rt(owner)->nv_int_count); | ||
520 | } | ||
521 | |||
522 | unlock_nv_registry(device, &flags); | ||
523 | } | ||
524 | EXPORT_SYMBOL(increment_nv_int_count); | ||
525 | |||
526 | |||