diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2013-10-09 15:34:24 -0400 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2013-10-09 15:34:24 -0400 |
commit | b5b69edba68805ab2d4bdeae58fe783710111bfc (patch) | |
tree | cf9f0c03d1c9a57555539ee8d3325904d6401e7e | |
parent | 0feedf723aaa61958ad81dca9d7135a69220d7b4 (diff) |
NV support: Configure by driver ver, not CUDA ver
-rw-r--r-- | litmus/Kconfig | 50 | ||||
-rw-r--r-- | litmus/nvidia_info.c | 1525 |
2 files changed, 828 insertions, 747 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig index 3d3a3ec71243..32c1c92cb56f 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig | |||
@@ -538,40 +538,54 @@ config NV_DEVICE_NUM | |||
538 | 538 | ||
539 | choice | 539 | choice |
540 | prompt "CUDA/Driver Version Support" | 540 | prompt "CUDA/Driver Version Support" |
541 | default CUDA_5_0 | 541 | default NV_DRV_319_37 |
542 | depends on LITMUS_NVIDIA | 542 | depends on LITMUS_NVIDIA |
543 | help | 543 | help |
544 | Select the version of CUDA/driver to support. | 544 | Select the version of NVIDIA driver to support. |
545 | Note: Some of the configurations below may work | ||
546 | with other versions of the NVIDIA driver, but | ||
547 | the layouts of data structures in litmus/nvidia_info.c | ||
548 | will have to be manually compared against | ||
549 | <driver>/kernel/nv.h and nv-linux.h in the driver's | ||
550 | GPL shim layer. | ||
551 | |||
552 | config NV_DRV_331_13 | ||
553 | bool "331.13 (post-CUDA 5.5)" | ||
554 | depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS | ||
555 | help | ||
556 | NV Driver 331.13. (An updated driver released | ||
557 | after CUDA 5.5.) | ||
545 | 558 | ||
546 | config CUDA_5_X | 559 | config NV_DRV_325_15 |
547 | bool "CUDA 5.0+" | 560 | bool "325.15 (post-CUDA 5.5)" |
548 | depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS | 561 | depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS |
549 | help | 562 | help |
550 | Support CUDA 5.0 (dev. driver version: x86_64-310.x) | 563 | NV Driver 325.15. (An updated driver released |
564 | after CUDA 5.5.) | ||
551 | 565 | ||
552 | config CUDA_5_0 | 566 | config NV_DRV_319_37 |
553 | bool "CUDA 5.0" | 567 | bool "319.37 (CUDA 5.5)" |
554 | depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS | 568 | depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS |
555 | help | 569 | help |
556 | Support CUDA 5.0 (dev. driver version: x86_64-304.54) | 570 | NV Driver 319.37. (distributed with CUDA 5.5) |
557 | 571 | ||
558 | config CUDA_4_2 | 572 | config NV_DRV_304_54 |
559 | bool "CUDA 4.2" | 573 | bool "304.54 (CUDA 5.0)" |
560 | depends on LITMUS_NVIDIA | 574 | depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS |
561 | help | 575 | help |
562 | Support CUDA 4.2 (dev driver version: x86_64-295.40) | 576 | NV Driver 304.54. (distributed with CUDA 5.0) |
563 | 577 | ||
564 | config CUDA_4_0 | 578 | config NV_DRV_295_40 |
565 | bool "CUDA 4.0" | 579 | bool "295.40 (CUDA 4.2)" |
566 | depends on LITMUS_NVIDIA | 580 | depends on LITMUS_NVIDIA |
567 | help | 581 | help |
568 | Support CUDA 4.0 (dev. driver version: x86_64-270.41) | 582 | NV Driver 295.40. (distributed with CUDA 4.2) |
569 | 583 | ||
570 | config CUDA_3_2 | 584 | config NV_DRV_270_41 |
571 | bool "CUDA 3.2" | 585 | bool "270.41 (CUDA 4.0)" |
572 | depends on LITMUS_NVIDIA | 586 | depends on LITMUS_NVIDIA |
573 | help | 587 | help |
574 | Support CUDA 3.2 (dev. driver version: x86_64-260.24) | 588 | NV Driver 270.41. (distributed with CUDA 4.0) |
575 | 589 | ||
576 | endchoice | 590 | endchoice |
577 | 591 | ||
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 0050ce65e521..6a101882e615 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c | |||
@@ -14,6 +14,39 @@ | |||
14 | #include <litmus/litmus_softirq.h> | 14 | #include <litmus/litmus_softirq.h> |
15 | #endif | 15 | #endif |
16 | 16 | ||
17 | #if defined(CONFIG_NV_DRV_331_13) | ||
18 | #define NV_MAJOR_V 331 | ||
19 | #define NV_MINOR_V 13 | ||
20 | #elif defined(CONFIG_NV_DRV_325_15) | ||
21 | #define NV_MAJOR_V 325 | ||
22 | #define NV_MINOR_V 15 | ||
23 | #elif defined(CONFIG_NV_DRV_319_37) | ||
24 | #define NV_MAJOR_V 319 | ||
25 | #define NV_MINOR_V 37 | ||
26 | #elif defined(CONFIG_NV_DRV_304_54) | ||
27 | #define NV_MAJOR_V 304 | ||
28 | #define NV_MINOR_V 54 | ||
29 | #elif defined(CONFIG_NV_DRV_295_40) | ||
30 | #define NV_MAJOR_V 295 | ||
31 | #define NV_MINOR_V 40 | ||
32 | #elif defined(CONFIG_NV_DRV_270_41) | ||
33 | #define NV_MAJOR_V 270 | ||
34 | #define NV_MINOR_V 41 | ||
35 | #else | ||
36 | #error "Unsupported NV Driver" | ||
37 | #endif | ||
38 | |||
39 | #if NV_MAJOR_V >= 319 | ||
40 | #include <drm/drmP.h> | ||
41 | #endif | ||
42 | |||
43 | /* The following structures map to structures found in the GPL layer | ||
44 | of the NVIDIA-distributed binary blob driver. Much of the code | ||
45 | is cobbled together from various versions of the NV driver. We | ||
46 | can factor this out into a separate tool that gives memory offsets | ||
47 | to determine the device ID if distributing this code ever becomes | ||
48 | a problem. */ | ||
49 | |||
17 | typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ | 50 | typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ |
18 | typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ | 51 | typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ |
19 | typedef unsigned char NvU8; /* 0 to 255 */ | 52 | typedef unsigned char NvU8; /* 0 to 255 */ |
@@ -25,6 +58,7 @@ typedef double NvF64; /* IEEE Double Precision (S1E11M52) */ | |||
25 | typedef unsigned int NvV32; /* "void": enumerated or multiple fields */ | 58 | typedef unsigned int NvV32; /* "void": enumerated or multiple fields */ |
26 | typedef unsigned int NvU32; /* 0 to 4294967295 */ | 59 | typedef unsigned int NvU32; /* 0 to 4294967295 */ |
27 | typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */ | 60 | typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */ |
61 | typedef NvU8 NvBool; | ||
28 | typedef union | 62 | typedef union |
29 | { | 63 | { |
30 | volatile NvV8 Reg008[1]; | 64 | volatile NvV8 Reg008[1]; |
@@ -35,8 +69,8 @@ typedef union | |||
35 | typedef struct | 69 | typedef struct |
36 | { | 70 | { |
37 | NvU64 address; | 71 | NvU64 address; |
38 | #if defined(CONFIG_CUDA_4_2) || defined(CONFIG_CUDA_5_0) || defined(CONFIG_CUDA_5_X) | 72 | #if NV_MAJOR_V >= 295 |
39 | NvU64 strapped_size; | 73 | NvU64 strapped_size; |
40 | #endif | 74 | #endif |
41 | NvU64 size; | 75 | NvU64 size; |
42 | NvU32 offset; | 76 | NvU32 offset; |
@@ -44,40 +78,61 @@ typedef struct | |||
44 | litmus_nv_phwreg_t map_u; | 78 | litmus_nv_phwreg_t map_u; |
45 | } litmus_nv_aperture_t; | 79 | } litmus_nv_aperture_t; |
46 | 80 | ||
81 | #if NV_MAJOR_V >= 331 | ||
82 | typedef struct | ||
83 | { | ||
84 | NvU32 domain; | ||
85 | NvU8 bus; | ||
86 | NvU8 slot; | ||
87 | NvU16 vendor_id; | ||
88 | NvU16 device_id; | ||
89 | NvBool valid; | ||
90 | } litmus_pci_info_t; | ||
91 | #endif | ||
92 | |||
47 | typedef struct | 93 | typedef struct |
48 | { | 94 | { |
49 | void *priv; /* private data */ | 95 | void *priv; /* private data */ |
50 | void *os_state; /* os-specific device state */ | 96 | void *os_state; /* os-specific device state */ |
51 | 97 | ||
52 | #ifdef CONFIG_CUDA_4_0 | 98 | #if NV_MAJOR_V == 270 |
53 | int rmInitialized; /* TODO: appears in CUDA 3_2? */ | 99 | int rmInitialized; |
54 | #endif | 100 | #endif |
55 | int flags; | 101 | int flags; |
56 | 102 | ||
103 | #if NV_MAJOR_V <= 331 | ||
57 | /* PCI config info */ | 104 | /* PCI config info */ |
58 | NvU32 domain; | 105 | NvU32 domain; |
59 | NvU16 bus; | 106 | NvU16 bus; |
60 | NvU16 slot; | 107 | NvU16 slot; |
61 | NvU16 vendor_id; | 108 | NvU16 vendor_id; |
62 | NvU16 device_id; | 109 | NvU16 device_id; |
110 | #else | ||
111 | litmus_pci_info_t pci_info; | ||
112 | #endif | ||
113 | |||
63 | NvU16 subsystem_id; | 114 | NvU16 subsystem_id; |
64 | NvU32 gpu_id; | 115 | NvU32 gpu_id; |
65 | void *handle; | 116 | void *handle; |
66 | 117 | ||
118 | #if NV_MAJOR_V < 325 | ||
67 | NvU32 pci_cfg_space[16]; | 119 | NvU32 pci_cfg_space[16]; |
120 | #else | ||
121 | NvU32 pci_cfg_space[64]; | ||
122 | #endif | ||
68 | 123 | ||
69 | /* physical characteristics */ | 124 | /* physical characteristics */ |
70 | litmus_nv_aperture_t bars[3]; | 125 | litmus_nv_aperture_t bars[3]; |
71 | litmus_nv_aperture_t *regs; | 126 | litmus_nv_aperture_t *regs; |
72 | litmus_nv_aperture_t *fb, ud; | 127 | litmus_nv_aperture_t *fb, ud; |
73 | 128 | ||
74 | #ifndef CONFIG_CUDA_5_X | 129 | #if NV_MAJOR_V < 325 |
75 | litmus_nv_aperture_t agp; | 130 | litmus_nv_aperture_t agp; |
76 | #endif | 131 | #endif |
77 | 132 | ||
78 | NvU32 interrupt_line; | 133 | NvU32 interrupt_line; |
79 | 134 | ||
80 | #ifndef CONFIG_CUDA_5_X | 135 | #if NV_MAJOR_V < 325 |
81 | NvU32 agp_config; | 136 | NvU32 agp_config; |
82 | NvU32 agp_status; | 137 | NvU32 agp_status; |
83 | #endif | 138 | #endif |
@@ -106,9 +161,9 @@ typedef struct litmus_nv_linux_state_s { | |||
106 | litmus_nv_state_t nv_state; | 161 | litmus_nv_state_t nv_state; |
107 | atomic_t usage_count; | 162 | atomic_t usage_count; |
108 | 163 | ||
109 | struct pci_dev *dev; | 164 | struct pci_dev *dev; |
110 | 165 | ||
111 | #ifndef CONFIG_CUDA_5_X | 166 | #if NV_MAJOR_V < 325 |
112 | void *agp_bridge; | 167 | void *agp_bridge; |
113 | #endif | 168 | #endif |
114 | 169 | ||
@@ -118,10 +173,7 @@ typedef struct litmus_nv_linux_state_s { | |||
118 | void *isr_sp; | 173 | void *isr_sp; |
119 | void *pci_cfgchk_sp; | 174 | void *pci_cfgchk_sp; |
120 | void *isr_bh_sp; | 175 | void *isr_bh_sp; |
121 | 176 | char registry_keys[512]; | |
122 | #ifndef CONFIG_CUDA_3_2 | ||
123 | char registry_keys[512]; | ||
124 | #endif | ||
125 | 177 | ||
126 | /* keep track of any pending bottom halfes */ | 178 | /* keep track of any pending bottom halfes */ |
127 | struct tasklet_struct tasklet; | 179 | struct tasklet_struct tasklet; |
@@ -136,8 +188,15 @@ typedef struct litmus_nv_linux_state_s { | |||
136 | /* lock for linux-specific alloc queue */ | 188 | /* lock for linux-specific alloc queue */ |
137 | struct semaphore at_lock; | 189 | struct semaphore at_lock; |
138 | 190 | ||
191 | /* !!! This field is all that we're after to determine | ||
192 | !!! the device number of the GPU that spawned a given | ||
193 | vvv tasklet or workqueue item. */ | ||
139 | NvU32 device_num; | 194 | NvU32 device_num; |
140 | struct litmus_nv_linux_state_s *next; | 195 | struct litmus_nv_linux_state_s *next; |
196 | |||
197 | #if NV_MAJOR_V >= 319 | ||
198 | struct drm_device *drm; | ||
199 | #endif | ||
141 | } litmus_nv_linux_state_t; | 200 | } litmus_nv_linux_state_t; |
142 | 201 | ||
143 | 202 | ||
@@ -145,85 +204,93 @@ typedef struct litmus_nv_linux_state_s { | |||
145 | static void __attribute__((unused)) | 204 | static void __attribute__((unused)) |
146 | dump_nvidia_info(const struct tasklet_struct *t) | 205 | dump_nvidia_info(const struct tasklet_struct *t) |
147 | { | 206 | { |
148 | litmus_nv_state_t* nvstate = NULL; | 207 | litmus_nv_state_t* nvstate = NULL; |
149 | litmus_nv_linux_state_t* linuxstate = NULL; | 208 | litmus_nv_linux_state_t* linuxstate = NULL; |
150 | struct pci_dev* pci = NULL; | 209 | struct pci_dev* pci = NULL; |
151 | 210 | ||
152 | nvstate = (litmus_nv_state_t*)(t->data); | 211 | nvstate = (litmus_nv_state_t*)(t->data); |
153 | 212 | ||
154 | if(nvstate) | 213 | if(nvstate) |
155 | { | 214 | { |
156 | TRACE("NV State:\n" | 215 | TRACE("NV State:\n" |
157 | "\ttasklet ptr = %p\n" | 216 | "\ttasklet ptr = %p\n" |
158 | "\tstate ptr = %p\n" | 217 | "\tstate ptr = %p\n" |
159 | "\tprivate data ptr = %p\n" | 218 | "\tprivate data ptr = %p\n" |
160 | "\tos state ptr = %p\n" | 219 | "\tos state ptr = %p\n" |
161 | "\tdomain = %u\n" | 220 | "\tdomain = %u\n" |
162 | "\tbus = %u\n" | 221 | "\tbus = %u\n" |
163 | "\tslot = %u\n" | 222 | "\tslot = %u\n" |
164 | "\tvender_id = %u\n" | 223 | "\tvender_id = %u\n" |
165 | "\tdevice_id = %u\n" | 224 | "\tdevice_id = %u\n" |
166 | "\tsubsystem_id = %u\n" | 225 | "\tsubsystem_id = %u\n" |
167 | "\tgpu_id = %u\n" | 226 | "\tgpu_id = %u\n" |
168 | "\tinterrupt_line = %u\n", | 227 | "\tinterrupt_line = %u\n", |
169 | t, | 228 | t, |
170 | nvstate, | 229 | nvstate, |
171 | nvstate->priv, | 230 | nvstate->priv, |
172 | nvstate->os_state, | 231 | nvstate->os_state, |
173 | nvstate->domain, | 232 | #if NV_MAJOR_V <= 331 |
174 | nvstate->bus, | 233 | nvstate->domain, |
175 | nvstate->slot, | 234 | nvstate->bus, |
176 | nvstate->vendor_id, | 235 | nvstate->slot, |
177 | nvstate->device_id, | 236 | nvstate->vendor_id, |
178 | nvstate->subsystem_id, | 237 | nvstate->device_id, |
179 | nvstate->gpu_id, | 238 | #else |
180 | nvstate->interrupt_line); | 239 | nvstate->pci_info.domain, |
181 | 240 | nvstate->pci_info.bus, | |
182 | linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); | 241 | nvstate->pci_info.slot, |
183 | } | 242 | nvstate->pci_info.vendor_id, |
184 | else | 243 | nvstate->pci_info.device_id, |
185 | { | 244 | #endif |
186 | TRACE("INVALID NVSTATE????\n"); | 245 | nvstate->subsystem_id, |
187 | } | 246 | nvstate->gpu_id, |
188 | 247 | nvstate->interrupt_line); | |
189 | if(linuxstate) | 248 | |
190 | { | 249 | linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); |
191 | int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate); | 250 | } |
192 | int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state)); | 251 | else |
193 | int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); | 252 | { |
194 | 253 | TRACE("INVALID NVSTATE????\n"); | |
195 | TRACE("LINUX NV State:\n" | 254 | } |
196 | "\tlinux nv state ptr: %p\n" | 255 | |
197 | "\taddress of tasklet: %p\n" | 256 | if(linuxstate) |
198 | "\taddress of work: %p\n" | 257 | { |
199 | "\tusage_count: %d\n" | 258 | int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate); |
200 | "\tdevice_num: %u\n" | 259 | int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state)); |
201 | "\ttasklet addr == this tasklet: %d\n" | 260 | int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); |
202 | "\tpci: %p\n", | 261 | |
203 | linuxstate, | 262 | TRACE("LINUX NV State:\n" |
204 | &(linuxstate->tasklet), | 263 | "\tlinux nv state ptr: %p\n" |
205 | &(linuxstate->work), | 264 | "\taddress of tasklet: %p\n" |
206 | atomic_read(&(linuxstate->usage_count)), | 265 | "\taddress of work: %p\n" |
207 | linuxstate->device_num, | 266 | "\tusage_count: %d\n" |
208 | (t == &(linuxstate->tasklet)), | 267 | "\tdevice_num: %u\n" |
209 | linuxstate->dev); | 268 | "\ttasklet addr == this tasklet: %d\n" |
210 | 269 | "\tpci: %p\n", | |
211 | pci = linuxstate->dev; | 270 | linuxstate, |
212 | 271 | &(linuxstate->tasklet), | |
213 | TRACE("Offsets:\n" | 272 | &(linuxstate->work), |
214 | "\tOffset from LinuxState: %d, %x\n" | 273 | atomic_read(&(linuxstate->usage_count)), |
215 | "\tOffset from NVState: %d, %x\n" | 274 | linuxstate->device_num, |
216 | "\tOffset from parameter: %d, %x\n" | 275 | (t == &(linuxstate->tasklet)), |
217 | "\tdevice_num: %u\n", | 276 | linuxstate->dev); |
218 | ls_offset, ls_offset, | 277 | |
219 | ns_offset_raw, ns_offset_raw, | 278 | pci = linuxstate->dev; |
220 | ns_offset_desired, ns_offset_desired, | 279 | |
221 | *((u32*)((void*)nvstate + ns_offset_desired))); | 280 | TRACE("Offsets:\n" |
222 | } | 281 | "\tOffset from LinuxState: %d, %x\n" |
223 | else | 282 | "\tOffset from NVState: %d, %x\n" |
224 | { | 283 | "\tOffset from parameter: %d, %x\n" |
225 | TRACE("INVALID LINUXNVSTATE?????\n"); | 284 | "\tdevice_num: %u\n", |
226 | } | 285 | ls_offset, ls_offset, |
286 | ns_offset_raw, ns_offset_raw, | ||
287 | ns_offset_desired, ns_offset_desired, | ||
288 | *((u32*)((void*)nvstate + ns_offset_desired))); | ||
289 | } | ||
290 | else | ||
291 | { | ||
292 | TRACE("INVALID LINUXNVSTATE?????\n"); | ||
293 | } | ||
227 | } | 294 | } |
228 | #endif | 295 | #endif |
229 | 296 | ||
@@ -235,120 +302,120 @@ static int shutdown_nv_device_reg(void); | |||
235 | void shutdown_nvidia_info(void); | 302 | void shutdown_nvidia_info(void); |
236 | 303 | ||
237 | static int nvidia_going_module_notify(struct notifier_block *self, | 304 | static int nvidia_going_module_notify(struct notifier_block *self, |
238 | unsigned long val, void *data) | 305 | unsigned long val, void *data) |
239 | { | 306 | { |
240 | struct module *mod = data; | 307 | struct module *mod = data; |
241 | 308 | ||
242 | if (nvidia_mod && (mod == nvidia_mod)) { | 309 | if (nvidia_mod && (mod == nvidia_mod)) { |
243 | switch (val) { | 310 | switch (val) { |
244 | case MODULE_STATE_GOING: | 311 | case MODULE_STATE_GOING: |
245 | /* just set our mod reference to null to avoid crash */ | 312 | /* just set our mod reference to null to avoid crash */ |
246 | nvidia_mod = NULL; | 313 | nvidia_mod = NULL; |
247 | mb(); | 314 | mb(); |
248 | break; | 315 | break; |
249 | default: | 316 | default: |
250 | break; | 317 | break; |
251 | } | 318 | } |
252 | } | 319 | } |
253 | 320 | ||
254 | return 0; | 321 | return 0; |
255 | } | 322 | } |
256 | 323 | ||
257 | static struct notifier_block nvidia_going = { | 324 | static struct notifier_block nvidia_going = { |
258 | .notifier_call = nvidia_going_module_notify, | 325 | .notifier_call = nvidia_going_module_notify, |
259 | .priority = 1, | 326 | .priority = 1, |
260 | }; | 327 | }; |
261 | 328 | ||
262 | 329 | ||
263 | struct init_nvinfo_wq_data | 330 | struct init_nvinfo_wq_data |
264 | { | 331 | { |
265 | struct work_struct work; | 332 | struct work_struct work; |
266 | }; | 333 | }; |
267 | 334 | ||
268 | static void __init_nvidia_info(struct work_struct *w) | 335 | static void __init_nvidia_info(struct work_struct *w) |
269 | { | 336 | { |
270 | struct init_nvinfo_wq_data *work = | 337 | struct init_nvinfo_wq_data *work = |
271 | container_of(w, struct init_nvinfo_wq_data, work); | 338 | container_of(w, struct init_nvinfo_wq_data, work); |
272 | struct module* mod; | 339 | struct module* mod; |
273 | 340 | ||
274 | mutex_lock(&module_mutex); | 341 | mutex_lock(&module_mutex); |
275 | mod = find_module("nvidia"); | 342 | mod = find_module("nvidia"); |
276 | mutex_unlock(&module_mutex); | 343 | mutex_unlock(&module_mutex); |
277 | 344 | ||
278 | if(mod != NULL) { | 345 | if(mod != NULL) { |
279 | TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, | 346 | TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, |
280 | (void*)(mod->module_core), | 347 | (void*)(mod->module_core), |
281 | (void*)(mod->module_core) + mod->core_size); | 348 | (void*)(mod->module_core) + mod->core_size); |
282 | 349 | ||
283 | init_nv_device_reg(); | 350 | init_nv_device_reg(); |
284 | nvidia_mod = mod; /* make module visible to others */ | 351 | nvidia_mod = mod; /* make module visible to others */ |
285 | register_module_notifier(&nvidia_going); | 352 | register_module_notifier(&nvidia_going); |
286 | } | 353 | } |
287 | else { | 354 | else { |
288 | TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); | 355 | TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); |
289 | init_nv_device_reg(); | 356 | init_nv_device_reg(); |
290 | } | 357 | } |
291 | 358 | ||
292 | kfree(work); | 359 | kfree(work); |
293 | } | 360 | } |
294 | 361 | ||
295 | int init_nvidia_info(void) | 362 | int init_nvidia_info(void) |
296 | { | 363 | { |
297 | struct init_nvinfo_wq_data *wq_job = | 364 | struct init_nvinfo_wq_data *wq_job = |
298 | kmalloc(sizeof(struct init_nvinfo_wq_data), GFP_ATOMIC); | 365 | kmalloc(sizeof(struct init_nvinfo_wq_data), GFP_ATOMIC); |
299 | INIT_WORK(&wq_job->work, __init_nvidia_info); | 366 | INIT_WORK(&wq_job->work, __init_nvidia_info); |
300 | schedule_work(&wq_job->work); | 367 | schedule_work(&wq_job->work); |
301 | return 0; | 368 | return 0; |
302 | } | 369 | } |
303 | 370 | ||
304 | void shutdown_nvidia_info(void) | 371 | void shutdown_nvidia_info(void) |
305 | { | 372 | { |
306 | if (nvidia_mod) { | 373 | if (nvidia_mod) { |
307 | nvidia_mod = NULL; | 374 | nvidia_mod = NULL; |
308 | mb(); | 375 | mb(); |
309 | 376 | ||
310 | unregister_module_notifier(&nvidia_going); | 377 | unregister_module_notifier(&nvidia_going); |
311 | shutdown_nv_device_reg(); | 378 | shutdown_nv_device_reg(); |
312 | } | 379 | } |
313 | } | 380 | } |
314 | 381 | ||
315 | /* works with pointers to static data inside the module too. */ | 382 | /* works with pointers to static data inside the module too. */ |
316 | int is_nvidia_func(void* func_addr) | 383 | int is_nvidia_func(void* func_addr) |
317 | { | 384 | { |
318 | int ret = 0; | 385 | int ret = 0; |
319 | struct module* mod = nvidia_mod; | 386 | struct module* mod = nvidia_mod; |
320 | if(mod) | 387 | if(mod) |
321 | { | 388 | { |
322 | ret = within_module_core((long unsigned int)func_addr, mod); | 389 | ret = within_module_core((long unsigned int)func_addr, mod); |
323 | /* | 390 | /* |
324 | if(ret) | 391 | if(ret) |
325 | { | 392 | { |
326 | TRACE("%s : %p is in NVIDIA module: %d\n", | 393 | TRACE("%s : %p is in NVIDIA module: %d\n", |
327 | __FUNCTION__, func_addr, ret); | 394 | __FUNCTION__, func_addr, ret); |
328 | }*/ | 395 | }*/ |
329 | } | 396 | } |
330 | 397 | ||
331 | return(ret); | 398 | return(ret); |
332 | } | 399 | } |
333 | 400 | ||
334 | u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) | 401 | u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) |
335 | { | 402 | { |
336 | // life is too short to use hard-coded offsets. update this later. | 403 | // life is too short to use hard-coded offsets. update this later. |
337 | litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data); | 404 | litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data); |
338 | litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); | 405 | litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); |
339 | 406 | ||
340 | BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); | 407 | BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); |
341 | 408 | ||
342 | return(linuxstate->device_num); | 409 | return(linuxstate->device_num); |
343 | } | 410 | } |
344 | 411 | ||
345 | u32 get_work_nv_device_num(const struct work_struct *t) | 412 | u32 get_work_nv_device_num(const struct work_struct *t) |
346 | { | 413 | { |
347 | // offset determined through observed behavior of the NV driver. | 414 | // offset determined through observed behavior of the NV driver. |
348 | const int DEVICE_NUM_OFFSET = sizeof(struct work_struct); | 415 | const int DEVICE_NUM_OFFSET = sizeof(struct work_struct); |
349 | void* state = (void*)(t); | 416 | void* state = (void*)(t); |
350 | void** device_num_ptr = state + DEVICE_NUM_OFFSET; | 417 | void** device_num_ptr = state + DEVICE_NUM_OFFSET; |
351 | return(*((u32*)(*device_num_ptr))); | 418 | return(*((u32*)(*device_num_ptr))); |
352 | } | 419 | } |
353 | 420 | ||
354 | 421 | ||
@@ -358,23 +425,23 @@ u32 get_work_nv_device_num(const struct work_struct *t) | |||
358 | 425 | ||
359 | 426 | ||
360 | typedef struct { | 427 | typedef struct { |
361 | raw_spinlock_t lock; | 428 | raw_spinlock_t lock; |
362 | struct binheap owners; | 429 | struct binheap owners; |
363 | 430 | ||
364 | #ifdef CONFIG_LITMUS_SOFTIRQD | 431 | #ifdef CONFIG_LITMUS_SOFTIRQD |
365 | klmirqd_callback_t interrupt_callback; | 432 | klmirqd_callback_t interrupt_callback; |
366 | struct task_struct* interrupt_thread; | 433 | struct task_struct* interrupt_thread; |
367 | unsigned int interrupt_ready:1; /* todo: make threads check for the ready flag */ | 434 | unsigned int interrupt_ready:1; /* todo: make threads check for the ready flag */ |
368 | 435 | ||
369 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 436 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
370 | klmirqd_callback_t workq_callback; | 437 | klmirqd_callback_t workq_callback; |
371 | struct task_struct* workq_thread; | 438 | struct task_struct* workq_thread; |
372 | unsigned int workq_ready:1; | 439 | unsigned int workq_ready:1; |
373 | #endif | 440 | #endif |
374 | #endif | 441 | #endif |
375 | 442 | ||
376 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG | 443 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG |
377 | struct tasklet_struct nv_klmirqd_dbg_tasklet; | 444 | struct tasklet_struct nv_klmirqd_dbg_tasklet; |
378 | #endif | 445 | #endif |
379 | }nv_device_registry_t; | 446 | }nv_device_registry_t; |
380 | 447 | ||
@@ -386,35 +453,35 @@ static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; | |||
386 | #ifdef CONFIG_LITMUS_SOFTIRQD | 453 | #ifdef CONFIG_LITMUS_SOFTIRQD |
387 | static int nvidia_launch_interrupt_cb(void *arg) | 454 | static int nvidia_launch_interrupt_cb(void *arg) |
388 | { | 455 | { |
389 | unsigned long flags; | 456 | unsigned long flags; |
390 | int reg_device_id = (int)(long long)(arg); | 457 | int reg_device_id = (int)(long long)(arg); |
391 | nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; | 458 | nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; |
392 | 459 | ||
393 | TRACE("nvklmirqd callback for GPU %d\n", reg_device_id); | 460 | TRACE("nvklmirqd callback for GPU %d\n", reg_device_id); |
394 | 461 | ||
395 | raw_spin_lock_irqsave(&reg->lock, flags); | 462 | raw_spin_lock_irqsave(&reg->lock, flags); |
396 | reg->interrupt_thread = current; | 463 | reg->interrupt_thread = current; |
397 | reg->interrupt_ready = 1; | 464 | reg->interrupt_ready = 1; |
398 | raw_spin_unlock_irqrestore(&reg->lock, flags); | 465 | raw_spin_unlock_irqrestore(&reg->lock, flags); |
399 | 466 | ||
400 | return 0; | 467 | return 0; |
401 | } | 468 | } |
402 | 469 | ||
403 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 470 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
404 | static int nvidia_launch_workq_cb(void *arg) | 471 | static int nvidia_launch_workq_cb(void *arg) |
405 | { | 472 | { |
406 | unsigned long flags; | 473 | unsigned long flags; |
407 | int reg_device_id = (int)(long long)(arg); | 474 | int reg_device_id = (int)(long long)(arg); |
408 | nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; | 475 | nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; |
409 | 476 | ||
410 | TRACE("nvklmworkerd callback for GPU %d\n", reg_device_id); | 477 | TRACE("nvklmworkerd callback for GPU %d\n", reg_device_id); |
411 | 478 | ||
412 | raw_spin_lock_irqsave(&reg->lock, flags); | 479 | raw_spin_lock_irqsave(&reg->lock, flags); |
413 | reg->workq_thread = current; | 480 | reg->workq_thread = current; |
414 | reg->workq_ready = 1; | 481 | reg->workq_ready = 1; |
415 | raw_spin_unlock_irqrestore(&reg->lock, flags); | 482 | raw_spin_unlock_irqrestore(&reg->lock, flags); |
416 | 483 | ||
417 | return 0; | 484 | return 0; |
418 | } | 485 | } |
419 | #endif | 486 | #endif |
420 | #endif | 487 | #endif |
@@ -422,133 +489,133 @@ static int nvidia_launch_workq_cb(void *arg) | |||
422 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG | 489 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG |
423 | struct nv_klmirqd_dbg_timer_struct | 490 | struct nv_klmirqd_dbg_timer_struct |
424 | { | 491 | { |
425 | struct hrtimer timer; | 492 | struct hrtimer timer; |
426 | }; | 493 | }; |
427 | 494 | ||
428 | static struct nv_klmirqd_dbg_timer_struct nv_klmirqd_dbg_timer; | 495 | static struct nv_klmirqd_dbg_timer_struct nv_klmirqd_dbg_timer; |
429 | 496 | ||
430 | static void nv_klmirqd_arm_dbg_timer(lt_t relative_time) | 497 | static void nv_klmirqd_arm_dbg_timer(lt_t relative_time) |
431 | { | 498 | { |
432 | lt_t when_to_fire = litmus_clock() + relative_time; | 499 | lt_t when_to_fire = litmus_clock() + relative_time; |
433 | 500 | ||
434 | TRACE("next nv tasklet in %d ns\n", relative_time); | 501 | TRACE("next nv tasklet in %d ns\n", relative_time); |
435 | 502 | ||
436 | __hrtimer_start_range_ns(&nv_klmirqd_dbg_timer.timer, | 503 | __hrtimer_start_range_ns(&nv_klmirqd_dbg_timer.timer, |
437 | ns_to_ktime(when_to_fire), | 504 | ns_to_ktime(when_to_fire), |
438 | 0, | 505 | 0, |
439 | HRTIMER_MODE_ABS_PINNED, | 506 | HRTIMER_MODE_ABS_PINNED, |
440 | 0); | 507 | 0); |
441 | } | 508 | } |
442 | 509 | ||
443 | static void nv_klmirqd_dbg_tasklet_func(unsigned long arg) | 510 | static void nv_klmirqd_dbg_tasklet_func(unsigned long arg) |
444 | { | 511 | { |
445 | lt_t now = litmus_clock(); | 512 | lt_t now = litmus_clock(); |
446 | nv_device_registry_t *reg = (nv_device_registry_t*)arg; | 513 | nv_device_registry_t *reg = (nv_device_registry_t*)arg; |
447 | int gpunum = reg - &NV_DEVICE_REG[0]; | 514 | int gpunum = reg - &NV_DEVICE_REG[0]; |
448 | 515 | ||
449 | TRACE("nv klmirqd routine invoked for GPU %d!\n", gpunum); | 516 | TRACE("nv klmirqd routine invoked for GPU %d!\n", gpunum); |
450 | 517 | ||
451 | /* set up the next timer */ | 518 | /* set up the next timer */ |
452 | nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. | 519 | nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. |
453 | } | 520 | } |
454 | 521 | ||
455 | 522 | ||
456 | static enum hrtimer_restart nvklmirqd_timer_func(struct hrtimer *timer) | 523 | static enum hrtimer_restart nvklmirqd_timer_func(struct hrtimer *timer) |
457 | { | 524 | { |
458 | lt_t now = litmus_clock(); | 525 | lt_t now = litmus_clock(); |
459 | int gpu = (int)(now % num_online_gpus()); | 526 | int gpu = (int)(now % num_online_gpus()); |
460 | nv_device_registry_t *reg; | 527 | nv_device_registry_t *reg; |
461 | 528 | ||
462 | TRACE("nvklmirqd_timer invoked!\n"); | 529 | TRACE("nvklmirqd_timer invoked!\n"); |
463 | 530 | ||
464 | reg = &NV_DEVICE_REG[gpu]; | 531 | reg = &NV_DEVICE_REG[gpu]; |
465 | 532 | ||
466 | if (reg->interrupt_thread && reg->interrupt_ready) { | 533 | if (reg->interrupt_thread && reg->interrupt_ready) { |
467 | TRACE("Adding a tasklet for GPU %d\n", gpu); | 534 | TRACE("Adding a tasklet for GPU %d\n", gpu); |
468 | litmus_tasklet_schedule(&reg->nv_klmirqd_dbg_tasklet, reg->interrupt_thread); | 535 | litmus_tasklet_schedule(&reg->nv_klmirqd_dbg_tasklet, reg->interrupt_thread); |
469 | } | 536 | } |
470 | else { | 537 | else { |
471 | TRACE("nv klmirqd is not ready!\n"); | 538 | TRACE("nv klmirqd is not ready!\n"); |
472 | nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. | 539 | nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. |
473 | } | 540 | } |
474 | 541 | ||
475 | return HRTIMER_NORESTART; | 542 | return HRTIMER_NORESTART; |
476 | } | 543 | } |
477 | #endif | 544 | #endif |
478 | 545 | ||
479 | 546 | ||
480 | static int gpu_owner_max_priority_order(struct binheap_node *a, | 547 | static int gpu_owner_max_priority_order(struct binheap_node *a, |
481 | struct binheap_node *b) | 548 | struct binheap_node *b) |
482 | { | 549 | { |
483 | struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, gpu_owner_node), | 550 | struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, gpu_owner_node), |
484 | struct task_struct, rt_param); | 551 | struct task_struct, rt_param); |
485 | struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, gpu_owner_node), | 552 | struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, gpu_owner_node), |
486 | struct task_struct, rt_param); | 553 | struct task_struct, rt_param); |
487 | 554 | ||
488 | BUG_ON(!d_a); | 555 | BUG_ON(!d_a); |
489 | BUG_ON(!d_b); | 556 | BUG_ON(!d_b); |
490 | 557 | ||
491 | return litmus->compare(d_a, d_b); | 558 | return litmus->compare(d_a, d_b); |
492 | } | 559 | } |
493 | 560 | ||
494 | static int init_nv_device_reg(void) | 561 | static int init_nv_device_reg(void) |
495 | { | 562 | { |
496 | int i; | 563 | int i; |
497 | 564 | ||
498 | #ifdef CONFIG_LITMUS_SOFTIRQD | 565 | #ifdef CONFIG_LITMUS_SOFTIRQD |
499 | if (!klmirqd_is_ready()) { | 566 | if (!klmirqd_is_ready()) { |
500 | TRACE("klmirqd is not ready!\n"); | 567 | TRACE("klmirqd is not ready!\n"); |
501 | return 0; | 568 | return 0; |
502 | } | 569 | } |
503 | #endif | 570 | #endif |
504 | 571 | ||
505 | memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); | 572 | memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); |
506 | mb(); | 573 | mb(); |
507 | 574 | ||
508 | for(i = 0; i < num_online_gpus(); ++i) { | 575 | for(i = 0; i < num_online_gpus(); ++i) { |
509 | raw_spin_lock_init(&NV_DEVICE_REG[i].lock); | 576 | raw_spin_lock_init(&NV_DEVICE_REG[i].lock); |
510 | INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); | 577 | INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); |
511 | 578 | ||
512 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG | 579 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG |
513 | tasklet_init(&NV_DEVICE_REG[i].nv_klmirqd_dbg_tasklet, | 580 | tasklet_init(&NV_DEVICE_REG[i].nv_klmirqd_dbg_tasklet, |
514 | nv_klmirqd_dbg_tasklet_func, (unsigned long)&NV_DEVICE_REG[i]); | 581 | nv_klmirqd_dbg_tasklet_func, (unsigned long)&NV_DEVICE_REG[i]); |
515 | #endif | 582 | #endif |
516 | 583 | ||
517 | #ifdef CONFIG_LITMUS_SOFTIRQD | 584 | #ifdef CONFIG_LITMUS_SOFTIRQD |
518 | { | 585 | { |
519 | char name[MAX_KLMIRQD_NAME_LEN+1]; | 586 | char name[MAX_KLMIRQD_NAME_LEN+1]; |
520 | int default_cpu = litmus->map_gpu_to_cpu(i); | 587 | int default_cpu = litmus->map_gpu_to_cpu(i); |
521 | 588 | ||
522 | /* spawn the interrupt thread */ | 589 | /* spawn the interrupt thread */ |
523 | snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmirqd%d", i); | 590 | snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmirqd%d", i); |
524 | NV_DEVICE_REG[i].interrupt_callback.func = nvidia_launch_interrupt_cb; | 591 | NV_DEVICE_REG[i].interrupt_callback.func = nvidia_launch_interrupt_cb; |
525 | NV_DEVICE_REG[i].interrupt_callback.arg = (void*)(long long)(i); | 592 | NV_DEVICE_REG[i].interrupt_callback.arg = (void*)(long long)(i); |
526 | mb(); | 593 | mb(); |
527 | if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].interrupt_callback) != 0) { | 594 | if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].interrupt_callback) != 0) { |
528 | TRACE("Failed to create nvklmirqd thread for GPU %d\n", i); | 595 | TRACE("Failed to create nvklmirqd thread for GPU %d\n", i); |
529 | } | 596 | } |
530 | 597 | ||
531 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 598 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
532 | /* spawn the workqueue thread */ | 599 | /* spawn the workqueue thread */ |
533 | snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmworker%d", i); | 600 | snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmworker%d", i); |
534 | NV_DEVICE_REG[i].workq_callback.func = nvidia_launch_workq_cb; | 601 | NV_DEVICE_REG[i].workq_callback.func = nvidia_launch_workq_cb; |
535 | NV_DEVICE_REG[i].workq_callback.arg = (void*)(long long)(i); | 602 | NV_DEVICE_REG[i].workq_callback.arg = (void*)(long long)(i); |
536 | mb(); | 603 | mb(); |
537 | if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].workq_callback) != 0) { | 604 | if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].workq_callback) != 0) { |
538 | TRACE("Failed to create nvklmworkqd thread for GPU %d\n", i); | 605 | TRACE("Failed to create nvklmworkqd thread for GPU %d\n", i); |
539 | } | 606 | } |
540 | #endif | 607 | #endif |
541 | } | 608 | } |
542 | #endif | 609 | #endif |
543 | } | 610 | } |
544 | 611 | ||
545 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG | 612 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG |
546 | hrtimer_init(&nv_klmirqd_dbg_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 613 | hrtimer_init(&nv_klmirqd_dbg_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
547 | nv_klmirqd_dbg_timer.timer.function = nvklmirqd_timer_func; | 614 | nv_klmirqd_dbg_timer.timer.function = nvklmirqd_timer_func; |
548 | nv_klmirqd_arm_dbg_timer(NSEC_PER_MSEC * 1000); | 615 | nv_klmirqd_arm_dbg_timer(NSEC_PER_MSEC * 1000); |
549 | #endif | 616 | #endif |
550 | 617 | ||
551 | return(1); | 618 | return(1); |
552 | } | 619 | } |
553 | 620 | ||
554 | 621 | ||
@@ -556,262 +623,262 @@ static int init_nv_device_reg(void) | |||
556 | /* spawning of klimirqd threads can race with init_nv_device_reg()!!!! */ | 623 | /* spawning of klimirqd threads can race with init_nv_device_reg()!!!! */ |
557 | static int shutdown_nv_device_reg(void) | 624 | static int shutdown_nv_device_reg(void) |
558 | { | 625 | { |
559 | TRACE("Shutting down nv device registration.\n"); | 626 | TRACE("Shutting down nv device registration.\n"); |
560 | 627 | ||
561 | #ifdef CONFIG_LITMUS_SOFTIRQD | 628 | #ifdef CONFIG_LITMUS_SOFTIRQD |
562 | { | 629 | { |
563 | unsigned long flags; | 630 | unsigned long flags; |
564 | int i; | 631 | int i; |
565 | nv_device_registry_t *reg; | 632 | nv_device_registry_t *reg; |
566 | 633 | ||
567 | for (i = 0; i < num_online_gpus(); ++i) { | 634 | for (i = 0; i < num_online_gpus(); ++i) { |
568 | 635 | ||
569 | TRACE("Shutting down GPU %d.\n", i); | 636 | TRACE("Shutting down GPU %d.\n", i); |
570 | 637 | ||
571 | reg = &NV_DEVICE_REG[i]; | 638 | reg = &NV_DEVICE_REG[i]; |
572 | 639 | ||
573 | if ((reg->interrupt_thread && reg->interrupt_ready) | 640 | if ((reg->interrupt_thread && reg->interrupt_ready) |
574 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 641 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
575 | || (reg->workq_thread && reg->workq_ready) | 642 | || (reg->workq_thread && reg->workq_ready) |
576 | #endif | 643 | #endif |
577 | ) | 644 | ) |
578 | { | 645 | { |
579 | raw_spin_lock_irqsave(&reg->lock, flags); | 646 | raw_spin_lock_irqsave(&reg->lock, flags); |
580 | if (reg->interrupt_thread && reg->interrupt_ready) { | 647 | if (reg->interrupt_thread && reg->interrupt_ready) { |
581 | struct task_struct* th = reg->interrupt_thread; | 648 | struct task_struct* th = reg->interrupt_thread; |
582 | reg->interrupt_thread = NULL; | 649 | reg->interrupt_thread = NULL; |
583 | mb(); | 650 | mb(); |
584 | reg->interrupt_ready = 0; | 651 | reg->interrupt_ready = 0; |
585 | mb(); | 652 | mb(); |
586 | raw_spin_unlock_irqrestore(&reg->lock, flags); | 653 | raw_spin_unlock_irqrestore(&reg->lock, flags); |
587 | kill_klmirqd_thread(th); | 654 | kill_klmirqd_thread(th); |
588 | } | 655 | } |
589 | else | 656 | else |
590 | raw_spin_unlock_irqrestore(&reg->lock, flags); | 657 | raw_spin_unlock_irqrestore(&reg->lock, flags); |
591 | 658 | ||
592 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 659 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
593 | raw_spin_lock_irqsave(&reg->lock, flags); | 660 | raw_spin_lock_irqsave(&reg->lock, flags); |
594 | if (reg->workq_thread && reg->workq_ready) { | 661 | if (reg->workq_thread && reg->workq_ready) { |
595 | struct task_struct* th = reg->workq_thread; | 662 | struct task_struct* th = reg->workq_thread; |
596 | reg->workq_thread = NULL; | 663 | reg->workq_thread = NULL; |
597 | mb(); | 664 | mb(); |
598 | reg->workq_ready = 0; | 665 | reg->workq_ready = 0; |
599 | mb(); | 666 | mb(); |
600 | 667 | ||
601 | raw_spin_unlock_irqrestore(&reg->lock, flags); | 668 | raw_spin_unlock_irqrestore(&reg->lock, flags); |
602 | kill_klmirqd_thread(th); | 669 | kill_klmirqd_thread(th); |
603 | } | 670 | } |
604 | else | 671 | else |
605 | raw_spin_unlock_irqrestore(&reg->lock, flags); | 672 | raw_spin_unlock_irqrestore(&reg->lock, flags); |
606 | #endif | 673 | #endif |
607 | } | 674 | } |
608 | 675 | ||
609 | while (!binheap_empty(&reg->owners)) { | 676 | while (!binheap_empty(&reg->owners)) { |
610 | binheap_delete_root(&reg->owners, struct rt_param, gpu_owner_node); | 677 | binheap_delete_root(&reg->owners, struct rt_param, gpu_owner_node); |
611 | } | 678 | } |
612 | } | 679 | } |
613 | } | 680 | } |
614 | #endif | 681 | #endif |
615 | 682 | ||
616 | return(1); | 683 | return(1); |
617 | } | 684 | } |
618 | 685 | ||
619 | 686 | ||
620 | /* use to get the owner of nv_device_id. */ | 687 | /* use to get the owner of nv_device_id. */ |
621 | struct task_struct* get_nv_max_device_owner(u32 target_device_id) | 688 | struct task_struct* get_nv_max_device_owner(u32 target_device_id) |
622 | { | 689 | { |
623 | struct task_struct *owner = NULL; | 690 | struct task_struct *owner = NULL; |
624 | nv_device_registry_t *reg; | 691 | nv_device_registry_t *reg; |
625 | 692 | ||
626 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | 693 | BUG_ON(target_device_id >= NV_DEVICE_NUM); |
627 | 694 | ||
628 | reg = &NV_DEVICE_REG[target_device_id]; | 695 | reg = &NV_DEVICE_REG[target_device_id]; |
629 | 696 | ||
630 | if (!binheap_empty(&reg->owners)) { | 697 | if (!binheap_empty(&reg->owners)) { |
631 | struct task_struct *hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), | 698 | struct task_struct *hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), |
632 | struct task_struct, rt_param); | 699 | struct task_struct, rt_param); |
633 | TRACE_CUR("hp: %s/%d\n", hp->comm, hp->pid); | 700 | TRACE_CUR("hp: %s/%d\n", hp->comm, hp->pid); |
634 | 701 | ||
635 | owner = hp; | 702 | owner = hp; |
636 | } | 703 | } |
637 | 704 | ||
638 | return(owner); | 705 | return(owner); |
639 | } | 706 | } |
640 | 707 | ||
641 | 708 | ||
642 | #ifdef CONFIG_LITMUS_SOFTIRQD | 709 | #ifdef CONFIG_LITMUS_SOFTIRQD |
643 | 710 | ||
644 | typedef enum { | 711 | typedef enum { |
645 | INTERRUPT_TH, | 712 | INTERRUPT_TH, |
646 | WORKQ_TH | 713 | WORKQ_TH |
647 | } nvklmtype_t; | 714 | } nvklmtype_t; |
648 | 715 | ||
649 | static struct task_struct* __get_klm_thread(nv_device_registry_t* reg, nvklmtype_t type) | 716 | static struct task_struct* __get_klm_thread(nv_device_registry_t* reg, nvklmtype_t type) |
650 | { | 717 | { |
651 | struct task_struct *klmirqd = NULL; | 718 | struct task_struct *klmirqd = NULL; |
652 | 719 | ||
653 | switch(type) | 720 | switch(type) |
654 | { | 721 | { |
655 | case INTERRUPT_TH: | 722 | case INTERRUPT_TH: |
656 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON | 723 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON |
657 | case WORKQ_TH: | 724 | case WORKQ_TH: |
658 | #endif | 725 | #endif |
659 | if(likely(reg->interrupt_ready)) | 726 | if(likely(reg->interrupt_ready)) |
660 | klmirqd = reg->interrupt_thread; | 727 | klmirqd = reg->interrupt_thread; |
661 | break; | 728 | break; |
662 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 729 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
663 | case WORKQ_TH: | 730 | case WORKQ_TH: |
664 | if(likely(reg->workq_ready)) | 731 | if(likely(reg->workq_ready)) |
665 | klmirqd = reg->workq_thread; | 732 | klmirqd = reg->workq_thread; |
666 | break; | 733 | break; |
667 | #endif | 734 | #endif |
668 | default: | 735 | default: |
669 | break; | 736 | break; |
670 | } | 737 | } |
671 | 738 | ||
672 | return klmirqd; | 739 | return klmirqd; |
673 | } | 740 | } |
674 | 741 | ||
675 | static struct task_struct* __get_and_lock_klm_thread(nv_device_registry_t* reg, unsigned long* flags, nvklmtype_t type) | 742 | static struct task_struct* __get_and_lock_klm_thread(nv_device_registry_t* reg, unsigned long* flags, nvklmtype_t type) |
676 | { | 743 | { |
677 | struct task_struct *klmirqd; | 744 | struct task_struct *klmirqd; |
678 | 745 | ||
679 | raw_spin_lock_irqsave(&reg->lock, *flags); | 746 | raw_spin_lock_irqsave(&reg->lock, *flags); |
680 | klmirqd = __get_klm_thread(reg, type); | 747 | klmirqd = __get_klm_thread(reg, type); |
681 | 748 | ||
682 | if (!klmirqd) { | 749 | if (!klmirqd) { |
683 | /* unlock if thread does not exist or is not ready */ | 750 | /* unlock if thread does not exist or is not ready */ |
684 | raw_spin_unlock_irqrestore(&reg->lock, *flags); | 751 | raw_spin_unlock_irqrestore(&reg->lock, *flags); |
685 | } | 752 | } |
686 | 753 | ||
687 | return klmirqd; | 754 | return klmirqd; |
688 | } | 755 | } |
689 | 756 | ||
690 | static void __unlock_klm_thread(nv_device_registry_t* reg, unsigned long* flags, nvklmtype_t type) | 757 | static void __unlock_klm_thread(nv_device_registry_t* reg, unsigned long* flags, nvklmtype_t type) |
691 | { | 758 | { |
692 | /* workq and interrupts share a lock per GPU */ | 759 | /* workq and interrupts share a lock per GPU */ |
693 | raw_spin_unlock_irqrestore(&reg->lock, *flags); | 760 | raw_spin_unlock_irqrestore(&reg->lock, *flags); |
694 | } | 761 | } |
695 | 762 | ||
696 | struct task_struct* get_and_lock_nvklmirqd_thread(u32 target_device_id, unsigned long* flags) | 763 | struct task_struct* get_and_lock_nvklmirqd_thread(u32 target_device_id, unsigned long* flags) |
697 | { | 764 | { |
698 | nv_device_registry_t *reg; | 765 | nv_device_registry_t *reg; |
699 | struct task_struct *th; | 766 | struct task_struct *th; |
700 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | 767 | BUG_ON(target_device_id >= NV_DEVICE_NUM); |
701 | 768 | ||
702 | if (unlikely(nvidia_mod == NULL)) | 769 | if (unlikely(nvidia_mod == NULL)) |
703 | return NULL; | 770 | return NULL; |
704 | 771 | ||
705 | reg = &NV_DEVICE_REG[target_device_id]; | 772 | reg = &NV_DEVICE_REG[target_device_id]; |
706 | th = __get_and_lock_klm_thread(reg, flags, INTERRUPT_TH); | 773 | th = __get_and_lock_klm_thread(reg, flags, INTERRUPT_TH); |
707 | 774 | ||
708 | barrier(); | 775 | barrier(); |
709 | if (unlikely(nvidia_mod == NULL)) { | 776 | if (unlikely(nvidia_mod == NULL)) { |
710 | th = NULL; | 777 | th = NULL; |
711 | __unlock_klm_thread(reg, flags, INTERRUPT_TH); | 778 | __unlock_klm_thread(reg, flags, INTERRUPT_TH); |
712 | } | 779 | } |
713 | 780 | ||
714 | return th; | 781 | return th; |
715 | } | 782 | } |
716 | 783 | ||
717 | void unlock_nvklmirqd_thread(u32 target_device_id, unsigned long* flags) | 784 | void unlock_nvklmirqd_thread(u32 target_device_id, unsigned long* flags) |
718 | { | 785 | { |
719 | nv_device_registry_t *reg; | 786 | nv_device_registry_t *reg; |
720 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | 787 | BUG_ON(target_device_id >= NV_DEVICE_NUM); |
721 | reg = &NV_DEVICE_REG[target_device_id]; | 788 | reg = &NV_DEVICE_REG[target_device_id]; |
722 | __unlock_klm_thread(reg, flags, INTERRUPT_TH); | 789 | __unlock_klm_thread(reg, flags, INTERRUPT_TH); |
723 | } | 790 | } |
724 | 791 | ||
725 | struct task_struct* get_nvklmirqd_thread(u32 target_device_id) | 792 | struct task_struct* get_nvklmirqd_thread(u32 target_device_id) |
726 | { | 793 | { |
727 | /* should this function be allowed? who will use klmirqd thread without thread safety? */ | 794 | /* should this function be allowed? who will use klmirqd thread without thread safety? */ |
728 | unsigned long flags; | 795 | unsigned long flags; |
729 | struct task_struct *klmirqd; | 796 | struct task_struct *klmirqd; |
730 | klmirqd = get_and_lock_nvklmirqd_thread(target_device_id, &flags); | 797 | klmirqd = get_and_lock_nvklmirqd_thread(target_device_id, &flags); |
731 | if(klmirqd) | 798 | if(klmirqd) |
732 | unlock_nvklmirqd_thread(target_device_id, &flags); | 799 | unlock_nvklmirqd_thread(target_device_id, &flags); |
733 | return klmirqd; | 800 | return klmirqd; |
734 | } | 801 | } |
735 | 802 | ||
736 | #if defined(CONFIG_LITMUS_NVIDIA_WORKQ_ON) || defined(CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED) | 803 | #if defined(CONFIG_LITMUS_NVIDIA_WORKQ_ON) || defined(CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED) |
737 | 804 | ||
738 | struct task_struct* get_and_lock_nvklmworkqd_thread(u32 target_device_id, unsigned long* flags) | 805 | struct task_struct* get_and_lock_nvklmworkqd_thread(u32 target_device_id, unsigned long* flags) |
739 | { | 806 | { |
740 | nv_device_registry_t *reg; | 807 | nv_device_registry_t *reg; |
741 | struct task_struct *th; | 808 | struct task_struct *th; |
742 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | 809 | BUG_ON(target_device_id >= NV_DEVICE_NUM); |
743 | 810 | ||
744 | if (unlikely(nvidia_mod == NULL)) | 811 | if (unlikely(nvidia_mod == NULL)) |
745 | return NULL; | 812 | return NULL; |
746 | 813 | ||
747 | reg = &NV_DEVICE_REG[target_device_id]; | 814 | reg = &NV_DEVICE_REG[target_device_id]; |
748 | th = __get_and_lock_klm_thread(reg, flags, WORKQ_TH); | 815 | th = __get_and_lock_klm_thread(reg, flags, WORKQ_TH); |
749 | 816 | ||
750 | barrier(); | 817 | barrier(); |
751 | if (unlikely(nvidia_mod == NULL)) { | 818 | if (unlikely(nvidia_mod == NULL)) { |
752 | th = NULL; | 819 | th = NULL; |
753 | __unlock_klm_thread(reg, flags, WORKQ_TH); | 820 | __unlock_klm_thread(reg, flags, WORKQ_TH); |
754 | } | 821 | } |
755 | 822 | ||
756 | return th; | 823 | return th; |
757 | } | 824 | } |
758 | 825 | ||
759 | void unlock_nvklmworkqd_thread(u32 target_device_id, unsigned long* flags) | 826 | void unlock_nvklmworkqd_thread(u32 target_device_id, unsigned long* flags) |
760 | { | 827 | { |
761 | nv_device_registry_t *reg; | 828 | nv_device_registry_t *reg; |
762 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | 829 | BUG_ON(target_device_id >= NV_DEVICE_NUM); |
763 | reg = &NV_DEVICE_REG[target_device_id]; | 830 | reg = &NV_DEVICE_REG[target_device_id]; |
764 | __unlock_klm_thread(reg, flags, WORKQ_TH); | 831 | __unlock_klm_thread(reg, flags, WORKQ_TH); |
765 | } | 832 | } |
766 | 833 | ||
767 | 834 | ||
768 | struct task_struct* get_nvklmworkqd_thread(u32 target_device_id) | 835 | struct task_struct* get_nvklmworkqd_thread(u32 target_device_id) |
769 | { | 836 | { |
770 | /* should this function be allowed? who will use klmirqd thread without thread safety? */ | 837 | /* should this function be allowed? who will use klmirqd thread without thread safety? */ |
771 | unsigned long flags; | 838 | unsigned long flags; |
772 | struct task_struct *klmirqd; | 839 | struct task_struct *klmirqd; |
773 | klmirqd = get_and_lock_nvklmworkqd_thread(target_device_id, &flags); | 840 | klmirqd = get_and_lock_nvklmworkqd_thread(target_device_id, &flags); |
774 | if(klmirqd) | 841 | if(klmirqd) |
775 | unlock_nvklmworkqd_thread(target_device_id, &flags); | 842 | unlock_nvklmworkqd_thread(target_device_id, &flags); |
776 | return klmirqd; | 843 | return klmirqd; |
777 | } | 844 | } |
778 | #endif // end WORKQs | 845 | #endif // end WORKQs |
779 | 846 | ||
780 | 847 | ||
781 | static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd, struct task_struct *hp) | 848 | static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd, struct task_struct *hp) |
782 | { | 849 | { |
783 | int retval = 0; | 850 | int retval = 0; |
784 | 851 | ||
785 | /* | 852 | /* |
786 | TRACE_CUR("Increasing priority of %s/%d to %s/%d.\n", | 853 | TRACE_CUR("Increasing priority of %s/%d to %s/%d.\n", |
787 | klmirqd->comm, klmirqd->pid, | 854 | klmirqd->comm, klmirqd->pid, |
788 | (hp) ? hp->comm : "null", | 855 | (hp) ? hp->comm : "null", |
789 | (hp) ? hp->pid : 0); | 856 | (hp) ? hp->pid : 0); |
790 | */ | 857 | */ |
791 | 858 | ||
792 | /* the klmirqd thread should never attempt to hold a litmus-level real-time | 859 | /* the klmirqd thread should never attempt to hold a litmus-level real-time |
793 | * so nested support is not required */ | 860 | * so nested support is not required */ |
794 | retval = litmus->__increase_prio(klmirqd, hp); | 861 | retval = litmus->__increase_prio(klmirqd, hp); |
795 | 862 | ||
796 | return retval; | 863 | return retval; |
797 | } | 864 | } |
798 | 865 | ||
799 | static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct task_struct *hp, int budget_triggered) | 866 | static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct task_struct *hp, int budget_triggered) |
800 | { | 867 | { |
801 | int retval = 0; | 868 | int retval = 0; |
802 | 869 | ||
803 | /* | 870 | /* |
804 | TRACE_CUR("Decreasing priority of %s/%d to %s/%d.\n", | 871 | TRACE_CUR("Decreasing priority of %s/%d to %s/%d.\n", |
805 | klmirqd->comm, klmirqd->pid, | 872 | klmirqd->comm, klmirqd->pid, |
806 | (hp) ? hp->comm : "null", | 873 | (hp) ? hp->comm : "null", |
807 | (hp) ? hp->pid : 0); | 874 | (hp) ? hp->pid : 0); |
808 | */ | 875 | */ |
809 | 876 | ||
810 | /* the klmirqd thread should never attempt to hold a litmus-level real-time | 877 | /* the klmirqd thread should never attempt to hold a litmus-level real-time |
811 | * so nested support is not required */ | 878 | * so nested support is not required */ |
812 | retval = litmus->__decrease_prio(klmirqd, hp, budget_triggered); | 879 | retval = litmus->__decrease_prio(klmirqd, hp, budget_triggered); |
813 | 880 | ||
814 | return retval; | 881 | return retval; |
815 | } | 882 | } |
816 | #endif // end CONFIG_LITMUS_SOFTIRQD | 883 | #endif // end CONFIG_LITMUS_SOFTIRQD |
817 | 884 | ||
@@ -821,159 +888,159 @@ static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct tas | |||
821 | /* call when an gpu owner becomes real-time */ | 888 | /* call when an gpu owner becomes real-time */ |
822 | long enable_gpu_owner(struct task_struct *t) | 889 | long enable_gpu_owner(struct task_struct *t) |
823 | { | 890 | { |
824 | long retval = 0; | 891 | long retval = 0; |
825 | int gpu; | 892 | int gpu; |
826 | nv_device_registry_t *reg; | 893 | nv_device_registry_t *reg; |
827 | 894 | ||
828 | #ifdef CONFIG_LITMUS_SOFTIRQD | 895 | #ifdef CONFIG_LITMUS_SOFTIRQD |
829 | struct task_struct *hp; | 896 | struct task_struct *hp; |
830 | #endif | 897 | #endif |
831 | 898 | ||
832 | if (!tsk_rt(t)->held_gpus) { | 899 | if (!tsk_rt(t)->held_gpus) { |
833 | // TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); | 900 | // TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); |
834 | return -1; | 901 | return -1; |
835 | } | 902 | } |
836 | 903 | ||
837 | BUG_ON(!is_realtime(t)); | 904 | BUG_ON(!is_realtime(t)); |
838 | 905 | ||
839 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); | 906 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); |
840 | 907 | ||
841 | if (binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { | 908 | if (binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { |
842 | TRACE_CUR("task %s/%d is already active on GPU %d\n", t->comm, t->pid, gpu); | 909 | TRACE_CUR("task %s/%d is already active on GPU %d\n", t->comm, t->pid, gpu); |
843 | goto out; | 910 | goto out; |
844 | } | 911 | } |
845 | 912 | ||
846 | /* update the registration (and maybe klmirqd) */ | 913 | /* update the registration (and maybe klmirqd) */ |
847 | reg = &NV_DEVICE_REG[gpu]; | 914 | reg = &NV_DEVICE_REG[gpu]; |
848 | 915 | ||
849 | binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners, | 916 | binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners, |
850 | struct rt_param, gpu_owner_node); | 917 | struct rt_param, gpu_owner_node); |
851 | 918 | ||
852 | 919 | ||
853 | #ifdef CONFIG_LITMUS_SOFTIRQD | 920 | #ifdef CONFIG_LITMUS_SOFTIRQD |
854 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), | 921 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), |
855 | struct task_struct, rt_param); | 922 | struct task_struct, rt_param); |
856 | 923 | ||
857 | if (hp == t) { | 924 | if (hp == t) { |
858 | int interrupt_success; | 925 | int interrupt_success; |
859 | 926 | ||
860 | /* we're the new hp */ | 927 | /* we're the new hp */ |
861 | /* | 928 | /* |
862 | TRACE_CUR("%s/%d (eff_prio = %s/%d) is new hp on GPU %d.\n", | 929 | TRACE_CUR("%s/%d (eff_prio = %s/%d) is new hp on GPU %d.\n", |
863 | t->comm, t->pid, | 930 | t->comm, t->pid, |
864 | effective_priority(t)->comm, effective_priority(t)->pid, | 931 | effective_priority(t)->comm, effective_priority(t)->pid, |
865 | gpu); | 932 | gpu); |
866 | */ | 933 | */ |
867 | 934 | ||
868 | interrupt_success = gpu_klmirqd_increase_priority(reg->interrupt_thread, effective_priority(t)); | 935 | interrupt_success = gpu_klmirqd_increase_priority(reg->interrupt_thread, effective_priority(t)); |
869 | 936 | ||
870 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 937 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
871 | { | 938 | { |
872 | int workq_success = gpu_klmirqd_increase_priority(reg->workq_thread, effective_priority(t)); | 939 | int workq_success = gpu_klmirqd_increase_priority(reg->workq_thread, effective_priority(t)); |
873 | if(interrupt_success != 1 || workq_success != 1) | 940 | if(interrupt_success != 1 || workq_success != 1) |
874 | retval = (interrupt_success != 1) ? interrupt_success : workq_success; | 941 | retval = (interrupt_success != 1) ? interrupt_success : workq_success; |
875 | else | 942 | else |
876 | retval = 1; | 943 | retval = 1; |
877 | } | 944 | } |
878 | #else | 945 | #else |
879 | retval = interrupt_success; | 946 | retval = interrupt_success; |
880 | #endif | 947 | #endif |
881 | } | 948 | } |
882 | #endif | 949 | #endif |
883 | 950 | ||
884 | out: | 951 | out: |
885 | return retval; | 952 | return retval; |
886 | } | 953 | } |
887 | 954 | ||
888 | /* call when an gpu owner exits real-time */ | 955 | /* call when an gpu owner exits real-time */ |
889 | long disable_gpu_owner(struct task_struct *t) | 956 | long disable_gpu_owner(struct task_struct *t) |
890 | { | 957 | { |
891 | long retval = 0; | 958 | long retval = 0; |
892 | int gpu; | 959 | int gpu; |
893 | nv_device_registry_t *reg; | 960 | nv_device_registry_t *reg; |
894 | 961 | ||
895 | #ifdef CONFIG_LITMUS_SOFTIRQD | 962 | #ifdef CONFIG_LITMUS_SOFTIRQD |
896 | struct task_struct *hp; | 963 | struct task_struct *hp; |
897 | struct task_struct *new_hp = NULL; | 964 | struct task_struct *new_hp = NULL; |
898 | #endif | 965 | #endif |
899 | 966 | ||
900 | if (!tsk_rt(t)->held_gpus) { | 967 | if (!tsk_rt(t)->held_gpus) { |
901 | TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); | 968 | TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); |
902 | return -1; | 969 | return -1; |
903 | } | 970 | } |
904 | 971 | ||
905 | BUG_ON(!is_realtime(t)); | 972 | BUG_ON(!is_realtime(t)); |
906 | 973 | ||
907 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); | 974 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); |
908 | 975 | ||
909 | if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { | 976 | if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { |
910 | // TRACE_CUR("task %s/%d is not active on GPU %d\n", t->comm, t->pid, gpu); | 977 | // TRACE_CUR("task %s/%d is not active on GPU %d\n", t->comm, t->pid, gpu); |
911 | goto out; | 978 | goto out; |
912 | } | 979 | } |
913 | 980 | ||
914 | // TRACE_CUR("task %s/%d exiting from GPU %d.\n", t->comm, t->pid, gpu); | 981 | // TRACE_CUR("task %s/%d exiting from GPU %d.\n", t->comm, t->pid, gpu); |
915 | 982 | ||
916 | 983 | ||
917 | reg = &NV_DEVICE_REG[gpu]; | 984 | reg = &NV_DEVICE_REG[gpu]; |
918 | 985 | ||
919 | #ifdef CONFIG_LITMUS_SOFTIRQD | 986 | #ifdef CONFIG_LITMUS_SOFTIRQD |
920 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), | 987 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), |
921 | struct task_struct, rt_param); | 988 | struct task_struct, rt_param); |
922 | 989 | ||
923 | binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners); | 990 | binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners); |
924 | 991 | ||
925 | 992 | ||
926 | if (!binheap_empty(&reg->owners)) { | 993 | if (!binheap_empty(&reg->owners)) { |
927 | new_hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), | 994 | new_hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), |
928 | struct task_struct, rt_param); | 995 | struct task_struct, rt_param); |
929 | } | 996 | } |
930 | 997 | ||
931 | if (hp == t && new_hp != t) { | 998 | if (hp == t && new_hp != t) { |
932 | int interrupt_success; | 999 | int interrupt_success; |
933 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 1000 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
934 | #endif | 1001 | #endif |
935 | struct task_struct *to_inh = (new_hp) ? effective_priority(new_hp) : NULL; | 1002 | struct task_struct *to_inh = (new_hp) ? effective_priority(new_hp) : NULL; |
936 | 1003 | ||
937 | /* | 1004 | /* |
938 | TRACE_CUR("%s/%d is no longer hp on GPU %d; new hp = %s/%d (eff_prio = %s/%d).\n", | 1005 | TRACE_CUR("%s/%d is no longer hp on GPU %d; new hp = %s/%d (eff_prio = %s/%d).\n", |
939 | t->comm, t->pid, | 1006 | t->comm, t->pid, |
940 | gpu, | 1007 | gpu, |
941 | (new_hp) ? new_hp->comm : "null", | 1008 | (new_hp) ? new_hp->comm : "null", |
942 | (new_hp) ? new_hp->pid : 0, | 1009 | (new_hp) ? new_hp->pid : 0, |
943 | (to_inh) ? to_inh->comm : "null", | 1010 | (to_inh) ? to_inh->comm : "null", |
944 | (to_inh) ? to_inh->pid : 0); | 1011 | (to_inh) ? to_inh->pid : 0); |
945 | */ | 1012 | */ |
946 | 1013 | ||
947 | interrupt_success = gpu_klmirqd_decrease_priority(reg->interrupt_thread, to_inh, 0); | 1014 | interrupt_success = gpu_klmirqd_decrease_priority(reg->interrupt_thread, to_inh, 0); |
948 | 1015 | ||
949 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 1016 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
950 | { | 1017 | { |
951 | int workq_success = gpu_klmirqd_decrease_priority(reg->workq_thread, to_inh, 0); | 1018 | int workq_success = gpu_klmirqd_decrease_priority(reg->workq_thread, to_inh, 0); |
952 | if(interrupt_success != 1 || workq_success != 1) | 1019 | if(interrupt_success != 1 || workq_success != 1) |
953 | retval = (interrupt_success != 1) ? interrupt_success : workq_success; | 1020 | retval = (interrupt_success != 1) ? interrupt_success : workq_success; |
954 | else | 1021 | else |
955 | retval = 1; | 1022 | retval = 1; |
956 | } | 1023 | } |
957 | #else | 1024 | #else |
958 | retval = interrupt_success; | 1025 | retval = interrupt_success; |
959 | #endif | 1026 | #endif |
960 | } | 1027 | } |
961 | #else | 1028 | #else |
962 | binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners); | 1029 | binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners); |
963 | #endif | 1030 | #endif |
964 | 1031 | ||
965 | out: | 1032 | out: |
966 | return retval; | 1033 | return retval; |
967 | } | 1034 | } |
968 | 1035 | ||
969 | 1036 | ||
970 | long recheck_gpu_owner(struct task_struct* t) | 1037 | long recheck_gpu_owner(struct task_struct* t) |
971 | { | 1038 | { |
972 | /* TODO: blend implementation of disable/enable */ | 1039 | /* TODO: blend implementation of disable/enable */ |
973 | int retval = disable_gpu_owner(t); | 1040 | int retval = disable_gpu_owner(t); |
974 | if (!retval) | 1041 | if (!retval) |
975 | retval = enable_gpu_owner(t); | 1042 | retval = enable_gpu_owner(t); |
976 | return retval; | 1043 | return retval; |
977 | } | 1044 | } |
978 | 1045 | ||
979 | 1046 | ||
@@ -984,177 +1051,177 @@ long recheck_gpu_owner(struct task_struct* t) | |||
984 | 1051 | ||
985 | int gpu_owner_increase_priority(struct task_struct *t) | 1052 | int gpu_owner_increase_priority(struct task_struct *t) |
986 | { | 1053 | { |
987 | int retval = 0; | 1054 | int retval = 0; |
988 | int gpu; | 1055 | int gpu; |
989 | nv_device_registry_t *reg; | 1056 | nv_device_registry_t *reg; |
990 | 1057 | ||
991 | struct task_struct *hp = NULL; | 1058 | struct task_struct *hp = NULL; |
992 | struct task_struct *hp_eff = NULL; | 1059 | struct task_struct *hp_eff = NULL; |
993 | 1060 | ||
994 | #ifdef CONFIG_LITMUS_SOFTIRQD | 1061 | #ifdef CONFIG_LITMUS_SOFTIRQD |
995 | int increase_klmirqd = 0; | 1062 | int increase_klmirqd = 0; |
996 | #endif | 1063 | #endif |
997 | 1064 | ||
998 | BUG_ON(!is_realtime(t)); | 1065 | BUG_ON(!is_realtime(t)); |
999 | BUG_ON(!tsk_rt(t)->held_gpus); | 1066 | BUG_ON(!tsk_rt(t)->held_gpus); |
1000 | 1067 | ||
1001 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); | 1068 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); |
1002 | 1069 | ||
1003 | if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { | 1070 | if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { |
1004 | TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n", | 1071 | TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n", |
1005 | t->comm, t->pid, gpu); | 1072 | t->comm, t->pid, gpu); |
1006 | goto out; | 1073 | goto out; |
1007 | } | 1074 | } |
1008 | 1075 | ||
1009 | TRACE_CUR("task %s/%d on GPU %d increasing priority.\n", t->comm, t->pid, gpu); | 1076 | TRACE_CUR("task %s/%d on GPU %d increasing priority.\n", t->comm, t->pid, gpu); |
1010 | reg = &NV_DEVICE_REG[gpu]; | 1077 | reg = &NV_DEVICE_REG[gpu]; |
1011 | 1078 | ||
1012 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), | 1079 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), |
1013 | struct task_struct, rt_param); | 1080 | struct task_struct, rt_param); |
1014 | hp_eff = effective_priority(hp); | 1081 | hp_eff = effective_priority(hp); |
1015 | 1082 | ||
1016 | if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */ | 1083 | if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */ |
1017 | binheap_decrease(&tsk_rt(t)->gpu_owner_node, &reg->owners); | 1084 | binheap_decrease(&tsk_rt(t)->gpu_owner_node, &reg->owners); |
1018 | } | 1085 | } |
1019 | #ifdef CONFIG_LITMUS_SOFTIRQD | 1086 | #ifdef CONFIG_LITMUS_SOFTIRQD |
1020 | else { | 1087 | else { |
1021 | /* unconditionally propagate - t already has the updated eff and is at the root, | 1088 | /* unconditionally propagate - t already has the updated eff and is at the root, |
1022 | so we can't detect a change in inheritance, but we know that priority has | 1089 | so we can't detect a change in inheritance, but we know that priority has |
1023 | indeed increased/changed. */ | 1090 | indeed increased/changed. */ |
1024 | increase_klmirqd = 1; | 1091 | increase_klmirqd = 1; |
1025 | } | 1092 | } |
1026 | 1093 | ||
1027 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), | 1094 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), |
1028 | struct task_struct, rt_param); | 1095 | struct task_struct, rt_param); |
1029 | 1096 | ||
1030 | /* check if the eff. prio. of hp has changed */ | 1097 | /* check if the eff. prio. of hp has changed */ |
1031 | if (increase_klmirqd || (effective_priority(hp) != hp_eff)) { | 1098 | if (increase_klmirqd || (effective_priority(hp) != hp_eff)) { |
1032 | int interrupt_success; | 1099 | int interrupt_success; |
1033 | 1100 | ||
1034 | hp_eff = effective_priority(hp); | 1101 | hp_eff = effective_priority(hp); |
1035 | TRACE_CUR("%s/%d (eff_prio = %s/%d) is new hp on GPU %d.\n", | 1102 | TRACE_CUR("%s/%d (eff_prio = %s/%d) is new hp on GPU %d.\n", |
1036 | t->comm, t->pid, | 1103 | t->comm, t->pid, |
1037 | hp_eff->comm, hp_eff->pid, | 1104 | hp_eff->comm, hp_eff->pid, |
1038 | gpu); | 1105 | gpu); |
1039 | 1106 | ||
1040 | interrupt_success = gpu_klmirqd_increase_priority(reg->interrupt_thread, hp_eff); | 1107 | interrupt_success = gpu_klmirqd_increase_priority(reg->interrupt_thread, hp_eff); |
1041 | 1108 | ||
1042 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 1109 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
1043 | { | 1110 | { |
1044 | int workq_success = gpu_klmirqd_increase_priority(reg->workq_thread, hp_eff); | 1111 | int workq_success = gpu_klmirqd_increase_priority(reg->workq_thread, hp_eff); |
1045 | if(interrupt_success != 1 || workq_success != 1) | 1112 | if(interrupt_success != 1 || workq_success != 1) |
1046 | retval = (interrupt_success != 1) ? interrupt_success : workq_success; | 1113 | retval = (interrupt_success != 1) ? interrupt_success : workq_success; |
1047 | else | 1114 | else |
1048 | retval = 1; | 1115 | retval = 1; |
1049 | } | 1116 | } |
1050 | #else | 1117 | #else |
1051 | retval = interrupt_success; | 1118 | retval = interrupt_success; |
1052 | #endif | 1119 | #endif |
1053 | 1120 | ||
1054 | } | 1121 | } |
1055 | #endif | 1122 | #endif |
1056 | 1123 | ||
1057 | out: | 1124 | out: |
1058 | return retval; | 1125 | return retval; |
1059 | } | 1126 | } |
1060 | 1127 | ||
1061 | 1128 | ||
1062 | int gpu_owner_decrease_priority(struct task_struct *t) | 1129 | int gpu_owner_decrease_priority(struct task_struct *t) |
1063 | { | 1130 | { |
1064 | int retval = 0; | 1131 | int retval = 0; |
1065 | int gpu; | 1132 | int gpu; |
1066 | nv_device_registry_t *reg; | 1133 | nv_device_registry_t *reg; |
1067 | 1134 | ||
1068 | struct task_struct *hp = NULL; | 1135 | struct task_struct *hp = NULL; |
1069 | struct task_struct *hp_eff = NULL; | 1136 | struct task_struct *hp_eff = NULL; |
1070 | 1137 | ||
1071 | BUG_ON(!is_realtime(t)); | 1138 | BUG_ON(!is_realtime(t)); |
1072 | BUG_ON(!tsk_rt(t)->held_gpus); | 1139 | BUG_ON(!tsk_rt(t)->held_gpus); |
1073 | 1140 | ||
1074 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); | 1141 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); |
1075 | 1142 | ||
1076 | if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { | 1143 | if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { |
1077 | TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n", | 1144 | TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n", |
1078 | t->comm, t->pid, gpu); | 1145 | t->comm, t->pid, gpu); |
1079 | goto out; | 1146 | goto out; |
1080 | } | 1147 | } |
1081 | 1148 | ||
1082 | TRACE_CUR("task %s/%d on GPU %d decresing priority.\n", t->comm, t->pid, gpu); | 1149 | TRACE_CUR("task %s/%d on GPU %d decresing priority.\n", t->comm, t->pid, gpu); |
1083 | reg = &NV_DEVICE_REG[gpu]; | 1150 | reg = &NV_DEVICE_REG[gpu]; |
1084 | 1151 | ||
1085 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), | 1152 | hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), |
1086 | struct task_struct, rt_param); | 1153 | struct task_struct, rt_param); |
1087 | hp_eff = effective_priority(hp); | 1154 | hp_eff = effective_priority(hp); |
1088 | binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners); | 1155 | binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners); |
1089 | binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners, | 1156 | binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners, |
1090 | struct rt_param, gpu_owner_node); | 1157 | struct rt_param, gpu_owner_node); |
1091 | 1158 | ||
1092 | #ifdef CONFIG_LITMUS_SOFTIRQD | 1159 | #ifdef CONFIG_LITMUS_SOFTIRQD |
1093 | if (hp == t) { /* t was originally the hp */ | 1160 | if (hp == t) { /* t was originally the hp */ |
1094 | struct task_struct *new_hp = | 1161 | struct task_struct *new_hp = |
1095 | container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), | 1162 | container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), |
1096 | struct task_struct, rt_param); | 1163 | struct task_struct, rt_param); |
1097 | /* if the new_hp is still t, or if the effective priority has changed */ | 1164 | /* if the new_hp is still t, or if the effective priority has changed */ |
1098 | if ((new_hp == t) || (effective_priority(new_hp) != hp_eff)) { | 1165 | if ((new_hp == t) || (effective_priority(new_hp) != hp_eff)) { |
1099 | int interrupt_success; | 1166 | int interrupt_success; |
1100 | 1167 | ||
1101 | hp_eff = effective_priority(new_hp); | 1168 | hp_eff = effective_priority(new_hp); |
1102 | TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); | 1169 | TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); |
1103 | interrupt_success = gpu_klmirqd_decrease_priority(reg->interrupt_thread, hp_eff, 1); | 1170 | interrupt_success = gpu_klmirqd_decrease_priority(reg->interrupt_thread, hp_eff, 1); |
1104 | 1171 | ||
1105 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED | 1172 | #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED |
1106 | { | 1173 | { |
1107 | int workq_success = gpu_klmirqd_decrease_priority(reg->workq_thread, hp_eff, 1); | 1174 | int workq_success = gpu_klmirqd_decrease_priority(reg->workq_thread, hp_eff, 1); |
1108 | if(interrupt_success != 1 || workq_success != 1) | 1175 | if(interrupt_success != 1 || workq_success != 1) |
1109 | retval = (interrupt_success != 1) ? interrupt_success : workq_success; | 1176 | retval = (interrupt_success != 1) ? interrupt_success : workq_success; |
1110 | else | 1177 | else |
1111 | retval = 1; | 1178 | retval = 1; |
1112 | } | 1179 | } |
1113 | #else | 1180 | #else |
1114 | retval = interrupt_success; | 1181 | retval = interrupt_success; |
1115 | #endif | 1182 | #endif |
1116 | } | 1183 | } |
1117 | } | 1184 | } |
1118 | #endif | 1185 | #endif |
1119 | 1186 | ||
1120 | out: | 1187 | out: |
1121 | return retval; | 1188 | return retval; |
1122 | } | 1189 | } |
1123 | 1190 | ||
1124 | 1191 | ||
1125 | 1192 | ||
1126 | static int __reg_nv_device(int reg_device_id, struct task_struct *t) | 1193 | static int __reg_nv_device(int reg_device_id, struct task_struct *t) |
1127 | { | 1194 | { |
1128 | __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); | 1195 | __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); |
1129 | 1196 | ||
1130 | return(0); | 1197 | return(0); |
1131 | } | 1198 | } |
1132 | 1199 | ||
1133 | static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) | 1200 | static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) |
1134 | { | 1201 | { |
1135 | __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); | 1202 | __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); |
1136 | 1203 | ||
1137 | return(0); | 1204 | return(0); |
1138 | } | 1205 | } |
1139 | 1206 | ||
1140 | 1207 | ||
1141 | int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) | 1208 | int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) |
1142 | { | 1209 | { |
1143 | int ret; | 1210 | int ret; |
1144 | 1211 | ||
1145 | if((reg_device_id < num_online_gpus()) && (reg_device_id >= 0)) | 1212 | if((reg_device_id < num_online_gpus()) && (reg_device_id >= 0)) |
1146 | { | 1213 | { |
1147 | if(reg_action) | 1214 | if(reg_action) |
1148 | ret = __reg_nv_device(reg_device_id, t); | 1215 | ret = __reg_nv_device(reg_device_id, t); |
1149 | else | 1216 | else |
1150 | ret = __clear_reg_nv_device(reg_device_id, t); | 1217 | ret = __clear_reg_nv_device(reg_device_id, t); |
1151 | } | 1218 | } |
1152 | else | 1219 | else |
1153 | { | 1220 | { |
1154 | ret = -ENODEV; | 1221 | ret = -ENODEV; |
1155 | } | 1222 | } |
1156 | 1223 | ||
1157 | return(ret); | 1224 | return(ret); |
1158 | } | 1225 | } |
1159 | 1226 | ||
1160 | 1227 | ||
@@ -1162,45 +1229,45 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) | |||
1162 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | 1229 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD |
1163 | //void pai_check_priority_increase(struct task_struct *t, int reg_device_id) | 1230 | //void pai_check_priority_increase(struct task_struct *t, int reg_device_id) |
1164 | //{ | 1231 | //{ |
1165 | // unsigned long flags; | 1232 | // unsigned long flags; |
1166 | // nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; | 1233 | // nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; |
1167 | // | 1234 | // |
1168 | // | 1235 | // |
1169 | // | 1236 | // |
1170 | // if(reg->max_prio_owner != t) { | 1237 | // if(reg->max_prio_owner != t) { |
1171 | // | 1238 | // |
1172 | // raw_spin_lock_irqsave(&reg->lock, flags); | 1239 | // raw_spin_lock_irqsave(&reg->lock, flags); |
1173 | // | 1240 | // |
1174 | // if(reg->max_prio_owner != t) { | 1241 | // if(reg->max_prio_owner != t) { |
1175 | // if(litmus->compare(t, reg->max_prio_owner)) { | 1242 | // if(litmus->compare(t, reg->max_prio_owner)) { |
1176 | // litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); | 1243 | // litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); |
1177 | // reg->max_prio_owner = t; | 1244 | // reg->max_prio_owner = t; |
1178 | // } | 1245 | // } |
1179 | // } | 1246 | // } |
1180 | // | 1247 | // |
1181 | // raw_spin_unlock_irqrestore(&reg->lock, flags); | 1248 | // raw_spin_unlock_irqrestore(&reg->lock, flags); |
1182 | // } | 1249 | // } |
1183 | //} | 1250 | //} |
1184 | // | 1251 | // |
1185 | // | 1252 | // |
1186 | //void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) | 1253 | //void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) |
1187 | //{ | 1254 | //{ |
1188 | // unsigned long flags; | 1255 | // unsigned long flags; |
1189 | // nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; | 1256 | // nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; |
1190 | // | 1257 | // |
1191 | // if(reg->max_prio_owner == t) { | 1258 | // if(reg->max_prio_owner == t) { |
1192 | // | 1259 | // |
1193 | // raw_spin_lock_irqsave(&reg->lock, flags); | 1260 | // raw_spin_lock_irqsave(&reg->lock, flags); |
1194 | // | 1261 | // |
1195 | // if(reg->max_prio_owner == t) { | 1262 | // if(reg->max_prio_owner == t) { |
1196 | // reg->max_prio_owner = find_hp_owner(reg, NULL); | 1263 | // reg->max_prio_owner = find_hp_owner(reg, NULL); |
1197 | // if(reg->max_prio_owner != t) { | 1264 | // if(reg->max_prio_owner != t) { |
1198 | // litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); | 1265 | // litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); |
1199 | // } | 1266 | // } |
1200 | // } | 1267 | // } |
1201 | // | 1268 | // |
1202 | // raw_spin_unlock_irqrestore(&reg->lock, flags); | 1269 | // raw_spin_unlock_irqrestore(&reg->lock, flags); |
1203 | // } | 1270 | // } |
1204 | //} | 1271 | //} |
1205 | #endif | 1272 | #endif |
1206 | 1273 | ||
@@ -1210,144 +1277,144 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) | |||
1210 | 1277 | ||
1211 | //static int __reg_nv_device(int reg_device_id, struct task_struct *t) | 1278 | //static int __reg_nv_device(int reg_device_id, struct task_struct *t) |
1212 | //{ | 1279 | //{ |
1213 | // int ret = 0; | 1280 | // int ret = 0; |
1214 | // int i; | 1281 | // int i; |
1215 | // struct task_struct *old_max = NULL; | 1282 | // struct task_struct *old_max = NULL; |
1216 | // | 1283 | // |
1217 | // | 1284 | // |
1218 | // raw_spin_lock_irqsave(&reg->lock, flags); | 1285 | // raw_spin_lock_irqsave(&reg->lock, flags); |
1219 | // | 1286 | // |
1220 | // if(reg->nr_owners < NV_MAX_SIMULT_USERS) { | 1287 | // if(reg->nr_owners < NV_MAX_SIMULT_USERS) { |
1221 | // TRACE_TASK(t, "registers GPU %d\n", reg_device_id); | 1288 | // TRACE_TASK(t, "registers GPU %d\n", reg_device_id); |
1222 | // for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { | 1289 | // for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { |
1223 | // if(reg->owners[i] == NULL) { | 1290 | // if(reg->owners[i] == NULL) { |
1224 | // reg->owners[i] = t; | 1291 | // reg->owners[i] = t; |
1225 | // | 1292 | // |
1226 | // //if(edf_higher_prio(t, reg->max_prio_owner)) { | 1293 | // //if(edf_higher_prio(t, reg->max_prio_owner)) { |
1227 | // if(litmus->compare(t, reg->max_prio_owner)) { | 1294 | // if(litmus->compare(t, reg->max_prio_owner)) { |
1228 | // old_max = reg->max_prio_owner; | 1295 | // old_max = reg->max_prio_owner; |
1229 | // reg->max_prio_owner = t; | 1296 | // reg->max_prio_owner = t; |
1230 | // | 1297 | // |
1231 | //#ifdef CONFIG_LITMUS_PAI_SOFTIRQD | 1298 | //#ifdef CONFIG_LITMUS_PAI_SOFTIRQD |
1232 | // litmus->change_prio_pai_tasklet(old_max, t); | 1299 | // litmus->change_prio_pai_tasklet(old_max, t); |
1233 | //#endif | 1300 | //#endif |
1234 | // } | 1301 | // } |
1235 | // | 1302 | // |
1236 | //#ifdef CONFIG_LITMUS_SOFTIRQD | 1303 | //#ifdef CONFIG_LITMUS_SOFTIRQD |
1237 | // down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); | 1304 | // down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); |
1238 | //#endif | 1305 | //#endif |
1239 | // ++(reg->nr_owners); | 1306 | // ++(reg->nr_owners); |
1240 | // | 1307 | // |
1241 | // break; | 1308 | // break; |
1242 | // } | 1309 | // } |
1243 | // } | 1310 | // } |
1244 | // } | 1311 | // } |
1245 | // else | 1312 | // else |
1246 | // { | 1313 | // { |
1247 | // TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); | 1314 | // TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); |
1248 | // //ret = -EBUSY; | 1315 | // //ret = -EBUSY; |
1249 | // } | 1316 | // } |
1250 | // | 1317 | // |
1251 | // raw_spin_unlock_irqrestore(&reg->lock, flags); | 1318 | // raw_spin_unlock_irqrestore(&reg->lock, flags); |
1252 | // | 1319 | // |
1253 | // __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); | 1320 | // __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); |
1254 | // | 1321 | // |
1255 | // return(ret); | 1322 | // return(ret); |
1256 | //} | 1323 | //} |
1257 | // | 1324 | // |
1258 | //static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) | 1325 | //static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) |
1259 | //{ | 1326 | //{ |
1260 | // int ret = 0; | 1327 | // int ret = 0; |
1261 | // int i; | 1328 | // int i; |
1262 | // unsigned long flags; | 1329 | // unsigned long flags; |
1263 | // nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; | 1330 | // nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; |
1264 | // | 1331 | // |
1265 | //#ifdef CONFIG_LITMUS_SOFTIRQD | 1332 | //#ifdef CONFIG_LITMUS_SOFTIRQD |
1266 | // struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); | 1333 | // struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); |
1267 | //#endif | 1334 | //#endif |
1268 | // | 1335 | // |
1269 | // if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { | 1336 | // if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { |
1270 | // return ret; | 1337 | // return ret; |
1271 | // } | 1338 | // } |
1272 | // | 1339 | // |
1273 | // raw_spin_lock_irqsave(&reg->lock, flags); | 1340 | // raw_spin_lock_irqsave(&reg->lock, flags); |
1274 | // | 1341 | // |
1275 | // TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); | 1342 | // TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); |
1276 | // | 1343 | // |
1277 | // for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { | 1344 | // for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { |
1278 | // if(reg->owners[i] == t) { | 1345 | // if(reg->owners[i] == t) { |
1279 | //#ifdef CONFIG_LITMUS_SOFTIRQD | 1346 | //#ifdef CONFIG_LITMUS_SOFTIRQD |
1280 | // flush_pending(klmirqd_th, t); | 1347 | // flush_pending(klmirqd_th, t); |
1281 | //#endif | 1348 | //#endif |
1282 | // if(reg->max_prio_owner == t) { | 1349 | // if(reg->max_prio_owner == t) { |
1283 | // reg->max_prio_owner = find_hp_owner(reg, t); | 1350 | // reg->max_prio_owner = find_hp_owner(reg, t); |
1284 | //#ifdef CONFIG_LITMUS_PAI_SOFTIRQD | 1351 | //#ifdef CONFIG_LITMUS_PAI_SOFTIRQD |
1285 | // litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); | 1352 | // litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); |
1286 | //#endif | 1353 | //#endif |
1287 | // } | 1354 | // } |
1288 | // | 1355 | // |
1289 | //#ifdef CONFIG_LITMUS_SOFTIRQD | 1356 | //#ifdef CONFIG_LITMUS_SOFTIRQD |
1290 | // up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); | 1357 | // up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); |
1291 | //#endif | 1358 | //#endif |
1292 | // | 1359 | // |
1293 | // reg->owners[i] = NULL; | 1360 | // reg->owners[i] = NULL; |
1294 | // --(reg->nr_owners); | 1361 | // --(reg->nr_owners); |
1295 | // | 1362 | // |
1296 | // break; | 1363 | // break; |
1297 | // } | 1364 | // } |
1298 | // } | 1365 | // } |
1299 | // | 1366 | // |
1300 | // raw_spin_unlock_irqrestore(&reg->lock, flags); | 1367 | // raw_spin_unlock_irqrestore(&reg->lock, flags); |
1301 | // | 1368 | // |
1302 | // __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); | 1369 | // __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); |
1303 | // | 1370 | // |
1304 | // return(ret); | 1371 | // return(ret); |
1305 | //} | 1372 | //} |
1306 | // | 1373 | // |
1307 | // | 1374 | // |
1308 | //int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) | 1375 | //int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) |
1309 | //{ | 1376 | //{ |
1310 | // int ret; | 1377 | // int ret; |
1311 | // | 1378 | // |
1312 | // if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) | 1379 | // if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) |
1313 | // { | 1380 | // { |
1314 | // if(reg_action) | 1381 | // if(reg_action) |
1315 | // ret = __reg_nv_device(reg_device_id, t); | 1382 | // ret = __reg_nv_device(reg_device_id, t); |
1316 | // else | 1383 | // else |
1317 | // ret = __clear_reg_nv_device(reg_device_id, t); | 1384 | // ret = __clear_reg_nv_device(reg_device_id, t); |
1318 | // } | 1385 | // } |
1319 | // else | 1386 | // else |
1320 | // { | 1387 | // { |
1321 | // ret = -ENODEV; | 1388 | // ret = -ENODEV; |
1322 | // } | 1389 | // } |
1323 | // | 1390 | // |
1324 | // return(ret); | 1391 | // return(ret); |
1325 | //} | 1392 | //} |
1326 | 1393 | ||
1327 | 1394 | ||
1328 | 1395 | ||
1329 | //void lock_nv_registry(u32 target_device_id, unsigned long* flags) | 1396 | //void lock_nv_registry(u32 target_device_id, unsigned long* flags) |
1330 | //{ | 1397 | //{ |
1331 | // BUG_ON(target_device_id >= NV_DEVICE_NUM); | 1398 | // BUG_ON(target_device_id >= NV_DEVICE_NUM); |
1332 | // | 1399 | // |
1333 | // if(in_interrupt()) | 1400 | // if(in_interrupt()) |
1334 | // TRACE("Locking registry for %d.\n", target_device_id); | 1401 | // TRACE("Locking registry for %d.\n", target_device_id); |
1335 | // else | 1402 | // else |
1336 | // TRACE_CUR("Locking registry for %d.\n", target_device_id); | 1403 | // TRACE_CUR("Locking registry for %d.\n", target_device_id); |
1337 | // | 1404 | // |
1338 | // raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); | 1405 | // raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); |
1339 | //} | 1406 | //} |
1340 | // | 1407 | // |
1341 | //void unlock_nv_registry(u32 target_device_id, unsigned long* flags) | 1408 | //void unlock_nv_registry(u32 target_device_id, unsigned long* flags) |
1342 | //{ | 1409 | //{ |
1343 | // BUG_ON(target_device_id >= NV_DEVICE_NUM); | 1410 | // BUG_ON(target_device_id >= NV_DEVICE_NUM); |
1344 | // | 1411 | // |
1345 | // if(in_interrupt()) | 1412 | // if(in_interrupt()) |
1346 | // TRACE("Unlocking registry for %d.\n", target_device_id); | 1413 | // TRACE("Unlocking registry for %d.\n", target_device_id); |
1347 | // else | 1414 | // else |
1348 | // TRACE_CUR("Unlocking registry for %d.\n", target_device_id); | 1415 | // TRACE_CUR("Unlocking registry for %d.\n", target_device_id); |
1349 | // | 1416 | // |
1350 | // raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); | 1417 | // raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); |
1351 | //} | 1418 | //} |
1352 | 1419 | ||
1353 | 1420 | ||