author     Arto Merilainen <amerilainen@nvidia.com>   2014-03-19 03:38:25 -0400
committer  Dan Willemsen <dwillemsen@nvidia.com>      2015-03-18 15:08:53 -0400
commit     a9785995d5f22aaeb659285f8aeb64d8b56982e0 (patch)
tree       cc75f75bcf43db316a002a7a240b81f299bf6d7f /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
parent     61efaf843c22b85424036ec98015121c08f5f16c (diff)
gpu: nvgpu: Add NVIDIA GPU Driver
This patch moves the NVIDIA GPU driver to a new location.
Bug 1482562
Change-Id: I24293810b9d0f1504fd9be00135e21dad656ccb6
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/383722
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c  699
1 file changed, 699 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
new file mode 100644
index 00000000..da7d733e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -0,0 +1,699 @@
/*
 * Tegra GK20A GPU Debugger/Profiler Driver
 *
 * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/fs.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/nvhost.h>
#include <linux/nvhost_dbg_gpu_ioctl.h>

#include "gk20a.h"
#include "gr_gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "regops_gk20a.h"
#include "hw_therm_gk20a.h"

struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
        .exec_reg_ops = exec_regops_gk20a,
};

/* silly allocator - just increment session id */
static atomic_t session_id = ATOMIC_INIT(0);
static int generate_session_id(void)
{
        return atomic_add_return(1, &session_id);
}

static int alloc_session(struct dbg_session_gk20a **_dbg_s)
{
        struct dbg_session_gk20a *dbg_s;
        *_dbg_s = NULL;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        dbg_s = kzalloc(sizeof(*dbg_s), GFP_KERNEL);
        if (!dbg_s)
                return -ENOMEM;

        dbg_s->id = generate_session_id();
        dbg_s->ops = &dbg_gpu_session_ops_gk20a;
        *_dbg_s = dbg_s;
        return 0;
}

int gk20a_dbg_gpu_do_dev_open(struct inode *inode, struct file *filp, bool is_profiler)
{
        struct dbg_session_gk20a *dbg_session;
        struct gk20a *g;

        struct platform_device *pdev;
        struct device *dev;

        int err;

        if (!is_profiler)
                g = container_of(inode->i_cdev,
                                 struct gk20a, dbg.cdev);
        else
                g = container_of(inode->i_cdev,
                                 struct gk20a, prof.cdev);
        pdev = g->dev;
        dev = &pdev->dev;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", dev_name(dev));

        err = alloc_session(&dbg_session);
        if (err)
                return err;

        filp->private_data = dbg_session;
        dbg_session->pdev = pdev;
        dbg_session->dev = dev;
        dbg_session->g = g;
        dbg_session->is_profiler = is_profiler;
        dbg_session->is_pg_disabled = false;

        INIT_LIST_HEAD(&dbg_session->dbg_s_list_node);
        init_waitqueue_head(&dbg_session->dbg_events.wait_queue);
        dbg_session->dbg_events.events_enabled = false;
        dbg_session->dbg_events.num_pending_events = 0;

        return 0;
}

/* used in scenarios where the debugger session can take just the inter-session
 * lock for performance, but the profiler session must take the per-gpu lock
 * since it might not have an associated channel. */
static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s)
{
        if (dbg_s->is_profiler)
                mutex_lock(&dbg_s->g->dbg_sessions_lock);
        else
                mutex_lock(&dbg_s->ch->dbg_s_lock);
}

static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s)
{
        if (dbg_s->is_profiler)
                mutex_unlock(&dbg_s->g->dbg_sessions_lock);
        else
                mutex_unlock(&dbg_s->ch->dbg_s_lock);
}

static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        gk20a_dbg_session_mutex_lock(dbg_s);

        dbg_s->dbg_events.events_enabled = true;
        dbg_s->dbg_events.num_pending_events = 0;

        gk20a_dbg_session_mutex_unlock(dbg_s);
}

static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        gk20a_dbg_session_mutex_lock(dbg_s);

        dbg_s->dbg_events.events_enabled = false;
        dbg_s->dbg_events.num_pending_events = 0;

        gk20a_dbg_session_mutex_unlock(dbg_s);
}

static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        gk20a_dbg_session_mutex_lock(dbg_s);

        if (dbg_s->dbg_events.events_enabled &&
            dbg_s->dbg_events.num_pending_events > 0)
                dbg_s->dbg_events.num_pending_events--;

        gk20a_dbg_session_mutex_unlock(dbg_s);
}

static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
                          struct nvhost_dbg_gpu_events_ctrl_args *args)
{
        int ret = 0;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd);

        if (!dbg_s->ch) {
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "no channel bound to dbg session\n");
                return -EINVAL;
        }

        switch (args->cmd) {
        case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
                gk20a_dbg_gpu_events_enable(dbg_s);
                break;

        case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_DISABLE:
                gk20a_dbg_gpu_events_disable(dbg_s);
                break;

        case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_CLEAR:
                gk20a_dbg_gpu_events_clear(dbg_s);
                break;

        default:
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "unrecognized dbg gpu events ctrl cmd: 0x%x",
                          args->cmd);
                ret = -EINVAL;
                break;
        }

        return ret;
}

unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
{
        unsigned int mask = 0;
        struct dbg_session_gk20a *dbg_s = filep->private_data;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait);

        gk20a_dbg_session_mutex_lock(dbg_s);

        if (dbg_s->dbg_events.events_enabled &&
            dbg_s->dbg_events.num_pending_events > 0) {
                gk20a_dbg(gpu_dbg_gpu_dbg, "found pending event on session id %d",
                          dbg_s->id);
                gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
                          dbg_s->dbg_events.num_pending_events);
                mask = (POLLPRI | POLLIN);
        }

        gk20a_dbg_session_mutex_unlock(dbg_s);

        return mask;
}

int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
        return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
}

int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp)
{
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
        return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */);
}

void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
{
        struct dbg_session_gk20a *dbg_s;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        /* guard against the session list being modified */
        mutex_lock(&ch->dbg_s_lock);

        list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
                if (dbg_s->dbg_events.events_enabled) {
                        gk20a_dbg(gpu_dbg_gpu_dbg, "posting event on session id %d",
                                  dbg_s->id);
                        gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
                                  dbg_s->dbg_events.num_pending_events);

                        dbg_s->dbg_events.num_pending_events++;

                        wake_up_interruptible_all(&dbg_s->dbg_events.wait_queue);
                }
        }

        mutex_unlock(&ch->dbg_s_lock);
}


static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
                             __u32 powermode);

static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s)
{
        struct channel_gk20a *ch_gk20a = dbg_s->ch;
        struct gk20a *g = dbg_s->g;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        /* wasn't bound to start with ? */
        if (!ch_gk20a) {
                gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "not bound already?");
                return -ENODEV;
        }

        mutex_lock(&g->dbg_sessions_lock);
        mutex_lock(&ch_gk20a->dbg_s_lock);

        --g->dbg_sessions;

        /* Powergate enable is called here as possibility of dbg_session
         * which called powergate disable ioctl, to be killed without calling
         * powergate enable ioctl
         */
        dbg_set_powergate(dbg_s, NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE);

        dbg_s->ch = NULL;
        fput(dbg_s->ch_f);
        dbg_s->ch_f = NULL;

        list_del_init(&dbg_s->dbg_s_list_node);

        mutex_unlock(&ch_gk20a->dbg_s_lock);
        mutex_unlock(&g->dbg_sessions_lock);

        return 0;
}

int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
{
        struct dbg_session_gk20a *dbg_s = filp->private_data;

        gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));

        /* unbind if it was bound */
        if (!dbg_s->ch)
                return 0;
        dbg_unbind_channel_gk20a(dbg_s);

        kfree(dbg_s);
        return 0;
}

static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
                          struct nvhost_dbg_gpu_bind_channel_args *args)
{
        struct file *f;
        struct gk20a *g;
        struct channel_gk20a *ch;

        gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
                  dev_name(dbg_s->dev), args->channel_fd);

        if (args->channel_fd == ~0)
                return dbg_unbind_channel_gk20a(dbg_s);

        /* even though get_file_channel is doing this it releases it as well */
        /* by holding it here we'll keep it from disappearing while the
         * debugger is in session */
        f = fget(args->channel_fd);
        if (!f)
                return -ENODEV;

        ch = gk20a_get_channel_from_file(args->channel_fd);
        if (!ch) {
                gk20a_dbg_fn("no channel found for fd");
                fput(f);
                return -EINVAL;
        }

        g = dbg_s->g;
        gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid);

        mutex_lock(&g->dbg_sessions_lock);
        mutex_lock(&ch->dbg_s_lock);

        dbg_s->ch_f = f;
        dbg_s->ch = ch;
        list_add(&dbg_s->dbg_s_list_node, &dbg_s->ch->dbg_s_list);

        g->dbg_sessions++;

        mutex_unlock(&ch->dbg_s_lock);
        mutex_unlock(&g->dbg_sessions_lock);
        return 0;
}

static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_exec_reg_ops_args *args);

static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_powergate_args *args);

static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args);

long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg)
{
        struct dbg_session_gk20a *dbg_s = filp->private_data;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        u8 buf[NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE];
        int err = 0;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");

        if ((_IOC_TYPE(cmd) != NVHOST_DBG_GPU_IOCTL_MAGIC) ||
            (_IOC_NR(cmd) == 0) ||
            (_IOC_NR(cmd) > NVHOST_DBG_GPU_IOCTL_LAST))
                return -EFAULT;

        BUG_ON(_IOC_SIZE(cmd) > NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE);

        if (_IOC_DIR(cmd) & _IOC_WRITE) {
                if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
                        return -EFAULT;
        }

        if (!g->gr.sw_ready) {
                err = gk20a_busy(g->dev);
                if (err)
                        return err;

                gk20a_idle(g->dev);
        }

        switch (cmd) {
        case NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL:
                err = dbg_bind_channel_gk20a(dbg_s,
                             (struct nvhost_dbg_gpu_bind_channel_args *)buf);
                gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
                break;

        case NVHOST_DBG_GPU_IOCTL_REG_OPS:
                err = nvhost_ioctl_channel_reg_ops(dbg_s,
                             (struct nvhost_dbg_gpu_exec_reg_ops_args *)buf);
                gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
                break;

        case NVHOST_DBG_GPU_IOCTL_POWERGATE:
                err = nvhost_ioctl_powergate_gk20a(dbg_s,
                             (struct nvhost_dbg_gpu_powergate_args *)buf);
                gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
                break;

        case NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL:
                err = gk20a_dbg_gpu_events_ctrl(dbg_s,
                             (struct nvhost_dbg_gpu_events_ctrl_args *)buf);
                break;

        case NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE:
                err = nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s,
                             (struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *)buf);
                break;

        default:
                gk20a_err(dev_from_gk20a(g),
                          "unrecognized dbg gpu ioctl cmd: 0x%x",
                          cmd);
                err = -ENOTTY;
                break;
        }

        if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
                err = copy_to_user((void __user *)arg,
                                   buf, _IOC_SIZE(cmd));

        return err;
}

/* In order to perform a context relative op the context has
 * to be created already... which would imply that the
 * context switch mechanism has already been put in place.
 * So by the time we perform such an opertation it should always
 * be possible to query for the appropriate context offsets, etc.
 *
 * But note: while the dbg_gpu bind requires the a channel fd,
 * it doesn't require an allocated gr/compute obj at that point...
 */
static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
                                      struct gr_gk20a *gr)
{
        int err;

        mutex_lock(&gr->ctx_mutex);
        err = !gr->ctx_vars.golden_image_initialized;
        mutex_unlock(&gr->ctx_mutex);
        if (err)
                return false;
        return true;

}

static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_exec_reg_ops_args *args)
{
        int err;
        struct device *dev = dbg_s->dev;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        struct nvhost_dbg_gpu_reg_op *ops;
        u64 ops_size = sizeof(ops[0]) * args->num_ops;

        gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size);

        if (!dbg_s->ops) {
                gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
                return -EINVAL;
        }

        if (!dbg_s->is_profiler && !dbg_s->ch) {
                gk20a_err(dev, "bind a channel before regops for a debugging session");
                return -EINVAL;
        }

        /* be sure that ctx info is in place */
        if (!gr_context_info_available(dbg_s, &g->gr)) {
                gk20a_err(dev, "gr context data not available\n");
                return -ENODEV;
        }

        ops = kzalloc(ops_size, GFP_KERNEL);
        if (!ops) {
                gk20a_err(dev, "Allocating memory failed!");
                return -ENOMEM;
        }

        gk20a_dbg_fn("Copying regops from userspace");

        if (copy_from_user(ops, (void *)(uintptr_t)args->ops, ops_size)) {
                dev_err(dev, "copy_from_user failed!");
                err = -EFAULT;
                goto clean_up;
        }

        /* since exec_reg_ops sends methods to the ucode, it must take the
         * global gpu lock to protect against mixing methods from debug sessions
         * on other channels */
        mutex_lock(&g->dbg_sessions_lock);

        err = dbg_s->ops->exec_reg_ops(dbg_s, ops, args->num_ops);

        mutex_unlock(&g->dbg_sessions_lock);

        if (err) {
                gk20a_err(dev, "dbg regops failed");
                goto clean_up;
        }

        gk20a_dbg_fn("Copying result to userspace");

        if (copy_to_user((void *)(uintptr_t)args->ops, ops, ops_size)) {
                dev_err(dev, "copy_to_user failed!");
                err = -EFAULT;
                goto clean_up;
        }
        return 0;
clean_up:
        kfree(ops);
        return err;
}

static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
                             __u32 powermode)
{
        int err = 0;
        struct gk20a *g = get_gk20a(dbg_s->pdev);

        /* This function must be called with g->dbg_sessions_lock held */

        gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %d",
                  dev_name(dbg_s->dev), powermode);

        switch (powermode) {
        case NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE:
                /* save off current powergate, clk state.
                 * set gpu module's can_powergate = 0.
                 * set gpu module's clk to max.
                 * while *a* debug session is active there will be no power or
                 * clocking state changes allowed from mainline code (but they
                 * should be saved).
                 */
                /* Allow powergate disable if the current dbg_session doesn't
                 * call a powergate disable ioctl and the global
                 * powergating_disabled_refcount is zero
                 */

                if ((dbg_s->is_pg_disabled == false) &&
                    (g->dbg_powergating_disabled_refcount++ == 0)) {

                        gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module busy");
                        gk20a_busy(g->dev);
                        gk20a_channel_busy(dbg_s->pdev);

                        g->ops.clock_gating.slcg_gr_load_gating_prod(g,
                                        false);
                        g->ops.clock_gating.slcg_perf_load_gating_prod(g,
                                        false);
                        gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A);

                        g->elcg_enabled = false;
                        gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
                        gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);

                        gk20a_pmu_disable_elpg(g);
                }

                dbg_s->is_pg_disabled = true;
                break;

        case NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE:
                /* restore (can) powergate, clk state */
                /* release pending exceptions to fault/be handled as usual */
                /*TBD: ordering of these? */

                /* Re-enabling powergate as no other sessions want
                 * powergate disabled and the current dbg-sessions had
                 * requested the powergate disable through ioctl
                 */
                if (dbg_s->is_pg_disabled &&
                    --g->dbg_powergating_disabled_refcount == 0) {

                        g->elcg_enabled = true;
                        gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
                        gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
                        gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A);

                        g->ops.clock_gating.slcg_gr_load_gating_prod(g,
                                        g->slcg_enabled);
                        g->ops.clock_gating.slcg_perf_load_gating_prod(g,
                                        g->slcg_enabled);

                        gk20a_pmu_enable_elpg(g);

                        gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
                        gk20a_channel_idle(dbg_s->pdev);
                        gk20a_idle(g->dev);
                }

                dbg_s->is_pg_disabled = false;
                break;

        default:
                gk20a_err(dev_from_gk20a(g),
                          "unrecognized dbg gpu powergate mode: 0x%x",
                          powermode);
                err = -ENOTTY;
                break;
        }

        return err;
}

static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_powergate_args *args)
{
        int err;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        gk20a_dbg_fn("%s powergate mode = %d",
                     dev_name(dbg_s->dev), args->mode);

        mutex_lock(&g->dbg_sessions_lock);
        err = dbg_set_powergate(dbg_s, args->mode);
        mutex_unlock(&g->dbg_sessions_lock);
        return err;
}

static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args)
{
        int err;
        struct gk20a *g = get_gk20a(dbg_s->pdev);
        struct channel_gk20a *ch_gk20a;

        gk20a_dbg_fn("%s smpc ctxsw mode = %d",
                     dev_name(dbg_s->dev), args->mode);

        /* Take the global lock, since we'll be doing global regops */
        mutex_lock(&g->dbg_sessions_lock);

        ch_gk20a = dbg_s->ch;

        if (!ch_gk20a) {
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "no bound channel for smpc ctxsw mode update\n");
                err = -EINVAL;
                goto clean_up;
        }

        err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
                      args->mode == NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
        if (err) {
                gk20a_err(dev_from_gk20a(dbg_s->g),
                          "error (%d) during smpc ctxsw mode update\n", err);
                goto clean_up;
        }
        /* The following regops are a hack/war to make up for the fact that we
         * just scribbled into the ctxsw image w/o really knowing whether
         * it was already swapped out in/out once or not, etc.
         */
        {
                struct nvhost_dbg_gpu_reg_op ops[4];
                int i;
                for (i = 0; i < ARRAY_SIZE(ops); i++) {
                        ops[i].op = NVHOST_DBG_GPU_REG_OP_WRITE_32;
                        ops[i].type = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX;
                        ops[i].status = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS;
                        ops[i].value_hi = 0;
                        ops[i].and_n_mask_lo = 0;
                        ops[i].and_n_mask_hi = 0;
                }
                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r();*/
                ops[0].offset = 0x00419e08;
                ops[0].value_lo = 0x1d;

                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */
                ops[1].offset = 0x00419e58;
                ops[1].value_lo = 0x1;

                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */
                ops[2].offset = 0x00419e68;
                ops[2].value_lo = 0xaaaa;

                /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */
                ops[3].offset = 0x00419f40;
                ops[3].value_lo = 0x18;

                err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops));
        }

clean_up:
        mutex_unlock(&g->dbg_sessions_lock);
        return err;
}
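
For context, here is a minimal userspace sketch of how a tool might drive the debugger session interface this file adds. The ioctl numbers and argument structs come from <linux/nvhost_dbg_gpu_ioctl.h> as included above; the device node path and the placeholder channel fd are assumptions for illustration and are not defined by this patch.

/*
 * Illustrative sketch only. Assumed: the dbg cdev registered by this driver
 * is exposed as /dev/nvhost-dbg-gpu and a GPU channel fd was obtained
 * elsewhere (here a placeholder value is used).
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvhost_dbg_gpu_ioctl.h>

int main(void)
{
        int dbg_fd = open("/dev/nvhost-dbg-gpu", O_RDWR); /* assumed node name */
        if (dbg_fd < 0)
                return 1;

        /* Bind an already-open GPU channel fd (placeholder value). */
        struct nvhost_dbg_gpu_bind_channel_args bind = { .channel_fd = 3 };
        if (ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL, &bind))
                perror("bind channel");

        /* Keep the GPU clocked and powered while the session is active. */
        struct nvhost_dbg_gpu_powergate_args pg = {
                .mode = NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE,
        };
        if (ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_POWERGATE, &pg))
                perror("powergate disable");

        /* ... NVHOST_DBG_GPU_IOCTL_REG_OPS / EVENTS_CTRL calls go here ... */

        /* Re-enable powergating; release (or unbind) also does this. */
        pg.mode = NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE;
        ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_POWERGATE, &pg);

        close(dbg_fd);
        return 0;
}

Note on the design this exercises: dbg_set_powergate() refcounts powergate-disable requests across sessions via dbg_powergating_disabled_refcount, so several concurrent debugger/profiler sessions may each request MODE_DISABLE, and clock/power gating is only restored once the last of them re-enables it or goes away.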