Diffstat (limited to 'include/gk20a/fecs_trace_gk20a.c')
-rw-r--r--  include/gk20a/fecs_trace_gk20a.c  744
1 file changed, 0 insertions, 744 deletions
diff --git a/include/gk20a/fecs_trace_gk20a.c b/include/gk20a/fecs_trace_gk20a.c
deleted file mode 100644
index 5c1c5e0..0000000
--- a/include/gk20a/fecs_trace_gk20a.c
+++ /dev/null
@@ -1,744 +0,0 @@
/*
 * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/enabled.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/thread.h>
#include <nvgpu/barrier.h>
#include <nvgpu/mm.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/timers.h>
#include <nvgpu/channel.h>

#include "fecs_trace_gk20a.h"
#include "gk20a.h"
#include "gr_gk20a.h"

#include <nvgpu/log.h>
#include <nvgpu/fecs_trace.h>

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

struct gk20a_fecs_trace_hash_ent {
	u32 context_ptr;
	pid_t pid;
	struct hlist_node node;
};

struct gk20a_fecs_trace {
	DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
	struct nvgpu_mutex hash_lock;
	struct nvgpu_mutex poll_lock;
	struct nvgpu_thread poll_task;
	bool init;
	struct nvgpu_mutex enable_lock;
	u32 enable_count;
};

#ifdef CONFIG_GK20A_CTXSW_TRACE
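/*
 * Each timestamp entry in a FECS trace record is a 64-bit value: the upper
 * word carries a tag (the _timestamp_hi_tag_* fields), the remaining bits
 * hold a PTIMER timestamp. The helpers below split a raw entry accordingly.
 */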
u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
}

u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
}

u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
{
	return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
}

static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
{
	return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
}

int gk20a_fecs_trace_num_ts(void)
{
	return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
		- sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
}

struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
	struct gk20a *g, int idx)
{
	struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;

	return (struct gk20a_fecs_trace_record *)
		((u8 *) mem->cpu_va
		+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
}

bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
{
	/*
	 * Testing magic_hi should suffice. magic_lo is sometimes used
	 * as a sequence number in experimental ucode.
	 */
	return (r->magic_hi
		== ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
}

int gk20a_fecs_trace_get_read_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox1_r()));
}

int gk20a_fecs_trace_get_write_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox0_r()));
}

static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
{
	nvgpu_log(g, gpu_dbg_ctxsw, "set read=%d", index);
	return gr_gk20a_elpg_protected_call(g,
			(gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
}

void gk20a_fecs_trace_hash_dump(struct gk20a *g)
{
	u32 bkt;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_ctxsw, "dumping hash table");

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each(trace->pid_hash_table, bkt, ent, node) {
		nvgpu_log(g, gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
			ent, bkt, ent->context_ptr, ent->pid);
	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
{
	struct gk20a_fecs_trace_hash_ent *he;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);

	he = nvgpu_kzalloc(g, sizeof(*he));
	if (unlikely(!he)) {
		nvgpu_warn(g,
			"can't alloc new hash entry for context_ptr=%x pid=%d",
			context_ptr, pid);
		return -ENOMEM;
	}

	he->context_ptr = context_ptr;
	he->pid = pid;
	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_add(trace->pid_hash_table, &he->node, context_ptr);
	nvgpu_mutex_release(&trace->hash_lock);
	return 0;
}

static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
{
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"freeing hash entry context_ptr=%x", context_ptr);

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
			context_ptr) {
		if (ent->context_ptr == context_ptr) {
			hash_del(&ent->node);
			nvgpu_log(g, gpu_dbg_ctxsw,
				"freed hash entry=%p context_ptr=%x", ent,
				ent->context_ptr);
			nvgpu_kfree(g, ent);
			break;
		}
	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
{
	u32 bkt;
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
		hash_del(&ent->node);
		nvgpu_kfree(g, ent);
	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
{
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t pid = 0;

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
		if (ent->context_ptr == context_ptr) {
			nvgpu_log(g, gpu_dbg_ctxsw,
				"found context_ptr=%x -> pid=%d",
				ent->context_ptr, ent->pid);
			pid = ent->pid;
			break;
		}
	}
	nvgpu_mutex_release(&trace->hash_lock);

	return pid;
}

/*
 * Converts HW entry format to userspace-facing format and pushes it to the
 * queue.
 */
static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
{
	int i;
	struct nvgpu_gpu_ctxsw_trace_entry entry = { };
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t cur_pid;
	pid_t new_pid;
	int count = 0;

	/* for now, only one VM */
	const int vmid = 0;

	struct gk20a_fecs_trace_record *r =
		gk20a_fecs_trace_get_record(g, index);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"consuming record trace=%p read=%d record=%p", trace, index, r);

	if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
		nvgpu_warn(g,
			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
			trace, index, r, r->magic_lo, r->magic_hi);
		return -EINVAL;
	}

	/* Clear magic_hi to detect cases where the CPU could read the write
	 * index before the FECS record is actually written to DRAM. This
	 * should not happen, as we force FECS writes to SYSMEM by reading
	 * through PRAMIN.
	 */
	r->magic_hi = 0;

	cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
	new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
		r->context_ptr, cur_pid, r->new_context_ptr, new_pid);

	entry.context_id = r->context_id;
	entry.vmid = vmid;

	/* break out FECS record into trace events */
	for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {

		entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
		entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
		entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;

		nvgpu_log(g, gpu_dbg_ctxsw,
			"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
			entry.tag, entry.timestamp, r->context_id,
			r->new_context_id);

		switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
		case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
		case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
			entry.context_id = r->new_context_id;
			entry.pid = new_pid;
			break;

		case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
		case NVGPU_GPU_CTXSW_TAG_SAVE_END:
			entry.context_id = r->context_id;
			entry.pid = cur_pid;
			break;

		default:
			/* tags are not guaranteed to start at the beginning */
			WARN_ON(entry.tag && (entry.tag != NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP));
			continue;
		}

		nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
			entry.tag, entry.context_id, entry.pid);

		if (!entry.context_id)
			continue;

		gk20a_ctxsw_trace_write(g, &entry);
		count++;
	}

	gk20a_ctxsw_trace_wake_up(g, vmid);
	return count;
}

int gk20a_fecs_trace_poll(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	int read = 0;
	int write = 0;
	int cnt;
	int err;

	err = gk20a_busy(g);
	if (unlikely(err))
		return err;

	nvgpu_mutex_acquire(&trace->poll_lock);
	write = gk20a_fecs_trace_get_write_index(g);
	if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
		nvgpu_err(g,
			"failed to acquire write index, write=%d", write);
		err = write;
		goto done;
	}

	read = gk20a_fecs_trace_get_read_index(g);

	cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
	if (!cnt)
		goto done;

	nvgpu_log(g, gpu_dbg_ctxsw,
		"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
		read, gk20a_fecs_trace_get_read_index(g), write, cnt);

	/* Ensure all FECS writes have made it to SYSMEM */
	g->ops.mm.fb_flush(g);

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
		/* Bits 30:0 of MAILBOX1 represent the actual read pointer value */
		read = read & (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
	}

	while (read != write) {
		cnt = gk20a_fecs_trace_ring_read(g, read);
		if (cnt > 0) {
			nvgpu_log(g, gpu_dbg_ctxsw,
				"number of trace entries added: %d", cnt);
		}

		/* Get to next record. */
		read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
	}

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
		/*
		 * The read pointer is about to be updated, so the MSB of the
		 * read pointer should be set back to 1. This keeps FECS trace
		 * enabled.
		 */
		read = read | (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
	}

	/* Ensure the FECS records have been updated before incrementing the
	 * read index.
	 */
	nvgpu_wmb();
	gk20a_fecs_trace_set_read_index(g, read);

done:
	nvgpu_mutex_release(&trace->poll_lock);
	gk20a_idle(g);
	return err;
}

static int gk20a_fecs_trace_periodic_polling(void *arg)
{
	struct gk20a *g = (struct gk20a *)arg;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	pr_info("%s: running\n", __func__);

	while (!nvgpu_thread_should_stop(&trace->poll_task)) {

		nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
				   GK20A_FECS_TRACE_FRAME_PERIOD_US * 2);

		gk20a_fecs_trace_poll(g);
	}

	return 0;
}

size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
{
	return GK20A_FECS_TRACE_NUM_RECORDS
			* ctxsw_prog_record_timestamp_record_size_in_bytes_v();
}

int gk20a_fecs_trace_init(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace;
	int err;

	trace = nvgpu_kzalloc(g, sizeof(struct gk20a_fecs_trace));
	if (!trace) {
		nvgpu_warn(g, "failed to allocate fecs_trace");
		return -ENOMEM;
	}
	g->fecs_trace = trace;

	err = nvgpu_mutex_init(&trace->poll_lock);
	if (err)
		goto clean;
	err = nvgpu_mutex_init(&trace->hash_lock);
	if (err)
		goto clean_poll_lock;

	err = nvgpu_mutex_init(&trace->enable_lock);
	if (err)
		goto clean_hash_lock;

	BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
	hash_init(trace->pid_hash_table);

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);

	trace->enable_count = 0;
	trace->init = true;

	return 0;

clean_hash_lock:
	nvgpu_mutex_destroy(&trace->hash_lock);
clean_poll_lock:
	nvgpu_mutex_destroy(&trace->poll_lock);
clean:
	nvgpu_kfree(g, trace);
	g->fecs_trace = NULL;
	return err;
}

int gk20a_fecs_trace_bind_channel(struct gk20a *g,
		struct channel_gk20a *ch)
{
	/*
	 * Map our circular buffer into the context space and store the GPU VA
	 * in the context header.
	 */

	u32 lo;
	u32 hi;
	u64 addr;
	struct tsg_gk20a *tsg;
	struct nvgpu_gr_ctx *ch_ctx;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct nvgpu_mem *mem;
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
	u32 aperture_mask;

	tsg = tsg_gk20a_from_ch(ch);
	if (tsg == NULL) {
		nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
		return -EINVAL;
	}

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
			"chid=%d context_ptr=%x inst_block=%llx",
			ch->chid, context_ptr,
			nvgpu_inst_block_addr(g, &ch->inst_block));

	ch_ctx = &tsg->gr_ctx;

	if (!trace)
		return -ENOMEM;

	mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
		addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
		nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
		aperture_mask = 0;
	} else {
		addr = nvgpu_inst_block_addr(g, mem);
		nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
		aperture_mask = nvgpu_aperture_mask(g, mem,
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
	}
	if (!addr)
		return -ENOMEM;

	lo = u64_lo32(addr);
	hi = u64_hi32(addr);

	mem = &ch_ctx->mem;

	nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
		lo, GK20A_FECS_TRACE_NUM_RECORDS);

	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
			GK20A_FECS_TRACE_NUM_RECORDS));

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
		mem = &ch->ctx_header;

	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
		lo);
	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
		aperture_mask);

	/* pid (process identifier) in user space corresponds to tgid (thread
	 * group id) in kernel space.
	 */
	gk20a_fecs_trace_hash_add(g, context_ptr, tsg->tgid);

	return 0;
}

int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
{
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);

	if (g->fecs_trace) {
		nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
			"ch=%p context_ptr=%x", ch, context_ptr);

		if (g->ops.fecs_trace.is_enabled(g)) {
			if (g->ops.fecs_trace.flush)
				g->ops.fecs_trace.flush(g);
			gk20a_fecs_trace_poll(g);
		}
		gk20a_fecs_trace_hash_del(g, context_ptr);
	}
	return 0;
}

int gk20a_fecs_trace_reset(struct gk20a *g)
{
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	if (!g->ops.fecs_trace.is_enabled(g))
		return 0;

	gk20a_fecs_trace_poll(g);
	return gk20a_fecs_trace_set_read_index(g, 0);
}

int gk20a_fecs_trace_deinit(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	if (!trace || !trace->init)
		return 0;

	/*
	 * Check whether the tracer was enabled before attempting to stop the
	 * tracer thread.
	 */
	if (trace->enable_count > 0) {
		nvgpu_thread_stop(&trace->poll_task);
	}
	gk20a_fecs_trace_free_hash_table(g);

	nvgpu_mutex_destroy(&trace->hash_lock);
	nvgpu_mutex_destroy(&trace->poll_lock);
	nvgpu_mutex_destroy(&trace->enable_lock);

	nvgpu_kfree(g, trace);
	g->fecs_trace = NULL;
	return 0;
}

int gk20a_gr_max_entries(struct gk20a *g,
		struct nvgpu_gpu_ctxsw_trace_filter *filter)
{
	int n;
	int tag;

	/* Compute number of entries per record, with given filter */
	for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
		n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);

	/* Return max number of entries generated for the whole ring */
	return n * GK20A_FECS_TRACE_NUM_RECORDS;
}

int gk20a_fecs_trace_enable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	int write;
	int err = 0;

	if (!trace)
		return -EINVAL;

	nvgpu_mutex_acquire(&trace->enable_lock);
	trace->enable_count++;

	if (trace->enable_count == 1U) {
		/* drop data in hw buffer */
		if (g->ops.fecs_trace.flush)
			g->ops.fecs_trace.flush(g);

		write = gk20a_fecs_trace_get_write_index(g);

		if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
			/*
			 * To enable FECS trace support, MAILBOX1's MSB
			 * (bit 31) should be set to 1. Bits 30:0 represent
			 * the actual pointer value.
			 */
			write = write |
				(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
		}
		gk20a_fecs_trace_set_read_index(g, write);

		/*
		 * FECS ucode does a priv holdoff around the assertion of
		 * context reset, so pri transactions (e.g. the MAILBOX1
		 * register write) might fail. Hence, write with ack, i.e.
		 * write and read it back, to make sure the MAILBOX1 write
		 * happened.
		 */
		while (gk20a_fecs_trace_get_read_index(g) != write) {
			nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
			gk20a_fecs_trace_set_read_index(g, write);
		}

		err = nvgpu_thread_create(&trace->poll_task, g,
				gk20a_fecs_trace_periodic_polling, __func__);
		if (err) {
			nvgpu_warn(g,
				"failed to create FECS polling task");
			goto done;
		}
	}

done:
	nvgpu_mutex_release(&trace->enable_lock);
	return err;
}

int gk20a_fecs_trace_disable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	int read = 0;

	if (trace == NULL) {
		return -EINVAL;
	}

	nvgpu_mutex_acquire(&trace->enable_lock);
	if (trace->enable_count <= 0U) {
		nvgpu_mutex_release(&trace->enable_lock);
		return 0;
	}
	trace->enable_count--;
	if (trace->enable_count == 0U) {
		if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
			/*
			 * To disable FECS trace support, MAILBOX1's MSB
			 * (bit 31) should be set to 0.
			 */
			read = gk20a_fecs_trace_get_read_index(g) &
				(~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));

			gk20a_fecs_trace_set_read_index(g, read);

			/*
			 * FECS ucode does a priv holdoff around the assertion
			 * of context reset, so pri transactions (e.g. the
			 * MAILBOX1 register write) might fail. Hence, write
			 * with ack, i.e. write and read it back, to make sure
			 * the MAILBOX1 write happened.
			 */
			while (gk20a_fecs_trace_get_read_index(g) != read) {
				nvgpu_log(g, gpu_dbg_ctxsw,
					"mailbox1 update failed");
				gk20a_fecs_trace_set_read_index(g, read);
			}
		}

		nvgpu_thread_stop(&trace->poll_task);
	}
	nvgpu_mutex_release(&trace->enable_lock);

	return -EPERM;
}

bool gk20a_fecs_trace_is_enabled(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	return (trace && nvgpu_thread_is_running(&trace->poll_task));
}

void gk20a_fecs_trace_reset_buffer(struct gk20a *g)
{
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	gk20a_fecs_trace_set_read_index(g,
		gk20a_fecs_trace_get_write_index(g));
}
#endif /* CONFIG_GK20A_CTXSW_TRACE */