diff options
author | Nitin Kumbhar <nkumbhar@nvidia.com> | 2018-09-04 06:49:47 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-09-09 20:22:24 -0400 |
commit | e93a4ca50b6b24d3db1f8fdc0e5030fecb5ea8d2 (patch) | |
tree | 204853d4398aaab0a5d69bbc4426e7f78d1753a7 /drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |
parent | b2ba12ed55e9b7079b4216d091003ad6f49a4433 (diff) |
gpu: nvgpu: move fecs trace debugfs to linux
Add FECS trace debugfs initialization as an OS op. The
debugfs nodes are set up for GPU versions that call
gk20a_fecs_trace_init().
JIRA NVGPU-602
Change-Id: I606ec31acbf04f633500be4c342db32f3f537794
Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1812449
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 188 |
1 file changed, 13 insertions, 175 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 3134df4d..b30d1743 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -20,10 +20,6 @@ | |||
20 | * DEALINGS IN THE SOFTWARE. | 20 | * DEALINGS IN THE SOFTWARE. |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #ifdef CONFIG_DEBUG_FS | ||
24 | #include <linux/debugfs.h> | ||
25 | #endif | ||
26 | |||
27 | #include <nvgpu/kmem.h> | 23 | #include <nvgpu/kmem.h> |
28 | #include <nvgpu/dma.h> | 24 | #include <nvgpu/dma.h> |
29 | #include <nvgpu/enabled.h> | 25 | #include <nvgpu/enabled.h> |
@@ -43,32 +39,13 @@ | |||
43 | #include "fecs_trace_gk20a.h" | 39 | #include "fecs_trace_gk20a.h" |
44 | #include "gk20a.h" | 40 | #include "gk20a.h" |
45 | #include "gr_gk20a.h" | 41 | #include "gr_gk20a.h" |
46 | #include "os/linux/os_linux.h" | ||
47 | 42 | ||
48 | #include <nvgpu/log.h> | 43 | #include <nvgpu/log.h> |
44 | #include <nvgpu/fecs_trace.h> | ||
49 | 45 | ||
50 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | 46 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> |
51 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | 47 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> |
52 | 48 | ||
53 | /* | ||
54 | * If HW circular buffer is getting too many "buffer full" conditions, | ||
55 | * increasing this constant should help (it drives Linux' internal buffer size). | ||
56 | */ | ||
57 | #define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10) | ||
58 | #define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */ | ||
59 | #define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL) | ||
60 | #define GK20A_FECS_TRACE_PTIMER_SHIFT 5 | ||
61 | |||
62 | struct gk20a_fecs_trace_record { | ||
63 | u32 magic_lo; | ||
64 | u32 magic_hi; | ||
65 | u32 context_id; | ||
66 | u32 context_ptr; | ||
67 | u32 new_context_id; | ||
68 | u32 new_context_ptr; | ||
69 | u64 ts[]; | ||
70 | }; | ||
71 | |||
72 | struct gk20a_fecs_trace_hash_ent { | 49 | struct gk20a_fecs_trace_hash_ent { |
73 | u32 context_ptr; | 50 | u32 context_ptr; |
74 | pid_t pid; | 51 | pid_t pid; |
@@ -85,29 +62,33 @@ struct gk20a_fecs_trace { | |||
85 | }; | 62 | }; |
86 | 63 | ||
87 | #ifdef CONFIG_GK20A_CTXSW_TRACE | 64 | #ifdef CONFIG_GK20A_CTXSW_TRACE |
88 | static inline u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts) | 65 | u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void) |
66 | { | ||
67 | return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); | ||
68 | } | ||
69 | |||
70 | u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts) | ||
89 | { | 71 | { |
90 | return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32)); | 72 | return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32)); |
91 | } | 73 | } |
92 | 74 | ||
93 | static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts) | 75 | u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts) |
94 | { | 76 | { |
95 | return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32); | 77 | return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32); |
96 | } | 78 | } |
97 | 79 | ||
98 | |||
99 | static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch) | 80 | static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch) |
100 | { | 81 | { |
101 | return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL); | 82 | return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL); |
102 | } | 83 | } |
103 | 84 | ||
104 | static inline int gk20a_fecs_trace_num_ts(void) | 85 | int gk20a_fecs_trace_num_ts(void) |
105 | { | 86 | { |
106 | return (ctxsw_prog_record_timestamp_record_size_in_bytes_v() | 87 | return (ctxsw_prog_record_timestamp_record_size_in_bytes_v() |
107 | - sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64); | 88 | - sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64); |
108 | } | 89 | } |
109 | 90 | ||
110 | static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( | 91 | struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( |
111 | struct gk20a *g, int idx) | 92 | struct gk20a *g, int idx) |
112 | { | 93 | { |
113 | struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem; | 94 | struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem; |
@@ -117,7 +98,7 @@ static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( | |||
117 | + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); | 98 | + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); |
118 | } | 99 | } |
119 | 100 | ||
120 | static bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r) | 101 | bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r) |
121 | { | 102 | { |
122 | /* | 103 | /* |
123 | * testing magic_hi should suffice. magic_lo is sometimes used | 104 | * testing magic_hi should suffice. magic_lo is sometimes used |
@@ -127,13 +108,13 @@ static bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r) | |||
127 | == ctxsw_prog_record_timestamp_magic_value_hi_v_value_v()); | 108 | == ctxsw_prog_record_timestamp_magic_value_hi_v_value_v()); |
128 | } | 109 | } |
129 | 110 | ||
130 | static int gk20a_fecs_trace_get_read_index(struct gk20a *g) | 111 | int gk20a_fecs_trace_get_read_index(struct gk20a *g) |
131 | { | 112 | { |
132 | return gr_gk20a_elpg_protected_call(g, | 113 | return gr_gk20a_elpg_protected_call(g, |
133 | gk20a_readl(g, gr_fecs_mailbox1_r())); | 114 | gk20a_readl(g, gr_fecs_mailbox1_r())); |
134 | } | 115 | } |
135 | 116 | ||
136 | static int gk20a_fecs_trace_get_write_index(struct gk20a *g) | 117 | int gk20a_fecs_trace_get_write_index(struct gk20a *g) |
137 | { | 118 | { |
138 | return gr_gk20a_elpg_protected_call(g, | 119 | return gr_gk20a_elpg_protected_call(g, |
139 | gk20a_readl(g, gr_fecs_mailbox0_r())); | 120 | gk20a_readl(g, gr_fecs_mailbox0_r())); |
@@ -424,147 +405,6 @@ size_t gk20a_fecs_trace_buffer_size(struct gk20a *g) | |||
424 | * ctxsw_prog_record_timestamp_record_size_in_bytes_v(); | 405 | * ctxsw_prog_record_timestamp_record_size_in_bytes_v(); |
425 | } | 406 | } |
426 | 407 | ||
427 | #ifdef CONFIG_DEBUG_FS | ||
428 | /* | ||
429 | * The sequence iterator functions. We simply use the count of the | ||
430 | * next line as our internal position. | ||
431 | */ | ||
432 | static void *gk20a_fecs_trace_debugfs_ring_seq_start( | ||
433 | struct seq_file *s, loff_t *pos) | ||
434 | { | ||
435 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
436 | return NULL; | ||
437 | |||
438 | return pos; | ||
439 | } | ||
440 | |||
441 | static void *gk20a_fecs_trace_debugfs_ring_seq_next( | ||
442 | struct seq_file *s, void *v, loff_t *pos) | ||
443 | { | ||
444 | ++(*pos); | ||
445 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
446 | return NULL; | ||
447 | return pos; | ||
448 | } | ||
449 | |||
450 | static void gk20a_fecs_trace_debugfs_ring_seq_stop( | ||
451 | struct seq_file *s, void *v) | ||
452 | { | ||
453 | } | ||
454 | |||
455 | static int gk20a_fecs_trace_debugfs_ring_seq_show( | ||
456 | struct seq_file *s, void *v) | ||
457 | { | ||
458 | loff_t *pos = (loff_t *) v; | ||
459 | struct gk20a *g = *(struct gk20a **)s->private; | ||
460 | struct gk20a_fecs_trace_record *r = | ||
461 | gk20a_fecs_trace_get_record(g, *pos); | ||
462 | int i; | ||
463 | const u32 invalid_tag = | ||
464 | ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); | ||
465 | u32 tag; | ||
466 | u64 timestamp; | ||
467 | |||
468 | seq_printf(s, "record #%lld (%p)\n", *pos, r); | ||
469 | seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo); | ||
470 | seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi); | ||
471 | if (gk20a_fecs_trace_is_valid_record(r)) { | ||
472 | seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr); | ||
473 | seq_printf(s, "\tcontext_id=%08x\n", r->context_id); | ||
474 | seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr); | ||
475 | seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id); | ||
476 | for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { | ||
477 | tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); | ||
478 | if (tag == invalid_tag) | ||
479 | continue; | ||
480 | timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); | ||
481 | timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; | ||
482 | seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp); | ||
483 | } | ||
484 | } | ||
485 | return 0; | ||
486 | } | ||
487 | |||
488 | /* | ||
489 | * Tie them all together into a set of seq_operations. | ||
490 | */ | ||
491 | static const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = { | ||
492 | .start = gk20a_fecs_trace_debugfs_ring_seq_start, | ||
493 | .next = gk20a_fecs_trace_debugfs_ring_seq_next, | ||
494 | .stop = gk20a_fecs_trace_debugfs_ring_seq_stop, | ||
495 | .show = gk20a_fecs_trace_debugfs_ring_seq_show | ||
496 | }; | ||
497 | |||
498 | /* | ||
499 | * Time to set up the file operations for our /proc file. In this case, | ||
500 | * all we need is an open function which sets up the sequence ops. | ||
501 | */ | ||
502 | |||
503 | static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode, | ||
504 | struct file *file) | ||
505 | { | ||
506 | struct gk20a **p; | ||
507 | |||
508 | if (!capable(CAP_SYS_ADMIN)) | ||
509 | return -EPERM; | ||
510 | |||
511 | p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops, | ||
512 | sizeof(struct gk20a *)); | ||
513 | if (!p) | ||
514 | return -ENOMEM; | ||
515 | |||
516 | *p = (struct gk20a *)inode->i_private; | ||
517 | return 0; | ||
518 | }; | ||
519 | |||
520 | /* | ||
521 | * The file operations structure contains our open function along with | ||
522 | * set of the canned seq_ ops. | ||
523 | */ | ||
524 | static const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = { | ||
525 | .owner = THIS_MODULE, | ||
526 | .open = gk20a_ctxsw_debugfs_ring_open, | ||
527 | .read = seq_read, | ||
528 | .llseek = seq_lseek, | ||
529 | .release = seq_release_private | ||
530 | }; | ||
531 | |||
532 | static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val) | ||
533 | { | ||
534 | *val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg); | ||
535 | return 0; | ||
536 | } | ||
537 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops, | ||
538 | gk20a_fecs_trace_debugfs_read, NULL, "%llu\n"); | ||
539 | |||
540 | static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val) | ||
541 | { | ||
542 | *val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg); | ||
543 | return 0; | ||
544 | } | ||
545 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops, | ||
546 | gk20a_fecs_trace_debugfs_write, NULL, "%llu\n"); | ||
547 | |||
548 | static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) | ||
549 | { | ||
550 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
551 | |||
552 | debugfs_create_file("ctxsw_trace_read", 0600, l->debugfs, g, | ||
553 | &gk20a_fecs_trace_debugfs_read_fops); | ||
554 | debugfs_create_file("ctxsw_trace_write", 0600, l->debugfs, g, | ||
555 | &gk20a_fecs_trace_debugfs_write_fops); | ||
556 | debugfs_create_file("ctxsw_trace_ring", 0600, l->debugfs, g, | ||
557 | &gk20a_fecs_trace_debugfs_ring_fops); | ||
558 | } | ||
559 | |||
560 | #else | ||
561 | |||
562 | static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) | ||
563 | { | ||
564 | } | ||
565 | |||
566 | #endif /* CONFIG_DEBUG_FS */ | ||
567 | |||
568 | int gk20a_fecs_trace_init(struct gk20a *g) | 408 | int gk20a_fecs_trace_init(struct gk20a *g) |
569 | { | 409 | { |
570 | struct gk20a_fecs_trace *trace; | 410 | struct gk20a_fecs_trace *trace; |
@@ -589,8 +429,6 @@ int gk20a_fecs_trace_init(struct gk20a *g) | |||
589 | 429 | ||
590 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); | 430 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); |
591 | 431 | ||
592 | gk20a_fecs_trace_debugfs_init(g); | ||
593 | |||
594 | trace->init = true; | 432 | trace->init = true; |
595 | 433 | ||
596 | return 0; | 434 | return 0; |