diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 102 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 42 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 27 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/enabled.h | 1 |
8 files changed, 119 insertions, 62 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index c9d7ea06..117920da 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -28,6 +28,7 @@ | |||
28 | 28 | ||
29 | #include <nvgpu/kmem.h> | 29 | #include <nvgpu/kmem.h> |
30 | #include <nvgpu/dma.h> | 30 | #include <nvgpu/dma.h> |
31 | #include <nvgpu/enabled.h> | ||
31 | #include <nvgpu/bug.h> | 32 | #include <nvgpu/bug.h> |
32 | #include <nvgpu/hashtable.h> | 33 | #include <nvgpu/hashtable.h> |
33 | #include <nvgpu/circ_buf.h> | 34 | #include <nvgpu/circ_buf.h> |
@@ -51,7 +52,7 @@ | |||
51 | * If HW circular buffer is getting too many "buffer full" conditions, | 52 | * If HW circular buffer is getting too many "buffer full" conditions, |
52 | * increasing this constant should help (it drives Linux' internal buffer size). | 53 | * increasing this constant should help (it drives Linux' internal buffer size). |
53 | */ | 54 | */ |
54 | #define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6) | 55 | #define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10) |
55 | #define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */ | 56 | #define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */ |
56 | #define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL) | 57 | #define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL) |
57 | #define GK20A_FECS_TRACE_PTIMER_SHIFT 5 | 58 | #define GK20A_FECS_TRACE_PTIMER_SHIFT 5 |
@@ -74,7 +75,6 @@ struct gk20a_fecs_trace_hash_ent { | |||
74 | 75 | ||
75 | struct gk20a_fecs_trace { | 76 | struct gk20a_fecs_trace { |
76 | 77 | ||
77 | struct nvgpu_mem trace_buf; | ||
78 | DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); | 78 | DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); |
79 | struct nvgpu_mutex hash_lock; | 79 | struct nvgpu_mutex hash_lock; |
80 | struct nvgpu_mutex poll_lock; | 80 | struct nvgpu_mutex poll_lock; |
@@ -106,10 +106,12 @@ static inline int gk20a_fecs_trace_num_ts(void) | |||
106 | } | 106 | } |
107 | 107 | ||
108 | static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( | 108 | static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( |
109 | struct gk20a_fecs_trace *trace, int idx) | 109 | struct gk20a *g, int idx) |
110 | { | 110 | { |
111 | struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem; | ||
112 | |||
111 | return (struct gk20a_fecs_trace_record *) | 113 | return (struct gk20a_fecs_trace_record *) |
112 | ((u8 *) trace->trace_buf.cpu_va | 114 | ((u8 *) mem->cpu_va |
113 | + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); | 115 | + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); |
114 | } | 116 | } |
115 | 117 | ||
@@ -258,12 +260,13 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) | |||
258 | struct gk20a_fecs_trace *trace = g->fecs_trace; | 260 | struct gk20a_fecs_trace *trace = g->fecs_trace; |
259 | pid_t cur_pid; | 261 | pid_t cur_pid; |
260 | pid_t new_pid; | 262 | pid_t new_pid; |
263 | int count = 0; | ||
261 | 264 | ||
262 | /* for now, only one VM */ | 265 | /* for now, only one VM */ |
263 | const int vmid = 0; | 266 | const int vmid = 0; |
264 | 267 | ||
265 | struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record( | 268 | struct gk20a_fecs_trace_record *r = |
266 | trace, index); | 269 | gk20a_fecs_trace_get_record(g, index); |
267 | 270 | ||
268 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, | 271 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, |
269 | "consuming record trace=%p read=%d record=%p", trace, index, r); | 272 | "consuming record trace=%p read=%d record=%p", trace, index, r); |
@@ -334,10 +337,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) | |||
334 | continue; | 337 | continue; |
335 | 338 | ||
336 | gk20a_ctxsw_trace_write(g, &entry); | 339 | gk20a_ctxsw_trace_write(g, &entry); |
340 | count++; | ||
337 | } | 341 | } |
338 | 342 | ||
339 | gk20a_ctxsw_trace_wake_up(g, vmid); | 343 | gk20a_ctxsw_trace_wake_up(g, vmid); |
340 | return 0; | 344 | return count; |
341 | } | 345 | } |
342 | 346 | ||
343 | int gk20a_fecs_trace_poll(struct gk20a *g) | 347 | int gk20a_fecs_trace_poll(struct gk20a *g) |
@@ -376,15 +380,16 @@ int gk20a_fecs_trace_poll(struct gk20a *g) | |||
376 | g->ops.mm.fb_flush(g); | 380 | g->ops.mm.fb_flush(g); |
377 | 381 | ||
378 | while (read != write) { | 382 | while (read != write) { |
379 | /* Ignore error code, as we want to consume all records */ | 383 | cnt = gk20a_fecs_trace_ring_read(g, read); |
380 | (void)gk20a_fecs_trace_ring_read(g, read); | 384 | if (cnt <= 0) |
385 | break; | ||
381 | 386 | ||
382 | /* Get to next record. */ | 387 | /* Get to next record. */ |
383 | read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); | 388 | read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); |
384 | } | 389 | } |
385 | 390 | ||
386 | /* ensure FECS records has been updated before incrementing read index */ | 391 | /* ensure FECS records has been updated before incrementing read index */ |
387 | nvgpu_smp_wmb(); | 392 | nvgpu_wmb(); |
388 | gk20a_fecs_trace_set_read_index(g, read); | 393 | gk20a_fecs_trace_set_read_index(g, read); |
389 | 394 | ||
390 | done: | 395 | done: |
@@ -411,20 +416,10 @@ static int gk20a_fecs_trace_periodic_polling(void *arg) | |||
411 | return 0; | 416 | return 0; |
412 | } | 417 | } |
413 | 418 | ||
414 | static int gk20a_fecs_trace_alloc_ring(struct gk20a *g) | 419 | size_t gk20a_fecs_trace_buffer_size(struct gk20a *g) |
415 | { | 420 | { |
416 | struct gk20a_fecs_trace *trace = g->fecs_trace; | 421 | return GK20A_FECS_TRACE_NUM_RECORDS |
417 | 422 | * ctxsw_prog_record_timestamp_record_size_in_bytes_v(); | |
418 | return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS | ||
419 | * ctxsw_prog_record_timestamp_record_size_in_bytes_v(), | ||
420 | &trace->trace_buf); | ||
421 | } | ||
422 | |||
423 | static void gk20a_fecs_trace_free_ring(struct gk20a *g) | ||
424 | { | ||
425 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
426 | |||
427 | nvgpu_dma_free(g, &trace->trace_buf); | ||
428 | } | 423 | } |
429 | 424 | ||
430 | #ifdef CONFIG_DEBUG_FS | 425 | #ifdef CONFIG_DEBUG_FS |
@@ -460,8 +455,8 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show( | |||
460 | { | 455 | { |
461 | loff_t *pos = (loff_t *) v; | 456 | loff_t *pos = (loff_t *) v; |
462 | struct gk20a *g = *(struct gk20a **)s->private; | 457 | struct gk20a *g = *(struct gk20a **)s->private; |
463 | struct gk20a_fecs_trace *trace = g->fecs_trace; | 458 | struct gk20a_fecs_trace_record *r = |
464 | struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos); | 459 | gk20a_fecs_trace_get_record(g, *pos); |
465 | int i; | 460 | int i; |
466 | const u32 invalid_tag = | 461 | const u32 invalid_tag = |
467 | ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); | 462 | ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); |
@@ -588,12 +583,6 @@ int gk20a_fecs_trace_init(struct gk20a *g) | |||
588 | goto clean_poll_lock; | 583 | goto clean_poll_lock; |
589 | 584 | ||
590 | BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)); | 585 | BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)); |
591 | err = gk20a_fecs_trace_alloc_ring(g); | ||
592 | if (err) { | ||
593 | nvgpu_warn(g, "failed to allocate FECS ring"); | ||
594 | goto clean_hash_lock; | ||
595 | } | ||
596 | |||
597 | hash_init(trace->pid_hash_table); | 586 | hash_init(trace->pid_hash_table); |
598 | 587 | ||
599 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); | 588 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); |
@@ -604,8 +593,6 @@ int gk20a_fecs_trace_init(struct gk20a *g) | |||
604 | 593 | ||
605 | return 0; | 594 | return 0; |
606 | 595 | ||
607 | clean_hash_lock: | ||
608 | nvgpu_mutex_destroy(&trace->hash_lock); | ||
609 | clean_poll_lock: | 596 | clean_poll_lock: |
610 | nvgpu_mutex_destroy(&trace->poll_lock); | 597 | nvgpu_mutex_destroy(&trace->poll_lock); |
611 | clean: | 598 | clean: |
@@ -624,14 +611,14 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
624 | 611 | ||
625 | u32 lo; | 612 | u32 lo; |
626 | u32 hi; | 613 | u32 hi; |
627 | u64 pa; | 614 | u64 addr; |
628 | struct tsg_gk20a *tsg; | 615 | struct tsg_gk20a *tsg; |
629 | struct nvgpu_gr_ctx *ch_ctx; | 616 | struct nvgpu_gr_ctx *ch_ctx; |
630 | struct gk20a_fecs_trace *trace = g->fecs_trace; | 617 | struct gk20a_fecs_trace *trace = g->fecs_trace; |
631 | struct nvgpu_mem *mem; | 618 | struct nvgpu_mem *mem; |
632 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); | 619 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); |
633 | pid_t pid; | 620 | pid_t pid; |
634 | u32 aperture; | 621 | u32 aperture_mask; |
635 | 622 | ||
636 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, | 623 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, |
637 | "chid=%d context_ptr=%x inst_block=%llx", | 624 | "chid=%d context_ptr=%x inst_block=%llx", |
@@ -648,34 +635,54 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
648 | if (!trace) | 635 | if (!trace) |
649 | return -ENOMEM; | 636 | return -ENOMEM; |
650 | 637 | ||
651 | pa = nvgpu_inst_block_addr(g, &trace->trace_buf); | 638 | mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem; |
652 | if (!pa) | 639 | |
653 | return -ENOMEM; | 640 | if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { |
654 | aperture = nvgpu_aperture_mask(g, &trace->trace_buf, | 641 | addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA]; |
642 | nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr); | ||
643 | aperture_mask = 0; | ||
644 | } else { | ||
645 | addr = nvgpu_inst_block_addr(g, mem); | ||
646 | nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr); | ||
647 | aperture_mask = nvgpu_aperture_mask(g, mem, | ||
655 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), | 648 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), |
656 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), | 649 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), |
657 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); | 650 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); |
651 | } | ||
652 | if (!addr) | ||
653 | return -ENOMEM; | ||
654 | |||
655 | lo = u64_lo32(addr); | ||
656 | hi = u64_hi32(addr); | ||
657 | |||
658 | mem = &ch_ctx->mem; | ||
658 | 659 | ||
659 | if (nvgpu_mem_begin(g, mem)) | 660 | if (nvgpu_mem_begin(g, mem)) |
660 | return -ENOMEM; | 661 | return -ENOMEM; |
661 | 662 | ||
662 | lo = u64_lo32(pa); | ||
663 | hi = u64_hi32(pa); | ||
664 | |||
665 | nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, | 663 | nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, |
666 | lo, GK20A_FECS_TRACE_NUM_RECORDS); | 664 | lo, GK20A_FECS_TRACE_NUM_RECORDS); |
667 | 665 | ||
668 | nvgpu_mem_wr(g, mem, | 666 | nvgpu_mem_wr(g, mem, |
667 | ctxsw_prog_main_image_context_timestamp_buffer_control_o(), | ||
668 | ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( | ||
669 | GK20A_FECS_TRACE_NUM_RECORDS)); | ||
670 | |||
671 | nvgpu_mem_end(g, mem); | ||
672 | |||
673 | if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) | ||
674 | mem = &ch->ctx_header.mem; | ||
675 | |||
676 | if (nvgpu_mem_begin(g, mem)) | ||
677 | return -ENOMEM; | ||
678 | |||
679 | nvgpu_mem_wr(g, mem, | ||
669 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), | 680 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), |
670 | lo); | 681 | lo); |
671 | nvgpu_mem_wr(g, mem, | 682 | nvgpu_mem_wr(g, mem, |
672 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), | 683 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), |
673 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | | 684 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | |
674 | aperture); | 685 | aperture_mask); |
675 | nvgpu_mem_wr(g, mem, | ||
676 | ctxsw_prog_main_image_context_timestamp_buffer_control_o(), | ||
677 | ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( | ||
678 | GK20A_FECS_TRACE_NUM_RECORDS)); | ||
679 | 686 | ||
680 | nvgpu_mem_end(g, mem); | 687 | nvgpu_mem_end(g, mem); |
681 | 688 | ||
@@ -728,7 +735,6 @@ int gk20a_fecs_trace_deinit(struct gk20a *g) | |||
728 | return 0; | 735 | return 0; |
729 | 736 | ||
730 | nvgpu_thread_stop(&trace->poll_task); | 737 | nvgpu_thread_stop(&trace->poll_task); |
731 | gk20a_fecs_trace_free_ring(g); | ||
732 | gk20a_fecs_trace_free_hash_table(g); | 738 | gk20a_fecs_trace_free_hash_table(g); |
733 | 739 | ||
734 | nvgpu_mutex_destroy(&g->fecs_trace->hash_lock); | 740 | nvgpu_mutex_destroy(&g->fecs_trace->hash_lock); |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h index e310a18a..acac14c6 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), | 5 | * copy of this software and associated documentation files (the "Software"), |
@@ -39,5 +39,6 @@ int gk20a_gr_max_entries(struct gk20a *g, | |||
39 | int gk20a_fecs_trace_enable(struct gk20a *g); | 39 | int gk20a_fecs_trace_enable(struct gk20a *g); |
40 | int gk20a_fecs_trace_disable(struct gk20a *g); | 40 | int gk20a_fecs_trace_disable(struct gk20a *g); |
41 | bool gk20a_fecs_trace_is_enabled(struct gk20a *g); | 41 | bool gk20a_fecs_trace_is_enabled(struct gk20a *g); |
42 | size_t gk20a_fecs_trace_buffer_size(struct gk20a *g); | ||
42 | 43 | ||
43 | #endif /* __FECS_TRACE_GK20A_H */ | 44 | #endif /* __FECS_TRACE_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index a082cd92..7c51afca 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -41,6 +41,7 @@ | |||
41 | 41 | ||
42 | #include "gk20a.h" | 42 | #include "gk20a.h" |
43 | #include "gr_gk20a.h" | 43 | #include "gr_gk20a.h" |
44 | #include "gk20a/fecs_trace_gk20a.h" | ||
44 | #include "gr_ctx_gk20a.h" | 45 | #include "gr_ctx_gk20a.h" |
45 | #include "gr_pri_gk20a.h" | 46 | #include "gr_pri_gk20a.h" |
46 | #include "regops_gk20a.h" | 47 | #include "regops_gk20a.h" |
@@ -2499,6 +2500,10 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) | |||
2499 | return ret; | 2500 | return ret; |
2500 | } | 2501 | } |
2501 | g->gr.ctx_vars.priv_access_map_size = 512 * 1024; | 2502 | g->gr.ctx_vars.priv_access_map_size = 512 * 1024; |
2503 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
2504 | g->gr.ctx_vars.fecs_trace_buffer_size = | ||
2505 | gk20a_fecs_trace_buffer_size(g); | ||
2506 | #endif | ||
2502 | } | 2507 | } |
2503 | 2508 | ||
2504 | nvgpu_log_fn(g, "done"); | 2509 | nvgpu_log_fn(g, "done"); |
@@ -2630,6 +2635,20 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) | |||
2630 | if (err) | 2635 | if (err) |
2631 | goto clean_up; | 2636 | goto clean_up; |
2632 | 2637 | ||
2638 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
2639 | nvgpu_log_info(g, "fecs_trace_buffer_size : %d", | ||
2640 | gr->ctx_vars.fecs_trace_buffer_size); | ||
2641 | |||
2642 | err = nvgpu_dma_alloc_sys(g, | ||
2643 | gr->ctx_vars.fecs_trace_buffer_size, | ||
2644 | &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem); | ||
2645 | if (err) | ||
2646 | goto clean_up; | ||
2647 | |||
2648 | gr->global_ctx_buffer[FECS_TRACE_BUFFER].destroy = | ||
2649 | gk20a_gr_destroy_ctx_buffer; | ||
2650 | #endif | ||
2651 | |||
2633 | nvgpu_log_fn(g, "done"); | 2652 | nvgpu_log_fn(g, "done"); |
2634 | return 0; | 2653 | return 0; |
2635 | 2654 | ||
@@ -2769,6 +2788,21 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2769 | g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; | 2788 | g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; |
2770 | 2789 | ||
2771 | tsg->gr_ctx.global_ctx_buffer_mapped = true; | 2790 | tsg->gr_ctx.global_ctx_buffer_mapped = true; |
2791 | |||
2792 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
2793 | /* FECS trace buffer */ | ||
2794 | if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { | ||
2795 | mem = &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem; | ||
2796 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0, | ||
2797 | gk20a_mem_flag_none, true, mem->aperture); | ||
2798 | if (!gpu_va) | ||
2799 | goto clean_up; | ||
2800 | g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va; | ||
2801 | g_bfr_size[FECS_TRACE_BUFFER_VA] = mem->size; | ||
2802 | g_bfr_index[FECS_TRACE_BUFFER_VA] = FECS_TRACE_BUFFER; | ||
2803 | } | ||
2804 | #endif | ||
2805 | |||
2772 | return 0; | 2806 | return 0; |
2773 | 2807 | ||
2774 | clean_up: | 2808 | clean_up: |
@@ -3050,6 +3084,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
3050 | "fail to commit gr ctx buffer"); | 3084 | "fail to commit gr ctx buffer"); |
3051 | goto out; | 3085 | goto out; |
3052 | } | 3086 | } |
3087 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
3088 | if (g->ops.fecs_trace.bind_channel && !c->vpr) { | ||
3089 | err = g->ops.fecs_trace.bind_channel(g, c); | ||
3090 | if (err) | ||
3091 | nvgpu_warn(g, | ||
3092 | "fail to bind channel for ctxsw trace"); | ||
3093 | } | ||
3094 | #endif | ||
3053 | } | 3095 | } |
3054 | 3096 | ||
3055 | nvgpu_log_fn(g, "done"); | 3097 | nvgpu_log_fn(g, "done"); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 01c7f43d..66d3c22e 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -79,6 +79,7 @@ enum /* global_ctx_buffer */ { | |||
79 | ATTRIBUTE_VPR = 5, | 79 | ATTRIBUTE_VPR = 5, |
80 | GOLDEN_CTX = 6, | 80 | GOLDEN_CTX = 6, |
81 | PRIV_ACCESS_MAP = 7, | 81 | PRIV_ACCESS_MAP = 7, |
82 | FECS_TRACE_BUFFER = 8, | ||
82 | NR_GLOBAL_CTX_BUF = 9 | 83 | NR_GLOBAL_CTX_BUF = 9 |
83 | }; | 84 | }; |
84 | 85 | ||
@@ -89,6 +90,7 @@ enum /*global_ctx_buffer_va */ { | |||
89 | ATTRIBUTE_VA = 2, | 90 | ATTRIBUTE_VA = 2, |
90 | GOLDEN_CTX_VA = 3, | 91 | GOLDEN_CTX_VA = 3, |
91 | PRIV_ACCESS_MAP_VA = 4, | 92 | PRIV_ACCESS_MAP_VA = 4, |
93 | FECS_TRACE_BUFFER_VA = 5, | ||
92 | NR_GLOBAL_CTX_BUF_VA = 6 | 94 | NR_GLOBAL_CTX_BUF_VA = 6 |
93 | }; | 95 | }; |
94 | 96 | ||
@@ -290,6 +292,8 @@ struct gr_gk20a { | |||
290 | 292 | ||
291 | u32 priv_access_map_size; | 293 | u32 priv_access_map_size; |
292 | 294 | ||
295 | u32 fecs_trace_buffer_size; | ||
296 | |||
293 | struct gr_ucode_gk20a ucode; | 297 | struct gr_ucode_gk20a ucode; |
294 | 298 | ||
295 | struct av_list_gk20a sw_bundle_init; | 299 | struct av_list_gk20a sw_bundle_init; |
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 61c6cb0f..632f1063 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c | |||
@@ -834,6 +834,7 @@ int gp106_init_hal(struct gk20a *g) | |||
834 | __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true); | 834 | __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true); |
835 | __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); | 835 | __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); |
836 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); | 836 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); |
837 | __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false); | ||
837 | 838 | ||
838 | /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ | 839 | /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ |
839 | if (gops->fuse.check_priv_security(g)) | 840 | if (gops->fuse.check_priv_security(g)) |
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 62164d16..80e07b78 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c | |||
@@ -732,6 +732,7 @@ int gp10b_init_hal(struct gk20a *g) | |||
732 | 732 | ||
733 | __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true); | 733 | __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true); |
734 | __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false); | 734 | __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false); |
735 | __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false); | ||
735 | 736 | ||
736 | /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ | 737 | /* Read fuses to check if gpu needs to boot in secure/non-secure mode */ |
737 | if (gops->fuse.check_priv_security(g)) | 738 | if (gops->fuse.check_priv_security(g)) |
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index f9ac1f2a..9d7dca95 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c | |||
@@ -586,20 +586,20 @@ static const struct gpu_ops gv11b_ops = { | |||
586 | }, | 586 | }, |
587 | #ifdef CONFIG_GK20A_CTXSW_TRACE | 587 | #ifdef CONFIG_GK20A_CTXSW_TRACE |
588 | .fecs_trace = { | 588 | .fecs_trace = { |
589 | .alloc_user_buffer = NULL, | 589 | .alloc_user_buffer = gk20a_ctxsw_dev_ring_alloc, |
590 | .free_user_buffer = NULL, | 590 | .free_user_buffer = gk20a_ctxsw_dev_ring_free, |
591 | .mmap_user_buffer = NULL, | 591 | .mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer, |
592 | .init = NULL, | 592 | .init = gk20a_fecs_trace_init, |
593 | .deinit = NULL, | 593 | .deinit = gk20a_fecs_trace_deinit, |
594 | .enable = NULL, | 594 | .enable = gk20a_fecs_trace_enable, |
595 | .disable = NULL, | 595 | .disable = gk20a_fecs_trace_disable, |
596 | .is_enabled = NULL, | 596 | .is_enabled = gk20a_fecs_trace_is_enabled, |
597 | .reset = NULL, | 597 | .reset = gk20a_fecs_trace_reset, |
598 | .flush = NULL, | 598 | .flush = NULL, |
599 | .poll = NULL, | 599 | .poll = gk20a_fecs_trace_poll, |
600 | .bind_channel = NULL, | 600 | .bind_channel = gk20a_fecs_trace_bind_channel, |
601 | .unbind_channel = NULL, | 601 | .unbind_channel = gk20a_fecs_trace_unbind_channel, |
602 | .max_entries = NULL, | 602 | .max_entries = gk20a_gr_max_entries, |
603 | }, | 603 | }, |
604 | #endif /* CONFIG_GK20A_CTXSW_TRACE */ | 604 | #endif /* CONFIG_GK20A_CTXSW_TRACE */ |
605 | .mm = { | 605 | .mm = { |
@@ -843,6 +843,7 @@ int gv11b_init_hal(struct gk20a *g) | |||
843 | } | 843 | } |
844 | 844 | ||
845 | __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); | 845 | __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); |
846 | __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true); | ||
846 | g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; | 847 | g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; |
847 | 848 | ||
848 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); | 849 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false); |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index 0ffb0488..c352488c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h | |||
@@ -34,6 +34,7 @@ struct gk20a; | |||
34 | #define NVGPU_IS_FMODEL 1 | 34 | #define NVGPU_IS_FMODEL 1 |
35 | #define NVGPU_DRIVER_IS_DYING 2 | 35 | #define NVGPU_DRIVER_IS_DYING 2 |
36 | #define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3 | 36 | #define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3 |
37 | #define NVGPU_FECS_TRACE_VA 4 | ||
37 | 38 | ||
38 | /* | 39 | /* |
39 | * ECC flags | 40 | * ECC flags |