summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2017-10-25 12:56:09 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-10-29 14:02:15 -0400
commit4d2d890c01b94d10ad55643a4c2c159a98419efe (patch)
tree1043316e5e0dc163da79ae17c089b7818dc54d4b
parenta681c505c96dba58231ac050e4c8f4f81d79540f (diff)
gpu: nvgpu: Move ctxsw_trace_gk20a.c to common/linux
Migrate ctxsw_trace_gk20a.c to common/linux/ctxsw_trace.c. This has been done because the ctxsw tracing code is currently too tightly tied to the Linux OS due to its usage of a couple of system calls: - poll() - mmap() and general Linux driver framework code. As a result, pulling the logic out of the FECS tracing code is simply too large a scope for the time being. Instead the code was just copied as much as possible. The HAL ops for the FECS code were hidden behind the FECS tracing config so that the vm_area_struct is not used when QNX does not define said config. All other non-HAL functions called by the FECS ctxsw tracing code have now also been hidden by this config. This is not pretty but for the time being it seems like the way to go. JIRA NVGPU-287 Change-Id: Ib880ab237f4abd330dc66998692c86c4507149c2 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1586547 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/Makefile4
-rw-r--r--drivers/gpu/nvgpu/common/linux/ctxsw_trace.c (renamed from drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c)14
-rw-r--r--drivers/gpu/nvgpu/common/linux/module.c2
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c2
-rw-r--r--drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h18
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c27
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.c2
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h14
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c5
-rw-r--r--drivers/gpu/nvgpu/gp106/hal_gp106.c2
-rw-r--r--drivers/gpu/nvgpu/gp10b/hal_gp10b.c2
11 files changed, 64 insertions, 28 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index af7a8af5..f1a6f267 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -105,7 +105,6 @@ nvgpu-y := \
105 gk20a/fb_gk20a.o \ 105 gk20a/fb_gk20a.o \
106 gk20a/hal.o \ 106 gk20a/hal.o \
107 gk20a/tsg_gk20a.o \ 107 gk20a/tsg_gk20a.o \
108 gk20a/ctxsw_trace_gk20a.o \
109 gk20a/fecs_trace_gk20a.o \ 108 gk20a/fecs_trace_gk20a.o \
110 gk20a/mc_gk20a.o \ 109 gk20a/mc_gk20a.o \
111 gk20a/sim_gk20a.o \ 110 gk20a/sim_gk20a.o \
@@ -152,6 +151,9 @@ nvgpu-$(CONFIG_DEBUG_FS) += \
152 common/linux/debug_kmem.o 151 common/linux/debug_kmem.o
153endif 152endif
154 153
154nvgpu-$(CONFIG_GK20A_CTXSW_TRACE) += \
155 common/linux/ctxsw_trace.o
156
155nvgpu-$(CONFIG_TEGRA_GK20A) += common/linux/platform_gk20a_tegra.o 157nvgpu-$(CONFIG_TEGRA_GK20A) += common/linux/platform_gk20a_tegra.o
156nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o 158nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o
157nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o 159nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c
index fb33de23..81a54b7e 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c
@@ -28,18 +28,18 @@
28#include <trace/events/gk20a.h> 28#include <trace/events/gk20a.h>
29#include <uapi/linux/nvgpu.h> 29#include <uapi/linux/nvgpu.h>
30 30
31#include <nvgpu/kmem.h> 31#include "gk20a/gk20a.h"
32 32#include "gk20a/gr_gk20a.h"
33#include "ctxsw_trace_gk20a.h" 33#include "gk20a/ctxsw_trace_gk20a.h"
34#include "gk20a.h" 34#include "gk20a/platform_gk20a.h"
35#include "platform_gk20a.h"
36#include "gr_gk20a.h"
37#include "common/linux/os_linux.h"
38 35
36#include <nvgpu/kmem.h>
39#include <nvgpu/log.h> 37#include <nvgpu/log.h>
40#include <nvgpu/atomic.h> 38#include <nvgpu/atomic.h>
41#include <nvgpu/barrier.h> 39#include <nvgpu/barrier.h>
42 40
41#include "os_linux.h"
42
43#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> 43#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
44#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> 44#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
45 45
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 68ae1a66..fb5d3614 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -1104,7 +1104,9 @@ int nvgpu_remove(struct device *dev, struct class *class)
1104 if (platform->has_cde) 1104 if (platform->has_cde)
1105 gk20a_cde_destroy(l); 1105 gk20a_cde_destroy(l);
1106 1106
1107#ifdef CONFIG_GK20A_CTXSW_TRACE
1107 gk20a_ctxsw_trace_cleanup(g); 1108 gk20a_ctxsw_trace_cleanup(g);
1109#endif
1108 1110
1109 gk20a_sched_ctrl_cleanup(g); 1111 gk20a_sched_ctrl_cleanup(g);
1110 1112
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 0d011b06..546f4164 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -553,8 +553,10 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
553 gk20a_dbg_info("freeing bound channel context, timeout=%ld", 553 gk20a_dbg_info("freeing bound channel context, timeout=%ld",
554 timeout); 554 timeout);
555 555
556#ifdef CONFIG_GK20A_CTXSW_TRACE
556 if (g->ops.fecs_trace.unbind_channel && !ch->vpr) 557 if (g->ops.fecs_trace.unbind_channel && !ch->vpr)
557 g->ops.fecs_trace.unbind_channel(g, ch); 558 g->ops.fecs_trace.unbind_channel(g, ch);
559#endif
558 560
559 /* release channel ctx */ 561 /* release channel ctx */
560 g->ops.gr.free_channel_ctx(ch, was_tsg); 562 g->ops.gr.free_channel_ctx(ch, was_tsg);
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
index b270581b..dddb8603 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
@@ -23,6 +23,8 @@
23#ifndef __CTXSW_TRACE_GK20A_H 23#ifndef __CTXSW_TRACE_GK20A_H
24#define __CTXSW_TRACE_GK20A_H 24#define __CTXSW_TRACE_GK20A_H
25 25
26#include <nvgpu/types.h>
27
26#define GK20A_CTXSW_TRACE_NUM_DEVS 1 28#define GK20A_CTXSW_TRACE_NUM_DEVS 1
27 29
28struct file; 30struct file;
@@ -41,20 +43,22 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
41int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); 43int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
42long gk20a_ctxsw_dev_ioctl(struct file *filp, 44long gk20a_ctxsw_dev_ioctl(struct file *filp,
43 unsigned int cmd, unsigned long arg); 45 unsigned int cmd, unsigned long arg);
44ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *); 46ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
45unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *); 47 size_t size, loff_t *offs);
46int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *); 48unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
49 struct poll_table_struct *pts);
50int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma);
47int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size); 51int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size);
48int gk20a_ctxsw_dev_ring_free(struct gk20a *g); 52int gk20a_ctxsw_dev_ring_free(struct gk20a *g);
49int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma); 53int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma);
50 54
51int gk20a_ctxsw_trace_init(struct gk20a *); 55int gk20a_ctxsw_trace_init(struct gk20a *g);
52void gk20a_ctxsw_trace_cleanup(struct gk20a *); 56void gk20a_ctxsw_trace_cleanup(struct gk20a *g);
53int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); 57int gk20a_ctxsw_trace_write(struct gk20a *g,
58 struct nvgpu_ctxsw_trace_entry *entry);
54void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); 59void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid);
55 60
56void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch); 61void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch);
57void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg); 62void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg);
58 63
59
60#endif /* __CTXSW_TRACE_GK20A_H */ 64#endif /* __CTXSW_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index d10af9e9..17ae626b 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1228,16 +1228,24 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
1228 if (nvgpu_pmu_disable_elpg(g)) 1228 if (nvgpu_pmu_disable_elpg(g))
1229 nvgpu_err(g, "failed to set disable elpg"); 1229 nvgpu_err(g, "failed to set disable elpg");
1230 } 1230 }
1231 /* resetting engine will alter read/write index. 1231
1232 * need to flush circular buffer before re-enabling FECS. 1232#ifdef CONFIG_GK20A_CTXSW_TRACE
1233 /*
1234 * Resetting engine will alter read/write index. Need to flush
1235 * circular buffer before re-enabling FECS.
1233 */ 1236 */
1234 if (g->ops.fecs_trace.reset) 1237 if (g->ops.fecs_trace.reset)
1235 g->ops.fecs_trace.reset(g); 1238 g->ops.fecs_trace.reset(g);
1236 /*HALT_PIPELINE method, halt GR engine*/ 1239#endif
1240
1241 /* HALT_PIPELINE method, halt GR engine. */
1237 if (gr_gk20a_halt_pipe(g)) 1242 if (gr_gk20a_halt_pipe(g))
1238 nvgpu_err(g, "failed to HALT gr pipe"); 1243 nvgpu_err(g, "failed to HALT gr pipe");
1239 /* resetting engine using mc_enable_r() is not 1244
1240 enough, we do full init sequence */ 1245 /*
1246 * Resetting engine using mc_enable_r() is not enough; we must
1247 * do full init sequence.
1248 */
1241 gk20a_gr_reset(g); 1249 gk20a_gr_reset(g);
1242 if (g->support_pmu && g->can_elpg) 1250 if (g->support_pmu && g->can_elpg)
1243 nvgpu_pmu_enable_elpg(g); 1251 nvgpu_pmu_enable_elpg(g);
@@ -1618,6 +1626,8 @@ static bool gk20a_fifo_handle_mmu_fault(
1618 } 1626 }
1619 } 1627 }
1620 } 1628 }
1629
1630#ifdef CONFIG_GK20A_CTXSW_TRACE
1621 /* 1631 /*
1622 * For non fake mmu fault, both tsg and ch pointers 1632 * For non fake mmu fault, both tsg and ch pointers
1623 * could be valid. Check tsg first. 1633 * could be valid. Check tsg first.
@@ -1626,10 +1636,11 @@ static bool gk20a_fifo_handle_mmu_fault(
1626 gk20a_ctxsw_trace_tsg_reset(g, tsg); 1636 gk20a_ctxsw_trace_tsg_reset(g, tsg);
1627 else if (ch) 1637 else if (ch)
1628 gk20a_ctxsw_trace_channel_reset(g, ch); 1638 gk20a_ctxsw_trace_channel_reset(g, ch);
1639#endif
1629 1640
1630 /* disable the channel/TSG from hw and increment 1641 /*
1631 * syncpoints */ 1642 * Disable the channel/TSG from hw and increment syncpoints.
1632 1643 */
1633 if (tsg) { 1644 if (tsg) {
1634 if (!g->fifo.deferred_reset_pending) { 1645 if (!g->fifo.deferred_reset_pending) {
1635 if (!fake_fault) 1646 if (!fake_fault)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 47f6c56c..703a7c0c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -295,9 +295,11 @@ int gk20a_finalize_poweron(struct gk20a *g)
295 goto done; 295 goto done;
296 } 296 }
297 297
298#ifdef CONFIG_GK20A_CTXSW_TRACE
298 err = gk20a_ctxsw_trace_init(g); 299 err = gk20a_ctxsw_trace_init(g);
299 if (err) 300 if (err)
300 nvgpu_warn(g, "could not initialize ctxsw tracing"); 301 nvgpu_warn(g, "could not initialize ctxsw tracing");
302#endif
301 303
302 err = gk20a_sched_ctrl_init(g); 304 err = gk20a_sched_ctrl_init(g);
303 if (err) { 305 if (err) {
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d7fdffb0..a34f06b2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -689,18 +689,25 @@ struct gpu_ops {
689 int (*get_netlist_name)(struct gk20a *g, int index, char *name); 689 int (*get_netlist_name)(struct gk20a *g, int index, char *name);
690 bool (*is_fw_defined)(void); 690 bool (*is_fw_defined)(void);
691 } gr_ctx; 691 } gr_ctx;
692#ifdef CONFIG_GK20A_CTXSW_TRACE
693 /*
694 * Currently only supported on Linux due to the extremely tight
695 * integration with Linux device driver structure (in particular
696 * mmap).
697 */
692 struct { 698 struct {
693 int (*init)(struct gk20a *g); 699 int (*init)(struct gk20a *g);
694 int (*max_entries)(struct gk20a *, 700 int (*max_entries)(struct gk20a *,
695 struct nvgpu_ctxsw_trace_filter *); 701 struct nvgpu_ctxsw_trace_filter *filter);
696 int (*flush)(struct gk20a *g); 702 int (*flush)(struct gk20a *g);
697 int (*poll)(struct gk20a *g); 703 int (*poll)(struct gk20a *g);
698 int (*enable)(struct gk20a *g); 704 int (*enable)(struct gk20a *g);
699 int (*disable)(struct gk20a *g); 705 int (*disable)(struct gk20a *g);
700 bool (*is_enabled)(struct gk20a *g); 706 bool (*is_enabled)(struct gk20a *g);
701 int (*reset)(struct gk20a *g); 707 int (*reset)(struct gk20a *g);
702 int (*bind_channel)(struct gk20a *, struct channel_gk20a *); 708 int (*bind_channel)(struct gk20a *g, struct channel_gk20a *ch);
703 int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); 709 int (*unbind_channel)(struct gk20a *g,
710 struct channel_gk20a *ch);
704 int (*deinit)(struct gk20a *g); 711 int (*deinit)(struct gk20a *g);
705 int (*alloc_user_buffer)(struct gk20a *g, 712 int (*alloc_user_buffer)(struct gk20a *g,
706 void **buf, size_t *size); 713 void **buf, size_t *size);
@@ -710,6 +717,7 @@ struct gpu_ops {
710 int (*set_filter)(struct gk20a *g, 717 int (*set_filter)(struct gk20a *g,
711 struct nvgpu_ctxsw_trace_filter *filter); 718 struct nvgpu_ctxsw_trace_filter *filter);
712 } fecs_trace; 719 } fecs_trace;
720#endif
713 struct { 721 struct {
714 bool (*support_sparse)(struct gk20a *g); 722 bool (*support_sparse)(struct gk20a *g);
715 u64 (*gmmu_map)(struct vm_gk20a *vm, 723 u64 (*gmmu_map)(struct vm_gk20a *vm,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 1ea59a9d..f78d862c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3070,13 +3070,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
3070 "fail to load golden ctx image"); 3070 "fail to load golden ctx image");
3071 goto out; 3071 goto out;
3072 } 3072 }
3073#ifdef CONFIG_GK20A_CTXSW_TRACE
3073 if (g->ops.fecs_trace.bind_channel && !c->vpr) { 3074 if (g->ops.fecs_trace.bind_channel && !c->vpr) {
3074 err = g->ops.fecs_trace.bind_channel(g, c); 3075 err = g->ops.fecs_trace.bind_channel(g, c);
3075 if (err) { 3076 if (err)
3076 nvgpu_warn(g, 3077 nvgpu_warn(g,
3077 "fail to bind channel for ctxsw trace"); 3078 "fail to bind channel for ctxsw trace");
3078 }
3079 } 3079 }
3080#endif
3080 c->first_init = true; 3081 c->first_init = true;
3081 } 3082 }
3082 3083
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index f576278d..c5b66201 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -714,7 +714,9 @@ int gp106_init_hal(struct gk20a *g)
714 gops->clock_gating = gp106_ops.clock_gating; 714 gops->clock_gating = gp106_ops.clock_gating;
715 gops->fifo = gp106_ops.fifo; 715 gops->fifo = gp106_ops.fifo;
716 gops->gr_ctx = gp106_ops.gr_ctx; 716 gops->gr_ctx = gp106_ops.gr_ctx;
717#ifdef CONFIG_GK20A_CTXSW_TRACE
717 gops->fecs_trace = gp106_ops.fecs_trace; 718 gops->fecs_trace = gp106_ops.fecs_trace;
719#endif
718 gops->mm = gp106_ops.mm; 720 gops->mm = gp106_ops.mm;
719 gops->pramin = gp106_ops.pramin; 721 gops->pramin = gp106_ops.pramin;
720 gops->therm = gp106_ops.therm; 722 gops->therm = gp106_ops.therm;
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index cbec89bc..bb95f6db 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -619,7 +619,9 @@ int gp10b_init_hal(struct gk20a *g)
619 gops->clock_gating = gp10b_ops.clock_gating; 619 gops->clock_gating = gp10b_ops.clock_gating;
620 gops->fifo = gp10b_ops.fifo; 620 gops->fifo = gp10b_ops.fifo;
621 gops->gr_ctx = gp10b_ops.gr_ctx; 621 gops->gr_ctx = gp10b_ops.gr_ctx;
622#ifdef CONFIG_GK20A_CTXSW_TRACE
622 gops->fecs_trace = gp10b_ops.fecs_trace; 623 gops->fecs_trace = gp10b_ops.fecs_trace;
624#endif
623 gops->mm = gp10b_ops.mm; 625 gops->mm = gp10b_ops.mm;
624 gops->pramin = gp10b_ops.pramin; 626 gops->pramin = gp10b_ops.pramin;
625 gops->therm = gp10b_ops.therm; 627 gops->therm = gp10b_ops.therm;