diff options
author | seshendra Gadagottu <sgadagottu@nvidia.com> | 2017-06-22 19:28:19 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-10-20 22:03:56 -0400 |
commit | cf70c925cd3d8e6c83feee04316b080d5f258afc (patch) | |
tree | 54c88ee84cff31561accbd72728bfc2a56f7a510 | |
parent | ed8ac6e005d95e051bd03a182bbe0aa09a3c2266 (diff) |
gpu: nvgpu: gv11b: update css ops
Updated the following HAL functions for gv11b css and reused
them for gv100:
enable_snapshot
disable_snapshot
check_data_available
These changes are needed for the following reasons:
1. Register offsets for perf_pmasys_* have changed
for gv11b/gv100 compared to gk20a.
2. The memory type for perf_pmasys_mem_block_target is now set
based on the memory aperture used for the hwpm inst_block.
Bug 200327596
Change-Id: I500d17670e2f389d8d0e77884374bcc3504a41f8
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1507546
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/hal_gv100.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c | 206 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/css_gr_gv11b.h | 34 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 7 |
5 files changed, 249 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index d6d60147..c6958bec 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile | |||
@@ -7,6 +7,7 @@ nvgpu-y += \ | |||
7 | $(nvgpu-t19x)/common/linux/module_t19x.o \ | 7 | $(nvgpu-t19x)/common/linux/module_t19x.o \ |
8 | $(nvgpu-t19x)/common/linux/pci_t19x.o \ | 8 | $(nvgpu-t19x)/common/linux/pci_t19x.o \ |
9 | $(nvgpu-t19x)/gv11b/gv11b.o \ | 9 | $(nvgpu-t19x)/gv11b/gv11b.o \ |
10 | $(nvgpu-t19x)/gv11b/css_gr_gv11b.o \ | ||
10 | $(nvgpu-t19x)/gv11b/dbg_gpu_gv11b.o \ | 11 | $(nvgpu-t19x)/gv11b/dbg_gpu_gv11b.o \ |
11 | $(nvgpu-t19x)/gv11b/mc_gv11b.o \ | 12 | $(nvgpu-t19x)/gv11b/mc_gv11b.o \ |
12 | $(nvgpu-t19x)/gv11b/ltc_gv11b.o \ | 13 | $(nvgpu-t19x)/gv11b/ltc_gv11b.o \ |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index cefaf1ae..2007eee0 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c | |||
@@ -76,6 +76,7 @@ | |||
76 | #include "gp10b/mm_gp10b.h" | 76 | #include "gp10b/mm_gp10b.h" |
77 | #include "gp10b/pmu_gp10b.h" | 77 | #include "gp10b/pmu_gp10b.h" |
78 | 78 | ||
79 | #include "gv11b/css_gr_gv11b.h" | ||
79 | #include "gv11b/dbg_gpu_gv11b.h" | 80 | #include "gv11b/dbg_gpu_gv11b.h" |
80 | #include "gv11b/hal_gv11b.h" | 81 | #include "gv11b/hal_gv11b.h" |
81 | #include "gv100/gr_gv100.h" | 82 | #include "gv100/gr_gv100.h" |
@@ -639,9 +640,9 @@ static const struct gpu_ops gv100_ops = { | |||
639 | }, | 640 | }, |
640 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 641 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
641 | .css = { | 642 | .css = { |
642 | .enable_snapshot = css_hw_enable_snapshot, | 643 | .enable_snapshot = gv11b_css_hw_enable_snapshot, |
643 | .disable_snapshot = css_hw_disable_snapshot, | 644 | .disable_snapshot = gv11b_css_hw_disable_snapshot, |
644 | .check_data_available = css_hw_check_data_available, | 645 | .check_data_available = gv11b_css_hw_check_data_available, |
645 | .set_handled_snapshots = css_hw_set_handled_snapshots, | 646 | .set_handled_snapshots = css_hw_set_handled_snapshots, |
646 | .allocate_perfmon_ids = css_gr_allocate_perfmon_ids, | 647 | .allocate_perfmon_ids = css_gr_allocate_perfmon_ids, |
647 | .release_perfmon_ids = css_gr_release_perfmon_ids, | 648 | .release_perfmon_ids = css_gr_release_perfmon_ids, |
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c new file mode 100644 index 00000000..6afd92fa --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c | |||
@@ -0,0 +1,206 @@ | |||
1 | /* | ||
2 | * GV11B Cycle stats snapshots support | ||
3 | * | ||
4 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <linux/dma-mapping.h> | ||
26 | #include <linux/dma-buf.h> | ||
27 | |||
28 | #include <nvgpu/bitops.h> | ||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/lock.h> | ||
31 | #include <nvgpu/dma.h> | ||
32 | |||
33 | #include "gk20a/gk20a.h" | ||
34 | #include "gk20a/css_gr_gk20a.h" | ||
35 | #include "css_gr_gv11b.h" | ||
36 | |||
37 | #include <nvgpu/log.h> | ||
38 | #include <nvgpu/bug.h> | ||
39 | |||
40 | #include <nvgpu/hw/gv11b/hw_perf_gv11b.h> | ||
41 | #include <nvgpu/hw/gv11b/hw_mc_gv11b.h> | ||
42 | |||
43 | |||
44 | /* true when the PMASYS control register has the membuf overflowed status */ | ||
45 | static inline bool css_hw_get_overflow_status(struct gk20a *g) | ||
46 | { | ||
47 | const u32 mask = perf_pmasys_control_membuf_status_overflowed_f(); | ||
48 | return (gk20a_readl(g, perf_pmasys_control_r()) & mask) == mask; | ||
49 | } | ||
50 | |||
51 | /* returns the number of pending snapshot entries (mem_bytes / entry size) */ | ||
52 | static inline u32 css_hw_get_pending_snapshots(struct gk20a *g) | ||
53 | { | ||
54 | const u32 pending_bytes = gk20a_readl(g, perf_pmasys_mem_bytes_r()); | ||
55 | return pending_bytes / sizeof(struct gk20a_cs_snapshot_fifo_entry); | ||
56 | } | ||
57 | |||
58 | /* tells hw how many snapshots were processed (frees up fifo space) */ | ||
59 | static inline void gv11b_css_hw_set_handled_snapshots(struct gk20a *g, u32 done) | ||
60 | { | ||
61 | if (done == 0) | ||
62 | return; | ||
63 | gk20a_writel(g, perf_pmasys_mem_bump_r(), | ||
64 | done * sizeof(struct gk20a_cs_snapshot_fifo_entry)); | ||
65 | } | ||
66 | |||
67 | /* disable streaming to memory and mark any queued snapshots as handled */ | ||
68 | static void gv11b_css_hw_reset_streaming(struct gk20a *g) | ||
69 | { | ||
70 | /* reset the perfmon */ | ||
71 | g->ops.mc.reset(g, mc_enable_perfmon_enabled_f()); | ||
72 | |||
73 | /* RBUFEMPTY must be set -- otherwise we'll pick up snapshots that */ | ||
74 | /* have been queued up from earlier. The engine status is read but */ | ||
75 | /* not checked here, so the value is discarded explicitly instead */ | ||
76 | /* of being parked in a set-but-unused local. */ | ||
77 | (void)gk20a_readl(g, perf_pmasys_enginestatus_r()); | ||
78 | |||
79 | /* turn off writes */ | ||
80 | gk20a_writel(g, perf_pmasys_control_r(), | ||
81 | perf_pmasys_control_membuf_clear_status_doit_f()); | ||
82 | |||
83 | /* mark all pending snapshots as handled */ | ||
84 | gv11b_css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g)); | ||
85 | } | ||
86 | |||
87 | /* Allocates and maps the hw snapshot buffer and points PMASYS at it. */ | ||
88 | /* Returns 0 on success or when already enabled, otherwise an error code. */ | ||
89 | int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch, | ||
90 | struct gk20a_cs_snapshot_client *cs_client) | ||
91 | { | ||
92 | struct gk20a *g = ch->g; | ||
93 | struct gk20a_cs_snapshot *css = g->gr.cs_data; | ||
94 | u32 buf_size = cs_client->snapshot_size; | ||
95 | u32 hwpm_inst_page; | ||
96 | u32 mem_block; | ||
97 | int err; | ||
98 | |||
99 | /* hw buffer already set up: nothing to do */ | ||
100 | if (css->hw_snapshot) | ||
101 | return 0; | ||
102 | |||
103 | /* never allocate less than the minimum hw snapshot size */ | ||
104 | buf_size = buf_size < CSS_MIN_HW_SNAPSHOT_SIZE ? | ||
105 | CSS_MIN_HW_SNAPSHOT_SIZE : buf_size; | ||
106 | |||
107 | err = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, buf_size, | ||
108 | &css->hw_memdesc); | ||
109 | if (err) | ||
110 | return err; | ||
111 | |||
112 | /* buffer must be cpu-mapped, large enough and end at or below 4GB */ | ||
113 | if (!(css->hw_memdesc.cpu_va && | ||
114 | css->hw_memdesc.size >= buf_size && | ||
115 | css->hw_memdesc.gpu_va + u64_lo32(buf_size) <= SZ_4G)) { | ||
116 | err = -EFAULT; | ||
117 | goto failed_allocation; | ||
118 | } | ||
119 | |||
120 | /* fifo bookkeeping: start, read position and one-past-end; 0xff fill */ | ||
121 | css->hw_snapshot = | ||
122 | (struct gk20a_cs_snapshot_fifo_entry *)css->hw_memdesc.cpu_va; | ||
123 | css->hw_get = css->hw_snapshot; | ||
124 | css->hw_end = css->hw_snapshot + | ||
125 | buf_size / sizeof(struct gk20a_cs_snapshot_fifo_entry); | ||
126 | memset(css->hw_snapshot, 0xff, buf_size); | ||
127 | |||
128 | gv11b_css_hw_reset_streaming(g); | ||
129 | |||
130 | /* program the output buffer: gpu va (low/high words) and size */ | ||
131 | gk20a_writel(g, perf_pmasys_outbase_r(), | ||
132 | u64_lo32(css->hw_memdesc.gpu_va)); | ||
133 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
134 | perf_pmasys_outbaseupper_ptr_f( | ||
135 | u64_hi32(css->hw_memdesc.gpu_va))); | ||
136 | gk20a_writel(g, perf_pmasys_outsize_r(), buf_size); | ||
137 | |||
138 | /* this field is aligned to 4K */ | ||
139 | hwpm_inst_page = gk20a_mm_inst_block_addr(g, | ||
140 | &g->mm.hwpm.inst_block) >> 12; | ||
141 | |||
142 | /* target is picked from the aperture of the hwpm inst_block */ | ||
143 | mem_block = perf_pmasys_mem_block_base_f(hwpm_inst_page) | | ||
144 | perf_pmasys_mem_block_valid_true_f() | | ||
145 | nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, | ||
146 | perf_pmasys_mem_block_target_sys_ncoh_f(), | ||
147 | perf_pmasys_mem_block_target_lfb_f()); | ||
148 | gk20a_writel(g, perf_pmasys_mem_block_r(), mem_block); | ||
149 | |||
150 | gk20a_dbg_info("cyclestats: buffer for hardware snapshots enabled\n"); | ||
151 | return 0; | ||
152 | |||
153 | failed_allocation: | ||
154 | if (css->hw_memdesc.size) { | ||
155 | nvgpu_dma_unmap_free(g->mm.pmu.vm, &css->hw_memdesc); | ||
156 | memset(&css->hw_memdesc, 0, sizeof(css->hw_memdesc)); | ||
157 | } | ||
158 | css->hw_snapshot = NULL; | ||
159 | |||
160 | return err; | ||
161 | } | ||
162 | |||
163 | /* Stops hw snapshot streaming and releases the hw snapshot buffer. */ | ||
164 | void gv11b_css_hw_disable_snapshot(struct gr_gk20a *gr) | ||
165 | { | ||
166 | struct gk20a *g = gr->g; | ||
167 | struct gk20a_cs_snapshot *css = gr->cs_data; | ||
168 | |||
169 | if (css->hw_snapshot == NULL) | ||
170 | return; | ||
171 | |||
172 | gv11b_css_hw_reset_streaming(g); | ||
173 | |||
174 | /* clear output buffer address/size and mark the mem block invalid */ | ||
175 | gk20a_writel(g, perf_pmasys_outbase_r(), 0); | ||
176 | gk20a_writel(g, perf_pmasys_outbaseupper_r(), | ||
177 | perf_pmasys_outbaseupper_ptr_f(0)); | ||
178 | gk20a_writel(g, perf_pmasys_outsize_r(), 0); | ||
179 | gk20a_writel(g, perf_pmasys_mem_block_r(), | ||
180 | perf_pmasys_mem_block_base_f(0) | | ||
181 | perf_pmasys_mem_block_valid_false_f() | | ||
182 | perf_pmasys_mem_block_target_f(0)); | ||
183 | |||
184 | nvgpu_dma_unmap_free(g->mm.pmu.vm, &css->hw_memdesc); | ||
185 | memset(&css->hw_memdesc, 0, sizeof(css->hw_memdesc)); | ||
186 | css->hw_snapshot = NULL; | ||
187 | gk20a_dbg_info("cyclestats: buffer for hardware snapshots disabled\n"); | ||
188 | } | ||
189 | |||
190 | /* Reports pending hw snapshots; returns -EINVAL if no hw buffer is set up. */ | ||
191 | /* *hw_overflow is written only when at least one snapshot is pending. */ | ||
192 | int gv11b_css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, | ||
193 | bool *hw_overflow) | ||
194 | { | ||
195 | struct gk20a *g = ch->g; | ||
196 | u32 count; | ||
197 | |||
198 | if (g->gr.cs_data->hw_snapshot == NULL) | ||
199 | return -EINVAL; | ||
200 | |||
201 | count = css_hw_get_pending_snapshots(g); | ||
202 | *pending = count; | ||
203 | if (count != 0) | ||
204 | *hw_overflow = css_hw_get_overflow_status(g); | ||
205 | return 0; | ||
206 | } | ||
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.h new file mode 100644 index 00000000..6b11a62e --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * GV11B Cycle stats snapshots support | ||
3 | * | ||
4 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef CSS_GR_GV11B_H | ||
26 | #define CSS_GR_GV11B_H | ||
27 | |||
28 | int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch, | ||
29 | struct gk20a_cs_snapshot_client *cs_client); | ||
30 | void gv11b_css_hw_disable_snapshot(struct gr_gk20a *gr); | ||
31 | int gv11b_css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, | ||
32 | bool *hw_overflow); | ||
33 | |||
34 | #endif /* CSS_GR_GV11B_H */ | ||
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 168def5c..22beabf7 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c | |||
@@ -68,6 +68,7 @@ | |||
68 | 68 | ||
69 | #include "dbg_gpu_gv11b.h" | 69 | #include "dbg_gpu_gv11b.h" |
70 | #include "hal_gv11b.h" | 70 | #include "hal_gv11b.h" |
71 | #include "css_gr_gv11b.h" | ||
71 | #include "gr_gv11b.h" | 72 | #include "gr_gv11b.h" |
72 | #include "mc_gv11b.h" | 73 | #include "mc_gv11b.h" |
73 | #include "ltc_gv11b.h" | 74 | #include "ltc_gv11b.h" |
@@ -647,9 +648,9 @@ static const struct gpu_ops gv11b_ops = { | |||
647 | }, | 648 | }, |
648 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 649 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
649 | .css = { | 650 | .css = { |
650 | .enable_snapshot = css_hw_enable_snapshot, | 651 | .enable_snapshot = gv11b_css_hw_enable_snapshot, |
651 | .disable_snapshot = css_hw_disable_snapshot, | 652 | .disable_snapshot = gv11b_css_hw_disable_snapshot, |
652 | .check_data_available = css_hw_check_data_available, | 653 | .check_data_available = gv11b_css_hw_check_data_available, |
653 | .set_handled_snapshots = css_hw_set_handled_snapshots, | 654 | .set_handled_snapshots = css_hw_set_handled_snapshots, |
654 | .allocate_perfmon_ids = css_gr_allocate_perfmon_ids, | 655 | .allocate_perfmon_ids = css_gr_allocate_perfmon_ids, |
655 | .release_perfmon_ids = css_gr_release_perfmon_ids, | 656 | .release_perfmon_ids = css_gr_release_perfmon_ids, |