Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/mm_gv11b.c')
-rw-r--r--  drivers/gpu/nvgpu/gv11b/mm_gv11b.c  330
1 file changed, 330 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
new file mode 100644
index 00000000..fdc506ac
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
@@ -0,0 +1,330 @@
/*
 * GV11B MMU
 *
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/pm_runtime.h>

#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/log.h>
#include <nvgpu/mm.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

#include "gp10b/mm_gp10b.h"
#include "gp10b/mc_gp10b.h"

#include "mm_gv11b.h"
#include "fb_gv11b.h"

#include <nvgpu/hw/gv11b/hw_fb_gv11b.h>
#include <nvgpu/hw/gv11b/hw_gmmu_gv11b.h>
#include <nvgpu/hw/gv11b/hw_bus_gv11b.h>

#define NVGPU_L3_ALLOC_BIT	BIT(36)

bool gv11b_mm_is_bar1_supported(struct gk20a *g)
{
	return false;
}

void gv11b_init_inst_block(struct nvgpu_mem *inst_block,
		struct vm_gk20a *vm, u32 big_page_size)
{
	struct gk20a *g = gk20a_from_vm(vm);

	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
		nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);

	g->ops.mm.init_pdb(g, inst_block, vm);

	if (big_page_size && g->ops.mm.set_big_page_size)
		g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
}

bool gv11b_mm_mmu_fault_pending(struct gk20a *g)
{
	return gv11b_fb_mmu_fault_pending(g);
}

void gv11b_mm_fault_info_mem_destroy(struct gk20a *g)
{
	nvgpu_log_fn(g, " ");

	nvgpu_mutex_acquire(&g->mm.hub_isr_mutex);

	gv11b_fb_disable_hub_intr(g, STALL_REG_INDEX, HUB_INTR_TYPE_OTHER |
			HUB_INTR_TYPE_NONREPLAY | HUB_INTR_TYPE_REPLAY);

	nvgpu_kfree(g, g->mm.fault_info[FAULT_TYPE_OTHER_AND_NONREPLAY]);

	g->mm.fault_info[FAULT_TYPE_OTHER_AND_NONREPLAY] = NULL;
	g->mm.fault_info[FAULT_TYPE_REPLAY] = NULL;

	nvgpu_mutex_release(&g->mm.hub_isr_mutex);
	nvgpu_mutex_destroy(&g->mm.hub_isr_mutex);
}

static int gv11b_mm_mmu_fault_info_buf_init(struct gk20a *g,
		u32 *hub_intr_types)
{
	struct mmu_fault_info *fault_info_mem;

	fault_info_mem = nvgpu_kzalloc(g, sizeof(struct mmu_fault_info) *
			FAULT_TYPE_NUM);
	if (!fault_info_mem) {
		nvgpu_log_info(g, "failed to alloc shadow fault info");
		return -ENOMEM;
	}
	/* shadow buffer for copying mmu fault info */
	g->mm.fault_info[FAULT_TYPE_OTHER_AND_NONREPLAY] =
		&fault_info_mem[FAULT_TYPE_OTHER_AND_NONREPLAY];

	g->mm.fault_info[FAULT_TYPE_REPLAY] =
		&fault_info_mem[FAULT_TYPE_REPLAY];

	*hub_intr_types |= HUB_INTR_TYPE_OTHER;
	return 0;
}

static void gv11b_mm_mmu_hw_fault_buf_init(struct gk20a *g,
		u32 *hub_intr_types)
{
	struct vm_gk20a *vm = g->mm.bar2.vm;
	int err = 0;
	size_t fb_size;

	/* One entry beyond the channel count is reserved for full detection */
	fb_size = (g->ops.fifo.get_num_fifos(g) + 1) *
			gmmu_fault_buf_size_v();

	err = nvgpu_dma_alloc_map_sys(vm, fb_size,
			&g->mm.hw_fault_buf[FAULT_TYPE_OTHER_AND_NONREPLAY]);
	if (err) {
		nvgpu_err(g,
			"Error in hw mmu fault buf [0] alloc in bar2 vm ");
		/* Fault will be snapped in pri reg but not in buffer */
		return;
	}

	g->mm.hw_fault_buf_status[NONREPLAY_REG_INDEX] =
		HW_FAULT_BUF_STATUS_ALLOC_TRUE;
	*hub_intr_types |= HUB_INTR_TYPE_NONREPLAY;

	err = nvgpu_dma_alloc_map_sys(vm, fb_size,
			&g->mm.hw_fault_buf[FAULT_TYPE_REPLAY]);
	if (err) {
		nvgpu_err(g,
			"Error in hw mmu fault buf [1] alloc in bar2 vm ");
		/* Fault will be snapped in pri reg but not in buffer */
		return;
	}
	g->mm.hw_fault_buf_status[REPLAY_REG_INDEX] =
		HW_FAULT_BUF_STATUS_ALLOC_TRUE;
	*hub_intr_types |= HUB_INTR_TYPE_REPLAY;
}

static void gv11b_mm_mmu_hw_fault_buf_deinit(struct gk20a *g)
{
	struct vm_gk20a *vm = g->mm.bar2.vm;

	nvgpu_log_fn(g, " ");

	gv11b_fb_disable_hub_intr(g, STALL_REG_INDEX, HUB_INTR_TYPE_NONREPLAY |
			HUB_INTR_TYPE_REPLAY);

	g->mm.hub_intr_types &= (~(HUB_INTR_TYPE_NONREPLAY |
				HUB_INTR_TYPE_REPLAY));

	if ((gv11b_fb_is_fault_buf_enabled(g, NONREPLAY_REG_INDEX))) {
		gv11b_fb_fault_buf_set_state_hw(g, NONREPLAY_REG_INDEX,
				FAULT_BUF_DISABLED);
	}

	if ((gv11b_fb_is_fault_buf_enabled(g, REPLAY_REG_INDEX))) {
		gv11b_fb_fault_buf_set_state_hw(g, REPLAY_REG_INDEX,
				FAULT_BUF_DISABLED);
	}

	if (g->mm.hw_fault_buf_status[NONREPLAY_REG_INDEX] ==
			HW_FAULT_BUF_STATUS_ALLOC_TRUE) {
		nvgpu_dma_unmap_free(vm,
			&g->mm.hw_fault_buf[FAULT_TYPE_OTHER_AND_NONREPLAY]);
		g->mm.hw_fault_buf_status[NONREPLAY_REG_INDEX] =
			HW_FAULT_BUF_STATUS_ALLOC_FALSE;
	}

	if (g->mm.hw_fault_buf_status[REPLAY_REG_INDEX] ==
			HW_FAULT_BUF_STATUS_ALLOC_TRUE) {
		nvgpu_dma_unmap_free(vm,
			&g->mm.hw_fault_buf[FAULT_TYPE_REPLAY]);
		g->mm.hw_fault_buf_status[REPLAY_REG_INDEX] =
			HW_FAULT_BUF_STATUS_ALLOC_FALSE;
	}
}

void gv11b_mm_remove_bar2_vm(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;

	nvgpu_log_fn(g, " ");

	gv11b_mm_mmu_hw_fault_buf_deinit(g);

	nvgpu_free_inst_block(g, &mm->bar2.inst_block);
	nvgpu_vm_put(mm->bar2.vm);
}

static void gv11b_mm_mmu_fault_setup_hw(struct gk20a *g)
{
	if (g->mm.hw_fault_buf_status[NONREPLAY_REG_INDEX] ==
			HW_FAULT_BUF_STATUS_ALLOC_TRUE) {
		gv11b_fb_fault_buf_configure_hw(g, NONREPLAY_REG_INDEX);
	}
	if (g->mm.hw_fault_buf_status[REPLAY_REG_INDEX] ==
			HW_FAULT_BUF_STATUS_ALLOC_TRUE) {
		gv11b_fb_fault_buf_configure_hw(g, REPLAY_REG_INDEX);
	}
}

static int gv11b_mm_mmu_fault_setup_sw(struct gk20a *g)
{
	int err;

	nvgpu_log_fn(g, " ");

	nvgpu_mutex_init(&g->mm.hub_isr_mutex);

	g->mm.hw_fault_buf_status[NONREPLAY_REG_INDEX] =
		HW_FAULT_BUF_STATUS_ALLOC_FALSE;
	g->mm.hw_fault_buf_status[REPLAY_REG_INDEX] =
		HW_FAULT_BUF_STATUS_ALLOC_FALSE;

	g->mm.hub_intr_types = HUB_INTR_TYPE_ECC_UNCORRECTED;

	err = gv11b_mm_mmu_fault_info_buf_init(g, &g->mm.hub_intr_types);

	if (!err)
		gv11b_mm_mmu_hw_fault_buf_init(g, &g->mm.hub_intr_types);

	return err;
}

int gv11b_init_mm_setup_hw(struct gk20a *g)
{
	int err = 0;

	nvgpu_log_fn(g, " ");

	g->ops.fb.set_mmu_page_size(g);
	g->ops.fb.init_hw(g);

	err = g->ops.mm.init_bar2_mm_hw_setup(g);
	if (err)
		return err;

	if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g))
		return -EBUSY;

	err = gv11b_mm_mmu_fault_setup_sw(g);
	if (!err)
		gv11b_mm_mmu_fault_setup_hw(g);

	nvgpu_log_fn(g, "end");

	return err;
}

void gv11b_mm_l2_flush(struct gk20a *g, bool invalidate)
{
	nvgpu_log(g, gpu_dbg_fn, "gv11b_mm_l2_flush");

	g->ops.mm.fb_flush(g);
	gk20a_mm_l2_flush(g, invalidate);
	g->ops.mm.fb_flush(g);
}

/*
 * On Volta the GPU determines whether to do L3 allocation for a mapping by
 * checking bit 36 of the physical address. So if a mapping should allocate
 * lines in the L3 this bit must be set.
 */
u64 gv11b_gpu_phys_addr(struct gk20a *g,
			struct nvgpu_gmmu_attrs *attrs, u64 phys)
{
	if (attrs && attrs->t19x_attrs.l3_alloc)
		return phys | NVGPU_L3_ALLOC_BIT;

	return phys;
}

int gv11b_init_bar2_mm_hw_setup(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
	u64 inst_pa = nvgpu_inst_block_addr(g, inst_block);
	u32 reg_val;
	struct nvgpu_timeout timeout;
	u32 delay = GR_IDLE_CHECK_DEFAULT;

	nvgpu_log_fn(g, " ");

	g->ops.fb.set_mmu_page_size(g);

	inst_pa = (u32)(inst_pa >> bus_bar2_block_ptr_shift_v());
	nvgpu_log_info(g, "bar2 inst block ptr: 0x%08x", (u32)inst_pa);

	gk20a_writel(g, bus_bar2_block_r(),
			nvgpu_aperture_mask(g, inst_block,
				bus_bar2_block_target_sys_mem_ncoh_f(),
				bus_bar2_block_target_vid_mem_f()) |
			bus_bar2_block_mode_virtual_f() |
			bus_bar2_block_ptr_f(inst_pa));

	/* This is needed as BAR1 support is removed and there is no way
	 * to know if the gpu successfully accessed memory.
	 * To avoid deadlocks and non-deterministic virtual address translation
	 * behavior, after writing BAR2_BLOCK to bind BAR2 to a virtual address
	 * space, SW must ensure that the bind has completed prior to issuing
	 * any further BAR2 requests by polling for both
	 * BUS_BIND_STATUS_BAR2_PENDING to return to EMPTY and
	 * BUS_BIND_STATUS_BAR2_OUTSTANDING to return to FALSE.
	 */
	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
			NVGPU_TIMER_CPU_TIMER);
	nvgpu_log_info(g, "check bar2 bind status");
	do {
		reg_val = gk20a_readl(g, bus_bind_status_r());

		if (!((reg_val & bus_bind_status_bar2_pending_busy_f()) ||
		      (reg_val & bus_bind_status_bar2_outstanding_true_f())))
			return 0;

		nvgpu_usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (!nvgpu_timeout_expired_msg(&timeout, "bar2 bind timed out"));

	nvgpu_err(g, "bar2 bind failed. gpu unable to access memory");
	return -EBUSY;
}