summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Terje Bergstrom <tbergstrom@nvidia.com> 2015-01-29 14:29:56 -0500
committer: Deepak Nibade <dnibade@nvidia.com> 2016-12-27 04:52:05 -0500
commit: ac0cd782ab539d3a89ac2cc50955f80b8be456d1 (patch)
tree: e7feacdff08a9798db8ff9bf343f6895eb6d3035 /drivers
parent: 1fcd7fd547daac5374993f243fad77a822a5a048 (diff)
gpu: nvgpu: gp10b: Implement new page table format
Implement the 5-level Pascal page table format. It is enabled only for simulation.

Change-Id: I6767fac8b52fe0f6a2e2f86312de5fc93af6518e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/682114
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h 168
-rw-r--r-- drivers/gpu/nvgpu/gp10b/mm_gp10b.c 197
2 files changed, 303 insertions, 62 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index 5a0f9fe2..fc65f57d 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -50,163 +50,207 @@
50#ifndef _hw_gmmu_gp10b_h_ 50#ifndef _hw_gmmu_gp10b_h_
51#define _hw_gmmu_gp10b_h_ 51#define _hw_gmmu_gp10b_h_
52 52
53static inline u32 gmmu_pde_aperture_big_w(void) 53static inline u32 gmmu_new_pde_is_pte_w(void)
54{ 54{
55 return 0; 55 return 0;
56} 56}
57static inline u32 gmmu_pde_aperture_big_invalid_f(void) 57static inline u32 gmmu_new_pde_is_pte_false_f(void)
58{ 58{
59 return 0x0; 59 return 0x0;
60} 60}
61static inline u32 gmmu_pde_aperture_big_video_memory_f(void) 61static inline u32 gmmu_new_pde_aperture_w(void)
62{ 62{
63 return 0x1; 63 return 0;
64}
65static inline u32 gmmu_new_pde_aperture_invalid_f(void)
66{
67 return 0x0;
68}
69static inline u32 gmmu_new_pde_aperture_video_memory_f(void)
70{
71 return 0x2;
72}
73static inline u32 gmmu_new_pde_address_sys_f(u32 v)
74{
75 return (v & 0xffffff) << 8;
76}
77static inline u32 gmmu_new_pde_address_sys_w(void)
78{
79 return 0;
80}
81static inline u32 gmmu_new_pde_vol_w(void)
82{
83 return 0;
84}
85static inline u32 gmmu_new_pde_vol_true_f(void)
86{
87 return 0x8;
88}
89static inline u32 gmmu_new_pde_vol_false_f(void)
90{
91 return 0x0;
92}
93static inline u32 gmmu_new_pde_address_shift_v(void)
94{
95 return 0x0000000c;
96}
97static inline u32 gmmu_new_pde__size_v(void)
98{
99 return 0x00000008;
100}
101static inline u32 gmmu_new_dual_pde_is_pte_w(void)
102{
103 return 0;
104}
105static inline u32 gmmu_new_dual_pde_is_pte_false_f(void)
106{
107 return 0x0;
64} 108}
65static inline u32 gmmu_pde_size_w(void) 109static inline u32 gmmu_new_dual_pde_aperture_big_w(void)
66{ 110{
67 return 0; 111 return 0;
68} 112}
69static inline u32 gmmu_pde_size_full_f(void) 113static inline u32 gmmu_new_dual_pde_aperture_big_invalid_f(void)
70{ 114{
71 return 0x0; 115 return 0x0;
72} 116}
73static inline u32 gmmu_pde_address_big_sys_f(u32 v) 117static inline u32 gmmu_new_dual_pde_aperture_big_video_memory_f(void)
118{
119 return 0x2;
120}
121static inline u32 gmmu_new_dual_pde_address_big_sys_f(u32 v)
74{ 122{
75 return (v & 0xfffffff) << 4; 123 return (v & 0xfffffff) << 4;
76} 124}
77static inline u32 gmmu_pde_address_big_sys_w(void) 125static inline u32 gmmu_new_dual_pde_address_big_sys_w(void)
78{ 126{
79 return 0; 127 return 0;
80} 128}
81static inline u32 gmmu_pde_aperture_small_w(void) 129static inline u32 gmmu_new_dual_pde_aperture_small_w(void)
82{ 130{
83 return 1; 131 return 2;
84} 132}
85static inline u32 gmmu_pde_aperture_small_invalid_f(void) 133static inline u32 gmmu_new_dual_pde_aperture_small_invalid_f(void)
86{ 134{
87 return 0x0; 135 return 0x0;
88} 136}
89static inline u32 gmmu_pde_aperture_small_video_memory_f(void) 137static inline u32 gmmu_new_dual_pde_aperture_small_video_memory_f(void)
90{ 138{
91 return 0x1; 139 return 0x2;
92} 140}
93static inline u32 gmmu_pde_vol_small_w(void) 141static inline u32 gmmu_new_dual_pde_vol_small_w(void)
94{ 142{
95 return 1; 143 return 2;
96} 144}
97static inline u32 gmmu_pde_vol_small_true_f(void) 145static inline u32 gmmu_new_dual_pde_vol_small_true_f(void)
98{ 146{
99 return 0x4; 147 return 0x8;
100} 148}
101static inline u32 gmmu_pde_vol_small_false_f(void) 149static inline u32 gmmu_new_dual_pde_vol_small_false_f(void)
102{ 150{
103 return 0x0; 151 return 0x0;
104} 152}
105static inline u32 gmmu_pde_vol_big_w(void) 153static inline u32 gmmu_new_dual_pde_vol_big_w(void)
106{ 154{
107 return 1; 155 return 0;
108} 156}
109static inline u32 gmmu_pde_vol_big_true_f(void) 157static inline u32 gmmu_new_dual_pde_vol_big_true_f(void)
110{ 158{
111 return 0x8; 159 return 0x8;
112} 160}
113static inline u32 gmmu_pde_vol_big_false_f(void) 161static inline u32 gmmu_new_dual_pde_vol_big_false_f(void)
114{ 162{
115 return 0x0; 163 return 0x0;
116} 164}
117static inline u32 gmmu_pde_address_small_sys_f(u32 v) 165static inline u32 gmmu_new_dual_pde_address_small_sys_f(u32 v)
118{ 166{
119 return (v & 0xfffffff) << 4; 167 return (v & 0xffffff) << 8;
120} 168}
121static inline u32 gmmu_pde_address_small_sys_w(void) 169static inline u32 gmmu_new_dual_pde_address_small_sys_w(void)
122{ 170{
123 return 1; 171 return 2;
124} 172}
125static inline u32 gmmu_pde_address_shift_v(void) 173static inline u32 gmmu_new_dual_pde_address_shift_v(void)
126{ 174{
127 return 0x0000000c; 175 return 0x0000000c;
128} 176}
129static inline u32 gmmu_pde__size_v(void) 177static inline u32 gmmu_new_dual_pde_address_big_shift_v(void)
130{ 178{
131 return 0x00000008; 179 return 0x00000008;
132} 180}
133static inline u32 gmmu_pte__size_v(void) 181static inline u32 gmmu_new_dual_pde__size_v(void)
182{
183 return 0x00000010;
184}
185static inline u32 gmmu_new_pte__size_v(void)
134{ 186{
135 return 0x00000008; 187 return 0x00000008;
136} 188}
137static inline u32 gmmu_pte_valid_w(void) 189static inline u32 gmmu_new_pte_valid_w(void)
138{ 190{
139 return 0; 191 return 0;
140} 192}
141static inline u32 gmmu_pte_valid_true_f(void) 193static inline u32 gmmu_new_pte_valid_true_f(void)
142{ 194{
143 return 0x1; 195 return 0x1;
144} 196}
145static inline u32 gmmu_pte_valid_false_f(void) 197static inline u32 gmmu_new_pte_valid_false_f(void)
146{ 198{
147 return 0x0; 199 return 0x0;
148} 200}
149static inline u32 gmmu_pte_address_sys_f(u32 v) 201static inline u32 gmmu_new_pte_address_sys_f(u32 v)
150{ 202{
151 return (v & 0xfffffff) << 4; 203 return (v & 0xffffff) << 8;
152} 204}
153static inline u32 gmmu_pte_address_sys_w(void) 205static inline u32 gmmu_new_pte_address_sys_w(void)
154{ 206{
155 return 0; 207 return 0;
156} 208}
157static inline u32 gmmu_pte_vol_w(void) 209static inline u32 gmmu_new_pte_vol_w(void)
158{ 210{
159 return 1; 211 return 0;
160} 212}
161static inline u32 gmmu_pte_vol_true_f(void) 213static inline u32 gmmu_new_pte_vol_true_f(void)
162{ 214{
163 return 0x1; 215 return 0x8;
164} 216}
165static inline u32 gmmu_pte_vol_false_f(void) 217static inline u32 gmmu_new_pte_vol_false_f(void)
166{ 218{
167 return 0x0; 219 return 0x0;
168} 220}
169static inline u32 gmmu_pte_aperture_w(void) 221static inline u32 gmmu_new_pte_aperture_w(void)
170{ 222{
171 return 1; 223 return 0;
172} 224}
173static inline u32 gmmu_pte_aperture_video_memory_f(void) 225static inline u32 gmmu_new_pte_aperture_video_memory_f(void)
174{ 226{
175 return 0x0; 227 return 0x0;
176} 228}
177static inline u32 gmmu_pte_read_only_w(void) 229static inline u32 gmmu_new_pte_read_only_w(void)
178{ 230{
179 return 0; 231 return 0;
180} 232}
181static inline u32 gmmu_pte_read_only_true_f(void) 233static inline u32 gmmu_new_pte_read_only_true_f(void)
182{ 234{
183 return 0x4; 235 return 0x40;
184} 236}
185static inline u32 gmmu_pte_write_disable_w(void) 237static inline u32 gmmu_new_pte_comptagline_f(u32 v)
186{ 238{
187 return 1; 239 return (v & 0x3ffff) << 4;
188} 240}
189static inline u32 gmmu_pte_write_disable_true_f(void) 241static inline u32 gmmu_new_pte_comptagline_w(void)
190{
191 return 0x80000000;
192}
193static inline u32 gmmu_pte_read_disable_w(void)
194{ 242{
195 return 1; 243 return 1;
196} 244}
197static inline u32 gmmu_pte_read_disable_true_f(void) 245static inline u32 gmmu_new_pte_kind_f(u32 v)
198{
199 return 0x40000000;
200}
201static inline u32 gmmu_pte_comptagline_f(u32 v)
202{ 246{
203 return (v & 0x3ffff) << 12; 247 return (v & 0xff) << 24;
204} 248}
205static inline u32 gmmu_pte_comptagline_w(void) 249static inline u32 gmmu_new_pte_kind_w(void)
206{ 250{
207 return 1; 251 return 1;
208} 252}
209static inline u32 gmmu_pte_address_shift_v(void) 253static inline u32 gmmu_new_pte_address_shift_v(void)
210{ 254{
211 return 0x0000000c; 255 return 0x0000000c;
212} 256}
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index e3e2c173..1608b176 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -20,6 +20,8 @@
20#include "rpfb_gp10b.h" 20#include "rpfb_gp10b.h"
21#include "hw_ram_gp10b.h" 21#include "hw_ram_gp10b.h"
22#include "hw_bus_gp10b.h" 22#include "hw_bus_gp10b.h"
23#include "hw_gmmu_gp10b.h"
24#include "gk20a/semaphore_gk20a.h"
23 25
24static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g) 26static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
25{ 27{
@@ -138,6 +140,197 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
138 return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); 140 return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
139} 141}
140 142
143u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
144{
145 return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_pde__size_v());
146}
147
148static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
149 struct gk20a_mm_entry *parent,
150 u32 i, u32 gmmu_pgsz_idx,
151 u64 iova,
152 u32 kind_v, u32 *ctag,
153 bool cacheable, bool unmapped_pte,
154 int rw_flag, bool sparse, u32 flags)
155{
156 u64 pte_addr = 0;
157 u64 pde_addr = 0;
158 struct gk20a_mm_entry *pte = parent->entries + i;
159 u32 pde_v[2] = {0, 0};
160 u32 *pde;
161 struct gk20a *g = vm->mm->g;
162
163 gk20a_dbg_fn("");
164
165 pte_addr = g->ops.mm.get_iova_addr(g, pte->sgt->sgl, 0)
166 >> gmmu_new_pde_address_shift_v();
167 pde_addr = g->ops.mm.get_iova_addr(g, parent->sgt->sgl, 0);
168
169 pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
170 pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
171
172 pde = pde3_from_index(parent, i);
173
174 gk20a_mem_wr32(pde, 0, pde_v[0]);
175 gk20a_mem_wr32(pde, 1, pde_v[1]);
176
177 gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
178 i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
179 gk20a_dbg_fn("done");
180 return 0;
181}
182
183u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
184{
185 return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_dual_pde__size_v());
186}
187
188static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
189 struct gk20a_mm_entry *pte,
190 u32 i, u32 gmmu_pgsz_idx,
191 u64 iova,
192 u32 kind_v, u32 *ctag,
193 bool cacheable, bool unmapped_pte,
194 int rw_flag, bool sparse, u32 flags)
195{
196 bool small_valid, big_valid;
197 u32 pte_addr_small = 0, pte_addr_big = 0;
198 struct gk20a_mm_entry *entry = pte->entries + i;
199 u32 pde_v[4] = {0, 0, 0, 0};
200 u32 *pde;
201 struct gk20a *g = vm->mm->g;
202
203 gk20a_dbg_fn("");
204
205 small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
206 big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
207
208 if (small_valid)
209 pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
210 >> gmmu_new_dual_pde_address_shift_v();
211
212 if (big_valid)
213 pte_addr_big = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
214 >> gmmu_new_dual_pde_address_big_shift_v();
215
216 if (small_valid) {
217 pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
218 pde_v[2] |= gmmu_new_dual_pde_aperture_small_video_memory_f();
219 pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
220 }
221
222 if (big_valid) {
223 pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
224 pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
225 pde_v[0] |= gmmu_new_dual_pde_aperture_big_video_memory_f();
226 }
227
228 pde = pde0_from_index(pte, i);
229
230 gk20a_mem_wr32(pde, 0, pde_v[0]);
231 gk20a_mem_wr32(pde, 1, pde_v[1]);
232 gk20a_mem_wr32(pde, 2, pde_v[2]);
233 gk20a_mem_wr32(pde, 3, pde_v[3]);
234
235 gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
236 i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
237 gk20a_dbg_fn("done");
238 return 0;
239}
240
241static int update_gmmu_pte_locked(struct vm_gk20a *vm,
242 struct gk20a_mm_entry *pte,
243 u32 i, u32 gmmu_pgsz_idx,
244 u64 iova,
245 u32 kind_v, u32 *ctag,
246 bool cacheable, bool unmapped_pte,
247 int rw_flag, bool sparse, u32 flags)
248{
249 u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
250 u32 pte_w[2] = {0, 0}; /* invalid pte */
251
252 gk20a_dbg_fn("");
253
254 if (iova) {
255 pte_w[0] = gmmu_new_pte_valid_true_f() |
256 gmmu_new_pte_address_sys_f(iova
257 >> gmmu_new_pte_address_shift_v());
258 pte_w[1] = gmmu_new_pte_aperture_video_memory_f() |
259 gmmu_new_pte_kind_f(kind_v) |
260 gmmu_new_pte_comptagline_f(*ctag / SZ_128K);
261
262 if (rw_flag == gk20a_mem_flag_read_only)
263 pte_w[0] |= gmmu_new_pte_read_only_true_f();
264 if (!cacheable)
265 pte_w[1] |= gmmu_new_pte_vol_true_f();
266
267 gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d"
268 " ctag=%d vol=%d"
269 " [0x%08x, 0x%08x]",
270 i, iova,
271 kind_v, *ctag, !cacheable,
272 pte_w[1], pte_w[0]);
273
274 if (*ctag)
275 *ctag += page_size;
276 } else if (sparse) {
277 pte_w[0] = gmmu_new_pte_valid_false_f();
278 pte_w[1] |= gmmu_new_pte_vol_true_f();
279 } else {
280 gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
281 }
282
283 gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
284 gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);
285
286 gk20a_dbg_fn("done");
287 return 0;
288}
289
290const struct gk20a_mmu_level gp10b_mm_levels[] = {
291 {.hi_bit = {48, 48},
292 .lo_bit = {47, 47},
293 .update_entry = update_gmmu_pde3_locked,
294 .entry_size = 8},
295 {.hi_bit = {46, 46},
296 .lo_bit = {38, 38},
297 .update_entry = update_gmmu_pde3_locked,
298 .entry_size = 8},
299 {.hi_bit = {37, 37},
300 .lo_bit = {29, 29},
301 .update_entry = update_gmmu_pde3_locked,
302 .entry_size = 8},
303 {.hi_bit = {28, 28},
304 .lo_bit = {21, 21},
305 .update_entry = update_gmmu_pde0_locked,
306 .entry_size = 16},
307 {.hi_bit = {20, 20},
308 .lo_bit = {12, 16},
309 .update_entry = update_gmmu_pte_locked,
310 .entry_size = 8},
311 {.update_entry = NULL}
312};
313
314const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, u32 big_page_size)
315{
316 return gp10b_mm_levels;
317}
318
319static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
320{
321 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
322 u32 pdb_addr_hi = u64_hi32(pdb_addr);
323
324 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
325 ram_in_page_dir_base_target_vid_mem_f() |
326 ram_in_page_dir_base_vol_true_f() |
327 ram_in_page_dir_base_lo_f(pdb_addr_lo) |
328 1 << 10);
329
330 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
331 ram_in_page_dir_base_hi_f(pdb_addr_hi));
332}
333
141void gp10b_init_mm(struct gpu_ops *gops) 334void gp10b_init_mm(struct gpu_ops *gops)
142{ 335{
143 gm20b_init_mm(gops); 336 gm20b_init_mm(gops);
@@ -146,4 +339,8 @@ void gp10b_init_mm(struct gpu_ops *gops)
146 gops->mm.init_bar2_vm = gb10b_init_bar2_vm; 339 gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
147 gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup; 340 gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
148 gops->mm.get_iova_addr = gp10b_mm_iova_addr; 341 gops->mm.get_iova_addr = gp10b_mm_iova_addr;
342 if (tegra_platform_is_linsim()) {
343 gops->mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
344 gops->mm.init_pdb = gp10b_mm_init_pdb;
345 }
149} 346}