diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/Makefile | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 218 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.h | 24 |
5 files changed, 250 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/Makefile b/drivers/gpu/nvgpu/gm20b/Makefile index 4c1db0c2..237ce6f9 100644 --- a/drivers/gpu/nvgpu/gm20b/Makefile +++ b/drivers/gpu/nvgpu/gm20b/Makefile | |||
@@ -10,4 +10,5 @@ obj-$(CONFIG_GK20A) = \ | |||
10 | fb_gm20b.o \ | 10 | fb_gm20b.o \ |
11 | fifo_gm20b.o \ | 11 | fifo_gm20b.o \ |
12 | gr_ctx_gm20b.o \ | 12 | gr_ctx_gm20b.o \ |
13 | gm20b_gating_reglist.o | 13 | gm20b_gating_reglist.o \ |
14 | mm_gm20b.o | ||
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 7e390c37..0fc5fe99 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "gm20b_gating_reglist.h" | 24 | #include "gm20b_gating_reglist.h" |
25 | #include "fifo_gm20b.h" | 25 | #include "fifo_gm20b.h" |
26 | #include "gr_ctx_gm20b.h" | 26 | #include "gr_ctx_gm20b.h" |
27 | #include "mm_gm20b.h" | ||
27 | 28 | ||
28 | struct gpu_ops gm20b_ops = { | 29 | struct gpu_ops gm20b_ops = { |
29 | .clock_gating = { | 30 | .clock_gating = { |
@@ -49,6 +50,7 @@ int gm20b_init_hal(struct gpu_ops *gops) | |||
49 | gm20b_init_fb(gops); | 50 | gm20b_init_fb(gops); |
50 | gm20b_init_fifo(gops); | 51 | gm20b_init_fifo(gops); |
51 | gm20b_init_gr_ctx(gops); | 52 | gm20b_init_gr_ctx(gops); |
53 | gm20b_init_mm(gops); | ||
52 | gops->name = "gm20b"; | 54 | gops->name = "gm20b"; |
53 | 55 | ||
54 | return 0; | 56 | return 0; |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h index e10ce9c8..250bb565 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h | |||
@@ -138,6 +138,10 @@ static inline u32 gmmu_pte_valid_w(void) | |||
138 | { | 138 | { |
139 | return 0; | 139 | return 0; |
140 | } | 140 | } |
141 | static inline u32 gmmu_pte_valid_false_f(void) | ||
142 | { | ||
143 | return 0x0; | ||
144 | } | ||
141 | static inline u32 gmmu_pte_valid_true_f(void) | 145 | static inline u32 gmmu_pte_valid_true_f(void) |
142 | { | 146 | { |
143 | return 0x1; | 147 | return 0x1; |
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c new file mode 100644 index 00000000..67d61569 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -0,0 +1,218 @@ | |||
1 | /* | ||
2 | * GM20B MMU | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include "gk20a/gk20a.h" | ||
17 | #include "mm_gm20b.h" | ||
18 | #include "hw_gmmu_gm20b.h" | ||
19 | |||
20 | static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K }; | ||
21 | static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 }; | ||
22 | static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL, | ||
23 | 0x1ffffLL }; | ||
24 | static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL }; | ||
25 | |||
26 | static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | ||
27 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
28 | u64 first_vaddr, u64 last_vaddr) | ||
29 | { | ||
30 | int err; | ||
31 | u32 pte_lo, pte_hi; | ||
32 | u32 pde_lo, pde_hi; | ||
33 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | ||
34 | u64 addr = 0; | ||
35 | u32 pte_cur; | ||
36 | void *pte_kv_cur; | ||
37 | struct page_table_gk20a *pte; | ||
38 | |||
39 | gk20a_dbg_fn(""); | ||
40 | |||
41 | pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, | ||
42 | &pde_lo, &pde_hi); | ||
43 | |||
44 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", | ||
45 | pgsz_idx, pde_lo, pde_hi); | ||
46 | |||
47 | /* Expect ptes of the same pde */ | ||
48 | BUG_ON(pde_lo != pde_hi); | ||
49 | |||
50 | pte = vm->pdes.ptes[pgsz_idx] + pde_lo; | ||
51 | |||
52 | pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); | ||
53 | pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx); | ||
54 | |||
55 | /* get cpu access to the ptes */ | ||
56 | err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur, pte->size); | ||
57 | if (err) | ||
58 | goto fail; | ||
59 | |||
60 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | ||
61 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | ||
62 | pte_w[0] = gmmu_pte_valid_false_f(); | ||
63 | pte_w[1] = gmmu_pte_vol_true_f(); | ||
64 | |||
65 | gk20a_dbg(gpu_dbg_pte, | ||
66 | "pte_cur=%d addr=%llx refs=%d" | ||
67 | " [0x%08x,0x%08x]", | ||
68 | pte_cur, addr, | ||
69 | pte->ref_cnt, pte_w[1], pte_w[0]); | ||
70 | |||
71 | gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]); | ||
72 | gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]); | ||
73 | } | ||
74 | |||
75 | unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur); | ||
76 | |||
77 | smp_mb(); | ||
78 | vm->tlb_dirty = true; | ||
79 | gk20a_dbg_fn("set tlb dirty"); | ||
80 | |||
81 | return 0; | ||
82 | fail: | ||
83 | return err; | ||
84 | |||
85 | } | ||
86 | |||
87 | static void allocate_gmmu_pde_sparse(struct vm_gk20a *vm, u32 i) | ||
88 | { | ||
89 | bool small_valid, big_valid; | ||
90 | u64 pte_addr[2] = {0, 0}; | ||
91 | struct page_table_gk20a *small_pte = | ||
92 | vm->pdes.ptes[gmmu_page_size_small] + i; | ||
93 | struct page_table_gk20a *big_pte = | ||
94 | vm->pdes.ptes[gmmu_page_size_big] + i; | ||
95 | u32 pde_v[2] = {0, 0}; | ||
96 | u32 *pde; | ||
97 | |||
98 | gk20a_dbg_fn(""); | ||
99 | |||
100 | small_valid = small_pte && small_pte->ref; | ||
101 | big_valid = big_pte && big_pte->ref; | ||
102 | |||
103 | if (small_valid) | ||
104 | pte_addr[gmmu_page_size_small] = | ||
105 | gk20a_mm_iova_addr(small_pte->sgt->sgl); | ||
106 | if (big_valid) | ||
107 | pte_addr[gmmu_page_size_big] = | ||
108 | gk20a_mm_iova_addr(big_pte->sgt->sgl); | ||
109 | |||
110 | pde_v[0] = gmmu_pde_size_full_f(); | ||
111 | pde_v[0] |= gmmu_pde_aperture_big_invalid_f(); | ||
112 | pde_v[1] |= gmmu_pde_aperture_small_invalid_f() | | ||
113 | gmmu_pde_vol_big_true_f(); | ||
114 | |||
115 | pde = pde_from_index(vm, i); | ||
116 | |||
117 | gk20a_mem_wr32(pde, 0, pde_v[0]); | ||
118 | gk20a_mem_wr32(pde, 1, pde_v[1]); | ||
119 | |||
120 | smp_mb(); | ||
121 | |||
122 | FLUSH_CPU_DCACHE(pde, | ||
123 | sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()), | ||
124 | sizeof(u32)*2); | ||
125 | |||
126 | gk20a_mm_l2_invalidate(vm->mm->g); | ||
127 | |||
128 | gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]); | ||
129 | |||
130 | vm->tlb_dirty = true; | ||
131 | } | ||
132 | |||
133 | static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo, | ||
134 | u64 vaddr_hi, u32 pde) | ||
135 | { | ||
136 | u64 pde_vaddr_lo, pde_vaddr_hi; | ||
137 | |||
138 | gk20a_dbg_fn(""); | ||
139 | |||
140 | pde_vaddr_lo = (u64)pde << vm->mm->pde_stride_shift; | ||
141 | pde_vaddr_hi = pde_vaddr_lo | | ||
142 | ((0x1UL << (vm->mm->pde_stride_shift)) - 1); | ||
143 | |||
144 | return ((vaddr_lo <= pde_vaddr_lo) && (vaddr_hi) >= pde_vaddr_hi); | ||
145 | } | ||
146 | |||
147 | static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | ||
148 | u32 num_pages, u32 pgsz_idx) | ||
149 | { | ||
150 | struct mm_gk20a *mm = vm->mm; | ||
151 | u32 pgsz = gmmu_page_sizes[pgsz_idx]; | ||
152 | u32 pde_shift = vm->mm->pde_stride_shift; | ||
153 | u64 vaddr_hi; | ||
154 | u64 vaddr_pde_start; | ||
155 | u32 i; | ||
156 | u32 pde_lo, pde_hi; | ||
157 | int err; | ||
158 | |||
159 | gk20a_dbg_fn(""); | ||
160 | |||
161 | vaddr_hi = vaddr + pgsz * num_pages - 1; | ||
162 | pde_range_from_vaddr_range(vm, | ||
163 | vaddr, | ||
164 | vaddr_hi, | ||
165 | &pde_lo, &pde_hi); | ||
166 | |||
167 | gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " | ||
168 | "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", | ||
169 | vaddr, vaddr_hi, pde_lo, pde_hi, | ||
170 | vm->mm->pde_stride_shift, pgsz); | ||
171 | |||
172 | for (i = pde_lo; i <= pde_hi; i++) { | ||
173 | /* Mark all ptes as sparse. */ | ||
174 | err = validate_gmmu_page_table_gk20a_locked(vm, i, | ||
175 | pgsz_idx); | ||
176 | if (err) { | ||
177 | gk20a_err(dev_from_vm(vm), | ||
178 | "failed to validate page table %d: %d", | ||
179 | i, err); | ||
180 | goto fail; | ||
181 | } | ||
182 | |||
183 | if (gm20b_vm_is_pde_in_range(vm, vaddr, vaddr_hi, i)) { | ||
184 | /* entire pde is marked as sparse */ | ||
185 | vaddr_pde_start = (u64)i << pde_shift; | ||
186 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | ||
187 | vaddr_pde_start, | ||
188 | PDE_ADDR_END(vaddr_pde_start, pde_shift)); | ||
189 | } else { | ||
190 | /* Check leading and trailing spaces which doesn't fit | ||
191 | * into entire pde. */ | ||
192 | if (pde_lo == pde_hi) | ||
193 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | ||
194 | vaddr_hi); | ||
195 | else if (i == pde_lo) | ||
196 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | ||
197 | PDE_ADDR_END(vaddr, pde_shift)); | ||
198 | else | ||
199 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | ||
200 | PDE_ADDR_START(vaddr_hi, pde_shift), | ||
201 | vaddr_hi); | ||
202 | } | ||
203 | } | ||
204 | |||
205 | gk20a_mm_l2_flush(mm->g, true); | ||
206 | |||
207 | return 0; | ||
208 | |||
209 | fail: | ||
210 | WARN_ON(1); | ||
211 | |||
212 | return err; | ||
213 | } | ||
214 | |||
215 | void gm20b_init_mm(struct gpu_ops *gops) | ||
216 | { | ||
217 | gops->mm.set_sparse = gm20b_vm_put_sparse; | ||
218 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h new file mode 100644 index 00000000..0f94d2bf --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * GM20B GMMU | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #ifndef _NVHOST_GM20B_MM | ||
17 | #define _NVHOST_GM20B_MM | ||
18 | struct gk20a; | ||
19 | |||
/*
 * First (PDE_ADDR_START) and last (PDE_ADDR_END) address of the 2^y-byte
 * aligned window that contains address x.
 *
 * The mask is built as unsigned long long: with an unsigned long mask on a
 * build where long is 32 bits, the complemented mask would be zero-extended
 * and PDE_ADDR_START would clear address bits 32 and above.
 */
#define PDE_ADDR_START(x, y)	((x) & ~((0x1ULL << (y)) - 1))
#define PDE_ADDR_END(x, y)	((x) | ((0x1ULL << (y)) - 1))
22 | |||
23 | void gm20b_init_mm(struct gpu_ops *gops); | ||
24 | #endif | ||