author	Kevin Huang <kevinh@nvidia.com>	2014-04-23 18:39:00 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:09:59 -0400
commit	18939e4b4a680a8e01bbec864b267e193fb3714d (patch)
tree	5fa061e5f27978084350b79b56a3035ddbf12ece /drivers/gpu/nvgpu/gm20b/mm_gm20b.c
parent	da56b17de0b360cb020a8831ba82c202057c2968 (diff)
gpu: nvgpu: add support to Maxwell sparse texture
Bug 1442531

Change-Id: Ie927cca905b2ea9811417e7a1fdfdf9d48f015e2
Signed-off-by: Kevin Huang <kevinh@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/mm_gm20b.c')
-rw-r--r--	drivers/gpu/nvgpu/gm20b/mm_gm20b.c	218
1 file changed, 218 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
new file mode 100644
index 00000000..67d61569
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -0,0 +1,218 @@
/*
 * GM20B MMU
 *
 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include "gk20a/gk20a.h"
#include "mm_gm20b.h"
#include "hw_gmmu_gm20b.h"

static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
                                                                0x1ffffLL };
static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };

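/*
 * Write the "sparse" encoding (valid bit clear, volatile bit set) into
 * every PTE covering [first_vaddr, last_vaddr].  The range is expected
 * to stay within a single PDE; the caller splits larger ranges.
 */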
static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
                                enum gmmu_pgsz_gk20a pgsz_idx,
                                u64 first_vaddr, u64 last_vaddr)
{
        int err;
        u32 pte_lo, pte_hi;
        u32 pde_lo, pde_hi;
        u32 pte_w[2] = {0, 0}; /* invalid pte */
        u64 addr = 0;
        u32 pte_cur;
        void *pte_kv_cur;
        struct page_table_gk20a *pte;

        gk20a_dbg_fn("");

        pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
                                   &pde_lo, &pde_hi);

        gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
                  pgsz_idx, pde_lo, pde_hi);

        /* Expect ptes of the same pde */
        BUG_ON(pde_lo != pde_hi);

        pte = vm->pdes.ptes[pgsz_idx] + pde_lo;

        pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx);
        pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx);

        /* get cpu access to the ptes */
        err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur, pte->size);
        if (err)
                goto fail;

        gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
        for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
                pte_w[0] = gmmu_pte_valid_false_f();
                pte_w[1] = gmmu_pte_vol_true_f();

                gk20a_dbg(gpu_dbg_pte,
                          "pte_cur=%d addr=%llx refs=%d"
                          " [0x%08x,0x%08x]",
                          pte_cur, addr,
                          pte->ref_cnt, pte_w[1], pte_w[0]);

                gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
                gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
        }

        unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);

        smp_mb();
        vm->tlb_dirty = true;
        gk20a_dbg_fn("set tlb dirty");

        return 0;
fail:
        return err;
}

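/*
 * Rewrite PDE 'i' so that both the small- and big-page apertures are
 * marked invalid and the big-page volatile bit is set, i.e. the whole
 * PDE is treated as sparse.  The CPU cache line backing the PDE is
 * flushed and the GPU L2 is invalidated afterwards.
 */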
static void allocate_gmmu_pde_sparse(struct vm_gk20a *vm, u32 i)
{
        bool small_valid, big_valid;
        u64 pte_addr[2] = {0, 0};
        struct page_table_gk20a *small_pte =
                vm->pdes.ptes[gmmu_page_size_small] + i;
        struct page_table_gk20a *big_pte =
                vm->pdes.ptes[gmmu_page_size_big] + i;
        u32 pde_v[2] = {0, 0};
        u32 *pde;

        gk20a_dbg_fn("");

        small_valid = small_pte && small_pte->ref;
        big_valid = big_pte && big_pte->ref;

        if (small_valid)
                pte_addr[gmmu_page_size_small] =
                        gk20a_mm_iova_addr(small_pte->sgt->sgl);
        if (big_valid)
                pte_addr[gmmu_page_size_big] =
                        gk20a_mm_iova_addr(big_pte->sgt->sgl);

        pde_v[0] = gmmu_pde_size_full_f();
        pde_v[0] |= gmmu_pde_aperture_big_invalid_f();
        pde_v[1] |= gmmu_pde_aperture_small_invalid_f() |
                gmmu_pde_vol_big_true_f();

        pde = pde_from_index(vm, i);

        gk20a_mem_wr32(pde, 0, pde_v[0]);
        gk20a_mem_wr32(pde, 1, pde_v[1]);

        smp_mb();

        FLUSH_CPU_DCACHE(pde,
                         sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
                         sizeof(u32)*2);

        gk20a_mm_l2_invalidate(vm->mm->g);

        gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);

        vm->tlb_dirty = true;
}

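/* Return true when [vaddr_lo, vaddr_hi] completely covers PDE 'pde'. */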
static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo,
                                     u64 vaddr_hi, u32 pde)
{
        u64 pde_vaddr_lo, pde_vaddr_hi;

        gk20a_dbg_fn("");

        pde_vaddr_lo = (u64)pde << vm->mm->pde_stride_shift;
        pde_vaddr_hi = pde_vaddr_lo |
                        ((0x1UL << (vm->mm->pde_stride_shift)) - 1);

        return (vaddr_lo <= pde_vaddr_lo) && (vaddr_hi >= pde_vaddr_hi);
}

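/*
 * Implementation of the set_sparse hook: back the range
 * [vaddr, vaddr + pgsz * num_pages) with sparse PTEs.  PDEs that are
 * fully covered get their whole PTE range rewritten; the partial head
 * and tail PDEs only get the PTEs that fall inside the requested range.
 */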
static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
                               u32 num_pages, u32 pgsz_idx)
{
        struct mm_gk20a *mm = vm->mm;
        u32 pgsz = gmmu_page_sizes[pgsz_idx];
        u32 pde_shift = vm->mm->pde_stride_shift;
        u64 vaddr_hi;
        u64 vaddr_pde_start;
        u32 i;
        u32 pde_lo, pde_hi;
        int err;

        gk20a_dbg_fn("");

        vaddr_hi = vaddr + pgsz * num_pages - 1;
        pde_range_from_vaddr_range(vm,
                                   vaddr,
                                   vaddr_hi,
                                   &pde_lo, &pde_hi);

        gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
                       "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
                       vaddr, vaddr_hi, pde_lo, pde_hi,
                       pgsz, vm->mm->pde_stride_shift);

        for (i = pde_lo; i <= pde_hi; i++) {
                /* Mark all ptes as sparse. */
                err = validate_gmmu_page_table_gk20a_locked(vm, i,
                                                            pgsz_idx);
                if (err) {
                        gk20a_err(dev_from_vm(vm),
                                  "failed to validate page table %d: %d",
                                  i, err);
                        goto fail;
                }

                if (gm20b_vm_is_pde_in_range(vm, vaddr, vaddr_hi, i)) {
                        /* entire pde is marked as sparse */
                        vaddr_pde_start = (u64)i << pde_shift;
                        allocate_gmmu_ptes_sparse(vm, pgsz_idx,
                                vaddr_pde_start,
                                PDE_ADDR_END(vaddr_pde_start, pde_shift));
                } else {
                        /* Handle the leading and trailing portions that do
                         * not cover an entire pde. */
                        if (pde_lo == pde_hi)
                                allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
                                                vaddr_hi);
                        else if (i == pde_lo)
                                allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
                                        PDE_ADDR_END(vaddr, pde_shift));
                        else
                                allocate_gmmu_ptes_sparse(vm, pgsz_idx,
                                        PDE_ADDR_START(vaddr_hi, pde_shift),
                                        vaddr_hi);
                }
        }

        gk20a_mm_l2_flush(mm->g, true);

        return 0;

fail:
        WARN_ON(1);

        return err;
}

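/* Hook the GM20B-specific sparse-mapping operation into the per-chip ops table. */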
void gm20b_init_mm(struct gpu_ops *gops)
{
        gops->mm.set_sparse = gm20b_vm_put_sparse;
}
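For context, a minimal sketch of how common nvgpu code might exercise the hook installed above. Only the g->ops.mm.set_sparse field and the gm20b_vm_put_sparse signature come from this patch; the caller name, the SZ_1M range, and the assumption that a struct gk20a *g and an initialized struct vm_gk20a *vm are available are illustrative assumptions, not part of the change.

/*
 * Hypothetical caller sketch (not part of this patch): mark a 1 MB,
 * big-page-aligned VA range as sparse through the chip ops table.
 */
static int example_make_range_sparse(struct gk20a *g, struct vm_gk20a *vm,
                                     u64 vaddr)
{
        u32 num_pages = SZ_1M / SZ_128K;        /* eight 128 KB big pages */

        if (!g->ops.mm.set_sparse)
                return -ENOSYS;

        return g->ops.mm.set_sparse(vm, vaddr, num_pages,
                                    gmmu_page_size_big);
}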