author     Alexandre Courbot <acourbot@nvidia.com>    2015-02-20 04:23:04 -0500
committer  Ben Skeggs <bskeggs@redhat.com>            2015-04-14 03:00:45 -0400
commit     a7f6da6e758cd99fcae918b63549273893983189 (patch)
tree       2aa072dd29a1f6272749eff16f462c35a9e6cb37 /drivers/gpu
parent     58fd9375c2c5344e8ab6ef9971635bc59cd39658 (diff)
drm/nouveau/instmem/gk20a: add IOMMU support
Let GK20A's instmem take advantage of the IOMMU if it is present. Having an
IOMMU means that instmem is no longer allocated using the DMA API, but instead
obtained through page_alloc and made contiguous to the GPU by IOMMU mappings.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
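Editor's note: the core idea behind the IOMMU path in this patch is to allocate
ordinary system pages and map them back to back into an IOMMU domain, so the GPU
sees one contiguous range even though the pages are scattered in physical memory.
Below is a minimal sketch of that pattern using the generic kernel IOMMU API; the
helper name map_pages_contiguous() and its simplified error handling are
illustrative assumptions, not code from the patch, which additionally reserves the
GPU address range from an allocator and tags it with bit 34 (see the diff).

#include <linux/gfp.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/mm.h>

/*
 * Illustrative sketch only (not part of the patch): make 'npages'
 * discontiguous system pages appear contiguous to the device by mapping
 * them back to back into an IOMMU domain starting at 'iova'.
 */
static int map_pages_contiguous(struct iommu_domain *domain,
                                unsigned long iova, struct page **pages,
                                unsigned int npages)
{
        unsigned int i;
        int ret;

        for (i = 0; i < npages; i++) {
                pages[i] = alloc_page(GFP_KERNEL);
                if (!pages[i]) {
                        ret = -ENOMEM;
                        goto err;
                }

                ret = iommu_map(domain, iova + i * PAGE_SIZE,
                                page_to_phys(pages[i]), PAGE_SIZE,
                                IOMMU_READ | IOMMU_WRITE);
                if (ret) {
                        __free_page(pages[i]);
                        goto err;
                }
        }
        return 0;

err:
        /* unwind: unmap and free everything set up so far */
        while (i--) {
                iommu_unmap(domain, iova + i * PAGE_SIZE, PAGE_SIZE);
                __free_page(pages[i]);
        }
        return ret;
}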
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c | 291
1 file changed, 252 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 175ac187d382..fcba72eb74a3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -20,12 +20,32 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+/*
+ * GK20A does not have dedicated video memory, and to accurately represent this
+ * fact Nouveau will not create a RAM device for it. Therefore its instmem
+ * implementation must be done directly on top of system memory, while providing
+ * coherent read and write operations.
+ *
+ * Instmem can be allocated through two means:
+ * 1) If an IOMMU mapping has been probed, the IOMMU API is used to make memory
+ *    pages contiguous to the GPU. This is the preferred way.
+ * 2) If no IOMMU mapping is probed, the DMA API is used to allocate physically
+ *    contiguous memory.
+ *
+ * In both cases CPU reads and writes are performed using PRAMIN (i.e. using the
+ * GPU path) to ensure these operations are coherent for the GPU. This allows us
+ * to use more "relaxed" allocation parameters when using the DMA API, since we
+ * never need a kernel mapping.
+ */
+
 #include <subdev/fb.h>
 #include <core/mm.h>
 #include <core/device.h>
 
 #ifdef __KERNEL__
 #include <linux/dma-attrs.h>
+#include <linux/iommu.h>
+#include <nouveau_platform.h>
 #endif
 
 #include "priv.h"
@@ -36,18 +56,53 @@ struct gk20a_instobj_priv {
         struct nvkm_mem *mem;
         /* Pointed by mem */
         struct nvkm_mem _mem;
+};
+
+/*
+ * Used for objects allocated using the DMA API
+ */
+struct gk20a_instobj_dma {
+        struct gk20a_instobj_priv base;
+
         void *cpuaddr;
         dma_addr_t handle;
         struct nvkm_mm_node r;
 };
 
+/*
+ * Used for objects flattened using the IOMMU API
+ */
+struct gk20a_instobj_iommu {
+        struct gk20a_instobj_priv base;
+
+        /* array of base.mem->size pages */
+        struct page *pages[];
+};
+
 struct gk20a_instmem_priv {
         struct nvkm_instmem base;
         spinlock_t lock;
         u64 addr;
+
+        /* Only used if IOMMU is present */
+        struct mutex *mm_mutex;
+        struct nvkm_mm *mm;
+        struct iommu_domain *domain;
+        unsigned long iommu_pgshift;
+
+        /* Only used by DMA API */
         struct dma_attrs attrs;
 };
 
+/*
+ * Use PRAMIN to read/write data and avoid coherency issues.
+ * PRAMIN uses the GPU path and ensures data will always be coherent.
+ *
+ * A dynamic mapping based solution would be desirable in the future, but
+ * the issue remains of how to maintain coherency efficiently. On ARM it is
+ * not easy (if possible at all?) to create uncached temporary mappings.
+ */
+
 static u32
 gk20a_instobj_rd32(struct nvkm_object *object, u64 offset)
 {
@@ -87,50 +142,79 @@ gk20a_instobj_wr32(struct nvkm_object *object, u64 offset, u32 data)
 }
 
 static void
-gk20a_instobj_dtor(struct nvkm_object *object)
+gk20a_instobj_dtor_dma(struct gk20a_instobj_priv *_node)
 {
-        struct gk20a_instobj_priv *node = (void *)object;
+        struct gk20a_instobj_dma *node = (void *)_node;
         struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(node);
         struct device *dev = nv_device_base(nv_device(priv));
 
         if (unlikely(!node->handle))
                 return;
 
-        dma_free_attrs(dev, node->mem->size << PAGE_SHIFT, node->cpuaddr,
+        dma_free_attrs(dev, _node->mem->size << PAGE_SHIFT, node->cpuaddr,
                        node->handle, &priv->attrs);
+}
+
+static void
+gk20a_instobj_dtor_iommu(struct gk20a_instobj_priv *_node)
+{
+        struct gk20a_instobj_iommu *node = (void *)_node;
+        struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(node);
+        struct nvkm_mm_node *r;
+        int i;
+
+        if (unlikely(list_empty(&_node->mem->regions)))
+                return;
+
+        r = list_first_entry(&_node->mem->regions, struct nvkm_mm_node,
+                             rl_entry);
+
+        /* clear bit 34 to unmap pages */
+        r->offset &= ~BIT(34 - priv->iommu_pgshift);
+
+        /* Unmap pages from GPU address space and free them */
+        for (i = 0; i < _node->mem->size; i++) {
+                iommu_unmap(priv->domain,
+                            (r->offset + i) << priv->iommu_pgshift, PAGE_SIZE);
+                __free_page(node->pages[i]);
+        }
+
+        /* Release area from GPU address space */
+        mutex_lock(priv->mm_mutex);
+        nvkm_mm_free(priv->mm, &r);
+        mutex_unlock(priv->mm_mutex);
+}
+
+static void
+gk20a_instobj_dtor(struct nvkm_object *object)
+{
+        struct gk20a_instobj_priv *node = (void *)object;
+        struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(node);
+
+        if (priv->domain)
+                gk20a_instobj_dtor_iommu(node);
+        else
+                gk20a_instobj_dtor_dma(node);
 
         nvkm_instobj_destroy(&node->base);
 }
 
 static int
-gk20a_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
-                   struct nvkm_oclass *oclass, void *data, u32 _size,
-                   struct nvkm_object **pobject)
+gk20a_instobj_ctor_dma(struct nvkm_object *parent, struct nvkm_object *engine,
+                       struct nvkm_oclass *oclass, u32 npages, u32 align,
+                       struct gk20a_instobj_priv **_node)
 {
-        struct nvkm_instobj_args *args = data;
+        struct gk20a_instobj_dma *node;
         struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(parent);
-        struct device *dev = nv_device_base(nv_device(priv));
-        struct gk20a_instobj_priv *node;
-        u32 size, align;
-        u32 npages;
+        struct device *dev = nv_device_base(nv_device(parent));
         int ret;
 
-        nv_debug(parent, "%s: size: %x align: %x\n", __func__,
-                 args->size, args->align);
-
-        size = max((args->size + 4095) & ~4095, (u32)4096);
-        align = max((args->align + 4095) & ~4095, (u32)4096);
-
-        npages = size >> PAGE_SHIFT;
-
         ret = nvkm_instobj_create_(parent, engine, oclass, sizeof(*node),
                                    (void **)&node);
-        *pobject = nv_object(node);
+        *_node = &node->base;
         if (ret)
                 return ret;
 
-        node->mem = &node->_mem;
-
         node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
                                         &node->handle, GFP_KERNEL,
                                         &priv->attrs);
@@ -144,16 +228,132 @@ gk20a_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
         nv_warn(priv, "memory not aligned as requested: %pad (0x%x)\n",
                 &node->handle, align);
 
-        node->mem->offset = node->handle;
+        /* present memory for being mapped using small pages */
+        node->r.type = 12;
+        node->r.offset = node->handle >> 12;
+        node->r.length = (npages << PAGE_SHIFT) >> 12;
+
+        node->base._mem.offset = node->handle;
+
+        INIT_LIST_HEAD(&node->base._mem.regions);
+        list_add_tail(&node->r.rl_entry, &node->base._mem.regions);
+
+        return 0;
+}
+
+static int
+gk20a_instobj_ctor_iommu(struct nvkm_object *parent, struct nvkm_object *engine,
+                         struct nvkm_oclass *oclass, u32 npages, u32 align,
+                         struct gk20a_instobj_priv **_node)
+{
+        struct gk20a_instobj_iommu *node;
+        struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(parent);
+        struct nvkm_mm_node *r;
+        int ret;
+        int i;
+
+        ret = nvkm_instobj_create_(parent, engine, oclass,
+                                   sizeof(*node) + sizeof(node->pages[0]) * npages,
+                                   (void **)&node);
+        *_node = &node->base;
+        if (ret)
+                return ret;
+
+        /* Allocate backing memory */
+        for (i = 0; i < npages; i++) {
+                struct page *p = alloc_page(GFP_KERNEL);
+
+                if (p == NULL) {
+                        ret = -ENOMEM;
+                        goto free_pages;
+                }
+                node->pages[i] = p;
+        }
+
+        mutex_lock(priv->mm_mutex);
+        /* Reserve area from GPU address space */
+        ret = nvkm_mm_head(priv->mm, 0, 1, npages, npages,
+                           align >> priv->iommu_pgshift, &r);
+        mutex_unlock(priv->mm_mutex);
+        if (ret) {
+                nv_error(priv, "virtual space is full!\n");
+                goto free_pages;
+        }
+
+        /* Map into GPU address space */
+        for (i = 0; i < npages; i++) {
+                struct page *p = node->pages[i];
+                u32 offset = (r->offset + i) << priv->iommu_pgshift;
+
+                ret = iommu_map(priv->domain, offset, page_to_phys(p),
+                                PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
+                if (ret < 0) {
+                        nv_error(priv, "IOMMU mapping failure: %d\n", ret);
+
+                        while (i-- > 0) {
+                                offset -= PAGE_SIZE;
+                                iommu_unmap(priv->domain, offset, PAGE_SIZE);
+                        }
+                        goto release_area;
+                }
+        }
+
+        /* Bit 34 tells that an address is to be resolved through the IOMMU */
+        r->offset |= BIT(34 - priv->iommu_pgshift);
+
+        node->base._mem.offset = ((u64)r->offset) << priv->iommu_pgshift;
+
+        INIT_LIST_HEAD(&node->base._mem.regions);
+        list_add_tail(&r->rl_entry, &node->base._mem.regions);
+
+        return 0;
+
+release_area:
+        mutex_lock(priv->mm_mutex);
+        nvkm_mm_free(priv->mm, &r);
+        mutex_unlock(priv->mm_mutex);
+
+free_pages:
+        for (i = 0; i < npages && node->pages[i] != NULL; i++)
+                __free_page(node->pages[i]);
+
+        return ret;
+}
+
+static int
+gk20a_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
+                   struct nvkm_oclass *oclass, void *data, u32 _size,
+                   struct nvkm_object **pobject)
+{
+        struct nvkm_instobj_args *args = data;
+        struct gk20a_instmem_priv *priv = (void *)nvkm_instmem(parent);
+        struct gk20a_instobj_priv *node;
+        u32 size, align;
+        int ret;
+
+        nv_debug(parent, "%s (%s): size: %x align: %x\n", __func__,
+                 priv->domain ? "IOMMU" : "DMA", args->size, args->align);
+
+        /* Round size and align to page bounds */
+        size = max(roundup(args->size, PAGE_SIZE), PAGE_SIZE);
+        align = max(roundup(args->align, PAGE_SIZE), PAGE_SIZE);
+
+        if (priv->domain)
+                ret = gk20a_instobj_ctor_iommu(parent, engine, oclass,
+                                               size >> PAGE_SHIFT, align, &node);
+        else
+                ret = gk20a_instobj_ctor_dma(parent, engine, oclass,
+                                             size >> PAGE_SHIFT, align, &node);
+        *pobject = nv_object(node);
+        if (ret)
+                return ret;
+
+        node->mem = &node->_mem;
+
+        /* present memory for being mapped using small pages */
         node->mem->size = size >> 12;
         node->mem->memtype = 0;
         node->mem->page_shift = 12;
-        INIT_LIST_HEAD(&node->mem->regions);
-
-        node->r.type = 12;
-        node->r.offset = node->handle >> 12;
-        node->r.length = npages;
-        list_add_tail(&node->r.rl_entry, &node->mem->regions);
 
         node->base.addr = node->mem->offset;
         node->base.size = size;
@@ -192,6 +392,7 @@ gk20a_instmem_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
                    struct nvkm_object **pobject)
 {
         struct gk20a_instmem_priv *priv;
+        struct nouveau_platform_device *plat;
         int ret;
 
         ret = nvkm_instmem_create(parent, engine, oclass, &priv);
@@ -201,15 +402,27 @@ gk20a_instmem_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
 
         spin_lock_init(&priv->lock);
 
-        init_dma_attrs(&priv->attrs);
-        /*
-         * We will access instmem through PRAMIN and thus do not need a
-         * consistent CPU pointer or kernel mapping
-         */
-        dma_set_attr(DMA_ATTR_NON_CONSISTENT, &priv->attrs);
-        dma_set_attr(DMA_ATTR_WEAK_ORDERING, &priv->attrs);
-        dma_set_attr(DMA_ATTR_WRITE_COMBINE, &priv->attrs);
-        dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &priv->attrs);
+        plat = nv_device_to_platform(nv_device(parent));
+        if (plat->gpu->iommu.domain) {
+                priv->domain = plat->gpu->iommu.domain;
+                priv->mm = plat->gpu->iommu.mm;
+                priv->iommu_pgshift = plat->gpu->iommu.pgshift;
+                priv->mm_mutex = &plat->gpu->iommu.mutex;
+
+                nv_info(priv, "using IOMMU\n");
+        } else {
+                init_dma_attrs(&priv->attrs);
+                /*
+                 * We will access instmem through PRAMIN and thus do not need a
+                 * consistent CPU pointer or kernel mapping
+                 */
+                dma_set_attr(DMA_ATTR_NON_CONSISTENT, &priv->attrs);
+                dma_set_attr(DMA_ATTR_WEAK_ORDERING, &priv->attrs);
+                dma_set_attr(DMA_ATTR_WRITE_COMBINE, &priv->attrs);
+                dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &priv->attrs);
+
+                nv_info(priv, "using DMA API\n");
+        }
 
         return 0;
 }
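
Editor's note: one detail worth calling out from the hunks above is the bit 34
convention. Region offsets handed to the rest of instmem are expressed in
IOMMU-page units, and the bit corresponding to GPU-address bit 34 is set to mark
them as addresses to be resolved through the IOMMU (and cleared again before
unmapping). The helpers below merely restate that encoding; gk20a_iommu_tag()
and gk20a_iommu_untag() are hypothetical names that do not exist in the driver,
which simply ORs/ANDs the bit in place on r->offset.

#include <linux/bitops.h>
#include <linux/types.h>

/*
 * Hypothetical helpers (not in the driver) restating the encoding used in
 * the patch: 'offset' is a GPU address expressed in units of one IOMMU
 * page (1 << iommu_pgshift), so setting bit (34 - iommu_pgshift) of the
 * offset sets bit 34 of the resulting byte address, which marks it as an
 * address to be resolved through the IOMMU.
 */
static inline u64 gk20a_iommu_tag(u64 offset, unsigned long iommu_pgshift)
{
        return offset | BIT_ULL(34 - iommu_pgshift);
}

static inline u64 gk20a_iommu_untag(u64 offset, unsigned long iommu_pgshift)
{
        return offset & ~BIT_ULL(34 - iommu_pgshift);
}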