author	Alexandre Courbot <acourbot@nvidia.com>	2014-10-27 05:49:17 -0400
committer	Ben Skeggs <bskeggs@redhat.com>	2014-12-02 00:43:59 -0500
commit	c3a0c771e582a74a344fa9dd805662222f896181 (patch)
tree	4374fe4b75818b907ab3c07bcdc9ec9e0e6325ed /drivers/gpu/drm/nouveau
parent	c5d7ddf70a8445f711f5ab9ab92bd4f59f993262 (diff)
drm/nouveau: implement explicitly coherent BOs
Allow nouveau_bo_new() to recognize the TTM_PL_FLAG_UNCACHED flag, which means
that we want the allocated BO to be perfectly coherent between the CPU and GPU.
This is useful on non-coherent architectures for which we do not want to
manually sync some rarely-accessed buffers: typically, fences and pushbuffers.

A TTM BO allocated with the TTM_PL_FLAG_UNCACHED flag on a non-coherent
architecture will be populated using the DMA API, and accesses to it performed
using the coherent mapping provided by dma_alloc_coherent().

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
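As a rough illustration of the behaviour this patch adds, here is a standalone
sketch (not kernel code; the flag value and helper names are illustrative, not
the kernel's definitions): on a non-CPU-coherent device, a BO requested with
TTM_PL_FLAG_UNCACHED is marked force_coherent and is later backed through the
DMA API instead of being kmap'd.

/*
 * Standalone sketch of the policy introduced here. Only buffers that ask for
 * uncached placement on a non-coherent device pay the cost of coherent
 * (DMA-API) backing; everything else keeps its regular cached placement.
 */
#include <stdbool.h>
#include <stdio.h>

#define TTM_PL_FLAG_UNCACHED	(1u << 17)	/* illustrative value */

static bool device_is_cpu_coherent(void)
{
	return false;	/* e.g. an ARM SoC where the GPU is not IO-coherent */
}

/* mirrors: if (!nv_device_is_cpu_coherent(...)) force_coherent = flags & UNCACHED */
static bool bo_force_coherent(unsigned int flags)
{
	return !device_is_cpu_coherent() && (flags & TTM_PL_FLAG_UNCACHED);
}

int main(void)
{
	printf("fence BO   -> %s\n", bo_force_coherent(TTM_PL_FLAG_UNCACHED) ?
	       "DMA-API (coherent) backing" : "kmap'd cached backing");
	printf("scanout BO -> %s\n", bo_force_coherent(0) ?
	       "DMA-API (coherent) backing" : "kmap'd cached backing");
	return 0;
}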
Diffstat (limited to 'drivers/gpu/drm/nouveau')
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_bo.c  80
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_bo.h  1
2 files changed, 73 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 9a8adeec80cd..ed9a6946f6d6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -214,6 +214,9 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
 
+	if (!nv_device_is_cpu_coherent(nvkm_device(&drm->device)))
+		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
+
 	nvbo->page_shift = 12;
 	if (drm->client.vm) {
 		if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024)
@@ -291,8 +294,9 @@ void
 nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
 {
 	struct ttm_placement *pl = &nvbo->placement;
-	uint32_t flags = TTM_PL_MASK_CACHING |
-			 (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
+	uint32_t flags = (nvbo->force_coherent ? TTM_PL_FLAG_UNCACHED :
+			  TTM_PL_MASK_CACHING) |
+			 (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
 
 	pl->placement = nvbo->placements;
 	set_placement_list(nvbo->placements, &pl->num_placement,
@@ -396,7 +400,14 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages, &nvbo->kmap);
+	/*
+	 * TTM buffers allocated using the DMA API already have a mapping, let's
+	 * use it instead.
+	 */
+	if (!nvbo->force_coherent)
+		ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages,
+				  &nvbo->kmap);
+
 	ttm_bo_unreserve(&nvbo->bo);
 	return ret;
 }
@@ -404,7 +415,14 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
 void
 nouveau_bo_unmap(struct nouveau_bo *nvbo)
 {
-	if (nvbo)
+	if (!nvbo)
+		return;
+
+	/*
+	 * TTM buffers allocated using the DMA API already had a coherent
+	 * mapping which we used, no need to unmap.
+	 */
+	if (!nvbo->force_coherent)
 		ttm_bo_kunmap(&nvbo->kmap);
 }
 
@@ -422,12 +440,36 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 	return 0;
 }
 
+static inline void *
+_nouveau_bo_mem_index(struct nouveau_bo *nvbo, unsigned index, void *mem, u8 sz)
+{
+	struct ttm_dma_tt *dma_tt;
+	u8 *m = mem;
+
+	index *= sz;
+
+	if (m) {
+		/* kmap'd address, return the corresponding offset */
+		m += index;
+	} else {
+		/* DMA-API mapping, lookup the right address */
+		dma_tt = (struct ttm_dma_tt *)nvbo->bo.ttm;
+		m = dma_tt->cpu_address[index / PAGE_SIZE];
+		m += index % PAGE_SIZE;
+	}
+
+	return m;
+}
+#define nouveau_bo_mem_index(o, i, m) _nouveau_bo_mem_index(o, i, m, sizeof(*m))
+
 u16
 nouveau_bo_rd16(struct nouveau_bo *nvbo, unsigned index)
 {
 	bool is_iomem;
 	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-	mem = &mem[index];
+
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		return ioread16_native((void __force __iomem *)mem);
 	else
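For reference, here is a standalone sketch (not part of the patch) of the
address arithmetic the _nouveau_bo_mem_index() helper above performs for a
DMA-API backed BO: the element index is scaled to a byte offset, then split
into a page index and an offset within that page. The two-page cpu_address[]
array below merely stands in for ttm_dma_tt::cpu_address.

/*
 * Sketch of the DMA-API branch of _nouveau_bo_mem_index(): a u32 access at
 * "index" sits at byte offset index * 4, which is split into a page number
 * (off / PAGE_SIZE) and an offset within that page (off % PAGE_SIZE).
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096u

int main(void)
{
	uint8_t *cpu_address[2];		/* pretend two-page backing store */
	unsigned int index = 1500;		/* u32 element index into the BO */
	size_t off = index * sizeof(uint32_t);	/* 6000 bytes */

	cpu_address[0] = calloc(1, PAGE_SIZE);
	cpu_address[1] = calloc(1, PAGE_SIZE);

	/* same arithmetic as the helper */
	uint32_t *slot = (uint32_t *)(cpu_address[off / PAGE_SIZE] +
				      off % PAGE_SIZE);
	*slot = 0xdeadbeef;

	printf("index %u -> page %zu, offset-in-page %zu, value 0x%08x\n",
	       index, off / PAGE_SIZE, off % PAGE_SIZE, (unsigned int)*slot);

	free(cpu_address[0]);
	free(cpu_address[1]);
	return 0;
}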
@@ -439,7 +481,9 @@ nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val)
 {
 	bool is_iomem;
 	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-	mem = &mem[index];
+
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		iowrite16_native(val, (void __force __iomem *)mem);
 	else
@@ -451,7 +495,9 @@ nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index)
 {
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-	mem = &mem[index];
+
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		return ioread32_native((void __force __iomem *)mem);
 	else
@@ -463,7 +509,9 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
 {
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-	mem = &mem[index];
+
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		iowrite32_native(val, (void __force __iomem *)mem);
 	else
@@ -1383,6 +1431,14 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = nv_device_base(device);
 
+	/*
+	 * Objects matching this condition have been marked as force_coherent,
+	 * so use the DMA API for them.
+	 */
+	if (!nv_device_is_cpu_coherent(device) &&
+	    ttm->caching_state == tt_uncached)
+		return ttm_dma_populate(ttm_dma, dev->dev);
+
 #if __OS_HAS_AGP
 	if (drm->agp.stat == ENABLED) {
 		return ttm_agp_tt_populate(ttm);
@@ -1440,6 +1496,14 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = nv_device_base(device);
 
+	/*
+	 * Objects matching this condition have been marked as force_coherent,
+	 * so use the DMA API for them.
+	 */
+	if (!nv_device_is_cpu_coherent(device) &&
+	    ttm->caching_state == tt_uncached)
+		ttm_dma_unpopulate(ttm_dma, dev->dev);
+
 #if __OS_HAS_AGP
 	if (drm->agp.stat == ENABLED) {
 		ttm_agp_tt_unpopulate(ttm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 22d2c764d80b..0f8bbd48a0b9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -13,6 +13,7 @@ struct nouveau_bo {
 	u32 valid_domains;
 	struct ttm_place placements[3];
 	struct ttm_place busy_placements[3];
+	bool force_coherent;
 	struct ttm_bo_kmap_obj kmap;
 	struct list_head head;
 