author		Alexandre Courbot <acourbot@nvidia.com>	2014-10-27 05:49:17 -0400
committer	Ben Skeggs <bskeggs@redhat.com>		2014-12-02 00:43:59 -0500
commit		c3a0c771e582a74a344fa9dd805662222f896181 (patch)
tree		4374fe4b75818b907ab3c07bcdc9ec9e0e6325ed /drivers/gpu/drm/nouveau
parent		c5d7ddf70a8445f711f5ab9ab92bd4f59f993262 (diff)
drm/nouveau: implement explicitly coherent BOs
Allow nouveau_bo_new() to recognize the TTM_PL_FLAG_UNCACHED flag, which
means that we want the allocated BO to be perfectly coherent between the
CPU and GPU. This is useful on non-coherent architectures for which we
do not want to manually sync some rarely-accessed buffers: typically,
fences and pushbuffers.
A TTM BO allocated with the TTM_PL_FLAG_UNCACHED flag on a non-coherent
architecture will be populated using the DMA API, and accesses to it will
be performed through the coherent mapping provided by dma_alloc_coherent().
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
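
For illustration only, a minimal sketch of how a caller would use such a BO
(the helper below is hypothetical and not part of the patch): the coherent
behaviour is requested by passing TTM_PL_FLAG_UNCACHED (typically together
with TTM_PL_FLAG_TT) in the nouveau_bo_new() flags, after which the existing
map/read/write helpers work unchanged on both coherent and non-coherent
platforms.

/*
 * Hypothetical example, assuming "nvbo" was created by nouveau_bo_new()
 * with TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED in its flags argument.
 */
static int
example_use_coherent_bo(struct nouveau_bo *nvbo)
{
	int ret;

	/* for force_coherent BOs this reuses the DMA API coherent mapping */
	ret = nouveau_bo_map(nvbo);
	if (ret)
		return ret;

	/*
	 * The rd/wr helpers resolve the correct CPU address whether the BO
	 * was kmap'd or is backed by per-page DMA API addresses.
	 */
	nouveau_bo_wr32(nvbo, 0, 0xcafebabe);
	if (nouveau_bo_rd32(nvbo, 0) != 0xcafebabe)
		return -EIO;

	nouveau_bo_unmap(nvbo);
	return 0;
}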
Diffstat (limited to 'drivers/gpu/drm/nouveau')
-rw-r--r--	drivers/gpu/drm/nouveau/nouveau_bo.c	80
-rw-r--r--	drivers/gpu/drm/nouveau/nouveau_bo.h	1
2 files changed, 73 insertions, 8 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 9a8adeec80cd..ed9a6946f6d6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -214,6 +214,9 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
 
+	if (!nv_device_is_cpu_coherent(nvkm_device(&drm->device)))
+		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
+
 	nvbo->page_shift = 12;
 	if (drm->client.vm) {
 		if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024)
@@ -291,8 +294,9 @@ void
 nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
 {
 	struct ttm_placement *pl = &nvbo->placement;
-	uint32_t flags = TTM_PL_MASK_CACHING |
-			(nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
+	uint32_t flags = (nvbo->force_coherent ? TTM_PL_FLAG_UNCACHED :
+			  TTM_PL_MASK_CACHING) |
+			 (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
 
 	pl->placement = nvbo->placements;
 	set_placement_list(nvbo->placements, &pl->num_placement,
@@ -396,7 +400,14 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages, &nvbo->kmap);
+	/*
+	 * TTM buffers allocated using the DMA API already have a mapping, let's
+	 * use it instead.
+	 */
+	if (!nvbo->force_coherent)
+		ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages,
+				  &nvbo->kmap);
+
 	ttm_bo_unreserve(&nvbo->bo);
 	return ret;
 }
@@ -404,7 +415,14 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
 void
 nouveau_bo_unmap(struct nouveau_bo *nvbo)
 {
-	if (nvbo)
+	if (!nvbo)
+		return;
+
+	/*
+	 * TTM buffers allocated using the DMA API already had a coherent
+	 * mapping which we used, no need to unmap.
+	 */
+	if (!nvbo->force_coherent)
 		ttm_bo_kunmap(&nvbo->kmap);
 }
 
@@ -422,12 +440,36 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 	return 0;
 }
 
+static inline void *
+_nouveau_bo_mem_index(struct nouveau_bo *nvbo, unsigned index, void *mem, u8 sz)
+{
+	struct ttm_dma_tt *dma_tt;
+	u8 *m = mem;
+
+	index *= sz;
+
+	if (m) {
+		/* kmap'd address, return the corresponding offset */
+		m += index;
+	} else {
+		/* DMA-API mapping, lookup the right address */
+		dma_tt = (struct ttm_dma_tt *)nvbo->bo.ttm;
+		m = dma_tt->cpu_address[index / PAGE_SIZE];
+		m += index % PAGE_SIZE;
+	}
+
+	return m;
+}
+#define nouveau_bo_mem_index(o, i, m) _nouveau_bo_mem_index(o, i, m, sizeof(*m))
+
 u16
 nouveau_bo_rd16(struct nouveau_bo *nvbo, unsigned index)
 {
 	bool is_iomem;
 	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-	mem = &mem[index];
+
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		return ioread16_native((void __force __iomem *)mem);
 	else
@@ -439,7 +481,9 @@ nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val)
 {
 	bool is_iomem;
 	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-	mem = &mem[index];
+
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		iowrite16_native(val, (void __force __iomem *)mem);
 	else
@@ -451,7 +495,9 @@ nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index)
 {
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-	mem = &mem[index];
+
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		return ioread32_native((void __force __iomem *)mem);
 	else
@@ -463,7 +509,9 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
 {
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-	mem = &mem[index];
+
+	mem = nouveau_bo_mem_index(nvbo, index, mem);
+
 	if (is_iomem)
 		iowrite32_native(val, (void __force __iomem *)mem);
 	else
@@ -1383,6 +1431,14 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = nv_device_base(device);
 
+	/*
+	 * Objects matching this condition have been marked as force_coherent,
+	 * so use the DMA API for them.
+	 */
+	if (!nv_device_is_cpu_coherent(device) &&
+	    ttm->caching_state == tt_uncached)
+		return ttm_dma_populate(ttm_dma, dev->dev);
+
 #if __OS_HAS_AGP
 	if (drm->agp.stat == ENABLED) {
 		return ttm_agp_tt_populate(ttm);
@@ -1440,6 +1496,14 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = nv_device_base(device);
 
+	/*
+	 * Objects matching this condition have been marked as force_coherent,
+	 * so use the DMA API for them.
+	 */
+	if (!nv_device_is_cpu_coherent(device) &&
+	    ttm->caching_state == tt_uncached)
+		ttm_dma_unpopulate(ttm_dma, dev->dev);
+
 #if __OS_HAS_AGP
 	if (drm->agp.stat == ENABLED) {
 		ttm_agp_tt_unpopulate(ttm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 22d2c764d80b..0f8bbd48a0b9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -13,6 +13,7 @@ struct nouveau_bo {
 	u32 valid_domains;
 	struct ttm_place placements[3];
 	struct ttm_place busy_placements[3];
+	bool force_coherent;
 	struct ttm_bo_kmap_obj kmap;
 	struct list_head head;
 
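
A note on the arithmetic in _nouveau_bo_mem_index(): the DMA-API path cannot
simply offset one flat kmap'd pointer, because ttm_dma_tt keeps a separate
coherent CPU address per page. The standalone sketch below (plain C with
hypothetical names, not part of the patch) shows the same page/offset
computation.

#include <stddef.h>
#include <stdint.h>

#define EXAMPLE_PAGE_SIZE 4096

/*
 * Mirror of the DMA-API branch of _nouveau_bo_mem_index(): turn an element
 * index into a byte offset, pick that page's CPU address from a per-page
 * table, then add the offset within the page.
 */
static void *
example_mem_index(void * const *cpu_address, unsigned index, size_t elem_size)
{
	size_t byte = index * elem_size;			/* index *= sz */
	uint8_t *m = cpu_address[byte / EXAMPLE_PAGE_SIZE];	/* page lookup */

	return m + (byte % EXAMPLE_PAGE_SIZE);			/* in-page offset */
}

For example, with 32-bit elements (elem_size == 4), index 1024 resolves to
offset 0 of the second page, which is the same byte offset 4096 that the
kmap'd path reaches by plain pointer arithmetic.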