diff options
author | Christian König <deathsimple@vodafone.de> | 2012-05-10 09:57:31 -0400 |
---|---|---|
committer | Christian König <deathsimple@vodafone.de> | 2012-06-21 03:38:38 -0400 |
commit | 68e250b7c281dbb75ea2a892a7d4ca27f974fc91 (patch) | |
tree | f1b20eabbd8441f085f922d3d0509c3f92a1ece4 /drivers/gpu/drm | |
parent | 876dc9f32907e57e0298bcd0f1607cb7a2582f63 (diff) |
drm/radeon: add infrastructure for advanced ring synchronization v2
v2: add a BUG_ON when the two fences do not belong to the same ring.
Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 25 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_fence.c | 73 |
2 files changed, 87 insertions, 11 deletions
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e2feddd91df..33a72dc9af8 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -257,8 +257,8 @@ struct radeon_fence_driver { | |||
257 | uint32_t scratch_reg; | 257 | uint32_t scratch_reg; |
258 | uint64_t gpu_addr; | 258 | uint64_t gpu_addr; |
259 | volatile uint32_t *cpu_addr; | 259 | volatile uint32_t *cpu_addr; |
260 | /* seq is protected by ring emission lock */ | 260 | /* sync_seq is protected by ring emission lock */ |
261 | uint64_t seq; | 261 | uint64_t sync_seq[RADEON_NUM_RINGS]; |
262 | atomic64_t last_seq; | 262 | atomic64_t last_seq; |
263 | unsigned long last_activity; | 263 | unsigned long last_activity; |
264 | bool initialized; | 264 | bool initialized; |
@@ -288,6 +288,27 @@ int radeon_fence_wait_any(struct radeon_device *rdev, | |||
288 | struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); | 288 | struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); |
289 | void radeon_fence_unref(struct radeon_fence **fence); | 289 | void radeon_fence_unref(struct radeon_fence **fence); |
290 | unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); | 290 | unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); |
291 | bool radeon_fence_need_sync(struct radeon_fence *fence, int ring); | ||
292 | void radeon_fence_note_sync(struct radeon_fence *fence, int ring); | ||
293 | static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a, | ||
294 | struct radeon_fence *b) | ||
295 | { | ||
296 | if (!a) { | ||
297 | return b; | ||
298 | } | ||
299 | |||
300 | if (!b) { | ||
301 | return a; | ||
302 | } | ||
303 | |||
304 | BUG_ON(a->ring != b->ring); | ||
305 | |||
306 | if (a->seq > b->seq) { | ||
307 | return a; | ||
308 | } else { | ||
309 | return b; | ||
310 | } | ||
311 | } | ||
291 | 312 | ||
292 | /* | 313 | /* |
293 | * Tiling registers | 314 | * Tiling registers |
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 401d346a05c..7b55625a5e1 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c | |||
@@ -72,7 +72,7 @@ int radeon_fence_emit(struct radeon_device *rdev, | |||
72 | } | 72 | } |
73 | kref_init(&((*fence)->kref)); | 73 | kref_init(&((*fence)->kref)); |
74 | (*fence)->rdev = rdev; | 74 | (*fence)->rdev = rdev; |
75 | (*fence)->seq = ++rdev->fence_drv[ring].seq; | 75 | (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; |
76 | (*fence)->ring = ring; | 76 | (*fence)->ring = ring; |
77 | radeon_fence_ring_emit(rdev, ring, *fence); | 77 | radeon_fence_ring_emit(rdev, ring, *fence); |
78 | trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); | 78 | trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); |
@@ -449,7 +449,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) | |||
449 | * wait. | 449 | * wait. |
450 | */ | 450 | */ |
451 | seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; | 451 | seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; |
452 | if (seq >= rdev->fence_drv[ring].seq) { | 452 | if (seq >= rdev->fence_drv[ring].sync_seq[ring]) { |
453 | /* nothing to wait for, last_seq is | 453 | /* nothing to wait for, last_seq is |
454 | already the last emited fence */ | 454 | already the last emited fence */ |
455 | return -ENOENT; | 455 | return -ENOENT; |
@@ -464,7 +464,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) | |||
464 | * activity can be scheduled so there won't be concurrent access | 464 | * activity can be scheduled so there won't be concurrent access |
465 | * to seq value. | 465 | * to seq value. |
466 | */ | 466 | */ |
467 | return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq, | 467 | return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].sync_seq[ring], |
468 | ring, false, false); | 468 | ring, false, false); |
469 | } | 469 | } |
470 | 470 | ||
@@ -492,7 +492,8 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) | |||
492 | * but it's ok to report slightly wrong fence count here. | 492 | * but it's ok to report slightly wrong fence count here. |
493 | */ | 493 | */ |
494 | radeon_fence_process(rdev, ring); | 494 | radeon_fence_process(rdev, ring); |
495 | emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq); | 495 | emitted = rdev->fence_drv[ring].sync_seq[ring] |
496 | - atomic64_read(&rdev->fence_drv[ring].last_seq); | ||
496 | /* to avoid 32bits warp around */ | 497 | /* to avoid 32bits warp around */ |
497 | if (emitted > 0x10000000) { | 498 | if (emitted > 0x10000000) { |
498 | emitted = 0x10000000; | 499 | emitted = 0x10000000; |
@@ -500,6 +501,51 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) | |||
500 | return (unsigned)emitted; | 501 | return (unsigned)emitted; |
501 | } | 502 | } |
502 | 503 | ||
504 | bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring) | ||
505 | { | ||
506 | struct radeon_fence_driver *fdrv; | ||
507 | |||
508 | if (!fence) { | ||
509 | return false; | ||
510 | } | ||
511 | |||
512 | if (fence->ring == dst_ring) { | ||
513 | return false; | ||
514 | } | ||
515 | |||
516 | /* we are protected by the ring mutex */ | ||
517 | fdrv = &fence->rdev->fence_drv[dst_ring]; | ||
518 | if (fence->seq <= fdrv->sync_seq[fence->ring]) { | ||
519 | return false; | ||
520 | } | ||
521 | |||
522 | return true; | ||
523 | } | ||
524 | |||
525 | void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring) | ||
526 | { | ||
527 | struct radeon_fence_driver *dst, *src; | ||
528 | unsigned i; | ||
529 | |||
530 | if (!fence) { | ||
531 | return; | ||
532 | } | ||
533 | |||
534 | if (fence->ring == dst_ring) { | ||
535 | return; | ||
536 | } | ||
537 | |||
538 | /* we are protected by the ring mutex */ | ||
539 | src = &fence->rdev->fence_drv[fence->ring]; | ||
540 | dst = &fence->rdev->fence_drv[dst_ring]; | ||
541 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { | ||
542 | if (i == dst_ring) { | ||
543 | continue; | ||
544 | } | ||
545 | dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]); | ||
546 | } | ||
547 | } | ||
548 | |||
503 | int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) | 549 | int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) |
504 | { | 550 | { |
505 | uint64_t index; | 551 | uint64_t index; |
@@ -521,7 +567,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) | |||
521 | } | 567 | } |
522 | rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; | 568 | rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; |
523 | rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; | 569 | rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; |
524 | radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring); | 570 | radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); |
525 | rdev->fence_drv[ring].initialized = true; | 571 | rdev->fence_drv[ring].initialized = true; |
526 | dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", | 572 | dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", |
527 | ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); | 573 | ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); |
@@ -530,10 +576,13 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) | |||
530 | 576 | ||
531 | static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) | 577 | static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) |
532 | { | 578 | { |
579 | int i; | ||
580 | |||
533 | rdev->fence_drv[ring].scratch_reg = -1; | 581 | rdev->fence_drv[ring].scratch_reg = -1; |
534 | rdev->fence_drv[ring].cpu_addr = NULL; | 582 | rdev->fence_drv[ring].cpu_addr = NULL; |
535 | rdev->fence_drv[ring].gpu_addr = 0; | 583 | rdev->fence_drv[ring].gpu_addr = 0; |
536 | rdev->fence_drv[ring].seq = 0; | 584 | for (i = 0; i < RADEON_NUM_RINGS; ++i) |
585 | rdev->fence_drv[ring].sync_seq[i] = 0; | ||
537 | atomic64_set(&rdev->fence_drv[ring].last_seq, 0); | 586 | atomic64_set(&rdev->fence_drv[ring].last_seq, 0); |
538 | rdev->fence_drv[ring].last_activity = jiffies; | 587 | rdev->fence_drv[ring].last_activity = jiffies; |
539 | rdev->fence_drv[ring].initialized = false; | 588 | rdev->fence_drv[ring].initialized = false; |
@@ -579,7 +628,7 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) | |||
579 | struct drm_info_node *node = (struct drm_info_node *)m->private; | 628 | struct drm_info_node *node = (struct drm_info_node *)m->private; |
580 | struct drm_device *dev = node->minor->dev; | 629 | struct drm_device *dev = node->minor->dev; |
581 | struct radeon_device *rdev = dev->dev_private; | 630 | struct radeon_device *rdev = dev->dev_private; |
582 | int i; | 631 | int i, j; |
583 | 632 | ||
584 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { | 633 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
585 | if (!rdev->fence_drv[i].initialized) | 634 | if (!rdev->fence_drv[i].initialized) |
@@ -588,8 +637,14 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) | |||
588 | seq_printf(m, "--- ring %d ---\n", i); | 637 | seq_printf(m, "--- ring %d ---\n", i); |
589 | seq_printf(m, "Last signaled fence 0x%016llx\n", | 638 | seq_printf(m, "Last signaled fence 0x%016llx\n", |
590 | (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq)); | 639 | (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq)); |
591 | seq_printf(m, "Last emitted 0x%016llx\n", | 640 | seq_printf(m, "Last emitted 0x%016llx\n", |
592 | rdev->fence_drv[i].seq); | 641 | rdev->fence_drv[i].sync_seq[i]); |
642 | |||
643 | for (j = 0; j < RADEON_NUM_RINGS; ++j) { | ||
644 | if (i != j && rdev->fence_drv[j].initialized) | ||
645 | seq_printf(m, "Last sync to ring %d 0x%016llx\n", | ||
646 | j, rdev->fence_drv[i].sync_seq[j]); | ||
647 | } | ||
593 | } | 648 | } |
594 | return 0; | 649 | return 0; |
595 | } | 650 | } |