diff options
author | Alex Deucher <alexander.deucher@amd.com> | 2012-05-31 19:00:25 -0400 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2012-06-01 12:00:14 -0400 |
commit | 416a2bd274566a6f607a271f524b2dc0b84d9106 (patch) | |
tree | 502720262c07cdb14bc14155bc8295cc20a7d411 /drivers/gpu/drm/radeon/rv770.c | |
parent | 95c4b23ec4e2fa5604df229ddf134e31d7b3b378 (diff) |
drm/radeon: fixup tiling group size and backendmap on r6xx-r9xx (v4)
Tiling group size is always 256 bits on r6xx/r7xx/r8xx/r9xx. Also fix and
simplify the render backend map. This now properly sets up the backend map
on r6xx-r9xx, which should improve 3D performance.
Vadim also benchmarked this:
Some benchmarks on Juniper (5750), fullscreen 1920x1080;
the first result is with kernel 3.4.0+ (fb21affa), the second is with these patches:
Lightsmark: 91 fps => 123 fps +35%
Doom3: 74 fps => 101 fps +36%
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/rv770.c')
-rw-r--r-- | drivers/gpu/drm/radeon/rv770.c | 264 |
1 files changed, 48 insertions, 216 deletions
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index c12349dba3a2..04ddc365a908 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c | |||
@@ -365,180 +365,6 @@ void r700_cp_fini(struct radeon_device *rdev) | |||
365 | /* | 365 | /* |
366 | * Core functions | 366 | * Core functions |
367 | */ | 367 | */ |
368 | static u32 r700_get_tile_pipe_to_backend_map(struct radeon_device *rdev, | ||
369 | u32 num_tile_pipes, | ||
370 | u32 num_backends, | ||
371 | u32 backend_disable_mask) | ||
372 | { | ||
373 | u32 backend_map = 0; | ||
374 | u32 enabled_backends_mask; | ||
375 | u32 enabled_backends_count; | ||
376 | u32 cur_pipe; | ||
377 | u32 swizzle_pipe[R7XX_MAX_PIPES]; | ||
378 | u32 cur_backend; | ||
379 | u32 i; | ||
380 | bool force_no_swizzle; | ||
381 | |||
382 | if (num_tile_pipes > R7XX_MAX_PIPES) | ||
383 | num_tile_pipes = R7XX_MAX_PIPES; | ||
384 | if (num_tile_pipes < 1) | ||
385 | num_tile_pipes = 1; | ||
386 | if (num_backends > R7XX_MAX_BACKENDS) | ||
387 | num_backends = R7XX_MAX_BACKENDS; | ||
388 | if (num_backends < 1) | ||
389 | num_backends = 1; | ||
390 | |||
391 | enabled_backends_mask = 0; | ||
392 | enabled_backends_count = 0; | ||
393 | for (i = 0; i < R7XX_MAX_BACKENDS; ++i) { | ||
394 | if (((backend_disable_mask >> i) & 1) == 0) { | ||
395 | enabled_backends_mask |= (1 << i); | ||
396 | ++enabled_backends_count; | ||
397 | } | ||
398 | if (enabled_backends_count == num_backends) | ||
399 | break; | ||
400 | } | ||
401 | |||
402 | if (enabled_backends_count == 0) { | ||
403 | enabled_backends_mask = 1; | ||
404 | enabled_backends_count = 1; | ||
405 | } | ||
406 | |||
407 | if (enabled_backends_count != num_backends) | ||
408 | num_backends = enabled_backends_count; | ||
409 | |||
410 | switch (rdev->family) { | ||
411 | case CHIP_RV770: | ||
412 | case CHIP_RV730: | ||
413 | force_no_swizzle = false; | ||
414 | break; | ||
415 | case CHIP_RV710: | ||
416 | case CHIP_RV740: | ||
417 | default: | ||
418 | force_no_swizzle = true; | ||
419 | break; | ||
420 | } | ||
421 | |||
422 | memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); | ||
423 | switch (num_tile_pipes) { | ||
424 | case 1: | ||
425 | swizzle_pipe[0] = 0; | ||
426 | break; | ||
427 | case 2: | ||
428 | swizzle_pipe[0] = 0; | ||
429 | swizzle_pipe[1] = 1; | ||
430 | break; | ||
431 | case 3: | ||
432 | if (force_no_swizzle) { | ||
433 | swizzle_pipe[0] = 0; | ||
434 | swizzle_pipe[1] = 1; | ||
435 | swizzle_pipe[2] = 2; | ||
436 | } else { | ||
437 | swizzle_pipe[0] = 0; | ||
438 | swizzle_pipe[1] = 2; | ||
439 | swizzle_pipe[2] = 1; | ||
440 | } | ||
441 | break; | ||
442 | case 4: | ||
443 | if (force_no_swizzle) { | ||
444 | swizzle_pipe[0] = 0; | ||
445 | swizzle_pipe[1] = 1; | ||
446 | swizzle_pipe[2] = 2; | ||
447 | swizzle_pipe[3] = 3; | ||
448 | } else { | ||
449 | swizzle_pipe[0] = 0; | ||
450 | swizzle_pipe[1] = 2; | ||
451 | swizzle_pipe[2] = 3; | ||
452 | swizzle_pipe[3] = 1; | ||
453 | } | ||
454 | break; | ||
455 | case 5: | ||
456 | if (force_no_swizzle) { | ||
457 | swizzle_pipe[0] = 0; | ||
458 | swizzle_pipe[1] = 1; | ||
459 | swizzle_pipe[2] = 2; | ||
460 | swizzle_pipe[3] = 3; | ||
461 | swizzle_pipe[4] = 4; | ||
462 | } else { | ||
463 | swizzle_pipe[0] = 0; | ||
464 | swizzle_pipe[1] = 2; | ||
465 | swizzle_pipe[2] = 4; | ||
466 | swizzle_pipe[3] = 1; | ||
467 | swizzle_pipe[4] = 3; | ||
468 | } | ||
469 | break; | ||
470 | case 6: | ||
471 | if (force_no_swizzle) { | ||
472 | swizzle_pipe[0] = 0; | ||
473 | swizzle_pipe[1] = 1; | ||
474 | swizzle_pipe[2] = 2; | ||
475 | swizzle_pipe[3] = 3; | ||
476 | swizzle_pipe[4] = 4; | ||
477 | swizzle_pipe[5] = 5; | ||
478 | } else { | ||
479 | swizzle_pipe[0] = 0; | ||
480 | swizzle_pipe[1] = 2; | ||
481 | swizzle_pipe[2] = 4; | ||
482 | swizzle_pipe[3] = 5; | ||
483 | swizzle_pipe[4] = 3; | ||
484 | swizzle_pipe[5] = 1; | ||
485 | } | ||
486 | break; | ||
487 | case 7: | ||
488 | if (force_no_swizzle) { | ||
489 | swizzle_pipe[0] = 0; | ||
490 | swizzle_pipe[1] = 1; | ||
491 | swizzle_pipe[2] = 2; | ||
492 | swizzle_pipe[3] = 3; | ||
493 | swizzle_pipe[4] = 4; | ||
494 | swizzle_pipe[5] = 5; | ||
495 | swizzle_pipe[6] = 6; | ||
496 | } else { | ||
497 | swizzle_pipe[0] = 0; | ||
498 | swizzle_pipe[1] = 2; | ||
499 | swizzle_pipe[2] = 4; | ||
500 | swizzle_pipe[3] = 6; | ||
501 | swizzle_pipe[4] = 3; | ||
502 | swizzle_pipe[5] = 1; | ||
503 | swizzle_pipe[6] = 5; | ||
504 | } | ||
505 | break; | ||
506 | case 8: | ||
507 | if (force_no_swizzle) { | ||
508 | swizzle_pipe[0] = 0; | ||
509 | swizzle_pipe[1] = 1; | ||
510 | swizzle_pipe[2] = 2; | ||
511 | swizzle_pipe[3] = 3; | ||
512 | swizzle_pipe[4] = 4; | ||
513 | swizzle_pipe[5] = 5; | ||
514 | swizzle_pipe[6] = 6; | ||
515 | swizzle_pipe[7] = 7; | ||
516 | } else { | ||
517 | swizzle_pipe[0] = 0; | ||
518 | swizzle_pipe[1] = 2; | ||
519 | swizzle_pipe[2] = 4; | ||
520 | swizzle_pipe[3] = 6; | ||
521 | swizzle_pipe[4] = 3; | ||
522 | swizzle_pipe[5] = 1; | ||
523 | swizzle_pipe[6] = 7; | ||
524 | swizzle_pipe[7] = 5; | ||
525 | } | ||
526 | break; | ||
527 | } | ||
528 | |||
529 | cur_backend = 0; | ||
530 | for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { | ||
531 | while (((1 << cur_backend) & enabled_backends_mask) == 0) | ||
532 | cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; | ||
533 | |||
534 | backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); | ||
535 | |||
536 | cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; | ||
537 | } | ||
538 | |||
539 | return backend_map; | ||
540 | } | ||
541 | |||
542 | static void rv770_gpu_init(struct radeon_device *rdev) | 368 | static void rv770_gpu_init(struct radeon_device *rdev) |
543 | { | 369 | { |
544 | int i, j, num_qd_pipes; | 370 | int i, j, num_qd_pipes; |
@@ -554,14 +380,17 @@ static void rv770_gpu_init(struct radeon_device *rdev) | |||
554 | u32 sq_thread_resource_mgmt; | 380 | u32 sq_thread_resource_mgmt; |
555 | u32 hdp_host_path_cntl; | 381 | u32 hdp_host_path_cntl; |
556 | u32 sq_dyn_gpr_size_simd_ab_0; | 382 | u32 sq_dyn_gpr_size_simd_ab_0; |
557 | u32 backend_map; | ||
558 | u32 gb_tiling_config = 0; | 383 | u32 gb_tiling_config = 0; |
559 | u32 cc_rb_backend_disable = 0; | 384 | u32 cc_rb_backend_disable = 0; |
560 | u32 cc_gc_shader_pipe_config = 0; | 385 | u32 cc_gc_shader_pipe_config = 0; |
561 | u32 mc_arb_ramcfg; | 386 | u32 mc_arb_ramcfg; |
562 | u32 db_debug4; | 387 | u32 db_debug4, tmp; |
388 | u32 inactive_pipes, shader_pipe_config; | ||
389 | u32 disabled_rb_mask; | ||
390 | unsigned active_number; | ||
563 | 391 | ||
564 | /* setup chip specs */ | 392 | /* setup chip specs */ |
393 | rdev->config.rv770.tiling_group_size = 256; | ||
565 | switch (rdev->family) { | 394 | switch (rdev->family) { |
566 | case CHIP_RV770: | 395 | case CHIP_RV770: |
567 | rdev->config.rv770.max_pipes = 4; | 396 | rdev->config.rv770.max_pipes = 4; |
@@ -672,23 +501,60 @@ static void rv770_gpu_init(struct radeon_device *rdev) | |||
672 | /* setup tiling, simd, pipe config */ | 501 | /* setup tiling, simd, pipe config */ |
673 | mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); | 502 | mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); |
674 | 503 | ||
504 | shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG); | ||
505 | inactive_pipes = (shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT; | ||
506 | for (i = 0, tmp = 1, active_number = 0; i < R7XX_MAX_PIPES; i++) { | ||
507 | if (!(inactive_pipes & tmp)) { | ||
508 | active_number++; | ||
509 | } | ||
510 | tmp <<= 1; | ||
511 | } | ||
512 | if (active_number == 1) { | ||
513 | WREG32(SPI_CONFIG_CNTL, DISABLE_INTERP_1); | ||
514 | } else { | ||
515 | WREG32(SPI_CONFIG_CNTL, 0); | ||
516 | } | ||
517 | |||
518 | cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000; | ||
519 | tmp = R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_rb_backend_disable >> 16); | ||
520 | if (tmp < rdev->config.rv770.max_backends) { | ||
521 | rdev->config.rv770.max_backends = tmp; | ||
522 | } | ||
523 | |||
524 | cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00; | ||
525 | tmp = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R7XX_MAX_PIPES_MASK); | ||
526 | if (tmp < rdev->config.rv770.max_pipes) { | ||
527 | rdev->config.rv770.max_pipes = tmp; | ||
528 | } | ||
529 | tmp = R7XX_MAX_SIMDS - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK); | ||
530 | if (tmp < rdev->config.rv770.max_simds) { | ||
531 | rdev->config.rv770.max_simds = tmp; | ||
532 | } | ||
533 | |||
675 | switch (rdev->config.rv770.max_tile_pipes) { | 534 | switch (rdev->config.rv770.max_tile_pipes) { |
676 | case 1: | 535 | case 1: |
677 | default: | 536 | default: |
678 | gb_tiling_config |= PIPE_TILING(0); | 537 | gb_tiling_config = PIPE_TILING(0); |
679 | break; | 538 | break; |
680 | case 2: | 539 | case 2: |
681 | gb_tiling_config |= PIPE_TILING(1); | 540 | gb_tiling_config = PIPE_TILING(1); |
682 | break; | 541 | break; |
683 | case 4: | 542 | case 4: |
684 | gb_tiling_config |= PIPE_TILING(2); | 543 | gb_tiling_config = PIPE_TILING(2); |
685 | break; | 544 | break; |
686 | case 8: | 545 | case 8: |
687 | gb_tiling_config |= PIPE_TILING(3); | 546 | gb_tiling_config = PIPE_TILING(3); |
688 | break; | 547 | break; |
689 | } | 548 | } |
690 | rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes; | 549 | rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes; |
691 | 550 | ||
551 | disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R7XX_MAX_BACKENDS_MASK; | ||
552 | tmp = (gb_tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT; | ||
553 | tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.rv770.max_backends, | ||
554 | R7XX_MAX_BACKENDS, disabled_rb_mask); | ||
555 | gb_tiling_config |= tmp << 16; | ||
556 | rdev->config.rv770.backend_map = tmp; | ||
557 | |||
692 | if (rdev->family == CHIP_RV770) | 558 | if (rdev->family == CHIP_RV770) |
693 | gb_tiling_config |= BANK_TILING(1); | 559 | gb_tiling_config |= BANK_TILING(1); |
694 | else { | 560 | else { |
@@ -699,10 +565,6 @@ static void rv770_gpu_init(struct radeon_device *rdev) | |||
699 | } | 565 | } |
700 | rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3); | 566 | rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3); |
701 | gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); | 567 | gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); |
702 | if ((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) | ||
703 | rdev->config.rv770.tiling_group_size = 512; | ||
704 | else | ||
705 | rdev->config.rv770.tiling_group_size = 256; | ||
706 | if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) { | 568 | if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) { |
707 | gb_tiling_config |= ROW_TILING(3); | 569 | gb_tiling_config |= ROW_TILING(3); |
708 | gb_tiling_config |= SAMPLE_SPLIT(3); | 570 | gb_tiling_config |= SAMPLE_SPLIT(3); |
@@ -714,47 +576,19 @@ static void rv770_gpu_init(struct radeon_device *rdev) | |||
714 | } | 576 | } |
715 | 577 | ||
716 | gb_tiling_config |= BANK_SWAPS(1); | 578 | gb_tiling_config |= BANK_SWAPS(1); |
717 | |||
718 | cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000; | ||
719 | cc_rb_backend_disable |= | ||
720 | BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK); | ||
721 | |||
722 | cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00; | ||
723 | cc_gc_shader_pipe_config |= | ||
724 | INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK); | ||
725 | cc_gc_shader_pipe_config |= | ||
726 | INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK); | ||
727 | |||
728 | if (rdev->family == CHIP_RV740) | ||
729 | backend_map = 0x28; | ||
730 | else | ||
731 | backend_map = r700_get_tile_pipe_to_backend_map(rdev, | ||
732 | rdev->config.rv770.max_tile_pipes, | ||
733 | (R7XX_MAX_BACKENDS - | ||
734 | r600_count_pipe_bits((cc_rb_backend_disable & | ||
735 | R7XX_MAX_BACKENDS_MASK) >> 16)), | ||
736 | (cc_rb_backend_disable >> 16)); | ||
737 | |||
738 | rdev->config.rv770.tile_config = gb_tiling_config; | 579 | rdev->config.rv770.tile_config = gb_tiling_config; |
739 | rdev->config.rv770.backend_map = backend_map; | ||
740 | gb_tiling_config |= BACKEND_MAP(backend_map); | ||
741 | 580 | ||
742 | WREG32(GB_TILING_CONFIG, gb_tiling_config); | 581 | WREG32(GB_TILING_CONFIG, gb_tiling_config); |
743 | WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); | 582 | WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); |
744 | WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); | 583 | WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); |
745 | 584 | ||
746 | WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); | ||
747 | WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); | ||
748 | WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); | ||
749 | WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); | ||
750 | |||
751 | WREG32(CGTS_SYS_TCC_DISABLE, 0); | 585 | WREG32(CGTS_SYS_TCC_DISABLE, 0); |
752 | WREG32(CGTS_TCC_DISABLE, 0); | 586 | WREG32(CGTS_TCC_DISABLE, 0); |
753 | WREG32(CGTS_USER_SYS_TCC_DISABLE, 0); | 587 | WREG32(CGTS_USER_SYS_TCC_DISABLE, 0); |
754 | WREG32(CGTS_USER_TCC_DISABLE, 0); | 588 | WREG32(CGTS_USER_TCC_DISABLE, 0); |
755 | 589 | ||
756 | num_qd_pipes = | 590 | |
757 | R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); | 591 | num_qd_pipes = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); |
758 | WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK); | 592 | WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK); |
759 | WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK); | 593 | WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK); |
760 | 594 | ||
@@ -815,8 +649,6 @@ static void rv770_gpu_init(struct radeon_device *rdev) | |||
815 | 649 | ||
816 | WREG32(VGT_NUM_INSTANCES, 1); | 650 | WREG32(VGT_NUM_INSTANCES, 1); |
817 | 651 | ||
818 | WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0)); | ||
819 | |||
820 | WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); | 652 | WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); |
821 | 653 | ||
822 | WREG32(CP_PERFMON_CNTL, 0); | 654 | WREG32(CP_PERFMON_CNTL, 0); |