aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/rv770.c
diff options
context:
space:
mode:
author: Alex Deucher <alexander.deucher@amd.com>, 2012-05-31 19:00:25 -0400
committer: Dave Airlie <airlied@redhat.com>, 2012-06-01 12:00:14 -0400
commit: 416a2bd274566a6f607a271f524b2dc0b84d9106 (patch)
tree: 502720262c07cdb14bc14155bc8295cc20a7d411 /drivers/gpu/drm/radeon/rv770.c
parent: 95c4b23ec4e2fa5604df229ddf134e31d7b3b378 (diff)
drm/radeon: fixup tiling group size and backendmap on r6xx-r9xx (v4)
Tiling group size is always 256 bits on r6xx/r7xx/r8xx/r9xx. Also fix and simplify the render backend map. This now properly sets up the backend map on r6xx-r9xx, which should improve 3D performance.

Vadim also benchmarked this. Some benchmarks on juniper (5750), fullscreen 1920x1080; the first result is with kernel 3.4.0+ (fb21affa), the second is with these patches:

Lightsmark: 91 fps => 123 fps (+35%)
Doom3: 74 fps => 101 fps (+36%)

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/rv770.c')
-rw-r--r-- drivers/gpu/drm/radeon/rv770.c 264
1 files changed, 48 insertions, 216 deletions
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index c12349dba3a2..04ddc365a908 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -365,180 +365,6 @@ void r700_cp_fini(struct radeon_device *rdev)
365/* 365/*
366 * Core functions 366 * Core functions
367 */ 367 */
368static u32 r700_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
369 u32 num_tile_pipes,
370 u32 num_backends,
371 u32 backend_disable_mask)
372{
373 u32 backend_map = 0;
374 u32 enabled_backends_mask;
375 u32 enabled_backends_count;
376 u32 cur_pipe;
377 u32 swizzle_pipe[R7XX_MAX_PIPES];
378 u32 cur_backend;
379 u32 i;
380 bool force_no_swizzle;
381
382 if (num_tile_pipes > R7XX_MAX_PIPES)
383 num_tile_pipes = R7XX_MAX_PIPES;
384 if (num_tile_pipes < 1)
385 num_tile_pipes = 1;
386 if (num_backends > R7XX_MAX_BACKENDS)
387 num_backends = R7XX_MAX_BACKENDS;
388 if (num_backends < 1)
389 num_backends = 1;
390
391 enabled_backends_mask = 0;
392 enabled_backends_count = 0;
393 for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
394 if (((backend_disable_mask >> i) & 1) == 0) {
395 enabled_backends_mask |= (1 << i);
396 ++enabled_backends_count;
397 }
398 if (enabled_backends_count == num_backends)
399 break;
400 }
401
402 if (enabled_backends_count == 0) {
403 enabled_backends_mask = 1;
404 enabled_backends_count = 1;
405 }
406
407 if (enabled_backends_count != num_backends)
408 num_backends = enabled_backends_count;
409
410 switch (rdev->family) {
411 case CHIP_RV770:
412 case CHIP_RV730:
413 force_no_swizzle = false;
414 break;
415 case CHIP_RV710:
416 case CHIP_RV740:
417 default:
418 force_no_swizzle = true;
419 break;
420 }
421
422 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
423 switch (num_tile_pipes) {
424 case 1:
425 swizzle_pipe[0] = 0;
426 break;
427 case 2:
428 swizzle_pipe[0] = 0;
429 swizzle_pipe[1] = 1;
430 break;
431 case 3:
432 if (force_no_swizzle) {
433 swizzle_pipe[0] = 0;
434 swizzle_pipe[1] = 1;
435 swizzle_pipe[2] = 2;
436 } else {
437 swizzle_pipe[0] = 0;
438 swizzle_pipe[1] = 2;
439 swizzle_pipe[2] = 1;
440 }
441 break;
442 case 4:
443 if (force_no_swizzle) {
444 swizzle_pipe[0] = 0;
445 swizzle_pipe[1] = 1;
446 swizzle_pipe[2] = 2;
447 swizzle_pipe[3] = 3;
448 } else {
449 swizzle_pipe[0] = 0;
450 swizzle_pipe[1] = 2;
451 swizzle_pipe[2] = 3;
452 swizzle_pipe[3] = 1;
453 }
454 break;
455 case 5:
456 if (force_no_swizzle) {
457 swizzle_pipe[0] = 0;
458 swizzle_pipe[1] = 1;
459 swizzle_pipe[2] = 2;
460 swizzle_pipe[3] = 3;
461 swizzle_pipe[4] = 4;
462 } else {
463 swizzle_pipe[0] = 0;
464 swizzle_pipe[1] = 2;
465 swizzle_pipe[2] = 4;
466 swizzle_pipe[3] = 1;
467 swizzle_pipe[4] = 3;
468 }
469 break;
470 case 6:
471 if (force_no_swizzle) {
472 swizzle_pipe[0] = 0;
473 swizzle_pipe[1] = 1;
474 swizzle_pipe[2] = 2;
475 swizzle_pipe[3] = 3;
476 swizzle_pipe[4] = 4;
477 swizzle_pipe[5] = 5;
478 } else {
479 swizzle_pipe[0] = 0;
480 swizzle_pipe[1] = 2;
481 swizzle_pipe[2] = 4;
482 swizzle_pipe[3] = 5;
483 swizzle_pipe[4] = 3;
484 swizzle_pipe[5] = 1;
485 }
486 break;
487 case 7:
488 if (force_no_swizzle) {
489 swizzle_pipe[0] = 0;
490 swizzle_pipe[1] = 1;
491 swizzle_pipe[2] = 2;
492 swizzle_pipe[3] = 3;
493 swizzle_pipe[4] = 4;
494 swizzle_pipe[5] = 5;
495 swizzle_pipe[6] = 6;
496 } else {
497 swizzle_pipe[0] = 0;
498 swizzle_pipe[1] = 2;
499 swizzle_pipe[2] = 4;
500 swizzle_pipe[3] = 6;
501 swizzle_pipe[4] = 3;
502 swizzle_pipe[5] = 1;
503 swizzle_pipe[6] = 5;
504 }
505 break;
506 case 8:
507 if (force_no_swizzle) {
508 swizzle_pipe[0] = 0;
509 swizzle_pipe[1] = 1;
510 swizzle_pipe[2] = 2;
511 swizzle_pipe[3] = 3;
512 swizzle_pipe[4] = 4;
513 swizzle_pipe[5] = 5;
514 swizzle_pipe[6] = 6;
515 swizzle_pipe[7] = 7;
516 } else {
517 swizzle_pipe[0] = 0;
518 swizzle_pipe[1] = 2;
519 swizzle_pipe[2] = 4;
520 swizzle_pipe[3] = 6;
521 swizzle_pipe[4] = 3;
522 swizzle_pipe[5] = 1;
523 swizzle_pipe[6] = 7;
524 swizzle_pipe[7] = 5;
525 }
526 break;
527 }
528
529 cur_backend = 0;
530 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
531 while (((1 << cur_backend) & enabled_backends_mask) == 0)
532 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
533
534 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
535
536 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
537 }
538
539 return backend_map;
540}
541
542static void rv770_gpu_init(struct radeon_device *rdev) 368static void rv770_gpu_init(struct radeon_device *rdev)
543{ 369{
544 int i, j, num_qd_pipes; 370 int i, j, num_qd_pipes;
@@ -554,14 +380,17 @@ static void rv770_gpu_init(struct radeon_device *rdev)
554 u32 sq_thread_resource_mgmt; 380 u32 sq_thread_resource_mgmt;
555 u32 hdp_host_path_cntl; 381 u32 hdp_host_path_cntl;
556 u32 sq_dyn_gpr_size_simd_ab_0; 382 u32 sq_dyn_gpr_size_simd_ab_0;
557 u32 backend_map;
558 u32 gb_tiling_config = 0; 383 u32 gb_tiling_config = 0;
559 u32 cc_rb_backend_disable = 0; 384 u32 cc_rb_backend_disable = 0;
560 u32 cc_gc_shader_pipe_config = 0; 385 u32 cc_gc_shader_pipe_config = 0;
561 u32 mc_arb_ramcfg; 386 u32 mc_arb_ramcfg;
562 u32 db_debug4; 387 u32 db_debug4, tmp;
388 u32 inactive_pipes, shader_pipe_config;
389 u32 disabled_rb_mask;
390 unsigned active_number;
563 391
564 /* setup chip specs */ 392 /* setup chip specs */
393 rdev->config.rv770.tiling_group_size = 256;
565 switch (rdev->family) { 394 switch (rdev->family) {
566 case CHIP_RV770: 395 case CHIP_RV770:
567 rdev->config.rv770.max_pipes = 4; 396 rdev->config.rv770.max_pipes = 4;
@@ -672,23 +501,60 @@ static void rv770_gpu_init(struct radeon_device *rdev)
672 /* setup tiling, simd, pipe config */ 501 /* setup tiling, simd, pipe config */
673 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 502 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
674 503
504 shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
505 inactive_pipes = (shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT;
506 for (i = 0, tmp = 1, active_number = 0; i < R7XX_MAX_PIPES; i++) {
507 if (!(inactive_pipes & tmp)) {
508 active_number++;
509 }
510 tmp <<= 1;
511 }
512 if (active_number == 1) {
513 WREG32(SPI_CONFIG_CNTL, DISABLE_INTERP_1);
514 } else {
515 WREG32(SPI_CONFIG_CNTL, 0);
516 }
517
518 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
519 tmp = R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_rb_backend_disable >> 16);
520 if (tmp < rdev->config.rv770.max_backends) {
521 rdev->config.rv770.max_backends = tmp;
522 }
523
524 cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
525 tmp = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R7XX_MAX_PIPES_MASK);
526 if (tmp < rdev->config.rv770.max_pipes) {
527 rdev->config.rv770.max_pipes = tmp;
528 }
529 tmp = R7XX_MAX_SIMDS - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
530 if (tmp < rdev->config.rv770.max_simds) {
531 rdev->config.rv770.max_simds = tmp;
532 }
533
675 switch (rdev->config.rv770.max_tile_pipes) { 534 switch (rdev->config.rv770.max_tile_pipes) {
676 case 1: 535 case 1:
677 default: 536 default:
678 gb_tiling_config |= PIPE_TILING(0); 537 gb_tiling_config = PIPE_TILING(0);
679 break; 538 break;
680 case 2: 539 case 2:
681 gb_tiling_config |= PIPE_TILING(1); 540 gb_tiling_config = PIPE_TILING(1);
682 break; 541 break;
683 case 4: 542 case 4:
684 gb_tiling_config |= PIPE_TILING(2); 543 gb_tiling_config = PIPE_TILING(2);
685 break; 544 break;
686 case 8: 545 case 8:
687 gb_tiling_config |= PIPE_TILING(3); 546 gb_tiling_config = PIPE_TILING(3);
688 break; 547 break;
689 } 548 }
690 rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes; 549 rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes;
691 550
551 disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R7XX_MAX_BACKENDS_MASK;
552 tmp = (gb_tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
553 tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.rv770.max_backends,
554 R7XX_MAX_BACKENDS, disabled_rb_mask);
555 gb_tiling_config |= tmp << 16;
556 rdev->config.rv770.backend_map = tmp;
557
692 if (rdev->family == CHIP_RV770) 558 if (rdev->family == CHIP_RV770)
693 gb_tiling_config |= BANK_TILING(1); 559 gb_tiling_config |= BANK_TILING(1);
694 else { 560 else {
@@ -699,10 +565,6 @@ static void rv770_gpu_init(struct radeon_device *rdev)
699 } 565 }
700 rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3); 566 rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3);
701 gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); 567 gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT);
702 if ((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT)
703 rdev->config.rv770.tiling_group_size = 512;
704 else
705 rdev->config.rv770.tiling_group_size = 256;
706 if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) { 568 if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) {
707 gb_tiling_config |= ROW_TILING(3); 569 gb_tiling_config |= ROW_TILING(3);
708 gb_tiling_config |= SAMPLE_SPLIT(3); 570 gb_tiling_config |= SAMPLE_SPLIT(3);
@@ -714,47 +576,19 @@ static void rv770_gpu_init(struct radeon_device *rdev)
714 } 576 }
715 577
716 gb_tiling_config |= BANK_SWAPS(1); 578 gb_tiling_config |= BANK_SWAPS(1);
717
718 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
719 cc_rb_backend_disable |=
720 BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK);
721
722 cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
723 cc_gc_shader_pipe_config |=
724 INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK);
725 cc_gc_shader_pipe_config |=
726 INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK);
727
728 if (rdev->family == CHIP_RV740)
729 backend_map = 0x28;
730 else
731 backend_map = r700_get_tile_pipe_to_backend_map(rdev,
732 rdev->config.rv770.max_tile_pipes,
733 (R7XX_MAX_BACKENDS -
734 r600_count_pipe_bits((cc_rb_backend_disable &
735 R7XX_MAX_BACKENDS_MASK) >> 16)),
736 (cc_rb_backend_disable >> 16));
737
738 rdev->config.rv770.tile_config = gb_tiling_config; 579 rdev->config.rv770.tile_config = gb_tiling_config;
739 rdev->config.rv770.backend_map = backend_map;
740 gb_tiling_config |= BACKEND_MAP(backend_map);
741 580
742 WREG32(GB_TILING_CONFIG, gb_tiling_config); 581 WREG32(GB_TILING_CONFIG, gb_tiling_config);
743 WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 582 WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
744 WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 583 WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
745 584
746 WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
747 WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
748 WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
749 WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
750
751 WREG32(CGTS_SYS_TCC_DISABLE, 0); 585 WREG32(CGTS_SYS_TCC_DISABLE, 0);
752 WREG32(CGTS_TCC_DISABLE, 0); 586 WREG32(CGTS_TCC_DISABLE, 0);
753 WREG32(CGTS_USER_SYS_TCC_DISABLE, 0); 587 WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
754 WREG32(CGTS_USER_TCC_DISABLE, 0); 588 WREG32(CGTS_USER_TCC_DISABLE, 0);
755 589
756 num_qd_pipes = 590
757 R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); 591 num_qd_pipes = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
758 WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK); 592 WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
759 WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK); 593 WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);
760 594
@@ -815,8 +649,6 @@ static void rv770_gpu_init(struct radeon_device *rdev)
815 649
816 WREG32(VGT_NUM_INSTANCES, 1); 650 WREG32(VGT_NUM_INSTANCES, 1);
817 651
818 WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
819
820 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); 652 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
821 653
822 WREG32(CP_PERFMON_CNTL, 0); 654 WREG32(CP_PERFMON_CNTL, 0);