diff options
-rw-r--r-- | drivers/gpu/drm/nouveau/nv50_pm.c | 185 |
1 files changed, 98 insertions, 87 deletions
diff --git a/drivers/gpu/drm/nouveau/nv50_pm.c b/drivers/gpu/drm/nouveau/nv50_pm.c
index 109e473fd5f8..0f2632638c92 100644
--- a/drivers/gpu/drm/nouveau/nv50_pm.c
+++ b/drivers/gpu/drm/nouveau/nv50_pm.c
@@ -354,21 +354,12 @@ nv50_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl) | |||
354 | 354 | ||
355 | struct nv50_pm_state { | 355 | struct nv50_pm_state { |
356 | struct nouveau_pm_level *perflvl; | 356 | struct nouveau_pm_level *perflvl; |
357 | 357 | struct hwsq_ucode eclk_hwsq; | |
358 | struct hwsq_ucode mclk_hwsq; | 358 | struct hwsq_ucode mclk_hwsq; |
359 | u32 mscript; | 359 | u32 mscript; |
360 | u32 mmast; | 360 | u32 mmast; |
361 | u32 mctrl; | 361 | u32 mctrl; |
362 | u32 mcoef; | 362 | u32 mcoef; |
363 | |||
364 | u32 emast; | ||
365 | u32 nctrl; | ||
366 | u32 ncoef; | ||
367 | u32 sctrl; | ||
368 | u32 scoef; | ||
369 | |||
370 | u32 amast; | ||
371 | u32 pdivs; | ||
372 | }; | 363 | }; |
373 | 364 | ||
374 | static u32 | 365 | static u32 |
@@ -598,10 +589,11 @@ nv50_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) | |||
598 | { | 589 | { |
599 | struct drm_nouveau_private *dev_priv = dev->dev_private; | 590 | struct drm_nouveau_private *dev_priv = dev->dev_private; |
600 | struct nv50_pm_state *info; | 591 | struct nv50_pm_state *info; |
592 | struct hwsq_ucode *hwsq; | ||
601 | struct pll_lims pll; | 593 | struct pll_lims pll; |
594 | u32 out, mast, divs, ctrl; | ||
602 | int clk, ret = -EINVAL; | 595 | int clk, ret = -EINVAL; |
603 | int N, M, P1, P2; | 596 | int N, M, P1, P2; |
604 | u32 out; | ||
605 | 597 | ||
606 | if (dev_priv->chipset == 0xaa || | 598 | if (dev_priv->chipset == 0xaa || |
607 | dev_priv->chipset == 0xac) | 599 | dev_priv->chipset == 0xac) |
@@ -622,41 +614,32 @@ nv50_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) | |||
622 | info->mscript = perflvl->memscript; | 614 | info->mscript = perflvl->memscript; |
623 | } | 615 | } |
624 | 616 | ||
625 | /* core: for the moment at least, always use nvpll */ | 617 | divs = read_div(dev); |
626 | clk = calc_pll(dev, 0x4028, &pll, perflvl->core, &N, &M, &P1); | 618 | mast = info->mmast; |
627 | if (clk == 0) | ||
628 | goto error; | ||
629 | 619 | ||
630 | info->emast = 0x00000003; | 620 | /* start building HWSQ script for engine reclocking */ |
631 | info->nctrl = 0x80000000 | (P1 << 19) | (P1 << 16); | 621 | hwsq = &info->eclk_hwsq; |
632 | info->ncoef = (N << 8) | M; | 622 | hwsq_init(hwsq); |
623 | hwsq_setf(hwsq, 0x10, 0); /* disable bus access */ | ||
624 | hwsq_op5f(hwsq, 0x00, 0x01); /* wait for access disabled? */ | ||
633 | 625 | ||
634 | /* shader: tie to nvclk if possible, otherwise use spll. have to be | 626 | /* vdec/dom6: switch to "safe" clocks temporarily */ |
635 | * very careful that the shader clock is at least twice the core, or | 627 | if (perflvl->vdec) { |
636 | * some chipsets will be very unhappy. i expect most or all of these | 628 | mast &= ~0x00000c00; |
637 | * cases will be handled by tying to nvclk, but it's possible there's | 629 | divs &= ~0x00000700; |
638 | * corners | 630 | } |
639 | */ | ||
640 | if (P1-- && perflvl->shader == (perflvl->core << 1)) { | ||
641 | info->emast |= 0x00000020; | ||
642 | info->sctrl = 0x00000000 | (P1 << 19) | (P1 << 16); | ||
643 | info->scoef = nv_rd32(dev, 0x004024); | ||
644 | } else { | ||
645 | clk = calc_pll(dev, 0x4020, &pll, perflvl->shader, &N, &M, &P1); | ||
646 | if (clk == 0) | ||
647 | goto error; | ||
648 | 631 | ||
649 | info->emast |= 0x00000030; | 632 | if (perflvl->dom6) { |
650 | info->sctrl = 0x80000000 | (P1 << 19) | (P1 << 16); | 633 | mast &= ~0x0c000000; |
651 | info->scoef = (N << 8) | M; | 634 | divs &= ~0x00000007; |
652 | } | 635 | } |
653 | 636 | ||
637 | hwsq_wr32(hwsq, 0x00c040, mast); | ||
638 | |||
654 | /* vdec: avoid modifying xpll until we know exactly how the other | 639 | /* vdec: avoid modifying xpll until we know exactly how the other |
655 | * clock domains work, i suspect at least some of them can also be | 640 | * clock domains work, i suspect at least some of them can also be |
656 | * tied to xpll... | 641 | * tied to xpll... |
657 | */ | 642 | */ |
658 | info->amast = nv_rd32(dev, 0x00c040); | ||
659 | info->pdivs = read_div(dev); | ||
660 | if (perflvl->vdec) { | 643 | if (perflvl->vdec) { |
661 | /* see how close we can get using nvclk as a source */ | 644 | /* see how close we can get using nvclk as a source */ |
662 | clk = calc_div(perflvl->core, perflvl->vdec, &P1); | 645 | clk = calc_div(perflvl->core, perflvl->vdec, &P1); |
@@ -669,16 +652,14 @@ nv50_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) | |||
669 | out = calc_div(out, perflvl->vdec, &P2); | 652 | out = calc_div(out, perflvl->vdec, &P2); |
670 | 653 | ||
671 | /* select whichever gets us closest */ | 654 | /* select whichever gets us closest */ |
672 | info->amast &= ~0x00000c00; | ||
673 | info->pdivs &= ~0x00000700; | ||
674 | if (abs((int)perflvl->vdec - clk) <= | 655 | if (abs((int)perflvl->vdec - clk) <= |
675 | abs((int)perflvl->vdec - out)) { | 656 | abs((int)perflvl->vdec - out)) { |
676 | if (dev_priv->chipset != 0x98) | 657 | if (dev_priv->chipset != 0x98) |
677 | info->amast |= 0x00000c00; | 658 | mast |= 0x00000c00; |
678 | info->pdivs |= P1 << 8; | 659 | divs |= P1 << 8; |
679 | } else { | 660 | } else { |
680 | info->amast |= 0x00000800; | 661 | mast |= 0x00000800; |
681 | info->pdivs |= P2 << 8; | 662 | divs |= P2 << 8; |
682 | } | 663 | } |
683 | } | 664 | } |
684 | 665 | ||
@@ -686,21 +667,82 @@ nv50_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) | |||
686 | * of the host clock frequency | 667 | * of the host clock frequency |
687 | */ | 668 | */ |
688 | if (perflvl->dom6) { | 669 | if (perflvl->dom6) { |
689 | info->amast &= ~0x0c000000; | ||
690 | if (clk_same(perflvl->dom6, read_clk(dev, clk_src_href))) { | 670 | if (clk_same(perflvl->dom6, read_clk(dev, clk_src_href))) { |
691 | info->amast |= 0x00000000; | 671 | mast |= 0x00000000; |
692 | } else | 672 | } else |
693 | if (clk_same(perflvl->dom6, read_clk(dev, clk_src_hclk))) { | 673 | if (clk_same(perflvl->dom6, read_clk(dev, clk_src_hclk))) { |
694 | info->amast |= 0x08000000; | 674 | mast |= 0x08000000; |
695 | } else { | 675 | } else { |
696 | clk = read_clk(dev, clk_src_hclk) * 3; | 676 | clk = read_clk(dev, clk_src_hclk) * 3; |
697 | clk = calc_div(clk, perflvl->dom6, &P1); | 677 | clk = calc_div(clk, perflvl->dom6, &P1); |
698 | 678 | ||
699 | info->amast |= 0x0c000000; | 679 | mast |= 0x0c000000; |
700 | info->pdivs = (info->pdivs & ~0x00000007) | P1; | 680 | divs |= P1; |
701 | } | 681 | } |
702 | } | 682 | } |
703 | 683 | ||
684 | /* vdec/dom6: complete switch to new clocks */ | ||
685 | switch (dev_priv->chipset) { | ||
686 | case 0x92: | ||
687 | case 0x94: | ||
688 | case 0x96: | ||
689 | hwsq_wr32(hwsq, 0x004800, divs); | ||
690 | break; | ||
691 | default: | ||
692 | hwsq_wr32(hwsq, 0x004700, divs); | ||
693 | break; | ||
694 | } | ||
695 | |||
696 | hwsq_wr32(hwsq, 0x00c040, mast); | ||
697 | |||
698 | /* core/shader: make sure sclk/nvclk are disconnected from their | ||
699 | * PLLs (nvclk to dom6, sclk to hclk) | ||
700 | */ | ||
701 | if (dev_priv->chipset < 0x92) | ||
702 | mast = (mast & ~0x001000b0) | 0x00100080; | ||
703 | else | ||
704 | mast = (mast & ~0x000000b3) | 0x00000081; | ||
705 | |||
706 | hwsq_wr32(hwsq, 0x00c040, mast); | ||
707 | |||
708 | /* core: for the moment at least, always use nvpll */ | ||
709 | clk = calc_pll(dev, 0x4028, &pll, perflvl->core, &N, &M, &P1); | ||
710 | if (clk == 0) | ||
711 | goto error; | ||
712 | |||
713 | ctrl = nv_rd32(dev, 0x004028) & ~0xc03f0100; | ||
714 | mast &= ~0x00100000; | ||
715 | mast |= 3; | ||
716 | |||
717 | hwsq_wr32(hwsq, 0x004028, 0x80000000 | (P1 << 19) | (P1 << 16) | ctrl); | ||
718 | hwsq_wr32(hwsq, 0x00402c, (N << 8) | M); | ||
719 | |||
720 | /* shader: tie to nvclk if possible, otherwise use spll. have to be | ||
721 | * very careful that the shader clock is at least twice the core, or | ||
722 | * some chipsets will be very unhappy. i expect most or all of these | ||
723 | * cases will be handled by tying to nvclk, but it's possible there's | ||
724 | * corners | ||
725 | */ | ||
726 | ctrl = nv_rd32(dev, 0x004020) & ~0xc03f0100; | ||
727 | |||
728 | if (P1-- && perflvl->shader == (perflvl->core << 1)) { | ||
729 | hwsq_wr32(hwsq, 0x004020, (P1 << 19) | (P1 << 16) | ctrl); | ||
730 | hwsq_wr32(hwsq, 0x00c040, 0x00000020 | mast); | ||
731 | } else { | ||
732 | clk = calc_pll(dev, 0x4020, &pll, perflvl->shader, &N, &M, &P1); | ||
733 | if (clk == 0) | ||
734 | goto error; | ||
735 | ctrl |= 0x80000000; | ||
736 | |||
737 | hwsq_wr32(hwsq, 0x004020, (P1 << 19) | (P1 << 16) | ctrl); | ||
738 | hwsq_wr32(hwsq, 0x004024, (N << 8) | M); | ||
739 | hwsq_wr32(hwsq, 0x00c040, 0x00000030 | mast); | ||
740 | } | ||
741 | |||
742 | hwsq_setf(hwsq, 0x10, 1); /* enable bus access */ | ||
743 | hwsq_op5f(hwsq, 0x00, 0x00); /* wait for access enabled? */ | ||
744 | hwsq_fini(hwsq); | ||
745 | |||
704 | return info; | 746 | return info; |
705 | error: | 747 | error: |
706 | kfree(info); | 748 | kfree(info); |
@@ -708,7 +750,7 @@ error: | |||
708 | } | 750 | } |
709 | 751 | ||
710 | static int | 752 | static int |
711 | prog_mclk(struct drm_device *dev, struct hwsq_ucode *hwsq) | 753 | prog_hwsq(struct drm_device *dev, struct hwsq_ucode *hwsq) |
712 | { | 754 | { |
713 | struct drm_nouveau_private *dev_priv = dev->dev_private; | 755 | struct drm_nouveau_private *dev_priv = dev->dev_private; |
714 | u32 hwsq_data, hwsq_kick; | 756 | u32 hwsq_data, hwsq_kick; |
@@ -748,20 +790,17 @@ prog_mclk(struct drm_device *dev, struct hwsq_ucode *hwsq) | |||
748 | int | 790 | int |
749 | nv50_pm_clocks_set(struct drm_device *dev, void *data) | 791 | nv50_pm_clocks_set(struct drm_device *dev, void *data) |
750 | { | 792 | { |
751 | struct drm_nouveau_private *dev_priv = dev->dev_private; | ||
752 | struct nv50_pm_state *info = data; | 793 | struct nv50_pm_state *info = data; |
753 | struct bit_entry M; | 794 | struct bit_entry M; |
754 | int ret = 0; | 795 | int ret = -EBUSY; |
755 | 796 | ||
756 | /* halt and idle execution engines */ | 797 | /* halt and idle execution engines */ |
757 | nv_mask(dev, 0x002504, 0x00000001, 0x00000001); | 798 | nv_mask(dev, 0x002504, 0x00000001, 0x00000001); |
758 | if (!nv_wait(dev, 0x002504, 0x00000010, 0x00000010)) | 799 | if (!nv_wait(dev, 0x002504, 0x00000010, 0x00000010)) |
759 | goto error; | 800 | goto resume; |
760 | 801 | ||
761 | /* memory: it is *very* important we change this first, the ucode | 802 | /* program memory clock, if necessary - must come before engine clock |
762 | * we build in pre() now has hardcoded 0xc040 values, which can't | 803 | * reprogramming due to how we construct the hwsq scripts in pre() |
763 | * change before we execute it or the engine clocks may end up | ||
764 | * messed up. | ||
765 | */ | 804 | */ |
766 | if (info->mclk_hwsq.len) { | 805 | if (info->mclk_hwsq.len) { |
767 | /* execute some scripts that do ??? from the vbios.. */ | 806 | /* execute some scripts that do ??? from the vbios.. */ |
@@ -775,42 +814,14 @@ nv50_pm_clocks_set(struct drm_device *dev, void *data) | |||
775 | nouveau_bios_init_exec(dev, info->mscript); | 814 | nouveau_bios_init_exec(dev, info->mscript); |
776 | } | 815 | } |
777 | 816 | ||
778 | ret = prog_mclk(dev, &info->mclk_hwsq); | 817 | ret = prog_hwsq(dev, &info->mclk_hwsq); |
779 | if (ret) | 818 | if (ret) |
780 | goto resume; | 819 | goto resume; |
781 | } | 820 | } |
782 | 821 | ||
783 | /* reclock vdec/dom6 */ | 822 | /* program engine clocks */ |
784 | nv_mask(dev, 0x00c040, 0x00000c00, 0x00000000); | 823 | ret = prog_hwsq(dev, &info->eclk_hwsq); |
785 | switch (dev_priv->chipset) { | ||
786 | case 0x92: | ||
787 | case 0x94: | ||
788 | case 0x96: | ||
789 | nv_mask(dev, 0x004800, 0x00000707, info->pdivs); | ||
790 | break; | ||
791 | default: | ||
792 | nv_mask(dev, 0x004700, 0x00000707, info->pdivs); | ||
793 | break; | ||
794 | } | ||
795 | nv_mask(dev, 0x00c040, 0x0c000c00, info->amast); | ||
796 | 824 | ||
797 | /* core/shader: make sure sclk/nvclk are disconnected from their | ||
798 | * plls (nvclk to dom6, sclk to hclk), modify the plls, and | ||
799 | * reconnect sclk/nvclk to their new clock source | ||
800 | */ | ||
801 | if (dev_priv->chipset < 0x92) | ||
802 | nv_mask(dev, 0x00c040, 0x001000b0, 0x00100080); /* grrr! */ | ||
803 | else | ||
804 | nv_mask(dev, 0x00c040, 0x000000b3, 0x00000081); | ||
805 | nv_mask(dev, 0x004020, 0xc03f0100, info->sctrl); | ||
806 | nv_wr32(dev, 0x004024, info->scoef); | ||
807 | nv_mask(dev, 0x004028, 0xc03f0100, info->nctrl); | ||
808 | nv_wr32(dev, 0x00402c, info->ncoef); | ||
809 | nv_mask(dev, 0x00c040, 0x00100033, info->emast); | ||
810 | |||
811 | goto resume; | ||
812 | error: | ||
813 | ret = -EBUSY; | ||
814 | resume: | 825 | resume: |
815 | nv_mask(dev, 0x002504, 0x00000001, 0x00000000); | 826 | nv_mask(dev, 0x002504, 0x00000001, 0x00000000); |
816 | kfree(info); | 827 | kfree(info); |