summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/clk/clk_arb.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/clk/clk_arb.c')
-rw-r--r-- drivers/gpu/nvgpu/clk/clk_arb.c 426
1 files changed, 331 insertions, 95 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index f3d6cfab..b816a570 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -41,14 +41,32 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
41static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb); 41static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
42static void nvgpu_clk_arb_free_fd(struct kref *refcount); 42static void nvgpu_clk_arb_free_fd(struct kref *refcount);
43static void nvgpu_clk_arb_free_session(struct kref *refcount); 43static void nvgpu_clk_arb_free_session(struct kref *refcount);
44static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk, 44static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
45 u16 mclk, u32 voltuv, u32 voltuv_sram); 45 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
46static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, 46 u32 voltuv_sram);
47 u16 *gpc2clk_target, u16 *mclk_target, u32 *voltuv, 47static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
48 u32 *voltuv_sram); 48 u16 *gpc2clk_target, u16 *sys2clk_target, u16 *xbar2clk_target,
49 u16 *mclk_target, u32 *voltuv, u32 *voltuv_sram);
50
51#define VF_POINT_INVALID_PSTATE ~0U
52#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
53#define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\
54 __fls((a)->pstates) :\
55 VF_POINT_INVALID_PSTATE)
56#define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\
57 __fls((a)->pstates & (b)->pstates) :\
58 VF_POINT_INVALID_PSTATE)
49 59
50struct nvgpu_clk_vf_point { 60struct nvgpu_clk_vf_point {
51 u16 mhz; 61 u16 pstates;
62 union {
63 struct {
64 u16 gpc_mhz;
65 u16 sys_mhz;
66 u16 xbar_mhz;
67 };
68 u16 mem_mhz;
69 };
52 u32 uvolt; 70 u32 uvolt;
53 u32 uvolt_sram; 71 u32 uvolt_sram;
54}; 72};
@@ -72,6 +90,7 @@ struct nvgpu_clk_arb_debug {
72struct nvgpu_clk_arb_target { 90struct nvgpu_clk_arb_target {
73 u16 mclk; 91 u16 mclk;
74 u16 gpc2clk; 92 u16 gpc2clk;
93 u32 pstate;
75}; 94};
76 95
77struct nvgpu_clk_arb { 96struct nvgpu_clk_arb {
@@ -362,9 +381,12 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
362 kref_init(&session->refcount); 381 kref_init(&session->refcount);
363 382
364 session->zombie = false; 383 session->zombie = false;
384 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
385 /* make sure that the initialization of the pool is visible
386 * before the update */
387 smp_wmb();
365 session->target = &session->target_pool[0]; 388 session->target = &session->target_pool[0];
366 session->target->mclk = arb->mclk_default_mhz; 389
367 session->target->gpc2clk = arb->gpc2clk_default_mhz;
368 init_llist_head(&session->targets); 390 init_llist_head(&session->targets);
369 391
370 spin_lock(&arb->sessions_lock); 392 spin_lock(&arb->sessions_lock);
@@ -464,13 +486,15 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
464 struct nvgpu_clk_vf_table *table; 486 struct nvgpu_clk_vf_table *table;
465 487
466 u32 i, j; 488 u32 i, j;
467 int status = 0; 489 int status = -EINVAL;
468 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; 490 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
469 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; 491 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
470 u16 gpc2clk_min, gpc2clk_max, clk_cur; 492 u16 gpc2clk_min, gpc2clk_max, clk_cur;
471 u16 mclk_min, mclk_max; 493 u16 mclk_min, mclk_max;
472 u32 num_points; 494 u32 num_points;
473 495
496 struct clk_set_info *p5_info, *p0_info;
497
474 table = ACCESS_ONCE(arb->current_vf_table); 498 table = ACCESS_ONCE(arb->current_vf_table);
475 /* make flag visible when all data has resolved in the tables */ 499 /* make flag visible when all data has resolved in the tables */
476 smp_rmb(); 500 smp_rmb();
@@ -504,17 +528,28 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
504 memset(table->gpc2clk_points, 0, 528 memset(table->gpc2clk_points, 0,
505 table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point)); 529 table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
506 530
531 p5_info = pstate_get_clk_set_info(g,
532 CTRL_PERF_PSTATE_P5, clkwhich_mclk);
533 if (!p5_info)
534 goto exit_vf_table;
535
536 p0_info = pstate_get_clk_set_info(g,
537 CTRL_PERF_PSTATE_P0, clkwhich_mclk);
538 if (!p0_info)
539 goto exit_vf_table;
540
507 for (i = 0, j = 0, num_points = 0, clk_cur = 0; 541 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
508 i < table->mclk_num_points; i++) { 542 i < table->mclk_num_points; i++) {
543
509 if ((arb->mclk_f_points[i] >= mclk_min) && 544 if ((arb->mclk_f_points[i] >= mclk_min) &&
510 (arb->mclk_f_points[i] <= mclk_max) && 545 (arb->mclk_f_points[i] <= mclk_max) &&
511 (arb->mclk_f_points[i] != clk_cur)) { 546 (arb->mclk_f_points[i] != clk_cur)) {
512 547
513 table->mclk_points[j].mhz = arb->mclk_f_points[i]; 548 table->mclk_points[j].mem_mhz = arb->mclk_f_points[i];
514 mclk_voltuv = mclk_voltuv_sram = 0; 549 mclk_voltuv = mclk_voltuv_sram = 0;
515 550
516 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, 551 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
517 &table->mclk_points[j].mhz, &mclk_voltuv, 552 &table->mclk_points[j].mem_mhz, &mclk_voltuv,
518 CTRL_VOLT_DOMAIN_LOGIC); 553 CTRL_VOLT_DOMAIN_LOGIC);
519 if (status < 0) { 554 if (status < 0) {
520 gk20a_err(dev_from_gk20a(g), 555 gk20a_err(dev_from_gk20a(g),
@@ -522,7 +557,8 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
522 goto exit_vf_table; 557 goto exit_vf_table;
523 } 558 }
524 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, 559 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
525 &table->mclk_points[j].mhz, &mclk_voltuv_sram, 560 &table->mclk_points[j].mem_mhz,
561 &mclk_voltuv_sram,
526 CTRL_VOLT_DOMAIN_SRAM); 562 CTRL_VOLT_DOMAIN_SRAM);
527 if (status < 0) { 563 if (status < 0) {
528 gk20a_err(dev_from_gk20a(g), 564 gk20a_err(dev_from_gk20a(g),
@@ -532,7 +568,19 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
532 568
533 table->mclk_points[j].uvolt = mclk_voltuv; 569 table->mclk_points[j].uvolt = mclk_voltuv;
534 table->mclk_points[j].uvolt_sram = mclk_voltuv_sram; 570 table->mclk_points[j].uvolt_sram = mclk_voltuv_sram;
535 clk_cur = table->mclk_points[j].mhz; 571 clk_cur = table->mclk_points[j].mem_mhz;
572
573 if ((clk_cur >= p5_info->min_mhz) &&
574 (clk_cur <= p5_info->max_mhz))
575 VF_POINT_SET_PSTATE_SUPPORTED(
576 &table->mclk_points[j],
577 CTRL_PERF_PSTATE_P5);
578 if ((clk_cur >= p0_info->min_mhz) &&
579 (clk_cur <= p0_info->max_mhz))
580 VF_POINT_SET_PSTATE_SUPPORTED(
581 &table->mclk_points[j],
582 CTRL_PERF_PSTATE_P0);
583
536 j++; 584 j++;
537 num_points++; 585 num_points++;
538 586
@@ -540,45 +588,187 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
540 } 588 }
541 table->mclk_num_points = num_points; 589 table->mclk_num_points = num_points;
542 590
591 p5_info = pstate_get_clk_set_info(g,
592 CTRL_PERF_PSTATE_P5, clkwhich_gpc2clk);
593 if (!p5_info) {
594 status = -EINVAL;
595 goto exit_vf_table;
596 }
597
598 p0_info = pstate_get_clk_set_info(g,
599 CTRL_PERF_PSTATE_P0, clkwhich_gpc2clk);
600 if (!p0_info) {
601 status = -EINVAL;
602 goto exit_vf_table;
603 }
604
605 /* GPC2CLK needs to be checked in two passes. The first determines the
606 * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the
607 * second verifies that the clocks minimum DVCO is satisfied and sets
608 * the voltages
609 */
543 for (i = 0, j = 0, num_points = 0, clk_cur = 0; 610 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
544 i < table->gpc2clk_num_points; i++) { 611 i < table->gpc2clk_num_points; i++) {
612 struct set_fll_clk setfllclk;
613
545 if ((arb->gpc2clk_f_points[i] >= gpc2clk_min) && 614 if ((arb->gpc2clk_f_points[i] >= gpc2clk_min) &&
546 (arb->gpc2clk_f_points[i] <= gpc2clk_max) && 615 (arb->gpc2clk_f_points[i] <= gpc2clk_max) &&
547 (arb->gpc2clk_f_points[i] != clk_cur)) { 616 (arb->gpc2clk_f_points[i] != clk_cur)) {
548 617
549 table->gpc2clk_points[j].mhz = arb->gpc2clk_f_points[i]; 618 table->gpc2clk_points[j].gpc_mhz =
550 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0; 619 arb->gpc2clk_f_points[i];
551 620
552 status = clk_domain_get_f_or_v(g, 621 setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i];
553 CTRL_CLK_DOMAIN_GPC2CLK, 622 status = clk_get_fll_clks(g, &setfllclk);
554 &table->gpc2clk_points[j].mhz, &gpc2clk_voltuv,
555 CTRL_VOLT_DOMAIN_LOGIC);
556 if (status < 0) { 623 if (status < 0) {
557 gk20a_err(dev_from_gk20a(g), 624 gk20a_err(dev_from_gk20a(g),
558 "failed to get GPC2CLK LOGIC voltage"); 625 "failed to get GPC2CLK slave clocks");
559 goto exit_vf_table; 626 goto exit_vf_table;
560 } 627 }
561 628
562 status = clk_domain_get_f_or_v(g, 629
563 CTRL_CLK_DOMAIN_GPC2CLK, 630 table->gpc2clk_points[j].sys_mhz =
564 &table->gpc2clk_points[j].mhz, 631 setfllclk.sys2clkmhz;
565 &gpc2clk_voltuv_sram, 632 table->gpc2clk_points[j].xbar_mhz =
566 CTRL_VOLT_DOMAIN_SRAM); 633 setfllclk.xbar2clkmhz;
634
635 clk_cur = table->gpc2clk_points[j].gpc_mhz;
636
637 if ((clk_cur >= p5_info->min_mhz) &&
638 (clk_cur <= p5_info->max_mhz))
639 VF_POINT_SET_PSTATE_SUPPORTED(
640 &table->gpc2clk_points[j],
641 CTRL_PERF_PSTATE_P5);
642 if ((clk_cur >= p0_info->min_mhz) &&
643 (clk_cur <= p0_info->max_mhz))
644 VF_POINT_SET_PSTATE_SUPPORTED(
645 &table->gpc2clk_points[j],
646 CTRL_PERF_PSTATE_P0);
647
648 j++;
649 num_points++;
650 }
651 }
652 table->gpc2clk_num_points = num_points;
653
654 /* Second pass */
655 for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) {
656 struct set_fll_clk setfllclk;
657
658 u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz;
659 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
660
661 /* Check sysclk */
662 p5_info = pstate_get_clk_set_info(g,
663 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
664 clkwhich_sys2clk);
665 if (!p5_info) {
666 status = -EINVAL;
667 goto exit_vf_table;
668 }
669 /* sys2clk below DVCO min, need to find correct clock */
670 if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) {
671 for (j = i + 1; j < table->gpc2clk_num_points; j++) {
672
673 if (table->gpc2clk_points[j].sys_mhz >=
674 p5_info->min_mhz) {
675
676 table->gpc2clk_points[i].sys_mhz =
677 table->gpc2clk_points[j].
678 sys_mhz;
679
680 alt_gpc2clk = alt_gpc2clk <
681 table->gpc2clk_points[j].
682 gpc_mhz ?
683 table->gpc2clk_points[j].
684 gpc_mhz:
685 alt_gpc2clk;
686 break;
687 }
688 }
689 /* no VF exists that satisfies condition */
690 if (j == table->gpc2clk_num_points) {
691 status = -EINVAL;
692 goto exit_vf_table;
693 }
694 }
695
696 /* Check xbarclk */
697 p5_info = pstate_get_clk_set_info(g,
698 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
699 clkwhich_xbar2clk);
700 if (!p5_info) {
701 status = -EINVAL;
702 goto exit_vf_table;
703 }
704
705 /* xbar2clk below DVCO min, need to find correct clock */
706 if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) {
707 for (j = i; j < table->gpc2clk_num_points; j++) {
708 if (table->gpc2clk_points[j].xbar_mhz >=
709 p5_info->min_mhz) {
710
711 table->gpc2clk_points[i].xbar_mhz =
712 table->gpc2clk_points[j].
713 xbar_mhz;
714 alt_gpc2clk = alt_gpc2clk <
715 table->gpc2clk_points[j].
716 gpc_mhz ?
717 table->gpc2clk_points[j].
718 gpc_mhz:
719 alt_gpc2clk;
720 break;
721 }
722 }
723 /* no VF exists that satisfies condition */
724 if (j == table->gpc2clk_num_points) {
725 status = -EINVAL;
726
727 goto exit_vf_table;
728 }
729 }
730
731 /* alternate gpc2clk clock has been requested, we need to
732 * calculate new ratios */
733 if (alt_gpc2clk != table->gpc2clk_points[i].gpc_mhz) {
734 setfllclk.gpc2clkmhz = alt_gpc2clk;
735
736 status = clk_get_fll_clks(g, &setfllclk);
567 if (status < 0) { 737 if (status < 0) {
568 gk20a_err(dev_from_gk20a(g), 738 gk20a_err(dev_from_gk20a(g),
569 "failed to get GPC2CLK SRAM voltage"); 739 "failed to get GPC2CLK slave clocks");
570 goto exit_vf_table; 740 goto exit_vf_table;
571 } 741 }
572 742
573 table->gpc2clk_points[j].uvolt = gpc2clk_voltuv; 743 table->gpc2clk_points[i].sys_mhz =
574 table->gpc2clk_points[j].uvolt_sram = 744 setfllclk.sys2clkmhz;
575 gpc2clk_voltuv_sram; 745 table->gpc2clk_points[i].xbar_mhz =
576 clk_cur = table->gpc2clk_points[j].mhz; 746 setfllclk.xbar2clkmhz;
577 j++; 747 }
578 num_points++; 748
749 /* Calculate voltages */
750 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
751 &alt_gpc2clk, &gpc2clk_voltuv,
752 CTRL_VOLT_DOMAIN_LOGIC);
753 if (status < 0) {
754 gk20a_err(dev_from_gk20a(g),
755 "failed to get GPC2CLK LOGIC voltage");
756 goto exit_vf_table;
757 }
758
759 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
760 &alt_gpc2clk,
761 &gpc2clk_voltuv_sram,
762 CTRL_VOLT_DOMAIN_SRAM);
763 if (status < 0) {
764 gk20a_err(dev_from_gk20a(g),
765 "failed to get GPC2CLK SRAM voltage");
766 goto exit_vf_table;
579 } 767 }
768
769 table->gpc2clk_points[i].uvolt = gpc2clk_voltuv;
770 table->gpc2clk_points[i].uvolt_sram = gpc2clk_voltuv_sram;
580 } 771 }
581 table->gpc2clk_num_points = num_points;
582 772
583 /* make table visible when all data has resolved in the tables */ 773 /* make table visible when all data has resolved in the tables */
584 smp_wmb(); 774 smp_wmb();
@@ -625,13 +815,14 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
625 struct gk20a *g = arb->g; 815 struct gk20a *g = arb->g;
626 struct llist_node *head; 816 struct llist_node *head;
627 817
818 u32 pstate = VF_POINT_INVALID_PSTATE;
628 u32 voltuv, voltuv_sram; 819 u32 voltuv, voltuv_sram;
629 bool mclk_set, gpc2clk_set; 820 bool mclk_set, gpc2clk_set;
630 821
631 int status = 0; 822 int status = 0;
632 823
633 /* Temporary variables for checking target frequency */ 824 /* Temporary variables for checking target frequency */
634 u16 gpc2clk_target, mclk_target; 825 u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target;
635 826
636#ifdef CONFIG_DEBUG_FS 827#ifdef CONFIG_DEBUG_FS
637 u64 t0, t1; 828 u64 t0, t1;
@@ -699,29 +890,25 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
699 rcu_read_unlock(); 890 rcu_read_unlock();
700 891
701 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : 892 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
702 arb->actual->gpc2clk ? gpc2clk_target : 893 arb->gpc2clk_default_mhz;
703 arb->gpc2clk_default_mhz;
704
705 mclk_target = (mclk_target > 0) ? mclk_target :
706 arb->actual->mclk ? mclk_target :
707 arb->mclk_default_mhz;
708
709 if (!gpc2clk_target && !mclk_target) {
710 mclk_target = arb->mclk_default_mhz;
711 gpc2clk_target = arb->gpc2clk_default_mhz;
712 }
713
714 if (!gpc2clk_target)
715 gpc2clk_target = arb->actual->mclk;
716
717 if (!mclk_target)
718 mclk_target = arb->actual->mclk;
719 894
895 mclk_target = (mclk_target > 0) ? mclk_target:
896 arb->mclk_default_mhz;
720 897
898 sys2clk_target = 0;
899 xbar2clk_target = 0;
721 /* Query the table for the closest vf point to program */ 900 /* Query the table for the closest vf point to program */
722 nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, &mclk_target, &voltuv, 901 pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target,
902 &sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv,
723 &voltuv_sram); 903 &voltuv_sram);
724 904
905 if (pstate == VF_POINT_INVALID_PSTATE) {
906 arb->status = -EINVAL;
907 /* make status visible */
908 smp_mb();
909 goto exit_arb;
910 }
911
725 if ((arb->actual->gpc2clk == gpc2clk_target) && 912 if ((arb->actual->gpc2clk == gpc2clk_target) &&
726 (arb->actual->mclk == mclk_target) && 913 (arb->actual->mclk == mclk_target) &&
727 (arb->voltuv_actual == voltuv)) { 914 (arb->voltuv_actual == voltuv)) {
@@ -731,12 +918,17 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
731 /* Program clocks */ 918 /* Program clocks */
732 /* A change in either mclk or gpc2clk may require a change in voltage */ 919 /* A change in either mclk or gpc2clk may require a change in voltage */
733 920
734 status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, mclk_target, 921 status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target,
735 voltuv, voltuv_sram); 922 sys2clk_target, xbar2clk_target, mclk_target, voltuv,
923 voltuv_sram);
736 924
737 if (status < 0) 925 if (status < 0) {
738 goto exit_arb; 926 arb->status = status;
927 /* make status visible */
928 smp_mb();
739 929
930 goto exit_arb;
931 }
740 actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ? 932 actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
741 &arb->actual_pool[1] : &arb->actual_pool[0]; 933 &arb->actual_pool[1] : &arb->actual_pool[0];
742 934
@@ -745,6 +937,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
745 actual->gpc2clk = gpc2clk_target; 937 actual->gpc2clk = gpc2clk_target;
746 actual->mclk = mclk_target; 938 actual->mclk = mclk_target;
747 arb->voltuv_actual = voltuv; 939 arb->voltuv_actual = voltuv;
940 actual->pstate = pstate;
748 arb->status = status; 941 arb->status = status;
749 942
750 /* Make changes visible to other threads */ 943 /* Make changes visible to other threads */
@@ -1015,15 +1208,17 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
1015 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); 1208 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
1016} 1209}
1017 1210
1018static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, 1211static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1019 u16 *gpc2clk, u16 *mclk, u32 *voltuv, 1212 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
1020 u32 *voltuv_sram) 1213 u32 *voltuv, u32 *voltuv_sram)
1021{ 1214{
1022 u16 gpc2clk_target, mclk_target; 1215 u16 gpc2clk_target, mclk_target;
1023 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; 1216 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
1024 u32 mclk_voltuv, mclk_voltuv_sram; 1217 u32 mclk_voltuv, mclk_voltuv_sram;
1218 u32 pstate = VF_POINT_INVALID_PSTATE;
1025 struct nvgpu_clk_vf_table *table; 1219 struct nvgpu_clk_vf_table *table;
1026 u32 index; 1220 u32 index, index_mclk;
1221 struct nvgpu_clk_vf_point *mclk_vf = NULL;
1027 1222
1028 do { 1223 do {
1029 gpc2clk_target = *gpc2clk; 1224 gpc2clk_target = *gpc2clk;
@@ -1042,12 +1237,39 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1042 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) 1237 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points))
1043 goto find_exit; 1238 goto find_exit;
1044 1239
1240 /* First we check MCLK to find out which PSTATE we are
1241 * are requesting, and from there try to find the minimum
1242 * GPC2CLK on the same PSTATE that satisfies the request.
1243 * If no GPC2CLK can be found, then we need to up the PSTATE
1244 */
1245
1246recalculate_vf_point:
1247 for (index = 0; index < table->mclk_num_points; index++) {
1248 if (table->mclk_points[index].mem_mhz >= mclk_target) {
1249 mclk_vf = &table->mclk_points[index];
1250 break;
1251 }
1252 }
1253 if (index == table->mclk_num_points) {
1254 mclk_vf = &table->mclk_points[index-1];
1255 }
1256 index_mclk = index;
1257
1045 /* round up the freq requests */ 1258 /* round up the freq requests */
1046 for (index = 0; index < table->gpc2clk_num_points; index++) { 1259 for (index = 0; index < table->gpc2clk_num_points; index++) {
1047 if (table->gpc2clk_points[index].mhz >= 1260 pstate = VF_POINT_COMMON_PSTATE(
1048 gpc2clk_target) { 1261 &table->gpc2clk_points[index], mclk_vf);
1262
1263 if ((table->gpc2clk_points[index].gpc_mhz >=
1264 gpc2clk_target) &&
1265 (pstate != VF_POINT_INVALID_PSTATE)){
1049 gpc2clk_target = 1266 gpc2clk_target =
1050 table->gpc2clk_points[index].mhz; 1267 table->gpc2clk_points[index].gpc_mhz;
1268 *sys2clk =
1269 table->gpc2clk_points[index].sys_mhz;
1270 *xbar2clk =
1271 table->gpc2clk_points[index].xbar_mhz;
1272
1051 gpc2clk_voltuv = 1273 gpc2clk_voltuv =
1052 table->gpc2clk_points[index].uvolt; 1274 table->gpc2clk_points[index].uvolt;
1053 gpc2clk_voltuv_sram = 1275 gpc2clk_voltuv_sram =
@@ -1057,27 +1279,42 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1057 } 1279 }
1058 1280
1059 if (index == table->gpc2clk_num_points) { 1281 if (index == table->gpc2clk_num_points) {
1060 gpc2clk_target = table->gpc2clk_points[index-1].mhz; 1282 pstate = VF_POINT_COMMON_PSTATE(
1061 gpc2clk_voltuv = table->gpc2clk_points[index-1].uvolt; 1283 &table->gpc2clk_points[index-1], mclk_vf);
1062 gpc2clk_voltuv_sram = 1284 if (pstate != VF_POINT_INVALID_PSTATE) {
1063 table->gpc2clk_points[index-1].uvolt_sram; 1285 gpc2clk_target =
1064 } 1286 table->gpc2clk_points[index-1].gpc_mhz;
1287 *sys2clk =
1288 table->gpc2clk_points[index-1].sys_mhz;
1289 *xbar2clk =
1290 table->gpc2clk_points[index-1].xbar_mhz;
1065 1291
1066 for (index = 0; index < table->mclk_num_points; index++) { 1292 gpc2clk_voltuv =
1067 if (table->mclk_points[index].mhz >= mclk_target) { 1293 table->gpc2clk_points[index-1].uvolt;
1068 mclk_target = table->mclk_points[index].mhz; 1294 gpc2clk_voltuv_sram =
1069 mclk_voltuv = table->mclk_points[index].uvolt; 1295 table->gpc2clk_points[index-1].
1070 mclk_voltuv_sram = 1296 uvolt_sram;
1071 table->mclk_points[index].uvolt_sram; 1297 } else if (index_mclk == table->mclk_num_points - 1) {
1072 break; 1298 /* There is no available combination of MCLK
1299 * and GPC2CLK, we need to fail this
1300 */
1301 gpc2clk_target = 0;
1302 mclk_target = 0;
1303 pstate = VF_POINT_INVALID_PSTATE;
1304 goto find_exit;
1305 } else {
1306 /* recalculate with higher PSTATE */
1307 gpc2clk_target = *gpc2clk;
1308 mclk_target = table->mclk_points[index_mclk+1].
1309 mem_mhz;
1310 goto recalculate_vf_point;
1073 } 1311 }
1074 } 1312 }
1075 if (index == table->mclk_num_points) { 1313
1076 mclk_target = table->mclk_points[index-1].mhz; 1314 mclk_target = mclk_vf->mem_mhz;
1077 mclk_voltuv = table->mclk_points[index-1].uvolt; 1315 mclk_voltuv = mclk_vf->uvolt;
1078 mclk_voltuv_sram = 1316 mclk_voltuv_sram = mclk_vf->uvolt_sram;
1079 table->mclk_points[index-1].uvolt_sram; 1317
1080 }
1081 } while (!table || 1318 } while (!table ||
1082 (ACCESS_ONCE(arb->current_vf_table) != table)); 1319 (ACCESS_ONCE(arb->current_vf_table) != table));
1083 1320
@@ -1088,15 +1325,23 @@ find_exit:
1088 1325
1089 *gpc2clk = gpc2clk_target; 1326 *gpc2clk = gpc2clk_target;
1090 *mclk = mclk_target; 1327 *mclk = mclk_target;
1328 return pstate;
1091} 1329}
1092 1330
1093static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, 1331static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1094 u16 mclk_target, u32 voltuv, u32 voltuv_sram) 1332 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
1333 u32 voltuv_sram)
1095{ 1334{
1096 struct change_fll_clk fllclk; 1335 struct set_fll_clk fllclk;
1097 struct nvgpu_clk_arb *arb = g->clk_arb; 1336 struct nvgpu_clk_arb *arb = g->clk_arb;
1098 int status; 1337 int status;
1099 1338
1339 fllclk.gpc2clkmhz = gpc2clk_target;
1340 fllclk.sys2clkmhz = sys2clk_target;
1341 fllclk.xbar2clkmhz = xbar2clk_target;
1342
1343 fllclk.voltuv = voltuv;
1344
1100 /* if voltage ascends we do: 1345 /* if voltage ascends we do:
1101 * (1) FLL change 1346 * (1) FLL change
1102 * (2) Voltage change 1347 * (2) Voltage change
@@ -1117,17 +1362,11 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1117 if (status < 0) 1362 if (status < 0)
1118 return status; 1363 return status;
1119 1364
1120 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; 1365 status = clk_set_fll_clks(g, &fllclk);
1121 fllclk.clkmhz = gpc2clk_target;
1122 fllclk.voltuv = voltuv;
1123 status = clk_program_fll_clks(g, &fllclk);
1124 if (status < 0) 1366 if (status < 0)
1125 return status; 1367 return status;
1126 } else if (voltuv > arb->voltuv_actual) { 1368 } else if (voltuv > arb->voltuv_actual) {
1127 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; 1369 status = clk_set_fll_clks(g, &fllclk);
1128 fllclk.clkmhz = gpc2clk_target;
1129 fllclk.voltuv = voltuv;
1130 status = clk_program_fll_clks(g, &fllclk);
1131 if (status < 0) 1370 if (status < 0)
1132 return status; 1371 return status;
1133 1372
@@ -1143,10 +1382,7 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1143 if (status < 0) 1382 if (status < 0)
1144 return status; 1383 return status;
1145 1384
1146 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; 1385 status = clk_set_fll_clks(g, &fllclk);
1147 fllclk.clkmhz = gpc2clk_target;
1148 fllclk.voltuv = voltuv;
1149 status = clk_program_fll_clks(g, &fllclk);
1150 if (status < 0) 1386 if (status < 0)
1151 return status; 1387 return status;
1152 } 1388 }