summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/clk/clk_arb.c
diff options
context:
space:
mode:
authorDavid Nieto <dmartineznie@nvidia.com>2016-11-04 01:01:32 -0400
committerDeepak Nibade <dnibade@nvidia.com>2016-12-27 04:56:52 -0500
commit2f258670e40be1c92ab57b6fe77908add768ad9d (patch)
tree77e4fe65923a4e6667ac6956d1e2de158655c5e8 /drivers/gpu/nvgpu/clk/clk_arb.c
parent2ebf09920755daeb7e1be71bf317f88ec9d533e1 (diff)
gpu: nvgpu: p-state bound arbiter
Modification of the ARBITER clocks to be P-State aware.

Up to now the arbiter just considered the whole range of the GPC and MCLK domains, which could end up on illegal combinations of MCLK, GPC2CLK, and set the SYSCLK and XBARCLK domains below their minimum VCO.

The following has been implemented:
(1) Modified VF tables to add which PStates are supported on each point.
(2) Return and store the current PState on the arbiter state.
(3) Modified logic to prevent illegal combinations of MCLK and GPC2CLK.
(4) Modified logic to prevent setting VF points for XBAR and SYS domains below VCO limits.
(5) Modified voltage calculation to account for increased values of XBAR and SYS on some VF points.
(6) Modified arbiter clock target logic to prevent an application that has not requested a particular VF point to set target to default targets.
(7) Remove unnecessary mutexes from critical path.

JIRA DNVGPU-182
JIRA DNVGPU-183

Change-Id: I3d1c30903278f848681b8da833a867835acc99bb
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1247937
(cherry picked from commit b8bcc07eb3b5b70ec1ee19ace237df99d6170138)
Reviewed-on: http://git-master/r/1268063
Tested-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/clk/clk_arb.c')
-rw-r--r--drivers/gpu/nvgpu/clk/clk_arb.c426
1 files changed, 331 insertions, 95 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index f3d6cfab..b816a570 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -41,14 +41,32 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
41static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb); 41static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
42static void nvgpu_clk_arb_free_fd(struct kref *refcount); 42static void nvgpu_clk_arb_free_fd(struct kref *refcount);
43static void nvgpu_clk_arb_free_session(struct kref *refcount); 43static void nvgpu_clk_arb_free_session(struct kref *refcount);
44static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk, 44static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
45 u16 mclk, u32 voltuv, u32 voltuv_sram); 45 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
46static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, 46 u32 voltuv_sram);
47 u16 *gpc2clk_target, u16 *mclk_target, u32 *voltuv, 47static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
48 u32 *voltuv_sram); 48 u16 *gpc2clk_target, u16 *sys2clk_target, u16 *xbar2clk_target,
49 u16 *mclk_target, u32 *voltuv, u32 *voltuv_sram);
50
51#define VF_POINT_INVALID_PSTATE ~0U
52#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
53#define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\
54 __fls((a)->pstates) :\
55 VF_POINT_INVALID_PSTATE)
56#define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\
57 __fls((a)->pstates & (b)->pstates) :\
58 VF_POINT_INVALID_PSTATE)
49 59
50struct nvgpu_clk_vf_point { 60struct nvgpu_clk_vf_point {
51 u16 mhz; 61 u16 pstates;
62 union {
63 struct {
64 u16 gpc_mhz;
65 u16 sys_mhz;
66 u16 xbar_mhz;
67 };
68 u16 mem_mhz;
69 };
52 u32 uvolt; 70 u32 uvolt;
53 u32 uvolt_sram; 71 u32 uvolt_sram;
54}; 72};
@@ -72,6 +90,7 @@ struct nvgpu_clk_arb_debug {
72struct nvgpu_clk_arb_target { 90struct nvgpu_clk_arb_target {
73 u16 mclk; 91 u16 mclk;
74 u16 gpc2clk; 92 u16 gpc2clk;
93 u32 pstate;
75}; 94};
76 95
77struct nvgpu_clk_arb { 96struct nvgpu_clk_arb {
@@ -362,9 +381,12 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
362 kref_init(&session->refcount); 381 kref_init(&session->refcount);
363 382
364 session->zombie = false; 383 session->zombie = false;
384 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
385 /* make sure that the initialization of the pool is visible
386 * before the update */
387 smp_wmb();
365 session->target = &session->target_pool[0]; 388 session->target = &session->target_pool[0];
366 session->target->mclk = arb->mclk_default_mhz; 389
367 session->target->gpc2clk = arb->gpc2clk_default_mhz;
368 init_llist_head(&session->targets); 390 init_llist_head(&session->targets);
369 391
370 spin_lock(&arb->sessions_lock); 392 spin_lock(&arb->sessions_lock);
@@ -464,13 +486,15 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
464 struct nvgpu_clk_vf_table *table; 486 struct nvgpu_clk_vf_table *table;
465 487
466 u32 i, j; 488 u32 i, j;
467 int status = 0; 489 int status = -EINVAL;
468 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; 490 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
469 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; 491 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
470 u16 gpc2clk_min, gpc2clk_max, clk_cur; 492 u16 gpc2clk_min, gpc2clk_max, clk_cur;
471 u16 mclk_min, mclk_max; 493 u16 mclk_min, mclk_max;
472 u32 num_points; 494 u32 num_points;
473 495
496 struct clk_set_info *p5_info, *p0_info;
497
474 table = ACCESS_ONCE(arb->current_vf_table); 498 table = ACCESS_ONCE(arb->current_vf_table);
475 /* make flag visible when all data has resolved in the tables */ 499 /* make flag visible when all data has resolved in the tables */
476 smp_rmb(); 500 smp_rmb();
@@ -504,17 +528,28 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
504 memset(table->gpc2clk_points, 0, 528 memset(table->gpc2clk_points, 0,
505 table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point)); 529 table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
506 530
531 p5_info = pstate_get_clk_set_info(g,
532 CTRL_PERF_PSTATE_P5, clkwhich_mclk);
533 if (!p5_info)
534 goto exit_vf_table;
535
536 p0_info = pstate_get_clk_set_info(g,
537 CTRL_PERF_PSTATE_P0, clkwhich_mclk);
538 if (!p0_info)
539 goto exit_vf_table;
540
507 for (i = 0, j = 0, num_points = 0, clk_cur = 0; 541 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
508 i < table->mclk_num_points; i++) { 542 i < table->mclk_num_points; i++) {
543
509 if ((arb->mclk_f_points[i] >= mclk_min) && 544 if ((arb->mclk_f_points[i] >= mclk_min) &&
510 (arb->mclk_f_points[i] <= mclk_max) && 545 (arb->mclk_f_points[i] <= mclk_max) &&
511 (arb->mclk_f_points[i] != clk_cur)) { 546 (arb->mclk_f_points[i] != clk_cur)) {
512 547
513 table->mclk_points[j].mhz = arb->mclk_f_points[i]; 548 table->mclk_points[j].mem_mhz = arb->mclk_f_points[i];
514 mclk_voltuv = mclk_voltuv_sram = 0; 549 mclk_voltuv = mclk_voltuv_sram = 0;
515 550
516 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, 551 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
517 &table->mclk_points[j].mhz, &mclk_voltuv, 552 &table->mclk_points[j].mem_mhz, &mclk_voltuv,
518 CTRL_VOLT_DOMAIN_LOGIC); 553 CTRL_VOLT_DOMAIN_LOGIC);
519 if (status < 0) { 554 if (status < 0) {
520 gk20a_err(dev_from_gk20a(g), 555 gk20a_err(dev_from_gk20a(g),
@@ -522,7 +557,8 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
522 goto exit_vf_table; 557 goto exit_vf_table;
523 } 558 }
524 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, 559 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
525 &table->mclk_points[j].mhz, &mclk_voltuv_sram, 560 &table->mclk_points[j].mem_mhz,
561 &mclk_voltuv_sram,
526 CTRL_VOLT_DOMAIN_SRAM); 562 CTRL_VOLT_DOMAIN_SRAM);
527 if (status < 0) { 563 if (status < 0) {
528 gk20a_err(dev_from_gk20a(g), 564 gk20a_err(dev_from_gk20a(g),
@@ -532,7 +568,19 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
532 568
533 table->mclk_points[j].uvolt = mclk_voltuv; 569 table->mclk_points[j].uvolt = mclk_voltuv;
534 table->mclk_points[j].uvolt_sram = mclk_voltuv_sram; 570 table->mclk_points[j].uvolt_sram = mclk_voltuv_sram;
535 clk_cur = table->mclk_points[j].mhz; 571 clk_cur = table->mclk_points[j].mem_mhz;
572
573 if ((clk_cur >= p5_info->min_mhz) &&
574 (clk_cur <= p5_info->max_mhz))
575 VF_POINT_SET_PSTATE_SUPPORTED(
576 &table->mclk_points[j],
577 CTRL_PERF_PSTATE_P5);
578 if ((clk_cur >= p0_info->min_mhz) &&
579 (clk_cur <= p0_info->max_mhz))
580 VF_POINT_SET_PSTATE_SUPPORTED(
581 &table->mclk_points[j],
582 CTRL_PERF_PSTATE_P0);
583
536 j++; 584 j++;
537 num_points++; 585 num_points++;
538 586
@@ -540,45 +588,187 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
540 } 588 }
541 table->mclk_num_points = num_points; 589 table->mclk_num_points = num_points;
542 590
591 p5_info = pstate_get_clk_set_info(g,
592 CTRL_PERF_PSTATE_P5, clkwhich_gpc2clk);
593 if (!p5_info) {
594 status = -EINVAL;
595 goto exit_vf_table;
596 }
597
598 p0_info = pstate_get_clk_set_info(g,
599 CTRL_PERF_PSTATE_P0, clkwhich_gpc2clk);
600 if (!p0_info) {
601 status = -EINVAL;
602 goto exit_vf_table;
603 }
604
605 /* GPC2CLK needs to be checked in two passes. The first determines the
606 * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the
607 * second verifies that the clocks minimum DVCO is satisfied and sets
608 * the voltages
609 */
543 for (i = 0, j = 0, num_points = 0, clk_cur = 0; 610 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
544 i < table->gpc2clk_num_points; i++) { 611 i < table->gpc2clk_num_points; i++) {
612 struct set_fll_clk setfllclk;
613
545 if ((arb->gpc2clk_f_points[i] >= gpc2clk_min) && 614 if ((arb->gpc2clk_f_points[i] >= gpc2clk_min) &&
546 (arb->gpc2clk_f_points[i] <= gpc2clk_max) && 615 (arb->gpc2clk_f_points[i] <= gpc2clk_max) &&
547 (arb->gpc2clk_f_points[i] != clk_cur)) { 616 (arb->gpc2clk_f_points[i] != clk_cur)) {
548 617
549 table->gpc2clk_points[j].mhz = arb->gpc2clk_f_points[i]; 618 table->gpc2clk_points[j].gpc_mhz =
550 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0; 619 arb->gpc2clk_f_points[i];
551 620
552 status = clk_domain_get_f_or_v(g, 621 setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i];
553 CTRL_CLK_DOMAIN_GPC2CLK, 622 status = clk_get_fll_clks(g, &setfllclk);
554 &table->gpc2clk_points[j].mhz, &gpc2clk_voltuv,
555 CTRL_VOLT_DOMAIN_LOGIC);
556 if (status < 0) { 623 if (status < 0) {
557 gk20a_err(dev_from_gk20a(g), 624 gk20a_err(dev_from_gk20a(g),
558 "failed to get GPC2CLK LOGIC voltage"); 625 "failed to get GPC2CLK slave clocks");
559 goto exit_vf_table; 626 goto exit_vf_table;
560 } 627 }
561 628
562 status = clk_domain_get_f_or_v(g, 629
563 CTRL_CLK_DOMAIN_GPC2CLK, 630 table->gpc2clk_points[j].sys_mhz =
564 &table->gpc2clk_points[j].mhz, 631 setfllclk.sys2clkmhz;
565 &gpc2clk_voltuv_sram, 632 table->gpc2clk_points[j].xbar_mhz =
566 CTRL_VOLT_DOMAIN_SRAM); 633 setfllclk.xbar2clkmhz;
634
635 clk_cur = table->gpc2clk_points[j].gpc_mhz;
636
637 if ((clk_cur >= p5_info->min_mhz) &&
638 (clk_cur <= p5_info->max_mhz))
639 VF_POINT_SET_PSTATE_SUPPORTED(
640 &table->gpc2clk_points[j],
641 CTRL_PERF_PSTATE_P5);
642 if ((clk_cur >= p0_info->min_mhz) &&
643 (clk_cur <= p0_info->max_mhz))
644 VF_POINT_SET_PSTATE_SUPPORTED(
645 &table->gpc2clk_points[j],
646 CTRL_PERF_PSTATE_P0);
647
648 j++;
649 num_points++;
650 }
651 }
652 table->gpc2clk_num_points = num_points;
653
654 /* Second pass */
655 for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) {
656 struct set_fll_clk setfllclk;
657
658 u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz;
659 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
660
661 /* Check sysclk */
662 p5_info = pstate_get_clk_set_info(g,
663 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
664 clkwhich_sys2clk);
665 if (!p5_info) {
666 status = -EINVAL;
667 goto exit_vf_table;
668 }
669 /* sys2clk below DVCO min, need to find correct clock */
670 if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) {
671 for (j = i + 1; j < table->gpc2clk_num_points; j++) {
672
673 if (table->gpc2clk_points[j].sys_mhz >=
674 p5_info->min_mhz) {
675
676 table->gpc2clk_points[i].sys_mhz =
677 table->gpc2clk_points[j].
678 sys_mhz;
679
680 alt_gpc2clk = alt_gpc2clk <
681 table->gpc2clk_points[j].
682 gpc_mhz ?
683 table->gpc2clk_points[j].
684 gpc_mhz:
685 alt_gpc2clk;
686 break;
687 }
688 }
689 /* no VF exists that satisfies condition */
690 if (j == table->gpc2clk_num_points) {
691 status = -EINVAL;
692 goto exit_vf_table;
693 }
694 }
695
696 /* Check xbarclk */
697 p5_info = pstate_get_clk_set_info(g,
698 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
699 clkwhich_xbar2clk);
700 if (!p5_info) {
701 status = -EINVAL;
702 goto exit_vf_table;
703 }
704
705 /* xbar2clk below DVCO min, need to find correct clock */
706 if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) {
707 for (j = i; j < table->gpc2clk_num_points; j++) {
708 if (table->gpc2clk_points[j].xbar_mhz >=
709 p5_info->min_mhz) {
710
711 table->gpc2clk_points[i].xbar_mhz =
712 table->gpc2clk_points[j].
713 xbar_mhz;
714 alt_gpc2clk = alt_gpc2clk <
715 table->gpc2clk_points[j].
716 gpc_mhz ?
717 table->gpc2clk_points[j].
718 gpc_mhz:
719 alt_gpc2clk;
720 break;
721 }
722 }
723 /* no VF exists that satisfies condition */
724 if (j == table->gpc2clk_num_points) {
725 status = -EINVAL;
726
727 goto exit_vf_table;
728 }
729 }
730
731 /* alternate gpc2clk clock has been requested, we need to
732 * calculate new ratios */
733 if (alt_gpc2clk != table->gpc2clk_points[i].gpc_mhz) {
734 setfllclk.gpc2clkmhz = alt_gpc2clk;
735
736 status = clk_get_fll_clks(g, &setfllclk);
567 if (status < 0) { 737 if (status < 0) {
568 gk20a_err(dev_from_gk20a(g), 738 gk20a_err(dev_from_gk20a(g),
569 "failed to get GPC2CLK SRAM voltage"); 739 "failed to get GPC2CLK slave clocks");
570 goto exit_vf_table; 740 goto exit_vf_table;
571 } 741 }
572 742
573 table->gpc2clk_points[j].uvolt = gpc2clk_voltuv; 743 table->gpc2clk_points[i].sys_mhz =
574 table->gpc2clk_points[j].uvolt_sram = 744 setfllclk.sys2clkmhz;
575 gpc2clk_voltuv_sram; 745 table->gpc2clk_points[i].xbar_mhz =
576 clk_cur = table->gpc2clk_points[j].mhz; 746 setfllclk.xbar2clkmhz;
577 j++; 747 }
578 num_points++; 748
749 /* Calculate voltages */
750 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
751 &alt_gpc2clk, &gpc2clk_voltuv,
752 CTRL_VOLT_DOMAIN_LOGIC);
753 if (status < 0) {
754 gk20a_err(dev_from_gk20a(g),
755 "failed to get GPC2CLK LOGIC voltage");
756 goto exit_vf_table;
757 }
758
759 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
760 &alt_gpc2clk,
761 &gpc2clk_voltuv_sram,
762 CTRL_VOLT_DOMAIN_SRAM);
763 if (status < 0) {
764 gk20a_err(dev_from_gk20a(g),
765 "failed to get GPC2CLK SRAM voltage");
766 goto exit_vf_table;
579 } 767 }
768
769 table->gpc2clk_points[i].uvolt = gpc2clk_voltuv;
770 table->gpc2clk_points[i].uvolt_sram = gpc2clk_voltuv_sram;
580 } 771 }
581 table->gpc2clk_num_points = num_points;
582 772
583 /* make table visible when all data has resolved in the tables */ 773 /* make table visible when all data has resolved in the tables */
584 smp_wmb(); 774 smp_wmb();
@@ -625,13 +815,14 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
625 struct gk20a *g = arb->g; 815 struct gk20a *g = arb->g;
626 struct llist_node *head; 816 struct llist_node *head;
627 817
818 u32 pstate = VF_POINT_INVALID_PSTATE;
628 u32 voltuv, voltuv_sram; 819 u32 voltuv, voltuv_sram;
629 bool mclk_set, gpc2clk_set; 820 bool mclk_set, gpc2clk_set;
630 821
631 int status = 0; 822 int status = 0;
632 823
633 /* Temporary variables for checking target frequency */ 824 /* Temporary variables for checking target frequency */
634 u16 gpc2clk_target, mclk_target; 825 u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target;
635 826
636#ifdef CONFIG_DEBUG_FS 827#ifdef CONFIG_DEBUG_FS
637 u64 t0, t1; 828 u64 t0, t1;
@@ -699,29 +890,25 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
699 rcu_read_unlock(); 890 rcu_read_unlock();
700 891
701 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : 892 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
702 arb->actual->gpc2clk ? gpc2clk_target : 893 arb->gpc2clk_default_mhz;
703 arb->gpc2clk_default_mhz;
704
705 mclk_target = (mclk_target > 0) ? mclk_target :
706 arb->actual->mclk ? mclk_target :
707 arb->mclk_default_mhz;
708
709 if (!gpc2clk_target && !mclk_target) {
710 mclk_target = arb->mclk_default_mhz;
711 gpc2clk_target = arb->gpc2clk_default_mhz;
712 }
713
714 if (!gpc2clk_target)
715 gpc2clk_target = arb->actual->mclk;
716
717 if (!mclk_target)
718 mclk_target = arb->actual->mclk;
719 894
895 mclk_target = (mclk_target > 0) ? mclk_target:
896 arb->mclk_default_mhz;
720 897
898 sys2clk_target = 0;
899 xbar2clk_target = 0;
721 /* Query the table for the closest vf point to program */ 900 /* Query the table for the closest vf point to program */
722 nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, &mclk_target, &voltuv, 901 pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target,
902 &sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv,
723 &voltuv_sram); 903 &voltuv_sram);
724 904
905 if (pstate == VF_POINT_INVALID_PSTATE) {
906 arb->status = -EINVAL;
907 /* make status visible */
908 smp_mb();
909 goto exit_arb;
910 }
911
725 if ((arb->actual->gpc2clk == gpc2clk_target) && 912 if ((arb->actual->gpc2clk == gpc2clk_target) &&
726 (arb->actual->mclk == mclk_target) && 913 (arb->actual->mclk == mclk_target) &&
727 (arb->voltuv_actual == voltuv)) { 914 (arb->voltuv_actual == voltuv)) {
@@ -731,12 +918,17 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
731 /* Program clocks */ 918 /* Program clocks */
732 /* A change in both mclk of gpc2clk may require a change in voltage */ 919 /* A change in both mclk of gpc2clk may require a change in voltage */
733 920
734 status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, mclk_target, 921 status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target,
735 voltuv, voltuv_sram); 922 sys2clk_target, xbar2clk_target, mclk_target, voltuv,
923 voltuv_sram);
736 924
737 if (status < 0) 925 if (status < 0) {
738 goto exit_arb; 926 arb->status = status;
927 /* make status visible */
928 smp_mb();
739 929
930 goto exit_arb;
931 }
740 actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ? 932 actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
741 &arb->actual_pool[1] : &arb->actual_pool[0]; 933 &arb->actual_pool[1] : &arb->actual_pool[0];
742 934
@@ -745,6 +937,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
745 actual->gpc2clk = gpc2clk_target; 937 actual->gpc2clk = gpc2clk_target;
746 actual->mclk = mclk_target; 938 actual->mclk = mclk_target;
747 arb->voltuv_actual = voltuv; 939 arb->voltuv_actual = voltuv;
940 actual->pstate = pstate;
748 arb->status = status; 941 arb->status = status;
749 942
750 /* Make changes visible to other threads */ 943 /* Make changes visible to other threads */
@@ -1015,15 +1208,17 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
1015 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); 1208 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
1016} 1209}
1017 1210
1018static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, 1211static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1019 u16 *gpc2clk, u16 *mclk, u32 *voltuv, 1212 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
1020 u32 *voltuv_sram) 1213 u32 *voltuv, u32 *voltuv_sram)
1021{ 1214{
1022 u16 gpc2clk_target, mclk_target; 1215 u16 gpc2clk_target, mclk_target;
1023 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; 1216 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
1024 u32 mclk_voltuv, mclk_voltuv_sram; 1217 u32 mclk_voltuv, mclk_voltuv_sram;
1218 u32 pstate = VF_POINT_INVALID_PSTATE;
1025 struct nvgpu_clk_vf_table *table; 1219 struct nvgpu_clk_vf_table *table;
1026 u32 index; 1220 u32 index, index_mclk;
1221 struct nvgpu_clk_vf_point *mclk_vf = NULL;
1027 1222
1028 do { 1223 do {
1029 gpc2clk_target = *gpc2clk; 1224 gpc2clk_target = *gpc2clk;
@@ -1042,12 +1237,39 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1042 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) 1237 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points))
1043 goto find_exit; 1238 goto find_exit;
1044 1239
1240 /* First we check MCLK to find out which PSTATE we are
1241 * are requesting, and from there try to find the minimum
1242 * GPC2CLK on the same PSTATE that satisfies the request.
1243 * If no GPC2CLK can be found, then we need to up the PSTATE
1244 */
1245
1246recalculate_vf_point:
1247 for (index = 0; index < table->mclk_num_points; index++) {
1248 if (table->mclk_points[index].mem_mhz >= mclk_target) {
1249 mclk_vf = &table->mclk_points[index];
1250 break;
1251 }
1252 }
1253 if (index == table->mclk_num_points) {
1254 mclk_vf = &table->mclk_points[index-1];
1255 }
1256 index_mclk = index;
1257
1045 /* round up the freq requests */ 1258 /* round up the freq requests */
1046 for (index = 0; index < table->gpc2clk_num_points; index++) { 1259 for (index = 0; index < table->gpc2clk_num_points; index++) {
1047 if (table->gpc2clk_points[index].mhz >= 1260 pstate = VF_POINT_COMMON_PSTATE(
1048 gpc2clk_target) { 1261 &table->gpc2clk_points[index], mclk_vf);
1262
1263 if ((table->gpc2clk_points[index].gpc_mhz >=
1264 gpc2clk_target) &&
1265 (pstate != VF_POINT_INVALID_PSTATE)){
1049 gpc2clk_target = 1266 gpc2clk_target =
1050 table->gpc2clk_points[index].mhz; 1267 table->gpc2clk_points[index].gpc_mhz;
1268 *sys2clk =
1269 table->gpc2clk_points[index].sys_mhz;
1270 *xbar2clk =
1271 table->gpc2clk_points[index].xbar_mhz;
1272
1051 gpc2clk_voltuv = 1273 gpc2clk_voltuv =
1052 table->gpc2clk_points[index].uvolt; 1274 table->gpc2clk_points[index].uvolt;
1053 gpc2clk_voltuv_sram = 1275 gpc2clk_voltuv_sram =
@@ -1057,27 +1279,42 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1057 } 1279 }
1058 1280
1059 if (index == table->gpc2clk_num_points) { 1281 if (index == table->gpc2clk_num_points) {
1060 gpc2clk_target = table->gpc2clk_points[index-1].mhz; 1282 pstate = VF_POINT_COMMON_PSTATE(
1061 gpc2clk_voltuv = table->gpc2clk_points[index-1].uvolt; 1283 &table->gpc2clk_points[index-1], mclk_vf);
1062 gpc2clk_voltuv_sram = 1284 if (pstate != VF_POINT_INVALID_PSTATE) {
1063 table->gpc2clk_points[index-1].uvolt_sram; 1285 gpc2clk_target =
1064 } 1286 table->gpc2clk_points[index-1].gpc_mhz;
1287 *sys2clk =
1288 table->gpc2clk_points[index-1].sys_mhz;
1289 *xbar2clk =
1290 table->gpc2clk_points[index-1].xbar_mhz;
1065 1291
1066 for (index = 0; index < table->mclk_num_points; index++) { 1292 gpc2clk_voltuv =
1067 if (table->mclk_points[index].mhz >= mclk_target) { 1293 table->gpc2clk_points[index-1].uvolt;
1068 mclk_target = table->mclk_points[index].mhz; 1294 gpc2clk_voltuv_sram =
1069 mclk_voltuv = table->mclk_points[index].uvolt; 1295 table->gpc2clk_points[index-1].
1070 mclk_voltuv_sram = 1296 uvolt_sram;
1071 table->mclk_points[index].uvolt_sram; 1297 } else if (index_mclk == table->mclk_num_points - 1) {
1072 break; 1298 /* There is no available combination of MCLK
1299 * and GPC2CLK, we need to fail this
1300 */
1301 gpc2clk_target = 0;
1302 mclk_target = 0;
1303 pstate = VF_POINT_INVALID_PSTATE;
1304 goto find_exit;
1305 } else {
1306 /* recalculate with higher PSTATE */
1307 gpc2clk_target = *gpc2clk;
1308 mclk_target = table->mclk_points[index_mclk+1].
1309 mem_mhz;
1310 goto recalculate_vf_point;
1073 } 1311 }
1074 } 1312 }
1075 if (index == table->mclk_num_points) { 1313
1076 mclk_target = table->mclk_points[index-1].mhz; 1314 mclk_target = mclk_vf->mem_mhz;
1077 mclk_voltuv = table->mclk_points[index-1].uvolt; 1315 mclk_voltuv = mclk_vf->uvolt;
1078 mclk_voltuv_sram = 1316 mclk_voltuv_sram = mclk_vf->uvolt_sram;
1079 table->mclk_points[index-1].uvolt_sram; 1317
1080 }
1081 } while (!table || 1318 } while (!table ||
1082 (ACCESS_ONCE(arb->current_vf_table) != table)); 1319 (ACCESS_ONCE(arb->current_vf_table) != table));
1083 1320
@@ -1088,15 +1325,23 @@ find_exit:
1088 1325
1089 *gpc2clk = gpc2clk_target; 1326 *gpc2clk = gpc2clk_target;
1090 *mclk = mclk_target; 1327 *mclk = mclk_target;
1328 return pstate;
1091} 1329}
1092 1330
1093static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, 1331static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1094 u16 mclk_target, u32 voltuv, u32 voltuv_sram) 1332 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
1333 u32 voltuv_sram)
1095{ 1334{
1096 struct change_fll_clk fllclk; 1335 struct set_fll_clk fllclk;
1097 struct nvgpu_clk_arb *arb = g->clk_arb; 1336 struct nvgpu_clk_arb *arb = g->clk_arb;
1098 int status; 1337 int status;
1099 1338
1339 fllclk.gpc2clkmhz = gpc2clk_target;
1340 fllclk.sys2clkmhz = sys2clk_target;
1341 fllclk.xbar2clkmhz = xbar2clk_target;
1342
1343 fllclk.voltuv = voltuv;
1344
1100 /* if voltage ascends we do: 1345 /* if voltage ascends we do:
1101 * (1) FLL change 1346 * (1) FLL change
1102 * (2) Voltage change 1347 * (2) Voltage change
@@ -1117,17 +1362,11 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1117 if (status < 0) 1362 if (status < 0)
1118 return status; 1363 return status;
1119 1364
1120 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; 1365 status = clk_set_fll_clks(g, &fllclk);
1121 fllclk.clkmhz = gpc2clk_target;
1122 fllclk.voltuv = voltuv;
1123 status = clk_program_fll_clks(g, &fllclk);
1124 if (status < 0) 1366 if (status < 0)
1125 return status; 1367 return status;
1126 } else if (voltuv > arb->voltuv_actual) { 1368 } else if (voltuv > arb->voltuv_actual) {
1127 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; 1369 status = clk_set_fll_clks(g, &fllclk);
1128 fllclk.clkmhz = gpc2clk_target;
1129 fllclk.voltuv = voltuv;
1130 status = clk_program_fll_clks(g, &fllclk);
1131 if (status < 0) 1370 if (status < 0)
1132 return status; 1371 return status;
1133 1372
@@ -1143,10 +1382,7 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1143 if (status < 0) 1382 if (status < 0)
1144 return status; 1383 return status;
1145 1384
1146 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; 1385 status = clk_set_fll_clks(g, &fllclk);
1147 fllclk.clkmhz = gpc2clk_target;
1148 fllclk.voltuv = voltuv;
1149 status = clk_program_fll_clks(g, &fllclk);
1150 if (status < 0) 1386 if (status < 0)
1151 return status; 1387 return status;
1152 } 1388 }