gpu: nvgpu: prevent hang on failing arbiter init

The current code can lead to an application hang when opening the device for the first time if the arbiter fails to set the clocks.

It also fixes a buffer overrun condition that occurs when the requested frequency is above the maximum range of the VF curve.

bug 1835042

Change-Id: I385401ea27d5cc4bfa41b7ca2eb3a1db53138418
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1245911
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1268061
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
author: David Nieto <dmartineznie@nvidia.com> 2016-11-01 14:29:39 -0400
committer: Deepak Nibade <dnibade@nvidia.com> 2016-12-27 04:56:52 -0500
commit: 1f0a38797fbd86b5f5e7f6b43d1c81b2028a82b0 (patch)
tree: 605fa602651b8d42218edae322c0d52529d23450 /drivers/gpu/nvgpu/clk/clk_arb.c
parent: 136a9919e648fd15cc0bb4a2e3de381b29746d3a (diff)
1 files changed, 29 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 1b974d17..f3d6cfab 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -83,6 +83,7 @@ struct nvgpu_clk_arb {
        struct llist_head requests;
        struct gk20a *g;
+        int status;
        struct nvgpu_clk_arb_target actual_pool[2];
        struct nvgpu_clk_arb_target *actual;
@@ -269,7 +270,8 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
                        atomic_read(&arb->req_nr));
        } while (!atomic_read(&arb->req_nr));
-        return 0;
+        return arb->status;
 init_fail:
@@ -533,6 +535,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
                        clk_cur = table->mclk_points[j].mhz;
                        j++;
                        num_points++;
                }
        }
        table->mclk_num_points = num_points;
@@ -605,7 +608,7 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
        err = clk_vf_point_cache(g);
        if (err) {
                gk20a_err(dev_from_gk20a(g),
-                        "failed to get GPC2CLK SRAM voltage");
+                        "failed to cache VF table");
                return;
        }
        nvgpu_clk_arb_update_vf_table(arb);
@@ -625,7 +628,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
        u32 voltuv, voltuv_sram;
        bool mclk_set, gpc2clk_set;
-        int status;
+        int status = 0;
        /* Temporary variables for checking target frequency */
        u16 gpc2clk_target, mclk_target;
@@ -742,15 +745,21 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
        actual->gpc2clk = gpc2clk_target;
        actual->mclk = mclk_target;
        arb->voltuv_actual = voltuv;
+        arb->status = status;
        /* Make changes visible to other threads */
        smp_wmb();
        xchg(&arb->actual, actual);
+        /* status must be visible before atomic inc */
+        smp_wmb();
        atomic_inc(&arb->req_nr);
        wake_up_interruptible(&arb->request_wq);
+        if (status < 0)
+                gk20a_err(dev_from_gk20a(g),
+                        "Error in arbiter update");
 #ifdef CONFIG_DEBUG_FS
        g->ops.read_ptimer(g, &t1);
@@ -1016,20 +1025,23 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
        struct nvgpu_clk_vf_table *table;
        u32 index;
-        gpc2clk_target = *gpc2clk;
-        mclk_target = *mclk;
-        gpc2clk_voltuv = 0;
-        gpc2clk_voltuv_sram = 0;
-        mclk_voltuv = 0;
-        mclk_voltuv_sram = 0;
        do {
+                gpc2clk_target = *gpc2clk;
+                mclk_target = *mclk;
+                gpc2clk_voltuv = 0;
+                gpc2clk_voltuv_sram = 0;
+                mclk_voltuv = 0;
+                mclk_voltuv_sram = 0;
                table = ACCESS_ONCE(arb->current_vf_table);
                /* pointer to table can be updated by callback */
                smp_rmb();
                if (!table)
                        continue;
+                if ((!table->gpc2clk_num_points) || (!table->mclk_num_points))
+                        goto find_exit;
                /* round up the freq requests */
                for (index = 0; index < table->gpc2clk_num_points; index++) {
                        if (table->gpc2clk_points[index].mhz >=
@@ -1045,10 +1057,10 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
                }
                if (index == table->gpc2clk_num_points) {
-                        gpc2clk_target = table->gpc2clk_points[index].mhz;
+                        gpc2clk_target = table->gpc2clk_points[index-1].mhz;
-                        gpc2clk_voltuv = table->gpc2clk_points[index].uvolt;
+                        gpc2clk_voltuv = table->gpc2clk_points[index-1].uvolt;
                        gpc2clk_voltuv_sram =
-                                table->gpc2clk_points[index].uvolt_sram;
+                                table->gpc2clk_points[index-1].uvolt_sram;
                }
                for (index = 0; index < table->mclk_num_points; index++) {
@@ -1061,14 +1073,15 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
                        }
                }
                if (index == table->mclk_num_points) {
-                        mclk_target = table->mclk_points[index].mhz;
+                        mclk_target = table->mclk_points[index-1].mhz;
-                        mclk_voltuv = table->mclk_points[index].uvolt;
+                        mclk_voltuv = table->mclk_points[index-1].uvolt;
                        mclk_voltuv_sram =
-                                table->mclk_points[index].uvolt_sram;
+                                table->mclk_points[index-1].uvolt_sram;
                }
        } while (!table ||
                (ACCESS_ONCE(arb->current_vf_table) != table));
+find_exit:
        *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
        *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
                gpc2clk_voltuv_sram : mclk_voltuv_sram;
@@ -1136,7 +1149,6 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
                status = clk_program_fll_clks(g, &fllclk);
                if (status < 0)
                        return status;
        }
        return 0;
author	David Nieto <dmartineznie@nvidia.com>	2016-11-01 14:29:39 -0400
committer	Deepak Nibade <dnibade@nvidia.com>	2016-12-27 04:56:52 -0500
commit	1f0a38797fbd86b5f5e7f6b43d1c81b2028a82b0 (patch)
tree	605fa602651b8d42218edae322c0d52529d23450 /drivers/gpu/nvgpu/clk/clk_arb.c
parent	136a9919e648fd15cc0bb4a2e3de381b29746d3a (diff)

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 1b974d17..f3d6cfab 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -83,6 +83,7 @@ struct nvgpu_clk_arb {
83	struct llist_head requests;	83	struct llist_head requests;
84		84
85	struct gk20a *g;	85	struct gk20a *g;
		86	int status;
86		87
87	struct nvgpu_clk_arb_target actual_pool[2];	88	struct nvgpu_clk_arb_target actual_pool[2];
88	struct nvgpu_clk_arb_target *actual;	89	struct nvgpu_clk_arb_target *actual;
@@ -269,7 +270,8 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
269	atomic_read(&arb->req_nr));	270	atomic_read(&arb->req_nr));
270	} while (!atomic_read(&arb->req_nr));	271	} while (!atomic_read(&arb->req_nr));
271		272
272	return 0;	273
		274	return arb->status;
273		275
274	init_fail:	276	init_fail:
275		277
@@ -533,6 +535,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
533	clk_cur = table->mclk_points[j].mhz;	535	clk_cur = table->mclk_points[j].mhz;
534	j++;	536	j++;
535	num_points++;	537	num_points++;
		538
536	}	539	}
537	}	540	}
538	table->mclk_num_points = num_points;	541	table->mclk_num_points = num_points;
@@ -605,7 +608,7 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
605	err = clk_vf_point_cache(g);	608	err = clk_vf_point_cache(g);
606	if (err) {	609	if (err) {
607	gk20a_err(dev_from_gk20a(g),	610	gk20a_err(dev_from_gk20a(g),
608	"failed to get GPC2CLK SRAM voltage");	611	"failed to cache VF table");
609	return;	612	return;
610	}	613	}
611	nvgpu_clk_arb_update_vf_table(arb);	614	nvgpu_clk_arb_update_vf_table(arb);
@@ -625,7 +628,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
625	u32 voltuv, voltuv_sram;	628	u32 voltuv, voltuv_sram;
626	bool mclk_set, gpc2clk_set;	629	bool mclk_set, gpc2clk_set;
627		630
628	int status;	631	int status = 0;
629		632
630	/* Temporary variables for checking target frequency */	633	/* Temporary variables for checking target frequency */
631	u16 gpc2clk_target, mclk_target;	634	u16 gpc2clk_target, mclk_target;
@@ -742,15 +745,21 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
742	actual->gpc2clk = gpc2clk_target;	745	actual->gpc2clk = gpc2clk_target;
743	actual->mclk = mclk_target;	746	actual->mclk = mclk_target;
744	arb->voltuv_actual = voltuv;	747	arb->voltuv_actual = voltuv;
		748	arb->status = status;
745		749
746	/* Make changes visible to other threads */	750	/* Make changes visible to other threads */
747	smp_wmb();	751	smp_wmb();
748	xchg(&arb->actual, actual);	752	xchg(&arb->actual, actual);
749		753
		754	/* status must be visible before atomic inc */
		755	smp_wmb();
750	atomic_inc(&arb->req_nr);	756	atomic_inc(&arb->req_nr);
751		757
752	wake_up_interruptible(&arb->request_wq);	758	wake_up_interruptible(&arb->request_wq);
753		759
		760	if (status < 0)
		761	gk20a_err(dev_from_gk20a(g),
		762	"Error in arbiter update");
754		763
755	#ifdef CONFIG_DEBUG_FS	764	#ifdef CONFIG_DEBUG_FS
756	g->ops.read_ptimer(g, &t1);	765	g->ops.read_ptimer(g, &t1);
@@ -1016,20 +1025,23 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1016	struct nvgpu_clk_vf_table *table;	1025	struct nvgpu_clk_vf_table *table;
1017	u32 index;	1026	u32 index;
1018		1027
1019	gpc2clk_target = *gpc2clk;
1020	mclk_target = *mclk;
1021	gpc2clk_voltuv = 0;
1022	gpc2clk_voltuv_sram = 0;
1023	mclk_voltuv = 0;
1024	mclk_voltuv_sram = 0;
1025
1026	do {	1028	do {
		1029	gpc2clk_target = *gpc2clk;
		1030	mclk_target = *mclk;
		1031	gpc2clk_voltuv = 0;
		1032	gpc2clk_voltuv_sram = 0;
		1033	mclk_voltuv = 0;
		1034	mclk_voltuv_sram = 0;
		1035
1027	table = ACCESS_ONCE(arb->current_vf_table);	1036	table = ACCESS_ONCE(arb->current_vf_table);
1028	/* pointer to table can be updated by callback */	1037	/* pointer to table can be updated by callback */
1029	smp_rmb();	1038	smp_rmb();
1030		1039
1031	if (!table)	1040	if (!table)
1032	continue;	1041	continue;
		1042	if ((!table->gpc2clk_num_points) || (!table->mclk_num_points))
		1043	goto find_exit;
		1044
1033	/* round up the freq requests */	1045	/* round up the freq requests */
1034	for (index = 0; index < table->gpc2clk_num_points; index++) {	1046	for (index = 0; index < table->gpc2clk_num_points; index++) {
1035	if (table->gpc2clk_points[index].mhz >=	1047	if (table->gpc2clk_points[index].mhz >=
@@ -1045,10 +1057,10 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1045	}	1057	}
1046		1058
1047	if (index == table->gpc2clk_num_points) {	1059	if (index == table->gpc2clk_num_points) {
1048	gpc2clk_target = table->gpc2clk_points[index].mhz;	1060	gpc2clk_target = table->gpc2clk_points[index-1].mhz;
1049	gpc2clk_voltuv = table->gpc2clk_points[index].uvolt;	1061	gpc2clk_voltuv = table->gpc2clk_points[index-1].uvolt;
1050	gpc2clk_voltuv_sram =	1062	gpc2clk_voltuv_sram =
1051	table->gpc2clk_points[index].uvolt_sram;	1063	table->gpc2clk_points[index-1].uvolt_sram;
1052	}	1064	}
1053		1065
1054	for (index = 0; index < table->mclk_num_points; index++) {	1066	for (index = 0; index < table->mclk_num_points; index++) {
@@ -1061,14 +1073,15 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1061	}	1073	}
1062	}	1074	}
1063	if (index == table->mclk_num_points) {	1075	if (index == table->mclk_num_points) {
1064	mclk_target = table->mclk_points[index].mhz;	1076	mclk_target = table->mclk_points[index-1].mhz;
1065	mclk_voltuv = table->mclk_points[index].uvolt;	1077	mclk_voltuv = table->mclk_points[index-1].uvolt;
1066	mclk_voltuv_sram =	1078	mclk_voltuv_sram =
1067	table->mclk_points[index].uvolt_sram;	1079	table->mclk_points[index-1].uvolt_sram;
1068	}	1080	}
1069	} while (!table ||	1081	} while (!table ||
1070	(ACCESS_ONCE(arb->current_vf_table) != table));	1082	(ACCESS_ONCE(arb->current_vf_table) != table));
1071		1083
		1084	find_exit:
1072	*voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;	1085	*voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
1073	*voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?	1086	*voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
1074	gpc2clk_voltuv_sram : mclk_voltuv_sram;	1087	gpc2clk_voltuv_sram : mclk_voltuv_sram;
@@ -1136,7 +1149,6 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1136	status = clk_program_fll_clks(g, &fllclk);	1149	status = clk_program_fll_clks(g, &fllclk);
1137	if (status < 0)	1150	if (status < 0)
1138	return status;	1151	return status;
1139
1140	}	1152	}
1141		1153
1142	return 0;	1154	return 0;