gpu: nvgpu: vgpu: add pm qos support

Send a command to the RM server to change the clock rate when the
PM_QOS_GPU_FREQ_BOUNDS max bound changes.

Bug 200206160

Change-Id: I7f19e5f711426517baf8e7f934bf41972012644b
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1172792
(cherry picked from commit 973c258fd85449c3862df2498362e358fd3682c9)
Reviewed-on: http://git-master/r/1180892
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
author: Richard Zhao <rizhao@nvidia.com> 2016-06-28 20:08:54 -0400
committer: Vijayakumar Subbu <vsubbu@nvidia.com> 2016-07-14 00:46:07 -0400
commit: dc137541b032906e6db45e2f9853fbcff5e267a5 (patch)
tree: efa1b5e4e80469d1a268998d19ac6a2fea6640a2
parent: a2831f098bb22e347009cd73e17946db14ee06ce (diff)
2 files changed, 68 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index c28130f2..dc7c4320 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -17,6 +17,8 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_qos.h>
 #include "vgpu/vgpu.h"
 #include "vgpu/fecs_trace_vgpu.h"
 #include "gk20a/debug_gk20a.h"
@@ -24,6 +26,7 @@
 #include "gk20a/hw_mc_gk20a.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
 #include "gk20a/tsg_gk20a.h"
+#include "gk20a/gk20a_scale.h"
 #include "gk20a/channel_gk20a.h"
 #include "gm20b/hal_gm20b.h"
@@ -430,6 +433,61 @@ done:
        return err;
 }
+static int vgpu_qos_notify(struct notifier_block *nb,
+                          unsigned long n, void *data)
+{
+        struct gk20a_scale_profile *profile =
+                        container_of(nb, struct gk20a_scale_profile,
+                        qos_notify_block);
+        struct gk20a_platform *platform = gk20a_get_platform(profile->dev);
+        struct tegra_vgpu_cmd_msg msg = {};
+        struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate;
+        u32 max_freq;
+        int err;
+        gk20a_dbg_fn("");
+        max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS);
+        msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE;
+        msg.handle = platform->virt_handle;
+        p->rate = max_freq;
+        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+        err = err ? err : msg.ret;
+        if (err)
+                gk20a_err(profile->dev, "%s failed, err=%d", __func__, err);
+        return NOTIFY_OK; /* need notify call further */
+}
+static int vgpu_pm_qos_init(struct device *dev)
+{
+        struct gk20a *g = get_gk20a(dev);
+        struct gk20a_scale_profile *profile;
+        profile = kzalloc(sizeof(*profile), GFP_KERNEL);
+        if (!profile)
+                return -ENOMEM;
+        profile->dev = dev;
+        profile->qos_notify_block.notifier_call = vgpu_qos_notify;
+        g->scale_profile = profile;
+        pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+                                &profile->qos_notify_block);
+        return 0;
+}
+static void vgpu_pm_qos_remove(struct device *dev)
+{
+        struct gk20a *g = get_gk20a(dev);
+        pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+                                &g->scale_profile->qos_notify_block);
+        kfree(g->scale_profile);
+        g->scale_profile = NULL;
+}
 static int vgpu_pm_init(struct device *dev)
 {
        int err = 0;
@@ -437,6 +495,9 @@ static int vgpu_pm_init(struct device *dev)
        gk20a_dbg_fn("");
        __pm_runtime_disable(dev, false);
+        err = vgpu_pm_qos_init(dev);
+        if (err)
+                return err;
        return err;
 }
@@ -534,6 +595,7 @@ int vgpu_remove(struct platform_device *pdev)
        struct gk20a *g = get_gk20a(dev);
        gk20a_dbg_fn("");
+        vgpu_pm_qos_remove(dev);
        if (g->remove_support)
                g->remove_support(dev);
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index 6e8f5d53..c821f31a 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -97,6 +97,7 @@ enum {
        TEGRA_VGPU_CMD_CHANNEL_ENABLE = 58,
        TEGRA_VGPU_CMD_READ_PTIMER = 59,
        TEGRA_VGPU_CMD_SET_POWERGATE = 60,
+        TEGRA_VGPU_CMD_SET_GPU_CLK_RATE = 61,
 };
 struct tegra_vgpu_connect_params {
@@ -399,6 +400,10 @@ struct tegra_vgpu_set_powergate_params {
        u32 mode;
 };
+struct tegra_vgpu_gpu_clk_rate_params {
+        u32 rate; /* in kHz */
+};
 struct tegra_vgpu_cmd_msg {
        u32 cmd;
        int ret;
@@ -441,6 +446,7 @@ struct tegra_vgpu_cmd_msg {
                struct tegra_vgpu_tsg_runlist_interleave_params tsg_interleave;
                struct tegra_vgpu_read_ptimer_params read_ptimer;
                struct tegra_vgpu_set_powergate_params set_powergate;
+                struct tegra_vgpu_gpu_clk_rate_params gpu_clk_rate;
                char padding[192];
        } params;
 };
author	Richard Zhao <rizhao@nvidia.com>	2016-06-28 20:08:54 -0400
committer	Vijayakumar Subbu <vsubbu@nvidia.com>	2016-07-14 00:46:07 -0400
commit	dc137541b032906e6db45e2f9853fbcff5e267a5 (patch)
tree	efa1b5e4e80469d1a268998d19ac6a2fea6640a2
parent	a2831f098bb22e347009cd73e17946db14ee06ce (diff)

diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index c28130f2..dc7c4320 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -17,6 +17,8 @@
17	#include <linux/delay.h>	17	#include <linux/delay.h>
18	#include <linux/dma-mapping.h>	18	#include <linux/dma-mapping.h>
19	#include <linux/pm_runtime.h>	19	#include <linux/pm_runtime.h>
		20	#include <linux/pm_qos.h>
		21
20	#include "vgpu/vgpu.h"	22	#include "vgpu/vgpu.h"
21	#include "vgpu/fecs_trace_vgpu.h"	23	#include "vgpu/fecs_trace_vgpu.h"
22	#include "gk20a/debug_gk20a.h"	24	#include "gk20a/debug_gk20a.h"
@@ -24,6 +26,7 @@
24	#include "gk20a/hw_mc_gk20a.h"	26	#include "gk20a/hw_mc_gk20a.h"
25	#include "gk20a/ctxsw_trace_gk20a.h"	27	#include "gk20a/ctxsw_trace_gk20a.h"
26	#include "gk20a/tsg_gk20a.h"	28	#include "gk20a/tsg_gk20a.h"
		29	#include "gk20a/gk20a_scale.h"
27	#include "gk20a/channel_gk20a.h"	30	#include "gk20a/channel_gk20a.h"
28	#include "gm20b/hal_gm20b.h"	31	#include "gm20b/hal_gm20b.h"
29		32
@@ -430,6 +433,61 @@ done:
430	return err;	433	return err;
431	}	434	}
432		435
		436	static int vgpu_qos_notify(struct notifier_block *nb,
		437	unsigned long n, void *data)
		438	{
		439	struct gk20a_scale_profile *profile =
		440	container_of(nb, struct gk20a_scale_profile,
		441	qos_notify_block);
		442	struct gk20a_platform *platform = gk20a_get_platform(profile->dev);
		443	struct tegra_vgpu_cmd_msg msg = {};
		444	struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate;
		445	u32 max_freq;
		446	int err;
		447
		448	gk20a_dbg_fn("");
		449
		450	max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS);
		451
		452	msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE;
		453	msg.handle = platform->virt_handle;
		454	p->rate = max_freq;
		455	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
		456	err = err ? err : msg.ret;
		457	if (err)
		458	gk20a_err(profile->dev, "%s failed, err=%d", __func__, err);
		459
		460	return NOTIFY_OK; /* need notify call further */
		461	}
		462
		463	static int vgpu_pm_qos_init(struct device *dev)
		464	{
		465	struct gk20a *g = get_gk20a(dev);
		466	struct gk20a_scale_profile *profile;
		467
		468	profile = kzalloc(sizeof(*profile), GFP_KERNEL);
		469	if (!profile)
		470	return -ENOMEM;
		471
		472	profile->dev = dev;
		473	profile->qos_notify_block.notifier_call = vgpu_qos_notify;
		474	g->scale_profile = profile;
		475	pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
		476	&profile->qos_notify_block);
		477
		478	return 0;
		479	}
		480
		481	static void vgpu_pm_qos_remove(struct device *dev)
		482	{
		483	struct gk20a *g = get_gk20a(dev);
		484
		485	pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
		486	&g->scale_profile->qos_notify_block);
		487	kfree(g->scale_profile);
		488	g->scale_profile = NULL;
		489	}
		490
433	static int vgpu_pm_init(struct device *dev)	491	static int vgpu_pm_init(struct device *dev)
434	{	492	{
435	int err = 0;	493	int err = 0;
@@ -437,6 +495,9 @@ static int vgpu_pm_init(struct device *dev)
437	gk20a_dbg_fn("");	495	gk20a_dbg_fn("");
438		496
439	__pm_runtime_disable(dev, false);	497	__pm_runtime_disable(dev, false);
		498	err = vgpu_pm_qos_init(dev);
		499	if (err)
		500	return err;
440		501
441	return err;	502	return err;
442	}	503	}
@@ -534,6 +595,7 @@ int vgpu_remove(struct platform_device *pdev)
534	struct gk20a *g = get_gk20a(dev);	595	struct gk20a *g = get_gk20a(dev);
535	gk20a_dbg_fn("");	596	gk20a_dbg_fn("");
536		597
		598	vgpu_pm_qos_remove(dev);
537	if (g->remove_support)	599	if (g->remove_support)
538	g->remove_support(dev);	600	g->remove_support(dev);
539		601


diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index 6e8f5d53..c821f31a 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -97,6 +97,7 @@ enum {
97	TEGRA_VGPU_CMD_CHANNEL_ENABLE = 58,	97	TEGRA_VGPU_CMD_CHANNEL_ENABLE = 58,
98	TEGRA_VGPU_CMD_READ_PTIMER = 59,	98	TEGRA_VGPU_CMD_READ_PTIMER = 59,
99	TEGRA_VGPU_CMD_SET_POWERGATE = 60,	99	TEGRA_VGPU_CMD_SET_POWERGATE = 60,
		100	TEGRA_VGPU_CMD_SET_GPU_CLK_RATE = 61,
100	};	101	};
101		102
102	struct tegra_vgpu_connect_params {	103	struct tegra_vgpu_connect_params {
@@ -399,6 +400,10 @@ struct tegra_vgpu_set_powergate_params {
399	u32 mode;	400	u32 mode;
400	};	401	};
401		402
		403	struct tegra_vgpu_gpu_clk_rate_params {
		404	u32 rate; /* in kHz */
		405	};
		406
402	struct tegra_vgpu_cmd_msg {	407	struct tegra_vgpu_cmd_msg {
403	u32 cmd;	408	u32 cmd;
404	int ret;	409	int ret;
@@ -441,6 +446,7 @@ struct tegra_vgpu_cmd_msg {
441	struct tegra_vgpu_tsg_runlist_interleave_params tsg_interleave;	446	struct tegra_vgpu_tsg_runlist_interleave_params tsg_interleave;
442	struct tegra_vgpu_read_ptimer_params read_ptimer;	447	struct tegra_vgpu_read_ptimer_params read_ptimer;
443	struct tegra_vgpu_set_powergate_params set_powergate;	448	struct tegra_vgpu_set_powergate_params set_powergate;
		449	struct tegra_vgpu_gpu_clk_rate_params gpu_clk_rate;
444	char padding[192];	450	char padding[192];
445	} params;	451	} params;
446	};	452	};