From b3a7c2b305ec6f895dc236f0c5f163bd4cbeb248 Mon Sep 17 00:00:00 2001
From: Sachit Kadle <skadle@nvidia.com>
Date: Tue, 24 Jan 2017 10:22:13 -0800
Subject: gpu: nvgpu: vgpu: add devfreq support

Add devfreq governor support in order to allow frequency scaling
in virtualization config. GPU clock frequency operations are
re-directed to the server over RPC.

Bug 200237433

Change-Id: I1c8e565a4fff36d3456dc72ebb20795b7822650e
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1295542
(cherry picked from commit d5c956fc06697eda3829c67cb22987e538213b29)
Reviewed-on: http://git-master/r/1280968
(cherry picked from commit 25e2b3cf7cb5559a6849c0024d42c157564a9be2)
Reviewed-on: http://git-master/r/1321835
(cherry picked from commit f871b52fd3f553d6b6375a3c848fbca272ed8e29)
Reviewed-on: http://git-master/r/1313468
Tested-by: Aparna Das <aparnad@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile.nvgpu              |   1 +
 drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c |  16 +++-
 drivers/gpu/nvgpu/vgpu/clk_vgpu.c             | 130 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/vgpu/clk_vgpu.h             |  23 +++++
 drivers/gpu/nvgpu/vgpu/vgpu.c                 |  49 +++++++---
 include/linux/tegra_vgpu.h                    |  10 ++
 6 files changed, 216 insertions(+), 13 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/vgpu/clk_vgpu.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/clk_vgpu.h

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index ac58d512..d16e2c3e 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -150,6 +150,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
 	vgpu/dbg_vgpu.o \
 	vgpu/fecs_trace_vgpu.o \
 	vgpu/tsg_vgpu.o \
+	vgpu/clk_vgpu.o \
 	vgpu/css_vgpu.o \
 	vgpu/gk20a/vgpu_hal_gk20a.o \
 	vgpu/gk20a/vgpu_gr_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
index e33fc29f..f2c877f9 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
@@ -16,6 +16,7 @@
 #include "gk20a.h"
 #include "hal_gk20a.h"
 #include "platform_gk20a.h"
+#include "vgpu/clk_vgpu.h"
 
 #include <nvgpu/nvhost.h>
 
@@ -23,7 +24,14 @@ static int gk20a_tegra_probe(struct device *dev)
 {
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
 	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	return nvgpu_get_nvhost_dev(platform->g);
+	int ret;
+
+	ret = nvgpu_get_nvhost_dev(platform->g);
+	if (ret)
+		return ret;
+
+	vgpu_init_clk_support(platform->g);
+	return 0;
 #else
 	return 0;
 #endif
@@ -47,5 +55,11 @@ struct gk20a_platform vgpu_tegra_platform = {
 	.probe = gk20a_tegra_probe,
 	.default_big_page_size	= SZ_128K,
 
+	.clk_round_rate = vgpu_clk_round_rate,
+	.get_clk_freqs = vgpu_clk_get_freqs,
+
+	/* frequency scaling configuration */
+	.devfreq_governor = "userspace",
+
 	.virtual_dev = true,
 };
diff --git a/drivers/gpu/nvgpu/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/vgpu/clk_vgpu.c
new file mode 100644
index 00000000..fe5533b6
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/clk_vgpu.c
@@ -0,0 +1,169 @@
+/*
+ * Virtualized GPU Clock Interface
+ *
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "vgpu/vgpu.h"
+#include "vgpu/clk_vgpu.h"
+
+/*
+ * Frequency table in Hz, filled in and handed out by vgpu_clk_get_freqs().
+ * Callers receive a pointer to this static storage, not a copy, and every
+ * successful call overwrites it.
+ */
+static unsigned long
+vgpu_freq_table[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE];
+
+/*
+ * Query the current rate of clock domain @api_domain from the vgpu server.
+ *
+ * Only CTRL_CLK_DOMAIN_GPCCLK is serviced. Returns the rate in Hz, or 0 when
+ * the RPC fails or the domain is unsupported/unknown (an error is logged).
+ */
+static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate;
+	int err;
+	unsigned long ret = 0;
+
+	gk20a_dbg_fn("");
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_GPCCLK:
+		msg.cmd = TEGRA_VGPU_CMD_GET_GPU_CLK_RATE;
+		msg.handle = vgpu_get_handle(g);
+		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+		/* sendrecv error wins; otherwise use the status in the reply */
+		err = err ? err : msg.ret;
+		if (err)
+			nvgpu_err(g, "%s failed - %d", __func__, err);
+		else
+			/* kHz -> Hz; do the multiply in unsigned long */
+			ret = (unsigned long)p->rate * 1000;
+		break;
+	case CTRL_CLK_DOMAIN_PWRCLK:
+		nvgpu_err(g, "unsupported clock: %u", api_domain);
+		break;
+	default:
+		nvgpu_err(g, "unknown clock: %u", api_domain);
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Ask the vgpu server to set clock domain @api_domain to @rate (in Hz).
+ *
+ * Only CTRL_CLK_DOMAIN_GPCCLK is serviced. Returns 0 on success, the RPC or
+ * server error code on failure, or -EINVAL for any other domain.
+ */
+static int vgpu_clk_set_rate(struct gk20a *g,
+				u32 api_domain, unsigned long rate)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate;
+	int err = -EINVAL;
+
+	gk20a_dbg_fn("");
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_GPCCLK:
+		msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE;
+		msg.handle = vgpu_get_handle(g);
+
+		/* server dvfs framework requires frequency in kHz */
+		p->rate = (u32)(rate / 1000);
+		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+		err = err ? err : msg.ret;
+		if (err)
+			nvgpu_err(g, "%s failed - %d", __func__, err);
+		break;
+	case CTRL_CLK_DOMAIN_PWRCLK:
+		nvgpu_err(g, "unsupported clock: %u", api_domain);
+		break;
+	default:
+		nvgpu_err(g, "unknown clock: %u", api_domain);
+		break;
+	}
+
+	return err;
+}
+
+/* Install the RPC-backed get_rate/set_rate handlers into g->ops.clk. */
+void vgpu_init_clk_support(struct gk20a *g)
+{
+	g->ops.clk.get_rate = vgpu_clk_get_rate;
+	g->ops.clk.set_rate = vgpu_clk_set_rate;
+}
+
+/* Rounding hook: returns @rate unchanged. */
+long vgpu_clk_round_rate(struct device *dev, unsigned long rate)
+{
+	/* server will handle frequency rounding */
+	return rate;
+}
+
+/*
+ * Fetch the GPU frequency table from the vgpu server.
+ *
+ * On success stores a pointer to the table (in Hz) in *freqs and its entry
+ * count in *num_freqs, then returns 0. The table is the file-static
+ * vgpu_freq_table, so it must not be freed and is overwritten by the next
+ * call. Returns the RPC/server error code on failure, or -EINVAL if the
+ * server reports more entries than the table can hold.
+ */
+int vgpu_clk_get_freqs(struct device *dev,
+		unsigned long **freqs, int *num_freqs)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a *g = platform->g;
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_get_gpu_freq_table_params *p =
+					&msg.params.get_gpu_freq_table;
+	unsigned int i;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE;
+	msg.handle = vgpu_get_handle(g);
+
+	/* capacity of freqs[]; presumably caps the reply - TODO confirm */
+	p->num_freqs = TEGRA_VGPU_GPU_FREQ_TABLE_SIZE;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	if (err) {
+		nvgpu_err(g, "%s failed - %d", __func__, err);
+		return err;
+	}
+
+	/*
+	 * num_freqs now comes from the reply; reject anything that would
+	 * index past p->freqs[] and vgpu_freq_table[].
+	 */
+	if (p->num_freqs > TEGRA_VGPU_GPU_FREQ_TABLE_SIZE) {
+		nvgpu_err(g, "%s: bad table size %u", __func__, p->num_freqs);
+		return -EINVAL;
+	}
+
+	/* return frequency in Hz */
+	for (i = 0; i < p->num_freqs; i++)
+		vgpu_freq_table[i] = (unsigned long)p->freqs[i] * 1000;
+
+	*freqs = vgpu_freq_table;
+	*num_freqs = p->num_freqs;
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/vgpu/clk_vgpu.h b/drivers/gpu/nvgpu/vgpu/clk_vgpu.h
new file mode 100644
index 00000000..a90b63d8
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/clk_vgpu.h
@@ -0,0 +1,36 @@
+/*
+ * Virtualized GPU Clock Interface
+ *
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _CLK_VIRT_H_
+#define _CLK_VIRT_H_
+
+/* Only pointers are used below, so forward declarations suffice. */
+struct device;
+struct gk20a;
+
+/* Install the vgpu clock get_rate/set_rate handlers into g->ops.clk. */
+void vgpu_init_clk_support(struct gk20a *g);
+
+/* Returns @rate unchanged; frequency rounding is left to the server. */
+long vgpu_clk_round_rate(struct device *dev, unsigned long rate);
+
+/*
+ * Fetch the GPU frequency table from the vgpu server. On success returns 0
+ * with *freqs pointing at the table (in Hz) and *num_freqs set to its size.
+ */
+int vgpu_clk_get_freqs(struct device *dev,
+			unsigned long **freqs, int *num_freqs);
+
+#endif
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 2b1c93dd..e8a778f5 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -26,6 +26,7 @@
 
 #include "vgpu/vgpu.h"
 #include "vgpu/fecs_trace_vgpu.h"
+#include "vgpu/clk_vgpu.h"
 #include "gk20a/hal_gk20a.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
 #include "gk20a/tsg_gk20a.h"
@@ -538,18 +539,22 @@ static int vgpu_qos_notify(struct notifier_block *nb,
 static int vgpu_pm_qos_init(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
-	struct gk20a_scale_profile *profile;
+	struct gk20a_scale_profile *profile = g->scale_profile;
 
-	profile = nvgpu_kzalloc(g, sizeof(*profile));
-	if (!profile)
-		return -ENOMEM;
+	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) {
+		if (!profile)
+			return -EINVAL;
+	} else {
+		profile = nvgpu_kzalloc(g, sizeof(*profile));
+		if (!profile)
+			return -ENOMEM;
+		g->scale_profile = profile;
+	}
 
 	profile->dev = dev;
 	profile->qos_notify_block.notifier_call = vgpu_qos_notify;
-	g->scale_profile = profile;
 	pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
 				&profile->qos_notify_block);
-
 	return 0;
 }
 
@@ -565,11 +570,31 @@ static void vgpu_pm_qos_remove(struct device *dev)
 
 static int vgpu_pm_init(struct device *dev)
 {
+	struct gk20a *g = get_gk20a(dev);
+	unsigned long *freqs;
+	int num_freqs;
 	int err = 0;
 
 	gk20a_dbg_fn("");
 
 	__pm_runtime_disable(dev, false);
+
+	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
+		gk20a_scale_init(dev);
+
+	if (g->devfreq) {
+		/* set min/max frequency based on frequency table */
+		err = vgpu_clk_get_freqs(dev, &freqs, &num_freqs);
+		if (err)
+			return err;
+
+		if (num_freqs < 1)
+			return -EINVAL;
+
+		g->devfreq->min_freq = freqs[0];
+		g->devfreq->max_freq = freqs[num_freqs - 1];
+	}
+
 	err = vgpu_pm_qos_init(dev);
 	if (err)
 		return err;
@@ -675,12 +700,6 @@ int vgpu_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	err = vgpu_pm_init(dev);
-	if (err) {
-		dev_err(dev, "pm init failed");
-		return err;
-	}
-
 	if (platform->late_probe) {
 		err = platform->late_probe(dev);
 		if (err) {
@@ -708,6 +727,12 @@ int vgpu_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	err = vgpu_pm_init(dev);
+	if (err) {
+		dev_err(dev, "pm init failed");
+		return err;
+	}
+
 	err = nvgpu_thread_create(&priv->intr_handler, gk20a,
 			vgpu_intr_thread, "gk20a");
 	if (err)
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index 67f51806..4d1e1ac9 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -100,6 +100,8 @@ enum {
 	TEGRA_VGPU_CMD_SUSPEND_CONTEXTS = 66,
 	TEGRA_VGPU_CMD_RESUME_CONTEXTS = 67,
 	TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE = 68,
+	TEGRA_VGPU_CMD_GET_GPU_CLK_RATE = 69,
+	TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE = 70,
 	TEGRA_VGPU_CMD_PROF_MGT = 72,
 	TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74,
 };
@@ -485,6 +487,13 @@ struct tegra_vgpu_prof_mgt_params {
 	u32 mode;
 };
 
+#define TEGRA_VGPU_GPU_FREQ_TABLE_SIZE		25
+
+struct tegra_vgpu_get_gpu_freq_table_params {
+	u32 num_freqs;
+	u32 freqs[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE]; /* in kHz */
+};
+
 struct tegra_vgpu_cmd_msg {
 	u32 cmd;
 	int ret;
@@ -536,6 +545,7 @@ struct tegra_vgpu_cmd_msg {
 		struct tegra_vgpu_clear_sm_error_state clear_sm_error_state;
 		struct tegra_vgpu_prof_mgt_params prof_management;
 		struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper;
+		struct tegra_vgpu_get_gpu_freq_table_params get_gpu_freq_table;
 		char padding[192];
 	} params;
 };
-- 
cgit v1.2.2