gpu: nvgpu: Move gk20a_scale to be Linux only

Move gk20a_scale.[ch] to be common/linux/scale.[ch]. The code is Linux specific, and only referred from Linux specific source files. Change the license back to GPL.

JIRA NVGPU-259

Change-Id: I89fa905a1fea4f93c826ddfe2ffce34aefc1b0a2
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1588650
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Terje Bergstrom <tbergstrom@nvidia.com> 2017-10-30 15:52:52 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-11-01 13:55:41 -0400
commit: 15e259bc5255e891f776a84b5f28a97ea0567178 (patch)
tree: ca4a92cafe8e01b8bdb87c0bbdc5fc18913af841 /drivers/gpu/nvgpu/common
parent: 964a849d6176da362c375d7d72b94289e9b905de (diff)
6 files changed, 498 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index e4a65692..759607a2 100644
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -23,7 +23,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
-#include "gk20a/gk20a_scale.h"
+#include "scale.h"
 #include "gk20a/gk20a.h"
 #include "gk20a/platform_gk20a.h"
 #include "module.h"
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 4f66fc67..d0abc836 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -35,7 +35,7 @@
 #include "gk20a/platform_gk20a.h"
 #include "sysfs.h"
 #include "vgpu/vgpu.h"
-#include "gk20a/gk20a_scale.h"
+#include "scale.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
 #include "pci.h"
 #include "module.h"
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
index 5786feab..2d6d156c 100644
--- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
@@ -48,9 +48,9 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/platform_gk20a.h"
-#include "gk20a/gk20a_scale.h"
 #include "gm20b/clk_gm20b.h"
+#include "scale.h"
 #include "clk.h"
 #include "os_linux.h"
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c
index e5d19976..fb8686c2 100644
--- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c
@@ -35,11 +35,11 @@
 #include "gk20a/platform_gk20a.h"
 #include "gk20a/gk20a.h"
-#include "gk20a/gk20a_scale.h"
 #include "platform_gk20a_tegra.h"
 #include "gp10b/platform_gp10b.h"
 #include "platform_gp10b_tegra.h"
+#include "scale.h"
 /* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */
 #define GP10B_FREQ_SELECT_STEP  8
diff --git a/drivers/gpu/nvgpu/common/linux/scale.c b/drivers/gpu/nvgpu/common/linux/scale.c
new file mode 100644
index 00000000..05f09dcc
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/scale.c
@@ -0,0 +1,428 @@
+/*
+ * gk20a clock scaling profile
+ *
+ * Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/devfreq.h>
+#include <linux/export.h>
+#include <soc/tegra/chip-id.h>
+#include <linux/pm_qos.h>
+#include <governor.h>
+#include <nvgpu/kmem.h>
+#include <nvgpu/log.h>
+#include "gk20a/gk20a.h"
+#include "gk20a/platform_gk20a.h"
+#include "scale.h"
+#include "os_linux.h"
+/*
+ * gk20a_scale_qos_notify()
+ *
+ * Notifier callback reacting to a change of the PM_QOS_GPU_FREQ_BOUNDS
+ * requirement. Which variant is built depends on CONFIG_COMMON_CLK; both
+ * always return NOTIFY_OK and ignore the n and p arguments.
+ */
+#ifdef CONFIG_COMMON_CLK
+/*
+ * Common clock variant: cache the current QoS floor and cap in the scaling
+ * profile and ask devfreq to re-evaluate, all under the devfreq lock.
+ */
+int gk20a_scale_qos_notify(struct notifier_block *nb,
+                          unsigned long n, void *p)
+{
+        struct gk20a_scale_profile *prof = container_of(nb,
+                        struct gk20a_scale_profile, qos_notify_block);
+        struct gk20a *g = get_gk20a(prof->dev);
+        struct nvgpu_os_linux *os = nvgpu_os_linux_from_gk20a(g);
+        struct devfreq *df = os->devfreq;
+        /* Without a devfreq instance there is nothing to re-evaluate */
+        if (df == NULL)
+                return NOTIFY_OK;
+        mutex_lock(&df->lock);
+        /* Refresh the cached bounds; the pm_qos values are scaled by 1000 */
+        prof->qos_min_freq = 1000UL *
+                (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS);
+        prof->qos_max_freq = 1000UL *
+                (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS);
+        /* A floor above the cap is reported and then lowered to the cap */
+        if (prof->qos_max_freq < prof->qos_min_freq) {
+                nvgpu_err(g,
+                        "QoS: setting invalid limit, min_freq=%lu max_freq=%lu",
+                        prof->qos_min_freq, prof->qos_max_freq);
+                prof->qos_min_freq = prof->qos_max_freq;
+        }
+        update_devfreq(df);
+        mutex_unlock(&df->lock);
+        return NOTIFY_OK;
+}
+#else
+/*
+ * Variant without the common clock framework: pass the frequency demand
+ * straight to the platform postscale callback, if the platform has one.
+ */
+int gk20a_scale_qos_notify(struct notifier_block *nb,
+                          unsigned long n, void *p)
+{
+        struct gk20a_scale_profile *prof = container_of(nb,
+                        struct gk20a_scale_profile, qos_notify_block);
+        struct gk20a_platform *plat = dev_get_drvdata(prof->dev);
+        struct gk20a *g = get_gk20a(prof->dev);
+        struct nvgpu_os_linux *os = nvgpu_os_linux_from_gk20a(g);
+        unsigned long demand;
+        if (plat->postscale == NULL)
+                return NOTIFY_OK;
+        /* Start from the rounded QoS minimum ... */
+        demand = plat->clk_round_rate(prof->dev,
+                        (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS));
+        /* ... and raise it to the last devfreq frequency if that is higher */
+        if (os->devfreq && os->devfreq->previous_freq > demand)
+                demand = os->devfreq->previous_freq;
+        /* Refresh the gpu load first, since the emc target may depend
+         * on it */
+        nvgpu_pmu_load_update(g);
+        plat->postscale(prof->dev, demand);
+        return NOTIFY_OK;
+}
+#endif
+/*
+ * gk20a_scale_make_freq_table(profile)
+ *
+ * Fill in the devfreq frequency table of the given profile from the
+ * platform's get_clk_freqs callback.
+ *
+ * Returns 0 on success, or -ENOSYS when the platform has no such callback
+ * or the callback reports an error.
+ */
+static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile)
+{
+        struct gk20a_platform *plat = dev_get_drvdata(profile->dev);
+        unsigned long *table;
+        int count;
+        /* No way to obtain a table without the platform hook */
+        if (!plat->get_clk_freqs)
+                return -ENOSYS;
+        /* Any failure of the hook is reported as -ENOSYS */
+        if (plat->get_clk_freqs(profile->dev, &table, &count))
+                return -ENOSYS;
+        profile->devfreq_profile.freq_table = table;
+        profile->devfreq_profile.max_state = count;
+        return 0;
+}
+/*
+ * gk20a_scale_target(dev, *freq, flags)
+ *
+ * devfreq target callback: clip the requested frequency to the devfreq and
+ * pm_qos limits, round it with the platform hook and program the GPC clock.
+ *
+ * On return *freq holds the resulting clock rate, except when the rounded
+ * rate equals the last applied one, in which case the request is treated
+ * as a duplicate and *freq is left untouched. flags is unused.
+ * Always returns 0.
+ */
+static int gk20a_scale_target(struct device *dev, unsigned long *freq,
+                              u32 flags)
+{
+        struct gk20a_platform *platform = dev_get_drvdata(dev);
+        struct gk20a *g = platform->g;
+        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+        struct gk20a_scale_profile *profile = g->scale_profile;
+        struct devfreq *devfreq = l->devfreq;
+        unsigned long local_freq = *freq;
+        unsigned long rounded_rate;
+        unsigned long min_freq = 0, max_freq = 0;
+        /*
+         * Calculate floor and cap frequency values
+         *
+         * Policy :
+         * We have two APIs to clip the frequency
+         *  1. devfreq
+         *  2. pm_qos
+         *
+         * To calculate floor (min) freq, we select MAX of floor frequencies
+         * requested from both APIs
+         * To get cap (max) freq, we select MIN of max frequencies
+         *
+         * In case we have conflict (min_freq > max_freq) after above
+         * steps, we ensure that max_freq wins over min_freq
+         *
+         * All four limits are unsigned long, so compare them at that width;
+         * a narrower type would silently truncate large QoS bounds.
+         */
+        min_freq = max_t(unsigned long, devfreq->min_freq,
+                         profile->qos_min_freq);
+        max_freq = min_t(unsigned long, devfreq->max_freq,
+                         profile->qos_max_freq);
+        if (min_freq > max_freq)
+                min_freq = max_freq;
+        /* Clip requested frequency */
+        if (local_freq < min_freq)
+                local_freq = min_freq;
+        if (local_freq > max_freq)
+                local_freq = max_freq;
+        /* set the final frequency */
+        rounded_rate = platform->clk_round_rate(dev, local_freq);
+        /* Check for duplicate request */
+        if (rounded_rate == g->last_freq)
+                return 0;
+        /* Only touch the clock when it is not already at the target */
+        if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate) {
+                *freq = rounded_rate;
+        } else {
+                g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
+                *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
+        }
+        g->last_freq = *freq;
+        /* postscale will only scale emc (dram clock) if evaluating
+         * gk20a_tegra_get_emc_rate() produces a new or different emc
+         * target because the load and/or gpufreq has changed */
+        if (platform->postscale)
+                platform->postscale(dev, rounded_rate);
+        return 0;
+}
+/*
+ * update_load_estimate_gpmu(dev)
+ *
+ * Refresh the load figures kept in the scaling profile of the device:
+ * total_time becomes the microseconds elapsed since the previous sample,
+ * and busy_time is derived from the load value reported by
+ * nvgpu_pmu_load_norm() (presumably in 1/1000 units, given the division
+ * below - confirm against the PMU code).
+ */
+static void update_load_estimate_gpmu(struct device *dev)
+{
+        struct gk20a *g = get_gk20a(dev);
+        struct gk20a_scale_profile *prof = g->scale_profile;
+        ktime_t now = ktime_get();
+        unsigned long elapsed_us = ktime_us_delta(now, prof->last_event_time);
+        u32 load;
+        /* The sample window runs from the previous call up to now */
+        prof->dev_stat.total_time = elapsed_us;
+        prof->last_event_time = now;
+        nvgpu_pmu_load_norm(g, &load);
+        prof->dev_stat.busy_time = (load * elapsed_us) / 1000;
+}
+/*
+ * gk20a_scale_suspend(dev)
+ *
+ * Tell devfreq that the device is being suspended. Does nothing when no
+ * devfreq instance is attached to the device.
+ */
+void gk20a_scale_suspend(struct device *dev)
+{
+        struct gk20a *g = get_gk20a(dev);
+        struct nvgpu_os_linux *os = nvgpu_os_linux_from_gk20a(g);
+        if (os->devfreq != NULL)
+                devfreq_suspend_device(os->devfreq);
+}
+/*
+ * gk20a_scale_resume(dev)
+ *
+ * Tell devfreq that the device has resumed. Does nothing when no devfreq
+ * instance is attached to the device.
+ */
+void gk20a_scale_resume(struct device *dev)
+{
+        struct gk20a *g = get_gk20a(dev);
+        struct nvgpu_os_linux *os = nvgpu_os_linux_from_gk20a(g);
+        if (os->devfreq != NULL) {
+                /* Forget the last applied rate so that the next target
+                 * request is not discarded as a duplicate */
+                g->last_freq = 0;
+                devfreq_resume_device(os->devfreq);
+        }
+}
+/*
+ * gk20a_scale_get_dev_status(dev, *stat)
+ *
+ * devfreq get_dev_status callback: sample the current clock rate and load,
+ * copy the result into *stat and reset the accumulated times in the
+ * profile. Always returns 0.
+ */
+static int gk20a_scale_get_dev_status(struct device *dev,
+                                      struct devfreq_dev_status *stat)
+{
+        struct gk20a *g = get_gk20a(dev);
+        struct gk20a_scale_profile *prof = g->scale_profile;
+        struct gk20a_platform *plat = dev_get_drvdata(dev);
+        struct devfreq_dev_status *shadow = &prof->dev_stat;
+        /* Bring the software copy of the load up to date */
+        nvgpu_pmu_load_update(g);
+        /* Give the platform its chance to act before scaling (edp) */
+        if (plat->prescale != NULL)
+                plat->prescale(dev);
+        /* Report the rate the clock is actually running at */
+        shadow->current_frequency =
+                g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
+        /* Recompute busy and total time for this sample window */
+        update_load_estimate_gpmu(dev);
+        /* Hand the snapshot to the caller, then start a fresh window */
+        *stat = *shadow;
+        shadow->total_time = 0;
+        shadow->busy_time = 0;
+        return 0;
+}
+/*
+ * get_cur_freq(struct device *dev, unsigned long *freq)
+ *
+ * devfreq get_cur_freq callback: store the current GPC clock rate in
+ * *freq. Always returns 0.
+ */
+static int get_cur_freq(struct device *dev, unsigned long *freq)
+{
+        struct gk20a *gpu = get_gk20a(dev);
+        unsigned long rate = gpu->ops.clk.get_rate(gpu, CTRL_CLK_DOMAIN_GPCCLK);
+        *freq = rate;
+        return 0;
+}
+/*
+ * gk20a_scale_init(dev)
+ *
+ * Set up clock scaling for the device: allocate the scaling profile, fetch
+ * the frequency table from the platform, register with devfreq when the
+ * platform names a governor, and register pm_qos notifiers when the
+ * platform provides a qos_notify callback.
+ *
+ * Does nothing when a profile already exists or when the platform asks for
+ * neither devfreq nor QoS notification. When the allocation or the
+ * frequency table lookup fails, the function returns with
+ * g->scale_profile left unset.
+ */
+void gk20a_scale_init(struct device *dev)
+{
+        struct gk20a_platform *platform = dev_get_drvdata(dev);
+        struct gk20a *g = platform->g;
+        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+        struct gk20a_scale_profile *profile;
+        int err;
+        if (g->scale_profile)
+                return;
+        if (!platform->devfreq_governor && !platform->qos_notify)
+                return;
+        profile = nvgpu_kzalloc(g, sizeof(*profile));
+        /* Out of memory: leave scaling disabled instead of dereferencing
+         * a NULL profile below */
+        if (!profile)
+                return;
+        profile->dev = dev;
+        profile->dev_stat.busy = false;
+        /* Create frequency table; an empty table is treated as a failure */
+        err = gk20a_scale_make_freq_table(profile);
+        if (err || !profile->devfreq_profile.max_state)
+                goto err_get_freqs;
+        /* Until a QoS notification arrives, QoS imposes no limits */
+        profile->qos_min_freq = 0;
+        profile->qos_max_freq = UINT_MAX;
+        /* Store device profile so we can access it if devfreq governor
+         * init needs that */
+        g->scale_profile = profile;
+        if (platform->devfreq_governor) {
+                struct devfreq *devfreq;
+                profile->devfreq_profile.initial_freq =
+                        profile->devfreq_profile.freq_table[0];
+                profile->devfreq_profile.target = gk20a_scale_target;
+                profile->devfreq_profile.get_dev_status =
+                        gk20a_scale_get_dev_status;
+                profile->devfreq_profile.get_cur_freq = get_cur_freq;
+                profile->devfreq_profile.polling_ms = 25;
+                devfreq = devfreq_add_device(dev,
+                                        &profile->devfreq_profile,
+                                        platform->devfreq_governor, NULL);
+                /* A failed registration is recorded as "no devfreq" */
+                if (IS_ERR(devfreq))
+                        devfreq = NULL;
+                l->devfreq = devfreq;
+        }
+        /* Should we register QoS callback for this device? */
+        if (platform->qos_notify) {
+                profile->qos_notify_block.notifier_call =
+                                        platform->qos_notify;
+                pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+                                        &profile->qos_notify_block);
+                pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+                                        &profile->qos_notify_block);
+        }
+        return;
+err_get_freqs:
+        nvgpu_kfree(g, profile);
+}
+/*
+ * gk20a_scale_exit(dev)
+ *
+ * Undo gk20a_scale_init(): unregister the pm_qos notifiers, remove the
+ * devfreq device and free the scaling profile. Safe to call when
+ * gk20a_scale_init() did not set up a profile.
+ */
+void gk20a_scale_exit(struct device *dev)
+{
+        struct gk20a_platform *platform = dev_get_drvdata(dev);
+        struct gk20a *g = platform->g;
+        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+        struct gk20a_scale_profile *profile = g->scale_profile;
+        int err;
+        /* gk20a_scale_init() registers notifiers and devfreq only after
+         * storing the profile, so without one there is nothing to undo */
+        if (!profile)
+                return;
+        if (platform->qos_notify) {
+                pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+                                &profile->qos_notify_block);
+                pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+                                &profile->qos_notify_block);
+        }
+        /* l->devfreq is NULL when devfreq registration failed at init */
+        if (platform->devfreq_governor && l->devfreq) {
+                err = devfreq_remove_device(l->devfreq);
+                if (err)
+                        nvgpu_err(g, "failed to remove devfreq device: %d",
+                                err);
+                l->devfreq = NULL;
+        }
+        nvgpu_kfree(g, profile);
+        g->scale_profile = NULL;
+}
+/*
+ * gk20a_scale_hw_init(dev)
+ *
+ * Restart load accounting for the device: clear the accumulated total time
+ * and take a new reference timestamp. Does nothing when scaling has not
+ * been initialised.
+ */
+void gk20a_scale_hw_init(struct device *dev)
+{
+        struct gk20a_platform *plat = dev_get_drvdata(dev);
+        struct gk20a_scale_profile *prof = plat->g->scale_profile;
+        /* Only act once scaling has been initialised */
+        if (prof != NULL) {
+                prof->dev_stat.total_time = 0;
+                prof->last_event_time = ktime_get();
+        }
+}
diff --git a/drivers/gpu/nvgpu/common/linux/scale.h b/drivers/gpu/nvgpu/common/linux/scale.h
new file mode 100644
index 00000000..c1e6fe86
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/scale.h
@@ -0,0 +1,66 @@
+/*
+ * gk20a clock scaling profile
+ *
+ * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GK20A_SCALE_H
+#define GK20A_SCALE_H
+#include <linux/devfreq.h>
+struct clk;
+struct gk20a_scale_profile { /* per-device clock scaling state */
+        struct device                   *dev; /* device this profile belongs to */
+        ktime_t                         last_event_time; /* time of the previous load sample */
+        struct devfreq_dev_profile      devfreq_profile; /* profile handed to devfreq */
+        struct devfreq_dev_status       dev_stat; /* load and frequency snapshot reported to devfreq */
+        struct notifier_block           qos_notify_block; /* pm_qos notifier; container_of() maps it back to the profile */
+        unsigned long                   qos_min_freq; /* frequency floor requested through pm_qos */
+        unsigned long                   qos_max_freq; /* frequency cap requested through pm_qos */
+        void                            *private_data; /* NOTE(review): not referenced in scale.c - confirm users */
+};
+/* Initialization and de-initialization for module. These three are
+ * declared regardless of CONFIG_GK20A_DEVFREQ. */
+void gk20a_scale_init(struct device *);
+void gk20a_scale_exit(struct device *);
+void gk20a_scale_hw_init(struct device *dev);
+#if defined(CONFIG_GK20A_DEVFREQ)
+/*
+ * call when performing submit to notify scaling mechanism that the module is
+ * in use
+ *
+ * The remaining declarations cover suspend/resume notification of devfreq
+ * and the pm_qos notifier callback.
+ */
+void gk20a_scale_notify_busy(struct device *);
+void gk20a_scale_notify_idle(struct device *);
+void gk20a_scale_suspend(struct device *);
+void gk20a_scale_resume(struct device *);
+int gk20a_scale_qos_notify(struct notifier_block *nb,
+                        unsigned long n, void *p);
+#else /* !CONFIG_GK20A_DEVFREQ: empty stubs so callers need no #ifdef */
+static inline void gk20a_scale_notify_busy(struct device *dev) {}
+static inline void gk20a_scale_notify_idle(struct device *dev) {}
+static inline void gk20a_scale_suspend(struct device *dev) {}
+static inline void gk20a_scale_resume(struct device *dev) {}
+static inline int gk20a_scale_qos_notify(struct notifier_block *nb,
+                        unsigned long n, void *p)
+{
+        return -ENOSYS; /* QoS scaling is not available in this configuration */
+}
+#endif /* CONFIG_GK20A_DEVFREQ */
+#endif
author	Terje Bergstrom <tbergstrom@nvidia.com>	2017-10-30 15:52:52 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-11-01 13:55:41 -0400
commit	15e259bc5255e891f776a84b5f28a97ea0567178 (patch)
tree	ca4a92cafe8e01b8bdb87c0bbdc5fc18913af841 /drivers/gpu/nvgpu/common
parent	964a849d6176da362c375d7d72b94289e9b905de (diff)

diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c index e4a65692..759607a2 100644 --- a/drivers/gpu/nvgpu/common/linux/driver_common.c +++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -23,7 +23,7 @@
23	#include <nvgpu/enabled.h>	23	#include <nvgpu/enabled.h>
24	#include <nvgpu/debug.h>	24	#include <nvgpu/debug.h>
25		25
26	#include "gk20a/gk20a_scale.h"	26	#include "scale.h"
27	#include "gk20a/gk20a.h"	27	#include "gk20a/gk20a.h"
28	#include "gk20a/platform_gk20a.h"	28	#include "gk20a/platform_gk20a.h"
29	#include "module.h"	29	#include "module.h"


diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 4f66fc67..d0abc836 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -35,7 +35,7 @@
35	#include "gk20a/platform_gk20a.h"	35	#include "gk20a/platform_gk20a.h"
36	#include "sysfs.h"	36	#include "sysfs.h"
37	#include "vgpu/vgpu.h"	37	#include "vgpu/vgpu.h"
38	#include "gk20a/gk20a_scale.h"	38	#include "scale.h"
39	#include "gk20a/ctxsw_trace_gk20a.h"	39	#include "gk20a/ctxsw_trace_gk20a.h"
40	#include "pci.h"	40	#include "pci.h"
41	#include "module.h"	41	#include "module.h"


diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c index 5786feab..2d6d156c 100644 --- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
@@ -48,9 +48,9 @@
48		48
49	#include "gk20a/gk20a.h"	49	#include "gk20a/gk20a.h"
50	#include "gk20a/platform_gk20a.h"	50	#include "gk20a/platform_gk20a.h"
51	#include "gk20a/gk20a_scale.h"
52	#include "gm20b/clk_gm20b.h"	51	#include "gm20b/clk_gm20b.h"
53		52
		53	#include "scale.h"
54	#include "clk.h"	54	#include "clk.h"
55	#include "os_linux.h"	55	#include "os_linux.h"
56		56


diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c index e5d19976..fb8686c2 100644 --- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c
@@ -35,11 +35,11 @@
35		35
36	#include "gk20a/platform_gk20a.h"	36	#include "gk20a/platform_gk20a.h"
37	#include "gk20a/gk20a.h"	37	#include "gk20a/gk20a.h"
38	#include "gk20a/gk20a_scale.h"
39		38
40	#include "platform_gk20a_tegra.h"	39	#include "platform_gk20a_tegra.h"
41	#include "gp10b/platform_gp10b.h"	40	#include "gp10b/platform_gp10b.h"
42	#include "platform_gp10b_tegra.h"	41	#include "platform_gp10b_tegra.h"
		42	#include "scale.h"
43		43
44	/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */	44	/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */
45	#define GP10B_FREQ_SELECT_STEP 8	45	#define GP10B_FREQ_SELECT_STEP 8


diff --git a/drivers/gpu/nvgpu/common/linux/scale.c b/drivers/gpu/nvgpu/common/linux/scale.c new file mode 100644 index 00000000..05f09dcc --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/scale.c
@@ -0,0 +1,428 @@
		1	/*
		2	* gk20a clock scaling profile
		3	*
		4	* Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved.
		5	*
		6	* This program is free software; you can redistribute it and/or modify it
		7	* under the terms and conditions of the GNU General Public License,
		8	* version 2, as published by the Free Software Foundation.
		9	*
		10	* This program is distributed in the hope it will be useful, but WITHOUT
		11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		13	* more details.
		14	*
		15	* You should have received a copy of the GNU General Public License
		16	* along with this program. If not, see <http://www.gnu.org/licenses/>.
		17	*/
		18
		19	#include <linux/devfreq.h>
		20	#include <linux/export.h>
		21	#include <soc/tegra/chip-id.h>
		22	#include <linux/pm_qos.h>
		23
		24	#include <governor.h>
		25
		26	#include <nvgpu/kmem.h>
		27	#include <nvgpu/log.h>
		28
		29	#include "gk20a/gk20a.h"
		30	#include "gk20a/platform_gk20a.h"
		31	#include "scale.h"
		32	#include "os_linux.h"
		33
		34	/*
		35	* gk20a_scale_qos_notify()
		36	*
		37	* This function is called when the minimum QoS requirement for the device
		38	* has changed. The function calls postscaling callback if it is defined.
		39	*/
		40
		41	#if defined(CONFIG_COMMON_CLK)
		42	int gk20a_scale_qos_notify(struct notifier_block *nb,
		43	unsigned long n, void *p)
		44	{
		45	struct gk20a_scale_profile *profile =
		46	container_of(nb, struct gk20a_scale_profile,
		47	qos_notify_block);
		48	struct gk20a *g = get_gk20a(profile->dev);
		49	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		50	struct devfreq *devfreq = l->devfreq;
		51
		52	if (!devfreq)
		53	return NOTIFY_OK;
		54
		55	mutex_lock(&devfreq->lock);
		56	/* check for pm_qos min and max frequency requirement */
		57	profile->qos_min_freq =
		58	(unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
		59	profile->qos_max_freq =
		60	(unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
		61
		62	if (profile->qos_min_freq > profile->qos_max_freq) {
		63	nvgpu_err(g,
		64	"QoS: setting invalid limit, min_freq=%lu max_freq=%lu",
		65	profile->qos_min_freq, profile->qos_max_freq);
		66	profile->qos_min_freq = profile->qos_max_freq;
		67	}
		68
		69	update_devfreq(devfreq);
		70	mutex_unlock(&devfreq->lock);
		71
		72	return NOTIFY_OK;
		73	}
		74	#else
		75	int gk20a_scale_qos_notify(struct notifier_block *nb,
		76	unsigned long n, void *p)
		77	{
		78	struct gk20a_scale_profile *profile =
		79	container_of(nb, struct gk20a_scale_profile,
		80	qos_notify_block);
		81	struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
		82	struct gk20a *g = get_gk20a(profile->dev);
		83	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		84	unsigned long freq;
		85
		86	if (!platform->postscale)
		87	return NOTIFY_OK;
		88
		89	/* get the frequency requirement. if devfreq is enabled, check if it
		90	* has higher demand than qos */
		91	freq = platform->clk_round_rate(profile->dev,
		92	(u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS));
		93	if (l->devfreq)
		94	freq = max(l->devfreq->previous_freq, freq);
		95
		96	/* Update gpu load because we may scale the emc target
		97	* if the gpu load changed. */
		98	nvgpu_pmu_load_update(g);
		99	platform->postscale(profile->dev, freq);
		100
		101	return NOTIFY_OK;
		102	}
		103	#endif
		104
		105	/*
		106	* gk20a_scale_make_freq_table(profile)
		107	*
		108	* This function initialises the frequency table for the given device profile
		109	*/
		110
		111	static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile)
		112	{
		113	struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
		114	int num_freqs, err;
		115	unsigned long *freqs;
		116
		117	if (platform->get_clk_freqs) {
		118	/* get gpu frequency table */
		119	err = platform->get_clk_freqs(profile->dev, &freqs,
		120	&num_freqs);
		121	if (err)
		122	return -ENOSYS;
		123	} else
		124	return -ENOSYS;
		125
		126	profile->devfreq_profile.freq_table = (unsigned long *)freqs;
		127	profile->devfreq_profile.max_state = num_freqs;
		128
		129	return 0;
		130	}
		131
		132	/*
		133	* gk20a_scale_target(dev, *freq, flags)
		134	*
		135	* This function scales the clock
		136	*/
		137
		138	static int gk20a_scale_target(struct device *dev, unsigned long *freq,
		139	u32 flags)
		140	{
		141	struct gk20a_platform *platform = dev_get_drvdata(dev);
		142	struct gk20a *g = platform->g;
		143	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		144	struct gk20a_scale_profile *profile = g->scale_profile;
		145	struct devfreq *devfreq = l->devfreq;
		146	unsigned long local_freq = *freq;
		147	unsigned long rounded_rate;
		148	unsigned long min_freq = 0, max_freq = 0;
		149
		150	/*
		151	* Calculate floor and cap frequency values
		152	*
		153	* Policy :
		154	* We have two APIs to clip the frequency
		155	* 1. devfreq
		156	* 2. pm_qos
		157	*
		158	* To calculate floor (min) freq, we select MAX of floor frequencies
		159	* requested from both APIs
		160	* To get cap (max) freq, we select MIN of max frequencies
		161	*
		162	* In case we have conflict (min_freq > max_freq) after above
		163	* steps, we ensure that max_freq wins over min_freq
		164	*/
		165	min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq);
		166	max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq);
		167
		168	if (min_freq > max_freq)
		169	min_freq = max_freq;
		170
		171	/* Clip requested frequency */
		172	if (local_freq < min_freq)
		173	local_freq = min_freq;
		174
		175	if (local_freq > max_freq)
		176	local_freq = max_freq;
		177
		178	/* set the final frequency */
		179	rounded_rate = platform->clk_round_rate(dev, local_freq);
		180
		181	/* Check for duplicate request */
		182	if (rounded_rate == g->last_freq)
		183	return 0;
		184
		185	if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate)
		186	*freq = rounded_rate;
		187	else {
		188	g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
		189	*freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
		190	}
		191
		192	g->last_freq = *freq;
		193
		194	/* postscale will only scale emc (dram clock) if evaluating
		195	* gk20a_tegra_get_emc_rate() produces a new or different emc
		196	* target because the load or_and gpufreq has changed */
		197	if (platform->postscale)
		198	platform->postscale(dev, rounded_rate);
		199
		200	return 0;
		201	}
		202
		203	/*
		204	* update_load_estimate_gpmu(profile)
		205	*
		206	* Update load estimate using gpmu. The gpmu value is normalised
		207	* based on the time it was asked last time.
		208	*/
		209
		210	static void update_load_estimate_gpmu(struct device *dev)
		211	{
		212	struct gk20a *g = get_gk20a(dev);
		213	struct gk20a_scale_profile *profile = g->scale_profile;
		214	unsigned long dt;
		215	u32 busy_time;
		216	ktime_t t;
		217
		218	t = ktime_get();
		219	dt = ktime_us_delta(t, profile->last_event_time);
		220
		221	profile->dev_stat.total_time = dt;
		222	profile->last_event_time = t;
		223	nvgpu_pmu_load_norm(g, &busy_time);
		224	profile->dev_stat.busy_time = (busy_time * dt) / 1000;
		225	}
		226
		227	/*
		228	* gk20a_scale_suspend(dev)
		229	*
		230	* This function informs devfreq of suspend
		231	*/
		232
		233	void gk20a_scale_suspend(struct device *dev)
		234	{
		235	struct gk20a *g = get_gk20a(dev);
		236	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		237	struct devfreq *devfreq = l->devfreq;
		238
		239	if (!devfreq)
		240	return;
		241
		242	devfreq_suspend_device(devfreq);
		243	}
		244
		245	/*
		246	* gk20a_scale_resume(dev)
		247	*
		248	* This function informs devfreq of resume
		249	*/
		250
		251	void gk20a_scale_resume(struct device *dev)
		252	{
		253	struct gk20a *g = get_gk20a(dev);
		254	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		255	struct devfreq *devfreq = l->devfreq;
		256
		257	if (!devfreq)
		258	return;
		259
		260	g->last_freq = 0;
		261	devfreq_resume_device(devfreq);
		262	}
		263
		264	/*
		265	* gk20a_scale_get_dev_status(dev, *stat)
		266	*
		267	* This function queries the current device status.
		268	*/
		269
		270	static int gk20a_scale_get_dev_status(struct device *dev,
		271	struct devfreq_dev_status *stat)
		272	{
		273	struct gk20a *g = get_gk20a(dev);
		274	struct gk20a_scale_profile *profile = g->scale_profile;
		275	struct gk20a_platform *platform = dev_get_drvdata(dev);
		276
		277	/* update the software shadow */
		278	nvgpu_pmu_load_update(g);
		279
		280	/* inform edp about new constraint */
		281	if (platform->prescale)
		282	platform->prescale(dev);
		283
		284	/* Make sure there are correct values for the current frequency */
		285	profile->dev_stat.current_frequency =
		286	g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
		287
		288	/* Update load estimate */
		289	update_load_estimate_gpmu(dev);
		290
		291	/* Copy the contents of the current device status */
		292	*stat = profile->dev_stat;
		293
		294	/* Finally, clear out the local values */
		295	profile->dev_stat.total_time = 0;
		296	profile->dev_stat.busy_time = 0;
		297
		298	return 0;
		299	}
		300
		301	/*
		302	* get_cur_freq(struct device *dev, unsigned long *freq)
		303	*
		304	* This function gets the current GPU clock rate.
		305	*/
		306
		307	static int get_cur_freq(struct device *dev, unsigned long *freq)
		308	{
		309	struct gk20a *g = get_gk20a(dev);
		310	*freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
		311	return 0;
		312	}
		313
		314
		315	/*
		316	* gk20a_scale_init(dev)
		317	*/
		318
		319	void gk20a_scale_init(struct device *dev)
		320	{
		321	struct gk20a_platform *platform = dev_get_drvdata(dev);
		322	struct gk20a *g = platform->g;
		323	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		324	struct gk20a_scale_profile *profile;
		325	int err;
		326
		327	if (g->scale_profile)
		328	return;
		329
		330	if (!platform->devfreq_governor && !platform->qos_notify)
		331	return;
		332
		333	profile = nvgpu_kzalloc(g, sizeof(*profile));
		334
		335	profile->dev = dev;
		336	profile->dev_stat.busy = false;
		337
		338	/* Create frequency table */
		339	err = gk20a_scale_make_freq_table(profile);
		340	if (err || !profile->devfreq_profile.max_state)
		341	goto err_get_freqs;
		342
		343	profile->qos_min_freq = 0;
		344	profile->qos_max_freq = UINT_MAX;
		345
		346	/* Store device profile so we can access it if devfreq governor
		347	* init needs that */
		348	g->scale_profile = profile;
		349
		350	if (platform->devfreq_governor) {
		351	struct devfreq *devfreq;
		352
		353	profile->devfreq_profile.initial_freq =
		354	profile->devfreq_profile.freq_table[0];
		355	profile->devfreq_profile.target = gk20a_scale_target;
		356	profile->devfreq_profile.get_dev_status =
		357	gk20a_scale_get_dev_status;
		358	profile->devfreq_profile.get_cur_freq = get_cur_freq;
		359	profile->devfreq_profile.polling_ms = 25;
		360
		361	devfreq = devfreq_add_device(dev,
		362	&profile->devfreq_profile,
		363	platform->devfreq_governor, NULL);
		364
		365	if (IS_ERR(devfreq))
		366	devfreq = NULL;
		367
		368	l->devfreq = devfreq;
		369	}
		370
		371	/* Should we register QoS callback for this device? */
		372	if (platform->qos_notify) {
		373	profile->qos_notify_block.notifier_call =
		374	platform->qos_notify;
		375
		376	pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
		377	&profile->qos_notify_block);
		378	pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
		379	&profile->qos_notify_block);
		380	}
		381
		382	return;
		383
		384	err_get_freqs:
		385	nvgpu_kfree(g, profile);
		386	}
		387
		388	void gk20a_scale_exit(struct device *dev)
		389	{
		390	struct gk20a_platform *platform = dev_get_drvdata(dev);
		391	struct gk20a *g = platform->g;
		392	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		393	int err;
		394
		395	if (platform->qos_notify) {
		396	pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
		397	&g->scale_profile->qos_notify_block);
		398	pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
		399	&g->scale_profile->qos_notify_block);
		400	}
		401
		402	if (platform->devfreq_governor) {
		403	err = devfreq_remove_device(l->devfreq);
		404	l->devfreq = NULL;
		405	}
		406
		407	nvgpu_kfree(g, g->scale_profile);
		408	g->scale_profile = NULL;
		409	}
		410
		411	/*
		412	* gk20a_scale_hw_init(dev)
		413	*
		414	* Initialize hardware portion of the device
		415	*/
		416
		417	void gk20a_scale_hw_init(struct device *dev)
		418	{
		419	struct gk20a_platform *platform = dev_get_drvdata(dev);
		420	struct gk20a_scale_profile *profile = platform->g->scale_profile;
		421
		422	/* make sure that scaling has been initialised */
		423	if (!profile)
		424	return;
		425
		426	profile->dev_stat.total_time = 0;
		427	profile->last_event_time = ktime_get();
		428	}


diff --git a/drivers/gpu/nvgpu/common/linux/scale.h b/drivers/gpu/nvgpu/common/linux/scale.h new file mode 100644 index 00000000..c1e6fe86 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/scale.h
@@ -0,0 +1,66 @@
		1	/*
		2	* gk20a clock scaling profile
		3	*
		4	* Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved.
		5	*
		6	* This program is free software; you can redistribute it and/or modify it
		7	* under the terms and conditions of the GNU General Public License,
		8	* version 2, as published by the Free Software Foundation.
		9	*
		10	* This program is distributed in the hope it will be useful, but WITHOUT
		11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		13	* more details.
		14	*
		15	* You should have received a copy of the GNU General Public License
		16	* along with this program. If not, see <http://www.gnu.org/licenses/>.
		17	*/
		18
		19	#ifndef GK20A_SCALE_H
		20	#define GK20A_SCALE_H
		21
		22	#include <linux/devfreq.h>
		23
		24	struct clk;
		25
		26	struct gk20a_scale_profile {
		27	struct device *dev;
		28	ktime_t last_event_time;
		29	struct devfreq_dev_profile devfreq_profile;
		30	struct devfreq_dev_status dev_stat;
		31	struct notifier_block qos_notify_block;
		32	unsigned long qos_min_freq;
		33	unsigned long qos_max_freq;
		34	void *private_data;
		35	};
		36
		/*
		 * Module-level set-up and tear-down of clock scaling, plus the hook
		 * that resets the profile's timing state.
		 */
		void gk20a_scale_init(struct device *dev);
		void gk20a_scale_exit(struct device *dev);
		void gk20a_scale_hw_init(struct device *dev);
		41
		#ifdef CONFIG_GK20A_DEVFREQ
		/*
		 * Busy/idle notifications: call these when performing a submit so
		 * the scaling mechanism knows the module is in use.
		 */
		void gk20a_scale_notify_busy(struct device *dev);
		void gk20a_scale_notify_idle(struct device *dev);

		void gk20a_scale_suspend(struct device *dev);
		void gk20a_scale_resume(struct device *dev);
		int gk20a_scale_qos_notify(struct notifier_block *nb,
					   unsigned long n, void *p);
		#else
		/*
		 * Without CONFIG_GK20A_DEVFREQ the scaling hooks are empty inline
		 * stubs; the QoS notifier stub returns -ENOSYS.
		 */
		static inline void gk20a_scale_notify_busy(struct device *dev)
		{
		}

		static inline void gk20a_scale_notify_idle(struct device *dev)
		{
		}

		static inline void gk20a_scale_suspend(struct device *dev)
		{
		}

		static inline void gk20a_scale_resume(struct device *dev)
		{
		}

		static inline int gk20a_scale_qos_notify(struct notifier_block *nb,
							 unsigned long n, void *p)
		{
			return -ENOSYS;
		}
		#endif /* CONFIG_GK20A_DEVFREQ */
		65
		66	#endif