1 files changed, 561 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
new file mode 100644
index 00000000..35658f31
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -0,0 +1,561 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+ *
+ * GK20A Tegra Platform Interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/debugfs.h>
+#include <linux/tegra-powergate.h>
+#include <linux/platform_data/tegra_edp.h>
+#include <linux/nvhost_ioctl.h>
+#include <linux/dma-buf.h>
+#include <linux/nvmap.h>
+#include <mach/irqs.h>
+#include <mach/pm_domains.h>
+#include "../../../arch/arm/mach-tegra/iomap.h"
+#include "gk20a.h"
+#include "hal_gk20a.h"
+#include "platform_gk20a.h"
+#include "gk20a_scale.h"
+#define TEGRA_GK20A_INTR                INT_GPU /* GPU interrupt line; used as an IRQ resource further down */
+#define TEGRA_GK20A_INTR_NONSTALL       INT_GPU_NONSTALL /* second GPU interrupt line (non-stall, per the name); also an IRQ resource */
+#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
+#define TEGRA_GK20A_SIM_SIZE 0x1000     /*tbd: this is a high-side guess */
+extern struct device tegra_vpr_dev; /* defined outside this file; DMA device used by the secure alloc/destroy helpers */
+struct gk20a_platform t132_gk20a_tegra_platform; /* tentative definition: gk20a_tegra_probe() uses it before the initialized definition below */
+/*
+ * Coefficients of the GPU-rate to EMC-rate mapping
+ *
+ *   Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
+ *
+ * They are written by gk20a_tegra_calibrate_emc() and read by
+ * gk20a_tegra_get_emc_rate(). The values derived from clock rates are
+ * MHz in the file's 20.12 fixed-point format.
+ */
+struct gk20a_emc_params {
+        long emc_slope;         /* S:  slope of the linear term */
+        long emc_offset;        /* O:  offset of the linear term */
+        long emc_dip_slope;     /* Sd: coefficient of the quadratic dip */
+        long emc_dip_offset;    /* Od: offset of the quadratic dip */
+        long emc_xmid;          /* Rm: middle of the GPU rate range */
+        bool linear;            /* not referenced anywhere in this file */
+};
+/*
+ * 20.12 fixed point arithmetic
+ *
+ * Values carry FXFRAC (12) fractional bits. FX_HALF is one half in that
+ * format; gk20a_tegra_get_emc_rate() adds it before FX_TO_INT() to round
+ * to nearest.
+ */
+static const int FXFRAC = 12; /* number of fractional bits */
+static const int FX_HALF = (1 << 12) / 2; /* 0.5 in fixed point */
+#define INT_TO_FX(x) ((x) << FXFRAC) /* integer -> fixed point */
+#define FX_TO_INT(x) ((x) >> FXFRAC) /* fixed point -> integer, fraction shifted out */
+#define MHZ_TO_HZ(x) ((x) * 1000000)
+#define HZ_TO_MHZ(x) ((x) / 1000000) /* integer division, truncates */
+/*
+ * FXMUL()
+ *
+ * Multiply two fixed-point values. The product is formed in 64 bits so
+ * the intermediate does not overflow, then shifted back down by FXFRAC
+ * and converted to int on return.
+ */
+int FXMUL(int x, int y)
+{
+        long long product = (long long) x * (long long) y;
+        return product >> FXFRAC;
+}
+/*
+ * FXDIV()
+ *
+ * Divide fixed-point x by fixed-point y without a 64-bit division. The
+ * 64-bit form would have been
+ *
+ *    return (((long long) x) << FXFRAC) / (long long) y;
+ *
+ * Instead the numerator is shifted left as far as it fits (at most
+ * FXFRAC bits) and the denominator is shifted right by whatever part of
+ * FXFRAC could not be applied to the numerator.
+ *
+ * Returns 0 when x is 0, and 0x7FFFFFFF when the adjusted denominator
+ * becomes 0.
+ */
+int FXDIV(int x, int y)
+{
+        int magnitude, shift;
+        if (x == 0)
+                return 0;
+        /* headroom above the numerator's top set bit, capped at FXFRAC */
+        magnitude = (x < 0) ? -x : x;
+        shift = 31 - fls(magnitude); /* fls can't be 32 if x != 0 */
+        shift = (shift > FXFRAC) ? FXFRAC : shift;
+        /* the denominator absorbs the rest of the scaling */
+        y >>= FXFRAC - shift;
+        if (y == 0)
+                return 0x7FFFFFFF; /* overflow, return MAX_FIXED */
+        return (x << shift) / y;
+}
+/*
+ * gk20a_tegra_channel_busy()
+ *
+ * Mark the parent (host1x) device busy so that its clocks are on while a
+ * channel is in use. Returns 0 when the device has no parent, otherwise
+ * the result of nvhost_module_busy_ext().
+ */
+static int gk20a_tegra_channel_busy(struct platform_device *dev)
+{
+        struct device *parent = dev->dev.parent;
+        /* Explicitly turn on the host1x clocks
+         * - This is needed as host1x driver sets ignore_children = true
+         * to cater the use case of display clock ON but host1x clock OFF
+         * in OS-Idle-Display-ON case
+         * - This was easily done in ACM as it only checked the ref count
+         * of host1x (or any device for that matter) to be zero before
+         * turning off its clock
+         * - However, runtime PM checks to see if *ANY* child of device is
+         * in ACTIVE state and if yes, it doesn't suspend the parent. As a
+         * result of this, display && host1x clocks remains ON during
+         * OS-Idle-Display-ON case
+         * - The code below fixes this use-case
+         */
+        /*
+         * Test the parent pointer itself: to_platform_device() is a
+         * container_of() style conversion and does not turn a NULL
+         * parent into a NULL platform device.
+         */
+        if (!parent)
+                return 0;
+        return nvhost_module_busy_ext(to_platform_device(parent));
+}
+/*
+ * gk20a_tegra_channel_idle()
+ *
+ * Counterpart of gk20a_tegra_channel_busy(): drop the busy reference on
+ * the parent (host1x) device. Does nothing when the device has no parent.
+ */
+static void gk20a_tegra_channel_idle(struct platform_device *dev)
+{
+        struct device *parent = dev->dev.parent;
+        /*
+         * Test the parent pointer itself: to_platform_device() is a
+         * container_of() style conversion and does not turn a NULL
+         * parent into a NULL platform device.
+         */
+        if (!parent)
+                return;
+        /* Explicitly turn off the host1x clocks */
+        nvhost_module_idle_ext(to_platform_device(parent));
+}
+/*
+ * gk20a_tegra_secure_destroy()
+ *
+ * Release a buffer described by desc: free its scatter-gather table
+ * first, then return the allocation to tegra_vpr_dev using the size,
+ * address and attributes recorded in desc.
+ */
+static void gk20a_tegra_secure_destroy(struct platform_device *pdev,
+                                       struct gr_ctx_buffer_desc *desc)
+{
+        /* same CPU-address argument the allocation error path passes */
+        void *cpu_cookie = (void *)(uintptr_t)&desc->iova;
+        gk20a_free_sgtable(&desc->sgt);
+        dma_free_attrs(&tegra_vpr_dev, desc->size, cpu_cookie, desc->iova,
+                       &desc->attrs);
+}
+/*
+ * gk20a_tegra_secure_alloc()
+ *
+ * Allocate size bytes from tegra_vpr_dev without a kernel mapping and
+ * describe the result in desc: DMA address, size, DMA attributes, the
+ * destroy callback and a single-entry scatter-gather table.
+ *
+ * Returns 0 on success. Returns -ENOMEM when the DMA allocation or the
+ * sg_table structure allocation fails, or the sg_alloc_table() error
+ * code. On the later failures the DMA allocation is freed again before
+ * returning.
+ */
+static int gk20a_tegra_secure_alloc(struct platform_device *pdev,
+                                    struct gr_ctx_buffer_desc *desc,
+                                    size_t size)
+{
+        struct device *dev = &pdev->dev;
+        DEFINE_DMA_ATTRS(attrs);
+        dma_addr_t iova;
+        struct sg_table *sgt;
+        struct page *page;
+        int err;
+        dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+        /* the returned CPU pointer is discarded; only iova is used */
+        (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
+                                      GFP_KERNEL, &attrs);
+        if (dma_mapping_error(&tegra_vpr_dev, iova))
+                return -ENOMEM;
+        desc->iova = iova;
+        desc->size = size;
+        desc->attrs = attrs;
+        desc->destroy = gk20a_tegra_secure_destroy;
+        sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+        if (!sgt) {
+                gk20a_err(dev, "failed to allocate memory\n");
+                /* report the failure instead of returning 0 */
+                err = -ENOMEM;
+                goto fail;
+        }
+        err = sg_alloc_table(sgt, 1, GFP_KERNEL);
+        if (err) {
+                gk20a_err(dev, "failed to allocate sg_table\n");
+                goto fail_sgt;
+        }
+        /* one entry covering the whole buffer */
+        page = phys_to_page(iova);
+        sg_set_page(sgt->sgl, page, size, 0);
+        sg_dma_address(sgt->sgl) = iova;
+        desc->sgt = sgt;
+        return 0;
+fail_sgt:
+        kfree(sgt);
+fail:
+        /* NOTE(review): desc->destroy stays set after this free - confirm callers do not invoke it on error */
+        dma_free_attrs(&tegra_vpr_dev, desc->size,
+                        (void *)(uintptr_t)&desc->iova,
+                        desc->iova, &desc->attrs);
+        return err;
+}
+/*
+ * gk20a_tegra_get_emc_rate()
+ *
+ * Map a gpu frequency (Hz) to the minimum emc clock rate (Hz) using the
+ * calibrated parameters:
+ *
+ *   emc = slope * f + offset - (dip_slope * (f - xmid)^2 + dip_offset)
+ *
+ * The evaluation is done in MHz with 20.12 fixed point, rounded to the
+ * nearest MHz and clamped so that the result is never negative.
+ */
+long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq)
+{
+        long fx_freq = INT_TO_FX(HZ_TO_MHZ(freq));
+        long from_mid = fx_freq - emc_params->emc_xmid;
+        long linear_term;
+        long dip_term;
+        long hz;
+        linear_term = FXMUL(fx_freq, emc_params->emc_slope) +
+                emc_params->emc_offset;
+        dip_term = FXMUL(emc_params->emc_dip_slope,
+                         FXMUL(from_mid, from_mid)) +
+                emc_params->emc_dip_offset;
+        hz = linear_term - dip_term;
+        /* round to nearest */
+        hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF));
+        if (hz < 0)
+                hz = 0;
+        return hz;
+}
+/*
+ * gk20a_tegra_postscale(pdev, freq)
+ *
+ * This function sets emc frequency based on current gpu frequency.
+ *
+ * The gpu rate is read back with gk20a_clk_get_rate(); the freq argument
+ * is not used. Nothing is done when there is no scale profile or when
+ * gk20a_tegra_scale_init() did not attach emc parameters to it.
+ */
+static void gk20a_tegra_postscale(struct platform_device *pdev,
+                                  unsigned long freq)
+{
+        struct gk20a_platform *platform = platform_get_drvdata(pdev);
+        struct gk20a_scale_profile *profile = platform->g->scale_profile;
+        struct gk20a_emc_params *emc_params;
+        struct gk20a *g = get_gk20a(pdev);
+        long after;
+        long emc_target;
+        /* scale_init leaves these unset when it bails out early */
+        if (!profile || !profile->private_data)
+                return;
+        emc_params = profile->private_data;
+        after = gk20a_clk_get_rate(g);
+        emc_target = gk20a_tegra_get_emc_rate(emc_params, after);
+        /* clk[2] is the "emc" entry of tegra_gk20a_clocks */
+        clk_set_rate(platform->clk[2], emc_target);
+}
+/*
+ * gk20a_tegra_prescale(pdev)
+ *
+ * This function informs EDP about changed constraints: it fetches the
+ * load value from gk20a_pmu_load_norm() and passes it on to
+ * tegra_edp_notify_gpu_load().
+ */
+static void gk20a_tegra_prescale(struct platform_device *pdev)
+{
+        u32 load = 0;
+        gk20a_pmu_load_norm(get_gk20a(pdev), &load);
+        tegra_edp_notify_gpu_load(load);
+}
+/*
+ * gk20a_tegra_calibrate_emc()
+ *
+ * Compute emc scaling parameters
+ *
+ * The rate limits of both clocks are queried with clk_round_rate(),
+ * converted to MHz in 20.12 fixed point, and the resulting coefficients
+ * are stored in emc_params (emc_slope, emc_offset, emc_dip_slope,
+ * emc_dip_offset, emc_xmid). The 'linear' field is not written here.
+ *
+ * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
+ *
+ * Remc - 3d.emc rate
+ * R3d  - 3d.cbus rate
+ * Rm   - 3d.cbus 'middle' rate = (max + min)/2
+ * S    - emc_slope
+ * O    - emc_offset
+ * Sd   - emc_dip_slope
+ * Od   - emc_dip_offset
+ *
+ * this superposes a quadratic dip centered around the middle 3d
+ * frequency over a linear correlation of 3d.emc to 3d clock
+ * rates.
+ *
+ * S, O are chosen so that the maximum 3d rate produces the
+ * maximum 3d.emc rate exactly, and the minimum 3d rate produces
+ * at least the minimum 3d.emc rate.
+ *
+ * Sd and Od are chosen to produce the largest dip that will
+ * keep 3d.emc frequencies monotonously decreasing with 3d
+ * frequencies. To achieve this, the first derivative of Remc
+ * with respect to R3d should be zero for the minimal 3d rate:
+ *
+ *   R'emc = S - 2 * Sd * (R3d - Rm)
+ *   R'emc(R3d-min) = 0
+ *   S = 2 * Sd * (R3d-min - Rm)
+ *     = 2 * Sd * (R3d-min - R3d-max) / 2
+ *
+ *   +------------------------------+
+ *   | Sd = S / (R3d-min - R3d-max) |
+ *   +------------------------------+
+ *
+ *   dip = Sd * (R3d - Rm)^2 + Od
+ *
+ * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
+ *
+ *   Sd * (R3d-min - Rm)^2 + Od = 0
+ *   Od = -Sd * ((R3d-min - R3d-max) / 2)^2
+ *      = -Sd * ((R3d-min - R3d-max)^2) / 4
+ *
+ *   +------------------------------+
+ *   | Od = (emc-max - emc-min) / 4 |
+ *   +------------------------------+
+ *
+ */
+void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
+                               struct clk *clk_3d, struct clk *clk_3d_emc)
+{
+        long correction;
+        unsigned long max_emc;
+        unsigned long min_emc;
+        unsigned long min_rate_3d;
+        unsigned long max_rate_3d;
+        max_emc = clk_round_rate(clk_3d_emc, UINT_MAX); /* rounded rate for a UINT_MAX request, taken as the emc maximum */
+        max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc)); /* Hz -> MHz -> 20.12 fixed point */
+        min_emc = clk_round_rate(clk_3d_emc, 0); /* rounded rate for a 0 request, taken as the emc minimum */
+        min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
+        max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
+        max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
+        min_rate_3d = clk_round_rate(clk_3d, 0);
+        min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
+        emc_params->emc_slope =
+                FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d)); /* S = emc range / 3d range */
+        emc_params->emc_offset = max_emc -
+                FXMUL(emc_params->emc_slope, max_rate_3d); /* O = emc-max - S * R3d-max */
+        /* Guarantee max 3d rate maps to max emc rate */
+        emc_params->emc_offset += max_emc -
+                (FXMUL(emc_params->emc_slope, max_rate_3d) +
+                emc_params->emc_offset); /* adds the remaining error of the linear term at R3d-max */
+        emc_params->emc_dip_offset = (max_emc - min_emc) / 4; /* Od as derived in the header comment */
+        emc_params->emc_dip_slope =
+                -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d); /* Sd = S / (R3d-min - R3d-max) */
+        emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2; /* Rm */
+        correction =
+                emc_params->emc_dip_offset +
+                        FXMUL(emc_params->emc_dip_slope,
+                        FXMUL(max_rate_3d - emc_params->emc_xmid,
+                                max_rate_3d - emc_params->emc_xmid)); /* dip evaluated at R3d-max */
+        emc_params->emc_dip_offset -= correction; /* shift Od so the dip evaluates to zero at R3d-max */
+}
+/*
+ * gk20a_tegra_railgate()
+ *
+ * Gate (disable) gk20a power rail. The GPU partition is power gated only
+ * if it is currently reported as powered. Always returns 0.
+ */
+static int gk20a_tegra_railgate(struct platform_device *pdev)
+{
+        if (!tegra_powergate_is_powered(TEGRA_POWERGATE_GPU))
+                return 0;
+        tegra_powergate_partition(TEGRA_POWERGATE_GPU);
+        return 0;
+}
+/*
+ * gk20a_tegra_unrailgate()
+ *
+ * Ungate (enable) gk20a power rail by unpowergating the GPU partition.
+ * The result of the powergate call is not inspected; always returns 0.
+ */
+static int gk20a_tegra_unrailgate(struct platform_device *pdev)
+{
+        (void)pdev;
+        tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
+        return 0;
+}
+/*
+ * Clocks looked up by gk20a_tegra_get_clocks(), with the rate requested
+ * for each one. The position in this table is the index into
+ * platform->clk[]; index 2 ("emc") is used by the scaling code.
+ */
+struct {
+        char *name;
+        unsigned long default_rate;
+} tegra_gk20a_clocks[] = {
+        { "PLLG_ref", UINT_MAX },
+        { "pwr", 204000000 },
+        { "emc", UINT_MAX },
+};
+/*
+ * gk20a_tegra_get_clocks()
+ *
+ * This function finds clocks in tegra platform and populates
+ * the clock information to gk20a platform data.
+ *
+ * Every entry of tegra_gk20a_clocks is looked up under the device name
+ * "tegra_<name>" (or "tegra_<name>.<id>" when the device id is positive),
+ * set to its rounded default rate and stored in platform->clk[].
+ *
+ * Returns 0 on success. When a lookup fails, the clocks obtained so far
+ * are released, platform->num_clks stays 0 and the lookup error code is
+ * returned.
+ */
+static int gk20a_tegra_get_clocks(struct platform_device *pdev)
+{
+        struct gk20a_platform *platform = platform_get_drvdata(pdev);
+        char devname[16];
+        int i;
+        int ret = 0;
+        /*
+         * devname is a lookup key for clk_get_sys(), so it must not end
+         * in a newline.
+         */
+        if (pdev->id <= 0)
+                snprintf(devname, sizeof(devname), "tegra_%s", pdev->name);
+        else
+                snprintf(devname, sizeof(devname), "tegra_%s.%d",
+                         pdev->name, pdev->id);
+        platform->num_clks = 0;
+        for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
+                long rate = tegra_gk20a_clocks[i].default_rate;
+                struct clk *c;
+                c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
+                if (IS_ERR(c)) {
+                        ret = PTR_ERR(c);
+                        goto err_get_clock;
+                }
+                /* request the default rate, rounded by the clock itself */
+                rate = clk_round_rate(c, rate);
+                clk_set_rate(c, rate);
+                platform->clk[i] = c;
+        }
+        platform->num_clks = i;
+        return 0;
+err_get_clock:
+        /* release the clocks acquired before the failing one */
+        while (i--)
+                clk_put(platform->clk[i]);
+        return ret;
+}
+/*
+ * gk20a_tegra_scale_init()
+ *
+ * Allocate the emc scaling parameters, calibrate them from the gpu clock
+ * and the "emc" clock (platform->clk[2]) and attach them to the scale
+ * profile. Returns without doing anything when there is no profile or
+ * when the allocation fails.
+ */
+static void gk20a_tegra_scale_init(struct platform_device *pdev)
+{
+        struct gk20a_platform *platform = gk20a_get_platform(pdev);
+        struct gk20a_scale_profile *profile = platform->g->scale_profile;
+        struct gk20a_emc_params *params;
+        if (!profile)
+                return;
+        params = kzalloc(sizeof(*params), GFP_KERNEL);
+        if (params) {
+                gk20a_tegra_calibrate_emc(params,
+                                          gk20a_clk_get(platform->g),
+                                          platform->clk[2]);
+                profile->private_data = params;
+        }
+}
+/*
+ * gk20a_tegra_debug_dump()
+ *
+ * Pass the gk20a device on to nvhost_debug_dump_device().
+ */
+static void gk20a_tegra_debug_dump(struct platform_device *pdev)
+{
+        struct gk20a *gpu = gk20a_get_platform(pdev)->g;
+        nvhost_debug_dump_device(gpu->dev);
+}
+/*
+ * gk20a_tegra_probe()
+ *
+ * On a TEGRA_CHIPID_TEGRA13 chip the platform data is overwritten with
+ * the t132 variant, keeping the gk20a pointer that was already set. The
+ * clocks are then looked up; the result of that lookup is not checked
+ * and the function always returns 0.
+ */
+static int gk20a_tegra_probe(struct platform_device *dev)
+{
+        struct gk20a_platform *platform = gk20a_get_platform(dev);
+        bool is_t132 = (tegra_get_chipid() == TEGRA_CHIPID_TEGRA13);
+        if (is_t132) {
+                /* carry the gk20a pointer across the struct copy */
+                t132_gk20a_tegra_platform.g = platform->g;
+                *platform = t132_gk20a_tegra_platform;
+        }
+        gk20a_tegra_get_clocks(dev);
+        return 0;
+}
+/*
+ * gk20a_tegra_late_probe()
+ *
+ * Second probe stage: register the gk20a power domain as a subdomain
+ * and set up the tegra specific scaling parameters. Always returns 0.
+ */
+static int gk20a_tegra_late_probe(struct platform_device *dev)
+{
+        struct gk20a *g = gk20a_get_platform(dev)->g;
+        /* Make gk20a power domain a subdomain of mc */
+        tegra_pd_add_sd(&g->pd);
+        /* Initialise tegra specific scaling quirks */
+        gk20a_tegra_scale_init(dev);
+        return 0;
+}
+/*
+ * gk20a_tegra_suspend()
+ *
+ * Report a gpu load of 0 to EDP. Always returns 0.
+ */
+static int gk20a_tegra_suspend(struct device *dev)
+{
+        const int idle_load = 0;
+        tegra_edp_notify_gpu_load(idle_load);
+        return 0;
+}
+/*
+ * Resources of the gk20a platform device: three memory windows (BAR0,
+ * BAR1 and the simulator window) followed by the two interrupt lines.
+ */
+static struct resource gk20a_tegra_resources[] = {
+        {       /* BAR0 */
+                .flags = IORESOURCE_MEM,
+                .start = TEGRA_GK20A_BAR0_BASE,
+                .end   = TEGRA_GK20A_BAR0_BASE + TEGRA_GK20A_BAR0_SIZE - 1,
+        },
+        {       /* BAR1 */
+                .flags = IORESOURCE_MEM,
+                .start = TEGRA_GK20A_BAR1_BASE,
+                .end   = TEGRA_GK20A_BAR1_BASE + TEGRA_GK20A_BAR1_SIZE - 1,
+        },
+        {       /* Used on ASIM only */
+                .flags = IORESOURCE_MEM,
+                .start = TEGRA_GK20A_SIM_BASE,
+                .end   = TEGRA_GK20A_SIM_BASE + TEGRA_GK20A_SIM_SIZE - 1,
+        },
+        {       /* GPU interrupt */
+                .flags = IORESOURCE_IRQ,
+                .start = TEGRA_GK20A_INTR,
+                .end   = TEGRA_GK20A_INTR,
+        },
+        {       /* GPU non-stall interrupt */
+                .flags = IORESOURCE_IRQ,
+                .start = TEGRA_GK20A_INTR_NONSTALL,
+                .end   = TEGRA_GK20A_INTR_NONSTALL,
+        },
+};
+/*
+ * Platform description copied over the device's platform data by
+ * gk20a_tegra_probe() on TEGRA_CHIPID_TEGRA13. It has the same settings
+ * as gk20a_tegra_platform except that .can_railgate is not set.
+ */
+struct gk20a_platform t132_gk20a_tegra_platform = {
+        .has_syncpoints                 = true,
+        /* power management configuration */
+        .railgate_delay                 = 500,
+        .clockgate_delay                = 50,
+        .probe                          = gk20a_tegra_probe,
+        .late_probe                     = gk20a_tegra_late_probe,
+        /* power management callbacks */
+        .suspend                        = gk20a_tegra_suspend,
+        .railgate                       = gk20a_tegra_railgate,
+        .unrailgate                     = gk20a_tegra_unrailgate,
+        /* frequency scaling configuration */
+        .prescale                       = gk20a_tegra_prescale,
+        .postscale                      = gk20a_tegra_postscale,
+        .devfreq_governor               = "nvhost_podgov",
+        .qos_id                         = PM_QOS_GPU_FREQ_MIN,
+        /* channel, secure buffer and debug hooks */
+        .channel_busy                   = gk20a_tegra_channel_busy,
+        .channel_idle                   = gk20a_tegra_channel_idle,
+        .secure_alloc                   = gk20a_tegra_secure_alloc,
+        .dump_platform_dependencies     = gk20a_tegra_debug_dump,
+};
+/*
+ * Default platform description, installed as the platform data of
+ * tegra_gk20a_device. Rail gating is enabled here.
+ */
+struct gk20a_platform gk20a_tegra_platform = {
+        .has_syncpoints                 = true,
+        /* power management configuration */
+        .railgate_delay                 = 500,
+        .clockgate_delay                = 50,
+        .can_railgate                   = true,
+        .probe                          = gk20a_tegra_probe,
+        .late_probe                     = gk20a_tegra_late_probe,
+        /* power management callbacks */
+        .suspend                        = gk20a_tegra_suspend,
+        .railgate                       = gk20a_tegra_railgate,
+        .unrailgate                     = gk20a_tegra_unrailgate,
+        /* frequency scaling configuration */
+        .prescale                       = gk20a_tegra_prescale,
+        .postscale                      = gk20a_tegra_postscale,
+        .devfreq_governor               = "nvhost_podgov",
+        .qos_id                         = PM_QOS_GPU_FREQ_MIN,
+        /* channel, secure buffer and debug hooks */
+        .channel_busy                   = gk20a_tegra_channel_busy,
+        .channel_idle                   = gk20a_tegra_channel_idle,
+        .secure_alloc                   = gk20a_tegra_secure_alloc,
+        .dump_platform_dependencies     = gk20a_tegra_debug_dump,
+};
+/*
+ * The "gk20a" platform device: the resources listed above plus the
+ * default platform description as platform data.
+ */
+struct platform_device tegra_gk20a_device = {
+        .name = "gk20a",
+        .num_resources = ARRAY_SIZE(gk20a_tegra_resources),
+        .resource = gk20a_tegra_resources,
+        .dev = {
+                .platform_data = &gk20a_tegra_platform,
+        },
+};

diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c new file mode 100644 index 00000000..35658f31 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -0,0 +1,561 @@
	1	/*
	2	* drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
	3	*
	4	* GK20A Tegra Platform Interface
	5	*
	6	* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
	7	*
	8	* This program is free software; you can redistribute it and/or modify it
	9	* under the terms and conditions of the GNU General Public License,
	10	* version 2, as published by the Free Software Foundation.
	11	*
	12	* This program is distributed in the hope it will be useful, but WITHOUT
	13	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	14	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
	15	* more details.
	16	*/
	17
	18	#include <linux/debugfs.h>
	19	#include <linux/tegra-powergate.h>
	20	#include <linux/platform_data/tegra_edp.h>
	21	#include <linux/nvhost_ioctl.h>
	22	#include <linux/dma-buf.h>
	23	#include <linux/nvmap.h>
	24	#include <mach/irqs.h>
	25	#include <mach/pm_domains.h>
	26
	27	#include "../../../arch/arm/mach-tegra/iomap.h"
	28
	29	#include "gk20a.h"
	30	#include "hal_gk20a.h"
	31	#include "platform_gk20a.h"
	32	#include "gk20a_scale.h"
	33
	34	#define TEGRA_GK20A_INTR INT_GPU
	35	#define TEGRA_GK20A_INTR_NONSTALL INT_GPU_NONSTALL
	36
	37	#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
	38	#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
	39
	40	extern struct device tegra_vpr_dev;
	41	struct gk20a_platform t132_gk20a_tegra_platform;
	42
	43	struct gk20a_emc_params {
	44	long emc_slope;
	45	long emc_offset;
	46	long emc_dip_slope;
	47	long emc_dip_offset;
	48	long emc_xmid;
	49	bool linear;
	50	};
	51
	52	/*
	53	* 20.12 fixed point arithmetic
	54	*/
	55
	56	static const int FXFRAC = 12;
	57	static const int FX_HALF = (1 << 12) / 2;
	58
	59	#define INT_TO_FX(x) ((x) << FXFRAC)
	60	#define FX_TO_INT(x) ((x) >> FXFRAC)
	61
	62	#define MHZ_TO_HZ(x) ((x) * 1000000)
	63	#define HZ_TO_MHZ(x) ((x) / 1000000)
	64
	65	int FXMUL(int x, int y)
	66	{
	67	return ((long long) x * (long long) y) >> FXFRAC;
	68	}
	69
	70	int FXDIV(int x, int y)
	71	{
	72	/* long long div operation not supported, must shift manually. This
	73	* would have been
	74	*
	75	* return (((long long) x) << FXFRAC) / (long long) y;
	76	*/
	77	int pos, t;
	78	if (x == 0)
	79	return 0;
	80
	81	/* find largest allowable right shift to numerator, limit to FXFRAC */
	82	t = x < 0 ? -x : x;
	83	pos = 31 - fls(t); /* fls can't be 32 if x != 0 */
	84	if (pos > FXFRAC)
	85	pos = FXFRAC;
	86
	87	y >>= FXFRAC - pos;
	88	if (y == 0)
	89	return 0x7FFFFFFF; /* overflow, return MAX_FIXED */
	90
	91	return (x << pos) / y;
	92	}
	93
	94	static int gk20a_tegra_channel_busy(struct platform_device *dev)
	95	{
	96	int ret = 0;
	97
	98	/* Explicitly turn on the host1x clocks
	99	* - This is needed as host1x driver sets ignore_children = true
	100	* to cater the use case of display clock ON but host1x clock OFF
	101	* in OS-Idle-Display-ON case
	102	* - This was easily done in ACM as it only checked the ref count
	103	* of host1x (or any device for that matter) to be zero before
	104	* turning off its clock
	105	* - However, runtime PM checks to see if *ANY* child of device is
	106	* in ACTIVE state and if yes, it doesn't suspend the parent. As a
	107	* result of this, display && host1x clocks remains ON during
	108	* OS-Idle-Display-ON case
	109	* - The code below fixes this use-case
	110	*/
	111	if (to_platform_device(dev->dev.parent))
	112	ret = nvhost_module_busy_ext(
	113	to_platform_device(dev->dev.parent));
	114
	115	return ret;
	116	}
	117
	118	static void gk20a_tegra_channel_idle(struct platform_device *dev)
	119	{
	120	/* Explicitly turn off the host1x clocks */
	121	if (to_platform_device(dev->dev.parent))
	122	nvhost_module_idle_ext(to_platform_device(dev->dev.parent));
	123	}
	124
	125	static void gk20a_tegra_secure_destroy(struct platform_device *pdev,
	126	struct gr_ctx_buffer_desc *desc)
	127	{
	128	gk20a_free_sgtable(&desc->sgt);
	129	dma_free_attrs(&tegra_vpr_dev, desc->size,
	130	(void *)(uintptr_t)&desc->iova,
	131	desc->iova, &desc->attrs);
	132	}
	133
	134	static int gk20a_tegra_secure_alloc(struct platform_device *pdev,
	135	struct gr_ctx_buffer_desc *desc,
	136	size_t size)
	137	{
	138	struct device *dev = &pdev->dev;
	139	DEFINE_DMA_ATTRS(attrs);
	140	dma_addr_t iova;
	141	struct sg_table *sgt;
	142	struct page *page;
	143	int err = 0;
	144
	145	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
	146
	147	(void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
	148	GFP_KERNEL, &attrs);
	149	if (dma_mapping_error(&tegra_vpr_dev, iova))
	150	return -ENOMEM;
	151
	152	desc->iova = iova;
	153	desc->size = size;
	154	desc->attrs = attrs;
	155	desc->destroy = gk20a_tegra_secure_destroy;
	156
	157	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
	158	if (!sgt) {
	159	gk20a_err(dev, "failed to allocate memory\n");
	160	goto fail;
	161	}
	162	err = sg_alloc_table(sgt, 1, GFP_KERNEL);
	163	if (err) {
	164	gk20a_err(dev, "failed to allocate sg_table\n");
	165	goto fail_sgt;
	166	}
	167	page = phys_to_page(iova);
	168	sg_set_page(sgt->sgl, page, size, 0);
	169	sg_dma_address(sgt->sgl) = iova;
	170
	171	desc->sgt = sgt;
	172
	173	return err;
	174
	175	fail_sgt:
	176	kfree(sgt);
	177	fail:
	178	dma_free_attrs(&tegra_vpr_dev, desc->size,
	179	(void *)(uintptr_t)&desc->iova,
	180	desc->iova, &desc->attrs);
	181	return err;
	182	}
	183
	184	/*
	185	* gk20a_tegra_get_emc_rate()
	186	*
	187	* This function returns the minimum emc clock based on gpu frequency
	188	*/
	189
	190	long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq)
	191	{
	192	long hz;
	193
	194	freq = INT_TO_FX(HZ_TO_MHZ(freq));
	195	hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
	196
	197	hz -= FXMUL(emc_params->emc_dip_slope,
	198	FXMUL(freq - emc_params->emc_xmid,
	199	freq - emc_params->emc_xmid)) +
	200	emc_params->emc_dip_offset;
	201
	202	hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */
	203	hz = (hz < 0) ? 0 : hz;
	204
	205	return hz;
	206	}
	207
	208	/*
	209	* gk20a_tegra_postscale(profile, freq)
	210	*
	211	* This function sets emc frequency based on current gpu frequency
	212	*/
	213
	214	static void gk20a_tegra_postscale(struct platform_device *pdev,
	215	unsigned long freq)
	216	{
	217	struct gk20a_platform *platform = platform_get_drvdata(pdev);
	218	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	219	struct gk20a_emc_params *emc_params = profile->private_data;
	220	struct gk20a *g = get_gk20a(pdev);
	221
	222	long after = gk20a_clk_get_rate(g);
	223	long emc_target = gk20a_tegra_get_emc_rate(emc_params, after);
	224
	225	clk_set_rate(platform->clk[2], emc_target);
	226	}
	227
	228	/*
	229	* gk20a_tegra_prescale(profile, freq)
	230	*
	231	* This function informs EDP about changed constraints.
	232	*/
	233
	234	static void gk20a_tegra_prescale(struct platform_device *pdev)
	235	{
	236	struct gk20a *g = get_gk20a(pdev);
	237	u32 avg = 0;
	238
	239	gk20a_pmu_load_norm(g, &avg);
	240	tegra_edp_notify_gpu_load(avg);
	241	}
	242
	243	/*
	244	* gk20a_tegra_calibrate_emc()
	245	*
	246	* Compute emc scaling parameters
	247	*
	248	* Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
	249	*
	250	* Remc - 3d.emc rate
	251	* R3d - 3d.cbus rate
	252	* Rm - 3d.cbus 'middle' rate = (max + min)/2
	253	* S - emc_slope
	254	* O - emc_offset
	255	* Sd - emc_dip_slope
	256	* Od - emc_dip_offset
	257	*
	258	* this superposes a quadratic dip centered around the middle 3d
	259	* frequency over a linear correlation of 3d.emc to 3d clock
	260	* rates.
	261	*
	262	* S, O are chosen so that the maximum 3d rate produces the
	263	* maximum 3d.emc rate exactly, and the minimum 3d rate produces
	264	* at least the minimum 3d.emc rate.
	265	*
	266	* Sd and Od are chosen to produce the largest dip that will
	267	* keep 3d.emc frequencies monotonously decreasing with 3d
	268	* frequencies. To achieve this, the first derivative of Remc
	269	* with respect to R3d should be zero for the minimal 3d rate:
	270	*
	271	* R'emc = S - 2 * Sd * (R3d - Rm)
	272	* R'emc(R3d-min) = 0
	273	* S = 2 * Sd * (R3d-min - Rm)
	274	* = 2 * Sd * (R3d-min - R3d-max) / 2
	275	*
	276	* +------------------------------+
	277	* | Sd = S / (R3d-min - R3d-max) |
	278	* +------------------------------+
	279	*
	280	* dip = Sd * (R3d - Rm)^2 + Od
	281	*
	282	* requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
	283	*
	284	* Sd * (R3d-min - Rm)^2 + Od = 0
	285	* Od = -Sd * ((R3d-min - R3d-max) / 2)^2
	286	* = -Sd * ((R3d-min - R3d-max)^2) / 4
	287	*
	288	* +------------------------------+
	289	* | Od = (emc-max - emc-min) / 4 |
	290	* +------------------------------+
	291	*
	292	*/
	293
	294	void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
	295	struct clk *clk_3d, struct clk *clk_3d_emc)
	296	{
	297	long correction;
	298	unsigned long max_emc;
	299	unsigned long min_emc;
	300	unsigned long min_rate_3d;
	301	unsigned long max_rate_3d;
	302
	303	max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
	304	max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));
	305
	306	min_emc = clk_round_rate(clk_3d_emc, 0);
	307	min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
	308
	309	max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
	310	max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
	311
	312	min_rate_3d = clk_round_rate(clk_3d, 0);
	313	min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
	314
	315	emc_params->emc_slope =
	316	FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
	317	emc_params->emc_offset = max_emc -
	318	FXMUL(emc_params->emc_slope, max_rate_3d);
	319	/* Guarantee max 3d rate maps to max emc rate */
	320	emc_params->emc_offset += max_emc -
	321	(FXMUL(emc_params->emc_slope, max_rate_3d) +
	322	emc_params->emc_offset);
	323
	324	emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
	325	emc_params->emc_dip_slope =
	326	-FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
	327	emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
	328	correction =
	329	emc_params->emc_dip_offset +
	330	FXMUL(emc_params->emc_dip_slope,
	331	FXMUL(max_rate_3d - emc_params->emc_xmid,
	332	max_rate_3d - emc_params->emc_xmid));
	333	emc_params->emc_dip_offset -= correction;
	334	}
	335
	336	/*
	337	* gk20a_tegra_railgate()
	338	*
	339	* Gate (disable) gk20a power rail
	340	*/
	341
	342	static int gk20a_tegra_railgate(struct platform_device *pdev)
	343	{
	344	if (tegra_powergate_is_powered(TEGRA_POWERGATE_GPU))
	345	tegra_powergate_partition(TEGRA_POWERGATE_GPU);
	346	return 0;
	347	}
	348
	349	/*
	350	* gk20a_tegra_unrailgate()
	351	*
	352	* Ungate (enable) gk20a power rail
	353	*/
	354
	355	static int gk20a_tegra_unrailgate(struct platform_device *pdev)
	356	{
	357	tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
	358	return 0;
	359	}
	360
	361	struct {
	362	char *name;
	363	unsigned long default_rate;
	364	} tegra_gk20a_clocks[] = {
	365	{"PLLG_ref", UINT_MAX},
	366	{"pwr", 204000000},
	367	{"emc", UINT_MAX} };
	368
	369	/*
	370	* gk20a_tegra_get_clocks()
	371	*
	372	* This function finds clocks in tegra platform and populates
	373	* the clock information to gk20a platform data.
	374	*/
	375
	376	static int gk20a_tegra_get_clocks(struct platform_device *pdev)
	377	{
	378	struct gk20a_platform *platform = platform_get_drvdata(pdev);
	379	char devname[16];
	380	int i;
	381	int ret = 0;
	382
	383	snprintf(devname, sizeof(devname),
	384	(pdev->id <= 0) ? "tegra_%s" : "tegra_%s.%d\n",
	385	pdev->name, pdev->id);
	386
	387	platform->num_clks = 0;
	388	for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
	389	long rate = tegra_gk20a_clocks[i].default_rate;
	390	struct clk *c;
	391
	392	c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
	393	if (IS_ERR(c)) {
	394	ret = PTR_ERR(c);
	395	goto err_get_clock;
	396	}
	397	rate = clk_round_rate(c, rate);
	398	clk_set_rate(c, rate);
	399	platform->clk[i] = c;
	400	}
	401	platform->num_clks = i;
	402
	403	return 0;
	404
	405	err_get_clock:
	406
	407	while (i--)
	408	clk_put(platform->clk[i]);
	409	return ret;
	410	}
	411
	412	static void gk20a_tegra_scale_init(struct platform_device *pdev)
	413	{
	414	struct gk20a_platform *platform = gk20a_get_platform(pdev);
	415	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	416	struct gk20a_emc_params *emc_params;
	417
	418	if (!profile)
	419	return;
	420
	421	emc_params = kzalloc(sizeof(*emc_params), GFP_KERNEL);
	422	if (!emc_params)
	423	return;
	424
	425	gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g),
	426	platform->clk[2]);
	427
	428	profile->private_data = emc_params;
	429	}
	430
	431	static void gk20a_tegra_debug_dump(struct platform_device *pdev)
	432	{
	433	struct gk20a_platform *platform = gk20a_get_platform(pdev);
	434	struct gk20a *g = platform->g;
	435	nvhost_debug_dump_device(g->dev);
	436	}
	437
	438	static int gk20a_tegra_probe(struct platform_device *dev)
	439	{
	440	struct gk20a_platform *platform = gk20a_get_platform(dev);
	441
	442	if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA13) {
	443	t132_gk20a_tegra_platform.g = platform->g;
	444	*platform = t132_gk20a_tegra_platform;
	445	}
	446
	447	gk20a_tegra_get_clocks(dev);
	448
	449	return 0;
	450	}
	451
	452	static int gk20a_tegra_late_probe(struct platform_device *dev)
	453	{
	454	struct gk20a_platform *platform = gk20a_get_platform(dev);
	455
	456	/* Make gk20a power domain a subdomain of mc */
	457	tegra_pd_add_sd(&platform->g->pd);
	458
	459	/* Initialise tegra specific scaling quirks */
	460	gk20a_tegra_scale_init(dev);
	461
	462	return 0;
	463	}
	464
	465	static int gk20a_tegra_suspend(struct device *dev)
	466	{
	467	tegra_edp_notify_gpu_load(0);
	468	return 0;
	469	}
	470
	471	static struct resource gk20a_tegra_resources[] = {
	472	{
	473	.start = TEGRA_GK20A_BAR0_BASE,
	474	.end = TEGRA_GK20A_BAR0_BASE + TEGRA_GK20A_BAR0_SIZE - 1,
	475	.flags = IORESOURCE_MEM,
	476	},
	477	{
	478	.start = TEGRA_GK20A_BAR1_BASE,
	479	.end = TEGRA_GK20A_BAR1_BASE + TEGRA_GK20A_BAR1_SIZE - 1,
	480	.flags = IORESOURCE_MEM,
	481	},
	482	{ /* Used on ASIM only */
	483	.start = TEGRA_GK20A_SIM_BASE,
	484	.end = TEGRA_GK20A_SIM_BASE + TEGRA_GK20A_SIM_SIZE - 1,
	485	.flags = IORESOURCE_MEM,
	486	},
	487	{
	488	.start = TEGRA_GK20A_INTR,
	489	.end = TEGRA_GK20A_INTR,
	490	.flags = IORESOURCE_IRQ,
	491	},
	492	{
	493	.start = TEGRA_GK20A_INTR_NONSTALL,
	494	.end = TEGRA_GK20A_INTR_NONSTALL,
	495	.flags = IORESOURCE_IRQ,
	496	},
	497	};
	498
	499	struct gk20a_platform t132_gk20a_tegra_platform = {
	500	.has_syncpoints = true,
	501
	502	/* power management configuration */
	503	.railgate_delay = 500,
	504	.clockgate_delay = 50,
	505
	506	.probe = gk20a_tegra_probe,
	507	.late_probe = gk20a_tegra_late_probe,
	508
	509	/* power management callbacks */
	510	.suspend = gk20a_tegra_suspend,
	511	.railgate = gk20a_tegra_railgate,
	512	.unrailgate = gk20a_tegra_unrailgate,
	513
	514	/* frequency scaling configuration */
	515	.prescale = gk20a_tegra_prescale,
	516	.postscale = gk20a_tegra_postscale,
	517	.devfreq_governor = "nvhost_podgov",
	518	.qos_id = PM_QOS_GPU_FREQ_MIN,
	519
	520	.channel_busy = gk20a_tegra_channel_busy,
	521	.channel_idle = gk20a_tegra_channel_idle,
	522	.secure_alloc = gk20a_tegra_secure_alloc,
	523	.dump_platform_dependencies = gk20a_tegra_debug_dump,
	524	};
	525
	526	struct gk20a_platform gk20a_tegra_platform = {
	527	.has_syncpoints = true,
	528
	529	/* power management configuration */
	530	.railgate_delay = 500,
	531	.clockgate_delay = 50,
	532	.can_railgate = true,
	533
	534	.probe = gk20a_tegra_probe,
	535	.late_probe = gk20a_tegra_late_probe,
	536
	537	/* power management callbacks */
	538	.suspend = gk20a_tegra_suspend,
	539	.railgate = gk20a_tegra_railgate,
	540	.unrailgate = gk20a_tegra_unrailgate,
	541
	542	/* frequency scaling configuration */
	543	.prescale = gk20a_tegra_prescale,
	544	.postscale = gk20a_tegra_postscale,
	545	.devfreq_governor = "nvhost_podgov",
	546	.qos_id = PM_QOS_GPU_FREQ_MIN,
	547
	548	.channel_busy = gk20a_tegra_channel_busy,
	549	.channel_idle = gk20a_tegra_channel_idle,
	550	.secure_alloc = gk20a_tegra_secure_alloc,
	551	.dump_platform_dependencies = gk20a_tegra_debug_dump,
	552	};
	553
	554	struct platform_device tegra_gk20a_device = {
	555	.name = "gk20a",
	556	.resource = gk20a_tegra_resources,
	557	.num_resources = ARRAY_SIZE(gk20a_tegra_resources),
	558	.dev = {
	559	.platform_data = &gk20a_tegra_platform,
	560	},
	561	};