/*
* drivers/video/tegra/host/gk20a/platform_gk20a_tegra.c
*
* GK20A Tegra Platform Interface
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/of_platform.h>
#include <linux/debugfs.h>
#include <linux/tegra-powergate.h>
#include <linux/platform_data/tegra_edp.h>
#include <linux/nvhost_ioctl.h>
#include <linux/dma-buf.h>
#include <linux/nvmap.h>
#include <linux/tegra_pm_domains.h>
#include <mach/irqs.h>
#include "../../../arch/arm/mach-tegra/iomap.h"
#include "gk20a.h"
#include "hal_gk20a.h"
#include "platform_gk20a.h"
#include "gk20a_scale.h"
#define TEGRA_GK20A_INTR INT_GPU
#define TEGRA_GK20A_INTR_NONSTALL INT_GPU_NONSTALL
#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
extern struct device tegra_vpr_dev;
struct gk20a_platform t132_gk20a_tegra_platform;
struct gk20a_emc_params {
long emc_slope;
long emc_offset;
long emc_dip_slope;
long emc_dip_offset;
long emc_xmid;
bool linear;
};
/*
* 20.12 fixed point arithmetic
*/
static const int FXFRAC = 12;
static const int FX_HALF = (1 << 12) / 2;
#define INT_TO_FX(x) ((x) << FXFRAC)
#define FX_TO_INT(x) ((x) >> FXFRAC)
#define MHZ_TO_HZ(x) ((x) * 1000000)
#define HZ_TO_MHZ(x) ((x) / 1000000)
int FXMUL(int x, int y)
{
return ((long long) x * (long long) y) >> FXFRAC;
}
int FXDIV(int x, int y)
{
/* long long div operation not supported, must shift manually. This
* would have been
*
* return (((long long) x) << FXFRAC) / (long long) y;
*/
int pos, t;
if (x == 0)
return 0;
/* find largest allowable right shift to numerator, limit to FXFRAC */
t = x < 0 ? -x : x;
pos = 31 - fls(t); /* fls can't be 32 if x != 0 */
if (pos > FXFRAC)
pos = FXFRAC;
y >>= FXFRAC - pos;
if (y == 0)
return 0x7FFFFFFF; /* overflow, return MAX_FIXED */
return (x << pos) / y;
}
static void gk20a_tegra_secure_page_destroy(struct platform_device *pdev,
struct secure_page_buffer *secure_buffer)
{
dma_free_attrs(&tegra_vpr_dev, secure_buffer->size,
(void *)(uintptr_t)secure_buffer->iova,
secure_buffer->iova, &secure_buffer->attrs);
}
static int gk20a_tegra_secure_page_alloc(struct platform_device *pdev)
{
struct gk20a_platform *platform = platform_get_drvdata(pdev);
struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
DEFINE_DMA_ATTRS(attrs);
dma_addr_t iova;
size_t size = PAGE_SIZE;
(void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
DMA_MEMORY_NOMAP, &attrs);
if (dma_mapping_error(&tegra_vpr_dev, iova))
return -ENOMEM;
secure_buffer->size = size;
secure_buffer->iova = iova;
secure_buffer->attrs = attrs;
secure_buffer->destroy = gk20a_tegra_secure_page_destroy;
return 0;
}
static void gk20a_tegra_secure_destroy(struct platform_device *pdev,
struct gr_ctx_buffer_desc *desc)
{
gk20a_free_sgtable(&desc->sgt);
dma_free_attrs(&tegra_vpr_dev, desc->size,
(void *)(uintptr_t)desc->iova,
desc->iova, &desc->attrs);
}
static int gk20a_tegra_secure_alloc(struct platform_device *pdev,
struct gr_ctx_buffer_desc *desc,
size_t size)
{
struct gk20a_platform *platform = platform_get_drvdata(pdev);
struct device *dev = &pdev->dev;
DEFINE_DMA_ATTRS(attrs);
dma_addr_t iova;
struct sg_table *sgt;
struct page *page;
int err = 0;
if (!platform->secure_alloc_ready)
return -EINVAL;
(void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
DMA_MEMORY_NOMAP, &attrs);
if (dma_mapping_error(&tegra_vpr_dev, iova))
return -ENOMEM;
desc->iova = iova;
desc->size = size;
desc->attrs = attrs;
desc->destroy = gk20a_tegra_secure_destroy;
sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
if (!sgt) {
gk20a_err(dev, "failed to allocate memory\n");
goto fail;
}
err = sg_alloc_table(sgt, 1, GFP_KERNEL);
if (err) {
gk20a_err(dev, "failed to allocate sg_table\n");
goto fail_sgt;
}
page = phys_to_page(iova);
sg_set_page(sgt->sgl, page, size, 0);
sg_dma_address(sgt->sgl) = iova;
desc->sgt = sgt;
return err;
fail_sgt:
kfree(sgt);
fail:
dma_free_attrs(&tegra_vpr_dev, desc->size,
(void *)(uintptr_t)&desc->iova,
desc->iova, &desc->attrs);
return err;
}
/*
* gk20a_tegra_get_emc_rate()
*
* This function returns the minimum emc clock based on gpu frequency
*/
long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq)
{
long hz;
freq = INT_TO_FX(HZ_TO_MHZ(freq));
hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
hz -= FXMUL(emc_params->emc_dip_slope,
FXMUL(freq - emc_params->emc_xmid,
freq - emc_params->emc_xmid)) +
emc_params->emc_dip_offset;
hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */
hz = (hz < 0) ? 0 : hz;
return hz;
}
/*
* gk20a_tegra_postscale(profile, freq)
*
* This function sets emc frequency based on current gpu frequency
*/
static void gk20a_tegra_postscale(struct platform_device *pdev,
unsigned long freq)
{
struct gk20a_platform *platform = platform_get_drvdata(pdev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct gk20a_emc_params *emc_params = profile->private_data;
struct gk20a *g = get_gk20a(pdev);
long after = gk20a_clk_get_rate(g);
long emc_target = gk20a_tegra_get_emc_rate(emc_params, after);
clk_set_rate(platform->clk[2], emc_target);
}
/*
* gk20a_tegra_prescale(profile, freq)
*
* This function informs EDP about changed constraints.
*/
static void gk20a_tegra_prescale(struct platform_device *pdev)
{
struct gk20a *g = get_gk20a(pdev);
u32 avg = 0;
gk20a_pmu_load_norm(g, &avg);
tegra_edp_notify_gpu_load(avg);
}
/*
* gk20a_tegra_calibrate_emc()
*
* Compute emc scaling parameters
*
* Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
*
* Remc - 3d.emc rate
* R3d - 3d.cbus rate
* Rm - 3d.cbus 'middle' rate = (max + min)/2
* S - emc_slope
* O - emc_offset
* Sd - emc_dip_slope
* Od - emc_dip_offset
*
* this superposes a quadratic dip centered around the middle 3d
* frequency over a linear correlation of 3d.emc to 3d clock
* rates.
*
* S, O are chosen so that the maximum 3d rate produces the
* maximum 3d.emc rate exactly, and the minimum 3d rate produces
* at least the minimum 3d.emc rate.
*
* Sd and Od are chosen to produce the largest dip that will
* keep 3d.emc frequencies monotonously decreasing with 3d
* frequencies. To achieve this, the first derivative of Remc
* with respect to R3d should be zero for the minimal 3d rate:
*
* R'emc = S - 2 * Sd * (R3d - Rm)
* R'emc(R3d-min) = 0
* S = 2 * Sd * (R3d-min - Rm)
* = 2 * Sd * (R3d-min - R3d-max) / 2
*
* +------------------------------+
* | Sd = S / (R3d-min - R3d-max) |
* +------------------------------+
*
* dip = Sd * (R3d - Rm)^2 + Od
*
* requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
*
* Sd * (R3d-min - Rm)^2 + Od = 0
* Od = -Sd * ((R3d-min - R3d-max) / 2)^2
* = -Sd * ((R3d-min - R3d-max)^2) / 4
*
* +------------------------------+
* | Od = (emc-max - emc-min) / 4 |
* +------------------------------+
*
*/
void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
struct clk *clk_3d, struct clk *clk_3d_emc)
{
long correction;
unsigned long max_emc;
unsigned long min_emc;
unsigned long min_rate_3d;
unsigned long max_rate_3d;
max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));
min_emc = clk_round_rate(clk_3d_emc, 0);
min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
min_rate_3d = clk_round_rate(clk_3d, 0);
min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
emc_params->emc_slope =
FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
emc_params->emc_offset = max_emc -
FXMUL(emc_params->emc_slope, max_rate_3d);
/* Guarantee max 3d rate maps to max emc rate */
emc_params->emc_offset += max_emc -
(FXMUL(emc_params->emc_slope, max_rate_3d) +
emc_params->emc_offset);
emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
emc_params->emc_dip_slope =
-FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
correction =
emc_params->emc_dip_offset +
FXMUL(emc_params->emc_dip_slope,
FXMUL(max_rate_3d - emc_params->emc_xmid,
max_rate_3d - emc_params->emc_xmid));
emc_params->emc_dip_offset -= correction;
}
/*
* gk20a_tegra_is_railgated()
*
* Check status of gk20a power rail
*/
static bool gk20a_tegra_is_railgated(struct platform_device *pdev)
{
return !tegra_powergate_is_powered(TEGRA_POWERGATE_GPU);
}
/*
* gk20a_tegra_railgate()
*
* Gate (disable) gk20a power rail
*/
static int gk20a_tegra_railgate(struct platform_device *pdev)
{
if (tegra_powergate_is_powered(TEGRA_POWERGATE_GPU))
tegra_powergate_partition(TEGRA_POWERGATE_GPU);
return 0;
}
/*
* gk20a_tegra_unrailgate()
*
* Ungate (enable) gk20a power rail
*/
static int gk20a_tegra_unrailgate(struct platform_device *pdev)
{
int ret;
ret = tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
return ret;
}
struct {
char *name;
unsigned long default_rate;
} tegra_gk20a_clocks[] = {
{"PLLG_ref", UINT_MAX},
{"pwr", 204000000},
{"emc", UINT_MAX} };
/*
* gk20a_tegra_get_clocks()
*
* This function finds clocks in tegra platform and populates
* the clock information to gk20a platform data.
*/
static int gk20a_tegra_get_clocks(struct platform_device *pdev)
{
struct gk20a_platform *platform = platform_get_drvdata(pdev);
char devname[16];
int i;
int ret = 0;
snprintf(devname, sizeof(devname),
(pdev->id <= 0) ? "tegra_%s" : "tegra_%s.%d\n",
pdev->name, pdev->id);
platform->num_clks = 0;
for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
long rate = tegra_gk20a_clocks[i].default_rate;
struct clk *c;
c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
if (IS_ERR(c)) {
ret = PTR_ERR(c);
goto err_get_clock;
}
rate = clk_round_rate(c, rate);
clk_set_rate(c, rate);
platform->clk[i] = c;
}
platform->num_clks = i;
return 0;
err_get_clock:
while (i--)
clk_put(platform->clk[i]);
return ret;
}
static void gk20a_tegra_scale_init(struct platform_device *pdev)
{
struct gk20a_platform *platform = gk20a_get_platform(pdev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct gk20a_emc_params *emc_params;
if (!profile)
return;
emc_params = kzalloc(sizeof(*emc_params), GFP_KERNEL);
if (!emc_params)
return;
gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g),
platform->clk[2]);
profile->private_data = emc_params;
}
static void gk20a_tegra_debug_dump(struct platform_device *pdev)
{
struct gk20a_platform *platform = gk20a_get_platform(pdev);
struct gk20a *g = platform->g;
nvhost_debug_dump_device(g->host1x_dev);
}
static int gk20a_tegra_probe(struct platform_device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct device_node *np = dev->dev.of_node;
const __be32 *host1x_ptr;
struct platform_device *host1x_pdev = NULL;
host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
if (host1x_ptr) {
struct device_node *host1x_node =
of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
host1x_pdev = of_find_device_by_node(host1x_node);
if (!host1x_pdev) {
dev_warn(&dev->dev, "host1x device not available");
return -EPROBE_DEFER;
}
} else {
host1x_pdev = to_platform_device(dev->dev.parent);
dev_warn(&dev->dev, "host1x reference not found. assuming host1x to be parent");
}
platform->g->host1x_dev = host1x_pdev;
if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA13) {
t132_gk20a_tegra_platform.g = platform->g;
*platform = t132_gk20a_tegra_platform;
}
gk20a_tegra_get_clocks(dev);
return 0;
}
static int gk20a_tegra_late_probe(struct platform_device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
/* Make gk20a power domain a subdomain of host1x */
nvhost_register_client_domain(&platform->g->pd);
/* Initialise tegra specific scaling quirks */
gk20a_tegra_scale_init(dev);
return 0;
}
static int gk20a_tegra_suspend(struct device *dev)
{
tegra_edp_notify_gpu_load(0);
return 0;
}
static struct resource gk20a_tegra_resources[] = {
{
.start = TEGRA_GK20A_BAR0_BASE,
.end = TEGRA_GK20A_BAR0_BASE + TEGRA_GK20A_BAR0_SIZE - 1,
.flags = IORESOURCE_MEM,
},
{
.start = TEGRA_GK20A_BAR1_BASE,
.end = TEGRA_GK20A_BAR1_BASE + TEGRA_GK20A_BAR1_SIZE - 1,
.flags = IORESOURCE_MEM,
},
{ /* Used on ASIM only */
.start = TEGRA_GK20A_SIM_BASE,
.end = TEGRA_GK20A_SIM_BASE + TEGRA_GK20A_SIM_SIZE - 1,
.flags = IORESOURCE_MEM,
},
{
.start = TEGRA_GK20A_INTR,
.end = TEGRA_GK20A_INTR,
.flags = IORESOURCE_IRQ,
},
{
.start = TEGRA_GK20A_INTR_NONSTALL,
.end = TEGRA_GK20A_INTR_NONSTALL,
.flags = IORESOURCE_IRQ,
},
};
struct gk20a_platform t132_gk20a_tegra_platform = {
.has_syncpoints = true,
/* power management configuration */
.railgate_delay = 500,
.clockgate_delay = 50,
.can_railgate = true,
.enable_slcg = true,
.enable_blcg = true,
.enable_elcg = true,
.enable_elpg = true,
.enable_aelpg = true,
.probe = gk20a_tegra_probe,
.late_probe = gk20a_tegra_late_probe,
/* power management callbacks */
.suspend = gk20a_tegra_suspend,
.railgate = gk20a_tegra_railgate,
.unrailgate = gk20a_tegra_unrailgate,
.is_railgated = gk20a_tegra_is_railgated,
/* frequency scaling configuration */
.prescale = gk20a_tegra_prescale,
.postscale = gk20a_tegra_postscale,
.devfreq_governor = "nvhost_podgov",
.qos_id = PM_QOS_GPU_FREQ_MIN,
.secure_alloc = gk20a_tegra_secure_alloc,
.secure_page_alloc = gk20a_tegra_secure_page_alloc,
.dump_platform_dependencies = gk20a_tegra_debug_dump,
};
struct gk20a_platform gk20a_tegra_platform = {
.has_syncpoints = true,
/* power management configuration */
.railgate_delay = 500,
.clockgate_delay = 50,
.can_railgate = true,
.enable_slcg = true,
.enable_blcg = true,
.enable_elcg = true,
.enable_elpg = true,
.enable_aelpg = true,
.probe = gk20a_tegra_probe,
.late_probe = gk20a_tegra_late_probe,
/* power management callbacks */
.suspend = gk20a_tegra_suspend,
.railgate = gk20a_tegra_railgate,
.unrailgate = gk20a_tegra_unrailgate,
.is_railgated = gk20a_tegra_is_railgated,
/* frequency scaling configuration */
.prescale = gk20a_tegra_prescale,
.postscale = gk20a_tegra_postscale,
.devfreq_governor = "nvhost_podgov",
.qos_id = PM_QOS_GPU_FREQ_MIN,
.secure_alloc = gk20a_tegra_secure_alloc,
.secure_page_alloc = gk20a_tegra_secure_page_alloc,
.dump_platform_dependencies = gk20a_tegra_debug_dump,
};
struct gk20a_platform gm20b_tegra_platform = {
.has_syncpoints = true,
/* power management configuration */
.railgate_delay = 500,
.clockgate_delay = 50,
/* Disable all power features for gm20b */
.can_railgate = false,
.enable_slcg = false,
.enable_blcg = false,
.enable_elcg = false,
.enable_elpg = false,
.enable_aelpg = false,
.probe = gk20a_tegra_probe,
.late_probe = gk20a_tegra_late_probe,
/* power management callbacks */
.suspend = gk20a_tegra_suspend,
/* frequency scaling configuration */
.prescale = gk20a_tegra_prescale,
.postscale = gk20a_tegra_postscale,
.devfreq_governor = "nvhost_podgov",
.qos_id = PM_QOS_GPU_FREQ_MIN,
.secure_alloc = gk20a_tegra_secure_alloc,
.secure_page_alloc = gk20a_tegra_secure_page_alloc,
.dump_platform_dependencies = gk20a_tegra_debug_dump,
};
struct platform_device tegra_gk20a_device = {
.name = "gk20a",
.resource = gk20a_tegra_resources,
.num_resources = ARRAY_SIZE(gk20a_tegra_resources),
.dev = {
.platform_data = &gk20a_tegra_platform,
},
};