summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--drivers/gpu/nvgpu/Kconfig2
-rw-r--r--drivers/gpu/nvgpu/gk20a/cde_gk20a.c1
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c35
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.h3
-rw-r--r--drivers/gpu/nvgpu/gk20a/debug_gk20a.c3
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c12
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.c33
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h17
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c3
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c13
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c14
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h2
-rw-r--r--drivers/gpu/nvgpu/gk20a/platform_gk20a.h11
-rw-r--r--drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c6
-rw-r--r--drivers/gpu/nvgpu/gk20a/pmu_gk20a.c1
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c4
-rw-r--r--drivers/gpu/nvgpu/gm20b/hal_gm20b.c1
17 files changed, 130 insertions, 31 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig
index e0b433c1..00fd3b08 100644
--- a/drivers/gpu/nvgpu/Kconfig
+++ b/drivers/gpu/nvgpu/Kconfig
@@ -8,7 +8,7 @@ config GK20A
8config GK20A_DEFAULT_TIMEOUT 8config GK20A_DEFAULT_TIMEOUT
9 depends on GK20A 9 depends on GK20A
10 int "Default timeout for submits" 10 int "Default timeout for submits"
11 default 5000 11 default 3000
12 help 12 help
13 Default timeout for jobs in milliseconds. Set to zero for no timeout. 13 Default timeout for jobs in milliseconds. Set to zero for no timeout.
14 14
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index cd4e71bf..6d8633a7 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1035,6 +1035,7 @@ __releases(&cde_app->mutex)
1035 goto exit_unlock; 1035 goto exit_unlock;
1036 } 1036 }
1037 1037
1038 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
1038 dma_buf_vunmap(compbits_scatter_buf, surface); 1039 dma_buf_vunmap(compbits_scatter_buf, surface);
1039 surface = NULL; 1040 surface = NULL;
1040 } 1041 }
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 2dc8e9a0..92b4b3de 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -136,8 +136,10 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
136 u32 timeslice_timeout) 136 u32 timeslice_timeout)
137{ 137{
138 void *inst_ptr; 138 void *inst_ptr;
139 struct gk20a_platform *platform = platform_get_drvdata(c->g->dev);
139 int shift = 3; 140 int shift = 3;
140 int value = timeslice_timeout; 141 int value = scale_ptimer(timeslice_timeout,
142 platform->ptimerscaling10x);
141 143
142 inst_ptr = c->inst_block.cpu_va; 144 inst_ptr = c->inst_block.cpu_va;
143 if (!inst_ptr) 145 if (!inst_ptr)
@@ -1474,8 +1476,11 @@ bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1474 1476
1475static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch) 1477static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch)
1476{ 1478{
1477 if (ch->g->timeouts_enabled && ch->g->ch_wdt_enabled) 1479 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
1478 return NVGPU_CHANNEL_WATCHDOG_DEFAULT_TIMEOUT_MS; 1480
1481 if (ch->g->timeouts_enabled && ch->g->ch_wdt_enabled &&
1482 platform->ch_wdt_timeout_ms)
1483 return platform->ch_wdt_timeout_ms;
1479 else 1484 else
1480 return (u32)MAX_SCHEDULE_TIMEOUT; 1485 return (u32)MAX_SCHEDULE_TIMEOUT;
1481} 1486}
@@ -1568,6 +1573,21 @@ static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
1568 mutex_unlock(&ch->timeout.lock); 1573 mutex_unlock(&ch->timeout.lock);
1569} 1574}
1570 1575
1576void gk20a_channel_timeout_stop_all_channels(struct gk20a *g)
1577{
1578 u32 chid;
1579 struct fifo_gk20a *f = &g->fifo;
1580
1581 for (chid = 0; chid < f->num_channels; chid++) {
1582 struct channel_gk20a *ch = &f->channel[chid];
1583
1584 if (gk20a_channel_get(ch)) {
1585 gk20a_channel_timeout_stop(ch);
1586 gk20a_channel_put(ch);
1587 }
1588 }
1589}
1590
1571static void gk20a_channel_timeout_handler(struct work_struct *work) 1591static void gk20a_channel_timeout_handler(struct work_struct *work)
1572{ 1592{
1573 struct channel_gk20a_job *job; 1593 struct channel_gk20a_job *job;
@@ -2339,16 +2359,13 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2339 /* set priority of graphics channel */ 2359 /* set priority of graphics channel */
2340 switch (priority) { 2360 switch (priority) {
2341 case NVGPU_PRIORITY_LOW: 2361 case NVGPU_PRIORITY_LOW:
2342 /* 64 << 3 = 512us */ 2362 timeslice_timeout = ch->g->timeslice_low_priority_us;
2343 timeslice_timeout = 64;
2344 break; 2363 break;
2345 case NVGPU_PRIORITY_MEDIUM: 2364 case NVGPU_PRIORITY_MEDIUM:
2346 /* 128 << 3 = 1024us */ 2365 timeslice_timeout = ch->g->timeslice_medium_priority_us;
2347 timeslice_timeout = 128;
2348 break; 2366 break;
2349 case NVGPU_PRIORITY_HIGH: 2367 case NVGPU_PRIORITY_HIGH:
2350 /* 255 << 3 = 2048us */ 2368 timeslice_timeout = ch->g->timeslice_high_priority_us;
2351 timeslice_timeout = 255;
2352 break; 2369 break;
2353 default: 2370 default:
2354 pr_err("Unsupported priority"); 2371 pr_err("Unsupported priority");
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 9d74b412..280c50b1 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -38,8 +38,6 @@ struct gk20a_fence;
38#include "gr_gk20a.h" 38#include "gr_gk20a.h"
39#include "fence_gk20a.h" 39#include "fence_gk20a.h"
40 40
41#define NVGPU_CHANNEL_WATCHDOG_DEFAULT_TIMEOUT_MS 5000
42
43struct gpfifo { 41struct gpfifo {
44 u32 entry0; 42 u32 entry0;
45 u32 entry1; 43 u32 entry1;
@@ -258,4 +256,5 @@ void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch);
258int channel_gk20a_setup_ramfc(struct channel_gk20a *c, 256int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
259 u64 gpfifo_base, u32 gpfifo_entries, u32 flags); 257 u64 gpfifo_base, u32 gpfifo_entries, u32 flags);
260void channel_gk20a_enable(struct channel_gk20a *ch); 258void channel_gk20a_enable(struct channel_gk20a *ch);
259void gk20a_channel_timeout_stop_all_channels(struct gk20a *g);
261#endif /* CHANNEL_GK20A_H */ 260#endif /* CHANNEL_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index bda0dab0..1cac683d 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -418,6 +418,9 @@ void gk20a_debug_init(struct platform_device *pdev)
418 debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, platform->debugfs, 418 debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, platform->debugfs,
419 &gk20a_debug_trace_cmdbuf); 419 &gk20a_debug_trace_cmdbuf);
420 420
421 debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
422 platform->debugfs, &platform->ch_wdt_timeout_ms);
423
421#if defined(GK20A_DEBUG) 424#if defined(GK20A_DEBUG)
422 debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR, platform->debugfs, 425 debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR, platform->debugfs,
423 &gk20a_dbg_mask); 426 &gk20a_dbg_mask);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index f736fe8c..4f3363f2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -344,6 +344,7 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
344 u32 mask; 344 u32 mask;
345 u32 timeout; 345 u32 timeout;
346 int i; 346 int i;
347 struct gk20a_platform *platform = platform_get_drvdata(g->dev);
347 348
348 gk20a_dbg_fn(""); 349 gk20a_dbg_fn("");
349 /* enable pmc pfifo */ 350 /* enable pmc pfifo */
@@ -408,8 +409,9 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
408 if (g->ops.fifo.apply_pb_timeout) 409 if (g->ops.fifo.apply_pb_timeout)
409 g->ops.fifo.apply_pb_timeout(g); 410 g->ops.fifo.apply_pb_timeout(g);
410 411
411 timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US | 412 timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
412 fifo_eng_timeout_detection_enabled_f(); 413 timeout = scale_ptimer(timeout, platform->ptimerscaling10x);
414 timeout |= fifo_eng_timeout_detection_enabled_f();
413 gk20a_writel(g, fifo_eng_timeout_r(), timeout); 415 gk20a_writel(g, fifo_eng_timeout_r(), timeout);
414 416
415 gk20a_dbg_fn("done"); 417 gk20a_dbg_fn("done");
@@ -1448,6 +1450,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1448 struct channel_gk20a *ch = &f->channel[id]; 1450 struct channel_gk20a *ch = &f->channel[id];
1449 1451
1450 if (is_tsg) { 1452 if (is_tsg) {
1453 gk20a_channel_timeout_stop_all_channels(g);
1451 gk20a_fifo_recover(g, BIT(engine_id), id, true, 1454 gk20a_fifo_recover(g, BIT(engine_id), id, true,
1452 true, true); 1455 true, true);
1453 ret = true; 1456 ret = true;
@@ -1465,6 +1468,11 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1465 "fifo sched ctxsw timeout error:" 1468 "fifo sched ctxsw timeout error:"
1466 "engine = %u, ch = %d", engine_id, id); 1469 "engine = %u, ch = %d", engine_id, id);
1467 gk20a_gr_debug_dump(g->dev); 1470 gk20a_gr_debug_dump(g->dev);
1471 /*
1472 * Cancel all channels' timeout since SCHED error might
1473 * trigger multiple watchdogs at a time
1474 */
1475 gk20a_channel_timeout_stop_all_channels(g);
1468 gk20a_fifo_recover(g, BIT(engine_id), id, false, 1476 gk20a_fifo_recover(g, BIT(engine_id), id, false,
1469 true, ch->timeout_debug_dump); 1477 true, ch->timeout_debug_dump);
1470 ret = true; 1478 ret = true;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index c606e027..0cc29026 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -40,6 +40,7 @@
40#include <linux/clk/tegra.h> 40#include <linux/clk/tegra.h>
41#include <linux/kthread.h> 41#include <linux/kthread.h>
42#include <linux/platform/tegra/common.h> 42#include <linux/platform/tegra/common.h>
43#include <linux/reset.h>
43 44
44#include <linux/sched.h> 45#include <linux/sched.h>
45 46
@@ -1166,7 +1167,10 @@ static void gk20a_pm_shutdown(struct platform_device *pdev)
1166#endif 1167#endif
1167 1168
1168 /* Be ready for rail-gate after this point */ 1169 /* Be ready for rail-gate after this point */
1169 gk20a_pm_prepare_poweroff(&pdev->dev); 1170 if (gk20a_gpu_is_virtual(pdev))
1171 vgpu_pm_prepare_poweroff(&pdev->dev);
1172 else
1173 gk20a_pm_prepare_poweroff(&pdev->dev);
1170} 1174}
1171 1175
1172#ifdef CONFIG_PM 1176#ifdef CONFIG_PM
@@ -1295,6 +1299,10 @@ static int gk20a_pm_init(struct platform_device *dev)
1295 if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) 1299 if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
1296 err = gk20a_pm_initialise_domain(dev); 1300 err = gk20a_pm_initialise_domain(dev);
1297 1301
1302 platform->reset_control = devm_reset_control_get(&dev->dev, NULL);
1303 if (IS_ERR(platform->reset_control))
1304 platform->reset_control = NULL;
1305
1298 return err; 1306 return err;
1299} 1307}
1300 1308
@@ -1452,6 +1460,10 @@ static int gk20a_probe(struct platform_device *dev)
1452 gk20a->timeouts_enabled = true; 1460 gk20a->timeouts_enabled = true;
1453 gk20a->ch_wdt_enabled = true; 1461 gk20a->ch_wdt_enabled = true;
1454 1462
1463 gk20a->timeslice_low_priority_us = 1300;
1464 gk20a->timeslice_medium_priority_us = 2600;
1465 gk20a->timeslice_high_priority_us = 5200;
1466
1455 /* Set up initial power settings. For non-silicon platforms, disable * 1467 /* Set up initial power settings. For non-silicon platforms, disable *
1456 * power features and for silicon platforms, read from platform data */ 1468 * power features and for silicon platforms, read from platform data */
1457 gk20a->slcg_enabled = 1469 gk20a->slcg_enabled =
@@ -1504,6 +1516,25 @@ static int gk20a_probe(struct platform_device *dev)
1504 S_IRUGO|S_IWUSR, 1516 S_IRUGO|S_IWUSR,
1505 platform->debugfs, 1517 platform->debugfs,
1506 &gk20a->mm.disable_bigpage); 1518 &gk20a->mm.disable_bigpage);
1519
1520 gk20a->debugfs_timeslice_low_priority_us =
1521 debugfs_create_u32("timeslice_low_priority_us",
1522 S_IRUGO|S_IWUSR,
1523 platform->debugfs,
1524 &gk20a->timeslice_low_priority_us);
1525
1526 gk20a->debugfs_timeslice_medium_priority_us =
1527 debugfs_create_u32("timeslice_medium_priority_us",
1528 S_IRUGO|S_IWUSR,
1529 platform->debugfs,
1530 &gk20a->timeslice_medium_priority_us);
1531
1532 gk20a->debugfs_timeslice_high_priority_us =
1533 debugfs_create_u32("timeslice_high_priority_us",
1534 S_IRUGO|S_IWUSR,
1535 platform->debugfs,
1536 &gk20a->timeslice_high_priority_us);
1537
1507 gr_gk20a_debugfs_init(gk20a); 1538 gr_gk20a_debugfs_init(gk20a);
1508 gk20a_pmu_debugfs_init(dev); 1539 gk20a_pmu_debugfs_init(dev);
1509 gk20a_cde_debugfs_init(dev); 1540 gk20a_cde_debugfs_init(dev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a5e130cb..bec2d2a0 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -215,6 +215,7 @@ struct gpu_ops {
215 void (*blcg_ltc_load_gating_prod)(struct gk20a *g, bool prod); 215 void (*blcg_ltc_load_gating_prod)(struct gk20a *g, bool prod);
216 void (*blcg_pwr_csb_load_gating_prod)(struct gk20a *g, bool prod); 216 void (*blcg_pwr_csb_load_gating_prod)(struct gk20a *g, bool prod);
217 void (*blcg_pmu_load_gating_prod)(struct gk20a *g, bool prod); 217 void (*blcg_pmu_load_gating_prod)(struct gk20a *g, bool prod);
218 void (*blcg_xbar_load_gating_prod)(struct gk20a *g, bool prod);
218 void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod); 219 void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod);
219 } clock_gating; 220 } clock_gating;
220 struct { 221 struct {
@@ -490,6 +491,11 @@ struct gk20a {
490 u32 ch_wdt_enabled; 491 u32 ch_wdt_enabled;
491 struct mutex ch_wdt_lock; 492 struct mutex ch_wdt_lock;
492 493
494 /* Channel priorities */
495 u32 timeslice_low_priority_us;
496 u32 timeslice_medium_priority_us;
497 u32 timeslice_high_priority_us;
498
493 bool slcg_enabled; 499 bool slcg_enabled;
494 bool blcg_enabled; 500 bool blcg_enabled;
495 bool elcg_enabled; 501 bool elcg_enabled;
@@ -509,6 +515,10 @@ struct gk20a {
509 struct dentry *debugfs_bypass_smmu; 515 struct dentry *debugfs_bypass_smmu;
510 struct dentry *debugfs_disable_bigpage; 516 struct dentry *debugfs_disable_bigpage;
511 struct dentry *debugfs_gr_default_attrib_cb_size; 517 struct dentry *debugfs_gr_default_attrib_cb_size;
518
519 struct dentry * debugfs_timeslice_low_priority_us;
520 struct dentry * debugfs_timeslice_medium_priority_us;
521 struct dentry * debugfs_timeslice_high_priority_us;
512#endif 522#endif
513 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; 523 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
514 524
@@ -885,4 +895,11 @@ void gk20a_user_deinit(struct platform_device *dev);
885 895
886extern void gk20a_debug_dump_device(struct platform_device *pdev); 896extern void gk20a_debug_dump_device(struct platform_device *pdev);
887 897
/*
 * scale_ptimer() - divide a timeout by a ptimer scaling factor.
 * @timeout:  value to scale.
 * @scale10x: scaling factor multiplied by ten (e.g. 26 means 2.6).
 *
 * Returns timeout * 10 / scale10x, rounded to the nearest integer (halves
 * round up).
 *
 * A @scale10x of zero (e.g. a platform that never set the factor) is treated
 * as "no scaling" and @timeout is returned unchanged instead of dividing by
 * zero.
 *
 * The computation is kept in 32-bit arithmetic on purpose: the division is
 * split so that the intermediate "timeout * 10" never has to be formed, which
 * would wrap for timeouts above ~429 million.
 * NOTE(review): the remainder term can still wrap if scale10x itself exceeds
 * UINT_MAX / 10, and the result wraps if the true quotient does not fit in
 * 32 bits (only possible for scale10x < 10) - confirm neither is reachable.
 */
static inline u32 scale_ptimer(u32 timeout, u32 scale10x)
{
	u32 rem10, frac, scaled;

	if (!scale10x)
		return timeout;

	/*
	 * timeout = q * scale10x + r, therefore
	 * timeout * 10 / scale10x = q * 10 + (r * 10) / scale10x
	 * and the remainders of both divisions are identical.
	 */
	rem10 = (timeout % scale10x) * 10;
	scaled = (timeout / scale10x) * 10 + rem10 / scale10x;
	frac = rem10 % scale10x;

	/*
	 * Round half up. "frac >= scale10x - frac" is "2 * frac >= scale10x"
	 * written so it cannot overflow; unlike "frac >= scale10x / 2" it is
	 * also exact for odd divisors and for scale10x == 1.
	 */
	if (frac >= scale10x - frac)
		scaled++;

	return scaled;
}
888#endif /* GK20A_H */ 905#endif /* GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
index 96a945f8..996fe221 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
@@ -115,6 +115,9 @@ static ssize_t blcg_enable_store(struct device *device,
115 g->ops.clock_gating.blcg_ltc_load_gating_prod(g, g->blcg_enabled); 115 g->ops.clock_gating.blcg_ltc_load_gating_prod(g, g->blcg_enabled);
116 if (g->ops.clock_gating.blcg_pmu_load_gating_prod) 116 if (g->ops.clock_gating.blcg_pmu_load_gating_prod)
117 g->ops.clock_gating.blcg_pmu_load_gating_prod(g, g->blcg_enabled); 117 g->ops.clock_gating.blcg_pmu_load_gating_prod(g, g->blcg_enabled);
118 if (g->ops.clock_gating.blcg_xbar_load_gating_prod)
119 g->ops.clock_gating.blcg_xbar_load_gating_prod(g,
120 g->blcg_enabled);
118 gk20a_idle(g->dev); 121 gk20a_idle(g->dev);
119 122
120 dev_info(device, "BLCG is %s.\n", g->blcg_enabled ? "enabled" : 123 dev_info(device, "BLCG is %s.\n", g->blcg_enabled ? "enabled" :
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 19d9cffc..ea06bd20 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1622,12 +1622,12 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1622 if (!ctx_ptr) 1622 if (!ctx_ptr)
1623 return -ENOMEM; 1623 return -ENOMEM;
1624 1624
1625 if (g->ops.gr.enable_cde_in_fecs && c->cde)
1626 g->ops.gr.enable_cde_in_fecs(ctx_ptr);
1627
1628 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) 1625 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
1629 gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); 1626 gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);
1630 1627
1628 if (g->ops.gr.enable_cde_in_fecs && c->cde)
1629 g->ops.gr.enable_cde_in_fecs(ctx_ptr);
1630
1631 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); 1631 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
1632 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); 1632 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
1633 1633
@@ -4227,11 +4227,18 @@ static void gr_gk20a_load_gating_prod(struct gk20a *g)
4227 g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, 4227 g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g,
4228 g->slcg_enabled); 4228 g->slcg_enabled);
4229 g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled); 4229 g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled);
4230 if (g->ops.clock_gating.slcg_xbar_load_gating_prod)
4231 g->ops.clock_gating.slcg_xbar_load_gating_prod(g,
4232 g->slcg_enabled);
4230 4233
4234 /* blcg prod values */
4231 g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); 4235 g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
4232 if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) 4236 if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod)
4233 g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, 4237 g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g,
4234 g->blcg_enabled); 4238 g->blcg_enabled);
4239 if (g->ops.clock_gating.blcg_xbar_load_gating_prod)
4240 g->ops.clock_gating.blcg_xbar_load_gating_prod(g,
4241 g->blcg_enabled);
4235 g->ops.clock_gating.pg_gr_load_gating_prod(g, true); 4242 g->ops.clock_gating.pg_gr_load_gating_prod(g, true);
4236} 4243}
4237 4244
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b0c864d4..334f251c 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2144,7 +2144,7 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
2144 struct scatterlist **sgl, 2144 struct scatterlist **sgl,
2145 u64 *offset, 2145 u64 *offset,
2146 u64 *iova, 2146 u64 *iova,
2147 u32 kind_v, u32 *ctag, 2147 u32 kind_v, u64 *ctag,
2148 bool cacheable, bool unammped_pte, 2148 bool cacheable, bool unammped_pte,
2149 int rw_flag, bool sparse, bool priv) 2149 int rw_flag, bool sparse, bool priv)
2150{ 2150{
@@ -2194,12 +2194,12 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
2194 struct scatterlist **sgl, 2194 struct scatterlist **sgl,
2195 u64 *offset, 2195 u64 *offset,
2196 u64 *iova, 2196 u64 *iova,
2197 u32 kind_v, u32 *ctag, 2197 u32 kind_v, u64 *ctag,
2198 bool cacheable, bool unmapped_pte, 2198 bool cacheable, bool unmapped_pte,
2199 int rw_flag, bool sparse, bool priv) 2199 int rw_flag, bool sparse, bool priv)
2200{ 2200{
2201 struct gk20a *g = gk20a_from_vm(vm); 2201 struct gk20a *g = gk20a_from_vm(vm);
2202 u32 ctag_granularity = g->ops.fb.compression_page_size(g); 2202 u64 ctag_granularity = g->ops.fb.compression_page_size(g);
2203 u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; 2203 u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
2204 u32 pte_w[2] = {0, 0}; /* invalid pte */ 2204 u32 pte_w[2] = {0, 0}; /* invalid pte */
2205 2205
@@ -2218,7 +2218,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
2218 2218
2219 pte_w[1] = gmmu_pte_aperture_video_memory_f() | 2219 pte_w[1] = gmmu_pte_aperture_video_memory_f() |
2220 gmmu_pte_kind_f(kind_v) | 2220 gmmu_pte_kind_f(kind_v) |
2221 gmmu_pte_comptagline_f(*ctag / ctag_granularity); 2221 gmmu_pte_comptagline_f((u32)(*ctag / ctag_granularity));
2222 2222
2223 if (rw_flag == gk20a_mem_flag_read_only) { 2223 if (rw_flag == gk20a_mem_flag_read_only) {
2224 pte_w[0] |= gmmu_pte_read_only_true_f(); 2224 pte_w[0] |= gmmu_pte_read_only_true_f();
@@ -2244,7 +2244,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
2244 gk20a_dbg(gpu_dbg_pte, 2244 gk20a_dbg(gpu_dbg_pte,
2245 "pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]", 2245 "pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]",
2246 i, *iova, 2246 i, *iova,
2247 kind_v, *ctag / ctag_granularity, !cacheable, 2247 kind_v, (u32)(*ctag / ctag_granularity), !cacheable,
2248 pte_w[1], pte_w[0]); 2248 pte_w[1], pte_w[0]);
2249 2249
2250 if (*ctag) 2250 if (*ctag)
@@ -2287,7 +2287,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
2287 u64 *offset, 2287 u64 *offset,
2288 u64 *iova, 2288 u64 *iova,
2289 u64 gpu_va, u64 gpu_end, 2289 u64 gpu_va, u64 gpu_end,
2290 u8 kind_v, u32 *ctag, 2290 u8 kind_v, u64 *ctag,
2291 bool cacheable, bool unmapped_pte, 2291 bool cacheable, bool unmapped_pte,
2292 int rw_flag, 2292 int rw_flag,
2293 bool sparse, 2293 bool sparse,
@@ -2390,7 +2390,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
2390{ 2390{
2391 struct gk20a *g = gk20a_from_vm(vm); 2391 struct gk20a *g = gk20a_from_vm(vm);
2392 int ctag_granularity = g->ops.fb.compression_page_size(g); 2392 int ctag_granularity = g->ops.fb.compression_page_size(g);
2393 u32 ctag = ctag_offset * ctag_granularity; 2393 u64 ctag = (u64)ctag_offset * (u64)ctag_granularity;
2394 u64 iova = 0; 2394 u64 iova = 0;
2395 u64 space_to_skip = buffer_offset; 2395 u64 space_to_skip = buffer_offset;
2396 u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; 2396 u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 6786e3c2..c60f1bb7 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -220,7 +220,7 @@ struct gk20a_mmu_level {
220 struct scatterlist **sgl, 220 struct scatterlist **sgl,
221 u64 *offset, 221 u64 *offset,
222 u64 *iova, 222 u64 *iova,
223 u32 kind_v, u32 *ctag, 223 u32 kind_v, u64 *ctag,
224 bool cacheable, bool unmapped_pte, 224 bool cacheable, bool unmapped_pte,
225 int rw_flag, bool sparse, bool priv); 225 int rw_flag, bool sparse, bool priv);
226 size_t entry_size; 226 size_t entry_size;
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index 29c88f44..0c3c6ff3 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Platform (SoC) Interface 4 * GK20A Platform (SoC) Interface
5 * 5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -55,6 +55,9 @@ struct gk20a_platform {
55 struct clk *clk[3]; 55 struct clk *clk[3];
56 int num_clks; 56 int num_clks;
57 57
58 /* Reset control for device */
59 struct reset_control *reset_control;
60
58 /* Delay before rail gated */ 61 /* Delay before rail gated */
59 int railgate_delay; 62 int railgate_delay;
60 63
@@ -76,6 +79,9 @@ struct gk20a_platform {
76 /* Adaptative ELPG: true = enable flase = disable */ 79 /* Adaptative ELPG: true = enable flase = disable */
77 bool enable_aelpg; 80 bool enable_aelpg;
78 81
82 /* Timeout for per-channel watchdog (in ms) */
83 u32 ch_wdt_timeout_ms;
84
79 /* Enable SMMU bypass by default */ 85 /* Enable SMMU bypass by default */
80 bool bypass_smmu; 86 bool bypass_smmu;
81 87
@@ -91,6 +97,9 @@ struct gk20a_platform {
91 /* Default big page size 64K or 128K */ 97 /* Default big page size 64K or 128K */
92 u32 default_big_page_size; 98 u32 default_big_page_size;
93 99
100 /* scaling factor for ptimer */
101 u32 ptimerscaling10x;
102
94 /* Initialize the platform interface of the gk20a driver. 103 /* Initialize the platform interface of the gk20a driver.
95 * 104 *
96 * The platform implementation of this function must 105 * The platform implementation of this function must
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index f8e1e3b7..2c9cfb63 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -812,11 +812,14 @@ struct gk20a_platform gk20a_tegra_platform = {
812 .enable_elcg = true, 812 .enable_elcg = true,
813 .enable_elpg = true, 813 .enable_elpg = true,
814 .enable_aelpg = true, 814 .enable_aelpg = true,
815 .ptimerscaling10x = 26,
815 816
816 .force_reset_in_do_idle = false, 817 .force_reset_in_do_idle = false,
817 818
818 .default_big_page_size = SZ_128K, 819 .default_big_page_size = SZ_128K,
819 820
821 .ch_wdt_timeout_ms = 7000,
822
820 .probe = gk20a_tegra_probe, 823 .probe = gk20a_tegra_probe,
821 .late_probe = gk20a_tegra_late_probe, 824 .late_probe = gk20a_tegra_late_probe,
822 .remove = gk20a_tegra_remove, 825 .remove = gk20a_tegra_remove,
@@ -858,11 +861,14 @@ struct gk20a_platform gm20b_tegra_platform = {
858 .enable_elcg = true, 861 .enable_elcg = true,
859 .enable_elpg = true, 862 .enable_elpg = true,
860 .enable_aelpg = true, 863 .enable_aelpg = true,
864 .ptimerscaling10x = 26,
861 865
862 .force_reset_in_do_idle = false, 866 .force_reset_in_do_idle = false,
863 867
864 .default_big_page_size = SZ_128K, 868 .default_big_page_size = SZ_128K,
865 869
870 .ch_wdt_timeout_ms = 5000,
871
866 .probe = gk20a_tegra_probe, 872 .probe = gk20a_tegra_probe,
867 .late_probe = gk20a_tegra_late_probe, 873 .late_probe = gk20a_tegra_late_probe,
868 .remove = gk20a_tegra_remove, 874 .remove = gk20a_tegra_remove,
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 4d459ef4..09230437 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -4179,7 +4179,6 @@ int gk20a_pmu_destroy(struct gk20a *g)
4179 g->pg_gating_cnt += gating_cnt; 4179 g->pg_gating_cnt += gating_cnt;
4180 4180
4181 mutex_lock(&pmu->isr_mutex); 4181 mutex_lock(&pmu->isr_mutex);
4182 pmu_enable(pmu, false);
4183 pmu->isr_enabled = false; 4182 pmu->isr_enabled = false;
4184 mutex_unlock(&pmu->isr_mutex); 4183 mutex_unlock(&pmu->isr_mutex);
4185 4184
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index b8533f46..d590f566 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1041,10 +1041,10 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g)
1041#endif 1041#endif
1042} 1042}
1043 1043
1044void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr) 1044static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr)
1045{ 1045{
1046 u32 cde_v; 1046 u32 cde_v;
1047 1047
1048 cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0); 1048 cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0);
1049 cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); 1049 cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f();
1050 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); 1050 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v);
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index e32f8943..edd70f13 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -120,7 +120,6 @@ int gm20b_init_hal(struct gk20a *g)
120 } 120 }
121 } 121 }
122#endif 122#endif
123
124 gm20b_init_mc(gops); 123 gm20b_init_mc(gops);
125 gm20b_init_ltc(gops); 124 gm20b_init_ltc(gops);
126 gm20b_init_gr(gops); 125 gm20b_init_gr(gops);