diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/Kconfig | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 35 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 33 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 17 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 13 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 14 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/platform_gk20a.h | 11 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1 |
17 files changed, 130 insertions, 31 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig index e0b433c1..00fd3b08 100644 --- a/drivers/gpu/nvgpu/Kconfig +++ b/drivers/gpu/nvgpu/Kconfig | |||
@@ -8,7 +8,7 @@ config GK20A | |||
8 | config GK20A_DEFAULT_TIMEOUT | 8 | config GK20A_DEFAULT_TIMEOUT |
9 | depends on GK20A | 9 | depends on GK20A |
10 | int "Default timeout for submits" | 10 | int "Default timeout for submits" |
11 | default 5000 | 11 | default 3000 |
12 | help | 12 | help |
13 | Default timeout for jobs in milliseconds. Set to zero for no timeout. | 13 | Default timeout for jobs in milliseconds. Set to zero for no timeout. |
14 | 14 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index cd4e71bf..6d8633a7 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -1035,6 +1035,7 @@ __releases(&cde_app->mutex) | |||
1035 | goto exit_unlock; | 1035 | goto exit_unlock; |
1036 | } | 1036 | } |
1037 | 1037 | ||
1038 | __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size); | ||
1038 | dma_buf_vunmap(compbits_scatter_buf, surface); | 1039 | dma_buf_vunmap(compbits_scatter_buf, surface); |
1039 | surface = NULL; | 1040 | surface = NULL; |
1040 | } | 1041 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 2dc8e9a0..92b4b3de 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -136,8 +136,10 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, | |||
136 | u32 timeslice_timeout) | 136 | u32 timeslice_timeout) |
137 | { | 137 | { |
138 | void *inst_ptr; | 138 | void *inst_ptr; |
139 | struct gk20a_platform *platform = platform_get_drvdata(c->g->dev); | ||
139 | int shift = 3; | 140 | int shift = 3; |
140 | int value = timeslice_timeout; | 141 | int value = scale_ptimer(timeslice_timeout, |
142 | platform->ptimerscaling10x); | ||
141 | 143 | ||
142 | inst_ptr = c->inst_block.cpu_va; | 144 | inst_ptr = c->inst_block.cpu_va; |
143 | if (!inst_ptr) | 145 | if (!inst_ptr) |
@@ -1474,8 +1476,11 @@ bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, | |||
1474 | 1476 | ||
1475 | static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch) | 1477 | static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch) |
1476 | { | 1478 | { |
1477 | if (ch->g->timeouts_enabled && ch->g->ch_wdt_enabled) | 1479 | struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); |
1478 | return NVGPU_CHANNEL_WATCHDOG_DEFAULT_TIMEOUT_MS; | 1480 | |
1481 | if (ch->g->timeouts_enabled && ch->g->ch_wdt_enabled && | ||
1482 | platform->ch_wdt_timeout_ms) | ||
1483 | return platform->ch_wdt_timeout_ms; | ||
1479 | else | 1484 | else |
1480 | return (u32)MAX_SCHEDULE_TIMEOUT; | 1485 | return (u32)MAX_SCHEDULE_TIMEOUT; |
1481 | } | 1486 | } |
@@ -1568,6 +1573,21 @@ static void gk20a_channel_timeout_stop(struct channel_gk20a *ch) | |||
1568 | mutex_unlock(&ch->timeout.lock); | 1573 | mutex_unlock(&ch->timeout.lock); |
1569 | } | 1574 | } |
1570 | 1575 | ||
1576 | void gk20a_channel_timeout_stop_all_channels(struct gk20a *g) | ||
1577 | { | ||
1578 | u32 chid; | ||
1579 | struct fifo_gk20a *f = &g->fifo; | ||
1580 | |||
1581 | for (chid = 0; chid < f->num_channels; chid++) { | ||
1582 | struct channel_gk20a *ch = &f->channel[chid]; | ||
1583 | |||
1584 | if (gk20a_channel_get(ch)) { | ||
1585 | gk20a_channel_timeout_stop(ch); | ||
1586 | gk20a_channel_put(ch); | ||
1587 | } | ||
1588 | } | ||
1589 | } | ||
1590 | |||
1571 | static void gk20a_channel_timeout_handler(struct work_struct *work) | 1591 | static void gk20a_channel_timeout_handler(struct work_struct *work) |
1572 | { | 1592 | { |
1573 | struct channel_gk20a_job *job; | 1593 | struct channel_gk20a_job *job; |
@@ -2339,16 +2359,13 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch, | |||
2339 | /* set priority of graphics channel */ | 2359 | /* set priority of graphics channel */ |
2340 | switch (priority) { | 2360 | switch (priority) { |
2341 | case NVGPU_PRIORITY_LOW: | 2361 | case NVGPU_PRIORITY_LOW: |
2342 | /* 64 << 3 = 512us */ | 2362 | timeslice_timeout = ch->g->timeslice_low_priority_us; |
2343 | timeslice_timeout = 64; | ||
2344 | break; | 2363 | break; |
2345 | case NVGPU_PRIORITY_MEDIUM: | 2364 | case NVGPU_PRIORITY_MEDIUM: |
2346 | /* 128 << 3 = 1024us */ | 2365 | timeslice_timeout = ch->g->timeslice_medium_priority_us; |
2347 | timeslice_timeout = 128; | ||
2348 | break; | 2366 | break; |
2349 | case NVGPU_PRIORITY_HIGH: | 2367 | case NVGPU_PRIORITY_HIGH: |
2350 | /* 255 << 3 = 2048us */ | 2368 | timeslice_timeout = ch->g->timeslice_high_priority_us; |
2351 | timeslice_timeout = 255; | ||
2352 | break; | 2369 | break; |
2353 | default: | 2370 | default: |
2354 | pr_err("Unsupported priority"); | 2371 | pr_err("Unsupported priority"); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 9d74b412..280c50b1 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -38,8 +38,6 @@ struct gk20a_fence; | |||
38 | #include "gr_gk20a.h" | 38 | #include "gr_gk20a.h" |
39 | #include "fence_gk20a.h" | 39 | #include "fence_gk20a.h" |
40 | 40 | ||
41 | #define NVGPU_CHANNEL_WATCHDOG_DEFAULT_TIMEOUT_MS 5000 | ||
42 | |||
43 | struct gpfifo { | 41 | struct gpfifo { |
44 | u32 entry0; | 42 | u32 entry0; |
45 | u32 entry1; | 43 | u32 entry1; |
@@ -258,4 +256,5 @@ void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch); | |||
258 | int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | 256 | int channel_gk20a_setup_ramfc(struct channel_gk20a *c, |
259 | u64 gpfifo_base, u32 gpfifo_entries, u32 flags); | 257 | u64 gpfifo_base, u32 gpfifo_entries, u32 flags); |
260 | void channel_gk20a_enable(struct channel_gk20a *ch); | 258 | void channel_gk20a_enable(struct channel_gk20a *ch); |
259 | void gk20a_channel_timeout_stop_all_channels(struct gk20a *g); | ||
261 | #endif /* CHANNEL_GK20A_H */ | 260 | #endif /* CHANNEL_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index bda0dab0..1cac683d 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c | |||
@@ -418,6 +418,9 @@ void gk20a_debug_init(struct platform_device *pdev) | |||
418 | debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, platform->debugfs, | 418 | debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, platform->debugfs, |
419 | &gk20a_debug_trace_cmdbuf); | 419 | &gk20a_debug_trace_cmdbuf); |
420 | 420 | ||
421 | debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR, | ||
422 | platform->debugfs, &platform->ch_wdt_timeout_ms); | ||
423 | |||
421 | #if defined(GK20A_DEBUG) | 424 | #if defined(GK20A_DEBUG) |
422 | debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR, platform->debugfs, | 425 | debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR, platform->debugfs, |
423 | &gk20a_dbg_mask); | 426 | &gk20a_dbg_mask); |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index f736fe8c..4f3363f2 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -344,6 +344,7 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g) | |||
344 | u32 mask; | 344 | u32 mask; |
345 | u32 timeout; | 345 | u32 timeout; |
346 | int i; | 346 | int i; |
347 | struct gk20a_platform *platform = platform_get_drvdata(g->dev); | ||
347 | 348 | ||
348 | gk20a_dbg_fn(""); | 349 | gk20a_dbg_fn(""); |
349 | /* enable pmc pfifo */ | 350 | /* enable pmc pfifo */ |
@@ -408,8 +409,9 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g) | |||
408 | if (g->ops.fifo.apply_pb_timeout) | 409 | if (g->ops.fifo.apply_pb_timeout) |
409 | g->ops.fifo.apply_pb_timeout(g); | 410 | g->ops.fifo.apply_pb_timeout(g); |
410 | 411 | ||
411 | timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US | | 412 | timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US; |
412 | fifo_eng_timeout_detection_enabled_f(); | 413 | timeout = scale_ptimer(timeout, platform->ptimerscaling10x); |
414 | timeout |= fifo_eng_timeout_detection_enabled_f(); | ||
413 | gk20a_writel(g, fifo_eng_timeout_r(), timeout); | 415 | gk20a_writel(g, fifo_eng_timeout_r(), timeout); |
414 | 416 | ||
415 | gk20a_dbg_fn("done"); | 417 | gk20a_dbg_fn("done"); |
@@ -1448,6 +1450,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g) | |||
1448 | struct channel_gk20a *ch = &f->channel[id]; | 1450 | struct channel_gk20a *ch = &f->channel[id]; |
1449 | 1451 | ||
1450 | if (is_tsg) { | 1452 | if (is_tsg) { |
1453 | gk20a_channel_timeout_stop_all_channels(g); | ||
1451 | gk20a_fifo_recover(g, BIT(engine_id), id, true, | 1454 | gk20a_fifo_recover(g, BIT(engine_id), id, true, |
1452 | true, true); | 1455 | true, true); |
1453 | ret = true; | 1456 | ret = true; |
@@ -1465,6 +1468,11 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g) | |||
1465 | "fifo sched ctxsw timeout error:" | 1468 | "fifo sched ctxsw timeout error:" |
1466 | "engine = %u, ch = %d", engine_id, id); | 1469 | "engine = %u, ch = %d", engine_id, id); |
1467 | gk20a_gr_debug_dump(g->dev); | 1470 | gk20a_gr_debug_dump(g->dev); |
1471 | /* | ||
1472 | * Cancel all channels' timeout since SCHED error might | ||
1473 | * trigger multiple watchdogs at a time | ||
1474 | */ | ||
1475 | gk20a_channel_timeout_stop_all_channels(g); | ||
1468 | gk20a_fifo_recover(g, BIT(engine_id), id, false, | 1476 | gk20a_fifo_recover(g, BIT(engine_id), id, false, |
1469 | true, ch->timeout_debug_dump); | 1477 | true, ch->timeout_debug_dump); |
1470 | ret = true; | 1478 | ret = true; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index c606e027..0cc29026 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/clk/tegra.h> | 40 | #include <linux/clk/tegra.h> |
41 | #include <linux/kthread.h> | 41 | #include <linux/kthread.h> |
42 | #include <linux/platform/tegra/common.h> | 42 | #include <linux/platform/tegra/common.h> |
43 | #include <linux/reset.h> | ||
43 | 44 | ||
44 | #include <linux/sched.h> | 45 | #include <linux/sched.h> |
45 | 46 | ||
@@ -1166,7 +1167,10 @@ static void gk20a_pm_shutdown(struct platform_device *pdev) | |||
1166 | #endif | 1167 | #endif |
1167 | 1168 | ||
1168 | /* Be ready for rail-gate after this point */ | 1169 | /* Be ready for rail-gate after this point */ |
1169 | gk20a_pm_prepare_poweroff(&pdev->dev); | 1170 | if (gk20a_gpu_is_virtual(pdev)) |
1171 | vgpu_pm_prepare_poweroff(&pdev->dev); | ||
1172 | else | ||
1173 | gk20a_pm_prepare_poweroff(&pdev->dev); | ||
1170 | } | 1174 | } |
1171 | 1175 | ||
1172 | #ifdef CONFIG_PM | 1176 | #ifdef CONFIG_PM |
@@ -1295,6 +1299,10 @@ static int gk20a_pm_init(struct platform_device *dev) | |||
1295 | if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) | 1299 | if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) |
1296 | err = gk20a_pm_initialise_domain(dev); | 1300 | err = gk20a_pm_initialise_domain(dev); |
1297 | 1301 | ||
1302 | platform->reset_control = devm_reset_control_get(&dev->dev, NULL); | ||
1303 | if (IS_ERR(platform->reset_control)) | ||
1304 | platform->reset_control = NULL; | ||
1305 | |||
1298 | return err; | 1306 | return err; |
1299 | } | 1307 | } |
1300 | 1308 | ||
@@ -1452,6 +1460,10 @@ static int gk20a_probe(struct platform_device *dev) | |||
1452 | gk20a->timeouts_enabled = true; | 1460 | gk20a->timeouts_enabled = true; |
1453 | gk20a->ch_wdt_enabled = true; | 1461 | gk20a->ch_wdt_enabled = true; |
1454 | 1462 | ||
1463 | gk20a->timeslice_low_priority_us = 1300; | ||
1464 | gk20a->timeslice_medium_priority_us = 2600; | ||
1465 | gk20a->timeslice_high_priority_us = 5200; | ||
1466 | |||
1455 | /* Set up initial power settings. For non-slicon platforms, disable * | 1467 | /* Set up initial power settings. For non-slicon platforms, disable * |
1456 | * power features and for silicon platforms, read from platform data */ | 1468 | * power features and for silicon platforms, read from platform data */ |
1457 | gk20a->slcg_enabled = | 1469 | gk20a->slcg_enabled = |
@@ -1504,6 +1516,25 @@ static int gk20a_probe(struct platform_device *dev) | |||
1504 | S_IRUGO|S_IWUSR, | 1516 | S_IRUGO|S_IWUSR, |
1505 | platform->debugfs, | 1517 | platform->debugfs, |
1506 | &gk20a->mm.disable_bigpage); | 1518 | &gk20a->mm.disable_bigpage); |
1519 | |||
1520 | gk20a->debugfs_timeslice_low_priority_us = | ||
1521 | debugfs_create_u32("timeslice_low_priority_us", | ||
1522 | S_IRUGO|S_IWUSR, | ||
1523 | platform->debugfs, | ||
1524 | &gk20a->timeslice_low_priority_us); | ||
1525 | |||
1526 | gk20a->debugfs_timeslice_medium_priority_us = | ||
1527 | debugfs_create_u32("timeslice_medium_priority_us", | ||
1528 | S_IRUGO|S_IWUSR, | ||
1529 | platform->debugfs, | ||
1530 | &gk20a->timeslice_medium_priority_us); | ||
1531 | |||
1532 | gk20a->debugfs_timeslice_high_priority_us = | ||
1533 | debugfs_create_u32("timeslice_high_priority_us", | ||
1534 | S_IRUGO|S_IWUSR, | ||
1535 | platform->debugfs, | ||
1536 | &gk20a->timeslice_high_priority_us); | ||
1537 | |||
1507 | gr_gk20a_debugfs_init(gk20a); | 1538 | gr_gk20a_debugfs_init(gk20a); |
1508 | gk20a_pmu_debugfs_init(dev); | 1539 | gk20a_pmu_debugfs_init(dev); |
1509 | gk20a_cde_debugfs_init(dev); | 1540 | gk20a_cde_debugfs_init(dev); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a5e130cb..bec2d2a0 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -215,6 +215,7 @@ struct gpu_ops { | |||
215 | void (*blcg_ltc_load_gating_prod)(struct gk20a *g, bool prod); | 215 | void (*blcg_ltc_load_gating_prod)(struct gk20a *g, bool prod); |
216 | void (*blcg_pwr_csb_load_gating_prod)(struct gk20a *g, bool prod); | 216 | void (*blcg_pwr_csb_load_gating_prod)(struct gk20a *g, bool prod); |
217 | void (*blcg_pmu_load_gating_prod)(struct gk20a *g, bool prod); | 217 | void (*blcg_pmu_load_gating_prod)(struct gk20a *g, bool prod); |
218 | void (*blcg_xbar_load_gating_prod)(struct gk20a *g, bool prod); | ||
218 | void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod); | 219 | void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod); |
219 | } clock_gating; | 220 | } clock_gating; |
220 | struct { | 221 | struct { |
@@ -490,6 +491,11 @@ struct gk20a { | |||
490 | u32 ch_wdt_enabled; | 491 | u32 ch_wdt_enabled; |
491 | struct mutex ch_wdt_lock; | 492 | struct mutex ch_wdt_lock; |
492 | 493 | ||
494 | /* Channel priorities */ | ||
495 | u32 timeslice_low_priority_us; | ||
496 | u32 timeslice_medium_priority_us; | ||
497 | u32 timeslice_high_priority_us; | ||
498 | |||
493 | bool slcg_enabled; | 499 | bool slcg_enabled; |
494 | bool blcg_enabled; | 500 | bool blcg_enabled; |
495 | bool elcg_enabled; | 501 | bool elcg_enabled; |
@@ -509,6 +515,10 @@ struct gk20a { | |||
509 | struct dentry *debugfs_bypass_smmu; | 515 | struct dentry *debugfs_bypass_smmu; |
510 | struct dentry *debugfs_disable_bigpage; | 516 | struct dentry *debugfs_disable_bigpage; |
511 | struct dentry *debugfs_gr_default_attrib_cb_size; | 517 | struct dentry *debugfs_gr_default_attrib_cb_size; |
518 | |||
519 | struct dentry * debugfs_timeslice_low_priority_us; | ||
520 | struct dentry * debugfs_timeslice_medium_priority_us; | ||
521 | struct dentry * debugfs_timeslice_high_priority_us; | ||
512 | #endif | 522 | #endif |
513 | struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; | 523 | struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; |
514 | 524 | ||
@@ -885,4 +895,11 @@ void gk20a_user_deinit(struct platform_device *dev); | |||
885 | 895 | ||
886 | extern void gk20a_debug_dump_device(struct platform_device *pdev); | 896 | extern void gk20a_debug_dump_device(struct platform_device *pdev); |
887 | 897 | ||
898 | static inline u32 scale_ptimer(u32 timeout , u32 scale10x) | ||
899 | { | ||
900 | if (((timeout*10) % scale10x) >= (scale10x/2)) | ||
901 | return ((timeout * 10) / scale10x) + 1; | ||
902 | else | ||
903 | return (timeout * 10) / scale10x; | ||
904 | } | ||
888 | #endif /* GK20A_H */ | 905 | #endif /* GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c index 96a945f8..996fe221 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | |||
@@ -115,6 +115,9 @@ static ssize_t blcg_enable_store(struct device *device, | |||
115 | g->ops.clock_gating.blcg_ltc_load_gating_prod(g, g->blcg_enabled); | 115 | g->ops.clock_gating.blcg_ltc_load_gating_prod(g, g->blcg_enabled); |
116 | if (g->ops.clock_gating.blcg_pmu_load_gating_prod) | 116 | if (g->ops.clock_gating.blcg_pmu_load_gating_prod) |
117 | g->ops.clock_gating.blcg_pmu_load_gating_prod(g, g->blcg_enabled); | 117 | g->ops.clock_gating.blcg_pmu_load_gating_prod(g, g->blcg_enabled); |
118 | if (g->ops.clock_gating.blcg_xbar_load_gating_prod) | ||
119 | g->ops.clock_gating.blcg_xbar_load_gating_prod(g, | ||
120 | g->blcg_enabled); | ||
118 | gk20a_idle(g->dev); | 121 | gk20a_idle(g->dev); |
119 | 122 | ||
120 | dev_info(device, "BLCG is %s.\n", g->blcg_enabled ? "enabled" : | 123 | dev_info(device, "BLCG is %s.\n", g->blcg_enabled ? "enabled" : |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 19d9cffc..ea06bd20 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -1622,12 +1622,12 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1622 | if (!ctx_ptr) | 1622 | if (!ctx_ptr) |
1623 | return -ENOMEM; | 1623 | return -ENOMEM; |
1624 | 1624 | ||
1625 | if (g->ops.gr.enable_cde_in_fecs && c->cde) | ||
1626 | g->ops.gr.enable_cde_in_fecs(ctx_ptr); | ||
1627 | |||
1628 | for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) | 1625 | for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) |
1629 | gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); | 1626 | gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); |
1630 | 1627 | ||
1628 | if (g->ops.gr.enable_cde_in_fecs && c->cde) | ||
1629 | g->ops.gr.enable_cde_in_fecs(ctx_ptr); | ||
1630 | |||
1631 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); | 1631 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); |
1632 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); | 1632 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); |
1633 | 1633 | ||
@@ -4227,11 +4227,18 @@ static void gr_gk20a_load_gating_prod(struct gk20a *g) | |||
4227 | g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, | 4227 | g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, |
4228 | g->slcg_enabled); | 4228 | g->slcg_enabled); |
4229 | g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled); | 4229 | g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled); |
4230 | if (g->ops.clock_gating.slcg_xbar_load_gating_prod) | ||
4231 | g->ops.clock_gating.slcg_xbar_load_gating_prod(g, | ||
4232 | g->slcg_enabled); | ||
4230 | 4233 | ||
4234 | /* blcg prod values */ | ||
4231 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); | 4235 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); |
4232 | if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) | 4236 | if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) |
4233 | g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, | 4237 | g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, |
4234 | g->blcg_enabled); | 4238 | g->blcg_enabled); |
4239 | if (g->ops.clock_gating.blcg_xbar_load_gating_prod) | ||
4240 | g->ops.clock_gating.blcg_xbar_load_gating_prod(g, | ||
4241 | g->blcg_enabled); | ||
4235 | g->ops.clock_gating.pg_gr_load_gating_prod(g, true); | 4242 | g->ops.clock_gating.pg_gr_load_gating_prod(g, true); |
4236 | } | 4243 | } |
4237 | 4244 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index b0c864d4..334f251c 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -2144,7 +2144,7 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2144 | struct scatterlist **sgl, | 2144 | struct scatterlist **sgl, |
2145 | u64 *offset, | 2145 | u64 *offset, |
2146 | u64 *iova, | 2146 | u64 *iova, |
2147 | u32 kind_v, u32 *ctag, | 2147 | u32 kind_v, u64 *ctag, |
2148 | bool cacheable, bool unammped_pte, | 2148 | bool cacheable, bool unammped_pte, |
2149 | int rw_flag, bool sparse, bool priv) | 2149 | int rw_flag, bool sparse, bool priv) |
2150 | { | 2150 | { |
@@ -2194,12 +2194,12 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2194 | struct scatterlist **sgl, | 2194 | struct scatterlist **sgl, |
2195 | u64 *offset, | 2195 | u64 *offset, |
2196 | u64 *iova, | 2196 | u64 *iova, |
2197 | u32 kind_v, u32 *ctag, | 2197 | u32 kind_v, u64 *ctag, |
2198 | bool cacheable, bool unmapped_pte, | 2198 | bool cacheable, bool unmapped_pte, |
2199 | int rw_flag, bool sparse, bool priv) | 2199 | int rw_flag, bool sparse, bool priv) |
2200 | { | 2200 | { |
2201 | struct gk20a *g = gk20a_from_vm(vm); | 2201 | struct gk20a *g = gk20a_from_vm(vm); |
2202 | u32 ctag_granularity = g->ops.fb.compression_page_size(g); | 2202 | u64 ctag_granularity = g->ops.fb.compression_page_size(g); |
2203 | u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; | 2203 | u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; |
2204 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | 2204 | u32 pte_w[2] = {0, 0}; /* invalid pte */ |
2205 | 2205 | ||
@@ -2218,7 +2218,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2218 | 2218 | ||
2219 | pte_w[1] = gmmu_pte_aperture_video_memory_f() | | 2219 | pte_w[1] = gmmu_pte_aperture_video_memory_f() | |
2220 | gmmu_pte_kind_f(kind_v) | | 2220 | gmmu_pte_kind_f(kind_v) | |
2221 | gmmu_pte_comptagline_f(*ctag / ctag_granularity); | 2221 | gmmu_pte_comptagline_f((u32)(*ctag / ctag_granularity)); |
2222 | 2222 | ||
2223 | if (rw_flag == gk20a_mem_flag_read_only) { | 2223 | if (rw_flag == gk20a_mem_flag_read_only) { |
2224 | pte_w[0] |= gmmu_pte_read_only_true_f(); | 2224 | pte_w[0] |= gmmu_pte_read_only_true_f(); |
@@ -2244,7 +2244,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2244 | gk20a_dbg(gpu_dbg_pte, | 2244 | gk20a_dbg(gpu_dbg_pte, |
2245 | "pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]", | 2245 | "pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]", |
2246 | i, *iova, | 2246 | i, *iova, |
2247 | kind_v, *ctag / ctag_granularity, !cacheable, | 2247 | kind_v, (u32)(*ctag / ctag_granularity), !cacheable, |
2248 | pte_w[1], pte_w[0]); | 2248 | pte_w[1], pte_w[0]); |
2249 | 2249 | ||
2250 | if (*ctag) | 2250 | if (*ctag) |
@@ -2287,7 +2287,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm, | |||
2287 | u64 *offset, | 2287 | u64 *offset, |
2288 | u64 *iova, | 2288 | u64 *iova, |
2289 | u64 gpu_va, u64 gpu_end, | 2289 | u64 gpu_va, u64 gpu_end, |
2290 | u8 kind_v, u32 *ctag, | 2290 | u8 kind_v, u64 *ctag, |
2291 | bool cacheable, bool unmapped_pte, | 2291 | bool cacheable, bool unmapped_pte, |
2292 | int rw_flag, | 2292 | int rw_flag, |
2293 | bool sparse, | 2293 | bool sparse, |
@@ -2390,7 +2390,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
2390 | { | 2390 | { |
2391 | struct gk20a *g = gk20a_from_vm(vm); | 2391 | struct gk20a *g = gk20a_from_vm(vm); |
2392 | int ctag_granularity = g->ops.fb.compression_page_size(g); | 2392 | int ctag_granularity = g->ops.fb.compression_page_size(g); |
2393 | u32 ctag = ctag_offset * ctag_granularity; | 2393 | u64 ctag = (u64)ctag_offset * (u64)ctag_granularity; |
2394 | u64 iova = 0; | 2394 | u64 iova = 0; |
2395 | u64 space_to_skip = buffer_offset; | 2395 | u64 space_to_skip = buffer_offset; |
2396 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | 2396 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 6786e3c2..c60f1bb7 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -220,7 +220,7 @@ struct gk20a_mmu_level { | |||
220 | struct scatterlist **sgl, | 220 | struct scatterlist **sgl, |
221 | u64 *offset, | 221 | u64 *offset, |
222 | u64 *iova, | 222 | u64 *iova, |
223 | u32 kind_v, u32 *ctag, | 223 | u32 kind_v, u64 *ctag, |
224 | bool cacheable, bool unmapped_pte, | 224 | bool cacheable, bool unmapped_pte, |
225 | int rw_flag, bool sparse, bool priv); | 225 | int rw_flag, bool sparse, bool priv); |
226 | size_t entry_size; | 226 | size_t entry_size; |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h index 29c88f44..0c3c6ff3 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A Platform (SoC) Interface | 4 | * GK20A Platform (SoC) Interface |
5 | * | 5 | * |
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -55,6 +55,9 @@ struct gk20a_platform { | |||
55 | struct clk *clk[3]; | 55 | struct clk *clk[3]; |
56 | int num_clks; | 56 | int num_clks; |
57 | 57 | ||
58 | /* Reset control for device */ | ||
59 | struct reset_control *reset_control; | ||
60 | |||
58 | /* Delay before rail gated */ | 61 | /* Delay before rail gated */ |
59 | int railgate_delay; | 62 | int railgate_delay; |
60 | 63 | ||
@@ -76,6 +79,9 @@ struct gk20a_platform { | |||
76 | /* Adaptative ELPG: true = enable flase = disable */ | 79 | /* Adaptative ELPG: true = enable flase = disable */ |
77 | bool enable_aelpg; | 80 | bool enable_aelpg; |
78 | 81 | ||
82 | /* Timeout for per-channel watchdog (in mS) */ | ||
83 | u32 ch_wdt_timeout_ms; | ||
84 | |||
79 | /* Enable SMMU bypass by default */ | 85 | /* Enable SMMU bypass by default */ |
80 | bool bypass_smmu; | 86 | bool bypass_smmu; |
81 | 87 | ||
@@ -91,6 +97,9 @@ struct gk20a_platform { | |||
91 | /* Default big page size 64K or 128K */ | 97 | /* Default big page size 64K or 128K */ |
92 | u32 default_big_page_size; | 98 | u32 default_big_page_size; |
93 | 99 | ||
100 | /* scaling factor for ptimer */ | ||
101 | u32 ptimerscaling10x; | ||
102 | |||
94 | /* Initialize the platform interface of the gk20a driver. | 103 | /* Initialize the platform interface of the gk20a driver. |
95 | * | 104 | * |
96 | * The platform implementation of this function must | 105 | * The platform implementation of this function must |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index f8e1e3b7..2c9cfb63 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | |||
@@ -812,11 +812,14 @@ struct gk20a_platform gk20a_tegra_platform = { | |||
812 | .enable_elcg = true, | 812 | .enable_elcg = true, |
813 | .enable_elpg = true, | 813 | .enable_elpg = true, |
814 | .enable_aelpg = true, | 814 | .enable_aelpg = true, |
815 | .ptimerscaling10x = 26, | ||
815 | 816 | ||
816 | .force_reset_in_do_idle = false, | 817 | .force_reset_in_do_idle = false, |
817 | 818 | ||
818 | .default_big_page_size = SZ_128K, | 819 | .default_big_page_size = SZ_128K, |
819 | 820 | ||
821 | .ch_wdt_timeout_ms = 7000, | ||
822 | |||
820 | .probe = gk20a_tegra_probe, | 823 | .probe = gk20a_tegra_probe, |
821 | .late_probe = gk20a_tegra_late_probe, | 824 | .late_probe = gk20a_tegra_late_probe, |
822 | .remove = gk20a_tegra_remove, | 825 | .remove = gk20a_tegra_remove, |
@@ -858,11 +861,14 @@ struct gk20a_platform gm20b_tegra_platform = { | |||
858 | .enable_elcg = true, | 861 | .enable_elcg = true, |
859 | .enable_elpg = true, | 862 | .enable_elpg = true, |
860 | .enable_aelpg = true, | 863 | .enable_aelpg = true, |
864 | .ptimerscaling10x = 26, | ||
861 | 865 | ||
862 | .force_reset_in_do_idle = false, | 866 | .force_reset_in_do_idle = false, |
863 | 867 | ||
864 | .default_big_page_size = SZ_128K, | 868 | .default_big_page_size = SZ_128K, |
865 | 869 | ||
870 | .ch_wdt_timeout_ms = 5000, | ||
871 | |||
866 | .probe = gk20a_tegra_probe, | 872 | .probe = gk20a_tegra_probe, |
867 | .late_probe = gk20a_tegra_late_probe, | 873 | .late_probe = gk20a_tegra_late_probe, |
868 | .remove = gk20a_tegra_remove, | 874 | .remove = gk20a_tegra_remove, |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 4d459ef4..09230437 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -4179,7 +4179,6 @@ int gk20a_pmu_destroy(struct gk20a *g) | |||
4179 | g->pg_gating_cnt += gating_cnt; | 4179 | g->pg_gating_cnt += gating_cnt; |
4180 | 4180 | ||
4181 | mutex_lock(&pmu->isr_mutex); | 4181 | mutex_lock(&pmu->isr_mutex); |
4182 | pmu_enable(pmu, false); | ||
4183 | pmu->isr_enabled = false; | 4182 | pmu->isr_enabled = false; |
4184 | mutex_unlock(&pmu->isr_mutex); | 4183 | mutex_unlock(&pmu->isr_mutex); |
4185 | 4184 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index b8533f46..d590f566 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -1041,10 +1041,10 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g) | |||
1041 | #endif | 1041 | #endif |
1042 | } | 1042 | } |
1043 | 1043 | ||
1044 | void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr) | 1044 | static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr) |
1045 | { | 1045 | { |
1046 | u32 cde_v; | 1046 | u32 cde_v; |
1047 | 1047 | ||
1048 | cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0); | 1048 | cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0); |
1049 | cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); | 1049 | cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); |
1050 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); | 1050 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index e32f8943..edd70f13 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -120,7 +120,6 @@ int gm20b_init_hal(struct gk20a *g) | |||
120 | } | 120 | } |
121 | } | 121 | } |
122 | #endif | 122 | #endif |
123 | |||
124 | gm20b_init_mc(gops); | 123 | gm20b_init_mc(gops); |
125 | gm20b_init_ltc(gops); | 124 | gm20b_init_ltc(gops); |
126 | gm20b_init_gr(gops); | 125 | gm20b_init_gr(gops); |