diff options
author | Vijayakumar <vsubbu@nvidia.com> | 2015-02-10 04:48:54 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:06:41 -0400 |
commit | aa96b6bd1efa1e26a757080137486884972d248c (patch) | |
tree | 3e64695e63ba15462f325a1bd222c5b702326965 /drivers/gpu/nvgpu/gk20a | |
parent | dbc46f0bf2dc4f6f03f53427fe0595fd8909e2db (diff) |
gpu: nvgpu: optimize fecs status polling
bug 200078367
Using udelay() instead of sleeping for FECS status polling
during the GR init phase brings the FECS
transaction time down to < 20 usec from a few
hundred usec.
Change-Id: I61a27daaf1187ac086a42779b46aa3fbee3b37f2
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/691918
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 44 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 |
2 files changed, 29 insertions, 19 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 8869f4c4..c6f4a336 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -365,16 +365,19 @@ static int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long end_jiffies, | |||
365 | static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, | 365 | static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, |
366 | u32 *mailbox_ret, u32 opc_success, | 366 | u32 *mailbox_ret, u32 opc_success, |
367 | u32 mailbox_ok, u32 opc_fail, | 367 | u32 mailbox_ok, u32 opc_fail, |
368 | u32 mailbox_fail) | 368 | u32 mailbox_fail, bool sleepduringwait) |
369 | { | 369 | { |
370 | unsigned long end_jiffies = jiffies + | 370 | unsigned long end_jiffies = jiffies + |
371 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | 371 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); |
372 | u32 delay = GR_IDLE_CHECK_DEFAULT; | 372 | u32 delay = GR_FECS_POLL_INTERVAL; |
373 | u32 check = WAIT_UCODE_LOOP; | 373 | u32 check = WAIT_UCODE_LOOP; |
374 | u32 reg; | 374 | u32 reg; |
375 | 375 | ||
376 | gk20a_dbg_fn(""); | 376 | gk20a_dbg_fn(""); |
377 | 377 | ||
378 | if (sleepduringwait) | ||
379 | delay = GR_IDLE_CHECK_DEFAULT; | ||
380 | |||
378 | while (check == WAIT_UCODE_LOOP) { | 381 | while (check == WAIT_UCODE_LOOP) { |
379 | if (!time_before(jiffies, end_jiffies) && | 382 | if (!time_before(jiffies, end_jiffies) && |
380 | tegra_platform_is_silicon()) | 383 | tegra_platform_is_silicon()) |
@@ -448,8 +451,11 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, | |||
448 | break; | 451 | break; |
449 | } | 452 | } |
450 | 453 | ||
451 | usleep_range(delay, delay * 2); | 454 | if (sleepduringwait) { |
452 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | 455 | usleep_range(delay, delay * 2); |
456 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
457 | } else | ||
458 | udelay(delay); | ||
453 | } | 459 | } |
454 | 460 | ||
455 | if (check == WAIT_UCODE_TIMEOUT) { | 461 | if (check == WAIT_UCODE_TIMEOUT) { |
@@ -472,7 +478,8 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, | |||
472 | /* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...) | 478 | /* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...) |
473 | * We should replace most, if not all, fecs method calls to this instead. */ | 479 | * We should replace most, if not all, fecs method calls to this instead. */ |
474 | int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | 480 | int gr_gk20a_submit_fecs_method_op(struct gk20a *g, |
475 | struct fecs_method_op_gk20a op) | 481 | struct fecs_method_op_gk20a op, |
482 | bool sleepduringwait) | ||
476 | { | 483 | { |
477 | struct gr_gk20a *gr = &g->gr; | 484 | struct gr_gk20a *gr = &g->gr; |
478 | int ret; | 485 | int ret; |
@@ -497,7 +504,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | |||
497 | 504 | ||
498 | ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret, | 505 | ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret, |
499 | op.cond.ok, op.mailbox.ok, | 506 | op.cond.ok, op.mailbox.ok, |
500 | op.cond.fail, op.mailbox.fail); | 507 | op.cond.fail, op.mailbox.fail, |
508 | sleepduringwait); | ||
501 | 509 | ||
502 | mutex_unlock(&gr->fecs_mutex); | 510 | mutex_unlock(&gr->fecs_mutex); |
503 | 511 | ||
@@ -515,7 +523,7 @@ static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret) | |||
515 | .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), | 523 | .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), |
516 | .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), }, | 524 | .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), }, |
517 | .cond.ok = GR_IS_UCODE_OP_EQUAL, | 525 | .cond.ok = GR_IS_UCODE_OP_EQUAL, |
518 | .cond.fail = GR_IS_UCODE_OP_EQUAL }); | 526 | .cond.fail = GR_IS_UCODE_OP_EQUAL }, true); |
519 | } | 527 | } |
520 | 528 | ||
521 | /* Stop processing (stall) context switches at FECS. | 529 | /* Stop processing (stall) context switches at FECS. |
@@ -548,7 +556,7 @@ int gr_gk20a_halt_pipe(struct gk20a *g) | |||
548 | .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), | 556 | .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), |
549 | .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), }, | 557 | .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), }, |
550 | .cond.ok = GR_IS_UCODE_OP_EQUAL, | 558 | .cond.ok = GR_IS_UCODE_OP_EQUAL, |
551 | .cond.fail = GR_IS_UCODE_OP_EQUAL }); | 559 | .cond.fail = GR_IS_UCODE_OP_EQUAL }, false); |
552 | } | 560 | } |
553 | 561 | ||
554 | 562 | ||
@@ -686,7 +694,7 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | |||
686 | .ok = 0x10, | 694 | .ok = 0x10, |
687 | .fail = 0x20, }, | 695 | .fail = 0x20, }, |
688 | .cond.ok = GR_IS_UCODE_OP_AND, | 696 | .cond.ok = GR_IS_UCODE_OP_AND, |
689 | .cond.fail = GR_IS_UCODE_OP_AND}); | 697 | .cond.fail = GR_IS_UCODE_OP_AND}, true); |
690 | if (ret) | 698 | if (ret) |
691 | gk20a_err(dev_from_gk20a(g), | 699 | gk20a_err(dev_from_gk20a(g), |
692 | "bind channel instance failed"); | 700 | "bind channel instance failed"); |
@@ -1382,7 +1390,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) | |||
1382 | }, | 1390 | }, |
1383 | .cond.ok = GR_IS_UCODE_OP_AND, | 1391 | .cond.ok = GR_IS_UCODE_OP_AND, |
1384 | .cond.fail = GR_IS_UCODE_OP_AND, | 1392 | .cond.fail = GR_IS_UCODE_OP_AND, |
1385 | }); | 1393 | }, true); |
1386 | 1394 | ||
1387 | if (ret) | 1395 | if (ret) |
1388 | gk20a_err(dev_from_gk20a(g), "save context image failed"); | 1396 | gk20a_err(dev_from_gk20a(g), "save context image failed"); |
@@ -1668,7 +1676,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1668 | .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), | 1676 | .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), |
1669 | .fail = 0}, | 1677 | .fail = 0}, |
1670 | .cond.ok = GR_IS_UCODE_OP_EQUAL, | 1678 | .cond.ok = GR_IS_UCODE_OP_EQUAL, |
1671 | .cond.fail = GR_IS_UCODE_OP_SKIP}); | 1679 | .cond.fail = GR_IS_UCODE_OP_SKIP}, false); |
1672 | 1680 | ||
1673 | if (ret) | 1681 | if (ret) |
1674 | gk20a_err(dev_from_gk20a(g), | 1682 | gk20a_err(dev_from_gk20a(g), |
@@ -2145,7 +2153,7 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g) | |||
2145 | ret = gr_gk20a_ctx_wait_ucode(g, 0, NULL, | 2153 | ret = gr_gk20a_ctx_wait_ucode(g, 0, NULL, |
2146 | GR_IS_UCODE_OP_EQUAL, | 2154 | GR_IS_UCODE_OP_EQUAL, |
2147 | eUcodeHandshakeInitComplete, | 2155 | eUcodeHandshakeInitComplete, |
2148 | GR_IS_UCODE_OP_SKIP, 0); | 2156 | GR_IS_UCODE_OP_SKIP, 0, false); |
2149 | if (ret) { | 2157 | if (ret) { |
2150 | gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout"); | 2158 | gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout"); |
2151 | return ret; | 2159 | return ret; |
@@ -2181,7 +2189,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) | |||
2181 | op.method.addr = | 2189 | op.method.addr = |
2182 | gr_fecs_method_push_adr_discover_image_size_v(); | 2190 | gr_fecs_method_push_adr_discover_image_size_v(); |
2183 | op.mailbox.ret = &g->gr.ctx_vars.golden_image_size; | 2191 | op.mailbox.ret = &g->gr.ctx_vars.golden_image_size; |
2184 | ret = gr_gk20a_submit_fecs_method_op(g, op); | 2192 | ret = gr_gk20a_submit_fecs_method_op(g, op, false); |
2185 | if (ret) { | 2193 | if (ret) { |
2186 | gk20a_err(dev_from_gk20a(g), | 2194 | gk20a_err(dev_from_gk20a(g), |
2187 | "query golden image size failed"); | 2195 | "query golden image size failed"); |
@@ -2190,7 +2198,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) | |||
2190 | op.method.addr = | 2198 | op.method.addr = |
2191 | gr_fecs_method_push_adr_discover_zcull_image_size_v(); | 2199 | gr_fecs_method_push_adr_discover_zcull_image_size_v(); |
2192 | op.mailbox.ret = &g->gr.ctx_vars.zcull_ctxsw_image_size; | 2200 | op.mailbox.ret = &g->gr.ctx_vars.zcull_ctxsw_image_size; |
2193 | ret = gr_gk20a_submit_fecs_method_op(g, op); | 2201 | ret = gr_gk20a_submit_fecs_method_op(g, op, false); |
2194 | if (ret) { | 2202 | if (ret) { |
2195 | gk20a_err(dev_from_gk20a(g), | 2203 | gk20a_err(dev_from_gk20a(g), |
2196 | "query zcull ctx image size failed"); | 2204 | "query zcull ctx image size failed"); |
@@ -2199,7 +2207,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) | |||
2199 | op.method.addr = | 2207 | op.method.addr = |
2200 | gr_fecs_method_push_adr_discover_pm_image_size_v(); | 2208 | gr_fecs_method_push_adr_discover_pm_image_size_v(); |
2201 | op.mailbox.ret = &pm_ctx_image_size; | 2209 | op.mailbox.ret = &pm_ctx_image_size; |
2202 | ret = gr_gk20a_submit_fecs_method_op(g, op); | 2210 | ret = gr_gk20a_submit_fecs_method_op(g, op, false); |
2203 | if (ret) { | 2211 | if (ret) { |
2204 | gk20a_err(dev_from_gk20a(g), | 2212 | gk20a_err(dev_from_gk20a(g), |
2205 | "query pm ctx image size failed"); | 2213 | "query pm ctx image size failed"); |
@@ -5798,7 +5806,7 @@ int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size) | |||
5798 | .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, | 5806 | .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, |
5799 | .mailbox.ok = 0, | 5807 | .mailbox.ok = 0, |
5800 | .cond.fail = GR_IS_UCODE_OP_SKIP, | 5808 | .cond.fail = GR_IS_UCODE_OP_SKIP, |
5801 | .mailbox.fail = 0}); | 5809 | .mailbox.fail = 0}, false); |
5802 | } | 5810 | } |
5803 | 5811 | ||
5804 | int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr) | 5812 | int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr) |
@@ -5816,7 +5824,7 @@ int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr) | |||
5816 | .cond.ok = GR_IS_UCODE_OP_EQUAL, | 5824 | .cond.ok = GR_IS_UCODE_OP_EQUAL, |
5817 | .mailbox.ok = 1, | 5825 | .mailbox.ok = 1, |
5818 | .cond.fail = GR_IS_UCODE_OP_SKIP, | 5826 | .cond.fail = GR_IS_UCODE_OP_SKIP, |
5819 | .mailbox.fail = 0}); | 5827 | .mailbox.fail = 0}, false); |
5820 | } | 5828 | } |
5821 | 5829 | ||
5822 | int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va) | 5830 | int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va) |
@@ -5832,7 +5840,7 @@ int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va) | |||
5832 | .cond.ok = GR_IS_UCODE_OP_EQUAL, | 5840 | .cond.ok = GR_IS_UCODE_OP_EQUAL, |
5833 | .mailbox.ok = 1, | 5841 | .mailbox.ok = 1, |
5834 | .cond.fail = GR_IS_UCODE_OP_SKIP, | 5842 | .cond.fail = GR_IS_UCODE_OP_SKIP, |
5835 | .mailbox.fail = 0}); | 5843 | .mailbox.fail = 0}, false); |
5836 | } | 5844 | } |
5837 | 5845 | ||
5838 | int gk20a_gr_suspend(struct gk20a *g) | 5846 | int gk20a_gr_suspend(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 0d511499..4b94f863 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -28,6 +28,7 @@ | |||
28 | 28 | ||
29 | #define GR_IDLE_CHECK_DEFAULT 100 /* usec */ | 29 | #define GR_IDLE_CHECK_DEFAULT 100 /* usec */ |
30 | #define GR_IDLE_CHECK_MAX 5000 /* usec */ | 30 | #define GR_IDLE_CHECK_MAX 5000 /* usec */ |
31 | #define GR_FECS_POLL_INTERVAL 5 /* usec */ | ||
31 | 32 | ||
32 | #define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF | 33 | #define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF |
33 | #define INVALID_MAX_WAYS 0xFFFFFFFF | 34 | #define INVALID_MAX_WAYS 0xFFFFFFFF |
@@ -491,7 +492,8 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, | |||
491 | u32 expect_delay); | 492 | u32 expect_delay); |
492 | int gr_gk20a_init_ctx_state(struct gk20a *g); | 493 | int gr_gk20a_init_ctx_state(struct gk20a *g); |
493 | int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | 494 | int gr_gk20a_submit_fecs_method_op(struct gk20a *g, |
494 | struct fecs_method_op_gk20a op); | 495 | struct fecs_method_op_gk20a op, |
496 | bool sleepduringwait); | ||
495 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | 497 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, |
496 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | 498 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, |
497 | u32 class, u32 padding); | 499 | u32 class, u32 padding); |