From aa96b6bd1efa1e26a757080137486884972d248c Mon Sep 17 00:00:00 2001 From: Vijayakumar Date: Tue, 10 Feb 2015 15:18:54 +0530 Subject: gpu: nvgpu: optimize fecs status polling bug 200078367 using udelay for fecs status polling during GR init phase brings down fecs transaction time to < 20usec from few hundred usec. Change-Id: I61a27daaf1187ac086a42779b46aa3fbee3b37f2 Signed-off-by: Vijayakumar Reviewed-on: http://git-master/r/691918 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 44 ++++++++++++++++++++++---------------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 +++- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 7 +++--- 3 files changed, 32 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 8869f4c4..c6f4a336 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -365,16 +365,19 @@ static int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long end_jiffies, static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, u32 *mailbox_ret, u32 opc_success, u32 mailbox_ok, u32 opc_fail, - u32 mailbox_fail) + u32 mailbox_fail, bool sleepduringwait) { unsigned long end_jiffies = jiffies + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); - u32 delay = GR_IDLE_CHECK_DEFAULT; + u32 delay = GR_FECS_POLL_INTERVAL; u32 check = WAIT_UCODE_LOOP; u32 reg; gk20a_dbg_fn(""); + if (sleepduringwait) + delay = GR_IDLE_CHECK_DEFAULT; + while (check == WAIT_UCODE_LOOP) { if (!time_before(jiffies, end_jiffies) && tegra_platform_is_silicon()) @@ -448,8 +451,11 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, break; } - usleep_range(delay, delay * 2); - delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + if (sleepduringwait) { + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + } else + udelay(delay); } if (check == WAIT_UCODE_TIMEOUT) { @@ -472,7 +478,8 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, /* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...) * We should replace most, if not all, fecs method calls to this instead. */ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, - struct fecs_method_op_gk20a op) + struct fecs_method_op_gk20a op, + bool sleepduringwait) { struct gr_gk20a *gr = &g->gr; int ret; @@ -497,7 +504,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret, op.cond.ok, op.mailbox.ok, - op.cond.fail, op.mailbox.fail); + op.cond.fail, op.mailbox.fail, + sleepduringwait); mutex_unlock(&gr->fecs_mutex); @@ -515,7 +523,7 @@ static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret) .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), }, .cond.ok = GR_IS_UCODE_OP_EQUAL, - .cond.fail = GR_IS_UCODE_OP_EQUAL }); + .cond.fail = GR_IS_UCODE_OP_EQUAL }, true); } /* Stop processing (stall) context switches at FECS. @@ -548,7 +556,7 @@ int gr_gk20a_halt_pipe(struct gk20a *g) .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), }, .cond.ok = GR_IS_UCODE_OP_EQUAL, - .cond.fail = GR_IS_UCODE_OP_EQUAL }); + .cond.fail = GR_IS_UCODE_OP_EQUAL }, false); } @@ -686,7 +694,7 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, .ok = 0x10, .fail = 0x20, }, .cond.ok = GR_IS_UCODE_OP_AND, - .cond.fail = GR_IS_UCODE_OP_AND}); + .cond.fail = GR_IS_UCODE_OP_AND}, true); if (ret) gk20a_err(dev_from_gk20a(g), "bind channel instance failed"); @@ -1382,7 +1390,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) }, .cond.ok = GR_IS_UCODE_OP_AND, .cond.fail = GR_IS_UCODE_OP_AND, - }); + }, true); if (ret) gk20a_err(dev_from_gk20a(g), "save context image failed"); @@ -1668,7 +1676,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, .ok = gr_fecs_ctxsw_mailbox_value_pass_v(), .fail = 0}, .cond.ok = GR_IS_UCODE_OP_EQUAL, - .cond.fail = GR_IS_UCODE_OP_SKIP}); + .cond.fail = GR_IS_UCODE_OP_SKIP}, false); if (ret) gk20a_err(dev_from_gk20a(g), @@ -2145,7 +2153,7 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g) ret = gr_gk20a_ctx_wait_ucode(g, 0, NULL, GR_IS_UCODE_OP_EQUAL, eUcodeHandshakeInitComplete, - GR_IS_UCODE_OP_SKIP, 0); + GR_IS_UCODE_OP_SKIP, 0, false); if (ret) { gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout"); return ret; @@ -2181,7 +2189,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) op.method.addr = gr_fecs_method_push_adr_discover_image_size_v(); op.mailbox.ret = &g->gr.ctx_vars.golden_image_size; - ret = gr_gk20a_submit_fecs_method_op(g, op); + ret = gr_gk20a_submit_fecs_method_op(g, op, false); if (ret) { gk20a_err(dev_from_gk20a(g), "query golden image size failed"); @@ -2190,7 +2198,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) op.method.addr = gr_fecs_method_push_adr_discover_zcull_image_size_v(); op.mailbox.ret = &g->gr.ctx_vars.zcull_ctxsw_image_size; - ret = gr_gk20a_submit_fecs_method_op(g, op); + ret = gr_gk20a_submit_fecs_method_op(g, op, false); if (ret) { gk20a_err(dev_from_gk20a(g), "query zcull ctx image size failed"); @@ -2199,7 +2207,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) op.method.addr = gr_fecs_method_push_adr_discover_pm_image_size_v(); op.mailbox.ret = &pm_ctx_image_size; - ret = gr_gk20a_submit_fecs_method_op(g, op); + ret = gr_gk20a_submit_fecs_method_op(g, op, false); if (ret) { gk20a_err(dev_from_gk20a(g), "query pm ctx image size failed"); @@ -5798,7 +5806,7 @@ int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size) .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL, .mailbox.ok = 0, .cond.fail = GR_IS_UCODE_OP_SKIP, - .mailbox.fail = 0}); + .mailbox.fail = 0}, false); } int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr) @@ -5816,7 +5824,7 @@ int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr) .cond.ok = GR_IS_UCODE_OP_EQUAL, .mailbox.ok = 1, .cond.fail = GR_IS_UCODE_OP_SKIP, - .mailbox.fail = 0}); + .mailbox.fail = 0}, false); } int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va) @@ -5832,7 +5840,7 @@ int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va) .cond.ok = GR_IS_UCODE_OP_EQUAL, .mailbox.ok = 1, .cond.fail = GR_IS_UCODE_OP_SKIP, - .mailbox.fail = 0}); + .mailbox.fail = 0}, false); } int gk20a_gr_suspend(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 0d511499..4b94f863 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -28,6 +28,7 @@ #define GR_IDLE_CHECK_DEFAULT 100 /* usec */ #define GR_IDLE_CHECK_MAX 5000 /* usec */ +#define GR_FECS_POLL_INTERVAL 5 /* usec */ #define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF #define INVALID_MAX_WAYS 0xFFFFFFFF @@ -491,7 +492,8 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, u32 expect_delay); int gr_gk20a_init_ctx_state(struct gk20a *g); int gr_gk20a_submit_fecs_method_op(struct gk20a *g, - struct fecs_method_op_gk20a op); + struct fecs_method_op_gk20a op, + bool sleepduringwait); int gr_gk20a_alloc_gr_ctx(struct gk20a *g, struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, u32 class, u32 padding); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 5b49ba8b..84391377 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -1,7 +1,7 @@ /* * GM20B GPC MMU * - * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -672,7 +672,7 @@ static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) static int gr_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val) { unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); - unsigned long delay = GR_IDLE_CHECK_DEFAULT; + unsigned long delay = GR_FECS_POLL_INTERVAL; u32 reg; gk20a_dbg_fn(""); @@ -681,8 +681,7 @@ static int gr_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val) reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)); if (reg == val) return 0; - usleep_range(delay, delay * 2); - delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); + udelay(delay); } while (time_before(jiffies, end_jiffies) || !tegra_platform_is_silicon()); -- cgit v1.2.2