aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_pm.c
diff options
context:
space:
mode:
authorKumar, Mahesh <mahesh1.kumar@intel.com>2017-08-17 09:45:23 -0400
committerMaarten Lankhorst <maarten.lankhorst@linux.intel.com>2017-09-07 07:34:05 -0400
commit7e452fdbfca85cd279ecb0d8e9ab6fdd1e8c97fc (patch)
tree70b90a700273e4b4c0e53377e36e1a027e6fd9dd /drivers/gpu/drm/i915/intel_pm.c
parent0b4d7cbff2be27c89617e1ca60a546019f7ff276 (diff)
drm/i915/skl+: Optimize WM calculation
Plane configuration parameters don't change between WM-level calculations. Currently we compute the same parameters 8 times, once for each WM level. This patch optimizes this by calculating these parameters once at the beginning and reusing them during each WM-level calculation. Changes since V1: - rebase on top of Rodrigo's series for CNL Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170817134529.2839-3-mahesh1.kumar@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c190
1 files changed, 105 insertions, 85 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0201816a4229..2dd3af3debe9 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4376,134 +4376,146 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4376 downscale_amount); 4376 downscale_amount);
4377} 4377}
4378 4378
4379static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, 4379static int
4380 struct intel_crtc_state *cstate, 4380skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
4381 const struct intel_plane_state *intel_pstate, 4381 struct intel_crtc_state *cstate,
4382 uint16_t ddb_allocation, 4382 const struct intel_plane_state *intel_pstate,
4383 int level, 4383 struct skl_wm_params *wp)
4384 uint16_t *out_blocks, /* out */
4385 uint8_t *out_lines, /* out */
4386 bool *enabled /* out */)
4387{ 4384{
4388 struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); 4385 struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
4389 const struct drm_plane_state *pstate = &intel_pstate->base; 4386 const struct drm_plane_state *pstate = &intel_pstate->base;
4390 const struct drm_framebuffer *fb = pstate->fb; 4387 const struct drm_framebuffer *fb = pstate->fb;
4391 uint32_t latency = dev_priv->wm.skl_latency[level];
4392 uint_fixed_16_16_t method1, method2;
4393 uint_fixed_16_16_t plane_blocks_per_line;
4394 uint_fixed_16_16_t selected_result;
4395 uint32_t interm_pbpl; 4388 uint32_t interm_pbpl;
4396 uint32_t plane_bytes_per_line;
4397 uint32_t res_blocks, res_lines;
4398 uint8_t cpp;
4399 uint32_t width = 0;
4400 uint32_t plane_pixel_rate;
4401 uint_fixed_16_16_t y_tile_minimum;
4402 uint32_t y_min_scanlines;
4403 struct intel_atomic_state *state = 4389 struct intel_atomic_state *state =
4404 to_intel_atomic_state(cstate->base.state); 4390 to_intel_atomic_state(cstate->base.state);
4405 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); 4391 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4406 bool y_tiled, x_tiled;
4407 4392
4408 if (latency == 0 || 4393 if (!intel_wm_plane_visible(cstate, intel_pstate))
4409 !intel_wm_plane_visible(cstate, intel_pstate)) {
4410 *enabled = false;
4411 return 0; 4394 return 0;
4412 }
4413 4395
4414 y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || 4396 wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
4415 fb->modifier == I915_FORMAT_MOD_Yf_TILED || 4397 fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
4416 fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || 4398 fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4417 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; 4399 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4418 x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; 4400 wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
4419 4401 wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4420 /* Display WA #1141: kbl,cfl */ 4402 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4421 if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
4422 dev_priv->ipc_enabled)
4423 latency += 4;
4424
4425 if (apply_memory_bw_wa && x_tiled)
4426 latency += 15;
4427 4403
4428 if (plane->id == PLANE_CURSOR) { 4404 if (plane->id == PLANE_CURSOR) {
4429 width = intel_pstate->base.crtc_w; 4405 wp->width = intel_pstate->base.crtc_w;
4430 } else { 4406 } else {
4431 /* 4407 /*
4432 * Src coordinates are already rotated by 270 degrees for 4408 * Src coordinates are already rotated by 270 degrees for
4433 * the 90/270 degree plane rotation cases (to match the 4409 * the 90/270 degree plane rotation cases (to match the
4434 * GTT mapping), hence no need to account for rotation here. 4410 * GTT mapping), hence no need to account for rotation here.
4435 */ 4411 */
4436 width = drm_rect_width(&intel_pstate->base.src) >> 16; 4412 wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
4437 } 4413 }
4438 4414
4439 cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : 4415 wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
4440 fb->format->cpp[0]; 4416 fb->format->cpp[0];
4441 plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); 4417 wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
4418 intel_pstate);
4442 4419
4443 if (drm_rotation_90_or_270(pstate->rotation)) { 4420 if (drm_rotation_90_or_270(pstate->rotation)) {
4444 4421
4445 switch (cpp) { 4422 switch (wp->cpp) {
4446 case 1: 4423 case 1:
4447 y_min_scanlines = 16; 4424 wp->y_min_scanlines = 16;
4448 break; 4425 break;
4449 case 2: 4426 case 2:
4450 y_min_scanlines = 8; 4427 wp->y_min_scanlines = 8;
4451 break; 4428 break;
4452 case 4: 4429 case 4:
4453 y_min_scanlines = 4; 4430 wp->y_min_scanlines = 4;
4454 break; 4431 break;
4455 default: 4432 default:
4456 MISSING_CASE(cpp); 4433 MISSING_CASE(wp->cpp);
4457 return -EINVAL; 4434 return -EINVAL;
4458 } 4435 }
4459 } else { 4436 } else {
4460 y_min_scanlines = 4; 4437 wp->y_min_scanlines = 4;
4461 } 4438 }
4462 4439
4463 if (apply_memory_bw_wa) 4440 if (apply_memory_bw_wa)
4464 y_min_scanlines *= 2; 4441 wp->y_min_scanlines *= 2;
4465 4442
4466 plane_bytes_per_line = width * cpp; 4443 wp->plane_bytes_per_line = wp->width * wp->cpp;
4467 if (y_tiled) { 4444 if (wp->y_tiled) {
4468 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * 4445 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4469 y_min_scanlines, 512); 4446 wp->y_min_scanlines, 512);
4470 4447
4471 if (INTEL_GEN(dev_priv) >= 10) 4448 if (INTEL_GEN(dev_priv) >= 10)
4472 interm_pbpl++; 4449 interm_pbpl++;
4473 4450
4474 plane_blocks_per_line = div_fixed16(interm_pbpl, 4451 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4475 y_min_scanlines); 4452 wp->y_min_scanlines);
4476 } else if (x_tiled && INTEL_GEN(dev_priv) == 9) { 4453 } else if (wp->x_tiled && IS_GEN9(dev_priv)) {
4477 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); 4454 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
4478 plane_blocks_per_line = u32_to_fixed16(interm_pbpl); 4455 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4479 } else { 4456 } else {
4480 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; 4457 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
4481 plane_blocks_per_line = u32_to_fixed16(interm_pbpl); 4458 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4482 } 4459 }
4483 4460
4484 method1 = skl_wm_method1(dev_priv, plane_pixel_rate, cpp, latency); 4461 wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4485 method2 = skl_wm_method2(plane_pixel_rate, 4462 wp->plane_blocks_per_line);
4463 wp->linetime_us = fixed16_to_u32_round_up(
4464 intel_get_linetime_us(cstate));
4465
4466 return 0;
4467}
4468
4469static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
4470 struct intel_crtc_state *cstate,
4471 const struct intel_plane_state *intel_pstate,
4472 uint16_t ddb_allocation,
4473 int level,
4474 const struct skl_wm_params *wp,
4475 uint16_t *out_blocks, /* out */
4476 uint8_t *out_lines, /* out */
4477 bool *enabled /* out */)
4478{
4479 const struct drm_plane_state *pstate = &intel_pstate->base;
4480 uint32_t latency = dev_priv->wm.skl_latency[level];
4481 uint_fixed_16_16_t method1, method2;
4482 uint_fixed_16_16_t selected_result;
4483 uint32_t res_blocks, res_lines;
4484 struct intel_atomic_state *state =
4485 to_intel_atomic_state(cstate->base.state);
4486 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4487
4488 if (latency == 0 ||
4489 !intel_wm_plane_visible(cstate, intel_pstate)) {
4490 *enabled = false;
4491 return 0;
4492 }
4493
4494 /* Display WA #1141: kbl,cfl */
4495 if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
4496 dev_priv->ipc_enabled)
4497 latency += 4;
4498
4499 if (apply_memory_bw_wa && wp->x_tiled)
4500 latency += 15;
4501
4502 method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4503 wp->cpp, latency);
4504 method2 = skl_wm_method2(wp->plane_pixel_rate,
4486 cstate->base.adjusted_mode.crtc_htotal, 4505 cstate->base.adjusted_mode.crtc_htotal,
4487 latency, 4506 latency,
4488 plane_blocks_per_line); 4507 wp->plane_blocks_per_line);
4489
4490 y_tile_minimum = mul_u32_fixed16(y_min_scanlines,
4491 plane_blocks_per_line);
4492 4508
4493 if (y_tiled) { 4509 if (wp->y_tiled) {
4494 selected_result = max_fixed16(method2, y_tile_minimum); 4510 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4495 } else { 4511 } else {
4496 uint32_t linetime_us; 4512 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
4497 4513 512 < 1) && (wp->plane_bytes_per_line / 512 < 1))
4498 linetime_us = fixed16_to_u32_round_up(
4499 intel_get_linetime_us(cstate));
4500 if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
4501 (plane_bytes_per_line / 512 < 1))
4502 selected_result = method2; 4514 selected_result = method2;
4503 else if (ddb_allocation >= 4515 else if (ddb_allocation >=
4504 fixed16_to_u32_round_up(plane_blocks_per_line)) 4516 fixed16_to_u32_round_up(wp->plane_blocks_per_line))
4505 selected_result = min_fixed16(method1, method2); 4517 selected_result = min_fixed16(method1, method2);
4506 else if (latency >= linetime_us) 4518 else if (latency >= wp->linetime_us)
4507 selected_result = min_fixed16(method1, method2); 4519 selected_result = min_fixed16(method1, method2);
4508 else 4520 else
4509 selected_result = method1; 4521 selected_result = method1;
@@ -4511,19 +4523,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
4511 4523
4512 res_blocks = fixed16_to_u32_round_up(selected_result) + 1; 4524 res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4513 res_lines = div_round_up_fixed16(selected_result, 4525 res_lines = div_round_up_fixed16(selected_result,
4514 plane_blocks_per_line); 4526 wp->plane_blocks_per_line);
4515 4527
4516 /* Display WA #1125: skl,bxt,kbl,glk */ 4528 /* Display WA #1125: skl,bxt,kbl,glk */
4517 if (level == 0 && 4529 if (level == 0 && wp->rc_surface)
4518 (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || 4530 res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum);
4519 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS))
4520 res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
4521 4531
4522 /* Display WA #1126: skl,bxt,kbl,glk */ 4532 /* Display WA #1126: skl,bxt,kbl,glk */
4523 if (level >= 1 && level <= 7) { 4533 if (level >= 1 && level <= 7) {
4524 if (y_tiled) { 4534 if (wp->y_tiled) {
4525 res_blocks += fixed16_to_u32_round_up(y_tile_minimum); 4535 res_blocks += fixed16_to_u32_round_up(
4526 res_lines += y_min_scanlines; 4536 wp->y_tile_minimum);
4537 res_lines += wp->y_min_scanlines;
4527 } else { 4538 } else {
4528 res_blocks++; 4539 res_blocks++;
4529 } 4540 }
@@ -4561,6 +4572,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
4561 struct skl_ddb_allocation *ddb, 4572 struct skl_ddb_allocation *ddb,
4562 struct intel_crtc_state *cstate, 4573 struct intel_crtc_state *cstate,
4563 const struct intel_plane_state *intel_pstate, 4574 const struct intel_plane_state *intel_pstate,
4575 const struct skl_wm_params *wm_params,
4564 struct skl_plane_wm *wm) 4576 struct skl_plane_wm *wm)
4565{ 4577{
4566 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 4578 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
@@ -4584,6 +4596,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
4584 intel_pstate, 4596 intel_pstate,
4585 ddb_blocks, 4597 ddb_blocks,
4586 level, 4598 level,
4599 wm_params,
4587 &result->plane_res_b, 4600 &result->plane_res_b,
4588 &result->plane_res_l, 4601 &result->plane_res_l,
4589 &result->plane_en); 4602 &result->plane_en);
@@ -4648,11 +4661,18 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
4648 const struct intel_plane_state *intel_pstate = 4661 const struct intel_plane_state *intel_pstate =
4649 to_intel_plane_state(pstate); 4662 to_intel_plane_state(pstate);
4650 enum plane_id plane_id = to_intel_plane(plane)->id; 4663 enum plane_id plane_id = to_intel_plane(plane)->id;
4664 struct skl_wm_params wm_params;
4651 4665
4652 wm = &pipe_wm->planes[plane_id]; 4666 wm = &pipe_wm->planes[plane_id];
4667 memset(&wm_params, 0, sizeof(struct skl_wm_params));
4668
4669 ret = skl_compute_plane_wm_params(dev_priv, cstate,
4670 intel_pstate, &wm_params);
4671 if (ret)
4672 return ret;
4653 4673
4654 ret = skl_compute_wm_levels(dev_priv, ddb, cstate, 4674 ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
4655 intel_pstate, wm); 4675 intel_pstate, &wm_params, wm);
4656 if (ret) 4676 if (ret)
4657 return ret; 4677 return ret;
4658 skl_compute_transition_wm(cstate, &wm->trans_wm); 4678 skl_compute_transition_wm(cstate, &wm->trans_wm);