diff options
author | Kumar, Mahesh <mahesh1.kumar@intel.com> | 2017-08-17 09:45:23 -0400 |
---|---|---|
committer | Maarten Lankhorst <maarten.lankhorst@linux.intel.com> | 2017-09-07 07:34:05 -0400 |
commit | 7e452fdbfca85cd279ecb0d8e9ab6fdd1e8c97fc (patch) | |
tree | 70b90a700273e4b4c0e53377e36e1a027e6fd9dd /drivers/gpu/drm/i915/intel_pm.c | |
parent | 0b4d7cbff2be27c89617e1ca60a546019f7ff276 (diff) |
drm/i915/skl+: Optimize WM calculation
Plane configuration parameters don't change between WM-level
calculations. Currently we compute the same parameters 8 times, once
for each WM level.
This patch optimizes this by calculating these parameters once at the
beginning and reusing them during each WM-level calculation.
Changes since V1:
- rebase on top of Rodrigo's series for CNL
Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170817134529.2839-3-mahesh1.kumar@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 190 |
1 files changed, 105 insertions, 85 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0201816a4229..2dd3af3debe9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c | |||
@@ -4376,134 +4376,146 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, | |||
4376 | downscale_amount); | 4376 | downscale_amount); |
4377 | } | 4377 | } |
4378 | 4378 | ||
4379 | static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, | 4379 | static int |
4380 | struct intel_crtc_state *cstate, | 4380 | skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, |
4381 | const struct intel_plane_state *intel_pstate, | 4381 | struct intel_crtc_state *cstate, |
4382 | uint16_t ddb_allocation, | 4382 | const struct intel_plane_state *intel_pstate, |
4383 | int level, | 4383 | struct skl_wm_params *wp) |
4384 | uint16_t *out_blocks, /* out */ | ||
4385 | uint8_t *out_lines, /* out */ | ||
4386 | bool *enabled /* out */) | ||
4387 | { | 4384 | { |
4388 | struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); | 4385 | struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); |
4389 | const struct drm_plane_state *pstate = &intel_pstate->base; | 4386 | const struct drm_plane_state *pstate = &intel_pstate->base; |
4390 | const struct drm_framebuffer *fb = pstate->fb; | 4387 | const struct drm_framebuffer *fb = pstate->fb; |
4391 | uint32_t latency = dev_priv->wm.skl_latency[level]; | ||
4392 | uint_fixed_16_16_t method1, method2; | ||
4393 | uint_fixed_16_16_t plane_blocks_per_line; | ||
4394 | uint_fixed_16_16_t selected_result; | ||
4395 | uint32_t interm_pbpl; | 4388 | uint32_t interm_pbpl; |
4396 | uint32_t plane_bytes_per_line; | ||
4397 | uint32_t res_blocks, res_lines; | ||
4398 | uint8_t cpp; | ||
4399 | uint32_t width = 0; | ||
4400 | uint32_t plane_pixel_rate; | ||
4401 | uint_fixed_16_16_t y_tile_minimum; | ||
4402 | uint32_t y_min_scanlines; | ||
4403 | struct intel_atomic_state *state = | 4389 | struct intel_atomic_state *state = |
4404 | to_intel_atomic_state(cstate->base.state); | 4390 | to_intel_atomic_state(cstate->base.state); |
4405 | bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); | 4391 | bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); |
4406 | bool y_tiled, x_tiled; | ||
4407 | 4392 | ||
4408 | if (latency == 0 || | 4393 | if (!intel_wm_plane_visible(cstate, intel_pstate)) |
4409 | !intel_wm_plane_visible(cstate, intel_pstate)) { | ||
4410 | *enabled = false; | ||
4411 | return 0; | 4394 | return 0; |
4412 | } | ||
4413 | 4395 | ||
4414 | y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || | 4396 | wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || |
4415 | fb->modifier == I915_FORMAT_MOD_Yf_TILED || | 4397 | fb->modifier == I915_FORMAT_MOD_Yf_TILED || |
4416 | fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || | 4398 | fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || |
4417 | fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; | 4399 | fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; |
4418 | x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; | 4400 | wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; |
4419 | 4401 | wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || | |
4420 | /* Display WA #1141: kbl,cfl */ | 4402 | fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; |
4421 | if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && | ||
4422 | dev_priv->ipc_enabled) | ||
4423 | latency += 4; | ||
4424 | |||
4425 | if (apply_memory_bw_wa && x_tiled) | ||
4426 | latency += 15; | ||
4427 | 4403 | ||
4428 | if (plane->id == PLANE_CURSOR) { | 4404 | if (plane->id == PLANE_CURSOR) { |
4429 | width = intel_pstate->base.crtc_w; | 4405 | wp->width = intel_pstate->base.crtc_w; |
4430 | } else { | 4406 | } else { |
4431 | /* | 4407 | /* |
4432 | * Src coordinates are already rotated by 270 degrees for | 4408 | * Src coordinates are already rotated by 270 degrees for |
4433 | * the 90/270 degree plane rotation cases (to match the | 4409 | * the 90/270 degree plane rotation cases (to match the |
4434 | * GTT mapping), hence no need to account for rotation here. | 4410 | * GTT mapping), hence no need to account for rotation here. |
4435 | */ | 4411 | */ |
4436 | width = drm_rect_width(&intel_pstate->base.src) >> 16; | 4412 | wp->width = drm_rect_width(&intel_pstate->base.src) >> 16; |
4437 | } | 4413 | } |
4438 | 4414 | ||
4439 | cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : | 4415 | wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : |
4440 | fb->format->cpp[0]; | 4416 | fb->format->cpp[0]; |
4441 | plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); | 4417 | wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, |
4418 | intel_pstate); | ||
4442 | 4419 | ||
4443 | if (drm_rotation_90_or_270(pstate->rotation)) { | 4420 | if (drm_rotation_90_or_270(pstate->rotation)) { |
4444 | 4421 | ||
4445 | switch (cpp) { | 4422 | switch (wp->cpp) { |
4446 | case 1: | 4423 | case 1: |
4447 | y_min_scanlines = 16; | 4424 | wp->y_min_scanlines = 16; |
4448 | break; | 4425 | break; |
4449 | case 2: | 4426 | case 2: |
4450 | y_min_scanlines = 8; | 4427 | wp->y_min_scanlines = 8; |
4451 | break; | 4428 | break; |
4452 | case 4: | 4429 | case 4: |
4453 | y_min_scanlines = 4; | 4430 | wp->y_min_scanlines = 4; |
4454 | break; | 4431 | break; |
4455 | default: | 4432 | default: |
4456 | MISSING_CASE(cpp); | 4433 | MISSING_CASE(wp->cpp); |
4457 | return -EINVAL; | 4434 | return -EINVAL; |
4458 | } | 4435 | } |
4459 | } else { | 4436 | } else { |
4460 | y_min_scanlines = 4; | 4437 | wp->y_min_scanlines = 4; |
4461 | } | 4438 | } |
4462 | 4439 | ||
4463 | if (apply_memory_bw_wa) | 4440 | if (apply_memory_bw_wa) |
4464 | y_min_scanlines *= 2; | 4441 | wp->y_min_scanlines *= 2; |
4465 | 4442 | ||
4466 | plane_bytes_per_line = width * cpp; | 4443 | wp->plane_bytes_per_line = wp->width * wp->cpp; |
4467 | if (y_tiled) { | 4444 | if (wp->y_tiled) { |
4468 | interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * | 4445 | interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line * |
4469 | y_min_scanlines, 512); | 4446 | wp->y_min_scanlines, 512); |
4470 | 4447 | ||
4471 | if (INTEL_GEN(dev_priv) >= 10) | 4448 | if (INTEL_GEN(dev_priv) >= 10) |
4472 | interm_pbpl++; | 4449 | interm_pbpl++; |
4473 | 4450 | ||
4474 | plane_blocks_per_line = div_fixed16(interm_pbpl, | 4451 | wp->plane_blocks_per_line = div_fixed16(interm_pbpl, |
4475 | y_min_scanlines); | 4452 | wp->y_min_scanlines); |
4476 | } else if (x_tiled && INTEL_GEN(dev_priv) == 9) { | 4453 | } else if (wp->x_tiled && IS_GEN9(dev_priv)) { |
4477 | interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); | 4454 | interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512); |
4478 | plane_blocks_per_line = u32_to_fixed16(interm_pbpl); | 4455 | wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); |
4479 | } else { | 4456 | } else { |
4480 | interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; | 4457 | interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1; |
4481 | plane_blocks_per_line = u32_to_fixed16(interm_pbpl); | 4458 | wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); |
4482 | } | 4459 | } |
4483 | 4460 | ||
4484 | method1 = skl_wm_method1(dev_priv, plane_pixel_rate, cpp, latency); | 4461 | wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines, |
4485 | method2 = skl_wm_method2(plane_pixel_rate, | 4462 | wp->plane_blocks_per_line); |
4463 | wp->linetime_us = fixed16_to_u32_round_up( | ||
4464 | intel_get_linetime_us(cstate)); | ||
4465 | |||
4466 | return 0; | ||
4467 | } | ||
4468 | |||
4469 | static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, | ||
4470 | struct intel_crtc_state *cstate, | ||
4471 | const struct intel_plane_state *intel_pstate, | ||
4472 | uint16_t ddb_allocation, | ||
4473 | int level, | ||
4474 | const struct skl_wm_params *wp, | ||
4475 | uint16_t *out_blocks, /* out */ | ||
4476 | uint8_t *out_lines, /* out */ | ||
4477 | bool *enabled /* out */) | ||
4478 | { | ||
4479 | const struct drm_plane_state *pstate = &intel_pstate->base; | ||
4480 | uint32_t latency = dev_priv->wm.skl_latency[level]; | ||
4481 | uint_fixed_16_16_t method1, method2; | ||
4482 | uint_fixed_16_16_t selected_result; | ||
4483 | uint32_t res_blocks, res_lines; | ||
4484 | struct intel_atomic_state *state = | ||
4485 | to_intel_atomic_state(cstate->base.state); | ||
4486 | bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); | ||
4487 | |||
4488 | if (latency == 0 || | ||
4489 | !intel_wm_plane_visible(cstate, intel_pstate)) { | ||
4490 | *enabled = false; | ||
4491 | return 0; | ||
4492 | } | ||
4493 | |||
4494 | /* Display WA #1141: kbl,cfl */ | ||
4495 | if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && | ||
4496 | dev_priv->ipc_enabled) | ||
4497 | latency += 4; | ||
4498 | |||
4499 | if (apply_memory_bw_wa && wp->x_tiled) | ||
4500 | latency += 15; | ||
4501 | |||
4502 | method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, | ||
4503 | wp->cpp, latency); | ||
4504 | method2 = skl_wm_method2(wp->plane_pixel_rate, | ||
4486 | cstate->base.adjusted_mode.crtc_htotal, | 4505 | cstate->base.adjusted_mode.crtc_htotal, |
4487 | latency, | 4506 | latency, |
4488 | plane_blocks_per_line); | 4507 | wp->plane_blocks_per_line); |
4489 | |||
4490 | y_tile_minimum = mul_u32_fixed16(y_min_scanlines, | ||
4491 | plane_blocks_per_line); | ||
4492 | 4508 | ||
4493 | if (y_tiled) { | 4509 | if (wp->y_tiled) { |
4494 | selected_result = max_fixed16(method2, y_tile_minimum); | 4510 | selected_result = max_fixed16(method2, wp->y_tile_minimum); |
4495 | } else { | 4511 | } else { |
4496 | uint32_t linetime_us; | 4512 | if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal / |
4497 | 4513 | 512 < 1) && (wp->plane_bytes_per_line / 512 < 1)) | |
4498 | linetime_us = fixed16_to_u32_round_up( | ||
4499 | intel_get_linetime_us(cstate)); | ||
4500 | if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && | ||
4501 | (plane_bytes_per_line / 512 < 1)) | ||
4502 | selected_result = method2; | 4514 | selected_result = method2; |
4503 | else if (ddb_allocation >= | 4515 | else if (ddb_allocation >= |
4504 | fixed16_to_u32_round_up(plane_blocks_per_line)) | 4516 | fixed16_to_u32_round_up(wp->plane_blocks_per_line)) |
4505 | selected_result = min_fixed16(method1, method2); | 4517 | selected_result = min_fixed16(method1, method2); |
4506 | else if (latency >= linetime_us) | 4518 | else if (latency >= wp->linetime_us) |
4507 | selected_result = min_fixed16(method1, method2); | 4519 | selected_result = min_fixed16(method1, method2); |
4508 | else | 4520 | else |
4509 | selected_result = method1; | 4521 | selected_result = method1; |
@@ -4511,19 +4523,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, | |||
4511 | 4523 | ||
4512 | res_blocks = fixed16_to_u32_round_up(selected_result) + 1; | 4524 | res_blocks = fixed16_to_u32_round_up(selected_result) + 1; |
4513 | res_lines = div_round_up_fixed16(selected_result, | 4525 | res_lines = div_round_up_fixed16(selected_result, |
4514 | plane_blocks_per_line); | 4526 | wp->plane_blocks_per_line); |
4515 | 4527 | ||
4516 | /* Display WA #1125: skl,bxt,kbl,glk */ | 4528 | /* Display WA #1125: skl,bxt,kbl,glk */ |
4517 | if (level == 0 && | 4529 | if (level == 0 && wp->rc_surface) |
4518 | (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || | 4530 | res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum); |
4519 | fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS)) | ||
4520 | res_blocks += fixed16_to_u32_round_up(y_tile_minimum); | ||
4521 | 4531 | ||
4522 | /* Display WA #1126: skl,bxt,kbl,glk */ | 4532 | /* Display WA #1126: skl,bxt,kbl,glk */ |
4523 | if (level >= 1 && level <= 7) { | 4533 | if (level >= 1 && level <= 7) { |
4524 | if (y_tiled) { | 4534 | if (wp->y_tiled) { |
4525 | res_blocks += fixed16_to_u32_round_up(y_tile_minimum); | 4535 | res_blocks += fixed16_to_u32_round_up( |
4526 | res_lines += y_min_scanlines; | 4536 | wp->y_tile_minimum); |
4537 | res_lines += wp->y_min_scanlines; | ||
4527 | } else { | 4538 | } else { |
4528 | res_blocks++; | 4539 | res_blocks++; |
4529 | } | 4540 | } |
@@ -4561,6 +4572,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, | |||
4561 | struct skl_ddb_allocation *ddb, | 4572 | struct skl_ddb_allocation *ddb, |
4562 | struct intel_crtc_state *cstate, | 4573 | struct intel_crtc_state *cstate, |
4563 | const struct intel_plane_state *intel_pstate, | 4574 | const struct intel_plane_state *intel_pstate, |
4575 | const struct skl_wm_params *wm_params, | ||
4564 | struct skl_plane_wm *wm) | 4576 | struct skl_plane_wm *wm) |
4565 | { | 4577 | { |
4566 | struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); | 4578 | struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); |
@@ -4584,6 +4596,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, | |||
4584 | intel_pstate, | 4596 | intel_pstate, |
4585 | ddb_blocks, | 4597 | ddb_blocks, |
4586 | level, | 4598 | level, |
4599 | wm_params, | ||
4587 | &result->plane_res_b, | 4600 | &result->plane_res_b, |
4588 | &result->plane_res_l, | 4601 | &result->plane_res_l, |
4589 | &result->plane_en); | 4602 | &result->plane_en); |
@@ -4648,11 +4661,18 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, | |||
4648 | const struct intel_plane_state *intel_pstate = | 4661 | const struct intel_plane_state *intel_pstate = |
4649 | to_intel_plane_state(pstate); | 4662 | to_intel_plane_state(pstate); |
4650 | enum plane_id plane_id = to_intel_plane(plane)->id; | 4663 | enum plane_id plane_id = to_intel_plane(plane)->id; |
4664 | struct skl_wm_params wm_params; | ||
4651 | 4665 | ||
4652 | wm = &pipe_wm->planes[plane_id]; | 4666 | wm = &pipe_wm->planes[plane_id]; |
4667 | memset(&wm_params, 0, sizeof(struct skl_wm_params)); | ||
4668 | |||
4669 | ret = skl_compute_plane_wm_params(dev_priv, cstate, | ||
4670 | intel_pstate, &wm_params); | ||
4671 | if (ret) | ||
4672 | return ret; | ||
4653 | 4673 | ||
4654 | ret = skl_compute_wm_levels(dev_priv, ddb, cstate, | 4674 | ret = skl_compute_wm_levels(dev_priv, ddb, cstate, |
4655 | intel_pstate, wm); | 4675 | intel_pstate, &wm_params, wm); |
4656 | if (ret) | 4676 | if (ret) |
4657 | return ret; | 4677 | return ret; |
4658 | skl_compute_transition_wm(cstate, &wm->trans_wm); | 4678 | skl_compute_transition_wm(cstate, &wm->trans_wm); |