aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2018-12-05 00:00:09 -0500
committerDave Airlie <airlied@redhat.com>2018-12-05 00:00:14 -0500
commit818182dd1097fdc492aaef9b08755ea13274352d (patch)
treef0ea9232b7bf532486a504d9738ae6ac674b3098
parent167bfe534dc2de680ef706dbb903c9a7bfcb2dd8 (diff)
parent97c78f4d07e5033717c08b650462b3087ecfe8e8 (diff)
Merge tag 'imx-drm-next-2018-12-03' of git://git.pengutronix.de/git/pza/linux into drm-next
drm/imx: update image-convert with fixes for multi-tiled scaling

Update the ipu-v3 mem2mem image-convert code, with some fixes for race conditions, alignment issues, and visual artifacts due to tile alignment and scaling factor issues when scaling images larger than hardware limitations in multiple tiles. This will allow the V4L2 mem2mem scaler driver to write output images larger than 1024x1024 pixels.

Also switch drm/imx source files to SPDX license identifiers, constify struct clk_ops in imx-tve, and add a timeout warning to the busy wait in ipu_plane_disable().

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Philipp Zabel <p.zabel@pengutronix.de>
Link: https://patchwork.freedesktop.org/patch/msgid/1543835266.5647.1.camel@pengutronix.de
-rw-r--r--drivers/gpu/drm/imx/dw_hdmi-imx.c5
-rw-r--r--drivers/gpu/drm/imx/imx-drm-core.c11
-rw-r--r--drivers/gpu/drm/imx/imx-ldb.c10
-rw-r--r--drivers/gpu/drm/imx/imx-tve.c12
-rw-r--r--drivers/gpu/drm/imx/ipuv3-crtc.c10
-rw-r--r--drivers/gpu/drm/imx/ipuv3-plane.c18
-rw-r--r--drivers/gpu/drm/imx/parallel-display.c10
-rw-r--r--drivers/gpu/ipu-v3/ipu-cpmem.c52
-rw-r--r--drivers/gpu/ipu-v3/ipu-ic.c52
-rw-r--r--drivers/gpu/ipu-v3/ipu-image-convert.c1019
-rw-r--r--include/video/imx-ipu-v3.h9
11 files changed, 940 insertions, 268 deletions
diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c
index fe6becdcc29e..77a26fd3a44a 100644
--- a/drivers/gpu/drm/imx/dw_hdmi-imx.c
+++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c
@@ -1,10 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0
1/* Copyright (C) 2011-2013 Freescale Semiconductor, Inc. 2/* Copyright (C) 2011-2013 Freescale Semiconductor, Inc.
2 * 3 *
3 * derived from imx-hdmi.c(renamed to bridge/dw_hdmi.c now) 4 * derived from imx-hdmi.c(renamed to bridge/dw_hdmi.c now)
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */ 5 */
9#include <linux/module.h> 6#include <linux/module.h>
10#include <linux/platform_device.h> 7#include <linux/platform_device.h>
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 0e6942f21a4e..820c7e3878f0 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -1,17 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Freescale i.MX drm driver 3 * Freescale i.MX drm driver
3 * 4 *
4 * Copyright (C) 2011 Sascha Hauer, Pengutronix 5 * Copyright (C) 2011 Sascha Hauer, Pengutronix
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 */ 6 */
16#include <linux/component.h> 7#include <linux/component.h>
17#include <linux/device.h> 8#include <linux/device.h>
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index 3bd0f8a18e74..2c5bbe317353 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -1,16 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * i.MX drm driver - LVDS display bridge 3 * i.MX drm driver - LVDS display bridge
3 * 4 *
4 * Copyright (C) 2012 Sascha Hauer, Pengutronix 5 * Copyright (C) 2012 Sascha Hauer, Pengutronix
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */ 6 */
15 7
16#include <linux/module.h> 8#include <linux/module.h>
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index cffd3310240e..293dd5752583 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -1,16 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * i.MX drm driver - Television Encoder (TVEv2) 3 * i.MX drm driver - Television Encoder (TVEv2)
3 * 4 *
4 * Copyright (C) 2013 Philipp Zabel, Pengutronix 5 * Copyright (C) 2013 Philipp Zabel, Pengutronix
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */ 6 */
15 7
16#include <linux/clk.h> 8#include <linux/clk.h>
@@ -442,7 +434,7 @@ static int clk_tve_di_set_rate(struct clk_hw *hw, unsigned long rate,
442 return 0; 434 return 0;
443} 435}
444 436
445static struct clk_ops clk_tve_di_ops = { 437static const struct clk_ops clk_tve_di_ops = {
446 .round_rate = clk_tve_di_round_rate, 438 .round_rate = clk_tve_di_round_rate,
447 .set_rate = clk_tve_di_set_rate, 439 .set_rate = clk_tve_di_set_rate,
448 .recalc_rate = clk_tve_di_recalc_rate, 440 .recalc_rate = clk_tve_di_recalc_rate,
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 7d4b710b837a..058b53c0aa7e 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -1,16 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * i.MX IPUv3 Graphics driver 3 * i.MX IPUv3 Graphics driver
3 * 4 *
4 * Copyright (C) 2011 Sascha Hauer, Pengutronix 5 * Copyright (C) 2011 Sascha Hauer, Pengutronix
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */ 6 */
15#include <linux/component.h> 7#include <linux/component.h>
16#include <linux/module.h> 8#include <linux/module.h>
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 40605fdf0e33..c390924de93d 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -1,16 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * i.MX IPUv3 DP Overlay Planes 3 * i.MX IPUv3 DP Overlay Planes
3 * 4 *
4 * Copyright (C) 2013 Philipp Zabel, Pengutronix 5 * Copyright (C) 2013 Philipp Zabel, Pengutronix
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */ 6 */
15 7
16#include <drm/drmP.h> 8#include <drm/drmP.h>
@@ -236,9 +228,15 @@ static void ipu_plane_enable(struct ipu_plane *ipu_plane)
236 228
237void ipu_plane_disable(struct ipu_plane *ipu_plane, bool disable_dp_channel) 229void ipu_plane_disable(struct ipu_plane *ipu_plane, bool disable_dp_channel)
238{ 230{
231 int ret;
232
239 DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__); 233 DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
240 234
241 ipu_idmac_wait_busy(ipu_plane->ipu_ch, 50); 235 ret = ipu_idmac_wait_busy(ipu_plane->ipu_ch, 50);
236 if (ret == -ETIMEDOUT) {
237 DRM_ERROR("[PLANE:%d] IDMAC timeout\n",
238 ipu_plane->base.base.id);
239 }
242 240
243 if (ipu_plane->dp && disable_dp_channel) 241 if (ipu_plane->dp && disable_dp_channel)
244 ipu_dp_disable_channel(ipu_plane->dp, false); 242 ipu_dp_disable_channel(ipu_plane->dp, false);
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index aefd04e18f93..f3ce51121dd6 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -1,16 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * i.MX drm driver - parallel display implementation 3 * i.MX drm driver - parallel display implementation
3 * 4 *
4 * Copyright (C) 2012 Sascha Hauer, Pengutronix 5 * Copyright (C) 2012 Sascha Hauer, Pengutronix
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */ 6 */
15 7
16#include <linux/component.h> 8#include <linux/component.h>
diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index a9d2501500a1..163fadb8a33a 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c
@@ -259,6 +259,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_high_priority);
259 259
260void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf) 260void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf)
261{ 261{
262 WARN_ON_ONCE(buf & 0x7);
263
262 if (bufnum) 264 if (bufnum)
263 ipu_ch_param_write_field(ch, IPU_FIELD_EBA1, buf >> 3); 265 ipu_ch_param_write_field(ch, IPU_FIELD_EBA1, buf >> 3);
264 else 266 else
@@ -268,6 +270,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_buffer);
268 270
269void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off) 271void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off)
270{ 272{
273 WARN_ON_ONCE((u_off & 0x7) || (v_off & 0x7));
274
271 ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_off / 8); 275 ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_off / 8);
272 ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_off / 8); 276 ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_off / 8);
273} 277}
@@ -435,6 +439,8 @@ void ipu_cpmem_set_yuv_planar_full(struct ipuv3_channel *ch,
435 unsigned int uv_stride, 439 unsigned int uv_stride,
436 unsigned int u_offset, unsigned int v_offset) 440 unsigned int u_offset, unsigned int v_offset)
437{ 441{
442 WARN_ON_ONCE((u_offset & 0x7) || (v_offset & 0x7));
443
438 ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, uv_stride - 1); 444 ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, uv_stride - 1);
439 ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_offset / 8); 445 ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_offset / 8);
440 ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_offset / 8); 446 ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_offset / 8);
@@ -739,48 +745,56 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image)
739 switch (pix->pixelformat) { 745 switch (pix->pixelformat) {
740 case V4L2_PIX_FMT_YUV420: 746 case V4L2_PIX_FMT_YUV420:
741 offset = Y_OFFSET(pix, image->rect.left, image->rect.top); 747 offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
742 u_offset = U_OFFSET(pix, image->rect.left, 748 u_offset = image->u_offset ?
743 image->rect.top) - offset; 749 image->u_offset : U_OFFSET(pix, image->rect.left,
744 v_offset = V_OFFSET(pix, image->rect.left, 750 image->rect.top) - offset;
745 image->rect.top) - offset; 751 v_offset = image->v_offset ?
752 image->v_offset : V_OFFSET(pix, image->rect.left,
753 image->rect.top) - offset;
746 754
747 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2, 755 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2,
748 u_offset, v_offset); 756 u_offset, v_offset);
749 break; 757 break;
750 case V4L2_PIX_FMT_YVU420: 758 case V4L2_PIX_FMT_YVU420:
751 offset = Y_OFFSET(pix, image->rect.left, image->rect.top); 759 offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
752 u_offset = U_OFFSET(pix, image->rect.left, 760 u_offset = image->u_offset ?
753 image->rect.top) - offset; 761 image->u_offset : V_OFFSET(pix, image->rect.left,
754 v_offset = V_OFFSET(pix, image->rect.left, 762 image->rect.top) - offset;
755 image->rect.top) - offset; 763 v_offset = image->v_offset ?
764 image->v_offset : U_OFFSET(pix, image->rect.left,
765 image->rect.top) - offset;
756 766
757 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2, 767 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2,
758 v_offset, u_offset); 768 u_offset, v_offset);
759 break; 769 break;
760 case V4L2_PIX_FMT_YUV422P: 770 case V4L2_PIX_FMT_YUV422P:
761 offset = Y_OFFSET(pix, image->rect.left, image->rect.top); 771 offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
762 u_offset = U2_OFFSET(pix, image->rect.left, 772 u_offset = image->u_offset ?
763 image->rect.top) - offset; 773 image->u_offset : U2_OFFSET(pix, image->rect.left,
764 v_offset = V2_OFFSET(pix, image->rect.left, 774 image->rect.top) - offset;
765 image->rect.top) - offset; 775 v_offset = image->v_offset ?
776 image->v_offset : V2_OFFSET(pix, image->rect.left,
777 image->rect.top) - offset;
766 778
767 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2, 779 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2,
768 u_offset, v_offset); 780 u_offset, v_offset);
769 break; 781 break;
770 case V4L2_PIX_FMT_NV12: 782 case V4L2_PIX_FMT_NV12:
771 offset = Y_OFFSET(pix, image->rect.left, image->rect.top); 783 offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
772 u_offset = UV_OFFSET(pix, image->rect.left, 784 u_offset = image->u_offset ?
773 image->rect.top) - offset; 785 image->u_offset : UV_OFFSET(pix, image->rect.left,
774 v_offset = 0; 786 image->rect.top) - offset;
787 v_offset = image->v_offset ? image->v_offset : 0;
775 788
776 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline, 789 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline,
777 u_offset, v_offset); 790 u_offset, v_offset);
778 break; 791 break;
779 case V4L2_PIX_FMT_NV16: 792 case V4L2_PIX_FMT_NV16:
780 offset = Y_OFFSET(pix, image->rect.left, image->rect.top); 793 offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
781 u_offset = UV2_OFFSET(pix, image->rect.left, 794 u_offset = image->u_offset ?
782 image->rect.top) - offset; 795 image->u_offset : UV2_OFFSET(pix, image->rect.left,
783 v_offset = 0; 796 image->rect.top) - offset;
797 v_offset = image->v_offset ? image->v_offset : 0;
784 798
785 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline, 799 ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline,
786 u_offset, v_offset); 800 u_offset, v_offset);
diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c
index 67cc820253a9..594c3cbc8291 100644
--- a/drivers/gpu/ipu-v3/ipu-ic.c
+++ b/drivers/gpu/ipu-v3/ipu-ic.c
@@ -442,36 +442,40 @@ unlock:
442} 442}
443EXPORT_SYMBOL_GPL(ipu_ic_task_graphics_init); 443EXPORT_SYMBOL_GPL(ipu_ic_task_graphics_init);
444 444
445int ipu_ic_task_init(struct ipu_ic *ic, 445int ipu_ic_task_init_rsc(struct ipu_ic *ic,
446 int in_width, int in_height, 446 int in_width, int in_height,
447 int out_width, int out_height, 447 int out_width, int out_height,
448 enum ipu_color_space in_cs, 448 enum ipu_color_space in_cs,
449 enum ipu_color_space out_cs) 449 enum ipu_color_space out_cs,
450 u32 rsc)
450{ 451{
451 struct ipu_ic_priv *priv = ic->priv; 452 struct ipu_ic_priv *priv = ic->priv;
452 u32 reg, downsize_coeff, resize_coeff; 453 u32 downsize_coeff, resize_coeff;
453 unsigned long flags; 454 unsigned long flags;
454 int ret = 0; 455 int ret = 0;
455 456
456 /* Setup vertical resizing */ 457 if (!rsc) {
457 ret = calc_resize_coeffs(ic, in_height, out_height, 458 /* Setup vertical resizing */
458 &resize_coeff, &downsize_coeff);
459 if (ret)
460 return ret;
461 459
462 reg = (downsize_coeff << 30) | (resize_coeff << 16); 460 ret = calc_resize_coeffs(ic, in_height, out_height,
461 &resize_coeff, &downsize_coeff);
462 if (ret)
463 return ret;
464
465 rsc = (downsize_coeff << 30) | (resize_coeff << 16);
463 466
464 /* Setup horizontal resizing */ 467 /* Setup horizontal resizing */
465 ret = calc_resize_coeffs(ic, in_width, out_width, 468 ret = calc_resize_coeffs(ic, in_width, out_width,
466 &resize_coeff, &downsize_coeff); 469 &resize_coeff, &downsize_coeff);
467 if (ret) 470 if (ret)
468 return ret; 471 return ret;
469 472
470 reg |= (downsize_coeff << 14) | resize_coeff; 473 rsc |= (downsize_coeff << 14) | resize_coeff;
474 }
471 475
472 spin_lock_irqsave(&priv->lock, flags); 476 spin_lock_irqsave(&priv->lock, flags);
473 477
474 ipu_ic_write(ic, reg, ic->reg->rsc); 478 ipu_ic_write(ic, rsc, ic->reg->rsc);
475 479
476 /* Setup color space conversion */ 480 /* Setup color space conversion */
477 ic->in_cs = in_cs; 481 ic->in_cs = in_cs;
@@ -487,6 +491,16 @@ unlock:
487 spin_unlock_irqrestore(&priv->lock, flags); 491 spin_unlock_irqrestore(&priv->lock, flags);
488 return ret; 492 return ret;
489} 493}
494
495int ipu_ic_task_init(struct ipu_ic *ic,
496 int in_width, int in_height,
497 int out_width, int out_height,
498 enum ipu_color_space in_cs,
499 enum ipu_color_space out_cs)
500{
501 return ipu_ic_task_init_rsc(ic, in_width, in_height, out_width,
502 out_height, in_cs, out_cs, 0);
503}
490EXPORT_SYMBOL_GPL(ipu_ic_task_init); 504EXPORT_SYMBOL_GPL(ipu_ic_task_init);
491 505
492int ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel, 506int ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel,
diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index f4081962784c..13103ab86050 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -37,17 +37,36 @@
37 * when double_buffering boolean is set). 37 * when double_buffering boolean is set).
38 * 38 *
39 * Note that the input frame must be split up into the same number 39 * Note that the input frame must be split up into the same number
40 * of tiles as the output frame. 40 * of tiles as the output frame:
41 * 41 *
42 * FIXME: at this point there is no attempt to deal with visible seams 42 * +---------+-----+
43 * at the tile boundaries when upscaling. The seams are caused by a reset 43 * +-----+---+ | A | B |
44 * of the bilinear upscale interpolation when starting a new tile. The 44 * | A | B | | | |
45 * seams are barely visible for small upscale factors, but become 45 * +-----+---+ --> +---------+-----+
46 * increasingly visible as the upscale factor gets larger, since more 46 * | C | D | | C | D |
47 * interpolated pixels get thrown out at the tile boundaries. A possilble 47 * +-----+---+ | | |
48 * fix might be to overlap tiles of different sizes, but this must be done 48 * +---------+-----+
49 * while also maintaining the IDMAC dma buffer address alignment and 8x8 IRT 49 *
50 * alignment restrictions of each tile. 50 * Clockwise 90° rotations are handled by first rescaling into a
51 * reusable temporary tile buffer and then rotating with the 8x8
52 * block rotator, writing to the correct destination:
53 *
54 * +-----+-----+
55 * | | |
56 * +-----+---+ +---------+ | C | A |
57 * | A | B | | A,B, | | | | |
58 * +-----+---+ --> | C,D | | --> | | |
59 * | C | D | +---------+ +-----+-----+
60 * +-----+---+ | D | B |
61 * | | |
62 * +-----+-----+
63 *
64 * If the 8x8 block rotator is used, horizontal or vertical flipping
65 * is done during the rotation step, otherwise flipping is done
66 * during the scaling step.
67 * With rotation or flipping, tile order changes between input and
68 * output image. Tiles are numbered row major from top left to bottom
69 * right for both input and output image.
51 */ 70 */
52 71
53#define MAX_STRIPES_W 4 72#define MAX_STRIPES_W 4
@@ -84,6 +103,8 @@ struct ipu_image_convert_dma_chan {
84struct ipu_image_tile { 103struct ipu_image_tile {
85 u32 width; 104 u32 width;
86 u32 height; 105 u32 height;
106 u32 left;
107 u32 top;
87 /* size and strides are in bytes */ 108 /* size and strides are in bytes */
88 u32 size; 109 u32 size;
89 u32 stride; 110 u32 stride;
@@ -135,6 +156,12 @@ struct ipu_image_convert_ctx {
135 struct ipu_image_convert_image in; 156 struct ipu_image_convert_image in;
136 struct ipu_image_convert_image out; 157 struct ipu_image_convert_image out;
137 enum ipu_rotate_mode rot_mode; 158 enum ipu_rotate_mode rot_mode;
159 u32 downsize_coeff_h;
160 u32 downsize_coeff_v;
161 u32 image_resize_coeff_h;
162 u32 image_resize_coeff_v;
163 u32 resize_coeffs_h[MAX_STRIPES_W];
164 u32 resize_coeffs_v[MAX_STRIPES_H];
138 165
139 /* intermediate buffer for rotation */ 166 /* intermediate buffer for rotation */
140 struct ipu_image_convert_dma_buf rot_intermediate[2]; 167 struct ipu_image_convert_dma_buf rot_intermediate[2];
@@ -300,12 +327,11 @@ static void dump_format(struct ipu_image_convert_ctx *ctx,
300 struct ipu_image_convert_priv *priv = chan->priv; 327 struct ipu_image_convert_priv *priv = chan->priv;
301 328
302 dev_dbg(priv->ipu->dev, 329 dev_dbg(priv->ipu->dev,
303 "task %u: ctx %p: %s format: %dx%d (%dx%d tiles of size %dx%d), %c%c%c%c\n", 330 "task %u: ctx %p: %s format: %dx%d (%dx%d tiles), %c%c%c%c\n",
304 chan->ic_task, ctx, 331 chan->ic_task, ctx,
305 ic_image->type == IMAGE_CONVERT_OUT ? "Output" : "Input", 332 ic_image->type == IMAGE_CONVERT_OUT ? "Output" : "Input",
306 ic_image->base.pix.width, ic_image->base.pix.height, 333 ic_image->base.pix.width, ic_image->base.pix.height,
307 ic_image->num_cols, ic_image->num_rows, 334 ic_image->num_cols, ic_image->num_rows,
308 ic_image->tile[0].width, ic_image->tile[0].height,
309 ic_image->fmt->fourcc & 0xff, 335 ic_image->fmt->fourcc & 0xff,
310 (ic_image->fmt->fourcc >> 8) & 0xff, 336 (ic_image->fmt->fourcc >> 8) & 0xff,
311 (ic_image->fmt->fourcc >> 16) & 0xff, 337 (ic_image->fmt->fourcc >> 16) & 0xff,
@@ -353,24 +379,459 @@ static int alloc_dma_buf(struct ipu_image_convert_priv *priv,
353 379
354static inline int num_stripes(int dim) 380static inline int num_stripes(int dim)
355{ 381{
356 if (dim <= 1024) 382 return (dim - 1) / 1024 + 1;
357 return 1; 383}
358 else if (dim <= 2048) 384
385/*
386 * Calculate downsizing coefficients, which are the same for all tiles,
387 * and bilinear resizing coefficients, which are used to find the best
388 * seam positions.
389 */
390static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
391 struct ipu_image *in,
392 struct ipu_image *out)
393{
394 u32 downsized_width = in->rect.width;
395 u32 downsized_height = in->rect.height;
396 u32 downsize_coeff_v = 0;
397 u32 downsize_coeff_h = 0;
398 u32 resized_width = out->rect.width;
399 u32 resized_height = out->rect.height;
400 u32 resize_coeff_h;
401 u32 resize_coeff_v;
402
403 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
404 resized_width = out->rect.height;
405 resized_height = out->rect.width;
406 }
407
408 /* Do not let invalid input lead to an endless loop below */
409 if (WARN_ON(resized_width == 0 || resized_height == 0))
410 return -EINVAL;
411
412 while (downsized_width >= resized_width * 2) {
413 downsized_width >>= 1;
414 downsize_coeff_h++;
415 }
416
417 while (downsized_height >= resized_height * 2) {
418 downsized_height >>= 1;
419 downsize_coeff_v++;
420 }
421
422 /*
423 * Calculate the bilinear resizing coefficients that could be used if
424 * we were converting with a single tile. The bottom right output pixel
425 * should sample as close as possible to the bottom right input pixel
426 * out of the decimator, but not overshoot it:
427 */
428 resize_coeff_h = 8192 * (downsized_width - 1) / (resized_width - 1);
429 resize_coeff_v = 8192 * (downsized_height - 1) / (resized_height - 1);
430
431 dev_dbg(ctx->chan->priv->ipu->dev,
432 "%s: hscale: >>%u, *8192/%u vscale: >>%u, *8192/%u, %ux%u tiles\n",
433 __func__, downsize_coeff_h, resize_coeff_h, downsize_coeff_v,
434 resize_coeff_v, ctx->in.num_cols, ctx->in.num_rows);
435
436 if (downsize_coeff_h > 2 || downsize_coeff_v > 2 ||
437 resize_coeff_h > 0x3fff || resize_coeff_v > 0x3fff)
438 return -EINVAL;
439
440 ctx->downsize_coeff_h = downsize_coeff_h;
441 ctx->downsize_coeff_v = downsize_coeff_v;
442 ctx->image_resize_coeff_h = resize_coeff_h;
443 ctx->image_resize_coeff_v = resize_coeff_v;
444
445 return 0;
446}
447
448#define round_closest(x, y) round_down((x) + (y)/2, (y))
449
450/*
451 * Find the best aligned seam position in the inverval [out_start, out_end].
452 * Rotation and image offsets are out of scope.
453 *
454 * @out_start: start of inverval, must be within 1024 pixels / lines
455 * of out_end
456 * @out_end: end of interval, smaller than or equal to out_edge
457 * @in_edge: input right / bottom edge
458 * @out_edge: output right / bottom edge
459 * @in_align: input alignment, either horizontal 8-byte line start address
460 * alignment, or pixel alignment due to image format
461 * @out_align: output alignment, either horizontal 8-byte line start address
462 * alignment, or pixel alignment due to image format or rotator
463 * block size
464 * @in_burst: horizontal input burst size in case of horizontal flip
465 * @out_burst: horizontal output burst size or rotator block size
466 * @downsize_coeff: downsizing section coefficient
467 * @resize_coeff: main processing section resizing coefficient
468 * @_in_seam: aligned input seam position return value
469 * @_out_seam: aligned output seam position return value
470 */
471static void find_best_seam(struct ipu_image_convert_ctx *ctx,
472 unsigned int out_start,
473 unsigned int out_end,
474 unsigned int in_edge,
475 unsigned int out_edge,
476 unsigned int in_align,
477 unsigned int out_align,
478 unsigned int in_burst,
479 unsigned int out_burst,
480 unsigned int downsize_coeff,
481 unsigned int resize_coeff,
482 u32 *_in_seam,
483 u32 *_out_seam)
484{
485 struct device *dev = ctx->chan->priv->ipu->dev;
486 unsigned int out_pos;
487 /* Input / output seam position candidates */
488 unsigned int out_seam = 0;
489 unsigned int in_seam = 0;
490 unsigned int min_diff = UINT_MAX;
491
492 /*
493 * Output tiles must start at a multiple of 8 bytes horizontally and
494 * possibly at an even line horizontally depending on the pixel format.
495 * Only consider output aligned positions for the seam.
496 */
497 out_start = round_up(out_start, out_align);
498 for (out_pos = out_start; out_pos < out_end; out_pos += out_align) {
499 unsigned int in_pos;
500 unsigned int in_pos_aligned;
501 unsigned int abs_diff;
502
503 /*
504 * Tiles in the right row / bottom column may not be allowed to
505 * overshoot horizontally / vertically. out_burst may be the
506 * actual DMA burst size, or the rotator block size.
507 */
508 if ((out_burst > 1) && (out_edge - out_pos) % out_burst)
509 continue;
510
511 /*
512 * Input sample position, corresponding to out_pos, 19.13 fixed
513 * point.
514 */
515 in_pos = (out_pos * resize_coeff) << downsize_coeff;
516 /*
517 * The closest input sample position that we could actually
518 * start the input tile at, 19.13 fixed point.
519 */
520 in_pos_aligned = round_closest(in_pos, 8192U * in_align);
521
522 if ((in_burst > 1) &&
523 (in_edge - in_pos_aligned / 8192U) % in_burst)
524 continue;
525
526 if (in_pos < in_pos_aligned)
527 abs_diff = in_pos_aligned - in_pos;
528 else
529 abs_diff = in_pos - in_pos_aligned;
530
531 if (abs_diff < min_diff) {
532 in_seam = in_pos_aligned;
533 out_seam = out_pos;
534 min_diff = abs_diff;
535 }
536 }
537
538 *_out_seam = out_seam;
539 /* Convert 19.13 fixed point to integer seam position */
540 *_in_seam = DIV_ROUND_CLOSEST(in_seam, 8192U);
541
542 dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) diff %u.%03u\n",
543 __func__, out_seam, out_align, out_start, out_end,
544 *_in_seam, in_align, min_diff / 8192,
545 DIV_ROUND_CLOSEST(min_diff % 8192 * 1000, 8192));
546}
547
548/*
549 * Tile left edges are required to be aligned to multiples of 8 bytes
550 * by the IDMAC.
551 */
552static inline u32 tile_left_align(const struct ipu_image_pixfmt *fmt)
553{
554 if (fmt->planar)
555 return fmt->uv_packed ? 8 : 8 * fmt->uv_width_dec;
556 else
557 return fmt->bpp == 32 ? 2 : fmt->bpp == 16 ? 4 : 8;
558}
559
560/*
561 * Tile top edge alignment is only limited by chroma subsampling.
562 */
563static inline u32 tile_top_align(const struct ipu_image_pixfmt *fmt)
564{
565 return fmt->uv_height_dec > 1 ? 2 : 1;
566}
567
568static inline u32 tile_width_align(enum ipu_image_convert_type type,
569 const struct ipu_image_pixfmt *fmt,
570 enum ipu_rotate_mode rot_mode)
571{
572 if (type == IMAGE_CONVERT_IN) {
573 /*
574 * The IC burst reads 8 pixels at a time. Reading beyond the
575 * end of the line is usually acceptable. Those pixels are
576 * ignored, unless the IC has to write the scaled line in
577 * reverse.
578 */
579 return (!ipu_rot_mode_is_irt(rot_mode) &&
580 (rot_mode & IPU_ROT_BIT_HFLIP)) ? 8 : 2;
581 }
582
583 /*
584 * Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled
585 * formats to guarantee 8-byte aligned line start addresses in the
586 * chroma planes when IRT is used. Align to 8x8 pixel IRT block size
587 * for all other formats.
588 */
589 return (ipu_rot_mode_is_irt(rot_mode) &&
590 fmt->planar && !fmt->uv_packed) ?
591 8 * fmt->uv_width_dec : 8;
592}
593
594static inline u32 tile_height_align(enum ipu_image_convert_type type,
595 const struct ipu_image_pixfmt *fmt,
596 enum ipu_rotate_mode rot_mode)
597{
598 if (type == IMAGE_CONVERT_IN || !ipu_rot_mode_is_irt(rot_mode))
359 return 2; 599 return 2;
600
601 /*
602 * Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled
603 * formats to guarantee 8-byte aligned line start addresses in the
604 * chroma planes when IRT is used. Align to 8x8 pixel IRT block size
605 * for all other formats.
606 */
607 return (fmt->planar && !fmt->uv_packed) ? 8 * fmt->uv_width_dec : 8;
608}
609
610/*
611 * Fill in left position and width and for all tiles in an input column, and
612 * for all corresponding output tiles. If the 90° rotator is used, the output
613 * tiles are in a row, and output tile top position and height are set.
614 */
615static void fill_tile_column(struct ipu_image_convert_ctx *ctx,
616 unsigned int col,
617 struct ipu_image_convert_image *in,
618 unsigned int in_left, unsigned int in_width,
619 struct ipu_image_convert_image *out,
620 unsigned int out_left, unsigned int out_width)
621{
622 unsigned int row, tile_idx;
623 struct ipu_image_tile *in_tile, *out_tile;
624
625 for (row = 0; row < in->num_rows; row++) {
626 tile_idx = in->num_cols * row + col;
627 in_tile = &in->tile[tile_idx];
628 out_tile = &out->tile[ctx->out_tile_map[tile_idx]];
629
630 in_tile->left = in_left;
631 in_tile->width = in_width;
632
633 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
634 out_tile->top = out_left;
635 out_tile->height = out_width;
636 } else {
637 out_tile->left = out_left;
638 out_tile->width = out_width;
639 }
640 }
641}
642
643/*
644 * Fill in top position and height and for all tiles in an input row, and
645 * for all corresponding output tiles. If the 90° rotator is used, the output
646 * tiles are in a column, and output tile left position and width are set.
647 */
648static void fill_tile_row(struct ipu_image_convert_ctx *ctx, unsigned int row,
649 struct ipu_image_convert_image *in,
650 unsigned int in_top, unsigned int in_height,
651 struct ipu_image_convert_image *out,
652 unsigned int out_top, unsigned int out_height)
653{
654 unsigned int col, tile_idx;
655 struct ipu_image_tile *in_tile, *out_tile;
656
657 for (col = 0; col < in->num_cols; col++) {
658 tile_idx = in->num_cols * row + col;
659 in_tile = &in->tile[tile_idx];
660 out_tile = &out->tile[ctx->out_tile_map[tile_idx]];
661
662 in_tile->top = in_top;
663 in_tile->height = in_height;
664
665 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
666 out_tile->left = out_top;
667 out_tile->width = out_height;
668 } else {
669 out_tile->top = out_top;
670 out_tile->height = out_height;
671 }
672 }
673}
674
675/*
676 * Find the best horizontal and vertical seam positions to split into tiles.
677 * Minimize the fractional part of the input sampling position for the
678 * top / left pixels of each tile.
679 */
680static void find_seams(struct ipu_image_convert_ctx *ctx,
681 struct ipu_image_convert_image *in,
682 struct ipu_image_convert_image *out)
683{
684 struct device *dev = ctx->chan->priv->ipu->dev;
685 unsigned int resized_width = out->base.rect.width;
686 unsigned int resized_height = out->base.rect.height;
687 unsigned int col;
688 unsigned int row;
689 unsigned int in_left_align = tile_left_align(in->fmt);
690 unsigned int in_top_align = tile_top_align(in->fmt);
691 unsigned int out_left_align = tile_left_align(out->fmt);
692 unsigned int out_top_align = tile_top_align(out->fmt);
693 unsigned int out_width_align = tile_width_align(out->type, out->fmt,
694 ctx->rot_mode);
695 unsigned int out_height_align = tile_height_align(out->type, out->fmt,
696 ctx->rot_mode);
697 unsigned int in_right = in->base.rect.width;
698 unsigned int in_bottom = in->base.rect.height;
699 unsigned int out_right = out->base.rect.width;
700 unsigned int out_bottom = out->base.rect.height;
701 unsigned int flipped_out_left;
702 unsigned int flipped_out_top;
703
704 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
705 /* Switch width/height and align top left to IRT block size */
706 resized_width = out->base.rect.height;
707 resized_height = out->base.rect.width;
708 out_left_align = out_height_align;
709 out_top_align = out_width_align;
710 out_width_align = out_left_align;
711 out_height_align = out_top_align;
712 out_right = out->base.rect.height;
713 out_bottom = out->base.rect.width;
714 }
715
716 for (col = in->num_cols - 1; col > 0; col--) {
717 bool allow_in_overshoot = ipu_rot_mode_is_irt(ctx->rot_mode) ||
718 !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
719 bool allow_out_overshoot = (col < in->num_cols - 1) &&
720 !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
721 unsigned int out_start;
722 unsigned int out_end;
723 unsigned int in_left;
724 unsigned int out_left;
725
726 /*
727 * Align input width to burst length if the scaling step flips
728 * horizontally.
729 */
730
731 /* Start within 1024 pixels of the right edge */
732 out_start = max_t(int, 0, out_right - 1024);
733 /* End before having to add more columns to the left */
734 out_end = min_t(unsigned int, out_right, col * 1024);
735
736 find_best_seam(ctx, out_start, out_end,
737 in_right, out_right,
738 in_left_align, out_left_align,
739 allow_in_overshoot ? 1 : 8 /* burst length */,
740 allow_out_overshoot ? 1 : out_width_align,
741 ctx->downsize_coeff_h, ctx->image_resize_coeff_h,
742 &in_left, &out_left);
743
744 if (ctx->rot_mode & IPU_ROT_BIT_HFLIP)
745 flipped_out_left = resized_width - out_right;
746 else
747 flipped_out_left = out_left;
748
749 fill_tile_column(ctx, col, in, in_left, in_right - in_left,
750 out, flipped_out_left, out_right - out_left);
751
752 dev_dbg(dev, "%s: col %u: %u, %u -> %u, %u\n", __func__, col,
753 in_left, in_right - in_left,
754 flipped_out_left, out_right - out_left);
755
756 in_right = in_left;
757 out_right = out_left;
758 }
759
760 flipped_out_left = (ctx->rot_mode & IPU_ROT_BIT_HFLIP) ?
761 resized_width - out_right : 0;
762
763 fill_tile_column(ctx, 0, in, 0, in_right,
764 out, flipped_out_left, out_right);
765
766 dev_dbg(dev, "%s: col 0: 0, %u -> %u, %u\n", __func__,
767 in_right, flipped_out_left, out_right);
768
769 for (row = in->num_rows - 1; row > 0; row--) {
770 bool allow_overshoot = row < in->num_rows - 1;
771 unsigned int out_start;
772 unsigned int out_end;
773 unsigned int in_top;
774 unsigned int out_top;
775
776 /* Start within 1024 lines of the bottom edge */
777 out_start = max_t(int, 0, out_bottom - 1024);
778 /* End before having to add more rows above */
779 out_end = min_t(unsigned int, out_bottom, row * 1024);
780
781 find_best_seam(ctx, out_start, out_end,
782 in_bottom, out_bottom,
783 in_top_align, out_top_align,
784 1, allow_overshoot ? 1 : out_height_align,
785 ctx->downsize_coeff_v, ctx->image_resize_coeff_v,
786 &in_top, &out_top);
787
788 if ((ctx->rot_mode & IPU_ROT_BIT_VFLIP) ^
789 ipu_rot_mode_is_irt(ctx->rot_mode))
790 flipped_out_top = resized_height - out_bottom;
791 else
792 flipped_out_top = out_top;
793
794 fill_tile_row(ctx, row, in, in_top, in_bottom - in_top,
795 out, flipped_out_top, out_bottom - out_top);
796
797 dev_dbg(dev, "%s: row %u: %u, %u -> %u, %u\n", __func__, row,
798 in_top, in_bottom - in_top,
799 flipped_out_top, out_bottom - out_top);
800
801 in_bottom = in_top;
802 out_bottom = out_top;
803 }
804
805 if ((ctx->rot_mode & IPU_ROT_BIT_VFLIP) ^
806 ipu_rot_mode_is_irt(ctx->rot_mode))
807 flipped_out_top = resized_height - out_bottom;
360 else 808 else
361 return 4; 809 flipped_out_top = 0;
810
811 fill_tile_row(ctx, 0, in, 0, in_bottom,
812 out, flipped_out_top, out_bottom);
813
814 dev_dbg(dev, "%s: row 0: 0, %u -> %u, %u\n", __func__,
815 in_bottom, flipped_out_top, out_bottom);
362} 816}
363 817
364static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, 818static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
365 struct ipu_image_convert_image *image) 819 struct ipu_image_convert_image *image)
366{ 820{
367 int i; 821 struct ipu_image_convert_chan *chan = ctx->chan;
822 struct ipu_image_convert_priv *priv = chan->priv;
823 unsigned int i;
368 824
369 for (i = 0; i < ctx->num_tiles; i++) { 825 for (i = 0; i < ctx->num_tiles; i++) {
370 struct ipu_image_tile *tile = &image->tile[i]; 826 struct ipu_image_tile *tile;
827 const unsigned int row = i / image->num_cols;
828 const unsigned int col = i % image->num_cols;
829
830 if (image->type == IMAGE_CONVERT_OUT)
831 tile = &image->tile[ctx->out_tile_map[i]];
832 else
833 tile = &image->tile[i];
371 834
372 tile->height = image->base.pix.height / image->num_rows;
373 tile->width = image->base.pix.width / image->num_cols;
374 tile->size = ((tile->height * image->fmt->bpp) >> 3) * 835 tile->size = ((tile->height * image->fmt->bpp) >> 3) *
375 tile->width; 836 tile->width;
376 837
@@ -383,6 +844,13 @@ static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
383 tile->rot_stride = 844 tile->rot_stride =
384 (image->fmt->bpp * tile->height) >> 3; 845 (image->fmt->bpp * tile->height) >> 3;
385 } 846 }
847
848 dev_dbg(priv->ipu->dev,
849 "task %u: ctx %p: %s@[%u,%u]: %ux%u@%u,%u\n",
850 chan->ic_task, ctx,
851 image->type == IMAGE_CONVERT_IN ? "Input" : "Output",
852 row, col,
853 tile->width, tile->height, tile->left, tile->top);
386 } 854 }
387} 855}
388 856
@@ -459,14 +927,14 @@ static void calc_out_tile_map(struct ipu_image_convert_ctx *ctx)
459 } 927 }
460} 928}
461 929
462static void calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx, 930static int calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx,
463 struct ipu_image_convert_image *image) 931 struct ipu_image_convert_image *image)
464{ 932{
465 struct ipu_image_convert_chan *chan = ctx->chan; 933 struct ipu_image_convert_chan *chan = ctx->chan;
466 struct ipu_image_convert_priv *priv = chan->priv; 934 struct ipu_image_convert_priv *priv = chan->priv;
467 const struct ipu_image_pixfmt *fmt = image->fmt; 935 const struct ipu_image_pixfmt *fmt = image->fmt;
468 unsigned int row, col, tile = 0; 936 unsigned int row, col, tile = 0;
469 u32 H, w, h, y_stride, uv_stride; 937 u32 H, top, y_stride, uv_stride;
470 u32 uv_row_off, uv_col_off, uv_off, u_off, v_off, tmp; 938 u32 uv_row_off, uv_col_off, uv_off, u_off, v_off, tmp;
471 u32 y_row_off, y_col_off, y_off; 939 u32 y_row_off, y_col_off, y_off;
472 u32 y_size, uv_size; 940 u32 y_size, uv_size;
@@ -483,13 +951,12 @@ static void calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx,
483 uv_size = y_size / (fmt->uv_width_dec * fmt->uv_height_dec); 951 uv_size = y_size / (fmt->uv_width_dec * fmt->uv_height_dec);
484 952
485 for (row = 0; row < image->num_rows; row++) { 953 for (row = 0; row < image->num_rows; row++) {
486 w = image->tile[tile].width; 954 top = image->tile[tile].top;
487 h = image->tile[tile].height; 955 y_row_off = top * y_stride;
488 y_row_off = row * h * y_stride; 956 uv_row_off = (top * uv_stride) / fmt->uv_height_dec;
489 uv_row_off = (row * h * uv_stride) / fmt->uv_height_dec;
490 957
491 for (col = 0; col < image->num_cols; col++) { 958 for (col = 0; col < image->num_cols; col++) {
492 y_col_off = col * w; 959 y_col_off = image->tile[tile].left;
493 uv_col_off = y_col_off / fmt->uv_width_dec; 960 uv_col_off = y_col_off / fmt->uv_width_dec;
494 if (fmt->uv_packed) 961 if (fmt->uv_packed)
495 uv_col_off *= 2; 962 uv_col_off *= 2;
@@ -509,24 +976,30 @@ static void calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx,
509 image->tile[tile].u_off = u_off; 976 image->tile[tile].u_off = u_off;
510 image->tile[tile++].v_off = v_off; 977 image->tile[tile++].v_off = v_off;
511 978
512 dev_dbg(priv->ipu->dev, 979 if ((y_off & 0x7) || (u_off & 0x7) || (v_off & 0x7)) {
513 "task %u: ctx %p: %s@[%d,%d]: y_off %08x, u_off %08x, v_off %08x\n", 980 dev_err(priv->ipu->dev,
514 chan->ic_task, ctx, 981 "task %u: ctx %p: %s@[%d,%d]: "
515 image->type == IMAGE_CONVERT_IN ? 982 "y_off %08x, u_off %08x, v_off %08x\n",
516 "Input" : "Output", row, col, 983 chan->ic_task, ctx,
517 y_off, u_off, v_off); 984 image->type == IMAGE_CONVERT_IN ?
985 "Input" : "Output", row, col,
986 y_off, u_off, v_off);
987 return -EINVAL;
988 }
518 } 989 }
519 } 990 }
991
992 return 0;
520} 993}
521 994
522static void calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx, 995static int calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx,
523 struct ipu_image_convert_image *image) 996 struct ipu_image_convert_image *image)
524{ 997{
525 struct ipu_image_convert_chan *chan = ctx->chan; 998 struct ipu_image_convert_chan *chan = ctx->chan;
526 struct ipu_image_convert_priv *priv = chan->priv; 999 struct ipu_image_convert_priv *priv = chan->priv;
527 const struct ipu_image_pixfmt *fmt = image->fmt; 1000 const struct ipu_image_pixfmt *fmt = image->fmt;
528 unsigned int row, col, tile = 0; 1001 unsigned int row, col, tile = 0;
529 u32 w, h, bpp, stride; 1002 u32 bpp, stride, offset;
530 u32 row_off, col_off; 1003 u32 row_off, col_off;
531 1004
532 /* setup some convenience vars */ 1005 /* setup some convenience vars */
@@ -534,34 +1007,183 @@ static void calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx,
534 bpp = fmt->bpp; 1007 bpp = fmt->bpp;
535 1008
536 for (row = 0; row < image->num_rows; row++) { 1009 for (row = 0; row < image->num_rows; row++) {
537 w = image->tile[tile].width; 1010 row_off = image->tile[tile].top * stride;
538 h = image->tile[tile].height;
539 row_off = row * h * stride;
540 1011
541 for (col = 0; col < image->num_cols; col++) { 1012 for (col = 0; col < image->num_cols; col++) {
542 col_off = (col * w * bpp) >> 3; 1013 col_off = (image->tile[tile].left * bpp) >> 3;
1014
1015 offset = row_off + col_off;
543 1016
544 image->tile[tile].offset = row_off + col_off; 1017 image->tile[tile].offset = offset;
545 image->tile[tile].u_off = 0; 1018 image->tile[tile].u_off = 0;
546 image->tile[tile++].v_off = 0; 1019 image->tile[tile++].v_off = 0;
547 1020
548 dev_dbg(priv->ipu->dev, 1021 if (offset & 0x7) {
549 "task %u: ctx %p: %s@[%d,%d]: phys %08x\n", 1022 dev_err(priv->ipu->dev,
550 chan->ic_task, ctx, 1023 "task %u: ctx %p: %s@[%d,%d]: "
551 image->type == IMAGE_CONVERT_IN ? 1024 "phys %08x\n",
552 "Input" : "Output", row, col, 1025 chan->ic_task, ctx,
553 row_off + col_off); 1026 image->type == IMAGE_CONVERT_IN ?
1027 "Input" : "Output", row, col,
1028 row_off + col_off);
1029 return -EINVAL;
1030 }
554 } 1031 }
555 } 1032 }
1033
1034 return 0;
556} 1035}
557 1036
558static void calc_tile_offsets(struct ipu_image_convert_ctx *ctx, 1037static int calc_tile_offsets(struct ipu_image_convert_ctx *ctx,
559 struct ipu_image_convert_image *image) 1038 struct ipu_image_convert_image *image)
560{ 1039{
561 if (image->fmt->planar) 1040 if (image->fmt->planar)
562 calc_tile_offsets_planar(ctx, image); 1041 return calc_tile_offsets_planar(ctx, image);
1042
1043 return calc_tile_offsets_packed(ctx, image);
1044}
1045
1046/*
1047 * Calculate the resizing ratio for the IC main processing section given input
1048 * size, fixed downsizing coefficient, and output size.
1049 * Either round to closest for the next tile's first pixel to minimize seams
1050 * and distortion (for all but right column / bottom row), or round down to
1051 * avoid sampling beyond the edges of the input image for this tile's last
1052 * pixel.
1053 * Returns the resizing coefficient, resizing ratio is 8192.0 / resize_coeff.
1054 */
1055static u32 calc_resize_coeff(u32 input_size, u32 downsize_coeff,
1056 u32 output_size, bool allow_overshoot)
1057{
1058 u32 downsized = input_size >> downsize_coeff;
1059
1060 if (allow_overshoot)
1061 return DIV_ROUND_CLOSEST(8192 * downsized, output_size);
563 else 1062 else
564 calc_tile_offsets_packed(ctx, image); 1063 return 8192 * (downsized - 1) / (output_size - 1);
1064}
1065
1066/*
1067 * Slightly modify resize coefficients per tile to hide the bilinear
1068 * interpolator reset at tile borders, shifting the right / bottom edge
1069 * by up to a half input pixel. This removes noticeable seams between
1070 * tiles at higher upscaling factors.
1071 */
1072static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
1073{
1074 struct ipu_image_convert_chan *chan = ctx->chan;
1075 struct ipu_image_convert_priv *priv = chan->priv;
1076 struct ipu_image_tile *in_tile, *out_tile;
1077 unsigned int col, row, tile_idx;
1078 unsigned int last_output;
1079
1080 for (col = 0; col < ctx->in.num_cols; col++) {
1081 bool closest = (col < ctx->in.num_cols - 1) &&
1082 !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
1083 u32 resized_width;
1084 u32 resize_coeff_h;
1085
1086 tile_idx = col;
1087 in_tile = &ctx->in.tile[tile_idx];
1088 out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
1089
1090 if (ipu_rot_mode_is_irt(ctx->rot_mode))
1091 resized_width = out_tile->height;
1092 else
1093 resized_width = out_tile->width;
1094
1095 resize_coeff_h = calc_resize_coeff(in_tile->width,
1096 ctx->downsize_coeff_h,
1097 resized_width, closest);
1098
1099 dev_dbg(priv->ipu->dev, "%s: column %u hscale: *8192/%u\n",
1100 __func__, col, resize_coeff_h);
1101
1102
1103 for (row = 0; row < ctx->in.num_rows; row++) {
1104 tile_idx = row * ctx->in.num_cols + col;
1105 in_tile = &ctx->in.tile[tile_idx];
1106 out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
1107
1108 /*
1109 * With the horizontal scaling factor known, round up
1110 * resized width (output width or height) to burst size.
1111 */
1112 if (ipu_rot_mode_is_irt(ctx->rot_mode))
1113 out_tile->height = round_up(resized_width, 8);
1114 else
1115 out_tile->width = round_up(resized_width, 8);
1116
1117 /*
1118 * Calculate input width from the last accessed input
1119 * pixel given resized width and scaling coefficients.
1120 * Round up to burst size.
1121 */
1122 last_output = round_up(resized_width, 8) - 1;
1123 if (closest)
1124 last_output++;
1125 in_tile->width = round_up(
1126 (DIV_ROUND_UP(last_output * resize_coeff_h,
1127 8192) + 1)
1128 << ctx->downsize_coeff_h, 8);
1129 }
1130
1131 ctx->resize_coeffs_h[col] = resize_coeff_h;
1132 }
1133
1134 for (row = 0; row < ctx->in.num_rows; row++) {
1135 bool closest = (row < ctx->in.num_rows - 1) &&
1136 !(ctx->rot_mode & IPU_ROT_BIT_VFLIP);
1137 u32 resized_height;
1138 u32 resize_coeff_v;
1139
1140 tile_idx = row * ctx->in.num_cols;
1141 in_tile = &ctx->in.tile[tile_idx];
1142 out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
1143
1144 if (ipu_rot_mode_is_irt(ctx->rot_mode))
1145 resized_height = out_tile->width;
1146 else
1147 resized_height = out_tile->height;
1148
1149 resize_coeff_v = calc_resize_coeff(in_tile->height,
1150 ctx->downsize_coeff_v,
1151 resized_height, closest);
1152
1153 dev_dbg(priv->ipu->dev, "%s: row %u vscale: *8192/%u\n",
1154 __func__, row, resize_coeff_v);
1155
1156 for (col = 0; col < ctx->in.num_cols; col++) {
1157 tile_idx = row * ctx->in.num_cols + col;
1158 in_tile = &ctx->in.tile[tile_idx];
1159 out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
1160
1161 /*
1162 * With the vertical scaling factor known, round up
1163 * resized height (output width or height) to IDMAC
1164 * limitations.
1165 */
1166 if (ipu_rot_mode_is_irt(ctx->rot_mode))
1167 out_tile->width = round_up(resized_height, 2);
1168 else
1169 out_tile->height = round_up(resized_height, 2);
1170
1171 /*
1172 * Calculate input width from the last accessed input
1173 * pixel given resized height and scaling coefficients.
1174 * Align to IDMAC restrictions.
1175 */
1176 last_output = round_up(resized_height, 2) - 1;
1177 if (closest)
1178 last_output++;
1179 in_tile->height = round_up(
1180 (DIV_ROUND_UP(last_output * resize_coeff_v,
1181 8192) + 1)
1182 << ctx->downsize_coeff_v, 2);
1183 }
1184
1185 ctx->resize_coeffs_v[row] = resize_coeff_v;
1186 }
565} 1187}
566 1188
567/* 1189/*
@@ -611,7 +1233,8 @@ static void init_idmac_channel(struct ipu_image_convert_ctx *ctx,
611 struct ipuv3_channel *channel, 1233 struct ipuv3_channel *channel,
612 struct ipu_image_convert_image *image, 1234 struct ipu_image_convert_image *image,
613 enum ipu_rotate_mode rot_mode, 1235 enum ipu_rotate_mode rot_mode,
614 bool rot_swap_width_height) 1236 bool rot_swap_width_height,
1237 unsigned int tile)
615{ 1238{
616 struct ipu_image_convert_chan *chan = ctx->chan; 1239 struct ipu_image_convert_chan *chan = ctx->chan;
617 unsigned int burst_size; 1240 unsigned int burst_size;
@@ -621,23 +1244,23 @@ static void init_idmac_channel(struct ipu_image_convert_ctx *ctx,
621 unsigned int tile_idx[2]; 1244 unsigned int tile_idx[2];
622 1245
623 if (image->type == IMAGE_CONVERT_OUT) { 1246 if (image->type == IMAGE_CONVERT_OUT) {
624 tile_idx[0] = ctx->out_tile_map[0]; 1247 tile_idx[0] = ctx->out_tile_map[tile];
625 tile_idx[1] = ctx->out_tile_map[1]; 1248 tile_idx[1] = ctx->out_tile_map[1];
626 } else { 1249 } else {
627 tile_idx[0] = 0; 1250 tile_idx[0] = tile;
628 tile_idx[1] = 1; 1251 tile_idx[1] = 1;
629 } 1252 }
630 1253
631 if (rot_swap_width_height) { 1254 if (rot_swap_width_height) {
632 width = image->tile[0].height; 1255 width = image->tile[tile_idx[0]].height;
633 height = image->tile[0].width; 1256 height = image->tile[tile_idx[0]].width;
634 stride = image->tile[0].rot_stride; 1257 stride = image->tile[tile_idx[0]].rot_stride;
635 addr0 = ctx->rot_intermediate[0].phys; 1258 addr0 = ctx->rot_intermediate[0].phys;
636 if (ctx->double_buffering) 1259 if (ctx->double_buffering)
637 addr1 = ctx->rot_intermediate[1].phys; 1260 addr1 = ctx->rot_intermediate[1].phys;
638 } else { 1261 } else {
639 width = image->tile[0].width; 1262 width = image->tile[tile_idx[0]].width;
640 height = image->tile[0].height; 1263 height = image->tile[tile_idx[0]].height;
641 stride = image->stride; 1264 stride = image->stride;
642 addr0 = image->base.phys0 + 1265 addr0 = image->base.phys0 +
643 image->tile[tile_idx[0]].offset; 1266 image->tile[tile_idx[0]].offset;
@@ -655,12 +1278,12 @@ static void init_idmac_channel(struct ipu_image_convert_ctx *ctx,
655 tile_image.pix.pixelformat = image->fmt->fourcc; 1278 tile_image.pix.pixelformat = image->fmt->fourcc;
656 tile_image.phys0 = addr0; 1279 tile_image.phys0 = addr0;
657 tile_image.phys1 = addr1; 1280 tile_image.phys1 = addr1;
658 ipu_cpmem_set_image(channel, &tile_image); 1281 if (image->fmt->planar && !rot_swap_width_height) {
1282 tile_image.u_offset = image->tile[tile_idx[0]].u_off;
1283 tile_image.v_offset = image->tile[tile_idx[0]].v_off;
1284 }
659 1285
660 if (image->fmt->planar && !rot_swap_width_height) 1286 ipu_cpmem_set_image(channel, &tile_image);
661 ipu_cpmem_set_uv_offset(channel,
662 image->tile[tile_idx[0]].u_off,
663 image->tile[tile_idx[0]].v_off);
664 1287
665 if (rot_mode) 1288 if (rot_mode)
666 ipu_cpmem_set_rotation(channel, rot_mode); 1289 ipu_cpmem_set_rotation(channel, rot_mode);
@@ -687,7 +1310,7 @@ static void init_idmac_channel(struct ipu_image_convert_ctx *ctx,
687 ipu_idmac_set_double_buffer(channel, ctx->double_buffering); 1310 ipu_idmac_set_double_buffer(channel, ctx->double_buffering);
688} 1311}
689 1312
690static int convert_start(struct ipu_image_convert_run *run) 1313static int convert_start(struct ipu_image_convert_run *run, unsigned int tile)
691{ 1314{
692 struct ipu_image_convert_ctx *ctx = run->ctx; 1315 struct ipu_image_convert_ctx *ctx = run->ctx;
693 struct ipu_image_convert_chan *chan = ctx->chan; 1316 struct ipu_image_convert_chan *chan = ctx->chan;
@@ -695,31 +1318,47 @@ static int convert_start(struct ipu_image_convert_run *run)
695 struct ipu_image_convert_image *s_image = &ctx->in; 1318 struct ipu_image_convert_image *s_image = &ctx->in;
696 struct ipu_image_convert_image *d_image = &ctx->out; 1319 struct ipu_image_convert_image *d_image = &ctx->out;
697 enum ipu_color_space src_cs, dest_cs; 1320 enum ipu_color_space src_cs, dest_cs;
1321 unsigned int dst_tile = ctx->out_tile_map[tile];
698 unsigned int dest_width, dest_height; 1322 unsigned int dest_width, dest_height;
1323 unsigned int col, row;
1324 u32 rsc;
699 int ret; 1325 int ret;
700 1326
701 dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p\n", 1327 dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p tile %u -> %u\n",
702 __func__, chan->ic_task, ctx, run); 1328 __func__, chan->ic_task, ctx, run, tile, dst_tile);
703 1329
704 src_cs = ipu_pixelformat_to_colorspace(s_image->fmt->fourcc); 1330 src_cs = ipu_pixelformat_to_colorspace(s_image->fmt->fourcc);
705 dest_cs = ipu_pixelformat_to_colorspace(d_image->fmt->fourcc); 1331 dest_cs = ipu_pixelformat_to_colorspace(d_image->fmt->fourcc);
706 1332
707 if (ipu_rot_mode_is_irt(ctx->rot_mode)) { 1333 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
708 /* swap width/height for resizer */ 1334 /* swap width/height for resizer */
709 dest_width = d_image->tile[0].height; 1335 dest_width = d_image->tile[dst_tile].height;
710 dest_height = d_image->tile[0].width; 1336 dest_height = d_image->tile[dst_tile].width;
711 } else { 1337 } else {
712 dest_width = d_image->tile[0].width; 1338 dest_width = d_image->tile[dst_tile].width;
713 dest_height = d_image->tile[0].height; 1339 dest_height = d_image->tile[dst_tile].height;
714 } 1340 }
715 1341
1342 row = tile / s_image->num_cols;
1343 col = tile % s_image->num_cols;
1344
1345 rsc = (ctx->downsize_coeff_v << 30) |
1346 (ctx->resize_coeffs_v[row] << 16) |
1347 (ctx->downsize_coeff_h << 14) |
1348 (ctx->resize_coeffs_h[col]);
1349
1350 dev_dbg(priv->ipu->dev, "%s: %ux%u -> %ux%u (rsc = 0x%x)\n",
1351 __func__, s_image->tile[tile].width,
1352 s_image->tile[tile].height, dest_width, dest_height, rsc);
1353
716 /* setup the IC resizer and CSC */ 1354 /* setup the IC resizer and CSC */
717 ret = ipu_ic_task_init(chan->ic, 1355 ret = ipu_ic_task_init_rsc(chan->ic,
718 s_image->tile[0].width, 1356 s_image->tile[tile].width,
719 s_image->tile[0].height, 1357 s_image->tile[tile].height,
720 dest_width, 1358 dest_width,
721 dest_height, 1359 dest_height,
722 src_cs, dest_cs); 1360 src_cs, dest_cs,
1361 rsc);
723 if (ret) { 1362 if (ret) {
724 dev_err(priv->ipu->dev, "ipu_ic_task_init failed, %d\n", ret); 1363 dev_err(priv->ipu->dev, "ipu_ic_task_init failed, %d\n", ret);
725 return ret; 1364 return ret;
@@ -727,27 +1366,27 @@ static int convert_start(struct ipu_image_convert_run *run)
727 1366
728 /* init the source MEM-->IC PP IDMAC channel */ 1367 /* init the source MEM-->IC PP IDMAC channel */
729 init_idmac_channel(ctx, chan->in_chan, s_image, 1368 init_idmac_channel(ctx, chan->in_chan, s_image,
730 IPU_ROTATE_NONE, false); 1369 IPU_ROTATE_NONE, false, tile);
731 1370
732 if (ipu_rot_mode_is_irt(ctx->rot_mode)) { 1371 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
733 /* init the IC PP-->MEM IDMAC channel */ 1372 /* init the IC PP-->MEM IDMAC channel */
734 init_idmac_channel(ctx, chan->out_chan, d_image, 1373 init_idmac_channel(ctx, chan->out_chan, d_image,
735 IPU_ROTATE_NONE, true); 1374 IPU_ROTATE_NONE, true, tile);
736 1375
737 /* init the MEM-->IC PP ROT IDMAC channel */ 1376 /* init the MEM-->IC PP ROT IDMAC channel */
738 init_idmac_channel(ctx, chan->rotation_in_chan, d_image, 1377 init_idmac_channel(ctx, chan->rotation_in_chan, d_image,
739 ctx->rot_mode, true); 1378 ctx->rot_mode, true, tile);
740 1379
741 /* init the destination IC PP ROT-->MEM IDMAC channel */ 1380 /* init the destination IC PP ROT-->MEM IDMAC channel */
742 init_idmac_channel(ctx, chan->rotation_out_chan, d_image, 1381 init_idmac_channel(ctx, chan->rotation_out_chan, d_image,
743 IPU_ROTATE_NONE, false); 1382 IPU_ROTATE_NONE, false, tile);
744 1383
745 /* now link IC PP-->MEM to MEM-->IC PP ROT */ 1384 /* now link IC PP-->MEM to MEM-->IC PP ROT */
746 ipu_idmac_link(chan->out_chan, chan->rotation_in_chan); 1385 ipu_idmac_link(chan->out_chan, chan->rotation_in_chan);
747 } else { 1386 } else {
748 /* init the destination IC PP-->MEM IDMAC channel */ 1387 /* init the destination IC PP-->MEM IDMAC channel */
749 init_idmac_channel(ctx, chan->out_chan, d_image, 1388 init_idmac_channel(ctx, chan->out_chan, d_image,
750 ctx->rot_mode, false); 1389 ctx->rot_mode, false, tile);
751 } 1390 }
752 1391
753 /* enable the IC */ 1392 /* enable the IC */
@@ -805,7 +1444,7 @@ static int do_run(struct ipu_image_convert_run *run)
805 list_del(&run->list); 1444 list_del(&run->list);
806 chan->current_run = run; 1445 chan->current_run = run;
807 1446
808 return convert_start(run); 1447 return convert_start(run, 0);
809} 1448}
810 1449
811/* hold irqlock when calling */ 1450/* hold irqlock when calling */
@@ -896,7 +1535,7 @@ static irqreturn_t do_bh(int irq, void *dev_id)
896 dev_dbg(priv->ipu->dev, 1535 dev_dbg(priv->ipu->dev,
897 "%s: task %u: signaling abort for ctx %p\n", 1536 "%s: task %u: signaling abort for ctx %p\n",
898 __func__, chan->ic_task, ctx); 1537 __func__, chan->ic_task, ctx);
899 complete(&ctx->aborted); 1538 complete_all(&ctx->aborted);
900 } 1539 }
901 } 1540 }
902 1541
@@ -908,6 +1547,24 @@ static irqreturn_t do_bh(int irq, void *dev_id)
908 return IRQ_HANDLED; 1547 return IRQ_HANDLED;
909} 1548}
910 1549
1550static bool ic_settings_changed(struct ipu_image_convert_ctx *ctx)
1551{
1552 unsigned int cur_tile = ctx->next_tile - 1;
1553 unsigned int next_tile = ctx->next_tile;
1554
1555 if (ctx->resize_coeffs_h[cur_tile % ctx->in.num_cols] !=
1556 ctx->resize_coeffs_h[next_tile % ctx->in.num_cols] ||
1557 ctx->resize_coeffs_v[cur_tile / ctx->in.num_cols] !=
1558 ctx->resize_coeffs_v[next_tile / ctx->in.num_cols] ||
1559 ctx->in.tile[cur_tile].width != ctx->in.tile[next_tile].width ||
1560 ctx->in.tile[cur_tile].height != ctx->in.tile[next_tile].height ||
1561 ctx->out.tile[cur_tile].width != ctx->out.tile[next_tile].width ||
1562 ctx->out.tile[cur_tile].height != ctx->out.tile[next_tile].height)
1563 return true;
1564
1565 return false;
1566}
1567
911/* hold irqlock when calling */ 1568/* hold irqlock when calling */
912static irqreturn_t do_irq(struct ipu_image_convert_run *run) 1569static irqreturn_t do_irq(struct ipu_image_convert_run *run)
913{ 1570{
@@ -951,27 +1608,32 @@ static irqreturn_t do_irq(struct ipu_image_convert_run *run)
951 * not done, place the next tile buffers. 1608 * not done, place the next tile buffers.
952 */ 1609 */
953 if (!ctx->double_buffering) { 1610 if (!ctx->double_buffering) {
954 1611 if (ic_settings_changed(ctx)) {
955 src_tile = &s_image->tile[ctx->next_tile]; 1612 convert_stop(run);
956 dst_idx = ctx->out_tile_map[ctx->next_tile]; 1613 convert_start(run, ctx->next_tile);
957 dst_tile = &d_image->tile[dst_idx]; 1614 } else {
958 1615 src_tile = &s_image->tile[ctx->next_tile];
959 ipu_cpmem_set_buffer(chan->in_chan, 0, 1616 dst_idx = ctx->out_tile_map[ctx->next_tile];
960 s_image->base.phys0 + src_tile->offset); 1617 dst_tile = &d_image->tile[dst_idx];
961 ipu_cpmem_set_buffer(outch, 0, 1618
962 d_image->base.phys0 + dst_tile->offset); 1619 ipu_cpmem_set_buffer(chan->in_chan, 0,
963 if (s_image->fmt->planar) 1620 s_image->base.phys0 +
964 ipu_cpmem_set_uv_offset(chan->in_chan, 1621 src_tile->offset);
965 src_tile->u_off, 1622 ipu_cpmem_set_buffer(outch, 0,
966 src_tile->v_off); 1623 d_image->base.phys0 +
967 if (d_image->fmt->planar) 1624 dst_tile->offset);
968 ipu_cpmem_set_uv_offset(outch, 1625 if (s_image->fmt->planar)
969 dst_tile->u_off, 1626 ipu_cpmem_set_uv_offset(chan->in_chan,
970 dst_tile->v_off); 1627 src_tile->u_off,
971 1628 src_tile->v_off);
972 ipu_idmac_select_buffer(chan->in_chan, 0); 1629 if (d_image->fmt->planar)
973 ipu_idmac_select_buffer(outch, 0); 1630 ipu_cpmem_set_uv_offset(outch,
974 1631 dst_tile->u_off,
1632 dst_tile->v_off);
1633
1634 ipu_idmac_select_buffer(chan->in_chan, 0);
1635 ipu_idmac_select_buffer(outch, 0);
1636 }
975 } else if (ctx->next_tile < ctx->num_tiles - 1) { 1637 } else if (ctx->next_tile < ctx->num_tiles - 1) {
976 1638
977 src_tile = &s_image->tile[ctx->next_tile + 1]; 1639 src_tile = &s_image->tile[ctx->next_tile + 1];
@@ -1198,9 +1860,6 @@ static int fill_image(struct ipu_image_convert_ctx *ctx,
1198 else 1860 else
1199 ic_image->stride = ic_image->base.pix.bytesperline; 1861 ic_image->stride = ic_image->base.pix.bytesperline;
1200 1862
1201 calc_tile_dimensions(ctx, ic_image);
1202 calc_tile_offsets(ctx, ic_image);
1203
1204 return 0; 1863 return 0;
1205} 1864}
1206 1865
@@ -1221,40 +1880,11 @@ static unsigned int clamp_align(unsigned int x, unsigned int min,
1221 return x; 1880 return x;
1222} 1881}
1223 1882
1224/*
1225 * We have to adjust the tile width such that the tile physaddrs and
1226 * U and V plane offsets are multiples of 8 bytes as required by
1227 * the IPU DMA Controller. For the planar formats, this corresponds
1228 * to a pixel alignment of 16 (but use a more formal equation since
1229 * the variables are available). For all the packed formats, 8 is
1230 * good enough.
1231 */
1232static inline u32 tile_width_align(const struct ipu_image_pixfmt *fmt)
1233{
1234 return fmt->planar ? 8 * fmt->uv_width_dec : 8;
1235}
1236
1237/*
1238 * For tile height alignment, we have to ensure that the output tile
1239 * heights are multiples of 8 lines if the IRT is required by the
1240 * given rotation mode (the IRT performs rotations on 8x8 blocks
1241 * at a time). If the IRT is not used, or for input image tiles,
1242 * 2 lines are good enough.
1243 */
1244static inline u32 tile_height_align(enum ipu_image_convert_type type,
1245 enum ipu_rotate_mode rot_mode)
1246{
1247 return (type == IMAGE_CONVERT_OUT &&
1248 ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2;
1249}
1250
1251/* Adjusts input/output images to IPU restrictions */ 1883/* Adjusts input/output images to IPU restrictions */
1252void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, 1884void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out,
1253 enum ipu_rotate_mode rot_mode) 1885 enum ipu_rotate_mode rot_mode)
1254{ 1886{
1255 const struct ipu_image_pixfmt *infmt, *outfmt; 1887 const struct ipu_image_pixfmt *infmt, *outfmt;
1256 unsigned int num_in_rows, num_in_cols;
1257 unsigned int num_out_rows, num_out_cols;
1258 u32 w_align, h_align; 1888 u32 w_align, h_align;
1259 1889
1260 infmt = get_format(in->pix.pixelformat); 1890 infmt = get_format(in->pix.pixelformat);
@@ -1286,36 +1916,31 @@ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out,
1286 in->pix.height / 4); 1916 in->pix.height / 4);
1287 } 1917 }
1288 1918
1289 /* get tiling rows/cols from output format */
1290 num_out_rows = num_stripes(out->pix.height);
1291 num_out_cols = num_stripes(out->pix.width);
1292 if (ipu_rot_mode_is_irt(rot_mode)) {
1293 num_in_rows = num_out_cols;
1294 num_in_cols = num_out_rows;
1295 } else {
1296 num_in_rows = num_out_rows;
1297 num_in_cols = num_out_cols;
1298 }
1299
1300 /* align input width/height */ 1919 /* align input width/height */
1301 w_align = ilog2(tile_width_align(infmt) * num_in_cols); 1920 w_align = ilog2(tile_width_align(IMAGE_CONVERT_IN, infmt, rot_mode));
1302 h_align = ilog2(tile_height_align(IMAGE_CONVERT_IN, rot_mode) * 1921 h_align = ilog2(tile_height_align(IMAGE_CONVERT_IN, infmt, rot_mode));
1303 num_in_rows);
1304 in->pix.width = clamp_align(in->pix.width, MIN_W, MAX_W, w_align); 1922 in->pix.width = clamp_align(in->pix.width, MIN_W, MAX_W, w_align);
1305 in->pix.height = clamp_align(in->pix.height, MIN_H, MAX_H, h_align); 1923 in->pix.height = clamp_align(in->pix.height, MIN_H, MAX_H, h_align);
1306 1924
1307 /* align output width/height */ 1925 /* align output width/height */
1308 w_align = ilog2(tile_width_align(outfmt) * num_out_cols); 1926 w_align = ilog2(tile_width_align(IMAGE_CONVERT_OUT, outfmt, rot_mode));
1309 h_align = ilog2(tile_height_align(IMAGE_CONVERT_OUT, rot_mode) * 1927 h_align = ilog2(tile_height_align(IMAGE_CONVERT_OUT, outfmt, rot_mode));
1310 num_out_rows);
1311 out->pix.width = clamp_align(out->pix.width, MIN_W, MAX_W, w_align); 1928 out->pix.width = clamp_align(out->pix.width, MIN_W, MAX_W, w_align);
1312 out->pix.height = clamp_align(out->pix.height, MIN_H, MAX_H, h_align); 1929 out->pix.height = clamp_align(out->pix.height, MIN_H, MAX_H, h_align);
1313 1930
1314 /* set input/output strides and image sizes */ 1931 /* set input/output strides and image sizes */
1315 in->pix.bytesperline = (in->pix.width * infmt->bpp) >> 3; 1932 in->pix.bytesperline = infmt->planar ?
1316 in->pix.sizeimage = in->pix.height * in->pix.bytesperline; 1933 clamp_align(in->pix.width, 2 << w_align, MAX_W, w_align) :
1317 out->pix.bytesperline = (out->pix.width * outfmt->bpp) >> 3; 1934 clamp_align((in->pix.width * infmt->bpp) >> 3,
1318 out->pix.sizeimage = out->pix.height * out->pix.bytesperline; 1935 2 << w_align, MAX_W, w_align);
1936 in->pix.sizeimage = infmt->planar ?
1937 (in->pix.height * in->pix.bytesperline * infmt->bpp) >> 3 :
1938 in->pix.height * in->pix.bytesperline;
1939 out->pix.bytesperline = outfmt->planar ? out->pix.width :
1940 (out->pix.width * outfmt->bpp) >> 3;
1941 out->pix.sizeimage = outfmt->planar ?
1942 (out->pix.height * out->pix.bytesperline * outfmt->bpp) >> 3 :
1943 out->pix.height * out->pix.bytesperline;
1319} 1944}
1320EXPORT_SYMBOL_GPL(ipu_image_convert_adjust); 1945EXPORT_SYMBOL_GPL(ipu_image_convert_adjust);
1321 1946
@@ -1360,6 +1985,7 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
1360 struct ipu_image_convert_chan *chan; 1985 struct ipu_image_convert_chan *chan;
1361 struct ipu_image_convert_ctx *ctx; 1986 struct ipu_image_convert_ctx *ctx;
1362 unsigned long flags; 1987 unsigned long flags;
1988 unsigned int i;
1363 bool get_res; 1989 bool get_res;
1364 int ret; 1990 int ret;
1365 1991
@@ -1412,8 +2038,26 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
1412 if (ret) 2038 if (ret)
1413 goto out_free; 2039 goto out_free;
1414 2040
2041 ret = calc_image_resize_coefficients(ctx, in, out);
2042 if (ret)
2043 goto out_free;
2044
1415 calc_out_tile_map(ctx); 2045 calc_out_tile_map(ctx);
1416 2046
2047 find_seams(ctx, s_image, d_image);
2048
2049 calc_tile_dimensions(ctx, s_image);
2050 ret = calc_tile_offsets(ctx, s_image);
2051 if (ret)
2052 goto out_free;
2053
2054 calc_tile_dimensions(ctx, d_image);
2055 ret = calc_tile_offsets(ctx, d_image);
2056 if (ret)
2057 goto out_free;
2058
2059 calc_tile_resize_coefficients(ctx);
2060
1417 dump_format(ctx, s_image); 2061 dump_format(ctx, s_image);
1418 dump_format(ctx, d_image); 2062 dump_format(ctx, d_image);
1419 2063
@@ -1429,21 +2073,51 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
1429 * for every tile, and therefore would have to be updated for 2073 * for every tile, and therefore would have to be updated for
1430 * each buffer which is not possible. So double-buffering is 2074 * each buffer which is not possible. So double-buffering is
1431 * impossible when either the source or destination images are 2075 * impossible when either the source or destination images are
1432 * a planar format (YUV420, YUV422P, etc.). 2076 * a planar format (YUV420, YUV422P, etc.). Further, differently
2077 * sized tiles or different resizing coefficients per tile
2078 * prevent double-buffering as well.
1433 */ 2079 */
1434 ctx->double_buffering = (ctx->num_tiles > 1 && 2080 ctx->double_buffering = (ctx->num_tiles > 1 &&
1435 !s_image->fmt->planar && 2081 !s_image->fmt->planar &&
1436 !d_image->fmt->planar); 2082 !d_image->fmt->planar);
2083 for (i = 1; i < ctx->num_tiles; i++) {
2084 if (ctx->in.tile[i].width != ctx->in.tile[0].width ||
2085 ctx->in.tile[i].height != ctx->in.tile[0].height ||
2086 ctx->out.tile[i].width != ctx->out.tile[0].width ||
2087 ctx->out.tile[i].height != ctx->out.tile[0].height) {
2088 ctx->double_buffering = false;
2089 break;
2090 }
2091 }
2092 for (i = 1; i < ctx->in.num_cols; i++) {
2093 if (ctx->resize_coeffs_h[i] != ctx->resize_coeffs_h[0]) {
2094 ctx->double_buffering = false;
2095 break;
2096 }
2097 }
2098 for (i = 1; i < ctx->in.num_rows; i++) {
2099 if (ctx->resize_coeffs_v[i] != ctx->resize_coeffs_v[0]) {
2100 ctx->double_buffering = false;
2101 break;
2102 }
2103 }
1437 2104
1438 if (ipu_rot_mode_is_irt(ctx->rot_mode)) { 2105 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
2106 unsigned long intermediate_size = d_image->tile[0].size;
2107
2108 for (i = 1; i < ctx->num_tiles; i++) {
2109 if (d_image->tile[i].size > intermediate_size)
2110 intermediate_size = d_image->tile[i].size;
2111 }
2112
1439 ret = alloc_dma_buf(priv, &ctx->rot_intermediate[0], 2113 ret = alloc_dma_buf(priv, &ctx->rot_intermediate[0],
1440 d_image->tile[0].size); 2114 intermediate_size);
1441 if (ret) 2115 if (ret)
1442 goto out_free; 2116 goto out_free;
1443 if (ctx->double_buffering) { 2117 if (ctx->double_buffering) {
1444 ret = alloc_dma_buf(priv, 2118 ret = alloc_dma_buf(priv,
1445 &ctx->rot_intermediate[1], 2119 &ctx->rot_intermediate[1],
1446 d_image->tile[0].size); 2120 intermediate_size);
1447 if (ret) 2121 if (ret)
1448 goto out_free_dmabuf0; 2122 goto out_free_dmabuf0;
1449 } 2123 }
@@ -1524,16 +2198,13 @@ unlock:
1524EXPORT_SYMBOL_GPL(ipu_image_convert_queue); 2198EXPORT_SYMBOL_GPL(ipu_image_convert_queue);
1525 2199
1526/* Abort any active or pending conversions for this context */ 2200/* Abort any active or pending conversions for this context */
1527void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) 2201static void __ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx)
1528{ 2202{
1529 struct ipu_image_convert_chan *chan = ctx->chan; 2203 struct ipu_image_convert_chan *chan = ctx->chan;
1530 struct ipu_image_convert_priv *priv = chan->priv; 2204 struct ipu_image_convert_priv *priv = chan->priv;
1531 struct ipu_image_convert_run *run, *active_run, *tmp; 2205 struct ipu_image_convert_run *run, *active_run, *tmp;
1532 unsigned long flags; 2206 unsigned long flags;
1533 int run_count, ret; 2207 int run_count, ret;
1534 bool need_abort;
1535
1536 reinit_completion(&ctx->aborted);
1537 2208
1538 spin_lock_irqsave(&chan->irqlock, flags); 2209 spin_lock_irqsave(&chan->irqlock, flags);
1539 2210
@@ -1549,22 +2220,28 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx)
1549 active_run = (chan->current_run && chan->current_run->ctx == ctx) ? 2220 active_run = (chan->current_run && chan->current_run->ctx == ctx) ?
1550 chan->current_run : NULL; 2221 chan->current_run : NULL;
1551 2222
1552 need_abort = (run_count || active_run); 2223 if (active_run)
2224 reinit_completion(&ctx->aborted);
1553 2225
1554 ctx->aborting = need_abort; 2226 ctx->aborting = true;
1555 2227
1556 spin_unlock_irqrestore(&chan->irqlock, flags); 2228 spin_unlock_irqrestore(&chan->irqlock, flags);
1557 2229
1558 if (!need_abort) { 2230 if (!run_count && !active_run) {
1559 dev_dbg(priv->ipu->dev, 2231 dev_dbg(priv->ipu->dev,
1560 "%s: task %u: no abort needed for ctx %p\n", 2232 "%s: task %u: no abort needed for ctx %p\n",
1561 __func__, chan->ic_task, ctx); 2233 __func__, chan->ic_task, ctx);
1562 return; 2234 return;
1563 } 2235 }
1564 2236
2237 if (!active_run) {
2238 empty_done_q(chan);
2239 return;
2240 }
2241
1565 dev_dbg(priv->ipu->dev, 2242 dev_dbg(priv->ipu->dev,
1566 "%s: task %u: wait for completion: %d runs, active run %p\n", 2243 "%s: task %u: wait for completion: %d runs\n",
1567 __func__, chan->ic_task, run_count, active_run); 2244 __func__, chan->ic_task, run_count);
1568 2245
1569 ret = wait_for_completion_timeout(&ctx->aborted, 2246 ret = wait_for_completion_timeout(&ctx->aborted,
1570 msecs_to_jiffies(10000)); 2247 msecs_to_jiffies(10000));
@@ -1572,7 +2249,11 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx)
1572 dev_warn(priv->ipu->dev, "%s: timeout\n", __func__); 2249 dev_warn(priv->ipu->dev, "%s: timeout\n", __func__);
1573 force_abort(ctx); 2250 force_abort(ctx);
1574 } 2251 }
2252}
1575 2253
2254void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx)
2255{
2256 __ipu_image_convert_abort(ctx);
1576 ctx->aborting = false; 2257 ctx->aborting = false;
1577} 2258}
1578EXPORT_SYMBOL_GPL(ipu_image_convert_abort); 2259EXPORT_SYMBOL_GPL(ipu_image_convert_abort);
@@ -1586,7 +2267,7 @@ void ipu_image_convert_unprepare(struct ipu_image_convert_ctx *ctx)
1586 bool put_res; 2267 bool put_res;
1587 2268
1588 /* make sure no runs are hanging around */ 2269 /* make sure no runs are hanging around */
1589 ipu_image_convert_abort(ctx); 2270 __ipu_image_convert_abort(ctx);
1590 2271
1591 dev_dbg(priv->ipu->dev, "%s: task %u: removing ctx %p\n", __func__, 2272 dev_dbg(priv->ipu->dev, "%s: task %u: removing ctx %p\n", __func__,
1592 chan->ic_task, ctx); 2273 chan->ic_task, ctx);
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index abbad94e14a1..e582e8e7527a 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -246,6 +246,9 @@ struct ipu_image {
246 struct v4l2_rect rect; 246 struct v4l2_rect rect;
247 dma_addr_t phys0; 247 dma_addr_t phys0;
248 dma_addr_t phys1; 248 dma_addr_t phys1;
249 /* chroma plane offset overrides */
250 u32 u_offset;
251 u32 v_offset;
249}; 252};
250 253
251void ipu_cpmem_zero(struct ipuv3_channel *ch); 254void ipu_cpmem_zero(struct ipuv3_channel *ch);
@@ -387,6 +390,12 @@ int ipu_ic_task_init(struct ipu_ic *ic,
387 int out_width, int out_height, 390 int out_width, int out_height,
388 enum ipu_color_space in_cs, 391 enum ipu_color_space in_cs,
389 enum ipu_color_space out_cs); 392 enum ipu_color_space out_cs);
393int ipu_ic_task_init_rsc(struct ipu_ic *ic,
394 int in_width, int in_height,
395 int out_width, int out_height,
396 enum ipu_color_space in_cs,
397 enum ipu_color_space out_cs,
398 u32 rsc);
390int ipu_ic_task_graphics_init(struct ipu_ic *ic, 399int ipu_ic_task_graphics_init(struct ipu_ic *ic,
391 enum ipu_color_space in_g_cs, 400 enum ipu_color_space in_g_cs,
392 bool galpha_en, u32 galpha, 401 bool galpha_en, u32 galpha,