diff options
author | Philipp Zabel <p.zabel@pengutronix.de> | 2018-09-18 05:34:17 -0400 |
---|---|---|
committer | Philipp Zabel <p.zabel@pengutronix.de> | 2018-11-05 08:40:08 -0500 |
commit | ff652fcf84f7a0977bbad4eab976f7813665fbc8 (patch) | |
tree | 44f063e3bec021c785a50b4064ec4bce9c291dd7 | |
parent | a3f42419e2a72b174a7d993debea85df7a56bd4b (diff) |
gpu: ipu-v3: image-convert: relax alignment restrictions
For the planar but U/V-packed formats NV12 and NV16, 8-pixel width
alignment is good enough to fulfill the 8-byte stride requirement.
If we allow the input 8-pixel DMA bursts to overshoot the end of the
line, the only input alignment restrictions are dictated by the pixel
format and the 8-byte aligned line start address.
Since different tile sizes are allowed, the output tile width / height
alignment doesn't need to be multiplied by the number of columns / rows.
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
[slongerbeam@gmail.com: Bring in the fixes to format width and
height alignment restrictions from imx-media-mem2mem.c.]
Signed-off-by: Steve Longerbeam <slongerbeam@gmail.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
-rw-r--r-- | drivers/gpu/ipu-v3/ipu-image-convert.c | 81 |
1 files changed, 41 insertions, 40 deletions
diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 0451d699f515..0829723a7599 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c | |||
@@ -551,31 +551,46 @@ static inline u32 tile_top_align(const struct ipu_image_pixfmt *fmt) | |||
551 | return fmt->uv_height_dec > 1 ? 2 : 1; | 551 | return fmt->uv_height_dec > 1 ? 2 : 1; |
552 | } | 552 | } |
553 | 553 | ||
554 | /* | 554 | static inline u32 tile_width_align(enum ipu_image_convert_type type, |
555 | * We have to adjust the tile width such that the tile physaddrs and | 555 | const struct ipu_image_pixfmt *fmt, |
556 | * U and V plane offsets are multiples of 8 bytes as required by | 556 | enum ipu_rotate_mode rot_mode) |
557 | * the IPU DMA Controller. For the planar formats, this corresponds | ||
558 | * to a pixel alignment of 16 (but use a more formal equation since | ||
559 | * the variables are available). For all the packed formats, 8 is | ||
560 | * good enough. | ||
561 | */ | ||
562 | static inline u32 tile_width_align(const struct ipu_image_pixfmt *fmt) | ||
563 | { | 557 | { |
564 | return fmt->planar ? 8 * fmt->uv_width_dec : 8; | 558 | if (type == IMAGE_CONVERT_IN) { |
559 | /* | ||
560 | * The IC burst reads 8 pixels at a time. Reading beyond the | ||
561 | * end of the line is usually acceptable. Those pixels are | ||
562 | * ignored, unless the IC has to write the scaled line in | ||
563 | * reverse. | ||
564 | */ | ||
565 | return (!ipu_rot_mode_is_irt(rot_mode) && | ||
566 | (rot_mode & IPU_ROT_BIT_HFLIP)) ? 8 : 2; | ||
567 | } | ||
568 | |||
569 | /* | ||
570 | * Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled | ||
571 | * formats to guarantee 8-byte aligned line start addresses in the | ||
572 | * chroma planes when IRT is used. Align to 8x8 pixel IRT block size | ||
573 | * for all other formats. | ||
574 | */ | ||
575 | return (ipu_rot_mode_is_irt(rot_mode) && | ||
576 | fmt->planar && !fmt->uv_packed) ? | ||
577 | 8 * fmt->uv_width_dec : 8; | ||
565 | } | 578 | } |
566 | 579 | ||
567 | /* | ||
568 | * For tile height alignment, we have to ensure that the output tile | ||
569 | * heights are multiples of 8 lines if the IRT is required by the | ||
570 | * given rotation mode (the IRT performs rotations on 8x8 blocks | ||
571 | * at a time). If the IRT is not used, or for input image tiles, | ||
572 | * 2 lines are good enough. | ||
573 | */ | ||
574 | static inline u32 tile_height_align(enum ipu_image_convert_type type, | 580 | static inline u32 tile_height_align(enum ipu_image_convert_type type, |
581 | const struct ipu_image_pixfmt *fmt, | ||
575 | enum ipu_rotate_mode rot_mode) | 582 | enum ipu_rotate_mode rot_mode) |
576 | { | 583 | { |
577 | return (type == IMAGE_CONVERT_OUT && | 584 | if (type == IMAGE_CONVERT_IN || !ipu_rot_mode_is_irt(rot_mode)) |
578 | ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2; | 585 | return 2; |
586 | |||
587 | /* | ||
588 | * Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled | ||
589 | * formats to guarantee 8-byte aligned line start addresses in the | ||
590 | * chroma planes when IRT is used. Align to 8x8 pixel IRT block size | ||
591 | * for all other formats. | ||
592 | */ | ||
593 | return (fmt->planar && !fmt->uv_packed) ? 8 * fmt->uv_width_dec : 8; | ||
579 | } | 594 | } |
580 | 595 | ||
581 | /* | 596 | /* |
@@ -661,8 +676,9 @@ static void find_seams(struct ipu_image_convert_ctx *ctx, | |||
661 | unsigned int in_top_align = tile_top_align(in->fmt); | 676 | unsigned int in_top_align = tile_top_align(in->fmt); |
662 | unsigned int out_left_align = tile_left_align(out->fmt); | 677 | unsigned int out_left_align = tile_left_align(out->fmt); |
663 | unsigned int out_top_align = tile_top_align(out->fmt); | 678 | unsigned int out_top_align = tile_top_align(out->fmt); |
664 | unsigned int out_width_align = tile_width_align(out->fmt); | 679 | unsigned int out_width_align = tile_width_align(out->type, out->fmt, |
665 | unsigned int out_height_align = tile_height_align(out->type, | 680 | ctx->rot_mode); |
681 | unsigned int out_height_align = tile_height_align(out->type, out->fmt, | ||
666 | ctx->rot_mode); | 682 | ctx->rot_mode); |
667 | unsigned int in_right = in->base.rect.width; | 683 | unsigned int in_right = in->base.rect.width; |
668 | unsigned int in_bottom = in->base.rect.height; | 684 | unsigned int in_bottom = in->base.rect.height; |
@@ -1855,8 +1871,6 @@ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, | |||
1855 | enum ipu_rotate_mode rot_mode) | 1871 | enum ipu_rotate_mode rot_mode) |
1856 | { | 1872 | { |
1857 | const struct ipu_image_pixfmt *infmt, *outfmt; | 1873 | const struct ipu_image_pixfmt *infmt, *outfmt; |
1858 | unsigned int num_in_rows, num_in_cols; | ||
1859 | unsigned int num_out_rows, num_out_cols; | ||
1860 | u32 w_align, h_align; | 1874 | u32 w_align, h_align; |
1861 | 1875 | ||
1862 | infmt = get_format(in->pix.pixelformat); | 1876 | infmt = get_format(in->pix.pixelformat); |
@@ -1888,28 +1902,15 @@ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, | |||
1888 | in->pix.height / 4); | 1902 | in->pix.height / 4); |
1889 | } | 1903 | } |
1890 | 1904 | ||
1891 | /* get tiling rows/cols from output format */ | ||
1892 | num_out_rows = num_stripes(out->pix.height); | ||
1893 | num_out_cols = num_stripes(out->pix.width); | ||
1894 | if (ipu_rot_mode_is_irt(rot_mode)) { | ||
1895 | num_in_rows = num_out_cols; | ||
1896 | num_in_cols = num_out_rows; | ||
1897 | } else { | ||
1898 | num_in_rows = num_out_rows; | ||
1899 | num_in_cols = num_out_cols; | ||
1900 | } | ||
1901 | |||
1902 | /* align input width/height */ | 1905 | /* align input width/height */ |
1903 | w_align = ilog2(tile_width_align(infmt) * num_in_cols); | 1906 | w_align = ilog2(tile_width_align(IMAGE_CONVERT_IN, infmt, rot_mode)); |
1904 | h_align = ilog2(tile_height_align(IMAGE_CONVERT_IN, rot_mode) * | 1907 | h_align = ilog2(tile_height_align(IMAGE_CONVERT_IN, infmt, rot_mode)); |
1905 | num_in_rows); | ||
1906 | in->pix.width = clamp_align(in->pix.width, MIN_W, MAX_W, w_align); | 1908 | in->pix.width = clamp_align(in->pix.width, MIN_W, MAX_W, w_align); |
1907 | in->pix.height = clamp_align(in->pix.height, MIN_H, MAX_H, h_align); | 1909 | in->pix.height = clamp_align(in->pix.height, MIN_H, MAX_H, h_align); |
1908 | 1910 | ||
1909 | /* align output width/height */ | 1911 | /* align output width/height */ |
1910 | w_align = ilog2(tile_width_align(outfmt) * num_out_cols); | 1912 | w_align = ilog2(tile_width_align(IMAGE_CONVERT_OUT, outfmt, rot_mode)); |
1911 | h_align = ilog2(tile_height_align(IMAGE_CONVERT_OUT, rot_mode) * | 1913 | h_align = ilog2(tile_height_align(IMAGE_CONVERT_OUT, outfmt, rot_mode)); |
1912 | num_out_rows); | ||
1913 | out->pix.width = clamp_align(out->pix.width, MIN_W, MAX_W, w_align); | 1914 | out->pix.width = clamp_align(out->pix.width, MIN_W, MAX_W, w_align); |
1914 | out->pix.height = clamp_align(out->pix.height, MIN_H, MAX_H, h_align); | 1915 | out->pix.height = clamp_align(out->pix.height, MIN_H, MAX_H, h_align); |
1915 | 1916 | ||