aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilipp Zabel <p.zabel@pengutronix.de>2018-09-18 05:34:15 -0400
committerPhilipp Zabel <p.zabel@pengutronix.de>2018-11-05 08:40:07 -0500
commit64fbae5e3e2e1b60ae420810216220f59fcde78d (patch)
tree333492b0aecc2ce4243da7507bf21b0e648ec98c
parent76e77bf543c57be14c3daac40f6fb7578ac90992 (diff)
gpu: ipu-v3: image-convert: select optimal seam positions
Select seam positions that minimize distortions during seam hiding while satisfying input and output IDMAC, rotator, and image format constraints. This code looks for aligned output seam positions that minimize the difference between the fractional corresponding ideal input positions and the input positions rounded to alignment requirements. Since now tiles can be sized differently, alignment restrictions of the complete image can be relaxed in the next step. Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de> Acked-by: Steve Longerbeam <slongerbeam@gmail.com> Tested-by: Steve Longerbeam <slongerbeam@gmail.com>
-rw-r--r--drivers/gpu/ipu-v3/ipu-image-convert.c343
1 files changed, 337 insertions, 6 deletions
diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index a407ca3b367b..a674241dd0b8 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -432,6 +432,126 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
432 return 0; 432 return 0;
433} 433}
434 434
/*
 * Round x to the nearest multiple of y by adding half of y and rounding
 * down; exact halfway cases therefore round upwards.
 *
 * Both arguments are expanded more than once, so they must be free of
 * side effects.
 * NOTE(review): round_down() presumably requires y to be a power of two -
 * confirm against its definition.
 */
#define round_closest(x, y) round_down((x) + (y) / 2, (y))
436
437/*
438 * Find the best aligned seam position in the inverval [out_start, out_end].
439 * Rotation and image offsets are out of scope.
440 *
441 * @out_start: start of inverval, must be within 1024 pixels / lines
442 * of out_end
443 * @out_end: end of interval, smaller than or equal to out_edge
444 * @in_edge: input right / bottom edge
445 * @out_edge: output right / bottom edge
446 * @in_align: input alignment, either horizontal 8-byte line start address
447 * alignment, or pixel alignment due to image format
448 * @out_align: output alignment, either horizontal 8-byte line start address
449 * alignment, or pixel alignment due to image format or rotator
450 * block size
451 * @in_burst: horizontal input burst size in case of horizontal flip
452 * @out_burst: horizontal output burst size or rotator block size
453 * @downsize_coeff: downsizing section coefficient
454 * @resize_coeff: main processing section resizing coefficient
455 * @_in_seam: aligned input seam position return value
456 * @_out_seam: aligned output seam position return value
457 */
458static void find_best_seam(struct ipu_image_convert_ctx *ctx,
459 unsigned int out_start,
460 unsigned int out_end,
461 unsigned int in_edge,
462 unsigned int out_edge,
463 unsigned int in_align,
464 unsigned int out_align,
465 unsigned int in_burst,
466 unsigned int out_burst,
467 unsigned int downsize_coeff,
468 unsigned int resize_coeff,
469 u32 *_in_seam,
470 u32 *_out_seam)
471{
472 struct device *dev = ctx->chan->priv->ipu->dev;
473 unsigned int out_pos;
474 /* Input / output seam position candidates */
475 unsigned int out_seam = 0;
476 unsigned int in_seam = 0;
477 unsigned int min_diff = UINT_MAX;
478
479 /*
480 * Output tiles must start at a multiple of 8 bytes horizontally and
481 * possibly at an even line horizontally depending on the pixel format.
482 * Only consider output aligned positions for the seam.
483 */
484 out_start = round_up(out_start, out_align);
485 for (out_pos = out_start; out_pos < out_end; out_pos += out_align) {
486 unsigned int in_pos;
487 unsigned int in_pos_aligned;
488 unsigned int abs_diff;
489
490 /*
491 * Tiles in the right row / bottom column may not be allowed to
492 * overshoot horizontally / vertically. out_burst may be the
493 * actual DMA burst size, or the rotator block size.
494 */
495 if ((out_burst > 1) && (out_edge - out_pos) % out_burst)
496 continue;
497
498 /*
499 * Input sample position, corresponding to out_pos, 19.13 fixed
500 * point.
501 */
502 in_pos = (out_pos * resize_coeff) << downsize_coeff;
503 /*
504 * The closest input sample position that we could actually
505 * start the input tile at, 19.13 fixed point.
506 */
507 in_pos_aligned = round_closest(in_pos, 8192U * in_align);
508
509 if ((in_burst > 1) &&
510 (in_edge - in_pos_aligned / 8192U) % in_burst)
511 continue;
512
513 if (in_pos < in_pos_aligned)
514 abs_diff = in_pos_aligned - in_pos;
515 else
516 abs_diff = in_pos - in_pos_aligned;
517
518 if (abs_diff < min_diff) {
519 in_seam = in_pos_aligned;
520 out_seam = out_pos;
521 min_diff = abs_diff;
522 }
523 }
524
525 *_out_seam = out_seam;
526 /* Convert 19.13 fixed point to integer seam position */
527 *_in_seam = DIV_ROUND_CLOSEST(in_seam, 8192U);
528
529 dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) diff %u.%03u\n",
530 __func__, out_seam, out_align, out_start, out_end,
531 *_in_seam, in_align, min_diff / 8192,
532 DIV_ROUND_CLOSEST(min_diff % 8192 * 1000, 8192));
533}
534
535/*
536 * Tile left edges are required to be aligned to multiples of 8 bytes
537 * by the IDMAC.
538 */
539static inline u32 tile_left_align(const struct ipu_image_pixfmt *fmt)
540{
541 if (fmt->planar)
542 return fmt->uv_packed ? 8 : 8 * fmt->uv_width_dec;
543 else
544 return fmt->bpp == 32 ? 2 : fmt->bpp == 16 ? 4 : 8;
545}
546
547/*
548 * Tile top edge alignment is only limited by chroma subsampling.
549 */
550static inline u32 tile_top_align(const struct ipu_image_pixfmt *fmt)
551{
552 return fmt->uv_height_dec > 1 ? 2 : 1;
553}
554
435/* 555/*
436 * We have to adjust the tile width such that the tile physaddrs and 556 * We have to adjust the tile width such that the tile physaddrs and
437 * U and V plane offsets are multiples of 8 bytes as required by 557 * U and V plane offsets are multiples of 8 bytes as required by
@@ -459,20 +579,228 @@ static inline u32 tile_height_align(enum ipu_image_convert_type type,
459 ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2; 579 ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2;
460} 580}
461 581
582/*
583 * Fill in left position and width and for all tiles in an input column, and
584 * for all corresponding output tiles. If the 90° rotator is used, the output
585 * tiles are in a row, and output tile top position and height are set.
586 */
587static void fill_tile_column(struct ipu_image_convert_ctx *ctx,
588 unsigned int col,
589 struct ipu_image_convert_image *in,
590 unsigned int in_left, unsigned int in_width,
591 struct ipu_image_convert_image *out,
592 unsigned int out_left, unsigned int out_width)
593{
594 unsigned int row, tile_idx;
595 struct ipu_image_tile *in_tile, *out_tile;
596
597 for (row = 0; row < in->num_rows; row++) {
598 tile_idx = in->num_cols * row + col;
599 in_tile = &in->tile[tile_idx];
600 out_tile = &out->tile[ctx->out_tile_map[tile_idx]];
601
602 in_tile->left = in_left;
603 in_tile->width = in_width;
604
605 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
606 out_tile->top = out_left;
607 out_tile->height = out_width;
608 } else {
609 out_tile->left = out_left;
610 out_tile->width = out_width;
611 }
612 }
613}
614
615/*
616 * Fill in top position and height and for all tiles in an input row, and
617 * for all corresponding output tiles. If the 90° rotator is used, the output
618 * tiles are in a column, and output tile left position and width are set.
619 */
620static void fill_tile_row(struct ipu_image_convert_ctx *ctx, unsigned int row,
621 struct ipu_image_convert_image *in,
622 unsigned int in_top, unsigned int in_height,
623 struct ipu_image_convert_image *out,
624 unsigned int out_top, unsigned int out_height)
625{
626 unsigned int col, tile_idx;
627 struct ipu_image_tile *in_tile, *out_tile;
628
629 for (col = 0; col < in->num_cols; col++) {
630 tile_idx = in->num_cols * row + col;
631 in_tile = &in->tile[tile_idx];
632 out_tile = &out->tile[ctx->out_tile_map[tile_idx]];
633
634 in_tile->top = in_top;
635 in_tile->height = in_height;
636
637 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
638 out_tile->left = out_top;
639 out_tile->width = out_height;
640 } else {
641 out_tile->top = out_top;
642 out_tile->height = out_height;
643 }
644 }
645}
646
647/*
648 * Find the best horizontal and vertical seam positions to split into tiles.
649 * Minimize the fractional part of the input sampling position for the
650 * top / left pixels of each tile.
651 */
652static void find_seams(struct ipu_image_convert_ctx *ctx,
653 struct ipu_image_convert_image *in,
654 struct ipu_image_convert_image *out)
655{
656 struct device *dev = ctx->chan->priv->ipu->dev;
657 unsigned int resized_width = out->base.rect.width;
658 unsigned int resized_height = out->base.rect.height;
659 unsigned int col;
660 unsigned int row;
661 unsigned int in_left_align = tile_left_align(in->fmt);
662 unsigned int in_top_align = tile_top_align(in->fmt);
663 unsigned int out_left_align = tile_left_align(out->fmt);
664 unsigned int out_top_align = tile_top_align(out->fmt);
665 unsigned int out_width_align = tile_width_align(out->fmt);
666 unsigned int out_height_align = tile_height_align(out->type,
667 ctx->rot_mode);
668 unsigned int in_right = in->base.rect.width;
669 unsigned int in_bottom = in->base.rect.height;
670 unsigned int out_right = out->base.rect.width;
671 unsigned int out_bottom = out->base.rect.height;
672 unsigned int flipped_out_left;
673 unsigned int flipped_out_top;
674
675 if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
676 /* Switch width/height and align top left to IRT block size */
677 resized_width = out->base.rect.height;
678 resized_height = out->base.rect.width;
679 out_left_align = out_height_align;
680 out_top_align = out_width_align;
681 out_width_align = out_left_align;
682 out_height_align = out_top_align;
683 out_right = out->base.rect.height;
684 out_bottom = out->base.rect.width;
685 }
686
687 for (col = in->num_cols - 1; col > 0; col--) {
688 bool allow_in_overshoot = ipu_rot_mode_is_irt(ctx->rot_mode) ||
689 !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
690 bool allow_out_overshoot = (col < in->num_cols - 1) &&
691 !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
692 unsigned int out_start;
693 unsigned int out_end;
694 unsigned int in_left;
695 unsigned int out_left;
696
697 /*
698 * Align input width to burst length if the scaling step flips
699 * horizontally.
700 */
701
702 /* Start within 1024 pixels of the right edge */
703 out_start = max_t(int, 0, out_right - 1024);
704 /* End before having to add more columns to the left */
705 out_end = min_t(unsigned int, out_right, col * 1024);
706
707 find_best_seam(ctx, out_start, out_end,
708 in_right, out_right,
709 in_left_align, out_left_align,
710 allow_in_overshoot ? 1 : 8 /* burst length */,
711 allow_out_overshoot ? 1 : out_width_align,
712 ctx->downsize_coeff_h, ctx->image_resize_coeff_h,
713 &in_left, &out_left);
714
715 if (ctx->rot_mode & IPU_ROT_BIT_HFLIP)
716 flipped_out_left = resized_width - out_right;
717 else
718 flipped_out_left = out_left;
719
720 fill_tile_column(ctx, col, in, in_left, in_right - in_left,
721 out, flipped_out_left, out_right - out_left);
722
723 dev_dbg(dev, "%s: col %u: %u, %u -> %u, %u\n", __func__, col,
724 in_left, in_right - in_left,
725 flipped_out_left, out_right - out_left);
726
727 in_right = in_left;
728 out_right = out_left;
729 }
730
731 flipped_out_left = (ctx->rot_mode & IPU_ROT_BIT_HFLIP) ?
732 resized_width - out_right : 0;
733
734 fill_tile_column(ctx, 0, in, 0, in_right,
735 out, flipped_out_left, out_right);
736
737 dev_dbg(dev, "%s: col 0: 0, %u -> %u, %u\n", __func__,
738 in_right, flipped_out_left, out_right);
739
740 for (row = in->num_rows - 1; row > 0; row--) {
741 bool allow_overshoot = row < in->num_rows - 1;
742 unsigned int out_start;
743 unsigned int out_end;
744 unsigned int in_top;
745 unsigned int out_top;
746
747 /* Start within 1024 lines of the bottom edge */
748 out_start = max_t(int, 0, out_bottom - 1024);
749 /* End before having to add more rows above */
750 out_end = min_t(unsigned int, out_bottom, row * 1024);
751
752 find_best_seam(ctx, out_start, out_end,
753 in_bottom, out_bottom,
754 in_top_align, out_top_align,
755 1, allow_overshoot ? 1 : out_height_align,
756 ctx->downsize_coeff_v, ctx->image_resize_coeff_v,
757 &in_top, &out_top);
758
759 if ((ctx->rot_mode & IPU_ROT_BIT_VFLIP) ^
760 ipu_rot_mode_is_irt(ctx->rot_mode))
761 flipped_out_top = resized_height - out_bottom;
762 else
763 flipped_out_top = out_top;
764
765 fill_tile_row(ctx, row, in, in_top, in_bottom - in_top,
766 out, flipped_out_top, out_bottom - out_top);
767
768 dev_dbg(dev, "%s: row %u: %u, %u -> %u, %u\n", __func__, row,
769 in_top, in_bottom - in_top,
770 flipped_out_top, out_bottom - out_top);
771
772 in_bottom = in_top;
773 out_bottom = out_top;
774 }
775
776 if ((ctx->rot_mode & IPU_ROT_BIT_VFLIP) ^
777 ipu_rot_mode_is_irt(ctx->rot_mode))
778 flipped_out_top = resized_height - out_bottom;
779 else
780 flipped_out_top = 0;
781
782 fill_tile_row(ctx, 0, in, 0, in_bottom,
783 out, flipped_out_top, out_bottom);
784
785 dev_dbg(dev, "%s: row 0: 0, %u -> %u, %u\n", __func__,
786 in_bottom, flipped_out_top, out_bottom);
787}
788
462static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, 789static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
463 struct ipu_image_convert_image *image) 790 struct ipu_image_convert_image *image)
464{ 791{
465 unsigned int i; 792 unsigned int i;
466 793
467 for (i = 0; i < ctx->num_tiles; i++) { 794 for (i = 0; i < ctx->num_tiles; i++) {
468 struct ipu_image_tile *tile = &image->tile[i]; 795 struct ipu_image_tile *tile;
469 const unsigned int row = i / image->num_cols; 796 const unsigned int row = i / image->num_cols;
470 const unsigned int col = i % image->num_cols; 797 const unsigned int col = i % image->num_cols;
471 798
472 tile->height = image->base.pix.height / image->num_rows; 799 if (image->type == IMAGE_CONVERT_OUT)
473 tile->width = image->base.pix.width / image->num_cols; 800 tile = &image->tile[ctx->out_tile_map[i]];
474 tile->left = col * tile->width; 801 else
475 tile->top = row * tile->height; 802 tile = &image->tile[i];
803
476 tile->size = ((tile->height * image->fmt->bpp) >> 3) * 804 tile->size = ((tile->height * image->fmt->bpp) >> 3) *
477 tile->width; 805 tile->width;
478 806
@@ -1682,6 +2010,10 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
1682 if (ret) 2010 if (ret)
1683 goto out_free; 2011 goto out_free;
1684 2012
2013 calc_out_tile_map(ctx);
2014
2015 find_seams(ctx, s_image, d_image);
2016
1685 calc_tile_dimensions(ctx, s_image); 2017 calc_tile_dimensions(ctx, s_image);
1686 ret = calc_tile_offsets(ctx, s_image); 2018 ret = calc_tile_offsets(ctx, s_image);
1687 if (ret) 2019 if (ret)
@@ -1692,7 +2024,6 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
1692 if (ret) 2024 if (ret)
1693 goto out_free; 2025 goto out_free;
1694 2026
1695 calc_out_tile_map(ctx);
1696 calc_tile_resize_coefficients(ctx); 2027 calc_tile_resize_coefficients(ctx);
1697 2028
1698 dump_format(ctx, s_image); 2029 dump_format(ctx, s_image);