aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2017-06-07 20:13:35 -0400
committer Eric Anholt <eric@anholt.net> 2017-06-15 19:02:45 -0400
commit 98830d91da082b0285d35bdf5b5ae98decac7df6 (patch)
tree 6d508301468c194fc03bc3f91225fc02670e9d61
parent ac7c748317f515f426f48e72964c71142506f7a6 (diff)
drm/vc4: Add T-format scanout support.
The T tiling format is what V3D uses for textures, with no raster support at all until later revisions of the hardware (and always at a large 3D performance penalty). If we can't scan out V3D's format, then we often need to do a relayout at some stage of the pipeline, either right before texturing from the scanout buffer (common in X11 without a compositor) or from a tiled screen buffer right before scanout (an option I've considered in trying to resolve this inconsistency, but which means needing to use the dirty fb ioctl and having some update policy).

T-format scanout lets us avoid either of those shadow copies, for a massive, obvious performance improvement to X11 window dragging without a compositor. Unfortunately, enabling a compositor to work around the discrepancy has turned out to be too costly in memory consumption for the Raspbian distribution.

Because the HVS operates a scanline at a time, compositing from T does increase the memory bandwidth cost of scanout. On my 1920x1080@32bpp display on a RPi3, we go from about 15% of system memory bandwidth with linear to about 20% with tiled. However, for X11 this still ends up being a huge performance win in active usage.

This patch doesn't yet handle src_x/src_y offsetting within the tiled buffer. However, we fail to do so for untiled buffers already.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: http://patchwork.freedesktop.org/patch/msgid/20170608001336.12842-1-eric@anholt.net
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
-rw-r--r-- drivers/gpu/drm/vc4/vc4_plane.c 31
-rw-r--r-- drivers/gpu/drm/vc4/vc4_regs.h 19
-rw-r--r-- include/uapi/drm/drm_fourcc.h 23
3 files changed, 68 insertions, 5 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index da18dec21696..fa6809d8b0fe 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -500,8 +500,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
500 u32 ctl0_offset = vc4_state->dlist_count; 500 u32 ctl0_offset = vc4_state->dlist_count;
501 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 501 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
502 int num_planes = drm_format_num_planes(format->drm); 502 int num_planes = drm_format_num_planes(format->drm);
503 u32 scl0, scl1; 503 u32 scl0, scl1, pitch0;
504 u32 lbm_size; 504 u32 lbm_size, tiling;
505 unsigned long irqflags; 505 unsigned long irqflags;
506 int ret, i; 506 int ret, i;
507 507
@@ -542,11 +542,31 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
542 scl1 = vc4_get_scl_field(state, 0); 542 scl1 = vc4_get_scl_field(state, 0);
543 } 543 }
544 544
545 switch (fb->modifier) {
546 case DRM_FORMAT_MOD_LINEAR:
547 tiling = SCALER_CTL0_TILING_LINEAR;
548 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
549 break;
550 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
551 tiling = SCALER_CTL0_TILING_256B_OR_T;
552
553 pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET),
554 VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L),
555 VC4_SET_FIELD((vc4_state->src_w[0] + 31) >> 5,
556 SCALER_PITCH0_TILE_WIDTH_R));
557 break;
558 default:
559 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
560 (long long)fb->modifier);
561 return -EINVAL;
562 }
563
545 /* Control word */ 564 /* Control word */
546 vc4_dlist_write(vc4_state, 565 vc4_dlist_write(vc4_state,
547 SCALER_CTL0_VALID | 566 SCALER_CTL0_VALID |
548 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | 567 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
549 (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 568 (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
569 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
550 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | 570 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
551 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 571 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
552 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); 572 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
@@ -600,8 +620,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
600 for (i = 0; i < num_planes; i++) 620 for (i = 0; i < num_planes; i++)
601 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 621 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
602 622
603 /* Pitch word 0/1/2 */ 623 /* Pitch word 0 */
604 for (i = 0; i < num_planes; i++) { 624 vc4_dlist_write(vc4_state, pitch0);
625
626 /* Pitch word 1/2 */
627 for (i = 1; i < num_planes; i++) {
605 vc4_dlist_write(vc4_state, 628 vc4_dlist_write(vc4_state,
606 VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH)); 629 VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH));
607 } 630 }
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 932093936178..d382c34c1b9e 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -709,6 +709,13 @@ enum hvs_pixel_format {
709#define SCALER_CTL0_SIZE_MASK VC4_MASK(29, 24) 709#define SCALER_CTL0_SIZE_MASK VC4_MASK(29, 24)
710#define SCALER_CTL0_SIZE_SHIFT 24 710#define SCALER_CTL0_SIZE_SHIFT 24
711 711
712#define SCALER_CTL0_TILING_MASK VC4_MASK(21, 20)
713#define SCALER_CTL0_TILING_SHIFT 20
714#define SCALER_CTL0_TILING_LINEAR 0
715#define SCALER_CTL0_TILING_64B 1
716#define SCALER_CTL0_TILING_128B 2
717#define SCALER_CTL0_TILING_256B_OR_T 3
718
712#define SCALER_CTL0_HFLIP BIT(16) 719#define SCALER_CTL0_HFLIP BIT(16)
713#define SCALER_CTL0_VFLIP BIT(15) 720#define SCALER_CTL0_VFLIP BIT(15)
714 721
@@ -838,7 +845,19 @@ enum hvs_pixel_format {
838#define SCALER_PPF_KERNEL_OFFSET_SHIFT 0 845#define SCALER_PPF_KERNEL_OFFSET_SHIFT 0
839#define SCALER_PPF_KERNEL_UNCACHED BIT(31) 846#define SCALER_PPF_KERNEL_UNCACHED BIT(31)
840 847
848/* PITCH0/1/2 fields for raster. */
841#define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0) 849#define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0)
842#define SCALER_SRC_PITCH_SHIFT 0 850#define SCALER_SRC_PITCH_SHIFT 0
843 851
852/* PITCH0 fields for T-tiled. */
853#define SCALER_PITCH0_TILE_WIDTH_L_MASK VC4_MASK(22, 16)
854#define SCALER_PITCH0_TILE_WIDTH_L_SHIFT 16
855#define SCALER_PITCH0_TILE_LINE_DIR BIT(15)
856#define SCALER_PITCH0_TILE_INITIAL_LINE_DIR BIT(14)
857/* Y offset within a tile. */
858#define SCALER_PITCH0_TILE_Y_OFFSET_MASK VC4_MASK(13, 7)
859#define SCALER_PITCH0_TILE_Y_OFFSET_SHIFT 7
860#define SCALER_PITCH0_TILE_WIDTH_R_MASK VC4_MASK(6, 0)
861#define SCALER_PITCH0_TILE_WIDTH_R_SHIFT 0
862
844#endif /* VC4_REGS_H */ 863#endif /* VC4_REGS_H */
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 55e301047b3e..7586c46f68bf 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -182,6 +182,7 @@ extern "C" {
182#define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04 182#define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04
183#define DRM_FORMAT_MOD_VENDOR_QCOM 0x05 183#define DRM_FORMAT_MOD_VENDOR_QCOM 0x05
184#define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06 184#define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06
185#define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07
185/* add more to the end as needed */ 186/* add more to the end as needed */
186 187
187#define fourcc_mod_code(vendor, val) \ 188#define fourcc_mod_code(vendor, val) \
@@ -306,7 +307,6 @@ extern "C" {
306 */ 307 */
307#define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4) 308#define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4)
308 309
309
310/* NVIDIA Tegra frame buffer modifiers */ 310/* NVIDIA Tegra frame buffer modifiers */
311 311
312/* 312/*
@@ -351,6 +351,27 @@ extern "C" {
351 */ 351 */
352#define NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(v) fourcc_mod_tegra_code(2, v) 352#define NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(v) fourcc_mod_tegra_code(2, v)
353 353
354/*
355 * Broadcom VC4 "T" format
356 *
357 * This is the primary layout that the V3D GPU can texture from (it
358 * can't do linear). The T format has:
359 *
360 * - 64b utiles of pixels in a raster-order grid according to cpp. It's 4x4
361 * pixels at 32 bit depth.
362 *
363 * - 1k subtiles made of a 4x4 raster-order grid of 64b utiles (so usually
364 * 16x16 pixels).
365 *
366 * - 4k tiles made of a 2x2 grid of 1k subtiles (so usually 32x32 pixels). On
367 * even 4k tile rows, they're arranged as (BL, TL, TR, BR), and on odd rows
368 * they're (TR, BR, BL, TL), where bottom left is start of memory.
369 *
370 * - an image made of 4k tiles in rows either left-to-right (even rows of 4k
371 * tiles) or right-to-left (odd rows of 4k tiles).
372 */
373#define DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED fourcc_mod_code(BROADCOM, 1)
374
354#if defined(__cplusplus) 375#if defined(__cplusplus)
355} 376}
356#endif 377#endif