author     Dave Airlie <airlied@redhat.com>  2016-08-24 22:36:36 -0400
committer  Dave Airlie <airlied@redhat.com>  2016-08-24 22:36:36 -0400
commit     51d6120792ab5f46d6f5f7f37b65d05cc1afc019 (patch)
tree       48a1c5fb08e3a794d1e2a9a88fbffe85a9b34869
parent     78acdd4a7e5a5de56c4ac1e10390a98b7c605ed6 (diff)
parent     351243897b15aba02ad15317724d616aeaf00c7d (diff)
Merge branch 'drm-intel-next' of git://anongit.freedesktop.org/drm-intel into drm-next
drm-intel-next-2016-08-22:
- bugfixes and cleanups for rcu-protected requests (Chris)
- atomic modeset fixes for gpu reset on pre-g4x (Maarten&Ville)
- guc submission improvements (Dave Gordon)
- panel power sequence cleanup (Imre)
- better use of stolen and unmappable ggtt (Chris), plus prep work to make that happen
- rework of framebuffer offsets, prep for multi-plane framebuffers (Ville)
- fully partial ggtt vmaps, including fenced ones (Chris)
- move lots more of the gem tracking from the object to the vma (Chris)
- tune the command parser (Chris)
- allow fbc without fences on recent platforms (Chris)
- fbc frontbuffer tracking fixes (Chris)
- fast prefaulting using io-mapping.h pgprot caching (Chris)

* 'drm-intel-next' of git://anongit.freedesktop.org/drm-intel: (141 commits)
  io-mapping: Fixup for different names of writecombine
  io-mapping.h: s/PAGE_KERNEL_IO/PAGE_KERNEL/
  drm/i915: Update DRIVER_DATE to 20160822
  drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass
  drm/i915: Embed the io-mapping struct inside drm_i915_private
  io-mapping: Always create a struct to hold metadata about the io-mapping
  drm/i915/fbc: Allow on unfenced surfaces, for recent gen
  drm/i915/fbc: Don't set an illegal fence if unfenced
  drm/i915: Flush delayed fence releases after reset
  drm/i915: Reattach comment, complete type specification
  drm/i915/cmdparser: Accelerate copies from WC memory
  drm/i915/cmdparser: Use binary search for faster register lookup
  drm/i915/cmdparser: Check for SKIP descriptors first
  drm/i915/cmdparser: Compare against the previous command descriptor
  drm/i915/cmdparser: Improve hash function
  drm/i915/cmdparser: Only cache the dst vmap
  drm/i915/cmdparser: Use cached vmappings
  drm/i915/cmdparser: Add the TIMESTAMP register for the other engines
  drm/i915/cmdparser: Make initialisation failure non-fatal
  drm/i915: Stop discarding GTT cache-domain on unbind vma
  ...
-rw-r--r--  arch/x86/kernel/early-quirks.c  9
-rw-r--r--  drivers/char/agp/intel-gtt.c  2
-rw-r--r--  drivers/gpu/drm/Makefile  2
-rw-r--r--  drivers/gpu/drm/i915/Makefile  6
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c  309
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c  334
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c  3
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h  504
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  904
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_context.c  74
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_dmabuf.c  2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_evict.c  7
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c  457
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_fence.c  483
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c  191
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.h  65
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_render_state.c  40
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_render_state.h  2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.c  76
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.h  61
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_stolen.c  26
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_tiling.c  72
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_userptr.c  17
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c  438
-rw-r--r--  drivers/gpu/drm/i915/i915_guc_submission.c  243
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c  26
-rw-r--r--  drivers/gpu/drm/i915/i915_memcpy.c  101
-rw-r--r--  drivers/gpu/drm/i915/i915_mm.c  84
-rw-r--r--  drivers/gpu/drm/i915/i915_params.c  6
-rw-r--r--  drivers/gpu/drm/i915/i915_params.h  1
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h  152
-rw-r--r--  drivers/gpu/drm/i915/i915_suspend.c  41
-rw-r--r--  drivers/gpu/drm/i915/intel_breadcrumbs.c  91
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c  1131
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c  85
-rw-r--r--  drivers/gpu/drm/i915/intel_drv.h  41
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c  120
-rw-r--r--  drivers/gpu/drm/i915/intel_fbc.c  70
-rw-r--r--  drivers/gpu/drm/i915/intel_fbdev.c  21
-rw-r--r--  drivers/gpu/drm/i915/intel_guc.h  18
-rw-r--r--  drivers/gpu/drm/i915/intel_guc_loader.c  58
-rw-r--r--  drivers/gpu/drm/i915/intel_hotplug.c  3
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c  149
-rw-r--r--  drivers/gpu/drm/i915/intel_lvds.c  151
-rw-r--r--  drivers/gpu/drm/i915/intel_overlay.c  71
-rw-r--r--  drivers/gpu/drm/i915/intel_pm.c  24
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c  481
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h  81
-rw-r--r--  drivers/gpu/drm/i915/intel_runtime_pm.c  8
-rw-r--r--  drivers/gpu/drm/i915/intel_sprite.c  111
-rw-r--r--  include/drm/i915_drm.h  2
-rw-r--r--  include/linux/io-mapping.h  98
-rw-r--r--  include/uapi/drm/i915_drm.h  16
53 files changed, 4259 insertions, 3309 deletions
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index de7501edb21c..8b8852bc2f4a 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -317,16 +317,11 @@ static phys_addr_t __init i85x_stolen_base(int num, int slot, int func,
317static phys_addr_t __init i865_stolen_base(int num, int slot, int func, 317static phys_addr_t __init i865_stolen_base(int num, int slot, int func,
318 size_t stolen_size) 318 size_t stolen_size)
319{ 319{
320 u16 toud; 320 u16 toud = 0;
321 321
322 /*
323 * FIXME is the graphics stolen memory region
324 * always at TOUD? Ie. is it always the last
325 * one to be allocated by the BIOS?
326 */
327 toud = read_pci_config_16(0, 0, 0, I865_TOUD); 322 toud = read_pci_config_16(0, 0, 0, I865_TOUD);
328 323
329 return (phys_addr_t)toud << 16; 324 return (phys_addr_t)(toud << 16) + i845_tseg_size();
330} 325}
331 326
332static phys_addr_t __init gen3_stolen_base(int num, int slot, int func, 327static phys_addr_t __init gen3_stolen_base(int num, int slot, int func,
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 44311296ec02..0f7d28a98b9a 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -845,6 +845,8 @@ void intel_gtt_insert_page(dma_addr_t addr,
845 unsigned int flags) 845 unsigned int flags)
846{ 846{
847 intel_private.driver->write_entry(addr, pg, flags); 847 intel_private.driver->write_entry(addr, pg, flags);
848 if (intel_private.driver->chipset_flush)
849 intel_private.driver->chipset_flush();
848} 850}
849EXPORT_SYMBOL(intel_gtt_insert_page); 851EXPORT_SYMBOL(intel_gtt_insert_page);
850 852
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 193ff2d09479..4054c94a2301 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -47,7 +47,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
47obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ 47obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
48obj-$(CONFIG_DRM_MGA) += mga/ 48obj-$(CONFIG_DRM_MGA) += mga/
49obj-$(CONFIG_DRM_I810) += i810/ 49obj-$(CONFIG_DRM_I810) += i810/
50obj-$(CONFIG_DRM_I915) += i915/ 50obj-$(CONFIG_DRM_I915) += i915/
51obj-$(CONFIG_DRM_MGAG200) += mgag200/ 51obj-$(CONFIG_DRM_MGAG200) += mgag200/
52obj-$(CONFIG_DRM_VC4) += vc4/ 52obj-$(CONFIG_DRM_VC4) += vc4/
53obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ 53obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index dda724f04445..a7da24640e88 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -3,12 +3,16 @@
3# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. 3# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
4 4
5subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror 5subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
6subdir-ccflags-y += \
7 $(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA)
6 8
7# Please keep these build lists sorted! 9# Please keep these build lists sorted!
8 10
9# core driver code 11# core driver code
10i915-y := i915_drv.o \ 12i915-y := i915_drv.o \
11 i915_irq.o \ 13 i915_irq.o \
14 i915_memcpy.o \
15 i915_mm.o \
12 i915_params.o \ 16 i915_params.o \
13 i915_pci.o \ 17 i915_pci.o \
14 i915_suspend.o \ 18 i915_suspend.o \
@@ -110,6 +114,6 @@ i915-y += intel_gvt.o
110include $(src)/gvt/Makefile 114include $(src)/gvt/Makefile
111endif 115endif
112 116
113obj-$(CONFIG_DRM_I915) += i915.o 117obj-$(CONFIG_DRM_I915) += i915.o
114 118
115CFLAGS_i915_trace_points.o := -I$(src) 119CFLAGS_i915_trace_points.o := -I$(src)
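
The as-instr probe added in the Makefile hunk above only defines CONFIG_AS_MOVNTDQA when the assembler can emit MOVNTDQA, and the new i915_memcpy.o uses that instruction to accelerate reads out of write-combining (WC) memory (see "drm/i915/cmdparser: Accelerate copies from WC memory" in the shortlog). As a rough user-space sketch of the idea, not the kernel's i915_memcpy_from_wc() itself, a streaming-load copy could look like the following; SSE4.1 support, 16-byte alignment and a multiple-of-16 length are assumed, and the function name is invented for the example (build with -msse4.1):

/*
 * Rough user-space sketch, not the kernel implementation: a streaming
 * (non-temporal) load copy. MOVNTDQA lets the CPU pull data out of
 * write-combining memory at close to full bandwidth instead of issuing
 * slow uncached reads; on ordinary write-back memory it behaves like a
 * plain load.
 */
#include <immintrin.h>
#include <stddef.h>

static void copy_from_wc_sse41(void *dst, const void *src, size_t len)
{
	__m128i *d = dst;
	__m128i *s = (__m128i *)src; /* _mm_stream_load_si128() takes a non-const pointer */
	size_t i;

	/* caller guarantees 16-byte alignment and a multiple-of-16 length */
	for (i = 0; i < len / 16; i++) {
		__m128i tmp = _mm_stream_load_si128(s + i); /* MOVNTDQA */

		_mm_store_si128(d + i, tmp);
	}

	_mm_mfence(); /* order the streaming loads against later accesses */
}

In the driver the fast path is also gated at run time: the copy_batch() hunk later in this merge uses a zero-length i915_memcpy_from_wc() call as an availability check and falls back to a per-page kmap/clflush/memcpy loop when it is not usable.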
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 1db829c8b912..3c72b3b103e7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -86,24 +86,25 @@
86 * general bitmasking mechanism. 86 * general bitmasking mechanism.
87 */ 87 */
88 88
89#define STD_MI_OPCODE_MASK 0xFF800000 89#define STD_MI_OPCODE_SHIFT (32 - 9)
90#define STD_3D_OPCODE_MASK 0xFFFF0000 90#define STD_3D_OPCODE_SHIFT (32 - 16)
91#define STD_2D_OPCODE_MASK 0xFFC00000 91#define STD_2D_OPCODE_SHIFT (32 - 10)
92#define STD_MFX_OPCODE_MASK 0xFFFF0000 92#define STD_MFX_OPCODE_SHIFT (32 - 16)
93#define MIN_OPCODE_SHIFT 16
93 94
94#define CMD(op, opm, f, lm, fl, ...) \ 95#define CMD(op, opm, f, lm, fl, ...) \
95 { \ 96 { \
96 .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ 97 .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
97 .cmd = { (op), (opm) }, \ 98 .cmd = { (op), ~0u << (opm) }, \
98 .length = { (lm) }, \ 99 .length = { (lm) }, \
99 __VA_ARGS__ \ 100 __VA_ARGS__ \
100 } 101 }
101 102
102/* Convenience macros to compress the tables */ 103/* Convenience macros to compress the tables */
103#define SMI STD_MI_OPCODE_MASK 104#define SMI STD_MI_OPCODE_SHIFT
104#define S3D STD_3D_OPCODE_MASK 105#define S3D STD_3D_OPCODE_SHIFT
105#define S2D STD_2D_OPCODE_MASK 106#define S2D STD_2D_OPCODE_SHIFT
106#define SMFX STD_MFX_OPCODE_MASK 107#define SMFX STD_MFX_OPCODE_SHIFT
107#define F true 108#define F true
108#define S CMD_DESC_SKIP 109#define S CMD_DESC_SKIP
109#define R CMD_DESC_REJECT 110#define R CMD_DESC_REJECT
@@ -350,6 +351,9 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
350 CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), 351 CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
351}; 352};
352 353
354static const struct drm_i915_cmd_descriptor noop_desc =
355 CMD(MI_NOOP, SMI, F, 1, S);
356
353#undef CMD 357#undef CMD
354#undef SMI 358#undef SMI
355#undef S3D 359#undef S3D
@@ -458,6 +462,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
458 REG32(GEN7_GPGPU_DISPATCHDIMX), 462 REG32(GEN7_GPGPU_DISPATCHDIMX),
459 REG32(GEN7_GPGPU_DISPATCHDIMY), 463 REG32(GEN7_GPGPU_DISPATCHDIMY),
460 REG32(GEN7_GPGPU_DISPATCHDIMZ), 464 REG32(GEN7_GPGPU_DISPATCHDIMZ),
465 REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
461 REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0), 466 REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0),
462 REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1), 467 REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1),
463 REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2), 468 REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2),
@@ -473,6 +478,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
473 REG32(GEN7_L3SQCREG1), 478 REG32(GEN7_L3SQCREG1),
474 REG32(GEN7_L3CNTLREG2), 479 REG32(GEN7_L3CNTLREG2),
475 REG32(GEN7_L3CNTLREG3), 480 REG32(GEN7_L3CNTLREG3),
481 REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
476}; 482};
477 483
478static const struct drm_i915_reg_descriptor hsw_render_regs[] = { 484static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
@@ -502,7 +508,10 @@ static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
502}; 508};
503 509
504static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { 510static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
511 REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
512 REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
505 REG32(BCS_SWCTRL), 513 REG32(BCS_SWCTRL),
514 REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
506}; 515};
507 516
508static const struct drm_i915_reg_descriptor ivb_master_regs[] = { 517static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
@@ -691,12 +700,26 @@ struct cmd_node {
691 * non-opcode bits being set. But if we don't include those bits, some 3D 700 * non-opcode bits being set. But if we don't include those bits, some 3D
692 * commands may hash to the same bucket due to not including opcode bits that 701 * commands may hash to the same bucket due to not including opcode bits that
693 * make the command unique. For now, we will risk hashing to the same bucket. 702 * make the command unique. For now, we will risk hashing to the same bucket.
694 *
695 * If we attempt to generate a perfect hash, we should be able to look at bits
696 * 31:29 of a command from a batch buffer and use the full mask for that
697 * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
698 */ 703 */
699#define CMD_HASH_MASK STD_MI_OPCODE_MASK 704static inline u32 cmd_header_key(u32 x)
705{
706 u32 shift;
707
708 switch (x >> INSTR_CLIENT_SHIFT) {
709 default:
710 case INSTR_MI_CLIENT:
711 shift = STD_MI_OPCODE_SHIFT;
712 break;
713 case INSTR_RC_CLIENT:
714 shift = STD_3D_OPCODE_SHIFT;
715 break;
716 case INSTR_BC_CLIENT:
717 shift = STD_2D_OPCODE_SHIFT;
718 break;
719 }
720
721 return x >> shift;
722}
700 723
701static int init_hash_table(struct intel_engine_cs *engine, 724static int init_hash_table(struct intel_engine_cs *engine,
702 const struct drm_i915_cmd_table *cmd_tables, 725 const struct drm_i915_cmd_table *cmd_tables,
@@ -720,7 +743,7 @@ static int init_hash_table(struct intel_engine_cs *engine,
720 743
721 desc_node->desc = desc; 744 desc_node->desc = desc;
722 hash_add(engine->cmd_hash, &desc_node->node, 745 hash_add(engine->cmd_hash, &desc_node->node,
723 desc->cmd.value & CMD_HASH_MASK); 746 cmd_header_key(desc->cmd.value));
724 } 747 }
725 } 748 }
726 749
@@ -746,17 +769,15 @@ static void fini_hash_table(struct intel_engine_cs *engine)
746 * Optionally initializes fields related to batch buffer command parsing in the 769 * Optionally initializes fields related to batch buffer command parsing in the
747 * struct intel_engine_cs based on whether the platform requires software 770 * struct intel_engine_cs based on whether the platform requires software
748 * command parsing. 771 * command parsing.
749 *
750 * Return: non-zero if initialization fails
751 */ 772 */
752int intel_engine_init_cmd_parser(struct intel_engine_cs *engine) 773void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
753{ 774{
754 const struct drm_i915_cmd_table *cmd_tables; 775 const struct drm_i915_cmd_table *cmd_tables;
755 int cmd_table_count; 776 int cmd_table_count;
756 int ret; 777 int ret;
757 778
758 if (!IS_GEN7(engine->i915)) 779 if (!IS_GEN7(engine->i915))
759 return 0; 780 return;
760 781
761 switch (engine->id) { 782 switch (engine->id) {
762 case RCS: 783 case RCS:
@@ -811,24 +832,27 @@ int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
811 break; 832 break;
812 default: 833 default:
813 MISSING_CASE(engine->id); 834 MISSING_CASE(engine->id);
814 BUG(); 835 return;
815 } 836 }
816 837
817 BUG_ON(!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)); 838 if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) {
818 BUG_ON(!validate_regs_sorted(engine)); 839 DRM_ERROR("%s: command descriptions are not sorted\n",
819 840 engine->name);
820 WARN_ON(!hash_empty(engine->cmd_hash)); 841 return;
842 }
843 if (!validate_regs_sorted(engine)) {
844 DRM_ERROR("%s: registers are not sorted\n", engine->name);
845 return;
846 }
821 847
822 ret = init_hash_table(engine, cmd_tables, cmd_table_count); 848 ret = init_hash_table(engine, cmd_tables, cmd_table_count);
823 if (ret) { 849 if (ret) {
824 DRM_ERROR("CMD: cmd_parser_init failed!\n"); 850 DRM_ERROR("%s: initialised failed!\n", engine->name);
825 fini_hash_table(engine); 851 fini_hash_table(engine);
826 return ret; 852 return;
827 } 853 }
828 854
829 engine->needs_cmd_parser = true; 855 engine->needs_cmd_parser = true;
830
831 return 0;
832} 856}
833 857
834/** 858/**
@@ -853,12 +877,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
853 struct cmd_node *desc_node; 877 struct cmd_node *desc_node;
854 878
855 hash_for_each_possible(engine->cmd_hash, desc_node, node, 879 hash_for_each_possible(engine->cmd_hash, desc_node, node,
856 cmd_header & CMD_HASH_MASK) { 880 cmd_header_key(cmd_header)) {
857 const struct drm_i915_cmd_descriptor *desc = desc_node->desc; 881 const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
858 u32 masked_cmd = desc->cmd.mask & cmd_header; 882 if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
859 u32 masked_value = desc->cmd.value & desc->cmd.mask;
860
861 if (masked_cmd == masked_value)
862 return desc; 883 return desc;
863 } 884 }
864 885
@@ -876,11 +897,14 @@ find_cmd_in_table(struct intel_engine_cs *engine,
876static const struct drm_i915_cmd_descriptor* 897static const struct drm_i915_cmd_descriptor*
877find_cmd(struct intel_engine_cs *engine, 898find_cmd(struct intel_engine_cs *engine,
878 u32 cmd_header, 899 u32 cmd_header,
900 const struct drm_i915_cmd_descriptor *desc,
879 struct drm_i915_cmd_descriptor *default_desc) 901 struct drm_i915_cmd_descriptor *default_desc)
880{ 902{
881 const struct drm_i915_cmd_descriptor *desc;
882 u32 mask; 903 u32 mask;
883 904
905 if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
906 return desc;
907
884 desc = find_cmd_in_table(engine, cmd_header); 908 desc = find_cmd_in_table(engine, cmd_header);
885 if (desc) 909 if (desc)
886 return desc; 910 return desc;
@@ -889,140 +913,127 @@ find_cmd(struct intel_engine_cs *engine,
889 if (!mask) 913 if (!mask)
890 return NULL; 914 return NULL;
891 915
892 BUG_ON(!default_desc); 916 default_desc->cmd.value = cmd_header;
893 default_desc->flags = CMD_DESC_SKIP; 917 default_desc->cmd.mask = ~0u << MIN_OPCODE_SHIFT;
894 default_desc->length.mask = mask; 918 default_desc->length.mask = mask;
895 919 default_desc->flags = CMD_DESC_SKIP;
896 return default_desc; 920 return default_desc;
897} 921}
898 922
899static const struct drm_i915_reg_descriptor * 923static const struct drm_i915_reg_descriptor *
900find_reg(const struct drm_i915_reg_descriptor *table, 924__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
901 int count, u32 addr)
902{ 925{
903 int i; 926 int start = 0, end = count;
904 927 while (start < end) {
905 for (i = 0; i < count; i++) { 928 int mid = start + (end - start) / 2;
906 if (i915_mmio_reg_offset(table[i].addr) == addr) 929 int ret = addr - i915_mmio_reg_offset(table[mid].addr);
907 return &table[i]; 930 if (ret < 0)
931 end = mid;
932 else if (ret > 0)
933 start = mid + 1;
934 else
935 return &table[mid];
908 } 936 }
909
910 return NULL; 937 return NULL;
911} 938}
912 939
913static const struct drm_i915_reg_descriptor * 940static const struct drm_i915_reg_descriptor *
914find_reg_in_tables(const struct drm_i915_reg_table *tables, 941find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
915 int count, bool is_master, u32 addr)
916{ 942{
917 int i; 943 const struct drm_i915_reg_table *table = engine->reg_tables;
918 const struct drm_i915_reg_table *table; 944 int count = engine->reg_table_count;
919 const struct drm_i915_reg_descriptor *reg;
920 945
921 for (i = 0; i < count; i++) { 946 do {
922 table = &tables[i];
923 if (!table->master || is_master) { 947 if (!table->master || is_master) {
924 reg = find_reg(table->regs, table->num_regs, 948 const struct drm_i915_reg_descriptor *reg;
925 addr); 949
950 reg = __find_reg(table->regs, table->num_regs, addr);
926 if (reg != NULL) 951 if (reg != NULL)
927 return reg; 952 return reg;
928 } 953 }
929 } 954 } while (table++, --count);
930 955
931 return NULL; 956 return NULL;
932} 957}
933 958
934static u32 *vmap_batch(struct drm_i915_gem_object *obj, 959/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
935 unsigned start, unsigned len) 960static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
936{
937 int i;
938 void *addr = NULL;
939 struct sg_page_iter sg_iter;
940 int first_page = start >> PAGE_SHIFT;
941 int last_page = (len + start + 4095) >> PAGE_SHIFT;
942 int npages = last_page - first_page;
943 struct page **pages;
944
945 pages = drm_malloc_ab(npages, sizeof(*pages));
946 if (pages == NULL) {
947 DRM_DEBUG_DRIVER("Failed to get space for pages\n");
948 goto finish;
949 }
950
951 i = 0;
952 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
953 pages[i++] = sg_page_iter_page(&sg_iter);
954 if (i == npages)
955 break;
956 }
957
958 addr = vmap(pages, i, 0, PAGE_KERNEL);
959 if (addr == NULL) {
960 DRM_DEBUG_DRIVER("Failed to vmap pages\n");
961 goto finish;
962 }
963
964finish:
965 if (pages)
966 drm_free_large(pages);
967 return (u32*)addr;
968}
969
970/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
971static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
972 struct drm_i915_gem_object *src_obj, 961 struct drm_i915_gem_object *src_obj,
973 u32 batch_start_offset, 962 u32 batch_start_offset,
974 u32 batch_len) 963 u32 batch_len,
964 bool *needs_clflush_after)
975{ 965{
976 int needs_clflush = 0; 966 unsigned int src_needs_clflush;
977 void *src_base, *src; 967 unsigned int dst_needs_clflush;
978 void *dst = NULL; 968 void *dst, *src;
979 int ret; 969 int ret;
980 970
981 if (batch_len > dest_obj->base.size || 971 ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
982 batch_len + batch_start_offset > src_obj->base.size) 972 if (ret)
983 return ERR_PTR(-E2BIG);
984
985 if (WARN_ON(dest_obj->pages_pin_count == 0))
986 return ERR_PTR(-ENODEV);
987
988 ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
989 if (ret) {
990 DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
991 return ERR_PTR(ret); 973 return ERR_PTR(ret);
992 }
993 974
994 src_base = vmap_batch(src_obj, batch_start_offset, batch_len); 975 ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
995 if (!src_base) { 976 if (ret) {
996 DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n"); 977 dst = ERR_PTR(ret);
997 ret = -ENOMEM;
998 goto unpin_src; 978 goto unpin_src;
999 } 979 }
1000 980
1001 ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); 981 dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
1002 if (ret) { 982 if (IS_ERR(dst))
1003 DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n"); 983 goto unpin_dst;
1004 goto unmap_src; 984
985 src = ERR_PTR(-ENODEV);
986 if (src_needs_clflush &&
987 i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) {
988 src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
989 if (!IS_ERR(src)) {
990 i915_memcpy_from_wc(dst,
991 src + batch_start_offset,
992 ALIGN(batch_len, 16));
993 i915_gem_object_unpin_map(src_obj);
994 }
1005 } 995 }
1006 996 if (IS_ERR(src)) {
1007 dst = vmap_batch(dest_obj, 0, batch_len); 997 void *ptr;
1008 if (!dst) { 998 int offset, n;
1009 DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n"); 999
1010 ret = -ENOMEM; 1000 offset = offset_in_page(batch_start_offset);
1011 goto unmap_src; 1001
1002 /* We can avoid clflushing partial cachelines before the write
1003 * if we only every write full cache-lines. Since we know that
1004 * both the source and destination are in multiples of
1005 * PAGE_SIZE, we can simply round up to the next cacheline.
1006 * We don't care about copying too much here as we only
1007 * validate up to the end of the batch.
1008 */
1009 if (dst_needs_clflush & CLFLUSH_BEFORE)
1010 batch_len = roundup(batch_len,
1011 boot_cpu_data.x86_clflush_size);
1012
1013 ptr = dst;
1014 for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
1015 int len = min_t(int, batch_len, PAGE_SIZE - offset);
1016
1017 src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
1018 if (src_needs_clflush)
1019 drm_clflush_virt_range(src + offset, len);
1020 memcpy(ptr, src + offset, len);
1021 kunmap_atomic(src);
1022
1023 ptr += len;
1024 batch_len -= len;
1025 offset = 0;
1026 }
1012 } 1027 }
1013 1028
1014 src = src_base + offset_in_page(batch_start_offset); 1029 /* dst_obj is returned with vmap pinned */
1015 if (needs_clflush) 1030 *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
1016 drm_clflush_virt_range(src, batch_len);
1017
1018 memcpy(dst, src, batch_len);
1019 1031
1020unmap_src: 1032unpin_dst:
1021 vunmap(src_base); 1033 i915_gem_obj_finish_shmem_access(dst_obj);
1022unpin_src: 1034unpin_src:
1023 i915_gem_object_unpin_pages(src_obj); 1035 i915_gem_obj_finish_shmem_access(src_obj);
1024 1036 return dst;
1025 return ret ? ERR_PTR(ret) : dst;
1026} 1037}
1027 1038
1028/** 1039/**
@@ -1052,6 +1063,9 @@ static bool check_cmd(const struct intel_engine_cs *engine,
1052 const bool is_master, 1063 const bool is_master,
1053 bool *oacontrol_set) 1064 bool *oacontrol_set)
1054{ 1065{
1066 if (desc->flags & CMD_DESC_SKIP)
1067 return true;
1068
1055 if (desc->flags & CMD_DESC_REJECT) { 1069 if (desc->flags & CMD_DESC_REJECT) {
1056 DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); 1070 DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
1057 return false; 1071 return false;
@@ -1076,10 +1090,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
1076 offset += step) { 1090 offset += step) {
1077 const u32 reg_addr = cmd[offset] & desc->reg.mask; 1091 const u32 reg_addr = cmd[offset] & desc->reg.mask;
1078 const struct drm_i915_reg_descriptor *reg = 1092 const struct drm_i915_reg_descriptor *reg =
1079 find_reg_in_tables(engine->reg_tables, 1093 find_reg(engine, is_master, reg_addr);
1080 engine->reg_table_count,
1081 is_master,
1082 reg_addr);
1083 1094
1084 if (!reg) { 1095 if (!reg) {
1085 DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n", 1096 DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
@@ -1200,16 +1211,19 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
1200 u32 batch_len, 1211 u32 batch_len,
1201 bool is_master) 1212 bool is_master)
1202{ 1213{
1203 u32 *cmd, *batch_base, *batch_end; 1214 u32 *cmd, *batch_end;
1204 struct drm_i915_cmd_descriptor default_desc = { 0 }; 1215 struct drm_i915_cmd_descriptor default_desc = noop_desc;
1216 const struct drm_i915_cmd_descriptor *desc = &default_desc;
1205 bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ 1217 bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
1218 bool needs_clflush_after = false;
1206 int ret = 0; 1219 int ret = 0;
1207 1220
1208 batch_base = copy_batch(shadow_batch_obj, batch_obj, 1221 cmd = copy_batch(shadow_batch_obj, batch_obj,
1209 batch_start_offset, batch_len); 1222 batch_start_offset, batch_len,
1210 if (IS_ERR(batch_base)) { 1223 &needs_clflush_after);
1224 if (IS_ERR(cmd)) {
1211 DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n"); 1225 DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
1212 return PTR_ERR(batch_base); 1226 return PTR_ERR(cmd);
1213 } 1227 }
1214 1228
1215 /* 1229 /*
@@ -1217,17 +1231,14 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
1217 * large or larger and copy_batch() will write MI_NOPs to the extra 1231 * large or larger and copy_batch() will write MI_NOPs to the extra
1218 * space. Parsing should be faster in some cases this way. 1232 * space. Parsing should be faster in some cases this way.
1219 */ 1233 */
1220 batch_end = batch_base + (batch_len / sizeof(*batch_end)); 1234 batch_end = cmd + (batch_len / sizeof(*batch_end));
1221
1222 cmd = batch_base;
1223 while (cmd < batch_end) { 1235 while (cmd < batch_end) {
1224 const struct drm_i915_cmd_descriptor *desc;
1225 u32 length; 1236 u32 length;
1226 1237
1227 if (*cmd == MI_BATCH_BUFFER_END) 1238 if (*cmd == MI_BATCH_BUFFER_END)
1228 break; 1239 break;
1229 1240
1230 desc = find_cmd(engine, *cmd, &default_desc); 1241 desc = find_cmd(engine, *cmd, desc, &default_desc);
1231 if (!desc) { 1242 if (!desc) {
1232 DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", 1243 DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
1233 *cmd); 1244 *cmd);
@@ -1278,7 +1289,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
1278 ret = -EINVAL; 1289 ret = -EINVAL;
1279 } 1290 }
1280 1291
1281 vunmap(batch_base); 1292 if (ret == 0 && needs_clflush_after)
1293 drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len);
1294 i915_gem_object_unpin_map(shadow_batch_obj);
1282 1295
1283 return ret; 1296 return ret;
1284} 1297}
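
The find_reg() rework in the hunks above depends on the register whitelists being sorted, which validate_regs_sorted() now checks before enabling the parser, so each lookup becomes a binary search rather than a linear walk over every table entry. A minimal standalone sketch of that pattern follows; the struct, the offsets and main() are invented for illustration and are not the driver's tables:

/*
 * Standalone illustration only: the struct, the offsets and main() are
 * invented for the example and are not part of the i915 code.
 */
#include <stdint.h>
#include <stdio.h>

struct reg_desc {
	uint32_t addr;
};

/* kept sorted by ascending offset, as validate_regs_sorted() checks */
static const struct reg_desc whitelist[] = {
	{ 0x1000 }, { 0x2000 }, { 0x3000 }, { 0x8000 },
};

static const struct reg_desc *lookup_reg(const struct reg_desc *table,
					 int count, uint32_t addr)
{
	int start = 0, end = count;

	while (start < end) {
		int mid = start + (end - start) / 2;

		if (addr < table[mid].addr)
			end = mid;
		else if (addr > table[mid].addr)
			start = mid + 1;
		else
			return &table[mid];
	}

	return NULL;
}

int main(void)
{
	uint32_t addr = 0x2000;
	int count = sizeof(whitelist) / sizeof(whitelist[0]);

	printf("0x%04x is %s\n", addr,
	       lookup_reg(whitelist, count, addr) ? "whitelisted" : "rejected");
	return 0;
}

The driver's __find_reg() is the same loop written with a signed difference, and its find_reg() wrapper additionally skips tables marked master-only when the submitting client is not the DRM master, as the hunk above shows.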
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 64e41cf74d11..a95d7bc81fb9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -40,12 +40,6 @@
40#include <drm/i915_drm.h> 40#include <drm/i915_drm.h>
41#include "i915_drv.h" 41#include "i915_drv.h"
42 42
43enum {
44 ACTIVE_LIST,
45 INACTIVE_LIST,
46 PINNED_LIST,
47};
48
49/* As the drm_debugfs_init() routines are called before dev->dev_private is 43/* As the drm_debugfs_init() routines are called before dev->dev_private is
50 * allocated we need to hook into the minor for release. */ 44 * allocated we need to hook into the minor for release. */
51static int 45static int
@@ -111,7 +105,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
111 105
112static char get_global_flag(struct drm_i915_gem_object *obj) 106static char get_global_flag(struct drm_i915_gem_object *obj)
113{ 107{
114 return i915_gem_obj_to_ggtt(obj) ? 'g' : ' '; 108 return i915_gem_object_to_ggtt(obj, NULL) ? 'g' : ' ';
115} 109}
116 110
117static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) 111static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
@@ -158,11 +152,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
158 seq_printf(m, "%x ", 152 seq_printf(m, "%x ",
159 i915_gem_active_get_seqno(&obj->last_read[id], 153 i915_gem_active_get_seqno(&obj->last_read[id],
160 &obj->base.dev->struct_mutex)); 154 &obj->base.dev->struct_mutex));
161 seq_printf(m, "] %x %x%s%s%s", 155 seq_printf(m, "] %x %s%s%s",
162 i915_gem_active_get_seqno(&obj->last_write, 156 i915_gem_active_get_seqno(&obj->last_write,
163 &obj->base.dev->struct_mutex), 157 &obj->base.dev->struct_mutex),
164 i915_gem_active_get_seqno(&obj->last_fence,
165 &obj->base.dev->struct_mutex),
166 i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), 158 i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
167 obj->dirty ? " dirty" : "", 159 obj->dirty ? " dirty" : "",
168 obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); 160 obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -175,8 +167,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
175 seq_printf(m, " (pinned x %d)", pin_count); 167 seq_printf(m, " (pinned x %d)", pin_count);
176 if (obj->pin_display) 168 if (obj->pin_display)
177 seq_printf(m, " (display)"); 169 seq_printf(m, " (display)");
178 if (obj->fence_reg != I915_FENCE_REG_NONE)
179 seq_printf(m, " (fence: %d)", obj->fence_reg);
180 list_for_each_entry(vma, &obj->vma_list, obj_link) { 170 list_for_each_entry(vma, &obj->vma_list, obj_link) {
181 if (!drm_mm_node_allocated(&vma->node)) 171 if (!drm_mm_node_allocated(&vma->node))
182 continue; 172 continue;
@@ -186,6 +176,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
186 vma->node.start, vma->node.size); 176 vma->node.start, vma->node.size);
187 if (i915_vma_is_ggtt(vma)) 177 if (i915_vma_is_ggtt(vma))
188 seq_printf(m, ", type: %u", vma->ggtt_view.type); 178 seq_printf(m, ", type: %u", vma->ggtt_view.type);
179 if (vma->fence)
180 seq_printf(m, " , fence: %d%s",
181 vma->fence->id,
182 i915_gem_active_isset(&vma->last_fence) ? "*" : "");
189 seq_puts(m, ")"); 183 seq_puts(m, ")");
190 } 184 }
191 if (obj->stolen) 185 if (obj->stolen)
@@ -210,53 +204,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
210 seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits); 204 seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
211} 205}
212 206
213static int i915_gem_object_list_info(struct seq_file *m, void *data)
214{
215 struct drm_info_node *node = m->private;
216 uintptr_t list = (uintptr_t) node->info_ent->data;
217 struct list_head *head;
218 struct drm_device *dev = node->minor->dev;
219 struct drm_i915_private *dev_priv = to_i915(dev);
220 struct i915_ggtt *ggtt = &dev_priv->ggtt;
221 struct i915_vma *vma;
222 u64 total_obj_size, total_gtt_size;
223 int count, ret;
224
225 ret = mutex_lock_interruptible(&dev->struct_mutex);
226 if (ret)
227 return ret;
228
229 /* FIXME: the user of this interface might want more than just GGTT */
230 switch (list) {
231 case ACTIVE_LIST:
232 seq_puts(m, "Active:\n");
233 head = &ggtt->base.active_list;
234 break;
235 case INACTIVE_LIST:
236 seq_puts(m, "Inactive:\n");
237 head = &ggtt->base.inactive_list;
238 break;
239 default:
240 mutex_unlock(&dev->struct_mutex);
241 return -EINVAL;
242 }
243
244 total_obj_size = total_gtt_size = count = 0;
245 list_for_each_entry(vma, head, vm_link) {
246 seq_printf(m, " ");
247 describe_obj(m, vma->obj);
248 seq_printf(m, "\n");
249 total_obj_size += vma->obj->base.size;
250 total_gtt_size += vma->node.size;
251 count++;
252 }
253 mutex_unlock(&dev->struct_mutex);
254
255 seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
256 count, total_obj_size, total_gtt_size);
257 return 0;
258}
259
260static int obj_rank_by_stolen(void *priv, 207static int obj_rank_by_stolen(void *priv,
261 struct list_head *A, struct list_head *B) 208 struct list_head *A, struct list_head *B)
262{ 209{
@@ -322,17 +269,6 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
322 return 0; 269 return 0;
323} 270}
324 271
325#define count_objects(list, member) do { \
326 list_for_each_entry(obj, list, member) { \
327 size += i915_gem_obj_total_ggtt_size(obj); \
328 ++count; \
329 if (obj->map_and_fenceable) { \
330 mappable_size += i915_gem_obj_ggtt_size(obj); \
331 ++mappable_count; \
332 } \
333 } \
334} while (0)
335
336struct file_stats { 272struct file_stats {
337 struct drm_i915_file_private *file_priv; 273 struct drm_i915_file_private *file_priv;
338 unsigned long count; 274 unsigned long count;
@@ -418,9 +354,9 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
418 354
419 for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { 355 for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) {
420 if (ctx->engine[n].state) 356 if (ctx->engine[n].state)
421 per_file_stats(0, ctx->engine[n].state, data); 357 per_file_stats(0, ctx->engine[n].state->obj, data);
422 if (ctx->engine[n].ring) 358 if (ctx->engine[n].ring)
423 per_file_stats(0, ctx->engine[n].ring->obj, data); 359 per_file_stats(0, ctx->engine[n].ring->vma->obj, data);
424 } 360 }
425 361
426 return 0; 362 return 0;
@@ -447,30 +383,16 @@ static void print_context_stats(struct seq_file *m,
447 print_file_stats(m, "[k]contexts", stats); 383 print_file_stats(m, "[k]contexts", stats);
448} 384}
449 385
450#define count_vmas(list, member) do { \
451 list_for_each_entry(vma, list, member) { \
452 size += i915_gem_obj_total_ggtt_size(vma->obj); \
453 ++count; \
454 if (vma->obj->map_and_fenceable) { \
455 mappable_size += i915_gem_obj_ggtt_size(vma->obj); \
456 ++mappable_count; \
457 } \
458 } \
459} while (0)
460
461static int i915_gem_object_info(struct seq_file *m, void* data) 386static int i915_gem_object_info(struct seq_file *m, void* data)
462{ 387{
463 struct drm_info_node *node = m->private; 388 struct drm_info_node *node = m->private;
464 struct drm_device *dev = node->minor->dev; 389 struct drm_device *dev = node->minor->dev;
465 struct drm_i915_private *dev_priv = to_i915(dev); 390 struct drm_i915_private *dev_priv = to_i915(dev);
466 struct i915_ggtt *ggtt = &dev_priv->ggtt; 391 struct i915_ggtt *ggtt = &dev_priv->ggtt;
467 u32 count, mappable_count, purgeable_count; 392 u32 count, mapped_count, purgeable_count, dpy_count;
468 u64 size, mappable_size, purgeable_size; 393 u64 size, mapped_size, purgeable_size, dpy_size;
469 unsigned long pin_mapped_count = 0, pin_mapped_purgeable_count = 0;
470 u64 pin_mapped_size = 0, pin_mapped_purgeable_size = 0;
471 struct drm_i915_gem_object *obj; 394 struct drm_i915_gem_object *obj;
472 struct drm_file *file; 395 struct drm_file *file;
473 struct i915_vma *vma;
474 int ret; 396 int ret;
475 397
476 ret = mutex_lock_interruptible(&dev->struct_mutex); 398 ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -481,70 +403,53 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
481 dev_priv->mm.object_count, 403 dev_priv->mm.object_count,
482 dev_priv->mm.object_memory); 404 dev_priv->mm.object_memory);
483 405
484 size = count = mappable_size = mappable_count = 0; 406 size = count = 0;
485 count_objects(&dev_priv->mm.bound_list, global_list); 407 mapped_size = mapped_count = 0;
486 seq_printf(m, "%u [%u] objects, %llu [%llu] bytes in gtt\n", 408 purgeable_size = purgeable_count = 0;
487 count, mappable_count, size, mappable_size); 409 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
488 410 size += obj->base.size;
489 size = count = mappable_size = mappable_count = 0; 411 ++count;
490 count_vmas(&ggtt->base.active_list, vm_link);
491 seq_printf(m, " %u [%u] active objects, %llu [%llu] bytes\n",
492 count, mappable_count, size, mappable_size);
493 412
494 size = count = mappable_size = mappable_count = 0; 413 if (obj->madv == I915_MADV_DONTNEED) {
495 count_vmas(&ggtt->base.inactive_list, vm_link); 414 purgeable_size += obj->base.size;
496 seq_printf(m, " %u [%u] inactive objects, %llu [%llu] bytes\n", 415 ++purgeable_count;
497 count, mappable_count, size, mappable_size); 416 }
498 417
499 size = count = purgeable_size = purgeable_count = 0;
500 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
501 size += obj->base.size, ++count;
502 if (obj->madv == I915_MADV_DONTNEED)
503 purgeable_size += obj->base.size, ++purgeable_count;
504 if (obj->mapping) { 418 if (obj->mapping) {
505 pin_mapped_count++; 419 mapped_count++;
506 pin_mapped_size += obj->base.size; 420 mapped_size += obj->base.size;
507 if (obj->pages_pin_count == 0) {
508 pin_mapped_purgeable_count++;
509 pin_mapped_purgeable_size += obj->base.size;
510 }
511 } 421 }
512 } 422 }
513 seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); 423 seq_printf(m, "%u unbound objects, %llu bytes\n", count, size);
514 424
515 size = count = mappable_size = mappable_count = 0; 425 size = count = dpy_size = dpy_count = 0;
516 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 426 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
517 if (obj->fault_mappable) { 427 size += obj->base.size;
518 size += i915_gem_obj_ggtt_size(obj); 428 ++count;
519 ++count; 429
520 }
521 if (obj->pin_display) { 430 if (obj->pin_display) {
522 mappable_size += i915_gem_obj_ggtt_size(obj); 431 dpy_size += obj->base.size;
523 ++mappable_count; 432 ++dpy_count;
524 } 433 }
434
525 if (obj->madv == I915_MADV_DONTNEED) { 435 if (obj->madv == I915_MADV_DONTNEED) {
526 purgeable_size += obj->base.size; 436 purgeable_size += obj->base.size;
527 ++purgeable_count; 437 ++purgeable_count;
528 } 438 }
439
529 if (obj->mapping) { 440 if (obj->mapping) {
530 pin_mapped_count++; 441 mapped_count++;
531 pin_mapped_size += obj->base.size; 442 mapped_size += obj->base.size;
532 if (obj->pages_pin_count == 0) {
533 pin_mapped_purgeable_count++;
534 pin_mapped_purgeable_size += obj->base.size;
535 }
536 } 443 }
537 } 444 }
445 seq_printf(m, "%u bound objects, %llu bytes\n",
446 count, size);
538 seq_printf(m, "%u purgeable objects, %llu bytes\n", 447 seq_printf(m, "%u purgeable objects, %llu bytes\n",
539 purgeable_count, purgeable_size); 448 purgeable_count, purgeable_size);
540 seq_printf(m, "%u pinned mappable objects, %llu bytes\n", 449 seq_printf(m, "%u mapped objects, %llu bytes\n",
541 mappable_count, mappable_size); 450 mapped_count, mapped_size);
542 seq_printf(m, "%u fault mappable objects, %llu bytes\n", 451 seq_printf(m, "%u display objects (pinned), %llu bytes\n",
543 count, size); 452 dpy_count, dpy_size);
544 seq_printf(m,
545 "%lu [%lu] pin mapped objects, %llu [%llu] bytes [purgeable]\n",
546 pin_mapped_count, pin_mapped_purgeable_count,
547 pin_mapped_size, pin_mapped_purgeable_size);
548 453
549 seq_printf(m, "%llu [%llu] gtt total\n", 454 seq_printf(m, "%llu [%llu] gtt total\n",
550 ggtt->base.total, ggtt->mappable_end - ggtt->base.start); 455 ggtt->base.total, ggtt->mappable_end - ggtt->base.start);
@@ -557,6 +462,8 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
557 print_context_stats(m, dev_priv); 462 print_context_stats(m, dev_priv);
558 list_for_each_entry_reverse(file, &dev->filelist, lhead) { 463 list_for_each_entry_reverse(file, &dev->filelist, lhead) {
559 struct file_stats stats; 464 struct file_stats stats;
465 struct drm_i915_file_private *file_priv = file->driver_priv;
466 struct drm_i915_gem_request *request;
560 struct task_struct *task; 467 struct task_struct *task;
561 468
562 memset(&stats, 0, sizeof(stats)); 469 memset(&stats, 0, sizeof(stats));
@@ -570,10 +477,17 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
570 * still alive (e.g. get_pid(current) => fork() => exit()). 477 * still alive (e.g. get_pid(current) => fork() => exit()).
571 * Therefore, we need to protect this ->comm access using RCU. 478 * Therefore, we need to protect this ->comm access using RCU.
572 */ 479 */
480 mutex_lock(&dev->struct_mutex);
481 request = list_first_entry_or_null(&file_priv->mm.request_list,
482 struct drm_i915_gem_request,
483 client_list);
573 rcu_read_lock(); 484 rcu_read_lock();
574 task = pid_task(file->pid, PIDTYPE_PID); 485 task = pid_task(request && request->ctx->pid ?
486 request->ctx->pid : file->pid,
487 PIDTYPE_PID);
575 print_file_stats(m, task ? task->comm : "<unknown>", stats); 488 print_file_stats(m, task ? task->comm : "<unknown>", stats);
576 rcu_read_unlock(); 489 rcu_read_unlock();
490 mutex_unlock(&dev->struct_mutex);
577 } 491 }
578 mutex_unlock(&dev->filelist_mutex); 492 mutex_unlock(&dev->filelist_mutex);
579 493
@@ -584,8 +498,8 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
584{ 498{
585 struct drm_info_node *node = m->private; 499 struct drm_info_node *node = m->private;
586 struct drm_device *dev = node->minor->dev; 500 struct drm_device *dev = node->minor->dev;
587 uintptr_t list = (uintptr_t) node->info_ent->data;
588 struct drm_i915_private *dev_priv = to_i915(dev); 501 struct drm_i915_private *dev_priv = to_i915(dev);
502 bool show_pin_display_only = !!data;
589 struct drm_i915_gem_object *obj; 503 struct drm_i915_gem_object *obj;
590 u64 total_obj_size, total_gtt_size; 504 u64 total_obj_size, total_gtt_size;
591 int count, ret; 505 int count, ret;
@@ -596,7 +510,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
596 510
597 total_obj_size = total_gtt_size = count = 0; 511 total_obj_size = total_gtt_size = count = 0;
598 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 512 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
599 if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj)) 513 if (show_pin_display_only && !obj->pin_display)
600 continue; 514 continue;
601 515
602 seq_puts(m, " "); 516 seq_puts(m, " ");
@@ -755,12 +669,11 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
755 669
756 seq_printf(m, "%s requests: %d\n", engine->name, count); 670 seq_printf(m, "%s requests: %d\n", engine->name, count);
757 list_for_each_entry(req, &engine->request_list, link) { 671 list_for_each_entry(req, &engine->request_list, link) {
672 struct pid *pid = req->ctx->pid;
758 struct task_struct *task; 673 struct task_struct *task;
759 674
760 rcu_read_lock(); 675 rcu_read_lock();
761 task = NULL; 676 task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
762 if (req->pid)
763 task = pid_task(req->pid, PIDTYPE_PID);
764 seq_printf(m, " %x @ %d: %s [%d]\n", 677 seq_printf(m, " %x @ %d: %s [%d]\n",
765 req->fence.seqno, 678 req->fence.seqno,
766 (int) (jiffies - req->emitted_jiffies), 679 (int) (jiffies - req->emitted_jiffies),
@@ -787,8 +700,6 @@ static void i915_ring_seqno_info(struct seq_file *m,
787 700
788 seq_printf(m, "Current sequence (%s): %x\n", 701 seq_printf(m, "Current sequence (%s): %x\n",
789 engine->name, intel_engine_get_seqno(engine)); 702 engine->name, intel_engine_get_seqno(engine));
790 seq_printf(m, "Current user interrupts (%s): %lx\n",
791 engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups));
792 703
793 spin_lock(&b->lock); 704 spin_lock(&b->lock);
794 for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { 705 for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@@ -1027,14 +938,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
1027 938
1028 seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs); 939 seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
1029 for (i = 0; i < dev_priv->num_fence_regs; i++) { 940 for (i = 0; i < dev_priv->num_fence_regs; i++) {
1030 struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj; 941 struct i915_vma *vma = dev_priv->fence_regs[i].vma;
1031 942
1032 seq_printf(m, "Fence %d, pin count = %d, object = ", 943 seq_printf(m, "Fence %d, pin count = %d, object = ",
1033 i, dev_priv->fence_regs[i].pin_count); 944 i, dev_priv->fence_regs[i].pin_count);
1034 if (obj == NULL) 945 if (!vma)
1035 seq_puts(m, "unused"); 946 seq_puts(m, "unused");
1036 else 947 else
1037 describe_obj(m, obj); 948 describe_obj(m, vma->obj);
1038 seq_putc(m, '\n'); 949 seq_putc(m, '\n');
1039 } 950 }
1040 951
@@ -1434,11 +1345,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
1434 engine->hangcheck.seqno, 1345 engine->hangcheck.seqno,
1435 seqno[id], 1346 seqno[id],
1436 engine->last_submitted_seqno); 1347 engine->last_submitted_seqno);
1437 seq_printf(m, "\twaiters? %d\n", 1348 seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
1438 intel_engine_has_waiter(engine)); 1349 yesno(intel_engine_has_waiter(engine)),
1439 seq_printf(m, "\tuser interrupts = %lx [current %lx]\n", 1350 yesno(test_bit(engine->id,
1440 engine->hangcheck.user_interrupts, 1351 &dev_priv->gpu_error.missed_irq_rings)));
1441 READ_ONCE(engine->breadcrumbs.irq_wakeups));
1442 seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", 1352 seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
1443 (long long)engine->hangcheck.acthd, 1353 (long long)engine->hangcheck.acthd,
1444 (long long)acthd[id]); 1354 (long long)acthd[id]);
@@ -2052,18 +1962,17 @@ static int i915_context_status(struct seq_file *m, void *unused)
2052 1962
2053 list_for_each_entry(ctx, &dev_priv->context_list, link) { 1963 list_for_each_entry(ctx, &dev_priv->context_list, link) {
2054 seq_printf(m, "HW context %u ", ctx->hw_id); 1964 seq_printf(m, "HW context %u ", ctx->hw_id);
2055 if (IS_ERR(ctx->file_priv)) { 1965 if (ctx->pid) {
2056 seq_puts(m, "(deleted) ");
2057 } else if (ctx->file_priv) {
2058 struct pid *pid = ctx->file_priv->file->pid;
2059 struct task_struct *task; 1966 struct task_struct *task;
2060 1967
2061 task = get_pid_task(pid, PIDTYPE_PID); 1968 task = get_pid_task(ctx->pid, PIDTYPE_PID);
2062 if (task) { 1969 if (task) {
2063 seq_printf(m, "(%s [%d]) ", 1970 seq_printf(m, "(%s [%d]) ",
2064 task->comm, task->pid); 1971 task->comm, task->pid);
2065 put_task_struct(task); 1972 put_task_struct(task);
2066 } 1973 }
1974 } else if (IS_ERR(ctx->file_priv)) {
1975 seq_puts(m, "(deleted) ");
2067 } else { 1976 } else {
2068 seq_puts(m, "(kernel) "); 1977 seq_puts(m, "(kernel) ");
2069 } 1978 }
@@ -2077,7 +1986,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
2077 seq_printf(m, "%s: ", engine->name); 1986 seq_printf(m, "%s: ", engine->name);
2078 seq_putc(m, ce->initialised ? 'I' : 'i'); 1987 seq_putc(m, ce->initialised ? 'I' : 'i');
2079 if (ce->state) 1988 if (ce->state)
2080 describe_obj(m, ce->state); 1989 describe_obj(m, ce->state->obj);
2081 if (ce->ring) 1990 if (ce->ring)
2082 describe_ctx_ring(m, ce->ring); 1991 describe_ctx_ring(m, ce->ring);
2083 seq_putc(m, '\n'); 1992 seq_putc(m, '\n');
@@ -2095,36 +2004,34 @@ static void i915_dump_lrc_obj(struct seq_file *m,
2095 struct i915_gem_context *ctx, 2004 struct i915_gem_context *ctx,
2096 struct intel_engine_cs *engine) 2005 struct intel_engine_cs *engine)
2097{ 2006{
2098 struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state; 2007 struct i915_vma *vma = ctx->engine[engine->id].state;
2099 struct page *page; 2008 struct page *page;
2100 uint32_t *reg_state;
2101 int j; 2009 int j;
2102 unsigned long ggtt_offset = 0;
2103 2010
2104 seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id); 2011 seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id);
2105 2012
2106 if (ctx_obj == NULL) { 2013 if (!vma) {
2107 seq_puts(m, "\tNot allocated\n"); 2014 seq_puts(m, "\tFake context\n");
2108 return; 2015 return;
2109 } 2016 }
2110 2017
2111 if (!i915_gem_obj_ggtt_bound(ctx_obj)) 2018 if (vma->flags & I915_VMA_GLOBAL_BIND)
2112 seq_puts(m, "\tNot bound in GGTT\n"); 2019 seq_printf(m, "\tBound in GGTT at 0x%08x\n",
2113 else 2020 i915_ggtt_offset(vma));
2114 ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj);
2115 2021
2116 if (i915_gem_object_get_pages(ctx_obj)) { 2022 if (i915_gem_object_get_pages(vma->obj)) {
2117 seq_puts(m, "\tFailed to get pages for context object\n"); 2023 seq_puts(m, "\tFailed to get pages for context object\n\n");
2118 return; 2024 return;
2119 } 2025 }
2120 2026
2121 page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); 2027 page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN);
2122 if (!WARN_ON(page == NULL)) { 2028 if (page) {
2123 reg_state = kmap_atomic(page); 2029 u32 *reg_state = kmap_atomic(page);
2124 2030
2125 for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { 2031 for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
2126 seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n", 2032 seq_printf(m,
2127 ggtt_offset + 4096 + (j * 4), 2033 "\t[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n",
2034 j * 4,
2128 reg_state[j], reg_state[j + 1], 2035 reg_state[j], reg_state[j + 1],
2129 reg_state[j + 2], reg_state[j + 3]); 2036 reg_state[j + 2], reg_state[j + 3]);
2130 } 2037 }
@@ -2444,6 +2351,20 @@ static int count_irq_waiters(struct drm_i915_private *i915)
2444 return count; 2351 return count;
2445} 2352}
2446 2353
2354static const char *rps_power_to_str(unsigned int power)
2355{
2356 static const char * const strings[] = {
2357 [LOW_POWER] = "low power",
2358 [BETWEEN] = "mixed",
2359 [HIGH_POWER] = "high power",
2360 };
2361
2362 if (power >= ARRAY_SIZE(strings) || !strings[power])
2363 return "unknown";
2364
2365 return strings[power];
2366}
2367
2447static int i915_rps_boost_info(struct seq_file *m, void *data) 2368static int i915_rps_boost_info(struct seq_file *m, void *data)
2448{ 2369{
2449 struct drm_info_node *node = m->private; 2370 struct drm_info_node *node = m->private;
@@ -2455,12 +2376,17 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
2455 seq_printf(m, "GPU busy? %s [%x]\n", 2376 seq_printf(m, "GPU busy? %s [%x]\n",
2456 yesno(dev_priv->gt.awake), dev_priv->gt.active_engines); 2377 yesno(dev_priv->gt.awake), dev_priv->gt.active_engines);
2457 seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); 2378 seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
2458 seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n", 2379 seq_printf(m, "Frequency requested %d\n",
2459 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), 2380 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
2381 seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n",
2460 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 2382 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
2461 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit), 2383 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit),
2462 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit), 2384 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit),
2463 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); 2385 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
2386 seq_printf(m, " idle:%d, efficient:%d, boost:%d\n",
2387 intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq),
2388 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
2389 intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq));
2464 2390
2465 mutex_lock(&dev->filelist_mutex); 2391 mutex_lock(&dev->filelist_mutex);
2466 spin_lock(&dev_priv->rps.client_lock); 2392 spin_lock(&dev_priv->rps.client_lock);
@@ -2481,6 +2407,31 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
2481 spin_unlock(&dev_priv->rps.client_lock); 2407 spin_unlock(&dev_priv->rps.client_lock);
2482 mutex_unlock(&dev->filelist_mutex); 2408 mutex_unlock(&dev->filelist_mutex);
2483 2409
2410 if (INTEL_GEN(dev_priv) >= 6 &&
2411 dev_priv->rps.enabled &&
2412 dev_priv->gt.active_engines) {
2413 u32 rpup, rpupei;
2414 u32 rpdown, rpdownei;
2415
2416 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
2417 rpup = I915_READ_FW(GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK;
2418 rpupei = I915_READ_FW(GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK;
2419 rpdown = I915_READ_FW(GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK;
2420 rpdownei = I915_READ_FW(GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK;
2421 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2422
2423 seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
2424 rps_power_to_str(dev_priv->rps.power));
2425 seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n",
2426 100 * rpup / rpupei,
2427 dev_priv->rps.up_threshold);
2428 seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n",
2429 100 * rpdown / rpdownei,
2430 dev_priv->rps.down_threshold);
2431 } else {
2432 seq_puts(m, "\nRPS Autotuning inactive\n");
2433 }
2434
2484 return 0; 2435 return 0;
2485} 2436}
2486 2437
@@ -2547,6 +2498,7 @@ static void i915_guc_client_info(struct seq_file *m,
2547 struct i915_guc_client *client) 2498 struct i915_guc_client *client)
2548{ 2499{
2549 struct intel_engine_cs *engine; 2500 struct intel_engine_cs *engine;
2501 enum intel_engine_id id;
2550 uint64_t tot = 0; 2502 uint64_t tot = 0;
2551 2503
2552 seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", 2504 seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
@@ -2557,15 +2509,14 @@ static void i915_guc_client_info(struct seq_file *m,
2557 client->wq_size, client->wq_offset, client->wq_tail); 2509 client->wq_size, client->wq_offset, client->wq_tail);
2558 2510
2559 seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space); 2511 seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
2560 seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
2561 seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); 2512 seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
2562 seq_printf(m, "\tLast submission result: %d\n", client->retcode); 2513 seq_printf(m, "\tLast submission result: %d\n", client->retcode);
2563 2514
2564 for_each_engine(engine, dev_priv) { 2515 for_each_engine_id(engine, dev_priv, id) {
2516 u64 submissions = client->submissions[id];
2517 tot += submissions;
2565 seq_printf(m, "\tSubmissions: %llu %s\n", 2518 seq_printf(m, "\tSubmissions: %llu %s\n",
2566 client->submissions[engine->id], 2519 submissions, engine->name);
2567 engine->name);
2568 tot += client->submissions[engine->id];
2569 } 2520 }
2570 seq_printf(m, "\tTotal: %llu\n", tot); 2521 seq_printf(m, "\tTotal: %llu\n", tot);
2571} 2522}
@@ -2578,6 +2529,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
2578 struct intel_guc guc; 2529 struct intel_guc guc;
2579 struct i915_guc_client client = {}; 2530 struct i915_guc_client client = {};
2580 struct intel_engine_cs *engine; 2531 struct intel_engine_cs *engine;
2532 enum intel_engine_id id;
2581 u64 total = 0; 2533 u64 total = 0;
2582 2534
2583 if (!HAS_GUC_SCHED(dev_priv)) 2535 if (!HAS_GUC_SCHED(dev_priv))
@@ -2604,11 +2556,11 @@ static int i915_guc_info(struct seq_file *m, void *data)
2604 seq_printf(m, "GuC last action error code: %d\n", guc.action_err); 2556 seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
2605 2557
2606 seq_printf(m, "\nGuC submissions:\n"); 2558 seq_printf(m, "\nGuC submissions:\n");
2607 for_each_engine(engine, dev_priv) { 2559 for_each_engine_id(engine, dev_priv, id) {
2560 u64 submissions = guc.submissions[id];
2561 total += submissions;
2608 seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", 2562 seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
2609 engine->name, guc.submissions[engine->id], 2563 engine->name, submissions, guc.last_seqno[id]);
2610 guc.last_seqno[engine->id]);
2611 total += guc.submissions[engine->id];
2612 } 2564 }
2613 seq_printf(m, "\t%s: %llu\n", "Total", total); 2565 seq_printf(m, "\t%s: %llu\n", "Total", total);
2614 2566
@@ -2625,15 +2577,15 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
2625 struct drm_info_node *node = m->private; 2577 struct drm_info_node *node = m->private;
2626 struct drm_device *dev = node->minor->dev; 2578 struct drm_device *dev = node->minor->dev;
2627 struct drm_i915_private *dev_priv = to_i915(dev); 2579 struct drm_i915_private *dev_priv = to_i915(dev);
2628 struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj; 2580 struct drm_i915_gem_object *obj;
2629 u32 *log;
2630 int i = 0, pg; 2581 int i = 0, pg;
2631 2582
2632 if (!log_obj) 2583 if (!dev_priv->guc.log_vma)
2633 return 0; 2584 return 0;
2634 2585
2635 for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) { 2586 obj = dev_priv->guc.log_vma->obj;
2636 log = kmap_atomic(i915_gem_object_get_page(log_obj, pg)); 2587 for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
2588 u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
2637 2589
2638 for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) 2590 for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
2639 seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", 2591 seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
@@ -3237,7 +3189,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
3237 struct drm_device *dev = node->minor->dev; 3189 struct drm_device *dev = node->minor->dev;
3238 struct drm_i915_private *dev_priv = to_i915(dev); 3190 struct drm_i915_private *dev_priv = to_i915(dev);
3239 struct intel_engine_cs *engine; 3191 struct intel_engine_cs *engine;
3240 int num_rings = hweight32(INTEL_INFO(dev)->ring_mask); 3192 int num_rings = INTEL_INFO(dev)->num_rings;
3241 enum intel_engine_id id; 3193 enum intel_engine_id id;
3242 int j, ret; 3194 int j, ret;
3243 3195
@@ -3255,7 +3207,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
3255 struct page *page; 3207 struct page *page;
3256 uint64_t *seqno; 3208 uint64_t *seqno;
3257 3209
3258 page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0); 3210 page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0);
3259 3211
3260 seqno = (uint64_t *)kmap_atomic(page); 3212 seqno = (uint64_t *)kmap_atomic(page);
3261 for_each_engine_id(engine, dev_priv, id) { 3213 for_each_engine_id(engine, dev_priv, id) {
@@ -5386,9 +5338,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
5386 {"i915_capabilities", i915_capabilities, 0}, 5338 {"i915_capabilities", i915_capabilities, 0},
5387 {"i915_gem_objects", i915_gem_object_info, 0}, 5339 {"i915_gem_objects", i915_gem_object_info, 0},
5388 {"i915_gem_gtt", i915_gem_gtt_info, 0}, 5340 {"i915_gem_gtt", i915_gem_gtt_info, 0},
5389 {"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST}, 5341 {"i915_gem_pin_display", i915_gem_gtt_info, 0, (void *)1},
5390 {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
5391 {"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST},
5392 {"i915_gem_stolen", i915_gem_stolen_list_info }, 5342 {"i915_gem_stolen", i915_gem_stolen_list_info },
5393 {"i915_gem_pageflip", i915_gem_pageflip_info, 0}, 5343 {"i915_gem_pageflip", i915_gem_pageflip_info, 0},
5394 {"i915_gem_request", i915_gem_request_info, 0}, 5344 {"i915_gem_request", i915_gem_request_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 57eb380a2c21..13ae340ef1f3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -827,6 +827,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
827 mutex_init(&dev_priv->wm.wm_mutex); 827 mutex_init(&dev_priv->wm.wm_mutex);
828 mutex_init(&dev_priv->pps_mutex); 828 mutex_init(&dev_priv->pps_mutex);
829 829
830 i915_memcpy_init_early(dev_priv);
831
830 ret = i915_workqueues_init(dev_priv); 832 ret = i915_workqueues_init(dev_priv);
831 if (ret < 0) 833 if (ret < 0)
832 return ret; 834 return ret;
@@ -1560,6 +1562,7 @@ static int i915_drm_resume(struct drm_device *dev)
1560 i915_gem_resume(dev); 1562 i915_gem_resume(dev);
1561 1563
1562 i915_restore_state(dev); 1564 i915_restore_state(dev);
1565 intel_pps_unlock_regs_wa(dev_priv);
1563 intel_opregion_setup(dev_priv); 1566 intel_opregion_setup(dev_priv);
1564 1567
1565 intel_init_pch_refclk(dev); 1568 intel_init_pch_refclk(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c36d17659ebe..e6069057eb98 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -70,7 +70,7 @@
70 70
71#define DRIVER_NAME "i915" 71#define DRIVER_NAME "i915"
72#define DRIVER_DESC "Intel Graphics" 72#define DRIVER_DESC "Intel Graphics"
73#define DRIVER_DATE "20160808" 73#define DRIVER_DATE "20160822"
74 74
75#undef WARN_ON 75#undef WARN_ON
76/* Many gcc seem to no see through this and fall over :( */ 76/* Many gcc seem to no see through this and fall over :( */
@@ -455,15 +455,21 @@ struct intel_opregion {
455struct intel_overlay; 455struct intel_overlay;
456struct intel_overlay_error_state; 456struct intel_overlay_error_state;
457 457
458#define I915_FENCE_REG_NONE -1
459#define I915_MAX_NUM_FENCES 32
460/* 32 fences + sign bit for FENCE_REG_NONE */
461#define I915_MAX_NUM_FENCE_BITS 6
462
463struct drm_i915_fence_reg { 458struct drm_i915_fence_reg {
464 struct list_head lru_list; 459 struct list_head link;
465 struct drm_i915_gem_object *obj; 460 struct drm_i915_private *i915;
461 struct i915_vma *vma;
466 int pin_count; 462 int pin_count;
463 int id;
464 /**
465 * Whether the tiling parameters for the currently
466 * associated fence register have changed. Note that
467 * for the purposes of tracking tiling changes we also
468 * treat the unfenced register, the register slot that
469 * the object occupies whilst it executes a fenced
470 * command (such as BLT on gen2/3), as a "fence".
471 */
472 bool dirty;
467}; 473};
468 474
469struct sdvo_device_mapping { 475struct sdvo_device_mapping {
@@ -475,130 +481,6 @@ struct sdvo_device_mapping {
475 u8 ddc_pin; 481 u8 ddc_pin;
476}; 482};
477 483
478struct intel_display_error_state;
479
480struct drm_i915_error_state {
481 struct kref ref;
482 struct timeval time;
483
484 char error_msg[128];
485 bool simulated;
486 int iommu;
487 u32 reset_count;
488 u32 suspend_count;
489
490 /* Generic register state */
491 u32 eir;
492 u32 pgtbl_er;
493 u32 ier;
494 u32 gtier[4];
495 u32 ccid;
496 u32 derrmr;
497 u32 forcewake;
498 u32 error; /* gen6+ */
499 u32 err_int; /* gen7 */
500 u32 fault_data0; /* gen8, gen9 */
501 u32 fault_data1; /* gen8, gen9 */
502 u32 done_reg;
503 u32 gac_eco;
504 u32 gam_ecochk;
505 u32 gab_ctl;
506 u32 gfx_mode;
507 u32 extra_instdone[I915_NUM_INSTDONE_REG];
508 u64 fence[I915_MAX_NUM_FENCES];
509 struct intel_overlay_error_state *overlay;
510 struct intel_display_error_state *display;
511 struct drm_i915_error_object *semaphore_obj;
512
513 struct drm_i915_error_engine {
514 int engine_id;
515 /* Software tracked state */
516 bool waiting;
517 int num_waiters;
518 int hangcheck_score;
519 enum intel_engine_hangcheck_action hangcheck_action;
520 int num_requests;
521
522 /* our own tracking of ring head and tail */
523 u32 cpu_ring_head;
524 u32 cpu_ring_tail;
525
526 u32 last_seqno;
527 u32 semaphore_seqno[I915_NUM_ENGINES - 1];
528
529 /* Register state */
530 u32 start;
531 u32 tail;
532 u32 head;
533 u32 ctl;
534 u32 hws;
535 u32 ipeir;
536 u32 ipehr;
537 u32 instdone;
538 u32 bbstate;
539 u32 instpm;
540 u32 instps;
541 u32 seqno;
542 u64 bbaddr;
543 u64 acthd;
544 u32 fault_reg;
545 u64 faddr;
546 u32 rc_psmi; /* sleep state */
547 u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
548
549 struct drm_i915_error_object {
550 int page_count;
551 u64 gtt_offset;
552 u32 *pages[0];
553 } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
554
555 struct drm_i915_error_object *wa_ctx;
556
557 struct drm_i915_error_request {
558 long jiffies;
559 u32 seqno;
560 u32 tail;
561 } *requests;
562
563 struct drm_i915_error_waiter {
564 char comm[TASK_COMM_LEN];
565 pid_t pid;
566 u32 seqno;
567 } *waiters;
568
569 struct {
570 u32 gfx_mode;
571 union {
572 u64 pdp[4];
573 u32 pp_dir_base;
574 };
575 } vm_info;
576
577 pid_t pid;
578 char comm[TASK_COMM_LEN];
579 } engine[I915_NUM_ENGINES];
580
581 struct drm_i915_error_buffer {
582 u32 size;
583 u32 name;
584 u32 rseqno[I915_NUM_ENGINES], wseqno;
585 u64 gtt_offset;
586 u32 read_domains;
587 u32 write_domain;
588 s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
589 s32 pinned:2;
590 u32 tiling:2;
591 u32 dirty:1;
592 u32 purgeable:1;
593 u32 userptr:1;
594 s32 engine:4;
595 u32 cache_level:3;
596 } **active_bo, **pinned_bo;
597
598 u32 *active_bo_count, *pinned_bo_count;
599 u32 vm_count;
600};
601
602struct intel_connector; 484struct intel_connector;
603struct intel_encoder; 485struct intel_encoder;
604struct intel_crtc_state; 486struct intel_crtc_state;
@@ -793,6 +675,7 @@ struct intel_device_info {
793 u8 gen; 675 u8 gen;
794 u16 gen_mask; 676 u16 gen_mask;
795 u8 ring_mask; /* Rings supported by the HW */ 677 u8 ring_mask; /* Rings supported by the HW */
678 u8 num_rings;
796 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON); 679 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON);
797 /* Register offsets for the various display pipes and transcoders */ 680 /* Register offsets for the various display pipes and transcoders */
798 int pipe_offsets[I915_MAX_TRANSCODERS]; 681 int pipe_offsets[I915_MAX_TRANSCODERS];
@@ -822,6 +705,134 @@ struct intel_device_info {
822#undef DEFINE_FLAG 705#undef DEFINE_FLAG
823#undef SEP_SEMICOLON 706#undef SEP_SEMICOLON
824 707
708struct intel_display_error_state;
709
710struct drm_i915_error_state {
711 struct kref ref;
712 struct timeval time;
713
714 char error_msg[128];
715 bool simulated;
716 int iommu;
717 u32 reset_count;
718 u32 suspend_count;
719 struct intel_device_info device_info;
720
721 /* Generic register state */
722 u32 eir;
723 u32 pgtbl_er;
724 u32 ier;
725 u32 gtier[4];
726 u32 ccid;
727 u32 derrmr;
728 u32 forcewake;
729 u32 error; /* gen6+ */
730 u32 err_int; /* gen7 */
731 u32 fault_data0; /* gen8, gen9 */
732 u32 fault_data1; /* gen8, gen9 */
733 u32 done_reg;
734 u32 gac_eco;
735 u32 gam_ecochk;
736 u32 gab_ctl;
737 u32 gfx_mode;
738 u32 extra_instdone[I915_NUM_INSTDONE_REG];
739 u64 fence[I915_MAX_NUM_FENCES];
740 struct intel_overlay_error_state *overlay;
741 struct intel_display_error_state *display;
742 struct drm_i915_error_object *semaphore;
743
744 struct drm_i915_error_engine {
745 int engine_id;
746 /* Software tracked state */
747 bool waiting;
748 int num_waiters;
749 int hangcheck_score;
750 enum intel_engine_hangcheck_action hangcheck_action;
751 struct i915_address_space *vm;
752 int num_requests;
753
754 /* our own tracking of ring head and tail */
755 u32 cpu_ring_head;
756 u32 cpu_ring_tail;
757
758 u32 last_seqno;
759 u32 semaphore_seqno[I915_NUM_ENGINES - 1];
760
761 /* Register state */
762 u32 start;
763 u32 tail;
764 u32 head;
765 u32 ctl;
766 u32 mode;
767 u32 hws;
768 u32 ipeir;
769 u32 ipehr;
770 u32 instdone;
771 u32 bbstate;
772 u32 instpm;
773 u32 instps;
774 u32 seqno;
775 u64 bbaddr;
776 u64 acthd;
777 u32 fault_reg;
778 u64 faddr;
779 u32 rc_psmi; /* sleep state */
780 u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
781
782 struct drm_i915_error_object {
783 int page_count;
784 u64 gtt_offset;
785 u64 gtt_size;
786 u32 *pages[0];
787 } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
788
789 struct drm_i915_error_object *wa_ctx;
790
791 struct drm_i915_error_request {
792 long jiffies;
793 pid_t pid;
794 u32 seqno;
795 u32 head;
796 u32 tail;
797 } *requests;
798
799 struct drm_i915_error_waiter {
800 char comm[TASK_COMM_LEN];
801 pid_t pid;
802 u32 seqno;
803 } *waiters;
804
805 struct {
806 u32 gfx_mode;
807 union {
808 u64 pdp[4];
809 u32 pp_dir_base;
810 };
811 } vm_info;
812
813 pid_t pid;
814 char comm[TASK_COMM_LEN];
815 } engine[I915_NUM_ENGINES];
816
817 struct drm_i915_error_buffer {
818 u32 size;
819 u32 name;
820 u32 rseqno[I915_NUM_ENGINES], wseqno;
821 u64 gtt_offset;
822 u32 read_domains;
823 u32 write_domain;
824 s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
825 u32 tiling:2;
826 u32 dirty:1;
827 u32 purgeable:1;
828 u32 userptr:1;
829 s32 engine:4;
830 u32 cache_level:3;
831 } *active_bo[I915_NUM_ENGINES], *pinned_bo;
832 u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
833 struct i915_address_space *active_vm[I915_NUM_ENGINES];
834};
835
825enum i915_cache_level { 836enum i915_cache_level {
826 I915_CACHE_NONE = 0, 837 I915_CACHE_NONE = 0,
827 I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */ 838 I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
@@ -878,22 +889,23 @@ struct i915_gem_context {
878 struct drm_i915_private *i915; 889 struct drm_i915_private *i915;
879 struct drm_i915_file_private *file_priv; 890 struct drm_i915_file_private *file_priv;
880 struct i915_hw_ppgtt *ppgtt; 891 struct i915_hw_ppgtt *ppgtt;
892 struct pid *pid;
881 893
882 struct i915_ctx_hang_stats hang_stats; 894 struct i915_ctx_hang_stats hang_stats;
883 895
884 /* Unique identifier for this context, used by the hw for tracking */
885 unsigned long flags; 896 unsigned long flags;
886#define CONTEXT_NO_ZEROMAP BIT(0) 897#define CONTEXT_NO_ZEROMAP BIT(0)
887#define CONTEXT_NO_ERROR_CAPTURE BIT(1) 898#define CONTEXT_NO_ERROR_CAPTURE BIT(1)
888 unsigned hw_id; 899
900 /* Unique identifier for this context, used by the hw for tracking */
901 unsigned int hw_id;
889 u32 user_handle; 902 u32 user_handle;
890 903
891 u32 ggtt_alignment; 904 u32 ggtt_alignment;
892 905
893 struct intel_context { 906 struct intel_context {
894 struct drm_i915_gem_object *state; 907 struct i915_vma *state;
895 struct intel_ring *ring; 908 struct intel_ring *ring;
896 struct i915_vma *lrc_vma;
897 uint32_t *lrc_reg_state; 909 uint32_t *lrc_reg_state;
898 u64 lrc_desc; 910 u64 lrc_desc;
899 int pin_count; 911 int pin_count;
@@ -1061,13 +1073,6 @@ struct intel_gmbus {
1061 1073
1062struct i915_suspend_saved_registers { 1074struct i915_suspend_saved_registers {
1063 u32 saveDSPARB; 1075 u32 saveDSPARB;
1064 u32 saveLVDS;
1065 u32 savePP_ON_DELAYS;
1066 u32 savePP_OFF_DELAYS;
1067 u32 savePP_ON;
1068 u32 savePP_OFF;
1069 u32 savePP_CONTROL;
1070 u32 savePP_DIVISOR;
1071 u32 saveFBC_CONTROL; 1076 u32 saveFBC_CONTROL;
1072 u32 saveCACHE_MODE_0; 1077 u32 saveCACHE_MODE_0;
1073 u32 saveMI_ARB_STATE; 1078 u32 saveMI_ARB_STATE;
@@ -1749,12 +1754,14 @@ struct drm_i915_private {
1749 1754
1750 uint32_t psr_mmio_base; 1755 uint32_t psr_mmio_base;
1751 1756
1757 uint32_t pps_mmio_base;
1758
1752 wait_queue_head_t gmbus_wait_queue; 1759 wait_queue_head_t gmbus_wait_queue;
1753 1760
1754 struct pci_dev *bridge_dev; 1761 struct pci_dev *bridge_dev;
1755 struct i915_gem_context *kernel_context; 1762 struct i915_gem_context *kernel_context;
1756 struct intel_engine_cs engine[I915_NUM_ENGINES]; 1763 struct intel_engine_cs engine[I915_NUM_ENGINES];
1757 struct drm_i915_gem_object *semaphore_obj; 1764 struct i915_vma *semaphore;
1758 u32 next_seqno; 1765 u32 next_seqno;
1759 1766
1760 struct drm_dma_handle *status_page_dmah; 1767 struct drm_dma_handle *status_page_dmah;
@@ -1840,6 +1847,7 @@ struct drm_i915_private {
1840 enum modeset_restore modeset_restore; 1847 enum modeset_restore modeset_restore;
1841 struct mutex modeset_restore_lock; 1848 struct mutex modeset_restore_lock;
1842 struct drm_atomic_state *modeset_restore_state; 1849 struct drm_atomic_state *modeset_restore_state;
1850 struct drm_modeset_acquire_ctx reset_ctx;
1843 1851
1844 struct list_head vm_list; /* Global list of all address spaces */ 1852 struct list_head vm_list; /* Global list of all address spaces */
1845 struct i915_ggtt ggtt; /* VM representing the global address space */ 1853 struct i915_ggtt ggtt; /* VM representing the global address space */
@@ -2171,33 +2179,11 @@ struct drm_i915_gem_object {
2171 unsigned int dirty:1; 2179 unsigned int dirty:1;
2172 2180
2173 /** 2181 /**
2174 * Fence register bits (if any) for this object. Will be set
2175 * as needed when mapped into the GTT.
2176 * Protected by dev->struct_mutex.
2177 */
2178 signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
2179
2180 /**
2181 * Advice: are the backing pages purgeable? 2182 * Advice: are the backing pages purgeable?
2182 */ 2183 */
2183 unsigned int madv:2; 2184 unsigned int madv:2;
2184 2185
2185 /** 2186 /**
2186 * Whether the tiling parameters for the currently associated fence
2187 * register have changed. Note that for the purposes of tracking
2188 * tiling changes we also treat the unfenced register, the register
2189 * slot that the object occupies whilst it executes a fenced
2190 * command (such as BLT on gen2/3), as a "fence".
2191 */
2192 unsigned int fence_dirty:1;
2193
2194 /**
2195 * Is the object at the current location in the gtt mappable and
2196 * fenceable? Used to avoid costly recalculations.
2197 */
2198 unsigned int map_and_fenceable:1;
2199
2200 /**
2201 * Whether the current gtt mapping needs to be mappable (and isn't just 2187 * Whether the current gtt mapping needs to be mappable (and isn't just
2202 * mappable by accident). Track pin and fault separate for a more 2188 * mappable by accident). Track pin and fault separate for a more
2203 * accurate mappable working set. 2189 * accurate mappable working set.
@@ -2213,6 +2199,7 @@ struct drm_i915_gem_object {
2213 unsigned int cache_dirty:1; 2199 unsigned int cache_dirty:1;
2214 2200
2215 atomic_t frontbuffer_bits; 2201 atomic_t frontbuffer_bits;
2202 unsigned int frontbuffer_ggtt_origin; /* write once */
2216 2203
2217 /** Current tiling stride for the object, if it's tiled. */ 2204 /** Current tiling stride for the object, if it's tiled. */
2218 unsigned int tiling_and_stride; 2205 unsigned int tiling_and_stride;
@@ -2220,7 +2207,6 @@ struct drm_i915_gem_object {
2220#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) 2207#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
2221#define STRIDE_MASK (~TILING_MASK) 2208#define STRIDE_MASK (~TILING_MASK)
2222 2209
2223 unsigned int has_wc_mmap;
2224 /** Count of VMA actually bound by this object */ 2210 /** Count of VMA actually bound by this object */
2225 unsigned int bind_count; 2211 unsigned int bind_count;
2226 unsigned int pin_display; 2212 unsigned int pin_display;
@@ -2245,7 +2231,6 @@ struct drm_i915_gem_object {
2245 */ 2231 */
2246 struct i915_gem_active last_read[I915_NUM_ENGINES]; 2232 struct i915_gem_active last_read[I915_NUM_ENGINES];
2247 struct i915_gem_active last_write; 2233 struct i915_gem_active last_write;
2248 struct i915_gem_active last_fence;
2249 2234
2250 /** References from framebuffers, locks out tiling changes. */ 2235 /** References from framebuffers, locks out tiling changes. */
2251 unsigned long framebuffer_references; 2236 unsigned long framebuffer_references;
@@ -2375,6 +2360,18 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
2375 return obj->tiling_and_stride & STRIDE_MASK; 2360 return obj->tiling_and_stride & STRIDE_MASK;
2376} 2361}
2377 2362
2363static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
2364{
2365 i915_gem_object_get(vma->obj);
2366 return vma;
2367}
2368
2369static inline void i915_vma_put(struct i915_vma *vma)
2370{
2371 lockdep_assert_held(&vma->vm->dev->struct_mutex);
2372 i915_gem_object_put(vma->obj);
2373}
2374
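The pair above makes VMA lifetime handling explicit: i915_vma_get() takes a reference on the VMA's backing object, and i915_vma_put() drops it while asserting that the VM's struct_mutex is held. A minimal usage sketch, assuming the vma is already looked up; the wrapper function is illustrative:

/* Illustrative only: hold the backing object across a section where the
 * vma could otherwise disappear, then release it under struct_mutex.
 */
static void hold_vma_example(struct i915_vma *vma)
{
        struct drm_device *dev = vma->vm->dev;

        vma = i915_vma_get(vma);        /* i915_gem_object_get(vma->obj) */

        /* ... use vma->obj, vma->node.start, etc. ... */

        mutex_lock(&dev->struct_mutex);
        i915_vma_put(vma);              /* lockdep-checked, see above */
        mutex_unlock(&dev->struct_mutex);
}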
2378/* 2375/*
2379 * Optimised SGL iterator for GEM objects 2376 * Optimised SGL iterator for GEM objects
2380 */ 2377 */
@@ -3066,7 +3063,7 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
3066void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file); 3063void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
3067void i915_gem_free_object(struct drm_gem_object *obj); 3064void i915_gem_free_object(struct drm_gem_object *obj);
3068 3065
3069int __must_check 3066struct i915_vma * __must_check
3070i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 3067i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3071 const struct i915_ggtt_view *view, 3068 const struct i915_ggtt_view *view,
3072 u64 size, 3069 u64 size,
@@ -3085,9 +3082,6 @@ int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
3085void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); 3082void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
3086void i915_gem_release_mmap(struct drm_i915_gem_object *obj); 3083void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
3087 3084
3088int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
3089 int *needs_clflush);
3090
3091int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); 3085int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
3092 3086
3093static inline int __sg_page_count(struct scatterlist *sg) 3087static inline int __sg_page_count(struct scatterlist *sg)
@@ -3147,13 +3141,20 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
3147 obj->pages_pin_count--; 3141 obj->pages_pin_count--;
3148} 3142}
3149 3143
3144enum i915_map_type {
3145 I915_MAP_WB = 0,
3146 I915_MAP_WC,
3147};
3148
3150/** 3149/**
3151 * i915_gem_object_pin_map - return a contiguous mapping of the entire object 3150 * i915_gem_object_pin_map - return a contiguous mapping of the entire object
3152 * @obj - the object to map into kernel address space 3151 * @obj - the object to map into kernel address space
3152 * @type - the type of mapping, used to select pgprot_t
3153 * 3153 *
3154 * Calls i915_gem_object_pin_pages() to prevent reaping of the object's 3154 * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
3155 * pages and then returns a contiguous mapping of the backing storage into 3155 * pages and then returns a contiguous mapping of the backing storage into
3156 * the kernel address space. 3156 * the kernel address space. Based on the @type of mapping, the PTE will be
3157 * set to either WriteBack or WriteCombine (via pgprot_t).
3157 * 3158 *
3158 * The caller must hold the struct_mutex, and is responsible for calling 3159 * The caller must hold the struct_mutex, and is responsible for calling
3159 * i915_gem_object_unpin_map() when the mapping is no longer required. 3160 * i915_gem_object_unpin_map() when the mapping is no longer required.
@@ -3161,7 +3162,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
3161 * Returns the pointer through which to access the mapped object, or an 3162 * Returns the pointer through which to access the mapped object, or an
3162 * ERR_PTR() on error. 3163 * ERR_PTR() on error.
3163 */ 3164 */
3164void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj); 3165void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
3166 enum i915_map_type type);
3165 3167
3166/** 3168/**
3167 * i915_gem_object_unpin_map - releases an earlier mapping 3169 * i915_gem_object_unpin_map - releases an earlier mapping
@@ -3180,6 +3182,20 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
3180 i915_gem_object_unpin_pages(obj); 3182 i915_gem_object_unpin_pages(obj);
3181} 3183}
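The new enum i915_map_type argument lets callers pick the page protection of the contiguous kernel mapping: I915_MAP_WB for ordinary cached access, I915_MAP_WC when writes should bypass the CPU cache (e.g. for objects the GPU reads uncached). A minimal sketch of the calling convention documented above, assuming struct_mutex is held and the object has backing pages (the wrapper name is illustrative):

/* Illustrative only: map an object write-combined, fill it, release. */
static int fill_object_wc(struct drm_i915_gem_object *obj, u32 value)
{
        u32 *vaddr;
        unsigned long i;

        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(vaddr))
                return PTR_ERR(vaddr);

        for (i = 0; i < obj->base.size / sizeof(*vaddr); i++)
                vaddr[i] = value;

        i915_gem_object_unpin_map(obj); /* drops the page pin taken above */
        return 0;
}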
3182 3184
3185int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
3186 unsigned int *needs_clflush);
3187int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
3188 unsigned int *needs_clflush);
3189#define CLFLUSH_BEFORE 0x1
3190#define CLFLUSH_AFTER 0x2
3191#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
3192
3193static inline void
3194i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
3195{
3196 i915_gem_object_unpin_pages(obj);
3197}
3198
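Together with CLFLUSH_BEFORE/CLFLUSH_AFTER, these helpers replace the open-coded domain handling in the pread/pwrite paths: the prepare call leaves the pages pinned and reports which cache flushes the caller still owes, and i915_gem_obj_finish_shmem_access() drops the pin. A minimal sketch of a CPU write through this interface, assuming struct_mutex is held; drm_clflush_virt_range() stands in for whatever flush a real caller performs, and the function name is illustrative:

/* Illustrative only: write one dword via the shmem access helpers. */
static int poke_first_page(struct drm_i915_gem_object *obj, u32 value)
{
        unsigned int needs_clflush;
        struct page *page;
        u32 *vaddr;
        int ret;

        ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
        if (ret)
                return ret;     /* pages are pinned only on success */

        page = i915_gem_object_get_page(obj, 0);
        vaddr = kmap_atomic(page);
        if (needs_clflush & CLFLUSH_BEFORE)
                drm_clflush_virt_range(vaddr, PAGE_SIZE);
        vaddr[0] = value;
        if (needs_clflush & CLFLUSH_AFTER)
                drm_clflush_virt_range(vaddr, PAGE_SIZE);
        kunmap_atomic(vaddr);

        i915_gem_obj_finish_shmem_access(obj); /* unpins the pages */
        return 0;
}

A real caller would also flush the frontbuffer afterwards, as the rewritten i915_gem_shmem_pwrite() further down does.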
3183int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); 3199int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
3184int i915_gem_object_sync(struct drm_i915_gem_object *obj, 3200int i915_gem_object_sync(struct drm_i915_gem_object *obj,
3185 struct drm_i915_gem_request *to); 3201 struct drm_i915_gem_request *to);
@@ -3262,12 +3278,11 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
3262 bool write); 3278 bool write);
3263int __must_check 3279int __must_check
3264i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); 3280i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
3265int __must_check 3281struct i915_vma * __must_check
3266i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3282i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3267 u32 alignment, 3283 u32 alignment,
3268 const struct i915_ggtt_view *view); 3284 const struct i915_ggtt_view *view);
3269void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 3285void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
3270 const struct i915_ggtt_view *view);
3271int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 3286int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
3272 int align); 3287 int align);
3273int i915_gem_open(struct drm_device *dev, struct drm_file *file); 3288int i915_gem_open(struct drm_device *dev, struct drm_file *file);
@@ -3287,71 +3302,81 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
3287struct dma_buf *i915_gem_prime_export(struct drm_device *dev, 3302struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
3288 struct drm_gem_object *gem_obj, int flags); 3303 struct drm_gem_object *gem_obj, int flags);
3289 3304
3290u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
3291 const struct i915_ggtt_view *view);
3292u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
3293 struct i915_address_space *vm);
3294static inline u64
3295i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
3296{
3297 return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal);
3298}
3299
3300bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
3301 const struct i915_ggtt_view *view);
3302bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
3303 struct i915_address_space *vm);
3304
3305struct i915_vma * 3305struct i915_vma *
3306i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 3306i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
3307 struct i915_address_space *vm); 3307 struct i915_address_space *vm,
3308struct i915_vma * 3308 const struct i915_ggtt_view *view);
3309i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
3310 const struct i915_ggtt_view *view);
3311 3309
3312struct i915_vma * 3310struct i915_vma *
3313i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3311i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3314 struct i915_address_space *vm); 3312 struct i915_address_space *vm,
3315struct i915_vma * 3313 const struct i915_ggtt_view *view);
3316i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3317 const struct i915_ggtt_view *view);
3318
3319static inline struct i915_vma *
3320i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
3321{
3322 return i915_gem_obj_to_ggtt_view(obj, &i915_ggtt_view_normal);
3323}
3324bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj);
3325 3314
3326/* Some GGTT VM helpers */
3327static inline struct i915_hw_ppgtt * 3315static inline struct i915_hw_ppgtt *
3328i915_vm_to_ppgtt(struct i915_address_space *vm) 3316i915_vm_to_ppgtt(struct i915_address_space *vm)
3329{ 3317{
3330 return container_of(vm, struct i915_hw_ppgtt, base); 3318 return container_of(vm, struct i915_hw_ppgtt, base);
3331} 3319}
3332 3320
3333static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) 3321static inline struct i915_vma *
3322i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj,
3323 const struct i915_ggtt_view *view)
3334{ 3324{
3335 return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal); 3325 return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view);
3336} 3326}
3337 3327
3338unsigned long 3328static inline unsigned long
3339i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj); 3329i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
3340 3330 const struct i915_ggtt_view *view)
3341void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3342 const struct i915_ggtt_view *view);
3343static inline void
3344i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
3345{ 3331{
3346 i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal); 3332 return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view));
3347} 3333}
3348 3334
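These replace the old per-object GGTT queries with thin wrappers over the vma lookup: i915_gem_object_to_ggtt() returns the (optionally viewed) GGTT vma, and i915_gem_object_ggtt_offset() reads its offset via i915_ggtt_offset(). Combined with the vma-returning i915_gem_object_ggtt_pin() declared earlier in this header, a minimal sketch looks like the following (illustrative function; the NULL view follows the pin calls elsewhere in this patch, and DRM_DEBUG_DRIVER is just a convenient standard DRM log macro):

/* Illustrative only: pin into the mappable GGTT and report the offset. */
static int report_ggtt_offset(struct drm_i915_gem_object *obj)
{
        struct i915_vma *vma;

        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        DRM_DEBUG_DRIVER("object bound at GGTT offset 0x%llx\n",
                         (unsigned long long)i915_ggtt_offset(vma));

        i915_vma_unpin(vma);
        return 0;
}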
3349/* i915_gem_fence.c */ 3335/* i915_gem_fence.c */
3350int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj); 3336int __must_check i915_vma_get_fence(struct i915_vma *vma);
3351int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj); 3337int __must_check i915_vma_put_fence(struct i915_vma *vma);
3338
3339/**
3340 * i915_vma_pin_fence - pin fencing state
3341 * @vma: vma to pin fencing for
3342 *
3343 * This pins the fencing state (whether tiled or untiled) to make sure the
3344 * vma (and its object) is ready to be used as a scanout target. Fencing
3345 * status must be synchronized first by calling i915_vma_get_fence():
3346 *
3347 * The resulting fence pin reference must be released again with
3348 * i915_vma_unpin_fence().
3349 *
3350 * Returns:
3351 *
3352 * True if the vma has a fence, false otherwise.
3353 */
3354static inline bool
3355i915_vma_pin_fence(struct i915_vma *vma)
3356{
3357 if (vma->fence) {
3358 vma->fence->pin_count++;
3359 return true;
3360 } else
3361 return false;
3362}
3352 3363
3353bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj); 3364/**
3354void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj); 3365 * i915_vma_unpin_fence - unpin fencing state
3366 * @vma: vma to unpin fencing for
3367 *
3368 * This releases the fence pin reference acquired through
3369 * i915_vma_pin_fence. It will handle both objects with and without an
3370 * attached fence correctly; callers do not need to distinguish this.
3371 */
3372static inline void
3373i915_vma_unpin_fence(struct i915_vma *vma)
3374{
3375 if (vma->fence) {
3376 GEM_BUG_ON(vma->fence->pin_count <= 0);
3377 vma->fence->pin_count--;
3378 }
3379}
3355 3380
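This is the vma-based replacement for the removed i915_gem_object_{get,put,pin,unpin}_fence() API: i915_vma_get_fence() synchronizes or allocates the fence register for the vma, and the inline pin/unpin pair above keeps it from being reassigned while the hardware needs it. A minimal sketch under the assumptions that the vma is already pinned in the GGTT and struct_mutex is held (the wrapper function is illustrative):

/* Illustrative only: take the fence and hold it across a detiled access,
 * as described in the i915_vma_pin_fence() kernel-doc above.
 */
static int fenced_access_example(struct i915_vma *vma)
{
        int ret;

        ret = i915_vma_get_fence(vma);  /* sync/allocate the fence reg */
        if (ret)
                return ret;

        if (i915_vma_pin_fence(vma)) {
                /* ... fenced (tiled) access through the aperture ... */
                i915_vma_unpin_fence(vma);
        } else {
                /* no fence attached: the access proceeds untiled */
        }

        return 0;
}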
3356void i915_gem_restore_fences(struct drm_device *dev); 3381void i915_gem_restore_fences(struct drm_device *dev);
3357 3382
@@ -3429,6 +3454,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
3429/* belongs in i915_gem_gtt.h */ 3454/* belongs in i915_gem_gtt.h */
3430static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) 3455static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
3431{ 3456{
3457 wmb();
3432 if (INTEL_GEN(dev_priv) < 6) 3458 if (INTEL_GEN(dev_priv) < 6)
3433 intel_gtt_chipset_flush(); 3459 intel_gtt_chipset_flush();
3434} 3460}
@@ -3516,7 +3542,7 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
3516 3542
3517/* i915_cmd_parser.c */ 3543/* i915_cmd_parser.c */
3518int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); 3544int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
3519int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); 3545void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
3520void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); 3546void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
3521bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine); 3547bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine);
3522int intel_engine_cmd_parser(struct intel_engine_cs *engine, 3548int intel_engine_cmd_parser(struct intel_engine_cs *engine,
@@ -3848,7 +3874,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
3848 * is woken. 3874 * is woken.
3849 */ 3875 */
3850 if (engine->irq_seqno_barrier && 3876 if (engine->irq_seqno_barrier &&
3851 READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current && 3877 rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
3852 cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) { 3878 cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
3853 struct task_struct *tsk; 3879 struct task_struct *tsk;
3854 3880
@@ -3873,7 +3899,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
3873 * irq_posted == false but we are still running). 3899 * irq_posted == false but we are still running).
3874 */ 3900 */
3875 rcu_read_lock(); 3901 rcu_read_lock();
3876 tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh); 3902 tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
3877 if (tsk && tsk != current) 3903 if (tsk && tsk != current)
3878 /* Note that if the bottom-half is changed as we 3904 /* Note that if the bottom-half is changed as we
3879 * are sending the wake-up, the new bottom-half will 3905 * are sending the wake-up, the new bottom-half will
@@ -3902,4 +3928,32 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
3902 return false; 3928 return false;
3903} 3929}
3904 3930
3931void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
3932bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
3933
3934/* i915_mm.c */
3935int remap_io_mapping(struct vm_area_struct *vma,
3936 unsigned long addr, unsigned long pfn, unsigned long size,
3937 struct io_mapping *iomap);
3938
3939#define ptr_mask_bits(ptr) ({ \
3940 unsigned long __v = (unsigned long)(ptr); \
3941 (typeof(ptr))(__v & PAGE_MASK); \
3942})
3943
3944#define ptr_unpack_bits(ptr, bits) ({ \
3945 unsigned long __v = (unsigned long)(ptr); \
3946 (bits) = __v & ~PAGE_MASK; \
3947 (typeof(ptr))(__v & PAGE_MASK); \
3948})
3949
3950#define ptr_pack_bits(ptr, bits) \
3951 ((typeof(ptr))((unsigned long)(ptr) | (bits)))
3952
3953#define fetch_and_zero(ptr) ({ \
3954 typeof(*ptr) __T = *(ptr); \
3955 *(ptr) = (typeof(*ptr))0; \
3956 __T; \
3957})
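These helpers assume the pointer is page-aligned (PAGE_MASK is the mask), which leaves the low bits free to carry a small flag or type value, and fetch_and_zero() reads a variable and clears it in one expression. A minimal usage sketch; the slot/type names are illustrative, the stored value must be smaller than PAGE_SIZE, and callers provide their own locking (these macros add none):

/* Illustrative only: stash a small type tag in the low bits of a
 * page-aligned mapping, then consume and clear the cached slot later.
 */
static void *pack_example(void *page_aligned_ptr, unsigned int type)
{
        return ptr_pack_bits(page_aligned_ptr, type);
}

static void *unpack_example(void **slot, unsigned int *type)
{
        void *packed = fetch_and_zero(slot);    /* read slot, leave it NULL */

        return ptr_unpack_bits(packed, *type);  /* *type <- low bits */
}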
3958
3905#endif 3959#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4f8eaa90f2a..04607d4115d6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -279,16 +279,25 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
279 .release = i915_gem_object_release_phys, 279 .release = i915_gem_object_release_phys,
280}; 280};
281 281
282int 282int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
283i915_gem_object_unbind(struct drm_i915_gem_object *obj)
284{ 283{
285 struct i915_vma *vma; 284 struct i915_vma *vma;
286 LIST_HEAD(still_in_list); 285 LIST_HEAD(still_in_list);
287 int ret; 286 int ret;
288 287
289 /* The vma will only be freed if it is marked as closed, and if we wait 288 lockdep_assert_held(&obj->base.dev->struct_mutex);
290 * upon rendering to the vma, we may unbind anything in the list. 289
290 /* Closed vma are removed from the obj->vma_list - but they may
291 * still have an active binding on the object. To remove those we
292 * must wait for all rendering to complete to the object (as unbinding
293 * must anyway), and retire the requests.
291 */ 294 */
295 ret = i915_gem_object_wait_rendering(obj, false);
296 if (ret)
297 return ret;
298
299 i915_gem_retire_requests(to_i915(obj->base.dev));
300
292 while ((vma = list_first_entry_or_null(&obj->vma_list, 301 while ((vma = list_first_entry_or_null(&obj->vma_list,
293 struct i915_vma, 302 struct i915_vma,
294 obj_link))) { 303 obj_link))) {
@@ -600,34 +609,106 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
600 * flush the object from the CPU cache. 609 * flush the object from the CPU cache.
601 */ 610 */
602int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 611int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
603 int *needs_clflush) 612 unsigned int *needs_clflush)
604{ 613{
605 int ret; 614 int ret;
606 615
607 *needs_clflush = 0; 616 *needs_clflush = 0;
608 617
609 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 618 if (!i915_gem_object_has_struct_page(obj))
610 return -EINVAL; 619 return -ENODEV;
611 620
612 ret = i915_gem_object_wait_rendering(obj, true); 621 ret = i915_gem_object_wait_rendering(obj, true);
613 if (ret) 622 if (ret)
614 return ret; 623 return ret;
615 624
616 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 625 ret = i915_gem_object_get_pages(obj);
617 /* If we're not in the cpu read domain, set ourself into the gtt 626 if (ret)
618 * read domain and manually flush cachelines (if required). This 627 return ret;
619 * optimizes for the case when the gpu will dirty the data 628
620 * anyway again before the next pread happens. */ 629 i915_gem_object_pin_pages(obj);
630
631 i915_gem_object_flush_gtt_write_domain(obj);
632
633 /* If we're not in the cpu read domain, set ourself into the gtt
634 * read domain and manually flush cachelines (if required). This
635 * optimizes for the case when the gpu will dirty the data
636 * anyway again before the next pread happens.
637 */
638 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
621 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 639 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
622 obj->cache_level); 640 obj->cache_level);
641
642 if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
643 ret = i915_gem_object_set_to_cpu_domain(obj, false);
644 if (ret)
645 goto err_unpin;
646
647 *needs_clflush = 0;
623 } 648 }
624 649
650 /* return with the pages pinned */
651 return 0;
652
653err_unpin:
654 i915_gem_object_unpin_pages(obj);
655 return ret;
656}
657
658int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
659 unsigned int *needs_clflush)
660{
661 int ret;
662
663 *needs_clflush = 0;
664 if (!i915_gem_object_has_struct_page(obj))
665 return -ENODEV;
666
667 ret = i915_gem_object_wait_rendering(obj, false);
668 if (ret)
669 return ret;
670
625 ret = i915_gem_object_get_pages(obj); 671 ret = i915_gem_object_get_pages(obj);
626 if (ret) 672 if (ret)
627 return ret; 673 return ret;
628 674
629 i915_gem_object_pin_pages(obj); 675 i915_gem_object_pin_pages(obj);
630 676
677 i915_gem_object_flush_gtt_write_domain(obj);
678
679 /* If we're not in the cpu write domain, set ourself into the
680 * gtt write domain and manually flush cachelines (as required).
681 * This optimizes for the case when the gpu will use the data
682 * right away and we therefore have to clflush anyway.
683 */
684 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
685 *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
686
687 /* Same trick applies to invalidate partially written cachelines read
688 * before writing.
689 */
690 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
691 *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
692 obj->cache_level);
693
694 if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
695 ret = i915_gem_object_set_to_cpu_domain(obj, true);
696 if (ret)
697 goto err_unpin;
698
699 *needs_clflush = 0;
700 }
701
702 if ((*needs_clflush & CLFLUSH_AFTER) == 0)
703 obj->cache_dirty = true;
704
705 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
706 obj->dirty = 1;
707 /* return with the pages pinned */
708 return 0;
709
710err_unpin:
711 i915_gem_object_unpin_pages(obj);
631 return ret; 712 return ret;
632} 713}
633 714
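The bitmask returned by the prepare helpers maps directly onto the flags declared in i915_drv.h: bit 0 is CLFLUSH_BEFORE (the CPU cache may hold stale or partial lines that must be flushed before reading or partially overwriting), and the "<< 1" above lands cpu_write_needs_clflush() in bit 1, CLFLUSH_AFTER (the write must be flushed out so the GPU sees it). A small sketch of the composition, with illustrative parameter names standing in for the two predicates:

/* Illustrative only: how the write path above composes needs_clflush. */
static unsigned int compose_write_flags(bool write_needs_flush_after,
                                        bool cache_not_coherent)
{
        unsigned int needs_clflush = 0;

        needs_clflush |= write_needs_flush_after << 1;  /* CLFLUSH_AFTER  (0x2) */
        needs_clflush |= cache_not_coherent;            /* CLFLUSH_BEFORE (0x1) */

        return needs_clflush & CLFLUSH_FLAGS;
}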
@@ -737,14 +818,24 @@ i915_gem_gtt_pread(struct drm_device *dev,
737{ 818{
738 struct drm_i915_private *dev_priv = to_i915(dev); 819 struct drm_i915_private *dev_priv = to_i915(dev);
739 struct i915_ggtt *ggtt = &dev_priv->ggtt; 820 struct i915_ggtt *ggtt = &dev_priv->ggtt;
821 struct i915_vma *vma;
740 struct drm_mm_node node; 822 struct drm_mm_node node;
741 char __user *user_data; 823 char __user *user_data;
742 uint64_t remain; 824 uint64_t remain;
743 uint64_t offset; 825 uint64_t offset;
744 int ret; 826 int ret;
745 827
746 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); 828 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
747 if (ret) { 829 if (!IS_ERR(vma)) {
830 node.start = i915_ggtt_offset(vma);
831 node.allocated = false;
832 ret = i915_vma_put_fence(vma);
833 if (ret) {
834 i915_vma_unpin(vma);
835 vma = ERR_PTR(ret);
836 }
837 }
838 if (IS_ERR(vma)) {
748 ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); 839 ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
749 if (ret) 840 if (ret)
750 goto out; 841 goto out;
@@ -756,12 +847,6 @@ i915_gem_gtt_pread(struct drm_device *dev,
756 } 847 }
757 848
758 i915_gem_object_pin_pages(obj); 849 i915_gem_object_pin_pages(obj);
759 } else {
760 node.start = i915_gem_obj_ggtt_offset(obj);
761 node.allocated = false;
762 ret = i915_gem_object_put_fence(obj);
763 if (ret)
764 goto out_unpin;
765 } 850 }
766 851
767 ret = i915_gem_object_set_to_gtt_domain(obj, false); 852 ret = i915_gem_object_set_to_gtt_domain(obj, false);
@@ -806,7 +891,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
806 * and write to user memory which may result into page 891 * and write to user memory which may result into page
807 * faults, and so we cannot perform this under struct_mutex. 892 * faults, and so we cannot perform this under struct_mutex.
808 */ 893 */
809 if (slow_user_access(ggtt->mappable, page_base, 894 if (slow_user_access(&ggtt->mappable, page_base,
810 page_offset, user_data, 895 page_offset, user_data,
811 page_length, false)) { 896 page_length, false)) {
812 ret = -EFAULT; 897 ret = -EFAULT;
@@ -838,7 +923,7 @@ out_unpin:
838 i915_gem_object_unpin_pages(obj); 923 i915_gem_object_unpin_pages(obj);
839 remove_mappable_node(&node); 924 remove_mappable_node(&node);
840 } else { 925 } else {
841 i915_gem_object_ggtt_unpin(obj); 926 i915_vma_unpin(vma);
842 } 927 }
843out: 928out:
844 return ret; 929 return ret;
@@ -859,19 +944,14 @@ i915_gem_shmem_pread(struct drm_device *dev,
859 int needs_clflush = 0; 944 int needs_clflush = 0;
860 struct sg_page_iter sg_iter; 945 struct sg_page_iter sg_iter;
861 946
862 if (!i915_gem_object_has_struct_page(obj))
863 return -ENODEV;
864
865 user_data = u64_to_user_ptr(args->data_ptr);
866 remain = args->size;
867
868 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
869
870 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 947 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
871 if (ret) 948 if (ret)
872 return ret; 949 return ret;
873 950
951 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
952 user_data = u64_to_user_ptr(args->data_ptr);
874 offset = args->offset; 953 offset = args->offset;
954 remain = args->size;
875 955
876 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 956 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
877 offset >> PAGE_SHIFT) { 957 offset >> PAGE_SHIFT) {
@@ -927,7 +1007,7 @@ next_page:
927 } 1007 }
928 1008
929out: 1009out:
930 i915_gem_object_unpin_pages(obj); 1010 i915_gem_obj_finish_shmem_access(obj);
931 1011
932 return ret; 1012 return ret;
933} 1013}
@@ -1036,6 +1116,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
1036{ 1116{
1037 struct i915_ggtt *ggtt = &i915->ggtt; 1117 struct i915_ggtt *ggtt = &i915->ggtt;
1038 struct drm_device *dev = obj->base.dev; 1118 struct drm_device *dev = obj->base.dev;
1119 struct i915_vma *vma;
1039 struct drm_mm_node node; 1120 struct drm_mm_node node;
1040 uint64_t remain, offset; 1121 uint64_t remain, offset;
1041 char __user *user_data; 1122 char __user *user_data;
@@ -1045,9 +1126,18 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
1045 if (i915_gem_object_is_tiled(obj)) 1126 if (i915_gem_object_is_tiled(obj))
1046 return -EFAULT; 1127 return -EFAULT;
1047 1128
1048 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1129 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1049 PIN_MAPPABLE | PIN_NONBLOCK); 1130 PIN_MAPPABLE | PIN_NONBLOCK);
1050 if (ret) { 1131 if (!IS_ERR(vma)) {
1132 node.start = i915_ggtt_offset(vma);
1133 node.allocated = false;
1134 ret = i915_vma_put_fence(vma);
1135 if (ret) {
1136 i915_vma_unpin(vma);
1137 vma = ERR_PTR(ret);
1138 }
1139 }
1140 if (IS_ERR(vma)) {
1051 ret = insert_mappable_node(i915, &node, PAGE_SIZE); 1141 ret = insert_mappable_node(i915, &node, PAGE_SIZE);
1052 if (ret) 1142 if (ret)
1053 goto out; 1143 goto out;
@@ -1059,19 +1149,13 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
1059 } 1149 }
1060 1150
1061 i915_gem_object_pin_pages(obj); 1151 i915_gem_object_pin_pages(obj);
1062 } else {
1063 node.start = i915_gem_obj_ggtt_offset(obj);
1064 node.allocated = false;
1065 ret = i915_gem_object_put_fence(obj);
1066 if (ret)
1067 goto out_unpin;
1068 } 1152 }
1069 1153
1070 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1154 ret = i915_gem_object_set_to_gtt_domain(obj, true);
1071 if (ret) 1155 if (ret)
1072 goto out_unpin; 1156 goto out_unpin;
1073 1157
1074 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 1158 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1075 obj->dirty = true; 1159 obj->dirty = true;
1076 1160
1077 user_data = u64_to_user_ptr(args->data_ptr); 1161 user_data = u64_to_user_ptr(args->data_ptr);
@@ -1103,11 +1187,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
1103 * If the object is non-shmem backed, we retry again with the 1187 * If the object is non-shmem backed, we retry again with the
1104 * path that handles page fault. 1188 * path that handles page fault.
1105 */ 1189 */
1106 if (fast_user_write(ggtt->mappable, page_base, 1190 if (fast_user_write(&ggtt->mappable, page_base,
1107 page_offset, user_data, page_length)) { 1191 page_offset, user_data, page_length)) {
1108 hit_slow_path = true; 1192 hit_slow_path = true;
1109 mutex_unlock(&dev->struct_mutex); 1193 mutex_unlock(&dev->struct_mutex);
1110 if (slow_user_access(ggtt->mappable, 1194 if (slow_user_access(&ggtt->mappable,
1111 page_base, 1195 page_base,
1112 page_offset, user_data, 1196 page_offset, user_data,
1113 page_length, true)) { 1197 page_length, true)) {
@@ -1138,7 +1222,7 @@ out_flush:
1138 } 1222 }
1139 } 1223 }
1140 1224
1141 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 1225 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1142out_unpin: 1226out_unpin:
1143 if (node.allocated) { 1227 if (node.allocated) {
1144 wmb(); 1228 wmb();
@@ -1148,7 +1232,7 @@ out_unpin:
1148 i915_gem_object_unpin_pages(obj); 1232 i915_gem_object_unpin_pages(obj);
1149 remove_mappable_node(&node); 1233 remove_mappable_node(&node);
1150 } else { 1234 } else {
1151 i915_gem_object_ggtt_unpin(obj); 1235 i915_vma_unpin(vma);
1152 } 1236 }
1153out: 1237out:
1154 return ret; 1238 return ret;
@@ -1231,42 +1315,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
1231 int shmem_page_offset, page_length, ret = 0; 1315 int shmem_page_offset, page_length, ret = 0;
1232 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 1316 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1233 int hit_slowpath = 0; 1317 int hit_slowpath = 0;
1234 int needs_clflush_after = 0; 1318 unsigned int needs_clflush;
1235 int needs_clflush_before = 0;
1236 struct sg_page_iter sg_iter; 1319 struct sg_page_iter sg_iter;
1237 1320
1238 user_data = u64_to_user_ptr(args->data_ptr); 1321 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1239 remain = args->size;
1240
1241 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1242
1243 ret = i915_gem_object_wait_rendering(obj, false);
1244 if (ret) 1322 if (ret)
1245 return ret; 1323 return ret;
1246 1324
1247 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1325 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1248 /* If we're not in the cpu write domain, set ourself into the gtt 1326 user_data = u64_to_user_ptr(args->data_ptr);
1249 * write domain and manually flush cachelines (if required). This
1250 * optimizes for the case when the gpu will use the data
1251 * right away and we therefore have to clflush anyway. */
1252 needs_clflush_after = cpu_write_needs_clflush(obj);
1253 }
1254 /* Same trick applies to invalidate partially written cachelines read
1255 * before writing. */
1256 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1257 needs_clflush_before =
1258 !cpu_cache_is_coherent(dev, obj->cache_level);
1259
1260 ret = i915_gem_object_get_pages(obj);
1261 if (ret)
1262 return ret;
1263
1264 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1265
1266 i915_gem_object_pin_pages(obj);
1267
1268 offset = args->offset; 1327 offset = args->offset;
1269 obj->dirty = 1; 1328 remain = args->size;
1270 1329
1271 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 1330 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1272 offset >> PAGE_SHIFT) { 1331 offset >> PAGE_SHIFT) {
@@ -1290,7 +1349,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
1290 /* If we don't overwrite a cacheline completely we need to be 1349 /* If we don't overwrite a cacheline completely we need to be
1291 * careful to have up-to-date data by first clflushing. Don't 1350 * careful to have up-to-date data by first clflushing. Don't
1292 * overcomplicate things and flush the entire patch. */ 1351 * overcomplicate things and flush the entire patch. */
1293 partial_cacheline_write = needs_clflush_before && 1352 partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
1294 ((shmem_page_offset | page_length) 1353 ((shmem_page_offset | page_length)
1295 & (boot_cpu_data.x86_clflush_size - 1)); 1354 & (boot_cpu_data.x86_clflush_size - 1));
1296 1355
@@ -1300,7 +1359,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
1300 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 1359 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1301 user_data, page_do_bit17_swizzling, 1360 user_data, page_do_bit17_swizzling,
1302 partial_cacheline_write, 1361 partial_cacheline_write,
1303 needs_clflush_after); 1362 needs_clflush & CLFLUSH_AFTER);
1304 if (ret == 0) 1363 if (ret == 0)
1305 goto next_page; 1364 goto next_page;
1306 1365
@@ -1309,7 +1368,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
1309 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 1368 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1310 user_data, page_do_bit17_swizzling, 1369 user_data, page_do_bit17_swizzling,
1311 partial_cacheline_write, 1370 partial_cacheline_write,
1312 needs_clflush_after); 1371 needs_clflush & CLFLUSH_AFTER);
1313 1372
1314 mutex_lock(&dev->struct_mutex); 1373 mutex_lock(&dev->struct_mutex);
1315 1374
@@ -1323,7 +1382,7 @@ next_page:
1323 } 1382 }
1324 1383
1325out: 1384out:
1326 i915_gem_object_unpin_pages(obj); 1385 i915_gem_obj_finish_shmem_access(obj);
1327 1386
1328 if (hit_slowpath) { 1387 if (hit_slowpath) {
1329 /* 1388 /*
@@ -1331,17 +1390,15 @@ out:
1331 * cachelines in-line while writing and the object moved 1390 * cachelines in-line while writing and the object moved
1332 * out of the cpu write domain while we've dropped the lock. 1391 * out of the cpu write domain while we've dropped the lock.
1333 */ 1392 */
1334 if (!needs_clflush_after && 1393 if (!(needs_clflush & CLFLUSH_AFTER) &&
1335 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1394 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1336 if (i915_gem_clflush_object(obj, obj->pin_display)) 1395 if (i915_gem_clflush_object(obj, obj->pin_display))
1337 needs_clflush_after = true; 1396 needs_clflush |= CLFLUSH_AFTER;
1338 } 1397 }
1339 } 1398 }
1340 1399
1341 if (needs_clflush_after) 1400 if (needs_clflush & CLFLUSH_AFTER)
1342 i915_gem_chipset_flush(to_i915(dev)); 1401 i915_gem_chipset_flush(to_i915(dev));
1343 else
1344 obj->cache_dirty = true;
1345 1402
1346 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1403 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1347 return ret; 1404 return ret;
@@ -1420,10 +1477,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1420 if (ret == -EFAULT || ret == -ENOSPC) { 1477 if (ret == -EFAULT || ret == -ENOSPC) {
1421 if (obj->phys_handle) 1478 if (obj->phys_handle)
1422 ret = i915_gem_phys_pwrite(obj, args, file); 1479 ret = i915_gem_phys_pwrite(obj, args, file);
1423 else if (i915_gem_object_has_struct_page(obj))
1424 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1425 else 1480 else
1426 ret = -ENODEV; 1481 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1427 } 1482 }
1428 1483
1429 i915_gem_object_put(obj); 1484 i915_gem_object_put(obj);
@@ -1439,11 +1494,11 @@ err:
1439 return ret; 1494 return ret;
1440} 1495}
1441 1496
1442static enum fb_op_origin 1497static inline enum fb_op_origin
1443write_origin(struct drm_i915_gem_object *obj, unsigned domain) 1498write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1444{ 1499{
1445 return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ? 1500 return (domain == I915_GEM_DOMAIN_GTT ?
1446 ORIGIN_GTT : ORIGIN_CPU; 1501 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
1447} 1502}
1448 1503
1449/** 1504/**
@@ -1603,7 +1658,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1603 up_write(&mm->mmap_sem); 1658 up_write(&mm->mmap_sem);
1604 1659
1605 /* This may race, but that's ok, it only gets set */ 1660 /* This may race, but that's ok, it only gets set */
1606 WRITE_ONCE(obj->has_wc_mmap, true); 1661 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1607 } 1662 }
1608 i915_gem_object_put_unlocked(obj); 1663 i915_gem_object_put_unlocked(obj);
1609 if (IS_ERR((void *)addr)) 1664 if (IS_ERR((void *)addr))
@@ -1614,9 +1669,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1614 return 0; 1669 return 0;
1615} 1670}
1616 1671
1672static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1673{
1674 u64 size;
1675
1676 size = i915_gem_object_get_stride(obj);
1677 size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
1678
1679 return size >> PAGE_SHIFT;
1680}
1681
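tile_row_pages() sizes the partial-view chunk used by the fault handler below so that it covers whole tile rows: the object's tiling stride multiplied by the tile height (32 rows for Y tiling, 8 otherwise), converted to pages. As rough, illustrative arithmetic assuming 4 KiB pages: a Y-tiled surface with a 16 KiB stride has 16384 * 32 = 512 KiB (128 pages) per tile row, still below the 1 MiB MIN_CHUNK_PAGES minimum (256 pages) used below, so for Y tiling only strides above 32 KiB grow the chunk beyond 1 MiB.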
1617/** 1682/**
1618 * i915_gem_fault - fault a page into the GTT 1683 * i915_gem_fault - fault a page into the GTT
1619 * @vma: VMA in question 1684 * @area: CPU VMA in question
1620 * @vmf: fault info 1685 * @vmf: fault info
1621 * 1686 *
1622 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1687 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
@@ -1630,20 +1695,21 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1630 * suffer if the GTT working set is large or there are few fence registers 1695 * suffer if the GTT working set is large or there are few fence registers
1631 * left. 1696 * left.
1632 */ 1697 */
1633int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1698int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
1634{ 1699{
1635 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1700#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
1701 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1636 struct drm_device *dev = obj->base.dev; 1702 struct drm_device *dev = obj->base.dev;
1637 struct drm_i915_private *dev_priv = to_i915(dev); 1703 struct drm_i915_private *dev_priv = to_i915(dev);
1638 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1704 struct i915_ggtt *ggtt = &dev_priv->ggtt;
1639 struct i915_ggtt_view view = i915_ggtt_view_normal;
1640 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1705 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1706 struct i915_vma *vma;
1641 pgoff_t page_offset; 1707 pgoff_t page_offset;
1642 unsigned long pfn; 1708 unsigned int flags;
1643 int ret; 1709 int ret;
1644 1710
1645 /* We don't use vmf->pgoff since that has the fake offset */ 1711 /* We don't use vmf->pgoff since that has the fake offset */
1646 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1712 page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
1647 PAGE_SHIFT; 1713 PAGE_SHIFT;
1648 1714
1649 trace_i915_gem_object_fault(obj, page_offset, true, write); 1715 trace_i915_gem_object_fault(obj, page_offset, true, write);
@@ -1669,79 +1735,71 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1669 goto err_unlock; 1735 goto err_unlock;
1670 } 1736 }
1671 1737
1672 /* Use a partial view if the object is bigger than the aperture. */ 1738 /* If the object is smaller than a couple of partial vma, it is
1673 if (obj->base.size >= ggtt->mappable_end && 1739 * not worth only creating a single partial vma - we may as well
1674 !i915_gem_object_is_tiled(obj)) { 1740 * clear enough space for the full object.
1675 static const unsigned int chunk_size = 256; // 1 MiB 1741 */
1742 flags = PIN_MAPPABLE;
1743 if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
1744 flags |= PIN_NONBLOCK | PIN_NONFAULT;
1745
1746 /* Now pin it into the GTT as needed */
1747 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
1748 if (IS_ERR(vma)) {
1749 struct i915_ggtt_view view;
1750 unsigned int chunk_size;
1751
1752 /* Use a partial view if it is bigger than available space */
1753 chunk_size = MIN_CHUNK_PAGES;
1754 if (i915_gem_object_is_tiled(obj))
1755 chunk_size = max(chunk_size, tile_row_pages(obj));
1676 1756
1677 memset(&view, 0, sizeof(view)); 1757 memset(&view, 0, sizeof(view));
1678 view.type = I915_GGTT_VIEW_PARTIAL; 1758 view.type = I915_GGTT_VIEW_PARTIAL;
1679 view.params.partial.offset = rounddown(page_offset, chunk_size); 1759 view.params.partial.offset = rounddown(page_offset, chunk_size);
1680 view.params.partial.size = 1760 view.params.partial.size =
1681 min_t(unsigned int, 1761 min_t(unsigned int, chunk_size,
1682 chunk_size, 1762 (area->vm_end - area->vm_start) / PAGE_SIZE -
1683 (vma->vm_end - vma->vm_start)/PAGE_SIZE -
1684 view.params.partial.offset); 1763 view.params.partial.offset);
1685 }
1686 1764
1687 /* Now pin it into the GTT if needed */ 1765 /* If the partial covers the entire object, just create a
1688 ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); 1766 * normal VMA.
1689 if (ret) 1767 */
1768 if (chunk_size >= obj->base.size >> PAGE_SHIFT)
1769 view.type = I915_GGTT_VIEW_NORMAL;
1770
1771 /* Userspace is now writing through an untracked VMA, abandon
1772 * all hope that the hardware is able to track future writes.
1773 */
1774 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1775
1776 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1777 }
1778 if (IS_ERR(vma)) {
1779 ret = PTR_ERR(vma);
1690 goto err_unlock; 1780 goto err_unlock;
1781 }
1691 1782
1692 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1783 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1693 if (ret) 1784 if (ret)
1694 goto err_unpin; 1785 goto err_unpin;
1695 1786
1696 ret = i915_gem_object_get_fence(obj); 1787 ret = i915_vma_get_fence(vma);
1697 if (ret) 1788 if (ret)
1698 goto err_unpin; 1789 goto err_unpin;
1699 1790
1700 /* Finally, remap it using the new GTT offset */ 1791 /* Finally, remap it using the new GTT offset */
1701 pfn = ggtt->mappable_base + 1792 ret = remap_io_mapping(area,
1702 i915_gem_obj_ggtt_offset_view(obj, &view); 1793 area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
1703 pfn >>= PAGE_SHIFT; 1794 (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
1704 1795 min_t(u64, vma->size, area->vm_end - area->vm_start),
1705 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1796 &ggtt->mappable);
1706 /* Overriding existing pages in partial view does not cause 1797 if (ret)
1707 * us any trouble as TLBs are still valid because the fault 1798 goto err_unpin;
1708 * is due to userspace losing part of the mapping or never
1709 * having accessed it before (at this partial's range).
1710 */
1711 unsigned long base = vma->vm_start +
1712 (view.params.partial.offset << PAGE_SHIFT);
1713 unsigned int i;
1714
1715 for (i = 0; i < view.params.partial.size; i++) {
1716 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
1717 if (ret)
1718 break;
1719 }
1720
1721 obj->fault_mappable = true;
1722 } else {
1723 if (!obj->fault_mappable) {
1724 unsigned long size = min_t(unsigned long,
1725 vma->vm_end - vma->vm_start,
1726 obj->base.size);
1727 int i;
1728
1729 for (i = 0; i < size >> PAGE_SHIFT; i++) {
1730 ret = vm_insert_pfn(vma,
1731 (unsigned long)vma->vm_start + i * PAGE_SIZE,
1732 pfn + i);
1733 if (ret)
1734 break;
1735 }
1736 1799
1737 obj->fault_mappable = true; 1800 obj->fault_mappable = true;
1738 } else
1739 ret = vm_insert_pfn(vma,
1740 (unsigned long)vmf->virtual_address,
1741 pfn + page_offset);
1742 }
1743err_unpin: 1801err_unpin:
1744 i915_gem_object_ggtt_unpin_view(obj, &view); 1802 __i915_vma_unpin(vma);
1745err_unlock: 1803err_unlock:
1746 mutex_unlock(&dev->struct_mutex); 1804 mutex_unlock(&dev->struct_mutex);
1747err_rpm: 1805err_rpm:
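
The reworked fault path above only falls back to a partial GGTT view when the full mappable pin fails: the faulting page offset is rounded down to a chunk boundary (MIN_CHUNK_PAGES, i.e. 1 MiB worth of pages, enlarged to at least one tile row for tiled objects), the view size is clamped to what remains of the CPU VMA, and if the chunk ends up covering the whole object a normal view is used instead. A standalone sketch of the offset/size computation, assuming 4 KiB pages and illustrative names:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define MIN_CHUNK_PAGES ((1u << 20) >> PAGE_SHIFT) /* 1 MiB == 256 pages */

    static uint64_t rounddown(uint64_t x, uint64_t mult) { return x - (x % mult); }
    static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

    int main(void)
    {
        uint64_t page_offset = 1000;   /* faulting page within the object */
        uint64_t vma_pages = 4096;     /* pages covered by the CPU mapping */
        uint64_t chunk = MIN_CHUNK_PAGES;

        uint64_t view_offset = rounddown(page_offset, chunk);
        uint64_t view_size = min_u64(chunk, vma_pages - view_offset);

        /* fault at page 1000 -> partial view covering pages [768, 1024) */
        printf("offset=%llu size=%llu\n",
               (unsigned long long)view_offset,
               (unsigned long long)view_size);
        return 0;
    }
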
@@ -2077,10 +2135,14 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2077 list_del(&obj->global_list); 2135 list_del(&obj->global_list);
2078 2136
2079 if (obj->mapping) { 2137 if (obj->mapping) {
2080 if (is_vmalloc_addr(obj->mapping)) 2138 void *ptr;
2081 vunmap(obj->mapping); 2139
2140 ptr = ptr_mask_bits(obj->mapping);
2141 if (is_vmalloc_addr(ptr))
2142 vunmap(ptr);
2082 else 2143 else
2083 kunmap(kmap_to_page(obj->mapping)); 2144 kunmap(kmap_to_page(ptr));
2145
2084 obj->mapping = NULL; 2146 obj->mapping = NULL;
2085 } 2147 }
2086 2148
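
The teardown above now masks bits out of obj->mapping before vunmap()/kunmap(): ptr_pack_bits()/ptr_mask_bits() presumably stash the I915_MAP_* type in the low bits that a page-aligned vmap or kmap address never uses. A standalone model of that low-bit tagging, with hypothetical helper names rather than the driver's:

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    enum map_type { MAP_WB = 0, MAP_WC = 1 };

    /* pack a small tag into the unused low bits of an aligned pointer */
    static void *ptr_pack(void *ptr, unsigned int tag)
    {
        assert(((uintptr_t)ptr & 3) == 0); /* need at least 4-byte alignment */
        return (void *)((uintptr_t)ptr | tag);
    }

    static unsigned int ptr_tag(void *packed)
    {
        return (uintptr_t)packed & 3;
    }

    static void *ptr_mask(void *packed)
    {
        return (void *)((uintptr_t)packed & ~(uintptr_t)3);
    }

    int main(void)
    {
        void *mapping = aligned_alloc(4096, 4096); /* stands in for a vmap */
        void *packed = ptr_pack(mapping, MAP_WC);

        assert(ptr_tag(packed) == MAP_WC);
        assert(ptr_mask(packed) == mapping);

        free(ptr_mask(packed)); /* always unmask before freeing/unmapping */
        return 0;
    }
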
@@ -2253,7 +2315,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2253} 2315}
2254 2316
2255/* The 'mapping' part of i915_gem_object_pin_map() below */ 2317/* The 'mapping' part of i915_gem_object_pin_map() below */
2256static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) 2318static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2319 enum i915_map_type type)
2257{ 2320{
2258 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2321 unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2259 struct sg_table *sgt = obj->pages; 2322 struct sg_table *sgt = obj->pages;
@@ -2262,10 +2325,11 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
2262 struct page *stack_pages[32]; 2325 struct page *stack_pages[32];
2263 struct page **pages = stack_pages; 2326 struct page **pages = stack_pages;
2264 unsigned long i = 0; 2327 unsigned long i = 0;
2328 pgprot_t pgprot;
2265 void *addr; 2329 void *addr;
2266 2330
2267 /* A single page can always be kmapped */ 2331 /* A single page can always be kmapped */
2268 if (n_pages == 1) 2332 if (n_pages == 1 && type == I915_MAP_WB)
2269 return kmap(sg_page(sgt->sgl)); 2333 return kmap(sg_page(sgt->sgl));
2270 2334
2271 if (n_pages > ARRAY_SIZE(stack_pages)) { 2335 if (n_pages > ARRAY_SIZE(stack_pages)) {
@@ -2281,7 +2345,15 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
2281 /* Check that we have the expected number of pages */ 2345 /* Check that we have the expected number of pages */
2282 GEM_BUG_ON(i != n_pages); 2346 GEM_BUG_ON(i != n_pages);
2283 2347
2284 addr = vmap(pages, n_pages, 0, PAGE_KERNEL); 2348 switch (type) {
2349 case I915_MAP_WB:
2350 pgprot = PAGE_KERNEL;
2351 break;
2352 case I915_MAP_WC:
2353 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2354 break;
2355 }
2356 addr = vmap(pages, n_pages, 0, pgprot);
2285 2357
2286 if (pages != stack_pages) 2358 if (pages != stack_pages)
2287 drm_free_large(pages); 2359 drm_free_large(pages);
@@ -2290,27 +2362,54 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
2290} 2362}
2291 2363
2292/* get, pin, and map the pages of the object into kernel space */ 2364/* get, pin, and map the pages of the object into kernel space */
2293void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2365void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2366 enum i915_map_type type)
2294{ 2367{
2368 enum i915_map_type has_type;
2369 bool pinned;
2370 void *ptr;
2295 int ret; 2371 int ret;
2296 2372
2297 lockdep_assert_held(&obj->base.dev->struct_mutex); 2373 lockdep_assert_held(&obj->base.dev->struct_mutex);
2374 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
2298 2375
2299 ret = i915_gem_object_get_pages(obj); 2376 ret = i915_gem_object_get_pages(obj);
2300 if (ret) 2377 if (ret)
2301 return ERR_PTR(ret); 2378 return ERR_PTR(ret);
2302 2379
2303 i915_gem_object_pin_pages(obj); 2380 i915_gem_object_pin_pages(obj);
2381 pinned = obj->pages_pin_count > 1;
2304 2382
2305 if (!obj->mapping) { 2383 ptr = ptr_unpack_bits(obj->mapping, has_type);
2306 obj->mapping = i915_gem_object_map(obj); 2384 if (ptr && has_type != type) {
2307 if (!obj->mapping) { 2385 if (pinned) {
2308 i915_gem_object_unpin_pages(obj); 2386 ret = -EBUSY;
2309 return ERR_PTR(-ENOMEM); 2387 goto err;
2310 } 2388 }
2389
2390 if (is_vmalloc_addr(ptr))
2391 vunmap(ptr);
2392 else
2393 kunmap(kmap_to_page(ptr));
2394
2395 ptr = obj->mapping = NULL;
2396 }
2397
2398 if (!ptr) {
2399 ptr = i915_gem_object_map(obj, type);
2400 if (!ptr) {
2401 ret = -ENOMEM;
2402 goto err;
2403 }
2404
2405 obj->mapping = ptr_pack_bits(ptr, type);
2311 } 2406 }
2312 2407
2313 return obj->mapping; 2408 return ptr;
2409
2410err:
2411 i915_gem_object_unpin_pages(obj);
2412 return ERR_PTR(ret);
2314} 2413}
2315 2414
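
i915_gem_object_pin_map() now caches at most one kernel mapping per object, keyed by the requested type: asking for a different type drops and rebuilds the mapping, but only if no other user still holds the pages pinned, otherwise it bails with -EBUSY rather than pull a live mapping out from under them. A standalone model of that policy, with invented names and a plain malloc standing in for vmap/kmap:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    enum map_type { MAP_WB, MAP_WC };

    struct object {
        void *mapping;          /* cached kernel mapping, or NULL */
        enum map_type map_type; /* type the cached mapping was built with */
        int pages_pin_count;    /* users currently relying on the pages */
    };

    /* returns the mapping, or NULL with errno set (mirroring the -EBUSY rule) */
    static void *pin_map(struct object *obj, enum map_type type)
    {
        int pinned = obj->pages_pin_count++ > 0;

        if (obj->mapping && obj->map_type != type) {
            if (pinned) {              /* someone else may be using it */
                obj->pages_pin_count--;
                errno = EBUSY;
                return NULL;
            }
            free(obj->mapping);        /* stands in for vunmap/kunmap */
            obj->mapping = NULL;
        }

        if (!obj->mapping) {
            obj->mapping = malloc(4096); /* stands in for vmap/kmap */
            obj->map_type = type;
        }
        return obj->mapping;
    }

    int main(void)
    {
        struct object obj = { 0 };

        void *a = pin_map(&obj, MAP_WB);   /* builds a WB mapping */
        void *b = pin_map(&obj, MAP_WC);   /* refused: WB mapping still pinned */

        printf("first=%p second=%p (%s)\n", a, b,
               b ? "ok" : "EBUSY as expected");
        free(obj.mapping);
        return 0;
    }
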
2316static void 2415static void
@@ -2423,15 +2522,11 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
2423 struct drm_i915_gem_request *request; 2522 struct drm_i915_gem_request *request;
2424 struct intel_ring *ring; 2523 struct intel_ring *ring;
2425 2524
2426 request = i915_gem_active_peek(&engine->last_request,
2427 &engine->i915->drm.struct_mutex);
2428
2429 /* Mark all pending requests as complete so that any concurrent 2525 /* Mark all pending requests as complete so that any concurrent
2430 * (lockless) lookup doesn't try and wait upon the request as we 2526 * (lockless) lookup doesn't try and wait upon the request as we
2431 * reset it. 2527 * reset it.
2432 */ 2528 */
2433 if (request) 2529 intel_engine_init_seqno(engine, engine->last_submitted_seqno);
2434 intel_engine_init_seqno(engine, request->fence.seqno);
2435 2530
2436 /* 2531 /*
2437 * Clear the execlists queue up before freeing the requests, as those 2532 * Clear the execlists queue up before freeing the requests, as those
@@ -2453,6 +2548,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
2453 * implicit references on things like e.g. ppgtt address spaces through 2548 * implicit references on things like e.g. ppgtt address spaces through
2454 * the request. 2549 * the request.
2455 */ 2550 */
2551 request = i915_gem_active_raw(&engine->last_request,
2552 &engine->i915->drm.struct_mutex);
2456 if (request) 2553 if (request)
2457 i915_gem_request_retire_upto(request); 2554 i915_gem_request_retire_upto(request);
2458 GEM_BUG_ON(intel_engine_is_active(engine)); 2555 GEM_BUG_ON(intel_engine_is_active(engine));
@@ -2526,7 +2623,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
2526 container_of(work, typeof(*dev_priv), gt.idle_work.work); 2623 container_of(work, typeof(*dev_priv), gt.idle_work.work);
2527 struct drm_device *dev = &dev_priv->drm; 2624 struct drm_device *dev = &dev_priv->drm;
2528 struct intel_engine_cs *engine; 2625 struct intel_engine_cs *engine;
2529 unsigned int stuck_engines;
2530 bool rearm_hangcheck; 2626 bool rearm_hangcheck;
2531 2627
2532 if (!READ_ONCE(dev_priv->gt.awake)) 2628 if (!READ_ONCE(dev_priv->gt.awake))
@@ -2556,15 +2652,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
2556 dev_priv->gt.awake = false; 2652 dev_priv->gt.awake = false;
2557 rearm_hangcheck = false; 2653 rearm_hangcheck = false;
2558 2654
2559 /* As we have disabled hangcheck, we need to unstick any waiters still
2560 * hanging around. However, as we may be racing against the interrupt
2561 * handler or the waiters themselves, we skip enabling the fake-irq.
2562 */
2563 stuck_engines = intel_kick_waiters(dev_priv);
2564 if (unlikely(stuck_engines))
2565 DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
2566 stuck_engines);
2567
2568 if (INTEL_GEN(dev_priv) >= 6) 2655 if (INTEL_GEN(dev_priv) >= 6)
2569 gen6_rps_idle(dev_priv); 2656 gen6_rps_idle(dev_priv);
2570 intel_runtime_pm_put(dev_priv); 2657 intel_runtime_pm_put(dev_priv);
@@ -2734,27 +2821,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
2734 return 0; 2821 return 0;
2735} 2822}
2736 2823
2737static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2738{
2739 u32 old_write_domain, old_read_domains;
2740
2741 /* Force a pagefault for domain tracking on next user access */
2742 i915_gem_release_mmap(obj);
2743
2744 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2745 return;
2746
2747 old_read_domains = obj->base.read_domains;
2748 old_write_domain = obj->base.write_domain;
2749
2750 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2751 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2752
2753 trace_i915_gem_object_change_domain(obj,
2754 old_read_domains,
2755 old_write_domain);
2756}
2757
2758static void __i915_vma_iounmap(struct i915_vma *vma) 2824static void __i915_vma_iounmap(struct i915_vma *vma)
2759{ 2825{
2760 GEM_BUG_ON(i915_vma_is_pinned(vma)); 2826 GEM_BUG_ON(i915_vma_is_pinned(vma));
@@ -2809,16 +2875,17 @@ int i915_vma_unbind(struct i915_vma *vma)
2809 GEM_BUG_ON(obj->bind_count == 0); 2875 GEM_BUG_ON(obj->bind_count == 0);
2810 GEM_BUG_ON(!obj->pages); 2876 GEM_BUG_ON(!obj->pages);
2811 2877
2812 if (i915_vma_is_ggtt(vma) && 2878 if (i915_vma_is_map_and_fenceable(vma)) {
2813 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2814 i915_gem_object_finish_gtt(obj);
2815
2816 /* release the fence reg _after_ flushing */ 2879 /* release the fence reg _after_ flushing */
2817 ret = i915_gem_object_put_fence(obj); 2880 ret = i915_vma_put_fence(vma);
2818 if (ret) 2881 if (ret)
2819 return ret; 2882 return ret;
2820 2883
2884 /* Force a pagefault for domain tracking on next user access */
2885 i915_gem_release_mmap(obj);
2886
2821 __i915_vma_iounmap(vma); 2887 __i915_vma_iounmap(vma);
2888 vma->flags &= ~I915_VMA_CAN_FENCE;
2822 } 2889 }
2823 2890
2824 if (likely(!vma->vm->closed)) { 2891 if (likely(!vma->vm->closed)) {
@@ -2830,15 +2897,12 @@ int i915_vma_unbind(struct i915_vma *vma)
2830 drm_mm_remove_node(&vma->node); 2897 drm_mm_remove_node(&vma->node);
2831 list_move_tail(&vma->vm_link, &vma->vm->unbound_list); 2898 list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
2832 2899
2833 if (i915_vma_is_ggtt(vma)) { 2900 if (vma->pages != obj->pages) {
2834 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 2901 GEM_BUG_ON(!vma->pages);
2835 obj->map_and_fenceable = false; 2902 sg_free_table(vma->pages);
2836 } else if (vma->ggtt_view.pages) { 2903 kfree(vma->pages);
2837 sg_free_table(vma->ggtt_view.pages);
2838 kfree(vma->ggtt_view.pages);
2839 }
2840 vma->ggtt_view.pages = NULL;
2841 } 2904 }
2905 vma->pages = NULL;
2842 2906
2843 /* Since the unbound list is global, only move to that list if 2907 /* Since the unbound list is global, only move to that list if
2844 * no more VMAs exist. */ 2908 * no more VMAs exist. */
@@ -2930,7 +2994,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
2930 struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); 2994 struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
2931 struct drm_i915_gem_object *obj = vma->obj; 2995 struct drm_i915_gem_object *obj = vma->obj;
2932 u64 start, end; 2996 u64 start, end;
2933 u64 min_alignment;
2934 int ret; 2997 int ret;
2935 2998
2936 GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); 2999 GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
@@ -2941,17 +3004,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
2941 size = i915_gem_get_ggtt_size(dev_priv, size, 3004 size = i915_gem_get_ggtt_size(dev_priv, size,
2942 i915_gem_object_get_tiling(obj)); 3005 i915_gem_object_get_tiling(obj));
2943 3006
2944 min_alignment = 3007 alignment = max(max(alignment, vma->display_alignment),
2945 i915_gem_get_ggtt_alignment(dev_priv, size, 3008 i915_gem_get_ggtt_alignment(dev_priv, size,
2946 i915_gem_object_get_tiling(obj), 3009 i915_gem_object_get_tiling(obj),
2947 flags & PIN_MAPPABLE); 3010 flags & PIN_MAPPABLE));
2948 if (alignment == 0)
2949 alignment = min_alignment;
2950 if (alignment & (min_alignment - 1)) {
2951 DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
2952 alignment, min_alignment);
2953 return -EINVAL;
2954 }
2955 3011
2956 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3012 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
2957 3013
@@ -3091,51 +3147,72 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3091static void 3147static void
3092i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3148i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3093{ 3149{
3094 uint32_t old_write_domain; 3150 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3095 3151
3096 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3152 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3097 return; 3153 return;
3098 3154
3099 /* No actual flushing is required for the GTT write domain. Writes 3155 /* No actual flushing is required for the GTT write domain. Writes
3100 * to it immediately go to main memory as far as we know, so there's 3156 * to it "immediately" go to main memory as far as we know, so there's
3101 * no chipset flush. It also doesn't land in render cache. 3157 * no chipset flush. It also doesn't land in render cache.
3102 * 3158 *
3103 * However, we do have to enforce the order so that all writes through 3159 * However, we do have to enforce the order so that all writes through
3104 * the GTT land before any writes to the device, such as updates to 3160 * the GTT land before any writes to the device, such as updates to
3105 * the GATT itself. 3161 * the GATT itself.
3162 *
3163 * We also have to wait a bit for the writes to land from the GTT.
3164 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
3165 * timing. This issue has only been observed when switching quickly
3166 * between GTT writes and CPU reads from inside the kernel on recent hw,
3167 * and it appears to only affect discrete GTT blocks (i.e. on LLC
3168 * system agents we cannot reproduce this behaviour).
3106 */ 3169 */
3107 wmb(); 3170 wmb();
3171 if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
3172 POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base));
3108 3173
3109 old_write_domain = obj->base.write_domain; 3174 intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
3110 obj->base.write_domain = 0;
3111
3112 intel_fb_obj_flush(obj, false, ORIGIN_GTT);
3113 3175
3176 obj->base.write_domain = 0;
3114 trace_i915_gem_object_change_domain(obj, 3177 trace_i915_gem_object_change_domain(obj,
3115 obj->base.read_domains, 3178 obj->base.read_domains,
3116 old_write_domain); 3179 I915_GEM_DOMAIN_GTT);
3117} 3180}
3118 3181
3119/** Flushes the CPU write domain for the object if it's dirty. */ 3182/** Flushes the CPU write domain for the object if it's dirty. */
3120static void 3183static void
3121i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3184i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3122{ 3185{
3123 uint32_t old_write_domain;
3124
3125 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3186 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3126 return; 3187 return;
3127 3188
3128 if (i915_gem_clflush_object(obj, obj->pin_display)) 3189 if (i915_gem_clflush_object(obj, obj->pin_display))
3129 i915_gem_chipset_flush(to_i915(obj->base.dev)); 3190 i915_gem_chipset_flush(to_i915(obj->base.dev));
3130 3191
3131 old_write_domain = obj->base.write_domain;
3132 obj->base.write_domain = 0;
3133
3134 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3192 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3135 3193
3194 obj->base.write_domain = 0;
3136 trace_i915_gem_object_change_domain(obj, 3195 trace_i915_gem_object_change_domain(obj,
3137 obj->base.read_domains, 3196 obj->base.read_domains,
3138 old_write_domain); 3197 I915_GEM_DOMAIN_CPU);
3198}
3199
3200static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
3201{
3202 struct i915_vma *vma;
3203
3204 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3205 if (!i915_vma_is_ggtt(vma))
3206 continue;
3207
3208 if (i915_vma_is_active(vma))
3209 continue;
3210
3211 if (!drm_mm_node_allocated(&vma->node))
3212 continue;
3213
3214 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3215 }
3139} 3216}
3140 3217
3141/** 3218/**
@@ -3150,7 +3227,6 @@ int
3150i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3227i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3151{ 3228{
3152 uint32_t old_write_domain, old_read_domains; 3229 uint32_t old_write_domain, old_read_domains;
3153 struct i915_vma *vma;
3154 int ret; 3230 int ret;
3155 3231
3156 ret = i915_gem_object_wait_rendering(obj, !write); 3232 ret = i915_gem_object_wait_rendering(obj, !write);
@@ -3200,11 +3276,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3200 old_write_domain); 3276 old_write_domain);
3201 3277
3202 /* And bump the LRU for this access */ 3278 /* And bump the LRU for this access */
3203 vma = i915_gem_obj_to_ggtt(obj); 3279 i915_gem_object_bump_inactive_ggtt(obj);
3204 if (vma &&
3205 drm_mm_node_allocated(&vma->node) &&
3206 !i915_vma_is_active(vma))
3207 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3208 3280
3209 return 0; 3281 return 0;
3210} 3282}
@@ -3295,9 +3367,11 @@ restart:
3295 * dropped the fence as all snoopable access is 3367 * dropped the fence as all snoopable access is
3296 * supposed to be linear. 3368 * supposed to be linear.
3297 */ 3369 */
3298 ret = i915_gem_object_put_fence(obj); 3370 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3299 if (ret) 3371 ret = i915_vma_put_fence(vma);
3300 return ret; 3372 if (ret)
3373 return ret;
3374 }
3301 } else { 3375 } else {
3302 /* We either have incoherent backing store and 3376 /* We either have incoherent backing store and
3303 * so no GTT access or the architecture is fully 3377 * so no GTT access or the architecture is fully
@@ -3424,11 +3498,12 @@ rpm_put:
3424 * Can be called from an uninterruptible phase (modesetting) and allows 3498 * Can be called from an uninterruptible phase (modesetting) and allows
3425 * any flushes to be pipelined (for pageflips). 3499 * any flushes to be pipelined (for pageflips).
3426 */ 3500 */
3427int 3501struct i915_vma *
3428i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3502i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3429 u32 alignment, 3503 u32 alignment,
3430 const struct i915_ggtt_view *view) 3504 const struct i915_ggtt_view *view)
3431{ 3505{
3506 struct i915_vma *vma;
3432 u32 old_read_domains, old_write_domain; 3507 u32 old_read_domains, old_write_domain;
3433 int ret; 3508 int ret;
3434 3509
@@ -3448,19 +3523,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3448 */ 3523 */
3449 ret = i915_gem_object_set_cache_level(obj, 3524 ret = i915_gem_object_set_cache_level(obj,
3450 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 3525 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
3451 if (ret) 3526 if (ret) {
3527 vma = ERR_PTR(ret);
3452 goto err_unpin_display; 3528 goto err_unpin_display;
3529 }
3453 3530
3454 /* As the user may map the buffer once pinned in the display plane 3531 /* As the user may map the buffer once pinned in the display plane
3455 * (e.g. libkms for the bootup splash), we have to ensure that we 3532 * (e.g. libkms for the bootup splash), we have to ensure that we
3456 * always use map_and_fenceable for all scanout buffers. 3533 * always use map_and_fenceable for all scanout buffers. However,
3534 * it may simply be too big to fit into mappable, in which case
3535 * put it anyway and hope that userspace can cope (but always first
3536 * try to preserve the existing ABI).
3457 */ 3537 */
3458 ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 3538 vma = ERR_PTR(-ENOSPC);
3459 view->type == I915_GGTT_VIEW_NORMAL ? 3539 if (view->type == I915_GGTT_VIEW_NORMAL)
3460 PIN_MAPPABLE : 0); 3540 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3461 if (ret) 3541 PIN_MAPPABLE | PIN_NONBLOCK);
3542 if (IS_ERR(vma))
3543 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
3544 if (IS_ERR(vma))
3462 goto err_unpin_display; 3545 goto err_unpin_display;
3463 3546
3547 vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3548
3549 WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
3550
3464 i915_gem_object_flush_cpu_write_domain(obj); 3551 i915_gem_object_flush_cpu_write_domain(obj);
3465 3552
3466 old_write_domain = obj->base.write_domain; 3553 old_write_domain = obj->base.write_domain;
@@ -3476,23 +3563,28 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3476 old_read_domains, 3563 old_read_domains,
3477 old_write_domain); 3564 old_write_domain);
3478 3565
3479 return 0; 3566 return vma;
3480 3567
3481err_unpin_display: 3568err_unpin_display:
3482 obj->pin_display--; 3569 obj->pin_display--;
3483 return ret; 3570 return vma;
3484} 3571}
3485 3572
3486void 3573void
3487i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 3574i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3488 const struct i915_ggtt_view *view)
3489{ 3575{
3490 if (WARN_ON(obj->pin_display == 0)) 3576 if (WARN_ON(vma->obj->pin_display == 0))
3491 return; 3577 return;
3492 3578
3493 i915_gem_object_ggtt_unpin_view(obj, view); 3579 if (--vma->obj->pin_display == 0)
3580 vma->display_alignment = 0;
3494 3581
3495 obj->pin_display--; 3582 /* Bump the LRU to try and avoid premature eviction whilst flipping */
3583 if (!i915_vma_is_active(vma))
3584 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3585
3586 i915_vma_unpin(vma);
3587 WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
3496} 3588}
3497 3589
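
For a normal GGTT view, i915_gem_object_pin_to_display_plane() above now makes two attempts: a mappable, non-blocking pin first (to preserve the old scanout behaviour), and only if that fails a second pin with no placement restriction. A standalone sketch of that try-then-relax pattern, with invented names and a deliberately failing first attempt, purely illustrative:

    #include <errno.h>
    #include <stdio.h>

    #define PIN_MAPPABLE (1 << 0)
    #define PIN_NONBLOCK (1 << 1)

    /* pretend the mappable aperture is full, so the strict attempt fails */
    static int try_pin(unsigned int flags)
    {
        if (flags & PIN_MAPPABLE)
            return -ENOSPC;
        return 0;
    }

    int main(void)
    {
        int ret;

        /* first preference: mappable, and without evicting anything */
        ret = try_pin(PIN_MAPPABLE | PIN_NONBLOCK);
        if (ret)
            /* fall back: let the buffer land anywhere in the GGTT */
            ret = try_pin(0);

        printf("pinned with %s placement (ret=%d)\n",
               ret ? "no" : "relaxed", ret);
        return 0;
    }
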
3498/** 3590/**
@@ -3605,8 +3697,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3605static bool 3697static bool
3606i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) 3698i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3607{ 3699{
3608 struct drm_i915_gem_object *obj = vma->obj;
3609
3610 if (!drm_mm_node_allocated(&vma->node)) 3700 if (!drm_mm_node_allocated(&vma->node))
3611 return false; 3701 return false;
3612 3702
@@ -3616,7 +3706,7 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3616 if (alignment && vma->node.start & (alignment - 1)) 3706 if (alignment && vma->node.start & (alignment - 1))
3617 return true; 3707 return true;
3618 3708
3619 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 3709 if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
3620 return true; 3710 return true;
3621 3711
3622 if (flags & PIN_OFFSET_BIAS && 3712 if (flags & PIN_OFFSET_BIAS &&
@@ -3638,10 +3728,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3638 u32 fence_size, fence_alignment; 3728 u32 fence_size, fence_alignment;
3639 3729
3640 fence_size = i915_gem_get_ggtt_size(dev_priv, 3730 fence_size = i915_gem_get_ggtt_size(dev_priv,
3641 obj->base.size, 3731 vma->size,
3642 i915_gem_object_get_tiling(obj)); 3732 i915_gem_object_get_tiling(obj));
3643 fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, 3733 fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
3644 obj->base.size, 3734 vma->size,
3645 i915_gem_object_get_tiling(obj), 3735 i915_gem_object_get_tiling(obj),
3646 true); 3736 true);
3647 3737
@@ -3651,7 +3741,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3651 mappable = (vma->node.start + fence_size <= 3741 mappable = (vma->node.start + fence_size <=
3652 dev_priv->ggtt.mappable_end); 3742 dev_priv->ggtt.mappable_end);
3653 3743
3654 obj->map_and_fenceable = mappable && fenceable; 3744 if (mappable && fenceable)
3745 vma->flags |= I915_VMA_CAN_FENCE;
3746 else
3747 vma->flags &= ~I915_VMA_CAN_FENCE;
3655} 3748}
3656 3749
3657int __i915_vma_do_pin(struct i915_vma *vma, 3750int __i915_vma_do_pin(struct i915_vma *vma,
@@ -3689,53 +3782,46 @@ err:
3689 return ret; 3782 return ret;
3690} 3783}
3691 3784
3692int 3785struct i915_vma *
3693i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 3786i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3694 const struct i915_ggtt_view *view, 3787 const struct i915_ggtt_view *view,
3695 u64 size, 3788 u64 size,
3696 u64 alignment, 3789 u64 alignment,
3697 u64 flags) 3790 u64 flags)
3698{ 3791{
3792 struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base;
3699 struct i915_vma *vma; 3793 struct i915_vma *vma;
3700 int ret; 3794 int ret;
3701 3795
3702 if (!view) 3796 vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
3703 view = &i915_ggtt_view_normal;
3704
3705 vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
3706 if (IS_ERR(vma)) 3797 if (IS_ERR(vma))
3707 return PTR_ERR(vma); 3798 return vma;
3708 3799
3709 if (i915_vma_misplaced(vma, size, alignment, flags)) { 3800 if (i915_vma_misplaced(vma, size, alignment, flags)) {
3710 if (flags & PIN_NONBLOCK && 3801 if (flags & PIN_NONBLOCK &&
3711 (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) 3802 (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
3712 return -ENOSPC; 3803 return ERR_PTR(-ENOSPC);
3713 3804
3714 WARN(i915_vma_is_pinned(vma), 3805 WARN(i915_vma_is_pinned(vma),
3715 "bo is already pinned in ggtt with incorrect alignment:" 3806 "bo is already pinned in ggtt with incorrect alignment:"
3716 " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," 3807 " offset=%08x, req.alignment=%llx,"
3717 " obj->map_and_fenceable=%d\n", 3808 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3718 upper_32_bits(vma->node.start), 3809 i915_ggtt_offset(vma), alignment,
3719 lower_32_bits(vma->node.start),
3720 alignment,
3721 !!(flags & PIN_MAPPABLE), 3810 !!(flags & PIN_MAPPABLE),
3722 obj->map_and_fenceable); 3811 i915_vma_is_map_and_fenceable(vma));
3723 ret = i915_vma_unbind(vma); 3812 ret = i915_vma_unbind(vma);
3724 if (ret) 3813 if (ret)
3725 return ret; 3814 return ERR_PTR(ret);
3726 } 3815 }
3727 3816
3728 return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 3817 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3729} 3818 if (ret)
3819 return ERR_PTR(ret);
3730 3820
3731void 3821 return vma;
3732i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3733 const struct i915_ggtt_view *view)
3734{
3735 i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
3736} 3822}
3737 3823
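
A recurring change in this series is that helpers such as i915_gem_object_ggtt_pin() now hand back the VMA itself rather than an int, with failures encoded as an error pointer that callers unwrap via IS_ERR()/PTR_ERR(). A standalone model of that convention, as a simplified re-implementation for illustration rather than the kernel's actual macros:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* encode small negative errnos in the top of the address space */
    static void *err_ptr(long err) { return (void *)(intptr_t)err; }
    static int is_err(const void *p) { return (uintptr_t)p >= (uintptr_t)-4095; }
    static long ptr_err(const void *p) { return (long)(intptr_t)p; }

    struct vma { unsigned long start; };

    static struct vma *ggtt_pin(int make_it_fail)
    {
        static struct vma the_vma = { .start = 0x10000 };

        if (make_it_fail)
            return err_ptr(-ENOSPC); /* no space left in the aperture */
        return &the_vma;
    }

    int main(void)
    {
        struct vma *vma = ggtt_pin(1);

        if (is_err(vma))
            printf("pin failed: %ld\n", ptr_err(vma)); /* -28 (ENOSPC) */
        else
            printf("pinned at 0x%lx\n", vma->start);
        return 0;
    }
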
3738static __always_inline unsigned __busy_read_flag(unsigned int id) 3824static __always_inline unsigned int __busy_read_flag(unsigned int id)
3739{ 3825{
3740 /* Note that we could alias engines in the execbuf API, but 3826 /* Note that we could alias engines in the execbuf API, but
3741 * that would be very unwise as it prevents userspace from 3827 * that would be very unwise as it prevents userspace from
@@ -3750,39 +3836,92 @@ static __always_inline unsigned __busy_read_flag(unsigned int id)
3750 3836
3751static __always_inline unsigned int __busy_write_id(unsigned int id) 3837static __always_inline unsigned int __busy_write_id(unsigned int id)
3752{ 3838{
3753 return id; 3839 /* The uABI guarantees an active writer is also amongst the read
3840 * engines. This would be true if we accessed the activity tracking
3841 * under the lock, but as we perform the lookup of the object and
3842 * its activity locklessly we can not guarantee that the last_write
3843 * being active implies that we have set the same engine flag from
3844 * last_read - hence we always set both read and write busy for
3845 * last_write.
3846 */
3847 return id | __busy_read_flag(id);
3754} 3848}
3755 3849
3756static __always_inline unsigned 3850static __always_inline unsigned int
3757__busy_set_if_active(const struct i915_gem_active *active, 3851__busy_set_if_active(const struct i915_gem_active *active,
3758 unsigned int (*flag)(unsigned int id)) 3852 unsigned int (*flag)(unsigned int id))
3759{ 3853{
3760 /* For more discussion about the barriers and locking concerns, 3854 struct drm_i915_gem_request *request;
3761 * see __i915_gem_active_get_rcu().
3762 */
3763 do {
3764 struct drm_i915_gem_request *request;
3765 unsigned int id;
3766
3767 request = rcu_dereference(active->request);
3768 if (!request || i915_gem_request_completed(request))
3769 return 0;
3770 3855
3771 id = request->engine->exec_id; 3856 request = rcu_dereference(active->request);
3857 if (!request || i915_gem_request_completed(request))
3858 return 0;
3772 3859
3773 /* Check that the pointer wasn't reassigned and overwritten. */ 3860 /* This is racy. See __i915_gem_active_get_rcu() for a detailed
3774 if (request == rcu_access_pointer(active->request)) 3861 * discussion of how to handle the race correctly, but for reporting
3775 return flag(id); 3862 * the busy state we err on the side of potentially reporting the
3776 } while (1); 3863 * wrong engine as being busy (but we guarantee that the result
3864 * is at least self-consistent).
3865 *
3866 * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
3867 * whilst we are inspecting it, even under the RCU read lock as we are.
3868 * This means that there is a small window for the engine and/or the
3869 * seqno to have been overwritten. The seqno will always be in the
3870 * future compared to the intended, and so we know that if that
3871 * seqno is idle (on whatever engine) our request is idle and the
3872 * return 0 above is correct.
3873 *
3874 * The issue is that if the engine is switched, it is just as likely
3875 * to report that it is busy (but since the switch happened, we know
3876 * the request should be idle). So there is a small chance that a busy
3877 * result is actually the wrong engine.
3878 *
3879 * So why don't we care?
3880 *
3881 * For starters, the busy ioctl is a heuristic that is by definition
3882 * racy. Even with perfect serialisation in the driver, the hardware
3883 * state is constantly advancing - the state we report to the user
3884 * is stale.
3885 *
3886 * The critical information for the busy-ioctl is whether the object
3887 * is idle as userspace relies on that to detect whether its next
3888 * access will stall, or if it has missed submitting commands to
3889 * the hardware allowing the GPU to stall. We never generate a
3890 * false-positive for idleness, thus busy-ioctl is reliable at the
3891 * most fundamental level, and we maintain the guarantee that a
3892 * busy object left to itself will eventually become idle (and stay
3893 * idle!).
3894 *
3895 * We allow ourselves the leeway of potentially misreporting the busy
3896 * state because that is an optimisation heuristic that is constantly
3897 * in flux. Being quickly able to detect the busy/idle state is much
3898 * more important than accurate logging of exactly which engines were
3899 * busy.
3900 *
3901 * For accuracy in reporting the engine, we could use
3902 *
3903 * result = 0;
3904 * request = __i915_gem_active_get_rcu(active);
3905 * if (request) {
3906 * if (!i915_gem_request_completed(request))
3907 * result = flag(request->engine->exec_id);
3908 * i915_gem_request_put(request);
3909 * }
3910 *
3911 * but that still remains susceptible to both hardware and userspace
3912 * races. So we accept making the result of that race slightly worse,
3913 * given the rarity of the race and its low impact on the result.
3914 */
3915 return flag(READ_ONCE(request->engine->exec_id));
3777} 3916}
3778 3917
3779static inline unsigned 3918static __always_inline unsigned int
3780busy_check_reader(const struct i915_gem_active *active) 3919busy_check_reader(const struct i915_gem_active *active)
3781{ 3920{
3782 return __busy_set_if_active(active, __busy_read_flag); 3921 return __busy_set_if_active(active, __busy_read_flag);
3783} 3922}
3784 3923
3785static inline unsigned 3924static __always_inline unsigned int
3786busy_check_writer(const struct i915_gem_active *active) 3925busy_check_writer(const struct i915_gem_active *active)
3787{ 3926{
3788 return __busy_set_if_active(active, __busy_write_id); 3927 return __busy_set_if_active(active, __busy_write_id);
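
The rewritten __busy_write_id() above guarantees that whatever the uABI reports as the active writer is also reported among the readers, since the lockless lookup cannot prove that the read and write snapshots came from the same request. A standalone model of that invariant, using a hypothetical encoding (write id in the low half of the busy word, one read bit per engine in the high half; illustrative only):

    #include <assert.h>
    #include <stdio.h>

    /* hypothetical layout: low 16 bits = writer id, high 16 bits = reader mask */
    static unsigned int busy_read_flag(unsigned int id) { return 0x10000u << id; }
    static unsigned int busy_write_id(unsigned int id)  { return id | busy_read_flag(id); }

    int main(void)
    {
        unsigned int busy = 0;
        unsigned int writer = 2;               /* engine 2 is writing */

        busy |= busy_read_flag(0);             /* engine 0 is reading */
        busy |= busy_write_id(writer);         /* writer reported as reader too */

        assert(busy & busy_read_flag(writer)); /* the invariant always holds */
        printf("busy=0x%x\n", busy);           /* 0x50002 */
        return 0;
    }
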
@@ -3821,11 +3960,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3821 * retired and freed. We take a local copy of the pointer, 3960 * retired and freed. We take a local copy of the pointer,
3822 * but before we add its engine into the busy set, the other 3961 * but before we add its engine into the busy set, the other
3823 * thread reallocates it and assigns it to a task on another 3962 * thread reallocates it and assigns it to a task on another
3824 * engine with a fresh and incomplete seqno. 3963 * engine with a fresh and incomplete seqno. Guarding against
3825 * 3964 * that requires careful serialisation and reference counting,
3826 * So after we lookup the engine's id, we double check that 3965 * i.e. using __i915_gem_active_get_request_rcu(). We don't,
3827 * the active request is the same and only then do we add it 3966 * instead we expect that if the result is busy, which engines
3828 * into the busy set. 3967 * are busy is not completely reliable - we only guarantee
3968 * that the object was busy.
3829 */ 3969 */
3830 rcu_read_lock(); 3970 rcu_read_lock();
3831 3971
@@ -3833,9 +3973,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3833 args->busy |= busy_check_reader(&obj->last_read[idx]); 3973 args->busy |= busy_check_reader(&obj->last_read[idx]);
3834 3974
3835 /* For ABI sanity, we only care that the write engine is in 3975 /* For ABI sanity, we only care that the write engine is in
3836 * the set of read engines. This is ensured by the ordering 3976 * the set of read engines. This should be ensured by the
3837 * of setting last_read/last_write in i915_vma_move_to_active, 3977 * ordering of setting last_read/last_write in
3838 * and then in reverse in retire. 3978 * i915_vma_move_to_active(), and then in reverse in retire.
3979 * However, for good measure, we always report the last_write
3980 * request as a busy read as well as being a busy write.
3839 * 3981 *
3840 * We don't care that the set of active read/write engines 3982 * We don't care that the set of active read/write engines
3841 * may change during construction of the result, as it is 3983 * may change during construction of the result, as it is
@@ -3920,14 +4062,13 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
3920 i915_gem_object_retire__read); 4062 i915_gem_object_retire__read);
3921 init_request_active(&obj->last_write, 4063 init_request_active(&obj->last_write,
3922 i915_gem_object_retire__write); 4064 i915_gem_object_retire__write);
3923 init_request_active(&obj->last_fence, NULL);
3924 INIT_LIST_HEAD(&obj->obj_exec_link); 4065 INIT_LIST_HEAD(&obj->obj_exec_link);
3925 INIT_LIST_HEAD(&obj->vma_list); 4066 INIT_LIST_HEAD(&obj->vma_list);
3926 INIT_LIST_HEAD(&obj->batch_pool_link); 4067 INIT_LIST_HEAD(&obj->batch_pool_link);
3927 4068
3928 obj->ops = ops; 4069 obj->ops = ops;
3929 4070
3930 obj->fence_reg = I915_FENCE_REG_NONE; 4071 obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
3931 obj->madv = I915_MADV_WILLNEED; 4072 obj->madv = I915_MADV_WILLNEED;
3932 4073
3933 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4074 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
@@ -4082,32 +4223,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
4082 intel_runtime_pm_put(dev_priv); 4223 intel_runtime_pm_put(dev_priv);
4083} 4224}
4084 4225
4085struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4086 struct i915_address_space *vm)
4087{
4088 struct i915_vma *vma;
4089 list_for_each_entry(vma, &obj->vma_list, obj_link) {
4090 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
4091 vma->vm == vm)
4092 return vma;
4093 }
4094 return NULL;
4095}
4096
4097struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4098 const struct i915_ggtt_view *view)
4099{
4100 struct i915_vma *vma;
4101
4102 GEM_BUG_ON(!view);
4103
4104 list_for_each_entry(vma, &obj->vma_list, obj_link)
4105 if (i915_vma_is_ggtt(vma) &&
4106 i915_ggtt_view_equal(&vma->ggtt_view, view))
4107 return vma;
4108 return NULL;
4109}
4110
4111int i915_gem_suspend(struct drm_device *dev) 4226int i915_gem_suspend(struct drm_device *dev)
4112{ 4227{
4113 struct drm_i915_private *dev_priv = to_i915(dev); 4228 struct drm_i915_private *dev_priv = to_i915(dev);
@@ -4383,6 +4498,7 @@ void
4383i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 4498i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4384{ 4499{
4385 struct drm_device *dev = &dev_priv->drm; 4500 struct drm_device *dev = &dev_priv->drm;
4501 int i;
4386 4502
4387 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 4503 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4388 !IS_CHERRYVIEW(dev_priv)) 4504 !IS_CHERRYVIEW(dev_priv))
@@ -4398,6 +4514,13 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4398 I915_READ(vgtif_reg(avail_rs.fence_num)); 4514 I915_READ(vgtif_reg(avail_rs.fence_num));
4399 4515
4400 /* Initialize fence registers to zero */ 4516 /* Initialize fence registers to zero */
4517 for (i = 0; i < dev_priv->num_fence_regs; i++) {
4518 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4519
4520 fence->i915 = dev_priv;
4521 fence->id = i;
4522 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4523 }
4401 i915_gem_restore_fences(dev); 4524 i915_gem_restore_fences(dev);
4402 4525
4403 i915_gem_detect_bit_6_swizzle(dev); 4526 i915_gem_detect_bit_6_swizzle(dev);
@@ -4433,8 +4556,6 @@ i915_gem_load_init(struct drm_device *dev)
4433 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4556 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4434 for (i = 0; i < I915_NUM_ENGINES; i++) 4557 for (i = 0; i < I915_NUM_ENGINES; i++)
4435 init_engine_lists(&dev_priv->engine[i]); 4558 init_engine_lists(&dev_priv->engine[i]);
4436 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
4437 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4438 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 4559 INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4439 i915_gem_retire_work_handler); 4560 i915_gem_retire_work_handler);
4440 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 4561 INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
@@ -4444,8 +4565,6 @@ i915_gem_load_init(struct drm_device *dev)
4444 4565
4445 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 4566 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4446 4567
4447 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4448
4449 init_waitqueue_head(&dev_priv->pending_flip_queue); 4568 init_waitqueue_head(&dev_priv->pending_flip_queue);
4450 4569
4451 dev_priv->mm.interruptible = true; 4570 dev_priv->mm.interruptible = true;
@@ -4575,97 +4694,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
4575 } 4694 }
4576} 4695}
4577 4696
4578/* All the new VM stuff */
4579u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4580 struct i915_address_space *vm)
4581{
4582 struct drm_i915_private *dev_priv = to_i915(o->base.dev);
4583 struct i915_vma *vma;
4584
4585 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4586
4587 list_for_each_entry(vma, &o->vma_list, obj_link) {
4588 if (i915_vma_is_ggtt(vma) &&
4589 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4590 continue;
4591 if (vma->vm == vm)
4592 return vma->node.start;
4593 }
4594
4595 WARN(1, "%s vma for this object not found.\n",
4596 i915_is_ggtt(vm) ? "global" : "ppgtt");
4597 return -1;
4598}
4599
4600u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4601 const struct i915_ggtt_view *view)
4602{
4603 struct i915_vma *vma;
4604
4605 list_for_each_entry(vma, &o->vma_list, obj_link)
4606 if (i915_vma_is_ggtt(vma) &&
4607 i915_ggtt_view_equal(&vma->ggtt_view, view))
4608 return vma->node.start;
4609
4610 WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
4611 return -1;
4612}
4613
4614bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4615 struct i915_address_space *vm)
4616{
4617 struct i915_vma *vma;
4618
4619 list_for_each_entry(vma, &o->vma_list, obj_link) {
4620 if (i915_vma_is_ggtt(vma) &&
4621 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4622 continue;
4623 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4624 return true;
4625 }
4626
4627 return false;
4628}
4629
4630bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
4631 const struct i915_ggtt_view *view)
4632{
4633 struct i915_vma *vma;
4634
4635 list_for_each_entry(vma, &o->vma_list, obj_link)
4636 if (i915_vma_is_ggtt(vma) &&
4637 i915_ggtt_view_equal(&vma->ggtt_view, view) &&
4638 drm_mm_node_allocated(&vma->node))
4639 return true;
4640
4641 return false;
4642}
4643
4644unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
4645{
4646 struct i915_vma *vma;
4647
4648 GEM_BUG_ON(list_empty(&o->vma_list));
4649
4650 list_for_each_entry(vma, &o->vma_list, obj_link) {
4651 if (i915_vma_is_ggtt(vma) &&
4652 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
4653 return vma->node.size;
4654 }
4655
4656 return 0;
4657}
4658
4659bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
4660{
4661 struct i915_vma *vma;
4662 list_for_each_entry(vma, &obj->vma_list, obj_link)
4663 if (i915_vma_is_pinned(vma))
4664 return true;
4665
4666 return false;
4667}
4668
4669/* Like i915_gem_object_get_page(), but mark the returned page dirty */ 4697/* Like i915_gem_object_get_page(), but mark the returned page dirty */
4670struct page * 4698struct page *
4671i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 4699i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index bb72af5320b0..35950ee46a1d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -155,9 +155,10 @@ void i915_gem_context_free(struct kref *ctx_ref)
155 if (ce->ring) 155 if (ce->ring)
156 intel_ring_free(ce->ring); 156 intel_ring_free(ce->ring);
157 157
158 i915_gem_object_put(ce->state); 158 i915_vma_put(ce->state);
159 } 159 }
160 160
161 put_pid(ctx->pid);
161 list_del(&ctx->link); 162 list_del(&ctx->link);
162 163
163 ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id); 164 ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
@@ -281,13 +282,24 @@ __create_hw_context(struct drm_device *dev,
281 ctx->ggtt_alignment = get_context_alignment(dev_priv); 282 ctx->ggtt_alignment = get_context_alignment(dev_priv);
282 283
283 if (dev_priv->hw_context_size) { 284 if (dev_priv->hw_context_size) {
284 struct drm_i915_gem_object *obj = 285 struct drm_i915_gem_object *obj;
285 i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size); 286 struct i915_vma *vma;
287
288 obj = i915_gem_alloc_context_obj(dev,
289 dev_priv->hw_context_size);
286 if (IS_ERR(obj)) { 290 if (IS_ERR(obj)) {
287 ret = PTR_ERR(obj); 291 ret = PTR_ERR(obj);
288 goto err_out; 292 goto err_out;
289 } 293 }
290 ctx->engine[RCS].state = obj; 294
295 vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
296 if (IS_ERR(vma)) {
297 i915_gem_object_put(obj);
298 ret = PTR_ERR(vma);
299 goto err_out;
300 }
301
302 ctx->engine[RCS].state = vma;
291 } 303 }
292 304
293 /* Default context will never have a file_priv */ 305 /* Default context will never have a file_priv */
@@ -300,6 +312,9 @@ __create_hw_context(struct drm_device *dev,
300 ret = DEFAULT_CONTEXT_HANDLE; 312 ret = DEFAULT_CONTEXT_HANDLE;
301 313
302 ctx->file_priv = file_priv; 314 ctx->file_priv = file_priv;
315 if (file_priv)
316 ctx->pid = get_task_pid(current, PIDTYPE_PID);
317
303 ctx->user_handle = ret; 318 ctx->user_handle = ret;
304 /* NB: Mark all slices as needing a remap so that when the context first 319 /* NB: Mark all slices as needing a remap so that when the context first
305 * loads it will restore whatever remap state already exists. If there 320 * loads it will restore whatever remap state already exists. If there
@@ -399,7 +414,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
399 struct intel_context *ce = &ctx->engine[engine->id]; 414 struct intel_context *ce = &ctx->engine[engine->id];
400 415
401 if (ce->state) 416 if (ce->state)
402 i915_gem_object_ggtt_unpin(ce->state); 417 i915_vma_unpin(ce->state);
403 418
404 i915_gem_context_put(ctx); 419 i915_gem_context_put(ctx);
405 } 420 }
@@ -568,7 +583,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
568 const int num_rings = 583 const int num_rings =
569 /* Use an extended w/a on ivb+ if signalling from other rings */ 584 /* Use an extended w/a on ivb+ if signalling from other rings */
570 i915.semaphores ? 585 i915.semaphores ?
571 hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 : 586 INTEL_INFO(dev_priv)->num_rings - 1 :
572 0; 587 0;
573 int len, ret; 588 int len, ret;
574 589
@@ -621,8 +636,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
621 intel_ring_emit(ring, MI_NOOP); 636 intel_ring_emit(ring, MI_NOOP);
622 intel_ring_emit(ring, MI_SET_CONTEXT); 637 intel_ring_emit(ring, MI_SET_CONTEXT);
623 intel_ring_emit(ring, 638 intel_ring_emit(ring,
624 i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) | 639 i915_ggtt_offset(req->ctx->engine[RCS].state) | flags);
625 flags);
626 /* 640 /*
627 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP 641 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
628 * WaMiSetContext_Hang:snb,ivb,vlv 642 * WaMiSetContext_Hang:snb,ivb,vlv
@@ -651,7 +665,8 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
651 MI_STORE_REGISTER_MEM | 665 MI_STORE_REGISTER_MEM |
652 MI_SRM_LRM_GLOBAL_GTT); 666 MI_SRM_LRM_GLOBAL_GTT);
653 intel_ring_emit_reg(ring, last_reg); 667 intel_ring_emit_reg(ring, last_reg);
654 intel_ring_emit(ring, engine->scratch.gtt_offset); 668 intel_ring_emit(ring,
669 i915_ggtt_offset(engine->scratch));
655 intel_ring_emit(ring, MI_NOOP); 670 intel_ring_emit(ring, MI_NOOP);
656 } 671 }
657 intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); 672 intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
@@ -755,6 +770,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
755 struct i915_gem_context *to = req->ctx; 770 struct i915_gem_context *to = req->ctx;
756 struct intel_engine_cs *engine = req->engine; 771 struct intel_engine_cs *engine = req->engine;
757 struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; 772 struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
773 struct i915_vma *vma = to->engine[RCS].state;
758 struct i915_gem_context *from; 774 struct i915_gem_context *from;
759 u32 hw_flags; 775 u32 hw_flags;
760 int ret, i; 776 int ret, i;
@@ -762,9 +778,15 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
762 if (skip_rcs_switch(ppgtt, engine, to)) 778 if (skip_rcs_switch(ppgtt, engine, to))
763 return 0; 779 return 0;
764 780
781 /* Clear this page out of any CPU caches for coherent swap-in/out. */
782 if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
783 ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
784 if (ret)
785 return ret;
786 }
787
765 /* Trying to pin first makes error handling easier. */ 788 /* Trying to pin first makes error handling easier. */
766 ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0, 789 ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL);
767 to->ggtt_alignment, 0);
768 if (ret) 790 if (ret)
769 return ret; 791 return ret;
770 792
@@ -777,18 +799,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
777 */ 799 */
778 from = engine->last_context; 800 from = engine->last_context;
779 801
780 /*
781 * Clear this page out of any CPU caches for coherent swap-in/out. Note
782 * that thanks to write = false in this call and us not setting any gpu
783 * write domains when putting a context object onto the active list
784 * (when switching away from it), this won't block.
785 *
786 * XXX: We need a real interface to do this instead of trickery.
787 */
788 ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false);
789 if (ret)
790 goto unpin_out;
791
792 if (needs_pd_load_pre(ppgtt, engine, to)) { 802 if (needs_pd_load_pre(ppgtt, engine, to)) {
793 /* Older GENs and non render rings still want the load first, 803 /* Older GENs and non render rings still want the load first,
794 * "PP_DCLV followed by PP_DIR_BASE register through Load 804 * "PP_DCLV followed by PP_DIR_BASE register through Load
@@ -797,7 +807,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
797 trace_switch_mm(engine, to); 807 trace_switch_mm(engine, to);
798 ret = ppgtt->switch_mm(ppgtt, req); 808 ret = ppgtt->switch_mm(ppgtt, req);
799 if (ret) 809 if (ret)
800 goto unpin_out; 810 goto err;
801 } 811 }
802 812
803 if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) 813 if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
@@ -814,7 +824,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
814 if (to != from || (hw_flags & MI_FORCE_RESTORE)) { 824 if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
815 ret = mi_set_context(req, hw_flags); 825 ret = mi_set_context(req, hw_flags);
816 if (ret) 826 if (ret)
817 goto unpin_out; 827 goto err;
818 } 828 }
819 829
820 /* The backing object for the context is done after switching to the 830 /* The backing object for the context is done after switching to the
@@ -824,8 +834,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
824 * MI_SET_CONTEXT instead of when the next seqno has completed. 834 * MI_SET_CONTEXT instead of when the next seqno has completed.
825 */ 835 */
826 if (from != NULL) { 836 if (from != NULL) {
827 struct drm_i915_gem_object *obj = from->engine[RCS].state;
828
829 /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the 837 /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
830 * whole damn pipeline, we don't need to explicitly mark the 838 * whole damn pipeline, we don't need to explicitly mark the
831 * object dirty. The only exception is that the context must be 839 * object dirty. The only exception is that the context must be
@@ -833,11 +841,9 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
833 * able to defer doing this until we know the object would be 841 * able to defer doing this until we know the object would be
834 * swapped, but there is no way to do that yet. 842 * swapped, but there is no way to do that yet.
835 */ 843 */
836 obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; 844 i915_vma_move_to_active(from->engine[RCS].state, req, 0);
837 i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0); 845 /* state is kept alive until the next request */
838 846 i915_vma_unpin(from->engine[RCS].state);
839 /* obj is kept alive until the next request by its active ref */
840 i915_gem_object_ggtt_unpin(obj);
841 i915_gem_context_put(from); 847 i915_gem_context_put(from);
842 } 848 }
843 engine->last_context = i915_gem_context_get(to); 849 engine->last_context = i915_gem_context_get(to);
@@ -882,8 +888,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
882 888
883 return 0; 889 return 0;
884 890
885unpin_out: 891err:
886 i915_gem_object_ggtt_unpin(to->engine[RCS].state); 892 i915_vma_unpin(vma);
887 return ret; 893 return ret;
888} 894}
889 895
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index c60a8d5bbad0..10265bb35604 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -119,7 +119,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
119 if (ret) 119 if (ret)
120 return ERR_PTR(ret); 120 return ERR_PTR(ret);
121 121
122 addr = i915_gem_object_pin_map(obj); 122 addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
123 mutex_unlock(&dev->struct_mutex); 123 mutex_unlock(&dev->struct_mutex);
124 124
125 return addr; 125 return addr;
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index f76c06e92677..815d5fbe07ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -47,7 +47,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv)
47} 47}
48 48
49static bool 49static bool
50mark_free(struct i915_vma *vma, struct list_head *unwind) 50mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
51{ 51{
52 if (i915_vma_is_pinned(vma)) 52 if (i915_vma_is_pinned(vma))
53 return false; 53 return false;
@@ -55,6 +55,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
55 if (WARN_ON(!list_empty(&vma->exec_list))) 55 if (WARN_ON(!list_empty(&vma->exec_list)))
56 return false; 56 return false;
57 57
58 if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
59 return false;
60
58 list_add(&vma->exec_list, unwind); 61 list_add(&vma->exec_list, unwind);
59 return drm_mm_scan_add_block(&vma->node); 62 return drm_mm_scan_add_block(&vma->node);
60} 63}
@@ -129,7 +132,7 @@ search_again:
129 phase = phases; 132 phase = phases;
130 do { 133 do {
131 list_for_each_entry(vma, *phase, vm_link) 134 list_for_each_entry(vma, *phase, vm_link)
132 if (mark_free(vma, &eviction_list)) 135 if (mark_free(vma, flags, &eviction_list))
133 goto found; 136 goto found;
134 } while (*++phase); 137 } while (*++phase);
135 138
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c494b79ded20..601156c353cc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -39,6 +39,8 @@
39#include "intel_drv.h" 39#include "intel_drv.h"
40#include "intel_frontbuffer.h" 40#include "intel_frontbuffer.h"
41 41
42#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
43
42#define __EXEC_OBJECT_HAS_PIN (1<<31) 44#define __EXEC_OBJECT_HAS_PIN (1<<31)
43#define __EXEC_OBJECT_HAS_FENCE (1<<30) 45#define __EXEC_OBJECT_HAS_FENCE (1<<30)
44#define __EXEC_OBJECT_NEEDS_MAP (1<<29) 46#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
@@ -59,6 +61,7 @@ struct i915_execbuffer_params {
59}; 61};
60 62
61struct eb_vmas { 63struct eb_vmas {
64 struct drm_i915_private *i915;
62 struct list_head vmas; 65 struct list_head vmas;
63 int and; 66 int and;
64 union { 67 union {
@@ -68,7 +71,8 @@ struct eb_vmas {
68}; 71};
69 72
70static struct eb_vmas * 73static struct eb_vmas *
71eb_create(struct drm_i915_gem_execbuffer2 *args) 74eb_create(struct drm_i915_private *i915,
75 struct drm_i915_gem_execbuffer2 *args)
72{ 76{
73 struct eb_vmas *eb = NULL; 77 struct eb_vmas *eb = NULL;
74 78
@@ -95,6 +99,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args)
95 } else 99 } else
96 eb->and = -args->buffer_count; 100 eb->and = -args->buffer_count;
97 101
102 eb->i915 = i915;
98 INIT_LIST_HEAD(&eb->vmas); 103 INIT_LIST_HEAD(&eb->vmas);
99 return eb; 104 return eb;
100} 105}
@@ -180,8 +185,8 @@ eb_lookup_vmas(struct eb_vmas *eb,
180 * from the (obj, vm) we don't run the risk of creating 185 * from the (obj, vm) we don't run the risk of creating
181 * duplicated vmas for the same vm. 186 * duplicated vmas for the same vm.
182 */ 187 */
183 vma = i915_gem_obj_lookup_or_create_vma(obj, vm); 188 vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL);
184 if (IS_ERR(vma)) { 189 if (unlikely(IS_ERR(vma))) {
185 DRM_DEBUG("Failed to lookup VMA\n"); 190 DRM_DEBUG("Failed to lookup VMA\n");
186 ret = PTR_ERR(vma); 191 ret = PTR_ERR(vma);
187 goto err; 192 goto err;
@@ -245,7 +250,6 @@ static void
245i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) 250i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
246{ 251{
247 struct drm_i915_gem_exec_object2 *entry; 252 struct drm_i915_gem_exec_object2 *entry;
248 struct drm_i915_gem_object *obj = vma->obj;
249 253
250 if (!drm_mm_node_allocated(&vma->node)) 254 if (!drm_mm_node_allocated(&vma->node))
251 return; 255 return;
@@ -253,7 +257,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
253 entry = vma->exec_entry; 257 entry = vma->exec_entry;
254 258
255 if (entry->flags & __EXEC_OBJECT_HAS_FENCE) 259 if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
256 i915_gem_object_unpin_fence(obj); 260 i915_vma_unpin_fence(vma);
257 261
258 if (entry->flags & __EXEC_OBJECT_HAS_PIN) 262 if (entry->flags & __EXEC_OBJECT_HAS_PIN)
259 __i915_vma_unpin(vma); 263 __i915_vma_unpin(vma);
@@ -271,13 +275,19 @@ static void eb_destroy(struct eb_vmas *eb)
271 exec_list); 275 exec_list);
272 list_del_init(&vma->exec_list); 276 list_del_init(&vma->exec_list);
273 i915_gem_execbuffer_unreserve_vma(vma); 277 i915_gem_execbuffer_unreserve_vma(vma);
274 i915_gem_object_put(vma->obj); 278 i915_vma_put(vma);
275 } 279 }
276 kfree(eb); 280 kfree(eb);
277} 281}
278 282
279static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) 283static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
280{ 284{
285 if (!i915_gem_object_has_struct_page(obj))
286 return false;
287
288 if (DBG_USE_CPU_RELOC)
289 return DBG_USE_CPU_RELOC > 0;
290
281 return (HAS_LLC(obj->base.dev) || 291 return (HAS_LLC(obj->base.dev) ||
282 obj->base.write_domain == I915_GEM_DOMAIN_CPU || 292 obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
283 obj->cache_level != I915_CACHE_NONE); 293 obj->cache_level != I915_CACHE_NONE);
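use_cpu_reloc() now refuses objects without struct pages (which cannot be kmapped) and gains a compile-time override, DBG_USE_CPU_RELOC, for forcing either path while debugging. The sketch below mirrors that decision order with stand-in predicates; it is an illustration of the control flow, not the driver's checks.

/* Sketch of the relocation-path choice: a debug override first, then the
 * cheap heuristics. Predicates are stand-ins for the driver's checks. */
#include <stdbool.h>
#include <stdio.h>

#define DBG_USE_CPU_RELOC 0	/* -1 force GTT relocs; 1 force CPU relocs */

struct obj {
	bool has_struct_page;	/* backed by ordinary pages? */
	bool has_llc;		/* CPU and GPU share the last-level cache */
	bool cpu_write_domain;	/* already being written by the CPU */
	bool snooped;		/* cache level other than "none" */
};

static bool use_cpu_reloc(const struct obj *obj)
{
	if (!obj->has_struct_page)	/* cannot kmap what has no pages */
		return false;

	if (DBG_USE_CPU_RELOC)
		return DBG_USE_CPU_RELOC > 0;

	return obj->has_llc || obj->cpu_write_domain || obj->snooped;
}

int main(void)
{
	struct obj stolen = { .has_struct_page = false };
	struct obj llc = { .has_struct_page = true, .has_llc = true };

	printf("stolen: %s relocs\n", use_cpu_reloc(&stolen) ? "CPU" : "GTT");
	printf("llc:    %s relocs\n", use_cpu_reloc(&llc) ? "CPU" : "GTT");
	return 0;
}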
@@ -302,137 +312,243 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address)
302} 312}
303 313
304static inline uint64_t 314static inline uint64_t
305relocation_target(struct drm_i915_gem_relocation_entry *reloc, 315relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
306 uint64_t target_offset) 316 uint64_t target_offset)
307{ 317{
308 return gen8_canonical_addr((int)reloc->delta + target_offset); 318 return gen8_canonical_addr((int)reloc->delta + target_offset);
309} 319}
310 320
311static int 321struct reloc_cache {
312relocate_entry_cpu(struct drm_i915_gem_object *obj, 322 struct drm_i915_private *i915;
313 struct drm_i915_gem_relocation_entry *reloc, 323 struct drm_mm_node node;
314 uint64_t target_offset) 324 unsigned long vaddr;
325 unsigned int page;
326 bool use_64bit_reloc;
327};
328
329static void reloc_cache_init(struct reloc_cache *cache,
330 struct drm_i915_private *i915)
315{ 331{
316 struct drm_device *dev = obj->base.dev; 332 cache->page = -1;
317 uint32_t page_offset = offset_in_page(reloc->offset); 333 cache->vaddr = 0;
318 uint64_t delta = relocation_target(reloc, target_offset); 334 cache->i915 = i915;
319 char *vaddr; 335 cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8;
320 int ret; 336 cache->node.allocated = false;
337}
321 338
322 ret = i915_gem_object_set_to_cpu_domain(obj, true); 339static inline void *unmask_page(unsigned long p)
323 if (ret) 340{
324 return ret; 341 return (void *)(uintptr_t)(p & PAGE_MASK);
342}
325 343
326 vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, 344static inline unsigned int unmask_flags(unsigned long p)
327 reloc->offset >> PAGE_SHIFT)); 345{
328 *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); 346 return p & ~PAGE_MASK;
347}
348
349#define KMAP 0x4 /* after CLFLUSH_FLAGS */
350
351static void reloc_cache_fini(struct reloc_cache *cache)
352{
353 void *vaddr;
329 354
330 if (INTEL_INFO(dev)->gen >= 8) { 355 if (!cache->vaddr)
331 page_offset = offset_in_page(page_offset + sizeof(uint32_t)); 356 return;
357
358 vaddr = unmask_page(cache->vaddr);
359 if (cache->vaddr & KMAP) {
360 if (cache->vaddr & CLFLUSH_AFTER)
361 mb();
332 362
333 if (page_offset == 0) { 363 kunmap_atomic(vaddr);
334 kunmap_atomic(vaddr); 364 i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
335 vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, 365 } else {
336 (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); 366 wmb();
367 io_mapping_unmap_atomic((void __iomem *)vaddr);
368 if (cache->node.allocated) {
369 struct i915_ggtt *ggtt = &cache->i915->ggtt;
370
371 ggtt->base.clear_range(&ggtt->base,
372 cache->node.start,
373 cache->node.size,
374 true);
375 drm_mm_remove_node(&cache->node);
376 } else {
377 i915_vma_unpin((struct i915_vma *)cache->node.mm);
337 } 378 }
379 }
380}
381
382static void *reloc_kmap(struct drm_i915_gem_object *obj,
383 struct reloc_cache *cache,
384 int page)
385{
386 void *vaddr;
387
388 if (cache->vaddr) {
389 kunmap_atomic(unmask_page(cache->vaddr));
390 } else {
391 unsigned int flushes;
392 int ret;
338 393
339 *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta); 394 ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
395 if (ret)
396 return ERR_PTR(ret);
397
398 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
399 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
400
401 cache->vaddr = flushes | KMAP;
402 cache->node.mm = (void *)obj;
403 if (flushes)
404 mb();
340 } 405 }
341 406
342 kunmap_atomic(vaddr); 407 vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
408 cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
409 cache->page = page;
343 410
344 return 0; 411 return vaddr;
345} 412}
346 413
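The reloc_cache keeps its state (KMAP plus the clflush flags) in the low bits of the page-aligned mapping address, relying on PAGE_MASK alignment; unmask_page() and unmask_flags() split them apart again. Here is a standalone sketch of that packing trick, assuming 4 KiB pages and a pointer that fits in an unsigned long.

/* Sketch: pack small flags into the low bits of a page-aligned address,
 * as reloc_cache does with KMAP and the CLFLUSH flags. Assumes 4 KiB pages. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096ul
#define PAGE_MASK (~(PAGE_SIZE - 1))

#define CLFLUSH_BEFORE 0x1ul
#define CLFLUSH_AFTER  0x2ul
#define KMAP           0x4ul	/* after the CLFLUSH flags */

static void *unmask_page(unsigned long p)
{
	return (void *)(p & PAGE_MASK);
}

static unsigned long unmask_flags(unsigned long p)
{
	return p & ~PAGE_MASK;
}

int main(void)
{
	void *page = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
	unsigned long packed = (unsigned long)page | KMAP | CLFLUSH_AFTER;

	assert(unmask_page(packed) == page);
	assert(unmask_flags(packed) == (KMAP | CLFLUSH_AFTER));
	printf("page %p, flags %#lx\n", unmask_page(packed), unmask_flags(packed));

	free(page);
	return 0;
}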
347static int 414static void *reloc_iomap(struct drm_i915_gem_object *obj,
348relocate_entry_gtt(struct drm_i915_gem_object *obj, 415 struct reloc_cache *cache,
349 struct drm_i915_gem_relocation_entry *reloc, 416 int page)
350 uint64_t target_offset)
351{ 417{
352 struct drm_device *dev = obj->base.dev; 418 struct i915_ggtt *ggtt = &cache->i915->ggtt;
353 struct drm_i915_private *dev_priv = to_i915(dev); 419 unsigned long offset;
354 struct i915_ggtt *ggtt = &dev_priv->ggtt; 420 void *vaddr;
355 uint64_t delta = relocation_target(reloc, target_offset);
356 uint64_t offset;
357 void __iomem *reloc_page;
358 int ret;
359 421
360 ret = i915_gem_object_set_to_gtt_domain(obj, true); 422 if (cache->node.allocated) {
361 if (ret) 423 wmb();
362 return ret; 424 ggtt->base.insert_page(&ggtt->base,
425 i915_gem_object_get_dma_address(obj, page),
426 cache->node.start, I915_CACHE_NONE, 0);
427 cache->page = page;
428 return unmask_page(cache->vaddr);
429 }
363 430
364 ret = i915_gem_object_put_fence(obj); 431 if (cache->vaddr) {
365 if (ret) 432 io_mapping_unmap_atomic(unmask_page(cache->vaddr));
366 return ret; 433 } else {
434 struct i915_vma *vma;
435 int ret;
367 436
368 /* Map the page containing the relocation we're going to perform. */ 437 if (use_cpu_reloc(obj))
369 offset = i915_gem_obj_ggtt_offset(obj); 438 return NULL;
370 offset += reloc->offset; 439
371 reloc_page = io_mapping_map_atomic_wc(ggtt->mappable, 440 ret = i915_gem_object_set_to_gtt_domain(obj, true);
372 offset & PAGE_MASK); 441 if (ret)
373 iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset)); 442 return ERR_PTR(ret);
374 443
375 if (INTEL_INFO(dev)->gen >= 8) { 444 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
376 offset += sizeof(uint32_t); 445 PIN_MAPPABLE | PIN_NONBLOCK);
377 446 if (IS_ERR(vma)) {
378 if (offset_in_page(offset) == 0) { 447 memset(&cache->node, 0, sizeof(cache->node));
379 io_mapping_unmap_atomic(reloc_page); 448 ret = drm_mm_insert_node_in_range_generic
380 reloc_page = 449 (&ggtt->base.mm, &cache->node,
381 io_mapping_map_atomic_wc(ggtt->mappable, 450 4096, 0, 0,
382 offset); 451 0, ggtt->mappable_end,
452 DRM_MM_SEARCH_DEFAULT,
453 DRM_MM_CREATE_DEFAULT);
454 if (ret)
455 return ERR_PTR(ret);
456 } else {
457 ret = i915_vma_put_fence(vma);
458 if (ret) {
459 i915_vma_unpin(vma);
460 return ERR_PTR(ret);
461 }
462
463 cache->node.start = vma->node.start;
464 cache->node.mm = (void *)vma;
383 } 465 }
466 }
384 467
385 iowrite32(upper_32_bits(delta), 468 offset = cache->node.start;
386 reloc_page + offset_in_page(offset)); 469 if (cache->node.allocated) {
470 ggtt->base.insert_page(&ggtt->base,
471 i915_gem_object_get_dma_address(obj, page),
472 offset, I915_CACHE_NONE, 0);
473 } else {
474 offset += page << PAGE_SHIFT;
387 } 475 }
388 476
389 io_mapping_unmap_atomic(reloc_page); 477 vaddr = io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset);
478 cache->page = page;
479 cache->vaddr = (unsigned long)vaddr;
390 480
391 return 0; 481 return vaddr;
392} 482}
393 483
394static void 484static void *reloc_vaddr(struct drm_i915_gem_object *obj,
395clflush_write32(void *addr, uint32_t value) 485 struct reloc_cache *cache,
486 int page)
396{ 487{
397 /* This is not a fast path, so KISS. */ 488 void *vaddr;
398 drm_clflush_virt_range(addr, sizeof(uint32_t)); 489
399 *(uint32_t *)addr = value; 490 if (cache->page == page) {
400 drm_clflush_virt_range(addr, sizeof(uint32_t)); 491 vaddr = unmask_page(cache->vaddr);
492 } else {
493 vaddr = NULL;
494 if ((cache->vaddr & KMAP) == 0)
495 vaddr = reloc_iomap(obj, cache, page);
496 if (!vaddr)
497 vaddr = reloc_kmap(obj, cache, page);
498 }
499
500 return vaddr;
401} 501}
402 502
403static int 503static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
404relocate_entry_clflush(struct drm_i915_gem_object *obj,
405 struct drm_i915_gem_relocation_entry *reloc,
406 uint64_t target_offset)
407{ 504{
408 struct drm_device *dev = obj->base.dev; 505 if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
409 uint32_t page_offset = offset_in_page(reloc->offset); 506 if (flushes & CLFLUSH_BEFORE) {
410 uint64_t delta = relocation_target(reloc, target_offset); 507 clflushopt(addr);
411 char *vaddr; 508 mb();
412 int ret; 509 }
413 510
414 ret = i915_gem_object_set_to_gtt_domain(obj, true); 511 *addr = value;
415 if (ret)
416 return ret;
417 512
418 vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, 513 /* Writes to the same cacheline are serialised by the CPU
419 reloc->offset >> PAGE_SHIFT)); 514 * (including clflush). On the write path, we only require
420 clflush_write32(vaddr + page_offset, lower_32_bits(delta)); 515 * that it hits memory in an orderly fashion and place
516 * mb barriers at the start and end of the relocation phase
517 * to ensure ordering of clflush wrt to the system.
518 */
519 if (flushes & CLFLUSH_AFTER)
520 clflushopt(addr);
521 } else
522 *addr = value;
523}
524
525static int
526relocate_entry(struct drm_i915_gem_object *obj,
527 const struct drm_i915_gem_relocation_entry *reloc,
528 struct reloc_cache *cache,
529 u64 target_offset)
530{
531 u64 offset = reloc->offset;
532 bool wide = cache->use_64bit_reloc;
533 void *vaddr;
421 534
422 if (INTEL_INFO(dev)->gen >= 8) { 535 target_offset = relocation_target(reloc, target_offset);
423 page_offset = offset_in_page(page_offset + sizeof(uint32_t)); 536repeat:
537 vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
538 if (IS_ERR(vaddr))
539 return PTR_ERR(vaddr);
424 540
425 if (page_offset == 0) { 541 clflush_write32(vaddr + offset_in_page(offset),
426 kunmap_atomic(vaddr); 542 lower_32_bits(target_offset),
427 vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, 543 cache->vaddr);
428 (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
429 }
430 544
431 clflush_write32(vaddr + page_offset, upper_32_bits(delta)); 545 if (wide) {
546 offset += sizeof(u32);
547 target_offset >>= 32;
548 wide = false;
549 goto repeat;
432 } 550 }
433 551
434 kunmap_atomic(vaddr);
435
436 return 0; 552 return 0;
437} 553}
438 554
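relocate_entry() above folds the three per-backend helpers into one writer: it always stores the low 32 bits, and on gen8+ (use_64bit_reloc) loops once more to store the high 32 bits at offset + 4, letting reloc_vaddr() switch pages if the second dword crosses a page boundary. The user-space model below reproduces that split write; the "mapping" is just a local buffer and little-endian byte order is assumed.

/* Sketch of the wide-relocation write: lower 32 bits first, then the upper
 * 32 bits at offset + 4, re-"mapping" the page if the write crosses into the
 * next page. Purely a model; the buffer stands in for the object's pages. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096u

static uint8_t object[2 * PAGE_SIZE];	/* two "pages" of backing store */

static void *map_page(unsigned int page)	/* stands in for reloc_vaddr() */
{
	return object + page * PAGE_SIZE;
}

static void relocate_entry(uint64_t offset, uint64_t target, int wide)
{
	uint8_t *vaddr;

repeat:
	vaddr = map_page(offset / PAGE_SIZE);
	/* Copy the low 32 bits of target (little-endian assumed). */
	memcpy(vaddr + offset % PAGE_SIZE, &target, sizeof(uint32_t));

	if (wide) {
		offset += sizeof(uint32_t);	/* may land on the next page */
		target >>= 32;
		wide = 0;
		goto repeat;
	}
}

int main(void)
{
	/* Write a 64-bit address straddling the page boundary at offset 4092. */
	relocate_entry(PAGE_SIZE - 4, 0x0000000123456000ull, 1);

	uint64_t readback;
	memcpy(&readback, object + PAGE_SIZE - 4, sizeof(readback));
	printf("relocated value: 0x%016llx\n", (unsigned long long)readback);
	return 0;
}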
@@ -453,7 +569,8 @@ static bool object_is_idle(struct drm_i915_gem_object *obj)
453static int 569static int
454i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, 570i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
455 struct eb_vmas *eb, 571 struct eb_vmas *eb,
456 struct drm_i915_gem_relocation_entry *reloc) 572 struct drm_i915_gem_relocation_entry *reloc,
573 struct reloc_cache *cache)
457{ 574{
458 struct drm_device *dev = obj->base.dev; 575 struct drm_device *dev = obj->base.dev;
459 struct drm_gem_object *target_obj; 576 struct drm_gem_object *target_obj;
@@ -516,7 +633,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
516 633
517 /* Check that the relocation address is valid... */ 634 /* Check that the relocation address is valid... */
518 if (unlikely(reloc->offset > 635 if (unlikely(reloc->offset >
519 obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) { 636 obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
520 DRM_DEBUG("Relocation beyond object bounds: " 637 DRM_DEBUG("Relocation beyond object bounds: "
521 "obj %p target %d offset %d size %d.\n", 638 "obj %p target %d offset %d size %d.\n",
522 obj, reloc->target_handle, 639 obj, reloc->target_handle,
@@ -536,23 +653,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
536 if (pagefault_disabled() && !object_is_idle(obj)) 653 if (pagefault_disabled() && !object_is_idle(obj))
537 return -EFAULT; 654 return -EFAULT;
538 655
539 if (use_cpu_reloc(obj)) 656 ret = relocate_entry(obj, reloc, cache, target_offset);
540 ret = relocate_entry_cpu(obj, reloc, target_offset);
541 else if (obj->map_and_fenceable)
542 ret = relocate_entry_gtt(obj, reloc, target_offset);
543 else if (static_cpu_has(X86_FEATURE_CLFLUSH))
544 ret = relocate_entry_clflush(obj, reloc, target_offset);
545 else {
546 WARN_ONCE(1, "Impossible case in relocation handling\n");
547 ret = -ENODEV;
548 }
549
550 if (ret) 657 if (ret)
551 return ret; 658 return ret;
552 659
553 /* and update the user's relocation entry */ 660 /* and update the user's relocation entry */
554 reloc->presumed_offset = target_offset; 661 reloc->presumed_offset = target_offset;
555
556 return 0; 662 return 0;
557} 663}
558 664
@@ -564,9 +670,11 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
564 struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; 670 struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
565 struct drm_i915_gem_relocation_entry __user *user_relocs; 671 struct drm_i915_gem_relocation_entry __user *user_relocs;
566 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 672 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
567 int remain, ret; 673 struct reloc_cache cache;
674 int remain, ret = 0;
568 675
569 user_relocs = u64_to_user_ptr(entry->relocs_ptr); 676 user_relocs = u64_to_user_ptr(entry->relocs_ptr);
677 reloc_cache_init(&cache, eb->i915);
570 678
571 remain = entry->relocation_count; 679 remain = entry->relocation_count;
572 while (remain) { 680 while (remain) {
@@ -576,19 +684,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
576 count = ARRAY_SIZE(stack_reloc); 684 count = ARRAY_SIZE(stack_reloc);
577 remain -= count; 685 remain -= count;
578 686
579 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) 687 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) {
580 return -EFAULT; 688 ret = -EFAULT;
689 goto out;
690 }
581 691
582 do { 692 do {
583 u64 offset = r->presumed_offset; 693 u64 offset = r->presumed_offset;
584 694
585 ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r); 695 ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
586 if (ret) 696 if (ret)
587 return ret; 697 goto out;
588 698
589 if (r->presumed_offset != offset && 699 if (r->presumed_offset != offset &&
590 __put_user(r->presumed_offset, &user_relocs->presumed_offset)) { 700 __put_user(r->presumed_offset,
591 return -EFAULT; 701 &user_relocs->presumed_offset)) {
702 ret = -EFAULT;
703 goto out;
592 } 704 }
593 705
594 user_relocs++; 706 user_relocs++;
@@ -596,7 +708,9 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
596 } while (--count); 708 } while (--count);
597 } 709 }
598 710
599 return 0; 711out:
712 reloc_cache_fini(&cache);
713 return ret;
600#undef N_RELOC 714#undef N_RELOC
601} 715}
602 716
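i915_gem_execbuffer_relocate_vma() now brackets the whole loop with reloc_cache_init()/reloc_cache_fini() and funnels every failure through a single out: label, so the cached mapping is always torn down. A compact sketch of that "copy a batch onto the stack, process, single exit" structure follows; process() and the cache are stand-ins, not the driver's relocation code.

/* Sketch: process user entries in stack-sized batches, sharing one cache and
 * always tearing it down on the single exit path. */
#include <stdio.h>
#include <string.h>

#define BATCH 8

struct cache { int initialised; };

static void cache_init(struct cache *c) { c->initialised = 1; }
static void cache_fini(struct cache *c) { c->initialised = 0; }

static int process(struct cache *c, int entry)
{
	(void)c;
	return entry < 0 ? -1 : 0;	/* pretend negative entries are faults */
}

static int relocate_all(struct cache *c, const int *entries, int count)
{
	int stack[BATCH];
	int ret = 0;

	cache_init(c);
	while (count) {
		int n = count < BATCH ? count : BATCH;

		memcpy(stack, entries, n * sizeof(*stack));	/* "copy_from_user" */
		for (int i = 0; i < n; i++) {
			ret = process(c, stack[i]);
			if (ret)
				goto out;	/* cache still gets torn down */
		}
		entries += n;
		count -= n;
	}
out:
	cache_fini(c);
	return ret;
}

int main(void)
{
	int entries[20] = { 0 };
	struct cache c;

	printf("ret = %d\n", relocate_all(&c, entries, 20));
	return 0;
}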
@@ -606,15 +720,18 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
606 struct drm_i915_gem_relocation_entry *relocs) 720 struct drm_i915_gem_relocation_entry *relocs)
607{ 721{
608 const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 722 const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
609 int i, ret; 723 struct reloc_cache cache;
724 int i, ret = 0;
610 725
726 reloc_cache_init(&cache, eb->i915);
611 for (i = 0; i < entry->relocation_count; i++) { 727 for (i = 0; i < entry->relocation_count; i++) {
612 ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]); 728 ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
613 if (ret) 729 if (ret)
614 return ret; 730 break;
615 } 731 }
732 reloc_cache_fini(&cache);
616 733
617 return 0; 734 return ret;
618} 735}
619 736
620static int 737static int
@@ -693,11 +810,11 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
693 entry->flags |= __EXEC_OBJECT_HAS_PIN; 810 entry->flags |= __EXEC_OBJECT_HAS_PIN;
694 811
695 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { 812 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
696 ret = i915_gem_object_get_fence(obj); 813 ret = i915_vma_get_fence(vma);
697 if (ret) 814 if (ret)
698 return ret; 815 return ret;
699 816
700 if (i915_gem_object_pin_fence(obj)) 817 if (i915_vma_pin_fence(vma))
701 entry->flags |= __EXEC_OBJECT_HAS_FENCE; 818 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
702 } 819 }
703 820
@@ -739,7 +856,6 @@ static bool
739eb_vma_misplaced(struct i915_vma *vma) 856eb_vma_misplaced(struct i915_vma *vma)
740{ 857{
741 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 858 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
742 struct drm_i915_gem_object *obj = vma->obj;
743 859
744 WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && 860 WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
745 !i915_vma_is_ggtt(vma)); 861 !i915_vma_is_ggtt(vma));
@@ -760,7 +876,8 @@ eb_vma_misplaced(struct i915_vma *vma)
760 return true; 876 return true;
761 877
762 /* avoid costly ping-pong once a batch bo ended up non-mappable */ 878 /* avoid costly ping-pong once a batch bo ended up non-mappable */
763 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) 879 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
880 !i915_vma_is_map_and_fenceable(vma))
764 return !only_mappable_for_reloc(entry->flags); 881 return !only_mappable_for_reloc(entry->flags);
765 882
766 if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && 883 if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
@@ -900,7 +1017,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
900 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); 1017 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
901 list_del_init(&vma->exec_list); 1018 list_del_init(&vma->exec_list);
902 i915_gem_execbuffer_unreserve_vma(vma); 1019 i915_gem_execbuffer_unreserve_vma(vma);
903 i915_gem_object_put(vma->obj); 1020 i915_vma_put(vma);
904 } 1021 }
905 1022
906 mutex_unlock(&dev->struct_mutex); 1023 mutex_unlock(&dev->struct_mutex);
@@ -1010,8 +1127,6 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
1010{ 1127{
1011 const unsigned int other_rings = eb_other_engines(req); 1128 const unsigned int other_rings = eb_other_engines(req);
1012 struct i915_vma *vma; 1129 struct i915_vma *vma;
1013 uint32_t flush_domains = 0;
1014 bool flush_chipset = false;
1015 int ret; 1130 int ret;
1016 1131
1017 list_for_each_entry(vma, vmas, exec_list) { 1132 list_for_each_entry(vma, vmas, exec_list) {
@@ -1024,16 +1139,11 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
1024 } 1139 }
1025 1140
1026 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) 1141 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
1027 flush_chipset |= i915_gem_clflush_object(obj, false); 1142 i915_gem_clflush_object(obj, false);
1028
1029 flush_domains |= obj->base.write_domain;
1030 } 1143 }
1031 1144
1032 if (flush_chipset) 1145 /* Unconditionally flush any chipset caches (for streaming writes). */
1033 i915_gem_chipset_flush(req->engine->i915); 1146 i915_gem_chipset_flush(req->engine->i915);
1034
1035 if (flush_domains & I915_GEM_DOMAIN_GTT)
1036 wmb();
1037 1147
1038 /* Unconditionally invalidate GPU caches and TLBs. */ 1148 /* Unconditionally invalidate GPU caches and TLBs. */
1039 return req->engine->emit_flush(req, EMIT_INVALIDATE); 1149 return req->engine->emit_flush(req, EMIT_INVALIDATE);
@@ -1194,15 +1304,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
1194 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 1304 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1195 } 1305 }
1196 1306
1197 if (flags & EXEC_OBJECT_NEEDS_FENCE) { 1307 if (flags & EXEC_OBJECT_NEEDS_FENCE)
1198 i915_gem_active_set(&obj->last_fence, req); 1308 i915_gem_active_set(&vma->last_fence, req);
1199 if (flags & __EXEC_OBJECT_HAS_FENCE) {
1200 struct drm_i915_private *dev_priv = req->i915;
1201
1202 list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
1203 &dev_priv->mm.fence_list);
1204 }
1205 }
1206 1309
1207 i915_vma_set_active(vma, idx); 1310 i915_vma_set_active(vma, idx);
1208 i915_gem_active_set(&vma->last_read[idx], req); 1311 i915_gem_active_set(&vma->last_read[idx], req);
@@ -1281,7 +1384,7 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1281 return 0; 1384 return 0;
1282} 1385}
1283 1386
1284static struct i915_vma* 1387static struct i915_vma *
1285i915_gem_execbuffer_parse(struct intel_engine_cs *engine, 1388i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1286 struct drm_i915_gem_exec_object2 *shadow_exec_entry, 1389 struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1287 struct drm_i915_gem_object *batch_obj, 1390 struct drm_i915_gem_object *batch_obj,
@@ -1305,31 +1408,28 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1305 batch_start_offset, 1408 batch_start_offset,
1306 batch_len, 1409 batch_len,
1307 is_master); 1410 is_master);
1308 if (ret) 1411 if (ret) {
1309 goto err; 1412 if (ret == -EACCES) /* unhandled chained batch */
1310 1413 vma = NULL;
1311 ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); 1414 else
1312 if (ret) 1415 vma = ERR_PTR(ret);
1313 goto err; 1416 goto out;
1417 }
1314 1418
1315 i915_gem_object_unpin_pages(shadow_batch_obj); 1419 vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
1420 if (IS_ERR(vma))
1421 goto out;
1316 1422
1317 memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); 1423 memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1318 1424
1319 vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
1320 vma->exec_entry = shadow_exec_entry; 1425 vma->exec_entry = shadow_exec_entry;
1321 vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; 1426 vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1322 i915_gem_object_get(shadow_batch_obj); 1427 i915_gem_object_get(shadow_batch_obj);
1323 list_add_tail(&vma->exec_list, &eb->vmas); 1428 list_add_tail(&vma->exec_list, &eb->vmas);
1324 1429
1325 return vma; 1430out:
1326
1327err:
1328 i915_gem_object_unpin_pages(shadow_batch_obj); 1431 i915_gem_object_unpin_pages(shadow_batch_obj);
1329 if (ret == -EACCES) /* unhandled chained batch */ 1432 return vma;
1330 return NULL;
1331 else
1332 return ERR_PTR(ret);
1333} 1433}
1334 1434
1335static int 1435static int
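i915_gem_execbuffer_parse() now reports its result entirely through the returned pointer: a valid vma on success, NULL for an unhandled chained batch (the -EACCES case), or an ERR_PTR-encoded errno. The model below re-implements that tri-valued convention in user space for illustration; the kernel's real macros live in <linux/err.h>.

/* Sketch of ERR_PTR-style tri-valued returns: pointer, NULL, or encoded errno. */
#include <stdio.h>

#define EINVAL 22
#define MAX_ERRNO 4095

static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *parse_batch(int chained, int bad)
{
	static int shadow_batch;	/* stands in for the shadow vma */

	if (bad)
		return ERR_PTR(-EINVAL);	/* a real failure */
	if (chained)
		return NULL;			/* unhandled chained batch: fall back */
	return &shadow_batch;
}

int main(void)
{
	void *vma = parse_batch(0, 0);

	if (IS_ERR(vma))
		printf("error %ld\n", PTR_ERR(vma));
	else if (!vma)
		printf("fall back to the unparsed batch\n");
	else
		printf("use shadow batch %p\n", vma);
	return 0;
}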
@@ -1412,7 +1512,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
1412 params->args_batch_start_offset; 1512 params->args_batch_start_offset;
1413 1513
1414 if (exec_len == 0) 1514 if (exec_len == 0)
1415 exec_len = params->batch->size; 1515 exec_len = params->batch->size - params->args_batch_start_offset;
1416 1516
1417 ret = params->engine->emit_bb_start(params->request, 1517 ret = params->engine->emit_bb_start(params->request,
1418 exec_start, exec_len, 1518 exec_start, exec_len,
@@ -1595,7 +1695,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1595 1695
1596 memset(&params_master, 0x00, sizeof(params_master)); 1696 memset(&params_master, 0x00, sizeof(params_master));
1597 1697
1598 eb = eb_create(args); 1698 eb = eb_create(dev_priv, args);
1599 if (eb == NULL) { 1699 if (eb == NULL) {
1600 i915_gem_context_put(ctx); 1700 i915_gem_context_put(ctx);
1601 mutex_unlock(&dev->struct_mutex); 1701 mutex_unlock(&dev->struct_mutex);
@@ -1638,6 +1738,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1638 ret = -EINVAL; 1738 ret = -EINVAL;
1639 goto err; 1739 goto err;
1640 } 1740 }
1741 if (args->batch_start_offset > params->batch->size ||
1742 args->batch_len > params->batch->size - args->batch_start_offset) {
1743 DRM_DEBUG("Attempting to use out-of-bounds batch\n");
1744 ret = -EINVAL;
1745 goto err;
1746 }
1641 1747
1642 params->args_batch_start_offset = args->batch_start_offset; 1748 params->args_batch_start_offset = args->batch_start_offset;
1643 if (intel_engine_needs_cmd_parser(engine) && args->batch_len) { 1749 if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
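The new sanity check above rejects batches whose start offset or length fall outside the batch object, and it deliberately compares args->batch_len against size - start rather than testing start + len > size, so a user-controlled addition cannot wrap around. A worked example of the difference, with hypothetical 32-bit values:

/* Sketch: why the bounds check subtracts rather than adds. With 32-bit
 * arguments, start + len can wrap to a small value and slip past a naive
 * check, while size - start can never overflow once start <= size. */
#include <stdint.h>
#include <stdio.h>

static int checked(uint32_t start, uint32_t len, uint32_t size)
{
	if (start > size)
		return 0;
	return len <= size - start;	/* no wraparound possible here */
}

static int naive(uint32_t start, uint32_t len, uint32_t size)
{
	return start + len <= size;	/* start + len may wrap to a tiny number */
}

int main(void)
{
	uint32_t size = 0x10000;			/* 64 KiB batch object */
	uint32_t start = 0x1000, len = 0xfffff000u;	/* len chosen to wrap */

	printf("naive:   %s\n", naive(start, len, size) ? "accepted" : "rejected");
	printf("checked: %s\n", checked(start, len, size) ? "accepted" : "rejected");
	return 0;
}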
@@ -1677,6 +1783,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1677 * hsw should have this fixed, but bdw mucks it up again. */ 1783 * hsw should have this fixed, but bdw mucks it up again. */
1678 if (dispatch_flags & I915_DISPATCH_SECURE) { 1784 if (dispatch_flags & I915_DISPATCH_SECURE) {
1679 struct drm_i915_gem_object *obj = params->batch->obj; 1785 struct drm_i915_gem_object *obj = params->batch->obj;
1786 struct i915_vma *vma;
1680 1787
1681 /* 1788 /*
1682 * So on first glance it looks freaky that we pin the batch here 1789 * So on first glance it looks freaky that we pin the batch here
@@ -1688,11 +1795,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1688 * fitting due to fragmentation. 1795 * fitting due to fragmentation.
1689 * So this is actually safe. 1796 * So this is actually safe.
1690 */ 1797 */
1691 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); 1798 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
1692 if (ret) 1799 if (IS_ERR(vma)) {
1800 ret = PTR_ERR(vma);
1693 goto err; 1801 goto err;
1802 }
1694 1803
1695 params->batch = i915_gem_obj_to_ggtt(obj); 1804 params->batch = vma;
1696 } 1805 }
1697 1806
1698 /* Allocate a request for this batch buffer nice and early. */ 1807 /* Allocate a request for this batch buffer nice and early. */
@@ -1702,6 +1811,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1702 goto err_batch_unpin; 1811 goto err_batch_unpin;
1703 } 1812 }
1704 1813
1814 /* Whilst this request exists, batch_obj will be on the
1815 * active_list, and so will hold the active reference. Only when this
 1816 * request is retired will the batch_obj be moved onto the
1817 * inactive_list and lose its active reference. Hence we do not need
1818 * to explicitly hold another reference here.
1819 */
1820 params->request->batch = params->batch;
1821
1705 ret = i915_gem_request_add_to_client(params->request, file); 1822 ret = i915_gem_request_add_to_client(params->request, file);
1706 if (ret) 1823 if (ret)
1707 goto err_request; 1824 goto err_request;
@@ -1720,7 +1837,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1720 1837
1721 ret = execbuf_submit(params, args, &eb->vmas); 1838 ret = execbuf_submit(params, args, &eb->vmas);
1722err_request: 1839err_request:
1723 __i915_add_request(params->request, params->batch->obj, ret == 0); 1840 __i915_add_request(params->request, ret == 0);
1724 1841
1725err_batch_unpin: 1842err_batch_unpin:
1726 /* 1843 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 9e8173fe2a09..8df1fa7234e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -55,87 +55,85 @@
55 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. 55 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
56 */ 56 */
57 57
58static void i965_write_fence_reg(struct drm_device *dev, int reg, 58#define pipelined 0
59 struct drm_i915_gem_object *obj) 59
60static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
61 struct i915_vma *vma)
60{ 62{
61 struct drm_i915_private *dev_priv = to_i915(dev);
62 i915_reg_t fence_reg_lo, fence_reg_hi; 63 i915_reg_t fence_reg_lo, fence_reg_hi;
63 int fence_pitch_shift; 64 int fence_pitch_shift;
65 u64 val;
64 66
65 if (INTEL_INFO(dev)->gen >= 6) { 67 if (INTEL_INFO(fence->i915)->gen >= 6) {
66 fence_reg_lo = FENCE_REG_GEN6_LO(reg); 68 fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
67 fence_reg_hi = FENCE_REG_GEN6_HI(reg); 69 fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
68 fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; 70 fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
71
69 } else { 72 } else {
70 fence_reg_lo = FENCE_REG_965_LO(reg); 73 fence_reg_lo = FENCE_REG_965_LO(fence->id);
71 fence_reg_hi = FENCE_REG_965_HI(reg); 74 fence_reg_hi = FENCE_REG_965_HI(fence->id);
72 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 75 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
73 } 76 }
74 77
75 /* To w/a incoherency with non-atomic 64-bit register updates, 78 val = 0;
76 * we split the 64-bit update into two 32-bit writes. In order 79 if (vma) {
77 * for a partial fence not to be evaluated between writes, we 80 unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
78 * precede the update with write to turn off the fence register, 81 bool is_y_tiled = tiling == I915_TILING_Y;
79 * and only enable the fence as the last step. 82 unsigned int stride = i915_gem_object_get_stride(vma->obj);
80 * 83 u32 row_size = stride * (is_y_tiled ? 32 : 8);
81 * For extra levels of paranoia, we make sure each step lands 84 u32 size = rounddown((u32)vma->node.size, row_size);
82 * before applying the next step. 85
83 */ 86 val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
84 I915_WRITE(fence_reg_lo, 0); 87 val |= vma->node.start & 0xfffff000;
85 POSTING_READ(fence_reg_lo); 88 val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
86 89 if (is_y_tiled)
87 if (obj) { 90 val |= BIT(I965_FENCE_TILING_Y_SHIFT);
88 u32 size = i915_gem_obj_ggtt_size(obj);
89 unsigned int tiling = i915_gem_object_get_tiling(obj);
90 unsigned int stride = i915_gem_object_get_stride(obj);
91 uint64_t val;
92
93 /* Adjust fence size to match tiled area */
94 if (tiling != I915_TILING_NONE) {
95 uint32_t row_size = stride *
96 (tiling == I915_TILING_Y ? 32 : 8);
97 size = (size / row_size) * row_size;
98 }
99
100 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
101 0xfffff000) << 32;
102 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
103 val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift;
104 if (tiling == I915_TILING_Y)
105 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
106 val |= I965_FENCE_REG_VALID; 91 val |= I965_FENCE_REG_VALID;
92 }
107 93
108 I915_WRITE(fence_reg_hi, val >> 32); 94 if (!pipelined) {
109 POSTING_READ(fence_reg_hi); 95 struct drm_i915_private *dev_priv = fence->i915;
110 96
111 I915_WRITE(fence_reg_lo, val); 97 /* To w/a incoherency with non-atomic 64-bit register updates,
98 * we split the 64-bit update into two 32-bit writes. In order
99 * for a partial fence not to be evaluated between writes, we
100 * precede the update with write to turn off the fence register,
101 * and only enable the fence as the last step.
102 *
103 * For extra levels of paranoia, we make sure each step lands
104 * before applying the next step.
105 */
106 I915_WRITE(fence_reg_lo, 0);
107 POSTING_READ(fence_reg_lo);
108
109 I915_WRITE(fence_reg_hi, upper_32_bits(val));
110 I915_WRITE(fence_reg_lo, lower_32_bits(val));
112 POSTING_READ(fence_reg_lo); 111 POSTING_READ(fence_reg_lo);
113 } else {
114 I915_WRITE(fence_reg_hi, 0);
115 POSTING_READ(fence_reg_hi);
116 } 112 }
117} 113}
118 114
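i965_write_fence_reg() now derives everything from the vma: the end and start of the fenced range go in the upper and lower dwords, the pitch is encoded in 128-byte units, and the Y-tiling and valid bits complete the value. The sketch below assembles such a 64-bit value following the field layout shown in the hunk; the shift and bit constants are illustrative stand-ins for the register definitions, not authoritative.

/* Sketch: assemble an i965-style 64-bit fence value from start/size/stride. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FENCE_PITCH_SHIFT   2			/* stand-in for the real shift */
#define FENCE_TILING_Y_BIT  (1ull << 1)		/* stand-in */
#define FENCE_REG_VALID     (1ull << 0)		/* stand-in */

static uint64_t i965_fence_val(uint64_t start, uint64_t size,
			       uint32_t stride, bool is_y_tiled)
{
	/* Fence must cover whole rows: 32 lines for Y tiling, 8 for X. */
	uint32_t row_size = stride * (is_y_tiled ? 32 : 8);
	uint64_t fenced = size / row_size * row_size;
	uint64_t val;

	val  = ((start + fenced - 4096) & 0xfffff000ull) << 32;	/* last page */
	val |= start & 0xfffff000ull;				/* first page */
	val |= (uint64_t)(stride / 128 - 1) << FENCE_PITCH_SHIFT;
	if (is_y_tiled)
		val |= FENCE_TILING_Y_BIT;
	val |= FENCE_REG_VALID;

	return val;
}

int main(void)
{
	/* 1 MiB X-tiled surface at GGTT offset 16 MiB with a 4096-byte stride. */
	uint64_t val = i965_fence_val(16ull << 20, 1ull << 20, 4096, false);

	printf("fence value: 0x%016llx\n", (unsigned long long)val);
	return 0;
}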
119static void i915_write_fence_reg(struct drm_device *dev, int reg, 115static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
120 struct drm_i915_gem_object *obj) 116 struct i915_vma *vma)
121{ 117{
122 struct drm_i915_private *dev_priv = to_i915(dev);
123 u32 val; 118 u32 val;
124 119
125 if (obj) { 120 val = 0;
126 u32 size = i915_gem_obj_ggtt_size(obj); 121 if (vma) {
127 unsigned int tiling = i915_gem_object_get_tiling(obj); 122 unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
128 unsigned int stride = i915_gem_object_get_stride(obj); 123 bool is_y_tiled = tiling == I915_TILING_Y;
124 unsigned int stride = i915_gem_object_get_stride(vma->obj);
129 int pitch_val; 125 int pitch_val;
130 int tile_width; 126 int tile_width;
131 127
132 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 128 WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
133 (size & -size) != size || 129 !is_power_of_2(vma->node.size) ||
134 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 130 (vma->node.start & (vma->node.size - 1)),
135 "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", 131 "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n",
136 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 132 vma->node.start,
133 i915_vma_is_map_and_fenceable(vma),
134 vma->node.size);
137 135
138 if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 136 if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
139 tile_width = 128; 137 tile_width = 128;
140 else 138 else
141 tile_width = 512; 139 tile_width = 512;
@@ -144,139 +142,141 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
144 pitch_val = stride / tile_width; 142 pitch_val = stride / tile_width;
145 pitch_val = ffs(pitch_val) - 1; 143 pitch_val = ffs(pitch_val) - 1;
146 144
147 val = i915_gem_obj_ggtt_offset(obj); 145 val = vma->node.start;
148 if (tiling == I915_TILING_Y) 146 if (is_y_tiled)
149 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 147 val |= BIT(I830_FENCE_TILING_Y_SHIFT);
150 val |= I915_FENCE_SIZE_BITS(size); 148 val |= I915_FENCE_SIZE_BITS(vma->node.size);
151 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 149 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
152 val |= I830_FENCE_REG_VALID; 150 val |= I830_FENCE_REG_VALID;
153 } else 151 }
154 val = 0; 152
153 if (!pipelined) {
154 struct drm_i915_private *dev_priv = fence->i915;
155 i915_reg_t reg = FENCE_REG(fence->id);
155 156
156 I915_WRITE(FENCE_REG(reg), val); 157 I915_WRITE(reg, val);
157 POSTING_READ(FENCE_REG(reg)); 158 POSTING_READ(reg);
159 }
158} 160}
159 161
160static void i830_write_fence_reg(struct drm_device *dev, int reg, 162static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
161 struct drm_i915_gem_object *obj) 163 struct i915_vma *vma)
162{ 164{
163 struct drm_i915_private *dev_priv = to_i915(dev); 165 u32 val;
164 uint32_t val;
165 166
166 if (obj) { 167 val = 0;
167 u32 size = i915_gem_obj_ggtt_size(obj); 168 if (vma) {
168 unsigned int tiling = i915_gem_object_get_tiling(obj); 169 unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
169 unsigned int stride = i915_gem_object_get_stride(obj); 170 bool is_y_tiled = tiling == I915_TILING_Y;
170 uint32_t pitch_val; 171 unsigned int stride = i915_gem_object_get_stride(vma->obj);
172 u32 pitch_val;
171 173
172 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 174 WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
173 (size & -size) != size || 175 !is_power_of_2(vma->node.size) ||
174 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 176 (vma->node.start & (vma->node.size - 1)),
175 "object 0x%08llx not 512K or pot-size 0x%08x aligned\n", 177 "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n",
176 i915_gem_obj_ggtt_offset(obj), size); 178 vma->node.start, vma->node.size);
177 179
178 pitch_val = stride / 128; 180 pitch_val = stride / 128;
179 pitch_val = ffs(pitch_val) - 1; 181 pitch_val = ffs(pitch_val) - 1;
180 182
181 val = i915_gem_obj_ggtt_offset(obj); 183 val = vma->node.start;
182 if (tiling == I915_TILING_Y) 184 if (is_y_tiled)
183 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 185 val |= BIT(I830_FENCE_TILING_Y_SHIFT);
184 val |= I830_FENCE_SIZE_BITS(size); 186 val |= I830_FENCE_SIZE_BITS(vma->node.size);
185 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 187 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
186 val |= I830_FENCE_REG_VALID; 188 val |= I830_FENCE_REG_VALID;
187 } else 189 }
188 val = 0;
189 190
190 I915_WRITE(FENCE_REG(reg), val); 191 if (!pipelined) {
191 POSTING_READ(FENCE_REG(reg)); 192 struct drm_i915_private *dev_priv = fence->i915;
192} 193 i915_reg_t reg = FENCE_REG(fence->id);
193 194
194inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 195 I915_WRITE(reg, val);
195{ 196 POSTING_READ(reg);
196 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 197 }
197} 198}
198 199
199static void i915_gem_write_fence(struct drm_device *dev, int reg, 200static void fence_write(struct drm_i915_fence_reg *fence,
200 struct drm_i915_gem_object *obj) 201 struct i915_vma *vma)
201{ 202{
202 struct drm_i915_private *dev_priv = to_i915(dev); 203 /* Previous access through the fence register is marshalled by
203 204 * the mb() inside the fault handlers (i915_gem_release_mmaps)
204 /* Ensure that all CPU reads are completed before installing a fence 205 * and explicitly managed for internal users.
205 * and all writes before removing the fence.
206 */ 206 */
207 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 207
208 mb(); 208 if (IS_GEN2(fence->i915))
209 209 i830_write_fence_reg(fence, vma);
210 WARN(obj && 210 else if (IS_GEN3(fence->i915))
211 (!i915_gem_object_get_stride(obj) || 211 i915_write_fence_reg(fence, vma);
212 !i915_gem_object_get_tiling(obj)), 212 else
213 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 213 i965_write_fence_reg(fence, vma);
214 i915_gem_object_get_stride(obj), 214
215 i915_gem_object_get_tiling(obj)); 215 /* Access through the fenced region afterwards is
216 216 * ordered by the posting reads whilst writing the registers.
217 if (IS_GEN2(dev))
218 i830_write_fence_reg(dev, reg, obj);
219 else if (IS_GEN3(dev))
220 i915_write_fence_reg(dev, reg, obj);
221 else if (INTEL_INFO(dev)->gen >= 4)
222 i965_write_fence_reg(dev, reg, obj);
223
224 /* And similarly be paranoid that no direct access to this region
225 * is reordered to before the fence is installed.
226 */ 217 */
227 if (i915_gem_object_needs_mb(obj))
228 mb();
229}
230 218
231static inline int fence_number(struct drm_i915_private *dev_priv, 219 fence->dirty = false;
232 struct drm_i915_fence_reg *fence)
233{
234 return fence - dev_priv->fence_regs;
235} 220}
236 221
237static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 222static int fence_update(struct drm_i915_fence_reg *fence,
238 struct drm_i915_fence_reg *fence, 223 struct i915_vma *vma)
239 bool enable)
240{ 224{
241 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 225 int ret;
242 int reg = fence_number(dev_priv, fence);
243 226
244 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 227 if (vma) {
228 if (!i915_vma_is_map_and_fenceable(vma))
229 return -EINVAL;
245 230
246 if (enable) { 231 if (WARN(!i915_gem_object_get_stride(vma->obj) ||
247 obj->fence_reg = reg; 232 !i915_gem_object_get_tiling(vma->obj),
248 fence->obj = obj; 233 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
249 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 234 i915_gem_object_get_stride(vma->obj),
250 } else { 235 i915_gem_object_get_tiling(vma->obj)))
251 obj->fence_reg = I915_FENCE_REG_NONE; 236 return -EINVAL;
252 fence->obj = NULL; 237
253 list_del_init(&fence->lru_list); 238 ret = i915_gem_active_retire(&vma->last_fence,
239 &vma->obj->base.dev->struct_mutex);
240 if (ret)
241 return ret;
254 } 242 }
255 obj->fence_dirty = false;
256}
257 243
258static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 244 if (fence->vma) {
259{ 245 ret = i915_gem_active_retire(&fence->vma->last_fence,
260 if (i915_gem_object_is_tiled(obj)) 246 &fence->vma->obj->base.dev->struct_mutex);
261 i915_gem_release_mmap(obj); 247 if (ret)
248 return ret;
249 }
262 250
263 /* As we do not have an associated fence register, we will force 251 if (fence->vma && fence->vma != vma) {
264 * a tiling change if we ever need to acquire one. 252 /* Ensure that all userspace CPU access is completed before
265 */ 253 * stealing the fence.
266 obj->fence_dirty = false; 254 */
267 obj->fence_reg = I915_FENCE_REG_NONE; 255 i915_gem_release_mmap(fence->vma->obj);
268}
269 256
270static int 257 fence->vma->fence = NULL;
271i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 258 fence->vma = NULL;
272{ 259
273 return i915_gem_active_retire(&obj->last_fence, 260 list_move(&fence->link, &fence->i915->mm.fence_list);
274 &obj->base.dev->struct_mutex); 261 }
262
263 fence_write(fence, vma);
264
265 if (vma) {
266 if (fence->vma != vma) {
267 vma->fence = fence;
268 fence->vma = vma;
269 }
270
271 list_move_tail(&fence->link, &fence->i915->mm.fence_list);
272 }
273
274 return 0;
275} 275}
276 276
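fence_update() is now the single place that waits for outstanding GPU access on both the old and new vma, revokes the previous owner's CPU mmaps before stealing its register, rewrites the register, and finally fixes up the vma/fence back-pointers and LRU position. The following sketch models that reassignment order with data only; the helpers merely print what the driver would do, and no hardware is touched.

/* Sketch: reassigning a fence register from one vma to another in the order
 * used by fence_update(): wait, revoke old mmaps, write, relink. Pure model. */
#include <stdio.h>

struct vma;

struct fence_reg {
	struct vma *vma;	/* current owner, if any */
	int pin_count;
};

struct vma {
	const char *name;
	struct fence_reg *fence;	/* back-pointer to its register */
};

static void wait_for_idle(struct vma *vma) { printf("wait on %s\n", vma->name); }
static void release_mmap(struct vma *vma)  { printf("revoke mmaps of %s\n", vma->name); }
static void write_fence(struct fence_reg *f, struct vma *vma)
{
	(void)f;
	printf("program register for %s\n", vma ? vma->name : "<none>");
}

static int fence_update(struct fence_reg *fence, struct vma *vma)
{
	if (fence->pin_count)
		return -1;			/* -EBUSY in the driver */

	if (vma)
		wait_for_idle(vma);
	if (fence->vma)
		wait_for_idle(fence->vma);

	if (fence->vma && fence->vma != vma) {
		release_mmap(fence->vma);	/* stop stale fenced CPU access */
		fence->vma->fence = NULL;
		fence->vma = NULL;
	}

	write_fence(fence, vma);

	if (vma) {
		vma->fence = fence;
		fence->vma = vma;
	}
	return 0;
}

int main(void)
{
	struct vma a = { "vma-a" }, b = { "vma-b" };
	struct fence_reg reg = { 0 };

	fence_update(&reg, &a);		/* install */
	fence_update(&reg, &b);		/* steal from a, give to b */
	fence_update(&reg, NULL);	/* i915_vma_put_fence(): clear */
	return 0;
}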
277/** 277/**
278 * i915_gem_object_put_fence - force-remove fence for an object 278 * i915_vma_put_fence - force-remove fence for a VMA
279 * @obj: object to map through a fence reg 279 * @vma: vma to map linearly (not through a fence reg)
280 * 280 *
281 * This function force-removes any fence from the given object, which is useful 281 * This function force-removes any fence from the given object, which is useful
282 * if the kernel wants to do untiled GTT access. 282 * if the kernel wants to do untiled GTT access.
@@ -286,70 +286,40 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
286 * 0 on success, negative error code on failure. 286 * 0 on success, negative error code on failure.
287 */ 287 */
288int 288int
289i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 289i915_vma_put_fence(struct i915_vma *vma)
290{ 290{
291 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 291 struct drm_i915_fence_reg *fence = vma->fence;
292 struct drm_i915_fence_reg *fence;
293 int ret;
294
295 ret = i915_gem_object_wait_fence(obj);
296 if (ret)
297 return ret;
298 292
299 if (obj->fence_reg == I915_FENCE_REG_NONE) 293 if (!fence)
300 return 0; 294 return 0;
301 295
302 fence = &dev_priv->fence_regs[obj->fence_reg]; 296 if (fence->pin_count)
303
304 if (WARN_ON(fence->pin_count))
305 return -EBUSY; 297 return -EBUSY;
306 298
307 i915_gem_object_fence_lost(obj); 299 return fence_update(fence, NULL);
308 i915_gem_object_update_fence(obj, fence, false);
309
310 return 0;
311} 300}
312 301
313static struct drm_i915_fence_reg * 302static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
314i915_find_fence_reg(struct drm_device *dev)
315{ 303{
316 struct drm_i915_private *dev_priv = to_i915(dev); 304 struct drm_i915_fence_reg *fence;
317 struct drm_i915_fence_reg *reg, *avail;
318 int i;
319
320 /* First try to find a free reg */
321 avail = NULL;
322 for (i = 0; i < dev_priv->num_fence_regs; i++) {
323 reg = &dev_priv->fence_regs[i];
324 if (!reg->obj)
325 return reg;
326
327 if (!reg->pin_count)
328 avail = reg;
329 }
330
331 if (avail == NULL)
332 goto deadlock;
333 305
334 /* None available, try to steal one or wait for a user to finish */ 306 list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
335 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 307 if (fence->pin_count)
336 if (reg->pin_count)
337 continue; 308 continue;
338 309
339 return reg; 310 return fence;
340 } 311 }
341 312
342deadlock:
343 /* Wait for completion of pending flips which consume fences */ 313 /* Wait for completion of pending flips which consume fences */
344 if (intel_has_pending_fb_unpin(dev)) 314 if (intel_has_pending_fb_unpin(&dev_priv->drm))
345 return ERR_PTR(-EAGAIN); 315 return ERR_PTR(-EAGAIN);
346 316
347 return ERR_PTR(-EDEADLK); 317 return ERR_PTR(-EDEADLK);
348} 318}
349 319
350/** 320/**
351 * i915_gem_object_get_fence - set up fencing for an object 321 * i915_vma_get_fence - set up fencing for a vma
352 * @obj: object to map through a fence reg 322 * @vma: vma to map through a fence reg
353 * 323 *
354 * When mapping objects through the GTT, userspace wants to be able to write 324 * When mapping objects through the GTT, userspace wants to be able to write
355 * to them without having to worry about swizzling if the object is tiled. 325 * to them without having to worry about swizzling if the object is tiled.
@@ -366,103 +336,27 @@ deadlock:
366 * 0 on success, negative error code on failure. 336 * 0 on success, negative error code on failure.
367 */ 337 */
368int 338int
369i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 339i915_vma_get_fence(struct i915_vma *vma)
370{ 340{
371 struct drm_device *dev = obj->base.dev; 341 struct drm_i915_fence_reg *fence;
372 struct drm_i915_private *dev_priv = to_i915(dev); 342 struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
373 bool enable = i915_gem_object_is_tiled(obj);
374 struct drm_i915_fence_reg *reg;
375 int ret;
376
377 /* Have we updated the tiling parameters upon the object and so
378 * will need to serialise the write to the associated fence register?
379 */
380 if (obj->fence_dirty) {
381 ret = i915_gem_object_wait_fence(obj);
382 if (ret)
383 return ret;
384 }
385 343
386 /* Just update our place in the LRU if our fence is getting reused. */ 344 /* Just update our place in the LRU if our fence is getting reused. */
387 if (obj->fence_reg != I915_FENCE_REG_NONE) { 345 if (vma->fence) {
388 reg = &dev_priv->fence_regs[obj->fence_reg]; 346 fence = vma->fence;
389 if (!obj->fence_dirty) { 347 if (!fence->dirty) {
390 list_move_tail(&reg->lru_list, 348 list_move_tail(&fence->link,
391 &dev_priv->mm.fence_list); 349 &fence->i915->mm.fence_list);
392 return 0; 350 return 0;
393 } 351 }
394 } else if (enable) { 352 } else if (set) {
395 if (WARN_ON(!obj->map_and_fenceable)) 353 fence = fence_find(to_i915(vma->vm->dev));
396 return -EINVAL; 354 if (IS_ERR(fence))
397 355 return PTR_ERR(fence);
398 reg = i915_find_fence_reg(dev);
399 if (IS_ERR(reg))
400 return PTR_ERR(reg);
401
402 if (reg->obj) {
403 struct drm_i915_gem_object *old = reg->obj;
404
405 ret = i915_gem_object_wait_fence(old);
406 if (ret)
407 return ret;
408
409 i915_gem_object_fence_lost(old);
410 }
411 } else 356 } else
412 return 0; 357 return 0;
413 358
414 i915_gem_object_update_fence(obj, reg, enable); 359 return fence_update(fence, set);
415
416 return 0;
417}
418
419/**
420 * i915_gem_object_pin_fence - pin fencing state
421 * @obj: object to pin fencing for
422 *
423 * This pins the fencing state (whether tiled or untiled) to make sure the
424 * object is ready to be used as a scanout target. Fencing status must be
425 * synchronize first by calling i915_gem_object_get_fence():
426 *
427 * The resulting fence pin reference must be released again with
428 * i915_gem_object_unpin_fence().
429 *
430 * Returns:
431 *
432 * True if the object has a fence, false otherwise.
433 */
434bool
435i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
436{
437 if (obj->fence_reg != I915_FENCE_REG_NONE) {
438 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
439 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
440
441 WARN_ON(!ggtt_vma ||
442 dev_priv->fence_regs[obj->fence_reg].pin_count >
443 i915_vma_pin_count(ggtt_vma));
444 dev_priv->fence_regs[obj->fence_reg].pin_count++;
445 return true;
446 } else
447 return false;
448}
449
450/**
451 * i915_gem_object_unpin_fence - unpin fencing state
452 * @obj: object to unpin fencing for
453 *
454 * This releases the fence pin reference acquired through
455 * i915_gem_object_pin_fence. It will handle both objects with and without an
456 * attached fence correctly, callers do not need to distinguish this.
457 */
458void
459i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
460{
461 if (obj->fence_reg != I915_FENCE_REG_NONE) {
462 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
463 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
464 dev_priv->fence_regs[obj->fence_reg].pin_count--;
465 }
466} 360}
467 361
468/** 362/**
@@ -479,17 +373,16 @@ void i915_gem_restore_fences(struct drm_device *dev)
479 373
480 for (i = 0; i < dev_priv->num_fence_regs; i++) { 374 for (i = 0; i < dev_priv->num_fence_regs; i++) {
481 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 375 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
376 struct i915_vma *vma = reg->vma;
482 377
483 /* 378 /*
484 * Commit delayed tiling changes if we have an object still 379 * Commit delayed tiling changes if we have an object still
485 * attached to the fence, otherwise just clear the fence. 380 * attached to the fence, otherwise just clear the fence.
486 */ 381 */
487 if (reg->obj) { 382 if (vma && !i915_gem_object_is_tiled(vma->obj))
488 i915_gem_object_update_fence(reg->obj, reg, 383 vma = NULL;
489 i915_gem_object_get_tiling(reg->obj)); 384
490 } else { 385 fence_update(reg, vma);
491 i915_gem_write_fence(dev, i, NULL);
492 }
493 } 386 }
494} 387}
495 388
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 18c7c9644761..b90fdcee992a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -170,11 +170,13 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
170{ 170{
171 u32 pte_flags = 0; 171 u32 pte_flags = 0;
172 172
173 vma->pages = vma->obj->pages;
174
173 /* Currently applicable only to VLV */ 175 /* Currently applicable only to VLV */
174 if (vma->obj->gt_ro) 176 if (vma->obj->gt_ro)
175 pte_flags |= PTE_READ_ONLY; 177 pte_flags |= PTE_READ_ONLY;
176 178
177 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, 179 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
178 cache_level, pte_flags); 180 cache_level, pte_flags);
179 181
180 return 0; 182 return 0;
@@ -2618,8 +2620,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
2618 if (obj->gt_ro) 2620 if (obj->gt_ro)
2619 pte_flags |= PTE_READ_ONLY; 2621 pte_flags |= PTE_READ_ONLY;
2620 2622
2621 vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages, 2623 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
2622 vma->node.start,
2623 cache_level, pte_flags); 2624 cache_level, pte_flags);
2624 2625
2625 /* 2626 /*
@@ -2651,8 +2652,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2651 2652
2652 if (flags & I915_VMA_GLOBAL_BIND) { 2653 if (flags & I915_VMA_GLOBAL_BIND) {
2653 vma->vm->insert_entries(vma->vm, 2654 vma->vm->insert_entries(vma->vm,
2654 vma->ggtt_view.pages, 2655 vma->pages, vma->node.start,
2655 vma->node.start,
2656 cache_level, pte_flags); 2656 cache_level, pte_flags);
2657 } 2657 }
2658 2658
@@ -2660,8 +2660,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2660 struct i915_hw_ppgtt *appgtt = 2660 struct i915_hw_ppgtt *appgtt =
2661 to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2661 to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2662 appgtt->base.insert_entries(&appgtt->base, 2662 appgtt->base.insert_entries(&appgtt->base,
2663 vma->ggtt_view.pages, 2663 vma->pages, vma->node.start,
2664 vma->node.start,
2665 cache_level, pte_flags); 2664 cache_level, pte_flags);
2666 } 2665 }
2667 2666
@@ -2795,7 +2794,6 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2795 2794
2796 if (dev_priv->mm.aliasing_ppgtt) { 2795 if (dev_priv->mm.aliasing_ppgtt) {
2797 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2796 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2798
2799 ppgtt->base.cleanup(&ppgtt->base); 2797 ppgtt->base.cleanup(&ppgtt->base);
2800 kfree(ppgtt); 2798 kfree(ppgtt);
2801 } 2799 }
@@ -2812,7 +2810,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2812 ggtt->base.cleanup(&ggtt->base); 2810 ggtt->base.cleanup(&ggtt->base);
2813 2811
2814 arch_phys_wc_del(ggtt->mtrr); 2812 arch_phys_wc_del(ggtt->mtrr);
2815 io_mapping_free(ggtt->mappable); 2813 io_mapping_fini(&ggtt->mappable);
2816} 2814}
2817 2815
2818static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2816static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -3210,9 +3208,9 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3210 if (!HAS_LLC(dev_priv)) 3208 if (!HAS_LLC(dev_priv))
3211 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 3209 ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
3212 3210
3213 ggtt->mappable = 3211 if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
3214 io_mapping_create_wc(ggtt->mappable_base, ggtt->mappable_end); 3212 dev_priv->ggtt.mappable_base,
3215 if (!ggtt->mappable) { 3213 dev_priv->ggtt.mappable_end)) {
3216 ret = -EIO; 3214 ret = -EIO;
3217 goto out_gtt_cleanup; 3215 goto out_gtt_cleanup;
3218 } 3216 }
@@ -3323,6 +3321,7 @@ void i915_vma_destroy(struct i915_vma *vma)
3323 GEM_BUG_ON(vma->node.allocated); 3321 GEM_BUG_ON(vma->node.allocated);
3324 GEM_BUG_ON(i915_vma_is_active(vma)); 3322 GEM_BUG_ON(i915_vma_is_active(vma));
3325 GEM_BUG_ON(!i915_vma_is_closed(vma)); 3323 GEM_BUG_ON(!i915_vma_is_closed(vma));
3324 GEM_BUG_ON(vma->fence);
3326 3325
3327 list_del(&vma->vm_link); 3326 list_del(&vma->vm_link);
3328 if (!i915_vma_is_ggtt(vma)) 3327 if (!i915_vma_is_ggtt(vma))
@@ -3342,33 +3341,29 @@ void i915_vma_close(struct i915_vma *vma)
3342} 3341}
3343 3342
3344static struct i915_vma * 3343static struct i915_vma *
3345__i915_gem_vma_create(struct drm_i915_gem_object *obj, 3344__i915_vma_create(struct drm_i915_gem_object *obj,
3346 struct i915_address_space *vm, 3345 struct i915_address_space *vm,
3347 const struct i915_ggtt_view *view) 3346 const struct i915_ggtt_view *view)
3348{ 3347{
3349 struct i915_vma *vma; 3348 struct i915_vma *vma;
3350 int i; 3349 int i;
3351 3350
3352 GEM_BUG_ON(vm->closed); 3351 GEM_BUG_ON(vm->closed);
3353 3352
3354 if (WARN_ON(i915_is_ggtt(vm) != !!view))
3355 return ERR_PTR(-EINVAL);
3356
3357 vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); 3353 vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3358 if (vma == NULL) 3354 if (vma == NULL)
3359 return ERR_PTR(-ENOMEM); 3355 return ERR_PTR(-ENOMEM);
3360 3356
3361 INIT_LIST_HEAD(&vma->obj_link);
3362 INIT_LIST_HEAD(&vma->exec_list); 3357 INIT_LIST_HEAD(&vma->exec_list);
3363 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) 3358 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
3364 init_request_active(&vma->last_read[i], i915_vma_retire); 3359 init_request_active(&vma->last_read[i], i915_vma_retire);
3360 init_request_active(&vma->last_fence, NULL);
3365 list_add(&vma->vm_link, &vm->unbound_list); 3361 list_add(&vma->vm_link, &vm->unbound_list);
3366 vma->vm = vm; 3362 vma->vm = vm;
3367 vma->obj = obj; 3363 vma->obj = obj;
3368 vma->size = obj->base.size; 3364 vma->size = obj->base.size;
3369 3365
3370 if (i915_is_ggtt(vm)) { 3366 if (view) {
3371 vma->flags |= I915_VMA_GGTT;
3372 vma->ggtt_view = *view; 3367 vma->ggtt_view = *view;
3373 if (view->type == I915_GGTT_VIEW_PARTIAL) { 3368 if (view->type == I915_GGTT_VIEW_PARTIAL) {
3374 vma->size = view->params.partial.size; 3369 vma->size = view->params.partial.size;
@@ -3378,46 +3373,79 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
3378 intel_rotation_info_size(&view->params.rotated); 3373 intel_rotation_info_size(&view->params.rotated);
3379 vma->size <<= PAGE_SHIFT; 3374 vma->size <<= PAGE_SHIFT;
3380 } 3375 }
3376 }
3377
3378 if (i915_is_ggtt(vm)) {
3379 vma->flags |= I915_VMA_GGTT;
3381 } else { 3380 } else {
3382 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3381 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3383 } 3382 }
3384 3383
3385 list_add_tail(&vma->obj_link, &obj->vma_list); 3384 list_add_tail(&vma->obj_link, &obj->vma_list);
3386
3387 return vma; 3385 return vma;
3388} 3386}
3389 3387
3388static inline bool vma_matches(struct i915_vma *vma,
3389 struct i915_address_space *vm,
3390 const struct i915_ggtt_view *view)
3391{
3392 if (vma->vm != vm)
3393 return false;
3394
3395 if (!i915_vma_is_ggtt(vma))
3396 return true;
3397
3398 if (!view)
3399 return vma->ggtt_view.type == 0;
3400
3401 if (vma->ggtt_view.type != view->type)
3402 return false;
3403
3404 return memcmp(&vma->ggtt_view.params,
3405 &view->params,
3406 sizeof(view->params)) == 0;
3407}
3408
3390struct i915_vma * 3409struct i915_vma *
3391i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3410i915_vma_create(struct drm_i915_gem_object *obj,
3392 struct i915_address_space *vm) 3411 struct i915_address_space *vm,
3412 const struct i915_ggtt_view *view)
3413{
3414 GEM_BUG_ON(view && !i915_is_ggtt(vm));
3415 GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));
3416
3417 return __i915_vma_create(obj, vm, view);
3418}
3419
3420struct i915_vma *
3421i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
3422 struct i915_address_space *vm,
3423 const struct i915_ggtt_view *view)
3393{ 3424{
3394 struct i915_vma *vma; 3425 struct i915_vma *vma;
3395 3426
3396 vma = i915_gem_obj_to_vma(obj, vm); 3427 list_for_each_entry_reverse(vma, &obj->vma_list, obj_link)
3397 if (!vma) 3428 if (vma_matches(vma, vm, view))
3398 vma = __i915_gem_vma_create(obj, vm, 3429 return vma;
3399 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3400 3430
3401 return vma; 3431 return NULL;
3402} 3432}
3403 3433
3404struct i915_vma * 3434struct i915_vma *
3405i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, 3435i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3406 const struct i915_ggtt_view *view) 3436 struct i915_address_space *vm,
3437 const struct i915_ggtt_view *view)
3407{ 3438{
3408 struct drm_device *dev = obj->base.dev; 3439 struct i915_vma *vma;
3409 struct drm_i915_private *dev_priv = to_i915(dev);
3410 struct i915_ggtt *ggtt = &dev_priv->ggtt;
3411 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
3412 3440
3413 GEM_BUG_ON(!view); 3441 GEM_BUG_ON(view && !i915_is_ggtt(vm));
3414 3442
3443 vma = i915_gem_obj_to_vma(obj, vm, view);
3415 if (!vma) 3444 if (!vma)
3416 vma = __i915_gem_vma_create(obj, &ggtt->base, view); 3445 vma = __i915_vma_create(obj, vm, view);
3417 3446
3418 GEM_BUG_ON(i915_vma_is_closed(vma)); 3447 GEM_BUG_ON(i915_vma_is_closed(vma));
3419 return vma; 3448 return vma;
3420
3421} 3449}
3422 3450
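
A hedged sketch of how a caller drives the reworked lookup path above: the GGTT view is now passed explicitly (NULL for the normal view), with i915_gem_obj_to_vma() as the pure lookup and i915_vma_create() as the constructor behind the combined helper:

        struct i915_vma *vma;

        vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
        if (IS_ERR(vma))
                return PTR_ERR(vma);
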
3423static struct scatterlist * 3451static struct scatterlist *
@@ -3449,18 +3477,16 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
3449} 3477}
3450 3478
3451static struct sg_table * 3479static struct sg_table *
3452intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, 3480intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
3453 struct drm_i915_gem_object *obj) 3481 struct drm_i915_gem_object *obj)
3454{ 3482{
3455 const size_t n_pages = obj->base.size / PAGE_SIZE; 3483 const size_t n_pages = obj->base.size / PAGE_SIZE;
3456 unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height; 3484 unsigned int size = intel_rotation_info_size(rot_info);
3457 unsigned int size_pages_uv;
3458 struct sgt_iter sgt_iter; 3485 struct sgt_iter sgt_iter;
3459 dma_addr_t dma_addr; 3486 dma_addr_t dma_addr;
3460 unsigned long i; 3487 unsigned long i;
3461 dma_addr_t *page_addr_list; 3488 dma_addr_t *page_addr_list;
3462 struct sg_table *st; 3489 struct sg_table *st;
3463 unsigned int uv_start_page;
3464 struct scatterlist *sg; 3490 struct scatterlist *sg;
3465 int ret = -ENOMEM; 3491 int ret = -ENOMEM;
3466 3492
@@ -3471,18 +3497,12 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
3471 if (!page_addr_list) 3497 if (!page_addr_list)
3472 return ERR_PTR(ret); 3498 return ERR_PTR(ret);
3473 3499
3474 /* Account for UV plane with NV12. */
3475 if (rot_info->pixel_format == DRM_FORMAT_NV12)
3476 size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height;
3477 else
3478 size_pages_uv = 0;
3479
3480 /* Allocate target SG list. */ 3500 /* Allocate target SG list. */
3481 st = kmalloc(sizeof(*st), GFP_KERNEL); 3501 st = kmalloc(sizeof(*st), GFP_KERNEL);
3482 if (!st) 3502 if (!st)
3483 goto err_st_alloc; 3503 goto err_st_alloc;
3484 3504
3485 ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL); 3505 ret = sg_alloc_table(st, size, GFP_KERNEL);
3486 if (ret) 3506 if (ret)
3487 goto err_sg_alloc; 3507 goto err_sg_alloc;
3488 3508
@@ -3495,32 +3515,14 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
3495 st->nents = 0; 3515 st->nents = 0;
3496 sg = st->sgl; 3516 sg = st->sgl;
3497 3517
3498 /* Rotate the pages. */ 3518 for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
3499 sg = rotate_pages(page_addr_list, 0, 3519 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
3500 rot_info->plane[0].width, rot_info->plane[0].height, 3520 rot_info->plane[i].width, rot_info->plane[i].height,
3501 rot_info->plane[0].width, 3521 rot_info->plane[i].stride, st, sg);
3502 st, sg);
3503
3504 /* Append the UV plane if NV12. */
3505 if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3506 uv_start_page = size_pages;
3507
3508 /* Check for tile-row un-alignment. */
3509 if (offset_in_page(rot_info->uv_offset))
3510 uv_start_page--;
3511
3512 rot_info->uv_start_page = uv_start_page;
3513
3514 sg = rotate_pages(page_addr_list, rot_info->uv_start_page,
3515 rot_info->plane[1].width, rot_info->plane[1].height,
3516 rot_info->plane[1].width,
3517 st, sg);
3518 } 3522 }
3519 3523
3520 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n", 3524 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
3521 obj->base.size, rot_info->plane[0].width, 3525 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3522 rot_info->plane[0].height, size_pages + size_pages_uv,
3523 size_pages);
3524 3526
3525 drm_free_large(page_addr_list); 3527 drm_free_large(page_addr_list);
3526 3528
@@ -3531,10 +3533,9 @@ err_sg_alloc:
3531err_st_alloc: 3533err_st_alloc:
3532 drm_free_large(page_addr_list); 3534 drm_free_large(page_addr_list);
3533 3535
3534 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n", 3536 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3535 obj->base.size, ret, rot_info->plane[0].width, 3537 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3536 rot_info->plane[0].height, size_pages + size_pages_uv, 3538
3537 size_pages);
3538 return ERR_PTR(ret); 3539 return ERR_PTR(ret);
3539} 3540}
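
The per-plane loop above replaces the NV12 special case; each plane now carries its own width/height/stride/offset in tiles (see the intel_rotation_info change further down). As a hedged sketch, intel_rotation_info_size() is assumed to reduce to the sum of the per-plane tile counts:

        static unsigned int rotation_info_size(const struct intel_rotation_info *rot_info)
        {
                unsigned int i, size = 0;

                for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++)
                        size += rot_info->plane[i].width * rot_info->plane[i].height;

                return size;
        }
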
3540 3541
@@ -3584,28 +3585,27 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
3584{ 3585{
3585 int ret = 0; 3586 int ret = 0;
3586 3587
3587 if (vma->ggtt_view.pages) 3588 if (vma->pages)
3588 return 0; 3589 return 0;
3589 3590
3590 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3591 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3591 vma->ggtt_view.pages = vma->obj->pages; 3592 vma->pages = vma->obj->pages;
3592 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3593 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3593 vma->ggtt_view.pages = 3594 vma->pages =
3594 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3595 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
3595 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3596 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3596 vma->ggtt_view.pages = 3597 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3597 intel_partial_pages(&vma->ggtt_view, vma->obj);
3598 else 3598 else
3599 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3599 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3600 vma->ggtt_view.type); 3600 vma->ggtt_view.type);
3601 3601
3602 if (!vma->ggtt_view.pages) { 3602 if (!vma->pages) {
3603 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3603 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3604 vma->ggtt_view.type); 3604 vma->ggtt_view.type);
3605 ret = -EINVAL; 3605 ret = -EINVAL;
3606 } else if (IS_ERR(vma->ggtt_view.pages)) { 3606 } else if (IS_ERR(vma->pages)) {
3607 ret = PTR_ERR(vma->ggtt_view.pages); 3607 ret = PTR_ERR(vma->pages);
3608 vma->ggtt_view.pages = NULL; 3608 vma->pages = NULL;
3609 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3609 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3610 vma->ggtt_view.type, ret); 3610 vma->ggtt_view.type, ret);
3611 } 3611 }
@@ -3668,8 +3668,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3668{ 3668{
3669 void __iomem *ptr; 3669 void __iomem *ptr;
3670 3670
3671 /* Access through the GTT requires the device to be awake. */
3672 assert_rpm_wakelock_held(to_i915(vma->vm->dev));
3673
3671 lockdep_assert_held(&vma->vm->dev->struct_mutex); 3674 lockdep_assert_held(&vma->vm->dev->struct_mutex);
3672 if (WARN_ON(!vma->obj->map_and_fenceable)) 3675 if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
3673 return IO_ERR_PTR(-ENODEV); 3676 return IO_ERR_PTR(-ENODEV);
3674 3677
3675 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 3678 GEM_BUG_ON(!i915_vma_is_ggtt(vma));
@@ -3677,7 +3680,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3677 3680
3678 ptr = vma->iomap; 3681 ptr = vma->iomap;
3679 if (ptr == NULL) { 3682 if (ptr == NULL) {
3680 ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable, 3683 ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
3681 vma->node.start, 3684 vma->node.start,
3682 vma->node.size); 3685 vma->node.size);
3683 if (ptr == NULL) 3686 if (ptr == NULL)
@@ -3689,3 +3692,15 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3689 __i915_vma_pin(vma); 3692 __i915_vma_pin(vma);
3690 return ptr; 3693 return ptr;
3691} 3694}
3695
3696void i915_vma_unpin_and_release(struct i915_vma **p_vma)
3697{
3698 struct i915_vma *vma;
3699
3700 vma = fetch_and_zero(p_vma);
3701 if (!vma)
3702 return;
3703
3704 i915_vma_unpin(vma);
3705 i915_vma_put(vma);
3706}
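
A hedged usage sketch for the iomap helpers touched above; the caller is assumed to hold struct_mutex and a runtime-PM wakeref (both now asserted), with the vma already pinned map-and-fenceable in the GGTT. i915_vma_unpin_and_release() similarly replaces open-coded unpin+put for cached vma pointers.

        void __iomem *ptr;

        ptr = i915_vma_pin_iomap(vma);
        if (IS_ERR(ptr))
                return PTR_ERR(ptr);

        /* CPU writes go straight through the WC aperture mapping. */
        writel(0xdeadbeef, ptr);

        i915_vma_unpin_iomap(vma);
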
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index cc56206a1600..a9aec25535ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -38,7 +38,13 @@
38 38
39#include "i915_gem_request.h" 39#include "i915_gem_request.h"
40 40
41#define I915_FENCE_REG_NONE -1
42#define I915_MAX_NUM_FENCES 32
43/* 32 fences + sign bit for FENCE_REG_NONE */
44#define I915_MAX_NUM_FENCE_BITS 6
45
41struct drm_i915_file_private; 46struct drm_i915_file_private;
47struct drm_i915_fence_reg;
42 48
43typedef uint32_t gen6_pte_t; 49typedef uint32_t gen6_pte_t;
44typedef uint64_t gen8_pte_t; 50typedef uint64_t gen8_pte_t;
@@ -139,12 +145,9 @@ enum i915_ggtt_view_type {
139}; 145};
140 146
141struct intel_rotation_info { 147struct intel_rotation_info {
142 unsigned int uv_offset;
143 uint32_t pixel_format;
144 unsigned int uv_start_page;
145 struct { 148 struct {
146 /* tiles */ 149 /* tiles */
147 unsigned int width, height; 150 unsigned int width, height, stride, offset;
148 } plane[2]; 151 } plane[2];
149}; 152};
150 153
@@ -158,8 +161,6 @@ struct i915_ggtt_view {
158 } partial; 161 } partial;
159 struct intel_rotation_info rotated; 162 struct intel_rotation_info rotated;
160 } params; 163 } params;
161
162 struct sg_table *pages;
163}; 164};
164 165
165extern const struct i915_ggtt_view i915_ggtt_view_normal; 166extern const struct i915_ggtt_view i915_ggtt_view_normal;
@@ -179,8 +180,11 @@ struct i915_vma {
179 struct drm_mm_node node; 180 struct drm_mm_node node;
180 struct drm_i915_gem_object *obj; 181 struct drm_i915_gem_object *obj;
181 struct i915_address_space *vm; 182 struct i915_address_space *vm;
183 struct drm_i915_fence_reg *fence;
184 struct sg_table *pages;
182 void __iomem *iomap; 185 void __iomem *iomap;
183 u64 size; 186 u64 size;
187 u64 display_alignment;
184 188
185 unsigned int flags; 189 unsigned int flags;
186 /** 190 /**
@@ -201,11 +205,13 @@ struct i915_vma {
201#define I915_VMA_LOCAL_BIND BIT(7) 205#define I915_VMA_LOCAL_BIND BIT(7)
202#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) 206#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
203 207
204#define I915_VMA_GGTT BIT(8) 208#define I915_VMA_GGTT BIT(8)
205#define I915_VMA_CLOSED BIT(9) 209#define I915_VMA_CAN_FENCE BIT(9)
210#define I915_VMA_CLOSED BIT(10)
206 211
207 unsigned int active; 212 unsigned int active;
208 struct i915_gem_active last_read[I915_NUM_ENGINES]; 213 struct i915_gem_active last_read[I915_NUM_ENGINES];
214 struct i915_gem_active last_fence;
209 215
210 /** 216 /**
211 * Support different GGTT views into the same object. 217 * Support different GGTT views into the same object.
@@ -232,11 +238,22 @@ struct i915_vma {
232 struct drm_i915_gem_exec_object2 *exec_entry; 238 struct drm_i915_gem_exec_object2 *exec_entry;
233}; 239};
234 240
241struct i915_vma *
242i915_vma_create(struct drm_i915_gem_object *obj,
243 struct i915_address_space *vm,
244 const struct i915_ggtt_view *view);
245void i915_vma_unpin_and_release(struct i915_vma **p_vma);
246
235static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) 247static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
236{ 248{
237 return vma->flags & I915_VMA_GGTT; 249 return vma->flags & I915_VMA_GGTT;
238} 250}
239 251
252static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
253{
254 return vma->flags & I915_VMA_CAN_FENCE;
255}
256
240static inline bool i915_vma_is_closed(const struct i915_vma *vma) 257static inline bool i915_vma_is_closed(const struct i915_vma *vma)
241{ 258{
242 return vma->flags & I915_VMA_CLOSED; 259 return vma->flags & I915_VMA_CLOSED;
@@ -270,6 +287,15 @@ static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
270 return vma->active & BIT(engine); 287 return vma->active & BIT(engine);
271} 288}
272 289
290static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
291{
292 GEM_BUG_ON(!i915_vma_is_ggtt(vma));
293 GEM_BUG_ON(!vma->node.allocated);
294 GEM_BUG_ON(upper_32_bits(vma->node.start));
295 GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
296 return lower_32_bits(vma->node.start);
297}
298
273struct i915_page_dma { 299struct i915_page_dma {
274 struct page *page; 300 struct page *page;
275 union { 301 union {
@@ -413,13 +439,13 @@ struct i915_address_space {
413 */ 439 */
414struct i915_ggtt { 440struct i915_ggtt {
415 struct i915_address_space base; 441 struct i915_address_space base;
442 struct io_mapping mappable; /* Mapping to our CPU mappable region */
416 443
417 size_t stolen_size; /* Total size of stolen memory */ 444 size_t stolen_size; /* Total size of stolen memory */
418 size_t stolen_usable_size; /* Total size minus BIOS reserved */ 445 size_t stolen_usable_size; /* Total size minus BIOS reserved */
419 size_t stolen_reserved_base; 446 size_t stolen_reserved_base;
420 size_t stolen_reserved_size; 447 size_t stolen_reserved_size;
421 u64 mappable_end; /* End offset that we can CPU map */ 448 u64 mappable_end; /* End offset that we can CPU map */
422 struct io_mapping *mappable; /* Mapping to our CPU mappable region */
423 phys_addr_t mappable_base; /* PA of our GMADR */ 449 phys_addr_t mappable_base; /* PA of our GMADR */
424 450
425 /** "Graphics Stolen Memory" holds the global PTEs */ 451 /** "Graphics Stolen Memory" holds the global PTEs */
@@ -608,24 +634,11 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev);
608int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); 634int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
609void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj); 635void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
610 636
611static inline bool
612i915_ggtt_view_equal(const struct i915_ggtt_view *a,
613 const struct i915_ggtt_view *b)
614{
615 if (WARN_ON(!a || !b))
616 return false;
617
618 if (a->type != b->type)
619 return false;
620 if (a->type != I915_GGTT_VIEW_NORMAL)
621 return !memcmp(&a->params, &b->params, sizeof(a->params));
622 return true;
623}
624
625/* Flags used by pin/bind&friends. */ 637/* Flags used by pin/bind&friends. */
626#define PIN_NONBLOCK BIT(0) 638#define PIN_NONBLOCK BIT(0)
627#define PIN_MAPPABLE BIT(1) 639#define PIN_MAPPABLE BIT(1)
628#define PIN_ZONE_4G BIT(2) 640#define PIN_ZONE_4G BIT(2)
641#define PIN_NONFAULT BIT(3)
629 642
630#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */ 643#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */
631#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */ 644#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */
@@ -715,4 +728,10 @@ static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
715 i915_vma_unpin(vma); 728 i915_vma_unpin(vma);
716} 729}
717 730
731static inline struct page *i915_vma_first_page(struct i915_vma *vma)
732{
733 GEM_BUG_ON(!vma->pages);
734 return sg_page(vma->pages->sgl);
735}
736
718#endif 737#endif
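
A hedged sketch of the new inline helpers above in use: i915_ggtt_offset() yields a hardware-safe 32-bit GGTT address (the GEM_BUG_ONs replace silent truncation of node.start), and i915_vma_first_page() exposes the first backing page, e.g. for a CPU-mapped status page. The vma is assumed to be bound in the GGTT already:

        u32 offset = i915_ggtt_offset(vma);           /* asserts on unbound or >4GiB vma */
        void *vaddr = kmap(i915_vma_first_page(vma)); /* CPU view of the first page */

        /* program 'offset' into a 32-bit register, scribble via 'vaddr' ... */

        kunmap(i915_vma_first_page(vma));
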
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 57fd767a2d79..95b7e9afd5f8 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -30,8 +30,7 @@
30 30
31struct render_state { 31struct render_state {
32 const struct intel_renderstate_rodata *rodata; 32 const struct intel_renderstate_rodata *rodata;
33 struct drm_i915_gem_object *obj; 33 struct i915_vma *vma;
34 u64 ggtt_offset;
35 u32 aux_batch_size; 34 u32 aux_batch_size;
36 u32 aux_batch_offset; 35 u32 aux_batch_offset;
37}; 36};
@@ -73,7 +72,7 @@ render_state_get_rodata(const struct drm_i915_gem_request *req)
73 72
74static int render_state_setup(struct render_state *so) 73static int render_state_setup(struct render_state *so)
75{ 74{
76 struct drm_device *dev = so->obj->base.dev; 75 struct drm_device *dev = so->vma->vm->dev;
77 const struct intel_renderstate_rodata *rodata = so->rodata; 76 const struct intel_renderstate_rodata *rodata = so->rodata;
78 const bool has_64bit_reloc = INTEL_GEN(dev) >= 8; 77 const bool has_64bit_reloc = INTEL_GEN(dev) >= 8;
79 unsigned int i = 0, reloc_index = 0; 78 unsigned int i = 0, reloc_index = 0;
@@ -81,18 +80,18 @@ static int render_state_setup(struct render_state *so)
81 u32 *d; 80 u32 *d;
82 int ret; 81 int ret;
83 82
84 ret = i915_gem_object_set_to_cpu_domain(so->obj, true); 83 ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true);
85 if (ret) 84 if (ret)
86 return ret; 85 return ret;
87 86
88 page = i915_gem_object_get_dirty_page(so->obj, 0); 87 page = i915_gem_object_get_dirty_page(so->vma->obj, 0);
89 d = kmap(page); 88 d = kmap(page);
90 89
91 while (i < rodata->batch_items) { 90 while (i < rodata->batch_items) {
92 u32 s = rodata->batch[i]; 91 u32 s = rodata->batch[i];
93 92
94 if (i * 4 == rodata->reloc[reloc_index]) { 93 if (i * 4 == rodata->reloc[reloc_index]) {
95 u64 r = s + so->ggtt_offset; 94 u64 r = s + so->vma->node.start;
96 s = lower_32_bits(r); 95 s = lower_32_bits(r);
97 if (has_64bit_reloc) { 96 if (has_64bit_reloc) {
98 if (i + 1 >= rodata->batch_items || 97 if (i + 1 >= rodata->batch_items ||
@@ -154,7 +153,7 @@ static int render_state_setup(struct render_state *so)
154 153
155 kunmap(page); 154 kunmap(page);
156 155
157 ret = i915_gem_object_set_to_gtt_domain(so->obj, false); 156 ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false);
158 if (ret) 157 if (ret)
159 return ret; 158 return ret;
160 159
@@ -175,6 +174,7 @@ err_out:
175int i915_gem_render_state_init(struct drm_i915_gem_request *req) 174int i915_gem_render_state_init(struct drm_i915_gem_request *req)
176{ 175{
177 struct render_state so; 176 struct render_state so;
177 struct drm_i915_gem_object *obj;
178 int ret; 178 int ret;
179 179
180 if (WARN_ON(req->engine->id != RCS)) 180 if (WARN_ON(req->engine->id != RCS))
@@ -187,21 +187,25 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
187 if (so.rodata->batch_items * 4 > 4096) 187 if (so.rodata->batch_items * 4 > 4096)
188 return -EINVAL; 188 return -EINVAL;
189 189
190 so.obj = i915_gem_object_create(&req->i915->drm, 4096); 190 obj = i915_gem_object_create(&req->i915->drm, 4096);
191 if (IS_ERR(so.obj)) 191 if (IS_ERR(obj))
192 return PTR_ERR(so.obj); 192 return PTR_ERR(obj);
193 193
194 ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0); 194 so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL);
195 if (ret) 195 if (IS_ERR(so.vma)) {
196 ret = PTR_ERR(so.vma);
196 goto err_obj; 197 goto err_obj;
198 }
197 199
198 so.ggtt_offset = i915_gem_obj_ggtt_offset(so.obj); 200 ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL);
201 if (ret)
202 goto err_obj;
199 203
200 ret = render_state_setup(&so); 204 ret = render_state_setup(&so);
201 if (ret) 205 if (ret)
202 goto err_unpin; 206 goto err_unpin;
203 207
204 ret = req->engine->emit_bb_start(req, so.ggtt_offset, 208 ret = req->engine->emit_bb_start(req, so.vma->node.start,
205 so.rodata->batch_items * 4, 209 so.rodata->batch_items * 4,
206 I915_DISPATCH_SECURE); 210 I915_DISPATCH_SECURE);
207 if (ret) 211 if (ret)
@@ -209,7 +213,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
209 213
210 if (so.aux_batch_size > 8) { 214 if (so.aux_batch_size > 8) {
211 ret = req->engine->emit_bb_start(req, 215 ret = req->engine->emit_bb_start(req,
212 (so.ggtt_offset + 216 (so.vma->node.start +
213 so.aux_batch_offset), 217 so.aux_batch_offset),
214 so.aux_batch_size, 218 so.aux_batch_size,
215 I915_DISPATCH_SECURE); 219 I915_DISPATCH_SECURE);
@@ -217,10 +221,10 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
217 goto err_unpin; 221 goto err_unpin;
218 } 222 }
219 223
220 i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0); 224 i915_vma_move_to_active(so.vma, req, 0);
221err_unpin: 225err_unpin:
222 i915_gem_object_ggtt_unpin(so.obj); 226 i915_vma_unpin(so.vma);
223err_obj: 227err_obj:
224 i915_gem_object_put(so.obj); 228 i915_gem_object_put(obj);
225 return ret; 229 return ret;
226} 230}
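
Condensed, the render-state path above now follows the generic object-to-vma lifecycle; a hedged sketch with error handling trimmed:

        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        int ret;

        obj = i915_gem_object_create(&req->i915->drm, 4096);
        vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL);
        ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);

        /* emit batches using vma->node.start as the GGTT address */

        i915_vma_move_to_active(vma, req, 0);
        i915_vma_unpin(vma);
        i915_gem_object_put(obj);
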
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
index c44fca8599bb..18cce3f06e9c 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -24,7 +24,7 @@
24#ifndef _I915_GEM_RENDER_STATE_H_ 24#ifndef _I915_GEM_RENDER_STATE_H_
25#define _I915_GEM_RENDER_STATE_H_ 25#define _I915_GEM_RENDER_STATE_H_
26 26
27#include <linux/types.h> 27struct drm_i915_gem_request;
28 28
29int i915_gem_render_state_init(struct drm_i915_gem_request *req); 29int i915_gem_render_state_init(struct drm_i915_gem_request *req);
30 30
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6a1661643d3d..1a215320cefb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -137,8 +137,6 @@ int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
137 list_add_tail(&req->client_list, &file_priv->mm.request_list); 137 list_add_tail(&req->client_list, &file_priv->mm.request_list);
138 spin_unlock(&file_priv->mm.lock); 138 spin_unlock(&file_priv->mm.lock);
139 139
140 req->pid = get_pid(task_pid(current));
141
142 return 0; 140 return 0;
143} 141}
144 142
@@ -154,9 +152,6 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
154 list_del(&request->client_list); 152 list_del(&request->client_list);
155 request->file_priv = NULL; 153 request->file_priv = NULL;
156 spin_unlock(&file_priv->mm.lock); 154 spin_unlock(&file_priv->mm.lock);
157
158 put_pid(request->pid);
159 request->pid = NULL;
160} 155}
161 156
162void i915_gem_retire_noop(struct i915_gem_active *active, 157void i915_gem_retire_noop(struct i915_gem_active *active,
@@ -355,7 +350,35 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
355 if (req && i915_gem_request_completed(req)) 350 if (req && i915_gem_request_completed(req))
356 i915_gem_request_retire(req); 351 i915_gem_request_retire(req);
357 352
358 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 353 /* Beware: Dragons be flying overhead.
354 *
355 * We use RCU to look up requests in flight. The lookups may
356 * race with the request being allocated from the slab freelist.
 357 * That is, the request we are writing to here may be in the process
358 * of being read by __i915_gem_active_get_rcu(). As such,
359 * we have to be very careful when overwriting the contents. During
 360 * the RCU lookup, we chase the request->engine pointer,
361 * read the request->fence.seqno and increment the reference count.
362 *
363 * The reference count is incremented atomically. If it is zero,
364 * the lookup knows the request is unallocated and complete. Otherwise,
365 * it is either still in use, or has been reallocated and reset
 366 * with fence_init(). This increment is safe for release, as we
 367 * later check that the request we took a reference to still
 368 * matches the active request.
369 *
370 * Before we increment the refcount, we chase the request->engine
371 * pointer. We must not call kmem_cache_zalloc() or else we set
372 * that pointer to NULL and cause a crash during the lookup. If
373 * we see the request is completed (based on the value of the
374 * old engine and seqno), the lookup is complete and reports NULL.
 375 * If we decide the request is not completed (new engine or seqno),
 376 * then we grab a reference and double check that it is still the
 377 * active request - if it is not, we drop it and restart the lookup.
378 *
379 * Do not use kmem_cache_zalloc() here!
380 */
381 req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
359 if (!req) 382 if (!req)
360 return ERR_PTR(-ENOMEM); 383 return ERR_PTR(-ENOMEM);
361 384
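
The lockless consumer this non-zeroing allocation protects is sketched below; it has the same shape as __i915_gem_active_get_rcu() later in this series, using helpers already declared in i915_gem_request.h. The retry loop is what copes with the slab handing the memory to a new request mid-lookup:

        static struct drm_i915_gem_request *
        active_get_sketch(const struct i915_gem_active *active)
        {
                struct drm_i915_gem_request *request;

                rcu_read_lock();
                do {
                        request = rcu_dereference(active->request);
                        if (!request || i915_gem_request_completed(request)) {
                                request = NULL;
                                break;
                        }

                        /* The refcount bump only proves some request at this
                         * address is alive, not that it is still the tracked one.
                         */
                        request = i915_gem_request_get_rcu(request);

                        /* Confirm the tracker still points at what we hold. */
                        if (!request || request == rcu_access_pointer(active->request))
                                break;

                        i915_gem_request_put(request);
                } while (1);
                rcu_read_unlock();

                return request;
        }
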
@@ -375,6 +398,12 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
375 req->engine = engine; 398 req->engine = engine;
376 req->ctx = i915_gem_context_get(ctx); 399 req->ctx = i915_gem_context_get(ctx);
377 400
401 /* No zalloc, must clear what we need by hand */
402 req->previous_context = NULL;
403 req->file_priv = NULL;
404 req->batch = NULL;
405 req->elsp_submitted = 0;
406
378 /* 407 /*
379 * Reserve space in the ring buffer for all the commands required to 408 * Reserve space in the ring buffer for all the commands required to
380 * eventually emit this request. This is to guarantee that the 409 * eventually emit this request. This is to guarantee that the
@@ -391,6 +420,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
391 if (ret) 420 if (ret)
392 goto err_ctx; 421 goto err_ctx;
393 422
423 /* Record the position of the start of the request so that
424 * should we detect the updated seqno part-way through the
425 * GPU processing the request, we never over-estimate the
426 * position of the head.
427 */
428 req->head = req->ring->tail;
429
394 return req; 430 return req;
395 431
396err_ctx: 432err_ctx:
@@ -426,22 +462,14 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
426 * request is not being tracked for completion but the work itself is 462 * request is not being tracked for completion but the work itself is
427 * going to happen on the hardware. This would be a Bad Thing(tm). 463 * going to happen on the hardware. This would be a Bad Thing(tm).
428 */ 464 */
429void __i915_add_request(struct drm_i915_gem_request *request, 465void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
430 struct drm_i915_gem_object *obj,
431 bool flush_caches)
432{ 466{
433 struct intel_engine_cs *engine; 467 struct intel_engine_cs *engine = request->engine;
434 struct intel_ring *ring; 468 struct intel_ring *ring = request->ring;
435 u32 request_start; 469 u32 request_start;
436 u32 reserved_tail; 470 u32 reserved_tail;
437 int ret; 471 int ret;
438 472
439 if (WARN_ON(!request))
440 return;
441
442 engine = request->engine;
443 ring = request->ring;
444
445 /* 473 /*
446 * To ensure that this call will not fail, space for its emissions 474 * To ensure that this call will not fail, space for its emissions
447 * should already have been reserved in the ring buffer. Let the ring 475 * should already have been reserved in the ring buffer. Let the ring
@@ -467,16 +495,6 @@ void __i915_add_request(struct drm_i915_gem_request *request,
467 495
468 trace_i915_gem_request_add(request); 496 trace_i915_gem_request_add(request);
469 497
470 request->head = request_start;
471
472 /* Whilst this request exists, batch_obj will be on the
473 * active_list, and so will hold the active reference. Only when this
474 * request is retired will the the batch_obj be moved onto the
475 * inactive_list and lose its active reference. Hence we do not need
476 * to explicitly hold another reference here.
477 */
478 request->batch_obj = obj;
479
480 /* Seal the request and mark it as pending execution. Note that 498 /* Seal the request and mark it as pending execution. Note that
481 * we may inspect this state, without holding any locks, during 499 * we may inspect this state, without holding any locks, during
482 * hangcheck. Hence we apply the barrier to ensure that we do not 500 * hangcheck. Hence we apply the barrier to ensure that we do not
@@ -489,10 +507,10 @@ void __i915_add_request(struct drm_i915_gem_request *request,
489 list_add_tail(&request->link, &engine->request_list); 507 list_add_tail(&request->link, &engine->request_list);
490 list_add_tail(&request->ring_link, &ring->request_list); 508 list_add_tail(&request->ring_link, &ring->request_list);
491 509
492 /* Record the position of the start of the request so that 510 /* Record the position of the start of the breadcrumb so that
493 * should we detect the updated seqno part-way through the 511 * should we detect the updated seqno part-way through the
494 * GPU processing the request, we never over-estimate the 512 * GPU processing the request, we never over-estimate the
495 * position of the head. 513 * position of the ring's HEAD.
496 */ 514 */
497 request->postfix = ring->tail; 515 request->postfix = ring->tail;
498 516
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 3496e28785e7..6c72bd8d9423 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -51,6 +51,13 @@ struct intel_signal_node {
51 * emission time to be associated with the request for tracking how far ahead 51 * emission time to be associated with the request for tracking how far ahead
52 * of the GPU the submission is. 52 * of the GPU the submission is.
53 * 53 *
54 * When modifying this structure be very aware that we perform a lockless
55 * RCU lookup of it that may race against reallocation of the struct
56 * from the slab freelist. We intentionally do not zero the structure on
57 * allocation so that the lookup can use the dangling pointers (and is
 58 * cognizant that those pointers may be wrong). Instead, everything that
59 * needs to be initialised must be done so explicitly.
60 *
54 * The requests are reference counted. 61 * The requests are reference counted.
55 */ 62 */
56struct drm_i915_gem_request { 63struct drm_i915_gem_request {
@@ -111,7 +118,7 @@ struct drm_i915_gem_request {
111 /** Batch buffer related to this request if any (used for 118 /** Batch buffer related to this request if any (used for
112 * error state dump only). 119 * error state dump only).
113 */ 120 */
114 struct drm_i915_gem_object *batch_obj; 121 struct i915_vma *batch;
115 struct list_head active_list; 122 struct list_head active_list;
116 123
117 /** Time at which this request was emitted, in jiffies. */ 124 /** Time at which this request was emitted, in jiffies. */
@@ -127,9 +134,6 @@ struct drm_i915_gem_request {
127 /** file_priv list entry for this request */ 134 /** file_priv list entry for this request */
128 struct list_head client_list; 135 struct list_head client_list;
129 136
130 /** process identifier submitting this request */
131 struct pid *pid;
132
133 /** 137 /**
134 * The ELSP only accepts two elements at a time, so we queue 138 * The ELSP only accepts two elements at a time, so we queue
135 * context/tail pairs on a given queue (ring->execlist_queue) until the 139 * context/tail pairs on a given queue (ring->execlist_queue) until the
@@ -218,13 +222,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
218 *pdst = src; 222 *pdst = src;
219} 223}
220 224
221void __i915_add_request(struct drm_i915_gem_request *req, 225void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
222 struct drm_i915_gem_object *batch_obj,
223 bool flush_caches);
224#define i915_add_request(req) \ 226#define i915_add_request(req) \
225 __i915_add_request(req, NULL, true) 227 __i915_add_request(req, true)
226#define i915_add_request_no_flush(req) \ 228#define i915_add_request_no_flush(req) \
227 __i915_add_request(req, NULL, false) 229 __i915_add_request(req, false)
228 230
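
With the batch-object parameter dropped from __i915_add_request(), callers that want the batch recorded for error capture are assumed to set the new request->batch vma themselves before sealing the request; a hedged sketch ('batch_vma' is a hypothetical local):

        req->batch = batch_vma;        /* struct i915_vma *, kept for GPU error dumps */
        __i915_add_request(req, true); /* or the i915_add_request(req) wrapper */
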
229struct intel_rps_client; 231struct intel_rps_client;
230#define NO_WAITBOOST ERR_PTR(-1) 232#define NO_WAITBOOST ERR_PTR(-1)
@@ -360,41 +362,34 @@ __i915_gem_active_peek(const struct i915_gem_active *active)
360} 362}
361 363
362/** 364/**
363 * i915_gem_active_peek - report the active request being monitored 365 * i915_gem_active_raw - return the active request
364 * @active - the active tracker 366 * @active - the active tracker
365 * 367 *
366 * i915_gem_active_peek() returns the current request being tracked if 368 * i915_gem_active_raw() returns the current request being tracked, or NULL.
367 * still active, or NULL. It does not obtain a reference on the request 369 * It does not obtain a reference on the request for the caller, so the caller
368 * for the caller, so the caller must hold struct_mutex. 370 * must hold struct_mutex.
369 */ 371 */
370static inline struct drm_i915_gem_request * 372static inline struct drm_i915_gem_request *
371i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) 373i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
372{ 374{
373 struct drm_i915_gem_request *request; 375 return rcu_dereference_protected(active->request,
374 376 lockdep_is_held(mutex));
375 request = rcu_dereference_protected(active->request,
376 lockdep_is_held(mutex));
377 if (!request || i915_gem_request_completed(request))
378 return NULL;
379
380 return request;
381} 377}
382 378
383/** 379/**
384 * i915_gem_active_peek_rcu - report the active request being monitored 380 * i915_gem_active_peek - report the active request being monitored
385 * @active - the active tracker 381 * @active - the active tracker
386 * 382 *
387 * i915_gem_active_peek_rcu() returns the current request being tracked if 383 * i915_gem_active_peek() returns the current request being tracked if
388 * still active, or NULL. It does not obtain a reference on the request 384 * still active, or NULL. It does not obtain a reference on the request
389 * for the caller, and inspection of the request is only valid under 385 * for the caller, so the caller must hold struct_mutex.
390 * the RCU lock.
391 */ 386 */
392static inline struct drm_i915_gem_request * 387static inline struct drm_i915_gem_request *
393i915_gem_active_peek_rcu(const struct i915_gem_active *active) 388i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
394{ 389{
395 struct drm_i915_gem_request *request; 390 struct drm_i915_gem_request *request;
396 391
397 request = rcu_dereference(active->request); 392 request = i915_gem_active_raw(active, mutex);
398 if (!request || i915_gem_request_completed(request)) 393 if (!request || i915_gem_request_completed(request))
399 return NULL; 394 return NULL;
400 395
@@ -465,6 +460,10 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
465 * just report the active tracker is idle. If the new request is 460 * just report the active tracker is idle. If the new request is
466 * incomplete, then we acquire a reference on it and check that 461 * incomplete, then we acquire a reference on it and check that
467 * it remained the active request. 462 * it remained the active request.
463 *
464 * It is then imperative that we do not zero the request on
465 * reallocation, so that we can chase the dangling pointers!
466 * See i915_gem_request_alloc().
468 */ 467 */
469 do { 468 do {
470 struct drm_i915_gem_request *request; 469 struct drm_i915_gem_request *request;
@@ -497,6 +496,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
497 * incremented) then the following read for rcu_access_pointer() 496 * incremented) then the following read for rcu_access_pointer()
498 * must occur after the atomic operation and so confirm 497 * must occur after the atomic operation and so confirm
499 * that this request is the one currently being tracked. 498 * that this request is the one currently being tracked.
499 *
500 * The corresponding write barrier is part of
501 * rcu_assign_pointer().
500 */ 502 */
501 if (!request || request == rcu_access_pointer(active->request)) 503 if (!request || request == rcu_access_pointer(active->request))
502 return rcu_pointer_handoff(request); 504 return rcu_pointer_handoff(request);
@@ -635,8 +637,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
635 struct drm_i915_gem_request *request; 637 struct drm_i915_gem_request *request;
636 int ret; 638 int ret;
637 639
638 request = rcu_dereference_protected(active->request, 640 request = i915_gem_active_raw(active, mutex);
639 lockdep_is_held(mutex));
640 if (!request) 641 if (!request)
641 return 0; 642 return 0;
642 643
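
The distinction introduced above, as a hedged sketch ('active' stands for any struct i915_gem_active tracker, with struct_mutex held):

        struct drm_i915_gem_request *rq;

        /* Whatever is tracked, even if the GPU has already finished it. */
        rq = i915_gem_active_raw(active, &dev->struct_mutex);

        /* Filters completed requests: NULL once the tracker is idle. */
        rq = i915_gem_active_peek(active, &dev->struct_mutex);
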
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 13279610eeec..aa050fa1e558 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -115,17 +115,28 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
115 115
116 base = bsm & INTEL_BSM_MASK; 116 base = bsm & INTEL_BSM_MASK;
117 } else if (IS_I865G(dev)) { 117 } else if (IS_I865G(dev)) {
118 u32 tseg_size = 0;
118 u16 toud = 0; 119 u16 toud = 0;
120 u8 tmp;
121
122 pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0),
123 I845_ESMRAMC, &tmp);
124
125 if (tmp & TSEG_ENABLE) {
126 switch (tmp & I845_TSEG_SIZE_MASK) {
127 case I845_TSEG_SIZE_512K:
128 tseg_size = KB(512);
129 break;
130 case I845_TSEG_SIZE_1M:
131 tseg_size = MB(1);
132 break;
133 }
134 }
119 135
120 /*
121 * FIXME is the graphics stolen memory region
122 * always at TOUD? Ie. is it always the last
123 * one to be allocated by the BIOS?
124 */
125 pci_bus_read_config_word(dev->pdev->bus, PCI_DEVFN(0, 0), 136 pci_bus_read_config_word(dev->pdev->bus, PCI_DEVFN(0, 0),
126 I865_TOUD, &toud); 137 I865_TOUD, &toud);
127 138
128 base = toud << 16; 139 base = (toud << 16) + tseg_size;
129 } else if (IS_I85X(dev)) { 140 } else if (IS_I85X(dev)) {
130 u32 tseg_size = 0; 141 u32 tseg_size = 0;
131 u32 tom; 142 u32 tom;
@@ -685,7 +696,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
685 if (gtt_offset == I915_GTT_OFFSET_NONE) 696 if (gtt_offset == I915_GTT_OFFSET_NONE)
686 return obj; 697 return obj;
687 698
688 vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base); 699 vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
689 if (IS_ERR(vma)) { 700 if (IS_ERR(vma)) {
690 ret = PTR_ERR(vma); 701 ret = PTR_ERR(vma);
691 goto err; 702 goto err;
@@ -705,6 +716,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
705 goto err; 716 goto err;
706 } 717 }
707 718
719 vma->pages = obj->pages;
708 vma->flags |= I915_VMA_GLOBAL_BIND; 720 vma->flags |= I915_VMA_GLOBAL_BIND;
709 __i915_vma_set_map_and_fenceable(vma); 721 __i915_vma_set_map_and_fenceable(vma);
710 list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); 722 list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index f4b984de83b5..a14b1e3d4c78 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -116,37 +116,58 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
116 return true; 116 return true;
117} 117}
118 118
119/* Is the current GTT allocation valid for the change in tiling? */ 119static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
120static bool
121i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
122{ 120{
123 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 121 struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
124 u32 size; 122 u32 size;
125 123
126 if (tiling_mode == I915_TILING_NONE) 124 if (!i915_vma_is_map_and_fenceable(vma))
127 return true;
128
129 if (INTEL_GEN(dev_priv) >= 4)
130 return true; 125 return true;
131 126
132 if (IS_GEN3(dev_priv)) { 127 if (INTEL_GEN(dev_priv) == 3) {
133 if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) 128 if (vma->node.start & ~I915_FENCE_START_MASK)
134 return false; 129 return false;
135 } else { 130 } else {
136 if (i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) 131 if (vma->node.start & ~I830_FENCE_START_MASK)
137 return false; 132 return false;
138 } 133 }
139 134
140 size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, tiling_mode); 135 size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
141 if (i915_gem_obj_ggtt_size(obj) != size) 136 if (vma->node.size < size)
142 return false; 137 return false;
143 138
144 if (i915_gem_obj_ggtt_offset(obj) & (size - 1)) 139 if (vma->node.start & (size - 1))
145 return false; 140 return false;
146 141
147 return true; 142 return true;
148} 143}
149 144
145/* Make the current GTT allocation valid for the change in tiling. */
146static int
147i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
148{
149 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
150 struct i915_vma *vma;
151 int ret;
152
153 if (tiling_mode == I915_TILING_NONE)
154 return 0;
155
156 if (INTEL_GEN(dev_priv) >= 4)
157 return 0;
158
159 list_for_each_entry(vma, &obj->vma_list, obj_link) {
160 if (i915_vma_fence_prepare(vma, tiling_mode))
161 continue;
162
163 ret = i915_vma_unbind(vma);
164 if (ret)
165 return ret;
166 }
167
168 return 0;
169}
170
150/** 171/**
151 * i915_gem_set_tiling - IOCTL handler to set tiling mode 172 * i915_gem_set_tiling - IOCTL handler to set tiling mode
152 * @dev: DRM device 173 * @dev: DRM device
@@ -168,7 +189,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
168 struct drm_i915_gem_set_tiling *args = data; 189 struct drm_i915_gem_set_tiling *args = data;
169 struct drm_i915_private *dev_priv = to_i915(dev); 190 struct drm_i915_private *dev_priv = to_i915(dev);
170 struct drm_i915_gem_object *obj; 191 struct drm_i915_gem_object *obj;
171 int ret = 0; 192 int err = 0;
172 193
173 /* Make sure we don't cross-contaminate obj->tiling_and_stride */ 194 /* Make sure we don't cross-contaminate obj->tiling_and_stride */
174 BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); 195 BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);
@@ -187,7 +208,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
187 208
188 mutex_lock(&dev->struct_mutex); 209 mutex_lock(&dev->struct_mutex);
189 if (obj->pin_display || obj->framebuffer_references) { 210 if (obj->pin_display || obj->framebuffer_references) {
190 ret = -EBUSY; 211 err = -EBUSY;
191 goto err; 212 goto err;
192 } 213 }
193 214
@@ -234,11 +255,11 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
234 * has to also include the unfenced register the GPU uses 255 * has to also include the unfenced register the GPU uses
235 * whilst executing a fenced command for an untiled object. 256 * whilst executing a fenced command for an untiled object.
236 */ 257 */
237 if (obj->map_and_fenceable &&
238 !i915_gem_object_fence_ok(obj, args->tiling_mode))
239 ret = i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
240 258
241 if (ret == 0) { 259 err = i915_gem_object_fence_prepare(obj, args->tiling_mode);
260 if (!err) {
261 struct i915_vma *vma;
262
242 if (obj->pages && 263 if (obj->pages &&
243 obj->madv == I915_MADV_WILLNEED && 264 obj->madv == I915_MADV_WILLNEED &&
244 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 265 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@@ -248,11 +269,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
248 i915_gem_object_pin_pages(obj); 269 i915_gem_object_pin_pages(obj);
249 } 270 }
250 271
251 obj->fence_dirty = 272 list_for_each_entry(vma, &obj->vma_list, obj_link) {
252 !i915_gem_active_is_idle(&obj->last_fence, 273 if (!vma->fence)
253 &dev->struct_mutex) || 274 continue;
254 obj->fence_reg != I915_FENCE_REG_NONE;
255 275
276 vma->fence->dirty = true;
277 }
256 obj->tiling_and_stride = 278 obj->tiling_and_stride =
257 args->stride | args->tiling_mode; 279 args->stride | args->tiling_mode;
258 280
@@ -281,7 +303,7 @@ err:
281 303
282 intel_runtime_pm_put(dev_priv); 304 intel_runtime_pm_put(dev_priv);
283 305
284 return ret; 306 return err;
285} 307}
286 308
287/** 309/**
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 57218cca7e05..be54825ef3e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -542,8 +542,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
542 } 542 }
543 } 543 }
544 obj->userptr.work = ERR_PTR(ret); 544 obj->userptr.work = ERR_PTR(ret);
545 if (ret)
546 __i915_gem_userptr_set_active(obj, false);
547 } 545 }
548 546
549 obj->userptr.workers--; 547 obj->userptr.workers--;
@@ -628,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
628 * to the vma (discard or cloning) which should prevent the more 626 * to the vma (discard or cloning) which should prevent the more
629 * egregious cases from causing harm. 627 * egregious cases from causing harm.
630 */ 628 */
631 if (IS_ERR(obj->userptr.work)) { 629
632 /* active flag will have been dropped already by the worker */ 630 if (obj->userptr.work) {
633 ret = PTR_ERR(obj->userptr.work);
634 obj->userptr.work = NULL;
635 return ret;
636 }
637 if (obj->userptr.work)
638 /* active flag should still be held for the pending work */ 631 /* active flag should still be held for the pending work */
639 return -EAGAIN; 632 if (IS_ERR(obj->userptr.work))
633 return PTR_ERR(obj->userptr.work);
634 else
635 return -EAGAIN;
636 }
640 637
641 /* Let the mmu-notifier know that we have begun and need cancellation */ 638 /* Let the mmu-notifier know that we have begun and need cancellation */
642 ret = __i915_gem_userptr_set_active(obj, true); 639 ret = __i915_gem_userptr_set_active(obj, true);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index eecb87063c88..41ec7a183c73 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -42,16 +42,6 @@ static const char *engine_str(int engine)
42 } 42 }
43} 43}
44 44
45static const char *pin_flag(int pinned)
46{
47 if (pinned > 0)
48 return " P";
49 else if (pinned < 0)
50 return " p";
51 else
52 return "";
53}
54
55static const char *tiling_flag(int tiling) 45static const char *tiling_flag(int tiling)
56{ 46{
57 switch (tiling) { 47 switch (tiling) {
@@ -189,7 +179,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
189{ 179{
190 int i; 180 int i;
191 181
192 err_printf(m, " %s [%d]:\n", name, count); 182 err_printf(m, "%s [%d]:\n", name, count);
193 183
194 while (count--) { 184 while (count--) {
195 err_printf(m, " %08x_%08x %8u %02x %02x [ ", 185 err_printf(m, " %08x_%08x %8u %02x %02x [ ",
@@ -202,7 +192,6 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
202 err_printf(m, "%02x ", err->rseqno[i]); 192 err_printf(m, "%02x ", err->rseqno[i]);
203 193
204 err_printf(m, "] %02x", err->wseqno); 194 err_printf(m, "] %02x", err->wseqno);
205 err_puts(m, pin_flag(err->pinned));
206 err_puts(m, tiling_flag(err->tiling)); 195 err_puts(m, tiling_flag(err->tiling));
207 err_puts(m, dirty_flag(err->dirty)); 196 err_puts(m, dirty_flag(err->dirty));
208 err_puts(m, purgeable_flag(err->purgeable)); 197 err_puts(m, purgeable_flag(err->purgeable));
@@ -247,14 +236,23 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
247 err_printf(m, " HEAD: 0x%08x\n", ee->head); 236 err_printf(m, " HEAD: 0x%08x\n", ee->head);
248 err_printf(m, " TAIL: 0x%08x\n", ee->tail); 237 err_printf(m, " TAIL: 0x%08x\n", ee->tail);
249 err_printf(m, " CTL: 0x%08x\n", ee->ctl); 238 err_printf(m, " CTL: 0x%08x\n", ee->ctl);
239 err_printf(m, " MODE: 0x%08x\n", ee->mode);
250 err_printf(m, " HWS: 0x%08x\n", ee->hws); 240 err_printf(m, " HWS: 0x%08x\n", ee->hws);
251 err_printf(m, " ACTHD: 0x%08x %08x\n", 241 err_printf(m, " ACTHD: 0x%08x %08x\n",
252 (u32)(ee->acthd>>32), (u32)ee->acthd); 242 (u32)(ee->acthd>>32), (u32)ee->acthd);
253 err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir); 243 err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir);
254 err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr); 244 err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr);
255 err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone); 245 err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone);
246 if (ee->batchbuffer) {
247 u64 start = ee->batchbuffer->gtt_offset;
248 u64 end = start + ee->batchbuffer->gtt_size;
249
250 err_printf(m, " batch: [0x%08x_%08x, 0x%08x_%08x]\n",
251 upper_32_bits(start), lower_32_bits(start),
252 upper_32_bits(end), lower_32_bits(end));
253 }
256 if (INTEL_GEN(m->i915) >= 4) { 254 if (INTEL_GEN(m->i915) >= 4) {
257 err_printf(m, " BBADDR: 0x%08x %08x\n", 255 err_printf(m, " BBADDR: 0x%08x_%08x\n",
258 (u32)(ee->bbaddr>>32), (u32)ee->bbaddr); 256 (u32)(ee->bbaddr>>32), (u32)ee->bbaddr);
259 err_printf(m, " BB_STATE: 0x%08x\n", ee->bbstate); 257 err_printf(m, " BB_STATE: 0x%08x\n", ee->bbstate);
260 err_printf(m, " INSTPS: 0x%08x\n", ee->instps); 258 err_printf(m, " INSTPS: 0x%08x\n", ee->instps);
@@ -323,6 +321,16 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
323 } 321 }
324} 322}
325 323
324static void err_print_capabilities(struct drm_i915_error_state_buf *m,
325 const struct intel_device_info *info)
326{
327#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x))
328#define SEP_SEMICOLON ;
329 DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG, SEP_SEMICOLON);
330#undef PRINT_FLAG
331#undef SEP_SEMICOLON
332}
333
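
For a single flag, the PRINT_FLAG/DEV_INFO_FOR_EACH_FLAG expansion above boils down to one line per capability in the error dump; 'is_mobile' is used here purely as an illustrative flag name:

        err_printf(m, "is_mobile: %s\n", yesno(info->is_mobile));
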
326int i915_error_state_to_str(struct drm_i915_error_state_buf *m, 334int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
327 const struct i915_error_state_file_priv *error_priv) 335 const struct i915_error_state_file_priv *error_priv)
328{ 336{
@@ -342,6 +350,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
342 err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, 350 err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
343 error->time.tv_usec); 351 error->time.tv_usec);
344 err_printf(m, "Kernel: " UTS_RELEASE "\n"); 352 err_printf(m, "Kernel: " UTS_RELEASE "\n");
353 err_print_capabilities(m, &error->device_info);
345 max_hangcheck_score = 0; 354 max_hangcheck_score = 0;
346 for (i = 0; i < ARRAY_SIZE(error->engine); i++) { 355 for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
347 if (error->engine[i].hangcheck_score > max_hangcheck_score) 356 if (error->engine[i].hangcheck_score > max_hangcheck_score)
@@ -414,18 +423,33 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
414 error_print_engine(m, &error->engine[i]); 423 error_print_engine(m, &error->engine[i]);
415 } 424 }
416 425
417 for (i = 0; i < error->vm_count; i++) { 426 for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
418 err_printf(m, "vm[%d]\n", i); 427 char buf[128];
428 int len, first = 1;
429
430 if (!error->active_vm[i])
431 break;
432
433 len = scnprintf(buf, sizeof(buf), "Active (");
434 for (j = 0; j < ARRAY_SIZE(error->engine); j++) {
435 if (error->engine[j].vm != error->active_vm[i])
436 continue;
419 437
420 print_error_buffers(m, "Active", 438 len += scnprintf(buf + len, sizeof(buf), "%s%s",
439 first ? "" : ", ",
440 dev_priv->engine[j].name);
441 first = 0;
442 }
443 scnprintf(buf + len, sizeof(buf), ")");
444 print_error_buffers(m, buf,
421 error->active_bo[i], 445 error->active_bo[i],
422 error->active_bo_count[i]); 446 error->active_bo_count[i]);
423
424 print_error_buffers(m, "Pinned",
425 error->pinned_bo[i],
426 error->pinned_bo_count[i]);
427 } 447 }
428 448
449 print_error_buffers(m, "Pinned (global)",
450 error->pinned_bo,
451 error->pinned_bo_count);
452
429 for (i = 0; i < ARRAY_SIZE(error->engine); i++) { 453 for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
430 struct drm_i915_error_engine *ee = &error->engine[i]; 454 struct drm_i915_error_engine *ee = &error->engine[i];
431 455
@@ -455,9 +479,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
455 dev_priv->engine[i].name, 479 dev_priv->engine[i].name,
456 ee->num_requests); 480 ee->num_requests);
457 for (j = 0; j < ee->num_requests; j++) { 481 for (j = 0; j < ee->num_requests; j++) {
458 err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n", 482 err_printf(m, " pid %d, seqno 0x%08x, emitted %ld, head 0x%08x, tail 0x%08x\n",
483 ee->requests[j].pid,
459 ee->requests[j].seqno, 484 ee->requests[j].seqno,
460 ee->requests[j].jiffies, 485 ee->requests[j].jiffies,
486 ee->requests[j].head,
461 ee->requests[j].tail); 487 ee->requests[j].tail);
462 } 488 }
463 } 489 }
@@ -533,7 +559,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
533 } 559 }
534 } 560 }
535 561
536 if ((obj = error->semaphore_obj)) { 562 if ((obj = error->semaphore)) {
537 err_printf(m, "Semaphore page = 0x%08x\n", 563 err_printf(m, "Semaphore page = 0x%08x\n",
538 lower_32_bits(obj->gtt_offset)); 564 lower_32_bits(obj->gtt_offset));
539 for (elt = 0; elt < PAGE_SIZE/16; elt += 4) { 565 for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
@@ -624,15 +650,12 @@ static void i915_error_state_free(struct kref *error_ref)
624 kfree(ee->waiters); 650 kfree(ee->waiters);
625 } 651 }
626 652
627 i915_error_object_free(error->semaphore_obj); 653 i915_error_object_free(error->semaphore);
628 654
629 for (i = 0; i < error->vm_count; i++) 655 for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
630 kfree(error->active_bo[i]); 656 kfree(error->active_bo[i]);
631
632 kfree(error->active_bo);
633 kfree(error->active_bo_count);
634 kfree(error->pinned_bo); 657 kfree(error->pinned_bo);
635 kfree(error->pinned_bo_count); 658
636 kfree(error->overlay); 659 kfree(error->overlay);
637 kfree(error->display); 660 kfree(error->display);
638 kfree(error); 661 kfree(error);
@@ -640,46 +663,45 @@ static void i915_error_state_free(struct kref *error_ref)
640 663
641static struct drm_i915_error_object * 664static struct drm_i915_error_object *
642i915_error_object_create(struct drm_i915_private *dev_priv, 665i915_error_object_create(struct drm_i915_private *dev_priv,
643 struct drm_i915_gem_object *src, 666 struct i915_vma *vma)
644 struct i915_address_space *vm)
645{ 667{
646 struct i915_ggtt *ggtt = &dev_priv->ggtt; 668 struct i915_ggtt *ggtt = &dev_priv->ggtt;
669 struct drm_i915_gem_object *src;
647 struct drm_i915_error_object *dst; 670 struct drm_i915_error_object *dst;
648 struct i915_vma *vma = NULL;
649 int num_pages; 671 int num_pages;
650 bool use_ggtt; 672 bool use_ggtt;
651 int i = 0; 673 int i = 0;
652 u64 reloc_offset; 674 u64 reloc_offset;
653 675
654 if (src == NULL || src->pages == NULL) 676 if (!vma)
677 return NULL;
678
679 src = vma->obj;
680 if (!src->pages)
655 return NULL; 681 return NULL;
656 682
657 num_pages = src->base.size >> PAGE_SHIFT; 683 num_pages = src->base.size >> PAGE_SHIFT;
658 684
659 dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC); 685 dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
660 if (dst == NULL) 686 if (!dst)
661 return NULL; 687 return NULL;
662 688
663 if (i915_gem_obj_bound(src, vm)) 689 dst->gtt_offset = vma->node.start;
664 dst->gtt_offset = i915_gem_obj_offset(src, vm); 690 dst->gtt_size = vma->node.size;
665 else
666 dst->gtt_offset = -1;
667 691
668 reloc_offset = dst->gtt_offset; 692 reloc_offset = dst->gtt_offset;
669 if (i915_is_ggtt(vm))
670 vma = i915_gem_obj_to_ggtt(src);
671 use_ggtt = (src->cache_level == I915_CACHE_NONE && 693 use_ggtt = (src->cache_level == I915_CACHE_NONE &&
672 vma && (vma->flags & I915_VMA_GLOBAL_BIND) && 694 (vma->flags & I915_VMA_GLOBAL_BIND) &&
673 reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); 695 reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
674 696
675 /* Cannot access stolen address directly, try to use the aperture */ 697 /* Cannot access stolen address directly, try to use the aperture */
676 if (src->stolen) { 698 if (src->stolen) {
677 use_ggtt = true; 699 use_ggtt = true;
678 700
679 if (!(vma && vma->flags & I915_VMA_GLOBAL_BIND)) 701 if (!(vma->flags & I915_VMA_GLOBAL_BIND))
680 goto unwind; 702 goto unwind;
681 703
682 reloc_offset = i915_gem_obj_ggtt_offset(src); 704 reloc_offset = vma->node.start;
683 if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end) 705 if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
684 goto unwind; 706 goto unwind;
685 } 707 }
@@ -707,7 +729,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
707 * captures what the GPU read. 729 * captures what the GPU read.
708 */ 730 */
709 731
710 s = io_mapping_map_atomic_wc(ggtt->mappable, 732 s = io_mapping_map_atomic_wc(&ggtt->mappable,
711 reloc_offset); 733 reloc_offset);
712 memcpy_fromio(d, s, PAGE_SIZE); 734 memcpy_fromio(d, s, PAGE_SIZE);
713 io_mapping_unmap_atomic(s); 735 io_mapping_unmap_atomic(s);
@@ -739,8 +761,6 @@ unwind:
739 kfree(dst); 761 kfree(dst);
740 return NULL; 762 return NULL;
741} 763}
742#define i915_error_ggtt_object_create(dev_priv, src) \
743 i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base)
744 764
745/* The error capture is special as it tries to run underneath the normal 765
746 * locking rules - so we use the raw version of the i915_gem_active lookup. 766 * locking rules - so we use the raw version of the i915_gem_active lookup.
@@ -777,10 +797,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
777 err->gtt_offset = vma->node.start; 797 err->gtt_offset = vma->node.start;
778 err->read_domains = obj->base.read_domains; 798 err->read_domains = obj->base.read_domains;
779 err->write_domain = obj->base.write_domain; 799 err->write_domain = obj->base.write_domain;
780 err->fence_reg = obj->fence_reg; 800 err->fence_reg = vma->fence ? vma->fence->id : -1;
781 err->pinned = 0;
782 if (i915_gem_obj_is_pinned(obj))
783 err->pinned = 1;
784 err->tiling = i915_gem_object_get_tiling(obj); 801 err->tiling = i915_gem_object_get_tiling(obj);
785 err->dirty = obj->dirty; 802 err->dirty = obj->dirty;
786 err->purgeable = obj->madv != I915_MADV_WILLNEED; 803 err->purgeable = obj->madv != I915_MADV_WILLNEED;
@@ -788,13 +805,17 @@ static void capture_bo(struct drm_i915_error_buffer *err,
788 err->cache_level = obj->cache_level; 805 err->cache_level = obj->cache_level;
789} 806}
790 807
791static u32 capture_active_bo(struct drm_i915_error_buffer *err, 808static u32 capture_error_bo(struct drm_i915_error_buffer *err,
792 int count, struct list_head *head) 809 int count, struct list_head *head,
810 bool pinned_only)
793{ 811{
794 struct i915_vma *vma; 812 struct i915_vma *vma;
795 int i = 0; 813 int i = 0;
796 814
797 list_for_each_entry(vma, head, vm_link) { 815 list_for_each_entry(vma, head, vm_link) {
816 if (pinned_only && !i915_vma_is_pinned(vma))
817 continue;
818
798 capture_bo(err++, vma); 819 capture_bo(err++, vma);
799 if (++i == count) 820 if (++i == count)
800 break; 821 break;
@@ -803,28 +824,6 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
803 return i; 824 return i;
804} 825}
805 826
806static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
807 int count, struct list_head *head,
808 struct i915_address_space *vm)
809{
810 struct drm_i915_gem_object *obj;
811 struct drm_i915_error_buffer * const first = err;
812 struct drm_i915_error_buffer * const last = err + count;
813
814 list_for_each_entry(obj, head, global_list) {
815 struct i915_vma *vma;
816
817 if (err == last)
818 break;
819
820 list_for_each_entry(vma, &obj->vma_list, obj_link)
821 if (vma->vm == vm && i915_vma_is_pinned(vma))
822 capture_bo(err++, vma);
823 }
824
825 return err - first;
826}
827
828/* Generate a semi-unique error code. The code is not meant to have meaning. The 827
829 * code's only purpose is to try to prevent false duplicated bug reports by 828 * code's only purpose is to try to prevent false duplicated bug reports by
830 * grossly estimating a GPU error state. 829 * grossly estimating a GPU error state.
@@ -884,7 +883,7 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error,
884 struct intel_engine_cs *to; 883 struct intel_engine_cs *to;
885 enum intel_engine_id id; 884 enum intel_engine_id id;
886 885
887 if (!error->semaphore_obj) 886 if (!error->semaphore)
888 return; 887 return;
889 888
890 for_each_engine_id(to, dev_priv, id) { 889 for_each_engine_id(to, dev_priv, id) {
@@ -897,7 +896,7 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error,
897 896
898 signal_offset = 897 signal_offset =
899 (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; 898 (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
900 tmp = error->semaphore_obj->pages[0]; 899 tmp = error->semaphore->pages[0];
901 idx = intel_engine_sync_index(engine, to); 900 idx = intel_engine_sync_index(engine, to);
902 901
903 ee->semaphore_mboxes[idx] = tmp[signal_offset]; 902 ee->semaphore_mboxes[idx] = tmp[signal_offset];
@@ -1007,6 +1006,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
1007 ee->head = I915_READ_HEAD(engine); 1006 ee->head = I915_READ_HEAD(engine);
1008 ee->tail = I915_READ_TAIL(engine); 1007 ee->tail = I915_READ_TAIL(engine);
1009 ee->ctl = I915_READ_CTL(engine); 1008 ee->ctl = I915_READ_CTL(engine);
1009 if (INTEL_GEN(dev_priv) > 2)
1010 ee->mode = I915_READ_MODE(engine);
1010 1011
1011 if (I915_NEED_GFX_HWS(dev_priv)) { 1012 if (I915_NEED_GFX_HWS(dev_priv)) {
1012 i915_reg_t mmio; 1013 i915_reg_t mmio;
@@ -1062,45 +1063,76 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
1062 } 1063 }
1063} 1064}
1064 1065
1065 1066static void engine_record_requests(struct intel_engine_cs *engine,
1066static void i915_gem_record_active_context(struct intel_engine_cs *engine, 1067 struct drm_i915_gem_request *first,
1067 struct drm_i915_error_state *error, 1068 struct drm_i915_error_engine *ee)
1068 struct drm_i915_error_engine *ee)
1069{ 1069{
1070 struct drm_i915_private *dev_priv = engine->i915; 1070 struct drm_i915_gem_request *request;
1071 struct drm_i915_gem_object *obj; 1071 int count;
1072 1072
1073 /* Currently render ring is the only HW context user */ 1073 count = 0;
1074 if (engine->id != RCS || !error->ccid) 1074 request = first;
1075 list_for_each_entry_from(request, &engine->request_list, link)
1076 count++;
1077 if (!count)
1075 return; 1078 return;
1076 1079
1077 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 1080 ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
1078 if (!i915_gem_obj_ggtt_bound(obj)) 1081 if (!ee->requests)
1079 continue; 1082 return;
1083
1084 ee->num_requests = count;
1080 1085
1081 if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) { 1086 count = 0;
1082 ee->ctx = i915_error_ggtt_object_create(dev_priv, obj); 1087 request = first;
1088 list_for_each_entry_from(request, &engine->request_list, link) {
1089 struct drm_i915_error_request *erq;
1090
1091 if (count >= ee->num_requests) {
1092 /*
1093 * If the ring request list was changed in
1094 * between the point where the error request
1095 * list was created and dimensioned and this
1096 * point then just exit early to avoid crashes.
1097 *
1098 * We don't need to communicate that the
1099 * request list changed state during error
1100 * state capture and that the error state is
1101 * slightly incorrect as a consequence since we
1102 * are typically only interested in the request
1103 * list state at the point of error state
1104 * capture, not in any changes happening during
1105 * the capture.
1106 */
1083 break; 1107 break;
1084 } 1108 }
1109
1110 erq = &ee->requests[count++];
1111 erq->seqno = request->fence.seqno;
1112 erq->jiffies = request->emitted_jiffies;
1113 erq->head = request->head;
1114 erq->tail = request->tail;
1115
1116 rcu_read_lock();
1117 erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
1118 rcu_read_unlock();
1085 } 1119 }
1120 ee->num_requests = count;
1086} 1121}
1087 1122
1088static void i915_gem_record_rings(struct drm_i915_private *dev_priv, 1123static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1089 struct drm_i915_error_state *error) 1124 struct drm_i915_error_state *error)
1090{ 1125{
1091 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1126 struct i915_ggtt *ggtt = &dev_priv->ggtt;
1092 struct drm_i915_gem_request *request; 1127 int i;
1093 int i, count;
1094 1128
1095 if (dev_priv->semaphore_obj) { 1129 error->semaphore =
1096 error->semaphore_obj = 1130 i915_error_object_create(dev_priv, dev_priv->semaphore);
1097 i915_error_ggtt_object_create(dev_priv,
1098 dev_priv->semaphore_obj);
1099 }
1100 1131
1101 for (i = 0; i < I915_NUM_ENGINES; i++) { 1132 for (i = 0; i < I915_NUM_ENGINES; i++) {
1102 struct intel_engine_cs *engine = &dev_priv->engine[i]; 1133 struct intel_engine_cs *engine = &dev_priv->engine[i];
1103 struct drm_i915_error_engine *ee = &error->engine[i]; 1134 struct drm_i915_error_engine *ee = &error->engine[i];
1135 struct drm_i915_gem_request *request;
1104 1136
1105 ee->pid = -1; 1137 ee->pid = -1;
1106 ee->engine_id = -1; 1138 ee->engine_id = -1;
@@ -1115,10 +1147,10 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1115 1147
1116 request = i915_gem_find_active_request(engine); 1148 request = i915_gem_find_active_request(engine);
1117 if (request) { 1149 if (request) {
1118 struct i915_address_space *vm;
1119 struct intel_ring *ring; 1150 struct intel_ring *ring;
1151 struct pid *pid;
1120 1152
1121 vm = request->ctx->ppgtt ? 1153 ee->vm = request->ctx->ppgtt ?
1122 &request->ctx->ppgtt->base : &ggtt->base; 1154 &request->ctx->ppgtt->base : &ggtt->base;
1123 1155
1124 /* We need to copy these to an anonymous buffer 1156 /* We need to copy these to an anonymous buffer
@@ -1127,19 +1159,23 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1127 */ 1159 */
1128 ee->batchbuffer = 1160 ee->batchbuffer =
1129 i915_error_object_create(dev_priv, 1161 i915_error_object_create(dev_priv,
1130 request->batch_obj, 1162 request->batch);
1131 vm);
1132 1163
1133 if (HAS_BROKEN_CS_TLB(dev_priv)) 1164 if (HAS_BROKEN_CS_TLB(dev_priv))
1134 ee->wa_batchbuffer = 1165 ee->wa_batchbuffer =
1135 i915_error_ggtt_object_create(dev_priv, 1166 i915_error_object_create(dev_priv,
1136 engine->scratch.obj); 1167 engine->scratch);
1168
1169 ee->ctx =
1170 i915_error_object_create(dev_priv,
1171 request->ctx->engine[i].state);
1137 1172
1138 if (request->pid) { 1173 pid = request->ctx->pid;
1174 if (pid) {
1139 struct task_struct *task; 1175 struct task_struct *task;
1140 1176
1141 rcu_read_lock(); 1177 rcu_read_lock();
1142 task = pid_task(request->pid, PIDTYPE_PID); 1178 task = pid_task(pid, PIDTYPE_PID);
1143 if (task) { 1179 if (task) {
1144 strcpy(ee->comm, task->comm); 1180 strcpy(ee->comm, task->comm);
1145 ee->pid = task->pid; 1181 ee->pid = task->pid;
@@ -1154,145 +1190,102 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1154 ee->cpu_ring_head = ring->head; 1190 ee->cpu_ring_head = ring->head;
1155 ee->cpu_ring_tail = ring->tail; 1191 ee->cpu_ring_tail = ring->tail;
1156 ee->ringbuffer = 1192 ee->ringbuffer =
1157 i915_error_ggtt_object_create(dev_priv, 1193 i915_error_object_create(dev_priv, ring->vma);
1158 ring->obj);
1159 }
1160
1161 ee->hws_page =
1162 i915_error_ggtt_object_create(dev_priv,
1163 engine->status_page.obj);
1164 1194
1165 ee->wa_ctx = i915_error_ggtt_object_create(dev_priv, 1195 engine_record_requests(engine, request, ee);
1166 engine->wa_ctx.obj);
1167
1168 i915_gem_record_active_context(engine, error, ee);
1169
1170 count = 0;
1171 list_for_each_entry(request, &engine->request_list, link)
1172 count++;
1173
1174 ee->num_requests = count;
1175 ee->requests =
1176 kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
1177 if (!ee->requests) {
1178 ee->num_requests = 0;
1179 continue;
1180 } 1196 }
1181 1197
1182 count = 0; 1198 ee->hws_page =
1183 list_for_each_entry(request, &engine->request_list, link) { 1199 i915_error_object_create(dev_priv,
1184 struct drm_i915_error_request *erq; 1200 engine->status_page.vma);
1185
1186 if (count >= ee->num_requests) {
1187 /*
1188 * If the ring request list was changed in
1189 * between the point where the error request
1190 * list was created and dimensioned and this
1191 * point then just exit early to avoid crashes.
1192 *
1193 * We don't need to communicate that the
1194 * request list changed state during error
1195 * state capture and that the error state is
1196 * slightly incorrect as a consequence since we
1197 * are typically only interested in the request
1198 * list state at the point of error state
1199 * capture, not in any changes happening during
1200 * the capture.
1201 */
1202 break;
1203 }
1204 1201
1205 erq = &ee->requests[count++]; 1202 ee->wa_ctx =
1206 erq->seqno = request->fence.seqno; 1203 i915_error_object_create(dev_priv, engine->wa_ctx.vma);
1207 erq->jiffies = request->emitted_jiffies;
1208 erq->tail = request->postfix;
1209 }
1210 } 1204 }
1211} 1205}
1212 1206
1213/* FIXME: Since pin count/bound list is global, we duplicate what we capture per
1214 * VM.
1215 */
1216static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, 1207static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
1217 struct drm_i915_error_state *error, 1208 struct drm_i915_error_state *error,
1218 struct i915_address_space *vm, 1209 struct i915_address_space *vm,
1219 const int ndx) 1210 int idx)
1220{ 1211{
1221 struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL; 1212 struct drm_i915_error_buffer *active_bo;
1222 struct drm_i915_gem_object *obj;
1223 struct i915_vma *vma; 1213 struct i915_vma *vma;
1224 int i; 1214 int count;
1225 1215
1226 i = 0; 1216 count = 0;
1227 list_for_each_entry(vma, &vm->active_list, vm_link) 1217 list_for_each_entry(vma, &vm->active_list, vm_link)
1228 i++; 1218 count++;
1229 error->active_bo_count[ndx] = i;
1230
1231 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
1232 list_for_each_entry(vma, &obj->vma_list, obj_link)
1233 if (vma->vm == vm && i915_vma_is_pinned(vma))
1234 i++;
1235 }
1236 error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
1237
1238 if (i) {
1239 active_bo = kcalloc(i, sizeof(*active_bo), GFP_ATOMIC);
1240 if (active_bo)
1241 pinned_bo = active_bo + error->active_bo_count[ndx];
1242 }
1243 1219
1220 active_bo = NULL;
1221 if (count)
1222 active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
1244 if (active_bo) 1223 if (active_bo)
1245 error->active_bo_count[ndx] = 1224 count = capture_error_bo(active_bo, count, &vm->active_list, false);
1246 capture_active_bo(active_bo, 1225 else
1247 error->active_bo_count[ndx], 1226 count = 0;
1248 &vm->active_list); 1227
1249 1228 error->active_vm[idx] = vm;
1250 if (pinned_bo) 1229 error->active_bo[idx] = active_bo;
1251 error->pinned_bo_count[ndx] = 1230 error->active_bo_count[idx] = count;
1252 capture_pinned_bo(pinned_bo,
1253 error->pinned_bo_count[ndx],
1254 &dev_priv->mm.bound_list, vm);
1255 error->active_bo[ndx] = active_bo;
1256 error->pinned_bo[ndx] = pinned_bo;
1257} 1231}
1258 1232
1259static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, 1233static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
1260 struct drm_i915_error_state *error) 1234 struct drm_i915_error_state *error)
1261{ 1235{
1262 struct i915_address_space *vm; 1236 int cnt = 0, i, j;
1263 int cnt = 0, i = 0; 1237
1264 1238 BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
1265 list_for_each_entry(vm, &dev_priv->vm_list, global_link) 1239 BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
1266 cnt++; 1240 BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));
1267 1241
1268 error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC); 1242 /* Scan each engine looking for unique active contexts/vm */
1269 error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC); 1243 for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
1270 error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count), 1244 struct drm_i915_error_engine *ee = &error->engine[i];
1271 GFP_ATOMIC); 1245 bool found;
1272 error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count), 1246
1273 GFP_ATOMIC); 1247 if (!ee->vm)
1274 1248 continue;
1275 if (error->active_bo == NULL ||
1276 error->pinned_bo == NULL ||
1277 error->active_bo_count == NULL ||
1278 error->pinned_bo_count == NULL) {
1279 kfree(error->active_bo);
1280 kfree(error->active_bo_count);
1281 kfree(error->pinned_bo);
1282 kfree(error->pinned_bo_count);
1283
1284 error->active_bo = NULL;
1285 error->active_bo_count = NULL;
1286 error->pinned_bo = NULL;
1287 error->pinned_bo_count = NULL;
1288 } else {
1289 list_for_each_entry(vm, &dev_priv->vm_list, global_link)
1290 i915_gem_capture_vm(dev_priv, error, vm, i++);
1291 1249
1292 error->vm_count = cnt; 1250 found = false;
1251 for (j = 0; j < i && !found; j++)
1252 found = error->engine[j].vm == ee->vm;
1253 if (!found)
1254 i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++);
1293 } 1255 }
1294} 1256}
1295 1257
1258static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
1259 struct drm_i915_error_state *error)
1260{
1261 struct i915_address_space *vm = &dev_priv->ggtt.base;
1262 struct drm_i915_error_buffer *bo;
1263 struct i915_vma *vma;
1264 int count_inactive, count_active;
1265
1266 count_inactive = 0;
1267 list_for_each_entry(vma, &vm->active_list, vm_link)
1268 count_inactive++;
1269
1270 count_active = 0;
1271 list_for_each_entry(vma, &vm->inactive_list, vm_link)
1272 count_active++;
1273
1274 bo = NULL;
1275 if (count_inactive + count_active)
1276 bo = kcalloc(count_inactive + count_active,
1277 sizeof(*bo), GFP_ATOMIC);
1278 if (!bo)
1279 return;
1280
1281 count_inactive = capture_error_bo(bo, count_inactive,
1282 &vm->active_list, true);
1283 count_active = capture_error_bo(bo + count_inactive, count_active,
1284 &vm->inactive_list, true);
1285 error->pinned_bo_count = count_inactive + count_active;
1286 error->pinned_bo = bo;
1287}
1288
1296/* Capture all registers which don't fit into another category. */ 1289/* Capture all registers which don't fit into another category. */
1297static void i915_capture_reg_state(struct drm_i915_private *dev_priv, 1290static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
1298 struct drm_i915_error_state *error) 1291 struct drm_i915_error_state *error)
@@ -1403,6 +1396,10 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
1403#endif 1396#endif
1404 error->reset_count = i915_reset_count(&dev_priv->gpu_error); 1397 error->reset_count = i915_reset_count(&dev_priv->gpu_error);
1405 error->suspend_count = dev_priv->suspend_count; 1398 error->suspend_count = dev_priv->suspend_count;
1399
1400 memcpy(&error->device_info,
1401 INTEL_INFO(dev_priv),
1402 sizeof(error->device_info));
1406} 1403}
1407 1404
1408/** 1405/**
@@ -1436,9 +1433,10 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
1436 1433
1437 i915_capture_gen_state(dev_priv, error); 1434 i915_capture_gen_state(dev_priv, error);
1438 i915_capture_reg_state(dev_priv, error); 1435 i915_capture_reg_state(dev_priv, error);
1439 i915_gem_capture_buffers(dev_priv, error);
1440 i915_gem_record_fences(dev_priv, error); 1436 i915_gem_record_fences(dev_priv, error);
1441 i915_gem_record_rings(dev_priv, error); 1437 i915_gem_record_rings(dev_priv, error);
1438 i915_capture_active_buffers(dev_priv, error);
1439 i915_capture_pinned_buffers(dev_priv, error);
1442 1440
1443 do_gettimeofday(&error->time); 1441 do_gettimeofday(&error->time);
1444 1442
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 03a5cef353eb..e4369411f07d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -183,7 +183,7 @@ static int guc_update_doorbell_id(struct intel_guc *guc,
183 struct i915_guc_client *client, 183 struct i915_guc_client *client,
184 u16 new_id) 184 u16 new_id)
185{ 185{
186 struct sg_table *sg = guc->ctx_pool_obj->pages; 186 struct sg_table *sg = guc->ctx_pool_vma->pages;
187 void *doorbell_bitmap = guc->doorbell_bitmap; 187 void *doorbell_bitmap = guc->doorbell_bitmap;
188 struct guc_doorbell_info *doorbell; 188 struct guc_doorbell_info *doorbell;
189 struct guc_context_desc desc; 189 struct guc_context_desc desc;
@@ -325,7 +325,6 @@ static void guc_init_proc_desc(struct intel_guc *guc,
325static void guc_init_ctx_desc(struct intel_guc *guc, 325static void guc_init_ctx_desc(struct intel_guc *guc,
326 struct i915_guc_client *client) 326 struct i915_guc_client *client)
327{ 327{
328 struct drm_i915_gem_object *client_obj = client->client_obj;
329 struct drm_i915_private *dev_priv = guc_to_i915(guc); 328 struct drm_i915_private *dev_priv = guc_to_i915(guc);
330 struct intel_engine_cs *engine; 329 struct intel_engine_cs *engine;
331 struct i915_gem_context *ctx = client->owner; 330 struct i915_gem_context *ctx = client->owner;
@@ -340,10 +339,10 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
340 desc.priority = client->priority; 339 desc.priority = client->priority;
341 desc.db_id = client->doorbell_id; 340 desc.db_id = client->doorbell_id;
342 341
343 for_each_engine(engine, dev_priv) { 342 for_each_engine_masked(engine, dev_priv, client->engines) {
344 struct intel_context *ce = &ctx->engine[engine->id]; 343 struct intel_context *ce = &ctx->engine[engine->id];
345 struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; 344 uint32_t guc_engine_id = engine->guc_id;
346 struct drm_i915_gem_object *obj; 345 struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
347 346
348 /* TODO: We have a design issue to be solved here. Only when we 347 /* TODO: We have a design issue to be solved here. Only when we
349 * receive the first batch do we know which engine is used by the 348
@@ -358,30 +357,29 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
358 lrc->context_desc = lower_32_bits(ce->lrc_desc); 357 lrc->context_desc = lower_32_bits(ce->lrc_desc);
359 358
360 /* The state page is after PPHWSP */ 359 /* The state page is after PPHWSP */
361 gfx_addr = i915_gem_obj_ggtt_offset(ce->state); 360 lrc->ring_lcra =
362 lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; 361 i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
363 lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | 362 lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
364 (engine->guc_id << GUC_ELC_ENGINE_OFFSET); 363 (guc_engine_id << GUC_ELC_ENGINE_OFFSET);
365
366 obj = ce->ring->obj;
367 gfx_addr = i915_gem_obj_ggtt_offset(obj);
368 364
369 lrc->ring_begin = gfx_addr; 365 lrc->ring_begin = i915_ggtt_offset(ce->ring->vma);
370 lrc->ring_end = gfx_addr + obj->base.size - 1; 366 lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
371 lrc->ring_next_free_location = gfx_addr; 367 lrc->ring_next_free_location = lrc->ring_begin;
372 lrc->ring_current_tail_pointer_value = 0; 368 lrc->ring_current_tail_pointer_value = 0;
373 369
374 desc.engines_used |= (1 << engine->guc_id); 370 desc.engines_used |= (1 << guc_engine_id);
375 } 371 }
376 372
373 DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
374 client->engines, desc.engines_used);
377 WARN_ON(desc.engines_used == 0); 375 WARN_ON(desc.engines_used == 0);
378 376
379 /* 377 /*
380 * The doorbell, process descriptor, and workqueue are all parts 378 * The doorbell, process descriptor, and workqueue are all parts
381 * of the client object, which the GuC will reference via the GGTT 379 * of the client object, which the GuC will reference via the GGTT
382 */ 380 */
383 gfx_addr = i915_gem_obj_ggtt_offset(client_obj); 381 gfx_addr = i915_ggtt_offset(client->vma);
384 desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) + 382 desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
385 client->doorbell_offset; 383 client->doorbell_offset;
386 desc.db_trigger_cpu = (uintptr_t)client->client_base + 384 desc.db_trigger_cpu = (uintptr_t)client->client_base +
387 client->doorbell_offset; 385 client->doorbell_offset;
@@ -397,7 +395,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
397 desc.desc_private = (uintptr_t)client; 395 desc.desc_private = (uintptr_t)client;
398 396
399 /* Pool context is pinned already */ 397 /* Pool context is pinned already */
400 sg = guc->ctx_pool_obj->pages; 398 sg = guc->ctx_pool_vma->pages;
401 sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 399 sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
402 sizeof(desc) * client->ctx_index); 400 sizeof(desc) * client->ctx_index);
403} 401}
@@ -410,7 +408,7 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
410 408
411 memset(&desc, 0, sizeof(desc)); 409 memset(&desc, 0, sizeof(desc));
412 410
413 sg = guc->ctx_pool_obj->pages; 411 sg = guc->ctx_pool_vma->pages;
414 sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 412 sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
415 sizeof(desc) * client->ctx_index); 413 sizeof(desc) * client->ctx_index);
416} 414}
@@ -457,6 +455,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
457 /* wqi_len is in DWords, and does not include the one-word header */ 455 /* wqi_len is in DWords, and does not include the one-word header */
458 const size_t wqi_size = sizeof(struct guc_wq_item); 456 const size_t wqi_size = sizeof(struct guc_wq_item);
459 const u32 wqi_len = wqi_size/sizeof(u32) - 1; 457 const u32 wqi_len = wqi_size/sizeof(u32) - 1;
458 struct intel_engine_cs *engine = rq->engine;
460 struct guc_process_desc *desc; 459 struct guc_process_desc *desc;
461 struct guc_wq_item *wqi; 460 struct guc_wq_item *wqi;
462 void *base; 461 void *base;
@@ -492,18 +491,17 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
492 /* WQ starts from the page after doorbell / process_desc */ 491 /* WQ starts from the page after doorbell / process_desc */
493 wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT; 492 wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
494 wq_off &= PAGE_SIZE - 1; 493 wq_off &= PAGE_SIZE - 1;
495 base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page)); 494 base = kmap_atomic(i915_gem_object_get_page(gc->vma->obj, wq_page));
496 wqi = (struct guc_wq_item *)((char *)base + wq_off); 495 wqi = (struct guc_wq_item *)((char *)base + wq_off);
497 496
498 /* Now fill in the 4-word work queue item */ 497 /* Now fill in the 4-word work queue item */
499 wqi->header = WQ_TYPE_INORDER | 498 wqi->header = WQ_TYPE_INORDER |
500 (wqi_len << WQ_LEN_SHIFT) | 499 (wqi_len << WQ_LEN_SHIFT) |
501 (rq->engine->guc_id << WQ_TARGET_SHIFT) | 500 (engine->guc_id << WQ_TARGET_SHIFT) |
502 WQ_NO_WCFLUSH_WAIT; 501 WQ_NO_WCFLUSH_WAIT;
503 502
504 /* The GuC wants only the low-order word of the context descriptor */ 503 /* The GuC wants only the low-order word of the context descriptor */
505 wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, 504 wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
506 rq->engine);
507 505
508 wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; 506 wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
509 wqi->fence_id = rq->fence.seqno; 507 wqi->fence_id = rq->fence.seqno;
@@ -611,55 +609,48 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
611 */ 609 */
612 610
613/** 611/**
614 * gem_allocate_guc_obj() - Allocate gem object for GuC usage 612 * guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
615 * @dev_priv: driver private data structure 613 * @guc: the guc
616 * @size: size of object 614 * @size: size of area to allocate (both virtual space and memory)
617 * 615 *
618 * This is a wrapper to create a gem obj. In order to use it inside GuC, the 616 * This is a wrapper to create an object for use with the GuC. In order to
619 * object needs to be pinned lifetime. Also we must pin it to gtt space other 617 * use it inside the GuC, an object needs to be pinned for its lifetime, so we allocate
620 * than [0, GUC_WOPCM_TOP) because this range is reserved inside GuC. 618 * both some backing storage and a range inside the Global GTT. We must pin
619 * it in the GGTT somewhere other than [0, GUC_WOPCM_TOP) because that
620 * range is reserved inside GuC.
621 * 621 *
622 * Return: A drm_i915_gem_object if successful, otherwise NULL. 622 * Return: A i915_vma if successful, otherwise an ERR_PTR.
623 */ 623 */
624static struct drm_i915_gem_object * 624static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
625gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size)
626{ 625{
626 struct drm_i915_private *dev_priv = guc_to_i915(guc);
627 struct drm_i915_gem_object *obj; 627 struct drm_i915_gem_object *obj;
628 struct i915_vma *vma;
629 int ret;
628 630
629 obj = i915_gem_object_create(&dev_priv->drm, size); 631 obj = i915_gem_object_create(&dev_priv->drm, size);
630 if (IS_ERR(obj)) 632 if (IS_ERR(obj))
631 return NULL; 633 return ERR_CAST(obj);
632 634
633 if (i915_gem_object_get_pages(obj)) { 635 vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
634 i915_gem_object_put(obj); 636 if (IS_ERR(vma))
635 return NULL; 637 goto err;
636 }
637 638
638 if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, 639 ret = i915_vma_pin(vma, 0, PAGE_SIZE,
639 PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { 640 PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
640 i915_gem_object_put(obj); 641 if (ret) {
641 return NULL; 642 vma = ERR_PTR(ret);
643 goto err;
642 } 644 }
643 645
644 /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ 646 /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
645 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); 647 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
646 648
647 return obj; 649 return vma;
648}
649
650/**
651 * gem_release_guc_obj() - Release gem object allocated for GuC usage
652 * @obj: gem obj to be released
653 */
654static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
655{
656 if (!obj)
657 return;
658
659 if (i915_gem_obj_is_pinned(obj))
660 i915_gem_object_ggtt_unpin(obj);
661 650
651err:
662 i915_gem_object_put(obj); 652 i915_gem_object_put(obj);
653 return vma;
663} 654}
664 655
665static void 656static void
@@ -686,7 +677,7 @@ guc_client_free(struct drm_i915_private *dev_priv,
686 kunmap(kmap_to_page(client->client_base)); 677 kunmap(kmap_to_page(client->client_base));
687 } 678 }
688 679
689 gem_release_guc_obj(client->client_obj); 680 i915_vma_unpin_and_release(&client->vma);
690 681
691 if (client->ctx_index != GUC_INVALID_CTX_ID) { 682 if (client->ctx_index != GUC_INVALID_CTX_ID) {
692 guc_fini_ctx_desc(guc, client); 683 guc_fini_ctx_desc(guc, client);
@@ -696,29 +687,47 @@ guc_client_free(struct drm_i915_private *dev_priv,
696 kfree(client); 687 kfree(client);
697} 688}
698 689
690/* Check that a doorbell register is in the expected state */
691static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
692{
693 struct drm_i915_private *dev_priv = guc_to_i915(guc);
694 i915_reg_t drbreg = GEN8_DRBREGL(db_id);
695 uint32_t value = I915_READ(drbreg);
696 bool enabled = (value & GUC_DOORBELL_ENABLED) != 0;
697 bool expected = test_bit(db_id, guc->doorbell_bitmap);
698
699 if (enabled == expected)
700 return true;
701
702 DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n",
703 db_id, drbreg.reg, value,
704 expected ? "active" : "inactive");
705
706 return false;
707}
708
699/* 709/*
700 * Borrow the first client to set up & tear down every doorbell 710 * Borrow the first client to set up & tear down each unused doorbell
701 * in turn, to ensure that all doorbell h/w is (re)initialised. 711 * in turn, to ensure that all doorbell h/w is (re)initialised.
702 */ 712 */
703static void guc_init_doorbell_hw(struct intel_guc *guc) 713static void guc_init_doorbell_hw(struct intel_guc *guc)
704{ 714{
705 struct drm_i915_private *dev_priv = guc_to_i915(guc);
706 struct i915_guc_client *client = guc->execbuf_client; 715 struct i915_guc_client *client = guc->execbuf_client;
707 uint16_t db_id, i; 716 uint16_t db_id;
708 int err; 717 int i, err;
709 718
719 /* Save client's original doorbell selection */
710 db_id = client->doorbell_id; 720 db_id = client->doorbell_id;
711 721
712 for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { 722 for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
713 i915_reg_t drbreg = GEN8_DRBREGL(i); 723 /* Skip if doorbell is OK */
714 u32 value = I915_READ(drbreg); 724 if (guc_doorbell_check(guc, i))
725 continue;
715 726
716 err = guc_update_doorbell_id(guc, client, i); 727 err = guc_update_doorbell_id(guc, client, i);
717 728 if (err)
718 /* Report update failure or unexpectedly active doorbell */ 729 DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n",
719 if (err || (i != db_id && (value & GUC_DOORBELL_ENABLED))) 730 i, err);
720 DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) was 0x%x, err %d\n",
721 i, drbreg.reg, value, err);
722 } 731 }
723 732
724 /* Restore to original value */ 733 /* Restore to original value */
@@ -727,20 +736,15 @@ static void guc_init_doorbell_hw(struct intel_guc *guc)
727 DRM_ERROR("Failed to restore doorbell to %d, err %d\n", 736 DRM_ERROR("Failed to restore doorbell to %d, err %d\n",
728 db_id, err); 737 db_id, err);
729 738
730 for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { 739 /* Read back & verify all doorbell registers */
731 i915_reg_t drbreg = GEN8_DRBREGL(i); 740 for (i = 0; i < GUC_MAX_DOORBELLS; ++i)
732 u32 value = I915_READ(drbreg); 741 (void)guc_doorbell_check(guc, i);
733
734 if (i != db_id && (value & GUC_DOORBELL_ENABLED))
735 DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n",
736 i, drbreg.reg, value);
737
738 }
739} 742}
740 743
741/** 744/**
742 * guc_client_alloc() - Allocate an i915_guc_client 745 * guc_client_alloc() - Allocate an i915_guc_client
743 * @dev_priv: driver private data structure 746 * @dev_priv: driver private data structure
747 * @engines: The set of engines to enable for this client
744 * @priority: four priority levels: _CRITICAL, _HIGH, _NORMAL and _LOW 748
745 * The kernel client to replace ExecList submission is created with 749 * The kernel client to replace ExecList submission is created with
746 * NORMAL priority. Priority of a client for scheduler can be HIGH, 750 * NORMAL priority. Priority of a client for scheduler can be HIGH,
@@ -752,22 +756,24 @@ static void guc_init_doorbell_hw(struct intel_guc *guc)
752 */ 756 */
753static struct i915_guc_client * 757static struct i915_guc_client *
754guc_client_alloc(struct drm_i915_private *dev_priv, 758guc_client_alloc(struct drm_i915_private *dev_priv,
759 uint32_t engines,
755 uint32_t priority, 760 uint32_t priority,
756 struct i915_gem_context *ctx) 761 struct i915_gem_context *ctx)
757{ 762{
758 struct i915_guc_client *client; 763 struct i915_guc_client *client;
759 struct intel_guc *guc = &dev_priv->guc; 764 struct intel_guc *guc = &dev_priv->guc;
760 struct drm_i915_gem_object *obj; 765 struct i915_vma *vma;
761 uint16_t db_id; 766 uint16_t db_id;
762 767
763 client = kzalloc(sizeof(*client), GFP_KERNEL); 768 client = kzalloc(sizeof(*client), GFP_KERNEL);
764 if (!client) 769 if (!client)
765 return NULL; 770 return NULL;
766 771
767 client->doorbell_id = GUC_INVALID_DOORBELL_ID;
768 client->priority = priority;
769 client->owner = ctx; 772 client->owner = ctx;
770 client->guc = guc; 773 client->guc = guc;
774 client->engines = engines;
775 client->priority = priority;
776 client->doorbell_id = GUC_INVALID_DOORBELL_ID;
771 777
772 client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, 778 client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
773 GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); 779 GUC_MAX_GPU_CONTEXTS, GFP_KERNEL);
@@ -777,13 +783,13 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
777 } 783 }
778 784
779 /* The first page is doorbell/proc_desc. The two pages that follow are the wq. */ 785
780 obj = gem_allocate_guc_obj(dev_priv, GUC_DB_SIZE + GUC_WQ_SIZE); 786 vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
781 if (!obj) 787 if (IS_ERR(vma))
782 goto err; 788 goto err;
783 789
784 /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ 790 /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
785 client->client_obj = obj; 791 client->vma = vma;
786 client->client_base = kmap(i915_gem_object_get_page(obj, 0)); 792 client->client_base = kmap(i915_vma_first_page(vma));
787 client->wq_offset = GUC_DB_SIZE; 793 client->wq_offset = GUC_DB_SIZE;
788 client->wq_size = GUC_WQ_SIZE; 794 client->wq_size = GUC_WQ_SIZE;
789 795
@@ -809,8 +815,8 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
809 if (guc_init_doorbell(guc, client, db_id)) 815 if (guc_init_doorbell(guc, client, db_id))
810 goto err; 816 goto err;
811 817
812 DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u\n", 818 DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n",
813 priority, client, client->ctx_index); 819 priority, client, client->engines, client->ctx_index);
814 DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n", 820 DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n",
815 client->doorbell_id, client->doorbell_offset); 821 client->doorbell_id, client->doorbell_offset);
816 822
@@ -825,8 +831,7 @@ err:
825 831
826static void guc_create_log(struct intel_guc *guc) 832static void guc_create_log(struct intel_guc *guc)
827{ 833{
828 struct drm_i915_private *dev_priv = guc_to_i915(guc); 834 struct i915_vma *vma;
829 struct drm_i915_gem_object *obj;
830 unsigned long offset; 835 unsigned long offset;
831 uint32_t size, flags; 836 uint32_t size, flags;
832 837
@@ -842,16 +847,16 @@ static void guc_create_log(struct intel_guc *guc)
842 GUC_LOG_ISR_PAGES + 1 + 847 GUC_LOG_ISR_PAGES + 1 +
843 GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; 848 GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
844 849
845 obj = guc->log_obj; 850 vma = guc->log_vma;
846 if (!obj) { 851 if (!vma) {
847 obj = gem_allocate_guc_obj(dev_priv, size); 852 vma = guc_allocate_vma(guc, size);
848 if (!obj) { 853 if (IS_ERR(vma)) {
849 /* logging will be off */ 854 /* logging will be off */
850 i915.guc_log_level = -1; 855 i915.guc_log_level = -1;
851 return; 856 return;
852 } 857 }
853 858
854 guc->log_obj = obj; 859 guc->log_vma = vma;
855 } 860 }
856 861
857 /* each allocated unit is a page */ 862 /* each allocated unit is a page */
@@ -860,7 +865,7 @@ static void guc_create_log(struct intel_guc *guc)
860 (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | 865 (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
861 (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); 866 (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
862 867
863 offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */ 868 offset = i915_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */
864 guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; 869 guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
865} 870}
866 871
@@ -889,7 +894,7 @@ static void init_guc_policies(struct guc_policies *policies)
889static void guc_create_ads(struct intel_guc *guc) 894static void guc_create_ads(struct intel_guc *guc)
890{ 895{
891 struct drm_i915_private *dev_priv = guc_to_i915(guc); 896 struct drm_i915_private *dev_priv = guc_to_i915(guc);
892 struct drm_i915_gem_object *obj; 897 struct i915_vma *vma;
893 struct guc_ads *ads; 898 struct guc_ads *ads;
894 struct guc_policies *policies; 899 struct guc_policies *policies;
895 struct guc_mmio_reg_state *reg_state; 900 struct guc_mmio_reg_state *reg_state;
@@ -902,16 +907,16 @@ static void guc_create_ads(struct intel_guc *guc)
902 sizeof(struct guc_mmio_reg_state) + 907 sizeof(struct guc_mmio_reg_state) +
903 GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE; 908 GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE;
904 909
905 obj = guc->ads_obj; 910 vma = guc->ads_vma;
906 if (!obj) { 911 if (!vma) {
907 obj = gem_allocate_guc_obj(dev_priv, PAGE_ALIGN(size)); 912 vma = guc_allocate_vma(guc, PAGE_ALIGN(size));
908 if (!obj) 913 if (IS_ERR(vma))
909 return; 914 return;
910 915
911 guc->ads_obj = obj; 916 guc->ads_vma = vma;
912 } 917 }
913 918
914 page = i915_gem_object_get_page(obj, 0); 919 page = i915_vma_first_page(vma);
915 ads = kmap(page); 920 ads = kmap(page);
916 921
917 /* 922 /*
@@ -922,7 +927,7 @@ static void guc_create_ads(struct intel_guc *guc)
922 * to find it. 927 * to find it.
923 */ 928 */
924 engine = &dev_priv->engine[RCS]; 929 engine = &dev_priv->engine[RCS];
925 ads->golden_context_lrca = engine->status_page.gfx_addr; 930 ads->golden_context_lrca = engine->status_page.ggtt_offset;
926 931
927 for_each_engine(engine, dev_priv) 932 for_each_engine(engine, dev_priv)
928 ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine); 933 ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine);
@@ -931,8 +936,8 @@ static void guc_create_ads(struct intel_guc *guc)
931 policies = (void *)ads + sizeof(struct guc_ads); 936 policies = (void *)ads + sizeof(struct guc_ads);
932 init_guc_policies(policies); 937 init_guc_policies(policies);
933 938
934 ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) + 939 ads->scheduler_policies =
935 sizeof(struct guc_ads); 940 i915_ggtt_offset(vma) + sizeof(struct guc_ads);
936 941
937 /* MMIO reg state */ 942 /* MMIO reg state */
938 reg_state = (void *)policies + sizeof(struct guc_policies); 943 reg_state = (void *)policies + sizeof(struct guc_policies);
@@ -960,10 +965,9 @@ static void guc_create_ads(struct intel_guc *guc)
960 */ 965 */
961int i915_guc_submission_init(struct drm_i915_private *dev_priv) 966int i915_guc_submission_init(struct drm_i915_private *dev_priv)
962{ 967{
963 const size_t ctxsize = sizeof(struct guc_context_desc);
964 const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
965 const size_t gemsize = round_up(poolsize, PAGE_SIZE);
966 struct intel_guc *guc = &dev_priv->guc; 968 struct intel_guc *guc = &dev_priv->guc;
969 struct i915_vma *vma;
970 u32 size;
967 971
968 /* Wipe bitmap & delete client in case of reinitialisation */ 972 /* Wipe bitmap & delete client in case of reinitialisation */
969 bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); 973 bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
@@ -972,13 +976,15 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv)
972 if (!i915.enable_guc_submission) 976 if (!i915.enable_guc_submission)
973 return 0; /* not enabled */ 977 return 0; /* not enabled */
974 978
975 if (guc->ctx_pool_obj) 979 if (guc->ctx_pool_vma)
976 return 0; /* already allocated */ 980 return 0; /* already allocated */
977 981
978 guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv, gemsize); 982 size = PAGE_ALIGN(GUC_MAX_GPU_CONTEXTS*sizeof(struct guc_context_desc));
979 if (!guc->ctx_pool_obj) 983 vma = guc_allocate_vma(guc, size);
980 return -ENOMEM; 984 if (IS_ERR(vma))
985 return PTR_ERR(vma);
981 986
987 guc->ctx_pool_vma = vma;
982 ida_init(&guc->ctx_ids); 988 ida_init(&guc->ctx_ids);
983 guc_create_log(guc); 989 guc_create_log(guc);
984 guc_create_ads(guc); 990 guc_create_ads(guc);
@@ -994,6 +1000,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
994 1000
995 /* client for execbuf submission */ 1001 /* client for execbuf submission */
996 client = guc_client_alloc(dev_priv, 1002 client = guc_client_alloc(dev_priv,
1003 INTEL_INFO(dev_priv)->ring_mask,
997 GUC_CTX_PRIORITY_KMD_NORMAL, 1004 GUC_CTX_PRIORITY_KMD_NORMAL,
998 dev_priv->kernel_context); 1005 dev_priv->kernel_context);
999 if (!client) { 1006 if (!client) {
@@ -1030,16 +1037,12 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
1030{ 1037{
1031 struct intel_guc *guc = &dev_priv->guc; 1038 struct intel_guc *guc = &dev_priv->guc;
1032 1039
1033 gem_release_guc_obj(dev_priv->guc.ads_obj); 1040 i915_vma_unpin_and_release(&guc->ads_vma);
1034 guc->ads_obj = NULL; 1041 i915_vma_unpin_and_release(&guc->log_vma);
1035
1036 gem_release_guc_obj(dev_priv->guc.log_obj);
1037 guc->log_obj = NULL;
1038 1042
1039 if (guc->ctx_pool_obj) 1043 if (guc->ctx_pool_vma)
1040 ida_destroy(&guc->ctx_ids); 1044 ida_destroy(&guc->ctx_ids);
1041 gem_release_guc_obj(guc->ctx_pool_obj); 1045 i915_vma_unpin_and_release(&guc->ctx_pool_vma);
1042 guc->ctx_pool_obj = NULL;
1043} 1046}
1044 1047
1045/** 1048/**
@@ -1062,7 +1065,7 @@ int intel_guc_suspend(struct drm_device *dev)
1062 /* any value greater than GUC_POWER_D0 */ 1065 /* any value greater than GUC_POWER_D0 */
1063 data[1] = GUC_POWER_D1; 1066 data[1] = GUC_POWER_D1;
1064 /* first page is shared data with GuC */ 1067 /* first page is shared data with GuC */
1065 data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); 1068 data[2] = i915_ggtt_offset(ctx->engine[RCS].state);
1066 1069
1067 return host2guc_action(guc, data, ARRAY_SIZE(data)); 1070 return host2guc_action(guc, data, ARRAY_SIZE(data));
1068} 1071}
@@ -1087,7 +1090,7 @@ int intel_guc_resume(struct drm_device *dev)
1087 data[0] = HOST2GUC_ACTION_EXIT_S_STATE; 1090 data[0] = HOST2GUC_ACTION_EXIT_S_STATE;
1088 data[1] = GUC_POWER_D0; 1091 data[1] = GUC_POWER_D0;
1089 /* first page is shared data with GuC */ 1092 /* first page is shared data with GuC */
1090 data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); 1093 data[2] = i915_ggtt_offset(ctx->engine[RCS].state);
1091 1094
1092 return host2guc_action(guc, data, ARRAY_SIZE(data)); 1095 return host2guc_action(guc, data, ARRAY_SIZE(data));
1093} 1096}
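
The GuC submission hunks above replace the old gem_allocate_guc_obj()/gem_release_guc_obj() pair with guc_allocate_vma() and i915_vma_unpin_and_release(). A minimal sketch of the intended calling pattern, assembled only from the hunks in this patch (the sizing mirrors i915_guc_submission_init() and is shown purely as a sketch, not as new driver code):

	struct i915_vma *vma;
	u32 size;

	/* One guc_context_desc per context, rounded up to whole pages. */
	size = PAGE_ALIGN(GUC_MAX_GPU_CONTEXTS * sizeof(struct guc_context_desc));

	/* Allocates backing storage and pins it in the GGTT above GUC_WOPCM_TOP. */
	vma = guc_allocate_vma(guc, size);
	if (IS_ERR(vma))
		return PTR_ERR(vma);
	guc->ctx_pool_vma = vma;

	/* On teardown a single helper drops both the pin and the object reference. */
	i915_vma_unpin_and_release(&guc->ctx_pool_vma);
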
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 591f452ece68..ebb83d5a448b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -972,10 +972,8 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
972static void notify_ring(struct intel_engine_cs *engine) 972static void notify_ring(struct intel_engine_cs *engine)
973{ 973{
974 smp_store_mb(engine->breadcrumbs.irq_posted, true); 974 smp_store_mb(engine->breadcrumbs.irq_posted, true);
975 if (intel_engine_wakeup(engine)) { 975 if (intel_engine_wakeup(engine))
976 trace_i915_gem_request_notify(engine); 976 trace_i915_gem_request_notify(engine);
977 engine->breadcrumbs.irq_wakeups++;
978 }
979} 977}
980 978
981static void vlv_c0_read(struct drm_i915_private *dev_priv, 979static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -3044,22 +3042,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
3044 return HANGCHECK_HUNG; 3042 return HANGCHECK_HUNG;
3045} 3043}
3046 3044
3047static unsigned long kick_waiters(struct intel_engine_cs *engine)
3048{
3049 struct drm_i915_private *i915 = engine->i915;
3050 unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups);
3051
3052 if (engine->hangcheck.user_interrupts == irq_count &&
3053 !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
3054 if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
3055 DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
3056 engine->name);
3057
3058 intel_engine_enable_fake_irq(engine);
3059 }
3060
3061 return irq_count;
3062}
3063/* 3045/*
3064 * This is called when the chip hasn't reported back with completed 3046 * This is called when the chip hasn't reported back with completed
3065 * batchbuffers in a long time. We keep track per ring seqno progress and 3047 * batchbuffers in a long time. We keep track per ring seqno progress and
@@ -3097,7 +3079,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
3097 bool busy = intel_engine_has_waiter(engine); 3079 bool busy = intel_engine_has_waiter(engine);
3098 u64 acthd; 3080 u64 acthd;
3099 u32 seqno; 3081 u32 seqno;
3100 unsigned user_interrupts;
3101 3082
3102 semaphore_clear_deadlocks(dev_priv); 3083 semaphore_clear_deadlocks(dev_priv);
3103 3084
@@ -3114,15 +3095,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
3114 acthd = intel_engine_get_active_head(engine); 3095 acthd = intel_engine_get_active_head(engine);
3115 seqno = intel_engine_get_seqno(engine); 3096 seqno = intel_engine_get_seqno(engine);
3116 3097
3117 /* Reset stuck interrupts between batch advances */
3118 user_interrupts = 0;
3119
3120 if (engine->hangcheck.seqno == seqno) { 3098 if (engine->hangcheck.seqno == seqno) {
3121 if (!intel_engine_is_active(engine)) { 3099 if (!intel_engine_is_active(engine)) {
3122 engine->hangcheck.action = HANGCHECK_IDLE; 3100 engine->hangcheck.action = HANGCHECK_IDLE;
3123 if (busy) { 3101 if (busy) {
3124 /* Safeguard against driver failure */ 3102 /* Safeguard against driver failure */
3125 user_interrupts = kick_waiters(engine);
3126 engine->hangcheck.score += BUSY; 3103 engine->hangcheck.score += BUSY;
3127 } 3104 }
3128 } else { 3105 } else {
@@ -3185,7 +3162,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
3185 3162
3186 engine->hangcheck.seqno = seqno; 3163 engine->hangcheck.seqno = seqno;
3187 engine->hangcheck.acthd = acthd; 3164 engine->hangcheck.acthd = acthd;
3188 engine->hangcheck.user_interrupts = user_interrupts;
3189 busy_count += busy; 3165 busy_count += busy;
3190 } 3166 }
3191 3167
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
new file mode 100644
index 000000000000..49a079494b68
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -0,0 +1,101 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <linux/kernel.h>
26#include <asm/fpu/api.h>
27
28#include "i915_drv.h"
29
30static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
31
32#ifdef CONFIG_AS_MOVNTDQA
33static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
34{
35 kernel_fpu_begin();
36
37 len >>= 4;
38 while (len >= 4) {
39 asm("movntdqa (%0), %%xmm0\n"
40 "movntdqa 16(%0), %%xmm1\n"
41 "movntdqa 32(%0), %%xmm2\n"
42 "movntdqa 48(%0), %%xmm3\n"
43 "movaps %%xmm0, (%1)\n"
44 "movaps %%xmm1, 16(%1)\n"
45 "movaps %%xmm2, 32(%1)\n"
46 "movaps %%xmm3, 48(%1)\n"
47 :: "r" (src), "r" (dst) : "memory");
48 src += 64;
49 dst += 64;
50 len -= 4;
51 }
52 while (len--) {
53 asm("movntdqa (%0), %%xmm0\n"
54 "movaps %%xmm0, (%1)\n"
55 :: "r" (src), "r" (dst) : "memory");
56 src += 16;
57 dst += 16;
58 }
59
60 kernel_fpu_end();
61}
62#endif
63
64/**
65 * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
66 * @dst: destination pointer
67 * @src: source pointer
68 * @len: how many bytes to copy
69 *
70 * i915_memcpy_from_wc copies @len bytes from @src to @dst using
71 * non-temporal instructions where available. Note that all arguments
72 * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
73 * of 16.
74 *
75 * To test whether accelerated reads from WC are supported, use
76 * i915_memcpy_from_wc(NULL, NULL, 0);
77 *
78 * Returns true if the copy was successful, false if the preconditions
79 * are not met.
80 */
81bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
82{
83 if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
84 return false;
85
86#ifdef CONFIG_AS_MOVNTDQA
87 if (static_branch_likely(&has_movntdqa)) {
88 if (likely(len))
89 __memcpy_ntdqa(dst, src, len);
90 return true;
91 }
92#endif
93
94 return false;
95}
96
97void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
98{
99 if (static_cpu_has(X86_FEATURE_XMM4_1))
100 static_branch_enable(&has_movntdqa);
101}
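
The kernel-doc above spells out the contract for the new helper: a NULL/NULL/0 call probes for MOVNTDQA (SSE4.1) support, and any call with misaligned pointers or a length that is not a multiple of 16 returns false, so the caller must fall back to an ordinary copy. A short usage sketch under those rules; the wrapper name and the memcpy_fromio() fallback are illustrative assumptions, not part of this patch:

	static void copy_from_wc(void *dst, const void __iomem *src, unsigned long len)
	{
		/*
		 * i915_memcpy_from_wc(NULL, NULL, 0) can be used up front to probe
		 * whether accelerated WC reads are available at all.
		 */
		if (i915_memcpy_from_wc(dst, (const void __force *)src, len))
			return;	/* streamed via MOVNTDQA */

		/* Misaligned, odd length, or no SSE4.1: fall back to a plain io read. */
		memcpy_fromio(dst, src, len);
	}
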
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
new file mode 100644
index 000000000000..e4935dd1fd37
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -0,0 +1,84 @@
1/*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <linux/mm.h>
26#include <linux/io-mapping.h>
27
28#include <asm/pgtable.h>
29
30#include "i915_drv.h"
31
32struct remap_pfn {
33 struct mm_struct *mm;
34 unsigned long pfn;
35 pgprot_t prot;
36};
37
38static int remap_pfn(pte_t *pte, pgtable_t token,
39 unsigned long addr, void *data)
40{
41 struct remap_pfn *r = data;
42
43 /* Special PTEs are not associated with any struct page */
44 set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot)));
45 r->pfn++;
46
47 return 0;
48}
49
50/**
51 * remap_io_mapping - remap an IO mapping to userspace
52 * @vma: user vma to map to
53 * @addr: target user address to start at
54 * @pfn: physical address of kernel memory
55 * @size: size of map area
56 * @iomap: the source io_mapping
57 *
58 * Note: this is only safe if the mm semaphore is held when called.
59 */
60int remap_io_mapping(struct vm_area_struct *vma,
61 unsigned long addr, unsigned long pfn, unsigned long size,
62 struct io_mapping *iomap)
63{
64 struct remap_pfn r;
65 int err;
66
67 GEM_BUG_ON((vma->vm_flags &
68 (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)) !=
69 (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP));
70
71 /* We rely on prevalidation of the io-mapping to skip track_pfn(). */
72 r.mm = vma->vm_mm;
73 r.pfn = pfn;
74 r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) |
75 (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK));
76
77 err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r);
78 if (unlikely(err)) {
79 zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT);
80 return err;
81 }
82
83 return 0;
84}
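
remap_io_mapping() remaps a chunk of an io_mapping into a user vma in a single apply_to_page_range() pass, instead of inserting PTEs one fault at a time; as the kernel-doc notes, the caller must already hold the mm semaphore. A rough sketch of a call site, in which 'area', 'pfn' and the surrounding error handling are illustrative assumptions rather than an exact hunk from this series:

	/* area: the user's vm_area_struct; pfn: first page frame of the aperture range. */
	ret = remap_io_mapping(area,
			       area->vm_start,
			       pfn,
			       area->vm_end - area->vm_start,
			       &ggtt->mappable);	/* struct io_mapping embedded in the ggtt */
	if (ret)
		return ret;
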
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index b6e404c91eed..768ad89d9cd4 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -45,6 +45,7 @@ struct i915_params i915 __read_mostly = {
45 .fastboot = 0, 45 .fastboot = 0,
46 .prefault_disable = 0, 46 .prefault_disable = 0,
47 .load_detect_test = 0, 47 .load_detect_test = 0,
48 .force_reset_modeset_test = 0,
48 .reset = true, 49 .reset = true,
49 .invert_brightness = 0, 50 .invert_brightness = 0,
50 .disable_display = 0, 51 .disable_display = 0,
@@ -161,6 +162,11 @@ MODULE_PARM_DESC(load_detect_test,
161 "Force-enable the VGA load detect code for testing (default:false). " 162 "Force-enable the VGA load detect code for testing (default:false). "
162 "For developers only."); 163 "For developers only.");
163 164
165module_param_named_unsafe(force_reset_modeset_test, i915.force_reset_modeset_test, bool, 0600);
166MODULE_PARM_DESC(force_reset_modeset_test,
167 "Force a modeset during gpu reset for testing (default:false). "
168 "For developers only.");
169
164module_param_named_unsafe(invert_brightness, i915.invert_brightness, int, 0600); 170module_param_named_unsafe(invert_brightness, i915.invert_brightness, int, 0600);
165MODULE_PARM_DESC(invert_brightness, 171MODULE_PARM_DESC(invert_brightness,
166 "Invert backlight brightness " 172 "Invert backlight brightness "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 0ad020b4a925..3a0dd78ddb38 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -57,6 +57,7 @@ struct i915_params {
57 bool fastboot; 57 bool fastboot;
58 bool prefault_disable; 58 bool prefault_disable;
59 bool load_detect_test; 59 bool load_detect_test;
60 bool force_reset_modeset_test;
60 bool reset; 61 bool reset;
61 bool disable_display; 62 bool disable_display;
62 bool verbose_state_checks; 63 bool verbose_state_checks;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f38a5e20bbee..d4adf2806c50 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3660,8 +3660,17 @@ enum {
3660#define VIDEO_DIP_ENABLE_SPD_HSW (1 << 0) 3660#define VIDEO_DIP_ENABLE_SPD_HSW (1 << 0)
3661 3661
3662/* Panel power sequencing */ 3662/* Panel power sequencing */
3663#define PP_STATUS _MMIO(0x61200) 3663#define PPS_BASE 0x61200
3664#define PP_ON (1 << 31) 3664#define VLV_PPS_BASE (VLV_DISPLAY_BASE + PPS_BASE)
3665#define PCH_PPS_BASE 0xC7200
3666
3667#define _MMIO_PPS(pps_idx, reg) _MMIO(dev_priv->pps_mmio_base - \
3668 PPS_BASE + (reg) + \
3669 (pps_idx) * 0x100)
3670
3671#define _PP_STATUS 0x61200
3672#define PP_STATUS(pps_idx) _MMIO_PPS(pps_idx, _PP_STATUS)
3673#define PP_ON (1 << 31)
3665/* 3674/*
3666 * Indicates that all dependencies of the panel are on: 3675 * Indicates that all dependencies of the panel are on:
3667 * 3676 *
@@ -3669,14 +3678,14 @@ enum {
3669 * - pipe enabled 3678 * - pipe enabled
3670 * - LVDS/DVOB/DVOC on 3679 * - LVDS/DVOB/DVOC on
3671 */ 3680 */
3672#define PP_READY (1 << 30) 3681#define PP_READY (1 << 30)
3673#define PP_SEQUENCE_NONE (0 << 28) 3682#define PP_SEQUENCE_NONE (0 << 28)
3674#define PP_SEQUENCE_POWER_UP (1 << 28) 3683#define PP_SEQUENCE_POWER_UP (1 << 28)
3675#define PP_SEQUENCE_POWER_DOWN (2 << 28) 3684#define PP_SEQUENCE_POWER_DOWN (2 << 28)
3676#define PP_SEQUENCE_MASK (3 << 28) 3685#define PP_SEQUENCE_MASK (3 << 28)
3677#define PP_SEQUENCE_SHIFT 28 3686#define PP_SEQUENCE_SHIFT 28
3678#define PP_CYCLE_DELAY_ACTIVE (1 << 27) 3687#define PP_CYCLE_DELAY_ACTIVE (1 << 27)
3679#define PP_SEQUENCE_STATE_MASK 0x0000000f 3688#define PP_SEQUENCE_STATE_MASK 0x0000000f
3680#define PP_SEQUENCE_STATE_OFF_IDLE (0x0 << 0) 3689#define PP_SEQUENCE_STATE_OFF_IDLE (0x0 << 0)
3681#define PP_SEQUENCE_STATE_OFF_S0_1 (0x1 << 0) 3690#define PP_SEQUENCE_STATE_OFF_S0_1 (0x1 << 0)
3682#define PP_SEQUENCE_STATE_OFF_S0_2 (0x2 << 0) 3691#define PP_SEQUENCE_STATE_OFF_S0_2 (0x2 << 0)
@@ -3686,11 +3695,46 @@ enum {
3686#define PP_SEQUENCE_STATE_ON_S1_2 (0xa << 0) 3695#define PP_SEQUENCE_STATE_ON_S1_2 (0xa << 0)
3687#define PP_SEQUENCE_STATE_ON_S1_3 (0xb << 0) 3696#define PP_SEQUENCE_STATE_ON_S1_3 (0xb << 0)
3688#define PP_SEQUENCE_STATE_RESET (0xf << 0) 3697#define PP_SEQUENCE_STATE_RESET (0xf << 0)
3689#define PP_CONTROL _MMIO(0x61204) 3698
3690#define POWER_TARGET_ON (1 << 0) 3699#define _PP_CONTROL 0x61204
3691#define PP_ON_DELAYS _MMIO(0x61208) 3700#define PP_CONTROL(pps_idx) _MMIO_PPS(pps_idx, _PP_CONTROL)
3692#define PP_OFF_DELAYS _MMIO(0x6120c) 3701#define PANEL_UNLOCK_REGS (0xabcd << 16)
3693#define PP_DIVISOR _MMIO(0x61210) 3702#define PANEL_UNLOCK_MASK (0xffff << 16)
3703#define BXT_POWER_CYCLE_DELAY_MASK 0x1f0
3704#define BXT_POWER_CYCLE_DELAY_SHIFT 4
3705#define EDP_FORCE_VDD (1 << 3)
3706#define EDP_BLC_ENABLE (1 << 2)
3707#define PANEL_POWER_RESET (1 << 1)
3708#define PANEL_POWER_OFF (0 << 0)
3709#define PANEL_POWER_ON (1 << 0)
3710
3711#define _PP_ON_DELAYS 0x61208
3712#define PP_ON_DELAYS(pps_idx) _MMIO_PPS(pps_idx, _PP_ON_DELAYS)
3713#define PANEL_PORT_SELECT_SHIFT 30
3714#define PANEL_PORT_SELECT_MASK (3 << 30)
3715#define PANEL_PORT_SELECT_LVDS (0 << 30)
3716#define PANEL_PORT_SELECT_DPA (1 << 30)
3717#define PANEL_PORT_SELECT_DPC (2 << 30)
3718#define PANEL_PORT_SELECT_DPD (3 << 30)
3719#define PANEL_PORT_SELECT_VLV(port) ((port) << 30)
3720#define PANEL_POWER_UP_DELAY_MASK 0x1fff0000
3721#define PANEL_POWER_UP_DELAY_SHIFT 16
3722#define PANEL_LIGHT_ON_DELAY_MASK 0x1fff
3723#define PANEL_LIGHT_ON_DELAY_SHIFT 0
3724
3725#define _PP_OFF_DELAYS 0x6120C
3726#define PP_OFF_DELAYS(pps_idx) _MMIO_PPS(pps_idx, _PP_OFF_DELAYS)
3727#define PANEL_POWER_DOWN_DELAY_MASK 0x1fff0000
3728#define PANEL_POWER_DOWN_DELAY_SHIFT 16
3729#define PANEL_LIGHT_OFF_DELAY_MASK 0x1fff
3730#define PANEL_LIGHT_OFF_DELAY_SHIFT 0
3731
3732#define _PP_DIVISOR 0x61210
3733#define PP_DIVISOR(pps_idx) _MMIO_PPS(pps_idx, _PP_DIVISOR)
3734#define PP_REFERENCE_DIVIDER_MASK 0xffffff00
3735#define PP_REFERENCE_DIVIDER_SHIFT 8
3736#define PANEL_POWER_CYCLE_DELAY_MASK 0x1f
3737#define PANEL_POWER_CYCLE_DELAY_SHIFT 0
3694 3738
3695/* Panel fitting */ 3739/* Panel fitting */
3696#define PFIT_CONTROL _MMIO(dev_priv->info.display_mmio_offset + 0x61230) 3740#define PFIT_CONTROL _MMIO(dev_priv->info.display_mmio_offset + 0x61230)
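A worked expansion (not part of the patch) showing how the parameterized PPS macros above reproduce the fixed per-platform registers removed further down in this file:

	/*
	 * With dev_priv->pps_mmio_base = PCH_PPS_BASE (0xC7200):
	 *   PP_STATUS(0) = 0xC7200 - 0x61200 + 0x61200 + 0 * 0x100 = 0xC7200  (old _PCH_PP_STATUS)
	 *   PP_STATUS(1) = 0xC7200 + 0x100                          = 0xC7300  (old _BXT_PP_STATUS2)
	 *
	 * With dev_priv->pps_mmio_base = VLV_PPS_BASE:
	 *   PP_CONTROL(PIPE_B) = VLV_DISPLAY_BASE + 0x61204 + 0x100
	 *                      = VLV_DISPLAY_BASE + 0x61304          (old _PIPEB_PP_CONTROL)
	 */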
@@ -6750,77 +6794,6 @@ enum {
6750#define PCH_LVDS _MMIO(0xe1180) 6794#define PCH_LVDS _MMIO(0xe1180)
6751#define LVDS_DETECTED (1 << 1) 6795#define LVDS_DETECTED (1 << 1)
6752 6796
6753/* vlv has 2 sets of panel control regs. */
6754#define _PIPEA_PP_STATUS (VLV_DISPLAY_BASE + 0x61200)
6755#define _PIPEA_PP_CONTROL (VLV_DISPLAY_BASE + 0x61204)
6756#define _PIPEA_PP_ON_DELAYS (VLV_DISPLAY_BASE + 0x61208)
6757#define PANEL_PORT_SELECT_VLV(port) ((port) << 30)
6758#define _PIPEA_PP_OFF_DELAYS (VLV_DISPLAY_BASE + 0x6120c)
6759#define _PIPEA_PP_DIVISOR (VLV_DISPLAY_BASE + 0x61210)
6760
6761#define _PIPEB_PP_STATUS (VLV_DISPLAY_BASE + 0x61300)
6762#define _PIPEB_PP_CONTROL (VLV_DISPLAY_BASE + 0x61304)
6763#define _PIPEB_PP_ON_DELAYS (VLV_DISPLAY_BASE + 0x61308)
6764#define _PIPEB_PP_OFF_DELAYS (VLV_DISPLAY_BASE + 0x6130c)
6765#define _PIPEB_PP_DIVISOR (VLV_DISPLAY_BASE + 0x61310)
6766
6767#define VLV_PIPE_PP_STATUS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_STATUS, _PIPEB_PP_STATUS)
6768#define VLV_PIPE_PP_CONTROL(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_CONTROL, _PIPEB_PP_CONTROL)
6769#define VLV_PIPE_PP_ON_DELAYS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_ON_DELAYS, _PIPEB_PP_ON_DELAYS)
6770#define VLV_PIPE_PP_OFF_DELAYS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_OFF_DELAYS, _PIPEB_PP_OFF_DELAYS)
6771#define VLV_PIPE_PP_DIVISOR(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_DIVISOR, _PIPEB_PP_DIVISOR)
6772
6773#define _PCH_PP_STATUS 0xc7200
6774#define _PCH_PP_CONTROL 0xc7204
6775#define PANEL_UNLOCK_REGS (0xabcd << 16)
6776#define PANEL_UNLOCK_MASK (0xffff << 16)
6777#define BXT_POWER_CYCLE_DELAY_MASK (0x1f0)
6778#define BXT_POWER_CYCLE_DELAY_SHIFT 4
6779#define EDP_FORCE_VDD (1 << 3)
6780#define EDP_BLC_ENABLE (1 << 2)
6781#define PANEL_POWER_RESET (1 << 1)
6782#define PANEL_POWER_OFF (0 << 0)
6783#define PANEL_POWER_ON (1 << 0)
6784#define _PCH_PP_ON_DELAYS 0xc7208
6785#define PANEL_PORT_SELECT_MASK (3 << 30)
6786#define PANEL_PORT_SELECT_LVDS (0 << 30)
6787#define PANEL_PORT_SELECT_DPA (1 << 30)
6788#define PANEL_PORT_SELECT_DPC (2 << 30)
6789#define PANEL_PORT_SELECT_DPD (3 << 30)
6790#define PANEL_POWER_UP_DELAY_MASK (0x1fff0000)
6791#define PANEL_POWER_UP_DELAY_SHIFT 16
6792#define PANEL_LIGHT_ON_DELAY_MASK (0x1fff)
6793#define PANEL_LIGHT_ON_DELAY_SHIFT 0
6794
6795#define _PCH_PP_OFF_DELAYS 0xc720c
6796#define PANEL_POWER_DOWN_DELAY_MASK (0x1fff0000)
6797#define PANEL_POWER_DOWN_DELAY_SHIFT 16
6798#define PANEL_LIGHT_OFF_DELAY_MASK (0x1fff)
6799#define PANEL_LIGHT_OFF_DELAY_SHIFT 0
6800
6801#define _PCH_PP_DIVISOR 0xc7210
6802#define PP_REFERENCE_DIVIDER_MASK (0xffffff00)
6803#define PP_REFERENCE_DIVIDER_SHIFT 8
6804#define PANEL_POWER_CYCLE_DELAY_MASK (0x1f)
6805#define PANEL_POWER_CYCLE_DELAY_SHIFT 0
6806
6807#define PCH_PP_STATUS _MMIO(_PCH_PP_STATUS)
6808#define PCH_PP_CONTROL _MMIO(_PCH_PP_CONTROL)
6809#define PCH_PP_ON_DELAYS _MMIO(_PCH_PP_ON_DELAYS)
6810#define PCH_PP_OFF_DELAYS _MMIO(_PCH_PP_OFF_DELAYS)
6811#define PCH_PP_DIVISOR _MMIO(_PCH_PP_DIVISOR)
6812
6813/* BXT PPS changes - 2nd set of PPS registers */
6814#define _BXT_PP_STATUS2 0xc7300
6815#define _BXT_PP_CONTROL2 0xc7304
6816#define _BXT_PP_ON_DELAYS2 0xc7308
6817#define _BXT_PP_OFF_DELAYS2 0xc730c
6818
6819#define BXT_PP_STATUS(n) _MMIO_PIPE(n, _PCH_PP_STATUS, _BXT_PP_STATUS2)
6820#define BXT_PP_CONTROL(n) _MMIO_PIPE(n, _PCH_PP_CONTROL, _BXT_PP_CONTROL2)
6821#define BXT_PP_ON_DELAYS(n) _MMIO_PIPE(n, _PCH_PP_ON_DELAYS, _BXT_PP_ON_DELAYS2)
6822#define BXT_PP_OFF_DELAYS(n) _MMIO_PIPE(n, _PCH_PP_OFF_DELAYS, _BXT_PP_OFF_DELAYS2)
6823
6824#define _PCH_DP_B 0xe4100 6797#define _PCH_DP_B 0xe4100
6825#define PCH_DP_B _MMIO(_PCH_DP_B) 6798#define PCH_DP_B _MMIO(_PCH_DP_B)
6826#define _PCH_DPB_AUX_CH_CTL 0xe4110 6799#define _PCH_DPB_AUX_CH_CTL 0xe4110
@@ -7063,12 +7036,13 @@ enum {
7063#define GEN6_RP_UP_THRESHOLD _MMIO(0xA02C) 7036#define GEN6_RP_UP_THRESHOLD _MMIO(0xA02C)
7064#define GEN6_RP_DOWN_THRESHOLD _MMIO(0xA030) 7037#define GEN6_RP_DOWN_THRESHOLD _MMIO(0xA030)
7065#define GEN6_RP_CUR_UP_EI _MMIO(0xA050) 7038#define GEN6_RP_CUR_UP_EI _MMIO(0xA050)
7066#define GEN6_CURICONT_MASK 0xffffff 7039#define GEN6_RP_EI_MASK 0xffffff
7040#define GEN6_CURICONT_MASK GEN6_RP_EI_MASK
7067#define GEN6_RP_CUR_UP _MMIO(0xA054) 7041#define GEN6_RP_CUR_UP _MMIO(0xA054)
7068#define GEN6_CURBSYTAVG_MASK 0xffffff 7042#define GEN6_CURBSYTAVG_MASK GEN6_RP_EI_MASK
7069#define GEN6_RP_PREV_UP _MMIO(0xA058) 7043#define GEN6_RP_PREV_UP _MMIO(0xA058)
7070#define GEN6_RP_CUR_DOWN_EI _MMIO(0xA05C) 7044#define GEN6_RP_CUR_DOWN_EI _MMIO(0xA05C)
7071#define GEN6_CURIAVG_MASK 0xffffff 7045#define GEN6_CURIAVG_MASK GEN6_RP_EI_MASK
7072#define GEN6_RP_CUR_DOWN _MMIO(0xA060) 7046#define GEN6_RP_CUR_DOWN _MMIO(0xA060)
7073#define GEN6_RP_PREV_DOWN _MMIO(0xA064) 7047#define GEN6_RP_PREV_DOWN _MMIO(0xA064)
7074#define GEN6_RP_UP_EI _MMIO(0xA068) 7048#define GEN6_RP_UP_EI _MMIO(0xA068)
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 5cfe4c7716b4..4f272777b4f4 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -37,25 +37,6 @@ static void i915_save_display(struct drm_device *dev)
37 if (INTEL_INFO(dev)->gen <= 4) 37 if (INTEL_INFO(dev)->gen <= 4)
38 dev_priv->regfile.saveDSPARB = I915_READ(DSPARB); 38 dev_priv->regfile.saveDSPARB = I915_READ(DSPARB);
39 39
40 /* LVDS state */
41 if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
42 dev_priv->regfile.saveLVDS = I915_READ(PCH_LVDS);
43 else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev))
44 dev_priv->regfile.saveLVDS = I915_READ(LVDS);
45
46 /* Panel power sequencer */
47 if (HAS_PCH_SPLIT(dev)) {
48 dev_priv->regfile.savePP_CONTROL = I915_READ(PCH_PP_CONTROL);
49 dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS);
50 dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS);
51 dev_priv->regfile.savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR);
52 } else if (INTEL_INFO(dev)->gen <= 4) {
53 dev_priv->regfile.savePP_CONTROL = I915_READ(PP_CONTROL);
54 dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS);
55 dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS);
56 dev_priv->regfile.savePP_DIVISOR = I915_READ(PP_DIVISOR);
57 }
58
59 /* save FBC interval */ 40 /* save FBC interval */
60 if (HAS_FBC(dev) && INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev)) 41 if (HAS_FBC(dev) && INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev))
61 dev_priv->regfile.saveFBC_CONTROL = I915_READ(FBC_CONTROL); 42 dev_priv->regfile.saveFBC_CONTROL = I915_READ(FBC_CONTROL);
@@ -64,33 +45,11 @@ static void i915_save_display(struct drm_device *dev)
64static void i915_restore_display(struct drm_device *dev) 45static void i915_restore_display(struct drm_device *dev)
65{ 46{
66 struct drm_i915_private *dev_priv = to_i915(dev); 47 struct drm_i915_private *dev_priv = to_i915(dev);
67 u32 mask = 0xffffffff;
68 48
69 /* Display arbitration */ 49 /* Display arbitration */
70 if (INTEL_INFO(dev)->gen <= 4) 50 if (INTEL_INFO(dev)->gen <= 4)
71 I915_WRITE(DSPARB, dev_priv->regfile.saveDSPARB); 51 I915_WRITE(DSPARB, dev_priv->regfile.saveDSPARB);
72 52
73 mask = ~LVDS_PORT_EN;
74
75 /* LVDS state */
76 if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
77 I915_WRITE(PCH_LVDS, dev_priv->regfile.saveLVDS & mask);
78 else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev))
79 I915_WRITE(LVDS, dev_priv->regfile.saveLVDS & mask);
80
81 /* Panel power sequencer */
82 if (HAS_PCH_SPLIT(dev)) {
83 I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
84 I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
85 I915_WRITE(PCH_PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
86 I915_WRITE(PCH_PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
87 } else if (INTEL_INFO(dev)->gen <= 4) {
88 I915_WRITE(PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
89 I915_WRITE(PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
90 I915_WRITE(PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
91 I915_WRITE(PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
92 }
93
94 /* only restore FBC info on the platform that supports FBC*/ 53 /* only restore FBC info on the platform that supports FBC*/
95 intel_fbc_global_disable(dev_priv); 54 intel_fbc_global_disable(dev_priv);
96 55
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 90867446f1a5..2491e4c1eaf0 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -26,6 +26,40 @@
26 26
27#include "i915_drv.h" 27#include "i915_drv.h"
28 28
29static void intel_breadcrumbs_hangcheck(unsigned long data)
30{
31 struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
32 struct intel_breadcrumbs *b = &engine->breadcrumbs;
33
34 if (!b->irq_enabled)
35 return;
36
37 if (time_before(jiffies, b->timeout)) {
38 mod_timer(&b->hangcheck, b->timeout);
39 return;
40 }
41
42 DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
43 set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
44 mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
45
46 /* Ensure that even if the GPU hangs, we get woken up.
47 *
48 * However, note that if no one is waiting, we never notice
49 * a gpu hang. Eventually, we will have to wait for a resource
50 * held by the GPU and so trigger a hangcheck. In the most
51 * pathological case, this will be upon memory starvation! To
52 * prevent this, we also queue the hangcheck from the retire
53 * worker.
54 */
55 i915_queue_hangcheck(engine->i915);
56}
57
58static unsigned long wait_timeout(void)
59{
60 return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
61}
62
29static void intel_breadcrumbs_fake_irq(unsigned long data) 63static void intel_breadcrumbs_fake_irq(unsigned long data)
30{ 64{
31 struct intel_engine_cs *engine = (struct intel_engine_cs *)data; 65 struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
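For the new hangcheck timer above to build, the matching change to struct intel_breadcrumbs in intel_ringbuffer.h (not shown in this section) is assumed to add roughly:

	struct timer_list hangcheck;	/* detect missed interrupts */
	unsigned long timeout;		/* deadline armed from wait_timeout() */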
@@ -37,10 +71,8 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
37 * every jiffie in order to kick the oldest waiter to do the 71 * every jiffie in order to kick the oldest waiter to do the
38 * coherent seqno check. 72 * coherent seqno check.
39 */ 73 */
40 rcu_read_lock();
41 if (intel_engine_wakeup(engine)) 74 if (intel_engine_wakeup(engine))
42 mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); 75 mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
43 rcu_read_unlock();
44} 76}
45 77
46static void irq_enable(struct intel_engine_cs *engine) 78static void irq_enable(struct intel_engine_cs *engine)
@@ -51,13 +83,6 @@ static void irq_enable(struct intel_engine_cs *engine)
51 */ 83 */
52 engine->breadcrumbs.irq_posted = true; 84 engine->breadcrumbs.irq_posted = true;
53 85
54 /* Make sure the current hangcheck doesn't falsely accuse a just
55 * started irq handler from missing an interrupt (because the
56 * interrupt count still matches the stale value from when
57 * the irq handler was disabled, many hangchecks ago).
58 */
59 engine->breadcrumbs.irq_wakeups++;
60
61 spin_lock_irq(&engine->i915->irq_lock); 86 spin_lock_irq(&engine->i915->irq_lock);
62 engine->irq_enable(engine); 87 engine->irq_enable(engine);
63 spin_unlock_irq(&engine->i915->irq_lock); 88 spin_unlock_irq(&engine->i915->irq_lock);
@@ -98,17 +123,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
98 } 123 }
99 124
100 if (!b->irq_enabled || 125 if (!b->irq_enabled ||
101 test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) 126 test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
102 mod_timer(&b->fake_irq, jiffies + 1); 127 mod_timer(&b->fake_irq, jiffies + 1);
103 128 } else {
104 /* Ensure that even if the GPU hangs, we get woken up. 129 /* Ensure we never sleep indefinitely */
105 * 130 GEM_BUG_ON(!time_after(b->timeout, jiffies));
106 * However, note that if no one is waiting, we never notice 131 mod_timer(&b->hangcheck, b->timeout);
107 * a gpu hang. Eventually, we will have to wait for a resource 132 }
108 * held by the GPU and so trigger a hangcheck. In the most
109 * pathological case, this will be upon memory starvation!
110 */
111 i915_queue_hangcheck(i915);
112} 133}
113 134
114static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) 135static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
@@ -211,7 +232,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
211 } 232 }
212 rb_link_node(&wait->node, parent, p); 233 rb_link_node(&wait->node, parent, p);
213 rb_insert_color(&wait->node, &b->waiters); 234 rb_insert_color(&wait->node, &b->waiters);
214 GEM_BUG_ON(!first && !b->irq_seqno_bh); 235 GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
215 236
216 if (completed) { 237 if (completed) {
217 struct rb_node *next = rb_next(completed); 238 struct rb_node *next = rb_next(completed);
@@ -219,8 +240,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
219 GEM_BUG_ON(!next && !first); 240 GEM_BUG_ON(!next && !first);
220 if (next && next != &wait->node) { 241 if (next && next != &wait->node) {
221 GEM_BUG_ON(first); 242 GEM_BUG_ON(first);
243 b->timeout = wait_timeout();
222 b->first_wait = to_wait(next); 244 b->first_wait = to_wait(next);
223 smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); 245 rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
224 /* As there is a delay between reading the current 246 /* As there is a delay between reading the current
225 * seqno, processing the completed tasks and selecting 247 * seqno, processing the completed tasks and selecting
226 * the next waiter, we may have missed the interrupt 248 * the next waiter, we may have missed the interrupt
@@ -245,8 +267,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
245 267
246 if (first) { 268 if (first) {
247 GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); 269 GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
270 b->timeout = wait_timeout();
248 b->first_wait = wait; 271 b->first_wait = wait;
249 smp_store_mb(b->irq_seqno_bh, wait->tsk); 272 rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
250 /* After assigning ourselves as the new bottom-half, we must 273 /* After assigning ourselves as the new bottom-half, we must
251 * perform a cursory check to prevent a missed interrupt. 274 * perform a cursory check to prevent a missed interrupt.
252 * Either we miss the interrupt whilst programming the hardware, 275 * Either we miss the interrupt whilst programming the hardware,
@@ -257,7 +280,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
257 */ 280 */
258 __intel_breadcrumbs_enable_irq(b); 281 __intel_breadcrumbs_enable_irq(b);
259 } 282 }
260 GEM_BUG_ON(!b->irq_seqno_bh); 283 GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
261 GEM_BUG_ON(!b->first_wait); 284 GEM_BUG_ON(!b->first_wait);
262 GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); 285 GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
263 286
@@ -277,11 +300,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
277 return first; 300 return first;
278} 301}
279 302
280void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
281{
282 mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
283}
284
285static inline bool chain_wakeup(struct rb_node *rb, int priority) 303static inline bool chain_wakeup(struct rb_node *rb, int priority)
286{ 304{
287 return rb && to_wait(rb)->tsk->prio <= priority; 305 return rb && to_wait(rb)->tsk->prio <= priority;
@@ -317,7 +335,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
317 const int priority = wakeup_priority(b, wait->tsk); 335 const int priority = wakeup_priority(b, wait->tsk);
318 struct rb_node *next; 336 struct rb_node *next;
319 337
320 GEM_BUG_ON(b->irq_seqno_bh != wait->tsk); 338 GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
321 339
322 /* We are the current bottom-half. Find the next candidate, 340 /* We are the current bottom-half. Find the next candidate,
323 * the first waiter in the queue on the remaining oldest 341 * the first waiter in the queue on the remaining oldest
@@ -359,14 +377,15 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
359 * the interrupt, or if we have to handle an 377 * the interrupt, or if we have to handle an
360 * exception rather than a seqno completion. 378 * exception rather than a seqno completion.
361 */ 379 */
380 b->timeout = wait_timeout();
362 b->first_wait = to_wait(next); 381 b->first_wait = to_wait(next);
363 smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); 382 rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
364 if (b->first_wait->seqno != wait->seqno) 383 if (b->first_wait->seqno != wait->seqno)
365 __intel_breadcrumbs_enable_irq(b); 384 __intel_breadcrumbs_enable_irq(b);
366 wake_up_process(b->irq_seqno_bh); 385 wake_up_process(b->first_wait->tsk);
367 } else { 386 } else {
368 b->first_wait = NULL; 387 b->first_wait = NULL;
369 WRITE_ONCE(b->irq_seqno_bh, NULL); 388 rcu_assign_pointer(b->irq_seqno_bh, NULL);
370 __intel_breadcrumbs_disable_irq(b); 389 __intel_breadcrumbs_disable_irq(b);
371 } 390 }
372 } else { 391 } else {
@@ -380,7 +399,7 @@ out_unlock:
380 GEM_BUG_ON(b->first_wait == wait); 399 GEM_BUG_ON(b->first_wait == wait);
381 GEM_BUG_ON(rb_first(&b->waiters) != 400 GEM_BUG_ON(rb_first(&b->waiters) !=
382 (b->first_wait ? &b->first_wait->node : NULL)); 401 (b->first_wait ? &b->first_wait->node : NULL));
383 GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters)); 402 GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
384 spin_unlock(&b->lock); 403 spin_unlock(&b->lock);
385} 404}
386 405
@@ -536,6 +555,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
536 setup_timer(&b->fake_irq, 555 setup_timer(&b->fake_irq,
537 intel_breadcrumbs_fake_irq, 556 intel_breadcrumbs_fake_irq,
538 (unsigned long)engine); 557 (unsigned long)engine);
558 setup_timer(&b->hangcheck,
559 intel_breadcrumbs_hangcheck,
560 (unsigned long)engine);
539 561
540 /* Spawn a thread to provide a common bottom-half for all signals. 562 /* Spawn a thread to provide a common bottom-half for all signals.
541 * As this is an asynchronous interface we cannot steal the current 563 * As this is an asynchronous interface we cannot steal the current
@@ -560,6 +582,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
560 if (!IS_ERR_OR_NULL(b->signaler)) 582 if (!IS_ERR_OR_NULL(b->signaler))
561 kthread_stop(b->signaler); 583 kthread_stop(b->signaler);
562 584
585 del_timer_sync(&b->hangcheck);
563 del_timer_sync(&b->fake_irq); 586 del_timer_sync(&b->fake_irq);
564} 587}
565 588
@@ -573,11 +596,9 @@ unsigned int intel_kick_waiters(struct drm_i915_private *i915)
573 * RCU lock, i.e. as we call wake_up_process() we must be holding the 596 * RCU lock, i.e. as we call wake_up_process() we must be holding the
574 * rcu_read_lock(). 597 * rcu_read_lock().
575 */ 598 */
576 rcu_read_lock();
577 for_each_engine(engine, i915) 599 for_each_engine(engine, i915)
578 if (unlikely(intel_engine_wakeup(engine))) 600 if (unlikely(intel_engine_wakeup(engine)))
579 mask |= intel_engine_flag(engine); 601 mask |= intel_engine_flag(engine);
580 rcu_read_unlock();
581 602
582 return mask; 603 return mask;
583} 604}
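The rcu_read_lock()/unlock() pairs around intel_engine_wakeup() are dropped above because irq_seqno_bh is now an RCU-protected pointer; a sketch of the pattern the wakeup helper is assumed to use internally (this is not the actual intel_engine_wakeup() body):

	static bool example_engine_wakeup(struct intel_engine_cs *engine)
	{
		struct task_struct *tsk;
		bool woken = false;

		rcu_read_lock();
		tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
		if (tsk)
			woken = wake_up_process(tsk);
		rcu_read_unlock();

		return woken;
	}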
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 54bbb9c70a75..d224f64836c5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1202,8 +1202,8 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv,
1202 if (HAS_PCH_SPLIT(dev)) { 1202 if (HAS_PCH_SPLIT(dev)) {
1203 u32 port_sel; 1203 u32 port_sel;
1204 1204
1205 pp_reg = PCH_PP_CONTROL; 1205 pp_reg = PP_CONTROL(0);
1206 port_sel = I915_READ(PCH_PP_ON_DELAYS) & PANEL_PORT_SELECT_MASK; 1206 port_sel = I915_READ(PP_ON_DELAYS(0)) & PANEL_PORT_SELECT_MASK;
1207 1207
1208 if (port_sel == PANEL_PORT_SELECT_LVDS && 1208 if (port_sel == PANEL_PORT_SELECT_LVDS &&
1209 I915_READ(PCH_LVDS) & LVDS_PIPEB_SELECT) 1209 I915_READ(PCH_LVDS) & LVDS_PIPEB_SELECT)
@@ -1211,10 +1211,10 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv,
1211 /* XXX: else fix for eDP */ 1211 /* XXX: else fix for eDP */
1212 } else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { 1212 } else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
1213 /* presumably write lock depends on pipe, not port select */ 1213 /* presumably write lock depends on pipe, not port select */
1214 pp_reg = VLV_PIPE_PP_CONTROL(pipe); 1214 pp_reg = PP_CONTROL(pipe);
1215 panel_pipe = pipe; 1215 panel_pipe = pipe;
1216 } else { 1216 } else {
1217 pp_reg = PP_CONTROL; 1217 pp_reg = PP_CONTROL(0);
1218 if (I915_READ(LVDS) & LVDS_PIPEB_SELECT) 1218 if (I915_READ(LVDS) & LVDS_PIPEB_SELECT)
1219 panel_pipe = PIPE_B; 1219 panel_pipe = PIPE_B;
1220 } 1220 }
@@ -1959,12 +1959,12 @@ static void intel_enable_pipe(struct intel_crtc *crtc)
1959 * a plane. On ILK+ the pipe PLLs are integrated, so we don't 1959 * a plane. On ILK+ the pipe PLLs are integrated, so we don't
1960 * need the check. 1960 * need the check.
1961 */ 1961 */
1962 if (HAS_GMCH_DISPLAY(dev_priv)) 1962 if (HAS_GMCH_DISPLAY(dev_priv)) {
1963 if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI)) 1963 if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI))
1964 assert_dsi_pll_enabled(dev_priv); 1964 assert_dsi_pll_enabled(dev_priv);
1965 else 1965 else
1966 assert_pll_enabled(dev_priv, pipe); 1966 assert_pll_enabled(dev_priv, pipe);
1967 else { 1967 } else {
1968 if (crtc->config->has_pch_encoder) { 1968 if (crtc->config->has_pch_encoder) {
1969 /* if driving the PCH, we need FDI enabled */ 1969 /* if driving the PCH, we need FDI enabled */
1970 assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder); 1970 assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder);
@@ -2147,33 +2147,6 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view,
2147 } 2147 }
2148} 2148}
2149 2149
2150static void
2151intel_fill_fb_info(struct drm_i915_private *dev_priv,
2152 struct drm_framebuffer *fb)
2153{
2154 struct intel_rotation_info *info = &to_intel_framebuffer(fb)->rot_info;
2155 unsigned int tile_size, tile_width, tile_height, cpp;
2156
2157 tile_size = intel_tile_size(dev_priv);
2158
2159 cpp = drm_format_plane_cpp(fb->pixel_format, 0);
2160 intel_tile_dims(dev_priv, &tile_width, &tile_height,
2161 fb->modifier[0], cpp);
2162
2163 info->plane[0].width = DIV_ROUND_UP(fb->pitches[0], tile_width * cpp);
2164 info->plane[0].height = DIV_ROUND_UP(fb->height, tile_height);
2165
2166 if (info->pixel_format == DRM_FORMAT_NV12) {
2167 cpp = drm_format_plane_cpp(fb->pixel_format, 1);
2168 intel_tile_dims(dev_priv, &tile_width, &tile_height,
2169 fb->modifier[1], cpp);
2170
2171 info->uv_offset = fb->offsets[1];
2172 info->plane[1].width = DIV_ROUND_UP(fb->pitches[1], tile_width * cpp);
2173 info->plane[1].height = DIV_ROUND_UP(fb->height / 2, tile_height);
2174 }
2175}
2176
2177static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv) 2150static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv)
2178{ 2151{
2179 if (INTEL_INFO(dev_priv)->gen >= 9) 2152 if (INTEL_INFO(dev_priv)->gen >= 9)
@@ -2206,16 +2179,15 @@ static unsigned int intel_surf_alignment(const struct drm_i915_private *dev_priv
2206 } 2179 }
2207} 2180}
2208 2181
2209int 2182struct i915_vma *
2210intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, 2183intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
2211 unsigned int rotation)
2212{ 2184{
2213 struct drm_device *dev = fb->dev; 2185 struct drm_device *dev = fb->dev;
2214 struct drm_i915_private *dev_priv = to_i915(dev); 2186 struct drm_i915_private *dev_priv = to_i915(dev);
2215 struct drm_i915_gem_object *obj = intel_fb_obj(fb); 2187 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
2216 struct i915_ggtt_view view; 2188 struct i915_ggtt_view view;
2189 struct i915_vma *vma;
2217 u32 alignment; 2190 u32 alignment;
2218 int ret;
2219 2191
2220 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 2192 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
2221 2193
@@ -2240,75 +2212,112 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
2240 */ 2212 */
2241 intel_runtime_pm_get(dev_priv); 2213 intel_runtime_pm_get(dev_priv);
2242 2214
2243 ret = i915_gem_object_pin_to_display_plane(obj, alignment, 2215 vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
2244 &view); 2216 if (IS_ERR(vma))
2245 if (ret) 2217 goto err;
2246 goto err_pm;
2247
2248 /* Install a fence for tiled scan-out. Pre-i965 always needs a
2249 * fence, whereas 965+ only requires a fence if using
2250 * framebuffer compression. For simplicity, we always install
2251 * a fence as the cost is not that onerous.
2252 */
2253 if (view.type == I915_GGTT_VIEW_NORMAL) {
2254 ret = i915_gem_object_get_fence(obj);
2255 if (ret == -EDEADLK) {
2256 /*
2257 * -EDEADLK means there are no free fences
2258 * no pending flips.
2259 *
2260 * This is propagated to atomic, but it uses
2261 * -EDEADLK to force a locking recovery, so
2262 * change the returned error to -EBUSY.
2263 */
2264 ret = -EBUSY;
2265 goto err_unpin;
2266 } else if (ret)
2267 goto err_unpin;
2268 2218
2269 i915_gem_object_pin_fence(obj); 2219 if (i915_vma_is_map_and_fenceable(vma)) {
2220 /* Install a fence for tiled scan-out. Pre-i965 always needs a
2221 * fence, whereas 965+ only requires a fence if using
2222 * framebuffer compression. For simplicity, we always, when
2223 * possible, install a fence as the cost is not that onerous.
2224 *
2225 * If we fail to fence the tiled scanout, then either the
2226 * modeset will reject the change (which is highly unlikely as
2227 * the affected systems, all but one, do not have unmappable
2228 * space) or we will not be able to enable full powersaving
2229 * techniques (also likely not to apply due to various limits
2230 * FBC and the like impose on the size of the buffer, which
2231 * presumably we violated anyway with this unmappable buffer).
2232 * Anyway, it is presumably better to stumble onwards with
2233 * something and try to run the system in a "less than optimal"
2234 * mode that matches the user configuration.
2235 */
2236 if (i915_vma_get_fence(vma) == 0)
2237 i915_vma_pin_fence(vma);
2270 } 2238 }
2271 2239
2240err:
2272 intel_runtime_pm_put(dev_priv); 2241 intel_runtime_pm_put(dev_priv);
2273 return 0; 2242 return vma;
2274
2275err_unpin:
2276 i915_gem_object_unpin_from_display_plane(obj, &view);
2277err_pm:
2278 intel_runtime_pm_put(dev_priv);
2279 return ret;
2280} 2243}
2281 2244
2282void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) 2245void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
2283{ 2246{
2284 struct drm_i915_gem_object *obj = intel_fb_obj(fb); 2247 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
2285 struct i915_ggtt_view view; 2248 struct i915_ggtt_view view;
2249 struct i915_vma *vma;
2286 2250
2287 WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex)); 2251 WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
2288 2252
2289 intel_fill_fb_ggtt_view(&view, fb, rotation); 2253 intel_fill_fb_ggtt_view(&view, fb, rotation);
2254 vma = i915_gem_object_to_ggtt(obj, &view);
2290 2255
2291 if (view.type == I915_GGTT_VIEW_NORMAL) 2256 i915_vma_unpin_fence(vma);
2292 i915_gem_object_unpin_fence(obj); 2257 i915_gem_object_unpin_from_display_plane(vma);
2258}
2293 2259
2294 i915_gem_object_unpin_from_display_plane(obj, &view); 2260static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane,
2261 unsigned int rotation)
2262{
2263 if (intel_rotation_90_or_270(rotation))
2264 return to_intel_framebuffer(fb)->rotated[plane].pitch;
2265 else
2266 return fb->pitches[plane];
2267}
2268
2269/*
2270 * Convert the x/y offsets into a linear offset.
2271 * Only valid with 0/180 degree rotation, which is fine since linear
2272 * offset is only used with linear buffers on pre-hsw and tiled buffers
2273 * with gen2/3, and 90/270 degree rotations aren't supported on any of them.
2274 */
2275u32 intel_fb_xy_to_linear(int x, int y,
2276 const struct intel_plane_state *state,
2277 int plane)
2278{
2279 const struct drm_framebuffer *fb = state->base.fb;
2280 unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
2281 unsigned int pitch = fb->pitches[plane];
2282
2283 return y * pitch + x * cpp;
2284}
2285
2286/*
2287 * Add the x/y offsets derived from fb->offsets[] to the user
2288 * specified plane src x/y offsets. The resulting x/y offsets
2289 * specify the start of scanout from the beginning of the gtt mapping.
2290 */
2291void intel_add_fb_offsets(int *x, int *y,
2292 const struct intel_plane_state *state,
2293 int plane)
2294
2295{
2296 const struct intel_framebuffer *intel_fb = to_intel_framebuffer(state->base.fb);
2297 unsigned int rotation = state->base.rotation;
2298
2299 if (intel_rotation_90_or_270(rotation)) {
2300 *x += intel_fb->rotated[plane].x;
2301 *y += intel_fb->rotated[plane].y;
2302 } else {
2303 *x += intel_fb->normal[plane].x;
2304 *y += intel_fb->normal[plane].y;
2305 }
2295} 2306}
2296 2307
2297/* 2308/*
2298 * Adjust the tile offset by moving the difference into
2299 * the x/y offsets.
2300 *
2301 * Input tile dimensions and pitch must already be 2309 * Input tile dimensions and pitch must already be
2302 * rotated to match x and y, and in pixel units. 2310 * rotated to match x and y, and in pixel units.
2303 */ 2311 */
2304static u32 intel_adjust_tile_offset(int *x, int *y, 2312static u32 _intel_adjust_tile_offset(int *x, int *y,
2305 unsigned int tile_width, 2313 unsigned int tile_width,
2306 unsigned int tile_height, 2314 unsigned int tile_height,
2307 unsigned int tile_size, 2315 unsigned int tile_size,
2308 unsigned int pitch_tiles, 2316 unsigned int pitch_tiles,
2309 u32 old_offset, 2317 u32 old_offset,
2310 u32 new_offset) 2318 u32 new_offset)
2311{ 2319{
2320 unsigned int pitch_pixels = pitch_tiles * tile_width;
2312 unsigned int tiles; 2321 unsigned int tiles;
2313 2322
2314 WARN_ON(old_offset & (tile_size - 1)); 2323 WARN_ON(old_offset & (tile_size - 1));
@@ -2320,6 +2329,54 @@ static u32 intel_adjust_tile_offset(int *x, int *y,
2320 *y += tiles / pitch_tiles * tile_height; 2329 *y += tiles / pitch_tiles * tile_height;
2321 *x += tiles % pitch_tiles * tile_width; 2330 *x += tiles % pitch_tiles * tile_width;
2322 2331
2332 /* minimize x in case it got needlessly big */
2333 *y += *x / pitch_pixels * tile_height;
2334 *x %= pitch_pixels;
2335
2336 return new_offset;
2337}
2338
2339/*
2340 * Adjust the tile offset by moving the difference into
2341 * the x/y offsets.
2342 */
2343static u32 intel_adjust_tile_offset(int *x, int *y,
2344 const struct intel_plane_state *state, int plane,
2345 u32 old_offset, u32 new_offset)
2346{
2347 const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
2348 const struct drm_framebuffer *fb = state->base.fb;
2349 unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
2350 unsigned int rotation = state->base.rotation;
2351 unsigned int pitch = intel_fb_pitch(fb, plane, rotation);
2352
2353 WARN_ON(new_offset > old_offset);
2354
2355 if (fb->modifier[plane] != DRM_FORMAT_MOD_NONE) {
2356 unsigned int tile_size, tile_width, tile_height;
2357 unsigned int pitch_tiles;
2358
2359 tile_size = intel_tile_size(dev_priv);
2360 intel_tile_dims(dev_priv, &tile_width, &tile_height,
2361 fb->modifier[plane], cpp);
2362
2363 if (intel_rotation_90_or_270(rotation)) {
2364 pitch_tiles = pitch / tile_height;
2365 swap(tile_width, tile_height);
2366 } else {
2367 pitch_tiles = pitch / (tile_width * cpp);
2368 }
2369
2370 _intel_adjust_tile_offset(x, y, tile_width, tile_height,
2371 tile_size, pitch_tiles,
2372 old_offset, new_offset);
2373 } else {
2374 old_offset += *y * pitch + *x * cpp;
2375
2376 *y = (old_offset - new_offset) / pitch;
2377 *x = ((old_offset - new_offset) - *y * pitch) / cpp;
2378 }
2379
2323 return new_offset; 2380 return new_offset;
2324} 2381}
2325 2382
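A worked example of the linear (untiled) branch above, with made-up numbers:

	/*
	 * cpp = 4, pitch = 8192, old_offset = 0, x = 16, y = 2
	 *   old_offset += 2 * 8192 + 16 * 4 = 16448
	 * adjusting to new_offset = 8192 (difference 8256):
	 *   y = 8256 / 8192            = 1
	 *   x = (8256 - 1 * 8192) / 4  = 16
	 * i.e. the same pixel, now expressed relative to new_offset.
	 */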
@@ -2330,18 +2387,24 @@ static u32 intel_adjust_tile_offset(int *x, int *y,
2330 * In the 90/270 rotated case, x and y are assumed 2387 * In the 90/270 rotated case, x and y are assumed
2331 * to be already rotated to match the rotated GTT view, and 2388 * to be already rotated to match the rotated GTT view, and
2332 * pitch is the tile_height aligned framebuffer height. 2389 * pitch is the tile_height aligned framebuffer height.
2390 *
2391 * This function is used when computing the derived information
2392 * under intel_framebuffer, so using any of that information
2393 * here is not allowed. Anything under drm_framebuffer can be
2394 * used. This is why the user has to pass in the pitch since it
2395 * is specified in the rotated orientation.
2333 */ 2396 */
2334u32 intel_compute_tile_offset(int *x, int *y, 2397static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv,
2335 const struct drm_framebuffer *fb, int plane, 2398 int *x, int *y,
2336 unsigned int pitch, 2399 const struct drm_framebuffer *fb, int plane,
2337 unsigned int rotation) 2400 unsigned int pitch,
2401 unsigned int rotation,
2402 u32 alignment)
2338{ 2403{
2339 const struct drm_i915_private *dev_priv = to_i915(fb->dev);
2340 uint64_t fb_modifier = fb->modifier[plane]; 2404 uint64_t fb_modifier = fb->modifier[plane];
2341 unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane); 2405 unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
2342 u32 offset, offset_aligned, alignment; 2406 u32 offset, offset_aligned;
2343 2407
2344 alignment = intel_surf_alignment(dev_priv, fb_modifier);
2345 if (alignment) 2408 if (alignment)
2346 alignment--; 2409 alignment--;
2347 2410
@@ -2369,9 +2432,9 @@ u32 intel_compute_tile_offset(int *x, int *y,
2369 offset = (tile_rows * pitch_tiles + tiles) * tile_size; 2432 offset = (tile_rows * pitch_tiles + tiles) * tile_size;
2370 offset_aligned = offset & ~alignment; 2433 offset_aligned = offset & ~alignment;
2371 2434
2372 intel_adjust_tile_offset(x, y, tile_width, tile_height, 2435 _intel_adjust_tile_offset(x, y, tile_width, tile_height,
2373 tile_size, pitch_tiles, 2436 tile_size, pitch_tiles,
2374 offset, offset_aligned); 2437 offset, offset_aligned);
2375 } else { 2438 } else {
2376 offset = *y * pitch + *x * cpp; 2439 offset = *y * pitch + *x * cpp;
2377 offset_aligned = offset & ~alignment; 2440 offset_aligned = offset & ~alignment;
@@ -2383,6 +2446,177 @@ u32 intel_compute_tile_offset(int *x, int *y,
2383 return offset_aligned; 2446 return offset_aligned;
2384} 2447}
2385 2448
2449u32 intel_compute_tile_offset(int *x, int *y,
2450 const struct intel_plane_state *state,
2451 int plane)
2452{
2453 const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
2454 const struct drm_framebuffer *fb = state->base.fb;
2455 unsigned int rotation = state->base.rotation;
2456 int pitch = intel_fb_pitch(fb, plane, rotation);
2457 u32 alignment;
2458
2459 /* AUX_DIST needs only 4K alignment */
2460 if (fb->pixel_format == DRM_FORMAT_NV12 && plane == 1)
2461 alignment = 4096;
2462 else
2463 alignment = intel_surf_alignment(dev_priv, fb->modifier[plane]);
2464
2465 return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch,
2466 rotation, alignment);
2467}
2468
2469/* Convert the fb->offset[] linear offset into x/y offsets */
2470static void intel_fb_offset_to_xy(int *x, int *y,
2471 const struct drm_framebuffer *fb, int plane)
2472{
2473 unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
2474 unsigned int pitch = fb->pitches[plane];
2475 u32 linear_offset = fb->offsets[plane];
2476
2477 *y = linear_offset / pitch;
2478 *x = linear_offset % pitch / cpp;
2479}
2480
2481static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier)
2482{
2483 switch (fb_modifier) {
2484 case I915_FORMAT_MOD_X_TILED:
2485 return I915_TILING_X;
2486 case I915_FORMAT_MOD_Y_TILED:
2487 return I915_TILING_Y;
2488 default:
2489 return I915_TILING_NONE;
2490 }
2491}
2492
2493static int
2494intel_fill_fb_info(struct drm_i915_private *dev_priv,
2495 struct drm_framebuffer *fb)
2496{
2497 struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
2498 struct intel_rotation_info *rot_info = &intel_fb->rot_info;
2499 u32 gtt_offset_rotated = 0;
2500 unsigned int max_size = 0;
2501 uint32_t format = fb->pixel_format;
2502 int i, num_planes = drm_format_num_planes(format);
2503 unsigned int tile_size = intel_tile_size(dev_priv);
2504
2505 for (i = 0; i < num_planes; i++) {
2506 unsigned int width, height;
2507 unsigned int cpp, size;
2508 u32 offset;
2509 int x, y;
2510
2511 cpp = drm_format_plane_cpp(format, i);
2512 width = drm_format_plane_width(fb->width, format, i);
2513 height = drm_format_plane_height(fb->height, format, i);
2514
2515 intel_fb_offset_to_xy(&x, &y, fb, i);
2516
2517 /*
2518 * The fence (if used) is aligned to the start of the object
2519 * so having the framebuffer wrap around across the edge of the
2520 * fenced region doesn't really work. We have no API to configure
2521 * the fence start offset within the object (nor could we probably
2522 * on gen2/3). So it's just easier if we just require that the
2523 * fb layout agrees with the fence layout. We already check that the
2524 * fb stride matches the fence stride elsewhere.
2525 */
2526 if (i915_gem_object_is_tiled(intel_fb->obj) &&
2527 (x + width) * cpp > fb->pitches[i]) {
2528 DRM_DEBUG("bad fb plane %d offset: 0x%x\n",
2529 i, fb->offsets[i]);
2530 return -EINVAL;
2531 }
2532
2533 /*
2534 * First pixel of the framebuffer from
2535 * the start of the normal gtt mapping.
2536 */
2537 intel_fb->normal[i].x = x;
2538 intel_fb->normal[i].y = y;
2539
2540 offset = _intel_compute_tile_offset(dev_priv, &x, &y,
2541 fb, i, fb->pitches[i],
2542 DRM_ROTATE_0, tile_size);
2543 offset /= tile_size;
2544
2545 if (fb->modifier[i] != DRM_FORMAT_MOD_NONE) {
2546 unsigned int tile_width, tile_height;
2547 unsigned int pitch_tiles;
2548 struct drm_rect r;
2549
2550 intel_tile_dims(dev_priv, &tile_width, &tile_height,
2551 fb->modifier[i], cpp);
2552
2553 rot_info->plane[i].offset = offset;
2554 rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp);
2555 rot_info->plane[i].width = DIV_ROUND_UP(x + width, tile_width);
2556 rot_info->plane[i].height = DIV_ROUND_UP(y + height, tile_height);
2557
2558 intel_fb->rotated[i].pitch =
2559 rot_info->plane[i].height * tile_height;
2560
2561 /* how many tiles does this plane need */
2562 size = rot_info->plane[i].stride * rot_info->plane[i].height;
2563 /*
2564 * If the plane isn't horizontally tile aligned,
2565 * we need one more tile.
2566 */
2567 if (x != 0)
2568 size++;
2569
2570 /* rotate the x/y offsets to match the GTT view */
2571 r.x1 = x;
2572 r.y1 = y;
2573 r.x2 = x + width;
2574 r.y2 = y + height;
2575 drm_rect_rotate(&r,
2576 rot_info->plane[i].width * tile_width,
2577 rot_info->plane[i].height * tile_height,
2578 DRM_ROTATE_270);
2579 x = r.x1;
2580 y = r.y1;
2581
2582 /* rotate the tile dimensions to match the GTT view */
2583 pitch_tiles = intel_fb->rotated[i].pitch / tile_height;
2584 swap(tile_width, tile_height);
2585
2586 /*
2587 * We only keep the x/y offsets, so push all of the
2588 * gtt offset into the x/y offsets.
2589 */
2590 _intel_adjust_tile_offset(&x, &y, tile_width,
2591 tile_height, tile_size, pitch_tiles,
2592 gtt_offset_rotated * tile_size, 0);
2593
2594 gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height;
2595
2596 /*
2597 * First pixel of the framebuffer from
2598 * the start of the rotated gtt mapping.
2599 */
2600 intel_fb->rotated[i].x = x;
2601 intel_fb->rotated[i].y = y;
2602 } else {
2603 size = DIV_ROUND_UP((y + height) * fb->pitches[i] +
2604 x * cpp, tile_size);
2605 }
2606
2607 /* how many tiles in total needed in the bo */
2608 max_size = max(max_size, offset + size);
2609 }
2610
2611 if (max_size * tile_size > to_intel_framebuffer(fb)->obj->base.size) {
2612 DRM_DEBUG("fb too big for bo (need %u bytes, have %zu bytes)\n",
2613 max_size * tile_size, to_intel_framebuffer(fb)->obj->base.size);
2614 return -EINVAL;
2615 }
2616
2617 return 0;
2618}
2619
2386static int i9xx_format_to_fourcc(int format) 2620static int i9xx_format_to_fourcc(int format)
2387{ 2621{
2388 switch (format) { 2622 switch (format) {
@@ -2552,7 +2786,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
2552 continue; 2786 continue;
2553 2787
2554 obj = intel_fb_obj(fb); 2788 obj = intel_fb_obj(fb);
2555 if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) { 2789 if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) {
2556 drm_framebuffer_reference(fb); 2790 drm_framebuffer_reference(fb);
2557 goto valid_fb; 2791 goto valid_fb;
2558 } 2792 }
@@ -2604,6 +2838,169 @@ valid_fb:
2604 &obj->frontbuffer_bits); 2838 &obj->frontbuffer_bits);
2605} 2839}
2606 2840
2841static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane,
2842 unsigned int rotation)
2843{
2844 int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
2845
2846 switch (fb->modifier[plane]) {
2847 case DRM_FORMAT_MOD_NONE:
2848 case I915_FORMAT_MOD_X_TILED:
2849 switch (cpp) {
2850 case 8:
2851 return 4096;
2852 case 4:
2853 case 2:
2854 case 1:
2855 return 8192;
2856 default:
2857 MISSING_CASE(cpp);
2858 break;
2859 }
2860 break;
2861 case I915_FORMAT_MOD_Y_TILED:
2862 case I915_FORMAT_MOD_Yf_TILED:
2863 switch (cpp) {
2864 case 8:
2865 return 2048;
2866 case 4:
2867 return 4096;
2868 case 2:
2869 case 1:
2870 return 8192;
2871 default:
2872 MISSING_CASE(cpp);
2873 break;
2874 }
2875 break;
2876 default:
2877 MISSING_CASE(fb->modifier[plane]);
2878 }
2879
2880 return 2048;
2881}
2882
2883static int skl_check_main_surface(struct intel_plane_state *plane_state)
2884{
2885 const struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev);
2886 const struct drm_framebuffer *fb = plane_state->base.fb;
2887 unsigned int rotation = plane_state->base.rotation;
2888 int x = plane_state->base.src.x1 >> 16;
2889 int y = plane_state->base.src.y1 >> 16;
2890 int w = drm_rect_width(&plane_state->base.src) >> 16;
2891 int h = drm_rect_height(&plane_state->base.src) >> 16;
2892 int max_width = skl_max_plane_width(fb, 0, rotation);
2893 int max_height = 4096;
2894 u32 alignment, offset, aux_offset = plane_state->aux.offset;
2895
2896 if (w > max_width || h > max_height) {
2897 DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n",
2898 w, h, max_width, max_height);
2899 return -EINVAL;
2900 }
2901
2902 intel_add_fb_offsets(&x, &y, plane_state, 0);
2903 offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
2904
2905 alignment = intel_surf_alignment(dev_priv, fb->modifier[0]);
2906
2907 /*
2908 * AUX surface offset is specified as the distance from the
2909 * main surface offset, and it must be non-negative. Make
2910 * sure that is what we will get.
2911 */
2912 if (offset > aux_offset)
2913 offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
2914 offset, aux_offset & ~(alignment - 1));
2915
2916 /*
2917 * When using an X-tiled surface, the plane blows up
2918 * if the x offset + width exceed the stride.
2919 *
2920 * TODO: linear and Y-tiled seem fine, Yf untested,
2921 */
2922 if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) {
2923 int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
2924
2925 while ((x + w) * cpp > fb->pitches[0]) {
2926 if (offset == 0) {
2927 DRM_DEBUG_KMS("Unable to find suitable display surface offset\n");
2928 return -EINVAL;
2929 }
2930
2931 offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
2932 offset, offset - alignment);
2933 }
2934 }
2935
2936 plane_state->main.offset = offset;
2937 plane_state->main.x = x;
2938 plane_state->main.y = y;
2939
2940 return 0;
2941}
2942
2943static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
2944{
2945 const struct drm_framebuffer *fb = plane_state->base.fb;
2946 unsigned int rotation = plane_state->base.rotation;
2947 int max_width = skl_max_plane_width(fb, 1, rotation);
2948 int max_height = 4096;
2949 int x = plane_state->base.src.x1 >> 17;
2950 int y = plane_state->base.src.y1 >> 17;
2951 int w = drm_rect_width(&plane_state->base.src) >> 17;
2952 int h = drm_rect_height(&plane_state->base.src) >> 17;
2953 u32 offset;
2954
2955 intel_add_fb_offsets(&x, &y, plane_state, 1);
2956 offset = intel_compute_tile_offset(&x, &y, plane_state, 1);
2957
2958 /* FIXME not quite sure how/if these apply to the chroma plane */
2959 if (w > max_width || h > max_height) {
2960 DRM_DEBUG_KMS("CbCr source size %dx%d too big (limit %dx%d)\n",
2961 w, h, max_width, max_height);
2962 return -EINVAL;
2963 }
2964
2965 plane_state->aux.offset = offset;
2966 plane_state->aux.x = x;
2967 plane_state->aux.y = y;
2968
2969 return 0;
2970}
2971
2972int skl_check_plane_surface(struct intel_plane_state *plane_state)
2973{
2974 const struct drm_framebuffer *fb = plane_state->base.fb;
2975 unsigned int rotation = plane_state->base.rotation;
2976 int ret;
2977
2978 /* Rotate src coordinates to match rotated GTT view */
2979 if (intel_rotation_90_or_270(rotation))
2980 drm_rect_rotate(&plane_state->base.src,
2981 fb->width, fb->height, DRM_ROTATE_270);
2982
2983 /*
2984 * Handle the AUX surface first since
2985 * the main surface setup depends on it.
2986 */
2987 if (fb->pixel_format == DRM_FORMAT_NV12) {
2988 ret = skl_check_nv12_aux_surface(plane_state);
2989 if (ret)
2990 return ret;
2991 } else {
2992 plane_state->aux.offset = ~0xfff;
2993 plane_state->aux.x = 0;
2994 plane_state->aux.y = 0;
2995 }
2996
2997 ret = skl_check_main_surface(plane_state);
2998 if (ret)
2999 return ret;
3000
3001 return 0;
3002}
3003
2607static void i9xx_update_primary_plane(struct drm_plane *primary, 3004static void i9xx_update_primary_plane(struct drm_plane *primary,
2608 const struct intel_crtc_state *crtc_state, 3005 const struct intel_crtc_state *crtc_state,
2609 const struct intel_plane_state *plane_state) 3006 const struct intel_plane_state *plane_state)
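A sketch of how the new skl_check_plane_surface() is expected to be consumed from the SKL+ plane ->check_plane() hook; the surrounding caller here is an assumption, not part of this hunk:

	static int example_skl_plane_check(struct intel_plane_state *plane_state)
	{
		if (!plane_state->base.fb)
			return 0;

		/* fills plane_state->main and plane_state->aux for the update hooks */
		return skl_check_plane_surface(plane_state);
	}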
@@ -2618,7 +3015,6 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
2618 u32 dspcntr; 3015 u32 dspcntr;
2619 i915_reg_t reg = DSPCNTR(plane); 3016 i915_reg_t reg = DSPCNTR(plane);
2620 unsigned int rotation = plane_state->base.rotation; 3017 unsigned int rotation = plane_state->base.rotation;
2621 int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
2622 int x = plane_state->base.src.x1 >> 16; 3018 int x = plane_state->base.src.x1 >> 16;
2623 int y = plane_state->base.src.y1 >> 16; 3019 int y = plane_state->base.src.y1 >> 16;
2624 3020
@@ -2671,36 +3067,31 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
2671 BUG(); 3067 BUG();
2672 } 3068 }
2673 3069
2674 if (INTEL_INFO(dev)->gen >= 4 && i915_gem_object_is_tiled(obj)) 3070 if (INTEL_GEN(dev_priv) >= 4 &&
3071 fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
2675 dspcntr |= DISPPLANE_TILED; 3072 dspcntr |= DISPPLANE_TILED;
2676 3073
2677 if (IS_G4X(dev)) 3074 if (IS_G4X(dev))
2678 dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; 3075 dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
2679 3076
2680 linear_offset = y * fb->pitches[0] + x * cpp; 3077 intel_add_fb_offsets(&x, &y, plane_state, 0);
2681 3078
2682 if (INTEL_INFO(dev)->gen >= 4) { 3079 if (INTEL_INFO(dev)->gen >= 4)
2683 intel_crtc->dspaddr_offset = 3080 intel_crtc->dspaddr_offset =
2684 intel_compute_tile_offset(&x, &y, fb, 0, 3081 intel_compute_tile_offset(&x, &y, plane_state, 0);
2685 fb->pitches[0], rotation);
2686 linear_offset -= intel_crtc->dspaddr_offset;
2687 } else {
2688 intel_crtc->dspaddr_offset = linear_offset;
2689 }
2690 3082
2691 if (rotation == DRM_ROTATE_180) { 3083 if (rotation == DRM_ROTATE_180) {
2692 dspcntr |= DISPPLANE_ROTATE_180; 3084 dspcntr |= DISPPLANE_ROTATE_180;
2693 3085
2694 x += (crtc_state->pipe_src_w - 1); 3086 x += (crtc_state->pipe_src_w - 1);
2695 y += (crtc_state->pipe_src_h - 1); 3087 y += (crtc_state->pipe_src_h - 1);
2696
2697 /* Finding the last pixel of the last line of the display
2698 data and adding to linear_offset*/
2699 linear_offset +=
2700 (crtc_state->pipe_src_h - 1) * fb->pitches[0] +
2701 (crtc_state->pipe_src_w - 1) * cpp;
2702 } 3088 }
2703 3089
3090 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
3091
3092 if (INTEL_INFO(dev)->gen < 4)
3093 intel_crtc->dspaddr_offset = linear_offset;
3094
2704 intel_crtc->adjusted_x = x; 3095 intel_crtc->adjusted_x = x;
2705 intel_crtc->adjusted_y = y; 3096 intel_crtc->adjusted_y = y;
2706 3097
@@ -2709,11 +3100,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
2709 I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); 3100 I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
2710 if (INTEL_INFO(dev)->gen >= 4) { 3101 if (INTEL_INFO(dev)->gen >= 4) {
2711 I915_WRITE(DSPSURF(plane), 3102 I915_WRITE(DSPSURF(plane),
2712 i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset); 3103 intel_fb_gtt_offset(fb, rotation) +
3104 intel_crtc->dspaddr_offset);
2713 I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); 3105 I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
2714 I915_WRITE(DSPLINOFF(plane), linear_offset); 3106 I915_WRITE(DSPLINOFF(plane), linear_offset);
2715 } else 3107 } else
2716 I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset); 3108 I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset);
2717 POSTING_READ(reg); 3109 POSTING_READ(reg);
2718} 3110}
2719 3111
@@ -2741,13 +3133,11 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
2741 struct drm_i915_private *dev_priv = to_i915(dev); 3133 struct drm_i915_private *dev_priv = to_i915(dev);
2742 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); 3134 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2743 struct drm_framebuffer *fb = plane_state->base.fb; 3135 struct drm_framebuffer *fb = plane_state->base.fb;
2744 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
2745 int plane = intel_crtc->plane; 3136 int plane = intel_crtc->plane;
2746 u32 linear_offset; 3137 u32 linear_offset;
2747 u32 dspcntr; 3138 u32 dspcntr;
2748 i915_reg_t reg = DSPCNTR(plane); 3139 i915_reg_t reg = DSPCNTR(plane);
2749 unsigned int rotation = plane_state->base.rotation; 3140 unsigned int rotation = plane_state->base.rotation;
2750 int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
2751 int x = plane_state->base.src.x1 >> 16; 3141 int x = plane_state->base.src.x1 >> 16;
2752 int y = plane_state->base.src.y1 >> 16; 3142 int y = plane_state->base.src.y1 >> 16;
2753 3143
@@ -2780,32 +3170,28 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
2780 BUG(); 3170 BUG();
2781 } 3171 }
2782 3172
2783 if (i915_gem_object_is_tiled(obj)) 3173 if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
2784 dspcntr |= DISPPLANE_TILED; 3174 dspcntr |= DISPPLANE_TILED;
2785 3175
2786 if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) 3176 if (!IS_HASWELL(dev) && !IS_BROADWELL(dev))
2787 dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; 3177 dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
2788 3178
2789 linear_offset = y * fb->pitches[0] + x * cpp; 3179 intel_add_fb_offsets(&x, &y, plane_state, 0);
3180
2790 intel_crtc->dspaddr_offset = 3181 intel_crtc->dspaddr_offset =
2791 intel_compute_tile_offset(&x, &y, fb, 0, 3182 intel_compute_tile_offset(&x, &y, plane_state, 0);
2792 fb->pitches[0], rotation); 3183
2793 linear_offset -= intel_crtc->dspaddr_offset;
2794 if (rotation == DRM_ROTATE_180) { 3184 if (rotation == DRM_ROTATE_180) {
2795 dspcntr |= DISPPLANE_ROTATE_180; 3185 dspcntr |= DISPPLANE_ROTATE_180;
2796 3186
2797 if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { 3187 if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
2798 x += (crtc_state->pipe_src_w - 1); 3188 x += (crtc_state->pipe_src_w - 1);
2799 y += (crtc_state->pipe_src_h - 1); 3189 y += (crtc_state->pipe_src_h - 1);
2800
2801 /* Finding the last pixel of the last line of the display
2802 data and adding to linear_offset*/
2803 linear_offset +=
2804 (crtc_state->pipe_src_h - 1) * fb->pitches[0] +
2805 (crtc_state->pipe_src_w - 1) * cpp;
2806 } 3190 }
2807 } 3191 }
2808 3192
3193 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
3194
2809 intel_crtc->adjusted_x = x; 3195 intel_crtc->adjusted_x = x;
2810 intel_crtc->adjusted_y = y; 3196 intel_crtc->adjusted_y = y;
2811 3197
@@ -2813,7 +3199,8 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
2813 3199
2814 I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); 3200 I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
2815 I915_WRITE(DSPSURF(plane), 3201 I915_WRITE(DSPSURF(plane),
2816 i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset); 3202 intel_fb_gtt_offset(fb, rotation) +
3203 intel_crtc->dspaddr_offset);
2817 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { 3204 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2818 I915_WRITE(DSPOFFSET(plane), (y << 16) | x); 3205 I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
2819 } else { 3206 } else {
@@ -2835,32 +3222,21 @@ u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
2835 } 3222 }
2836} 3223}
2837 3224
2838u32 intel_plane_obj_offset(struct intel_plane *intel_plane, 3225u32 intel_fb_gtt_offset(struct drm_framebuffer *fb,
2839 struct drm_i915_gem_object *obj, 3226 unsigned int rotation)
2840 unsigned int plane)
2841{ 3227{
3228 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
2842 struct i915_ggtt_view view; 3229 struct i915_ggtt_view view;
2843 struct i915_vma *vma; 3230 struct i915_vma *vma;
2844 u64 offset;
2845 3231
2846 intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb, 3232 intel_fill_fb_ggtt_view(&view, fb, rotation);
2847 intel_plane->base.state->rotation);
2848 3233
2849 vma = i915_gem_obj_to_ggtt_view(obj, &view); 3234 vma = i915_gem_object_to_ggtt(obj, &view);
2850 if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n", 3235 if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
2851 view.type)) 3236 view.type))
2852 return -1; 3237 return -1;
2853 3238
2854 offset = vma->node.start; 3239 return i915_ggtt_offset(vma);
2855
2856 if (plane == 1) {
2857 offset += vma->ggtt_view.params.rotated.uv_start_page *
2858 PAGE_SIZE;
2859 }
2860
2861 WARN_ON(upper_32_bits(offset));
2862
2863 return lower_32_bits(offset);
2864} 3240}
2865 3241
2866static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) 3242static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id)
@@ -2890,6 +3266,28 @@ static void skl_detach_scalers(struct intel_crtc *intel_crtc)
2890 } 3266 }
2891} 3267}
2892 3268
3269u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
3270 unsigned int rotation)
3271{
3272 const struct drm_i915_private *dev_priv = to_i915(fb->dev);
3273 u32 stride = intel_fb_pitch(fb, plane, rotation);
3274
3275 /*
3276 * The stride is either expressed as a multiple of 64 bytes chunks for
3277 * linear buffers or in number of tiles for tiled buffers.
3278 */
3279 if (intel_rotation_90_or_270(rotation)) {
3280 int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
3281
3282 stride /= intel_tile_height(dev_priv, fb->modifier[0], cpp);
3283 } else {
3284 stride /= intel_fb_stride_alignment(dev_priv, fb->modifier[0],
3285 fb->pixel_format);
3286 }
3287
3288 return stride;
3289}
3290
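skl_plane_stride() above converts the framebuffer pitch into the units PLANE_STRIDE expects: 64-byte chunks for linear buffers, tiles for tiled buffers, and tile rows for the rotated view. A standalone sketch of that conversion; the tile geometry is a placeholder, not what intel_fb_stride_alignment()/intel_tile_height() would actually return:

#include <stdbool.h>
#include <stdio.h>

#define LINEAR_CHUNK    64      /* linear stride unit in bytes, per the comment above */
#define TILE_WIDTH      512     /* placeholder tile width in bytes */
#define TILE_HEIGHT     8       /* placeholder tile height in rows */

/* Mirrors the two cases above: for the rotated view divide by the tile
 * height, otherwise divide by the tile width (tiled) or 64 bytes (linear). */
static unsigned int plane_stride(unsigned int pitch, bool tiled, bool rot_90_270)
{
        if (rot_90_270)
                return pitch / TILE_HEIGHT;

        return pitch / (tiled ? TILE_WIDTH : LINEAR_CHUNK);
}

int main(void)
{
        printf("linear stride: %u\n", plane_stride(1920 * 4, false, false));
        printf("tiled stride:  %u\n", plane_stride(1920 * 4, true, false));
        return 0;
}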
2893u32 skl_plane_ctl_format(uint32_t pixel_format) 3291u32 skl_plane_ctl_format(uint32_t pixel_format)
2894{ 3292{
2895 switch (pixel_format) { 3293 switch (pixel_format) {
@@ -2979,16 +3377,14 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
2979 struct drm_i915_private *dev_priv = to_i915(dev); 3377 struct drm_i915_private *dev_priv = to_i915(dev);
2980 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); 3378 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2981 struct drm_framebuffer *fb = plane_state->base.fb; 3379 struct drm_framebuffer *fb = plane_state->base.fb;
2982 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
2983 int pipe = intel_crtc->pipe; 3380 int pipe = intel_crtc->pipe;
2984 u32 plane_ctl, stride_div, stride; 3381 u32 plane_ctl;
2985 u32 tile_height, plane_offset, plane_size;
2986 unsigned int rotation = plane_state->base.rotation; 3382 unsigned int rotation = plane_state->base.rotation;
2987 int x_offset, y_offset; 3383 u32 stride = skl_plane_stride(fb, 0, rotation);
2988 u32 surf_addr; 3384 u32 surf_addr = plane_state->main.offset;
2989 int scaler_id = plane_state->scaler_id; 3385 int scaler_id = plane_state->scaler_id;
2990 int src_x = plane_state->base.src.x1 >> 16; 3386 int src_x = plane_state->main.x;
2991 int src_y = plane_state->base.src.y1 >> 16; 3387 int src_y = plane_state->main.y;
2992 int src_w = drm_rect_width(&plane_state->base.src) >> 16; 3388 int src_w = drm_rect_width(&plane_state->base.src) >> 16;
2993 int src_h = drm_rect_height(&plane_state->base.src) >> 16; 3389 int src_h = drm_rect_height(&plane_state->base.src) >> 16;
2994 int dst_x = plane_state->base.dst.x1; 3390 int dst_x = plane_state->base.dst.x1;
@@ -3005,36 +3401,19 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
3005 plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; 3401 plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE;
3006 plane_ctl |= skl_plane_ctl_rotation(rotation); 3402 plane_ctl |= skl_plane_ctl_rotation(rotation);
3007 3403
3008 stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0], 3404 /* Sizes are 0 based */
3009 fb->pixel_format); 3405 src_w--;
3010 surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0); 3406 src_h--;
3011 3407 dst_w--;
3012 WARN_ON(drm_rect_width(&plane_state->base.src) == 0); 3408 dst_h--;
3013
3014 if (intel_rotation_90_or_270(rotation)) {
3015 int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
3016
3017 /* stride = Surface height in tiles */
3018 tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
3019 stride = DIV_ROUND_UP(fb->height, tile_height);
3020 x_offset = stride * tile_height - src_y - src_h;
3021 y_offset = src_x;
3022 plane_size = (src_w - 1) << 16 | (src_h - 1);
3023 } else {
3024 stride = fb->pitches[0] / stride_div;
3025 x_offset = src_x;
3026 y_offset = src_y;
3027 plane_size = (src_h - 1) << 16 | (src_w - 1);
3028 }
3029 plane_offset = y_offset << 16 | x_offset;
3030 3409
3031 intel_crtc->adjusted_x = x_offset; 3410 intel_crtc->adjusted_x = src_x;
3032 intel_crtc->adjusted_y = y_offset; 3411 intel_crtc->adjusted_y = src_y;
3033 3412
3034 I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl); 3413 I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
3035 I915_WRITE(PLANE_OFFSET(pipe, 0), plane_offset); 3414 I915_WRITE(PLANE_OFFSET(pipe, 0), (src_y << 16) | src_x);
3036 I915_WRITE(PLANE_SIZE(pipe, 0), plane_size);
3037 I915_WRITE(PLANE_STRIDE(pipe, 0), stride); 3415 I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
3416 I915_WRITE(PLANE_SIZE(pipe, 0), (src_h << 16) | src_w);
3038 3417
3039 if (scaler_id >= 0) { 3418 if (scaler_id >= 0) {
3040 uint32_t ps_ctrl = 0; 3419 uint32_t ps_ctrl = 0;
@@ -3051,7 +3430,8 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
3051 I915_WRITE(PLANE_POS(pipe, 0), (dst_y << 16) | dst_x); 3430 I915_WRITE(PLANE_POS(pipe, 0), (dst_y << 16) | dst_x);
3052 } 3431 }
3053 3432
3054 I915_WRITE(PLANE_SURF(pipe, 0), surf_addr); 3433 I915_WRITE(PLANE_SURF(pipe, 0),
3434 intel_fb_gtt_offset(fb, rotation) + surf_addr);
3055 3435
3056 POSTING_READ(PLANE_SURF(pipe, 0)); 3436 POSTING_READ(PLANE_SURF(pipe, 0));
3057} 3437}
@@ -3093,40 +3473,113 @@ static void intel_update_primary_planes(struct drm_device *dev)
3093 3473
3094 for_each_crtc(dev, crtc) { 3474 for_each_crtc(dev, crtc) {
3095 struct intel_plane *plane = to_intel_plane(crtc->primary); 3475 struct intel_plane *plane = to_intel_plane(crtc->primary);
3096 struct intel_plane_state *plane_state; 3476 struct intel_plane_state *plane_state =
3097 3477 to_intel_plane_state(plane->base.state);
3098 drm_modeset_lock_crtc(crtc, &plane->base);
3099 plane_state = to_intel_plane_state(plane->base.state);
3100 3478
3101 if (plane_state->base.visible) 3479 if (plane_state->base.visible)
3102 plane->update_plane(&plane->base, 3480 plane->update_plane(&plane->base,
3103 to_intel_crtc_state(crtc->state), 3481 to_intel_crtc_state(crtc->state),
3104 plane_state); 3482 plane_state);
3483 }
3484}
3485
3486static int
3487__intel_display_resume(struct drm_device *dev,
3488 struct drm_atomic_state *state)
3489{
3490 struct drm_crtc_state *crtc_state;
3491 struct drm_crtc *crtc;
3492 int i, ret;
3493
3494 intel_modeset_setup_hw_state(dev);
3495 i915_redisable_vga(dev);
3496
3497 if (!state)
3498 return 0;
3105 3499
3106 drm_modeset_unlock_crtc(crtc); 3500 for_each_crtc_in_state(state, crtc, crtc_state, i) {
3501 /*
3502 * Force recalculation even if we restore
3503 * current state. With fast modeset this may not result
3504 * in a modeset when the state is compatible.
3505 */
3506 crtc_state->mode_changed = true;
3107 } 3507 }
3508
3509 /* ignore any reset values/BIOS leftovers in the WM registers */
3510 to_intel_atomic_state(state)->skip_intermediate_wm = true;
3511
3512 ret = drm_atomic_commit(state);
3513
3514 WARN_ON(ret == -EDEADLK);
3515 return ret;
3516}
3517
3518static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
3519{
3520 return intel_has_gpu_reset(dev_priv) &&
3521 INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv);
3108} 3522}
3109 3523
3110void intel_prepare_reset(struct drm_i915_private *dev_priv) 3524void intel_prepare_reset(struct drm_i915_private *dev_priv)
3111{ 3525{
3112 /* no reset support for gen2 */ 3526 struct drm_device *dev = &dev_priv->drm;
3113 if (IS_GEN2(dev_priv)) 3527 struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
3114 return; 3528 struct drm_atomic_state *state;
3529 int ret;
3115 3530
3116 /* reset doesn't touch the display */ 3531 /*
3117 if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) 3532 * Need mode_config.mutex so that we don't
3533 * trample ongoing ->detect() and whatnot.
3534 */
3535 mutex_lock(&dev->mode_config.mutex);
3536 drm_modeset_acquire_init(ctx, 0);
3537 while (1) {
3538 ret = drm_modeset_lock_all_ctx(dev, ctx);
3539 if (ret != -EDEADLK)
3540 break;
3541
3542 drm_modeset_backoff(ctx);
3543 }
3544
3545 /* reset doesn't touch the display, but flips might get nuked anyway, */
3546 if (!i915.force_reset_modeset_test &&
3547 !gpu_reset_clobbers_display(dev_priv))
3118 return; 3548 return;
3119 3549
3120 drm_modeset_lock_all(&dev_priv->drm);
3121 /* 3550 /*
3122 * Disabling the crtcs gracefully seems nicer. Also the 3551 * Disabling the crtcs gracefully seems nicer. Also the
3123 * g33 docs say we should at least disable all the planes. 3552 * g33 docs say we should at least disable all the planes.
3124 */ 3553 */
3125 intel_display_suspend(&dev_priv->drm); 3554 state = drm_atomic_helper_duplicate_state(dev, ctx);
3555 if (IS_ERR(state)) {
3556 ret = PTR_ERR(state);
3557 state = NULL;
3558 DRM_ERROR("Duplicating state failed with %i\n", ret);
3559 goto err;
3560 }
3561
3562 ret = drm_atomic_helper_disable_all(dev, ctx);
3563 if (ret) {
3564 DRM_ERROR("Suspending crtc's failed with %i\n", ret);
3565 goto err;
3566 }
3567
3568 dev_priv->modeset_restore_state = state;
3569 state->acquire_ctx = ctx;
3570 return;
3571
3572err:
3573 drm_atomic_state_free(state);
3126} 3574}
3127 3575
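intel_prepare_reset() now takes every modeset lock through an acquire context and retries on -EDEADLK, the standard DRM pattern for backing out of lock-order reversals; intel_display_resume() below is reworked around the same loop. A standalone sketch of just that retry shape, where try_lock_all() and backoff() are stand-ins rather than DRM calls:

#include <stdio.h>

#define EDEADLK 35

static int attempts_left = 3;

/* pretend the first attempts hit a lock-order reversal */
static int try_lock_all(void)
{
        return --attempts_left > 0 ? -EDEADLK : 0;
}

/* in DRM this would drop all held locks and wait for the contended one */
static void backoff(void)
{
}

int main(void)
{
        int ret;

        while (1) {
                ret = try_lock_all();
                if (ret != -EDEADLK)
                        break;
                backoff();
        }

        printf("all locks taken, ret=%d\n", ret);
        return 0;
}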
3128void intel_finish_reset(struct drm_i915_private *dev_priv) 3576void intel_finish_reset(struct drm_i915_private *dev_priv)
3129{ 3577{
3578 struct drm_device *dev = &dev_priv->drm;
3579 struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
3580 struct drm_atomic_state *state = dev_priv->modeset_restore_state;
3581 int ret;
3582
3130 /* 3583 /*
3131 * Flips in the rings will be nuked by the reset, 3584 * Flips in the rings will be nuked by the reset,
3132 * so complete all pending flips so that user space 3585 * so complete all pending flips so that user space
@@ -3134,44 +3587,51 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
3134 */ 3587 */
3135 intel_complete_page_flips(dev_priv); 3588 intel_complete_page_flips(dev_priv);
3136 3589
3137 /* no reset support for gen2 */ 3590 dev_priv->modeset_restore_state = NULL;
3138 if (IS_GEN2(dev_priv))
3139 return;
3140 3591
3141 /* reset doesn't touch the display */ 3592 /* reset doesn't touch the display */
3142 if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { 3593 if (!gpu_reset_clobbers_display(dev_priv)) {
3594 if (!state) {
3595 /*
3596 * Flips in the rings have been nuked by the reset,
3597 * so update the base address of all primary
                                                                                     3598 * planes to the last fb to make sure we're
3599 * showing the correct fb after a reset.
3600 *
3601 * FIXME: Atomic will make this obsolete since we won't schedule
3602 * CS-based flips (which might get lost in gpu resets) any more.
3603 */
3604 intel_update_primary_planes(dev);
3605 } else {
3606 ret = __intel_display_resume(dev, state);
3607 if (ret)
3608 DRM_ERROR("Restoring old state failed with %i\n", ret);
3609 }
3610 } else {
3143 /* 3611 /*
3144 * Flips in the rings have been nuked by the reset, 3612 * The display has been reset as well,
3145 * so update the base address of all primary 3613 * so need a full re-initialization.
3146 * planes to the the last fb to make sure we're
3147 * showing the correct fb after a reset.
3148 *
3149 * FIXME: Atomic will make this obsolete since we won't schedule
3150 * CS-based flips (which might get lost in gpu resets) any more.
3151 */ 3614 */
3152 intel_update_primary_planes(&dev_priv->drm); 3615 intel_runtime_pm_disable_interrupts(dev_priv);
3153 return; 3616 intel_runtime_pm_enable_interrupts(dev_priv);
3154 }
3155 3617
3156 /* 3618 intel_modeset_init_hw(dev);
3157 * The display has been reset as well,
3158 * so need a full re-initialization.
3159 */
3160 intel_runtime_pm_disable_interrupts(dev_priv);
3161 intel_runtime_pm_enable_interrupts(dev_priv);
3162 3619
3163 intel_modeset_init_hw(&dev_priv->drm); 3620 spin_lock_irq(&dev_priv->irq_lock);
3621 if (dev_priv->display.hpd_irq_setup)
3622 dev_priv->display.hpd_irq_setup(dev_priv);
3623 spin_unlock_irq(&dev_priv->irq_lock);
3164 3624
3165 spin_lock_irq(&dev_priv->irq_lock); 3625 ret = __intel_display_resume(dev, state);
3166 if (dev_priv->display.hpd_irq_setup) 3626 if (ret)
3167 dev_priv->display.hpd_irq_setup(dev_priv); 3627 DRM_ERROR("Restoring old state failed with %i\n", ret);
3168 spin_unlock_irq(&dev_priv->irq_lock);
3169
3170 intel_display_resume(&dev_priv->drm);
3171 3628
3172 intel_hpd_init(dev_priv); 3629 intel_hpd_init(dev_priv);
3630 }
3173 3631
3174 drm_modeset_unlock_all(&dev_priv->drm); 3632 drm_modeset_drop_locks(ctx);
3633 drm_modeset_acquire_fini(ctx);
3634 mutex_unlock(&dev->mode_config.mutex);
3175} 3635}
3176 3636
3177static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) 3637static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
@@ -9411,7 +9871,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
9411 I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n"); 9871 I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n");
9412 I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n"); 9872 I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
9413 I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n"); 9873 I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
9414 I915_STATE_WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n"); 9874 I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON, "Panel power on\n");
9415 I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE, 9875 I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
9416 "CPU PWM1 enabled\n"); 9876 "CPU PWM1 enabled\n");
9417 if (IS_HASWELL(dev)) 9877 if (IS_HASWELL(dev))
@@ -11198,7 +11658,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
11198 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); 11658 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
11199 intel_ring_emit(ring, fb->pitches[0]); 11659 intel_ring_emit(ring, fb->pitches[0]);
11200 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | 11660 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset |
11201 i915_gem_object_get_tiling(obj)); 11661 intel_fb_modifier_to_tiling(fb->modifier[0]));
11202 11662
11203 /* XXX Enabling the panel-fitter across page-flip is so far 11663 /* XXX Enabling the panel-fitter across page-flip is so far
11204 * untested on non-native modes, so ignore it for now. 11664 * untested on non-native modes, so ignore it for now.
@@ -11230,7 +11690,8 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
11230 11690
11231 intel_ring_emit(ring, MI_DISPLAY_FLIP | 11691 intel_ring_emit(ring, MI_DISPLAY_FLIP |
11232 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); 11692 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
11233 intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); 11693 intel_ring_emit(ring, fb->pitches[0] |
11694 intel_fb_modifier_to_tiling(fb->modifier[0]));
11234 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); 11695 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
11235 11696
11236 /* Contrary to the suggestions in the documentation, 11697 /* Contrary to the suggestions in the documentation,
@@ -11325,7 +11786,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
11325 intel_ring_emit(ring, MI_STORE_REGISTER_MEM | 11786 intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
11326 MI_SRM_LRM_GLOBAL_GTT); 11787 MI_SRM_LRM_GLOBAL_GTT);
11327 intel_ring_emit_reg(ring, DERRMR); 11788 intel_ring_emit_reg(ring, DERRMR);
11328 intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256); 11789 intel_ring_emit(ring,
11790 i915_ggtt_offset(req->engine->scratch) + 256);
11329 if (IS_GEN8(dev)) { 11791 if (IS_GEN8(dev)) {
11330 intel_ring_emit(ring, 0); 11792 intel_ring_emit(ring, 0);
11331 intel_ring_emit(ring, MI_NOOP); 11793 intel_ring_emit(ring, MI_NOOP);
@@ -11333,7 +11795,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
11333 } 11795 }
11334 11796
11335 intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); 11797 intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
11336 intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); 11798 intel_ring_emit(ring, fb->pitches[0] |
11799 intel_fb_modifier_to_tiling(fb->modifier[0]));
11337 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); 11800 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
11338 intel_ring_emit(ring, (MI_NOOP)); 11801 intel_ring_emit(ring, (MI_NOOP));
11339 11802
@@ -11382,7 +11845,7 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
11382 struct drm_i915_private *dev_priv = to_i915(dev); 11845 struct drm_i915_private *dev_priv = to_i915(dev);
11383 struct drm_framebuffer *fb = intel_crtc->base.primary->fb; 11846 struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
11384 const enum pipe pipe = intel_crtc->pipe; 11847 const enum pipe pipe = intel_crtc->pipe;
11385 u32 ctl, stride, tile_height; 11848 u32 ctl, stride = skl_plane_stride(fb, 0, rotation);
11386 11849
11387 ctl = I915_READ(PLANE_CTL(pipe, 0)); 11850 ctl = I915_READ(PLANE_CTL(pipe, 0));
11388 ctl &= ~PLANE_CTL_TILED_MASK; 11851 ctl &= ~PLANE_CTL_TILED_MASK;
@@ -11403,20 +11866,6 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
11403 } 11866 }
11404 11867
11405 /* 11868 /*
11406 * The stride is either expressed as a multiple of 64 bytes chunks for
11407 * linear buffers or in number of tiles for tiled buffers.
11408 */
11409 if (intel_rotation_90_or_270(rotation)) {
11410 /* stride = Surface height in tiles */
11411 tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0);
11412 stride = DIV_ROUND_UP(fb->height, tile_height);
11413 } else {
11414 stride = fb->pitches[0] /
11415 intel_fb_stride_alignment(dev_priv, fb->modifier[0],
11416 fb->pixel_format);
11417 }
11418
11419 /*
11420 * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on 11869 * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
11421 * PLANE_SURF updates, the update is then guaranteed to be atomic. 11870 * PLANE_SURF updates, the update is then guaranteed to be atomic.
11422 */ 11871 */
@@ -11432,15 +11881,13 @@ static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc,
11432{ 11881{
11433 struct drm_device *dev = intel_crtc->base.dev; 11882 struct drm_device *dev = intel_crtc->base.dev;
11434 struct drm_i915_private *dev_priv = to_i915(dev); 11883 struct drm_i915_private *dev_priv = to_i915(dev);
11435 struct intel_framebuffer *intel_fb = 11884 struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
11436 to_intel_framebuffer(intel_crtc->base.primary->fb);
11437 struct drm_i915_gem_object *obj = intel_fb->obj;
11438 i915_reg_t reg = DSPCNTR(intel_crtc->plane); 11885 i915_reg_t reg = DSPCNTR(intel_crtc->plane);
11439 u32 dspcntr; 11886 u32 dspcntr;
11440 11887
11441 dspcntr = I915_READ(reg); 11888 dspcntr = I915_READ(reg);
11442 11889
11443 if (i915_gem_object_is_tiled(obj)) 11890 if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
11444 dspcntr |= DISPPLANE_TILED; 11891 dspcntr |= DISPPLANE_TILED;
11445 else 11892 else
11446 dspcntr &= ~DISPPLANE_TILED; 11893 dspcntr &= ~DISPPLANE_TILED;
@@ -11577,6 +12024,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11577 struct intel_engine_cs *engine; 12024 struct intel_engine_cs *engine;
11578 bool mmio_flip; 12025 bool mmio_flip;
11579 struct drm_i915_gem_request *request; 12026 struct drm_i915_gem_request *request;
12027 struct i915_vma *vma;
11580 int ret; 12028 int ret;
11581 12029
11582 /* 12030 /*
@@ -11668,8 +12116,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11668 12116
11669 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { 12117 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
11670 engine = &dev_priv->engine[BCS]; 12118 engine = &dev_priv->engine[BCS];
11671 if (i915_gem_object_get_tiling(obj) != 12119 if (fb->modifier[0] != old_fb->modifier[0])
11672 i915_gem_object_get_tiling(intel_fb_obj(work->old_fb)))
11673 /* vlv: DISPLAY_FLIP fails to change tiling */ 12120 /* vlv: DISPLAY_FLIP fails to change tiling */
11674 engine = NULL; 12121 engine = NULL;
11675 } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { 12122 } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
@@ -11685,12 +12132,13 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11685 12132
11686 mmio_flip = use_mmio_flip(engine, obj); 12133 mmio_flip = use_mmio_flip(engine, obj);
11687 12134
11688 ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); 12135 vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
11689 if (ret) 12136 if (IS_ERR(vma)) {
12137 ret = PTR_ERR(vma);
11690 goto cleanup_pending; 12138 goto cleanup_pending;
12139 }
11691 12140
11692 work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary), 12141 work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation);
11693 obj, 0);
11694 work->gtt_offset += intel_crtc->dspaddr_offset; 12142 work->gtt_offset += intel_crtc->dspaddr_offset;
11695 work->rotation = crtc->primary->state->rotation; 12143 work->rotation = crtc->primary->state->rotation;
11696 12144
@@ -14035,7 +14483,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
14035 if (ret) 14483 if (ret)
14036 DRM_DEBUG_KMS("failed to attach phys object\n"); 14484 DRM_DEBUG_KMS("failed to attach phys object\n");
14037 } else { 14485 } else {
14038 ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation); 14486 struct i915_vma *vma;
14487
14488 vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
14489 if (IS_ERR(vma))
14490 ret = PTR_ERR(vma);
14039 } 14491 }
14040 14492
14041 if (ret == 0) { 14493 if (ret == 0) {
@@ -14110,12 +14562,14 @@ intel_check_primary_plane(struct drm_plane *plane,
14110 struct intel_crtc_state *crtc_state, 14562 struct intel_crtc_state *crtc_state,
14111 struct intel_plane_state *state) 14563 struct intel_plane_state *state)
14112{ 14564{
14565 struct drm_i915_private *dev_priv = to_i915(plane->dev);
14113 struct drm_crtc *crtc = state->base.crtc; 14566 struct drm_crtc *crtc = state->base.crtc;
14114 int min_scale = DRM_PLANE_HELPER_NO_SCALING; 14567 int min_scale = DRM_PLANE_HELPER_NO_SCALING;
14115 int max_scale = DRM_PLANE_HELPER_NO_SCALING; 14568 int max_scale = DRM_PLANE_HELPER_NO_SCALING;
14116 bool can_position = false; 14569 bool can_position = false;
14570 int ret;
14117 14571
14118 if (INTEL_INFO(plane->dev)->gen >= 9) { 14572 if (INTEL_GEN(dev_priv) >= 9) {
14119 /* use scaler when colorkey is not required */ 14573 /* use scaler when colorkey is not required */
14120 if (state->ckey.flags == I915_SET_COLORKEY_NONE) { 14574 if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
14121 min_scale = 1; 14575 min_scale = 1;
@@ -14124,10 +14578,23 @@ intel_check_primary_plane(struct drm_plane *plane,
14124 can_position = true; 14578 can_position = true;
14125 } 14579 }
14126 14580
14127 return drm_plane_helper_check_state(&state->base, 14581 ret = drm_plane_helper_check_state(&state->base,
14128 &state->clip, 14582 &state->clip,
14129 min_scale, max_scale, 14583 min_scale, max_scale,
14130 can_position, true); 14584 can_position, true);
14585 if (ret)
14586 return ret;
14587
14588 if (!state->base.fb)
14589 return 0;
14590
14591 if (INTEL_GEN(dev_priv) >= 9) {
14592 ret = skl_check_plane_surface(state);
14593 if (ret)
14594 return ret;
14595 }
14596
14597 return 0;
14131} 14598}
14132 14599
14133static void intel_begin_crtc_commit(struct drm_crtc *crtc, 14600static void intel_begin_crtc_commit(struct drm_crtc *crtc,
@@ -14386,7 +14853,7 @@ intel_update_cursor_plane(struct drm_plane *plane,
14386 if (!obj) 14853 if (!obj)
14387 addr = 0; 14854 addr = 0;
14388 else if (!INTEL_INFO(dev)->cursor_needs_physical) 14855 else if (!INTEL_INFO(dev)->cursor_needs_physical)
14389 addr = i915_gem_obj_ggtt_offset(obj); 14856 addr = i915_gem_object_ggtt_offset(obj, NULL);
14390 else 14857 else
14391 addr = obj->phys_handle->busaddr; 14858 addr = obj->phys_handle->busaddr;
14392 14859
@@ -14639,12 +15106,50 @@ static bool intel_crt_present(struct drm_device *dev)
14639 return true; 15106 return true;
14640} 15107}
14641 15108
15109void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv)
15110{
15111 int pps_num;
15112 int pps_idx;
15113
15114 if (HAS_DDI(dev_priv))
15115 return;
15116 /*
15117 * This w/a is needed at least on CPT/PPT, but to be sure apply it
15118 * everywhere where registers can be write protected.
15119 */
15120 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
15121 pps_num = 2;
15122 else
15123 pps_num = 1;
15124
15125 for (pps_idx = 0; pps_idx < pps_num; pps_idx++) {
15126 u32 val = I915_READ(PP_CONTROL(pps_idx));
15127
15128 val = (val & ~PANEL_UNLOCK_MASK) | PANEL_UNLOCK_REGS;
15129 I915_WRITE(PP_CONTROL(pps_idx), val);
15130 }
15131}
15132
15133static void intel_pps_init(struct drm_i915_private *dev_priv)
15134{
15135 if (HAS_PCH_SPLIT(dev_priv) || IS_BROXTON(dev_priv))
15136 dev_priv->pps_mmio_base = PCH_PPS_BASE;
15137 else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
15138 dev_priv->pps_mmio_base = VLV_PPS_BASE;
15139 else
15140 dev_priv->pps_mmio_base = PPS_BASE;
15141
15142 intel_pps_unlock_regs_wa(dev_priv);
15143}
15144
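intel_pps_unlock_regs_wa() above applies the panel power sequencer unlock key everywhere the PP_CONTROL registers can be write protected, iterating over one or two sequencer instances depending on the platform. A standalone model of the read-modify-write it performs; the mask and key values are placeholders, not the definitions from i915_reg.h:

#include <stdint.h>
#include <stdio.h>

#define PANEL_UNLOCK_MASK       0xffff0000u     /* placeholder field mask */
#define PANEL_UNLOCK_REGS       0xabcd0000u     /* placeholder unlock key */

/* replace the unlock-key field of PP_CONTROL, leaving the other bits alone */
static uint32_t pps_unlock(uint32_t pp_control)
{
        return (pp_control & ~PANEL_UNLOCK_MASK) | PANEL_UNLOCK_REGS;
}

int main(void)
{
        uint32_t val = 0x00000003;      /* hypothetical locked PP_CONTROL value */

        printf("PP_CONTROL: 0x%08x -> 0x%08x\n", val, pps_unlock(val));
        return 0;
}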
14642static void intel_setup_outputs(struct drm_device *dev) 15145static void intel_setup_outputs(struct drm_device *dev)
14643{ 15146{
14644 struct drm_i915_private *dev_priv = to_i915(dev); 15147 struct drm_i915_private *dev_priv = to_i915(dev);
14645 struct intel_encoder *encoder; 15148 struct intel_encoder *encoder;
14646 bool dpd_is_edp = false; 15149 bool dpd_is_edp = false;
14647 15150
15151 intel_pps_init(dev_priv);
15152
14648 /* 15153 /*
14649 * intel_edp_init_connector() depends on this completing first, to 15154 * intel_edp_init_connector() depends on this completing first, to
14650 * prevent the registration of both eDP and LVDS and the incorrect 15155 * prevent the registration of both eDP and LVDS and the incorrect
@@ -14912,7 +15417,7 @@ static int intel_framebuffer_init(struct drm_device *dev,
14912 struct drm_i915_gem_object *obj) 15417 struct drm_i915_gem_object *obj)
14913{ 15418{
14914 struct drm_i915_private *dev_priv = to_i915(dev); 15419 struct drm_i915_private *dev_priv = to_i915(dev);
14915 unsigned int aligned_height; 15420 unsigned int tiling = i915_gem_object_get_tiling(obj);
14916 int ret; 15421 int ret;
14917 u32 pitch_limit, stride_alignment; 15422 u32 pitch_limit, stride_alignment;
14918 char *format_name; 15423 char *format_name;
@@ -14920,17 +15425,19 @@ static int intel_framebuffer_init(struct drm_device *dev,
14920 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 15425 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
14921 15426
14922 if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { 15427 if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
14923 /* Enforce that fb modifier and tiling mode match, but only for 15428 /*
14924 * X-tiled. This is needed for FBC. */ 15429 * If there's a fence, enforce that
14925 if (!!(i915_gem_object_get_tiling(obj) == I915_TILING_X) != 15430 * the fb modifier and tiling mode match.
14926 !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) { 15431 */
15432 if (tiling != I915_TILING_NONE &&
15433 tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
14927 DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); 15434 DRM_DEBUG("tiling_mode doesn't match fb modifier\n");
14928 return -EINVAL; 15435 return -EINVAL;
14929 } 15436 }
14930 } else { 15437 } else {
14931 if (i915_gem_object_get_tiling(obj) == I915_TILING_X) 15438 if (tiling == I915_TILING_X) {
14932 mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; 15439 mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
14933 else if (i915_gem_object_get_tiling(obj) == I915_TILING_Y) { 15440 } else if (tiling == I915_TILING_Y) {
14934 DRM_DEBUG("No Y tiling for legacy addfb\n"); 15441 DRM_DEBUG("No Y tiling for legacy addfb\n");
14935 return -EINVAL; 15442 return -EINVAL;
14936 } 15443 }
@@ -14954,6 +15461,16 @@ static int intel_framebuffer_init(struct drm_device *dev,
14954 return -EINVAL; 15461 return -EINVAL;
14955 } 15462 }
14956 15463
15464 /*
15465 * gen2/3 display engine uses the fence if present,
15466 * so the tiling mode must match the fb modifier exactly.
15467 */
15468 if (INTEL_INFO(dev_priv)->gen < 4 &&
15469 tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
15470 DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n");
15471 return -EINVAL;
15472 }
15473
14957 stride_alignment = intel_fb_stride_alignment(dev_priv, 15474 stride_alignment = intel_fb_stride_alignment(dev_priv,
14958 mode_cmd->modifier[0], 15475 mode_cmd->modifier[0],
14959 mode_cmd->pixel_format); 15476 mode_cmd->pixel_format);
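The reworked addfb checks above boil down to two rules: an object that carries a fence (a tiling mode) must agree with the requested fb modifier, and on gen2/3, where the display engine scans out through the fence, object tiling and modifier must match exactly. A standalone model of those checks; the enums are illustrative, not the driver's:

#include <stdbool.h>
#include <stdio.h>

enum tiling { TILING_NONE, TILING_X, TILING_Y };
enum modifier { MOD_LINEAR, MOD_X_TILED, MOD_Y_TILED };

static enum tiling modifier_to_tiling(enum modifier mod)
{
        switch (mod) {
        case MOD_X_TILED: return TILING_X;
        case MOD_Y_TILED: return TILING_Y;
        default:          return TILING_NONE;
        }
}

static bool fb_tiling_ok(enum tiling obj_tiling, enum modifier mod, int gen)
{
        if (gen < 4)                    /* gen2/3 scan out through the fence */
                return obj_tiling == modifier_to_tiling(mod);
        if (obj_tiling != TILING_NONE)  /* fenced object: must agree with the modifier */
                return obj_tiling == modifier_to_tiling(mod);
        return true;                    /* untiled object passes this check */
}

int main(void)
{
        printf("%d\n", fb_tiling_ok(TILING_X, MOD_X_TILED, 9));    /* 1 */
        printf("%d\n", fb_tiling_ok(TILING_X, MOD_LINEAR, 9));     /* 0 */
        printf("%d\n", fb_tiling_ok(TILING_NONE, MOD_X_TILED, 3)); /* 0 */
        return 0;
}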
@@ -14973,7 +15490,11 @@ static int intel_framebuffer_init(struct drm_device *dev,
14973 return -EINVAL; 15490 return -EINVAL;
14974 } 15491 }
14975 15492
14976 if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED && 15493 /*
15494 * If there's a fence, enforce that
15495 * the fb pitch and fence stride match.
15496 */
15497 if (tiling != I915_TILING_NONE &&
14977 mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { 15498 mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) {
14978 DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", 15499 DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n",
14979 mode_cmd->pitches[0], 15500 mode_cmd->pitches[0],
@@ -15045,17 +15566,12 @@ static int intel_framebuffer_init(struct drm_device *dev,
15045 if (mode_cmd->offsets[0] != 0) 15566 if (mode_cmd->offsets[0] != 0)
15046 return -EINVAL; 15567 return -EINVAL;
15047 15568
15048 aligned_height = intel_fb_align_height(dev, mode_cmd->height,
15049 mode_cmd->pixel_format,
15050 mode_cmd->modifier[0]);
15051 /* FIXME drm helper for size checks (especially planar formats)? */
15052 if (obj->base.size < aligned_height * mode_cmd->pitches[0])
15053 return -EINVAL;
15054
15055 drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd); 15569 drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
15056 intel_fb->obj = obj; 15570 intel_fb->obj = obj;
15057 15571
15058 intel_fill_fb_info(dev_priv, &intel_fb->base); 15572 ret = intel_fill_fb_info(dev_priv, &intel_fb->base);
15573 if (ret)
15574 return ret;
15059 15575
15060 ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs); 15576 ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
15061 if (ret) { 15577 if (ret) {
@@ -15768,6 +16284,13 @@ static bool intel_encoder_has_connectors(struct intel_encoder *encoder)
15768 return false; 16284 return false;
15769} 16285}
15770 16286
16287static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
16288 enum transcoder pch_transcoder)
16289{
16290 return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) ||
16291 (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
16292}
16293
15771static void intel_sanitize_crtc(struct intel_crtc *crtc) 16294static void intel_sanitize_crtc(struct intel_crtc *crtc)
15772{ 16295{
15773 struct drm_device *dev = crtc->base.dev; 16296 struct drm_device *dev = crtc->base.dev;
@@ -15846,7 +16369,17 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
15846 * worst a fifo underrun happens which also sets this to false. 16369 * worst a fifo underrun happens which also sets this to false.
15847 */ 16370 */
15848 crtc->cpu_fifo_underrun_disabled = true; 16371 crtc->cpu_fifo_underrun_disabled = true;
15849 crtc->pch_fifo_underrun_disabled = true; 16372 /*
                                                                             16373 * We track the PCH transcoder underrun reporting state
16374 * within the crtc. With crtc for pipe A housing the underrun
16375 * reporting state for PCH transcoder A, crtc for pipe B housing
16376 * it for PCH transcoder B, etc. LPT-H has only PCH transcoder A,
16377 * and marking underrun reporting as disabled for the non-existing
16378 * PCH transcoders B and C would prevent enabling the south
16379 * error interrupt (see cpt_can_enable_serr_int()).
16380 */
16381 if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe))
16382 crtc->pch_fifo_underrun_disabled = true;
15850 } 16383 }
15851} 16384}
15852 16385
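The comment added above encodes which PCH transcoders actually exist: one per pipe on IBX/CPT, and only transcoder A on LPT-H. A standalone model of the has_pch_trancoder() predicate; the enums are illustrative:

#include <stdbool.h>
#include <stdio.h>

enum pch { PCH_NONE, PCH_IBX, PCH_CPT, PCH_LPT_H };
enum transcoder { TRANSCODER_A, TRANSCODER_B, TRANSCODER_C };

static bool has_pch_transcoder(enum pch pch, enum transcoder t)
{
        return pch == PCH_IBX || pch == PCH_CPT ||
               (pch == PCH_LPT_H && t == TRANSCODER_A);
}

int main(void)
{
        printf("%d\n", has_pch_transcoder(PCH_LPT_H, TRANSCODER_B)); /* 0 */
        printf("%d\n", has_pch_transcoder(PCH_CPT, TRANSCODER_C));   /* 1 */
        return 0;
}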
@@ -16160,9 +16693,10 @@ void intel_display_resume(struct drm_device *dev)
16160 struct drm_atomic_state *state = dev_priv->modeset_restore_state; 16693 struct drm_atomic_state *state = dev_priv->modeset_restore_state;
16161 struct drm_modeset_acquire_ctx ctx; 16694 struct drm_modeset_acquire_ctx ctx;
16162 int ret; 16695 int ret;
16163 bool setup = false;
16164 16696
16165 dev_priv->modeset_restore_state = NULL; 16697 dev_priv->modeset_restore_state = NULL;
16698 if (state)
16699 state->acquire_ctx = &ctx;
16166 16700
16167 /* 16701 /*
16168 * This is a kludge because with real atomic modeset mode_config.mutex 16702 * This is a kludge because with real atomic modeset mode_config.mutex
@@ -16173,43 +16707,17 @@ void intel_display_resume(struct drm_device *dev)
16173 mutex_lock(&dev->mode_config.mutex); 16707 mutex_lock(&dev->mode_config.mutex);
16174 drm_modeset_acquire_init(&ctx, 0); 16708 drm_modeset_acquire_init(&ctx, 0);
16175 16709
16176retry: 16710 while (1) {
16177 ret = drm_modeset_lock_all_ctx(dev, &ctx); 16711 ret = drm_modeset_lock_all_ctx(dev, &ctx);
16178 16712 if (ret != -EDEADLK)
16179 if (ret == 0 && !setup) { 16713 break;
16180 setup = true;
16181
16182 intel_modeset_setup_hw_state(dev);
16183 i915_redisable_vga(dev);
16184 }
16185
16186 if (ret == 0 && state) {
16187 struct drm_crtc_state *crtc_state;
16188 struct drm_crtc *crtc;
16189 int i;
16190
16191 state->acquire_ctx = &ctx;
16192
16193 /* ignore any reset values/BIOS leftovers in the WM registers */
16194 to_intel_atomic_state(state)->skip_intermediate_wm = true;
16195
16196 for_each_crtc_in_state(state, crtc, crtc_state, i) {
16197 /*
16198 * Force recalculation even if we restore
16199 * current state. With fast modeset this may not result
16200 * in a modeset when the state is compatible.
16201 */
16202 crtc_state->mode_changed = true;
16203 }
16204
16205 ret = drm_atomic_commit(state);
16206 }
16207 16714
16208 if (ret == -EDEADLK) {
16209 drm_modeset_backoff(&ctx); 16715 drm_modeset_backoff(&ctx);
16210 goto retry;
16211 } 16716 }
16212 16717
16718 if (!ret)
16719 ret = __intel_display_resume(dev, state);
16720
16213 drm_modeset_drop_locks(&ctx); 16721 drm_modeset_drop_locks(&ctx);
16214 drm_modeset_acquire_fini(&ctx); 16722 drm_modeset_acquire_fini(&ctx);
16215 mutex_unlock(&dev->mode_config.mutex); 16723 mutex_unlock(&dev->mode_config.mutex);
@@ -16225,7 +16733,6 @@ void intel_modeset_gem_init(struct drm_device *dev)
16225 struct drm_i915_private *dev_priv = to_i915(dev); 16733 struct drm_i915_private *dev_priv = to_i915(dev);
16226 struct drm_crtc *c; 16734 struct drm_crtc *c;
16227 struct drm_i915_gem_object *obj; 16735 struct drm_i915_gem_object *obj;
16228 int ret;
16229 16736
16230 intel_init_gt_powersave(dev_priv); 16737 intel_init_gt_powersave(dev_priv);
16231 16738
@@ -16239,15 +16746,17 @@ void intel_modeset_gem_init(struct drm_device *dev)
16239 * for this. 16746 * for this.
16240 */ 16747 */
16241 for_each_crtc(dev, c) { 16748 for_each_crtc(dev, c) {
16749 struct i915_vma *vma;
16750
16242 obj = intel_fb_obj(c->primary->fb); 16751 obj = intel_fb_obj(c->primary->fb);
16243 if (obj == NULL) 16752 if (obj == NULL)
16244 continue; 16753 continue;
16245 16754
16246 mutex_lock(&dev->struct_mutex); 16755 mutex_lock(&dev->struct_mutex);
16247 ret = intel_pin_and_fence_fb_obj(c->primary->fb, 16756 vma = intel_pin_and_fence_fb_obj(c->primary->fb,
16248 c->primary->state->rotation); 16757 c->primary->state->rotation);
16249 mutex_unlock(&dev->struct_mutex); 16758 mutex_unlock(&dev->struct_mutex);
16250 if (ret) { 16759 if (IS_ERR(vma)) {
16251 DRM_ERROR("failed to pin boot fb on pipe %d\n", 16760 DRM_ERROR("failed to pin boot fb on pipe %d\n",
16252 to_intel_crtc(c)->pipe); 16761 to_intel_crtc(c)->pipe);
16253 drm_framebuffer_unreference(c->primary->fb); 16762 drm_framebuffer_unreference(c->primary->fb);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 8fe2afa5439e..364db908c191 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -256,6 +256,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev,
256static void 256static void
257intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, 257intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev,
258 struct intel_dp *intel_dp); 258 struct intel_dp *intel_dp);
259static void
260intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp);
259 261
260static void pps_lock(struct intel_dp *intel_dp) 262static void pps_lock(struct intel_dp *intel_dp)
261{ 263{
@@ -463,13 +465,13 @@ typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv,
463static bool vlv_pipe_has_pp_on(struct drm_i915_private *dev_priv, 465static bool vlv_pipe_has_pp_on(struct drm_i915_private *dev_priv,
464 enum pipe pipe) 466 enum pipe pipe)
465{ 467{
466 return I915_READ(VLV_PIPE_PP_STATUS(pipe)) & PP_ON; 468 return I915_READ(PP_STATUS(pipe)) & PP_ON;
467} 469}
468 470
469static bool vlv_pipe_has_vdd_on(struct drm_i915_private *dev_priv, 471static bool vlv_pipe_has_vdd_on(struct drm_i915_private *dev_priv,
470 enum pipe pipe) 472 enum pipe pipe)
471{ 473{
472 return I915_READ(VLV_PIPE_PP_CONTROL(pipe)) & EDP_FORCE_VDD; 474 return I915_READ(PP_CONTROL(pipe)) & EDP_FORCE_VDD;
473} 475}
474 476
475static bool vlv_pipe_any(struct drm_i915_private *dev_priv, 477static bool vlv_pipe_any(struct drm_i915_private *dev_priv,
@@ -486,7 +488,7 @@ vlv_initial_pps_pipe(struct drm_i915_private *dev_priv,
486 enum pipe pipe; 488 enum pipe pipe;
487 489
488 for (pipe = PIPE_A; pipe <= PIPE_B; pipe++) { 490 for (pipe = PIPE_A; pipe <= PIPE_B; pipe++) {
489 u32 port_sel = I915_READ(VLV_PIPE_PP_ON_DELAYS(pipe)) & 491 u32 port_sel = I915_READ(PP_ON_DELAYS(pipe)) &
490 PANEL_PORT_SELECT_MASK; 492 PANEL_PORT_SELECT_MASK;
491 493
492 if (port_sel != PANEL_PORT_SELECT_VLV(port)) 494 if (port_sel != PANEL_PORT_SELECT_VLV(port))
@@ -583,30 +585,21 @@ static void intel_pps_get_registers(struct drm_i915_private *dev_priv,
583 struct intel_dp *intel_dp, 585 struct intel_dp *intel_dp,
584 struct pps_registers *regs) 586 struct pps_registers *regs)
585{ 587{
588 int pps_idx = 0;
589
586 memset(regs, 0, sizeof(*regs)); 590 memset(regs, 0, sizeof(*regs));
587 591
588 if (IS_BROXTON(dev_priv)) { 592 if (IS_BROXTON(dev_priv))
589 int idx = bxt_power_sequencer_idx(intel_dp); 593 pps_idx = bxt_power_sequencer_idx(intel_dp);
590 594 else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
591 regs->pp_ctrl = BXT_PP_CONTROL(idx); 595 pps_idx = vlv_power_sequencer_pipe(intel_dp);
592 regs->pp_stat = BXT_PP_STATUS(idx);
593 regs->pp_on = BXT_PP_ON_DELAYS(idx);
594 regs->pp_off = BXT_PP_OFF_DELAYS(idx);
595 } else if (HAS_PCH_SPLIT(dev_priv)) {
596 regs->pp_ctrl = PCH_PP_CONTROL;
597 regs->pp_stat = PCH_PP_STATUS;
598 regs->pp_on = PCH_PP_ON_DELAYS;
599 regs->pp_off = PCH_PP_OFF_DELAYS;
600 regs->pp_div = PCH_PP_DIVISOR;
601 } else {
602 enum pipe pipe = vlv_power_sequencer_pipe(intel_dp);
603 596
604 regs->pp_ctrl = VLV_PIPE_PP_CONTROL(pipe); 597 regs->pp_ctrl = PP_CONTROL(pps_idx);
605 regs->pp_stat = VLV_PIPE_PP_STATUS(pipe); 598 regs->pp_stat = PP_STATUS(pps_idx);
606 regs->pp_on = VLV_PIPE_PP_ON_DELAYS(pipe); 599 regs->pp_on = PP_ON_DELAYS(pps_idx);
607 regs->pp_off = VLV_PIPE_PP_OFF_DELAYS(pipe); 600 regs->pp_off = PP_OFF_DELAYS(pps_idx);
608 regs->pp_div = VLV_PIPE_PP_DIVISOR(pipe); 601 if (!IS_BROXTON(dev_priv))
609 } 602 regs->pp_div = PP_DIVISOR(pps_idx);
610} 603}
611 604
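intel_pps_get_registers() used to pick between three per-platform register banks; it now derives one bank from a power sequencer index, with only the divisor register skipped on Broxton. A standalone model of that shape; the base address, stride and per-register offsets are placeholders, not real MMIO offsets:

#include <stdint.h>
#include <stdio.h>

#define PPS_STRIDE 0x100        /* placeholder distance between PPS units */

struct pps_registers {
        uint32_t pp_ctrl, pp_stat, pp_on, pp_off, pp_div;
};

static void pps_get_registers(uint32_t base, int pps_idx, int has_pp_div,
                              struct pps_registers *regs)
{
        uint32_t unit = base + (uint32_t)pps_idx * PPS_STRIDE;

        regs->pp_stat = unit + 0x00;
        regs->pp_ctrl = unit + 0x04;
        regs->pp_on   = unit + 0x08;
        regs->pp_off  = unit + 0x0c;
        regs->pp_div  = has_pp_div ? unit + 0x10 : 0;   /* no divisor register on BXT */
}

int main(void)
{
        struct pps_registers regs;

        pps_get_registers(0x1000, 1, 1, &regs);  /* placeholder base, second PPS unit */
        printf("pp_ctrl at 0x%05x\n", regs.pp_ctrl);
        return 0;
}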
612static i915_reg_t 605static i915_reg_t
@@ -651,8 +644,8 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code,
651 i915_reg_t pp_ctrl_reg, pp_div_reg; 644 i915_reg_t pp_ctrl_reg, pp_div_reg;
652 u32 pp_div; 645 u32 pp_div;
653 646
654 pp_ctrl_reg = VLV_PIPE_PP_CONTROL(pipe); 647 pp_ctrl_reg = PP_CONTROL(pipe);
655 pp_div_reg = VLV_PIPE_PP_DIVISOR(pipe); 648 pp_div_reg = PP_DIVISOR(pipe);
656 pp_div = I915_READ(pp_div_reg); 649 pp_div = I915_READ(pp_div_reg);
657 pp_div &= PP_REFERENCE_DIVIDER_MASK; 650 pp_div &= PP_REFERENCE_DIVIDER_MASK;
658 651
@@ -1836,7 +1829,8 @@ static u32 ironlake_get_pp_control(struct intel_dp *intel_dp)
1836 lockdep_assert_held(&dev_priv->pps_mutex); 1829 lockdep_assert_held(&dev_priv->pps_mutex);
1837 1830
1838 control = I915_READ(_pp_ctrl_reg(intel_dp)); 1831 control = I915_READ(_pp_ctrl_reg(intel_dp));
1839 if (!IS_BROXTON(dev)) { 1832 if (WARN_ON(!HAS_DDI(dev_priv) &&
1833 (control & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS)) {
1840 control &= ~PANEL_UNLOCK_MASK; 1834 control &= ~PANEL_UNLOCK_MASK;
1841 control |= PANEL_UNLOCK_REGS; 1835 control |= PANEL_UNLOCK_REGS;
1842 } 1836 }
@@ -1957,7 +1951,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
1957 DRM_DEBUG_KMS("PP_STATUS: 0x%08x PP_CONTROL: 0x%08x\n", 1951 DRM_DEBUG_KMS("PP_STATUS: 0x%08x PP_CONTROL: 0x%08x\n",
1958 I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg)); 1952 I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg));
1959 1953
1960 if ((pp & POWER_TARGET_ON) == 0) 1954 if ((pp & PANEL_POWER_ON) == 0)
1961 intel_dp->panel_power_off_time = ktime_get_boottime(); 1955 intel_dp->panel_power_off_time = ktime_get_boottime();
1962 1956
1963 power_domain = intel_display_port_aux_power_domain(intel_encoder); 1957 power_domain = intel_display_port_aux_power_domain(intel_encoder);
@@ -2044,7 +2038,7 @@ static void edp_panel_on(struct intel_dp *intel_dp)
2044 POSTING_READ(pp_ctrl_reg); 2038 POSTING_READ(pp_ctrl_reg);
2045 } 2039 }
2046 2040
2047 pp |= POWER_TARGET_ON; 2041 pp |= PANEL_POWER_ON;
2048 if (!IS_GEN5(dev)) 2042 if (!IS_GEN5(dev))
2049 pp |= PANEL_POWER_RESET; 2043 pp |= PANEL_POWER_RESET;
2050 2044
@@ -2096,7 +2090,7 @@ static void edp_panel_off(struct intel_dp *intel_dp)
2096 pp = ironlake_get_pp_control(intel_dp); 2090 pp = ironlake_get_pp_control(intel_dp);
2097 /* We need to switch off panel power _and_ force vdd, for otherwise some 2091 /* We need to switch off panel power _and_ force vdd, for otherwise some
2098 * panels get very unhappy and cease to work. */ 2092 * panels get very unhappy and cease to work. */
2099 pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_FORCE_VDD | 2093 pp &= ~(PANEL_POWER_ON | PANEL_POWER_RESET | EDP_FORCE_VDD |
2100 EDP_BLC_ENABLE); 2094 EDP_BLC_ENABLE);
2101 2095
2102 pp_ctrl_reg = _pp_ctrl_reg(intel_dp); 2096 pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
@@ -2729,7 +2723,7 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
2729 struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); 2723 struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
2730 struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); 2724 struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
2731 enum pipe pipe = intel_dp->pps_pipe; 2725 enum pipe pipe = intel_dp->pps_pipe;
2732 i915_reg_t pp_on_reg = VLV_PIPE_PP_ON_DELAYS(pipe); 2726 i915_reg_t pp_on_reg = PP_ON_DELAYS(pipe);
2733 2727
2734 edp_panel_vdd_off_sync(intel_dp); 2728 edp_panel_vdd_off_sync(intel_dp);
2735 2729
@@ -4666,13 +4660,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder)
4666 4660
4667 pps_lock(intel_dp); 4661 pps_lock(intel_dp);
4668 4662
4669 /* 4663 /* Reinit the power sequencer, in case BIOS did something with it. */
4670 * Read out the current power sequencer assignment, 4664 intel_dp_pps_init(encoder->dev, intel_dp);
4671 * in case the BIOS did something with it.
4672 */
4673 if (IS_VALLEYVIEW(encoder->dev) || IS_CHERRYVIEW(encoder->dev))
4674 vlv_initial_power_sequencer_setup(intel_dp);
4675
4676 intel_edp_panel_vdd_sanitize(intel_dp); 4665 intel_edp_panel_vdd_sanitize(intel_dp);
4677 4666
4678 pps_unlock(intel_dp); 4667 pps_unlock(intel_dp);
@@ -5020,6 +5009,17 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev,
5020 I915_READ(regs.pp_div)); 5009 I915_READ(regs.pp_div));
5021} 5010}
5022 5011
5012static void intel_dp_pps_init(struct drm_device *dev,
5013 struct intel_dp *intel_dp)
5014{
5015 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
5016 vlv_initial_power_sequencer_setup(intel_dp);
5017 } else {
5018 intel_dp_init_panel_power_sequencer(dev, intel_dp);
5019 intel_dp_init_panel_power_sequencer_registers(dev, intel_dp);
5020 }
5021}
5022
5023/** 5023/**
5024 * intel_dp_set_drrs_state - program registers for RR switch to take effect 5024 * intel_dp_set_drrs_state - program registers for RR switch to take effect
5025 * @dev: DRM device 5025 * @dev: DRM device
@@ -5434,14 +5434,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
5434 pps_lock(intel_dp); 5434 pps_lock(intel_dp);
5435 5435
5436 intel_dp_init_panel_power_timestamps(intel_dp); 5436 intel_dp_init_panel_power_timestamps(intel_dp);
5437 5437 intel_dp_pps_init(dev, intel_dp);
5438 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
5439 vlv_initial_power_sequencer_setup(intel_dp);
5440 } else {
5441 intel_dp_init_panel_power_sequencer(dev, intel_dp);
5442 intel_dp_init_panel_power_sequencer_registers(dev, intel_dp);
5443 }
5444
5445 intel_edp_panel_vdd_sanitize(intel_dp); 5438 intel_edp_panel_vdd_sanitize(intel_dp);
5446 5439
5447 pps_unlock(intel_dp); 5440 pps_unlock(intel_dp);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index b37b8ef0c745..774aab342f40 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -178,11 +178,22 @@ struct intel_framebuffer {
178 struct drm_framebuffer base; 178 struct drm_framebuffer base;
179 struct drm_i915_gem_object *obj; 179 struct drm_i915_gem_object *obj;
180 struct intel_rotation_info rot_info; 180 struct intel_rotation_info rot_info;
181
182 /* for each plane in the normal GTT view */
183 struct {
184 unsigned int x, y;
185 } normal[2];
186 /* for each plane in the rotated GTT view */
187 struct {
188 unsigned int x, y;
189 unsigned int pitch; /* pixels */
190 } rotated[2];
181}; 191};
182 192
183struct intel_fbdev { 193struct intel_fbdev {
184 struct drm_fb_helper helper; 194 struct drm_fb_helper helper;
185 struct intel_framebuffer *fb; 195 struct intel_framebuffer *fb;
196 struct i915_vma *vma;
186 async_cookie_t cookie; 197 async_cookie_t cookie;
187 int preferred_bpp; 198 int preferred_bpp;
188}; 199};
@@ -340,6 +351,15 @@ struct intel_plane_state {
340 struct drm_plane_state base; 351 struct drm_plane_state base;
341 struct drm_rect clip; 352 struct drm_rect clip;
342 353
354 struct {
355 u32 offset;
356 int x, y;
357 } main;
358 struct {
359 u32 offset;
360 int x, y;
361 } aux;
362
343 /* 363 /*
344 * scaler_id 364 * scaler_id
345 * = -1 : not using a scaler 365 * = -1 : not using a scaler
@@ -1153,12 +1173,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
1153 const char *name, u32 reg, int ref_freq); 1173 const char *name, u32 reg, int ref_freq);
1154extern const struct drm_plane_funcs intel_plane_funcs; 1174extern const struct drm_plane_funcs intel_plane_funcs;
1155void intel_init_display_hooks(struct drm_i915_private *dev_priv); 1175void intel_init_display_hooks(struct drm_i915_private *dev_priv);
1176unsigned int intel_fb_xy_to_linear(int x, int y,
1177 const struct intel_plane_state *state,
1178 int plane);
1179void intel_add_fb_offsets(int *x, int *y,
1180 const struct intel_plane_state *state, int plane);
1156unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info); 1181unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info);
1157bool intel_has_pending_fb_unpin(struct drm_device *dev); 1182bool intel_has_pending_fb_unpin(struct drm_device *dev);
1158void intel_mark_busy(struct drm_i915_private *dev_priv); 1183void intel_mark_busy(struct drm_i915_private *dev_priv);
1159void intel_mark_idle(struct drm_i915_private *dev_priv); 1184void intel_mark_idle(struct drm_i915_private *dev_priv);
1160void intel_crtc_restore_mode(struct drm_crtc *crtc); 1185void intel_crtc_restore_mode(struct drm_crtc *crtc);
1161int intel_display_suspend(struct drm_device *dev); 1186int intel_display_suspend(struct drm_device *dev);
1187void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv);
1162void intel_encoder_destroy(struct drm_encoder *encoder); 1188void intel_encoder_destroy(struct drm_encoder *encoder);
1163int intel_connector_init(struct intel_connector *); 1189int intel_connector_init(struct intel_connector *);
1164struct intel_connector *intel_connector_alloc(void); 1190struct intel_connector *intel_connector_alloc(void);
@@ -1214,8 +1240,8 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
1214void intel_release_load_detect_pipe(struct drm_connector *connector, 1240void intel_release_load_detect_pipe(struct drm_connector *connector,
1215 struct intel_load_detect_pipe *old, 1241 struct intel_load_detect_pipe *old,
1216 struct drm_modeset_acquire_ctx *ctx); 1242 struct drm_modeset_acquire_ctx *ctx);
1217int intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, 1243struct i915_vma *
1218 unsigned int rotation); 1244intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
1219void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); 1245void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
1220struct drm_framebuffer * 1246struct drm_framebuffer *
1221__intel_framebuffer_create(struct drm_device *dev, 1247__intel_framebuffer_create(struct drm_device *dev,
@@ -1277,9 +1303,7 @@ void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state);
1277#define assert_pipe_enabled(d, p) assert_pipe(d, p, true) 1303#define assert_pipe_enabled(d, p) assert_pipe(d, p, true)
1278#define assert_pipe_disabled(d, p) assert_pipe(d, p, false) 1304#define assert_pipe_disabled(d, p) assert_pipe(d, p, false)
1279u32 intel_compute_tile_offset(int *x, int *y, 1305u32 intel_compute_tile_offset(int *x, int *y,
1280 const struct drm_framebuffer *fb, int plane, 1306 const struct intel_plane_state *state, int plane);
1281 unsigned int pitch,
1282 unsigned int rotation);
1283void intel_prepare_reset(struct drm_i915_private *dev_priv); 1307void intel_prepare_reset(struct drm_i915_private *dev_priv);
1284void intel_finish_reset(struct drm_i915_private *dev_priv); 1308void intel_finish_reset(struct drm_i915_private *dev_priv);
1285void hsw_enable_pc8(struct drm_i915_private *dev_priv); 1309void hsw_enable_pc8(struct drm_i915_private *dev_priv);
@@ -1322,13 +1346,14 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode,
1322int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); 1346int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
1323int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); 1347int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state);
1324 1348
1325u32 intel_plane_obj_offset(struct intel_plane *intel_plane, 1349u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, unsigned int rotation);
1326 struct drm_i915_gem_object *obj,
1327 unsigned int plane);
1328 1350
1329u32 skl_plane_ctl_format(uint32_t pixel_format); 1351u32 skl_plane_ctl_format(uint32_t pixel_format);
1330u32 skl_plane_ctl_tiling(uint64_t fb_modifier); 1352u32 skl_plane_ctl_tiling(uint64_t fb_modifier);
1331u32 skl_plane_ctl_rotation(unsigned int rotation); 1353u32 skl_plane_ctl_rotation(unsigned int rotation);
1354u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
1355 unsigned int rotation);
1356int skl_check_plane_surface(struct intel_plane_state *plane_state);
1332 1357
1333/* intel_csr.c */ 1358/* intel_csr.c */
1334void intel_csr_ucode_init(struct drm_i915_private *); 1359void intel_csr_ucode_init(struct drm_i915_private *);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e9b301ae2d0c..2e96a86105c2 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -29,7 +29,7 @@
29static const struct engine_info { 29static const struct engine_info {
30 const char *name; 30 const char *name;
31 unsigned exec_id; 31 unsigned exec_id;
32 unsigned guc_id; 32 enum intel_engine_hw_id hw_id;
33 u32 mmio_base; 33 u32 mmio_base;
34 unsigned irq_shift; 34 unsigned irq_shift;
35 int (*init_legacy)(struct intel_engine_cs *engine); 35 int (*init_legacy)(struct intel_engine_cs *engine);
@@ -38,7 +38,7 @@ static const struct engine_info {
38 [RCS] = { 38 [RCS] = {
39 .name = "render ring", 39 .name = "render ring",
40 .exec_id = I915_EXEC_RENDER, 40 .exec_id = I915_EXEC_RENDER,
41 .guc_id = GUC_RENDER_ENGINE, 41 .hw_id = RCS_HW,
42 .mmio_base = RENDER_RING_BASE, 42 .mmio_base = RENDER_RING_BASE,
43 .irq_shift = GEN8_RCS_IRQ_SHIFT, 43 .irq_shift = GEN8_RCS_IRQ_SHIFT,
44 .init_execlists = logical_render_ring_init, 44 .init_execlists = logical_render_ring_init,
@@ -47,7 +47,7 @@ static const struct engine_info {
47 [BCS] = { 47 [BCS] = {
48 .name = "blitter ring", 48 .name = "blitter ring",
49 .exec_id = I915_EXEC_BLT, 49 .exec_id = I915_EXEC_BLT,
50 .guc_id = GUC_BLITTER_ENGINE, 50 .hw_id = BCS_HW,
51 .mmio_base = BLT_RING_BASE, 51 .mmio_base = BLT_RING_BASE,
52 .irq_shift = GEN8_BCS_IRQ_SHIFT, 52 .irq_shift = GEN8_BCS_IRQ_SHIFT,
53 .init_execlists = logical_xcs_ring_init, 53 .init_execlists = logical_xcs_ring_init,
@@ -56,7 +56,7 @@ static const struct engine_info {
56 [VCS] = { 56 [VCS] = {
57 .name = "bsd ring", 57 .name = "bsd ring",
58 .exec_id = I915_EXEC_BSD, 58 .exec_id = I915_EXEC_BSD,
59 .guc_id = GUC_VIDEO_ENGINE, 59 .hw_id = VCS_HW,
60 .mmio_base = GEN6_BSD_RING_BASE, 60 .mmio_base = GEN6_BSD_RING_BASE,
61 .irq_shift = GEN8_VCS1_IRQ_SHIFT, 61 .irq_shift = GEN8_VCS1_IRQ_SHIFT,
62 .init_execlists = logical_xcs_ring_init, 62 .init_execlists = logical_xcs_ring_init,
@@ -65,7 +65,7 @@ static const struct engine_info {
65 [VCS2] = { 65 [VCS2] = {
66 .name = "bsd2 ring", 66 .name = "bsd2 ring",
67 .exec_id = I915_EXEC_BSD, 67 .exec_id = I915_EXEC_BSD,
68 .guc_id = GUC_VIDEO_ENGINE2, 68 .hw_id = VCS2_HW,
69 .mmio_base = GEN8_BSD2_RING_BASE, 69 .mmio_base = GEN8_BSD2_RING_BASE,
70 .irq_shift = GEN8_VCS2_IRQ_SHIFT, 70 .irq_shift = GEN8_VCS2_IRQ_SHIFT,
71 .init_execlists = logical_xcs_ring_init, 71 .init_execlists = logical_xcs_ring_init,
@@ -74,7 +74,7 @@ static const struct engine_info {
74 [VECS] = { 74 [VECS] = {
75 .name = "video enhancement ring", 75 .name = "video enhancement ring",
76 .exec_id = I915_EXEC_VEBOX, 76 .exec_id = I915_EXEC_VEBOX,
77 .guc_id = GUC_VIDEOENHANCE_ENGINE, 77 .hw_id = VECS_HW,
78 .mmio_base = VEBOX_RING_BASE, 78 .mmio_base = VEBOX_RING_BASE,
79 .irq_shift = GEN8_VECS_IRQ_SHIFT, 79 .irq_shift = GEN8_VECS_IRQ_SHIFT,
80 .init_execlists = logical_xcs_ring_init, 80 .init_execlists = logical_xcs_ring_init,
@@ -93,7 +93,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
93 engine->i915 = dev_priv; 93 engine->i915 = dev_priv;
94 engine->name = info->name; 94 engine->name = info->name;
95 engine->exec_id = info->exec_id; 95 engine->exec_id = info->exec_id;
96 engine->hw_id = engine->guc_id = info->guc_id; 96 engine->hw_id = engine->guc_id = info->hw_id;
97 engine->mmio_base = info->mmio_base; 97 engine->mmio_base = info->mmio_base;
98 engine->irq_shift = info->irq_shift; 98 engine->irq_shift = info->irq_shift;
99 99
@@ -109,6 +109,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
109int intel_engines_init(struct drm_device *dev) 109int intel_engines_init(struct drm_device *dev)
110{ 110{
111 struct drm_i915_private *dev_priv = to_i915(dev); 111 struct drm_i915_private *dev_priv = to_i915(dev);
112 struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
112 unsigned int mask = 0; 113 unsigned int mask = 0;
113 int (*init)(struct intel_engine_cs *engine); 114 int (*init)(struct intel_engine_cs *engine);
114 unsigned int i; 115 unsigned int i;
@@ -142,11 +143,10 @@ int intel_engines_init(struct drm_device *dev)
142 * are added to the driver by a warning and disabling the forgotten 143 * are added to the driver by a warning and disabling the forgotten
143 * engines. 144 * engines.
144 */ 145 */
145 if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) { 146 if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask))
146 struct intel_device_info *info = 147 device_info->ring_mask = mask;
147 (struct intel_device_info *)&dev_priv->info; 148
148 info->ring_mask = mask; 149 device_info->num_rings = hweight32(mask);
149 }
150 150
151 return 0; 151 return 0;
152 152
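
The open-coded cast of the const device info is replaced by mkwrite_device_info(), and the number of rings is now derived from whatever actually initialised. A condensed sketch of the resulting flow, using only symbols visible in this hunk; the per-engine loop that builds 'mask' is elided:

	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
	unsigned int mask = 0;

	/* ... the intel_engine_setup() loop ORs each engine that came up
	 * into 'mask' (elided) ... */

	/* trim the static ring_mask down to the engines that exist */
	if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask))
		device_info->ring_mask = mask;

	device_info->num_rings = hweight32(mask);
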
@@ -161,9 +161,56 @@ cleanup:
161 return ret; 161 return ret;
162} 162}
163 163
164void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
165{
166 struct drm_i915_private *dev_priv = engine->i915;
167
168 /* Our semaphore implementation is strictly monotonic (i.e. we proceed
169 * so long as the semaphore value in the register/page is greater
 170	 * than the sync value), so whenever we reset the seqno we must also
 171	 * reset the tracking semaphore value to 0 so that it always stays
 172	 * below the next request's seqno. If we don't reset the semaphore
 173	 * value, then when the seqno moves backwards all future waits will
 174	 * complete instantly (causing rendering corruption).
175 */
176 if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
177 I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
178 I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
179 if (HAS_VEBOX(dev_priv))
180 I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
181 }
182 if (dev_priv->semaphore) {
183 struct page *page = i915_vma_first_page(dev_priv->semaphore);
184 void *semaphores;
185
186 /* Semaphores are in noncoherent memory, flush to be safe */
187 semaphores = kmap(page);
188 memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
189 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
190 drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
191 I915_NUM_ENGINES * gen8_semaphore_seqno_size);
192 kunmap(page);
193 }
194 memset(engine->semaphore.sync_seqno, 0,
195 sizeof(engine->semaphore.sync_seqno));
196
197 intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
198 if (engine->irq_seqno_barrier)
199 engine->irq_seqno_barrier(engine);
200 engine->last_submitted_seqno = seqno;
201
202 engine->hangcheck.seqno = seqno;
203
204 /* After manually advancing the seqno, fake the interrupt in case
205 * there are any waiters for that seqno.
206 */
207 intel_engine_wakeup(engine);
208}
209
164void intel_engine_init_hangcheck(struct intel_engine_cs *engine) 210void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
165{ 211{
166 memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); 212 memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
213 clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
167} 214}
168 215
169static void intel_engine_init_requests(struct intel_engine_cs *engine) 216static void intel_engine_init_requests(struct intel_engine_cs *engine)
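
intel_engine_init_seqno() gives seqno rewinds (wrap-around, reset recovery) a single home: the hardware status page, the gen6/7 semaphore registers, the gen8 semaphore page and hangcheck are all moved to the same value before any waiter is woken. A minimal caller sketch, assuming the two-argument for_each_engine() iterator used elsewhere in this series:

	struct intel_engine_cs *engine;

	/* move every engine to the same software seqno */
	for_each_engine(engine, dev_priv)
		intel_engine_init_seqno(engine, seqno);

	/* from here on, new requests can be numbered after 'seqno'
	 * without tripping the monotonic-semaphore assumption above */
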
@@ -192,6 +239,49 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
192 intel_engine_init_requests(engine); 239 intel_engine_init_requests(engine);
193 intel_engine_init_hangcheck(engine); 240 intel_engine_init_hangcheck(engine);
194 i915_gem_batch_pool_init(engine, &engine->batch_pool); 241 i915_gem_batch_pool_init(engine, &engine->batch_pool);
242
243 intel_engine_init_cmd_parser(engine);
244}
245
246int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
247{
248 struct drm_i915_gem_object *obj;
249 struct i915_vma *vma;
250 int ret;
251
252 WARN_ON(engine->scratch);
253
254 obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
255 if (!obj)
256 obj = i915_gem_object_create(&engine->i915->drm, size);
257 if (IS_ERR(obj)) {
258 DRM_ERROR("Failed to allocate scratch page\n");
259 return PTR_ERR(obj);
260 }
261
262 vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
263 if (IS_ERR(vma)) {
264 ret = PTR_ERR(vma);
265 goto err_unref;
266 }
267
268 ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH);
269 if (ret)
270 goto err_unref;
271
272 engine->scratch = vma;
273 DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
274 engine->name, i915_ggtt_offset(vma));
275 return 0;
276
277err_unref:
278 i915_gem_object_put(obj);
279 return ret;
280}
281
282static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
283{
284 i915_vma_unpin_and_release(&engine->scratch);
195} 285}
196 286
197/** 287/**
@@ -213,7 +303,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
213 if (ret) 303 if (ret)
214 return ret; 304 return ret;
215 305
216 return intel_engine_init_cmd_parser(engine); 306 return 0;
217} 307}
218 308
219/** 309/**
@@ -225,7 +315,9 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
225 */ 315 */
226void intel_engine_cleanup_common(struct intel_engine_cs *engine) 316void intel_engine_cleanup_common(struct intel_engine_cs *engine)
227{ 317{
228 intel_engine_cleanup_cmd_parser(engine); 318 intel_engine_cleanup_scratch(engine);
319
229 intel_engine_fini_breadcrumbs(engine); 320 intel_engine_fini_breadcrumbs(engine);
321 intel_engine_cleanup_cmd_parser(engine);
230 i915_gem_batch_pool_fini(&engine->batch_pool); 322 i915_gem_batch_pool_fini(&engine->batch_pool);
231} 323}
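
The scratch page becomes a per-engine pinned VMA with a shared setup/teardown path. A condensed sketch of the intended usage, based only on calls that appear in this series; logical_render_ring_init() below does exactly this for the render engine:

	/* setup: a 4K scratch buffer, pinned high in the global GTT */
	ret = intel_engine_create_scratch(engine, 4096);
	if (ret)
		return ret;

	/* emission paths only ever need its GGTT address */
	scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;

	/* teardown happens in intel_engine_cleanup_common(), which now
	 * calls i915_vma_unpin_and_release(&engine->scratch) */
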
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index e67b09a3328c..bf8b22ad9aed 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -190,9 +190,13 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv)
190 dpfc_ctl |= DPFC_CTL_LIMIT_2X; 190 dpfc_ctl |= DPFC_CTL_LIMIT_2X;
191 else 191 else
192 dpfc_ctl |= DPFC_CTL_LIMIT_1X; 192 dpfc_ctl |= DPFC_CTL_LIMIT_1X;
193 dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg;
194 193
195 I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset); 194 if (params->fb.fence_reg != I915_FENCE_REG_NONE) {
195 dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg;
196 I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
197 } else {
198 I915_WRITE(DPFC_FENCE_YOFF, 0);
199 }
196 200
197 /* enable it... */ 201 /* enable it... */
198 I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); 202 I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
@@ -244,21 +248,29 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
244 dpfc_ctl |= DPFC_CTL_LIMIT_1X; 248 dpfc_ctl |= DPFC_CTL_LIMIT_1X;
245 break; 249 break;
246 } 250 }
247 dpfc_ctl |= DPFC_CTL_FENCE_EN; 251
248 if (IS_GEN5(dev_priv)) 252 if (params->fb.fence_reg != I915_FENCE_REG_NONE) {
249 dpfc_ctl |= params->fb.fence_reg; 253 dpfc_ctl |= DPFC_CTL_FENCE_EN;
254 if (IS_GEN5(dev_priv))
255 dpfc_ctl |= params->fb.fence_reg;
256 if (IS_GEN6(dev_priv)) {
257 I915_WRITE(SNB_DPFC_CTL_SA,
258 SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
259 I915_WRITE(DPFC_CPU_FENCE_OFFSET,
260 params->crtc.fence_y_offset);
261 }
262 } else {
263 if (IS_GEN6(dev_priv)) {
264 I915_WRITE(SNB_DPFC_CTL_SA, 0);
265 I915_WRITE(DPFC_CPU_FENCE_OFFSET, 0);
266 }
267 }
250 268
251 I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset); 269 I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
252 I915_WRITE(ILK_FBC_RT_BASE, params->fb.ggtt_offset | ILK_FBC_RT_VALID); 270 I915_WRITE(ILK_FBC_RT_BASE, params->fb.ggtt_offset | ILK_FBC_RT_VALID);
253 /* enable it... */ 271 /* enable it... */
254 I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); 272 I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
255 273
256 if (IS_GEN6(dev_priv)) {
257 I915_WRITE(SNB_DPFC_CTL_SA,
258 SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
259 I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
260 }
261
262 intel_fbc_recompress(dev_priv); 274 intel_fbc_recompress(dev_priv);
263} 275}
264 276
@@ -305,7 +317,15 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
305 break; 317 break;
306 } 318 }
307 319
308 dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; 320 if (params->fb.fence_reg != I915_FENCE_REG_NONE) {
321 dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;
322 I915_WRITE(SNB_DPFC_CTL_SA,
323 SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
324 I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
325 } else {
 326		I915_WRITE(SNB_DPFC_CTL_SA, 0);
327 I915_WRITE(DPFC_CPU_FENCE_OFFSET, 0);
328 }
309 329
310 if (dev_priv->fbc.false_color) 330 if (dev_priv->fbc.false_color)
311 dpfc_ctl |= FBC_CTL_FALSE_COLOR; 331 dpfc_ctl |= FBC_CTL_FALSE_COLOR;
@@ -324,10 +344,6 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
324 344
325 I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); 345 I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
326 346
327 I915_WRITE(SNB_DPFC_CTL_SA,
328 SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
329 I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
330
331 intel_fbc_recompress(dev_priv); 347 intel_fbc_recompress(dev_priv);
332} 348}
333 349
@@ -709,6 +725,14 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
709 return effective_w <= max_w && effective_h <= max_h; 725 return effective_w <= max_w && effective_h <= max_h;
710} 726}
711 727
728/* XXX replace me when we have VMA tracking for intel_plane_state */
729static int get_fence_id(struct drm_framebuffer *fb)
730{
731 struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL);
732
733 return vma && vma->fence ? vma->fence->id : I915_FENCE_REG_NONE;
734}
735
712static void intel_fbc_update_state_cache(struct intel_crtc *crtc, 736static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
713 struct intel_crtc_state *crtc_state, 737 struct intel_crtc_state *crtc_state,
714 struct intel_plane_state *plane_state) 738 struct intel_plane_state *plane_state)
@@ -737,10 +761,10 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
737 /* FIXME: We lack the proper locking here, so only run this on the 761 /* FIXME: We lack the proper locking here, so only run this on the
738 * platforms that need. */ 762 * platforms that need. */
739 if (IS_GEN(dev_priv, 5, 6)) 763 if (IS_GEN(dev_priv, 5, 6))
740 cache->fb.ilk_ggtt_offset = i915_gem_obj_ggtt_offset(obj); 764 cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL);
741 cache->fb.pixel_format = fb->pixel_format; 765 cache->fb.pixel_format = fb->pixel_format;
742 cache->fb.stride = fb->pitches[0]; 766 cache->fb.stride = fb->pitches[0];
743 cache->fb.fence_reg = obj->fence_reg; 767 cache->fb.fence_reg = get_fence_id(fb);
744 cache->fb.tiling_mode = i915_gem_object_get_tiling(obj); 768 cache->fb.tiling_mode = i915_gem_object_get_tiling(obj);
745} 769}
746 770
@@ -768,11 +792,17 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
768 792
769 /* The use of a CPU fence is mandatory in order to detect writes 793 /* The use of a CPU fence is mandatory in order to detect writes
770 * by the CPU to the scanout and trigger updates to the FBC. 794 * by the CPU to the scanout and trigger updates to the FBC.
795 *
 796	 * Note that it is possible for a tiled surface to be unmappable (and
 797	 * so have no fence associated with it) due to aperture constraints
798 * at the time of pinning.
771 */ 799 */
772 if (cache->fb.tiling_mode != I915_TILING_X || 800 if (cache->fb.tiling_mode != I915_TILING_X ||
773 cache->fb.fence_reg == I915_FENCE_REG_NONE) { 801 cache->fb.fence_reg == I915_FENCE_REG_NONE) {
774 fbc->no_fbc_reason = "framebuffer not tiled or fenced"; 802 if (INTEL_GEN(dev_priv) < 5) {
775 return false; 803 fbc->no_fbc_reason = "framebuffer not tiled or fenced";
804 return false;
805 }
776 } 806 }
777 if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) && 807 if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) &&
778 cache->plane.rotation != DRM_ROTATE_0) { 808 cache->plane.rotation != DRM_ROTATE_0) {
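
All three activate paths now program the DPFC fence fields only when a fence is actually held, and intel_fbc_can_activate() rejects an unfenced or non-X-tiled surface only on pre-gen5 hardware, which needs the fence to detect CPU writes to the scanout. A hedged sketch of the combined decision; the helper shape is illustrative, the fields are the ones cached above:

	bool fenced = cache->fb.fence_reg != I915_FENCE_REG_NONE;

	if (cache->fb.tiling_mode != I915_TILING_X || !fenced) {
		if (INTEL_GEN(dev_priv) < 5)
			return false;	/* old hw: fence is mandatory */
	}

	/* gen5+ continues; the activate routine then, per platform:
	 *   if (fenced)  enables DPFC_CTL_FENCE_EN and the fence Y offset
	 *   else         writes 0 to the fence offset / SNB_DPFC_CTL_SA
	 */
	return true;
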
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 2c14dfc5e4f0..4003e4908c09 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -187,7 +187,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
187 struct fb_info *info; 187 struct fb_info *info;
188 struct drm_framebuffer *fb; 188 struct drm_framebuffer *fb;
189 struct i915_vma *vma; 189 struct i915_vma *vma;
190 struct drm_i915_gem_object *obj;
191 bool prealloc = false; 190 bool prealloc = false;
192 void __iomem *vaddr; 191 void __iomem *vaddr;
193 int ret; 192 int ret;
@@ -215,17 +214,17 @@ static int intelfb_create(struct drm_fb_helper *helper,
215 sizes->fb_height = intel_fb->base.height; 214 sizes->fb_height = intel_fb->base.height;
216 } 215 }
217 216
218 obj = intel_fb->obj;
219
220 mutex_lock(&dev->struct_mutex); 217 mutex_lock(&dev->struct_mutex);
221 218
222 /* Pin the GGTT vma for our access via info->screen_base. 219 /* Pin the GGTT vma for our access via info->screen_base.
223 * This also validates that any existing fb inherited from the 220 * This also validates that any existing fb inherited from the
224 * BIOS is suitable for own access. 221 * BIOS is suitable for own access.
225 */ 222 */
226 ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); 223 vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0);
227 if (ret) 224 if (IS_ERR(vma)) {
225 ret = PTR_ERR(vma);
228 goto out_unlock; 226 goto out_unlock;
227 }
229 228
230 info = drm_fb_helper_alloc_fbi(helper); 229 info = drm_fb_helper_alloc_fbi(helper);
231 if (IS_ERR(info)) { 230 if (IS_ERR(info)) {
@@ -245,13 +244,11 @@ static int intelfb_create(struct drm_fb_helper *helper,
245 info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; 244 info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT;
246 info->fbops = &intelfb_ops; 245 info->fbops = &intelfb_ops;
247 246
248 vma = i915_gem_obj_to_ggtt(obj);
249
250 /* setup aperture base/size for vesafb takeover */ 247 /* setup aperture base/size for vesafb takeover */
251 info->apertures->ranges[0].base = dev->mode_config.fb_base; 248 info->apertures->ranges[0].base = dev->mode_config.fb_base;
252 info->apertures->ranges[0].size = ggtt->mappable_end; 249 info->apertures->ranges[0].size = ggtt->mappable_end;
253 250
254 info->fix.smem_start = dev->mode_config.fb_base + vma->node.start; 251 info->fix.smem_start = dev->mode_config.fb_base + i915_ggtt_offset(vma);
255 info->fix.smem_len = vma->node.size; 252 info->fix.smem_len = vma->node.size;
256 253
257 vaddr = i915_vma_pin_iomap(vma); 254 vaddr = i915_vma_pin_iomap(vma);
@@ -273,14 +270,14 @@ static int intelfb_create(struct drm_fb_helper *helper,
273 * If the object is stolen however, it will be full of whatever 270 * If the object is stolen however, it will be full of whatever
274 * garbage was left in there. 271 * garbage was left in there.
275 */ 272 */
276 if (ifbdev->fb->obj->stolen && !prealloc) 273 if (intel_fb->obj->stolen && !prealloc)
277 memset_io(info->screen_base, 0, info->screen_size); 274 memset_io(info->screen_base, 0, info->screen_size);
278 275
279 /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ 276 /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
280 277
281 DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx, bo %p\n", 278 DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n",
282 fb->width, fb->height, 279 fb->width, fb->height, i915_ggtt_offset(vma));
283 i915_gem_obj_ggtt_offset(obj), obj); 280 ifbdev->vma = vma;
284 281
285 mutex_unlock(&dev->struct_mutex); 282 mutex_unlock(&dev->struct_mutex);
286 vga_switcheroo_client_fb_set(dev->pdev, info); 283 vga_switcheroo_client_fb_set(dev->pdev, info);
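
intel_pin_and_fence_fb_obj() now returns the pinned VMA (or an ERR_PTR) instead of an errno, so fbdev no longer needs the separate i915_gem_obj_to_ggtt() lookup. The caller pattern above, reduced to its essentials:

	struct i915_vma *vma;

	vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* both the aperture address and the size come from the vma */
	info->fix.smem_start = dev->mode_config.fb_base + i915_ggtt_offset(vma);
	info->fix.smem_len = vma->node.size;
	ifbdev->vma = vma;	/* kept for later unpinning */
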
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 623cf26cd784..c97326269588 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -63,26 +63,25 @@ struct drm_i915_gem_request;
63 * retcode: errno from last guc_submit() 63 * retcode: errno from last guc_submit()
64 */ 64 */
65struct i915_guc_client { 65struct i915_guc_client {
66 struct drm_i915_gem_object *client_obj; 66 struct i915_vma *vma;
67 void *client_base; /* first page (only) of above */ 67 void *client_base; /* first page (only) of above */
68 struct i915_gem_context *owner; 68 struct i915_gem_context *owner;
69 struct intel_guc *guc; 69 struct intel_guc *guc;
70
71 uint32_t engines; /* bitmap of (host) engine ids */
70 uint32_t priority; 72 uint32_t priority;
71 uint32_t ctx_index; 73 uint32_t ctx_index;
72
73 uint32_t proc_desc_offset; 74 uint32_t proc_desc_offset;
75
74 uint32_t doorbell_offset; 76 uint32_t doorbell_offset;
75 uint32_t cookie; 77 uint32_t cookie;
76 uint16_t doorbell_id; 78 uint16_t doorbell_id;
77 uint16_t padding; /* Maintain alignment */ 79 uint16_t padding[3]; /* Maintain alignment */
78 80
79 uint32_t wq_offset; 81 uint32_t wq_offset;
80 uint32_t wq_size; 82 uint32_t wq_size;
81 uint32_t wq_tail; 83 uint32_t wq_tail;
82 uint32_t unused; /* Was 'wq_head' */
83
84 uint32_t no_wq_space; 84 uint32_t no_wq_space;
85 uint32_t q_fail; /* No longer used */
86 uint32_t b_fail; 85 uint32_t b_fail;
87 int retcode; 86 int retcode;
88 87
@@ -125,11 +124,10 @@ struct intel_guc_fw {
125struct intel_guc { 124struct intel_guc {
126 struct intel_guc_fw guc_fw; 125 struct intel_guc_fw guc_fw;
127 uint32_t log_flags; 126 uint32_t log_flags;
128 struct drm_i915_gem_object *log_obj; 127 struct i915_vma *log_vma;
129
130 struct drm_i915_gem_object *ads_obj;
131 128
132 struct drm_i915_gem_object *ctx_pool_obj; 129 struct i915_vma *ads_vma;
130 struct i915_vma *ctx_pool_vma;
133 struct ida ctx_ids; 131 struct ida ctx_ids;
134 132
135 struct i915_guc_client *execbuf_client; 133 struct i915_guc_client *execbuf_client;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 3763e30cc165..324812d69b70 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -59,13 +59,25 @@
59 * 59 *
60 */ 60 */
61 61
62#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin" 62#define SKL_FW_MAJOR 6
63#define SKL_FW_MINOR 1
64
65#define BXT_FW_MAJOR 8
66#define BXT_FW_MINOR 7
67
68#define KBL_FW_MAJOR 9
69#define KBL_FW_MINOR 14
70
71#define GUC_FW_PATH(platform, major, minor) \
72 "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin"
73
74#define I915_SKL_GUC_UCODE GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR)
63MODULE_FIRMWARE(I915_SKL_GUC_UCODE); 75MODULE_FIRMWARE(I915_SKL_GUC_UCODE);
64 76
65#define I915_BXT_GUC_UCODE "i915/bxt_guc_ver8_7.bin" 77#define I915_BXT_GUC_UCODE GUC_FW_PATH(bxt, BXT_FW_MAJOR, BXT_FW_MINOR)
66MODULE_FIRMWARE(I915_BXT_GUC_UCODE); 78MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
67 79
68#define I915_KBL_GUC_UCODE "i915/kbl_guc_ver9_14.bin" 80#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
69MODULE_FIRMWARE(I915_KBL_GUC_UCODE); 81MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
70 82
71/* User-friendly representation of an enum */ 83/* User-friendly representation of an enum */
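
GUC_FW_PATH() assembles the firmware filename at preprocessing time from a bare platform token and the version macros, so the path and the "wanted" version can no longer drift apart. A small self-contained demo of the expansion; the __stringify() stand-in mirrors include/linux/stringify.h and is only here so the snippet builds on its own:

#include <stdio.h>

#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)

#define GUC_FW_PATH(platform, major, minor) \
	"i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin"

int main(void)
{
	/* string-literal concatenation yields the final paths */
	puts(GUC_FW_PATH(skl, 6, 1));	/* i915/skl_guc_ver6_1.bin */
	puts(GUC_FW_PATH(bxt, 8, 7));	/* i915/bxt_guc_ver8_7.bin */
	puts(GUC_FW_PATH(kbl, 9, 14));	/* i915/kbl_guc_ver9_14.bin */
	return 0;
}
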
@@ -181,16 +193,15 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv)
181 i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; 193 i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
182 } 194 }
183 195
184 if (guc->ads_obj) { 196 if (guc->ads_vma) {
185 u32 ads = (u32)i915_gem_obj_ggtt_offset(guc->ads_obj) 197 u32 ads = i915_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
186 >> PAGE_SHIFT;
187 params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; 198 params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
188 params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; 199 params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
189 } 200 }
190 201
191 /* If GuC submission is enabled, set up additional parameters here */ 202 /* If GuC submission is enabled, set up additional parameters here */
192 if (i915.enable_guc_submission) { 203 if (i915.enable_guc_submission) {
193 u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); 204 u32 pgs = i915_ggtt_offset(dev_priv->guc.ctx_pool_vma);
194 u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; 205 u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
195 206
196 pgs >>= PAGE_SHIFT; 207 pgs >>= PAGE_SHIFT;
@@ -238,12 +249,12 @@ static inline bool guc_ucode_response(struct drm_i915_private *dev_priv,
238 * Note that GuC needs the CSS header plus uKernel code to be copied by the 249 * Note that GuC needs the CSS header plus uKernel code to be copied by the
239 * DMA engine in one operation, whereas the RSA signature is loaded via MMIO. 250 * DMA engine in one operation, whereas the RSA signature is loaded via MMIO.
240 */ 251 */
241static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv) 252static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
253 struct i915_vma *vma)
242{ 254{
243 struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; 255 struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
244 struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj;
245 unsigned long offset; 256 unsigned long offset;
246 struct sg_table *sg = fw_obj->pages; 257 struct sg_table *sg = vma->pages;
247 u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT]; 258 u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
248 int i, ret = 0; 259 int i, ret = 0;
249 260
@@ -260,7 +271,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
260 I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); 271 I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
261 272
262 /* Set the source address for the new blob */ 273 /* Set the source address for the new blob */
263 offset = i915_gem_obj_ggtt_offset(fw_obj) + guc_fw->header_offset; 274 offset = i915_ggtt_offset(vma) + guc_fw->header_offset;
264 I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); 275 I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
265 I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); 276 I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
266 277
@@ -315,6 +326,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
315{ 326{
316 struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; 327 struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
317 struct drm_device *dev = &dev_priv->drm; 328 struct drm_device *dev = &dev_priv->drm;
329 struct i915_vma *vma;
318 int ret; 330 int ret;
319 331
320 ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false); 332 ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false);
@@ -323,10 +335,10 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
323 return ret; 335 return ret;
324 } 336 }
325 337
326 ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0); 338 vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
327 if (ret) { 339 if (IS_ERR(vma)) {
328 DRM_DEBUG_DRIVER("pin failed %d\n", ret); 340 DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma));
329 return ret; 341 return PTR_ERR(vma);
330 } 342 }
331 343
332 /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ 344 /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
@@ -369,7 +381,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
369 381
370 set_guc_init_params(dev_priv); 382 set_guc_init_params(dev_priv);
371 383
372 ret = guc_ucode_xfer_dma(dev_priv); 384 ret = guc_ucode_xfer_dma(dev_priv, vma);
373 385
374 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 386 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
375 387
@@ -377,7 +389,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
377 * We keep the object pages for reuse during resume. But we can unpin it 389 * We keep the object pages for reuse during resume. But we can unpin it
378 * now that DMA has completed, so it doesn't continue to take up space. 390 * now that DMA has completed, so it doesn't continue to take up space.
379 */ 391 */
380 i915_gem_object_ggtt_unpin(guc_fw->guc_fw_obj); 392 i915_vma_unpin(vma);
381 393
382 return ret; 394 return ret;
383} 395}
@@ -697,16 +709,16 @@ void intel_guc_init(struct drm_device *dev)
697 fw_path = NULL; 709 fw_path = NULL;
698 } else if (IS_SKYLAKE(dev)) { 710 } else if (IS_SKYLAKE(dev)) {
699 fw_path = I915_SKL_GUC_UCODE; 711 fw_path = I915_SKL_GUC_UCODE;
700 guc_fw->guc_fw_major_wanted = 6; 712 guc_fw->guc_fw_major_wanted = SKL_FW_MAJOR;
701 guc_fw->guc_fw_minor_wanted = 1; 713 guc_fw->guc_fw_minor_wanted = SKL_FW_MINOR;
702 } else if (IS_BROXTON(dev)) { 714 } else if (IS_BROXTON(dev)) {
703 fw_path = I915_BXT_GUC_UCODE; 715 fw_path = I915_BXT_GUC_UCODE;
704 guc_fw->guc_fw_major_wanted = 8; 716 guc_fw->guc_fw_major_wanted = BXT_FW_MAJOR;
705 guc_fw->guc_fw_minor_wanted = 7; 717 guc_fw->guc_fw_minor_wanted = BXT_FW_MINOR;
706 } else if (IS_KABYLAKE(dev)) { 718 } else if (IS_KABYLAKE(dev)) {
707 fw_path = I915_KBL_GUC_UCODE; 719 fw_path = I915_KBL_GUC_UCODE;
708 guc_fw->guc_fw_major_wanted = 9; 720 guc_fw->guc_fw_major_wanted = KBL_FW_MAJOR;
709 guc_fw->guc_fw_minor_wanted = 14; 721 guc_fw->guc_fw_minor_wanted = KBL_FW_MINOR;
710 } else { 722 } else {
711 fw_path = ""; /* unknown device */ 723 fw_path = ""; /* unknown device */
712 } 724 }
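
With the firmware object pinned through a VMA, the upload path becomes pin, DMA from i915_ggtt_offset(vma), unpin; the backing pages stay allocated for reuse on resume. The whole flow condensed from the hunks above, with forcewake handling and error paths elided:

	struct i915_vma *vma;
	int ret;

	vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ret = guc_ucode_xfer_dma(dev_priv, vma);	/* source = i915_ggtt_offset(vma) */

	i915_vma_unpin(vma);	/* pages kept, GGTT pin dropped */
	return ret;
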
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index 5dc2c20f6ca1..334d47b5811a 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -477,7 +477,8 @@ void intel_hpd_init(struct drm_i915_private *dev_priv)
477 spin_unlock_irq(&dev_priv->irq_lock); 477 spin_unlock_irq(&dev_priv->irq_lock);
478} 478}
479 479
480void i915_hpd_poll_init_work(struct work_struct *work) { 480static void i915_hpd_poll_init_work(struct work_struct *work)
481{
481 struct drm_i915_private *dev_priv = 482 struct drm_i915_private *dev_priv =
482 container_of(work, struct drm_i915_private, 483 container_of(work, struct drm_i915_private,
483 hotplug.poll_init_work); 484 hotplug.poll_init_work);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 309c5d9b1c57..6b49df4316f4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -315,7 +315,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
315 315
316 desc = ctx->desc_template; /* bits 3-4 */ 316 desc = ctx->desc_template; /* bits 3-4 */
317 desc |= engine->ctx_desc_template; /* bits 0-11 */ 317 desc |= engine->ctx_desc_template; /* bits 0-11 */
318 desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE; 318 desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE;
319 /* bits 12-31 */ 319 /* bits 12-31 */
320 desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ 320 desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
321 321
@@ -763,7 +763,6 @@ void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
763static int intel_lr_context_pin(struct i915_gem_context *ctx, 763static int intel_lr_context_pin(struct i915_gem_context *ctx,
764 struct intel_engine_cs *engine) 764 struct intel_engine_cs *engine)
765{ 765{
766 struct drm_i915_private *dev_priv = ctx->i915;
767 struct intel_context *ce = &ctx->engine[engine->id]; 766 struct intel_context *ce = &ctx->engine[engine->id];
768 void *vaddr; 767 void *vaddr;
769 u32 *lrc_reg_state; 768 u32 *lrc_reg_state;
@@ -774,16 +773,15 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
774 if (ce->pin_count++) 773 if (ce->pin_count++)
775 return 0; 774 return 0;
776 775
777 ret = i915_gem_object_ggtt_pin(ce->state, NULL, 776 ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
778 0, GEN8_LR_CONTEXT_ALIGN, 777 PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
779 PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
780 if (ret) 778 if (ret)
781 goto err; 779 goto err;
782 780
783 vaddr = i915_gem_object_pin_map(ce->state); 781 vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
784 if (IS_ERR(vaddr)) { 782 if (IS_ERR(vaddr)) {
785 ret = PTR_ERR(vaddr); 783 ret = PTR_ERR(vaddr);
786 goto unpin_ctx_obj; 784 goto unpin_vma;
787 } 785 }
788 786
789 lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; 787 lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
@@ -792,24 +790,26 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
792 if (ret) 790 if (ret)
793 goto unpin_map; 791 goto unpin_map;
794 792
795 ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state);
796 intel_lr_context_descriptor_update(ctx, engine); 793 intel_lr_context_descriptor_update(ctx, engine);
797 794
798 lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start; 795 lrc_reg_state[CTX_RING_BUFFER_START+1] =
796 i915_ggtt_offset(ce->ring->vma);
799 ce->lrc_reg_state = lrc_reg_state; 797 ce->lrc_reg_state = lrc_reg_state;
800 ce->state->dirty = true; 798 ce->state->obj->dirty = true;
801 799
802 /* Invalidate GuC TLB. */ 800 /* Invalidate GuC TLB. */
803 if (i915.enable_guc_submission) 801 if (i915.enable_guc_submission) {
802 struct drm_i915_private *dev_priv = ctx->i915;
804 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); 803 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
804 }
805 805
806 i915_gem_context_get(ctx); 806 i915_gem_context_get(ctx);
807 return 0; 807 return 0;
808 808
809unpin_map: 809unpin_map:
810 i915_gem_object_unpin_map(ce->state); 810 i915_gem_object_unpin_map(ce->state->obj);
811unpin_ctx_obj: 811unpin_vma:
812 i915_gem_object_ggtt_unpin(ce->state); 812 __i915_vma_unpin(ce->state);
813err: 813err:
814 ce->pin_count = 0; 814 ce->pin_count = 0;
815 return ret; 815 return ret;
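
intel_lr_context_pin() now works purely on the context VMA: pin it globally above the GuC WOPCM, map the backing object write-back, and derive every GGTT address from i915_ggtt_offset(). Condensed from the hunk above, with the error unwinding written out:

	ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
			   PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
	if (ret)
		return ret;

	vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		__i915_vma_unpin(ce->state);
		return PTR_ERR(vaddr);
	}

	lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
	lrc_reg_state[CTX_RING_BUFFER_START+1] = i915_ggtt_offset(ce->ring->vma);
	ce->state->obj->dirty = true;	/* context image was written by the CPU */
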
@@ -828,12 +828,8 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
828 828
829 intel_ring_unpin(ce->ring); 829 intel_ring_unpin(ce->ring);
830 830
831 i915_gem_object_unpin_map(ce->state); 831 i915_gem_object_unpin_map(ce->state->obj);
832 i915_gem_object_ggtt_unpin(ce->state); 832 i915_vma_unpin(ce->state);
833
834 ce->lrc_vma = NULL;
835 ce->lrc_desc = 0;
836 ce->lrc_reg_state = NULL;
837 833
838 i915_gem_context_put(ctx); 834 i915_gem_context_put(ctx);
839} 835}
@@ -919,7 +915,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
919 wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | 915 wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
920 MI_SRM_LRM_GLOBAL_GTT)); 916 MI_SRM_LRM_GLOBAL_GTT));
921 wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); 917 wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
922 wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256); 918 wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
923 wa_ctx_emit(batch, index, 0); 919 wa_ctx_emit(batch, index, 0);
924 920
925 wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); 921 wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
@@ -937,7 +933,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
937 wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | 933 wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
938 MI_SRM_LRM_GLOBAL_GTT)); 934 MI_SRM_LRM_GLOBAL_GTT));
939 wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); 935 wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
940 wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256); 936 wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
941 wa_ctx_emit(batch, index, 0); 937 wa_ctx_emit(batch, index, 0);
942 938
943 return index; 939 return index;
@@ -998,7 +994,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
998 994
999 /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ 995 /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
1000 /* Actual scratch location is at 128 bytes offset */ 996 /* Actual scratch location is at 128 bytes offset */
1001 scratch_addr = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; 997 scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
1002 998
1003 wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); 999 wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
1004 wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | 1000 wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
@@ -1077,8 +1073,8 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
1077 /* WaClearSlmSpaceAtContextSwitch:kbl */ 1073 /* WaClearSlmSpaceAtContextSwitch:kbl */
1078 /* Actual scratch location is at 128 bytes offset */ 1074 /* Actual scratch location is at 128 bytes offset */
1079 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { 1075 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
1080 uint32_t scratch_addr 1076 u32 scratch_addr =
1081 = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; 1077 i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
1082 1078
1083 wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); 1079 wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
1084 wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | 1080 wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
@@ -1170,45 +1166,44 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
1170 1166
1171static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) 1167static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
1172{ 1168{
1173 int ret; 1169 struct drm_i915_gem_object *obj;
1170 struct i915_vma *vma;
1171 int err;
1174 1172
1175 engine->wa_ctx.obj = i915_gem_object_create(&engine->i915->drm, 1173 obj = i915_gem_object_create(&engine->i915->drm, PAGE_ALIGN(size));
1176 PAGE_ALIGN(size)); 1174 if (IS_ERR(obj))
1177 if (IS_ERR(engine->wa_ctx.obj)) { 1175 return PTR_ERR(obj);
1178 DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
1179 ret = PTR_ERR(engine->wa_ctx.obj);
1180 engine->wa_ctx.obj = NULL;
1181 return ret;
1182 }
1183 1176
1184 ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL, 1177 vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
1185 0, PAGE_SIZE, 0); 1178 if (IS_ERR(vma)) {
1186 if (ret) { 1179 err = PTR_ERR(vma);
1187 DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n", 1180 goto err;
1188 ret);
1189 i915_gem_object_put(engine->wa_ctx.obj);
1190 return ret;
1191 } 1181 }
1192 1182
1183 err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH);
1184 if (err)
1185 goto err;
1186
1187 engine->wa_ctx.vma = vma;
1193 return 0; 1188 return 0;
1189
1190err:
1191 i915_gem_object_put(obj);
1192 return err;
1194} 1193}
1195 1194
1196static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) 1195static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine)
1197{ 1196{
1198 if (engine->wa_ctx.obj) { 1197 i915_vma_unpin_and_release(&engine->wa_ctx.vma);
1199 i915_gem_object_ggtt_unpin(engine->wa_ctx.obj);
1200 i915_gem_object_put(engine->wa_ctx.obj);
1201 engine->wa_ctx.obj = NULL;
1202 }
1203} 1198}
1204 1199
1205static int intel_init_workaround_bb(struct intel_engine_cs *engine) 1200static int intel_init_workaround_bb(struct intel_engine_cs *engine)
1206{ 1201{
1207 int ret; 1202 struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
1208 uint32_t *batch; 1203 uint32_t *batch;
1209 uint32_t offset; 1204 uint32_t offset;
1210 struct page *page; 1205 struct page *page;
1211 struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; 1206 int ret;
1212 1207
1213 WARN_ON(engine->id != RCS); 1208 WARN_ON(engine->id != RCS);
1214 1209
@@ -1220,7 +1215,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
1220 } 1215 }
1221 1216
1222 /* some WA perform writes to scratch page, ensure it is valid */ 1217 /* some WA perform writes to scratch page, ensure it is valid */
1223 if (engine->scratch.obj == NULL) { 1218 if (!engine->scratch) {
1224 DRM_ERROR("scratch page not allocated for %s\n", engine->name); 1219 DRM_ERROR("scratch page not allocated for %s\n", engine->name);
1225 return -EINVAL; 1220 return -EINVAL;
1226 } 1221 }
@@ -1231,7 +1226,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
1231 return ret; 1226 return ret;
1232 } 1227 }
1233 1228
1234 page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0); 1229 page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
1235 batch = kmap_atomic(page); 1230 batch = kmap_atomic(page);
1236 offset = 0; 1231 offset = 0;
1237 1232
@@ -1278,7 +1273,7 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
1278 struct drm_i915_private *dev_priv = engine->i915; 1273 struct drm_i915_private *dev_priv = engine->i915;
1279 1274
1280 I915_WRITE(RING_HWS_PGA(engine->mmio_base), 1275 I915_WRITE(RING_HWS_PGA(engine->mmio_base),
1281 (u32)engine->status_page.gfx_addr); 1276 engine->status_page.ggtt_offset);
1282 POSTING_READ(RING_HWS_PGA(engine->mmio_base)); 1277 POSTING_READ(RING_HWS_PGA(engine->mmio_base));
1283} 1278}
1284 1279
@@ -1488,7 +1483,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
1488{ 1483{
1489 struct intel_ring *ring = request->ring; 1484 struct intel_ring *ring = request->ring;
1490 struct intel_engine_cs *engine = request->engine; 1485 struct intel_engine_cs *engine = request->engine;
1491 u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 1486 u32 scratch_addr =
1487 i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
1492 bool vf_flush_wa = false, dc_flush_wa = false; 1488 bool vf_flush_wa = false, dc_flush_wa = false;
1493 u32 flags = 0; 1489 u32 flags = 0;
1494 int ret; 1490 int ret;
@@ -1700,9 +1696,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
1700 1696
1701 intel_engine_cleanup_common(engine); 1697 intel_engine_cleanup_common(engine);
1702 1698
1703 if (engine->status_page.obj) { 1699 if (engine->status_page.vma) {
1704 i915_gem_object_unpin_map(engine->status_page.obj); 1700 i915_gem_object_unpin_map(engine->status_page.vma->obj);
1705 engine->status_page.obj = NULL; 1701 engine->status_page.vma = NULL;
1706 } 1702 }
1707 intel_lr_context_unpin(dev_priv->kernel_context, engine); 1703 intel_lr_context_unpin(dev_priv->kernel_context, engine);
1708 1704
@@ -1747,19 +1743,19 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
1747} 1743}
1748 1744
1749static int 1745static int
1750lrc_setup_hws(struct intel_engine_cs *engine, 1746lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
1751 struct drm_i915_gem_object *dctx_obj)
1752{ 1747{
1748 const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
1753 void *hws; 1749 void *hws;
1754 1750
1755 /* The HWSP is part of the default context object in LRC mode. */ 1751 /* The HWSP is part of the default context object in LRC mode. */
1756 engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) + 1752 hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1757 LRC_PPHWSP_PN * PAGE_SIZE;
1758 hws = i915_gem_object_pin_map(dctx_obj);
1759 if (IS_ERR(hws)) 1753 if (IS_ERR(hws))
1760 return PTR_ERR(hws); 1754 return PTR_ERR(hws);
1761 engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; 1755
1762 engine->status_page.obj = dctx_obj; 1756 engine->status_page.page_addr = hws + hws_offset;
1757 engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
1758 engine->status_page.vma = vma;
1763 1759
1764 return 0; 1760 return 0;
1765} 1761}
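
With the status page described by a VMA, both views of the HWSP are plain offsets into the default context, and lrc_init_hws() writes the GPU view straight into RING_HWS_PGA. A sketch tying the two hunks together:

	const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;

	/* setup (lrc_setup_hws) */
	engine->status_page.page_addr = hws + hws_offset;			/* CPU view */
	engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;	/* GPU view */
	engine->status_page.vma = vma;

	/* hw init (lrc_init_hws) */
	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
		   engine->status_page.ggtt_offset);
	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
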
@@ -1849,11 +1845,10 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
1849 else 1845 else
1850 engine->init_hw = gen8_init_render_ring; 1846 engine->init_hw = gen8_init_render_ring;
1851 engine->init_context = gen8_init_rcs_context; 1847 engine->init_context = gen8_init_rcs_context;
1852 engine->cleanup = intel_fini_pipe_control;
1853 engine->emit_flush = gen8_emit_flush_render; 1848 engine->emit_flush = gen8_emit_flush_render;
1854 engine->emit_request = gen8_emit_request_render; 1849 engine->emit_request = gen8_emit_request_render;
1855 1850
1856 ret = intel_init_pipe_control(engine, 4096); 1851 ret = intel_engine_create_scratch(engine, 4096);
1857 if (ret) 1852 if (ret)
1858 return ret; 1853 return ret;
1859 1854
@@ -1968,7 +1963,7 @@ populate_lr_context(struct i915_gem_context *ctx,
1968 return ret; 1963 return ret;
1969 } 1964 }
1970 1965
1971 vaddr = i915_gem_object_pin_map(ctx_obj); 1966 vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
1972 if (IS_ERR(vaddr)) { 1967 if (IS_ERR(vaddr)) {
1973 ret = PTR_ERR(vaddr); 1968 ret = PTR_ERR(vaddr);
1974 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); 1969 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
@@ -2025,9 +2020,9 @@ populate_lr_context(struct i915_gem_context *ctx,
2025 RING_INDIRECT_CTX(engine->mmio_base), 0); 2020 RING_INDIRECT_CTX(engine->mmio_base), 0);
2026 ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, 2021 ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET,
2027 RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); 2022 RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0);
2028 if (engine->wa_ctx.obj) { 2023 if (engine->wa_ctx.vma) {
2029 struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; 2024 struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
2030 uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj); 2025 u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
2031 2026
2032 reg_state[CTX_RCS_INDIRECT_CTX+1] = 2027 reg_state[CTX_RCS_INDIRECT_CTX+1] =
2033 (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | 2028 (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
@@ -2131,6 +2126,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
2131{ 2126{
2132 struct drm_i915_gem_object *ctx_obj; 2127 struct drm_i915_gem_object *ctx_obj;
2133 struct intel_context *ce = &ctx->engine[engine->id]; 2128 struct intel_context *ce = &ctx->engine[engine->id];
2129 struct i915_vma *vma;
2134 uint32_t context_size; 2130 uint32_t context_size;
2135 struct intel_ring *ring; 2131 struct intel_ring *ring;
2136 int ret; 2132 int ret;
@@ -2148,6 +2144,12 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
2148 return PTR_ERR(ctx_obj); 2144 return PTR_ERR(ctx_obj);
2149 } 2145 }
2150 2146
2147 vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL);
2148 if (IS_ERR(vma)) {
2149 ret = PTR_ERR(vma);
2150 goto error_deref_obj;
2151 }
2152
2151 ring = intel_engine_create_ring(engine, ctx->ring_size); 2153 ring = intel_engine_create_ring(engine, ctx->ring_size);
2152 if (IS_ERR(ring)) { 2154 if (IS_ERR(ring)) {
2153 ret = PTR_ERR(ring); 2155 ret = PTR_ERR(ring);
@@ -2161,7 +2163,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
2161 } 2163 }
2162 2164
2163 ce->ring = ring; 2165 ce->ring = ring;
2164 ce->state = ctx_obj; 2166 ce->state = vma;
2165 ce->initialised = engine->init_context == NULL; 2167 ce->initialised = engine->init_context == NULL;
2166 2168
2167 return 0; 2169 return 0;
@@ -2170,8 +2172,6 @@ error_ring_free:
2170 intel_ring_free(ring); 2172 intel_ring_free(ring);
2171error_deref_obj: 2173error_deref_obj:
2172 i915_gem_object_put(ctx_obj); 2174 i915_gem_object_put(ctx_obj);
2173 ce->ring = NULL;
2174 ce->state = NULL;
2175 return ret; 2175 return ret;
2176} 2176}
2177 2177
@@ -2182,24 +2182,23 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv,
2182 2182
2183 for_each_engine(engine, dev_priv) { 2183 for_each_engine(engine, dev_priv) {
2184 struct intel_context *ce = &ctx->engine[engine->id]; 2184 struct intel_context *ce = &ctx->engine[engine->id];
2185 struct drm_i915_gem_object *ctx_obj = ce->state;
2186 void *vaddr; 2185 void *vaddr;
2187 uint32_t *reg_state; 2186 uint32_t *reg_state;
2188 2187
2189 if (!ctx_obj) 2188 if (!ce->state)
2190 continue; 2189 continue;
2191 2190
2192 vaddr = i915_gem_object_pin_map(ctx_obj); 2191 vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
2193 if (WARN_ON(IS_ERR(vaddr))) 2192 if (WARN_ON(IS_ERR(vaddr)))
2194 continue; 2193 continue;
2195 2194
2196 reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; 2195 reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
2197 ctx_obj->dirty = true;
2198 2196
2199 reg_state[CTX_RING_HEAD+1] = 0; 2197 reg_state[CTX_RING_HEAD+1] = 0;
2200 reg_state[CTX_RING_TAIL+1] = 0; 2198 reg_state[CTX_RING_TAIL+1] = 0;
2201 2199
2202 i915_gem_object_unpin_map(ctx_obj); 2200 ce->state->obj->dirty = true;
2201 i915_gem_object_unpin_map(ce->state->obj);
2203 2202
2204 ce->ring->head = 0; 2203 ce->ring->head = 0;
2205 ce->ring->tail = 0; 2204 ce->ring->tail = 0;
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index e29f3d12fd1c..52d6ed6f6966 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -48,6 +48,20 @@ struct intel_lvds_connector {
48 struct notifier_block lid_notifier; 48 struct notifier_block lid_notifier;
49}; 49};
50 50
51struct intel_lvds_pps {
52 /* 100us units */
53 int t1_t2;
54 int t3;
55 int t4;
56 int t5;
57 int tx;
58
59 int divider;
60
61 int port;
62 bool powerdown_on_reset;
63};
64
51struct intel_lvds_encoder { 65struct intel_lvds_encoder {
52 struct intel_encoder base; 66 struct intel_encoder base;
53 67
@@ -55,6 +69,9 @@ struct intel_lvds_encoder {
55 i915_reg_t reg; 69 i915_reg_t reg;
56 u32 a3_power; 70 u32 a3_power;
57 71
72 struct intel_lvds_pps init_pps;
73 u32 init_lvds_val;
74
58 struct intel_lvds_connector *attached_connector; 75 struct intel_lvds_connector *attached_connector;
59}; 76};
60 77
@@ -136,6 +153,83 @@ static void intel_lvds_get_config(struct intel_encoder *encoder,
136 pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; 153 pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock;
137} 154}
138 155
156static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv,
157 struct intel_lvds_pps *pps)
158{
159 u32 val;
160
161 pps->powerdown_on_reset = I915_READ(PP_CONTROL(0)) & PANEL_POWER_RESET;
162
163 val = I915_READ(PP_ON_DELAYS(0));
164 pps->port = (val & PANEL_PORT_SELECT_MASK) >>
165 PANEL_PORT_SELECT_SHIFT;
166 pps->t1_t2 = (val & PANEL_POWER_UP_DELAY_MASK) >>
167 PANEL_POWER_UP_DELAY_SHIFT;
168 pps->t5 = (val & PANEL_LIGHT_ON_DELAY_MASK) >>
169 PANEL_LIGHT_ON_DELAY_SHIFT;
170
171 val = I915_READ(PP_OFF_DELAYS(0));
172 pps->t3 = (val & PANEL_POWER_DOWN_DELAY_MASK) >>
173 PANEL_POWER_DOWN_DELAY_SHIFT;
174 pps->tx = (val & PANEL_LIGHT_OFF_DELAY_MASK) >>
175 PANEL_LIGHT_OFF_DELAY_SHIFT;
176
177 val = I915_READ(PP_DIVISOR(0));
178 pps->divider = (val & PP_REFERENCE_DIVIDER_MASK) >>
179 PP_REFERENCE_DIVIDER_SHIFT;
180 val = (val & PANEL_POWER_CYCLE_DELAY_MASK) >>
181 PANEL_POWER_CYCLE_DELAY_SHIFT;
182 /*
183 * Remove the BSpec specified +1 (100ms) offset that accounts for a
184 * too short power-cycle delay due to the asynchronous programming of
185 * the register.
186 */
187 if (val)
188 val--;
189 /* Convert from 100ms to 100us units */
190 pps->t4 = val * 1000;
191
192 if (INTEL_INFO(dev_priv)->gen <= 4 &&
193 pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) {
194 DRM_DEBUG_KMS("Panel power timings uninitialized, "
195 "setting defaults\n");
196 /* Set T2 to 40ms and T5 to 200ms in 100 usec units */
197 pps->t1_t2 = 40 * 10;
198 pps->t5 = 200 * 10;
199 /* Set T3 to 35ms and Tx to 200ms in 100 usec units */
200 pps->t3 = 35 * 10;
201 pps->tx = 200 * 10;
202 }
203
204 DRM_DEBUG_DRIVER("LVDS PPS:t1+t2 %d t3 %d t4 %d t5 %d tx %d "
205 "divider %d port %d powerdown_on_reset %d\n",
206 pps->t1_t2, pps->t3, pps->t4, pps->t5, pps->tx,
207 pps->divider, pps->port, pps->powerdown_on_reset);
208}
209
210static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv,
211 struct intel_lvds_pps *pps)
212{
213 u32 val;
214
215 val = I915_READ(PP_CONTROL(0));
216 WARN_ON((val & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS);
217 if (pps->powerdown_on_reset)
218 val |= PANEL_POWER_RESET;
219 I915_WRITE(PP_CONTROL(0), val);
220
221 I915_WRITE(PP_ON_DELAYS(0), (pps->port << PANEL_PORT_SELECT_SHIFT) |
222 (pps->t1_t2 << PANEL_POWER_UP_DELAY_SHIFT) |
223 (pps->t5 << PANEL_LIGHT_ON_DELAY_SHIFT));
224 I915_WRITE(PP_OFF_DELAYS(0), (pps->t3 << PANEL_POWER_DOWN_DELAY_SHIFT) |
225 (pps->tx << PANEL_LIGHT_OFF_DELAY_SHIFT));
226
227 val = pps->divider << PP_REFERENCE_DIVIDER_SHIFT;
228 val |= (DIV_ROUND_UP(pps->t4, 1000) + 1) <<
229 PANEL_POWER_CYCLE_DELAY_SHIFT;
230 I915_WRITE(PP_DIVISOR(0), val);
231}
232
139static void intel_pre_enable_lvds(struct intel_encoder *encoder) 233static void intel_pre_enable_lvds(struct intel_encoder *encoder)
140{ 234{
141 struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); 235 struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
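
The PPS timings are cached in 100 µs units, while the PANEL_POWER_CYCLE_DELAY field is counted in 100 ms units with a BSpec-mandated +1; the read path above strips the offset and scales up, and intel_lvds_pps_init_hw() reverses it with DIV_ROUND_UP. A small self-contained check of that round trip; DIV_ROUND_UP is redefined locally only so the snippet builds outside the kernel:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int field = 5;		/* register field: (5 - 1) * 100 ms */
	unsigned int t4;

	/* read path: drop the +1, convert 100 ms units to 100 us units */
	if (field)
		field--;
	t4 = field * 1000;		/* 400 ms == 4000 * 100 us */

	/* write path: scale back down and re-apply the +1 */
	printf("t4=%u field=%u\n", t4, DIV_ROUND_UP(t4, 1000) + 1);
	return 0;			/* prints: t4=4000 field=5 */
}
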
@@ -154,7 +248,9 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder)
154 assert_pll_disabled(dev_priv, pipe); 248 assert_pll_disabled(dev_priv, pipe);
155 } 249 }
156 250
157 temp = I915_READ(lvds_encoder->reg); 251 intel_lvds_pps_init_hw(dev_priv, &lvds_encoder->init_pps);
252
253 temp = lvds_encoder->init_lvds_val;
158 temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; 254 temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
159 255
160 if (HAS_PCH_CPT(dev)) { 256 if (HAS_PCH_CPT(dev)) {
@@ -217,21 +313,12 @@ static void intel_enable_lvds(struct intel_encoder *encoder)
217 struct intel_connector *intel_connector = 313 struct intel_connector *intel_connector =
218 &lvds_encoder->attached_connector->base; 314 &lvds_encoder->attached_connector->base;
219 struct drm_i915_private *dev_priv = to_i915(dev); 315 struct drm_i915_private *dev_priv = to_i915(dev);
220 i915_reg_t ctl_reg, stat_reg;
221
222 if (HAS_PCH_SPLIT(dev)) {
223 ctl_reg = PCH_PP_CONTROL;
224 stat_reg = PCH_PP_STATUS;
225 } else {
226 ctl_reg = PP_CONTROL;
227 stat_reg = PP_STATUS;
228 }
229 316
230 I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) | LVDS_PORT_EN); 317 I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) | LVDS_PORT_EN);
231 318
232 I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON); 319 I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON);
233 POSTING_READ(lvds_encoder->reg); 320 POSTING_READ(lvds_encoder->reg);
234 if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, PP_ON, 1000)) 321 if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000))
235 DRM_ERROR("timed out waiting for panel to power on\n"); 322 DRM_ERROR("timed out waiting for panel to power on\n");
236 323
237 intel_panel_enable_backlight(intel_connector); 324 intel_panel_enable_backlight(intel_connector);
@@ -242,18 +329,9 @@ static void intel_disable_lvds(struct intel_encoder *encoder)
242 struct drm_device *dev = encoder->base.dev; 329 struct drm_device *dev = encoder->base.dev;
243 struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); 330 struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
244 struct drm_i915_private *dev_priv = to_i915(dev); 331 struct drm_i915_private *dev_priv = to_i915(dev);
245 i915_reg_t ctl_reg, stat_reg;
246
247 if (HAS_PCH_SPLIT(dev)) {
248 ctl_reg = PCH_PP_CONTROL;
249 stat_reg = PCH_PP_STATUS;
250 } else {
251 ctl_reg = PP_CONTROL;
252 stat_reg = PP_STATUS;
253 }
254 332
255 I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON); 333 I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) & ~PANEL_POWER_ON);
256 if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, 0, 1000)) 334 if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, 0, 1000))
257 DRM_ERROR("timed out waiting for panel to power off\n"); 335 DRM_ERROR("timed out waiting for panel to power off\n");
258 336
259 I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) & ~LVDS_PORT_EN); 337 I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) & ~LVDS_PORT_EN);
@@ -900,17 +978,6 @@ void intel_lvds_init(struct drm_device *dev)
900 int pipe; 978 int pipe;
901 u8 pin; 979 u8 pin;
902 980
903 /*
904 * Unlock registers and just leave them unlocked. Do this before
905 * checking quirk lists to avoid bogus WARNINGs.
906 */
907 if (HAS_PCH_SPLIT(dev)) {
908 I915_WRITE(PCH_PP_CONTROL,
909 I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS);
910 } else if (INTEL_INFO(dev_priv)->gen < 5) {
911 I915_WRITE(PP_CONTROL,
912 I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS);
913 }
914 if (!intel_lvds_supported(dev)) 981 if (!intel_lvds_supported(dev))
915 return; 982 return;
916 983
@@ -943,18 +1010,6 @@ void intel_lvds_init(struct drm_device *dev)
943 DRM_DEBUG_KMS("LVDS is not present in VBT, but enabled anyway\n"); 1010 DRM_DEBUG_KMS("LVDS is not present in VBT, but enabled anyway\n");
944 } 1011 }
945 1012
946 /* Set the Panel Power On/Off timings if uninitialized. */
947 if (INTEL_INFO(dev_priv)->gen < 5 &&
948 I915_READ(PP_ON_DELAYS) == 0 && I915_READ(PP_OFF_DELAYS) == 0) {
949 /* Set T2 to 40ms and T5 to 200ms */
950 I915_WRITE(PP_ON_DELAYS, 0x019007d0);
951
952 /* Set T3 to 35ms and Tx to 200ms */
953 I915_WRITE(PP_OFF_DELAYS, 0x015e07d0);
954
955 DRM_DEBUG_KMS("Panel power timings uninitialized, setting defaults\n");
956 }
957
958 lvds_encoder = kzalloc(sizeof(*lvds_encoder), GFP_KERNEL); 1013 lvds_encoder = kzalloc(sizeof(*lvds_encoder), GFP_KERNEL);
959 if (!lvds_encoder) 1014 if (!lvds_encoder)
960 return; 1015 return;
@@ -1020,6 +1075,10 @@ void intel_lvds_init(struct drm_device *dev)
1020 dev->mode_config.scaling_mode_property, 1075 dev->mode_config.scaling_mode_property,
1021 DRM_MODE_SCALE_ASPECT); 1076 DRM_MODE_SCALE_ASPECT);
1022 intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT; 1077 intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT;
1078
1079 intel_lvds_pps_get_hw_state(dev_priv, &lvds_encoder->init_pps);
1080 lvds_encoder->init_lvds_val = lvds;
1081
1023 /* 1082 /*
1024 * LVDS discovery: 1083 * LVDS discovery:
1025 * 1) check for EDID on DDC 1084 * 1) check for EDID on DDC
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 90f3ab424e01..a24bc8c7889f 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -171,8 +171,8 @@ struct overlay_registers {
171struct intel_overlay { 171struct intel_overlay {
172 struct drm_i915_private *i915; 172 struct drm_i915_private *i915;
173 struct intel_crtc *crtc; 173 struct intel_crtc *crtc;
174 struct drm_i915_gem_object *vid_bo; 174 struct i915_vma *vma;
175 struct drm_i915_gem_object *old_vid_bo; 175 struct i915_vma *old_vma;
176 bool active; 176 bool active;
177 bool pfit_active; 177 bool pfit_active;
178 u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */ 178 u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
@@ -196,7 +196,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay)
196 if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) 196 if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
197 regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr; 197 regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
198 else 198 else
199 regs = io_mapping_map_wc(dev_priv->ggtt.mappable, 199 regs = io_mapping_map_wc(&dev_priv->ggtt.mappable,
200 overlay->flip_addr, 200 overlay->flip_addr,
201 PAGE_SIZE); 201 PAGE_SIZE);
202 202
@@ -317,15 +317,17 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
317{ 317{
318 struct intel_overlay *overlay = 318 struct intel_overlay *overlay =
319 container_of(active, typeof(*overlay), last_flip); 319 container_of(active, typeof(*overlay), last_flip);
320 struct drm_i915_gem_object *obj = overlay->old_vid_bo; 320 struct i915_vma *vma;
321 321
322 i915_gem_track_fb(obj, NULL, 322 vma = fetch_and_zero(&overlay->old_vma);
323 INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); 323 if (WARN_ON(!vma))
324 return;
324 325
325 i915_gem_object_ggtt_unpin(obj); 326 i915_gem_track_fb(vma->obj, NULL,
326 i915_gem_object_put(obj); 327 INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
327 328
328 overlay->old_vid_bo = NULL; 329 i915_gem_object_unpin_from_display_plane(vma);
330 i915_vma_put(vma);
329} 331}
330 332
331static void intel_overlay_off_tail(struct i915_gem_active *active, 333static void intel_overlay_off_tail(struct i915_gem_active *active,
@@ -333,15 +335,15 @@ static void intel_overlay_off_tail(struct i915_gem_active *active,
333{ 335{
334 struct intel_overlay *overlay = 336 struct intel_overlay *overlay =
335 container_of(active, typeof(*overlay), last_flip); 337 container_of(active, typeof(*overlay), last_flip);
336 struct drm_i915_gem_object *obj = overlay->vid_bo; 338 struct i915_vma *vma;
337 339
338 /* never have the overlay hw on without showing a frame */ 340 /* never have the overlay hw on without showing a frame */
339 if (WARN_ON(!obj)) 341 vma = fetch_and_zero(&overlay->vma);
342 if (WARN_ON(!vma))
340 return; 343 return;
341 344
342 i915_gem_object_ggtt_unpin(obj); 345 i915_gem_object_unpin_from_display_plane(vma);
343 i915_gem_object_put(obj); 346 i915_vma_put(vma);
344 overlay->vid_bo = NULL;
345 347
346 overlay->crtc->overlay = NULL; 348 overlay->crtc->overlay = NULL;
347 overlay->crtc = NULL; 349 overlay->crtc = NULL;
@@ -421,7 +423,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
421 /* Only wait if there is actually an old frame to release to 423 /* Only wait if there is actually an old frame to release to
422 * guarantee forward progress. 424 * guarantee forward progress.
423 */ 425 */
424 if (!overlay->old_vid_bo) 426 if (!overlay->old_vma)
425 return 0; 427 return 0;
426 428
427 if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { 429 if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
@@ -744,6 +746,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
744 struct drm_i915_private *dev_priv = overlay->i915; 746 struct drm_i915_private *dev_priv = overlay->i915;
745 u32 swidth, swidthsw, sheight, ostride; 747 u32 swidth, swidthsw, sheight, ostride;
746 enum pipe pipe = overlay->crtc->pipe; 748 enum pipe pipe = overlay->crtc->pipe;
749 struct i915_vma *vma;
747 750
748 lockdep_assert_held(&dev_priv->drm.struct_mutex); 751 lockdep_assert_held(&dev_priv->drm.struct_mutex);
749 WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); 752 WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
@@ -752,12 +755,12 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
752 if (ret != 0) 755 if (ret != 0)
753 return ret; 756 return ret;
754 757
755 ret = i915_gem_object_pin_to_display_plane(new_bo, 0, 758 vma = i915_gem_object_pin_to_display_plane(new_bo, 0,
756 &i915_ggtt_view_normal); 759 &i915_ggtt_view_normal);
757 if (ret != 0) 760 if (IS_ERR(vma))
758 return ret; 761 return PTR_ERR(vma);
759 762
760 ret = i915_gem_object_put_fence(new_bo); 763 ret = i915_vma_put_fence(vma);
761 if (ret) 764 if (ret)
762 goto out_unpin; 765 goto out_unpin;
763 766
@@ -798,7 +801,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
798 swidth = params->src_w; 801 swidth = params->src_w;
799 swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width); 802 swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width);
800 sheight = params->src_h; 803 sheight = params->src_h;
801 iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y); 804 iowrite32(i915_ggtt_offset(vma) + params->offset_Y, &regs->OBUF_0Y);
802 ostride = params->stride_Y; 805 ostride = params->stride_Y;
803 806
804 if (params->format & I915_OVERLAY_YUV_PLANAR) { 807 if (params->format & I915_OVERLAY_YUV_PLANAR) {
@@ -812,8 +815,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
812 params->src_w/uv_hscale); 815 params->src_w/uv_hscale);
813 swidthsw |= max_t(u32, tmp_U, tmp_V) << 16; 816 swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
814 sheight |= (params->src_h/uv_vscale) << 16; 817 sheight |= (params->src_h/uv_vscale) << 16;
815 iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U); 818 iowrite32(i915_ggtt_offset(vma) + params->offset_U,
816 iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V); 819 &regs->OBUF_0U);
820 iowrite32(i915_ggtt_offset(vma) + params->offset_V,
821 &regs->OBUF_0V);
817 ostride |= params->stride_UV << 16; 822 ostride |= params->stride_UV << 16;
818 } 823 }
819 824
@@ -834,18 +839,18 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
834 if (ret) 839 if (ret)
835 goto out_unpin; 840 goto out_unpin;
836 841
837 i915_gem_track_fb(overlay->vid_bo, new_bo, 842 i915_gem_track_fb(overlay->vma->obj, new_bo,
838 INTEL_FRONTBUFFER_OVERLAY(pipe)); 843 INTEL_FRONTBUFFER_OVERLAY(pipe));
839 844
840 overlay->old_vid_bo = overlay->vid_bo; 845 overlay->old_vma = overlay->vma;
841 overlay->vid_bo = new_bo; 846 overlay->vma = vma;
842 847
843 intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe)); 848 intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe));
844 849
845 return 0; 850 return 0;
846 851
847out_unpin: 852out_unpin:
848 i915_gem_object_ggtt_unpin(new_bo); 853 i915_gem_object_unpin_from_display_plane(vma);
849 return ret; 854 return ret;
850} 855}
851 856
@@ -1368,6 +1373,7 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
1368 struct intel_overlay *overlay; 1373 struct intel_overlay *overlay;
1369 struct drm_i915_gem_object *reg_bo; 1374 struct drm_i915_gem_object *reg_bo;
1370 struct overlay_registers __iomem *regs; 1375 struct overlay_registers __iomem *regs;
1376 struct i915_vma *vma = NULL;
1371 int ret; 1377 int ret;
1372 1378
1373 if (!HAS_OVERLAY(dev_priv)) 1379 if (!HAS_OVERLAY(dev_priv))
@@ -1401,13 +1407,14 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
1401 } 1407 }
1402 overlay->flip_addr = reg_bo->phys_handle->busaddr; 1408 overlay->flip_addr = reg_bo->phys_handle->busaddr;
1403 } else { 1409 } else {
1404 ret = i915_gem_object_ggtt_pin(reg_bo, NULL, 1410 vma = i915_gem_object_ggtt_pin(reg_bo, NULL,
1405 0, PAGE_SIZE, PIN_MAPPABLE); 1411 0, PAGE_SIZE, PIN_MAPPABLE);
1406 if (ret) { 1412 if (IS_ERR(vma)) {
1407 DRM_ERROR("failed to pin overlay register bo\n"); 1413 DRM_ERROR("failed to pin overlay register bo\n");
1414 ret = PTR_ERR(vma);
1408 goto out_free_bo; 1415 goto out_free_bo;
1409 } 1416 }
1410 overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo); 1417 overlay->flip_addr = i915_ggtt_offset(vma);
1411 1418
1412 ret = i915_gem_object_set_to_gtt_domain(reg_bo, true); 1419 ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1413 if (ret) { 1420 if (ret) {
@@ -1439,8 +1446,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
1439 return; 1446 return;
1440 1447
1441out_unpin_bo: 1448out_unpin_bo:
1442 if (!OVERLAY_NEEDS_PHYSICAL(dev_priv)) 1449 if (vma)
1443 i915_gem_object_ggtt_unpin(reg_bo); 1450 i915_vma_unpin(vma);
1444out_free_bo: 1451out_free_bo:
1445 i915_gem_object_put(reg_bo); 1452 i915_gem_object_put(reg_bo);
1446out_free: 1453out_free:
@@ -1482,7 +1489,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
1482 regs = (struct overlay_registers __iomem *) 1489 regs = (struct overlay_registers __iomem *)
1483 overlay->reg_bo->phys_handle->vaddr; 1490 overlay->reg_bo->phys_handle->vaddr;
1484 else 1491 else
1485 regs = io_mapping_map_atomic_wc(dev_priv->ggtt.mappable, 1492 regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.mappable,
1486 overlay->flip_addr); 1493 overlay->flip_addr);
1487 1494
1488 return regs; 1495 return regs;
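
The overlay conversion above swaps the raw GEM object pointers (vid_bo/old_vid_bo) for i915_vma pointers and, with them, two idioms: fetch_and_zero() to take ownership of the slot before releasing it, and the ERR_PTR convention now returned by the display-plane pin call. A hedged sketch of both — fetch_and_zero() here mirrors the shape of the i915 macro, while pin_result_to_errno() and release_old_frame() are made-up stand-ins:

#include <linux/err.h>

struct i915_vma;			/* opaque here; only pointers are handled */

/* Same shape as the i915 helper: take the value and clear the slot. */
#define fetch_and_zero(ptr) ({				\
	typeof(*(ptr)) __val = *(ptr);			\
	*(ptr) = (typeof(*(ptr)))0;			\
	__val;						\
})

/* The pin call now hands back a vma or an ERR_PTR() instead of an errno. */
static long pin_result_to_errno(struct i915_vma *vma)
{
	return IS_ERR(vma) ? PTR_ERR(vma) : 0;
}

static void release_old_frame(struct i915_vma **slot,
			      void (*unpin_and_put)(struct i915_vma *))
{
	struct i915_vma *vma = fetch_and_zero(slot);

	if (!vma)			/* nothing pinned: nothing to release */
		return;

	unpin_and_put(vma);		/* unpin + drop the overlay's reference */
}
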
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 81ab11934d85..8a6751e14ab9 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3115,8 +3115,6 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate)
3115 total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id]; 3115 total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id];
3116 } 3116 }
3117 3117
3118 WARN_ON(cstate->plane_mask && total_data_rate == 0);
3119
3120 return total_data_rate; 3118 return total_data_rate;
3121} 3119}
3122 3120
@@ -3920,9 +3918,24 @@ skl_compute_ddb(struct drm_atomic_state *state)
3920 * pretend that all pipes switched active status so that we'll 3918 * pretend that all pipes switched active status so that we'll
3921 * ensure a full DDB recompute. 3919 * ensure a full DDB recompute.
3922 */ 3920 */
3923 if (dev_priv->wm.distrust_bios_wm) 3921 if (dev_priv->wm.distrust_bios_wm) {
3922 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
3923 state->acquire_ctx);
3924 if (ret)
3925 return ret;
3926
3924 intel_state->active_pipe_changes = ~0; 3927 intel_state->active_pipe_changes = ~0;
3925 3928
3929 /*
3930 * We usually only initialize intel_state->active_crtcs if we
 3931 * are doing a modeset; make sure this field is always
3932 * initialized during the sanitization process that happens
3933 * on the first commit too.
3934 */
3935 if (!intel_state->modeset)
3936 intel_state->active_crtcs = dev_priv->active_crtcs;
3937 }
3938
3926 /* 3939 /*
3927 * If the modeset changes which CRTC's are active, we need to 3940 * If the modeset changes which CRTC's are active, we need to
3928 * recompute the DDB allocation for *all* active pipes, even 3941 * recompute the DDB allocation for *all* active pipes, even
@@ -5675,8 +5688,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
5675 u32 pcbr; 5688 u32 pcbr;
5676 int pctx_size = 24*1024; 5689 int pctx_size = 24*1024;
5677 5690
5678 mutex_lock(&dev_priv->drm.struct_mutex);
5679
5680 pcbr = I915_READ(VLV_PCBR); 5691 pcbr = I915_READ(VLV_PCBR);
5681 if (pcbr) { 5692 if (pcbr) {
5682 /* BIOS set it up already, grab the pre-alloc'd space */ 5693 /* BIOS set it up already, grab the pre-alloc'd space */
@@ -5712,7 +5723,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
5712out: 5723out:
5713 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 5724 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5714 dev_priv->vlv_pctx = pctx; 5725 dev_priv->vlv_pctx = pctx;
5715 mutex_unlock(&dev_priv->drm.struct_mutex);
5716} 5726}
5717 5727
5718static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) 5728static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
@@ -6488,6 +6498,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
6488 intel_runtime_pm_get(dev_priv); 6498 intel_runtime_pm_get(dev_priv);
6489 } 6499 }
6490 6500
6501 mutex_lock(&dev_priv->drm.struct_mutex);
6491 mutex_lock(&dev_priv->rps.hw_lock); 6502 mutex_lock(&dev_priv->rps.hw_lock);
6492 6503
6493 /* Initialize RPS limits (for userspace) */ 6504 /* Initialize RPS limits (for userspace) */
@@ -6529,6 +6540,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
6529 dev_priv->rps.boost_freq = dev_priv->rps.max_freq; 6540 dev_priv->rps.boost_freq = dev_priv->rps.max_freq;
6530 6541
6531 mutex_unlock(&dev_priv->rps.hw_lock); 6542 mutex_unlock(&dev_priv->rps.hw_lock);
6543 mutex_unlock(&dev_priv->drm.struct_mutex);
6532 6544
6533 intel_autoenable_gt_powersave(dev_priv); 6545 intel_autoenable_gt_powersave(dev_priv);
6534} 6546}
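
Two locking changes sit in this file: struct_mutex moves out of valleyview_setup_pctx() and up into intel_init_gt_powersave(), and skl_compute_ddb() now takes connection_mutex through the atomic acquire context whenever it distrusts the BIOS watermarks. The point of the latter is deadlock avoidance: drm_modeset_lock() can return -EDEADLK, which the atomic commit machinery turns into a back-off-and-retry. A minimal sketch of that call pattern (the function name is illustrative):

#include <drm/drmP.h>
#include <drm/drm_modeset_lock.h>

static int lock_for_full_ddb_recompute(struct drm_device *dev,
				       struct drm_modeset_acquire_ctx *ctx)
{
	int ret;

	/* Contended?  drm_modeset_lock() returns -EDEADLK so the atomic
	 * commit machinery can back off and retry instead of deadlocking. */
	ret = drm_modeset_lock(&dev->mode_config.connection_mutex, ctx);
	if (ret)
		return ret;

	return 0;
}
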
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e08a1e1b04e4..cc5bcd14b6df 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -176,7 +176,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
176{ 176{
177 struct intel_ring *ring = req->ring; 177 struct intel_ring *ring = req->ring;
178 u32 scratch_addr = 178 u32 scratch_addr =
179 req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 179 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
180 int ret; 180 int ret;
181 181
182 ret = intel_ring_begin(req, 6); 182 ret = intel_ring_begin(req, 6);
@@ -212,7 +212,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
212{ 212{
213 struct intel_ring *ring = req->ring; 213 struct intel_ring *ring = req->ring;
214 u32 scratch_addr = 214 u32 scratch_addr =
215 req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 215 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
216 u32 flags = 0; 216 u32 flags = 0;
217 int ret; 217 int ret;
218 218
@@ -286,7 +286,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
286{ 286{
287 struct intel_ring *ring = req->ring; 287 struct intel_ring *ring = req->ring;
288 u32 scratch_addr = 288 u32 scratch_addr =
289 req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 289 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
290 u32 flags = 0; 290 u32 flags = 0;
291 int ret; 291 int ret;
292 292
@@ -370,7 +370,8 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req,
370static int 370static int
371gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) 371gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
372{ 372{
373 u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 373 u32 scratch_addr =
374 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
374 u32 flags = 0; 375 u32 flags = 0;
375 int ret; 376 int ret;
376 377
@@ -466,7 +467,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
466 mmio = RING_HWS_PGA(engine->mmio_base); 467 mmio = RING_HWS_PGA(engine->mmio_base);
467 } 468 }
468 469
469 I915_WRITE(mmio, (u32)engine->status_page.gfx_addr); 470 I915_WRITE(mmio, engine->status_page.ggtt_offset);
470 POSTING_READ(mmio); 471 POSTING_READ(mmio);
471 472
472 /* 473 /*
@@ -497,7 +498,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
497{ 498{
498 struct drm_i915_private *dev_priv = engine->i915; 499 struct drm_i915_private *dev_priv = engine->i915;
499 500
500 if (!IS_GEN2(dev_priv)) { 501 if (INTEL_GEN(dev_priv) > 2) {
501 I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); 502 I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
502 if (intel_wait_for_register(dev_priv, 503 if (intel_wait_for_register(dev_priv,
503 RING_MI_MODE(engine->mmio_base), 504 RING_MI_MODE(engine->mmio_base),
@@ -519,7 +520,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
519 I915_WRITE_HEAD(engine, 0); 520 I915_WRITE_HEAD(engine, 0);
520 I915_WRITE_TAIL(engine, 0); 521 I915_WRITE_TAIL(engine, 0);
521 522
522 if (!IS_GEN2(dev_priv)) { 523 if (INTEL_GEN(dev_priv) > 2) {
523 (void)I915_READ_CTL(engine); 524 (void)I915_READ_CTL(engine);
524 I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); 525 I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
525 } 526 }
@@ -531,7 +532,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
531{ 532{
532 struct drm_i915_private *dev_priv = engine->i915; 533 struct drm_i915_private *dev_priv = engine->i915;
533 struct intel_ring *ring = engine->buffer; 534 struct intel_ring *ring = engine->buffer;
534 struct drm_i915_gem_object *obj = ring->obj;
535 int ret = 0; 535 int ret = 0;
536 536
537 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 537 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
@@ -571,7 +571,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
571 * registers with the above sequence (the readback of the HEAD registers 571 * registers with the above sequence (the readback of the HEAD registers
572 * also enforces ordering), otherwise the hw might lose the new ring 572 * also enforces ordering), otherwise the hw might lose the new ring
573 * register values. */ 573 * register values. */
574 I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj)); 574 I915_WRITE_START(engine, i915_ggtt_offset(ring->vma));
575 575
576 /* WaClearRingBufHeadRegAtInit:ctg,elk */ 576 /* WaClearRingBufHeadRegAtInit:ctg,elk */
577 if (I915_READ_HEAD(engine)) 577 if (I915_READ_HEAD(engine))
@@ -586,16 +586,16 @@ static int init_ring_common(struct intel_engine_cs *engine)
586 586
587 /* If the head is still not zero, the ring is dead */ 587 /* If the head is still not zero, the ring is dead */
588 if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 && 588 if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
589 I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) && 589 I915_READ_START(engine) == i915_ggtt_offset(ring->vma) &&
590 (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) { 590 (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
591 DRM_ERROR("%s initialization failed " 591 DRM_ERROR("%s initialization failed "
592 "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n", 592 "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
593 engine->name, 593 engine->name,
594 I915_READ_CTL(engine), 594 I915_READ_CTL(engine),
595 I915_READ_CTL(engine) & RING_VALID, 595 I915_READ_CTL(engine) & RING_VALID,
596 I915_READ_HEAD(engine), I915_READ_TAIL(engine), 596 I915_READ_HEAD(engine), I915_READ_TAIL(engine),
597 I915_READ_START(engine), 597 I915_READ_START(engine),
598 (unsigned long)i915_gem_obj_ggtt_offset(obj)); 598 i915_ggtt_offset(ring->vma));
599 ret = -EIO; 599 ret = -EIO;
600 goto out; 600 goto out;
601 } 601 }
@@ -613,48 +613,6 @@ out:
613 return ret; 613 return ret;
614} 614}
615 615
616void intel_fini_pipe_control(struct intel_engine_cs *engine)
617{
618 if (engine->scratch.obj == NULL)
619 return;
620
621 i915_gem_object_ggtt_unpin(engine->scratch.obj);
622 i915_gem_object_put(engine->scratch.obj);
623 engine->scratch.obj = NULL;
624}
625
626int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
627{
628 struct drm_i915_gem_object *obj;
629 int ret;
630
631 WARN_ON(engine->scratch.obj);
632
633 obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
634 if (!obj)
635 obj = i915_gem_object_create(&engine->i915->drm, size);
636 if (IS_ERR(obj)) {
637 DRM_ERROR("Failed to allocate scratch page\n");
638 ret = PTR_ERR(obj);
639 goto err;
640 }
641
642 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
643 if (ret)
644 goto err_unref;
645
646 engine->scratch.obj = obj;
647 engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
648 DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
649 engine->name, engine->scratch.gtt_offset);
650 return 0;
651
652err_unref:
653 i915_gem_object_put(engine->scratch.obj);
654err:
655 return ret;
656}
657
658static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) 616static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
659{ 617{
660 struct intel_ring *ring = req->ring; 618 struct intel_ring *ring = req->ring;
@@ -1300,13 +1258,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine)
1300{ 1258{
1301 struct drm_i915_private *dev_priv = engine->i915; 1259 struct drm_i915_private *dev_priv = engine->i915;
1302 1260
1303 if (dev_priv->semaphore_obj) { 1261 i915_vma_unpin_and_release(&dev_priv->semaphore);
1304 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
1305 i915_gem_object_put(dev_priv->semaphore_obj);
1306 dev_priv->semaphore_obj = NULL;
1307 }
1308
1309 intel_fini_pipe_control(engine);
1310} 1262}
1311 1263
1312static int gen8_rcs_signal(struct drm_i915_gem_request *req) 1264static int gen8_rcs_signal(struct drm_i915_gem_request *req)
@@ -1317,7 +1269,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req)
1317 enum intel_engine_id id; 1269 enum intel_engine_id id;
1318 int ret, num_rings; 1270 int ret, num_rings;
1319 1271
1320 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); 1272 num_rings = INTEL_INFO(dev_priv)->num_rings;
1321 ret = intel_ring_begin(req, (num_rings-1) * 8); 1273 ret = intel_ring_begin(req, (num_rings-1) * 8);
1322 if (ret) 1274 if (ret)
1323 return ret; 1275 return ret;
@@ -1354,7 +1306,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req)
1354 enum intel_engine_id id; 1306 enum intel_engine_id id;
1355 int ret, num_rings; 1307 int ret, num_rings;
1356 1308
1357 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); 1309 num_rings = INTEL_INFO(dev_priv)->num_rings;
1358 ret = intel_ring_begin(req, (num_rings-1) * 6); 1310 ret = intel_ring_begin(req, (num_rings-1) * 6);
1359 if (ret) 1311 if (ret)
1360 return ret; 1312 return ret;
@@ -1385,18 +1337,21 @@ static int gen6_signal(struct drm_i915_gem_request *req)
1385{ 1337{
1386 struct intel_ring *ring = req->ring; 1338 struct intel_ring *ring = req->ring;
1387 struct drm_i915_private *dev_priv = req->i915; 1339 struct drm_i915_private *dev_priv = req->i915;
1388 struct intel_engine_cs *useless; 1340 struct intel_engine_cs *engine;
1389 enum intel_engine_id id;
1390 int ret, num_rings; 1341 int ret, num_rings;
1391 1342
1392 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); 1343 num_rings = INTEL_INFO(dev_priv)->num_rings;
1393 ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2)); 1344 ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2));
1394 if (ret) 1345 if (ret)
1395 return ret; 1346 return ret;
1396 1347
1397 for_each_engine_id(useless, dev_priv, id) { 1348 for_each_engine(engine, dev_priv) {
1398 i915_reg_t mbox_reg = req->engine->semaphore.mbox.signal[id]; 1349 i915_reg_t mbox_reg;
1350
1351 if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
1352 continue;
1399 1353
1354 mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id];
1400 if (i915_mmio_reg_valid(mbox_reg)) { 1355 if (i915_mmio_reg_valid(mbox_reg)) {
1401 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 1356 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1402 intel_ring_emit_reg(ring, mbox_reg); 1357 intel_ring_emit_reg(ring, mbox_reg);
@@ -1543,7 +1498,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req,
1543 u32 dw1 = MI_SEMAPHORE_MBOX | 1498 u32 dw1 = MI_SEMAPHORE_MBOX |
1544 MI_SEMAPHORE_COMPARE | 1499 MI_SEMAPHORE_COMPARE |
1545 MI_SEMAPHORE_REGISTER; 1500 MI_SEMAPHORE_REGISTER;
1546 u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->id]; 1501 u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id];
1547 int ret; 1502 int ret;
1548 1503
1549 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); 1504 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
@@ -1764,7 +1719,7 @@ i830_emit_bb_start(struct drm_i915_gem_request *req,
1764 unsigned int dispatch_flags) 1719 unsigned int dispatch_flags)
1765{ 1720{
1766 struct intel_ring *ring = req->ring; 1721 struct intel_ring *ring = req->ring;
1767 u32 cs_offset = req->engine->scratch.gtt_offset; 1722 u32 cs_offset = i915_ggtt_offset(req->engine->scratch);
1768 int ret; 1723 int ret;
1769 1724
1770 ret = intel_ring_begin(req, 6); 1725 ret = intel_ring_begin(req, 6);
@@ -1853,79 +1808,79 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
1853 1808
1854static void cleanup_status_page(struct intel_engine_cs *engine) 1809static void cleanup_status_page(struct intel_engine_cs *engine)
1855{ 1810{
1856 struct drm_i915_gem_object *obj; 1811 struct i915_vma *vma;
1857 1812
1858 obj = engine->status_page.obj; 1813 vma = fetch_and_zero(&engine->status_page.vma);
1859 if (obj == NULL) 1814 if (!vma)
1860 return; 1815 return;
1861 1816
1862 kunmap(sg_page(obj->pages->sgl)); 1817 i915_vma_unpin(vma);
1863 i915_gem_object_ggtt_unpin(obj); 1818 i915_gem_object_unpin_map(vma->obj);
1864 i915_gem_object_put(obj); 1819 i915_vma_put(vma);
1865 engine->status_page.obj = NULL;
1866} 1820}
1867 1821
1868static int init_status_page(struct intel_engine_cs *engine) 1822static int init_status_page(struct intel_engine_cs *engine)
1869{ 1823{
1870 struct drm_i915_gem_object *obj = engine->status_page.obj; 1824 struct drm_i915_gem_object *obj;
1871 1825 struct i915_vma *vma;
1872 if (obj == NULL) { 1826 unsigned int flags;
1873 unsigned flags; 1827 int ret;
1874 int ret;
1875 1828
1876 obj = i915_gem_object_create(&engine->i915->drm, 4096); 1829 obj = i915_gem_object_create(&engine->i915->drm, 4096);
1877 if (IS_ERR(obj)) { 1830 if (IS_ERR(obj)) {
1878 DRM_ERROR("Failed to allocate status page\n"); 1831 DRM_ERROR("Failed to allocate status page\n");
1879 return PTR_ERR(obj); 1832 return PTR_ERR(obj);
1880 } 1833 }
1881 1834
1882 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); 1835 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1883 if (ret) 1836 if (ret)
1884 goto err_unref; 1837 goto err;
1885
1886 flags = 0;
1887 if (!HAS_LLC(engine->i915))
1888 /* On g33, we cannot place HWS above 256MiB, so
1889 * restrict its pinning to the low mappable arena.
1890 * Though this restriction is not documented for
1891 * gen4, gen5, or byt, they also behave similarly
1892 * and hang if the HWS is placed at the top of the
1893 * GTT. To generalise, it appears that all !llc
1894 * platforms have issues with us placing the HWS
1895 * above the mappable region (even though we never
 1896 * actually map it).
1897 */
1898 flags |= PIN_MAPPABLE;
1899 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
1900 if (ret) {
1901err_unref:
1902 i915_gem_object_put(obj);
1903 return ret;
1904 }
1905 1838
1906 engine->status_page.obj = obj; 1839 vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
1840 if (IS_ERR(vma)) {
1841 ret = PTR_ERR(vma);
1842 goto err;
1907 } 1843 }
1908 1844
1909 engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj); 1845 flags = PIN_GLOBAL;
1910 engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); 1846 if (!HAS_LLC(engine->i915))
1911 memset(engine->status_page.page_addr, 0, PAGE_SIZE); 1847 /* On g33, we cannot place HWS above 256MiB, so
1848 * restrict its pinning to the low mappable arena.
1849 * Though this restriction is not documented for
1850 * gen4, gen5, or byt, they also behave similarly
1851 * and hang if the HWS is placed at the top of the
1852 * GTT. To generalise, it appears that all !llc
1853 * platforms have issues with us placing the HWS
1854 * above the mappable region (even though we never
 1855 * actually map it).
1856 */
1857 flags |= PIN_MAPPABLE;
1858 ret = i915_vma_pin(vma, 0, 4096, flags);
1859 if (ret)
1860 goto err;
1912 1861
1913 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", 1862 engine->status_page.vma = vma;
1914 engine->name, engine->status_page.gfx_addr); 1863 engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
1864 engine->status_page.page_addr =
1865 i915_gem_object_pin_map(obj, I915_MAP_WB);
1915 1866
1867 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1868 engine->name, i915_ggtt_offset(vma));
1916 return 0; 1869 return 0;
1870
1871err:
1872 i915_gem_object_put(obj);
1873 return ret;
1917} 1874}
1918 1875
1919static int init_phys_status_page(struct intel_engine_cs *engine) 1876static int init_phys_status_page(struct intel_engine_cs *engine)
1920{ 1877{
1921 struct drm_i915_private *dev_priv = engine->i915; 1878 struct drm_i915_private *dev_priv = engine->i915;
1922 1879
1923 if (!dev_priv->status_page_dmah) { 1880 dev_priv->status_page_dmah =
1924 dev_priv->status_page_dmah = 1881 drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
1925 drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); 1882 if (!dev_priv->status_page_dmah)
1926 if (!dev_priv->status_page_dmah) 1883 return -ENOMEM;
1927 return -ENOMEM;
1928 }
1929 1884
1930 engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; 1885 engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1931 memset(engine->status_page.page_addr, 0, PAGE_SIZE); 1886 memset(engine->status_page.page_addr, 0, PAGE_SIZE);
@@ -1935,55 +1890,46 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
1935 1890
1936int intel_ring_pin(struct intel_ring *ring) 1891int intel_ring_pin(struct intel_ring *ring)
1937{ 1892{
1938 struct drm_i915_private *dev_priv = ring->engine->i915;
1939 struct drm_i915_gem_object *obj = ring->obj;
1940 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ 1893 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
1941 unsigned flags = PIN_OFFSET_BIAS | 4096; 1894 unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096;
1895 enum i915_map_type map;
1896 struct i915_vma *vma = ring->vma;
1942 void *addr; 1897 void *addr;
1943 int ret; 1898 int ret;
1944 1899
1945 if (HAS_LLC(dev_priv) && !obj->stolen) { 1900 GEM_BUG_ON(ring->vaddr);
1946 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
1947 if (ret)
1948 return ret;
1949 1901
1950 ret = i915_gem_object_set_to_cpu_domain(obj, true); 1902 map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;
1951 if (ret)
1952 goto err_unpin;
1953 1903
1954 addr = i915_gem_object_pin_map(obj); 1904 if (vma->obj->stolen)
1955 if (IS_ERR(addr)) { 1905 flags |= PIN_MAPPABLE;
1956 ret = PTR_ERR(addr);
1957 goto err_unpin;
1958 }
1959 } else {
1960 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
1961 flags | PIN_MAPPABLE);
1962 if (ret)
1963 return ret;
1964 1906
1965 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1907 if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
1966 if (ret) 1908 if (flags & PIN_MAPPABLE || map == I915_MAP_WC)
1967 goto err_unpin; 1909 ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
1910 else
1911 ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
1912 if (unlikely(ret))
1913 return ret;
1914 }
1968 1915
1969 /* Access through the GTT requires the device to be awake. */ 1916 ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags);
1970 assert_rpm_wakelock_held(dev_priv); 1917 if (unlikely(ret))
1918 return ret;
1971 1919
1972 addr = (void __force *) 1920 if (i915_vma_is_map_and_fenceable(vma))
1973 i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); 1921 addr = (void __force *)i915_vma_pin_iomap(vma);
1974 if (IS_ERR(addr)) { 1922 else
1975 ret = PTR_ERR(addr); 1923 addr = i915_gem_object_pin_map(vma->obj, map);
1976 goto err_unpin; 1924 if (IS_ERR(addr))
1977 } 1925 goto err;
1978 }
1979 1926
1980 ring->vaddr = addr; 1927 ring->vaddr = addr;
1981 ring->vma = i915_gem_obj_to_ggtt(obj);
1982 return 0; 1928 return 0;
1983 1929
1984err_unpin: 1930err:
1985 i915_gem_object_ggtt_unpin(obj); 1931 i915_vma_unpin(vma);
1986 return ret; 1932 return PTR_ERR(addr);
1987} 1933}
1988 1934
1989void intel_ring_unpin(struct intel_ring *ring) 1935void intel_ring_unpin(struct intel_ring *ring)
@@ -1991,60 +1937,54 @@ void intel_ring_unpin(struct intel_ring *ring)
1991 GEM_BUG_ON(!ring->vma); 1937 GEM_BUG_ON(!ring->vma);
1992 GEM_BUG_ON(!ring->vaddr); 1938 GEM_BUG_ON(!ring->vaddr);
1993 1939
1994 if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen) 1940 if (i915_vma_is_map_and_fenceable(ring->vma))
1995 i915_gem_object_unpin_map(ring->obj);
1996 else
1997 i915_vma_unpin_iomap(ring->vma); 1941 i915_vma_unpin_iomap(ring->vma);
1942 else
1943 i915_gem_object_unpin_map(ring->vma->obj);
1998 ring->vaddr = NULL; 1944 ring->vaddr = NULL;
1999 1945
2000 i915_gem_object_ggtt_unpin(ring->obj); 1946 i915_vma_unpin(ring->vma);
2001 ring->vma = NULL;
2002} 1947}
2003 1948
2004static void intel_destroy_ringbuffer_obj(struct intel_ring *ring) 1949static struct i915_vma *
2005{ 1950intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
2006 i915_gem_object_put(ring->obj);
2007 ring->obj = NULL;
2008}
2009
2010static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2011 struct intel_ring *ring)
2012{ 1951{
2013 struct drm_i915_gem_object *obj; 1952 struct drm_i915_gem_object *obj;
1953 struct i915_vma *vma;
2014 1954
2015 obj = NULL; 1955 obj = i915_gem_object_create_stolen(&dev_priv->drm, size);
2016 if (!HAS_LLC(dev)) 1956 if (!obj)
2017 obj = i915_gem_object_create_stolen(dev, ring->size); 1957 obj = i915_gem_object_create(&dev_priv->drm, size);
2018 if (obj == NULL)
2019 obj = i915_gem_object_create(dev, ring->size);
2020 if (IS_ERR(obj)) 1958 if (IS_ERR(obj))
2021 return PTR_ERR(obj); 1959 return ERR_CAST(obj);
2022 1960
2023 /* mark ring buffers as read-only from GPU side by default */ 1961 /* mark ring buffers as read-only from GPU side by default */
2024 obj->gt_ro = 1; 1962 obj->gt_ro = 1;
2025 1963
2026 ring->obj = obj; 1964 vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
1965 if (IS_ERR(vma))
1966 goto err;
1967
1968 return vma;
2027 1969
2028 return 0; 1970err:
1971 i915_gem_object_put(obj);
1972 return vma;
2029} 1973}
2030 1974
2031struct intel_ring * 1975struct intel_ring *
2032intel_engine_create_ring(struct intel_engine_cs *engine, int size) 1976intel_engine_create_ring(struct intel_engine_cs *engine, int size)
2033{ 1977{
2034 struct intel_ring *ring; 1978 struct intel_ring *ring;
2035 int ret; 1979 struct i915_vma *vma;
2036 1980
2037 GEM_BUG_ON(!is_power_of_2(size)); 1981 GEM_BUG_ON(!is_power_of_2(size));
2038 1982
2039 ring = kzalloc(sizeof(*ring), GFP_KERNEL); 1983 ring = kzalloc(sizeof(*ring), GFP_KERNEL);
2040 if (ring == NULL) { 1984 if (!ring)
2041 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
2042 engine->name);
2043 return ERR_PTR(-ENOMEM); 1985 return ERR_PTR(-ENOMEM);
2044 }
2045 1986
2046 ring->engine = engine; 1987 ring->engine = engine;
2047 list_add(&ring->link, &engine->buffers);
2048 1988
2049 INIT_LIST_HEAD(&ring->request_list); 1989 INIT_LIST_HEAD(&ring->request_list);
2050 1990
@@ -2060,22 +2000,21 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
2060 ring->last_retired_head = -1; 2000 ring->last_retired_head = -1;
2061 intel_ring_update_space(ring); 2001 intel_ring_update_space(ring);
2062 2002
2063 ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring); 2003 vma = intel_ring_create_vma(engine->i915, size);
2064 if (ret) { 2004 if (IS_ERR(vma)) {
2065 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
2066 engine->name, ret);
2067 list_del(&ring->link);
2068 kfree(ring); 2005 kfree(ring);
2069 return ERR_PTR(ret); 2006 return ERR_CAST(vma);
2070 } 2007 }
2008 ring->vma = vma;
2071 2009
2010 list_add(&ring->link, &engine->buffers);
2072 return ring; 2011 return ring;
2073} 2012}
2074 2013
2075void 2014void
2076intel_ring_free(struct intel_ring *ring) 2015intel_ring_free(struct intel_ring *ring)
2077{ 2016{
2078 intel_destroy_ringbuffer_obj(ring); 2017 i915_vma_put(ring->vma);
2079 list_del(&ring->link); 2018 list_del(&ring->link);
2080 kfree(ring); 2019 kfree(ring);
2081} 2020}
@@ -2092,8 +2031,12 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
2092 return 0; 2031 return 0;
2093 2032
2094 if (ce->state) { 2033 if (ce->state) {
2095 ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0, 2034 ret = i915_gem_object_set_to_gtt_domain(ce->state->obj, false);
2096 ctx->ggtt_alignment, 0); 2035 if (ret)
2036 goto error;
2037
2038 ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment,
2039 PIN_GLOBAL | PIN_HIGH);
2097 if (ret) 2040 if (ret)
2098 goto error; 2041 goto error;
2099 } 2042 }
@@ -2127,7 +2070,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx,
2127 return; 2070 return;
2128 2071
2129 if (ce->state) 2072 if (ce->state)
2130 i915_gem_object_ggtt_unpin(ce->state); 2073 i915_vma_unpin(ce->state);
2131 2074
2132 i915_gem_context_put(ctx); 2075 i915_gem_context_put(ctx);
2133} 2076}
@@ -2165,7 +2108,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
2165 ret = PTR_ERR(ring); 2108 ret = PTR_ERR(ring);
2166 goto error; 2109 goto error;
2167 } 2110 }
2168 engine->buffer = ring;
2169 2111
2170 if (I915_NEED_GFX_HWS(dev_priv)) { 2112 if (I915_NEED_GFX_HWS(dev_priv)) {
2171 ret = init_status_page(engine); 2113 ret = init_status_page(engine);
@@ -2180,11 +2122,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
2180 2122
2181 ret = intel_ring_pin(ring); 2123 ret = intel_ring_pin(ring);
2182 if (ret) { 2124 if (ret) {
2183 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", 2125 intel_ring_free(ring);
2184 engine->name, ret);
2185 intel_destroy_ringbuffer_obj(ring);
2186 goto error; 2126 goto error;
2187 } 2127 }
2128 engine->buffer = ring;
2188 2129
2189 return 0; 2130 return 0;
2190 2131
@@ -2203,7 +2144,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
2203 dev_priv = engine->i915; 2144 dev_priv = engine->i915;
2204 2145
2205 if (engine->buffer) { 2146 if (engine->buffer) {
2206 WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); 2147 WARN_ON(INTEL_GEN(dev_priv) > 2 &&
2148 (I915_READ_MODE(engine) & MODE_IDLE) == 0);
2207 2149
2208 intel_ring_unpin(engine->buffer); 2150 intel_ring_unpin(engine->buffer);
2209 intel_ring_free(engine->buffer); 2151 intel_ring_free(engine->buffer);
@@ -2371,50 +2313,6 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
2371 return 0; 2313 return 0;
2372} 2314}
2373 2315
2374void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
2375{
2376 struct drm_i915_private *dev_priv = engine->i915;
2377
2378 /* Our semaphore implementation is strictly monotonic (i.e. we proceed
2379 * so long as the semaphore value in the register/page is greater
2380 * than the sync value), so whenever we reset the seqno,
2381 * so long as we reset the tracking semaphore value to 0, it will
2382 * always be before the next request's seqno. If we don't reset
2383 * the semaphore value, then when the seqno moves backwards all
2384 * future waits will complete instantly (causing rendering corruption).
2385 */
2386 if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
2387 I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
2388 I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
2389 if (HAS_VEBOX(dev_priv))
2390 I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
2391 }
2392 if (dev_priv->semaphore_obj) {
2393 struct drm_i915_gem_object *obj = dev_priv->semaphore_obj;
2394 struct page *page = i915_gem_object_get_dirty_page(obj, 0);
2395 void *semaphores = kmap(page);
2396 memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
2397 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
2398 kunmap(page);
2399 }
2400 memset(engine->semaphore.sync_seqno, 0,
2401 sizeof(engine->semaphore.sync_seqno));
2402
2403 intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
2404 if (engine->irq_seqno_barrier)
2405 engine->irq_seqno_barrier(engine);
2406 engine->last_submitted_seqno = seqno;
2407
2408 engine->hangcheck.seqno = seqno;
2409
2410 /* After manually advancing the seqno, fake the interrupt in case
2411 * there are any waiters for that seqno.
2412 */
2413 rcu_read_lock();
2414 intel_engine_wakeup(engine);
2415 rcu_read_unlock();
2416}
2417
2418static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) 2316static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
2419{ 2317{
2420 struct drm_i915_private *dev_priv = request->i915; 2318 struct drm_i915_private *dev_priv = request->i915;
@@ -2624,35 +2522,36 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
2624 if (!i915.semaphores) 2522 if (!i915.semaphores)
2625 return; 2523 return;
2626 2524
2627 if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { 2525 if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) {
2526 struct i915_vma *vma;
2527
2628 obj = i915_gem_object_create(&dev_priv->drm, 4096); 2528 obj = i915_gem_object_create(&dev_priv->drm, 4096);
2629 if (IS_ERR(obj)) { 2529 if (IS_ERR(obj))
2630 DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n"); 2530 goto err;
2631 i915.semaphores = 0;
2632 } else {
2633 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2634 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
2635 if (ret != 0) {
2636 i915_gem_object_put(obj);
2637 DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2638 i915.semaphores = 0;
2639 } else {
2640 dev_priv->semaphore_obj = obj;
2641 }
2642 }
2643 }
2644 2531
2645 if (!i915.semaphores) 2532 vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
2646 return; 2533 if (IS_ERR(vma))
2534 goto err_obj;
2535
2536 ret = i915_gem_object_set_to_gtt_domain(obj, false);
2537 if (ret)
2538 goto err_obj;
2539
2540 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
2541 if (ret)
2542 goto err_obj;
2543
2544 dev_priv->semaphore = vma;
2545 }
2647 2546
2648 if (INTEL_GEN(dev_priv) >= 8) { 2547 if (INTEL_GEN(dev_priv) >= 8) {
2649 u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj); 2548 u32 offset = i915_ggtt_offset(dev_priv->semaphore);
2650 2549
2651 engine->semaphore.sync_to = gen8_ring_sync_to; 2550 engine->semaphore.sync_to = gen8_ring_sync_to;
2652 engine->semaphore.signal = gen8_xcs_signal; 2551 engine->semaphore.signal = gen8_xcs_signal;
2653 2552
2654 for (i = 0; i < I915_NUM_ENGINES; i++) { 2553 for (i = 0; i < I915_NUM_ENGINES; i++) {
2655 u64 ring_offset; 2554 u32 ring_offset;
2656 2555
2657 if (i != engine->id) 2556 if (i != engine->id)
2658 ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i); 2557 ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i);
@@ -2672,47 +2571,55 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
2672 * initialized as INVALID. Gen8 will initialize the 2571 * initialized as INVALID. Gen8 will initialize the
2673 * sema between VCS2 and RCS later. 2572 * sema between VCS2 and RCS later.
2674 */ 2573 */
2675 for (i = 0; i < I915_NUM_ENGINES; i++) { 2574 for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
2676 static const struct { 2575 static const struct {
2677 u32 wait_mbox; 2576 u32 wait_mbox;
2678 i915_reg_t mbox_reg; 2577 i915_reg_t mbox_reg;
2679 } sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = { 2578 } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
2680 [RCS] = { 2579 [RCS_HW] = {
2681 [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, 2580 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
2682 [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, 2581 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
2683 [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, 2582 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
2684 }, 2583 },
2685 [VCS] = { 2584 [VCS_HW] = {
2686 [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, 2585 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
2687 [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, 2586 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
2688 [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, 2587 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
2689 }, 2588 },
2690 [BCS] = { 2589 [BCS_HW] = {
2691 [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, 2590 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
2692 [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, 2591 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
2693 [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, 2592 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
2694 }, 2593 },
2695 [VECS] = { 2594 [VECS_HW] = {
2696 [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, 2595 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
2697 [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, 2596 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
2698 [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, 2597 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
2699 }, 2598 },
2700 }; 2599 };
2701 u32 wait_mbox; 2600 u32 wait_mbox;
2702 i915_reg_t mbox_reg; 2601 i915_reg_t mbox_reg;
2703 2602
2704 if (i == engine->id || i == VCS2) { 2603 if (i == engine->hw_id) {
2705 wait_mbox = MI_SEMAPHORE_SYNC_INVALID; 2604 wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
2706 mbox_reg = GEN6_NOSYNC; 2605 mbox_reg = GEN6_NOSYNC;
2707 } else { 2606 } else {
2708 wait_mbox = sem_data[engine->id][i].wait_mbox; 2607 wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
2709 mbox_reg = sem_data[engine->id][i].mbox_reg; 2608 mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
2710 } 2609 }
2711 2610
2712 engine->semaphore.mbox.wait[i] = wait_mbox; 2611 engine->semaphore.mbox.wait[i] = wait_mbox;
2713 engine->semaphore.mbox.signal[i] = mbox_reg; 2612 engine->semaphore.mbox.signal[i] = mbox_reg;
2714 } 2613 }
2715 } 2614 }
2615
2616 return;
2617
2618err_obj:
2619 i915_gem_object_put(obj);
2620err:
2621 DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n");
2622 i915.semaphores = 0;
2716} 2623}
2717 2624
2718static void intel_ring_init_irq(struct drm_i915_private *dev_priv, 2625static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
@@ -2808,11 +2715,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
2808 return ret; 2715 return ret;
2809 2716
2810 if (INTEL_GEN(dev_priv) >= 6) { 2717 if (INTEL_GEN(dev_priv) >= 6) {
2811 ret = intel_init_pipe_control(engine, 4096); 2718 ret = intel_engine_create_scratch(engine, 4096);
2812 if (ret) 2719 if (ret)
2813 return ret; 2720 return ret;
2814 } else if (HAS_BROKEN_CS_TLB(dev_priv)) { 2721 } else if (HAS_BROKEN_CS_TLB(dev_priv)) {
2815 ret = intel_init_pipe_control(engine, I830_WA_SIZE); 2722 ret = intel_engine_create_scratch(engine, I830_WA_SIZE);
2816 if (ret) 2723 if (ret)
2817 return ret; 2724 return ret;
2818 } 2725 }
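
A recurring change in this file is indexing the semaphore mailboxes by engine->hw_id instead of the driver-local engine->id, and gen6_signal() now walks every engine but skips those whose hardware id falls outside the gen6 semaphore set via GEN6_SEMAPHORES_MASK (defined in the header hunk below as GENMASK(GEN6_SEMAPHORE_LAST, 0) with GEN6_SEMAPHORE_LAST = VECS_HW). A compact sketch of that filter, under those stated assumptions:

#include <linux/bitops.h>
#include <linux/types.h>

/* Mirrors the header hunk below: VECS_HW (= 3) is the last engine with a
 * gen6-style semaphore mailbox, so the mask covers RCS/VCS/BCS/VECS only. */
#define GEN6_SEMAPHORE_LAST	3
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)

static bool engine_has_gen6_semaphore(unsigned int hw_id)
{
	return BIT(hw_id) & GEN6_SEMAPHORES_MASK;
}
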
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 43e545e44352..84aea549de5d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -26,10 +26,10 @@
26 */ 26 */
27#define I915_RING_FREE_SPACE 64 27#define I915_RING_FREE_SPACE 64
28 28
29struct intel_hw_status_page { 29struct intel_hw_status_page {
30 u32 *page_addr; 30 struct i915_vma *vma;
31 unsigned int gfx_addr; 31 u32 *page_addr;
32 struct drm_i915_gem_object *obj; 32 u32 ggtt_offset;
33}; 33};
34 34
35#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) 35#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -57,10 +57,10 @@ struct intel_hw_status_page {
57#define GEN8_SEMAPHORE_OFFSET(__from, __to) \ 57#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
58 (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size) 58 (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
59#define GEN8_SIGNAL_OFFSET(__ring, to) \ 59#define GEN8_SIGNAL_OFFSET(__ring, to) \
60 (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ 60 (dev_priv->semaphore->node.start + \
61 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to))) 61 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
62#define GEN8_WAIT_OFFSET(__ring, from) \ 62#define GEN8_WAIT_OFFSET(__ring, from) \
63 (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ 63 (dev_priv->semaphore->node.start + \
64 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) 64 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
65 65
66enum intel_engine_hangcheck_action { 66enum intel_engine_hangcheck_action {
@@ -75,7 +75,6 @@ enum intel_engine_hangcheck_action {
75 75
76struct intel_engine_hangcheck { 76struct intel_engine_hangcheck {
77 u64 acthd; 77 u64 acthd;
78 unsigned long user_interrupts;
79 u32 seqno; 78 u32 seqno;
80 int score; 79 int score;
81 enum intel_engine_hangcheck_action action; 80 enum intel_engine_hangcheck_action action;
@@ -84,9 +83,8 @@ struct intel_engine_hangcheck {
84}; 83};
85 84
86struct intel_ring { 85struct intel_ring {
87 struct drm_i915_gem_object *obj;
88 void *vaddr;
89 struct i915_vma *vma; 86 struct i915_vma *vma;
87 void *vaddr;
90 88
91 struct intel_engine_cs *engine; 89 struct intel_engine_cs *engine;
92 struct list_head link; 90 struct list_head link;
@@ -124,12 +122,12 @@ struct drm_i915_reg_table;
124 * an option for future use. 122 * an option for future use.
125 * size: size of the batch in DWORDS 123 * size: size of the batch in DWORDS
126 */ 124 */
127struct i915_ctx_workarounds { 125struct i915_ctx_workarounds {
128 struct i915_wa_ctx_bb { 126 struct i915_wa_ctx_bb {
129 u32 offset; 127 u32 offset;
130 u32 size; 128 u32 size;
131 } indirect_ctx, per_ctx; 129 } indirect_ctx, per_ctx;
132 struct drm_i915_gem_object *obj; 130 struct i915_vma *vma;
133}; 131};
134 132
135struct drm_i915_gem_request; 133struct drm_i915_gem_request;
@@ -147,8 +145,14 @@ struct intel_engine_cs {
147#define I915_NUM_ENGINES 5 145#define I915_NUM_ENGINES 5
148#define _VCS(n) (VCS + (n)) 146#define _VCS(n) (VCS + (n))
149 unsigned int exec_id; 147 unsigned int exec_id;
150 unsigned int hw_id; 148 enum intel_engine_hw_id {
151 unsigned int guc_id; /* XXX same as hw_id? */ 149 RCS_HW = 0,
150 VCS_HW,
151 BCS_HW,
152 VECS_HW,
153 VCS2_HW
154 } hw_id;
155 enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */
152 u64 fence_context; 156 u64 fence_context;
153 u32 mmio_base; 157 u32 mmio_base;
154 unsigned int irq_shift; 158 unsigned int irq_shift;
@@ -172,8 +176,7 @@ struct intel_engine_cs {
172 * the overhead of waking that client is much preferred. 176 * the overhead of waking that client is much preferred.
173 */ 177 */
174 struct intel_breadcrumbs { 178 struct intel_breadcrumbs {
175 struct task_struct *irq_seqno_bh; /* bh for user interrupts */ 179 struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
176 unsigned long irq_wakeups;
177 bool irq_posted; 180 bool irq_posted;
178 181
179 spinlock_t lock; /* protects the lists of requests */ 182 spinlock_t lock; /* protects the lists of requests */
@@ -183,6 +186,9 @@ struct intel_engine_cs {
183 struct task_struct *signaler; /* used for fence signalling */ 186 struct task_struct *signaler; /* used for fence signalling */
184 struct drm_i915_gem_request *first_signal; 187 struct drm_i915_gem_request *first_signal;
185 struct timer_list fake_irq; /* used after a missed interrupt */ 188 struct timer_list fake_irq; /* used after a missed interrupt */
189 struct timer_list hangcheck; /* detect missed interrupts */
190
191 unsigned long timeout;
186 192
187 bool irq_enabled : 1; 193 bool irq_enabled : 1;
188 bool rpm_wakelock : 1; 194 bool rpm_wakelock : 1;
@@ -197,6 +203,7 @@ struct intel_engine_cs {
197 203
198 struct intel_hw_status_page status_page; 204 struct intel_hw_status_page status_page;
199 struct i915_ctx_workarounds wa_ctx; 205 struct i915_ctx_workarounds wa_ctx;
206 struct i915_vma *scratch;
200 207
201 u32 irq_keep_mask; /* always keep these interrupts */ 208 u32 irq_keep_mask; /* always keep these interrupts */
202 u32 irq_enable_mask; /* bitmask to enable ring interrupt */ 209 u32 irq_enable_mask; /* bitmask to enable ring interrupt */
@@ -270,11 +277,14 @@ struct intel_engine_cs {
270 u32 sync_seqno[I915_NUM_ENGINES-1]; 277 u32 sync_seqno[I915_NUM_ENGINES-1];
271 278
272 union { 279 union {
280#define GEN6_SEMAPHORE_LAST VECS_HW
281#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1)
282#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0)
273 struct { 283 struct {
274 /* our mbox written by others */ 284 /* our mbox written by others */
275 u32 wait[I915_NUM_ENGINES]; 285 u32 wait[GEN6_NUM_SEMAPHORES];
276 /* mboxes this ring signals to */ 286 /* mboxes this ring signals to */
277 i915_reg_t signal[I915_NUM_ENGINES]; 287 i915_reg_t signal[GEN6_NUM_SEMAPHORES];
278 } mbox; 288 } mbox;
279 u64 signal_ggtt[I915_NUM_ENGINES]; 289 u64 signal_ggtt[I915_NUM_ENGINES];
280 }; 290 };
@@ -310,7 +320,7 @@ struct intel_engine_cs {
310 320
311 /* An RCU guarded pointer to the last request. No reference is 321 /* An RCU guarded pointer to the last request. No reference is
312 * held to the request, users must carefully acquire a reference to 322 * held to the request, users must carefully acquire a reference to
313 * the request using i915_gem_active_get_request_rcu(), or hold the 323 * the request using i915_gem_active_get_rcu(), or hold the
314 * struct_mutex. 324 * struct_mutex.
315 */ 325 */
316 struct i915_gem_active last_request; 326 struct i915_gem_active last_request;
@@ -319,11 +329,6 @@ struct intel_engine_cs {
319 329
320 struct intel_engine_hangcheck hangcheck; 330 struct intel_engine_hangcheck hangcheck;
321 331
322 struct {
323 struct drm_i915_gem_object *obj;
324 u32 gtt_offset;
325 } scratch;
326
327 bool needs_cmd_parser; 332 bool needs_cmd_parser;
328 333
329 /* 334 /*
@@ -475,11 +480,9 @@ void intel_ring_update_space(struct intel_ring *ring);
475 480
476void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); 481void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
477 482
478int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
479void intel_fini_pipe_control(struct intel_engine_cs *engine);
480
481void intel_engine_setup_common(struct intel_engine_cs *engine); 483void intel_engine_setup_common(struct intel_engine_cs *engine);
482int intel_engine_init_common(struct intel_engine_cs *engine); 484int intel_engine_init_common(struct intel_engine_cs *engine);
485int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
483void intel_engine_cleanup_common(struct intel_engine_cs *engine); 486void intel_engine_cleanup_common(struct intel_engine_cs *engine);
484 487
485static inline int intel_engine_idle(struct intel_engine_cs *engine, 488static inline int intel_engine_idle(struct intel_engine_cs *engine,
@@ -515,7 +518,7 @@ int init_workarounds_ring(struct intel_engine_cs *engine);
515 518
516static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) 519static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
517{ 520{
518 return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR; 521 return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
519} 522}
520 523
521/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ 524/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
@@ -538,29 +541,35 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
538 struct intel_wait *wait); 541 struct intel_wait *wait);
539void intel_engine_enable_signaling(struct drm_i915_gem_request *request); 542void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
540 543
541static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine) 544static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
542{ 545{
543 return READ_ONCE(engine->breadcrumbs.irq_seqno_bh); 546 return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
544} 547}
545 548
546static inline bool intel_engine_wakeup(struct intel_engine_cs *engine) 549static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
547{ 550{
548 bool wakeup = false; 551 bool wakeup = false;
549 struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh); 552
550 /* Note that for this not to dangerously chase a dangling pointer, 553 /* Note that for this not to dangerously chase a dangling pointer,
551 * the caller is responsible for ensure that the task remain valid for 554 * we must hold the rcu_read_lock here.
552 * wake_up_process() i.e. that the RCU grace period cannot expire.
553 * 555 *
554 * Also note that tsk is likely to be in !TASK_RUNNING state so an 556 * Also note that tsk is likely to be in !TASK_RUNNING state so an
555 * early test for tsk->state != TASK_RUNNING before wake_up_process() 557 * early test for tsk->state != TASK_RUNNING before wake_up_process()
556 * is unlikely to be beneficial. 558 * is unlikely to be beneficial.
557 */ 559 */
558 if (tsk) 560 if (intel_engine_has_waiter(engine)) {
559 wakeup = wake_up_process(tsk); 561 struct task_struct *tsk;
562
563 rcu_read_lock();
564 tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
565 if (tsk)
566 wakeup = wake_up_process(tsk);
567 rcu_read_unlock();
568 }
569
560 return wakeup; 570 return wakeup;
561} 571}
562 572
563void intel_engine_enable_fake_irq(struct intel_engine_cs *engine);
564void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); 573void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
565unsigned int intel_kick_waiters(struct drm_i915_private *i915); 574unsigned int intel_kick_waiters(struct drm_i915_private *i915);
566unsigned int intel_kick_signalers(struct drm_i915_private *i915); 575unsigned int intel_kick_signalers(struct drm_i915_private *i915);
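
The breadcrumbs change above makes irq_seqno_bh an __rcu pointer: intel_engine_has_waiter() becomes a lockless rcu_access_pointer() peek, and intel_engine_wakeup() only dereferences the task inside an rcu_read_lock() section, so the waiter cannot be freed between the check and wake_up_process(). A self-contained sketch of the same pattern (the struct and function names are illustrative):

#include <linux/rcupdate.h>
#include <linux/sched.h>

struct waiter_slot {
	struct task_struct __rcu *waiter;
};

static bool wake_waiter(struct waiter_slot *slot)
{
	bool woken = false;
	struct task_struct *tsk;

	if (!rcu_access_pointer(slot->waiter))	/* cheap "anyone there?" test */
		return false;

	rcu_read_lock();
	tsk = rcu_dereference(slot->waiter);	/* stable for this RCU section */
	if (tsk)
		woken = wake_up_process(tsk);
	rcu_read_unlock();

	return woken;
}
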
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 1c603bbe5784..a1d73c2de332 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -592,6 +592,8 @@ void bxt_disable_dc9(struct drm_i915_private *dev_priv)
592 DRM_DEBUG_KMS("Disabling DC9\n"); 592 DRM_DEBUG_KMS("Disabling DC9\n");
593 593
594 gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); 594 gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
595
596 intel_pps_unlock_regs_wa(dev_priv);
595} 597}
596 598
597static void assert_csr_loaded(struct drm_i915_private *dev_priv) 599static void assert_csr_loaded(struct drm_i915_private *dev_priv)
@@ -854,7 +856,7 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
854 struct i915_power_well *power_well) 856 struct i915_power_well *power_well)
855{ 857{
856 enum skl_disp_power_wells power_well_id = power_well->data; 858 enum skl_disp_power_wells power_well_id = power_well->data;
857 struct i915_power_well *cmn_a_well; 859 struct i915_power_well *cmn_a_well = NULL;
858 860
859 if (power_well_id == BXT_DPIO_CMN_BC) { 861 if (power_well_id == BXT_DPIO_CMN_BC) {
860 /* 862 /*
@@ -867,7 +869,7 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
867 869
868 bxt_ddi_phy_init(dev_priv, bxt_power_well_to_phy(power_well)); 870 bxt_ddi_phy_init(dev_priv, bxt_power_well_to_phy(power_well));
869 871
870 if (power_well_id == BXT_DPIO_CMN_BC) 872 if (cmn_a_well)
871 intel_power_well_put(dev_priv, cmn_a_well); 873 intel_power_well_put(dev_priv, cmn_a_well);
872} 874}
873 875
@@ -1121,6 +1123,8 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv)
1121 } 1123 }
1122 1124
1123 i915_redisable_vga_power_on(&dev_priv->drm); 1125 i915_redisable_vga_power_on(&dev_priv->drm);
1126
1127 intel_pps_unlock_regs_wa(dev_priv);
1124} 1128}
1125 1129
1126static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) 1130static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv)
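
The intel_runtime_pm.c hunks initialise cmn_a_well to NULL and key the final put off the pointer itself instead of re-testing power_well_id, so the reference is only dropped when it was actually taken. A self-contained, hedged sketch of that acquire/maybe-release shape; the names here are illustrative, not the driver's power-well helpers:

	#include <stdio.h>
	#include <stdlib.h>

	/* Tiny stand-ins so the sketch compiles on its own. */
	struct resource_ref { int id; };

	static struct resource_ref *resource_get(int id)
	{
		struct resource_ref *ref = malloc(sizeof(*ref));

		if (ref)
			ref->id = id;
		return ref;
	}

	static void resource_put(struct resource_ref *ref)
	{
		free(ref);
	}

	static void do_work(int id, int needs_dep)
	{
		struct resource_ref *dep = NULL;	/* NULL means "never acquired" */

		if (needs_dep)
			dep = resource_get(id);

		/* ... main body of the operation ... */

		if (dep)				/* release only what was taken */
			resource_put(dep);
	}

	int main(void)
	{
		do_work(1, 0);
		do_work(1, 1);
		return 0;
	}
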
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index cbdca7e4d307..366900dcde34 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -203,21 +203,19 @@ skl_update_plane(struct drm_plane *drm_plane,
203 struct drm_i915_private *dev_priv = to_i915(dev); 203 struct drm_i915_private *dev_priv = to_i915(dev);
204 struct intel_plane *intel_plane = to_intel_plane(drm_plane); 204 struct intel_plane *intel_plane = to_intel_plane(drm_plane);
205 struct drm_framebuffer *fb = plane_state->base.fb; 205 struct drm_framebuffer *fb = plane_state->base.fb;
206 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
207 const int pipe = intel_plane->pipe; 206 const int pipe = intel_plane->pipe;
208 const int plane = intel_plane->plane + 1; 207 const int plane = intel_plane->plane + 1;
209 u32 plane_ctl, stride_div, stride; 208 u32 plane_ctl;
210 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 209 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
211 u32 surf_addr; 210 u32 surf_addr = plane_state->main.offset;
212 u32 tile_height, plane_offset, plane_size;
213 unsigned int rotation = plane_state->base.rotation; 211 unsigned int rotation = plane_state->base.rotation;
214 int x_offset, y_offset; 212 u32 stride = skl_plane_stride(fb, 0, rotation);
215 int crtc_x = plane_state->base.dst.x1; 213 int crtc_x = plane_state->base.dst.x1;
216 int crtc_y = plane_state->base.dst.y1; 214 int crtc_y = plane_state->base.dst.y1;
217 uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); 215 uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
218 uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); 216 uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
219 uint32_t x = plane_state->base.src.x1 >> 16; 217 uint32_t x = plane_state->main.x;
220 uint32_t y = plane_state->base.src.y1 >> 16; 218 uint32_t y = plane_state->main.y;
221 uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; 219 uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
222 uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; 220 uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
223 221
@@ -230,15 +228,6 @@ skl_update_plane(struct drm_plane *drm_plane,
230 228
231 plane_ctl |= skl_plane_ctl_rotation(rotation); 229 plane_ctl |= skl_plane_ctl_rotation(rotation);
232 230
233 stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
234 fb->pixel_format);
235
236 /* Sizes are 0 based */
237 src_w--;
238 src_h--;
239 crtc_w--;
240 crtc_h--;
241
242 if (key->flags) { 231 if (key->flags) {
243 I915_WRITE(PLANE_KEYVAL(pipe, plane), key->min_value); 232 I915_WRITE(PLANE_KEYVAL(pipe, plane), key->min_value);
244 I915_WRITE(PLANE_KEYMAX(pipe, plane), key->max_value); 233 I915_WRITE(PLANE_KEYMAX(pipe, plane), key->max_value);
@@ -250,28 +239,15 @@ skl_update_plane(struct drm_plane *drm_plane,
250 else if (key->flags & I915_SET_COLORKEY_SOURCE) 239 else if (key->flags & I915_SET_COLORKEY_SOURCE)
251 plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; 240 plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE;
252 241
253 surf_addr = intel_plane_obj_offset(intel_plane, obj, 0); 242 /* Sizes are 0 based */
254 243 src_w--;
255 if (intel_rotation_90_or_270(rotation)) { 244 src_h--;
256 int cpp = drm_format_plane_cpp(fb->pixel_format, 0); 245 crtc_w--;
257 246 crtc_h--;
258 /* stride: Surface height in tiles */
259 tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
260 stride = DIV_ROUND_UP(fb->height, tile_height);
261 plane_size = (src_w << 16) | src_h;
262 x_offset = stride * tile_height - y - (src_h + 1);
263 y_offset = x;
264 } else {
265 stride = fb->pitches[0] / stride_div;
266 plane_size = (src_h << 16) | src_w;
267 x_offset = x;
268 y_offset = y;
269 }
270 plane_offset = y_offset << 16 | x_offset;
271 247
272 I915_WRITE(PLANE_OFFSET(pipe, plane), plane_offset); 248 I915_WRITE(PLANE_OFFSET(pipe, plane), (y << 16) | x);
273 I915_WRITE(PLANE_STRIDE(pipe, plane), stride); 249 I915_WRITE(PLANE_STRIDE(pipe, plane), stride);
274 I915_WRITE(PLANE_SIZE(pipe, plane), plane_size); 250 I915_WRITE(PLANE_SIZE(pipe, plane), (src_h << 16) | src_w);
275 251
276 /* program plane scaler */ 252 /* program plane scaler */
277 if (plane_state->scaler_id >= 0) { 253 if (plane_state->scaler_id >= 0) {
@@ -296,7 +272,8 @@ skl_update_plane(struct drm_plane *drm_plane,
296 } 272 }
297 273
298 I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl); 274 I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl);
299 I915_WRITE(PLANE_SURF(pipe, plane), surf_addr); 275 I915_WRITE(PLANE_SURF(pipe, plane),
276 intel_fb_gtt_offset(fb, rotation) + surf_addr);
300 POSTING_READ(PLANE_SURF(pipe, plane)); 277 POSTING_READ(PLANE_SURF(pipe, plane));
301} 278}
302 279
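
In the skl_update_plane() hunk the per-plane x/y and surface offset now arrive precomputed in plane_state->main, and the register writes simply consume them; sizes are programmed 0-based and packed as (height << 16) | width. A small standalone illustration of that packing (the register name is not reproduced here):

	#include <stdio.h>

	/*
	 * Pack a source size the way the plane hunk above does: sizes are
	 * 0-based, height in the high word, width in the low word.
	 */
	static unsigned int pack_plane_size(unsigned int src_w, unsigned int src_h)
	{
		return ((src_h - 1) << 16) | (src_w - 1);
	}

	int main(void)
	{
		/* A 1920x1080 source is written as 0x0437077f. */
		printf("0x%08x\n", pack_plane_size(1920, 1080));
		return 0;
	}
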
@@ -363,13 +340,11 @@ vlv_update_plane(struct drm_plane *dplane,
363 struct drm_i915_private *dev_priv = to_i915(dev); 340 struct drm_i915_private *dev_priv = to_i915(dev);
364 struct intel_plane *intel_plane = to_intel_plane(dplane); 341 struct intel_plane *intel_plane = to_intel_plane(dplane);
365 struct drm_framebuffer *fb = plane_state->base.fb; 342 struct drm_framebuffer *fb = plane_state->base.fb;
366 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
367 int pipe = intel_plane->pipe; 343 int pipe = intel_plane->pipe;
368 int plane = intel_plane->plane; 344 int plane = intel_plane->plane;
369 u32 sprctl; 345 u32 sprctl;
370 u32 sprsurf_offset, linear_offset; 346 u32 sprsurf_offset, linear_offset;
371 unsigned int rotation = dplane->state->rotation; 347 unsigned int rotation = dplane->state->rotation;
372 int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
373 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 348 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
374 int crtc_x = plane_state->base.dst.x1; 349 int crtc_x = plane_state->base.dst.x1;
375 int crtc_y = plane_state->base.dst.y1; 350 int crtc_y = plane_state->base.dst.y1;
@@ -431,7 +406,7 @@ vlv_update_plane(struct drm_plane *dplane,
431 */ 406 */
432 sprctl |= SP_GAMMA_ENABLE; 407 sprctl |= SP_GAMMA_ENABLE;
433 408
434 if (i915_gem_object_is_tiled(obj)) 409 if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
435 sprctl |= SP_TILED; 410 sprctl |= SP_TILED;
436 411
437 /* Sizes are 0 based */ 412 /* Sizes are 0 based */
@@ -440,19 +415,18 @@ vlv_update_plane(struct drm_plane *dplane,
440 crtc_w--; 415 crtc_w--;
441 crtc_h--; 416 crtc_h--;
442 417
443 linear_offset = y * fb->pitches[0] + x * cpp; 418 intel_add_fb_offsets(&x, &y, plane_state, 0);
444 sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0, 419 sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
445 fb->pitches[0], rotation);
446 linear_offset -= sprsurf_offset;
447 420
448 if (rotation == DRM_ROTATE_180) { 421 if (rotation == DRM_ROTATE_180) {
449 sprctl |= SP_ROTATE_180; 422 sprctl |= SP_ROTATE_180;
450 423
451 x += src_w; 424 x += src_w;
452 y += src_h; 425 y += src_h;
453 linear_offset += src_h * fb->pitches[0] + src_w * cpp;
454 } 426 }
455 427
428 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
429
456 if (key->flags) { 430 if (key->flags) {
457 I915_WRITE(SPKEYMINVAL(pipe, plane), key->min_value); 431 I915_WRITE(SPKEYMINVAL(pipe, plane), key->min_value);
458 I915_WRITE(SPKEYMAXVAL(pipe, plane), key->max_value); 432 I915_WRITE(SPKEYMAXVAL(pipe, plane), key->max_value);
@@ -468,7 +442,7 @@ vlv_update_plane(struct drm_plane *dplane,
468 I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]); 442 I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]);
469 I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x); 443 I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x);
470 444
471 if (i915_gem_object_is_tiled(obj)) 445 if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
472 I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x); 446 I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x);
473 else 447 else
474 I915_WRITE(SPLINOFF(pipe, plane), linear_offset); 448 I915_WRITE(SPLINOFF(pipe, plane), linear_offset);
@@ -477,8 +451,8 @@ vlv_update_plane(struct drm_plane *dplane,
477 451
478 I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w); 452 I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w);
479 I915_WRITE(SPCNTR(pipe, plane), sprctl); 453 I915_WRITE(SPCNTR(pipe, plane), sprctl);
480 I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) + 454 I915_WRITE(SPSURF(pipe, plane),
481 sprsurf_offset); 455 intel_fb_gtt_offset(fb, rotation) + sprsurf_offset);
482 POSTING_READ(SPSURF(pipe, plane)); 456 POSTING_READ(SPSURF(pipe, plane));
483} 457}
484 458
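
The vlv_update_plane() hunk replaces the open-coded linear_offset bookkeeping with intel_add_fb_offsets()/intel_fb_xy_to_linear(), computed after the tile-offset and rotation adjustments. The underlying conversion for an untiled plane is the formula the removed line used explicitly, y * pitch + x * cpp; the helper is assumed to encapsulate the same arithmetic for the linear case:

	#include <stdio.h>

	/* x/y to byte offset within a linear (untiled) plane. */
	static unsigned int xy_to_linear(unsigned int x, unsigned int y,
					 unsigned int pitch_bytes, unsigned int cpp)
	{
		return y * pitch_bytes + x * cpp;
	}

	int main(void)
	{
		/* 4 bytes per pixel, 1920-pixel-wide buffer, pixel (100, 10). */
		printf("%u\n", xy_to_linear(100, 10, 1920 * 4, 4));	/* 77200 */
		return 0;
	}
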
@@ -506,12 +480,10 @@ ivb_update_plane(struct drm_plane *plane,
506 struct drm_i915_private *dev_priv = to_i915(dev); 480 struct drm_i915_private *dev_priv = to_i915(dev);
507 struct intel_plane *intel_plane = to_intel_plane(plane); 481 struct intel_plane *intel_plane = to_intel_plane(plane);
508 struct drm_framebuffer *fb = plane_state->base.fb; 482 struct drm_framebuffer *fb = plane_state->base.fb;
509 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
510 enum pipe pipe = intel_plane->pipe; 483 enum pipe pipe = intel_plane->pipe;
511 u32 sprctl, sprscale = 0; 484 u32 sprctl, sprscale = 0;
512 u32 sprsurf_offset, linear_offset; 485 u32 sprsurf_offset, linear_offset;
513 unsigned int rotation = plane_state->base.rotation; 486 unsigned int rotation = plane_state->base.rotation;
514 int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
515 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 487 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
516 int crtc_x = plane_state->base.dst.x1; 488 int crtc_x = plane_state->base.dst.x1;
517 int crtc_y = plane_state->base.dst.y1; 489 int crtc_y = plane_state->base.dst.y1;
@@ -553,7 +525,7 @@ ivb_update_plane(struct drm_plane *plane,
553 */ 525 */
554 sprctl |= SPRITE_GAMMA_ENABLE; 526 sprctl |= SPRITE_GAMMA_ENABLE;
555 527
556 if (i915_gem_object_is_tiled(obj)) 528 if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
557 sprctl |= SPRITE_TILED; 529 sprctl |= SPRITE_TILED;
558 530
559 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 531 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
@@ -573,10 +545,8 @@ ivb_update_plane(struct drm_plane *plane,
573 if (crtc_w != src_w || crtc_h != src_h) 545 if (crtc_w != src_w || crtc_h != src_h)
574 sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h; 546 sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h;
575 547
576 linear_offset = y * fb->pitches[0] + x * cpp; 548 intel_add_fb_offsets(&x, &y, plane_state, 0);
577 sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0, 549 sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
578 fb->pitches[0], rotation);
579 linear_offset -= sprsurf_offset;
580 550
581 if (rotation == DRM_ROTATE_180) { 551 if (rotation == DRM_ROTATE_180) {
582 sprctl |= SPRITE_ROTATE_180; 552 sprctl |= SPRITE_ROTATE_180;
@@ -585,10 +555,11 @@ ivb_update_plane(struct drm_plane *plane,
585 if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { 555 if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
586 x += src_w; 556 x += src_w;
587 y += src_h; 557 y += src_h;
588 linear_offset += src_h * fb->pitches[0] + src_w * cpp;
589 } 558 }
590 } 559 }
591 560
561 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
562
592 if (key->flags) { 563 if (key->flags) {
593 I915_WRITE(SPRKEYVAL(pipe), key->min_value); 564 I915_WRITE(SPRKEYVAL(pipe), key->min_value);
594 I915_WRITE(SPRKEYMAX(pipe), key->max_value); 565 I915_WRITE(SPRKEYMAX(pipe), key->max_value);
@@ -607,7 +578,7 @@ ivb_update_plane(struct drm_plane *plane,
607 * register */ 578 * register */
608 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 579 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
609 I915_WRITE(SPROFFSET(pipe), (y << 16) | x); 580 I915_WRITE(SPROFFSET(pipe), (y << 16) | x);
610 else if (i915_gem_object_is_tiled(obj)) 581 else if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
611 I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x); 582 I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x);
612 else 583 else
613 I915_WRITE(SPRLINOFF(pipe), linear_offset); 584 I915_WRITE(SPRLINOFF(pipe), linear_offset);
@@ -617,7 +588,7 @@ ivb_update_plane(struct drm_plane *plane,
617 I915_WRITE(SPRSCALE(pipe), sprscale); 588 I915_WRITE(SPRSCALE(pipe), sprscale);
618 I915_WRITE(SPRCTL(pipe), sprctl); 589 I915_WRITE(SPRCTL(pipe), sprctl);
619 I915_WRITE(SPRSURF(pipe), 590 I915_WRITE(SPRSURF(pipe),
620 i915_gem_obj_ggtt_offset(obj) + sprsurf_offset); 591 intel_fb_gtt_offset(fb, rotation) + sprsurf_offset);
621 POSTING_READ(SPRSURF(pipe)); 592 POSTING_READ(SPRSURF(pipe));
622} 593}
623 594
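
The ivb_update_plane() hunk keeps the DRM_ROTATE_180 handling for platforms without a dedicated rotated-offset register: the start coordinate is advanced to the opposite corner of the source rectangle before the offset conversion. A tiny sketch of that adjustment; src_w/src_h are assumed to already be in the 0-based form the surrounding code uses at that point:

	#include <stdio.h>

	static void rotate_180_origin(unsigned int *x, unsigned int *y,
				      unsigned int src_w, unsigned int src_h)
	{
		*x += src_w;
		*y += src_h;
	}

	int main(void)
	{
		unsigned int x = 0, y = 0;

		rotate_180_origin(&x, &y, 1919, 1079);	/* 1920x1080 source, 0-based */
		printf("%u,%u\n", x, y);		/* 1919,1079 */
		return 0;
	}
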
@@ -647,12 +618,10 @@ ilk_update_plane(struct drm_plane *plane,
647 struct drm_i915_private *dev_priv = to_i915(dev); 618 struct drm_i915_private *dev_priv = to_i915(dev);
648 struct intel_plane *intel_plane = to_intel_plane(plane); 619 struct intel_plane *intel_plane = to_intel_plane(plane);
649 struct drm_framebuffer *fb = plane_state->base.fb; 620 struct drm_framebuffer *fb = plane_state->base.fb;
650 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
651 int pipe = intel_plane->pipe; 621 int pipe = intel_plane->pipe;
652 u32 dvscntr, dvsscale; 622 u32 dvscntr, dvsscale;
653 u32 dvssurf_offset, linear_offset; 623 u32 dvssurf_offset, linear_offset;
654 unsigned int rotation = plane_state->base.rotation; 624 unsigned int rotation = plane_state->base.rotation;
655 int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
656 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 625 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
657 int crtc_x = plane_state->base.dst.x1; 626 int crtc_x = plane_state->base.dst.x1;
658 int crtc_y = plane_state->base.dst.y1; 627 int crtc_y = plane_state->base.dst.y1;
@@ -694,7 +663,7 @@ ilk_update_plane(struct drm_plane *plane,
694 */ 663 */
695 dvscntr |= DVS_GAMMA_ENABLE; 664 dvscntr |= DVS_GAMMA_ENABLE;
696 665
697 if (i915_gem_object_is_tiled(obj)) 666 if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
698 dvscntr |= DVS_TILED; 667 dvscntr |= DVS_TILED;
699 668
700 if (IS_GEN6(dev)) 669 if (IS_GEN6(dev))
@@ -710,19 +679,18 @@ ilk_update_plane(struct drm_plane *plane,
710 if (crtc_w != src_w || crtc_h != src_h) 679 if (crtc_w != src_w || crtc_h != src_h)
711 dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h; 680 dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h;
712 681
713 linear_offset = y * fb->pitches[0] + x * cpp; 682 intel_add_fb_offsets(&x, &y, plane_state, 0);
714 dvssurf_offset = intel_compute_tile_offset(&x, &y, fb, 0, 683 dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
715 fb->pitches[0], rotation);
716 linear_offset -= dvssurf_offset;
717 684
718 if (rotation == DRM_ROTATE_180) { 685 if (rotation == DRM_ROTATE_180) {
719 dvscntr |= DVS_ROTATE_180; 686 dvscntr |= DVS_ROTATE_180;
720 687
721 x += src_w; 688 x += src_w;
722 y += src_h; 689 y += src_h;
723 linear_offset += src_h * fb->pitches[0] + src_w * cpp;
724 } 690 }
725 691
692 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
693
726 if (key->flags) { 694 if (key->flags) {
727 I915_WRITE(DVSKEYVAL(pipe), key->min_value); 695 I915_WRITE(DVSKEYVAL(pipe), key->min_value);
728 I915_WRITE(DVSKEYMAX(pipe), key->max_value); 696 I915_WRITE(DVSKEYMAX(pipe), key->max_value);
@@ -737,7 +705,7 @@ ilk_update_plane(struct drm_plane *plane,
737 I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]); 705 I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]);
738 I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x); 706 I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x);
739 707
740 if (i915_gem_object_is_tiled(obj)) 708 if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
741 I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x); 709 I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x);
742 else 710 else
743 I915_WRITE(DVSLINOFF(pipe), linear_offset); 711 I915_WRITE(DVSLINOFF(pipe), linear_offset);
@@ -746,7 +714,7 @@ ilk_update_plane(struct drm_plane *plane,
746 I915_WRITE(DVSSCALE(pipe), dvsscale); 714 I915_WRITE(DVSSCALE(pipe), dvsscale);
747 I915_WRITE(DVSCNTR(pipe), dvscntr); 715 I915_WRITE(DVSCNTR(pipe), dvscntr);
748 I915_WRITE(DVSSURF(pipe), 716 I915_WRITE(DVSSURF(pipe),
749 i915_gem_obj_ggtt_offset(obj) + dvssurf_offset); 717 intel_fb_gtt_offset(fb, rotation) + dvssurf_offset);
750 POSTING_READ(DVSSURF(pipe)); 718 POSTING_READ(DVSSURF(pipe));
751} 719}
752 720
@@ -785,6 +753,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
785 int hscale, vscale; 753 int hscale, vscale;
786 int max_scale, min_scale; 754 int max_scale, min_scale;
787 bool can_scale; 755 bool can_scale;
756 int ret;
788 757
789 src->x1 = state->base.src_x; 758 src->x1 = state->base.src_x;
790 src->y1 = state->base.src_y; 759 src->y1 = state->base.src_y;
@@ -949,6 +918,12 @@ intel_check_sprite_plane(struct drm_plane *plane,
949 dst->y1 = crtc_y; 918 dst->y1 = crtc_y;
950 dst->y2 = crtc_y + crtc_h; 919 dst->y2 = crtc_y + crtc_h;
951 920
921 if (INTEL_GEN(dev) >= 9) {
922 ret = skl_check_plane_surface(state);
923 if (ret)
924 return ret;
925 }
926
952 return 0; 927 return 0;
953} 928}
954 929
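
The final intel_sprite.c hunk ties the refactor together: on gen9+ the check phase now calls skl_check_plane_surface(), which is expected to fill in plane_state->main.x/y/offset so that the update path only writes registers from precomputed values. A hedged sketch of that check/commit split, with invented types and the trivial linear case only:

	#include <stdio.h>

	/* Invented stand-in for the precomputed view the update path consumes. */
	struct plane_main_view {
		unsigned int x, y;	/* adjusted source coordinates */
		unsigned int offset;	/* byte offset of the surface start */
	};

	static int check_plane(struct plane_main_view *view,
			       unsigned int src_x, unsigned int src_y,
			       unsigned int pitch, unsigned int cpp)
	{
		/* Validation and tiling-aware adjustment would happen here. */
		view->x = src_x;
		view->y = src_y;
		view->offset = src_y * pitch + src_x * cpp;
		return 0;
	}

	static void update_plane(const struct plane_main_view *view,
				 unsigned int gtt_base)
	{
		/* Commit phase: no recomputation, just base + precomputed offset. */
		printf("PLANE_OFFSET=0x%08x PLANE_SURF=0x%08x\n",
		       (view->y << 16) | view->x, gtt_base + view->offset);
	}

	int main(void)
	{
		struct plane_main_view view;

		if (!check_plane(&view, 64, 32, 1920 * 4, 4))
			update_plane(&view, 0x10000000);
		return 0;
	}
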
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index b1755f8db36b..4e1b274e1164 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -93,6 +93,6 @@ extern bool i915_gpu_turbo_disable(void);
93#define I845_TSEG_SIZE_1M (3 << 1) 93#define I845_TSEG_SIZE_1M (3 << 1)
94 94
95#define INTEL_BSM 0x5c 95#define INTEL_BSM 0x5c
96#define INTEL_BSM_MASK (0xFFFF << 20) 96#define INTEL_BSM_MASK (-(1u << 20))
97 97
98#endif /* _I915_DRM_H_ */ 98#endif /* _I915_DRM_H_ */
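
The INTEL_BSM_MASK change swaps (0xFFFF << 20) for -(1u << 20): unsigned negation of bit 20 yields a mask with every bit from 20 upward set, without shifting a 16-bit constant past the top of a signed 32-bit int. A quick check of the value:

	#include <stdio.h>

	int main(void)
	{
		/*
		 * Assuming the usual 32-bit unsigned int: 0 - 0x00100000 wraps
		 * to 0xfff00000, i.e. all bits above bit 19 are set, which is
		 * the same value the old (0xFFFF << 20) form produced only by
		 * overflowing the shift.
		 */
		unsigned int mask = -(1u << 20);

		printf("0x%08x\n", mask);	/* 0xfff00000 */
		return 0;
	}
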
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 645ad06b5d52..58df02bd93c9 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -31,16 +31,16 @@
31 * See Documentation/io-mapping.txt 31 * See Documentation/io-mapping.txt
32 */ 32 */
33 33
34#ifdef CONFIG_HAVE_ATOMIC_IOMAP
35
36#include <asm/iomap.h>
37
38struct io_mapping { 34struct io_mapping {
39 resource_size_t base; 35 resource_size_t base;
40 unsigned long size; 36 unsigned long size;
41 pgprot_t prot; 37 pgprot_t prot;
38 void __iomem *iomem;
42}; 39};
43 40
41#ifdef CONFIG_HAVE_ATOMIC_IOMAP
42
43#include <asm/iomap.h>
44/* 44/*
45 * For small address space machines, mapping large objects 45 * For small address space machines, mapping large objects
46 * into the kernel virtual space isn't practical. Where 46 * into the kernel virtual space isn't practical. Where
@@ -49,34 +49,25 @@ struct io_mapping {
49 */ 49 */
50 50
51static inline struct io_mapping * 51static inline struct io_mapping *
52io_mapping_create_wc(resource_size_t base, unsigned long size) 52io_mapping_init_wc(struct io_mapping *iomap,
53 resource_size_t base,
54 unsigned long size)
53{ 55{
54 struct io_mapping *iomap;
55 pgprot_t prot; 56 pgprot_t prot;
56 57
57 iomap = kmalloc(sizeof(*iomap), GFP_KERNEL);
58 if (!iomap)
59 goto out_err;
60
61 if (iomap_create_wc(base, size, &prot)) 58 if (iomap_create_wc(base, size, &prot))
62 goto out_free; 59 return NULL;
63 60
64 iomap->base = base; 61 iomap->base = base;
65 iomap->size = size; 62 iomap->size = size;
66 iomap->prot = prot; 63 iomap->prot = prot;
67 return iomap; 64 return iomap;
68
69out_free:
70 kfree(iomap);
71out_err:
72 return NULL;
73} 65}
74 66
75static inline void 67static inline void
76io_mapping_free(struct io_mapping *mapping) 68io_mapping_fini(struct io_mapping *mapping)
77{ 69{
78 iomap_free(mapping->base, mapping->size); 70 iomap_free(mapping->base, mapping->size);
79 kfree(mapping);
80} 71}
81 72
82/* Atomic map/unmap */ 73/* Atomic map/unmap */
@@ -121,21 +112,46 @@ io_mapping_unmap(void __iomem *vaddr)
121#else 112#else
122 113
123#include <linux/uaccess.h> 114#include <linux/uaccess.h>
124 115#include <asm/pgtable.h>
125/* this struct isn't actually defined anywhere */
126struct io_mapping;
127 116
128/* Create the io_mapping object*/ 117/* Create the io_mapping object*/
129static inline struct io_mapping * 118static inline struct io_mapping *
130io_mapping_create_wc(resource_size_t base, unsigned long size) 119io_mapping_init_wc(struct io_mapping *iomap,
120 resource_size_t base,
121 unsigned long size)
131{ 122{
132 return (struct io_mapping __force *) ioremap_wc(base, size); 123 iomap->base = base;
124 iomap->size = size;
125 iomap->iomem = ioremap_wc(base, size);
126#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */
127 iomap->prot = pgprot_noncached_wc(PAGE_KERNEL);
128#elif defined(pgprot_writecombine)
129 iomap->prot = pgprot_writecombine(PAGE_KERNEL);
130#else
131 iomap->prot = pgprot_noncached(PAGE_KERNEL);
132#endif
133
134 return iomap;
133} 135}
134 136
135static inline void 137static inline void
136io_mapping_free(struct io_mapping *mapping) 138io_mapping_fini(struct io_mapping *mapping)
139{
140 iounmap(mapping->iomem);
141}
142
143/* Non-atomic map/unmap */
144static inline void __iomem *
145io_mapping_map_wc(struct io_mapping *mapping,
146 unsigned long offset,
147 unsigned long size)
148{
149 return mapping->iomem + offset;
150}
151
152static inline void
153io_mapping_unmap(void __iomem *vaddr)
137{ 154{
138 iounmap((void __force __iomem *) mapping);
139} 155}
140 156
141/* Atomic map/unmap */ 157/* Atomic map/unmap */
@@ -145,30 +161,42 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping,
145{ 161{
146 preempt_disable(); 162 preempt_disable();
147 pagefault_disable(); 163 pagefault_disable();
148 return ((char __force __iomem *) mapping) + offset; 164 return io_mapping_map_wc(mapping, offset, PAGE_SIZE);
149} 165}
150 166
151static inline void 167static inline void
152io_mapping_unmap_atomic(void __iomem *vaddr) 168io_mapping_unmap_atomic(void __iomem *vaddr)
153{ 169{
170 io_mapping_unmap(vaddr);
154 pagefault_enable(); 171 pagefault_enable();
155 preempt_enable(); 172 preempt_enable();
156} 173}
157 174
158/* Non-atomic map/unmap */ 175#endif /* HAVE_ATOMIC_IOMAP */
159static inline void __iomem * 176
160io_mapping_map_wc(struct io_mapping *mapping, 177static inline struct io_mapping *
161 unsigned long offset, 178io_mapping_create_wc(resource_size_t base,
162 unsigned long size) 179 unsigned long size)
163{ 180{
164 return ((char __force __iomem *) mapping) + offset; 181 struct io_mapping *iomap;
182
183 iomap = kmalloc(sizeof(*iomap), GFP_KERNEL);
184 if (!iomap)
185 return NULL;
186
187 if (!io_mapping_init_wc(iomap, base, size)) {
188 kfree(iomap);
189 return NULL;
190 }
191
192 return iomap;
165} 193}
166 194
167static inline void 195static inline void
168io_mapping_unmap(void __iomem *vaddr) 196io_mapping_free(struct io_mapping *iomap)
169{ 197{
198 io_mapping_fini(iomap);
199 kfree(iomap);
170} 200}
171 201
172#endif /* HAVE_ATOMIC_IOMAP */
173
174#endif /* _LINUX_IO_MAPPING_H */ 202#endif /* _LINUX_IO_MAPPING_H */
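
The io-mapping.h rework splits allocation from initialisation: io_mapping_init_wc()/io_mapping_fini() operate on caller-provided storage, and io_mapping_create_wc()/io_mapping_free() become thin kmalloc/kfree wrappers around them. That lets a driver embed the struct in its private data instead of holding a separately allocated object. A hedged usage sketch with an invented driver structure:

	#include <linux/io-mapping.h>
	#include <linux/errno.h>

	/* Hypothetical driver-private structure embedding the mapping metadata. */
	struct my_drv_private {
		struct io_mapping aperture_map;		/* no extra allocation */
	};

	static int my_drv_map_aperture(struct my_drv_private *priv,
				       resource_size_t base, unsigned long size)
	{
		if (!io_mapping_init_wc(&priv->aperture_map, base, size))
			return -ENOMEM;
		return 0;
	}

	static void my_drv_unmap_aperture(struct my_drv_private *priv)
	{
		io_mapping_fini(&priv->aperture_map);
	}
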
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 452629de7a57..5501fe83ed92 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -855,7 +855,16 @@ struct drm_i915_gem_busy {
855 * having flushed any pending activity), and a non-zero return that 855 * having flushed any pending activity), and a non-zero return that
856 * the object is still in-flight on the GPU. (The GPU has not yet 856 * the object is still in-flight on the GPU. (The GPU has not yet
857 * signaled completion for all pending requests that reference the 857 * signaled completion for all pending requests that reference the
858 * object.) 858 * object.) An object is guaranteed to become idle eventually (so
859 * long as no new GPU commands are executed upon it). Due to the
860 * asynchronous nature of the hardware, an object reported
861 * as busy may become idle before the ioctl is completed.
862 *
863 * Furthermore, if the object is busy, which engine is busy is only
864 * provided as a guide. There are race conditions which prevent the
865 * report of which engines are busy from being always accurate.
866 * However, the converse is not true. If the object is idle, the
867 * result of the ioctl, that all engines are idle, is accurate.
859 * 868 *
860 * The returned dword is split into two fields to indicate both 869 * The returned dword is split into two fields to indicate both
861 * the engines on which the object is being read, and the 870 * the engines on which the object is being read, and the
@@ -878,6 +887,11 @@ struct drm_i915_gem_busy {
878 * execution engines, e.g. multiple media engines, which are 887 * execution engines, e.g. multiple media engines, which are
879 * mapped to the same identifier in the EXECBUFFER2 ioctl and 888 * mapped to the same identifier in the EXECBUFFER2 ioctl and
880 * so are not separately reported for busyness. 889 * so are not separately reported for busyness.
890 *
891 * Caveat emptor:
892 * Only the boolean result of this query is reliable; that is whether
893 * the object is idle or busy. The report of which engines are busy
894 * should be only used as a heuristic.
881 */ 895 */
882 __u32 busy; 896 __u32 busy;
883}; 897};
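
The expanded GEM_BUSY documentation stresses that only the idle/busy boolean is reliable and the per-engine bits are a heuristic. A short userspace sketch that follows that advice; the include path and error policy are assumptions, not part of the patch:

	#include <sys/ioctl.h>
	#include <stdbool.h>
	#include <drm/i915_drm.h>	/* include path may differ with libdrm layouts */

	/* Only trust the boolean result; ignore the per-engine breakdown. */
	static bool bo_is_busy(int drm_fd, __u32 handle)
	{
		struct drm_i915_gem_busy busy = { .handle = handle };

		if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
			return false;	/* on error, treat as idle (or inspect errno) */

		return busy.busy != 0;
	}
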