author     Dave Airlie <airlied@redhat.com>  2016-08-15 02:53:57 -0400
committer  Dave Airlie <airlied@redhat.com>  2016-08-15 02:53:57 -0400
commit     fc93ff608b15ae32cde3006b7af860b59cac20ec (patch)
tree       203ebf30912f3537f520d9f7c5144bd20de7d3f9 /drivers
parent     f8725ad1da5182aea9b08c8ef300e83bac74f756 (diff)
parent     c5b7e97b27db4f8a8ffe1072506620679043f006 (diff)
Merge tag 'drm-intel-next-2016-08-08' of git://anongit.freedesktop.org/drm-intel into drm-next
- refactor ddi buffer programming a bit (Ville)
- large-scale renaming to untangle naming in the gem code (Chris)
- rework vma/active tracking for accurately reaping idle mappings of shared objects (Chris)
- misc dp sst/mst probing corner case fixes (Ville)
- tons of cleanup&tunings all around in gem
- lockless (rcu-protected) request lookup, plus use it everywhere for non(b)locking waits (Chris)
- pipe crc debugfs fixes (Rodrigo)
- random fixes all over

* tag 'drm-intel-next-2016-08-08' of git://anongit.freedesktop.org/drm-intel: (222 commits)
  drm/i915: Update DRIVER_DATE to 20160808
  drm/i915: fix aliasing_ppgtt leak
  drm/i915: Update comment before i915_spin_request
  drm/i915: Use drm official vblank_no_hw_counter callback.
  drm/i915: Fix copy_to_user usage for pipe_crc
  Revert "drm/i915: Track active streams also for DP SST"
  drm/i915: fix WaInsertDummyPushConstPs
  drm/i915: Assert that the request hasn't been retired
  drm/i915: Repack fence tiling mode and stride into a single integer
  drm/i915: Document and reject invalid tiling modes
  drm/i915: Remove locking for get_tiling
  drm/i915: Remove pinned check from madvise ioctl
  drm/i915: Reduce locking inside swfinish ioctl
  drm/i915: Remove (struct_mutex) locking for busy-ioctl
  drm/i915: Remove (struct_mutex) locking for wait-ioctl
  drm/i915: Do a nonblocking wait first in pread/pwrite
  drm/i915: Remove unused no-shrinker-steal
  drm/i915: Tidy generation of the GTT mmap offset
  drm/i915/shrinker: Wait before acquiring struct_mutex under oom
  drm/i915: Simplify do_idling() (Ironlake vt-d w/a)
  ...
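The "lockless (rcu-protected) request lookup" item refers to finding an in-flight request without taking the GEM struct_mutex. As a generic illustration of that pattern only (hypothetical sketch_* names, not the i915 code): an object freed after an RCU grace period may be looked up under rcu_read_lock(), but a reference may only be taken if its refcount has not already dropped to zero.

```c
/* Minimal sketch of an RCU-protected, lockless lookup (hypothetical names).
 * The object must be released via kfree_rcu()/call_rcu() so that a reader
 * racing with the release either sees NULL or a still-valid allocation. */
#include <linux/kref.h>
#include <linux/rcupdate.h>

struct sketch_request {
	struct kref ref;
	struct rcu_head rcu;
	u32 seqno;
};

static struct sketch_request __rcu *sketch_current_request;

static struct sketch_request *sketch_request_get_rcu(void)
{
	struct sketch_request *req;

	rcu_read_lock();
	req = rcu_dereference(sketch_current_request);
	/* Only succeed if the request is still alive: a zero refcount means
	 * it is already on its way to being freed after the grace period. */
	if (req && !kref_get_unless_zero(&req->ref))
		req = NULL;
	rcu_read_unlock();

	return req;
}
```

Per the changelog above, the series applies this kind of lookup to GEM requests so that the non-blocking wait paths (e.g. the busy and wait ioctls in the shortlog) no longer need struct_mutex.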
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/drm/i915/Makefile | 3
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c | 74
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c | 187
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c | 85
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h | 517
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 2544
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_batch_pool.c | 34
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_batch_pool.h | 6
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_context.c | 183
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_debug.c | 70
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_dmabuf.c | 71
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_evict.c | 187
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 476
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_fence.c | 52
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c | 587
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.h | 183
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_render_state.c | 120
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_render_state.h | 18
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.c | 767
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.h | 676
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_shrinker.c | 78
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_stolen.c | 20
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_tiling.c | 52
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_userptr.c | 57
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c | 459
-rw-r--r--  drivers/gpu/drm/i915/i915_guc_submission.c | 31
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c | 57
-rw-r--r--  drivers/gpu/drm/i915/i915_pci.c | 1
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h | 20
-rw-r--r--  drivers/gpu/drm/i915/i915_sysfs.c | 78
-rw-r--r--  drivers/gpu/drm/i915/i915_trace.h | 35
-rw-r--r--  drivers/gpu/drm/i915/i915_vgpu.c | 3
-rw-r--r--  drivers/gpu/drm/i915/intel_audio.c | 6
-rw-r--r--  drivers/gpu/drm/i915/intel_breadcrumbs.c | 31
-rw-r--r--  drivers/gpu/drm/i915/intel_csr.c | 7
-rw-r--r--  drivers/gpu/drm/i915/intel_ddi.c | 255
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c | 245
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c | 231
-rw-r--r--  drivers/gpu/drm/i915/intel_dp_mst.c | 4
-rw-r--r--  drivers/gpu/drm/i915/intel_drv.h | 40
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c | 231
-rw-r--r--  drivers/gpu/drm/i915/intel_fbc.c | 29
-rw-r--r--  drivers/gpu/drm/i915/intel_fbdev.c | 7
-rw-r--r--  drivers/gpu/drm/i915/intel_frontbuffer.c | 128
-rw-r--r--  drivers/gpu/drm/i915/intel_frontbuffer.h | 91
-rw-r--r--  drivers/gpu/drm/i915/intel_guc.h | 1
-rw-r--r--  drivers/gpu/drm/i915/intel_guc_loader.c | 10
-rw-r--r--  drivers/gpu/drm/i915/intel_hotplug.c | 1
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 837
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.h | 52
-rw-r--r--  drivers/gpu/drm/i915/intel_mocs.c | 61
-rw-r--r--  drivers/gpu/drm/i915/intel_mocs.h | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_overlay.c | 174
-rw-r--r--  drivers/gpu/drm/i915/intel_pm.c | 354
-rw-r--r--  drivers/gpu/drm/i915/intel_psr.c | 26
-rw-r--r--  drivers/gpu/drm/i915/intel_renderstate.h | 16
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 1157
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h | 228
-rw-r--r--  drivers/gpu/drm/i915/intel_sprite.c | 13
-rw-r--r--  drivers/gpu/drm/i915/intel_uncore.c | 14
60 files changed, 6115 insertions, 5867 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 684fc1cd08fa..dda724f04445 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -25,7 +25,6 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 i915-y += i915_cmd_parser.o \
	  i915_gem_batch_pool.o \
	  i915_gem_context.o \
-	  i915_gem_debug.o \
	  i915_gem_dmabuf.o \
	  i915_gem_evict.o \
	  i915_gem_execbuffer.o \
@@ -33,6 +32,7 @@ i915-y += i915_cmd_parser.o \
	  i915_gem_gtt.o \
	  i915_gem.o \
	  i915_gem_render_state.o \
+	  i915_gem_request.o \
	  i915_gem_shrinker.o \
	  i915_gem_stolen.o \
	  i915_gem_tiling.o \
@@ -40,6 +40,7 @@ i915-y += i915_cmd_parser.o \
	  i915_gpu_error.o \
	  i915_trace_points.o \
	  intel_breadcrumbs.o \
+	  intel_engine_cs.o \
	  intel_lrc.o \
	  intel_mocs.o \
	  intel_ringbuffer.o \
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index b0fd6a7b0603..1db829c8b912 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -62,23 +62,23 @@
  * The parser always rejects such commands.
  *
  * The majority of the problematic commands fall in the MI_* range, with only a
- * few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW).
+ * few specific commands on each engine (e.g. PIPE_CONTROL and MI_FLUSH_DW).
  *
  * Implementation:
- * Each ring maintains tables of commands and registers which the parser uses in
- * scanning batch buffers submitted to that ring.
+ * Each engine maintains tables of commands and registers which the parser
+ * uses in scanning batch buffers submitted to that engine.
  *
  * Since the set of commands that the parser must check for is significantly
  * smaller than the number of commands supported, the parser tables contain only
  * those commands required by the parser. This generally works because command
  * opcode ranges have standard command length encodings. So for commands that
  * the parser does not need to check, it can easily skip them. This is
- * implemented via a per-ring length decoding vfunc.
+ * implemented via a per-engine length decoding vfunc.
  *
  * Unfortunately, there are a number of commands that do not follow the standard
  * length encoding for their opcode range, primarily amongst the MI_* commands.
  * To handle this, the parser provides a way to define explicit "skip" entries
- * in the per-ring command tables.
+ * in the per-engine command tables.
  *
  * Other command table entries map fairly directly to high level categories
  * mentioned above: rejected, master-only, register whitelist. The parser
@@ -603,7 +603,7 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
 	return 0;
 }
 
-static bool validate_cmds_sorted(struct intel_engine_cs *engine,
+static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
				 const struct drm_i915_cmd_table *cmd_tables,
				 int cmd_table_count)
 {
@@ -624,8 +624,10 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine,
 			u32 curr = desc->cmd.value & desc->cmd.mask;
 
 			if (curr < previous) {
-				DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
-					  engine->id, i, j, curr, previous);
+				DRM_ERROR("CMD: %s [%d] command table not sorted: "
+					  "table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
+					  engine->name, engine->id,
+					  i, j, curr, previous);
 				ret = false;
 			}
 
@@ -636,7 +638,7 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine,
 	return ret;
 }
 
-static bool check_sorted(int ring_id,
+static bool check_sorted(const struct intel_engine_cs *engine,
			 const struct drm_i915_reg_descriptor *reg_table,
			 int reg_count)
 {
@@ -648,8 +650,10 @@ static bool check_sorted(int ring_id,
 		u32 curr = i915_mmio_reg_offset(reg_table[i].addr);
 
 		if (curr < previous) {
-			DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
-				  ring_id, i, curr, previous);
+			DRM_ERROR("CMD: %s [%d] register table not sorted: "
+				  "entry=%d reg=0x%08X prev=0x%08X\n",
+				  engine->name, engine->id,
+				  i, curr, previous);
 			ret = false;
 		}
 
@@ -666,7 +670,7 @@ static bool validate_regs_sorted(struct intel_engine_cs *engine)
 
 	for (i = 0; i < engine->reg_table_count; i++) {
 		table = &engine->reg_tables[i];
-		if (!check_sorted(engine->id, table->regs, table->num_regs))
+		if (!check_sorted(engine, table->regs, table->num_regs))
 			return false;
 	}
 
@@ -736,7 +740,7 @@ static void fini_hash_table(struct intel_engine_cs *engine)
 }
 
 /**
- * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer
+ * intel_engine_init_cmd_parser() - set cmd parser related fields for an engine
  * @engine: the engine to initialize
  *
  * Optionally initializes fields related to batch buffer command parsing in the
@@ -745,7 +749,7 @@ static void fini_hash_table(struct intel_engine_cs *engine)
  *
  * Return: non-zero if initialization fails
  */
-int i915_cmd_parser_init_ring(struct intel_engine_cs *engine)
+int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
 {
 	const struct drm_i915_cmd_table *cmd_tables;
 	int cmd_table_count;
@@ -806,8 +810,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine)
 		engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
 		break;
 	default:
-		DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n",
-			  engine->id);
+		MISSING_CASE(engine->id);
 		BUG();
 	}
 
@@ -829,13 +832,13 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine)
 }
 
 /**
- * i915_cmd_parser_fini_ring() - clean up cmd parser related fields
+ * intel_engine_cleanup_cmd_parser() - clean up cmd parser related fields
  * @engine: the engine to clean up
  *
  * Releases any resources related to command parsing that may have been
- * initialized for the specified ring.
+ * initialized for the specified engine.
  */
-void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine)
+void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
 {
 	if (!engine->needs_cmd_parser)
 		return;
@@ -866,9 +869,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
  * Returns a pointer to a descriptor for the command specified by cmd_header.
  *
  * The caller must supply space for a default descriptor via the default_desc
- * parameter. If no descriptor for the specified command exists in the ring's
+ * parameter. If no descriptor for the specified command exists in the engine's
  * command parser tables, this function fills in default_desc based on the
- * ring's default length encoding and returns default_desc.
+ * engine's default length encoding and returns default_desc.
  */
 static const struct drm_i915_cmd_descriptor*
 find_cmd(struct intel_engine_cs *engine,
@@ -1023,15 +1026,16 @@ unpin_src:
 }
 
 /**
- * i915_needs_cmd_parser() - should a given ring use software command parsing?
+ * intel_engine_needs_cmd_parser() - should a given engine use software
+ * command parsing?
  * @engine: the engine in question
 *
 * Only certain platforms require software batch buffer command parsing, and
 * only when enabled via module parameter.
 *
- * Return: true if the ring requires software command parsing
+ * Return: true if the engine requires software command parsing
 */
-bool i915_needs_cmd_parser(struct intel_engine_cs *engine)
+bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
 {
 	if (!engine->needs_cmd_parser)
 		return false;
@@ -1078,8 +1082,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
					       reg_addr);
 
 		if (!reg) {
-			DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n",
-					 reg_addr, *cmd, engine->id);
+			DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
+					 reg_addr, *cmd, engine->exec_id);
 			return false;
 		}
 
@@ -1159,11 +1163,11 @@ static bool check_cmd(const struct intel_engine_cs *engine,
				  desc->bits[i].mask;
 
 			if (dword != desc->bits[i].expected) {
-				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n",
+				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n",
						 *cmd,
						 desc->bits[i].mask,
						 desc->bits[i].expected,
-						 dword, engine->id);
+						 dword, engine->exec_id);
 				return false;
 			}
 		}
@@ -1189,12 +1193,12 @@ static bool check_cmd(const struct intel_engine_cs *engine,
  * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
  * if the batch appears legal but should use hardware parsing
  */
-int i915_parse_cmds(struct intel_engine_cs *engine,
+int intel_engine_cmd_parser(struct intel_engine_cs *engine,
		    struct drm_i915_gem_object *batch_obj,
		    struct drm_i915_gem_object *shadow_batch_obj,
		    u32 batch_start_offset,
		    u32 batch_len,
		    bool is_master)
 {
 	u32 *cmd, *batch_base, *batch_end;
 	struct drm_i915_cmd_descriptor default_desc = { 0 };
@@ -1295,7 +1299,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
 
 	/* If the command parser is not enabled, report 0 - unsupported */
 	for_each_engine(engine, dev_priv) {
-		if (i915_needs_cmd_parser(engine)) {
+		if (intel_engine_needs_cmd_parser(engine)) {
 			active = true;
 			break;
 		}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9b03cb2813bd..f62285c1ed7f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
 
 static char get_active_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->active ? '*' : ' ';
+	return i915_gem_object_is_active(obj) ? '*' : ' ';
 }
 
 static char get_pin_flag(struct drm_i915_gem_object *obj)
@@ -101,7 +101,7 @@ static char get_pin_flag(struct drm_i915_gem_object *obj)
 
 static char get_tiling_flag(struct drm_i915_gem_object *obj)
 {
-	switch (obj->tiling_mode) {
+	switch (i915_gem_object_get_tiling(obj)) {
 	default:
 	case I915_TILING_NONE: return ' ';
 	case I915_TILING_X: return 'X';
@@ -125,7 +125,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (vma->is_ggtt && drm_mm_node_allocated(&vma->node))
+		if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node))
 			size += vma->node.size;
 	}
 
@@ -138,6 +138,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct intel_engine_cs *engine;
 	struct i915_vma *vma;
+	unsigned int frontbuffer_bits;
 	int pin_count = 0;
 	enum intel_engine_id id;
 
@@ -155,17 +156,20 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
		   obj->base.write_domain);
 	for_each_engine_id(engine, dev_priv, id)
 		seq_printf(m, "%x ",
-			   i915_gem_request_get_seqno(obj->last_read_req[id]));
+			   i915_gem_active_get_seqno(&obj->last_read[id],
+						     &obj->base.dev->struct_mutex));
 	seq_printf(m, "] %x %x%s%s%s",
-		   i915_gem_request_get_seqno(obj->last_write_req),
-		   i915_gem_request_get_seqno(obj->last_fenced_req),
+		   i915_gem_active_get_seqno(&obj->last_write,
+					     &obj->base.dev->struct_mutex),
+		   i915_gem_active_get_seqno(&obj->last_fence,
+					     &obj->base.dev->struct_mutex),
		   i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
		   obj->dirty ? " dirty" : "",
		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (vma->pin_count > 0)
+		if (i915_vma_is_pinned(vma))
 			pin_count++;
 	}
 	seq_printf(m, " (pinned x %d)", pin_count);
@@ -174,10 +178,13 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	if (obj->fence_reg != I915_FENCE_REG_NONE)
 		seq_printf(m, " (fence: %d)", obj->fence_reg);
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
 		seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
-			   vma->is_ggtt ? "g" : "pp",
+			   i915_vma_is_ggtt(vma) ? "g" : "pp",
			   vma->node.start, vma->node.size);
-		if (vma->is_ggtt)
+		if (i915_vma_is_ggtt(vma))
 			seq_printf(m, ", type: %u", vma->ggtt_view.type);
 		seq_puts(m, ")");
 	}
@@ -192,11 +199,15 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		*t = '\0';
 		seq_printf(m, " (%s mappable)", s);
 	}
-	if (obj->last_write_req != NULL)
-		seq_printf(m, " (%s)",
-			   i915_gem_request_get_engine(obj->last_write_req)->name);
-	if (obj->frontbuffer_bits)
-		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
+
+	engine = i915_gem_active_get_engine(&obj->last_write,
+					    &obj->base.dev->struct_mutex);
+	if (engine)
+		seq_printf(m, " (%s)", engine->name);
+
+	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+	if (frontbuffer_bits)
+		seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
 }
 
 static int i915_gem_object_list_info(struct seq_file *m, void *data)
@@ -338,46 +349,29 @@ static int per_file_stats(int id, void *ptr, void *data)
 
 	stats->count++;
 	stats->total += obj->base.size;
-
+	if (!obj->bind_count)
+		stats->unbound += obj->base.size;
 	if (obj->base.name || obj->base.dma_buf)
 		stats->shared += obj->base.size;
 
-	if (USES_FULL_PPGTT(obj->base.dev)) {
-		list_for_each_entry(vma, &obj->vma_list, obj_link) {
-			struct i915_hw_ppgtt *ppgtt;
-
-			if (!drm_mm_node_allocated(&vma->node))
-				continue;
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
 
-			if (vma->is_ggtt) {
-				stats->global += obj->base.size;
-				continue;
-			}
+		if (i915_vma_is_ggtt(vma)) {
+			stats->global += vma->node.size;
+		} else {
+			struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm);
 
-			ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base);
-			if (ppgtt->file_priv != stats->file_priv)
+			if (ppgtt->base.file != stats->file_priv)
				continue;
-
-			if (obj->active) /* XXX per-vma statistic */
-				stats->active += obj->base.size;
-			else
-				stats->inactive += obj->base.size;
-
-			return 0;
-		}
-	} else {
-		if (i915_gem_obj_ggtt_bound(obj)) {
-			stats->global += obj->base.size;
-			if (obj->active)
-				stats->active += obj->base.size;
-			else
-				stats->inactive += obj->base.size;
-			return 0;
 		}
-	}
 
-	if (!list_empty(&obj->global_list))
-		stats->unbound += obj->base.size;
+		if (i915_vma_is_active(vma))
+			stats->active += vma->node.size;
+		else
+			stats->inactive += vma->node.size;
+	}
 
 	return 0;
 }
@@ -425,8 +419,8 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
 	for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) {
 		if (ctx->engine[n].state)
 			per_file_stats(0, ctx->engine[n].state, data);
-		if (ctx->engine[n].ringbuf)
-			per_file_stats(0, ctx->engine[n].ringbuf->obj, data);
+		if (ctx->engine[n].ring)
+			per_file_stats(0, ctx->engine[n].ring->obj, data);
 	}
 
 	return 0;
@@ -754,13 +748,13 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 		int count;
 
 		count = 0;
-		list_for_each_entry(req, &engine->request_list, list)
+		list_for_each_entry(req, &engine->request_list, link)
 			count++;
 		if (count == 0)
 			continue;
 
 		seq_printf(m, "%s requests: %d\n", engine->name, count);
-		list_for_each_entry(req, &engine->request_list, list) {
+		list_for_each_entry(req, &engine->request_list, link) {
 			struct task_struct *task;
 
 			rcu_read_lock();
@@ -768,7 +762,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
			if (req->pid)
				task = pid_task(req->pid, PIDTYPE_PID);
			seq_printf(m, "    %x @ %d: %s [%d]\n",
-				   req->seqno,
+				   req->fence.seqno,
				   (int) (jiffies - req->emitted_jiffies),
				   task ? task->comm : "<unknown>",
				   task ? task->pid : -1);
@@ -1205,8 +1199,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 
 	intel_runtime_pm_get(dev_priv);
 
-	flush_delayed_work(&dev_priv->rps.delayed_resume_work);
-
 	if (IS_GEN5(dev)) {
 		u16 rgvswctl = I915_READ16(MEMSWCTL);
 		u16 rgvstat = I915_READ16(MEMSTAT_ILK);
@@ -1381,6 +1373,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
		   intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
 	seq_printf(m, "Min freq: %d MHz\n",
		   intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
+	seq_printf(m, "Boost freq: %d MHz\n",
+		   intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq));
 	seq_printf(m, "Max freq: %d MHz\n",
		   intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
 	seq_printf(m,
@@ -1419,7 +1413,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	intel_runtime_pm_get(dev_priv);
 
 	for_each_engine_id(engine, dev_priv, id) {
-		acthd[id] = intel_ring_get_active_head(engine);
+		acthd[id] = intel_engine_get_active_head(engine);
 		seqno[id] = intel_engine_get_seqno(engine);
 	}
 
@@ -1602,6 +1596,7 @@ static int gen6_drpc_info(struct seq_file *m)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0;
+	u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
 	unsigned forcewake_count;
 	int count = 0, ret;
 
@@ -1629,6 +1624,10 @@ static int gen6_drpc_info(struct seq_file *m)
 
 	rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
 	rcctl1 = I915_READ(GEN6_RC_CONTROL);
+	if (INTEL_INFO(dev)->gen >= 9) {
+		gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE);
+		gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS);
+	}
 	mutex_unlock(&dev->struct_mutex);
 	mutex_lock(&dev_priv->rps.hw_lock);
 	sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
@@ -1647,6 +1646,12 @@ static int gen6_drpc_info(struct seq_file *m)
		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
 	seq_printf(m, "RC6 Enabled: %s\n",
		   yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+	if (INTEL_INFO(dev)->gen >= 9) {
+		seq_printf(m, "Render Well Gating Enabled: %s\n",
+			   yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE));
+		seq_printf(m, "Media Well Gating Enabled: %s\n",
+			   yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE));
+	}
 	seq_printf(m, "Deep RC6 Enabled: %s\n",
		   yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
 	seq_printf(m, "Deepest RC6 Enabled: %s\n",
@@ -1675,6 +1680,14 @@ static int gen6_drpc_info(struct seq_file *m)
 
 	seq_printf(m, "Core Power Down: %s\n",
		   yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
+	if (INTEL_INFO(dev)->gen >= 9) {
+		seq_printf(m, "Render Power Well: %s\n",
+			   (gen9_powergate_status &
+			    GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+		seq_printf(m, "Media Power Well: %s\n",
+			   (gen9_powergate_status &
+			    GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+	}
 
 	/* Not exactly sure what this is */
 	seq_printf(m, "RC6 \"Locked to RPn\" residency since boot: %u\n",
@@ -1692,7 +1705,7 @@ static int gen6_drpc_info(struct seq_file *m)
		   GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
 	seq_printf(m, "RC6++ voltage: %dmV\n",
		   GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
-	return 0;
+	return i915_forcewake_domains(m, NULL);
 }
 
 static int i915_drpc_info(struct seq_file *m, void *unused)
@@ -1896,8 +1909,6 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 
 	intel_runtime_pm_get(dev_priv);
 
-	flush_delayed_work(&dev_priv->rps.delayed_resume_work);
-
 	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
 	if (ret)
 		goto out;
@@ -2019,12 +2030,11 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-static void describe_ctx_ringbuf(struct seq_file *m,
-				 struct intel_ringbuffer *ringbuf)
+static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
 {
 	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
-		   ringbuf->space, ringbuf->head, ringbuf->tail,
-		   ringbuf->last_retired_head);
+		   ring->space, ring->head, ring->tail,
+		   ring->last_retired_head);
 }
 
 static int i915_context_status(struct seq_file *m, void *unused)
@@ -2068,8 +2078,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		seq_putc(m, ce->initialised ? 'I' : 'i');
 		if (ce->state)
 			describe_obj(m, ce->state);
-		if (ce->ringbuf)
-			describe_ctx_ringbuf(m, ce->ringbuf);
+		if (ce->ring)
+			describe_ctx_ring(m, ce->ring);
 		seq_putc(m, '\n');
 	}
 
@@ -2467,13 +2477,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
			   list_empty(&file_priv->rps.link) ? "" : ", active");
 		rcu_read_unlock();
 	}
-	seq_printf(m, "Semaphore boosts: %d%s\n",
-		   dev_priv->rps.semaphores.boosts,
-		   list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
-	seq_printf(m, "MMIO flip boosts: %d%s\n",
-		   dev_priv->rps.mmioflips.boosts,
-		   list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
-	seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+	seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
 	spin_unlock(&dev_priv->rps.client_lock);
 	mutex_unlock(&dev->filelist_mutex);
 
@@ -3228,7 +3232,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
 	enum intel_engine_id id;
 	int j, ret;
 
-	if (!i915_semaphore_is_enabled(dev_priv)) {
+	if (!i915.semaphores) {
 		seq_puts(m, "Semaphores are disabled\n");
 		return 0;
 	}
@@ -3621,7 +3625,6 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count,
 	while (n_entries > 0) {
 		struct intel_pipe_crc_entry *entry =
			&pipe_crc->entries[pipe_crc->tail];
-		int ret;
 
 		if (CIRC_CNT(pipe_crc->head, pipe_crc->tail,
			     INTEL_PIPE_CRC_ENTRIES_NR) < 1)
@@ -3638,8 +3641,7 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count,
 
 		spin_unlock_irq(&pipe_crc->lock);
 
-		ret = copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN);
-		if (ret == PIPE_CRC_LINE_LEN)
+		if (copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN))
			return -EFAULT;
 
 		user_buf += PIPE_CRC_LINE_LEN;
@@ -4921,7 +4923,7 @@ i915_drop_caches_set(void *data, u64 val)
 		return ret;
 
 	if (val & DROP_ACTIVE) {
-		ret = i915_gem_wait_for_idle(dev_priv);
+		ret = i915_gem_wait_for_idle(dev_priv, true);
 		if (ret)
 			goto unlock;
 	}
@@ -4950,20 +4952,11 @@ i915_max_freq_get(void *data, u64 *val)
 {
 	struct drm_device *dev = data;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int ret;
 
 	if (INTEL_INFO(dev)->gen < 6)
 		return -ENODEV;
 
-	flush_delayed_work(&dev_priv->rps.delayed_resume_work);
-
-	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
-	if (ret)
-		return ret;
-
 	*val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit);
-	mutex_unlock(&dev_priv->rps.hw_lock);
-
 	return 0;
 }
 
@@ -4978,8 +4971,6 @@ i915_max_freq_set(void *data, u64 val)
 	if (INTEL_INFO(dev)->gen < 6)
 		return -ENODEV;
 
-	flush_delayed_work(&dev_priv->rps.delayed_resume_work);
-
 	DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);
 
 	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
@@ -5017,20 +5008,11 @@ i915_min_freq_get(void *data, u64 *val)
 {
 	struct drm_device *dev = data;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int ret;
 
-	if (INTEL_INFO(dev)->gen < 6)
+	if (INTEL_GEN(dev_priv) < 6)
 		return -ENODEV;
 
-	flush_delayed_work(&dev_priv->rps.delayed_resume_work);
-
-	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
-	if (ret)
-		return ret;
-
 	*val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit);
-	mutex_unlock(&dev_priv->rps.hw_lock);
-
 	return 0;
 }
 
@@ -5042,11 +5024,9 @@ i915_min_freq_set(void *data, u64 val)
 	u32 hw_max, hw_min;
 	int ret;
 
-	if (INTEL_INFO(dev)->gen < 6)
+	if (INTEL_GEN(dev_priv) < 6)
 		return -ENODEV;
 
-	flush_delayed_work(&dev_priv->rps.delayed_resume_work);
-
 	DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);
 
 	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
@@ -5268,7 +5248,8 @@ static void broadwell_sseu_device_status(struct drm_device *dev,
 static int i915_sseu_status(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
+	struct drm_device *dev = &dev_priv->drm;
 	struct sseu_dev_status stat;
 
 	if (INTEL_INFO(dev)->gen < 8)
@@ -5298,6 +5279,9 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 
 	seq_puts(m, "SSEU Device Status\n");
 	memset(&stat, 0, sizeof(stat));
+
+	intel_runtime_pm_get(dev_priv);
+
 	if (IS_CHERRYVIEW(dev)) {
 		cherryview_sseu_device_status(dev, &stat);
 	} else if (IS_BROADWELL(dev)) {
@@ -5305,6 +5289,9 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 	} else if (INTEL_INFO(dev)->gen >= 9) {
 		gen9_sseu_device_status(dev, &stat);
 	}
+
+	intel_runtime_pm_put(dev_priv);
+
 	seq_printf(m, "  Enabled Slice Total: %u\n",
		   stat.slice_total);
 	seq_printf(m, "  Enabled Subslice Total: %u\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 40cd16cf9772..57eb380a2c21 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -228,27 +228,6 @@ static void intel_detect_pch(struct drm_device *dev)
 	pci_dev_put(pch);
 }
 
-bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_GEN(dev_priv) < 6)
-		return false;
-
-	if (i915.semaphores >= 0)
-		return i915.semaphores;
-
-	/* TODO: make semaphores and Execlists play nicely together */
-	if (i915.enable_execlists)
-		return false;
-
-#ifdef CONFIG_INTEL_IOMMU
-	/* Enable semaphores on SNB when IO remapping is off */
-	if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped)
-		return false;
-#endif
-
-	return true;
-}
-
 static int i915_getparam(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
 {
@@ -324,7 +303,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = 1;
 		break;
 	case I915_PARAM_HAS_SEMAPHORES:
-		value = i915_semaphore_is_enabled(dev_priv);
+		value = i915.semaphores;
 		break;
 	case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
 		value = 1;
@@ -999,6 +978,9 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
 	i915.enable_ppgtt =
		intel_sanitize_enable_ppgtt(dev_priv, i915.enable_ppgtt);
 	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
+
+	i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);
+	DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores));
 }
 
 /**
@@ -1011,8 +993,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
 static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	uint32_t aperture_size;
 	int ret;
 
 	if (i915_inject_load_failure())
@@ -1022,16 +1002,10 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 
 	intel_sanitize_options(dev_priv);
 
-	ret = i915_ggtt_init_hw(dev);
+	ret = i915_ggtt_probe_hw(dev_priv);
 	if (ret)
 		return ret;
 
-	ret = i915_ggtt_enable_hw(dev);
-	if (ret) {
-		DRM_ERROR("failed to enable GGTT\n");
-		goto out_ggtt;
-	}
-
 	/* WARNING: Apparently we must kick fbdev drivers before vgacon,
	 * otherwise the vga fbdev driver falls over. */
 	ret = i915_kick_out_firmware_fb(dev_priv);
@@ -1046,6 +1020,16 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 		goto out_ggtt;
 	}
 
+	ret = i915_ggtt_init_hw(dev_priv);
+	if (ret)
+		return ret;
+
+	ret = i915_ggtt_enable_hw(dev_priv);
+	if (ret) {
+		DRM_ERROR("failed to enable GGTT\n");
+		goto out_ggtt;
+	}
+
 	pci_set_master(dev->pdev);
 
 	/* overlay on gen2 is broken and can't address above 1G */
@@ -1058,7 +1042,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 		}
 	}
 
-
 	/* 965GM sometimes incorrectly writes to hardware status page (HWS)
	 * using 32bit addressing, overwriting memory if HWS is located
	 * above 4GB.
@@ -1077,19 +1060,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 		}
 	}
 
-	aperture_size = ggtt->mappable_end;
-
-	ggtt->mappable =
-		io_mapping_create_wc(ggtt->mappable_base,
-				     aperture_size);
-	if (!ggtt->mappable) {
-		ret = -EIO;
-		goto out_ggtt;
-	}
-
-	ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base,
-				      aperture_size);
-
 	pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY,
			   PM_QOS_DEFAULT_VALUE);
 
@@ -1118,7 +1088,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	return 0;
 
 out_ggtt:
-	i915_ggtt_cleanup_hw(dev);
+	i915_ggtt_cleanup_hw(dev_priv);
 
 	return ret;
 }
@@ -1130,15 +1100,12 @@ out_ggtt:
 static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (dev->pdev->msi_enabled)
 		pci_disable_msi(dev->pdev);
 
 	pm_qos_remove_request(&dev_priv->pm_qos);
-	arch_phys_wc_del(ggtt->mtrr);
-	io_mapping_free(ggtt->mappable);
-	i915_ggtt_cleanup_hw(dev);
+	i915_ggtt_cleanup_hw(dev_priv);
 }
 
 /**
@@ -1343,7 +1310,7 @@ void i915_driver_unload(struct drm_device *dev)
 	i915_destroy_error_state(dev);
 
 	/* Flush any outstanding unpin_work. */
-	flush_workqueue(dev_priv->wq);
+	drain_workqueue(dev_priv->wq);
 
 	intel_guc_fini(dev);
 	i915_gem_fini(dev);
@@ -1458,8 +1425,6 @@ static int i915_drm_suspend(struct drm_device *dev)
 
 	intel_guc_suspend(dev);
 
-	intel_suspend_gt_powersave(dev_priv);
-
 	intel_display_suspend(dev);
 
 	intel_dp_mst_suspend(dev);
@@ -1586,15 +1551,13 @@ static int i915_drm_resume(struct drm_device *dev)
 
 	disable_rpm_wakeref_asserts(dev_priv);
 
-	ret = i915_ggtt_enable_hw(dev);
+	ret = i915_ggtt_enable_hw(dev_priv);
 	if (ret)
 		DRM_ERROR("failed to re-enable GGTT\n");
 
 	intel_csr_ucode_resume(dev_priv);
 
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_restore_gtt_mappings(dev);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_resume(dev);
 
 	i915_restore_state(dev);
 	intel_opregion_setup(dev_priv);
@@ -1652,6 +1615,7 @@ static int i915_drm_resume(struct drm_device *dev)
 
 	intel_opregion_notify_adapter(dev_priv, PCI_D0);
 
+	intel_autoenable_gt_powersave(dev_priv);
 	drm_kms_helper_poll_enable(dev);
 
 	enable_rpm_wakeref_asserts(dev_priv);
@@ -1778,8 +1742,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	unsigned reset_counter;
 	int ret;
 
-	intel_reset_gt_powersave(dev_priv);
-
 	mutex_lock(&dev->struct_mutex);
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
@@ -1835,8 +1797,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
	 * previous concerns that it doesn't respond well to some forms
	 * of re-init after reset.
	 */
-	if (INTEL_INFO(dev)->gen > 5)
-		intel_enable_gt_powersave(dev_priv);
+	intel_autoenable_gt_powersave(dev_priv);
 
 	return 0;
 
@@ -2462,7 +2423,6 @@ static int intel_runtime_resume(struct device *device)
	 * we can do is to hope that things will still work (and disable RPM).
	 */
 	i915_gem_init_swizzling(dev);
-	gen6_update_ring_freq(dev_priv);
 
 	intel_runtime_pm_enable_interrupts(dev_priv);
 
@@ -2618,6 +2578,7 @@ static struct drm_driver driver = {
 	.postclose = i915_driver_postclose,
 	.set_busid = drm_pci_set_busid,
 
+	.gem_close_object = i915_gem_close_object,
 	.gem_free_object = i915_gem_free_object,
 	.gem_vm_ops = &i915_gem_vm_ops,
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 21f939074abc..c36d17659ebe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -61,6 +61,7 @@
61#include "i915_gem.h" 61#include "i915_gem.h"
62#include "i915_gem_gtt.h" 62#include "i915_gem_gtt.h"
63#include "i915_gem_render_state.h" 63#include "i915_gem_render_state.h"
64#include "i915_gem_request.h"
64 65
65#include "intel_gvt.h" 66#include "intel_gvt.h"
66 67
@@ -69,7 +70,7 @@
69 70
70#define DRIVER_NAME "i915" 71#define DRIVER_NAME "i915"
71#define DRIVER_DESC "Intel Graphics" 72#define DRIVER_DESC "Intel Graphics"
72#define DRIVER_DATE "20160711" 73#define DRIVER_DATE "20160808"
73 74
74#undef WARN_ON 75#undef WARN_ON
75/* Many gcc seem to no see through this and fall over :( */ 76/* Many gcc seem to no see through this and fall over :( */
@@ -401,7 +402,7 @@ struct drm_i915_file_private {
401 unsigned boosts; 402 unsigned boosts;
402 } rps; 403 } rps;
403 404
404 unsigned int bsd_ring; 405 unsigned int bsd_engine;
405}; 406};
406 407
407/* Used by dp and fdi links */ 408/* Used by dp and fdi links */
@@ -431,8 +432,6 @@ void intel_link_compute_m_n(int bpp, int nlanes,
431#define DRIVER_MINOR 6 432#define DRIVER_MINOR 6
432#define DRIVER_PATCHLEVEL 0 433#define DRIVER_PATCHLEVEL 0
433 434
434#define WATCH_LISTS 0
435
436struct opregion_header; 435struct opregion_header;
437struct opregion_acpi; 436struct opregion_acpi;
438struct opregion_swsci; 437struct opregion_swsci;
@@ -511,13 +510,13 @@ struct drm_i915_error_state {
511 struct intel_display_error_state *display; 510 struct intel_display_error_state *display;
512 struct drm_i915_error_object *semaphore_obj; 511 struct drm_i915_error_object *semaphore_obj;
513 512
514 struct drm_i915_error_ring { 513 struct drm_i915_error_engine {
515 bool valid; 514 int engine_id;
516 /* Software tracked state */ 515 /* Software tracked state */
517 bool waiting; 516 bool waiting;
518 int num_waiters; 517 int num_waiters;
519 int hangcheck_score; 518 int hangcheck_score;
520 enum intel_ring_hangcheck_action hangcheck_action; 519 enum intel_engine_hangcheck_action hangcheck_action;
521 int num_requests; 520 int num_requests;
522 521
523 /* our own tracking of ring head and tail */ 522 /* our own tracking of ring head and tail */
@@ -577,7 +576,7 @@ struct drm_i915_error_state {
577 576
578 pid_t pid; 577 pid_t pid;
579 char comm[TASK_COMM_LEN]; 578 char comm[TASK_COMM_LEN];
580 } ring[I915_NUM_ENGINES]; 579 } engine[I915_NUM_ENGINES];
581 580
582 struct drm_i915_error_buffer { 581 struct drm_i915_error_buffer {
583 u32 size; 582 u32 size;
@@ -592,7 +591,7 @@ struct drm_i915_error_state {
592 u32 dirty:1; 591 u32 dirty:1;
593 u32 purgeable:1; 592 u32 purgeable:1;
594 u32 userptr:1; 593 u32 userptr:1;
595 s32 ring:4; 594 s32 engine:4;
596 u32 cache_level:3; 595 u32 cache_level:3;
597 } **active_bo, **pinned_bo; 596 } **active_bo, **pinned_bo;
598 597
@@ -893,7 +892,7 @@ struct i915_gem_context {
893 892
894 struct intel_context { 893 struct intel_context {
895 struct drm_i915_gem_object *state; 894 struct drm_i915_gem_object *state;
896 struct intel_ringbuffer *ringbuf; 895 struct intel_ring *ring;
897 struct i915_vma *lrc_vma; 896 struct i915_vma *lrc_vma;
898 uint32_t *lrc_reg_state; 897 uint32_t *lrc_reg_state;
899 u64 lrc_desc; 898 u64 lrc_desc;
@@ -908,6 +907,7 @@ struct i915_gem_context {
908 struct list_head link; 907 struct list_head link;
909 908
910 u8 remap_slice; 909 u8 remap_slice;
910 bool closed:1;
911}; 911};
912 912
913enum fb_op_origin { 913enum fb_op_origin {
@@ -1173,6 +1173,7 @@ struct intel_gen6_power_mgmt {
1173 u8 max_freq_softlimit; /* Max frequency permitted by the driver */ 1173 u8 max_freq_softlimit; /* Max frequency permitted by the driver */
1174 u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ 1174 u8 max_freq; /* Maximum frequency, RP0 if not overclocking */
1175 u8 min_freq; /* AKA RPn. Minimum frequency */ 1175 u8 min_freq; /* AKA RPn. Minimum frequency */
1176 u8 boost_freq; /* Frequency to request when wait boosting */
1176 u8 idle_freq; /* Frequency to request when we are idle */ 1177 u8 idle_freq; /* Frequency to request when we are idle */
1177 u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ 1178 u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
1178 u8 rp1_freq; /* "less than" RP0 power/freqency */ 1179 u8 rp1_freq; /* "less than" RP0 power/freqency */
@@ -1190,11 +1191,9 @@ struct intel_gen6_power_mgmt {
1190 bool client_boost; 1191 bool client_boost;
1191 1192
1192 bool enabled; 1193 bool enabled;
1193 struct delayed_work delayed_resume_work; 1194 struct delayed_work autoenable_work;
1194 unsigned boosts; 1195 unsigned boosts;
1195 1196
1196 struct intel_rps_client semaphores, mmioflips;
1197
1198 /* manual wa residency calculations */ 1197 /* manual wa residency calculations */
1199 struct intel_rps_ei up_ei, down_ei; 1198 struct intel_rps_ei up_ei, down_ei;
1200 1199
@@ -1319,7 +1318,6 @@ struct i915_gem_mm {
1319 struct notifier_block oom_notifier; 1318 struct notifier_block oom_notifier;
1320 struct notifier_block vmap_notifier; 1319 struct notifier_block vmap_notifier;
1321 struct shrinker shrinker; 1320 struct shrinker shrinker;
1322 bool shrinker_no_lock_stealing;
1323 1321
1324 /** LRU list of objects with fence regs on them. */ 1322 /** LRU list of objects with fence regs on them. */
1325 struct list_head fence_list; 1323 struct list_head fence_list;
@@ -1331,7 +1329,7 @@ struct i915_gem_mm {
1331 bool interruptible; 1329 bool interruptible;
1332 1330
1333 /* the indicator for dispatch video commands on two BSD rings */ 1331 /* the indicator for dispatch video commands on two BSD rings */
1334 unsigned int bsd_ring_dispatch_index; 1332 unsigned int bsd_engine_dispatch_index;
1335 1333
1336 /** Bit 6 swizzling required for X tiling */ 1334 /** Bit 6 swizzling required for X tiling */
1337 uint32_t bit_6_swizzle_x; 1335 uint32_t bit_6_swizzle_x;
@@ -1670,7 +1668,7 @@ struct intel_pipe_crc {
1670}; 1668};
1671 1669
1672struct i915_frontbuffer_tracking { 1670struct i915_frontbuffer_tracking {
1673 struct mutex lock; 1671 spinlock_t lock;
1674 1672
1675 /* 1673 /*
1676 * Tracking bits for delayed frontbuffer flushing due to gpu activity or 1674 * Tracking bits for delayed frontbuffer flushing due to gpu activity or
@@ -1705,18 +1703,6 @@ struct i915_virtual_gpu {
1705 bool active; 1703 bool active;
1706}; 1704};
1707 1705
1708struct i915_execbuffer_params {
1709 struct drm_device *dev;
1710 struct drm_file *file;
1711 uint32_t dispatch_flags;
1712 uint32_t args_batch_start_offset;
1713 uint64_t batch_obj_vm_offset;
1714 struct intel_engine_cs *engine;
1715 struct drm_i915_gem_object *batch_obj;
1716 struct i915_gem_context *ctx;
1717 struct drm_i915_gem_request *request;
1718};
1719
1720/* used in computing the new watermarks state */ 1706/* used in computing the new watermarks state */
1721struct intel_wm_config { 1707struct intel_wm_config {
1722 unsigned int num_pipes_active; 1708 unsigned int num_pipes_active;
@@ -1769,7 +1755,7 @@ struct drm_i915_private {
1769 struct i915_gem_context *kernel_context; 1755 struct i915_gem_context *kernel_context;
1770 struct intel_engine_cs engine[I915_NUM_ENGINES]; 1756 struct intel_engine_cs engine[I915_NUM_ENGINES];
1771 struct drm_i915_gem_object *semaphore_obj; 1757 struct drm_i915_gem_object *semaphore_obj;
1772 uint32_t last_seqno, next_seqno; 1758 u32 next_seqno;
1773 1759
1774 struct drm_dma_handle *status_page_dmah; 1760 struct drm_dma_handle *status_page_dmah;
1775 struct resource mch_res; 1761 struct resource mch_res;
@@ -2016,12 +2002,7 @@ struct drm_i915_private {
2016 2002
2017 /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ 2003 /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
2018 struct { 2004 struct {
2019 int (*execbuf_submit)(struct i915_execbuffer_params *params,
2020 struct drm_i915_gem_execbuffer2 *args,
2021 struct list_head *vmas);
2022 int (*init_engines)(struct drm_device *dev);
2023 void (*cleanup_engine)(struct intel_engine_cs *engine); 2005 void (*cleanup_engine)(struct intel_engine_cs *engine);
2024 void (*stop_engine)(struct intel_engine_cs *engine);
2025 2006
2026 /** 2007 /**
2027 * Is the GPU currently considered idle, or busy executing 2008 * Is the GPU currently considered idle, or busy executing
@@ -2144,8 +2125,6 @@ struct drm_i915_gem_object_ops {
2144 */ 2125 */
2145#define INTEL_MAX_SPRITE_BITS_PER_PIPE 5 2126#define INTEL_MAX_SPRITE_BITS_PER_PIPE 5
2146#define INTEL_FRONTBUFFER_BITS_PER_PIPE 8 2127#define INTEL_FRONTBUFFER_BITS_PER_PIPE 8
2147#define INTEL_FRONTBUFFER_BITS \
2148 (INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES)
2149#define INTEL_FRONTBUFFER_PRIMARY(pipe) \ 2128#define INTEL_FRONTBUFFER_PRIMARY(pipe) \
2150 (1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) 2129 (1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
2151#define INTEL_FRONTBUFFER_CURSOR(pipe) \ 2130#define INTEL_FRONTBUFFER_CURSOR(pipe) \
@@ -2169,18 +2148,21 @@ struct drm_i915_gem_object {
2169 struct drm_mm_node *stolen; 2148 struct drm_mm_node *stolen;
2170 struct list_head global_list; 2149 struct list_head global_list;
2171 2150
2172 struct list_head engine_list[I915_NUM_ENGINES];
2173 /** Used in execbuf to temporarily hold a ref */ 2151 /** Used in execbuf to temporarily hold a ref */
2174 struct list_head obj_exec_link; 2152 struct list_head obj_exec_link;
2175 2153
2176 struct list_head batch_pool_link; 2154 struct list_head batch_pool_link;
2177 2155
2156 unsigned long flags;
2178 /** 2157 /**
2179 * This is set if the object is on the active lists (has pending 2158 * This is set if the object is on the active lists (has pending
2180 * rendering and so a non-zero seqno), and is not set if it is on 2159 * rendering and so a non-zero seqno), and is not set if it is on
2181 * inactive (ready to be unbound) list. 2160 * inactive (ready to be unbound) list.
2182 */ 2161 */
2183 unsigned int active:I915_NUM_ENGINES; 2162#define I915_BO_ACTIVE_SHIFT 0
2163#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
2164#define __I915_BO_ACTIVE(bo) \
2165 ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
2184 2166
2185 /** 2167 /**
2186 * This is set if the object has been written to since last bound 2168 * This is set if the object has been written to since last bound
@@ -2201,10 +2183,6 @@ struct drm_i915_gem_object {
2201 unsigned int madv:2; 2183 unsigned int madv:2;
2202 2184
2203 /** 2185 /**
2204 * Current tiling mode for the object.
2205 */
2206 unsigned int tiling_mode:2;
2207 /**
2208 * Whether the tiling parameters for the currently associated fence 2186 * Whether the tiling parameters for the currently associated fence
2209 * register have changed. Note that for the purposes of tracking 2187 * register have changed. Note that for the purposes of tracking
2210 * tiling changes we also treat the unfenced register, the register 2188 * tiling changes we also treat the unfenced register, the register
@@ -2234,9 +2212,17 @@ struct drm_i915_gem_object {
2234 unsigned int cache_level:3; 2212 unsigned int cache_level:3;
2235 unsigned int cache_dirty:1; 2213 unsigned int cache_dirty:1;
2236 2214
2237 unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS; 2215 atomic_t frontbuffer_bits;
2216
2217 /** Current tiling stride for the object, if it's tiled. */
2218 unsigned int tiling_and_stride;
2219#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
2220#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
2221#define STRIDE_MASK (~TILING_MASK)
2238 2222
2239 unsigned int has_wc_mmap; 2223 unsigned int has_wc_mmap;
2224 /** Count of VMA actually bound by this object */
2225 unsigned int bind_count;
2240 unsigned int pin_display; 2226 unsigned int pin_display;
2241 2227
2242 struct sg_table *pages; 2228 struct sg_table *pages;
@@ -2256,14 +2242,10 @@ struct drm_i915_gem_object {
2256 * requests on one ring where the write request is older than the 2242 * requests on one ring where the write request is older than the
2257 * read request. This allows for the CPU to read from an active 2243 * read request. This allows for the CPU to read from an active
2258 * buffer by only waiting for the write to complete. 2244 * buffer by only waiting for the write to complete.
2259 * */ 2245 */
2260 struct drm_i915_gem_request *last_read_req[I915_NUM_ENGINES]; 2246 struct i915_gem_active last_read[I915_NUM_ENGINES];
2261 struct drm_i915_gem_request *last_write_req; 2247 struct i915_gem_active last_write;
2262 /** Breadcrumb of last fenced GPU access to the buffer. */ 2248 struct i915_gem_active last_fence;
2263 struct drm_i915_gem_request *last_fenced_req;
2264
2265 /** Current tiling stride for the object, if it's tiled. */
2266 uint32_t stride;
2267 2249
2268 /** References from framebuffers, locks out tiling changes. */ 2250 /** References from framebuffers, locks out tiling changes. */
2269 unsigned long framebuffer_references; 2251 unsigned long framebuffer_references;
@@ -2287,7 +2269,56 @@ struct drm_i915_gem_object {
2287 } userptr; 2269 } userptr;
2288 }; 2270 };
2289}; 2271};
2290#define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) 2272
2273static inline struct drm_i915_gem_object *
2274to_intel_bo(struct drm_gem_object *gem)
2275{
2276 /* Assert that to_intel_bo(NULL) == NULL */
2277 BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
2278
2279 return container_of(gem, struct drm_i915_gem_object, base);
2280}
2281
2282static inline struct drm_i915_gem_object *
2283i915_gem_object_lookup(struct drm_file *file, u32 handle)
2284{
2285 return to_intel_bo(drm_gem_object_lookup(file, handle));
2286}
2287
2288__deprecated
2289extern struct drm_gem_object *
2290drm_gem_object_lookup(struct drm_file *file, u32 handle);
2291
2292__attribute__((nonnull))
2293static inline struct drm_i915_gem_object *
2294i915_gem_object_get(struct drm_i915_gem_object *obj)
2295{
2296 drm_gem_object_reference(&obj->base);
2297 return obj;
2298}
2299
2300__deprecated
2301extern void drm_gem_object_reference(struct drm_gem_object *);
2302
2303__attribute__((nonnull))
2304static inline void
2305i915_gem_object_put(struct drm_i915_gem_object *obj)
2306{
2307 drm_gem_object_unreference(&obj->base);
2308}
2309
2310__deprecated
2311extern void drm_gem_object_unreference(struct drm_gem_object *);
2312
2313__attribute__((nonnull))
2314static inline void
2315i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
2316{
2317 drm_gem_object_unreference_unlocked(&obj->base);
2318}
2319
2320__deprecated
2321extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
2291 2322
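The lookup/get/put helpers above lean on one property spelled out in to_intel_bo(): because base is the first member, container_of() is effectively a cast and a failed lookup (NULL) passes straight through, so callers only need to test the returned pointer. A self-contained sketch of that property, using toy struct names rather than the driver's types:

#include <assert.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct gem_base { int handle; };
struct toy_bo { struct gem_base base; int tiling; };   /* base must come first */

static struct toy_bo *to_toy_bo(struct gem_base *gem)
{
        /* mirrors the BUILD_BUG_ON(offsetof(..., base)) check above */
        _Static_assert(offsetof(struct toy_bo, base) == 0, "base must be first");
        return container_of(gem, struct toy_bo, base);
}

int main(void)
{
        struct toy_bo bo = { .base = { .handle = 1 }, .tiling = 0 };

        assert(to_toy_bo(&bo.base) == &bo);
        assert(to_toy_bo(NULL) == NULL);   /* failed lookups stay NULL */
        return 0;
}
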
2292static inline bool 2323static inline bool
2293i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) 2324i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
@@ -2295,6 +2326,55 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
2295 return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; 2326 return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
2296} 2327}
2297 2328
2329static inline unsigned long
2330i915_gem_object_get_active(const struct drm_i915_gem_object *obj)
2331{
2332 return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
2333}
2334
2335static inline bool
2336i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
2337{
2338 return i915_gem_object_get_active(obj);
2339}
2340
2341static inline void
2342i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
2343{
2344 obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT);
2345}
2346
2347static inline void
2348i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine)
2349{
2350 obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT);
2351}
2352
2353static inline bool
2354i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
2355 int engine)
2356{
2357 return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
2358}
2359
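These helpers keep the per-engine busy state in the low bits of obj->flags, one bit per engine starting at I915_BO_ACTIVE_SHIFT. A minimal userspace sketch of the same bit bookkeeping (the engine count and identifiers here are invented for the example):

#include <assert.h>
#include <stdio.h>

#define NUM_ENGINES  5                         /* assumed engine count */
#define ACTIVE_SHIFT 0
#define ACTIVE_MASK  ((1ul << NUM_ENGINES) - 1)

static unsigned long get_active(unsigned long flags)
{
        return (flags >> ACTIVE_SHIFT) & ACTIVE_MASK;
}

int main(void)
{
        unsigned long flags = 0;

        flags |= 1ul << (0 + ACTIVE_SHIFT);    /* engine 0 starts using the object */
        flags |= 1ul << (2 + ACTIVE_SHIFT);    /* so does engine 2 */
        assert(get_active(flags) == 0x5);      /* active on engines 0 and 2 */

        flags &= ~(1ul << (2 + ACTIVE_SHIFT)); /* engine 2 retires its request */
        printf("active mask now %#lx\n", get_active(flags));   /* 0x1 */
        return 0;
}
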
2360static inline unsigned int
2361i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
2362{
2363 return obj->tiling_and_stride & TILING_MASK;
2364}
2365
2366static inline bool
2367i915_gem_object_is_tiled(struct drm_i915_gem_object *obj)
2368{
2369 return i915_gem_object_get_tiling(obj) != I915_TILING_NONE;
2370}
2371
2372static inline unsigned int
2373i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
2374{
2375 return obj->tiling_and_stride & STRIDE_MASK;
2376}
2377
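i915_gem_object_get_tiling() and _get_stride() work because a fenceable stride is at least FENCE_MINIMUM_STRIDE (128) bytes, so the low seven bits of tiling_and_stride are free to carry the tiling mode. An illustrative pack/unpack in plain C (local macro and enum names, not the driver's):

#include <assert.h>

#define FENCE_MIN_STRIDE 128
#define TILING_MASK      (FENCE_MIN_STRIDE - 1)   /* low 7 bits hold the tiling mode */
#define STRIDE_MASK      (~TILING_MASK)

enum { TILING_NONE = 0, TILING_X = 1, TILING_Y = 2 };

int main(void)
{
        unsigned int stride = 4096;               /* tiled strides keep the low bits clear */
        unsigned int packed = stride | TILING_X;

        assert((packed & STRIDE_MASK) == 4096);          /* get_stride() */
        assert((packed & TILING_MASK) == TILING_X);      /* get_tiling() */
        assert((packed & TILING_MASK) != TILING_NONE);   /* is_tiled() */
        return 0;
}
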
2298/* 2378/*
2299 * Optimised SGL iterator for GEM objects 2379 * Optimised SGL iterator for GEM objects
2300 */ 2380 */
@@ -2365,171 +2445,6 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
2365 (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ 2445 (((__iter).curr += PAGE_SIZE) < (__iter).max) || \
2366 ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0)) 2446 ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
2367 2447
2368/**
2369 * Request queue structure.
2370 *
2371 * The request queue allows us to note sequence numbers that have been emitted
2372 * and may be associated with active buffers to be retired.
2373 *
2374 * By keeping this list, we can avoid having to do questionable sequence
2375 * number comparisons on buffer last_read|write_seqno. It also allows an
2376 * emission time to be associated with the request for tracking how far ahead
2377 * of the GPU the submission is.
2378 *
2379 * The requests are reference counted, so upon creation they should have an
2380 * initial reference taken using kref_init
2381 */
2382struct drm_i915_gem_request {
2383 struct kref ref;
2384
2385 /** On which ring this request was generated */
2386 struct drm_i915_private *i915;
2387 struct intel_engine_cs *engine;
2388 struct intel_signal_node signaling;
2389
2390 /** GEM sequence number associated with the previous request,
2391 * when the HWS breadcrumb is equal to this the GPU is processing
2392 * this request.
2393 */
2394 u32 previous_seqno;
2395
2396 /** GEM sequence number associated with this request,
2397 * when the HWS breadcrumb is equal or greater than this the GPU
2398 * has finished processing this request.
2399 */
2400 u32 seqno;
2401
2402 /** Position in the ringbuffer of the start of the request */
2403 u32 head;
2404
2405 /**
2406 * Position in the ringbuffer of the start of the postfix.
2407 * This is required to calculate the maximum available ringbuffer
2408 * space without overwriting the postfix.
2409 */
2410 u32 postfix;
2411
2412 /** Position in the ringbuffer of the end of the whole request */
2413 u32 tail;
2414
2415 /** Preallocate space in the ringbuffer for emitting the request */
2416 u32 reserved_space;
2417
2418 /**
2419 * Context and ring buffer related to this request
2420 * Contexts are refcounted, so when this request is associated with a
2421 * context, we must increment the context's refcount, to guarantee that
2422 * it persists while any request is linked to it. Requests themselves
2423 * are also refcounted, so the request will only be freed when the last
2424 * reference to it is dismissed, and the code in
2425 * i915_gem_request_free() will then decrement the refcount on the
2426 * context.
2427 */
2428 struct i915_gem_context *ctx;
2429 struct intel_ringbuffer *ringbuf;
2430
2431 /**
2432 * Context related to the previous request.
2433 * As the contexts are accessed by the hardware until the switch is
2434 * completed to a new context, the hardware may still be writing
2435 * to the context object after the breadcrumb is visible. We must
2436 * not unpin/unbind/prune that object whilst still active and so
2437 * we keep the previous context pinned until the following (this)
2438 * request is retired.
2439 */
2440 struct i915_gem_context *previous_context;
2441
2442 /** Batch buffer related to this request if any (used for
2443 error state dump only) */
2444 struct drm_i915_gem_object *batch_obj;
2445
2446 /** Time at which this request was emitted, in jiffies. */
2447 unsigned long emitted_jiffies;
2448
2449 /** global list entry for this request */
2450 struct list_head list;
2451
2452 struct drm_i915_file_private *file_priv;
2453 /** file_priv list entry for this request */
2454 struct list_head client_list;
2455
2456 /** process identifier submitting this request */
2457 struct pid *pid;
2458
2459 /**
2460 * The ELSP only accepts two elements at a time, so we queue
2461 * context/tail pairs on a given queue (ring->execlist_queue) until the
2462 * hardware is available. The queue serves a double purpose: we also use
2463 * it to keep track of the up to 2 contexts currently in the hardware
2464 * (usually one in execution and the other queued up by the GPU): We
2465 * only remove elements from the head of the queue when the hardware
2466 * informs us that an element has been completed.
2467 *
2468 * All accesses to the queue are mediated by a spinlock
2469 * (ring->execlist_lock).
2470 */
2471
2472 /** Execlist link in the submission queue.*/
2473 struct list_head execlist_link;
2474
2475 /** Execlists no. of times this request has been sent to the ELSP */
2476 int elsp_submitted;
2477
2478 /** Execlists context hardware id. */
2479 unsigned ctx_hw_id;
2480};
2481
2482struct drm_i915_gem_request * __must_check
2483i915_gem_request_alloc(struct intel_engine_cs *engine,
2484 struct i915_gem_context *ctx);
2485void i915_gem_request_free(struct kref *req_ref);
2486int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
2487 struct drm_file *file);
2488
2489static inline uint32_t
2490i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
2491{
2492 return req ? req->seqno : 0;
2493}
2494
2495static inline struct intel_engine_cs *
2496i915_gem_request_get_engine(struct drm_i915_gem_request *req)
2497{
2498 return req ? req->engine : NULL;
2499}
2500
2501static inline struct drm_i915_gem_request *
2502i915_gem_request_reference(struct drm_i915_gem_request *req)
2503{
2504 if (req)
2505 kref_get(&req->ref);
2506 return req;
2507}
2508
2509static inline void
2510i915_gem_request_unreference(struct drm_i915_gem_request *req)
2511{
2512 kref_put(&req->ref, i915_gem_request_free);
2513}
2514
2515static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
2516 struct drm_i915_gem_request *src)
2517{
2518 if (src)
2519 i915_gem_request_reference(src);
2520
2521 if (*pdst)
2522 i915_gem_request_unreference(*pdst);
2523
2524 *pdst = src;
2525}
2526
2527/*
2528 * XXX: i915_gem_request_completed should be here but currently needs the
2529 * definition of i915_seqno_passed() which is below. It will be moved in
2530 * a later patch when the call to i915_seqno_passed() is obsoleted...
2531 */
2532
2533/* 2448/*
2534 * A command that requires special handling by the command parser. 2449 * A command that requires special handling by the command parser.
2535 */ 2450 */
@@ -2617,8 +2532,9 @@ struct drm_i915_cmd_descriptor {
2617/* 2532/*
2618 * A table of commands requiring special handling by the command parser. 2533 * A table of commands requiring special handling by the command parser.
2619 * 2534 *
2620 * Each ring has an array of tables. Each table consists of an array of command 2535 * Each engine has an array of tables. Each table consists of an array of
2621 * descriptors, which must be sorted with command opcodes in ascending order. 2536 * command descriptors, which must be sorted with command opcodes in
2537 * ascending order.
2622 */ 2538 */
2623struct drm_i915_cmd_table { 2539struct drm_i915_cmd_table {
2624 const struct drm_i915_cmd_descriptor *table; 2540 const struct drm_i915_cmd_descriptor *table;
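Keeping each table sorted by opcode means a descriptor can be located with an ordinary binary search; a standalone illustration over a toy table (the real descriptors carry flags, length masks and more than shown here):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

struct cmd_desc { uint32_t opcode; unsigned int length; };

static int cmp_desc(const void *key, const void *elt)
{
        uint32_t opcode = *(const uint32_t *)key;
        const struct cmd_desc *desc = elt;

        if (opcode < desc->opcode)
                return -1;
        if (opcode > desc->opcode)
                return 1;
        return 0;
}

int main(void)
{
        static const struct cmd_desc table[] = {  /* opcodes in ascending order */
                { 0x02, 1 }, { 0x11, 2 }, { 0x28, 4 },
        };
        uint32_t opcode = 0x11;
        const struct cmd_desc *found =
                bsearch(&opcode, table, 3, sizeof(table[0]), cmp_desc);

        assert(found && found->length == 2);
        return 0;
}
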
@@ -2932,6 +2848,8 @@ extern int i915_resume_switcheroo(struct drm_device *dev);
2932int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, 2848int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
2933 int enable_ppgtt); 2849 int enable_ppgtt);
2934 2850
2851bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value);
2852
2935/* i915_drv.c */ 2853/* i915_drv.c */
2936void __printf(3, 4) 2854void __printf(3, 4)
2937__i915_printk(struct drm_i915_private *dev_priv, const char *level, 2855__i915_printk(struct drm_i915_private *dev_priv, const char *level,
@@ -3107,11 +3025,6 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
3107 struct drm_file *file_priv); 3025 struct drm_file *file_priv);
3108int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 3026int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
3109 struct drm_file *file_priv); 3027 struct drm_file *file_priv);
3110void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
3111 struct drm_i915_gem_request *req);
3112int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
3113 struct drm_i915_gem_execbuffer2 *args,
3114 struct list_head *vmas);
3115int i915_gem_execbuffer(struct drm_device *dev, void *data, 3028int i915_gem_execbuffer(struct drm_device *dev, void *data,
3116 struct drm_file *file_priv); 3029 struct drm_file *file_priv);
3117int i915_gem_execbuffer2(struct drm_device *dev, void *data, 3030int i915_gem_execbuffer2(struct drm_device *dev, void *data,
@@ -3150,40 +3063,24 @@ struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
3150 size_t size); 3063 size_t size);
3151struct drm_i915_gem_object *i915_gem_object_create_from_data( 3064struct drm_i915_gem_object *i915_gem_object_create_from_data(
3152 struct drm_device *dev, const void *data, size_t size); 3065 struct drm_device *dev, const void *data, size_t size);
3066void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
3153void i915_gem_free_object(struct drm_gem_object *obj); 3067void i915_gem_free_object(struct drm_gem_object *obj);
3154void i915_gem_vma_destroy(struct i915_vma *vma); 3068
3155
3156/* Flags used by pin/bind&friends. */
3157#define PIN_MAPPABLE (1<<0)
3158#define PIN_NONBLOCK (1<<1)
3159#define PIN_GLOBAL (1<<2)
3160#define PIN_OFFSET_BIAS (1<<3)
3161#define PIN_USER (1<<4)
3162#define PIN_UPDATE (1<<5)
3163#define PIN_ZONE_4G (1<<6)
3164#define PIN_HIGH (1<<7)
3165#define PIN_OFFSET_FIXED (1<<8)
3166#define PIN_OFFSET_MASK (~4095)
3167int __must_check
3168i915_gem_object_pin(struct drm_i915_gem_object *obj,
3169 struct i915_address_space *vm,
3170 uint32_t alignment,
3171 uint64_t flags);
3172int __must_check 3069int __must_check
3173i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 3070i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3174 const struct i915_ggtt_view *view, 3071 const struct i915_ggtt_view *view,
3175 uint32_t alignment, 3072 u64 size,
3176 uint64_t flags); 3073 u64 alignment,
3074 u64 flags);
3177 3075
3178int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3076int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3179 u32 flags); 3077 u32 flags);
3180void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); 3078void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
3181int __must_check i915_vma_unbind(struct i915_vma *vma); 3079int __must_check i915_vma_unbind(struct i915_vma *vma);
3182/* 3080void i915_vma_close(struct i915_vma *vma);
3183 * BEWARE: Do not use the function below unless you can _absolutely_ 3081void i915_vma_destroy(struct i915_vma *vma);
3184 * _guarantee_ VMA in question is _not in use_ anywhere. 3082
3185 */ 3083int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
3186int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma);
3187int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); 3084int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
3188void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); 3085void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
3189void i915_gem_release_mmap(struct drm_i915_gem_object *obj); 3086void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
@@ -3285,10 +3182,10 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
3285 3182
3286int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); 3183int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
3287int i915_gem_object_sync(struct drm_i915_gem_object *obj, 3184int i915_gem_object_sync(struct drm_i915_gem_object *obj,
3288 struct intel_engine_cs *to, 3185 struct drm_i915_gem_request *to);
3289 struct drm_i915_gem_request **to_req);
3290void i915_vma_move_to_active(struct i915_vma *vma, 3186void i915_vma_move_to_active(struct i915_vma *vma,
3291 struct drm_i915_gem_request *req); 3187 struct drm_i915_gem_request *req,
3188 unsigned int flags);
3292int i915_gem_dumb_create(struct drm_file *file_priv, 3189int i915_gem_dumb_create(struct drm_file *file_priv,
3293 struct drm_device *dev, 3190 struct drm_device *dev,
3294 struct drm_mode_create_dumb *args); 3191 struct drm_mode_create_dumb *args);
@@ -3299,44 +3196,12 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
3299 struct drm_i915_gem_object *new, 3196 struct drm_i915_gem_object *new,
3300 unsigned frontbuffer_bits); 3197 unsigned frontbuffer_bits);
3301 3198
3302/**
3303 * Returns true if seq1 is later than seq2.
3304 */
3305static inline bool
3306i915_seqno_passed(uint32_t seq1, uint32_t seq2)
3307{
3308 return (int32_t)(seq1 - seq2) >= 0;
3309}
3310
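The signed-subtraction trick in the helper removed above is what keeps seqno comparisons correct across 32-bit wraparound; a quick standalone check:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) >= 0;       /* same trick as above */
}

int main(void)
{
        assert(seqno_passed(100, 99));
        assert(!seqno_passed(99, 100));
        assert(seqno_passed(5, 0xfffffff0u));     /* 5 is "later" across the wrap */
        return 0;
}
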
3311static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req)
3312{
3313 return i915_seqno_passed(intel_engine_get_seqno(req->engine),
3314 req->previous_seqno);
3315}
3316
3317static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
3318{
3319 return i915_seqno_passed(intel_engine_get_seqno(req->engine),
3320 req->seqno);
3321}
3322
3323bool __i915_spin_request(const struct drm_i915_gem_request *request,
3324 int state, unsigned long timeout_us);
3325static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
3326 int state, unsigned long timeout_us)
3327{
3328 return (i915_gem_request_started(request) &&
3329 __i915_spin_request(request, state, timeout_us));
3330}
3331
3332int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno);
3333int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); 3199int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
3334 3200
3335struct drm_i915_gem_request * 3201struct drm_i915_gem_request *
3336i915_gem_find_active_request(struct intel_engine_cs *engine); 3202i915_gem_find_active_request(struct intel_engine_cs *engine);
3337 3203
3338void i915_gem_retire_requests(struct drm_i915_private *dev_priv); 3204void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
3339void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
3340 3205
3341static inline u32 i915_reset_counter(struct i915_gpu_error *error) 3206static inline u32 i915_reset_counter(struct i915_gpu_error *error)
3342{ 3207{
@@ -3381,24 +3246,13 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
3381void i915_gem_reset(struct drm_device *dev); 3246void i915_gem_reset(struct drm_device *dev);
3382bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); 3247bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
3383int __must_check i915_gem_init(struct drm_device *dev); 3248int __must_check i915_gem_init(struct drm_device *dev);
3384int i915_gem_init_engines(struct drm_device *dev);
3385int __must_check i915_gem_init_hw(struct drm_device *dev); 3249int __must_check i915_gem_init_hw(struct drm_device *dev);
3386void i915_gem_init_swizzling(struct drm_device *dev); 3250void i915_gem_init_swizzling(struct drm_device *dev);
3387void i915_gem_cleanup_engines(struct drm_device *dev); 3251void i915_gem_cleanup_engines(struct drm_device *dev);
3388int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv); 3252int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
3253 bool interruptible);
3389int __must_check i915_gem_suspend(struct drm_device *dev); 3254int __must_check i915_gem_suspend(struct drm_device *dev);
3390void __i915_add_request(struct drm_i915_gem_request *req, 3255void i915_gem_resume(struct drm_device *dev);
3391 struct drm_i915_gem_object *batch_obj,
3392 bool flush_caches);
3393#define i915_add_request(req) \
3394 __i915_add_request(req, NULL, true)
3395#define i915_add_request_no_flush(req) \
3396 __i915_add_request(req, NULL, false)
3397int __i915_wait_request(struct drm_i915_gem_request *req,
3398 bool interruptible,
3399 s64 *timeout,
3400 struct intel_rps_client *rps);
3401int __must_check i915_wait_request(struct drm_i915_gem_request *req);
3402int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); 3256int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
3403int __must_check 3257int __must_check
3404i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 3258i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
@@ -3419,11 +3273,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
3419int i915_gem_open(struct drm_device *dev, struct drm_file *file); 3273int i915_gem_open(struct drm_device *dev, struct drm_file *file);
3420void i915_gem_release(struct drm_device *dev, struct drm_file *file); 3274void i915_gem_release(struct drm_device *dev, struct drm_file *file);
3421 3275
3422uint32_t 3276u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u64 size,
3423i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode); 3277 int tiling_mode);
3424uint32_t 3278u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
3425i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 3279 int tiling_mode, bool fenced);
3426 int tiling_mode, bool fenced);
3427 3280
3428int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3281int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3429 enum i915_cache_level cache_level); 3282 enum i915_cache_level cache_level);
@@ -3444,7 +3297,6 @@ i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
3444 return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal); 3297 return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal);
3445} 3298}
3446 3299
3447bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o);
3448bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 3300bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
3449 const struct i915_ggtt_view *view); 3301 const struct i915_ggtt_view *view);
3450bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 3302bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
@@ -3478,7 +3330,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
3478 return container_of(vm, struct i915_hw_ppgtt, base); 3330 return container_of(vm, struct i915_hw_ppgtt, base);
3479} 3331}
3480 3332
3481
3482static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) 3333static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
3483{ 3334{
3484 return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal); 3335 return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal);
@@ -3487,18 +3338,6 @@ static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
3487unsigned long 3338unsigned long
3488i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj); 3339i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj);
3489 3340
3490static inline int __must_check
3491i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
3492 uint32_t alignment,
3493 unsigned flags)
3494{
3495 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3496 struct i915_ggtt *ggtt = &dev_priv->ggtt;
3497
3498 return i915_gem_object_pin(obj, &ggtt->base,
3499 alignment, flags | PIN_GLOBAL);
3500}
3501
3502void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 3341void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3503 const struct i915_ggtt_view *view); 3342 const struct i915_ggtt_view *view);
3504static inline void 3343static inline void
@@ -3528,6 +3367,7 @@ void i915_gem_context_reset(struct drm_device *dev);
3528int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); 3367int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
3529void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); 3368void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
3530int i915_switch_context(struct drm_i915_gem_request *req); 3369int i915_switch_context(struct drm_i915_gem_request *req);
3370int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv);
3531void i915_gem_context_free(struct kref *ctx_ref); 3371void i915_gem_context_free(struct kref *ctx_ref);
3532struct drm_i915_gem_object * 3372struct drm_i915_gem_object *
3533i915_gem_alloc_context_obj(struct drm_device *dev, size_t size); 3373i915_gem_alloc_context_obj(struct drm_device *dev, size_t size);
@@ -3548,12 +3388,14 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
3548 return ctx; 3388 return ctx;
3549} 3389}
3550 3390
3551static inline void i915_gem_context_reference(struct i915_gem_context *ctx) 3391static inline struct i915_gem_context *
3392i915_gem_context_get(struct i915_gem_context *ctx)
3552{ 3393{
3553 kref_get(&ctx->ref); 3394 kref_get(&ctx->ref);
3395 return ctx;
3554} 3396}
3555 3397
3556static inline void i915_gem_context_unreference(struct i915_gem_context *ctx) 3398static inline void i915_gem_context_put(struct i915_gem_context *ctx)
3557{ 3399{
3558 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 3400 lockdep_assert_held(&ctx->i915->drm.struct_mutex);
3559 kref_put(&ctx->ref, i915_gem_context_free); 3401 kref_put(&ctx->ref, i915_gem_context_free);
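Having the get helper return its argument lets callers take the reference inline in an assignment. The shape of that pattern with a toy reference count (not the kernel's kref API):

#include <assert.h>

struct ctx { int refcount; };

static struct ctx *ctx_get(struct ctx *c)
{
        c->refcount++;
        return c;                    /* returning c allows "dst = ctx_get(src);" */
}

static void ctx_put(struct ctx *c)
{
        assert(c->refcount > 0);
        c->refcount--;
}

int main(void)
{
        struct ctx a = { .refcount = 1 };
        struct ctx *last = ctx_get(&a);   /* reference taken in the assignment */

        ctx_put(last);
        assert(a.refcount == 1);
        return 0;
}
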
@@ -3576,13 +3418,10 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
3576 struct drm_file *file); 3418 struct drm_file *file);
3577 3419
3578/* i915_gem_evict.c */ 3420/* i915_gem_evict.c */
3579int __must_check i915_gem_evict_something(struct drm_device *dev, 3421int __must_check i915_gem_evict_something(struct i915_address_space *vm,
3580 struct i915_address_space *vm, 3422 u64 min_size, u64 alignment,
3581 int min_size,
3582 unsigned alignment,
3583 unsigned cache_level, 3423 unsigned cache_level,
3584 unsigned long start, 3424 u64 start, u64 end,
3585 unsigned long end,
3586 unsigned flags); 3425 unsigned flags);
3587int __must_check i915_gem_evict_for_vma(struct i915_vma *target); 3426int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
3588int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); 3427int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
@@ -3634,16 +3473,9 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec
3634 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3473 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3635 3474
3636 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && 3475 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
3637 obj->tiling_mode != I915_TILING_NONE; 3476 i915_gem_object_is_tiled(obj);
3638} 3477}
3639 3478
3640/* i915_gem_debug.c */
3641#if WATCH_LISTS
3642int i915_verify_lists(struct drm_device *dev);
3643#else
3644#define i915_verify_lists(dev) 0
3645#endif
3646
3647/* i915_debugfs.c */ 3479/* i915_debugfs.c */
3648#ifdef CONFIG_DEBUG_FS 3480#ifdef CONFIG_DEBUG_FS
3649int i915_debugfs_register(struct drm_i915_private *dev_priv); 3481int i915_debugfs_register(struct drm_i915_private *dev_priv);
@@ -3684,15 +3516,15 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
3684 3516
3685/* i915_cmd_parser.c */ 3517/* i915_cmd_parser.c */
3686int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); 3518int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
3687int i915_cmd_parser_init_ring(struct intel_engine_cs *engine); 3519int intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
3688void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine); 3520void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
3689bool i915_needs_cmd_parser(struct intel_engine_cs *engine); 3521bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine);
3690int i915_parse_cmds(struct intel_engine_cs *engine, 3522int intel_engine_cmd_parser(struct intel_engine_cs *engine,
3691 struct drm_i915_gem_object *batch_obj, 3523 struct drm_i915_gem_object *batch_obj,
3692 struct drm_i915_gem_object *shadow_batch_obj, 3524 struct drm_i915_gem_object *shadow_batch_obj,
3693 u32 batch_start_offset, 3525 u32 batch_start_offset,
3694 u32 batch_len, 3526 u32 batch_len,
3695 bool is_master); 3527 bool is_master);
3696 3528
3697/* i915_suspend.c */ 3529/* i915_suspend.c */
3698extern int i915_save_state(struct drm_device *dev); 3530extern int i915_save_state(struct drm_device *dev);
@@ -3800,7 +3632,6 @@ extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
3800extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, 3632extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
3801 bool enable); 3633 bool enable);
3802 3634
3803extern bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv);
3804int i915_reg_read_ioctl(struct drm_device *dev, void *data, 3635int i915_reg_read_ioctl(struct drm_device *dev, void *data,
3805 struct drm_file *file); 3636 struct drm_file *file);
3806 3637
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 11681501d7b1..f4f8eaa90f2a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,10 +29,13 @@
29#include <drm/drm_vma_manager.h> 29#include <drm/drm_vma_manager.h>
30#include <drm/i915_drm.h> 30#include <drm/i915_drm.h>
31#include "i915_drv.h" 31#include "i915_drv.h"
32#include "i915_gem_dmabuf.h"
32#include "i915_vgpu.h" 33#include "i915_vgpu.h"
33#include "i915_trace.h" 34#include "i915_trace.h"
34#include "intel_drv.h" 35#include "intel_drv.h"
36#include "intel_frontbuffer.h"
35#include "intel_mocs.h" 37#include "intel_mocs.h"
38#include <linux/reservation.h>
36#include <linux/shmem_fs.h> 39#include <linux/shmem_fs.h>
37#include <linux/slab.h> 40#include <linux/slab.h>
38#include <linux/swap.h> 41#include <linux/swap.h>
@@ -41,10 +44,6 @@
41 44
42static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 45static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
43static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 46static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
44static void
45i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
46static void
47i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
48 47
49static bool cpu_cache_is_coherent(struct drm_device *dev, 48static bool cpu_cache_is_coherent(struct drm_device *dev,
50 enum i915_cache_level level) 49 enum i915_cache_level level)
@@ -139,7 +138,6 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
139 if (ret) 138 if (ret)
140 return ret; 139 return ret;
141 140
142 WARN_ON(i915_verify_lists(dev));
143 return 0; 141 return 0;
144} 142}
145 143
@@ -156,10 +154,10 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
156 pinned = 0; 154 pinned = 0;
157 mutex_lock(&dev->struct_mutex); 155 mutex_lock(&dev->struct_mutex);
158 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 156 list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
159 if (vma->pin_count) 157 if (i915_vma_is_pinned(vma))
160 pinned += vma->node.size; 158 pinned += vma->node.size;
161 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 159 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
162 if (vma->pin_count) 160 if (i915_vma_is_pinned(vma))
163 pinned += vma->node.size; 161 pinned += vma->node.size;
164 mutex_unlock(&dev->struct_mutex); 162 mutex_unlock(&dev->struct_mutex);
165 163
@@ -281,23 +279,119 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
281 .release = i915_gem_object_release_phys, 279 .release = i915_gem_object_release_phys,
282}; 280};
283 281
284static int 282int
285drop_pages(struct drm_i915_gem_object *obj) 283i915_gem_object_unbind(struct drm_i915_gem_object *obj)
286{ 284{
287 struct i915_vma *vma, *next; 285 struct i915_vma *vma;
286 LIST_HEAD(still_in_list);
288 int ret; 287 int ret;
289 288
290 drm_gem_object_reference(&obj->base); 289 /* The vma will only be freed if it is marked as closed, and if we wait
291 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) 290 * upon rendering to the vma, we may unbind anything in the list.
292 if (i915_vma_unbind(vma)) 291 */
292 while ((vma = list_first_entry_or_null(&obj->vma_list,
293 struct i915_vma,
294 obj_link))) {
295 list_move_tail(&vma->obj_link, &still_in_list);
296 ret = i915_vma_unbind(vma);
297 if (ret)
293 break; 298 break;
294 299 }
295 ret = i915_gem_object_put_pages(obj); 300 list_splice(&still_in_list, &obj->vma_list);
296 drm_gem_object_unreference(&obj->base);
297 301
298 return ret; 302 return ret;
299} 303}
300 304
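The unbind loop above moves each vma onto a private still_in_list before unbinding it and splices the survivors back at the end, so the walk stays well defined even though i915_vma_unbind() may sleep and the shared list may change underneath it. The same drain pattern on a toy singly-linked list (illustrative only):

#include <stdio.h>

struct node { int id; struct node *next; };

static struct node *pop(struct node **list)
{
        struct node *n = *list;

        if (n)
                *list = n->next;
        return n;
}

static void push(struct node **list, struct node *n)
{
        n->next = *list;
        *list = n;
}

int main(void)
{
        struct node nodes[] = { {1}, {2}, {3} };
        struct node *vma_list = NULL, *still_in_list = NULL, *n;
        int i;

        for (i = 2; i >= 0; i--)
                push(&vma_list, &nodes[i]);       /* vma_list: 1 -> 2 -> 3 */

        while ((n = pop(&vma_list))) {
                push(&still_in_list, n);          /* park it on the private list */
                printf("unbind vma %d\n", n->id); /* the real unbind may sleep here */
        }

        while ((n = pop(&still_in_list)))         /* splice the survivors back */
                push(&vma_list, n);
        return 0;
}
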
305/**
306 * Ensures that all rendering to the object has completed and the object is
307 * safe to unbind from the GTT or access from the CPU.
308 * @obj: i915 gem object
309 * @readonly: waiting for just read access or read-write access
310 */
311int
312i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
313 bool readonly)
314{
315 struct reservation_object *resv;
316 struct i915_gem_active *active;
317 unsigned long active_mask;
318 int idx;
319
320 lockdep_assert_held(&obj->base.dev->struct_mutex);
321
322 if (!readonly) {
323 active = obj->last_read;
324 active_mask = i915_gem_object_get_active(obj);
325 } else {
326 active_mask = 1;
327 active = &obj->last_write;
328 }
329
330 for_each_active(active_mask, idx) {
331 int ret;
332
333 ret = i915_gem_active_wait(&active[idx],
334 &obj->base.dev->struct_mutex);
335 if (ret)
336 return ret;
337 }
338
339 resv = i915_gem_object_get_dmabuf_resv(obj);
340 if (resv) {
341 long err;
342
343 err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
344 MAX_SCHEDULE_TIMEOUT);
345 if (err < 0)
346 return err;
347 }
348
349 return 0;
350}
351
352/* A nonblocking variant of the above wait. Must be called prior to
353 * acquiring the mutex for the object, as the object state may change
354 * during this call. A reference must be held by the caller for the object.
355 */
356static __must_check int
357__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
358 struct intel_rps_client *rps,
359 bool readonly)
360{
361 struct i915_gem_active *active;
362 unsigned long active_mask;
363 int idx;
364
365 active_mask = __I915_BO_ACTIVE(obj);
366 if (!active_mask)
367 return 0;
368
369 if (!readonly) {
370 active = obj->last_read;
371 } else {
372 active_mask = 1;
373 active = &obj->last_write;
374 }
375
376 for_each_active(active_mask, idx) {
377 int ret;
378
379 ret = i915_gem_active_wait_unlocked(&active[idx],
380 true, NULL, rps);
381 if (ret)
382 return ret;
383 }
384
385 return 0;
386}
387
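Both wait helpers snapshot the active mask once and then visit only the engines whose bits are set; for_each_active() itself is defined outside this file. One common way to write such a walk in plain C:

#include <stdio.h>

int main(void)
{
        unsigned long mask = 0x15;               /* engines 0, 2 and 4 busy */

        while (mask) {
                int idx = __builtin_ctzl(mask);  /* index of the lowest set bit */

                printf("wait on engine %d\n", idx);
                mask &= mask - 1;                /* clear it and continue */
        }
        return 0;
}
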
388static struct intel_rps_client *to_rps_client(struct drm_file *file)
389{
390 struct drm_i915_file_private *fpriv = file->driver_priv;
391
392 return &fpriv->rps;
393}
394
301int 395int
302i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 396i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
303 int align) 397 int align)
@@ -318,7 +412,11 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
318 if (obj->base.filp == NULL) 412 if (obj->base.filp == NULL)
319 return -EINVAL; 413 return -EINVAL;
320 414
321 ret = drop_pages(obj); 415 ret = i915_gem_object_unbind(obj);
416 if (ret)
417 return ret;
418
419 ret = i915_gem_object_put_pages(obj);
322 if (ret) 420 if (ret)
323 return ret; 421 return ret;
324 422
@@ -408,7 +506,7 @@ i915_gem_create(struct drm_file *file,
408 506
409 ret = drm_gem_handle_create(file, &obj->base, &handle); 507 ret = drm_gem_handle_create(file, &obj->base, &handle);
410 /* drop reference from allocate - handle holds it now */ 508 /* drop reference from allocate - handle holds it now */
411 drm_gem_object_unreference_unlocked(&obj->base); 509 i915_gem_object_put_unlocked(obj);
412 if (ret) 510 if (ret)
413 return ret; 511 return ret;
414 512
@@ -511,6 +609,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
511 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 609 if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
512 return -EINVAL; 610 return -EINVAL;
513 611
612 ret = i915_gem_object_wait_rendering(obj, true);
613 if (ret)
614 return ret;
615
514 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 616 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
515 /* If we're not in the cpu read domain, set ourself into the gtt 617 /* If we're not in the cpu read domain, set ourself into the gtt
516 * read domain and manually flush cachelines (if required). This 618 * read domain and manually flush cachelines (if required). This
@@ -518,9 +620,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
518 * anyway again before the next pread happens. */ 620 * anyway again before the next pread happens. */
519 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 621 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
520 obj->cache_level); 622 obj->cache_level);
521 ret = i915_gem_object_wait_rendering(obj, true);
522 if (ret)
523 return ret;
524 } 623 }
525 624
526 ret = i915_gem_object_get_pages(obj); 625 ret = i915_gem_object_get_pages(obj);
@@ -644,7 +743,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
644 uint64_t offset; 743 uint64_t offset;
645 int ret; 744 int ret;
646 745
647 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); 746 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
648 if (ret) { 747 if (ret) {
649 ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); 748 ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
650 if (ret) 749 if (ret)
@@ -857,36 +956,44 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
857 args->size)) 956 args->size))
858 return -EFAULT; 957 return -EFAULT;
859 958
860 ret = i915_mutex_lock_interruptible(dev); 959 obj = i915_gem_object_lookup(file, args->handle);
861 if (ret) 960 if (!obj)
862 return ret; 961 return -ENOENT;
863
864 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
865 if (&obj->base == NULL) {
866 ret = -ENOENT;
867 goto unlock;
868 }
869 962
870 /* Bounds check source. */ 963 /* Bounds check source. */
871 if (args->offset > obj->base.size || 964 if (args->offset > obj->base.size ||
872 args->size > obj->base.size - args->offset) { 965 args->size > obj->base.size - args->offset) {
873 ret = -EINVAL; 966 ret = -EINVAL;
874 goto out; 967 goto err;
875 } 968 }
876 969
877 trace_i915_gem_object_pread(obj, args->offset, args->size); 970 trace_i915_gem_object_pread(obj, args->offset, args->size);
878 971
972 ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
973 if (ret)
974 goto err;
975
976 ret = i915_mutex_lock_interruptible(dev);
977 if (ret)
978 goto err;
979
879 ret = i915_gem_shmem_pread(dev, obj, args, file); 980 ret = i915_gem_shmem_pread(dev, obj, args, file);
880 981
881 /* pread for non shmem backed objects */ 982 /* pread for non shmem backed objects */
882 if (ret == -EFAULT || ret == -ENODEV) 983 if (ret == -EFAULT || ret == -ENODEV) {
984 intel_runtime_pm_get(to_i915(dev));
883 ret = i915_gem_gtt_pread(dev, obj, args->size, 985 ret = i915_gem_gtt_pread(dev, obj, args->size,
884 args->offset, args->data_ptr); 986 args->offset, args->data_ptr);
987 intel_runtime_pm_put(to_i915(dev));
988 }
885 989
886out: 990 i915_gem_object_put(obj);
887 drm_gem_object_unreference(&obj->base);
888unlock:
889 mutex_unlock(&dev->struct_mutex); 991 mutex_unlock(&dev->struct_mutex);
992
993 return ret;
994
995err:
996 i915_gem_object_put_unlocked(obj);
890 return ret; 997 return ret;
891} 998}
892 999
@@ -916,7 +1023,7 @@ fast_user_write(struct io_mapping *mapping,
916/** 1023/**
917 * This is the fast pwrite path, where we copy the data directly from the 1024 * This is the fast pwrite path, where we copy the data directly from the
918 * user into the GTT, uncached. 1025 * user into the GTT, uncached.
919 * @dev: drm device pointer 1026 * @i915: i915 device private data
920 * @obj: i915 gem object 1027 * @obj: i915 gem object
921 * @args: pwrite arguments structure 1028 * @args: pwrite arguments structure
922 * @file: drm file pointer 1029 * @file: drm file pointer
@@ -935,10 +1042,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
935 int ret; 1042 int ret;
936 bool hit_slow_path = false; 1043 bool hit_slow_path = false;
937 1044
938 if (obj->tiling_mode != I915_TILING_NONE) 1045 if (i915_gem_object_is_tiled(obj))
939 return -EFAULT; 1046 return -EFAULT;
940 1047
941 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 1048 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1049 PIN_MAPPABLE | PIN_NONBLOCK);
942 if (ret) { 1050 if (ret) {
943 ret = insert_mappable_node(i915, &node, PAGE_SIZE); 1051 ret = insert_mappable_node(i915, &node, PAGE_SIZE);
944 if (ret) 1052 if (ret)
@@ -1132,15 +1240,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
1132 1240
1133 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 1241 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1134 1242
1243 ret = i915_gem_object_wait_rendering(obj, false);
1244 if (ret)
1245 return ret;
1246
1135 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1247 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1136 /* If we're not in the cpu write domain, set ourself into the gtt 1248 /* If we're not in the cpu write domain, set ourself into the gtt
1137 * write domain and manually flush cachelines (if required). This 1249 * write domain and manually flush cachelines (if required). This
1138 * optimizes for the case when the gpu will use the data 1250 * optimizes for the case when the gpu will use the data
1139 * right away and we therefore have to clflush anyway. */ 1251 * right away and we therefore have to clflush anyway. */
1140 needs_clflush_after = cpu_write_needs_clflush(obj); 1252 needs_clflush_after = cpu_write_needs_clflush(obj);
1141 ret = i915_gem_object_wait_rendering(obj, false);
1142 if (ret)
1143 return ret;
1144 } 1253 }
1145 /* Same trick applies to invalidate partially written cachelines read 1254 /* Same trick applies to invalidate partially written cachelines read
1146 * before writing. */ 1255 * before writing. */
@@ -1270,27 +1379,29 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1270 return -EFAULT; 1379 return -EFAULT;
1271 } 1380 }
1272 1381
1273 intel_runtime_pm_get(dev_priv); 1382 obj = i915_gem_object_lookup(file, args->handle);
1274 1383 if (!obj)
1275 ret = i915_mutex_lock_interruptible(dev); 1384 return -ENOENT;
1276 if (ret)
1277 goto put_rpm;
1278
1279 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
1280 if (&obj->base == NULL) {
1281 ret = -ENOENT;
1282 goto unlock;
1283 }
1284 1385
1285 /* Bounds check destination. */ 1386 /* Bounds check destination. */
1286 if (args->offset > obj->base.size || 1387 if (args->offset > obj->base.size ||
1287 args->size > obj->base.size - args->offset) { 1388 args->size > obj->base.size - args->offset) {
1288 ret = -EINVAL; 1389 ret = -EINVAL;
1289 goto out; 1390 goto err;
1290 } 1391 }
1291 1392
1292 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1393 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1293 1394
1395 ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
1396 if (ret)
1397 goto err;
1398
1399 intel_runtime_pm_get(dev_priv);
1400
1401 ret = i915_mutex_lock_interruptible(dev);
1402 if (ret)
1403 goto err_rpm;
1404
1294 ret = -EFAULT; 1405 ret = -EFAULT;
1295 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1406 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1296 * it would end up going through the fenced access, and we'll get 1407 * it would end up going through the fenced access, and we'll get
@@ -1306,7 +1417,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1306 * textures). Fallback to the shmem path in that case. */ 1417 * textures). Fallback to the shmem path in that case. */
1307 } 1418 }
1308 1419
1309 if (ret == -EFAULT) { 1420 if (ret == -EFAULT || ret == -ENOSPC) {
1310 if (obj->phys_handle) 1421 if (obj->phys_handle)
1311 ret = i915_gem_phys_pwrite(obj, args, file); 1422 ret = i915_gem_phys_pwrite(obj, args, file);
1312 else if (i915_gem_object_has_struct_page(obj)) 1423 else if (i915_gem_object_has_struct_page(obj))
@@ -1315,494 +1426,19 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1315 ret = -ENODEV; 1426 ret = -ENODEV;
1316 } 1427 }
1317 1428
1318out: 1429 i915_gem_object_put(obj);
1319 drm_gem_object_unreference(&obj->base);
1320unlock:
1321 mutex_unlock(&dev->struct_mutex); 1430 mutex_unlock(&dev->struct_mutex);
1322put_rpm:
1323 intel_runtime_pm_put(dev_priv); 1431 intel_runtime_pm_put(dev_priv);
1324 1432
1325 return ret; 1433 return ret;
1326}
1327
1328static int
1329i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
1330{
1331 if (__i915_terminally_wedged(reset_counter))
1332 return -EIO;
1333
1334 if (__i915_reset_in_progress(reset_counter)) {
1335 /* Non-interruptible callers can't handle -EAGAIN, hence return
1336 * -EIO unconditionally for these. */
1337 if (!interruptible)
1338 return -EIO;
1339
1340 return -EAGAIN;
1341 }
1342
1343 return 0;
1344}
1345
1346static unsigned long local_clock_us(unsigned *cpu)
1347{
1348 unsigned long t;
1349
1350 /* Cheaply and approximately convert from nanoseconds to microseconds.
1351 * The result and subsequent calculations are also defined in the same
1352 * approximate microsecond units. The principal source of timing
1353 * error here is from the simple truncation.
1354 *
1355 * Note that local_clock() is only defined wrt to the current CPU;
1356 * the comparisons are no longer valid if we switch CPUs. Instead of
1357 * blocking preemption for the entire busywait, we can detect the CPU
1358 * switch and use that as indicator of system load and a reason to
1359 * stop busywaiting, see busywait_stop().
1360 */
1361 *cpu = get_cpu();
1362 t = local_clock() >> 10;
1363 put_cpu();
1364
1365 return t;
1366}
1367
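local_clock_us() turns nanoseconds into "microseconds" with a shift: dividing by 1024 instead of 1000 undershoots by roughly 2.3%, which is harmless for a busywait budget. A standalone check of that error:

#include <stdio.h>

int main(void)
{
        unsigned long long ns = 1000000000ull;   /* one second */
        unsigned long long exact = ns / 1000;    /* 1000000 us */
        unsigned long long approx = ns >> 10;    /* 976562 "us" */

        printf("exact=%llu approx=%llu error=%.1f%%\n",
               exact, approx, 100.0 * (exact - approx) / exact);
        return 0;
}
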
1368static bool busywait_stop(unsigned long timeout, unsigned cpu)
1369{
1370 unsigned this_cpu;
1371
1372 if (time_after(local_clock_us(&this_cpu), timeout))
1373 return true;
1374
1375 return this_cpu != cpu;
1376}
1377
1378bool __i915_spin_request(const struct drm_i915_gem_request *req,
1379 int state, unsigned long timeout_us)
1380{
1381 unsigned cpu;
1382
1383 /* When waiting for high frequency requests, e.g. during synchronous
1384 * rendering split between the CPU and GPU, the finite amount of time
1385 * required to set up the irq and wait upon it limits the response
1386 * rate. By busywaiting on the request completion for a short while we
1387 * can service the high frequency waits as quick as possible. However,
1388 * if it is a slow request, we want to sleep as quickly as possible.
1389 * The tradeoff between waiting and sleeping is roughly the time it
1390 * takes to sleep on a request, on the order of a microsecond.
1391 */
1392
1393 timeout_us += local_clock_us(&cpu);
1394 do {
1395 if (i915_gem_request_completed(req))
1396 return true;
1397
1398 if (signal_pending_state(state, current))
1399 break;
1400
1401 if (busywait_stop(timeout_us, cpu))
1402 break;
1403
1404 cpu_relax_lowlatency();
1405 } while (!need_resched());
1406
1407 return false;
1408}
1409
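Stripped of the i915 specifics, the function above is a bounded busywait: poll for completion until a small time budget expires, then report failure so the caller can fall back to sleeping. A userspace sketch of that shape (names and clock choice are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <time.h>

static unsigned long now_us(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000ul + ts.tv_nsec / 1000;
}

static bool spin_for_completion(atomic_bool *done, unsigned long timeout_us)
{
        unsigned long deadline = now_us() + timeout_us;

        do {
                if (atomic_load(done))
                        return true;             /* completed while spinning */
        } while (now_us() < deadline);

        return false;                            /* give up; caller should sleep */
}

int main(void)
{
        atomic_bool done = false;

        /* nothing signals `done` here, so this spins ~50us and gives up */
        return spin_for_completion(&done, 50) ? 0 : 1;
}
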
1410/**
1411 * __i915_wait_request - wait until execution of request has finished
1412 * @req: duh!
1413 * @interruptible: do an interruptible wait (normally yes)
1414 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1415 * @rps: RPS client
1416 *
1417 * Note: It is of utmost importance that the passed in seqno and reset_counter
1418 * values have been read by the caller in an smp safe manner. Where read-side
1419 * locks are involved, it is sufficient to read the reset_counter before
1420 * unlocking the lock that protects the seqno. For lockless tricks, the
1421 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1422 * inserted.
1423 *
1424 * Returns 0 if the request was found within the allotted time. Else returns the
1425 * errno with remaining time filled in timeout argument.
1426 */
1427int __i915_wait_request(struct drm_i915_gem_request *req,
1428 bool interruptible,
1429 s64 *timeout,
1430 struct intel_rps_client *rps)
1431{
1432 int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
1433 DEFINE_WAIT(reset);
1434 struct intel_wait wait;
1435 unsigned long timeout_remain;
1436 s64 before = 0; /* Only to silence a compiler warning. */
1437 int ret = 0;
1438
1439 might_sleep();
1440
1441 if (list_empty(&req->list))
1442 return 0;
1443
1444 if (i915_gem_request_completed(req))
1445 return 0;
1446
1447 timeout_remain = MAX_SCHEDULE_TIMEOUT;
1448 if (timeout) {
1449 if (WARN_ON(*timeout < 0))
1450 return -EINVAL;
1451
1452 if (*timeout == 0)
1453 return -ETIME;
1454
1455 timeout_remain = nsecs_to_jiffies_timeout(*timeout);
1456
1457 /*
1458 * Record current time in case interrupted by signal, or wedged.
1459 */
1460 before = ktime_get_raw_ns();
1461 }
1462
1463 trace_i915_gem_request_wait_begin(req);
1464
1465 /* This client is about to stall waiting for the GPU. In many cases
1466 * this is undesirable and limits the throughput of the system, as
1467 * many clients cannot continue processing user input/output whilst
1468 * blocked. RPS autotuning may take tens of milliseconds to respond
1469 * to the GPU load and thus incurs additional latency for the client.
1470 * We can circumvent that by promoting the GPU frequency to maximum
1471 * before we wait. This makes the GPU throttle up much more quickly
1472 * (good for benchmarks and user experience, e.g. window animations),
1473 * but at a cost of spending more power processing the workload
1474 * (bad for battery). Not all clients even want their results
1475 * immediately and for them we should just let the GPU select its own
1476 * frequency to maximise efficiency. To prevent a single client from
1477 * forcing the clocks too high for the whole system, we only allow
1478 * each client to waitboost once in a busy period.
1479 */
1480 if (INTEL_INFO(req->i915)->gen >= 6)
1481 gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
1482
1483 /* Optimistic spin for the next ~jiffie before touching IRQs */
1484 if (i915_spin_request(req, state, 5))
1485 goto complete;
1486
1487 set_current_state(state);
1488 add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
1489
1490 intel_wait_init(&wait, req->seqno);
1491 if (intel_engine_add_wait(req->engine, &wait))
1492 /* In order to check that we haven't missed the interrupt
1493 * as we enabled it, we need to kick ourselves to do a
1494 * coherent check on the seqno before we sleep.
1495 */
1496 goto wakeup;
1497
1498 for (;;) {
1499 if (signal_pending_state(state, current)) {
1500 ret = -ERESTARTSYS;
1501 break;
1502 }
1503
1504 timeout_remain = io_schedule_timeout(timeout_remain);
1505 if (timeout_remain == 0) {
1506 ret = -ETIME;
1507 break;
1508 }
1509
1510 if (intel_wait_complete(&wait))
1511 break;
1512
1513 set_current_state(state);
1514
1515wakeup:
1516 /* Carefully check if the request is complete, giving time
1517 * for the seqno to be visible following the interrupt.
1518 * We also have to check in case we are kicked by the GPU
1519 * reset in order to drop the struct_mutex.
1520 */
1521 if (__i915_request_irq_complete(req))
1522 break;
1523
1524 /* Only spin if we know the GPU is processing this request */
1525 if (i915_spin_request(req, state, 2))
1526 break;
1527 }
1528 remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
1529
1530 intel_engine_remove_wait(req->engine, &wait);
1531 __set_current_state(TASK_RUNNING);
1532complete:
1533 trace_i915_gem_request_wait_end(req);
1534
1535 if (timeout) {
1536 s64 tres = *timeout - (ktime_get_raw_ns() - before);
1537
1538 *timeout = tres < 0 ? 0 : tres;
1539
1540 /*
1541 * Apparently ktime isn't accurate enough and occasionally has a
1542 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1543 * things up to make the test happy. We allow up to 1 jiffy.
1544 *
1545 * This is a regression from the timespec->ktime conversion.
1546 */
1547 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1548 *timeout = 0;
1549 }
1550
1551 if (rps && req->seqno == req->engine->last_submitted_seqno) {
1552 /* The GPU is now idle and this client has stalled.
1553 * Since no other client has submitted a request in the
1554 * meantime, assume that this client is the only one
1555 * supplying work to the GPU but is unable to keep that
1556 * work supplied because it is waiting. Since the GPU is
1557 * then never kept fully busy, RPS autoclocking will
1558 * keep the clocks relatively low, causing further delays.
1559 * Compensate by giving the synchronous client credit for
1560 * a waitboost next time.
1561 */
1562 spin_lock(&req->i915->rps.client_lock);
1563 list_del_init(&rps->link);
1564 spin_unlock(&req->i915->rps.client_lock);
1565 }
1566
1567 return ret;
1568}
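
/*
 * Sketch of the timeout bookkeeping above (illustrative only, hypothetical
 * helper): the caller's budget is reduced by the time actually spent
 * waiting and clamped at zero; when the wait reports a timeout but the
 * clocks claim a sliver of budget remains, up to one scheduler tick of
 * jiffies<->ns rounding is forgiven and the budget reported as exhausted.
 */
#include <stdbool.h>
#include <stdint.h>

#define TICK_NS 10000000LL	/* assumes HZ=100, i.e. a 10ms tick */

static int64_t remaining_budget(int64_t budget_ns, int64_t elapsed_ns,
				bool timed_out)
{
	int64_t remaining = budget_ns - elapsed_ns;

	if (remaining < 0)
		remaining = 0;

	if (timed_out && remaining < TICK_NS)
		remaining = 0;

	return remaining;
}
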
1569
1570int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1571 struct drm_file *file)
1572{
1573 struct drm_i915_file_private *file_priv;
1574
1575 WARN_ON(!req || !file || req->file_priv);
1576
1577 if (!req || !file)
1578 return -EINVAL;
1579
1580 if (req->file_priv)
1581 return -EINVAL;
1582
1583 file_priv = file->driver_priv;
1584
1585 spin_lock(&file_priv->mm.lock);
1586 req->file_priv = file_priv;
1587 list_add_tail(&req->client_list, &file_priv->mm.request_list);
1588 spin_unlock(&file_priv->mm.lock);
1589
1590 req->pid = get_pid(task_pid(current));
1591
1592 return 0;
1593}
1594
1595static inline void
1596i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1597{
1598 struct drm_i915_file_private *file_priv = request->file_priv;
1599
1600 if (!file_priv)
1601 return;
1602
1603 spin_lock(&file_priv->mm.lock);
1604 list_del(&request->client_list);
1605 request->file_priv = NULL;
1606 spin_unlock(&file_priv->mm.lock);
1607
1608 put_pid(request->pid);
1609 request->pid = NULL;
1610}
1611
1612static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1613{
1614 trace_i915_gem_request_retire(request);
1615
1616 /* We know the GPU must have read the request to have
1617 * sent us the seqno + interrupt, so use the position
1618 * of the tail of the request to update the last known position
1619 * of the GPU head.
1620 *
1621 * Note this requires that we are always called in request
1622 * completion order.
1623 */
1624 request->ringbuf->last_retired_head = request->postfix;
1625
1626 list_del_init(&request->list);
1627 i915_gem_request_remove_from_client(request);
1628
1629 if (request->previous_context) {
1630 if (i915.enable_execlists)
1631 intel_lr_context_unpin(request->previous_context,
1632 request->engine);
1633 }
1634
1635 i915_gem_context_unreference(request->ctx);
1636 i915_gem_request_unreference(request);
1637}
1638
1639static void
1640__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1641{
1642 struct intel_engine_cs *engine = req->engine;
1643 struct drm_i915_gem_request *tmp;
1644
1645 lockdep_assert_held(&engine->i915->drm.struct_mutex);
1646
1647 if (list_empty(&req->list))
1648 return;
1649
1650 do {
1651 tmp = list_first_entry(&engine->request_list,
1652 typeof(*tmp), list);
1653
1654 i915_gem_request_retire(tmp);
1655 } while (tmp != req);
1656
1657 WARN_ON(i915_verify_lists(engine->dev));
1658}
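
/*
 * The loop above drains the engine's FIFO of requests up to and including
 * the one just waited on; a stripped-down sketch of that pattern on a
 * plain singly linked list (hypothetical types, no locking shown - the
 * driver does this under struct_mutex):
 */
struct node {
	struct node *next;
};

struct fifo {
	struct node *head;	/* oldest element first */
};

static void retire_node(struct node *n)
{
	/* release whatever resources the element holds */
}

static void retire_upto(struct fifo *q, struct node *last)
{
	struct node *n;

	do {			/* 'last' must still be queued, as above */
		n = q->head;
		q->head = n->next;
		retire_node(n);
	} while (n != last);
}
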
1659
1660/**
1661 * Waits for a request to be signaled, and cleans up the
1662 * request and object lists appropriately for that event.
1663 * @req: request to wait on
1664 */
1665int
1666i915_wait_request(struct drm_i915_gem_request *req)
1667{
1668 struct drm_i915_private *dev_priv = req->i915;
1669 bool interruptible;
1670 int ret;
1671
1672 interruptible = dev_priv->mm.interruptible;
1673
1674 BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex));
1675
1676 ret = __i915_wait_request(req, interruptible, NULL, NULL);
1677 if (ret)
1678 return ret;
1679
1680 /* If the GPU hung, we want to keep the requests to find the guilty. */
1681 if (!i915_reset_in_progress(&dev_priv->gpu_error))
1682 __i915_gem_request_retire__upto(req);
1683
1684 return 0;
1685}
1686
1687/**
1688 * Ensures that all rendering to the object has completed and the object is
1689 * safe to unbind from the GTT or access from the CPU.
1690 * @obj: i915 gem object
1691 * @readonly: waiting for read access or write
1692 */
1693int
1694i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1695 bool readonly)
1696{
1697 int ret, i;
1698
1699 if (!obj->active)
1700 return 0;
1701
1702 if (readonly) {
1703 if (obj->last_write_req != NULL) {
1704 ret = i915_wait_request(obj->last_write_req);
1705 if (ret)
1706 return ret;
1707
1708 i = obj->last_write_req->engine->id;
1709 if (obj->last_read_req[i] == obj->last_write_req)
1710 i915_gem_object_retire__read(obj, i);
1711 else
1712 i915_gem_object_retire__write(obj);
1713 }
1714 } else {
1715 for (i = 0; i < I915_NUM_ENGINES; i++) {
1716 if (obj->last_read_req[i] == NULL)
1717 continue;
1718
1719 ret = i915_wait_request(obj->last_read_req[i]);
1720 if (ret)
1721 return ret;
1722
1723 i915_gem_object_retire__read(obj, i);
1724 }
1725 GEM_BUG_ON(obj->active);
1726 }
1727
1728 return 0;
1729}
1730
1731static void
1732i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1733 struct drm_i915_gem_request *req)
1734{
1735 int ring = req->engine->id;
1736
1737 if (obj->last_read_req[ring] == req)
1738 i915_gem_object_retire__read(obj, ring);
1739 else if (obj->last_write_req == req)
1740 i915_gem_object_retire__write(obj);
1741
1742 if (!i915_reset_in_progress(&req->i915->gpu_error))
1743 __i915_gem_request_retire__upto(req);
1744}
1745
1746/* A nonblocking variant of the above wait. This is a highly dangerous routine
1747 * as the object state may change during this call.
1748 */
1749static __must_check int
1750i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1751 struct intel_rps_client *rps,
1752 bool readonly)
1753{
1754 struct drm_device *dev = obj->base.dev;
1755 struct drm_i915_private *dev_priv = to_i915(dev);
1756 struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
1757 int ret, i, n = 0;
1758
1759 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1760 BUG_ON(!dev_priv->mm.interruptible);
1761
1762 if (!obj->active)
1763 return 0;
1764
1765 if (readonly) {
1766 struct drm_i915_gem_request *req;
1767
1768 req = obj->last_write_req;
1769 if (req == NULL)
1770 return 0;
1771
1772 requests[n++] = i915_gem_request_reference(req);
1773 } else {
1774 for (i = 0; i < I915_NUM_ENGINES; i++) {
1775 struct drm_i915_gem_request *req;
1776
1777 req = obj->last_read_req[i];
1778 if (req == NULL)
1779 continue;
1780
1781 requests[n++] = i915_gem_request_reference(req);
1782 }
1783 }
1784
1785 mutex_unlock(&dev->struct_mutex);
1786 ret = 0;
1787 for (i = 0; ret == 0 && i < n; i++)
1788 ret = __i915_wait_request(requests[i], true, NULL, rps);
1789 mutex_lock(&dev->struct_mutex);
1790
1791 for (i = 0; i < n; i++) {
1792 if (ret == 0)
1793 i915_gem_object_retire_request(obj, requests[i]);
1794 i915_gem_request_unreference(requests[i]);
1795 }
1796 1434
1435err_rpm:
1436 intel_runtime_pm_put(dev_priv);
1437err:
1438 i915_gem_object_put_unlocked(obj);
1797 return ret; 1439 return ret;
1798} 1440}
1799 1441
1800static struct intel_rps_client *to_rps_client(struct drm_file *file)
1801{
1802 struct drm_i915_file_private *fpriv = file->driver_priv;
1803 return &fpriv->rps;
1804}
1805
1806static enum fb_op_origin 1442static enum fb_op_origin
1807write_origin(struct drm_i915_gem_object *obj, unsigned domain) 1443write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1808{ 1444{
@@ -1828,10 +1464,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1828 int ret; 1464 int ret;
1829 1465
1830 /* Only handle setting domains to types used by the CPU. */ 1466 /* Only handle setting domains to types used by the CPU. */
1831 if (write_domain & I915_GEM_GPU_DOMAINS) 1467 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1832 return -EINVAL;
1833
1834 if (read_domains & I915_GEM_GPU_DOMAINS)
1835 return -EINVAL; 1468 return -EINVAL;
1836 1469
1837 /* Having something in the write domain implies it's in the read 1470 /* Having something in the write domain implies it's in the read
@@ -1840,25 +1473,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1840 if (write_domain != 0 && read_domains != write_domain) 1473 if (write_domain != 0 && read_domains != write_domain)
1841 return -EINVAL; 1474 return -EINVAL;
1842 1475
1843 ret = i915_mutex_lock_interruptible(dev); 1476 obj = i915_gem_object_lookup(file, args->handle);
1844 if (ret) 1477 if (!obj)
1845 return ret; 1478 return -ENOENT;
1846
1847 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
1848 if (&obj->base == NULL) {
1849 ret = -ENOENT;
1850 goto unlock;
1851 }
1852 1479
1853 /* Try to flush the object off the GPU without holding the lock. 1480 /* Try to flush the object off the GPU without holding the lock.
1854 * We will repeat the flush holding the lock in the normal manner 1481 * We will repeat the flush holding the lock in the normal manner
1855 * to catch cases where we are gazumped. 1482 * to catch cases where we are gazumped.
1856 */ 1483 */
1857 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1484 ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
1858 to_rps_client(file),
1859 !write_domain);
1860 if (ret) 1485 if (ret)
1861 goto unref; 1486 goto err;
1487
1488 ret = i915_mutex_lock_interruptible(dev);
1489 if (ret)
1490 goto err;
1862 1491
1863 if (read_domains & I915_GEM_DOMAIN_GTT) 1492 if (read_domains & I915_GEM_DOMAIN_GTT)
1864 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1493 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
@@ -1868,11 +1497,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1868 if (write_domain != 0) 1497 if (write_domain != 0)
1869 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); 1498 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1870 1499
1871unref: 1500 i915_gem_object_put(obj);
1872 drm_gem_object_unreference(&obj->base);
1873unlock:
1874 mutex_unlock(&dev->struct_mutex); 1501 mutex_unlock(&dev->struct_mutex);
1875 return ret; 1502 return ret;
1503
1504err:
1505 i915_gem_object_put_unlocked(obj);
1506 return ret;
1876} 1507}
1877 1508
1878/** 1509/**
@@ -1887,26 +1518,23 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1887{ 1518{
1888 struct drm_i915_gem_sw_finish *args = data; 1519 struct drm_i915_gem_sw_finish *args = data;
1889 struct drm_i915_gem_object *obj; 1520 struct drm_i915_gem_object *obj;
1890 int ret = 0; 1521 int err = 0;
1891 1522
1892 ret = i915_mutex_lock_interruptible(dev); 1523 obj = i915_gem_object_lookup(file, args->handle);
1893 if (ret) 1524 if (!obj)
1894 return ret; 1525 return -ENOENT;
1895
1896 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
1897 if (&obj->base == NULL) {
1898 ret = -ENOENT;
1899 goto unlock;
1900 }
1901 1526
1902 /* Pinned buffers may be scanout, so flush the cache */ 1527 /* Pinned buffers may be scanout, so flush the cache */
1903 if (obj->pin_display) 1528 if (READ_ONCE(obj->pin_display)) {
1904 i915_gem_object_flush_cpu_write_domain(obj); 1529 err = i915_mutex_lock_interruptible(dev);
1530 if (!err) {
1531 i915_gem_object_flush_cpu_write_domain(obj);
1532 mutex_unlock(&dev->struct_mutex);
1533 }
1534 }
1905 1535
1906 drm_gem_object_unreference(&obj->base); 1536 i915_gem_object_put_unlocked(obj);
1907unlock: 1537 return err;
1908 mutex_unlock(&dev->struct_mutex);
1909 return ret;
1910} 1538}
1911 1539
1912/** 1540/**
@@ -1934,7 +1562,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1934 struct drm_file *file) 1562 struct drm_file *file)
1935{ 1563{
1936 struct drm_i915_gem_mmap *args = data; 1564 struct drm_i915_gem_mmap *args = data;
1937 struct drm_gem_object *obj; 1565 struct drm_i915_gem_object *obj;
1938 unsigned long addr; 1566 unsigned long addr;
1939 1567
1940 if (args->flags & ~(I915_MMAP_WC)) 1568 if (args->flags & ~(I915_MMAP_WC))
@@ -1943,19 +1571,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1943 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1571 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1944 return -ENODEV; 1572 return -ENODEV;
1945 1573
1946 obj = drm_gem_object_lookup(file, args->handle); 1574 obj = i915_gem_object_lookup(file, args->handle);
1947 if (obj == NULL) 1575 if (!obj)
1948 return -ENOENT; 1576 return -ENOENT;
1949 1577
1950 /* prime objects have no backing filp to GEM mmap 1578 /* prime objects have no backing filp to GEM mmap
1951 * pages from. 1579 * pages from.
1952 */ 1580 */
1953 if (!obj->filp) { 1581 if (!obj->base.filp) {
1954 drm_gem_object_unreference_unlocked(obj); 1582 i915_gem_object_put_unlocked(obj);
1955 return -EINVAL; 1583 return -EINVAL;
1956 } 1584 }
1957 1585
1958 addr = vm_mmap(obj->filp, 0, args->size, 1586 addr = vm_mmap(obj->base.filp, 0, args->size,
1959 PROT_READ | PROT_WRITE, MAP_SHARED, 1587 PROT_READ | PROT_WRITE, MAP_SHARED,
1960 args->offset); 1588 args->offset);
1961 if (args->flags & I915_MMAP_WC) { 1589 if (args->flags & I915_MMAP_WC) {
@@ -1963,7 +1591,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1963 struct vm_area_struct *vma; 1591 struct vm_area_struct *vma;
1964 1592
1965 if (down_write_killable(&mm->mmap_sem)) { 1593 if (down_write_killable(&mm->mmap_sem)) {
1966 drm_gem_object_unreference_unlocked(obj); 1594 i915_gem_object_put_unlocked(obj);
1967 return -EINTR; 1595 return -EINTR;
1968 } 1596 }
1969 vma = find_vma(mm, addr); 1597 vma = find_vma(mm, addr);
@@ -1975,9 +1603,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1975 up_write(&mm->mmap_sem); 1603 up_write(&mm->mmap_sem);
1976 1604
1977 /* This may race, but that's ok, it only gets set */ 1605 /* This may race, but that's ok, it only gets set */
1978 WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true); 1606 WRITE_ONCE(obj->has_wc_mmap, true);
1979 } 1607 }
1980 drm_gem_object_unreference_unlocked(obj); 1608 i915_gem_object_put_unlocked(obj);
1981 if (IS_ERR((void *)addr)) 1609 if (IS_ERR((void *)addr))
1982 return addr; 1610 return addr;
1983 1611
@@ -2009,41 +1637,41 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2009 struct drm_i915_private *dev_priv = to_i915(dev); 1637 struct drm_i915_private *dev_priv = to_i915(dev);
2010 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1638 struct i915_ggtt *ggtt = &dev_priv->ggtt;
2011 struct i915_ggtt_view view = i915_ggtt_view_normal; 1639 struct i915_ggtt_view view = i915_ggtt_view_normal;
1640 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
2012 pgoff_t page_offset; 1641 pgoff_t page_offset;
2013 unsigned long pfn; 1642 unsigned long pfn;
2014 int ret = 0; 1643 int ret;
2015 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
2016
2017 intel_runtime_pm_get(dev_priv);
2018 1644
2019 /* We don't use vmf->pgoff since that has the fake offset */ 1645 /* We don't use vmf->pgoff since that has the fake offset */
2020 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1646 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
2021 PAGE_SHIFT; 1647 PAGE_SHIFT;
2022 1648
2023 ret = i915_mutex_lock_interruptible(dev);
2024 if (ret)
2025 goto out;
2026
2027 trace_i915_gem_object_fault(obj, page_offset, true, write); 1649 trace_i915_gem_object_fault(obj, page_offset, true, write);
2028 1650
2029 /* Try to flush the object off the GPU first without holding the lock. 1651 /* Try to flush the object off the GPU first without holding the lock.
2030 * Upon reacquiring the lock, we will perform our sanity checks and then 1652 * Upon acquiring the lock, we will perform our sanity checks and then
2031 * repeat the flush holding the lock in the normal manner to catch cases 1653 * repeat the flush holding the lock in the normal manner to catch cases
2032 * where we are gazumped. 1654 * where we are gazumped.
2033 */ 1655 */
2034 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1656 ret = __unsafe_wait_rendering(obj, NULL, !write);
2035 if (ret) 1657 if (ret)
2036 goto unlock; 1658 goto err;
1659
1660 intel_runtime_pm_get(dev_priv);
1661
1662 ret = i915_mutex_lock_interruptible(dev);
1663 if (ret)
1664 goto err_rpm;
2037 1665
2038 /* Access to snoopable pages through the GTT is incoherent. */ 1666 /* Access to snoopable pages through the GTT is incoherent. */
2039 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1667 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
2040 ret = -EFAULT; 1668 ret = -EFAULT;
2041 goto unlock; 1669 goto err_unlock;
2042 } 1670 }
2043 1671
2044 /* Use a partial view if the object is bigger than the aperture. */ 1672 /* Use a partial view if the object is bigger than the aperture. */
2045 if (obj->base.size >= ggtt->mappable_end && 1673 if (obj->base.size >= ggtt->mappable_end &&
2046 obj->tiling_mode == I915_TILING_NONE) { 1674 !i915_gem_object_is_tiled(obj)) {
2047 static const unsigned int chunk_size = 256; // 1 MiB 1675 static const unsigned int chunk_size = 256; // 1 MiB
2048 1676
2049 memset(&view, 0, sizeof(view)); 1677 memset(&view, 0, sizeof(view));
@@ -2057,17 +1685,17 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2057 } 1685 }
2058 1686
2059 /* Now pin it into the GTT if needed */ 1687 /* Now pin it into the GTT if needed */
2060 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1688 ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
2061 if (ret) 1689 if (ret)
2062 goto unlock; 1690 goto err_unlock;
2063 1691
2064 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1692 ret = i915_gem_object_set_to_gtt_domain(obj, write);
2065 if (ret) 1693 if (ret)
2066 goto unpin; 1694 goto err_unpin;
2067 1695
2068 ret = i915_gem_object_get_fence(obj); 1696 ret = i915_gem_object_get_fence(obj);
2069 if (ret) 1697 if (ret)
2070 goto unpin; 1698 goto err_unpin;
2071 1699
2072 /* Finally, remap it using the new GTT offset */ 1700 /* Finally, remap it using the new GTT offset */
2073 pfn = ggtt->mappable_base + 1701 pfn = ggtt->mappable_base +
@@ -2112,11 +1740,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2112 (unsigned long)vmf->virtual_address, 1740 (unsigned long)vmf->virtual_address,
2113 pfn + page_offset); 1741 pfn + page_offset);
2114 } 1742 }
2115unpin: 1743err_unpin:
2116 i915_gem_object_ggtt_unpin_view(obj, &view); 1744 i915_gem_object_ggtt_unpin_view(obj, &view);
2117unlock: 1745err_unlock:
2118 mutex_unlock(&dev->struct_mutex); 1746 mutex_unlock(&dev->struct_mutex);
2119out: 1747err_rpm:
1748 intel_runtime_pm_put(dev_priv);
1749err:
2120 switch (ret) { 1750 switch (ret) {
2121 case -EIO: 1751 case -EIO:
2122 /* 1752 /*
@@ -2157,8 +1787,6 @@ out:
2157 ret = VM_FAULT_SIGBUS; 1787 ret = VM_FAULT_SIGBUS;
2158 break; 1788 break;
2159 } 1789 }
2160
2161 intel_runtime_pm_put(dev_priv);
2162 return ret; 1790 return ret;
2163} 1791}
2164 1792
@@ -2212,46 +1840,58 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2212 i915_gem_release_mmap(obj); 1840 i915_gem_release_mmap(obj);
2213} 1841}
2214 1842
2215uint32_t 1843/**
2216i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1844 * i915_gem_get_ggtt_size - return required global GTT size for an object
1845 * @dev_priv: i915 device
1846 * @size: object size
1847 * @tiling_mode: tiling mode
1848 *
1849 * Return the required global GTT size for an object, taking into account
1850 * potential fence register mapping.
1851 */
1852u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
1853 u64 size, int tiling_mode)
2217{ 1854{
2218 uint32_t gtt_size; 1855 u64 ggtt_size;
2219 1856
2220 if (INTEL_INFO(dev)->gen >= 4 || 1857 GEM_BUG_ON(size == 0);
1858
1859 if (INTEL_GEN(dev_priv) >= 4 ||
2221 tiling_mode == I915_TILING_NONE) 1860 tiling_mode == I915_TILING_NONE)
2222 return size; 1861 return size;
2223 1862
2224 /* Previous chips need a power-of-two fence region when tiling */ 1863 /* Previous chips need a power-of-two fence region when tiling */
2225 if (IS_GEN3(dev)) 1864 if (IS_GEN3(dev_priv))
2226 gtt_size = 1024*1024; 1865 ggtt_size = 1024*1024;
2227 else 1866 else
2228 gtt_size = 512*1024; 1867 ggtt_size = 512*1024;
2229 1868
2230 while (gtt_size < size) 1869 while (ggtt_size < size)
2231 gtt_size <<= 1; 1870 ggtt_size <<= 1;
2232 1871
2233 return gtt_size; 1872 return ggtt_size;
2234} 1873}
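
/*
 * Worked example of the fence sizing rule above (illustrative user-space
 * build, not driver code): pre-gen4 parts need a power-of-two fence region
 * of at least 512 KiB (1 MiB on gen3), so a 1.5 MiB tiled object on gen3
 * rounds up to a 2 MiB region, while gen4+ or untiled objects use their
 * natural size.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t fence_region_size(uint64_t size, int gen, int tiled)
{
	uint64_t fence;

	if (gen >= 4 || !tiled)
		return size;

	fence = (gen == 3) ? 1024 * 1024 : 512 * 1024;
	while (fence < size)
		fence <<= 1;

	return fence;
}

int main(void)
{
	/* 1.5 MiB tiled object on gen3 -> prints 2097152 (2 MiB) */
	printf("%llu\n",
	       (unsigned long long)fence_region_size(3 * 512 * 1024, 3, 1));
	return 0;
}
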
2235 1874
2236/** 1875/**
2237 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1876 * i915_gem_get_ggtt_alignment - return required global GTT alignment
2238 * @dev: drm device 1877 * @dev_priv: i915 device
2239 * @size: object size 1878 * @size: object size
2240 * @tiling_mode: tiling mode 1879 * @tiling_mode: tiling mode
2241 * @fenced: is fenced alignemned required or not 1880 * @fenced: is fenced alignment required or not
2242 * 1881 *
2243 * Return the required GTT alignment for an object, taking into account 1882 * Return the required global GTT alignment for an object, taking into account
2244 * potential fence register mapping. 1883 * potential fence register mapping.
2245 */ 1884 */
2246uint32_t 1885u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
2247i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 1886 int tiling_mode, bool fenced)
2248 int tiling_mode, bool fenced)
2249{ 1887{
1888 GEM_BUG_ON(size == 0);
1889
2250 /* 1890 /*
2251 * Minimum alignment is 4k (GTT page size), but might be greater 1891 * Minimum alignment is 4k (GTT page size), but might be greater
2252 * if a fence register is needed for the object. 1892 * if a fence register is needed for the object.
2253 */ 1893 */
2254 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 1894 if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
2255 tiling_mode == I915_TILING_NONE) 1895 tiling_mode == I915_TILING_NONE)
2256 return 4096; 1896 return 4096;
2257 1897
@@ -2259,42 +1899,34 @@ i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2259 * Previous chips need to be aligned to the size of the smallest 1899 * Previous chips need to be aligned to the size of the smallest
2260 * fence register that can contain the object. 1900 * fence register that can contain the object.
2261 */ 1901 */
2262 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1902 return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
2263} 1903}
2264 1904
2265static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1905static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2266{ 1906{
2267 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 1907 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2268 int ret; 1908 int err;
2269
2270 dev_priv->mm.shrinker_no_lock_stealing = true;
2271 1909
2272 ret = drm_gem_create_mmap_offset(&obj->base); 1910 err = drm_gem_create_mmap_offset(&obj->base);
2273 if (ret != -ENOSPC) 1911 if (!err)
2274 goto out; 1912 return 0;
2275 1913
2276 /* Badly fragmented mmap space? The only way we can recover 1914 /* We can idle the GPU locklessly to flush stale objects, but in order
2277 * space is by destroying unwanted objects. We can't randomly release 1915 * to claim that space for ourselves, we need to take the big
2278 * mmap_offsets as userspace expects them to be persistent for the 1916 * struct_mutex to free the requests+objects and allocate our slot.
2279 * lifetime of the objects. The closest we can do is to release the
2280 * offsets on purgeable objects by truncating it and marking it purged,
2281 * which prevents userspace from ever using that object again.
2282 */ 1917 */
2283 i915_gem_shrink(dev_priv, 1918 err = i915_gem_wait_for_idle(dev_priv, true);
2284 obj->base.size >> PAGE_SHIFT, 1919 if (err)
2285 I915_SHRINK_BOUND | 1920 return err;
2286 I915_SHRINK_UNBOUND |
2287 I915_SHRINK_PURGEABLE);
2288 ret = drm_gem_create_mmap_offset(&obj->base);
2289 if (ret != -ENOSPC)
2290 goto out;
2291 1921
2292 i915_gem_shrink_all(dev_priv); 1922 err = i915_mutex_lock_interruptible(&dev_priv->drm);
2293 ret = drm_gem_create_mmap_offset(&obj->base); 1923 if (!err) {
2294out: 1924 i915_gem_retire_requests(dev_priv);
2295 dev_priv->mm.shrinker_no_lock_stealing = false; 1925 err = drm_gem_create_mmap_offset(&obj->base);
1926 mutex_unlock(&dev_priv->drm.struct_mutex);
1927 }
2296 1928
2297 return ret; 1929 return err;
2298} 1930}
2299 1931
2300static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 1932static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
@@ -2311,32 +1943,15 @@ i915_gem_mmap_gtt(struct drm_file *file,
2311 struct drm_i915_gem_object *obj; 1943 struct drm_i915_gem_object *obj;
2312 int ret; 1944 int ret;
2313 1945
2314 ret = i915_mutex_lock_interruptible(dev); 1946 obj = i915_gem_object_lookup(file, handle);
2315 if (ret) 1947 if (!obj)
2316 return ret; 1948 return -ENOENT;
2317
2318 obj = to_intel_bo(drm_gem_object_lookup(file, handle));
2319 if (&obj->base == NULL) {
2320 ret = -ENOENT;
2321 goto unlock;
2322 }
2323
2324 if (obj->madv != I915_MADV_WILLNEED) {
2325 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2326 ret = -EFAULT;
2327 goto out;
2328 }
2329 1949
2330 ret = i915_gem_object_create_mmap_offset(obj); 1950 ret = i915_gem_object_create_mmap_offset(obj);
2331 if (ret) 1951 if (ret == 0)
2332 goto out; 1952 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2333
2334 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2335 1953
2336out: 1954 i915_gem_object_put_unlocked(obj);
2337 drm_gem_object_unreference(&obj->base);
2338unlock:
2339 mutex_unlock(&dev->struct_mutex);
2340 return ret; 1955 return ret;
2341} 1956}
2342 1957
@@ -2454,7 +2069,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2454 if (obj->pages_pin_count) 2069 if (obj->pages_pin_count)
2455 return -EBUSY; 2070 return -EBUSY;
2456 2071
2457 BUG_ON(i915_gem_obj_bound_any(obj)); 2072 GEM_BUG_ON(obj->bind_count);
2458 2073
2459 /* ->put_pages might need to allocate memory for the bit17 swizzle 2074 /* ->put_pages might need to allocate memory for the bit17 swizzle
2460 * array, hence protect them from being reaped by removing them from gtt 2075 * array, hence protect them from being reaped by removing them from gtt
@@ -2574,7 +2189,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2574 if (i915_gem_object_needs_bit17_swizzle(obj)) 2189 if (i915_gem_object_needs_bit17_swizzle(obj))
2575 i915_gem_object_do_bit_17_swizzle(obj); 2190 i915_gem_object_do_bit_17_swizzle(obj);
2576 2191
2577 if (obj->tiling_mode != I915_TILING_NONE && 2192 if (i915_gem_object_is_tiled(obj) &&
2578 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2193 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2579 i915_gem_object_pin_pages(obj); 2194 i915_gem_object_pin_pages(obj);
2580 2195
@@ -2698,253 +2313,39 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
2698 return obj->mapping; 2313 return obj->mapping;
2699} 2314}
2700 2315
2701void i915_vma_move_to_active(struct i915_vma *vma,
2702 struct drm_i915_gem_request *req)
2703{
2704 struct drm_i915_gem_object *obj = vma->obj;
2705 struct intel_engine_cs *engine;
2706
2707 engine = i915_gem_request_get_engine(req);
2708
2709 /* Add a reference if we're newly entering the active list. */
2710 if (obj->active == 0)
2711 drm_gem_object_reference(&obj->base);
2712 obj->active |= intel_engine_flag(engine);
2713
2714 list_move_tail(&obj->engine_list[engine->id], &engine->active_list);
2715 i915_gem_request_assign(&obj->last_read_req[engine->id], req);
2716
2717 list_move_tail(&vma->vm_link, &vma->vm->active_list);
2718}
2719
2720static void 2316static void
2721i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2317i915_gem_object_retire__write(struct i915_gem_active *active,
2318 struct drm_i915_gem_request *request)
2722{ 2319{
2723 GEM_BUG_ON(obj->last_write_req == NULL); 2320 struct drm_i915_gem_object *obj =
2724 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2321 container_of(active, struct drm_i915_gem_object, last_write);
2725 2322
2726 i915_gem_request_assign(&obj->last_write_req, NULL);
2727 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2323 intel_fb_obj_flush(obj, true, ORIGIN_CS);
2728} 2324}
2729 2325
2730static void 2326static void
2731i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2327i915_gem_object_retire__read(struct i915_gem_active *active,
2328 struct drm_i915_gem_request *request)
2732{ 2329{
2733 struct i915_vma *vma; 2330 int idx = request->engine->id;
2734 2331 struct drm_i915_gem_object *obj =
2735 GEM_BUG_ON(obj->last_read_req[ring] == NULL); 2332 container_of(active, struct drm_i915_gem_object, last_read[idx]);
2736 GEM_BUG_ON(!(obj->active & (1 << ring)));
2737 2333
2738 list_del_init(&obj->engine_list[ring]); 2334 GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
2739 i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2740 2335
2741 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2336 i915_gem_object_clear_active(obj, idx);
2742 i915_gem_object_retire__write(obj); 2337 if (i915_gem_object_is_active(obj))
2743
2744 obj->active &= ~(1 << ring);
2745 if (obj->active)
2746 return; 2338 return;
2747 2339
2748 /* Bump our place on the bound list to keep it roughly in LRU order 2340 /* Bump our place on the bound list to keep it roughly in LRU order
2749 * so that we don't steal from recently used but inactive objects 2341 * so that we don't steal from recently used but inactive objects
2750 * (unless we are forced to ofc!) 2342 * (unless we are forced to ofc!)
2751 */ 2343 */
2752 list_move_tail(&obj->global_list, 2344 if (obj->bind_count)
2753 &to_i915(obj->base.dev)->mm.bound_list); 2345 list_move_tail(&obj->global_list,
2754 2346 &request->i915->mm.bound_list);
2755 list_for_each_entry(vma, &obj->vma_list, obj_link) {
2756 if (!list_empty(&vma->vm_link))
2757 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
2758 }
2759
2760 i915_gem_request_assign(&obj->last_fenced_req, NULL);
2761 drm_gem_object_unreference(&obj->base);
2762}
2763
2764static int
2765i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
2766{
2767 struct intel_engine_cs *engine;
2768 int ret;
2769
2770 /* Carefully retire all requests without writing to the rings */
2771 for_each_engine(engine, dev_priv) {
2772 ret = intel_engine_idle(engine);
2773 if (ret)
2774 return ret;
2775 }
2776 i915_gem_retire_requests(dev_priv);
2777
2778 /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
2779 if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
2780 while (intel_kick_waiters(dev_priv) ||
2781 intel_kick_signalers(dev_priv))
2782 yield();
2783 }
2784
2785 /* Finally reset hw state */
2786 for_each_engine(engine, dev_priv)
2787 intel_ring_init_seqno(engine, seqno);
2788
2789 return 0;
2790}
2791
2792int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2793{
2794 struct drm_i915_private *dev_priv = to_i915(dev);
2795 int ret;
2796
2797 if (seqno == 0)
2798 return -EINVAL;
2799
2800 /* HWS page needs to be set less than what we
2801 * will inject to ring
2802 */
2803 ret = i915_gem_init_seqno(dev_priv, seqno - 1);
2804 if (ret)
2805 return ret;
2806
2807 /* Carefully set the last_seqno value so that wrap
2808 * detection still works
2809 */
2810 dev_priv->next_seqno = seqno;
2811 dev_priv->last_seqno = seqno - 1;
2812 if (dev_priv->last_seqno == 0)
2813 dev_priv->last_seqno--;
2814
2815 return 0;
2816}
2817
2818int
2819i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
2820{
2821 /* reserve 0 for non-seqno */
2822 if (dev_priv->next_seqno == 0) {
2823 int ret = i915_gem_init_seqno(dev_priv, 0);
2824 if (ret)
2825 return ret;
2826
2827 dev_priv->next_seqno = 1;
2828 }
2829
2830 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2831 return 0;
2832}
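
/*
 * The wrap handling above works because seqno ordering is checked with
 * signed-difference arithmetic rather than a plain '<' (the idiom behind
 * i915_seqno_passed()); a small self-contained illustration:
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
	/* true if seq1 is at or after seq2, even across a u32 wrap */
	return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
	/* 2 - 0xfffffffe wraps to 4, so the post-wrap seqno reads as newer */
	printf("%d %d\n",
	       seqno_passed(2, 0xfffffffeu),	/* 1 */
	       seqno_passed(0xfffffffeu, 2));	/* 0 */
	return 0;
}
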
2833
2834static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
2835{
2836 struct drm_i915_private *dev_priv = engine->i915;
2837
2838 dev_priv->gt.active_engines |= intel_engine_flag(engine);
2839 if (dev_priv->gt.awake)
2840 return;
2841
2842 intel_runtime_pm_get_noresume(dev_priv);
2843 dev_priv->gt.awake = true;
2844
2845 i915_update_gfx_val(dev_priv);
2846 if (INTEL_GEN(dev_priv) >= 6)
2847 gen6_rps_busy(dev_priv);
2848
2849 queue_delayed_work(dev_priv->wq,
2850 &dev_priv->gt.retire_work,
2851 round_jiffies_up_relative(HZ));
2852}
2853
2854/*
2855 * NB: This function is not allowed to fail. Doing so would mean the
2856 * request is not being tracked for completion but the work itself is
2857 * going to happen on the hardware. This would be a Bad Thing(tm).
2858 */
2859void __i915_add_request(struct drm_i915_gem_request *request,
2860 struct drm_i915_gem_object *obj,
2861 bool flush_caches)
2862{
2863 struct intel_engine_cs *engine;
2864 struct intel_ringbuffer *ringbuf;
2865 u32 request_start;
2866 u32 reserved_tail;
2867 int ret;
2868
2869 if (WARN_ON(request == NULL))
2870 return;
2871
2872 engine = request->engine;
2873 ringbuf = request->ringbuf;
2874
2875 /*
2876 * To ensure that this call will not fail, space for its emissions
2877 * should already have been reserved in the ring buffer. Let the ring
2878 * know that it is time to use that space up.
2879 */
2880 request_start = intel_ring_get_tail(ringbuf);
2881 reserved_tail = request->reserved_space;
2882 request->reserved_space = 0;
2883 2347
2884 /* 2348 i915_gem_object_put(obj);
2885 * Emit any outstanding flushes - execbuf can fail to emit the flush
2886 * after having emitted the batchbuffer command. Hence we need to fix
2887 * things up similar to emitting the lazy request. The difference here
2888 * is that the flush _must_ happen before the next request, no matter
2889 * what.
2890 */
2891 if (flush_caches) {
2892 if (i915.enable_execlists)
2893 ret = logical_ring_flush_all_caches(request);
2894 else
2895 ret = intel_ring_flush_all_caches(request);
2896 /* Not allowed to fail! */
2897 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
2898 }
2899
2900 trace_i915_gem_request_add(request);
2901
2902 request->head = request_start;
2903
2904 /* Whilst this request exists, batch_obj will be on the
2905 * active_list, and so will hold the active reference. Only when this
2906 * request is retired will the batch_obj be moved onto the
2907 * inactive_list and lose its active reference. Hence we do not need
2908 * to explicitly hold another reference here.
2909 */
2910 request->batch_obj = obj;
2911
2912 /* Seal the request and mark it as pending execution. Note that
2913 * we may inspect this state, without holding any locks, during
2914 * hangcheck. Hence we apply the barrier to ensure that we do not
2915 * see a more recent value in the hws than we are tracking.
2916 */
2917 request->emitted_jiffies = jiffies;
2918 request->previous_seqno = engine->last_submitted_seqno;
2919 smp_store_mb(engine->last_submitted_seqno, request->seqno);
2920 list_add_tail(&request->list, &engine->request_list);
2921
2922 /* Record the position of the start of the request so that
2923 * should we detect the updated seqno part-way through the
2924 * GPU processing the request, we never over-estimate the
2925 * position of the head.
2926 */
2927 request->postfix = intel_ring_get_tail(ringbuf);
2928
2929 if (i915.enable_execlists)
2930 ret = engine->emit_request(request);
2931 else {
2932 ret = engine->add_request(request);
2933
2934 request->tail = intel_ring_get_tail(ringbuf);
2935 }
2936 /* Not allowed to fail! */
2937 WARN(ret, "emit|add_request failed: %d!\n", ret);
2938 /* Sanity check that the reserved size was large enough. */
2939 ret = intel_ring_get_tail(ringbuf) - request_start;
2940 if (ret < 0)
2941 ret += ringbuf->size;
2942 WARN_ONCE(ret > reserved_tail,
2943 "Not enough space reserved (%d bytes) "
2944 "for adding the request (%d bytes)\n",
2945 reserved_tail, ret);
2946
2947 i915_gem_mark_busy(engine);
2948} 2349}
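
/*
 * User-space analogue (C11 atomics, hypothetical names) of the publish
 * step above: fill in the request fields first, then make the seqno
 * visible with an ordered store so a lockless reader such as hangcheck
 * never observes the seqno without the fields behind it (the driver uses
 * the stronger smp_store_mb() full barrier).
 */
#include <stdatomic.h>
#include <stdint.h>

struct request {
	uint32_t seqno;
	unsigned long emitted_jiffies;
};

static _Atomic uint32_t last_submitted_seqno;

static void publish_request(struct request *rq, uint32_t seqno,
			    unsigned long now)
{
	rq->seqno = seqno;
	rq->emitted_jiffies = now;

	/* release ordering: the writes above are visible before the publish */
	atomic_store_explicit(&last_submitted_seqno, seqno,
			      memory_order_release);
}
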
2949 2350
2950static bool i915_context_is_banned(const struct i915_gem_context *ctx) 2351static bool i915_context_is_banned(const struct i915_gem_context *ctx)
@@ -2978,101 +2379,6 @@ static void i915_set_reset_status(struct i915_gem_context *ctx,
2978 } 2379 }
2979} 2380}
2980 2381
2981void i915_gem_request_free(struct kref *req_ref)
2982{
2983 struct drm_i915_gem_request *req = container_of(req_ref,
2984 typeof(*req), ref);
2985 kmem_cache_free(req->i915->requests, req);
2986}
2987
2988static inline int
2989__i915_gem_request_alloc(struct intel_engine_cs *engine,
2990 struct i915_gem_context *ctx,
2991 struct drm_i915_gem_request **req_out)
2992{
2993 struct drm_i915_private *dev_priv = engine->i915;
2994 unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
2995 struct drm_i915_gem_request *req;
2996 int ret;
2997
2998 if (!req_out)
2999 return -EINVAL;
3000
3001 *req_out = NULL;
3002
3003 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
3004 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
3005 * and restart.
3006 */
3007 ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
3008 if (ret)
3009 return ret;
3010
3011 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
3012 if (req == NULL)
3013 return -ENOMEM;
3014
3015 ret = i915_gem_get_seqno(engine->i915, &req->seqno);
3016 if (ret)
3017 goto err;
3018
3019 kref_init(&req->ref);
3020 req->i915 = dev_priv;
3021 req->engine = engine;
3022 req->ctx = ctx;
3023 i915_gem_context_reference(req->ctx);
3024
3025 /*
3026 * Reserve space in the ring buffer for all the commands required to
3027 * eventually emit this request. This is to guarantee that the
3028 * i915_add_request() call can't fail. Note that the reserve may need
3029 * to be redone if the request is not actually submitted straight
3030 * away, e.g. because a GPU scheduler has deferred it.
3031 */
3032 req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
3033
3034 if (i915.enable_execlists)
3035 ret = intel_logical_ring_alloc_request_extras(req);
3036 else
3037 ret = intel_ring_alloc_request_extras(req);
3038 if (ret)
3039 goto err_ctx;
3040
3041 *req_out = req;
3042 return 0;
3043
3044err_ctx:
3045 i915_gem_context_unreference(ctx);
3046err:
3047 kmem_cache_free(dev_priv->requests, req);
3048 return ret;
3049}
3050
3051/**
3052 * i915_gem_request_alloc - allocate a request structure
3053 *
3054 * @engine: engine that we wish to issue the request on.
3055 * @ctx: context that the request will be associated with.
3056 * This can be NULL if the request is not directly related to
3057 * any specific user context, in which case this function will
3058 * choose an appropriate context to use.
3059 *
3060 * Returns a pointer to the allocated request if successful,
3061 * or an error code if not.
3062 */
3063struct drm_i915_gem_request *
3064i915_gem_request_alloc(struct intel_engine_cs *engine,
3065 struct i915_gem_context *ctx)
3066{
3067 struct drm_i915_gem_request *req;
3068 int err;
3069
3070 if (ctx == NULL)
3071 ctx = engine->i915->kernel_context;
3072 err = __i915_gem_request_alloc(engine, ctx, &req);
3073 return err ? ERR_PTR(err) : req;
3074}
3075
3076struct drm_i915_gem_request * 2382struct drm_i915_gem_request *
3077i915_gem_find_active_request(struct intel_engine_cs *engine) 2383i915_gem_find_active_request(struct intel_engine_cs *engine)
3078{ 2384{
@@ -3086,7 +2392,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
3086 * extra delay for a recent interrupt is pointless. Hence, we do 2392 * extra delay for a recent interrupt is pointless. Hence, we do
3087 * not need an engine->irq_seqno_barrier() before the seqno reads. 2393 * not need an engine->irq_seqno_barrier() before the seqno reads.
3088 */ 2394 */
3089 list_for_each_entry(request, &engine->request_list, list) { 2395 list_for_each_entry(request, &engine->request_list, link) {
3090 if (i915_gem_request_completed(request)) 2396 if (i915_gem_request_completed(request))
3091 continue; 2397 continue;
3092 2398
@@ -3108,23 +2414,24 @@ static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
3108 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2414 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
3109 2415
3110 i915_set_reset_status(request->ctx, ring_hung); 2416 i915_set_reset_status(request->ctx, ring_hung);
3111 list_for_each_entry_continue(request, &engine->request_list, list) 2417 list_for_each_entry_continue(request, &engine->request_list, link)
3112 i915_set_reset_status(request->ctx, false); 2418 i915_set_reset_status(request->ctx, false);
3113} 2419}
3114 2420
3115static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) 2421static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
3116{ 2422{
3117 struct intel_ringbuffer *buffer; 2423 struct drm_i915_gem_request *request;
3118 2424 struct intel_ring *ring;
3119 while (!list_empty(&engine->active_list)) {
3120 struct drm_i915_gem_object *obj;
3121 2425
3122 obj = list_first_entry(&engine->active_list, 2426 request = i915_gem_active_peek(&engine->last_request,
3123 struct drm_i915_gem_object, 2427 &engine->i915->drm.struct_mutex);
3124 engine_list[engine->id]);
3125 2428
3126 i915_gem_object_retire__read(obj, engine->id); 2429 /* Mark all pending requests as complete so that any concurrent
3127 } 2430 * (lockless) lookup doesn't try and wait upon the request as we
2431 * reset it.
2432 */
2433 if (request)
2434 intel_engine_init_seqno(engine, request->fence.seqno);
3128 2435
3129 /* 2436 /*
3130 * Clear the execlists queue up before freeing the requests, as those 2437 * Clear the execlists queue up before freeing the requests, as those
@@ -3146,15 +2453,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
3146 * implicit references on things like e.g. ppgtt address spaces through 2453 * implicit references on things like e.g. ppgtt address spaces through
3147 * the request. 2454 * the request.
3148 */ 2455 */
3149 while (!list_empty(&engine->request_list)) { 2456 if (request)
3150 struct drm_i915_gem_request *request; 2457 i915_gem_request_retire_upto(request);
3151 2458 GEM_BUG_ON(intel_engine_is_active(engine));
3152 request = list_first_entry(&engine->request_list,
3153 struct drm_i915_gem_request,
3154 list);
3155
3156 i915_gem_request_retire(request);
3157 }
3158 2459
3159 /* Having flushed all requests from all queues, we know that all 2460 /* Having flushed all requests from all queues, we know that all
3160 * ringbuffers must now be empty. However, since we do not reclaim 2461 * ringbuffers must now be empty. However, since we do not reclaim
@@ -3163,12 +2464,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
3163 * upon reset is less than when we start. Do one more pass over 2464 * upon reset is less than when we start. Do one more pass over
3164 * all the ringbuffers to reset last_retired_head. 2465 * all the ringbuffers to reset last_retired_head.
3165 */ 2466 */
3166 list_for_each_entry(buffer, &engine->buffers, link) { 2467 list_for_each_entry(ring, &engine->buffers, link) {
3167 buffer->last_retired_head = buffer->tail; 2468 ring->last_retired_head = ring->tail;
3168 intel_ring_update_space(buffer); 2469 intel_ring_update_space(ring);
3169 } 2470 }
3170 2471
3171 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 2472 engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
3172} 2473}
3173 2474
3174void i915_gem_reset(struct drm_device *dev) 2475void i915_gem_reset(struct drm_device *dev)
@@ -3186,82 +2487,11 @@ void i915_gem_reset(struct drm_device *dev)
3186 2487
3187 for_each_engine(engine, dev_priv) 2488 for_each_engine(engine, dev_priv)
3188 i915_gem_reset_engine_cleanup(engine); 2489 i915_gem_reset_engine_cleanup(engine);
2490 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
3189 2491
3190 i915_gem_context_reset(dev); 2492 i915_gem_context_reset(dev);
3191 2493
3192 i915_gem_restore_fences(dev); 2494 i915_gem_restore_fences(dev);
3193
3194 WARN_ON(i915_verify_lists(dev));
3195}
3196
3197/**
3198 * This function clears the request list as sequence numbers are passed.
3199 * @engine: engine to retire requests on
3200 */
3201void
3202i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
3203{
3204 WARN_ON(i915_verify_lists(engine->dev));
3205
3206 /* Retire requests first as we use it above for the early return.
3207 * If we retire requests last, we may use a later seqno and so clear
3208 * the requests lists without clearing the active list, leading to
3209 * confusion.
3210 */
3211 while (!list_empty(&engine->request_list)) {
3212 struct drm_i915_gem_request *request;
3213
3214 request = list_first_entry(&engine->request_list,
3215 struct drm_i915_gem_request,
3216 list);
3217
3218 if (!i915_gem_request_completed(request))
3219 break;
3220
3221 i915_gem_request_retire(request);
3222 }
3223
3224 /* Move any buffers on the active list that are no longer referenced
3225 * by the ringbuffer to the flushing/inactive lists as appropriate,
3226 * before we free the context associated with the requests.
3227 */
3228 while (!list_empty(&engine->active_list)) {
3229 struct drm_i915_gem_object *obj;
3230
3231 obj = list_first_entry(&engine->active_list,
3232 struct drm_i915_gem_object,
3233 engine_list[engine->id]);
3234
3235 if (!list_empty(&obj->last_read_req[engine->id]->list))
3236 break;
3237
3238 i915_gem_object_retire__read(obj, engine->id);
3239 }
3240
3241 WARN_ON(i915_verify_lists(engine->dev));
3242}
3243
3244void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
3245{
3246 struct intel_engine_cs *engine;
3247
3248 lockdep_assert_held(&dev_priv->drm.struct_mutex);
3249
3250 if (dev_priv->gt.active_engines == 0)
3251 return;
3252
3253 GEM_BUG_ON(!dev_priv->gt.awake);
3254
3255 for_each_engine(engine, dev_priv) {
3256 i915_gem_retire_requests_ring(engine);
3257 if (list_empty(&engine->request_list))
3258 dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
3259 }
3260
3261 if (dev_priv->gt.active_engines == 0)
3262 queue_delayed_work(dev_priv->wq,
3263 &dev_priv->gt.idle_work,
3264 msecs_to_jiffies(100));
3265} 2495}
3266 2496
3267static void 2497static void
@@ -3281,10 +2511,12 @@ i915_gem_retire_work_handler(struct work_struct *work)
3281 * We do not need to do this test under locking as in the worst-case 2511 * We do not need to do this test under locking as in the worst-case
3282 * we queue the retire worker once too often. 2512 * we queue the retire worker once too often.
3283 */ 2513 */
3284 if (READ_ONCE(dev_priv->gt.awake)) 2514 if (READ_ONCE(dev_priv->gt.awake)) {
2515 i915_queue_hangcheck(dev_priv);
3285 queue_delayed_work(dev_priv->wq, 2516 queue_delayed_work(dev_priv->wq,
3286 &dev_priv->gt.retire_work, 2517 &dev_priv->gt.retire_work,
3287 round_jiffies_up_relative(HZ)); 2518 round_jiffies_up_relative(HZ));
2519 }
3288} 2520}
3289 2521
3290static void 2522static void
@@ -3324,11 +2556,14 @@ i915_gem_idle_work_handler(struct work_struct *work)
3324 dev_priv->gt.awake = false; 2556 dev_priv->gt.awake = false;
3325 rearm_hangcheck = false; 2557 rearm_hangcheck = false;
3326 2558
2559 /* As we have disabled hangcheck, we need to unstick any waiters still
2560 * hanging around. However, as we may be racing against the interrupt
2561 * handler or the waiters themselves, we skip enabling the fake-irq.
2562 */
3327 stuck_engines = intel_kick_waiters(dev_priv); 2563 stuck_engines = intel_kick_waiters(dev_priv);
3328 if (unlikely(stuck_engines)) { 2564 if (unlikely(stuck_engines))
3329 DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n"); 2565 DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
3330 dev_priv->gpu_error.missed_irq_rings |= stuck_engines; 2566 stuck_engines);
3331 }
3332 2567
3333 if (INTEL_GEN(dev_priv) >= 6) 2568 if (INTEL_GEN(dev_priv) >= 6)
3334 gen6_rps_idle(dev_priv); 2569 gen6_rps_idle(dev_priv);
@@ -3343,32 +2578,17 @@ out_rearm:
3343 } 2578 }
3344} 2579}
3345 2580
3346/** 2581void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
3347 * Ensures that an object will eventually get non-busy by flushing any required
3348 * write domains, emitting any outstanding lazy request and retiring and
3349 * completed requests.
3350 * @obj: object to flush
3351 */
3352static int
3353i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3354{ 2582{
3355 int i; 2583 struct drm_i915_gem_object *obj = to_intel_bo(gem);
3356 2584 struct drm_i915_file_private *fpriv = file->driver_priv;
3357 if (!obj->active) 2585 struct i915_vma *vma, *vn;
3358 return 0;
3359
3360 for (i = 0; i < I915_NUM_ENGINES; i++) {
3361 struct drm_i915_gem_request *req;
3362
3363 req = obj->last_read_req[i];
3364 if (req == NULL)
3365 continue;
3366
3367 if (i915_gem_request_completed(req))
3368 i915_gem_object_retire__read(obj, i);
3369 }
3370 2586
3371 return 0; 2587 mutex_lock(&obj->base.dev->struct_mutex);
2588 list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
2589 if (vma->vm->file == fpriv)
2590 i915_vma_close(vma);
2591 mutex_unlock(&obj->base.dev->struct_mutex);
3372} 2592}
3373 2593
3374/** 2594/**
@@ -3399,122 +2619,58 @@ int
3399i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2619i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3400{ 2620{
3401 struct drm_i915_gem_wait *args = data; 2621 struct drm_i915_gem_wait *args = data;
2622 struct intel_rps_client *rps = to_rps_client(file);
3402 struct drm_i915_gem_object *obj; 2623 struct drm_i915_gem_object *obj;
3403 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 2624 unsigned long active;
3404 int i, n = 0; 2625 int idx, ret = 0;
3405 int ret;
3406 2626
3407 if (args->flags != 0) 2627 if (args->flags != 0)
3408 return -EINVAL; 2628 return -EINVAL;
3409 2629
3410 ret = i915_mutex_lock_interruptible(dev); 2630 obj = i915_gem_object_lookup(file, args->bo_handle);
3411 if (ret) 2631 if (!obj)
3412 return ret;
3413
3414 obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle));
3415 if (&obj->base == NULL) {
3416 mutex_unlock(&dev->struct_mutex);
3417 return -ENOENT; 2632 return -ENOENT;
3418 }
3419
3420 /* Need to make sure the object gets inactive eventually. */
3421 ret = i915_gem_object_flush_active(obj);
3422 if (ret)
3423 goto out;
3424
3425 if (!obj->active)
3426 goto out;
3427
3428 /* Do this after OLR check to make sure we make forward progress polling
3429 * on this IOCTL with a timeout == 0 (like busy ioctl)
3430 */
3431 if (args->timeout_ns == 0) {
3432 ret = -ETIME;
3433 goto out;
3434 }
3435
3436 drm_gem_object_unreference(&obj->base);
3437
3438 for (i = 0; i < I915_NUM_ENGINES; i++) {
3439 if (obj->last_read_req[i] == NULL)
3440 continue;
3441 2633
3442 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 2634 active = __I915_BO_ACTIVE(obj);
2635 for_each_active(active, idx) {
2636 s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
2637 ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true,
2638 timeout, rps);
2639 if (ret)
2640 break;
3443 } 2641 }
3444 2642
3445 mutex_unlock(&dev->struct_mutex); 2643 i915_gem_object_put_unlocked(obj);
3446
3447 for (i = 0; i < n; i++) {
3448 if (ret == 0)
3449 ret = __i915_wait_request(req[i], true,
3450 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
3451 to_rps_client(file));
3452 i915_gem_request_unreference(req[i]);
3453 }
3454 return ret;
3455
3456out:
3457 drm_gem_object_unreference(&obj->base);
3458 mutex_unlock(&dev->struct_mutex);
3459 return ret; 2644 return ret;
3460} 2645}
3461 2646
3462static int 2647static int
3463__i915_gem_object_sync(struct drm_i915_gem_object *obj, 2648__i915_gem_object_sync(struct drm_i915_gem_request *to,
3464 struct intel_engine_cs *to, 2649 struct drm_i915_gem_request *from)
3465 struct drm_i915_gem_request *from_req,
3466 struct drm_i915_gem_request **to_req)
3467{ 2650{
3468 struct intel_engine_cs *from;
3469 int ret; 2651 int ret;
3470 2652
3471 from = i915_gem_request_get_engine(from_req); 2653 if (to->engine == from->engine)
3472 if (to == from)
3473 return 0;
3474
3475 if (i915_gem_request_completed(from_req))
3476 return 0; 2654 return 0;
3477 2655
3478 if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) { 2656 if (!i915.semaphores) {
3479 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2657 ret = i915_wait_request(from,
3480 ret = __i915_wait_request(from_req, 2658 from->i915->mm.interruptible,
3481 i915->mm.interruptible, 2659 NULL,
3482 NULL, 2660 NO_WAITBOOST);
3483 &i915->rps.semaphores);
3484 if (ret) 2661 if (ret)
3485 return ret; 2662 return ret;
3486
3487 i915_gem_object_retire_request(obj, from_req);
3488 } else { 2663 } else {
3489 int idx = intel_ring_sync_index(from, to); 2664 int idx = intel_engine_sync_index(from->engine, to->engine);
3490 u32 seqno = i915_gem_request_get_seqno(from_req); 2665 if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
3491
3492 WARN_ON(!to_req);
3493
3494 if (seqno <= from->semaphore.sync_seqno[idx])
3495 return 0; 2666 return 0;
3496 2667
3497 if (*to_req == NULL) { 2668 trace_i915_gem_ring_sync_to(to, from);
3498 struct drm_i915_gem_request *req; 2669 ret = to->engine->semaphore.sync_to(to, from);
3499
3500 req = i915_gem_request_alloc(to, NULL);
3501 if (IS_ERR(req))
3502 return PTR_ERR(req);
3503
3504 *to_req = req;
3505 }
3506
3507 trace_i915_gem_ring_sync_to(*to_req, from, from_req);
3508 ret = to->semaphore.sync_to(*to_req, from, seqno);
3509 if (ret) 2670 if (ret)
3510 return ret; 2671 return ret;
3511 2672
3512 /* We use last_read_req because sync_to() 2673 from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
3513 * might have just caused seqno wrap under
3514 * the radar.
3515 */
3516 from->semaphore.sync_seqno[idx] =
3517 i915_gem_request_get_seqno(obj->last_read_req[from->id]);
3518 } 2674 }
3519 2675
3520 return 0; 2676 return 0;
@@ -3524,17 +2680,12 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
3524 * i915_gem_object_sync - sync an object to a ring. 2680 * i915_gem_object_sync - sync an object to a ring.
3525 * 2681 *
3526 * @obj: object which may be in use on another ring. 2682 * @obj: object which may be in use on another ring.
3527 * @to: ring we wish to use the object on. May be NULL. 2683 * @to: request we are wishing to use
3528 * @to_req: request we wish to use the object for. See below.
3529 * This will be allocated and returned if a request is
3530 * required but not passed in.
3531 * 2684 *
3532 * This code is meant to abstract object synchronization with the GPU. 2685 * This code is meant to abstract object synchronization with the GPU.
3533 * Calling with NULL implies synchronizing the object with the CPU 2686 * Conceptually we serialise writes between engines inside the GPU.
3534 * rather than a particular GPU ring. Conceptually we serialise writes 2687 * We only allow one engine to write into a buffer at any time, but
3535 * between engines inside the GPU. We only allow one engine to write 2688 * multiple readers. To ensure each has a coherent view of memory, we must:
3536 * into a buffer at any time, but multiple readers. To ensure each has
3537 * a coherent view of memory, we must:
3538 * 2689 *
3539 * - If there is an outstanding write request to the object, the new 2690 * - If there is an outstanding write request to the object, the new
3540 * request must wait for it to complete (either CPU or in hw, requests 2691 * request must wait for it to complete (either CPU or in hw, requests
@@ -3543,44 +2694,39 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
3543 * - If we are a write request (pending_write_domain is set), the new 2694 * - If we are a write request (pending_write_domain is set), the new
3544 * request must wait for outstanding read requests to complete. 2695 * request must wait for outstanding read requests to complete.
3545 * 2696 *
3546 * For CPU synchronisation (NULL to) no request is required. For syncing with
3547 * rings to_req must be non-NULL. However, a request does not have to be
3548 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
3549 * request will be allocated automatically and returned through *to_req. Note
3550 * that it is not guaranteed that commands will be emitted (because the system
3551 * might already be idle). Hence there is no need to create a request that
3552 * might never have any work submitted. Note further that if a request is
3553 * returned in *to_req, it is the responsibility of the caller to submit
3554 * that request (after potentially adding more work to it).
3555 *
3556 * Returns 0 if successful, else propagates up the lower layer error. 2697 * Returns 0 if successful, else propagates up the lower layer error.
3557 */ 2698 */
3558int 2699int
3559i915_gem_object_sync(struct drm_i915_gem_object *obj, 2700i915_gem_object_sync(struct drm_i915_gem_object *obj,
3560 struct intel_engine_cs *to, 2701 struct drm_i915_gem_request *to)
3561 struct drm_i915_gem_request **to_req)
3562{ 2702{
3563 const bool readonly = obj->base.pending_write_domain == 0; 2703 struct i915_gem_active *active;
3564 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 2704 unsigned long active_mask;
3565 int ret, i, n; 2705 int idx;
3566 2706
3567 if (!obj->active) 2707 lockdep_assert_held(&obj->base.dev->struct_mutex);
3568 return 0;
3569 2708
3570 if (to == NULL) 2709 active_mask = i915_gem_object_get_active(obj);
3571 return i915_gem_object_wait_rendering(obj, readonly); 2710 if (!active_mask)
2711 return 0;
3572 2712
3573 n = 0; 2713 if (obj->base.pending_write_domain) {
3574 if (readonly) { 2714 active = obj->last_read;
3575 if (obj->last_write_req)
3576 req[n++] = obj->last_write_req;
3577 } else { 2715 } else {
3578 for (i = 0; i < I915_NUM_ENGINES; i++) 2716 active_mask = 1;
3579 if (obj->last_read_req[i]) 2717 active = &obj->last_write;
3580 req[n++] = obj->last_read_req[i];
3581 } 2718 }
3582 for (i = 0; i < n; i++) { 2719
3583 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 2720 for_each_active(active_mask, idx) {
2721 struct drm_i915_gem_request *request;
2722 int ret;
2723
2724 request = i915_gem_active_peek(&active[idx],
2725 &obj->base.dev->struct_mutex);
2726 if (!request)
2727 continue;
2728
2729 ret = __i915_gem_object_sync(to, request);
3584 if (ret) 2730 if (ret)
3585 return ret; 2731 return ret;
3586 } 2732 }
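The loop above implements the rule from the kernel-doc: a request that will write must order itself behind every engine still reading the object, while a read-only request only has to wait for the last writer. A compact illustration of that decision with placeholder tracking structures (nothing here is the driver's API):

#include <stdio.h>
#include <stdbool.h>

#define NUM_ENGINES 4

struct object_activity {
        int last_read[NUM_ENGINES]; /* pending read request id per engine, 0 = idle */
        int last_write;             /* pending write request id, 0 = none */
};

/* Report which outstanding requests a new request must wait for. */
static void sync_object(const struct object_activity *obj, bool new_is_write)
{
        if (new_is_write) {
                /* A writer waits for every reader; the last writer is
                 * included because a write also marks its engine as reading. */
                for (int i = 0; i < NUM_ENGINES; i++)
                        if (obj->last_read[i])
                                printf("wait for request %d (engine %d)\n",
                                       obj->last_read[i], i);
        } else if (obj->last_write) {
                /* A reader only needs the last write to have finished. */
                printf("wait for write request %d\n", obj->last_write);
        }
}

int main(void)
{
        struct object_activity obj = {
                .last_read = { 11, 0, 12, 0 },
                .last_write = 12,
        };

        sync_object(&obj, true);   /* writer: waits on 11 and 12 */
        sync_object(&obj, false);  /* reader: waits on 12 only   */
        return 0;
}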
@@ -3611,7 +2757,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3611 2757
3612static void __i915_vma_iounmap(struct i915_vma *vma) 2758static void __i915_vma_iounmap(struct i915_vma *vma)
3613{ 2759{
3614 GEM_BUG_ON(vma->pin_count); 2760 GEM_BUG_ON(i915_vma_is_pinned(vma));
3615 2761
3616 if (vma->iomap == NULL) 2762 if (vma->iomap == NULL)
3617 return; 2763 return;
@@ -3620,32 +2766,51 @@ static void __i915_vma_iounmap(struct i915_vma *vma)
3620 vma->iomap = NULL; 2766 vma->iomap = NULL;
3621} 2767}
3622 2768
3623static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 2769int i915_vma_unbind(struct i915_vma *vma)
3624{ 2770{
3625 struct drm_i915_gem_object *obj = vma->obj; 2771 struct drm_i915_gem_object *obj = vma->obj;
3626 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2772 unsigned long active;
3627 int ret; 2773 int ret;
3628 2774
3629 if (list_empty(&vma->obj_link)) 2775 /* First wait upon any activity as retiring the request may
3630 return 0; 2776 * have side-effects such as unpinning or even unbinding this vma.
2777 */
2778 active = i915_vma_get_active(vma);
2779 if (active) {
2780 int idx;
2781
2782 /* When a closed VMA is retired, it is unbound - eek.
2783 * In order to prevent it from being recursively closed,
2784 * take a pin on the vma so that the second unbind is
2785 * aborted.
2786 */
2787 __i915_vma_pin(vma);
3631 2788
3632 if (!drm_mm_node_allocated(&vma->node)) { 2789 for_each_active(active, idx) {
3633 i915_gem_vma_destroy(vma); 2790 ret = i915_gem_active_retire(&vma->last_read[idx],
3634 return 0; 2791 &vma->vm->dev->struct_mutex);
2792 if (ret)
2793 break;
2794 }
2795
2796 __i915_vma_unpin(vma);
2797 if (ret)
2798 return ret;
2799
2800 GEM_BUG_ON(i915_vma_is_active(vma));
3635 } 2801 }
3636 2802
3637 if (vma->pin_count) 2803 if (i915_vma_is_pinned(vma))
3638 return -EBUSY; 2804 return -EBUSY;
3639 2805
3640 BUG_ON(obj->pages == NULL); 2806 if (!drm_mm_node_allocated(&vma->node))
2807 goto destroy;
3641 2808
3642 if (wait) { 2809 GEM_BUG_ON(obj->bind_count == 0);
3643 ret = i915_gem_object_wait_rendering(obj, false); 2810 GEM_BUG_ON(!obj->pages);
3644 if (ret)
3645 return ret;
3646 }
3647 2811
3648 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 2812 if (i915_vma_is_ggtt(vma) &&
2813 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3649 i915_gem_object_finish_gtt(obj); 2814 i915_gem_object_finish_gtt(obj);
3650 2815
3651 /* release the fence reg _after_ flushing */ 2816 /* release the fence reg _after_ flushing */
@@ -3656,13 +2821,16 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
3656 __i915_vma_iounmap(vma); 2821 __i915_vma_iounmap(vma);
3657 } 2822 }
3658 2823
3659 trace_i915_vma_unbind(vma); 2824 if (likely(!vma->vm->closed)) {
2825 trace_i915_vma_unbind(vma);
2826 vma->vm->unbind_vma(vma);
2827 }
2828 vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
3660 2829
3661 vma->vm->unbind_vma(vma); 2830 drm_mm_remove_node(&vma->node);
3662 vma->bound = 0; 2831 list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
3663 2832
3664 list_del_init(&vma->vm_link); 2833 if (i915_vma_is_ggtt(vma)) {
3665 if (vma->is_ggtt) {
3666 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 2834 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3667 obj->map_and_fenceable = false; 2835 obj->map_and_fenceable = false;
3668 } else if (vma->ggtt_view.pages) { 2836 } else if (vma->ggtt_view.pages) {
@@ -3672,13 +2840,11 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
3672 vma->ggtt_view.pages = NULL; 2840 vma->ggtt_view.pages = NULL;
3673 } 2841 }
3674 2842
3675 drm_mm_remove_node(&vma->node);
3676 i915_gem_vma_destroy(vma);
3677
3678 /* Since the unbound list is global, only move to that list if 2843 /* Since the unbound list is global, only move to that list if
3679 * no more VMAs exist. */ 2844 * no more VMAs exist. */
3680 if (list_empty(&obj->vma_list)) 2845 if (--obj->bind_count == 0)
3681 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2846 list_move_tail(&obj->global_list,
2847 &to_i915(obj->base.dev)->mm.unbound_list);
3682 2848
3683 /* And finally now the object is completely decoupled from this vma, 2849 /* And finally now the object is completely decoupled from this vma,
3684 * we can drop its hold on the backing storage and allow it to be 2850 * we can drop its hold on the backing storage and allow it to be
@@ -3686,36 +2852,28 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
3686 */ 2852 */
3687 i915_gem_object_unpin_pages(obj); 2853 i915_gem_object_unpin_pages(obj);
3688 2854
3689 return 0; 2855destroy:
3690} 2856 if (unlikely(i915_vma_is_closed(vma)))
3691 2857 i915_vma_destroy(vma);
3692int i915_vma_unbind(struct i915_vma *vma)
3693{
3694 return __i915_vma_unbind(vma, true);
3695}
3696 2858
3697int __i915_vma_unbind_no_wait(struct i915_vma *vma) 2859 return 0;
3698{
3699 return __i915_vma_unbind(vma, false);
3700} 2860}
3701 2861
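The comment in i915_vma_unbind() above notes that retiring a request on a closed VMA can try to unbind the same VMA again, so a temporary pin is held around the retire loop to abort the nested attempt. The general shape of that guard, reduced to a toy structure with made-up names:

#include <stdio.h>

struct mapping {
        int pin_count;
        int bound;
};

static void teardown(struct mapping *m);

/* Stands in for request retirement, which may re-enter teardown()
 * on the same mapping as a side effect. */
static void retire_side_effect(struct mapping *m)
{
        teardown(m);
}

static void teardown(struct mapping *m)
{
        if (m->pin_count) {
                printf("pinned, aborting nested teardown\n");
                return;
        }
        if (!m->bound)
                return;

        m->pin_count++;            /* guard against recursion ...    */
        retire_side_effect(m);     /* ... while running side effects */
        m->pin_count--;

        m->bound = 0;
        printf("unbound\n");
}

int main(void)
{
        struct mapping m = { .pin_count = 0, .bound = 1 };
        teardown(&m);
        return 0;
}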
3702int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) 2862int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
2863 bool interruptible)
3703{ 2864{
3704 struct intel_engine_cs *engine; 2865 struct intel_engine_cs *engine;
3705 int ret; 2866 int ret;
3706 2867
3707 lockdep_assert_held(&dev_priv->drm.struct_mutex);
3708
3709 for_each_engine(engine, dev_priv) { 2868 for_each_engine(engine, dev_priv) {
3710 if (engine->last_context == NULL) 2869 if (engine->last_context == NULL)
3711 continue; 2870 continue;
3712 2871
3713 ret = intel_engine_idle(engine); 2872 ret = intel_engine_idle(engine, interruptible);
3714 if (ret) 2873 if (ret)
3715 return ret; 2874 return ret;
3716 } 2875 }
3717 2876
3718 WARN_ON(i915_verify_lists(dev));
3719 return 0; 2877 return 0;
3720} 2878}
3721 2879
@@ -3753,128 +2911,95 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3753} 2911}
3754 2912
3755/** 2913/**
3756 * Finds free space in the GTT aperture and binds the object or a view of it 2914 * i915_vma_insert - finds a slot for the vma in its address space
3757 * there. 2915 * @vma: the vma
3758 * @obj: object to bind 2916 * @size: requested size in bytes (can be larger than the VMA)
3759 * @vm: address space to bind into 2917 * @alignment: required alignment
3760 * @ggtt_view: global gtt view if applicable
3761 * @alignment: requested alignment
3762 * @flags: mask of PIN_* flags to use 2918 * @flags: mask of PIN_* flags to use
2919 *
2920 * First we try to allocate some free space that meets the requirements for
 2921 * the VMA. Failing that, if the flags permit, it will evict an old VMA,
 2922 * preferably the oldest idle entry to make room for the new VMA.
2923 *
2924 * Returns:
2925 * 0 on success, negative error code otherwise.
3763 */ 2926 */
3764static struct i915_vma * 2927static int
3765i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 2928i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3766 struct i915_address_space *vm,
3767 const struct i915_ggtt_view *ggtt_view,
3768 unsigned alignment,
3769 uint64_t flags)
3770{ 2929{
3771 struct drm_device *dev = obj->base.dev; 2930 struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
3772 struct drm_i915_private *dev_priv = to_i915(dev); 2931 struct drm_i915_gem_object *obj = vma->obj;
3773 struct i915_ggtt *ggtt = &dev_priv->ggtt;
3774 u32 fence_alignment, unfenced_alignment;
3775 u32 search_flag, alloc_flag;
3776 u64 start, end; 2932 u64 start, end;
3777 u64 size, fence_size; 2933 u64 min_alignment;
3778 struct i915_vma *vma;
3779 int ret; 2934 int ret;
3780 2935
3781 if (i915_is_ggtt(vm)) { 2936 GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
3782 u32 view_size; 2937 GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
3783 2938
3784 if (WARN_ON(!ggtt_view)) 2939 size = max(size, vma->size);
3785 return ERR_PTR(-EINVAL); 2940 if (flags & PIN_MAPPABLE)
3786 2941 size = i915_gem_get_ggtt_size(dev_priv, size,
3787 view_size = i915_ggtt_view_size(obj, ggtt_view); 2942 i915_gem_object_get_tiling(obj));
3788 2943
3789 fence_size = i915_gem_get_gtt_size(dev, 2944 min_alignment =
3790 view_size, 2945 i915_gem_get_ggtt_alignment(dev_priv, size,
3791 obj->tiling_mode); 2946 i915_gem_object_get_tiling(obj),
3792 fence_alignment = i915_gem_get_gtt_alignment(dev, 2947 flags & PIN_MAPPABLE);
3793 view_size, 2948 if (alignment == 0)
3794 obj->tiling_mode, 2949 alignment = min_alignment;
3795 true); 2950 if (alignment & (min_alignment - 1)) {
3796 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 2951 DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
3797 view_size, 2952 alignment, min_alignment);
3798 obj->tiling_mode, 2953 return -EINVAL;
3799 false);
3800 size = flags & PIN_MAPPABLE ? fence_size : view_size;
3801 } else {
3802 fence_size = i915_gem_get_gtt_size(dev,
3803 obj->base.size,
3804 obj->tiling_mode);
3805 fence_alignment = i915_gem_get_gtt_alignment(dev,
3806 obj->base.size,
3807 obj->tiling_mode,
3808 true);
3809 unfenced_alignment =
3810 i915_gem_get_gtt_alignment(dev,
3811 obj->base.size,
3812 obj->tiling_mode,
3813 false);
3814 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3815 } 2954 }
3816 2955
3817 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 2956 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3818 end = vm->total; 2957
2958 end = vma->vm->total;
3819 if (flags & PIN_MAPPABLE) 2959 if (flags & PIN_MAPPABLE)
3820 end = min_t(u64, end, ggtt->mappable_end); 2960 end = min_t(u64, end, dev_priv->ggtt.mappable_end);
3821 if (flags & PIN_ZONE_4G) 2961 if (flags & PIN_ZONE_4G)
3822 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 2962 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
3823 2963
3824 if (alignment == 0)
3825 alignment = flags & PIN_MAPPABLE ? fence_alignment :
3826 unfenced_alignment;
3827 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
3828 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3829 ggtt_view ? ggtt_view->type : 0,
3830 alignment);
3831 return ERR_PTR(-EINVAL);
3832 }
3833
3834 /* If binding the object/GGTT view requires more space than the entire 2964 /* If binding the object/GGTT view requires more space than the entire
3835 * aperture has, reject it early before evicting everything in a vain 2965 * aperture has, reject it early before evicting everything in a vain
3836 * attempt to find space. 2966 * attempt to find space.
3837 */ 2967 */
3838 if (size > end) { 2968 if (size > end) {
3839 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 2969 DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
3840 ggtt_view ? ggtt_view->type : 0, 2970 size, obj->base.size,
3841 size,
3842 flags & PIN_MAPPABLE ? "mappable" : "total", 2971 flags & PIN_MAPPABLE ? "mappable" : "total",
3843 end); 2972 end);
3844 return ERR_PTR(-E2BIG); 2973 return -E2BIG;
3845 } 2974 }
3846 2975
3847 ret = i915_gem_object_get_pages(obj); 2976 ret = i915_gem_object_get_pages(obj);
3848 if (ret) 2977 if (ret)
3849 return ERR_PTR(ret); 2978 return ret;
3850 2979
3851 i915_gem_object_pin_pages(obj); 2980 i915_gem_object_pin_pages(obj);
3852 2981
3853 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3854 i915_gem_obj_lookup_or_create_vma(obj, vm);
3855
3856 if (IS_ERR(vma))
3857 goto err_unpin;
3858
3859 if (flags & PIN_OFFSET_FIXED) { 2982 if (flags & PIN_OFFSET_FIXED) {
3860 uint64_t offset = flags & PIN_OFFSET_MASK; 2983 u64 offset = flags & PIN_OFFSET_MASK;
3861 2984 if (offset & (alignment - 1) || offset > end - size) {
3862 if (offset & (alignment - 1) || offset + size > end) {
3863 ret = -EINVAL; 2985 ret = -EINVAL;
3864 goto err_free_vma; 2986 goto err_unpin;
3865 } 2987 }
2988
3866 vma->node.start = offset; 2989 vma->node.start = offset;
3867 vma->node.size = size; 2990 vma->node.size = size;
3868 vma->node.color = obj->cache_level; 2991 vma->node.color = obj->cache_level;
3869 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 2992 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3870 if (ret) { 2993 if (ret) {
3871 ret = i915_gem_evict_for_vma(vma); 2994 ret = i915_gem_evict_for_vma(vma);
3872 if (ret == 0) 2995 if (ret == 0)
3873 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 2996 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
2997 if (ret)
2998 goto err_unpin;
3874 } 2999 }
3875 if (ret)
3876 goto err_free_vma;
3877 } else { 3000 } else {
3001 u32 search_flag, alloc_flag;
3002
3878 if (flags & PIN_HIGH) { 3003 if (flags & PIN_HIGH) {
3879 search_flag = DRM_MM_SEARCH_BELOW; 3004 search_flag = DRM_MM_SEARCH_BELOW;
3880 alloc_flag = DRM_MM_CREATE_TOP; 3005 alloc_flag = DRM_MM_CREATE_TOP;
@@ -3883,47 +3008,45 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3883 alloc_flag = DRM_MM_CREATE_DEFAULT; 3008 alloc_flag = DRM_MM_CREATE_DEFAULT;
3884 } 3009 }
3885 3010
3011 /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3012 * so we know that we always have a minimum alignment of 4096.
3013 * The drm_mm range manager is optimised to return results
3014 * with zero alignment, so where possible use the optimal
3015 * path.
3016 */
3017 if (alignment <= 4096)
3018 alignment = 0;
3019
3886search_free: 3020search_free:
3887 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3021 ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
3022 &vma->node,
3888 size, alignment, 3023 size, alignment,
3889 obj->cache_level, 3024 obj->cache_level,
3890 start, end, 3025 start, end,
3891 search_flag, 3026 search_flag,
3892 alloc_flag); 3027 alloc_flag);
3893 if (ret) { 3028 if (ret) {
3894 ret = i915_gem_evict_something(dev, vm, size, alignment, 3029 ret = i915_gem_evict_something(vma->vm, size, alignment,
3895 obj->cache_level, 3030 obj->cache_level,
3896 start, end, 3031 start, end,
3897 flags); 3032 flags);
3898 if (ret == 0) 3033 if (ret == 0)
3899 goto search_free; 3034 goto search_free;
3900 3035
3901 goto err_free_vma; 3036 goto err_unpin;
3902 } 3037 }
3903 } 3038 }
3904 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3039 GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
3905 ret = -EINVAL;
3906 goto err_remove_node;
3907 }
3908
3909 trace_i915_vma_bind(vma, flags);
3910 ret = i915_vma_bind(vma, obj->cache_level, flags);
3911 if (ret)
3912 goto err_remove_node;
3913 3040
3914 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3041 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3915 list_add_tail(&vma->vm_link, &vm->inactive_list); 3042 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3043 obj->bind_count++;
3916 3044
3917 return vma; 3045 return 0;
3918 3046
3919err_remove_node:
3920 drm_mm_remove_node(&vma->node);
3921err_free_vma:
3922 i915_gem_vma_destroy(vma);
3923 vma = ERR_PTR(ret);
3924err_unpin: 3047err_unpin:
3925 i915_gem_object_unpin_pages(obj); 3048 i915_gem_object_unpin_pages(obj);
3926 return vma; 3049 return ret;
3927} 3050}
3928 3051
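i915_vma_insert() above follows the policy described in its kernel-doc: ask the range allocator for space first and, if that fails, evict an old entry from the target range and retry. A stripped-down model of that try/evict/retry flow; the toy allocator and evict helper below are stand-ins, not drm_mm or the i915 eviction code:

#include <stdio.h>
#include <stdbool.h>

/* Toy "address space": a fixed number of equally sized slots. */
#define NUM_SLOTS 4
static bool slot_busy[NUM_SLOTS] = { true, true, true, true };

static int try_alloc(void)
{
        for (int i = 0; i < NUM_SLOTS; i++)
                if (!slot_busy[i]) {
                        slot_busy[i] = true;
                        return i;
                }
        return -1;                              /* no space */
}

static bool evict_something(void)
{
        /* Pretend slot 2 held the oldest idle mapping and release it. */
        if (slot_busy[2]) {
                slot_busy[2] = false;
                return true;
        }
        return false;
}

static int insert(void)
{
        int slot = try_alloc();
        if (slot < 0 && evict_something())      /* failed: make room, retry */
                slot = try_alloc();
        return slot;
}

int main(void)
{
        printf("inserted into slot %d\n", insert());
        return 0;
}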
3929bool 3052bool
@@ -4026,20 +3149,17 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
4026int 3149int
4027i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3150i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4028{ 3151{
4029 struct drm_device *dev = obj->base.dev;
4030 struct drm_i915_private *dev_priv = to_i915(dev);
4031 struct i915_ggtt *ggtt = &dev_priv->ggtt;
4032 uint32_t old_write_domain, old_read_domains; 3152 uint32_t old_write_domain, old_read_domains;
4033 struct i915_vma *vma; 3153 struct i915_vma *vma;
4034 int ret; 3154 int ret;
4035 3155
4036 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4037 return 0;
4038
4039 ret = i915_gem_object_wait_rendering(obj, !write); 3156 ret = i915_gem_object_wait_rendering(obj, !write);
4040 if (ret) 3157 if (ret)
4041 return ret; 3158 return ret;
4042 3159
3160 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3161 return 0;
3162
4043 /* Flush and acquire obj->pages so that we are coherent through 3163 /* Flush and acquire obj->pages so that we are coherent through
4044 * direct access in memory with previous cached writes through 3164 * direct access in memory with previous cached writes through
4045 * shmemfs and that our cache domain tracking remains valid. 3165 * shmemfs and that our cache domain tracking remains valid.
@@ -4081,9 +3201,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4081 3201
4082 /* And bump the LRU for this access */ 3202 /* And bump the LRU for this access */
4083 vma = i915_gem_obj_to_ggtt(obj); 3203 vma = i915_gem_obj_to_ggtt(obj);
4084 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3204 if (vma &&
4085 list_move_tail(&vma->vm_link, 3205 drm_mm_node_allocated(&vma->node) &&
4086 &ggtt->base.inactive_list); 3206 !i915_vma_is_active(vma))
3207 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
4087 3208
4088 return 0; 3209 return 0;
4089} 3210}
@@ -4106,9 +3227,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4106int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3227int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4107 enum i915_cache_level cache_level) 3228 enum i915_cache_level cache_level)
4108{ 3229{
4109 struct drm_device *dev = obj->base.dev; 3230 struct i915_vma *vma;
4110 struct i915_vma *vma, *next;
4111 bool bound = false;
4112 int ret = 0; 3231 int ret = 0;
4113 3232
4114 if (obj->cache_level == cache_level) 3233 if (obj->cache_level == cache_level)
@@ -4119,21 +3238,28 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 4119 * catch the issue of the CS prefetch crossing page boundaries and 3238 * catch the issue of the CS prefetch crossing page boundaries and
 4120 * reading an invalid PTE on older architectures. 3239 * reading an invalid PTE on older architectures.
4121 */ 3240 */
4122 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 3241restart:
3242 list_for_each_entry(vma, &obj->vma_list, obj_link) {
4123 if (!drm_mm_node_allocated(&vma->node)) 3243 if (!drm_mm_node_allocated(&vma->node))
4124 continue; 3244 continue;
4125 3245
4126 if (vma->pin_count) { 3246 if (i915_vma_is_pinned(vma)) {
4127 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3247 DRM_DEBUG("can not change the cache level of pinned objects\n");
4128 return -EBUSY; 3248 return -EBUSY;
4129 } 3249 }
4130 3250
4131 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3251 if (i915_gem_valid_gtt_space(vma, cache_level))
4132 ret = i915_vma_unbind(vma); 3252 continue;
4133 if (ret) 3253
4134 return ret; 3254 ret = i915_vma_unbind(vma);
4135 } else 3255 if (ret)
4136 bound = true; 3256 return ret;
3257
3258 /* As unbinding may affect other elements in the
3259 * obj->vma_list (due to side-effects from retiring
3260 * an active vma), play safe and restart the iterator.
3261 */
3262 goto restart;
4137 } 3263 }
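Because unbinding a VMA can retire requests and thereby remove other entries from obj->vma_list, the loop above restarts from the head after every unbind instead of trusting a saved next pointer. The same restart pattern on a plain singly linked list, outside the kernel:

#include <stdio.h>
#include <stdlib.h>

struct node {
        int value;
        struct node *next;
};

static struct node *head;

static void remove_value(int value)
{
        struct node **p = &head;

        while (*p) {
                if ((*p)->value == value) {
                        struct node *victim = *p;
                        *p = victim->next;
                        free(victim);
                        return;
                }
                p = &(*p)->next;
        }
}

int main(void)
{
        /* Build the list 3 -> 2 -> 1. */
        for (int i = 1; i <= 3; i++) {
                struct node *n = malloc(sizeof(*n));
                n->value = i;
                n->next = head;
                head = n;
        }

restart:
        for (struct node *n = head; n; n = n->next) {
                if (n->value % 2 == 0) {
                        /* Removal may disturb neighbouring entries in the
                         * real code, so do not trust n->next: start over. */
                        remove_value(n->value);
                        goto restart;
                }
        }

        for (struct node *n = head; n; n = n->next)
                printf("%d ", n->value);
        printf("\n");
        return 0;
}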
4138 3264
 4139 /* We can reuse the existing drm_mm nodes but need to change the 3265 /* We can reuse the existing drm_mm nodes but need to change the
@@ -4143,7 +3269,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4143 * rewrite the PTE in the belief that doing so tramples upon less 3269 * rewrite the PTE in the belief that doing so tramples upon less
4144 * state and so involves less work. 3270 * state and so involves less work.
4145 */ 3271 */
4146 if (bound) { 3272 if (obj->bind_count) {
4147 /* Before we change the PTE, the GPU must not be accessing it. 3273 /* Before we change the PTE, the GPU must not be accessing it.
4148 * If we wait upon the object, we know that all the bound 3274 * If we wait upon the object, we know that all the bound
4149 * VMA are no longer active. 3275 * VMA are no longer active.
@@ -4152,7 +3278,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4152 if (ret) 3278 if (ret)
4153 return ret; 3279 return ret;
4154 3280
4155 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 3281 if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
 4156 /* Access to snoopable pages through the GTT is 3282 /* Access to snoopable pages through the GTT is
 4157 * incoherent and on some machines causes a hard 3283 * incoherent and on some machines causes a hard
 4158 * lockup. Relinquish the CPU mmaping to force 3284 * lockup. Relinquish the CPU mmaping to force
@@ -4215,8 +3341,8 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4215 struct drm_i915_gem_caching *args = data; 3341 struct drm_i915_gem_caching *args = data;
4216 struct drm_i915_gem_object *obj; 3342 struct drm_i915_gem_object *obj;
4217 3343
4218 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 3344 obj = i915_gem_object_lookup(file, args->handle);
4219 if (&obj->base == NULL) 3345 if (!obj)
4220 return -ENOENT; 3346 return -ENOENT;
4221 3347
4222 switch (obj->cache_level) { 3348 switch (obj->cache_level) {
@@ -4234,7 +3360,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4234 break; 3360 break;
4235 } 3361 }
4236 3362
4237 drm_gem_object_unreference_unlocked(&obj->base); 3363 i915_gem_object_put_unlocked(obj);
4238 return 0; 3364 return 0;
4239} 3365}
4240 3366
@@ -4276,15 +3402,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4276 if (ret) 3402 if (ret)
4277 goto rpm_put; 3403 goto rpm_put;
4278 3404
4279 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 3405 obj = i915_gem_object_lookup(file, args->handle);
4280 if (&obj->base == NULL) { 3406 if (!obj) {
4281 ret = -ENOENT; 3407 ret = -ENOENT;
4282 goto unlock; 3408 goto unlock;
4283 } 3409 }
4284 3410
4285 ret = i915_gem_object_set_cache_level(obj, level); 3411 ret = i915_gem_object_set_cache_level(obj, level);
4286 3412
4287 drm_gem_object_unreference(&obj->base); 3413 i915_gem_object_put(obj);
4288unlock: 3414unlock:
4289 mutex_unlock(&dev->struct_mutex); 3415 mutex_unlock(&dev->struct_mutex);
4290rpm_put: 3416rpm_put:
@@ -4329,7 +3455,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 4329 * (e.g. libkms for the bootup splash), we have to ensure that we 3455 * (e.g. libkms for the bootup splash), we have to ensure that we
 4330 * always use map_and_fenceable for all scanout buffers. 3456 * always use map_and_fenceable for all scanout buffers.
4331 */ 3457 */
4332 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 3458 ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
4333 view->type == I915_GGTT_VIEW_NORMAL ? 3459 view->type == I915_GGTT_VIEW_NORMAL ?
4334 PIN_MAPPABLE : 0); 3460 PIN_MAPPABLE : 0);
4335 if (ret) 3461 if (ret)
@@ -4383,13 +3509,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4383 uint32_t old_write_domain, old_read_domains; 3509 uint32_t old_write_domain, old_read_domains;
4384 int ret; 3510 int ret;
4385 3511
4386 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4387 return 0;
4388
4389 ret = i915_gem_object_wait_rendering(obj, !write); 3512 ret = i915_gem_object_wait_rendering(obj, !write);
4390 if (ret) 3513 if (ret)
4391 return ret; 3514 return ret;
4392 3515
3516 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3517 return 0;
3518
4393 i915_gem_object_flush_gtt_write_domain(obj); 3519 i915_gem_object_flush_gtt_write_domain(obj);
4394 3520
4395 old_write_domain = obj->base.write_domain; 3521 old_write_domain = obj->base.write_domain;
@@ -4464,25 +3590,30 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4464 target = request; 3590 target = request;
4465 } 3591 }
4466 if (target) 3592 if (target)
4467 i915_gem_request_reference(target); 3593 i915_gem_request_get(target);
4468 spin_unlock(&file_priv->mm.lock); 3594 spin_unlock(&file_priv->mm.lock);
4469 3595
4470 if (target == NULL) 3596 if (target == NULL)
4471 return 0; 3597 return 0;
4472 3598
4473 ret = __i915_wait_request(target, true, NULL, NULL); 3599 ret = i915_wait_request(target, true, NULL, NULL);
4474 i915_gem_request_unreference(target); 3600 i915_gem_request_put(target);
4475 3601
4476 return ret; 3602 return ret;
4477} 3603}
4478 3604
4479static bool 3605static bool
4480i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 3606i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
4481{ 3607{
4482 struct drm_i915_gem_object *obj = vma->obj; 3608 struct drm_i915_gem_object *obj = vma->obj;
4483 3609
4484 if (alignment && 3610 if (!drm_mm_node_allocated(&vma->node))
4485 vma->node.start & (alignment - 1)) 3611 return false;
3612
3613 if (vma->node.size < size)
3614 return true;
3615
3616 if (alignment && vma->node.start & (alignment - 1))
4486 return true; 3617 return true;
4487 3618
 4488 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 3619 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
@@ -4502,135 +3633,159 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4502void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 3633void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
4503{ 3634{
4504 struct drm_i915_gem_object *obj = vma->obj; 3635 struct drm_i915_gem_object *obj = vma->obj;
3636 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
4505 bool mappable, fenceable; 3637 bool mappable, fenceable;
4506 u32 fence_size, fence_alignment; 3638 u32 fence_size, fence_alignment;
4507 3639
4508 fence_size = i915_gem_get_gtt_size(obj->base.dev, 3640 fence_size = i915_gem_get_ggtt_size(dev_priv,
4509 obj->base.size, 3641 obj->base.size,
4510 obj->tiling_mode); 3642 i915_gem_object_get_tiling(obj));
4511 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 3643 fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
4512 obj->base.size, 3644 obj->base.size,
4513 obj->tiling_mode, 3645 i915_gem_object_get_tiling(obj),
4514 true); 3646 true);
4515 3647
4516 fenceable = (vma->node.size == fence_size && 3648 fenceable = (vma->node.size == fence_size &&
4517 (vma->node.start & (fence_alignment - 1)) == 0); 3649 (vma->node.start & (fence_alignment - 1)) == 0);
4518 3650
4519 mappable = (vma->node.start + fence_size <= 3651 mappable = (vma->node.start + fence_size <=
4520 to_i915(obj->base.dev)->ggtt.mappable_end); 3652 dev_priv->ggtt.mappable_end);
4521 3653
4522 obj->map_and_fenceable = mappable && fenceable; 3654 obj->map_and_fenceable = mappable && fenceable;
4523} 3655}
4524 3656
4525static int 3657int __i915_vma_do_pin(struct i915_vma *vma,
4526i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 3658 u64 size, u64 alignment, u64 flags)
4527 struct i915_address_space *vm,
4528 const struct i915_ggtt_view *ggtt_view,
4529 uint32_t alignment,
4530 uint64_t flags)
4531{ 3659{
4532 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3660 unsigned int bound = vma->flags;
4533 struct i915_vma *vma;
4534 unsigned bound;
4535 int ret; 3661 int ret;
4536 3662
4537 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 3663 GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
4538 return -ENODEV; 3664 GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
4539 3665
4540 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 3666 if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
4541 return -EINVAL; 3667 ret = -EBUSY;
4542 3668 goto err;
4543 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
4544 return -EINVAL;
4545
4546 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
4547 return -EINVAL;
4548
4549 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
4550 i915_gem_obj_to_vma(obj, vm);
4551
4552 if (vma) {
4553 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4554 return -EBUSY;
4555
4556 if (i915_vma_misplaced(vma, alignment, flags)) {
4557 WARN(vma->pin_count,
4558 "bo is already pinned in %s with incorrect alignment:"
4559 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
4560 " obj->map_and_fenceable=%d\n",
4561 ggtt_view ? "ggtt" : "ppgtt",
4562 upper_32_bits(vma->node.start),
4563 lower_32_bits(vma->node.start),
4564 alignment,
4565 !!(flags & PIN_MAPPABLE),
4566 obj->map_and_fenceable);
4567 ret = i915_vma_unbind(vma);
4568 if (ret)
4569 return ret;
4570
4571 vma = NULL;
4572 }
4573 } 3669 }
4574 3670
4575 bound = vma ? vma->bound : 0; 3671 if ((bound & I915_VMA_BIND_MASK) == 0) {
4576 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 3672 ret = i915_vma_insert(vma, size, alignment, flags);
4577 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
4578 flags);
4579 if (IS_ERR(vma))
4580 return PTR_ERR(vma);
4581 } else {
4582 ret = i915_vma_bind(vma, obj->cache_level, flags);
4583 if (ret) 3673 if (ret)
4584 return ret; 3674 goto err;
4585 } 3675 }
4586 3676
4587 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 3677 ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
4588 (bound ^ vma->bound) & GLOBAL_BIND) { 3678 if (ret)
3679 goto err;
3680
3681 if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
4589 __i915_vma_set_map_and_fenceable(vma); 3682 __i915_vma_set_map_and_fenceable(vma);
4590 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4591 }
4592 3683
4593 vma->pin_count++; 3684 GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
4594 return 0; 3685 return 0;
4595}
4596 3686
4597int 3687err:
4598i915_gem_object_pin(struct drm_i915_gem_object *obj, 3688 __i915_vma_unpin(vma);
4599 struct i915_address_space *vm, 3689 return ret;
4600 uint32_t alignment,
4601 uint64_t flags)
4602{
4603 return i915_gem_object_do_pin(obj, vm,
4604 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
4605 alignment, flags);
4606} 3690}
4607 3691
4608int 3692int
4609i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 3693i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
4610 const struct i915_ggtt_view *view, 3694 const struct i915_ggtt_view *view,
4611 uint32_t alignment, 3695 u64 size,
4612 uint64_t flags) 3696 u64 alignment,
3697 u64 flags)
4613{ 3698{
4614 struct drm_device *dev = obj->base.dev; 3699 struct i915_vma *vma;
4615 struct drm_i915_private *dev_priv = to_i915(dev); 3700 int ret;
4616 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3701
3702 if (!view)
3703 view = &i915_ggtt_view_normal;
4617 3704
4618 BUG_ON(!view); 3705 vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
3706 if (IS_ERR(vma))
3707 return PTR_ERR(vma);
3708
3709 if (i915_vma_misplaced(vma, size, alignment, flags)) {
3710 if (flags & PIN_NONBLOCK &&
3711 (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
3712 return -ENOSPC;
3713
3714 WARN(i915_vma_is_pinned(vma),
3715 "bo is already pinned in ggtt with incorrect alignment:"
3716 " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
3717 " obj->map_and_fenceable=%d\n",
3718 upper_32_bits(vma->node.start),
3719 lower_32_bits(vma->node.start),
3720 alignment,
3721 !!(flags & PIN_MAPPABLE),
3722 obj->map_and_fenceable);
3723 ret = i915_vma_unbind(vma);
3724 if (ret)
3725 return ret;
3726 }
4619 3727
4620 return i915_gem_object_do_pin(obj, &ggtt->base, view, 3728 return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
4621 alignment, flags | PIN_GLOBAL);
4622} 3729}
4623 3730
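__i915_vma_do_pin() above now keeps the pin count and the bind-state flags packed into a single vma->flags word, with an overflow bit to trap runaway pinning. A toy version of that packing; the bit positions and names below are made up rather than the driver's I915_VMA_* definitions:

#include <stdio.h>

/* Low bits: pin count. Higher bits: state flags. (Illustrative layout.) */
#define PIN_MASK        0x0ffu
#define PIN_OVERFLOW    0x100u
#define GLOBAL_BIND     0x200u
#define LOCAL_BIND      0x400u

static unsigned int flags;

static int pin(void)
{
        flags++;                        /* bump the packed count        */
        if (flags & PIN_OVERFLOW) {
                flags--;                /* undo and report the overflow */
                return -1;
        }
        return 0;
}

static void unpin(void)
{
        flags--;
}

int main(void)
{
        flags |= GLOBAL_BIND;
        pin();
        pin();
        printf("bound globally: %s, pin count: %u\n",
               (flags & GLOBAL_BIND) ? "yes" : "no", flags & PIN_MASK);
        unpin();
        unpin();
        printf("pin count after unpin: %u\n", flags & PIN_MASK);
        return 0;
}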
4624void 3731void
4625i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 3732i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
4626 const struct i915_ggtt_view *view) 3733 const struct i915_ggtt_view *view)
4627{ 3734{
4628 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 3735 i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
3736}
4629 3737
4630 WARN_ON(vma->pin_count == 0); 3738static __always_inline unsigned __busy_read_flag(unsigned int id)
4631 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 3739{
3740 /* Note that we could alias engines in the execbuf API, but
3741 * that would be very unwise as it prevents userspace from
3742 * fine control over engine selection. Ahem.
3743 *
3744 * This should be something like EXEC_MAX_ENGINE instead of
3745 * I915_NUM_ENGINES.
3746 */
3747 BUILD_BUG_ON(I915_NUM_ENGINES > 16);
3748 return 0x10000 << id;
3749}
4632 3750
4633 --vma->pin_count; 3751static __always_inline unsigned int __busy_write_id(unsigned int id)
3752{
3753 return id;
3754}
3755
3756static __always_inline unsigned
3757__busy_set_if_active(const struct i915_gem_active *active,
3758 unsigned int (*flag)(unsigned int id))
3759{
3760 /* For more discussion about the barriers and locking concerns,
3761 * see __i915_gem_active_get_rcu().
3762 */
3763 do {
3764 struct drm_i915_gem_request *request;
3765 unsigned int id;
3766
3767 request = rcu_dereference(active->request);
3768 if (!request || i915_gem_request_completed(request))
3769 return 0;
3770
3771 id = request->engine->exec_id;
3772
3773 /* Check that the pointer wasn't reassigned and overwritten. */
3774 if (request == rcu_access_pointer(active->request))
3775 return flag(id);
3776 } while (1);
3777}
3778
3779static inline unsigned
3780busy_check_reader(const struct i915_gem_active *active)
3781{
3782 return __busy_set_if_active(active, __busy_read_flag);
3783}
3784
3785static inline unsigned
3786busy_check_writer(const struct i915_gem_active *active)
3787{
3788 return __busy_set_if_active(active, __busy_write_id);
4634} 3789}
4635 3790
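__busy_read_flag() and __busy_write_id() above split the busy ioctl result into two halves: each engine still reading contributes one bit in the upper 16 bits (0x10000 << id), while the id of the single write engine sits in the low 16 bits. A standalone demonstration of packing and unpacking that layout; the engine ids used here are arbitrary examples, not the ABI's exec ids:

#include <stdio.h>

#define MAX_ENGINES 16

static unsigned int busy_read_flag(unsigned int id)
{
        return 0x10000u << id;          /* one bit per reading engine */
}

static unsigned int busy_write_id(unsigned int id)
{
        return id;                      /* writer id in the low half */
}

int main(void)
{
        /* Object being read by engines 0 and 2, and written by engine 2. */
        unsigned int busy = busy_read_flag(0) | busy_read_flag(2) |
                            busy_write_id(2);

        printf("raw busy word: 0x%08x\n", busy);
        printf("write engine id: %u\n", busy & 0xffffu);
        for (unsigned int id = 0; id < MAX_ENGINES; id++)
                if (busy & busy_read_flag(id))
                        printf("engine %u is reading\n", id);
        return 0;
}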
4636int 3791int
@@ -4639,47 +3794,61 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4639{ 3794{
4640 struct drm_i915_gem_busy *args = data; 3795 struct drm_i915_gem_busy *args = data;
4641 struct drm_i915_gem_object *obj; 3796 struct drm_i915_gem_object *obj;
4642 int ret; 3797 unsigned long active;
4643 3798
4644 ret = i915_mutex_lock_interruptible(dev); 3799 obj = i915_gem_object_lookup(file, args->handle);
4645 if (ret) 3800 if (!obj)
4646 return ret; 3801 return -ENOENT;
4647 3802
4648 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 3803 args->busy = 0;
4649 if (&obj->base == NULL) { 3804 active = __I915_BO_ACTIVE(obj);
4650 ret = -ENOENT; 3805 if (active) {
4651 goto unlock; 3806 int idx;
4652 }
4653 3807
4654 /* Count all active objects as busy, even if they are currently not used 3808 /* Yes, the lookups are intentionally racy.
4655 * by the gpu. Users of this interface expect objects to eventually 3809 *
4656 * become non-busy without any further actions, therefore emit any 3810 * First, we cannot simply rely on __I915_BO_ACTIVE. We have
4657 * necessary flushes here. 3811 * to regard the value as stale and as our ABI guarantees
4658 */ 3812 * forward progress, we confirm the status of each active
4659 ret = i915_gem_object_flush_active(obj); 3813 * request with the hardware.
4660 if (ret) 3814 *
4661 goto unref; 3815 * Even though we guard the pointer lookup by RCU, that only
3816 * guarantees that the pointer and its contents remain
3817 * dereferencable and does *not* mean that the request we
3818 * have is the same as the one being tracked by the object.
3819 *
3820 * Consider that we lookup the request just as it is being
3821 * retired and freed. We take a local copy of the pointer,
3822 * but before we add its engine into the busy set, the other
3823 * thread reallocates it and assigns it to a task on another
3824 * engine with a fresh and incomplete seqno.
3825 *
3826 * So after we lookup the engine's id, we double check that
3827 * the active request is the same and only then do we add it
3828 * into the busy set.
3829 */
3830 rcu_read_lock();
4662 3831
4663 args->busy = 0; 3832 for_each_active(active, idx)
4664 if (obj->active) { 3833 args->busy |= busy_check_reader(&obj->last_read[idx]);
4665 int i;
4666 3834
4667 for (i = 0; i < I915_NUM_ENGINES; i++) { 3835 /* For ABI sanity, we only care that the write engine is in
4668 struct drm_i915_gem_request *req; 3836 * the set of read engines. This is ensured by the ordering
3837 * of setting last_read/last_write in i915_vma_move_to_active,
3838 * and then in reverse in retire.
3839 *
3840 * We don't care that the set of active read/write engines
3841 * may change during construction of the result, as it is
3842 * equally liable to change before userspace can inspect
3843 * the result.
3844 */
3845 args->busy |= busy_check_writer(&obj->last_write);
4669 3846
4670 req = obj->last_read_req[i]; 3847 rcu_read_unlock();
4671 if (req)
4672 args->busy |= 1 << (16 + req->engine->exec_id);
4673 }
4674 if (obj->last_write_req)
4675 args->busy |= obj->last_write_req->engine->exec_id;
4676 } 3848 }
4677 3849
4678unref: 3850 i915_gem_object_put_unlocked(obj);
4679 drm_gem_object_unreference(&obj->base); 3851 return 0;
4680unlock:
4681 mutex_unlock(&dev->struct_mutex);
4682 return ret;
4683} 3852}
4684 3853
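The comment block above describes a deliberately lock-free lookup: snapshot the tracked request pointer, derive the engine id from the snapshot, and only trust the result if the tracker still points at the same request, otherwise retry. Stripped of RCU and the real request structures, the retry shape looks like the following sketch, using C11 atomics and invented names:

#include <stdio.h>
#include <stdatomic.h>

struct request {
        unsigned int engine_id;
};

static struct request req_a = { .engine_id = 1 };
static _Atomic(struct request *) active = &req_a;

/* Snapshot the tracked request, read from the snapshot, then confirm the
 * tracker is unchanged before believing what was read. */
static unsigned int busy_engine(void)
{
        for (;;) {
                struct request *snap = atomic_load(&active);
                unsigned int id;

                if (!snap)
                        return 0;               /* idle */

                id = snap->engine_id;
                if (snap == atomic_load(&active))
                        return id;              /* still current: use it */
                /* reassigned under us: retry */
        }
}

int main(void)
{
        printf("busy on engine %u\n", busy_engine());
        return 0;
}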
4685int 3854int
@@ -4710,19 +3879,14 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4710 if (ret) 3879 if (ret)
4711 return ret; 3880 return ret;
4712 3881
4713 obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); 3882 obj = i915_gem_object_lookup(file_priv, args->handle);
4714 if (&obj->base == NULL) { 3883 if (!obj) {
4715 ret = -ENOENT; 3884 ret = -ENOENT;
4716 goto unlock; 3885 goto unlock;
4717 } 3886 }
4718 3887
4719 if (i915_gem_obj_is_pinned(obj)) {
4720 ret = -EINVAL;
4721 goto out;
4722 }
4723
4724 if (obj->pages && 3888 if (obj->pages &&
4725 obj->tiling_mode != I915_TILING_NONE && 3889 i915_gem_object_is_tiled(obj) &&
4726 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 3890 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4727 if (obj->madv == I915_MADV_WILLNEED) 3891 if (obj->madv == I915_MADV_WILLNEED)
4728 i915_gem_object_unpin_pages(obj); 3892 i915_gem_object_unpin_pages(obj);
@@ -4739,8 +3903,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4739 3903
4740 args->retained = obj->madv != __I915_MADV_PURGED; 3904 args->retained = obj->madv != __I915_MADV_PURGED;
4741 3905
4742out: 3906 i915_gem_object_put(obj);
4743 drm_gem_object_unreference(&obj->base);
4744unlock: 3907unlock:
4745 mutex_unlock(&dev->struct_mutex); 3908 mutex_unlock(&dev->struct_mutex);
4746 return ret; 3909 return ret;
@@ -4753,7 +3916,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
4753 3916
4754 INIT_LIST_HEAD(&obj->global_list); 3917 INIT_LIST_HEAD(&obj->global_list);
4755 for (i = 0; i < I915_NUM_ENGINES; i++) 3918 for (i = 0; i < I915_NUM_ENGINES; i++)
4756 INIT_LIST_HEAD(&obj->engine_list[i]); 3919 init_request_active(&obj->last_read[i],
3920 i915_gem_object_retire__read);
3921 init_request_active(&obj->last_write,
3922 i915_gem_object_retire__write);
3923 init_request_active(&obj->last_fence, NULL);
4757 INIT_LIST_HEAD(&obj->obj_exec_link); 3924 INIT_LIST_HEAD(&obj->obj_exec_link);
4758 INIT_LIST_HEAD(&obj->vma_list); 3925 INIT_LIST_HEAD(&obj->vma_list);
4759 INIT_LIST_HEAD(&obj->batch_pool_link); 3926 INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4865,33 +4032,31 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
4865 4032
4866 trace_i915_gem_object_destroy(obj); 4033 trace_i915_gem_object_destroy(obj);
4867 4034
4035 /* All file-owned VMA should have been released by this point through
4036 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4037 * However, the object may also be bound into the global GTT (e.g.
4038 * older GPUs without per-process support, or for direct access through
4039 * the GTT either for the user or for scanout). Those VMA still need to
4040 * unbound now.
4041 */
4868 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4042 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
4869 int ret; 4043 GEM_BUG_ON(!i915_vma_is_ggtt(vma));
4870 4044 GEM_BUG_ON(i915_vma_is_active(vma));
4871 vma->pin_count = 0; 4045 vma->flags &= ~I915_VMA_PIN_MASK;
4872 ret = i915_vma_unbind(vma); 4046 i915_vma_close(vma);
4873 if (WARN_ON(ret == -ERESTARTSYS)) {
4874 bool was_interruptible;
4875
4876 was_interruptible = dev_priv->mm.interruptible;
4877 dev_priv->mm.interruptible = false;
4878
4879 WARN_ON(i915_vma_unbind(vma));
4880
4881 dev_priv->mm.interruptible = was_interruptible;
4882 }
4883 } 4047 }
4048 GEM_BUG_ON(obj->bind_count);
4884 4049
4885 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4050 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4886 * before progressing. */ 4051 * before progressing. */
4887 if (obj->stolen) 4052 if (obj->stolen)
4888 i915_gem_object_unpin_pages(obj); 4053 i915_gem_object_unpin_pages(obj);
4889 4054
4890 WARN_ON(obj->frontbuffer_bits); 4055 WARN_ON(atomic_read(&obj->frontbuffer_bits));
4891 4056
4892 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4057 if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4893 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4058 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4894 obj->tiling_mode != I915_TILING_NONE) 4059 i915_gem_object_is_tiled(obj))
4895 i915_gem_object_unpin_pages(obj); 4060 i915_gem_object_unpin_pages(obj);
4896 4061
4897 if (WARN_ON(obj->pages_pin_count)) 4062 if (WARN_ON(obj->pages_pin_count))
@@ -4899,7 +4064,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
4899 if (discard_backing_storage(obj)) 4064 if (discard_backing_storage(obj))
4900 obj->madv = I915_MADV_DONTNEED; 4065 obj->madv = I915_MADV_DONTNEED;
4901 i915_gem_object_put_pages(obj); 4066 i915_gem_object_put_pages(obj);
4902 i915_gem_object_free_mmap_offset(obj);
4903 4067
4904 BUG_ON(obj->pages); 4068 BUG_ON(obj->pages);
4905 4069
@@ -4938,51 +4102,39 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4938 GEM_BUG_ON(!view); 4102 GEM_BUG_ON(!view);
4939 4103
4940 list_for_each_entry(vma, &obj->vma_list, obj_link) 4104 list_for_each_entry(vma, &obj->vma_list, obj_link)
4941 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) 4105 if (i915_vma_is_ggtt(vma) &&
4106 i915_ggtt_view_equal(&vma->ggtt_view, view))
4942 return vma; 4107 return vma;
4943 return NULL; 4108 return NULL;
4944} 4109}
4945 4110
4946void i915_gem_vma_destroy(struct i915_vma *vma) 4111int i915_gem_suspend(struct drm_device *dev)
4947{
4948 WARN_ON(vma->node.allocated);
4949
4950 /* Keep the vma as a placeholder in the execbuffer reservation lists */
4951 if (!list_empty(&vma->exec_list))
4952 return;
4953
4954 if (!vma->is_ggtt)
4955 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
4956
4957 list_del(&vma->obj_link);
4958
4959 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
4960}
4961
4962static void
4963i915_gem_stop_engines(struct drm_device *dev)
4964{ 4112{
4965 struct drm_i915_private *dev_priv = to_i915(dev); 4113 struct drm_i915_private *dev_priv = to_i915(dev);
4966 struct intel_engine_cs *engine; 4114 int ret;
4967 4115
4968 for_each_engine(engine, dev_priv) 4116 intel_suspend_gt_powersave(dev_priv);
4969 dev_priv->gt.stop_engine(engine);
4970}
4971
4972int
4973i915_gem_suspend(struct drm_device *dev)
4974{
4975 struct drm_i915_private *dev_priv = to_i915(dev);
4976 int ret = 0;
4977 4117
4978 mutex_lock(&dev->struct_mutex); 4118 mutex_lock(&dev->struct_mutex);
4979 ret = i915_gem_wait_for_idle(dev_priv); 4119
4120 /* We have to flush all the executing contexts to main memory so
 4121 * that they can be saved in the hibernation image. To ensure the last
4122 * context image is coherent, we have to switch away from it. That
4123 * leaves the dev_priv->kernel_context still active when
4124 * we actually suspend, and its image in memory may not match the GPU
4125 * state. Fortunately, the kernel_context is disposable and we do
4126 * not rely on its state.
4127 */
4128 ret = i915_gem_switch_to_kernel_context(dev_priv);
4129 if (ret)
4130 goto err;
4131
4132 ret = i915_gem_wait_for_idle(dev_priv, true);
4980 if (ret) 4133 if (ret)
4981 goto err; 4134 goto err;
4982 4135
4983 i915_gem_retire_requests(dev_priv); 4136 i915_gem_retire_requests(dev_priv);
4984 4137
4985 i915_gem_stop_engines(dev);
4986 i915_gem_context_lost(dev_priv); 4138 i915_gem_context_lost(dev_priv);
4987 mutex_unlock(&dev->struct_mutex); 4139 mutex_unlock(&dev->struct_mutex);
4988 4140
@@ -5002,6 +4154,23 @@ err:
5002 return ret; 4154 return ret;
5003} 4155}
5004 4156
4157void i915_gem_resume(struct drm_device *dev)
4158{
4159 struct drm_i915_private *dev_priv = to_i915(dev);
4160
4161 mutex_lock(&dev->struct_mutex);
4162 i915_gem_restore_gtt_mappings(dev);
4163
4164 /* As we didn't flush the kernel context before suspend, we cannot
4165 * guarantee that the context image is complete. So let's just reset
4166 * it and start again.
4167 */
4168 if (i915.enable_execlists)
4169 intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
4170
4171 mutex_unlock(&dev->struct_mutex);
4172}
4173
5005void i915_gem_init_swizzling(struct drm_device *dev) 4174void i915_gem_init_swizzling(struct drm_device *dev)
5006{ 4175{
5007 struct drm_i915_private *dev_priv = to_i915(dev); 4176 struct drm_i915_private *dev_priv = to_i915(dev);
@@ -5054,53 +4223,6 @@ static void init_unused_rings(struct drm_device *dev)
5054 } 4223 }
5055} 4224}
5056 4225
5057int i915_gem_init_engines(struct drm_device *dev)
5058{
5059 struct drm_i915_private *dev_priv = to_i915(dev);
5060 int ret;
5061
5062 ret = intel_init_render_ring_buffer(dev);
5063 if (ret)
5064 return ret;
5065
5066 if (HAS_BSD(dev)) {
5067 ret = intel_init_bsd_ring_buffer(dev);
5068 if (ret)
5069 goto cleanup_render_ring;
5070 }
5071
5072 if (HAS_BLT(dev)) {
5073 ret = intel_init_blt_ring_buffer(dev);
5074 if (ret)
5075 goto cleanup_bsd_ring;
5076 }
5077
5078 if (HAS_VEBOX(dev)) {
5079 ret = intel_init_vebox_ring_buffer(dev);
5080 if (ret)
5081 goto cleanup_blt_ring;
5082 }
5083
5084 if (HAS_BSD2(dev)) {
5085 ret = intel_init_bsd2_ring_buffer(dev);
5086 if (ret)
5087 goto cleanup_vebox_ring;
5088 }
5089
5090 return 0;
5091
5092cleanup_vebox_ring:
5093 intel_cleanup_engine(&dev_priv->engine[VECS]);
5094cleanup_blt_ring:
5095 intel_cleanup_engine(&dev_priv->engine[BCS]);
5096cleanup_bsd_ring:
5097 intel_cleanup_engine(&dev_priv->engine[VCS]);
5098cleanup_render_ring:
5099 intel_cleanup_engine(&dev_priv->engine[RCS]);
5100
5101 return ret;
5102}
5103
5104int 4226int
5105i915_gem_init_hw(struct drm_device *dev) 4227i915_gem_init_hw(struct drm_device *dev)
5106{ 4228{
@@ -5167,6 +4289,27 @@ out:
5167 return ret; 4289 return ret;
5168} 4290}
5169 4291
4292bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4293{
4294 if (INTEL_INFO(dev_priv)->gen < 6)
4295 return false;
4296
4297 /* TODO: make semaphores and Execlists play nicely together */
4298 if (i915.enable_execlists)
4299 return false;
4300
4301 if (value >= 0)
4302 return value;
4303
4304#ifdef CONFIG_INTEL_IOMMU
4305 /* Enable semaphores on SNB when IO remapping is off */
4306 if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4307 return false;
4308#endif
4309
4310 return true;
4311}
4312
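intel_sanitize_semaphores() above resolves a tri-state module parameter (negative = auto, 0 = off, positive = on) against hardware and configuration constraints, in a fixed order of precedence. The same decision structure in a standalone sketch, with the platform checks reduced to plain booleans:

#include <stdio.h>
#include <stdbool.h>

/* value < 0 means "auto": let the constraints below decide. */
static bool sanitize_feature(int value, int gen, bool using_execlists,
                             bool iommu_active)
{
        if (gen < 6)
                return false;           /* hardware cannot do it at all    */

        if (using_execlists)
                return false;           /* unsupported in this mode        */

        if (value >= 0)
                return value;           /* explicit user override wins     */

        if (gen == 6 && iommu_active)
                return false;           /* known-bad combination: auto-off */

        return true;
}

int main(void)
{
        printf("%d\n", sanitize_feature(-1, 7, false, false)); /* 1: auto-on     */
        printf("%d\n", sanitize_feature(-1, 6, false, true));  /* 0: quirked off */
        printf("%d\n", sanitize_feature(1, 5, false, false));  /* 0: impossible  */
        return 0;
}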
5170int i915_gem_init(struct drm_device *dev) 4313int i915_gem_init(struct drm_device *dev)
5171{ 4314{
5172 struct drm_i915_private *dev_priv = to_i915(dev); 4315 struct drm_i915_private *dev_priv = to_i915(dev);
@@ -5175,15 +4318,9 @@ int i915_gem_init(struct drm_device *dev)
5175 mutex_lock(&dev->struct_mutex); 4318 mutex_lock(&dev->struct_mutex);
5176 4319
5177 if (!i915.enable_execlists) { 4320 if (!i915.enable_execlists) {
5178 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 4321 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
5179 dev_priv->gt.init_engines = i915_gem_init_engines;
5180 dev_priv->gt.cleanup_engine = intel_cleanup_engine;
5181 dev_priv->gt.stop_engine = intel_stop_engine;
5182 } else { 4322 } else {
5183 dev_priv->gt.execbuf_submit = intel_execlists_submission;
5184 dev_priv->gt.init_engines = intel_logical_rings_init;
5185 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 4323 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
5186 dev_priv->gt.stop_engine = intel_logical_ring_stop;
5187 } 4324 }
5188 4325
 5189 /* This is just a security blanket to placate dragons. 4326 /* This is just a security blanket to placate dragons.
@@ -5195,19 +4332,22 @@ int i915_gem_init(struct drm_device *dev)
5195 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4332 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5196 4333
5197 i915_gem_init_userptr(dev_priv); 4334 i915_gem_init_userptr(dev_priv);
5198 i915_gem_init_ggtt(dev); 4335
4336 ret = i915_gem_init_ggtt(dev_priv);
4337 if (ret)
4338 goto out_unlock;
5199 4339
5200 ret = i915_gem_context_init(dev); 4340 ret = i915_gem_context_init(dev);
5201 if (ret) 4341 if (ret)
5202 goto out_unlock; 4342 goto out_unlock;
5203 4343
5204 ret = dev_priv->gt.init_engines(dev); 4344 ret = intel_engines_init(dev);
5205 if (ret) 4345 if (ret)
5206 goto out_unlock; 4346 goto out_unlock;
5207 4347
5208 ret = i915_gem_init_hw(dev); 4348 ret = i915_gem_init_hw(dev);
5209 if (ret == -EIO) { 4349 if (ret == -EIO) {
5210 /* Allow ring initialisation to fail by marking the GPU as 4350 /* Allow engine initialisation to fail by marking the GPU as
 5211 * wedged. But we only want to do this where the GPU is angry, 4351 * wedged. But we only want to do this where the GPU is angry,
 5212 * for all other failures, such as an allocation failure, bail. 4352 * for all other failures, such as an allocation failure, bail.
5213 */ 4353 */
@@ -5236,7 +4376,6 @@ i915_gem_cleanup_engines(struct drm_device *dev)
5236static void 4376static void
5237init_engine_lists(struct intel_engine_cs *engine) 4377init_engine_lists(struct intel_engine_cs *engine)
5238{ 4378{
5239 INIT_LIST_HEAD(&engine->active_list);
5240 INIT_LIST_HEAD(&engine->request_list); 4379 INIT_LIST_HEAD(&engine->request_list);
5241} 4380}
5242 4381
@@ -5283,10 +4422,11 @@ i915_gem_load_init(struct drm_device *dev)
5283 dev_priv->requests = 4422 dev_priv->requests =
5284 kmem_cache_create("i915_gem_request", 4423 kmem_cache_create("i915_gem_request",
5285 sizeof(struct drm_i915_gem_request), 0, 4424 sizeof(struct drm_i915_gem_request), 0,
5286 SLAB_HWCACHE_ALIGN, 4425 SLAB_HWCACHE_ALIGN |
4426 SLAB_RECLAIM_ACCOUNT |
4427 SLAB_DESTROY_BY_RCU,
5287 NULL); 4428 NULL);
5288 4429
5289 INIT_LIST_HEAD(&dev_priv->vm_list);
5290 INIT_LIST_HEAD(&dev_priv->context_list); 4430 INIT_LIST_HEAD(&dev_priv->context_list);
5291 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 4431 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
5292 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 4432 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
@@ -5310,7 +4450,7 @@ i915_gem_load_init(struct drm_device *dev)
5310 4450
5311 dev_priv->mm.interruptible = true; 4451 dev_priv->mm.interruptible = true;
5312 4452
5313 mutex_init(&dev_priv->fb_tracking.lock); 4453 spin_lock_init(&dev_priv->fb_tracking.lock);
5314} 4454}
5315 4455
5316void i915_gem_load_cleanup(struct drm_device *dev) 4456void i915_gem_load_cleanup(struct drm_device *dev)
@@ -5320,6 +4460,9 @@ void i915_gem_load_cleanup(struct drm_device *dev)
5320 kmem_cache_destroy(dev_priv->requests); 4460 kmem_cache_destroy(dev_priv->requests);
5321 kmem_cache_destroy(dev_priv->vmas); 4461 kmem_cache_destroy(dev_priv->vmas);
5322 kmem_cache_destroy(dev_priv->objects); 4462 kmem_cache_destroy(dev_priv->objects);
4463
4464 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
4465 rcu_barrier();
5323} 4466}
5324 4467
5325int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 4468int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
@@ -5353,21 +4496,15 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
5353void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4496void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5354{ 4497{
5355 struct drm_i915_file_private *file_priv = file->driver_priv; 4498 struct drm_i915_file_private *file_priv = file->driver_priv;
4499 struct drm_i915_gem_request *request;
5356 4500
5357 /* Clean up our request list when the client is going away, so that 4501 /* Clean up our request list when the client is going away, so that
5358 * later retire_requests won't dereference our soon-to-be-gone 4502 * later retire_requests won't dereference our soon-to-be-gone
5359 * file_priv. 4503 * file_priv.
5360 */ 4504 */
5361 spin_lock(&file_priv->mm.lock); 4505 spin_lock(&file_priv->mm.lock);
5362 while (!list_empty(&file_priv->mm.request_list)) { 4506 list_for_each_entry(request, &file_priv->mm.request_list, client_list)
5363 struct drm_i915_gem_request *request;
5364
5365 request = list_first_entry(&file_priv->mm.request_list,
5366 struct drm_i915_gem_request,
5367 client_list);
5368 list_del(&request->client_list);
5369 request->file_priv = NULL; 4507 request->file_priv = NULL;
5370 }
5371 spin_unlock(&file_priv->mm.lock); 4508 spin_unlock(&file_priv->mm.lock);
5372 4509
5373 if (!list_empty(&file_priv->rps.link)) { 4510 if (!list_empty(&file_priv->rps.link)) {
@@ -5396,7 +4533,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5396 spin_lock_init(&file_priv->mm.lock); 4533 spin_lock_init(&file_priv->mm.lock);
5397 INIT_LIST_HEAD(&file_priv->mm.request_list); 4534 INIT_LIST_HEAD(&file_priv->mm.request_list);
5398 4535
5399 file_priv->bsd_ring = -1; 4536 file_priv->bsd_engine = -1;
5400 4537
5401 ret = i915_gem_context_open(dev, file); 4538 ret = i915_gem_context_open(dev, file);
5402 if (ret) 4539 if (ret)
@@ -5418,16 +4555,23 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
5418 struct drm_i915_gem_object *new, 4555 struct drm_i915_gem_object *new,
5419 unsigned frontbuffer_bits) 4556 unsigned frontbuffer_bits)
5420{ 4557{
 4558 /* Control of individual bits within the mask is guarded by
4559 * the owning plane->mutex, i.e. we can never see concurrent
4560 * manipulation of individual bits. But since the bitfield as a whole
4561 * is updated using RMW, we need to use atomics in order to update
4562 * the bits.
4563 */
4564 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4565 sizeof(atomic_t) * BITS_PER_BYTE);
4566
5421 if (old) { 4567 if (old) {
5422 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 4568 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
5423 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 4569 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
5424 old->frontbuffer_bits &= ~frontbuffer_bits;
5425 } 4570 }
5426 4571
5427 if (new) { 4572 if (new) {
5428 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 4573 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
5429 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 4574 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
5430 new->frontbuffer_bits |= frontbuffer_bits;
5431 } 4575 }
5432} 4576}
5433 4577
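
With the frontbuffer bitfield moved out from under struct_mutex, each bit is still owned by a single plane, but the word as a whole is updated read-modify-write, hence the switch to atomic_or()/atomic_andnot() above. A stripped-down sketch of that pattern, where example_bo is a stand-in for the GEM object:

#include <linux/atomic.h>
#include <linux/bug.h>

struct example_bo {
	atomic_t frontbuffer_bits;
};

static void example_track_fb(struct example_bo *old, struct example_bo *new,
			     unsigned int frontbuffer_bits)
{
	/* No bit is modified concurrently (each belongs to one plane), so
	 * only the read-modify-write of the shared word must be atomic.
	 */
	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}
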
@@ -5441,7 +4585,7 @@ u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
5441 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 4585 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
5442 4586
5443 list_for_each_entry(vma, &o->vma_list, obj_link) { 4587 list_for_each_entry(vma, &o->vma_list, obj_link) {
5444 if (vma->is_ggtt && 4588 if (i915_vma_is_ggtt(vma) &&
5445 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 4589 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5446 continue; 4590 continue;
5447 if (vma->vm == vm) 4591 if (vma->vm == vm)
@@ -5459,7 +4603,8 @@ u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
5459 struct i915_vma *vma; 4603 struct i915_vma *vma;
5460 4604
5461 list_for_each_entry(vma, &o->vma_list, obj_link) 4605 list_for_each_entry(vma, &o->vma_list, obj_link)
5462 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) 4606 if (i915_vma_is_ggtt(vma) &&
4607 i915_ggtt_view_equal(&vma->ggtt_view, view))
5463 return vma->node.start; 4608 return vma->node.start;
5464 4609
5465 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 4610 WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
@@ -5472,7 +4617,7 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
5472 struct i915_vma *vma; 4617 struct i915_vma *vma;
5473 4618
5474 list_for_each_entry(vma, &o->vma_list, obj_link) { 4619 list_for_each_entry(vma, &o->vma_list, obj_link) {
5475 if (vma->is_ggtt && 4620 if (i915_vma_is_ggtt(vma) &&
5476 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 4621 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5477 continue; 4622 continue;
5478 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 4623 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
@@ -5488,7 +4633,7 @@ bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
5488 struct i915_vma *vma; 4633 struct i915_vma *vma;
5489 4634
5490 list_for_each_entry(vma, &o->vma_list, obj_link) 4635 list_for_each_entry(vma, &o->vma_list, obj_link)
5491 if (vma->is_ggtt && 4636 if (i915_vma_is_ggtt(vma) &&
5492 i915_ggtt_view_equal(&vma->ggtt_view, view) && 4637 i915_ggtt_view_equal(&vma->ggtt_view, view) &&
5493 drm_mm_node_allocated(&vma->node)) 4638 drm_mm_node_allocated(&vma->node))
5494 return true; 4639 return true;
@@ -5496,17 +4641,6 @@ bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
5496 return false; 4641 return false;
5497} 4642}
5498 4643
5499bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
5500{
5501 struct i915_vma *vma;
5502
5503 list_for_each_entry(vma, &o->vma_list, obj_link)
5504 if (drm_mm_node_allocated(&vma->node))
5505 return true;
5506
5507 return false;
5508}
5509
5510unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) 4644unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
5511{ 4645{
5512 struct i915_vma *vma; 4646 struct i915_vma *vma;
@@ -5514,7 +4648,7 @@ unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
5514 GEM_BUG_ON(list_empty(&o->vma_list)); 4648 GEM_BUG_ON(list_empty(&o->vma_list));
5515 4649
5516 list_for_each_entry(vma, &o->vma_list, obj_link) { 4650 list_for_each_entry(vma, &o->vma_list, obj_link) {
5517 if (vma->is_ggtt && 4651 if (i915_vma_is_ggtt(vma) &&
5518 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 4652 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
5519 return vma->node.size; 4653 return vma->node.size;
5520 } 4654 }
@@ -5526,7 +4660,7 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
5526{ 4660{
5527 struct i915_vma *vma; 4661 struct i915_vma *vma;
5528 list_for_each_entry(vma, &obj->vma_list, obj_link) 4662 list_for_each_entry(vma, &obj->vma_list, obj_link)
5529 if (vma->pin_count > 0) 4663 if (i915_vma_is_pinned(vma))
5530 return true; 4664 return true;
5531 4665
5532 return false; 4666 return false;
@@ -5584,6 +4718,6 @@ i915_gem_object_create_from_data(struct drm_device *dev,
5584 return obj; 4718 return obj;
5585 4719
5586fail: 4720fail:
5587 drm_gem_object_unreference(&obj->base); 4721 i915_gem_object_put(obj);
5588 return ERR_PTR(ret); 4722 return ERR_PTR(ret);
5589} 4723}
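
The drm_gem_object_reference()/unreference() calls are converted throughout this series to i915_gem_object_get()/put() helpers; the get variant returns the object so the reference can be taken inline and chained (as in the dma-buf import path later in this diff). A sketch of what such wrappers look like; the real ones live in i915_drv.h and may differ in detail:

#include "i915_drv.h"

static inline struct drm_i915_gem_object *
example_object_get(struct drm_i915_gem_object *obj)
{
	/* Take a reference and hand the object back for chaining,
	 * e.g. return &example_object_get(obj)->base;
	 */
	drm_gem_object_reference(&obj->base);
	return obj;
}

static inline void example_object_put(struct drm_i915_gem_object *obj)
{
	drm_gem_object_unreference(&obj->base);
}
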
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 3752d5daa4b2..ed989596d9a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -41,15 +41,15 @@
41 41
42/** 42/**
43 * i915_gem_batch_pool_init() - initialize a batch buffer pool 43 * i915_gem_batch_pool_init() - initialize a batch buffer pool
44 * @dev: the drm device 44 * @engine: the associated request submission engine
45 * @pool: the batch buffer pool 45 * @pool: the batch buffer pool
46 */ 46 */
47void i915_gem_batch_pool_init(struct drm_device *dev, 47void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
48 struct i915_gem_batch_pool *pool) 48 struct i915_gem_batch_pool *pool)
49{ 49{
50 int n; 50 int n;
51 51
52 pool->dev = dev; 52 pool->engine = engine;
53 53
54 for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) 54 for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
55 INIT_LIST_HEAD(&pool->cache_list[n]); 55 INIT_LIST_HEAD(&pool->cache_list[n]);
@@ -65,18 +65,17 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
65{ 65{
66 int n; 66 int n;
67 67
68 WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); 68 lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
69 69
70 for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { 70 for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
71 while (!list_empty(&pool->cache_list[n])) { 71 struct drm_i915_gem_object *obj, *next;
72 struct drm_i915_gem_object *obj = 72
73 list_first_entry(&pool->cache_list[n], 73 list_for_each_entry_safe(obj, next,
74 struct drm_i915_gem_object, 74 &pool->cache_list[n],
75 batch_pool_link); 75 batch_pool_link)
76 76 i915_gem_object_put(obj);
77 list_del(&obj->batch_pool_link); 77
78 drm_gem_object_unreference(&obj->base); 78 INIT_LIST_HEAD(&pool->cache_list[n]);
79 }
80 } 79 }
81} 80}
82 81
@@ -102,7 +101,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
102 struct list_head *list; 101 struct list_head *list;
103 int n; 102 int n;
104 103
105 WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); 104 lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
106 105
107 /* Compute a power-of-two bucket, but throw everything greater than 106 /* Compute a power-of-two bucket, but throw everything greater than
 108 * 16KiB into the same bucket: i.e. the buckets hold objects of 107 * 16KiB into the same bucket: i.e. the buckets hold objects of
@@ -115,13 +114,14 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
115 114
116 list_for_each_entry_safe(tmp, next, list, batch_pool_link) { 115 list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
117 /* The batches are strictly LRU ordered */ 116 /* The batches are strictly LRU ordered */
118 if (tmp->active) 117 if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
118 &tmp->base.dev->struct_mutex))
119 break; 119 break;
120 120
121 /* While we're looping, do some clean up */ 121 /* While we're looping, do some clean up */
122 if (tmp->madv == __I915_MADV_PURGED) { 122 if (tmp->madv == __I915_MADV_PURGED) {
123 list_del(&tmp->batch_pool_link); 123 list_del(&tmp->batch_pool_link);
124 drm_gem_object_unreference(&tmp->base); 124 i915_gem_object_put(tmp);
125 continue; 125 continue;
126 } 126 }
127 127
@@ -134,7 +134,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
134 if (obj == NULL) { 134 if (obj == NULL) {
135 int ret; 135 int ret;
136 136
137 obj = i915_gem_object_create(pool->dev, size); 137 obj = i915_gem_object_create(&pool->engine->i915->drm, size);
138 if (IS_ERR(obj)) 138 if (IS_ERR(obj))
139 return obj; 139 return obj;
140 140
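
The comment above describes binning batch buffers into four LRU lists by size, with everything above 16KiB sharing the last list. The selection can be pictured as below; this is an illustration of the idea only, not the driver's actual index computation:

static unsigned int example_batch_pool_bucket(unsigned long size)
{
	/* One LRU list per power-of-two size class, capped at 16KiB. */
	if (size <= 4096)
		return 0;
	if (size <= 8192)
		return 1;
	if (size <= 16384)
		return 2;
	return 3;	/* every larger batch shares the final bucket */
}
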
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 848e90703eed..10d5ac4c00d3 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -27,13 +27,15 @@
27 27
28#include "i915_drv.h" 28#include "i915_drv.h"
29 29
30struct intel_engine_cs;
31
30struct i915_gem_batch_pool { 32struct i915_gem_batch_pool {
31 struct drm_device *dev; 33 struct intel_engine_cs *engine;
32 struct list_head cache_list[4]; 34 struct list_head cache_list[4];
33}; 35};
34 36
35/* i915_gem_batch_pool.c */ 37/* i915_gem_batch_pool.c */
36void i915_gem_batch_pool_init(struct drm_device *dev, 38void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
37 struct i915_gem_batch_pool *pool); 39 struct i915_gem_batch_pool *pool);
38void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); 40void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
39struct drm_i915_gem_object* 41struct drm_i915_gem_object*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3c97f0e7a003..bb72af5320b0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -134,21 +134,6 @@ static int get_context_size(struct drm_i915_private *dev_priv)
134 return ret; 134 return ret;
135} 135}
136 136
137static void i915_gem_context_clean(struct i915_gem_context *ctx)
138{
139 struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
140 struct i915_vma *vma, *next;
141
142 if (!ppgtt)
143 return;
144
145 list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
146 vm_link) {
147 if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
148 break;
149 }
150}
151
152void i915_gem_context_free(struct kref *ctx_ref) 137void i915_gem_context_free(struct kref *ctx_ref)
153{ 138{
154 struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); 139 struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -156,13 +141,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
156 141
157 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 142 lockdep_assert_held(&ctx->i915->drm.struct_mutex);
158 trace_i915_context_free(ctx); 143 trace_i915_context_free(ctx);
159 144 GEM_BUG_ON(!ctx->closed);
160 /*
161 * This context is going away and we need to remove all VMAs still
162 * around. This is to handle imported shared objects for which
163 * destructor did not run when their handles were closed.
164 */
165 i915_gem_context_clean(ctx);
166 145
167 i915_ppgtt_put(ctx->ppgtt); 146 i915_ppgtt_put(ctx->ppgtt);
168 147
@@ -173,10 +152,10 @@ void i915_gem_context_free(struct kref *ctx_ref)
173 continue; 152 continue;
174 153
175 WARN_ON(ce->pin_count); 154 WARN_ON(ce->pin_count);
176 if (ce->ringbuf) 155 if (ce->ring)
177 intel_ringbuffer_free(ce->ringbuf); 156 intel_ring_free(ce->ring);
178 157
179 drm_gem_object_unreference(&ce->state->base); 158 i915_gem_object_put(ce->state);
180 } 159 }
181 160
182 list_del(&ctx->link); 161 list_del(&ctx->link);
@@ -216,7 +195,7 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
216 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); 195 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
217 /* Failure shouldn't ever happen this early */ 196 /* Failure shouldn't ever happen this early */
218 if (WARN_ON(ret)) { 197 if (WARN_ON(ret)) {
219 drm_gem_object_unreference(&obj->base); 198 i915_gem_object_put(obj);
220 return ERR_PTR(ret); 199 return ERR_PTR(ret);
221 } 200 }
222 } 201 }
@@ -224,6 +203,37 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
224 return obj; 203 return obj;
225} 204}
226 205
206static void i915_ppgtt_close(struct i915_address_space *vm)
207{
208 struct list_head *phases[] = {
209 &vm->active_list,
210 &vm->inactive_list,
211 &vm->unbound_list,
212 NULL,
213 }, **phase;
214
215 GEM_BUG_ON(vm->closed);
216 vm->closed = true;
217
218 for (phase = phases; *phase; phase++) {
219 struct i915_vma *vma, *vn;
220
221 list_for_each_entry_safe(vma, vn, *phase, vm_link)
222 if (!i915_vma_is_closed(vma))
223 i915_vma_close(vma);
224 }
225}
226
227static void context_close(struct i915_gem_context *ctx)
228{
229 GEM_BUG_ON(ctx->closed);
230 ctx->closed = true;
231 if (ctx->ppgtt)
232 i915_ppgtt_close(&ctx->ppgtt->base);
233 ctx->file_priv = ERR_PTR(-EBADF);
234 i915_gem_context_put(ctx);
235}
236
227static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) 237static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
228{ 238{
229 int ret; 239 int ret;
@@ -305,7 +315,7 @@ __create_hw_context(struct drm_device *dev,
305 return ctx; 315 return ctx;
306 316
307err_out: 317err_out:
308 i915_gem_context_unreference(ctx); 318 context_close(ctx);
309 return ERR_PTR(ret); 319 return ERR_PTR(ret);
310} 320}
311 321
@@ -327,13 +337,14 @@ i915_gem_create_context(struct drm_device *dev,
327 return ctx; 337 return ctx;
328 338
329 if (USES_FULL_PPGTT(dev)) { 339 if (USES_FULL_PPGTT(dev)) {
330 struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); 340 struct i915_hw_ppgtt *ppgtt =
341 i915_ppgtt_create(to_i915(dev), file_priv);
331 342
332 if (IS_ERR(ppgtt)) { 343 if (IS_ERR(ppgtt)) {
333 DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", 344 DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
334 PTR_ERR(ppgtt)); 345 PTR_ERR(ppgtt));
335 idr_remove(&file_priv->context_idr, ctx->user_handle); 346 idr_remove(&file_priv->context_idr, ctx->user_handle);
336 i915_gem_context_unreference(ctx); 347 context_close(ctx);
337 return ERR_CAST(ppgtt); 348 return ERR_CAST(ppgtt);
338 } 349 }
339 350
@@ -390,7 +401,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
390 if (ce->state) 401 if (ce->state)
391 i915_gem_object_ggtt_unpin(ce->state); 402 i915_gem_object_ggtt_unpin(ce->state);
392 403
393 i915_gem_context_unreference(ctx); 404 i915_gem_context_put(ctx);
394 } 405 }
395} 406}
396 407
@@ -504,7 +515,7 @@ void i915_gem_context_fini(struct drm_device *dev)
504 515
505 lockdep_assert_held(&dev->struct_mutex); 516 lockdep_assert_held(&dev->struct_mutex);
506 517
507 i915_gem_context_unreference(dctx); 518 context_close(dctx);
508 dev_priv->kernel_context = NULL; 519 dev_priv->kernel_context = NULL;
509 520
510 ida_destroy(&dev_priv->context_hw_ida); 521 ida_destroy(&dev_priv->context_hw_ida);
@@ -514,8 +525,7 @@ static int context_idr_cleanup(int id, void *p, void *data)
514{ 525{
515 struct i915_gem_context *ctx = p; 526 struct i915_gem_context *ctx = p;
516 527
517 ctx->file_priv = ERR_PTR(-EBADF); 528 context_close(ctx);
518 i915_gem_context_unreference(ctx);
519 return 0; 529 return 0;
520} 530}
521 531
@@ -552,11 +562,12 @@ static inline int
552mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) 562mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
553{ 563{
554 struct drm_i915_private *dev_priv = req->i915; 564 struct drm_i915_private *dev_priv = req->i915;
565 struct intel_ring *ring = req->ring;
555 struct intel_engine_cs *engine = req->engine; 566 struct intel_engine_cs *engine = req->engine;
556 u32 flags = hw_flags | MI_MM_SPACE_GTT; 567 u32 flags = hw_flags | MI_MM_SPACE_GTT;
557 const int num_rings = 568 const int num_rings =
558 /* Use an extended w/a on ivb+ if signalling from other rings */ 569 /* Use an extended w/a on ivb+ if signalling from other rings */
559 i915_semaphore_is_enabled(dev_priv) ? 570 i915.semaphores ?
560 hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 : 571 hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 :
561 0; 572 0;
562 int len, ret; 573 int len, ret;
@@ -567,7 +578,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
567 * itlb_before_ctx_switch. 578 * itlb_before_ctx_switch.
568 */ 579 */
569 if (IS_GEN6(dev_priv)) { 580 if (IS_GEN6(dev_priv)) {
570 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0); 581 ret = engine->emit_flush(req, EMIT_INVALIDATE);
571 if (ret) 582 if (ret)
572 return ret; 583 return ret;
573 } 584 }
@@ -589,64 +600,64 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
589 600
590 /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ 601 /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
591 if (INTEL_GEN(dev_priv) >= 7) { 602 if (INTEL_GEN(dev_priv) >= 7) {
592 intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE); 603 intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
593 if (num_rings) { 604 if (num_rings) {
594 struct intel_engine_cs *signaller; 605 struct intel_engine_cs *signaller;
595 606
596 intel_ring_emit(engine, 607 intel_ring_emit(ring,
597 MI_LOAD_REGISTER_IMM(num_rings)); 608 MI_LOAD_REGISTER_IMM(num_rings));
598 for_each_engine(signaller, dev_priv) { 609 for_each_engine(signaller, dev_priv) {
599 if (signaller == engine) 610 if (signaller == engine)
600 continue; 611 continue;
601 612
602 intel_ring_emit_reg(engine, 613 intel_ring_emit_reg(ring,
603 RING_PSMI_CTL(signaller->mmio_base)); 614 RING_PSMI_CTL(signaller->mmio_base));
604 intel_ring_emit(engine, 615 intel_ring_emit(ring,
605 _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); 616 _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
606 } 617 }
607 } 618 }
608 } 619 }
609 620
610 intel_ring_emit(engine, MI_NOOP); 621 intel_ring_emit(ring, MI_NOOP);
611 intel_ring_emit(engine, MI_SET_CONTEXT); 622 intel_ring_emit(ring, MI_SET_CONTEXT);
612 intel_ring_emit(engine, 623 intel_ring_emit(ring,
613 i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) | 624 i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
614 flags); 625 flags);
615 /* 626 /*
616 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP 627 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
617 * WaMiSetContext_Hang:snb,ivb,vlv 628 * WaMiSetContext_Hang:snb,ivb,vlv
618 */ 629 */
619 intel_ring_emit(engine, MI_NOOP); 630 intel_ring_emit(ring, MI_NOOP);
620 631
621 if (INTEL_GEN(dev_priv) >= 7) { 632 if (INTEL_GEN(dev_priv) >= 7) {
622 if (num_rings) { 633 if (num_rings) {
623 struct intel_engine_cs *signaller; 634 struct intel_engine_cs *signaller;
624 i915_reg_t last_reg = {}; /* keep gcc quiet */ 635 i915_reg_t last_reg = {}; /* keep gcc quiet */
625 636
626 intel_ring_emit(engine, 637 intel_ring_emit(ring,
627 MI_LOAD_REGISTER_IMM(num_rings)); 638 MI_LOAD_REGISTER_IMM(num_rings));
628 for_each_engine(signaller, dev_priv) { 639 for_each_engine(signaller, dev_priv) {
629 if (signaller == engine) 640 if (signaller == engine)
630 continue; 641 continue;
631 642
632 last_reg = RING_PSMI_CTL(signaller->mmio_base); 643 last_reg = RING_PSMI_CTL(signaller->mmio_base);
633 intel_ring_emit_reg(engine, last_reg); 644 intel_ring_emit_reg(ring, last_reg);
634 intel_ring_emit(engine, 645 intel_ring_emit(ring,
635 _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); 646 _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
636 } 647 }
637 648
638 /* Insert a delay before the next switch! */ 649 /* Insert a delay before the next switch! */
639 intel_ring_emit(engine, 650 intel_ring_emit(ring,
640 MI_STORE_REGISTER_MEM | 651 MI_STORE_REGISTER_MEM |
641 MI_SRM_LRM_GLOBAL_GTT); 652 MI_SRM_LRM_GLOBAL_GTT);
642 intel_ring_emit_reg(engine, last_reg); 653 intel_ring_emit_reg(ring, last_reg);
643 intel_ring_emit(engine, engine->scratch.gtt_offset); 654 intel_ring_emit(ring, engine->scratch.gtt_offset);
644 intel_ring_emit(engine, MI_NOOP); 655 intel_ring_emit(ring, MI_NOOP);
645 } 656 }
646 intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_ENABLE); 657 intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
647 } 658 }
648 659
649 intel_ring_advance(engine); 660 intel_ring_advance(ring);
650 661
651 return ret; 662 return ret;
652} 663}
@@ -654,7 +665,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
654static int remap_l3(struct drm_i915_gem_request *req, int slice) 665static int remap_l3(struct drm_i915_gem_request *req, int slice)
655{ 666{
656 u32 *remap_info = req->i915->l3_parity.remap_info[slice]; 667 u32 *remap_info = req->i915->l3_parity.remap_info[slice];
657 struct intel_engine_cs *engine = req->engine; 668 struct intel_ring *ring = req->ring;
658 int i, ret; 669 int i, ret;
659 670
660 if (!remap_info) 671 if (!remap_info)
@@ -669,13 +680,13 @@ static int remap_l3(struct drm_i915_gem_request *req, int slice)
669 * here because no other code should access these registers other than 680 * here because no other code should access these registers other than
670 * at initialization time. 681 * at initialization time.
671 */ 682 */
672 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); 683 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
673 for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { 684 for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
674 intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); 685 intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
675 intel_ring_emit(engine, remap_info[i]); 686 intel_ring_emit(ring, remap_info[i]);
676 } 687 }
677 intel_ring_emit(engine, MI_NOOP); 688 intel_ring_emit(ring, MI_NOOP);
678 intel_ring_advance(engine); 689 intel_ring_advance(ring);
679 690
680 return 0; 691 return 0;
681} 692}
@@ -752,9 +763,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
752 return 0; 763 return 0;
753 764
754 /* Trying to pin first makes error handling easier. */ 765 /* Trying to pin first makes error handling easier. */
755 ret = i915_gem_obj_ggtt_pin(to->engine[RCS].state, 766 ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
756 to->ggtt_alignment, 767 to->ggtt_alignment, 0);
757 0);
758 if (ret) 768 if (ret)
759 return ret; 769 return ret;
760 770
@@ -814,8 +824,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
814 * MI_SET_CONTEXT instead of when the next seqno has completed. 824 * MI_SET_CONTEXT instead of when the next seqno has completed.
815 */ 825 */
816 if (from != NULL) { 826 if (from != NULL) {
817 from->engine[RCS].state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; 827 struct drm_i915_gem_object *obj = from->engine[RCS].state;
818 i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->engine[RCS].state), req); 828
819 /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the 829 /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
820 * whole damn pipeline, we don't need to explicitly mark the 830 * whole damn pipeline, we don't need to explicitly mark the
821 * object dirty. The only exception is that the context must be 831 * object dirty. The only exception is that the context must be
@@ -823,14 +833,14 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
823 * able to defer doing this until we know the object would be 833 * able to defer doing this until we know the object would be
824 * swapped, but there is no way to do that yet. 834 * swapped, but there is no way to do that yet.
825 */ 835 */
826 from->engine[RCS].state->dirty = 1; 836 obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
837 i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0);
827 838
828 /* obj is kept alive until the next request by its active ref */ 839 /* obj is kept alive until the next request by its active ref */
829 i915_gem_object_ggtt_unpin(from->engine[RCS].state); 840 i915_gem_object_ggtt_unpin(obj);
830 i915_gem_context_unreference(from); 841 i915_gem_context_put(from);
831 } 842 }
832 i915_gem_context_reference(to); 843 engine->last_context = i915_gem_context_get(to);
833 engine->last_context = to;
834 844
835 /* GEN8 does *not* require an explicit reload if the PDPs have been 845 /* GEN8 does *not* require an explicit reload if the PDPs have been
836 * setup, and we do not wish to move them. 846 * setup, and we do not wish to move them.
@@ -894,8 +904,9 @@ int i915_switch_context(struct drm_i915_gem_request *req)
894{ 904{
895 struct intel_engine_cs *engine = req->engine; 905 struct intel_engine_cs *engine = req->engine;
896 906
897 WARN_ON(i915.enable_execlists);
898 lockdep_assert_held(&req->i915->drm.struct_mutex); 907 lockdep_assert_held(&req->i915->drm.struct_mutex);
908 if (i915.enable_execlists)
909 return 0;
899 910
900 if (!req->ctx->engine[engine->id].state) { 911 if (!req->ctx->engine[engine->id].state) {
901 struct i915_gem_context *to = req->ctx; 912 struct i915_gem_context *to = req->ctx;
@@ -914,10 +925,9 @@ int i915_switch_context(struct drm_i915_gem_request *req)
914 } 925 }
915 926
916 if (to != engine->last_context) { 927 if (to != engine->last_context) {
917 i915_gem_context_reference(to);
918 if (engine->last_context) 928 if (engine->last_context)
919 i915_gem_context_unreference(engine->last_context); 929 i915_gem_context_put(engine->last_context);
920 engine->last_context = to; 930 engine->last_context = i915_gem_context_get(to);
921 } 931 }
922 932
923 return 0; 933 return 0;
@@ -926,6 +936,33 @@ int i915_switch_context(struct drm_i915_gem_request *req)
926 return do_rcs_switch(req); 936 return do_rcs_switch(req);
927} 937}
928 938
939int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
940{
941 struct intel_engine_cs *engine;
942
943 for_each_engine(engine, dev_priv) {
944 struct drm_i915_gem_request *req;
945 int ret;
946
947 if (engine->last_context == NULL)
948 continue;
949
950 if (engine->last_context == dev_priv->kernel_context)
951 continue;
952
953 req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
954 if (IS_ERR(req))
955 return PTR_ERR(req);
956
957 ret = i915_switch_context(req);
958 i915_add_request_no_flush(req);
959 if (ret)
960 return ret;
961 }
962
963 return 0;
964}
965
929static bool contexts_enabled(struct drm_device *dev) 966static bool contexts_enabled(struct drm_device *dev)
930{ 967{
931 return i915.enable_execlists || to_i915(dev)->hw_context_size; 968 return i915.enable_execlists || to_i915(dev)->hw_context_size;
@@ -985,7 +1022,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
985 } 1022 }
986 1023
987 idr_remove(&file_priv->context_idr, ctx->user_handle); 1024 idr_remove(&file_priv->context_idr, ctx->user_handle);
988 i915_gem_context_unreference(ctx); 1025 context_close(ctx);
989 mutex_unlock(&dev->struct_mutex); 1026 mutex_unlock(&dev->struct_mutex);
990 1027
991 DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id); 1028 DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
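
context_close() above marks the context closed, closes its ppgtt VMAs and drops the creator's reference, while the final kref release (i915_gem_context_free) only asserts that close already ran. A minimal sketch of that close-versus-last-unreference split using kref; names are illustrative:

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct example_ctx {
	struct kref ref;
	bool closed;
};

static void example_ctx_release(struct kref *ref)
{
	struct example_ctx *ctx = container_of(ref, struct example_ctx, ref);

	/* The last reference can be dropped long after userspace closed the
	 * handle (e.g. by a still in-flight request), but close must have run.
	 */
	WARN_ON(!ctx->closed);
	kfree(ctx);
}

static void example_ctx_close(struct example_ctx *ctx)
{
	WARN_ON(ctx->closed);
	ctx->closed = true;	/* VMA/ppgtt teardown would happen here */
	kref_put(&ctx->ref, example_ctx_release);
}
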
diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c
deleted file mode 100644
index a56516482394..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_debug.c
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Keith Packard <keithp@keithp.com>
25 *
26 */
27
28#include <drm/drmP.h>
29#include <drm/i915_drm.h>
30#include "i915_drv.h"
31
32#if WATCH_LISTS
33int
34i915_verify_lists(struct drm_device *dev)
35{
36 static int warned;
37 struct drm_i915_private *dev_priv = to_i915(dev);
38 struct drm_i915_gem_object *obj;
39 struct intel_engine_cs *engine;
40 int err = 0;
41
42 if (warned)
43 return 0;
44
45 for_each_engine(engine, dev_priv) {
46 list_for_each_entry(obj, &engine->active_list,
47 engine_list[engine->id]) {
48 if (obj->base.dev != dev ||
49 !atomic_read(&obj->base.refcount.refcount)) {
50 DRM_ERROR("%s: freed active obj %p\n",
51 engine->name, obj);
52 err++;
53 break;
54 } else if (!obj->active ||
55 obj->last_read_req[engine->id] == NULL) {
56 DRM_ERROR("%s: invalid active obj %p\n",
57 engine->name, obj);
58 err++;
59 } else if (obj->base.write_domain) {
60 DRM_ERROR("%s: invalid write obj %p (w %x)\n",
61 engine->name,
62 obj, obj->base.write_domain);
63 err++;
64 }
65 }
66 }
67
68 return warned = err;
69}
70#endif /* WATCH_LIST */
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 80bbe43a2e92..c60a8d5bbad0 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -23,9 +23,13 @@
23 * Authors: 23 * Authors:
24 * Dave Airlie <airlied@redhat.com> 24 * Dave Airlie <airlied@redhat.com>
25 */ 25 */
26
27#include <linux/dma-buf.h>
28#include <linux/reservation.h>
29
26#include <drm/drmP.h> 30#include <drm/drmP.h>
31
27#include "i915_drv.h" 32#include "i915_drv.h"
28#include <linux/dma-buf.h>
29 33
30static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) 34static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
31{ 35{
@@ -218,25 +222,73 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
218 .end_cpu_access = i915_gem_end_cpu_access, 222 .end_cpu_access = i915_gem_end_cpu_access,
219}; 223};
220 224
225static void export_fences(struct drm_i915_gem_object *obj,
226 struct dma_buf *dma_buf)
227{
228 struct reservation_object *resv = dma_buf->resv;
229 struct drm_i915_gem_request *req;
230 unsigned long active;
231 int idx;
232
233 active = __I915_BO_ACTIVE(obj);
234 if (!active)
235 return;
236
237 /* Serialise with execbuf to prevent concurrent fence-loops */
238 mutex_lock(&obj->base.dev->struct_mutex);
239
240 /* Mark the object for future fences before racily adding old fences */
241 obj->base.dma_buf = dma_buf;
242
243 ww_mutex_lock(&resv->lock, NULL);
244
245 for_each_active(active, idx) {
246 req = i915_gem_active_get(&obj->last_read[idx],
247 &obj->base.dev->struct_mutex);
248 if (!req)
249 continue;
250
251 if (reservation_object_reserve_shared(resv) == 0)
252 reservation_object_add_shared_fence(resv, &req->fence);
253
254 i915_gem_request_put(req);
255 }
256
257 req = i915_gem_active_get(&obj->last_write,
258 &obj->base.dev->struct_mutex);
259 if (req) {
260 reservation_object_add_excl_fence(resv, &req->fence);
261 i915_gem_request_put(req);
262 }
263
264 ww_mutex_unlock(&resv->lock);
265 mutex_unlock(&obj->base.dev->struct_mutex);
266}
267
221struct dma_buf *i915_gem_prime_export(struct drm_device *dev, 268struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
222 struct drm_gem_object *gem_obj, int flags) 269 struct drm_gem_object *gem_obj, int flags)
223{ 270{
224 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 271 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
225 DEFINE_DMA_BUF_EXPORT_INFO(exp_info); 272 DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
273 struct dma_buf *dma_buf;
226 274
227 exp_info.ops = &i915_dmabuf_ops; 275 exp_info.ops = &i915_dmabuf_ops;
228 exp_info.size = gem_obj->size; 276 exp_info.size = gem_obj->size;
229 exp_info.flags = flags; 277 exp_info.flags = flags;
230 exp_info.priv = gem_obj; 278 exp_info.priv = gem_obj;
231 279
232
233 if (obj->ops->dmabuf_export) { 280 if (obj->ops->dmabuf_export) {
234 int ret = obj->ops->dmabuf_export(obj); 281 int ret = obj->ops->dmabuf_export(obj);
235 if (ret) 282 if (ret)
236 return ERR_PTR(ret); 283 return ERR_PTR(ret);
237 } 284 }
238 285
239 return dma_buf_export(&exp_info); 286 dma_buf = dma_buf_export(&exp_info);
287 if (IS_ERR(dma_buf))
288 return dma_buf;
289
290 export_fences(obj, dma_buf);
291 return dma_buf;
240} 292}
241 293
242static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) 294static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
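
export_fences() above copies the object's outstanding requests into the dma-buf's reservation object, so foreign importers can wait correctly: per-engine reads become shared fences and the last write becomes the exclusive fence. A sketch of that reservation-object pattern with the request lookup abstracted away; 'read' and 'write' stand in for fences already obtained:

#include <linux/fence.h>
#include <linux/reservation.h>

static void example_export_fences(struct reservation_object *resv,
				  struct fence *read, struct fence *write)
{
	ww_mutex_lock(&resv->lock, NULL);

	/* Readers go in as shared fences (space must be reserved first)... */
	if (read && reservation_object_reserve_shared(resv) == 0)
		reservation_object_add_shared_fence(resv, read);

	/* ...while the most recent writer becomes the exclusive fence. */
	if (write)
		reservation_object_add_excl_fence(resv, write);

	ww_mutex_unlock(&resv->lock);
}
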
@@ -278,8 +330,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 278 * Importing dmabuf exported from our own gem increases 330 * Importing dmabuf exported from our own gem increases
279 * refcount on gem itself instead of f_count of dmabuf. 331 * refcount on gem itself instead of f_count of dmabuf.
280 */ 332 */
281 drm_gem_object_reference(&obj->base); 333 return &i915_gem_object_get(obj)->base;
282 return &obj->base;
283 } 334 }
284 } 335 }
285 336
@@ -300,6 +351,16 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
300 i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); 351 i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
301 obj->base.import_attach = attach; 352 obj->base.import_attach = attach;
302 353
354 /* We use GTT as shorthand for a coherent domain, one that is
355 * neither in the GPU cache nor in the CPU cache, where all
356 * writes are immediately visible in memory. (That's not strictly
357 * true, but it's close! There are internal buffers such as the
358 * write-combined buffer or a delay through the chipset for GTT
359 * writes that do require us to treat GTT as a separate cache domain.)
360 */
361 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
362 obj->base.write_domain = 0;
363
303 return &obj->base; 364 return &obj->base;
304 365
305fail_detach: 366fail_detach:
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 3c1280ec7ff6..f76c06e92677 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -33,41 +33,23 @@
33#include "intel_drv.h" 33#include "intel_drv.h"
34#include "i915_trace.h" 34#include "i915_trace.h"
35 35
36static int switch_to_pinned_context(struct drm_i915_private *dev_priv) 36static bool
37gpu_is_idle(struct drm_i915_private *dev_priv)
37{ 38{
38 struct intel_engine_cs *engine; 39 struct intel_engine_cs *engine;
39 40
40 if (i915.enable_execlists)
41 return 0;
42
43 for_each_engine(engine, dev_priv) { 41 for_each_engine(engine, dev_priv) {
44 struct drm_i915_gem_request *req; 42 if (intel_engine_is_active(engine))
45 int ret; 43 return false;
46
47 if (engine->last_context == NULL)
48 continue;
49
50 if (engine->last_context == dev_priv->kernel_context)
51 continue;
52
53 req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
54 if (IS_ERR(req))
55 return PTR_ERR(req);
56
57 ret = i915_switch_context(req);
58 i915_add_request_no_flush(req);
59 if (ret)
60 return ret;
61 } 44 }
62 45
63 return 0; 46 return true;
64} 47}
65 48
66
67static bool 49static bool
68mark_free(struct i915_vma *vma, struct list_head *unwind) 50mark_free(struct i915_vma *vma, struct list_head *unwind)
69{ 51{
70 if (vma->pin_count) 52 if (i915_vma_is_pinned(vma))
71 return false; 53 return false;
72 54
73 if (WARN_ON(!list_empty(&vma->exec_list))) 55 if (WARN_ON(!list_empty(&vma->exec_list)))
@@ -79,7 +61,6 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
79 61
80/** 62/**
81 * i915_gem_evict_something - Evict vmas to make room for binding a new one 63 * i915_gem_evict_something - Evict vmas to make room for binding a new one
82 * @dev: drm_device
83 * @vm: address space to evict from 64 * @vm: address space to evict from
84 * @min_size: size of the desired free space 65 * @min_size: size of the desired free space
85 * @alignment: alignment constraint of the desired free space 66 * @alignment: alignment constraint of the desired free space
@@ -102,42 +83,37 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
102 * memory in e.g. the shrinker. 83 * memory in e.g. the shrinker.
103 */ 84 */
104int 85int
105i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, 86i915_gem_evict_something(struct i915_address_space *vm,
106 int min_size, unsigned alignment, unsigned cache_level, 87 u64 min_size, u64 alignment,
107 unsigned long start, unsigned long end, 88 unsigned cache_level,
89 u64 start, u64 end,
108 unsigned flags) 90 unsigned flags)
109{ 91{
110 struct list_head eviction_list, unwind_list; 92 struct drm_i915_private *dev_priv = to_i915(vm->dev);
111 struct i915_vma *vma; 93 struct list_head eviction_list;
112 int ret = 0; 94 struct list_head *phases[] = {
113 int pass = 0; 95 &vm->inactive_list,
96 &vm->active_list,
97 NULL,
98 }, **phase;
99 struct i915_vma *vma, *next;
100 int ret;
114 101
115 trace_i915_gem_evict(dev, min_size, alignment, flags); 102 trace_i915_gem_evict(vm, min_size, alignment, flags);
116 103
117 /* 104 /*
118 * The goal is to evict objects and amalgamate space in LRU order. 105 * The goal is to evict objects and amalgamate space in LRU order.
119 * The oldest idle objects reside on the inactive list, which is in 106 * The oldest idle objects reside on the inactive list, which is in
120 * retirement order. The next objects to retire are those on the (per 107 * retirement order. The next objects to retire are those in flight,
121 * ring) active list that do not have an outstanding flush. Once the 108 * on the active list, again in retirement order.
122 * hardware reports completion (the seqno is updated after the
123 * batchbuffer has been finished) the clean buffer objects would
124 * be retired to the inactive list. Any dirty objects would be added
125 * to the tail of the flushing list. So after processing the clean
126 * active objects we need to emit a MI_FLUSH to retire the flushing
127 * list, hence the retirement order of the flushing list is in
128 * advance of the dirty objects on the active lists.
129 * 109 *
130 * The retirement sequence is thus: 110 * The retirement sequence is thus:
131 * 1. Inactive objects (already retired) 111 * 1. Inactive objects (already retired)
132 * 2. Clean active objects 112 * 2. Active objects (will stall on unbinding)
133 * 3. Flushing list
134 * 4. Dirty active objects.
135 * 113 *
136 * On each list, the oldest objects lie at the HEAD with the freshest 114 * On each list, the oldest objects lie at the HEAD with the freshest
137 * object on the TAIL. 115 * object on the TAIL.
138 */ 116 */
139
140 INIT_LIST_HEAD(&unwind_list);
141 if (start != 0 || end != vm->total) { 117 if (start != 0 || end != vm->total) {
142 drm_mm_init_scan_with_range(&vm->mm, min_size, 118 drm_mm_init_scan_with_range(&vm->mm, min_size,
143 alignment, cache_level, 119 alignment, cache_level,
@@ -145,96 +121,84 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
145 } else 121 } else
146 drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level); 122 drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
147 123
148search_again:
149 /* First see if there is a large enough contiguous idle region... */
150 list_for_each_entry(vma, &vm->inactive_list, vm_link) {
151 if (mark_free(vma, &unwind_list))
152 goto found;
153 }
154
155 if (flags & PIN_NONBLOCK) 124 if (flags & PIN_NONBLOCK)
156 goto none; 125 phases[1] = NULL;
157 126
158 /* Now merge in the soon-to-be-expired objects... */ 127search_again:
159 list_for_each_entry(vma, &vm->active_list, vm_link) { 128 INIT_LIST_HEAD(&eviction_list);
160 if (mark_free(vma, &unwind_list)) 129 phase = phases;
161 goto found; 130 do {
162 } 131 list_for_each_entry(vma, *phase, vm_link)
132 if (mark_free(vma, &eviction_list))
133 goto found;
134 } while (*++phase);
163 135
164none:
165 /* Nothing found, clean up and bail out! */ 136 /* Nothing found, clean up and bail out! */
166 while (!list_empty(&unwind_list)) { 137 list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
167 vma = list_first_entry(&unwind_list,
168 struct i915_vma,
169 exec_list);
170 ret = drm_mm_scan_remove_block(&vma->node); 138 ret = drm_mm_scan_remove_block(&vma->node);
171 BUG_ON(ret); 139 BUG_ON(ret);
172 140
173 list_del_init(&vma->exec_list); 141 INIT_LIST_HEAD(&vma->exec_list);
174 } 142 }
175 143
176 /* Can we unpin some objects such as idle hw contents, 144 /* Can we unpin some objects such as idle hw contents,
177 * or pending flips? 145 * or pending flips? But since only the GGTT has global entries
 146 * such as scanouts, ringbuffers and contexts, we can skip the
147 * purge when inspecting per-process local address spaces.
178 */ 148 */
179 if (flags & PIN_NONBLOCK) 149 if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
180 return -ENOSPC; 150 return -ENOSPC;
181 151
182 /* Only idle the GPU and repeat the search once */ 152 if (gpu_is_idle(dev_priv)) {
183 if (pass++ == 0) { 153 /* If we still have pending pageflip completions, drop
184 struct drm_i915_private *dev_priv = to_i915(dev); 154 * back to userspace to give our workqueues time to
185 155 * acquire our locks and unpin the old scanouts.
186 if (i915_is_ggtt(vm)) { 156 */
187 ret = switch_to_pinned_context(dev_priv); 157 return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC;
188 if (ret)
189 return ret;
190 }
191
192 ret = i915_gem_wait_for_idle(dev_priv);
193 if (ret)
194 return ret;
195
196 i915_gem_retire_requests(dev_priv);
197 goto search_again;
198 } 158 }
199 159
200 /* If we still have pending pageflip completions, drop 160 /* Not everything in the GGTT is tracked via vma (otherwise we
201 * back to userspace to give our workqueues time to 161 * could evict as required with minimal stalling) so we are forced
202 * acquire our locks and unpin the old scanouts. 162 * to idle the GPU and explicitly retire outstanding requests in
163 * the hopes that we can then remove contexts and the like only
164 * bound by their active reference.
203 */ 165 */
204 return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC; 166 ret = i915_gem_switch_to_kernel_context(dev_priv);
167 if (ret)
168 return ret;
169
170 ret = i915_gem_wait_for_idle(dev_priv, true);
171 if (ret)
172 return ret;
173
174 i915_gem_retire_requests(dev_priv);
175 goto search_again;
205 176
206found: 177found:
 207 /* drm_mm doesn't allow any other operations while 178 /* drm_mm doesn't allow any other operations while
208 * scanning, therefore store to be evicted objects on a 179 * scanning, therefore store to-be-evicted objects on a
209 * temporary list. */ 180 * temporary list and take a reference for all before
210 INIT_LIST_HEAD(&eviction_list); 181 * calling unbind (which may remove the active reference
211 while (!list_empty(&unwind_list)) { 182 * of any of our objects, thus corrupting the list).
212 vma = list_first_entry(&unwind_list, 183 */
213 struct i915_vma, 184 list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
214 exec_list); 185 if (drm_mm_scan_remove_block(&vma->node))
215 if (drm_mm_scan_remove_block(&vma->node)) { 186 __i915_vma_pin(vma);
216 list_move(&vma->exec_list, &eviction_list); 187 else
217 drm_gem_object_reference(&vma->obj->base); 188 list_del_init(&vma->exec_list);
218 continue;
219 }
220 list_del_init(&vma->exec_list);
221 } 189 }
222 190
223 /* Unbinding will emit any required flushes */ 191 /* Unbinding will emit any required flushes */
224 while (!list_empty(&eviction_list)) { 192 while (!list_empty(&eviction_list)) {
225 struct drm_gem_object *obj;
226 vma = list_first_entry(&eviction_list, 193 vma = list_first_entry(&eviction_list,
227 struct i915_vma, 194 struct i915_vma,
228 exec_list); 195 exec_list);
229 196
230 obj = &vma->obj->base;
231 list_del_init(&vma->exec_list); 197 list_del_init(&vma->exec_list);
198 __i915_vma_unpin(vma);
232 if (ret == 0) 199 if (ret == 0)
233 ret = i915_vma_unbind(vma); 200 ret = i915_vma_unbind(vma);
234
235 drm_gem_object_unreference(obj);
236 } 201 }
237
238 return ret; 202 return ret;
239} 203}
240 204
@@ -256,8 +220,8 @@ i915_gem_evict_for_vma(struct i915_vma *target)
256 220
257 vma = container_of(node, typeof(*vma), node); 221 vma = container_of(node, typeof(*vma), node);
258 222
259 if (vma->pin_count) { 223 if (i915_vma_is_pinned(vma)) {
260 if (!vma->exec_entry || (vma->pin_count > 1)) 224 if (!vma->exec_entry || i915_vma_pin_count(vma) > 1)
261 /* Object is pinned for some other use */ 225 /* Object is pinned for some other use */
262 return -EBUSY; 226 return -EBUSY;
263 227
@@ -303,22 +267,21 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
303 struct drm_i915_private *dev_priv = to_i915(vm->dev); 267 struct drm_i915_private *dev_priv = to_i915(vm->dev);
304 268
305 if (i915_is_ggtt(vm)) { 269 if (i915_is_ggtt(vm)) {
306 ret = switch_to_pinned_context(dev_priv); 270 ret = i915_gem_switch_to_kernel_context(dev_priv);
307 if (ret) 271 if (ret)
308 return ret; 272 return ret;
309 } 273 }
310 274
311 ret = i915_gem_wait_for_idle(dev_priv); 275 ret = i915_gem_wait_for_idle(dev_priv, true);
312 if (ret) 276 if (ret)
313 return ret; 277 return ret;
314 278
315 i915_gem_retire_requests(dev_priv); 279 i915_gem_retire_requests(dev_priv);
316
317 WARN_ON(!list_empty(&vm->active_list)); 280 WARN_ON(!list_empty(&vm->active_list));
318 } 281 }
319 282
320 list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link) 283 list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
321 if (vma->pin_count == 0) 284 if (!i915_vma_is_pinned(vma))
322 WARN_ON(i915_vma_unbind(vma)); 285 WARN_ON(i915_vma_unbind(vma));
323 286
324 return 0; 287 return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1978633e7549..c494b79ded20 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -26,21 +26,38 @@
26 * 26 *
27 */ 27 */
28 28
29#include <linux/dma_remapping.h>
30#include <linux/reservation.h>
31#include <linux/uaccess.h>
32
29#include <drm/drmP.h> 33#include <drm/drmP.h>
30#include <drm/i915_drm.h> 34#include <drm/i915_drm.h>
35
31#include "i915_drv.h" 36#include "i915_drv.h"
37#include "i915_gem_dmabuf.h"
32#include "i915_trace.h" 38#include "i915_trace.h"
33#include "intel_drv.h" 39#include "intel_drv.h"
34#include <linux/dma_remapping.h> 40#include "intel_frontbuffer.h"
35#include <linux/uaccess.h>
36 41
37#define __EXEC_OBJECT_HAS_PIN (1<<31) 42#define __EXEC_OBJECT_HAS_PIN (1<<31)
38#define __EXEC_OBJECT_HAS_FENCE (1<<30) 43#define __EXEC_OBJECT_HAS_FENCE (1<<30)
39#define __EXEC_OBJECT_NEEDS_MAP (1<<29) 44#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
40#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) 45#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
46#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */
41 47
42#define BATCH_OFFSET_BIAS (256*1024) 48#define BATCH_OFFSET_BIAS (256*1024)
43 49
50struct i915_execbuffer_params {
51 struct drm_device *dev;
52 struct drm_file *file;
53 struct i915_vma *batch;
54 u32 dispatch_flags;
55 u32 args_batch_start_offset;
56 struct intel_engine_cs *engine;
57 struct i915_gem_context *ctx;
58 struct drm_i915_gem_request *request;
59};
60
44struct eb_vmas { 61struct eb_vmas {
45 struct list_head vmas; 62 struct list_head vmas;
46 int and; 63 int and;
@@ -89,6 +106,26 @@ eb_reset(struct eb_vmas *eb)
89 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); 106 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
90} 107}
91 108
109static struct i915_vma *
110eb_get_batch(struct eb_vmas *eb)
111{
112 struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
113
114 /*
115 * SNA is doing fancy tricks with compressing batch buffers, which leads
116 * to negative relocation deltas. Usually that works out ok since the
117 * relocate address is still positive, except when the batch is placed
118 * very low in the GTT. Ensure this doesn't happen.
119 *
120 * Note that actual hangs have only been observed on gen7, but for
121 * paranoia do it everywhere.
122 */
123 if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
124 vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
125
126 return vma;
127}
128
92static int 129static int
93eb_lookup_vmas(struct eb_vmas *eb, 130eb_lookup_vmas(struct eb_vmas *eb,
94 struct drm_i915_gem_exec_object2 *exec, 131 struct drm_i915_gem_exec_object2 *exec,
@@ -122,7 +159,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
122 goto err; 159 goto err;
123 } 160 }
124 161
125 drm_gem_object_reference(&obj->base); 162 i915_gem_object_get(obj);
126 list_add_tail(&obj->obj_exec_link, &objects); 163 list_add_tail(&obj->obj_exec_link, &objects);
127 } 164 }
128 spin_unlock(&file->table_lock); 165 spin_unlock(&file->table_lock);
@@ -175,7 +212,7 @@ err:
175 struct drm_i915_gem_object, 212 struct drm_i915_gem_object,
176 obj_exec_link); 213 obj_exec_link);
177 list_del_init(&obj->obj_exec_link); 214 list_del_init(&obj->obj_exec_link);
178 drm_gem_object_unreference(&obj->base); 215 i915_gem_object_put(obj);
179 } 216 }
180 /* 217 /*
181 * Objects already transfered to the vmas list will be unreferenced by 218 * Objects already transfered to the vmas list will be unreferenced by
@@ -219,7 +256,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
219 i915_gem_object_unpin_fence(obj); 256 i915_gem_object_unpin_fence(obj);
220 257
221 if (entry->flags & __EXEC_OBJECT_HAS_PIN) 258 if (entry->flags & __EXEC_OBJECT_HAS_PIN)
222 vma->pin_count--; 259 __i915_vma_unpin(vma);
223 260
224 entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); 261 entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
225} 262}
@@ -234,7 +271,7 @@ static void eb_destroy(struct eb_vmas *eb)
234 exec_list); 271 exec_list);
235 list_del_init(&vma->exec_list); 272 list_del_init(&vma->exec_list);
236 i915_gem_execbuffer_unreserve_vma(vma); 273 i915_gem_execbuffer_unreserve_vma(vma);
237 drm_gem_object_unreference(&vma->obj->base); 274 i915_gem_object_put(vma->obj);
238 } 275 }
239 kfree(eb); 276 kfree(eb);
240} 277}
@@ -399,6 +436,20 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj,
399 return 0; 436 return 0;
400} 437}
401 438
439static bool object_is_idle(struct drm_i915_gem_object *obj)
440{
441 unsigned long active = i915_gem_object_get_active(obj);
442 int idx;
443
444 for_each_active(active, idx) {
445 if (!i915_gem_active_is_idle(&obj->last_read[idx],
446 &obj->base.dev->struct_mutex))
447 return false;
448 }
449
450 return true;
451}
452
402static int 453static int
403i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, 454i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
404 struct eb_vmas *eb, 455 struct eb_vmas *eb,
@@ -482,7 +533,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
482 } 533 }
483 534
484 /* We can't wait for rendering with pagefaults disabled */ 535 /* We can't wait for rendering with pagefaults disabled */
485 if (obj->active && pagefault_disabled()) 536 if (pagefault_disabled() && !object_is_idle(obj))
486 return -EFAULT; 537 return -EFAULT;
487 538
488 if (use_cpu_reloc(obj)) 539 if (use_cpu_reloc(obj))
@@ -626,12 +677,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
626 flags |= PIN_HIGH; 677 flags |= PIN_HIGH;
627 } 678 }
628 679
629 ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); 680 ret = i915_vma_pin(vma,
630 if ((ret == -ENOSPC || ret == -E2BIG) && 681 entry->pad_to_size,
682 entry->alignment,
683 flags);
684 if ((ret == -ENOSPC || ret == -E2BIG) &&
631 only_mappable_for_reloc(entry->flags)) 685 only_mappable_for_reloc(entry->flags))
632 ret = i915_gem_object_pin(obj, vma->vm, 686 ret = i915_vma_pin(vma,
633 entry->alignment, 687 entry->pad_to_size,
634 flags & ~PIN_MAPPABLE); 688 entry->alignment,
689 flags & ~PIN_MAPPABLE);
635 if (ret) 690 if (ret)
636 return ret; 691 return ret;
637 692
@@ -667,7 +722,7 @@ need_reloc_mappable(struct i915_vma *vma)
667 if (entry->relocation_count == 0) 722 if (entry->relocation_count == 0)
668 return false; 723 return false;
669 724
670 if (!vma->is_ggtt) 725 if (!i915_vma_is_ggtt(vma))
671 return false; 726 return false;
672 727
673 /* See also use_cpu_reloc() */ 728 /* See also use_cpu_reloc() */
@@ -686,12 +741,16 @@ eb_vma_misplaced(struct i915_vma *vma)
686 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 741 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
687 struct drm_i915_gem_object *obj = vma->obj; 742 struct drm_i915_gem_object *obj = vma->obj;
688 743
689 WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt); 744 WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
745 !i915_vma_is_ggtt(vma));
690 746
691 if (entry->alignment && 747 if (entry->alignment &&
692 vma->node.start & (entry->alignment - 1)) 748 vma->node.start & (entry->alignment - 1))
693 return true; 749 return true;
694 750
751 if (vma->node.size < entry->pad_to_size)
752 return true;
753
695 if (entry->flags & EXEC_OBJECT_PINNED && 754 if (entry->flags & EXEC_OBJECT_PINNED &&
696 vma->node.start != entry->offset) 755 vma->node.start != entry->offset)
697 return true; 756 return true;
@@ -725,8 +784,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
725 bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; 784 bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
726 int retry; 785 int retry;
727 786
728 i915_gem_retire_requests_ring(engine);
729
730 vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; 787 vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
731 788
732 INIT_LIST_HEAD(&ordered_vmas); 789 INIT_LIST_HEAD(&ordered_vmas);
@@ -746,7 +803,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
746 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; 803 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
747 need_fence = 804 need_fence =
748 entry->flags & EXEC_OBJECT_NEEDS_FENCE && 805 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
749 obj->tiling_mode != I915_TILING_NONE; 806 i915_gem_object_is_tiled(obj);
750 need_mappable = need_fence || need_reloc_mappable(vma); 807 need_mappable = need_fence || need_reloc_mappable(vma);
751 808
752 if (entry->flags & EXEC_OBJECT_PINNED) 809 if (entry->flags & EXEC_OBJECT_PINNED)
@@ -843,7 +900,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
843 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); 900 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
844 list_del_init(&vma->exec_list); 901 list_del_init(&vma->exec_list);
845 i915_gem_execbuffer_unreserve_vma(vma); 902 i915_gem_execbuffer_unreserve_vma(vma);
846 drm_gem_object_unreference(&vma->obj->base); 903 i915_gem_object_put(vma->obj);
847 } 904 }
848 905
849 mutex_unlock(&dev->struct_mutex); 906 mutex_unlock(&dev->struct_mutex);
@@ -937,11 +994,21 @@ err:
937 return ret; 994 return ret;
938} 995}
939 996
997static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
998{
999 unsigned int mask;
1000
1001 mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
1002 mask <<= I915_BO_ACTIVE_SHIFT;
1003
1004 return mask;
1005}
1006
940static int 1007static int
941i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, 1008i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
942 struct list_head *vmas) 1009 struct list_head *vmas)
943{ 1010{
944 const unsigned other_rings = ~intel_engine_flag(req->engine); 1011 const unsigned int other_rings = eb_other_engines(req);
945 struct i915_vma *vma; 1012 struct i915_vma *vma;
946 uint32_t flush_domains = 0; 1013 uint32_t flush_domains = 0;
947 bool flush_chipset = false; 1014 bool flush_chipset = false;
@@ -950,8 +1017,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
950 list_for_each_entry(vma, vmas, exec_list) { 1017 list_for_each_entry(vma, vmas, exec_list) {
951 struct drm_i915_gem_object *obj = vma->obj; 1018 struct drm_i915_gem_object *obj = vma->obj;
952 1019
953 if (obj->active & other_rings) { 1020 if (obj->flags & other_rings) {
954 ret = i915_gem_object_sync(obj, req->engine, &req); 1021 ret = i915_gem_object_sync(obj, req);
955 if (ret) 1022 if (ret)
956 return ret; 1023 return ret;
957 } 1024 }
@@ -968,10 +1035,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
968 if (flush_domains & I915_GEM_DOMAIN_GTT) 1035 if (flush_domains & I915_GEM_DOMAIN_GTT)
969 wmb(); 1036 wmb();
970 1037
971 /* Unconditionally invalidate gpu caches and ensure that we do flush 1038 /* Unconditionally invalidate GPU caches and TLBs. */
972 * any residual writes from the previous batch. 1039 return req->engine->emit_flush(req, EMIT_INVALIDATE);
973 */
974 return intel_ring_invalidate_all_caches(req);
975} 1040}
976 1041
977static bool 1042static bool
@@ -1007,6 +1072,9 @@ validate_exec_list(struct drm_device *dev,
1007 unsigned invalid_flags; 1072 unsigned invalid_flags;
1008 int i; 1073 int i;
1009 1074
1075 /* INTERNAL flags must not overlap with external ones */
1076 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
1077
1010 invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; 1078 invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1011 if (USES_FULL_PPGTT(dev)) 1079 if (USES_FULL_PPGTT(dev))
1012 invalid_flags |= EXEC_OBJECT_NEEDS_GTT; 1080 invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -1036,6 +1104,14 @@ validate_exec_list(struct drm_device *dev,
1036 if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) 1104 if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
1037 return -EINVAL; 1105 return -EINVAL;
1038 1106
1107 /* pad_to_size was once a reserved field, so sanitize it */
1108 if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
1109 if (offset_in_page(exec[i].pad_to_size))
1110 return -EINVAL;
1111 } else {
1112 exec[i].pad_to_size = 0;
1113 }
1114
1039 /* First check for malicious input causing overflow in 1115 /* First check for malicious input causing overflow in
1040 * the worst case where we need to allocate the entire 1116 * the worst case where we need to allocate the entire
1041 * relocation tree as a single array. 1117 * relocation tree as a single array.
@@ -1086,66 +1162,106 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
1086 return ctx; 1162 return ctx;
1087} 1163}
1088 1164
1089void 1165void i915_vma_move_to_active(struct i915_vma *vma,
1166 struct drm_i915_gem_request *req,
1167 unsigned int flags)
1168{
1169 struct drm_i915_gem_object *obj = vma->obj;
1170 const unsigned int idx = req->engine->id;
1171
1172 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1173
1174 obj->dirty = 1; /* be paranoid */
1175
1176 /* Add a reference if we're newly entering the active list.
1177 * The order in which we add operations to the retirement queue is
1178 * vital here: mark_active adds to the start of the callback list,
1179 * such that subsequent callbacks are called first. Therefore we
1180 * add the active reference first and queue for it to be dropped
1181 * *last*.
1182 */
1183 if (!i915_gem_object_is_active(obj))
1184 i915_gem_object_get(obj);
1185 i915_gem_object_set_active(obj, idx);
1186 i915_gem_active_set(&obj->last_read[idx], req);
1187
1188 if (flags & EXEC_OBJECT_WRITE) {
1189 i915_gem_active_set(&obj->last_write, req);
1190
1191 intel_fb_obj_invalidate(obj, ORIGIN_CS);
1192
1193 /* update for the implicit flush after a batch */
1194 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1195 }
1196
1197 if (flags & EXEC_OBJECT_NEEDS_FENCE) {
1198 i915_gem_active_set(&obj->last_fence, req);
1199 if (flags & __EXEC_OBJECT_HAS_FENCE) {
1200 struct drm_i915_private *dev_priv = req->i915;
1201
1202 list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
1203 &dev_priv->mm.fence_list);
1204 }
1205 }
1206
1207 i915_vma_set_active(vma, idx);
1208 i915_gem_active_set(&vma->last_read[idx], req);
1209 list_move_tail(&vma->vm_link, &vma->vm->active_list);
1210}
1211
1212static void eb_export_fence(struct drm_i915_gem_object *obj,
1213 struct drm_i915_gem_request *req,
1214 unsigned int flags)
1215{
1216 struct reservation_object *resv;
1217
1218 resv = i915_gem_object_get_dmabuf_resv(obj);
1219 if (!resv)
1220 return;
1221
1222 /* Ignore errors from failing to allocate the new fence, we can't
1223 * handle an error right now. Worst case should be missed
1224 * synchronisation leading to rendering corruption.
1225 */
1226 ww_mutex_lock(&resv->lock, NULL);
1227 if (flags & EXEC_OBJECT_WRITE)
1228 reservation_object_add_excl_fence(resv, &req->fence);
1229 else if (reservation_object_reserve_shared(resv) == 0)
1230 reservation_object_add_shared_fence(resv, &req->fence);
1231 ww_mutex_unlock(&resv->lock);
1232}
1233
1234static void
1090i915_gem_execbuffer_move_to_active(struct list_head *vmas, 1235i915_gem_execbuffer_move_to_active(struct list_head *vmas,
1091 struct drm_i915_gem_request *req) 1236 struct drm_i915_gem_request *req)
1092{ 1237{
1093 struct intel_engine_cs *engine = i915_gem_request_get_engine(req);
1094 struct i915_vma *vma; 1238 struct i915_vma *vma;
1095 1239
1096 list_for_each_entry(vma, vmas, exec_list) { 1240 list_for_each_entry(vma, vmas, exec_list) {
1097 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
1098 struct drm_i915_gem_object *obj = vma->obj; 1241 struct drm_i915_gem_object *obj = vma->obj;
1099 u32 old_read = obj->base.read_domains; 1242 u32 old_read = obj->base.read_domains;
1100 u32 old_write = obj->base.write_domain; 1243 u32 old_write = obj->base.write_domain;
1101 1244
1102 obj->dirty = 1; /* be paranoid */
1103 obj->base.write_domain = obj->base.pending_write_domain; 1245 obj->base.write_domain = obj->base.pending_write_domain;
1104 if (obj->base.write_domain == 0) 1246 if (obj->base.write_domain)
1247 vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
1248 else
1105 obj->base.pending_read_domains |= obj->base.read_domains; 1249 obj->base.pending_read_domains |= obj->base.read_domains;
1106 obj->base.read_domains = obj->base.pending_read_domains; 1250 obj->base.read_domains = obj->base.pending_read_domains;
1107 1251
1108 i915_vma_move_to_active(vma, req); 1252 i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
1109 if (obj->base.write_domain) { 1253 eb_export_fence(obj, req, vma->exec_entry->flags);
1110 i915_gem_request_assign(&obj->last_write_req, req);
1111
1112 intel_fb_obj_invalidate(obj, ORIGIN_CS);
1113
1114 /* update for the implicit flush after a batch */
1115 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1116 }
1117 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
1118 i915_gem_request_assign(&obj->last_fenced_req, req);
1119 if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
1120 struct drm_i915_private *dev_priv = engine->i915;
1121 list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
1122 &dev_priv->mm.fence_list);
1123 }
1124 }
1125
1126 trace_i915_gem_object_change_domain(obj, old_read, old_write); 1254 trace_i915_gem_object_change_domain(obj, old_read, old_write);
1127 } 1255 }
1128} 1256}
1129 1257
1130static void
1131i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
1132{
1133 /* Unconditionally force add_request to emit a full flush. */
1134 params->engine->gpu_caches_dirty = true;
1135
1136 /* Add a breadcrumb for the completion of the batch buffer */
1137 __i915_add_request(params->request, params->batch_obj, true);
1138}
1139
1140static int 1258static int
1141i915_reset_gen7_sol_offsets(struct drm_device *dev, 1259i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1142 struct drm_i915_gem_request *req)
1143{ 1260{
1144 struct intel_engine_cs *engine = req->engine; 1261 struct intel_ring *ring = req->ring;
1145 struct drm_i915_private *dev_priv = to_i915(dev);
1146 int ret, i; 1262 int ret, i;
1147 1263
1148 if (!IS_GEN7(dev) || engine != &dev_priv->engine[RCS]) { 1264 if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
1149 DRM_DEBUG("sol reset is gen7/rcs only\n"); 1265 DRM_DEBUG("sol reset is gen7/rcs only\n");
1150 return -EINVAL; 1266 return -EINVAL;
1151 } 1267 }
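eb_export_fence() in the hunk above attaches the request's fence to the object's dma-buf reservation object (exclusive for writes, shared for reads) so that other drivers sharing the buffer can serialise against the GPU work queued here. As a rough, hedged sketch of what a consumer on the other side of the dma-buf could do with those fences (only the reservation API calls are known to exist; the wrapper itself is illustrative):

	#include <linux/reservation.h>
	#include <linux/sched.h>

	/* Illustrative helper: block until every fence previously exported
	 * into @resv (the shared read fences and the exclusive write fence)
	 * has signalled.  Returns remaining jiffies on success, 0 on
	 * timeout, or a negative error such as -ERESTARTSYS.
	 */
	static long example_wait_for_shared_buffer(struct reservation_object *resv)
	{
		return reservation_object_wait_timeout_rcu(resv,
							   true, /* wait_all: include shared fences */
							   true, /* interruptible */
							   MAX_SCHEDULE_TIMEOUT);
	}
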
@@ -1155,21 +1271,21 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
1155 return ret; 1271 return ret;
1156 1272
1157 for (i = 0; i < 4; i++) { 1273 for (i = 0; i < 4; i++) {
1158 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 1274 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1159 intel_ring_emit_reg(engine, GEN7_SO_WRITE_OFFSET(i)); 1275 intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
1160 intel_ring_emit(engine, 0); 1276 intel_ring_emit(ring, 0);
1161 } 1277 }
1162 1278
1163 intel_ring_advance(engine); 1279 intel_ring_advance(ring);
1164 1280
1165 return 0; 1281 return 0;
1166} 1282}
1167 1283
1168static struct drm_i915_gem_object* 1284static struct i915_vma*
1169i915_gem_execbuffer_parse(struct intel_engine_cs *engine, 1285i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1170 struct drm_i915_gem_exec_object2 *shadow_exec_entry, 1286 struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1171 struct eb_vmas *eb,
1172 struct drm_i915_gem_object *batch_obj, 1287 struct drm_i915_gem_object *batch_obj,
1288 struct eb_vmas *eb,
1173 u32 batch_start_offset, 1289 u32 batch_start_offset,
1174 u32 batch_len, 1290 u32 batch_len,
1175 bool is_master) 1291 bool is_master)
@@ -1181,18 +1297,18 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1181 shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool, 1297 shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
1182 PAGE_ALIGN(batch_len)); 1298 PAGE_ALIGN(batch_len));
1183 if (IS_ERR(shadow_batch_obj)) 1299 if (IS_ERR(shadow_batch_obj))
1184 return shadow_batch_obj; 1300 return ERR_CAST(shadow_batch_obj);
1185 1301
1186 ret = i915_parse_cmds(engine, 1302 ret = intel_engine_cmd_parser(engine,
1187 batch_obj, 1303 batch_obj,
1188 shadow_batch_obj, 1304 shadow_batch_obj,
1189 batch_start_offset, 1305 batch_start_offset,
1190 batch_len, 1306 batch_len,
1191 is_master); 1307 is_master);
1192 if (ret) 1308 if (ret)
1193 goto err; 1309 goto err;
1194 1310
1195 ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0); 1311 ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
1196 if (ret) 1312 if (ret)
1197 goto err; 1313 goto err;
1198 1314
@@ -1203,29 +1319,25 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1203 vma = i915_gem_obj_to_ggtt(shadow_batch_obj); 1319 vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
1204 vma->exec_entry = shadow_exec_entry; 1320 vma->exec_entry = shadow_exec_entry;
1205 vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; 1321 vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1206 drm_gem_object_reference(&shadow_batch_obj->base); 1322 i915_gem_object_get(shadow_batch_obj);
1207 list_add_tail(&vma->exec_list, &eb->vmas); 1323 list_add_tail(&vma->exec_list, &eb->vmas);
1208 1324
1209 shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND; 1325 return vma;
1210
1211 return shadow_batch_obj;
1212 1326
1213err: 1327err:
1214 i915_gem_object_unpin_pages(shadow_batch_obj); 1328 i915_gem_object_unpin_pages(shadow_batch_obj);
1215 if (ret == -EACCES) /* unhandled chained batch */ 1329 if (ret == -EACCES) /* unhandled chained batch */
1216 return batch_obj; 1330 return NULL;
1217 else 1331 else
1218 return ERR_PTR(ret); 1332 return ERR_PTR(ret);
1219} 1333}
1220 1334
1221int 1335static int
1222i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, 1336execbuf_submit(struct i915_execbuffer_params *params,
1223 struct drm_i915_gem_execbuffer2 *args, 1337 struct drm_i915_gem_execbuffer2 *args,
1224 struct list_head *vmas) 1338 struct list_head *vmas)
1225{ 1339{
1226 struct drm_device *dev = params->dev; 1340 struct drm_i915_private *dev_priv = params->request->i915;
1227 struct intel_engine_cs *engine = params->engine;
1228 struct drm_i915_private *dev_priv = to_i915(dev);
1229 u64 exec_start, exec_len; 1341 u64 exec_start, exec_len;
1230 int instp_mode; 1342 int instp_mode;
1231 u32 instp_mask; 1343 u32 instp_mask;
@@ -1239,34 +1351,31 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
1239 if (ret) 1351 if (ret)
1240 return ret; 1352 return ret;
1241 1353
1242 WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<engine->id),
1243 "%s didn't clear reload\n", engine->name);
1244
1245 instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; 1354 instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1246 instp_mask = I915_EXEC_CONSTANTS_MASK; 1355 instp_mask = I915_EXEC_CONSTANTS_MASK;
1247 switch (instp_mode) { 1356 switch (instp_mode) {
1248 case I915_EXEC_CONSTANTS_REL_GENERAL: 1357 case I915_EXEC_CONSTANTS_REL_GENERAL:
1249 case I915_EXEC_CONSTANTS_ABSOLUTE: 1358 case I915_EXEC_CONSTANTS_ABSOLUTE:
1250 case I915_EXEC_CONSTANTS_REL_SURFACE: 1359 case I915_EXEC_CONSTANTS_REL_SURFACE:
1251 if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { 1360 if (instp_mode != 0 && params->engine->id != RCS) {
1252 DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); 1361 DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1253 return -EINVAL; 1362 return -EINVAL;
1254 } 1363 }
1255 1364
1256 if (instp_mode != dev_priv->relative_constants_mode) { 1365 if (instp_mode != dev_priv->relative_constants_mode) {
1257 if (INTEL_INFO(dev)->gen < 4) { 1366 if (INTEL_INFO(dev_priv)->gen < 4) {
1258 DRM_DEBUG("no rel constants on pre-gen4\n"); 1367 DRM_DEBUG("no rel constants on pre-gen4\n");
1259 return -EINVAL; 1368 return -EINVAL;
1260 } 1369 }
1261 1370
1262 if (INTEL_INFO(dev)->gen > 5 && 1371 if (INTEL_INFO(dev_priv)->gen > 5 &&
1263 instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { 1372 instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1264 DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); 1373 DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1265 return -EINVAL; 1374 return -EINVAL;
1266 } 1375 }
1267 1376
1268 /* The HW changed the meaning on this bit on gen6 */ 1377 /* The HW changed the meaning on this bit on gen6 */
1269 if (INTEL_INFO(dev)->gen >= 6) 1378 if (INTEL_INFO(dev_priv)->gen >= 6)
1270 instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; 1379 instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1271 } 1380 }
1272 break; 1381 break;
@@ -1275,37 +1384,39 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
1275 return -EINVAL; 1384 return -EINVAL;
1276 } 1385 }
1277 1386
1278 if (engine == &dev_priv->engine[RCS] && 1387 if (params->engine->id == RCS &&
1279 instp_mode != dev_priv->relative_constants_mode) { 1388 instp_mode != dev_priv->relative_constants_mode) {
1389 struct intel_ring *ring = params->request->ring;
1390
1280 ret = intel_ring_begin(params->request, 4); 1391 ret = intel_ring_begin(params->request, 4);
1281 if (ret) 1392 if (ret)
1282 return ret; 1393 return ret;
1283 1394
1284 intel_ring_emit(engine, MI_NOOP); 1395 intel_ring_emit(ring, MI_NOOP);
1285 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 1396 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1286 intel_ring_emit_reg(engine, INSTPM); 1397 intel_ring_emit_reg(ring, INSTPM);
1287 intel_ring_emit(engine, instp_mask << 16 | instp_mode); 1398 intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1288 intel_ring_advance(engine); 1399 intel_ring_advance(ring);
1289 1400
1290 dev_priv->relative_constants_mode = instp_mode; 1401 dev_priv->relative_constants_mode = instp_mode;
1291 } 1402 }
1292 1403
1293 if (args->flags & I915_EXEC_GEN7_SOL_RESET) { 1404 if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1294 ret = i915_reset_gen7_sol_offsets(dev, params->request); 1405 ret = i915_reset_gen7_sol_offsets(params->request);
1295 if (ret) 1406 if (ret)
1296 return ret; 1407 return ret;
1297 } 1408 }
1298 1409
1299 exec_len = args->batch_len; 1410 exec_len = args->batch_len;
1300 exec_start = params->batch_obj_vm_offset + 1411 exec_start = params->batch->node.start +
1301 params->args_batch_start_offset; 1412 params->args_batch_start_offset;
1302 1413
1303 if (exec_len == 0) 1414 if (exec_len == 0)
1304 exec_len = params->batch_obj->base.size; 1415 exec_len = params->batch->size;
1305 1416
1306 ret = engine->dispatch_execbuffer(params->request, 1417 ret = params->engine->emit_bb_start(params->request,
1307 exec_start, exec_len, 1418 exec_start, exec_len,
1308 params->dispatch_flags); 1419 params->dispatch_flags);
1309 if (ret) 1420 if (ret)
1310 return ret; 1421 return ret;
1311 1422
@@ -1318,43 +1429,24 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
1318 1429
1319/** 1430/**
1320 * Find one BSD ring to dispatch the corresponding BSD command. 1431 * Find one BSD ring to dispatch the corresponding BSD command.
1321 * The ring index is returned. 1432 * The engine index is returned.
1322 */ 1433 */
1323static unsigned int 1434static unsigned int
1324gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file) 1435gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
1436 struct drm_file *file)
1325{ 1437{
1326 struct drm_i915_file_private *file_priv = file->driver_priv; 1438 struct drm_i915_file_private *file_priv = file->driver_priv;
1327 1439
1328 /* Check whether the file_priv has already selected one ring. */ 1440 /* Check whether the file_priv has already selected one ring. */
1329 if ((int)file_priv->bsd_ring < 0) { 1441 if ((int)file_priv->bsd_engine < 0) {
1330 /* If not, use the ping-pong mechanism to select one. */ 1442 /* If not, use the ping-pong mechanism to select one. */
1331 mutex_lock(&dev_priv->drm.struct_mutex); 1443 mutex_lock(&dev_priv->drm.struct_mutex);
1332 file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index; 1444 file_priv->bsd_engine = dev_priv->mm.bsd_engine_dispatch_index;
1333 dev_priv->mm.bsd_ring_dispatch_index ^= 1; 1445 dev_priv->mm.bsd_engine_dispatch_index ^= 1;
1334 mutex_unlock(&dev_priv->drm.struct_mutex); 1446 mutex_unlock(&dev_priv->drm.struct_mutex);
1335 } 1447 }
1336 1448
1337 return file_priv->bsd_ring; 1449 return file_priv->bsd_engine;
1338}
1339
1340static struct drm_i915_gem_object *
1341eb_get_batch(struct eb_vmas *eb)
1342{
1343 struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1344
1345 /*
1346 * SNA is doing fancy tricks with compressing batch buffers, which leads
1347 * to negative relocation deltas. Usually that works out ok since the
1348 * relocate address is still positive, except when the batch is placed
1349 * very low in the GTT. Ensure this doesn't happen.
1350 *
1351 * Note that actual hangs have only been observed on gen7, but for
1352 * paranoia do it everywhere.
1353 */
1354 if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
1355 vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1356
1357 return vma->obj;
1358} 1450}
1359 1451
1360#define I915_USER_RINGS (4) 1452#define I915_USER_RINGS (4)
@@ -1367,31 +1459,31 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
1367 [I915_EXEC_VEBOX] = VECS 1459 [I915_EXEC_VEBOX] = VECS
1368}; 1460};
1369 1461
1370static int 1462static struct intel_engine_cs *
1371eb_select_ring(struct drm_i915_private *dev_priv, 1463eb_select_engine(struct drm_i915_private *dev_priv,
1372 struct drm_file *file, 1464 struct drm_file *file,
1373 struct drm_i915_gem_execbuffer2 *args, 1465 struct drm_i915_gem_execbuffer2 *args)
1374 struct intel_engine_cs **ring)
1375{ 1466{
1376 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; 1467 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
1468 struct intel_engine_cs *engine;
1377 1469
1378 if (user_ring_id > I915_USER_RINGS) { 1470 if (user_ring_id > I915_USER_RINGS) {
1379 DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); 1471 DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
1380 return -EINVAL; 1472 return NULL;
1381 } 1473 }
1382 1474
1383 if ((user_ring_id != I915_EXEC_BSD) && 1475 if ((user_ring_id != I915_EXEC_BSD) &&
1384 ((args->flags & I915_EXEC_BSD_MASK) != 0)) { 1476 ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
1385 DRM_DEBUG("execbuf with non bsd ring but with invalid " 1477 DRM_DEBUG("execbuf with non bsd ring but with invalid "
1386 "bsd dispatch flags: %d\n", (int)(args->flags)); 1478 "bsd dispatch flags: %d\n", (int)(args->flags));
1387 return -EINVAL; 1479 return NULL;
1388 } 1480 }
1389 1481
1390 if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) { 1482 if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
1391 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; 1483 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
1392 1484
1393 if (bsd_idx == I915_EXEC_BSD_DEFAULT) { 1485 if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
1394 bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file); 1486 bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
1395 } else if (bsd_idx >= I915_EXEC_BSD_RING1 && 1487 } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
1396 bsd_idx <= I915_EXEC_BSD_RING2) { 1488 bsd_idx <= I915_EXEC_BSD_RING2) {
1397 bsd_idx >>= I915_EXEC_BSD_SHIFT; 1489 bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -1399,20 +1491,20 @@ eb_select_ring(struct drm_i915_private *dev_priv,
1399 } else { 1491 } else {
1400 DRM_DEBUG("execbuf with unknown bsd ring: %u\n", 1492 DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
1401 bsd_idx); 1493 bsd_idx);
1402 return -EINVAL; 1494 return NULL;
1403 } 1495 }
1404 1496
1405 *ring = &dev_priv->engine[_VCS(bsd_idx)]; 1497 engine = &dev_priv->engine[_VCS(bsd_idx)];
1406 } else { 1498 } else {
1407 *ring = &dev_priv->engine[user_ring_map[user_ring_id]]; 1499 engine = &dev_priv->engine[user_ring_map[user_ring_id]];
1408 } 1500 }
1409 1501
1410 if (!intel_engine_initialized(*ring)) { 1502 if (!intel_engine_initialized(engine)) {
1411 DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); 1503 DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
1412 return -EINVAL; 1504 return NULL;
1413 } 1505 }
1414 1506
1415 return 0; 1507 return engine;
1416} 1508}
1417 1509
1418static int 1510static int
@@ -1423,9 +1515,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1423{ 1515{
1424 struct drm_i915_private *dev_priv = to_i915(dev); 1516 struct drm_i915_private *dev_priv = to_i915(dev);
1425 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1517 struct i915_ggtt *ggtt = &dev_priv->ggtt;
1426 struct drm_i915_gem_request *req = NULL;
1427 struct eb_vmas *eb; 1518 struct eb_vmas *eb;
1428 struct drm_i915_gem_object *batch_obj;
1429 struct drm_i915_gem_exec_object2 shadow_exec_entry; 1519 struct drm_i915_gem_exec_object2 shadow_exec_entry;
1430 struct intel_engine_cs *engine; 1520 struct intel_engine_cs *engine;
1431 struct i915_gem_context *ctx; 1521 struct i915_gem_context *ctx;
@@ -1454,9 +1544,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1454 if (args->flags & I915_EXEC_IS_PINNED) 1544 if (args->flags & I915_EXEC_IS_PINNED)
1455 dispatch_flags |= I915_DISPATCH_PINNED; 1545 dispatch_flags |= I915_DISPATCH_PINNED;
1456 1546
1457 ret = eb_select_ring(dev_priv, file, args, &engine); 1547 engine = eb_select_engine(dev_priv, file, args);
1458 if (ret) 1548 if (!engine)
1459 return ret; 1549 return -EINVAL;
1460 1550
1461 if (args->buffer_count < 1) { 1551 if (args->buffer_count < 1) {
1462 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); 1552 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
@@ -1496,7 +1586,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1496 goto pre_mutex_err; 1586 goto pre_mutex_err;
1497 } 1587 }
1498 1588
1499 i915_gem_context_reference(ctx); 1589 i915_gem_context_get(ctx);
1500 1590
1501 if (ctx->ppgtt) 1591 if (ctx->ppgtt)
1502 vm = &ctx->ppgtt->base; 1592 vm = &ctx->ppgtt->base;
@@ -1507,7 +1597,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1507 1597
1508 eb = eb_create(args); 1598 eb = eb_create(args);
1509 if (eb == NULL) { 1599 if (eb == NULL) {
1510 i915_gem_context_unreference(ctx); 1600 i915_gem_context_put(ctx);
1511 mutex_unlock(&dev->struct_mutex); 1601 mutex_unlock(&dev->struct_mutex);
1512 ret = -ENOMEM; 1602 ret = -ENOMEM;
1513 goto pre_mutex_err; 1603 goto pre_mutex_err;
@@ -1519,7 +1609,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1519 goto err; 1609 goto err;
1520 1610
1521 /* take note of the batch buffer before we might reorder the lists */ 1611 /* take note of the batch buffer before we might reorder the lists */
1522 batch_obj = eb_get_batch(eb); 1612 params->batch = eb_get_batch(eb);
1523 1613
1524 /* Move the objects en-masse into the GTT, evicting if necessary. */ 1614 /* Move the objects en-masse into the GTT, evicting if necessary. */
1525 need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; 1615 need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
@@ -1543,34 +1633,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1543 } 1633 }
1544 1634
1545 /* Set the pending read domains for the batch buffer to COMMAND */ 1635 /* Set the pending read domains for the batch buffer to COMMAND */
1546 if (batch_obj->base.pending_write_domain) { 1636 if (params->batch->obj->base.pending_write_domain) {
1547 DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); 1637 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1548 ret = -EINVAL; 1638 ret = -EINVAL;
1549 goto err; 1639 goto err;
1550 } 1640 }
1551 1641
1552 params->args_batch_start_offset = args->batch_start_offset; 1642 params->args_batch_start_offset = args->batch_start_offset;
1553 if (i915_needs_cmd_parser(engine) && args->batch_len) { 1643 if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
1554 struct drm_i915_gem_object *parsed_batch_obj; 1644 struct i915_vma *vma;
1555 1645
1556 parsed_batch_obj = i915_gem_execbuffer_parse(engine, 1646 vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
1557 &shadow_exec_entry, 1647 params->batch->obj,
1558 eb, 1648 eb,
1559 batch_obj, 1649 args->batch_start_offset,
1560 args->batch_start_offset, 1650 args->batch_len,
1561 args->batch_len, 1651 drm_is_current_master(file));
1562 drm_is_current_master(file)); 1652 if (IS_ERR(vma)) {
1563 if (IS_ERR(parsed_batch_obj)) { 1653 ret = PTR_ERR(vma);
1564 ret = PTR_ERR(parsed_batch_obj);
1565 goto err; 1654 goto err;
1566 } 1655 }
1567 1656
1568 /* 1657 if (vma) {
1569 * parsed_batch_obj == batch_obj means batch not fully parsed:
1570 * Accept, but don't promote to secure.
1571 */
1572
1573 if (parsed_batch_obj != batch_obj) {
1574 /* 1658 /*
1575 * Batch parsed and accepted: 1659 * Batch parsed and accepted:
1576 * 1660 *
@@ -1582,16 +1666,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1582 */ 1666 */
1583 dispatch_flags |= I915_DISPATCH_SECURE; 1667 dispatch_flags |= I915_DISPATCH_SECURE;
1584 params->args_batch_start_offset = 0; 1668 params->args_batch_start_offset = 0;
1585 batch_obj = parsed_batch_obj; 1669 params->batch = vma;
1586 } 1670 }
1587 } 1671 }
1588 1672
1589 batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; 1673 params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1590 1674
1591 /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure 1675 /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1592 * batch" bit. Hence we need to pin secure batches into the global gtt. 1676 * batch" bit. Hence we need to pin secure batches into the global gtt.
1593 * hsw should have this fixed, but bdw mucks it up again. */ 1677 * hsw should have this fixed, but bdw mucks it up again. */
1594 if (dispatch_flags & I915_DISPATCH_SECURE) { 1678 if (dispatch_flags & I915_DISPATCH_SECURE) {
1679 struct drm_i915_gem_object *obj = params->batch->obj;
1680
1595 /* 1681 /*
1596 * So on first glance it looks freaky that we pin the batch here 1682 * So on first glance it looks freaky that we pin the batch here
1597 * outside of the reservation loop. But: 1683 * outside of the reservation loop. But:
@@ -1602,22 +1688,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1602 * fitting due to fragmentation. 1688 * fitting due to fragmentation.
1603 * So this is actually safe. 1689 * So this is actually safe.
1604 */ 1690 */
1605 ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); 1691 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
1606 if (ret) 1692 if (ret)
1607 goto err; 1693 goto err;
1608 1694
1609 params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj); 1695 params->batch = i915_gem_obj_to_ggtt(obj);
1610 } else 1696 }
1611 params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
1612 1697
1613 /* Allocate a request for this batch buffer nice and early. */ 1698 /* Allocate a request for this batch buffer nice and early. */
1614 req = i915_gem_request_alloc(engine, ctx); 1699 params->request = i915_gem_request_alloc(engine, ctx);
1615 if (IS_ERR(req)) { 1700 if (IS_ERR(params->request)) {
1616 ret = PTR_ERR(req); 1701 ret = PTR_ERR(params->request);
1617 goto err_batch_unpin; 1702 goto err_batch_unpin;
1618 } 1703 }
1619 1704
1620 ret = i915_gem_request_add_to_client(req, file); 1705 ret = i915_gem_request_add_to_client(params->request, file);
1621 if (ret) 1706 if (ret)
1622 goto err_request; 1707 goto err_request;
1623 1708
@@ -1631,13 +1716,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1631 params->file = file; 1716 params->file = file;
1632 params->engine = engine; 1717 params->engine = engine;
1633 params->dispatch_flags = dispatch_flags; 1718 params->dispatch_flags = dispatch_flags;
1634 params->batch_obj = batch_obj;
1635 params->ctx = ctx; 1719 params->ctx = ctx;
1636 params->request = req;
1637 1720
1638 ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); 1721 ret = execbuf_submit(params, args, &eb->vmas);
1639err_request: 1722err_request:
1640 i915_gem_execbuffer_retire_commands(params); 1723 __i915_add_request(params->request, params->batch->obj, ret == 0);
1641 1724
1642err_batch_unpin: 1725err_batch_unpin:
1643 /* 1726 /*
@@ -1647,11 +1730,10 @@ err_batch_unpin:
1647 * active. 1730 * active.
1648 */ 1731 */
1649 if (dispatch_flags & I915_DISPATCH_SECURE) 1732 if (dispatch_flags & I915_DISPATCH_SECURE)
1650 i915_gem_object_ggtt_unpin(batch_obj); 1733 i915_vma_unpin(params->batch);
1651
1652err: 1734err:
1653 /* the request owns the ref now */ 1735 /* the request owns the ref now */
1654 i915_gem_context_unreference(ctx); 1736 i915_gem_context_put(ctx);
1655 eb_destroy(eb); 1737 eb_destroy(eb);
1656 1738
1657 mutex_unlock(&dev->struct_mutex); 1739 mutex_unlock(&dev->struct_mutex);
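Taken together, the validate_exec_list() and eb_vma_misplaced() changes above add EXEC_OBJECT_PAD_TO_SIZE to the execbuf uAPI: userspace may ask for the object's GTT node to be at least pad_to_size bytes (page aligned), and an existing binding that is smaller is treated as misplaced and rebound. A minimal userspace sketch, assuming only the i915_drm.h names referenced in this diff (the helper itself is hypothetical):

	#include <stdint.h>
	#include <string.h>
	#include <drm/i915_drm.h>

	/* Hypothetical helper: ask the kernel to pad this object's GTT
	 * allocation to at least 1 MiB.  pad_to_size must be page aligned
	 * or validate_exec_list() rejects the execbuf with -EINVAL.
	 */
	static void example_fill_exec_object(struct drm_i915_gem_exec_object2 *obj,
					     uint32_t handle)
	{
		memset(obj, 0, sizeof(*obj));
		obj->handle = handle;
		obj->flags = EXEC_OBJECT_PAD_TO_SIZE;
		obj->pad_to_size = 1 << 20;	/* 1 MiB */
	}
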
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 251d7a95af89..9e8173fe2a09 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -86,20 +86,22 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
86 86
87 if (obj) { 87 if (obj) {
88 u32 size = i915_gem_obj_ggtt_size(obj); 88 u32 size = i915_gem_obj_ggtt_size(obj);
89 unsigned int tiling = i915_gem_object_get_tiling(obj);
90 unsigned int stride = i915_gem_object_get_stride(obj);
89 uint64_t val; 91 uint64_t val;
90 92
91 /* Adjust fence size to match tiled area */ 93 /* Adjust fence size to match tiled area */
92 if (obj->tiling_mode != I915_TILING_NONE) { 94 if (tiling != I915_TILING_NONE) {
93 uint32_t row_size = obj->stride * 95 uint32_t row_size = stride *
94 (obj->tiling_mode == I915_TILING_Y ? 32 : 8); 96 (tiling == I915_TILING_Y ? 32 : 8);
95 size = (size / row_size) * row_size; 97 size = (size / row_size) * row_size;
96 } 98 }
97 99
98 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 100 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
99 0xfffff000) << 32; 101 0xfffff000) << 32;
100 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 102 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
101 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 103 val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift;
102 if (obj->tiling_mode == I915_TILING_Y) 104 if (tiling == I915_TILING_Y)
103 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 105 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
104 val |= I965_FENCE_REG_VALID; 106 val |= I965_FENCE_REG_VALID;
105 107
@@ -122,6 +124,8 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
122 124
123 if (obj) { 125 if (obj) {
124 u32 size = i915_gem_obj_ggtt_size(obj); 126 u32 size = i915_gem_obj_ggtt_size(obj);
127 unsigned int tiling = i915_gem_object_get_tiling(obj);
128 unsigned int stride = i915_gem_object_get_stride(obj);
125 int pitch_val; 129 int pitch_val;
126 int tile_width; 130 int tile_width;
127 131
@@ -131,17 +135,17 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
131 "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", 135 "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
132 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 136 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
133 137
134 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 138 if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
135 tile_width = 128; 139 tile_width = 128;
136 else 140 else
137 tile_width = 512; 141 tile_width = 512;
138 142
139 /* Note: pitch better be a power of two tile widths */ 143 /* Note: pitch better be a power of two tile widths */
140 pitch_val = obj->stride / tile_width; 144 pitch_val = stride / tile_width;
141 pitch_val = ffs(pitch_val) - 1; 145 pitch_val = ffs(pitch_val) - 1;
142 146
143 val = i915_gem_obj_ggtt_offset(obj); 147 val = i915_gem_obj_ggtt_offset(obj);
144 if (obj->tiling_mode == I915_TILING_Y) 148 if (tiling == I915_TILING_Y)
145 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 149 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
146 val |= I915_FENCE_SIZE_BITS(size); 150 val |= I915_FENCE_SIZE_BITS(size);
147 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 151 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
@@ -161,6 +165,8 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
161 165
162 if (obj) { 166 if (obj) {
163 u32 size = i915_gem_obj_ggtt_size(obj); 167 u32 size = i915_gem_obj_ggtt_size(obj);
168 unsigned int tiling = i915_gem_object_get_tiling(obj);
169 unsigned int stride = i915_gem_object_get_stride(obj);
164 uint32_t pitch_val; 170 uint32_t pitch_val;
165 171
166 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 172 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
@@ -169,11 +175,11 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
169 "object 0x%08llx not 512K or pot-size 0x%08x aligned\n", 175 "object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
170 i915_gem_obj_ggtt_offset(obj), size); 176 i915_gem_obj_ggtt_offset(obj), size);
171 177
172 pitch_val = obj->stride / 128; 178 pitch_val = stride / 128;
173 pitch_val = ffs(pitch_val) - 1; 179 pitch_val = ffs(pitch_val) - 1;
174 180
175 val = i915_gem_obj_ggtt_offset(obj); 181 val = i915_gem_obj_ggtt_offset(obj);
176 if (obj->tiling_mode == I915_TILING_Y) 182 if (tiling == I915_TILING_Y)
177 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 183 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
178 val |= I830_FENCE_SIZE_BITS(size); 184 val |= I830_FENCE_SIZE_BITS(size);
179 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 185 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
@@ -201,9 +207,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
201 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 207 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
202 mb(); 208 mb();
203 209
204 WARN(obj && (!obj->stride || !obj->tiling_mode), 210 WARN(obj &&
211 (!i915_gem_object_get_stride(obj) ||
212 !i915_gem_object_get_tiling(obj)),
205 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 213 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
206 obj->stride, obj->tiling_mode); 214 i915_gem_object_get_stride(obj),
215 i915_gem_object_get_tiling(obj));
207 216
208 if (IS_GEN2(dev)) 217 if (IS_GEN2(dev))
209 i830_write_fence_reg(dev, reg, obj); 218 i830_write_fence_reg(dev, reg, obj);
@@ -248,7 +257,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
248 257
249static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 258static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
250{ 259{
251 if (obj->tiling_mode) 260 if (i915_gem_object_is_tiled(obj))
252 i915_gem_release_mmap(obj); 261 i915_gem_release_mmap(obj);
253 262
254 /* As we do not have an associated fence register, we will force 263 /* As we do not have an associated fence register, we will force
@@ -261,15 +270,8 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
261static int 270static int
262i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 271i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
263{ 272{
264 if (obj->last_fenced_req) { 273 return i915_gem_active_retire(&obj->last_fence,
265 int ret = i915_wait_request(obj->last_fenced_req); 274 &obj->base.dev->struct_mutex);
266 if (ret)
267 return ret;
268
269 i915_gem_request_assign(&obj->last_fenced_req, NULL);
270 }
271
272 return 0;
273} 275}
274 276
275/** 277/**
@@ -368,7 +370,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
368{ 370{
369 struct drm_device *dev = obj->base.dev; 371 struct drm_device *dev = obj->base.dev;
370 struct drm_i915_private *dev_priv = to_i915(dev); 372 struct drm_i915_private *dev_priv = to_i915(dev);
371 bool enable = obj->tiling_mode != I915_TILING_NONE; 373 bool enable = i915_gem_object_is_tiled(obj);
372 struct drm_i915_fence_reg *reg; 374 struct drm_i915_fence_reg *reg;
373 int ret; 375 int ret;
374 376
@@ -438,7 +440,7 @@ i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
438 440
439 WARN_ON(!ggtt_vma || 441 WARN_ON(!ggtt_vma ||
440 dev_priv->fence_regs[obj->fence_reg].pin_count > 442 dev_priv->fence_regs[obj->fence_reg].pin_count >
441 ggtt_vma->pin_count); 443 i915_vma_pin_count(ggtt_vma));
442 dev_priv->fence_regs[obj->fence_reg].pin_count++; 444 dev_priv->fence_regs[obj->fence_reg].pin_count++;
443 return true; 445 return true;
444 } else 446 } else
@@ -484,7 +486,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
484 */ 486 */
485 if (reg->obj) { 487 if (reg->obj) {
486 i915_gem_object_update_fence(reg->obj, reg, 488 i915_gem_object_update_fence(reg->obj, reg,
487 reg->obj->tiling_mode); 489 i915_gem_object_get_tiling(reg->obj));
488 } else { 490 } else {
489 i915_gem_write_fence(dev, i, NULL); 491 i915_gem_write_fence(dev, i, NULL);
490 } 492 }
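The fence register writers above now read tiling and stride through i915_gem_object_get_tiling() and i915_gem_object_get_stride() instead of separate obj->tiling_mode and obj->stride fields, matching the "Repack fence tiling mode and stride into a single integer" patch in this pull. The exact encoding is not shown in this hunk, so the sketch below is only one plausible packing: because a tiled stride is a multiple of the tile width (128 or 512 bytes), its low bits are free to carry the two-bit tiling mode.

	/* Illustrative packing only; the mask values are assumptions. */
	#define EXAMPLE_TILING_MASK	0x3u		/* I915_TILING_{NONE,X,Y} */
	#define EXAMPLE_STRIDE_MASK	(~EXAMPLE_TILING_MASK)

	static inline unsigned int example_get_tiling(unsigned int packed)
	{
		return packed & EXAMPLE_TILING_MASK;
	}

	static inline unsigned int example_get_stride(unsigned int packed)
	{
		return packed & EXAMPLE_STRIDE_MASK;
	}
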
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 10f1e32767e6..18c7c9644761 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -184,7 +184,7 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
184{ 184{
185 vma->vm->clear_range(vma->vm, 185 vma->vm->clear_range(vma->vm,
186 vma->node.start, 186 vma->node.start,
187 vma->obj->base.size, 187 vma->size,
188 true); 188 true);
189} 189}
190 190
@@ -669,6 +669,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
669 unsigned entry, 669 unsigned entry,
670 dma_addr_t addr) 670 dma_addr_t addr)
671{ 671{
672 struct intel_ring *ring = req->ring;
672 struct intel_engine_cs *engine = req->engine; 673 struct intel_engine_cs *engine = req->engine;
673 int ret; 674 int ret;
674 675
@@ -678,13 +679,13 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
678 if (ret) 679 if (ret)
679 return ret; 680 return ret;
680 681
681 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 682 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
682 intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry)); 683 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry));
683 intel_ring_emit(engine, upper_32_bits(addr)); 684 intel_ring_emit(ring, upper_32_bits(addr));
684 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 685 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
685 intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry)); 686 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry));
686 intel_ring_emit(engine, lower_32_bits(addr)); 687 intel_ring_emit(ring, lower_32_bits(addr));
687 intel_ring_advance(engine); 688 intel_ring_advance(ring);
688 689
689 return 0; 690 return 0;
690} 691}
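gen8_write_pdp() above shows the command-emission idiom this series converts the driver to: space is still reserved against the request with intel_ring_begin(), but the dwords are written into req->ring rather than the engine. A condensed sketch of that pattern, using only calls that appear elsewhere in this diff (the register and value parameters are placeholders, and the helper name is assumed):

	#include "i915_drv.h"		/* MI_LOAD_REGISTER_IMM, MI_NOOP */
	#include "intel_ringbuffer.h"	/* intel_ring_emit() and friends */

	/* Illustrative helper: emit a single MI_LOAD_REGISTER_IMM into the
	 * request's ring.  Four dwords are reserved: the LRI header, the
	 * register offset, the value, and a NOOP to keep the length even.
	 */
	static int example_emit_lri(struct drm_i915_gem_request *req,
				    i915_reg_t reg, u32 value)
	{
		struct intel_ring *ring = req->ring;
		int ret;

		ret = intel_ring_begin(req, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit_reg(ring, reg);
		intel_ring_emit(ring, value);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);

		return 0;
	}
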
@@ -1660,11 +1661,12 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1660static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1661static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1661 struct drm_i915_gem_request *req) 1662 struct drm_i915_gem_request *req)
1662{ 1663{
1664 struct intel_ring *ring = req->ring;
1663 struct intel_engine_cs *engine = req->engine; 1665 struct intel_engine_cs *engine = req->engine;
1664 int ret; 1666 int ret;
1665 1667
1666 /* NB: TLBs must be flushed and invalidated before a switch */ 1668 /* NB: TLBs must be flushed and invalidated before a switch */
1667 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1669 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1668 if (ret) 1670 if (ret)
1669 return ret; 1671 return ret;
1670 1672
@@ -1672,13 +1674,13 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1672 if (ret) 1674 if (ret)
1673 return ret; 1675 return ret;
1674 1676
1675 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); 1677 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1676 intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); 1678 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
1677 intel_ring_emit(engine, PP_DIR_DCLV_2G); 1679 intel_ring_emit(ring, PP_DIR_DCLV_2G);
1678 intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); 1680 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
1679 intel_ring_emit(engine, get_pd_offset(ppgtt)); 1681 intel_ring_emit(ring, get_pd_offset(ppgtt));
1680 intel_ring_emit(engine, MI_NOOP); 1682 intel_ring_emit(ring, MI_NOOP);
1681 intel_ring_advance(engine); 1683 intel_ring_advance(ring);
1682 1684
1683 return 0; 1685 return 0;
1684} 1686}
@@ -1686,11 +1688,12 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1686static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1688static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1687 struct drm_i915_gem_request *req) 1689 struct drm_i915_gem_request *req)
1688{ 1690{
1691 struct intel_ring *ring = req->ring;
1689 struct intel_engine_cs *engine = req->engine; 1692 struct intel_engine_cs *engine = req->engine;
1690 int ret; 1693 int ret;
1691 1694
1692 /* NB: TLBs must be flushed and invalidated before a switch */ 1695 /* NB: TLBs must be flushed and invalidated before a switch */
1693 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1696 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1694 if (ret) 1697 if (ret)
1695 return ret; 1698 return ret;
1696 1699
@@ -1698,17 +1701,17 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1698 if (ret) 1701 if (ret)
1699 return ret; 1702 return ret;
1700 1703
1701 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); 1704 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1702 intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); 1705 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
1703 intel_ring_emit(engine, PP_DIR_DCLV_2G); 1706 intel_ring_emit(ring, PP_DIR_DCLV_2G);
1704 intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); 1707 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
1705 intel_ring_emit(engine, get_pd_offset(ppgtt)); 1708 intel_ring_emit(ring, get_pd_offset(ppgtt));
1706 intel_ring_emit(engine, MI_NOOP); 1709 intel_ring_emit(ring, MI_NOOP);
1707 intel_ring_advance(engine); 1710 intel_ring_advance(ring);
1708 1711
1709 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 1712 /* XXX: RCS is the only one to auto invalidate the TLBs? */
1710 if (engine->id != RCS) { 1713 if (engine->id != RCS) {
1711 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1714 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1712 if (ret) 1715 if (ret)
1713 return ret; 1716 return ret;
1714 } 1717 }
@@ -2009,7 +2012,7 @@ alloc:
2009 0, ggtt->base.total, 2012 0, ggtt->base.total,
2010 DRM_MM_TOPDOWN); 2013 DRM_MM_TOPDOWN);
2011 if (ret == -ENOSPC && !retried) { 2014 if (ret == -ENOSPC && !retried) {
2012 ret = i915_gem_evict_something(dev, &ggtt->base, 2015 ret = i915_gem_evict_something(&ggtt->base,
2013 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2016 GEN6_PD_SIZE, GEN6_PD_ALIGN,
2014 I915_CACHE_NONE, 2017 I915_CACHE_NONE,
2015 0, ggtt->base.total, 2018 0, ggtt->base.total,
@@ -2101,11 +2104,12 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2101 return 0; 2104 return 0;
2102} 2105}
2103 2106
2104static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 2107static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2108 struct drm_i915_private *dev_priv)
2105{ 2109{
2106 ppgtt->base.dev = dev; 2110 ppgtt->base.dev = &dev_priv->drm;
2107 2111
2108 if (INTEL_INFO(dev)->gen < 8) 2112 if (INTEL_INFO(dev_priv)->gen < 8)
2109 return gen6_ppgtt_init(ppgtt); 2113 return gen6_ppgtt_init(ppgtt);
2110 else 2114 else
2111 return gen8_ppgtt_init(ppgtt); 2115 return gen8_ppgtt_init(ppgtt);
@@ -2115,9 +2119,9 @@ static void i915_address_space_init(struct i915_address_space *vm,
2115 struct drm_i915_private *dev_priv) 2119 struct drm_i915_private *dev_priv)
2116{ 2120{
2117 drm_mm_init(&vm->mm, vm->start, vm->total); 2121 drm_mm_init(&vm->mm, vm->start, vm->total);
2118 vm->dev = &dev_priv->drm;
2119 INIT_LIST_HEAD(&vm->active_list); 2122 INIT_LIST_HEAD(&vm->active_list);
2120 INIT_LIST_HEAD(&vm->inactive_list); 2123 INIT_LIST_HEAD(&vm->inactive_list);
2124 INIT_LIST_HEAD(&vm->unbound_list);
2121 list_add_tail(&vm->global_link, &dev_priv->vm_list); 2125 list_add_tail(&vm->global_link, &dev_priv->vm_list);
2122} 2126}
2123 2127
@@ -2140,15 +2144,17 @@ static void gtt_write_workarounds(struct drm_device *dev)
2140 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2144 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2141} 2145}
2142 2146
2143static int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 2147static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2148 struct drm_i915_private *dev_priv,
2149 struct drm_i915_file_private *file_priv)
2144{ 2150{
2145 struct drm_i915_private *dev_priv = to_i915(dev); 2151 int ret;
2146 int ret = 0;
2147 2152
2148 ret = __hw_ppgtt_init(dev, ppgtt); 2153 ret = __hw_ppgtt_init(ppgtt, dev_priv);
2149 if (ret == 0) { 2154 if (ret == 0) {
2150 kref_init(&ppgtt->ref); 2155 kref_init(&ppgtt->ref);
2151 i915_address_space_init(&ppgtt->base, dev_priv); 2156 i915_address_space_init(&ppgtt->base, dev_priv);
2157 ppgtt->base.file = file_priv;
2152 } 2158 }
2153 2159
2154 return ret; 2160 return ret;
@@ -2180,7 +2186,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
2180} 2186}
2181 2187
2182struct i915_hw_ppgtt * 2188struct i915_hw_ppgtt *
2183i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) 2189i915_ppgtt_create(struct drm_i915_private *dev_priv,
2190 struct drm_i915_file_private *fpriv)
2184{ 2191{
2185 struct i915_hw_ppgtt *ppgtt; 2192 struct i915_hw_ppgtt *ppgtt;
2186 int ret; 2193 int ret;
@@ -2189,14 +2196,12 @@ i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2189 if (!ppgtt) 2196 if (!ppgtt)
2190 return ERR_PTR(-ENOMEM); 2197 return ERR_PTR(-ENOMEM);
2191 2198
2192 ret = i915_ppgtt_init(dev, ppgtt); 2199 ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv);
2193 if (ret) { 2200 if (ret) {
2194 kfree(ppgtt); 2201 kfree(ppgtt);
2195 return ERR_PTR(ret); 2202 return ERR_PTR(ret);
2196 } 2203 }
2197 2204
2198 ppgtt->file_priv = fpriv;
2199
2200 trace_i915_ppgtt_create(&ppgtt->base); 2205 trace_i915_ppgtt_create(&ppgtt->base);
2201 2206
2202 return ppgtt; 2207 return ppgtt;
@@ -2209,9 +2214,10 @@ void i915_ppgtt_release(struct kref *kref)
2209 2214
2210 trace_i915_ppgtt_release(&ppgtt->base); 2215 trace_i915_ppgtt_release(&ppgtt->base);
2211 2216
2212 /* vmas should already be unbound */ 2217 /* vmas should already be unbound and destroyed */
2213 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2218 WARN_ON(!list_empty(&ppgtt->base.active_list));
2214 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2219 WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2220 WARN_ON(!list_empty(&ppgtt->base.unbound_list));
2215 2221
2216 list_del(&ppgtt->base.global_link); 2222 list_del(&ppgtt->base.global_link);
2217 drm_mm_takedown(&ppgtt->base.mm); 2223 drm_mm_takedown(&ppgtt->base.mm);
@@ -2220,47 +2226,21 @@ void i915_ppgtt_release(struct kref *kref)
2220 kfree(ppgtt); 2226 kfree(ppgtt);
2221} 2227}
2222 2228
2223extern int intel_iommu_gfx_mapped;
2224/* Certain Gen5 chipsets require require idling the GPU before 2229/* Certain Gen5 chipsets require require idling the GPU before
2225 * unmapping anything from the GTT when VT-d is enabled. 2230 * unmapping anything from the GTT when VT-d is enabled.
2226 */ 2231 */
2227static bool needs_idle_maps(struct drm_device *dev) 2232static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2228{ 2233{
2229#ifdef CONFIG_INTEL_IOMMU 2234#ifdef CONFIG_INTEL_IOMMU
2230 /* Query intel_iommu to see if we need the workaround. Presumably that 2235 /* Query intel_iommu to see if we need the workaround. Presumably that
2231 * was loaded first. 2236 * was loaded first.
2232 */ 2237 */
2233 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) 2238 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped)
2234 return true; 2239 return true;
2235#endif 2240#endif
2236 return false; 2241 return false;
2237} 2242}
2238 2243
2239static bool do_idling(struct drm_i915_private *dev_priv)
2240{
2241 struct i915_ggtt *ggtt = &dev_priv->ggtt;
2242 bool ret = dev_priv->mm.interruptible;
2243
2244 if (unlikely(ggtt->do_idle_maps)) {
2245 dev_priv->mm.interruptible = false;
2246 if (i915_gem_wait_for_idle(dev_priv)) {
2247 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2248 /* Wait a bit, in hopes it avoids the hang */
2249 udelay(10);
2250 }
2251 }
2252
2253 return ret;
2254}
2255
2256static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2257{
2258 struct i915_ggtt *ggtt = &dev_priv->ggtt;
2259
2260 if (unlikely(ggtt->do_idle_maps))
2261 dev_priv->mm.interruptible = interruptible;
2262}
2263
2264void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2244void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2265{ 2245{
2266 struct intel_engine_cs *engine; 2246 struct intel_engine_cs *engine;
@@ -2647,7 +2627,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
2647 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2627 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2648 * upgrade to both bound if we bind either to avoid double-binding. 2628 * upgrade to both bound if we bind either to avoid double-binding.
2649 */ 2629 */
2650 vma->bound |= GLOBAL_BIND | LOCAL_BIND; 2630 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2651 2631
2652 return 0; 2632 return 0;
2653} 2633}
@@ -2669,14 +2649,14 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2669 pte_flags |= PTE_READ_ONLY; 2649 pte_flags |= PTE_READ_ONLY;
2670 2650
2671 2651
2672 if (flags & GLOBAL_BIND) { 2652 if (flags & I915_VMA_GLOBAL_BIND) {
2673 vma->vm->insert_entries(vma->vm, 2653 vma->vm->insert_entries(vma->vm,
2674 vma->ggtt_view.pages, 2654 vma->ggtt_view.pages,
2675 vma->node.start, 2655 vma->node.start,
2676 cache_level, pte_flags); 2656 cache_level, pte_flags);
2677 } 2657 }
2678 2658
2679 if (flags & LOCAL_BIND) { 2659 if (flags & I915_VMA_LOCAL_BIND) {
2680 struct i915_hw_ppgtt *appgtt = 2660 struct i915_hw_ppgtt *appgtt =
2681 to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2661 to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2682 appgtt->base.insert_entries(&appgtt->base, 2662 appgtt->base.insert_entries(&appgtt->base,
@@ -2690,42 +2670,36 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2690 2670
2691static void ggtt_unbind_vma(struct i915_vma *vma) 2671static void ggtt_unbind_vma(struct i915_vma *vma)
2692{ 2672{
2693 struct drm_device *dev = vma->vm->dev; 2673 struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
2694 struct drm_i915_private *dev_priv = to_i915(dev); 2674 const u64 size = min(vma->size, vma->node.size);
2695 struct drm_i915_gem_object *obj = vma->obj;
2696 const uint64_t size = min_t(uint64_t,
2697 obj->base.size,
2698 vma->node.size);
2699 2675
2700 if (vma->bound & GLOBAL_BIND) { 2676 if (vma->flags & I915_VMA_GLOBAL_BIND)
2701 vma->vm->clear_range(vma->vm, 2677 vma->vm->clear_range(vma->vm,
2702 vma->node.start, 2678 vma->node.start, size,
2703 size,
2704 true); 2679 true);
2705 }
2706
2707 if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2708 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2709 2680
2681 if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
2710 appgtt->base.clear_range(&appgtt->base, 2682 appgtt->base.clear_range(&appgtt->base,
2711 vma->node.start, 2683 vma->node.start, size,
2712 size,
2713 true); 2684 true);
2714 }
2715} 2685}
2716 2686
2717void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 2687void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2718{ 2688{
2719 struct drm_device *dev = obj->base.dev; 2689 struct drm_device *dev = obj->base.dev;
2720 struct drm_i915_private *dev_priv = to_i915(dev); 2690 struct drm_i915_private *dev_priv = to_i915(dev);
2721 bool interruptible; 2691 struct i915_ggtt *ggtt = &dev_priv->ggtt;
2722 2692
2723 interruptible = do_idling(dev_priv); 2693 if (unlikely(ggtt->do_idle_maps)) {
2694 if (i915_gem_wait_for_idle(dev_priv, false)) {
2695 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2696 /* Wait a bit, in hopes it avoids the hang */
2697 udelay(10);
2698 }
2699 }
2724 2700
2725 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, 2701 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2726 PCI_DMA_BIDIRECTIONAL); 2702 PCI_DMA_BIDIRECTIONAL);
2727
2728 undo_idling(dev_priv, interruptible);
2729} 2703}
2730 2704
2731static void i915_gtt_color_adjust(struct drm_mm_node *node, 2705static void i915_gtt_color_adjust(struct drm_mm_node *node,
@@ -2736,19 +2710,14 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node,
2736 if (node->color != color) 2710 if (node->color != color)
2737 *start += 4096; 2711 *start += 4096;
2738 2712
2739 if (!list_empty(&node->node_list)) { 2713 node = list_first_entry_or_null(&node->node_list,
2740 node = list_entry(node->node_list.next, 2714 struct drm_mm_node,
2741 struct drm_mm_node, 2715 node_list);
2742 node_list); 2716 if (node && node->allocated && node->color != color)
2743 if (node->allocated && node->color != color) 2717 *end -= 4096;
2744 *end -= 4096;
2745 }
2746} 2718}
2747 2719
2748static int i915_gem_setup_global_gtt(struct drm_device *dev, 2720int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2749 u64 start,
2750 u64 mappable_end,
2751 u64 end)
2752{ 2721{
2753 /* Let GEM Manage all of the aperture. 2722 /* Let GEM Manage all of the aperture.
2754 * 2723 *
@@ -2759,48 +2728,15 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
2759 * aperture. One page should be enough to keep any prefetching inside 2728 * aperture. One page should be enough to keep any prefetching inside
2760 * of the aperture. 2729 * of the aperture.
2761 */ 2730 */
2762 struct drm_i915_private *dev_priv = to_i915(dev);
2763 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2731 struct i915_ggtt *ggtt = &dev_priv->ggtt;
2764 struct drm_mm_node *entry;
2765 struct drm_i915_gem_object *obj;
2766 unsigned long hole_start, hole_end; 2732 unsigned long hole_start, hole_end;
2733 struct drm_mm_node *entry;
2767 int ret; 2734 int ret;
2768 2735
2769 BUG_ON(mappable_end > end);
2770
2771 ggtt->base.start = start;
2772
2773 /* Subtract the guard page before address space initialization to
2774 * shrink the range used by drm_mm */
2775 ggtt->base.total = end - start - PAGE_SIZE;
2776 i915_address_space_init(&ggtt->base, dev_priv);
2777 ggtt->base.total += PAGE_SIZE;
2778
2779 ret = intel_vgt_balloon(dev_priv); 2736 ret = intel_vgt_balloon(dev_priv);
2780 if (ret) 2737 if (ret)
2781 return ret; 2738 return ret;
2782 2739
2783 if (!HAS_LLC(dev))
2784 ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
2785
2786 /* Mark any preallocated objects as occupied */
2787 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2788 struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base);
2789
2790 DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
2791 i915_gem_obj_ggtt_offset(obj), obj->base.size);
2792
2793 WARN_ON(i915_gem_obj_ggtt_bound(obj));
2794 ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
2795 if (ret) {
2796 DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2797 return ret;
2798 }
2799 vma->bound |= GLOBAL_BIND;
2800 __i915_vma_set_map_and_fenceable(vma);
2801 list_add_tail(&vma->vm_link, &ggtt->base.inactive_list);
2802 }
2803
2804 /* Clear any non-preallocated blocks */ 2740 /* Clear any non-preallocated blocks */
2805 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2741 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2806 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2742 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
@@ -2810,18 +2746,19 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
2810 } 2746 }
2811 2747
2812 /* And finally clear the reserved guard page */ 2748 /* And finally clear the reserved guard page */
2813 ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true); 2749 ggtt->base.clear_range(&ggtt->base,
2750 ggtt->base.total - PAGE_SIZE, PAGE_SIZE,
2751 true);
2814 2752
2815 if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { 2753 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
2816 struct i915_hw_ppgtt *ppgtt; 2754 struct i915_hw_ppgtt *ppgtt;
2817 2755
2818 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2756 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2819 if (!ppgtt) 2757 if (!ppgtt)
2820 return -ENOMEM; 2758 return -ENOMEM;
2821 2759
2822 ret = __hw_ppgtt_init(dev, ppgtt); 2760 ret = __hw_ppgtt_init(ppgtt, dev_priv);
2823 if (ret) { 2761 if (ret) {
2824 ppgtt->base.cleanup(&ppgtt->base);
2825 kfree(ppgtt); 2762 kfree(ppgtt);
2826 return ret; 2763 return ret;
2827 } 2764 }
@@ -2849,33 +2786,21 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
2849} 2786}
2850 2787
2851/** 2788/**
2852 * i915_gem_init_ggtt - Initialize GEM for Global GTT
2853 * @dev: DRM device
2854 */
2855void i915_gem_init_ggtt(struct drm_device *dev)
2856{
2857 struct drm_i915_private *dev_priv = to_i915(dev);
2858 struct i915_ggtt *ggtt = &dev_priv->ggtt;
2859
2860 i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total);
2861}
2862
2863/**
2864 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2789 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2865 * @dev: DRM device 2790 * @dev_priv: i915 device
2866 */ 2791 */
2867void i915_ggtt_cleanup_hw(struct drm_device *dev) 2792void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2868{ 2793{
2869 struct drm_i915_private *dev_priv = to_i915(dev);
2870 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2794 struct i915_ggtt *ggtt = &dev_priv->ggtt;
2871 2795
2872 if (dev_priv->mm.aliasing_ppgtt) { 2796 if (dev_priv->mm.aliasing_ppgtt) {
2873 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2797 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2874 2798
2875 ppgtt->base.cleanup(&ppgtt->base); 2799 ppgtt->base.cleanup(&ppgtt->base);
2800 kfree(ppgtt);
2876 } 2801 }
2877 2802
2878 i915_gem_cleanup_stolen(dev); 2803 i915_gem_cleanup_stolen(&dev_priv->drm);
2879 2804
2880 if (drm_mm_initialized(&ggtt->base.mm)) { 2805 if (drm_mm_initialized(&ggtt->base.mm)) {
2881 intel_vgt_deballoon(dev_priv); 2806 intel_vgt_deballoon(dev_priv);
@@ -2885,6 +2810,9 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev)
2885 } 2810 }
2886 2811
2887 ggtt->base.cleanup(&ggtt->base); 2812 ggtt->base.cleanup(&ggtt->base);
2813
2814 arch_phys_wc_del(ggtt->mtrr);
2815 io_mapping_free(ggtt->mappable);
2888} 2816}
2889 2817
2890static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2818static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -2965,17 +2893,14 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2965 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2893 return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2966} 2894}
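
For the gen9 branch shown above, GMCH control values of 0xf0 and up encode stolen memory in 4 MiB steps: (ctl - 0xf0 + 1) << 22. A standalone check of that arithmetic, using made-up register values purely for illustration:

#include <stdio.h>
#include <stdint.h>

/* Mirrors the 4 MiB-granularity branch of gen9_get_stolen_size(). */
static uint64_t gen9_stolen_4m(uint16_t gmch_ctl)
{
	return ((uint64_t)gmch_ctl - 0xf0 + 1) << 22;	/* 4 MiB units */
}

int main(void)
{
	uint16_t samples[] = { 0xf0, 0xf1, 0xf7 };	/* hypothetical control values */
	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("ctl=0x%02x -> %llu MiB stolen\n", samples[i],
		       (unsigned long long)(gen9_stolen_4m(samples[i]) >> 20));
	return 0;
}
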
2967 2895
2968static int ggtt_probe_common(struct drm_device *dev, 2896static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
2969 size_t gtt_size)
2970{ 2897{
2971 struct drm_i915_private *dev_priv = to_i915(dev); 2898 struct pci_dev *pdev = ggtt->base.dev->pdev;
2972 struct i915_ggtt *ggtt = &dev_priv->ggtt;
2973 struct i915_page_scratch *scratch_page; 2899 struct i915_page_scratch *scratch_page;
2974 phys_addr_t ggtt_phys_addr; 2900 phys_addr_t phys_addr;
2975 2901
2976 /* For Modern GENs the PTEs and register space are split in the BAR */ 2902 /* For Modern GENs the PTEs and register space are split in the BAR */
2977 ggtt_phys_addr = pci_resource_start(dev->pdev, 0) + 2903 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
2978 (pci_resource_len(dev->pdev, 0) / 2);
2979 2904
2980 /* 2905 /*
2981 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2906 * On BXT writes larger than 64 bit to the GTT pagetable range will be
@@ -2984,16 +2909,16 @@ static int ggtt_probe_common(struct drm_device *dev,
2984 * resort to an uncached mapping. The WC issue is easily caught by the 2909 * resort to an uncached mapping. The WC issue is easily caught by the
2985 * readback check when writing GTT PTE entries. 2910 * readback check when writing GTT PTE entries.
2986 */ 2911 */
2987 if (IS_BROXTON(dev)) 2912 if (IS_BROXTON(ggtt->base.dev))
2988 ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size); 2913 ggtt->gsm = ioremap_nocache(phys_addr, size);
2989 else 2914 else
2990 ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size); 2915 ggtt->gsm = ioremap_wc(phys_addr, size);
2991 if (!ggtt->gsm) { 2916 if (!ggtt->gsm) {
2992 DRM_ERROR("Failed to map the gtt page table\n"); 2917 DRM_ERROR("Failed to map the ggtt page table\n");
2993 return -ENOMEM; 2918 return -ENOMEM;
2994 } 2919 }
2995 2920
2996 scratch_page = alloc_scratch_page(dev); 2921 scratch_page = alloc_scratch_page(ggtt->base.dev);
2997 if (IS_ERR(scratch_page)) { 2922 if (IS_ERR(scratch_page)) {
2998 DRM_ERROR("Scratch setup failed\n"); 2923 DRM_ERROR("Scratch setup failed\n");
2999 /* iounmap will also get called at remove, but meh */ 2924 /* iounmap will also get called at remove, but meh */
@@ -3079,42 +3004,49 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3079 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3004 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3080} 3005}
3081 3006
3007static void gen6_gmch_remove(struct i915_address_space *vm)
3008{
3009 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3010
3011 iounmap(ggtt->gsm);
3012 free_scratch_page(vm->dev, vm->scratch_page);
3013}
3014
3082static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3015static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3083{ 3016{
3084 struct drm_device *dev = ggtt->base.dev; 3017 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
3085 struct drm_i915_private *dev_priv = to_i915(dev); 3018 struct pci_dev *pdev = dev_priv->drm.pdev;
3019 unsigned int size;
3086 u16 snb_gmch_ctl; 3020 u16 snb_gmch_ctl;
3087 int ret;
3088 3021
3089 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3022 /* TODO: We're not aware of mappable constraints on gen8 yet */
3090 ggtt->mappable_base = pci_resource_start(dev->pdev, 2); 3023 ggtt->mappable_base = pci_resource_start(pdev, 2);
3091 ggtt->mappable_end = pci_resource_len(dev->pdev, 2); 3024 ggtt->mappable_end = pci_resource_len(pdev, 2);
3092 3025
3093 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) 3026 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39)))
3094 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); 3027 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3095 3028
3096 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3029 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3097 3030
3098 if (INTEL_INFO(dev)->gen >= 9) { 3031 if (INTEL_GEN(dev_priv) >= 9) {
3099 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3032 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
3100 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); 3033 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3101 } else if (IS_CHERRYVIEW(dev)) { 3034 } else if (IS_CHERRYVIEW(dev_priv)) {
3102 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3035 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
3103 ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl); 3036 size = chv_get_total_gtt_size(snb_gmch_ctl);
3104 } else { 3037 } else {
3105 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3038 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
3106 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); 3039 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3107 } 3040 }
3108 3041
3109 ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3042 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3110 3043
3111 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3044 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
3112 chv_setup_private_ppat(dev_priv); 3045 chv_setup_private_ppat(dev_priv);
3113 else 3046 else
3114 bdw_setup_private_ppat(dev_priv); 3047 bdw_setup_private_ppat(dev_priv);
3115 3048
3116 ret = ggtt_probe_common(dev, ggtt->size); 3049 ggtt->base.cleanup = gen6_gmch_remove;
3117
3118 ggtt->base.bind_vma = ggtt_bind_vma; 3050 ggtt->base.bind_vma = ggtt_bind_vma;
3119 ggtt->base.unbind_vma = ggtt_unbind_vma; 3051 ggtt->base.unbind_vma = ggtt_unbind_vma;
3120 ggtt->base.insert_page = gen8_ggtt_insert_page; 3052 ggtt->base.insert_page = gen8_ggtt_insert_page;
@@ -3126,57 +3058,65 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3126 if (IS_CHERRYVIEW(dev_priv)) 3058 if (IS_CHERRYVIEW(dev_priv))
3127 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3059 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
3128 3060
3129 return ret; 3061 return ggtt_probe_common(ggtt, size);
3130} 3062}
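
gen8_gmch_probe() converts the byte size of the GTT page-table region into addressable space: every 8-byte gen8 PTE maps one 4 KiB page, hence total = (size / sizeof(pte)) << PAGE_SHIFT. A throwaway userspace check of that conversion; the 8 MiB table size below is only an example figure, not a probed value:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
typedef uint64_t gen8_pte_t;	/* one PTE maps one 4 KiB page */

int main(void)
{
	uint64_t gtt_table_bytes = 8ull << 20;	/* assume an 8 MiB PTE table */
	uint64_t total = (gtt_table_bytes / sizeof(gen8_pte_t)) << PAGE_SHIFT;

	/* 8 MiB of 8-byte PTEs -> 1M entries -> 4 GiB of GGTT space */
	printf("GGTT address space: %llu GiB\n",
	       (unsigned long long)(total >> 30));
	return 0;
}
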
3131 3063
3132static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3064static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3133{ 3065{
3134 struct drm_device *dev = ggtt->base.dev; 3066 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
3067 struct pci_dev *pdev = dev_priv->drm.pdev;
3068 unsigned int size;
3135 u16 snb_gmch_ctl; 3069 u16 snb_gmch_ctl;
3136 int ret;
3137 3070
3138 ggtt->mappable_base = pci_resource_start(dev->pdev, 2); 3071 ggtt->mappable_base = pci_resource_start(pdev, 2);
3139 ggtt->mappable_end = pci_resource_len(dev->pdev, 2); 3072 ggtt->mappable_end = pci_resource_len(pdev, 2);
3140 3073
3141 /* 64/512MB is the current min/max we actually know of, but this is just 3074 /* 64/512MB is the current min/max we actually know of, but this is just
3142 * a coarse sanity check. 3075 * a coarse sanity check.
3143 */ 3076 */
3144 if ((ggtt->mappable_end < (64<<20) || (ggtt->mappable_end > (512<<20)))) { 3077 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3145 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3078 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
3146 return -ENXIO; 3079 return -ENXIO;
3147 } 3080 }
3148 3081
3149 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) 3082 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
3150 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); 3083 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3151 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3084 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3152 3085
3153 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3086 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
3154 ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl);
3155 ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3156 3087
3157 ret = ggtt_probe_common(dev, ggtt->size); 3088 size = gen6_get_total_gtt_size(snb_gmch_ctl);
3089 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3158 3090
3159 ggtt->base.clear_range = gen6_ggtt_clear_range; 3091 ggtt->base.clear_range = gen6_ggtt_clear_range;
3160 ggtt->base.insert_page = gen6_ggtt_insert_page; 3092 ggtt->base.insert_page = gen6_ggtt_insert_page;
3161 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3093 ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3162 ggtt->base.bind_vma = ggtt_bind_vma; 3094 ggtt->base.bind_vma = ggtt_bind_vma;
3163 ggtt->base.unbind_vma = ggtt_unbind_vma; 3095 ggtt->base.unbind_vma = ggtt_unbind_vma;
3096 ggtt->base.cleanup = gen6_gmch_remove;
3097
3098 if (HAS_EDRAM(dev_priv))
3099 ggtt->base.pte_encode = iris_pte_encode;
3100 else if (IS_HASWELL(dev_priv))
3101 ggtt->base.pte_encode = hsw_pte_encode;
3102 else if (IS_VALLEYVIEW(dev_priv))
3103 ggtt->base.pte_encode = byt_pte_encode;
3104 else if (INTEL_GEN(dev_priv) >= 7)
3105 ggtt->base.pte_encode = ivb_pte_encode;
3106 else
3107 ggtt->base.pte_encode = snb_pte_encode;
3164 3108
3165 return ret; 3109 return ggtt_probe_common(ggtt, size);
3166} 3110}
3167 3111
3168static void gen6_gmch_remove(struct i915_address_space *vm) 3112static void i915_gmch_remove(struct i915_address_space *vm)
3169{ 3113{
3170 struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base); 3114 intel_gmch_remove();
3171
3172 iounmap(ggtt->gsm);
3173 free_scratch_page(vm->dev, vm->scratch_page);
3174} 3115}
3175 3116
3176static int i915_gmch_probe(struct i915_ggtt *ggtt) 3117static int i915_gmch_probe(struct i915_ggtt *ggtt)
3177{ 3118{
3178 struct drm_device *dev = ggtt->base.dev; 3119 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
3179 struct drm_i915_private *dev_priv = to_i915(dev);
3180 int ret; 3120 int ret;
3181 3121
3182 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3122 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
@@ -3188,12 +3128,13 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
3188 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3128 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size,
3189 &ggtt->mappable_base, &ggtt->mappable_end); 3129 &ggtt->mappable_base, &ggtt->mappable_end);
3190 3130
3191 ggtt->do_idle_maps = needs_idle_maps(&dev_priv->drm); 3131 ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3192 ggtt->base.insert_page = i915_ggtt_insert_page; 3132 ggtt->base.insert_page = i915_ggtt_insert_page;
3193 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3133 ggtt->base.insert_entries = i915_ggtt_insert_entries;
3194 ggtt->base.clear_range = i915_ggtt_clear_range; 3134 ggtt->base.clear_range = i915_ggtt_clear_range;
3195 ggtt->base.bind_vma = ggtt_bind_vma; 3135 ggtt->base.bind_vma = ggtt_bind_vma;
3196 ggtt->base.unbind_vma = ggtt_unbind_vma; 3136 ggtt->base.unbind_vma = ggtt_unbind_vma;
3137 ggtt->base.cleanup = i915_gmch_remove;
3197 3138
3198 if (unlikely(ggtt->do_idle_maps)) 3139 if (unlikely(ggtt->do_idle_maps))
3199 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3140 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
@@ -3201,65 +3142,40 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
3201 return 0; 3142 return 0;
3202} 3143}
3203 3144
3204static void i915_gmch_remove(struct i915_address_space *vm)
3205{
3206 intel_gmch_remove();
3207}
3208
3209/** 3145/**
3210 * i915_ggtt_init_hw - Initialize GGTT hardware 3146 * i915_ggtt_probe_hw - Probe GGTT hardware location
3211 * @dev: DRM device 3147 * @dev_priv: i915 device
3212 */ 3148 */
3213int i915_ggtt_init_hw(struct drm_device *dev) 3149int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3214{ 3150{
3215 struct drm_i915_private *dev_priv = to_i915(dev);
3216 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3151 struct i915_ggtt *ggtt = &dev_priv->ggtt;
3217 int ret; 3152 int ret;
3218 3153
3219 if (INTEL_INFO(dev)->gen <= 5) { 3154 ggtt->base.dev = &dev_priv->drm;
3220 ggtt->probe = i915_gmch_probe;
3221 ggtt->base.cleanup = i915_gmch_remove;
3222 } else if (INTEL_INFO(dev)->gen < 8) {
3223 ggtt->probe = gen6_gmch_probe;
3224 ggtt->base.cleanup = gen6_gmch_remove;
3225
3226 if (HAS_EDRAM(dev))
3227 ggtt->base.pte_encode = iris_pte_encode;
3228 else if (IS_HASWELL(dev))
3229 ggtt->base.pte_encode = hsw_pte_encode;
3230 else if (IS_VALLEYVIEW(dev))
3231 ggtt->base.pte_encode = byt_pte_encode;
3232 else if (INTEL_INFO(dev)->gen >= 7)
3233 ggtt->base.pte_encode = ivb_pte_encode;
3234 else
3235 ggtt->base.pte_encode = snb_pte_encode;
3236 } else {
3237 ggtt->probe = gen8_gmch_probe;
3238 ggtt->base.cleanup = gen6_gmch_remove;
3239 }
3240 3155
3241 ggtt->base.dev = dev; 3156 if (INTEL_GEN(dev_priv) <= 5)
3242 ggtt->base.is_ggtt = true; 3157 ret = i915_gmch_probe(ggtt);
3243 3158 else if (INTEL_GEN(dev_priv) < 8)
3244 ret = ggtt->probe(ggtt); 3159 ret = gen6_gmch_probe(ggtt);
3160 else
3161 ret = gen8_gmch_probe(ggtt);
3245 if (ret) 3162 if (ret)
3246 return ret; 3163 return ret;
3247 3164
3248 if ((ggtt->base.total - 1) >> 32) { 3165 if ((ggtt->base.total - 1) >> 32) {
3249 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3166 DRM_ERROR("We never expected a Global GTT with more than 32bits"
3250 "of address space! Found %lldM!\n", 3167 " of address space! Found %lldM!\n",
3251 ggtt->base.total >> 20); 3168 ggtt->base.total >> 20);
3252 ggtt->base.total = 1ULL << 32; 3169 ggtt->base.total = 1ULL << 32;
3253 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3170 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3254 } 3171 }
3255 3172
3256 /* 3173 if (ggtt->mappable_end > ggtt->base.total) {
3257 * Initialise stolen early so that we may reserve preallocated 3174 DRM_ERROR("mappable aperture extends past end of GGTT,"
3258 * objects for the BIOS to KMS transition. 3175 " aperture=%llx, total=%llx\n",
3259 */ 3176 ggtt->mappable_end, ggtt->base.total);
3260 ret = i915_gem_init_stolen(dev); 3177 ggtt->mappable_end = ggtt->base.total;
3261 if (ret) 3178 }
3262 goto out_gtt_cleanup;
3263 3179
3264 /* GMADR is the PCI mmio aperture into the global GTT. */ 3180 /* GMADR is the PCI mmio aperture into the global GTT. */
3265 DRM_INFO("Memory usable by graphics device = %lluM\n", 3181 DRM_INFO("Memory usable by graphics device = %lluM\n",
@@ -3272,16 +3188,55 @@ int i915_ggtt_init_hw(struct drm_device *dev)
3272#endif 3188#endif
3273 3189
3274 return 0; 3190 return 0;
3191}
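
i915_ggtt_probe_hw() sanity-clamps the probed sizes: anything beyond a 4 GiB global GTT is truncated, and the CPU-mappable aperture is never allowed to extend past the end of the GTT. The same clamping condensed into a standalone check, with example numbers standing in for real probe results:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical probe results: a 5 GiB GTT with a 512 MiB aperture. */
	uint64_t total = 5ull << 30;
	uint64_t mappable_end = 512ull << 20;

	if ((total - 1) >> 32) {		/* more than 32 bits of address space */
		total = 1ull << 32;
		if (mappable_end > total)
			mappable_end = total;
	}
	if (mappable_end > total)		/* aperture must fit inside the GTT */
		mappable_end = total;

	printf("total=%llu MiB mappable=%llu MiB\n",
	       (unsigned long long)(total >> 20),
	       (unsigned long long)(mappable_end >> 20));
	return 0;
}
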
3192
3193/**
3194 * i915_ggtt_init_hw - Initialize GGTT hardware
3195 * @dev_priv: i915 device
3196 */
3197int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3198{
3199 struct i915_ggtt *ggtt = &dev_priv->ggtt;
3200 int ret;
3201
3202 INIT_LIST_HEAD(&dev_priv->vm_list);
3203
3204 /* Subtract the guard page before address space initialization to
3205 * shrink the range used by drm_mm.
3206 */
3207 ggtt->base.total -= PAGE_SIZE;
3208 i915_address_space_init(&ggtt->base, dev_priv);
3209 ggtt->base.total += PAGE_SIZE;
3210 if (!HAS_LLC(dev_priv))
3211 ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
3212
3213 ggtt->mappable =
3214 io_mapping_create_wc(ggtt->mappable_base, ggtt->mappable_end);
3215 if (!ggtt->mappable) {
3216 ret = -EIO;
3217 goto out_gtt_cleanup;
3218 }
3219
3220 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end);
3221
3222 /*
3223 * Initialise stolen early so that we may reserve preallocated
3224 * objects for the BIOS to KMS transition.
3225 */
3226 ret = i915_gem_init_stolen(&dev_priv->drm);
3227 if (ret)
3228 goto out_gtt_cleanup;
3229
3230 return 0;
3275 3231
3276out_gtt_cleanup: 3232out_gtt_cleanup:
3277 ggtt->base.cleanup(&ggtt->base); 3233 ggtt->base.cleanup(&ggtt->base);
3278
3279 return ret; 3234 return ret;
3280} 3235}
3281 3236
3282int i915_ggtt_enable_hw(struct drm_device *dev) 3237int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3283{ 3238{
3284 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 3239 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3285 return -EIO; 3240 return -EIO;
3286 3241
3287 return 0; 3242 return 0;
@@ -3331,7 +3286,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3331 3286
3332 struct i915_hw_ppgtt *ppgtt; 3287 struct i915_hw_ppgtt *ppgtt;
3333 3288
3334 if (vm->is_ggtt) 3289 if (i915_is_ggtt(vm))
3335 ppgtt = dev_priv->mm.aliasing_ppgtt; 3290 ppgtt = dev_priv->mm.aliasing_ppgtt;
3336 else 3291 else
3337 ppgtt = i915_vm_to_ppgtt(vm); 3292 ppgtt = i915_vm_to_ppgtt(vm);
@@ -3344,31 +3299,88 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3344 i915_ggtt_flush(dev_priv); 3299 i915_ggtt_flush(dev_priv);
3345} 3300}
3346 3301
3302static void
3303i915_vma_retire(struct i915_gem_active *active,
3304 struct drm_i915_gem_request *rq)
3305{
3306 const unsigned int idx = rq->engine->id;
3307 struct i915_vma *vma =
3308 container_of(active, struct i915_vma, last_read[idx]);
3309
3310 GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
3311
3312 i915_vma_clear_active(vma, idx);
3313 if (i915_vma_is_active(vma))
3314 return;
3315
3316 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3317 if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
3318 WARN_ON(i915_vma_unbind(vma));
3319}
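
i915_vma_retire() relies on a per-engine bitmask in vma->active: each engine sets its bit while the VMA is in use and clears it on retire, and only when the mask drops to zero does the VMA move to the inactive list. A compact model of that bookkeeping; the engine count and the list handling below are simplified placeholders:

#include <stdbool.h>
#include <stdio.h>

#define BIT(n)		(1u << (n))
#define NUM_ENGINES	5	/* stand-in for I915_NUM_ENGINES */

struct vma { unsigned int active; bool on_inactive_list; };

static void vma_set_active(struct vma *v, unsigned int engine)
{
	v->active |= BIT(engine);
	v->on_inactive_list = false;
}

static void vma_retire(struct vma *v, unsigned int engine)
{
	v->active &= ~BIT(engine);
	if (!v->active)			/* last busy engine just finished */
		v->on_inactive_list = true;
}

int main(void)
{
	struct vma v = { 0, true };

	vma_set_active(&v, 0);		/* e.g. the render engine */
	vma_set_active(&v, 2);		/* e.g. the blitter */
	vma_retire(&v, 0);
	printf("after first retire : active=%#x inactive=%d\n", v.active, v.on_inactive_list);
	vma_retire(&v, 2);
	printf("after second retire: active=%#x inactive=%d\n", v.active, v.on_inactive_list);
	return 0;
}
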
3320
3321void i915_vma_destroy(struct i915_vma *vma)
3322{
3323 GEM_BUG_ON(vma->node.allocated);
3324 GEM_BUG_ON(i915_vma_is_active(vma));
3325 GEM_BUG_ON(!i915_vma_is_closed(vma));
3326
3327 list_del(&vma->vm_link);
3328 if (!i915_vma_is_ggtt(vma))
3329 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
3330
3331 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
3332}
3333
3334void i915_vma_close(struct i915_vma *vma)
3335{
3336 GEM_BUG_ON(i915_vma_is_closed(vma));
3337 vma->flags |= I915_VMA_CLOSED;
3338
3339 list_del_init(&vma->obj_link);
3340 if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
3341 WARN_ON(i915_vma_unbind(vma));
3342}
3343
3347static struct i915_vma * 3344static struct i915_vma *
3348__i915_gem_vma_create(struct drm_i915_gem_object *obj, 3345__i915_gem_vma_create(struct drm_i915_gem_object *obj,
3349 struct i915_address_space *vm, 3346 struct i915_address_space *vm,
3350 const struct i915_ggtt_view *ggtt_view) 3347 const struct i915_ggtt_view *view)
3351{ 3348{
3352 struct i915_vma *vma; 3349 struct i915_vma *vma;
3350 int i;
3351
3352 GEM_BUG_ON(vm->closed);
3353 3353
3354 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 3354 if (WARN_ON(i915_is_ggtt(vm) != !!view))
3355 return ERR_PTR(-EINVAL); 3355 return ERR_PTR(-EINVAL);
3356 3356
3357 vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); 3357 vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3358 if (vma == NULL) 3358 if (vma == NULL)
3359 return ERR_PTR(-ENOMEM); 3359 return ERR_PTR(-ENOMEM);
3360 3360
3361 INIT_LIST_HEAD(&vma->vm_link);
3362 INIT_LIST_HEAD(&vma->obj_link); 3361 INIT_LIST_HEAD(&vma->obj_link);
3363 INIT_LIST_HEAD(&vma->exec_list); 3362 INIT_LIST_HEAD(&vma->exec_list);
3363 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
3364 init_request_active(&vma->last_read[i], i915_vma_retire);
3365 list_add(&vma->vm_link, &vm->unbound_list);
3364 vma->vm = vm; 3366 vma->vm = vm;
3365 vma->obj = obj; 3367 vma->obj = obj;
3366 vma->is_ggtt = i915_is_ggtt(vm); 3368 vma->size = obj->base.size;
3367 3369
3368 if (i915_is_ggtt(vm)) 3370 if (i915_is_ggtt(vm)) {
3369 vma->ggtt_view = *ggtt_view; 3371 vma->flags |= I915_VMA_GGTT;
3370 else 3372 vma->ggtt_view = *view;
3373 if (view->type == I915_GGTT_VIEW_PARTIAL) {
3374 vma->size = view->params.partial.size;
3375 vma->size <<= PAGE_SHIFT;
3376 } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3377 vma->size =
3378 intel_rotation_info_size(&view->params.rotated);
3379 vma->size <<= PAGE_SHIFT;
3380 }
3381 } else {
3371 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3382 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3383 }
3372 3384
3373 list_add_tail(&vma->obj_link, &obj->vma_list); 3385 list_add_tail(&vma->obj_link, &obj->vma_list);
3374 3386
@@ -3398,9 +3410,12 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3398 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3410 struct i915_ggtt *ggtt = &dev_priv->ggtt;
3399 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 3411 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
3400 3412
3413 GEM_BUG_ON(!view);
3414
3401 if (!vma) 3415 if (!vma)
3402 vma = __i915_gem_vma_create(obj, &ggtt->base, view); 3416 vma = __i915_gem_vma_create(obj, &ggtt->base, view);
3403 3417
3418 GEM_BUG_ON(i915_vma_is_closed(vma));
3404 return vma; 3419 return vma;
3405 3420
3406} 3421}
@@ -3611,34 +3626,32 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
3611int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3626int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3612 u32 flags) 3627 u32 flags)
3613{ 3628{
3614 int ret;
3615 u32 bind_flags; 3629 u32 bind_flags;
3630 u32 vma_flags;
3631 int ret;
3616 3632
3617 if (WARN_ON(flags == 0)) 3633 if (WARN_ON(flags == 0))
3618 return -EINVAL; 3634 return -EINVAL;
3619 3635
3620 bind_flags = 0; 3636 bind_flags = 0;
3621 if (flags & PIN_GLOBAL) 3637 if (flags & PIN_GLOBAL)
3622 bind_flags |= GLOBAL_BIND; 3638 bind_flags |= I915_VMA_GLOBAL_BIND;
3623 if (flags & PIN_USER) 3639 if (flags & PIN_USER)
3624 bind_flags |= LOCAL_BIND; 3640 bind_flags |= I915_VMA_LOCAL_BIND;
3625 3641
3642 vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
3626 if (flags & PIN_UPDATE) 3643 if (flags & PIN_UPDATE)
3627 bind_flags |= vma->bound; 3644 bind_flags |= vma_flags;
3628 else 3645 else
3629 bind_flags &= ~vma->bound; 3646 bind_flags &= ~vma_flags;
3630
3631 if (bind_flags == 0) 3647 if (bind_flags == 0)
3632 return 0; 3648 return 0;
3633 3649
3634 if (vma->bound == 0 && vma->vm->allocate_va_range) { 3650 if (vma_flags == 0 && vma->vm->allocate_va_range) {
3635 /* XXX: i915_vma_pin() will fix this +- hack */
3636 vma->pin_count++;
3637 trace_i915_va_alloc(vma); 3651 trace_i915_va_alloc(vma);
3638 ret = vma->vm->allocate_va_range(vma->vm, 3652 ret = vma->vm->allocate_va_range(vma->vm,
3639 vma->node.start, 3653 vma->node.start,
3640 vma->node.size); 3654 vma->node.size);
3641 vma->pin_count--;
3642 if (ret) 3655 if (ret)
3643 return ret; 3656 return ret;
3644 } 3657 }
@@ -3647,44 +3660,20 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3647 if (ret) 3660 if (ret)
3648 return ret; 3661 return ret;
3649 3662
3650 vma->bound |= bind_flags; 3663 vma->flags |= bind_flags;
3651
3652 return 0; 3664 return 0;
3653} 3665}
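
i915_vma_bind() now derives the bind work directly from the shared flag bits: PIN_GLOBAL and PIN_USER are numerically identical to I915_VMA_GLOBAL_BIND and I915_VMA_LOCAL_BIND, so already-bound types are masked out (or, with PIN_UPDATE, deliberately rewritten). A bite-sized model of that flag arithmetic with illustrative values:

#include <stdio.h>

#define BIT(n)			(1u << (n))
#define VMA_GLOBAL_BIND		BIT(6)	/* same value as PIN_GLOBAL */
#define VMA_LOCAL_BIND		BIT(7)	/* same value as PIN_USER */
#define PIN_UPDATE		BIT(8)

/* Return the bindings that actually need to be (re)done. */
static unsigned int bind_work(unsigned int vma_flags, unsigned int pin_flags)
{
	unsigned int bind = pin_flags & (VMA_GLOBAL_BIND | VMA_LOCAL_BIND);
	unsigned int bound = vma_flags & (VMA_GLOBAL_BIND | VMA_LOCAL_BIND);

	if (pin_flags & PIN_UPDATE)
		bind |= bound;		/* rewrite existing bindings too */
	else
		bind &= ~bound;		/* skip what is already bound */
	return bind;
}

int main(void)
{
	/* Already globally bound; the caller asks for GLOBAL again plus LOCAL. */
	unsigned int todo = bind_work(VMA_GLOBAL_BIND,
				      VMA_GLOBAL_BIND | VMA_LOCAL_BIND);
	printf("needs binding: %s%s\n",
	       todo & VMA_GLOBAL_BIND ? "GLOBAL " : "",
	       todo & VMA_LOCAL_BIND ? "LOCAL" : "");
	return 0;
}
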
3654 3666
3655/**
3656 * i915_ggtt_view_size - Get the size of a GGTT view.
3657 * @obj: Object the view is of.
3658 * @view: The view in question.
3659 *
3660 * @return The size of the GGTT view in bytes.
3661 */
3662size_t
3663i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3664 const struct i915_ggtt_view *view)
3665{
3666 if (view->type == I915_GGTT_VIEW_NORMAL) {
3667 return obj->base.size;
3668 } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3669 return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT;
3670 } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3671 return view->params.partial.size << PAGE_SHIFT;
3672 } else {
3673 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3674 return obj->base.size;
3675 }
3676}
3677
3678void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) 3667void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3679{ 3668{
3680 void __iomem *ptr; 3669 void __iomem *ptr;
3681 3670
3682 lockdep_assert_held(&vma->vm->dev->struct_mutex); 3671 lockdep_assert_held(&vma->vm->dev->struct_mutex);
3683 if (WARN_ON(!vma->obj->map_and_fenceable)) 3672 if (WARN_ON(!vma->obj->map_and_fenceable))
3684 return ERR_PTR(-ENODEV); 3673 return IO_ERR_PTR(-ENODEV);
3685 3674
3686 GEM_BUG_ON(!vma->is_ggtt); 3675 GEM_BUG_ON(!i915_vma_is_ggtt(vma));
3687 GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0); 3676 GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
3688 3677
3689 ptr = vma->iomap; 3678 ptr = vma->iomap;
3690 if (ptr == NULL) { 3679 if (ptr == NULL) {
@@ -3692,11 +3681,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
3692 vma->node.start, 3681 vma->node.start,
3693 vma->node.size); 3682 vma->node.size);
3694 if (ptr == NULL) 3683 if (ptr == NULL)
3695 return ERR_PTR(-ENOMEM); 3684 return IO_ERR_PTR(-ENOMEM);
3696 3685
3697 vma->iomap = ptr; 3686 vma->iomap = ptr;
3698 } 3687 }
3699 3688
3700 vma->pin_count++; 3689 __i915_vma_pin(vma);
3701 return ptr; 3690 return ptr;
3702} 3691}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index aa5f31d1c2ed..cc56206a1600 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -36,6 +36,8 @@
36 36
37#include <linux/io-mapping.h> 37#include <linux/io-mapping.h>
38 38
39#include "i915_gem_request.h"
40
39struct drm_i915_file_private; 41struct drm_i915_file_private;
40 42
41typedef uint32_t gen6_pte_t; 43typedef uint32_t gen6_pte_t;
@@ -178,12 +180,32 @@ struct i915_vma {
178 struct drm_i915_gem_object *obj; 180 struct drm_i915_gem_object *obj;
179 struct i915_address_space *vm; 181 struct i915_address_space *vm;
180 void __iomem *iomap; 182 void __iomem *iomap;
183 u64 size;
184
185 unsigned int flags;
186 /**
187 * How many users have pinned this object in GTT space. The following
188 * users can each hold at most one reference: pwrite/pread, execbuffer
189 * (objects are not allowed multiple times for the same batchbuffer),
190 * and the framebuffer code. When switching/pageflipping, the
191 * framebuffer code has at most two buffers pinned per crtc.
192 *
193 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
194 * bits with absolutely no headroom. So use 4 bits.
195 */
196#define I915_VMA_PIN_MASK 0xf
197#define I915_VMA_PIN_OVERFLOW BIT(5)
181 198
182 /** Flags and address space this VMA is bound to */ 199 /** Flags and address space this VMA is bound to */
183#define GLOBAL_BIND (1<<0) 200#define I915_VMA_GLOBAL_BIND BIT(6)
184#define LOCAL_BIND (1<<1) 201#define I915_VMA_LOCAL_BIND BIT(7)
185 unsigned int bound : 4; 202#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
186 bool is_ggtt : 1; 203
204#define I915_VMA_GGTT BIT(8)
205#define I915_VMA_CLOSED BIT(9)
206
207 unsigned int active;
208 struct i915_gem_active last_read[I915_NUM_ENGINES];
187 209
188 /** 210 /**
189 * Support different GGTT views into the same object. 211 * Support different GGTT views into the same object.
@@ -208,20 +230,46 @@ struct i915_vma {
208 struct hlist_node exec_node; 230 struct hlist_node exec_node;
209 unsigned long exec_handle; 231 unsigned long exec_handle;
210 struct drm_i915_gem_exec_object2 *exec_entry; 232 struct drm_i915_gem_exec_object2 *exec_entry;
211
212 /**
213 * How many users have pinned this object in GTT space. The following
214 * users can each hold at most one reference: pwrite/pread, execbuffer
215 * (objects are not allowed multiple times for the same batchbuffer),
216 * and the framebuffer code. When switching/pageflipping, the
217 * framebuffer code has at most two buffers pinned per crtc.
218 *
219 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
220 * bits with absolutely no headroom. So use 4 bits. */
221 unsigned int pin_count:4;
222#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
223}; 233};
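
The relocated pin-count comment keeps the sizing argument: pread/pwrite, execbuffer and the framebuffer code each hold at most one pin, plus two buffers pinned per CRTC while flipping, i.e. 1 + 1 + 1 + 2*2 = 7 concurrent pins, which needs all 3 bits with no slack, hence the 4-bit field (mask 0xf) plus a separate overflow bit. A quick arithmetic check of those bounds:

#include <stdio.h>

#define I915_VMA_PIN_MASK	0xf	/* low 4 bits of vma->flags */

int main(void)
{
	/* pread/pwrite + execbuffer + framebuffer + 2*2 per the comment's accounting */
	unsigned int worst_case = 1 + 1 + 1 + 2 * 2;

	printf("worst-case pins      : %u\n", worst_case);
	printf("fits in 3 bits?      : %s (max 7)\n", worst_case <= 7 ? "yes" : "no");
	printf("4-bit field capacity : %u\n", I915_VMA_PIN_MASK);
	return 0;
}
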
224 234
235static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
236{
237 return vma->flags & I915_VMA_GGTT;
238}
239
240static inline bool i915_vma_is_closed(const struct i915_vma *vma)
241{
242 return vma->flags & I915_VMA_CLOSED;
243}
244
245static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
246{
247 return vma->active;
248}
249
250static inline bool i915_vma_is_active(const struct i915_vma *vma)
251{
252 return i915_vma_get_active(vma);
253}
254
255static inline void i915_vma_set_active(struct i915_vma *vma,
256 unsigned int engine)
257{
258 vma->active |= BIT(engine);
259}
260
261static inline void i915_vma_clear_active(struct i915_vma *vma,
262 unsigned int engine)
263{
264 vma->active &= ~BIT(engine);
265}
266
267static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
268 unsigned int engine)
269{
270 return vma->active & BIT(engine);
271}
272
225struct i915_page_dma { 273struct i915_page_dma {
226 struct page *page; 274 struct page *page;
227 union { 275 union {
@@ -272,11 +320,20 @@ struct i915_pml4 {
272struct i915_address_space { 320struct i915_address_space {
273 struct drm_mm mm; 321 struct drm_mm mm;
274 struct drm_device *dev; 322 struct drm_device *dev;
323 /* Every address space belongs to a struct file - except for the global
324 * GTT that is owned by the driver (and so @file is set to NULL). In
325 * principle, no information should leak from one context to another
326 * (or between files/processes etc) unless explicitly shared by the
327 * owner. Tracking the owner is important in order to free up per-file
 328 * objects along with the file, to aid resource tracking, and to
329 * assign blame.
330 */
331 struct drm_i915_file_private *file;
275 struct list_head global_link; 332 struct list_head global_link;
276 u64 start; /* Start offset always 0 for dri2 */ 333 u64 start; /* Start offset always 0 for dri2 */
277 u64 total; /* size addr space maps (ex. 2GB for ggtt) */ 334 u64 total; /* size addr space maps (ex. 2GB for ggtt) */
278 335
279 bool is_ggtt; 336 bool closed;
280 337
281 struct i915_page_scratch *scratch_page; 338 struct i915_page_scratch *scratch_page;
282 struct i915_page_table *scratch_pt; 339 struct i915_page_table *scratch_pt;
@@ -306,6 +363,13 @@ struct i915_address_space {
306 */ 363 */
307 struct list_head inactive_list; 364 struct list_head inactive_list;
308 365
366 /**
367 * List of vma that have been unbound.
368 *
369 * A reference is not held on the buffer while on this list.
370 */
371 struct list_head unbound_list;
372
309 /* FIXME: Need a more generic return type */ 373 /* FIXME: Need a more generic return type */
310 gen6_pte_t (*pte_encode)(dma_addr_t addr, 374 gen6_pte_t (*pte_encode)(dma_addr_t addr,
311 enum i915_cache_level level, 375 enum i915_cache_level level,
@@ -338,7 +402,7 @@ struct i915_address_space {
338 u32 flags); 402 u32 flags);
339}; 403};
340 404
341#define i915_is_ggtt(V) ((V)->is_ggtt) 405#define i915_is_ggtt(V) (!(V)->file)
342 406
343/* The Graphics Translation Table is the way in which GEN hardware translates a 407/* The Graphics Translation Table is the way in which GEN hardware translates a
344 * Graphics Virtual Address into a Physical Address. In addition to the normal 408 * Graphics Virtual Address into a Physical Address. In addition to the normal
@@ -354,7 +418,6 @@ struct i915_ggtt {
354 size_t stolen_usable_size; /* Total size minus BIOS reserved */ 418 size_t stolen_usable_size; /* Total size minus BIOS reserved */
355 size_t stolen_reserved_base; 419 size_t stolen_reserved_base;
356 size_t stolen_reserved_size; 420 size_t stolen_reserved_size;
357 size_t size; /* Total size of Global GTT */
358 u64 mappable_end; /* End offset that we can CPU map */ 421 u64 mappable_end; /* End offset that we can CPU map */
359 struct io_mapping *mappable; /* Mapping to our CPU mappable region */ 422 struct io_mapping *mappable; /* Mapping to our CPU mappable region */
360 phys_addr_t mappable_base; /* PA of our GMADR */ 423 phys_addr_t mappable_base; /* PA of our GMADR */
@@ -365,8 +428,6 @@ struct i915_ggtt {
365 bool do_idle_maps; 428 bool do_idle_maps;
366 429
367 int mtrr; 430 int mtrr;
368
369 int (*probe)(struct i915_ggtt *ggtt);
370}; 431};
371 432
372struct i915_hw_ppgtt { 433struct i915_hw_ppgtt {
@@ -380,8 +441,6 @@ struct i915_hw_ppgtt {
380 struct i915_page_directory pd; /* GEN6-7 */ 441 struct i915_page_directory pd; /* GEN6-7 */
381 }; 442 };
382 443
383 struct drm_i915_file_private *file_priv;
384
385 gen6_pte_t __iomem *pd_addr; 444 gen6_pte_t __iomem *pd_addr;
386 445
387 int (*enable)(struct i915_hw_ppgtt *ppgtt); 446 int (*enable)(struct i915_hw_ppgtt *ppgtt);
@@ -521,14 +580,15 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
521 px_dma(ppgtt->base.scratch_pd); 580 px_dma(ppgtt->base.scratch_pd);
522} 581}
523 582
524int i915_ggtt_init_hw(struct drm_device *dev); 583int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
525int i915_ggtt_enable_hw(struct drm_device *dev); 584int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
526void i915_gem_init_ggtt(struct drm_device *dev); 585int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
527void i915_ggtt_cleanup_hw(struct drm_device *dev); 586int i915_gem_init_ggtt(struct drm_i915_private *dev_priv);
587void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
528 588
529int i915_ppgtt_init_hw(struct drm_device *dev); 589int i915_ppgtt_init_hw(struct drm_device *dev);
530void i915_ppgtt_release(struct kref *kref); 590void i915_ppgtt_release(struct kref *kref);
531struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, 591struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
532 struct drm_i915_file_private *fpriv); 592 struct drm_i915_file_private *fpriv);
533static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) 593static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
534{ 594{
@@ -562,9 +622,66 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
562 return true; 622 return true;
563} 623}
564 624
565size_t 625/* Flags used by pin/bind&friends. */
566i915_ggtt_view_size(struct drm_i915_gem_object *obj, 626#define PIN_NONBLOCK BIT(0)
567 const struct i915_ggtt_view *view); 627#define PIN_MAPPABLE BIT(1)
628#define PIN_ZONE_4G BIT(2)
629
630#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */
631#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */
632#define PIN_USER BIT(7) /* I915_VMA_LOCAL_BIND */
633#define PIN_UPDATE BIT(8)
634
635#define PIN_HIGH BIT(9)
636#define PIN_OFFSET_BIAS BIT(10)
637#define PIN_OFFSET_FIXED BIT(11)
638#define PIN_OFFSET_MASK (~4095)
639
640int __i915_vma_do_pin(struct i915_vma *vma,
641 u64 size, u64 alignment, u64 flags);
642static inline int __must_check
643i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
644{
645 BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
646 BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
647 BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
648
649 /* Pin early to prevent the shrinker/eviction logic from destroying
650 * our vma as we insert and bind.
651 */
652 if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0))
653 return 0;
654
655 return __i915_vma_do_pin(vma, size, alignment, flags);
656}
657
658static inline int i915_vma_pin_count(const struct i915_vma *vma)
659{
660 return vma->flags & I915_VMA_PIN_MASK;
661}
662
663static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
664{
665 return i915_vma_pin_count(vma);
666}
667
668static inline void __i915_vma_pin(struct i915_vma *vma)
669{
670 vma->flags++;
671 GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
672}
673
674static inline void __i915_vma_unpin(struct i915_vma *vma)
675{
676 GEM_BUG_ON(!i915_vma_is_pinned(vma));
677 vma->flags--;
678}
679
680static inline void i915_vma_unpin(struct i915_vma *vma)
681{
682 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
683 __i915_vma_unpin(vma);
684}
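
i915_vma_pin() folds the pin count and the bind-type bits into one field so the common case is a single increment-and-compare: after ++vma->flags, XOR-ing with the requested flags leaves the I915_VMA_BIND_MASK bits clear only if the VMA is already bound exactly as requested (and the overflow bit stayed clear); otherwise it falls back to __i915_vma_do_pin(). A trimmed-down illustration of that test, reusing the same bit layout on a toy struct:

#include <stdbool.h>
#include <stdio.h>

#define BIT(n)			(1u << (n))
#define VMA_PIN_MASK		0xf
#define VMA_PIN_OVERFLOW	BIT(5)
#define VMA_GLOBAL_BIND		BIT(6)	/* same value as PIN_GLOBAL */
#define VMA_LOCAL_BIND		BIT(7)	/* same value as PIN_USER */
#define VMA_BIND_MASK		(VMA_GLOBAL_BIND | VMA_LOCAL_BIND | VMA_PIN_OVERFLOW)

struct vma { unsigned int flags; };

/* True if the pin was satisfied by the fast path (already suitably bound). */
static bool vma_pin_fast(struct vma *vma, unsigned int pin_flags)
{
	return ((++vma->flags ^ pin_flags) & VMA_BIND_MASK) == 0;
}

int main(void)
{
	struct vma vma = { .flags = VMA_GLOBAL_BIND };	/* bound in the GGTT, pin count 0 */

	printf("pin GLOBAL: %s\n",
	       vma_pin_fast(&vma, VMA_GLOBAL_BIND) ? "fast path" : "full bind");
	printf("pin USER  : %s\n",
	       vma_pin_fast(&vma, VMA_LOCAL_BIND) ? "fast path" : "full bind");
	printf("pin count : %u\n", vma.flags & VMA_PIN_MASK);
	return 0;
}
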
568 685
569/** 686/**
570 * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture 687 * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
@@ -580,6 +697,7 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj,
580 * Returns a valid iomapped pointer or ERR_PTR. 697 * Returns a valid iomapped pointer or ERR_PTR.
581 */ 698 */
582void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); 699void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
700#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
583 701
584/** 702/**
585 * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap 703 * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap
@@ -593,9 +711,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
593static inline void i915_vma_unpin_iomap(struct i915_vma *vma) 711static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
594{ 712{
595 lockdep_assert_held(&vma->vm->dev->struct_mutex); 713 lockdep_assert_held(&vma->vm->dev->struct_mutex);
596 GEM_BUG_ON(vma->pin_count == 0);
597 GEM_BUG_ON(vma->iomap == NULL); 714 GEM_BUG_ON(vma->iomap == NULL);
598 vma->pin_count--; 715 i915_vma_unpin(vma);
599} 716}
600 717
601#endif 718#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index f75bbd67a13a..57fd767a2d79 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,10 +28,18 @@
28#include "i915_drv.h" 28#include "i915_drv.h"
29#include "intel_renderstate.h" 29#include "intel_renderstate.h"
30 30
31struct render_state {
32 const struct intel_renderstate_rodata *rodata;
33 struct drm_i915_gem_object *obj;
34 u64 ggtt_offset;
35 u32 aux_batch_size;
36 u32 aux_batch_offset;
37};
38
31static const struct intel_renderstate_rodata * 39static const struct intel_renderstate_rodata *
32render_state_get_rodata(const int gen) 40render_state_get_rodata(const struct drm_i915_gem_request *req)
33{ 41{
34 switch (gen) { 42 switch (INTEL_GEN(req->i915)) {
35 case 6: 43 case 6:
36 return &gen6_null_state; 44 return &gen6_null_state;
37 case 7: 45 case 7:
@@ -45,35 +53,6 @@ render_state_get_rodata(const int gen)
45 return NULL; 53 return NULL;
46} 54}
47 55
48static int render_state_init(struct render_state *so,
49 struct drm_i915_private *dev_priv)
50{
51 int ret;
52
53 so->gen = INTEL_GEN(dev_priv);
54 so->rodata = render_state_get_rodata(so->gen);
55 if (so->rodata == NULL)
56 return 0;
57
58 if (so->rodata->batch_items * 4 > 4096)
59 return -EINVAL;
60
61 so->obj = i915_gem_object_create(&dev_priv->drm, 4096);
62 if (IS_ERR(so->obj))
63 return PTR_ERR(so->obj);
64
65 ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
66 if (ret)
67 goto free_gem;
68
69 so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
70 return 0;
71
72free_gem:
73 drm_gem_object_unreference(&so->obj->base);
74 return ret;
75}
76
77/* 56/*
78 * Macro to add commands to auxiliary batch. 57 * Macro to add commands to auxiliary batch.
79 * This macro only checks for page overflow before inserting the commands, 58 * This macro only checks for page overflow before inserting the commands,
@@ -96,6 +75,7 @@ static int render_state_setup(struct render_state *so)
96{ 75{
97 struct drm_device *dev = so->obj->base.dev; 76 struct drm_device *dev = so->obj->base.dev;
98 const struct intel_renderstate_rodata *rodata = so->rodata; 77 const struct intel_renderstate_rodata *rodata = so->rodata;
78 const bool has_64bit_reloc = INTEL_GEN(dev) >= 8;
99 unsigned int i = 0, reloc_index = 0; 79 unsigned int i = 0, reloc_index = 0;
100 struct page *page; 80 struct page *page;
101 u32 *d; 81 u32 *d;
@@ -114,7 +94,7 @@ static int render_state_setup(struct render_state *so)
114 if (i * 4 == rodata->reloc[reloc_index]) { 94 if (i * 4 == rodata->reloc[reloc_index]) {
115 u64 r = s + so->ggtt_offset; 95 u64 r = s + so->ggtt_offset;
116 s = lower_32_bits(r); 96 s = lower_32_bits(r);
117 if (so->gen >= 8) { 97 if (has_64bit_reloc) {
118 if (i + 1 >= rodata->batch_items || 98 if (i + 1 >= rodata->batch_items ||
119 rodata->batch[i + 1] != 0) { 99 rodata->batch[i + 1] != 0) {
120 ret = -EINVAL; 100 ret = -EINVAL;
@@ -192,67 +172,55 @@ err_out:
192 172
193#undef OUT_BATCH 173#undef OUT_BATCH
194 174
195void i915_gem_render_state_fini(struct render_state *so) 175int i915_gem_render_state_init(struct drm_i915_gem_request *req)
196{
197 i915_gem_object_ggtt_unpin(so->obj);
198 drm_gem_object_unreference(&so->obj->base);
199}
200
201int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
202 struct render_state *so)
203{ 176{
177 struct render_state so;
204 int ret; 178 int ret;
205 179
206 if (WARN_ON(engine->id != RCS)) 180 if (WARN_ON(req->engine->id != RCS))
207 return -ENOENT; 181 return -ENOENT;
208 182
209 ret = render_state_init(so, engine->i915); 183 so.rodata = render_state_get_rodata(req);
210 if (ret) 184 if (!so.rodata)
211 return ret;
212
213 if (so->rodata == NULL)
214 return 0; 185 return 0;
215 186
216 ret = render_state_setup(so); 187 if (so.rodata->batch_items * 4 > 4096)
217 if (ret) { 188 return -EINVAL;
218 i915_gem_render_state_fini(so);
219 return ret;
220 }
221 189
222 return 0; 190 so.obj = i915_gem_object_create(&req->i915->drm, 4096);
223} 191 if (IS_ERR(so.obj))
192 return PTR_ERR(so.obj);
224 193
225int i915_gem_render_state_init(struct drm_i915_gem_request *req) 194 ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0);
226{
227 struct render_state so;
228 int ret;
229
230 ret = i915_gem_render_state_prepare(req->engine, &so);
231 if (ret) 195 if (ret)
232 return ret; 196 goto err_obj;
233 197
234 if (so.rodata == NULL) 198 so.ggtt_offset = i915_gem_obj_ggtt_offset(so.obj);
235 return 0; 199
200 ret = render_state_setup(&so);
201 if (ret)
202 goto err_unpin;
236 203
237 ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset, 204 ret = req->engine->emit_bb_start(req, so.ggtt_offset,
238 so.rodata->batch_items * 4, 205 so.rodata->batch_items * 4,
239 I915_DISPATCH_SECURE); 206 I915_DISPATCH_SECURE);
240 if (ret) 207 if (ret)
241 goto out; 208 goto err_unpin;
242 209
243 if (so.aux_batch_size > 8) { 210 if (so.aux_batch_size > 8) {
244 ret = req->engine->dispatch_execbuffer(req, 211 ret = req->engine->emit_bb_start(req,
245 (so.ggtt_offset + 212 (so.ggtt_offset +
246 so.aux_batch_offset), 213 so.aux_batch_offset),
247 so.aux_batch_size, 214 so.aux_batch_size,
248 I915_DISPATCH_SECURE); 215 I915_DISPATCH_SECURE);
249 if (ret) 216 if (ret)
250 goto out; 217 goto err_unpin;
251 } 218 }
252 219
253 i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); 220 i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0);
254 221err_unpin:
255out: 222 i915_gem_object_ggtt_unpin(so.obj);
256 i915_gem_render_state_fini(&so); 223err_obj:
224 i915_gem_object_put(so.obj);
257 return ret; 225 return ret;
258} 226}
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
index 6aaa3a10a630..c44fca8599bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -26,24 +26,6 @@
26 26
27#include <linux/types.h> 27#include <linux/types.h>
28 28
29struct intel_renderstate_rodata {
30 const u32 *reloc;
31 const u32 *batch;
32 const u32 batch_items;
33};
34
35struct render_state {
36 const struct intel_renderstate_rodata *rodata;
37 struct drm_i915_gem_object *obj;
38 u64 ggtt_offset;
39 int gen;
40 u32 aux_batch_size;
41 u32 aux_batch_offset;
42};
43
44int i915_gem_render_state_init(struct drm_i915_gem_request *req); 29int i915_gem_render_state_init(struct drm_i915_gem_request *req);
45void i915_gem_render_state_fini(struct render_state *so);
46int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
47 struct render_state *so);
48 30
49#endif /* _I915_GEM_RENDER_STATE_H_ */ 31#endif /* _I915_GEM_RENDER_STATE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
new file mode 100644
index 000000000000..6a1661643d3d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -0,0 +1,767 @@
1/*
2 * Copyright © 2008-2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <linux/prefetch.h>
26
27#include "i915_drv.h"
28
29static const char *i915_fence_get_driver_name(struct fence *fence)
30{
31 return "i915";
32}
33
34static const char *i915_fence_get_timeline_name(struct fence *fence)
35{
36 /* Timelines are bound by eviction to a VM. However, since
37 * we only have a global seqno at the moment, we only have
38 * a single timeline. Note that each timeline will have
39 * multiple execution contexts (fence contexts) as we allow
40 * engines within a single timeline to execute in parallel.
41 */
42 return "global";
43}
44
45static bool i915_fence_signaled(struct fence *fence)
46{
47 return i915_gem_request_completed(to_request(fence));
48}
49
50static bool i915_fence_enable_signaling(struct fence *fence)
51{
52 if (i915_fence_signaled(fence))
53 return false;
54
55 intel_engine_enable_signaling(to_request(fence));
56 return true;
57}
58
59static signed long i915_fence_wait(struct fence *fence,
60 bool interruptible,
61 signed long timeout_jiffies)
62{
63 s64 timeout_ns, *timeout;
64 int ret;
65
66 if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
67 timeout_ns = jiffies_to_nsecs(timeout_jiffies);
68 timeout = &timeout_ns;
69 } else {
70 timeout = NULL;
71 }
72
73 ret = i915_wait_request(to_request(fence),
74 interruptible, timeout,
75 NO_WAITBOOST);
76 if (ret == -ETIME)
77 return 0;
78
79 if (ret < 0)
80 return ret;
81
82 if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
83 timeout_jiffies = nsecs_to_jiffies(timeout_ns);
84
85 return timeout_jiffies;
86}
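
i915_fence_wait() bridges two timeout conventions: the fence API hands it jiffies (or MAX_SCHEDULE_TIMEOUT for "forever"), while i915_wait_request() consumes and updates a nanosecond budget, so the remainder is converted back before returning. A userspace sketch of that round trip; the HZ value, helper names and the "1.5 s consumed" figure are assumptions made for illustration only:

#include <stdint.h>
#include <stdio.h>

#define HZ			250		/* assumed tick rate for this sketch */
#define NSEC_PER_SEC		1000000000ull
#define MAX_SCHEDULE_TIMEOUT	(~0ull >> 1)

static uint64_t jiffies_to_ns(uint64_t j)  { return j * (NSEC_PER_SEC / HZ); }
static uint64_t ns_to_jiffies(uint64_t ns) { return ns / (NSEC_PER_SEC / HZ); }

int main(void)
{
	uint64_t timeout_jiffies = 2 * HZ;		/* caller allows 2 seconds */
	uint64_t budget_ns = jiffies_to_ns(timeout_jiffies);

	/* Pretend the wait consumed 1.5 s of the budget before completing. */
	budget_ns -= 3 * NSEC_PER_SEC / 2;

	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
		timeout_jiffies = ns_to_jiffies(budget_ns);

	printf("remaining: %llu jiffies (~%llu ms)\n",
	       (unsigned long long)timeout_jiffies,
	       (unsigned long long)(budget_ns / 1000000));
	return 0;
}
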
87
88static void i915_fence_value_str(struct fence *fence, char *str, int size)
89{
90 snprintf(str, size, "%u", fence->seqno);
91}
92
93static void i915_fence_timeline_value_str(struct fence *fence, char *str,
94 int size)
95{
96 snprintf(str, size, "%u",
97 intel_engine_get_seqno(to_request(fence)->engine));
98}
99
100static void i915_fence_release(struct fence *fence)
101{
102 struct drm_i915_gem_request *req = to_request(fence);
103
104 kmem_cache_free(req->i915->requests, req);
105}
106
107const struct fence_ops i915_fence_ops = {
108 .get_driver_name = i915_fence_get_driver_name,
109 .get_timeline_name = i915_fence_get_timeline_name,
110 .enable_signaling = i915_fence_enable_signaling,
111 .signaled = i915_fence_signaled,
112 .wait = i915_fence_wait,
113 .release = i915_fence_release,
114 .fence_value_str = i915_fence_value_str,
115 .timeline_value_str = i915_fence_timeline_value_str,
116};
117
118int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
119 struct drm_file *file)
120{
121 struct drm_i915_private *dev_private;
122 struct drm_i915_file_private *file_priv;
123
124 WARN_ON(!req || !file || req->file_priv);
125
126 if (!req || !file)
127 return -EINVAL;
128
129 if (req->file_priv)
130 return -EINVAL;
131
132 dev_private = req->i915;
133 file_priv = file->driver_priv;
134
135 spin_lock(&file_priv->mm.lock);
136 req->file_priv = file_priv;
137 list_add_tail(&req->client_list, &file_priv->mm.request_list);
138 spin_unlock(&file_priv->mm.lock);
139
140 req->pid = get_pid(task_pid(current));
141
142 return 0;
143}
144
145static inline void
146i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
147{
148 struct drm_i915_file_private *file_priv = request->file_priv;
149
150 if (!file_priv)
151 return;
152
153 spin_lock(&file_priv->mm.lock);
154 list_del(&request->client_list);
155 request->file_priv = NULL;
156 spin_unlock(&file_priv->mm.lock);
157
158 put_pid(request->pid);
159 request->pid = NULL;
160}
161
162void i915_gem_retire_noop(struct i915_gem_active *active,
163 struct drm_i915_gem_request *request)
164{
165 /* Space left intentionally blank */
166}
167
168static void i915_gem_request_retire(struct drm_i915_gem_request *request)
169{
170 struct i915_gem_active *active, *next;
171
172 trace_i915_gem_request_retire(request);
173 list_del(&request->link);
174
175 /* We know the GPU must have read the request to have
176 * sent us the seqno + interrupt, so use the position
177 * of tail of the request to update the last known position
178 * of the GPU head.
179 *
180 * Note this requires that we are always called in request
181 * completion order.
182 */
183 list_del(&request->ring_link);
184 request->ring->last_retired_head = request->postfix;
185
186 /* Walk through the active list, calling retire on each. This allows
187 * objects to track their GPU activity and mark themselves as idle
188 * when their *last* active request is completed (updating state
189 * tracking lists for eviction, active references for GEM, etc).
190 *
191 * As the ->retire() may free the node, we decouple it first and
192 * pass along the auxiliary information (to avoid dereferencing
193 * the node after the callback).
194 */
195 list_for_each_entry_safe(active, next, &request->active_list, link) {
196 /* In microbenchmarks or focusing upon time inside the kernel,
197 * we may spend an inordinate amount of time simply handling
198 * the retirement of requests and processing their callbacks.
199 * Of which, this loop itself is particularly hot due to the
200 * cache misses when jumping around the list of i915_gem_active.
201 * So we try to keep this loop as streamlined as possible and
202 * also prefetch the next i915_gem_active to try and hide
203 * the likely cache miss.
204 */
205 prefetchw(next);
206
207 INIT_LIST_HEAD(&active->link);
208 RCU_INIT_POINTER(active->request, NULL);
209
210 active->retire(active, request);
211 }
212
213 i915_gem_request_remove_from_client(request);
214
215 if (request->previous_context) {
216 if (i915.enable_execlists)
217 intel_lr_context_unpin(request->previous_context,
218 request->engine);
219 }
220
221 i915_gem_context_put(request->ctx);
222 i915_gem_request_put(request);
223}
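
The retirement walk above deliberately unlinks each i915_gem_active node and clears its request pointer before invoking ->retire(), because the callback is allowed to free the node (it also prefetches the next entry to soften the cache misses the comment describes). The same decouple-then-callback discipline in a self-contained toy list; the node names are made up for illustration:

#include <stdio.h>
#include <stdlib.h>

struct active;
typedef void (*retire_fn)(struct active *node);

struct active {
	struct active *next;
	retire_fn retire;
	const char *name;
};

/* A retire callback is allowed to free its own node... */
static void retire_and_free(struct active *node)
{
	printf("retiring %s (freed)\n", node->name);
	free(node);
}

/* ...or simply note completion and keep the node for reuse. */
static void retire_keep(struct active *node)
{
	printf("retiring %s (kept)\n", node->name);
}

static void retire_all(struct active **head)
{
	struct active *node = *head, *next;

	while (node) {
		next = node->next;	/* fetch before the callback may free node */
		node->next = NULL;	/* decouple first, like INIT_LIST_HEAD() */
		node->retire(node);
		node = next;
	}
	*head = NULL;
}

int main(void)
{
	static struct active vma_node = { NULL, retire_keep, "vma last_read (example)" };
	struct active *obj_node = malloc(sizeof(*obj_node));

	*obj_node = (struct active){ &vma_node, retire_and_free, "obj last_write (example)" };
	retire_all(&obj_node);
	return 0;
}
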
224
225void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
226{
227 struct intel_engine_cs *engine = req->engine;
228 struct drm_i915_gem_request *tmp;
229
230 lockdep_assert_held(&req->i915->drm.struct_mutex);
231 GEM_BUG_ON(list_empty(&req->link));
232
233 do {
234 tmp = list_first_entry(&engine->request_list,
235 typeof(*tmp), link);
236
237 i915_gem_request_retire(tmp);
238 } while (tmp != req);
239}
240
241static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
242{
243 if (__i915_terminally_wedged(reset_counter))
244 return -EIO;
245
246 if (__i915_reset_in_progress(reset_counter)) {
247 /* Non-interruptible callers can't handle -EAGAIN, hence return
248 * -EIO unconditionally for these.
249 */
250 if (!interruptible)
251 return -EIO;
252
253 return -EAGAIN;
254 }
255
256 return 0;
257}
258
259static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
260{
261 struct intel_engine_cs *engine;
262 int ret;
263
264 /* Carefully retire all requests without writing to the rings */
265 for_each_engine(engine, dev_priv) {
266 ret = intel_engine_idle(engine, true);
267 if (ret)
268 return ret;
269 }
270 i915_gem_retire_requests(dev_priv);
271
272 /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
273 if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
274 while (intel_kick_waiters(dev_priv) ||
275 intel_kick_signalers(dev_priv))
276 yield();
277 }
278
279 /* Finally reset hw state */
280 for_each_engine(engine, dev_priv)
281 intel_engine_init_seqno(engine, seqno);
282
283 return 0;
284}
285
286int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
287{
288 struct drm_i915_private *dev_priv = to_i915(dev);
289 int ret;
290
291 if (seqno == 0)
292 return -EINVAL;
293
294 /* HWS page needs to be set less than what we
295 * will inject to ring
296 */
297 ret = i915_gem_init_seqno(dev_priv, seqno - 1);
298 if (ret)
299 return ret;
300
301 dev_priv->next_seqno = seqno;
302 return 0;
303}
304
305static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
306{
307 /* reserve 0 for non-seqno */
308 if (unlikely(dev_priv->next_seqno == 0)) {
309 int ret;
310
311 ret = i915_gem_init_seqno(dev_priv, 0);
312 if (ret)
313 return ret;
314
315 dev_priv->next_seqno = 1;
316 }
317
318 *seqno = dev_priv->next_seqno++;
319 return 0;
320}
321
322/**
323 * i915_gem_request_alloc - allocate a request structure
324 *
325 * @engine: engine that we wish to issue the request on.
326 * @ctx: context that the request will be associated with.
327 * This can be NULL if the request is not directly related to
328 * any specific user context, in which case this function will
329 * choose an appropriate context to use.
330 *
331 * Returns a pointer to the allocated request if successful,
332 * or an error code if not.
333 */
334struct drm_i915_gem_request *
335i915_gem_request_alloc(struct intel_engine_cs *engine,
336 struct i915_gem_context *ctx)
337{
338 struct drm_i915_private *dev_priv = engine->i915;
339 unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
340 struct drm_i915_gem_request *req;
341 u32 seqno;
342 int ret;
343
344 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
345 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
346 * and restart.
347 */
348 ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
349 if (ret)
350 return ERR_PTR(ret);
351
352 /* Move the oldest request to the slab-cache (if not in use!) */
353 req = list_first_entry_or_null(&engine->request_list,
354 typeof(*req), link);
355 if (req && i915_gem_request_completed(req))
356 i915_gem_request_retire(req);
357
358 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
359 if (!req)
360 return ERR_PTR(-ENOMEM);
361
362 ret = i915_gem_get_seqno(dev_priv, &seqno);
363 if (ret)
364 goto err;
365
366 spin_lock_init(&req->lock);
367 fence_init(&req->fence,
368 &i915_fence_ops,
369 &req->lock,
370 engine->fence_context,
371 seqno);
372
373 INIT_LIST_HEAD(&req->active_list);
374 req->i915 = dev_priv;
375 req->engine = engine;
376 req->ctx = i915_gem_context_get(ctx);
377
378 /*
379 * Reserve space in the ring buffer for all the commands required to
380 * eventually emit this request. This is to guarantee that the
381 * i915_add_request() call can't fail. Note that the reserve may need
382 * to be redone if the request is not actually submitted straight
383 * away, e.g. because a GPU scheduler has deferred it.
384 */
385 req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
386
387 if (i915.enable_execlists)
388 ret = intel_logical_ring_alloc_request_extras(req);
389 else
390 ret = intel_ring_alloc_request_extras(req);
391 if (ret)
392 goto err_ctx;
393
394 return req;
395
396err_ctx:
397 i915_gem_context_put(ctx);
398err:
399 kmem_cache_free(dev_priv->requests, req);
400 return ERR_PTR(ret);
401}
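
/*
 * Illustrative sketch only (example_submit() and the "emit commands" step
 * are hypothetical): the request life-cycle implied by the comments above,
 * as seen from a caller such as execbuffer. Only i915_gem_request_alloc()
 * and i915_add_request() are APIs from this file, and error handling for a
 * failed emission is elided.
 */
static int example_submit(struct intel_engine_cs *engine,
			  struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *req;

	req = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* Emit commands into req->ring here; the space needed to close the
	 * request was already reserved by i915_gem_request_alloc().
	 */

	i915_add_request(req); /* seals and submits; may not fail */
	return 0;
}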
402
403static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
404{
405 struct drm_i915_private *dev_priv = engine->i915;
406
407 dev_priv->gt.active_engines |= intel_engine_flag(engine);
408 if (dev_priv->gt.awake)
409 return;
410
411 intel_runtime_pm_get_noresume(dev_priv);
412 dev_priv->gt.awake = true;
413
414 intel_enable_gt_powersave(dev_priv);
415 i915_update_gfx_val(dev_priv);
416 if (INTEL_GEN(dev_priv) >= 6)
417 gen6_rps_busy(dev_priv);
418
419 queue_delayed_work(dev_priv->wq,
420 &dev_priv->gt.retire_work,
421 round_jiffies_up_relative(HZ));
422}
423
424/*
425 * NB: This function is not allowed to fail. Doing so would mean that the
426 * request is not being tracked for completion but the work itself is
427 * going to happen on the hardware. This would be a Bad Thing(tm).
428 */
429void __i915_add_request(struct drm_i915_gem_request *request,
430 struct drm_i915_gem_object *obj,
431 bool flush_caches)
432{
433 struct intel_engine_cs *engine;
434 struct intel_ring *ring;
435 u32 request_start;
436 u32 reserved_tail;
437 int ret;
438
439 if (WARN_ON(!request))
440 return;
441
442 engine = request->engine;
443 ring = request->ring;
444
445 /*
446 * To ensure that this call will not fail, space for its emissions
447 * should already have been reserved in the ring buffer. Let the ring
448 * know that it is time to use that space up.
449 */
450 request_start = ring->tail;
451 reserved_tail = request->reserved_space;
452 request->reserved_space = 0;
453
454 /*
455 * Emit any outstanding flushes - execbuf can fail to emit the flush
456 * after having emitted the batchbuffer command. Hence we need to fix
457 * things up similar to emitting the lazy request. The difference here
458 * is that the flush _must_ happen before the next request, no matter
459 * what.
460 */
461 if (flush_caches) {
462 ret = engine->emit_flush(request, EMIT_FLUSH);
463
464 /* Not allowed to fail! */
465 WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
466 }
467
468 trace_i915_gem_request_add(request);
469
470 request->head = request_start;
471
472 /* Whilst this request exists, batch_obj will be on the
473 * active_list, and so will hold the active reference. Only when this
474	 * request is retired will the batch_obj be moved onto the
475 * inactive_list and lose its active reference. Hence we do not need
476 * to explicitly hold another reference here.
477 */
478 request->batch_obj = obj;
479
480 /* Seal the request and mark it as pending execution. Note that
481 * we may inspect this state, without holding any locks, during
482 * hangcheck. Hence we apply the barrier to ensure that we do not
483 * see a more recent value in the hws than we are tracking.
484 */
485 request->emitted_jiffies = jiffies;
486 request->previous_seqno = engine->last_submitted_seqno;
487 engine->last_submitted_seqno = request->fence.seqno;
488 i915_gem_active_set(&engine->last_request, request);
489 list_add_tail(&request->link, &engine->request_list);
490 list_add_tail(&request->ring_link, &ring->request_list);
491
492 /* Record the position of the start of the request so that
493 * should we detect the updated seqno part-way through the
494 * GPU processing the request, we never over-estimate the
495 * position of the head.
496 */
497 request->postfix = ring->tail;
498
499 /* Not allowed to fail! */
500 ret = engine->emit_request(request);
501 WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret);
502
503 /* Sanity check that the reserved size was large enough. */
504 ret = ring->tail - request_start;
505 if (ret < 0)
506 ret += ring->size;
507 WARN_ONCE(ret > reserved_tail,
508 "Not enough space reserved (%d bytes) "
509 "for adding the request (%d bytes)\n",
510 reserved_tail, ret);
511
512 i915_gem_mark_busy(engine);
513 engine->submit_request(request);
514}
515
516static unsigned long local_clock_us(unsigned int *cpu)
517{
518 unsigned long t;
519
520 /* Cheaply and approximately convert from nanoseconds to microseconds.
521 * The result and subsequent calculations are also defined in the same
522	 * approximate microsecond units. The principal source of timing
523 * error here is from the simple truncation.
524 *
525	 * Note that local_clock() is only defined wrt the current CPU;
526 * the comparisons are no longer valid if we switch CPUs. Instead of
527 * blocking preemption for the entire busywait, we can detect the CPU
528 * switch and use that as indicator of system load and a reason to
529 * stop busywaiting, see busywait_stop().
530 */
531 *cpu = get_cpu();
532 t = local_clock() >> 10;
533 put_cpu();
534
535 return t;
536}
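
/*
 * Worked example for the conversion above: local_clock() returns
 * nanoseconds and ">> 10" divides by 1024 rather than 1000, i.e. a ~2.4%
 * systematic error plus truncation. For instance 100,000ns >> 10 yields
 * 97 "microseconds" against the exact 100us, which is ample precision for
 * bounding a busywait of a few microseconds.
 */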
537
538static bool busywait_stop(unsigned long timeout, unsigned int cpu)
539{
540 unsigned int this_cpu;
541
542 if (time_after(local_clock_us(&this_cpu), timeout))
543 return true;
544
545 return this_cpu != cpu;
546}
547
548bool __i915_spin_request(const struct drm_i915_gem_request *req,
549 int state, unsigned long timeout_us)
550{
551 unsigned int cpu;
552
553 /* When waiting for high frequency requests, e.g. during synchronous
554 * rendering split between the CPU and GPU, the finite amount of time
555 * required to set up the irq and wait upon it limits the response
556 * rate. By busywaiting on the request completion for a short while we
557	 * can service the high frequency waits as quickly as possible. However,
558 * if it is a slow request, we want to sleep as quickly as possible.
559 * The tradeoff between waiting and sleeping is roughly the time it
560 * takes to sleep on a request, on the order of a microsecond.
561 */
562
563 timeout_us += local_clock_us(&cpu);
564 do {
565 if (i915_gem_request_completed(req))
566 return true;
567
568 if (signal_pending_state(state, current))
569 break;
570
571 if (busywait_stop(timeout_us, cpu))
572 break;
573
574 cpu_relax_lowlatency();
575 } while (!need_resched());
576
577 return false;
578}
579
580/**
581 * i915_wait_request - wait until execution of request has finished
582 * @req: the request to wait upon
583 * @interruptible: do an interruptible wait (normally yes)
584 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
585 * @rps: client to charge for RPS boosting
586 *
587 * Note: It is of utmost importance that the passed in seqno and reset_counter
588 * values have been read by the caller in an smp safe manner. Where read-side
589 * locks are involved, it is sufficient to read the reset_counter before
590 * unlocking the lock that protects the seqno. For lockless tricks, the
591 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
592 * inserted.
593 *
594 * Returns 0 if the request completed within the allotted time. Otherwise an
595 * errno is returned, with the remaining time filled into the timeout argument.
596 */
597int i915_wait_request(struct drm_i915_gem_request *req,
598 bool interruptible,
599 s64 *timeout,
600 struct intel_rps_client *rps)
601{
602 int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
603 DEFINE_WAIT(reset);
604 struct intel_wait wait;
605 unsigned long timeout_remain;
606 int ret = 0;
607
608 might_sleep();
609
610 if (i915_gem_request_completed(req))
611 return 0;
612
613 timeout_remain = MAX_SCHEDULE_TIMEOUT;
614 if (timeout) {
615 if (WARN_ON(*timeout < 0))
616 return -EINVAL;
617
618 if (*timeout == 0)
619 return -ETIME;
620
621 /* Record current time in case interrupted, or wedged */
622 timeout_remain = nsecs_to_jiffies_timeout(*timeout);
623 *timeout += ktime_get_raw_ns();
624 }
625
626 trace_i915_gem_request_wait_begin(req);
627
628 /* This client is about to stall waiting for the GPU. In many cases
629 * this is undesirable and limits the throughput of the system, as
630 * many clients cannot continue processing user input/output whilst
631 * blocked. RPS autotuning may take tens of milliseconds to respond
632 * to the GPU load and thus incurs additional latency for the client.
633 * We can circumvent that by promoting the GPU frequency to maximum
634 * before we wait. This makes the GPU throttle up much more quickly
635 * (good for benchmarks and user experience, e.g. window animations),
636 * but at a cost of spending more power processing the workload
637 * (bad for battery). Not all clients even want their results
638 * immediately and for them we should just let the GPU select its own
639 * frequency to maximise efficiency. To prevent a single client from
640 * forcing the clocks too high for the whole system, we only allow
641 * each client to waitboost once in a busy period.
642 */
643 if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
644 gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
645
646 /* Optimistic short spin before touching IRQs */
647 if (i915_spin_request(req, state, 5))
648 goto complete;
649
650 set_current_state(state);
651 add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
652
653 intel_wait_init(&wait, req->fence.seqno);
654 if (intel_engine_add_wait(req->engine, &wait))
655 /* In order to check that we haven't missed the interrupt
656 * as we enabled it, we need to kick ourselves to do a
657 * coherent check on the seqno before we sleep.
658 */
659 goto wakeup;
660
661 for (;;) {
662 if (signal_pending_state(state, current)) {
663 ret = -ERESTARTSYS;
664 break;
665 }
666
667 timeout_remain = io_schedule_timeout(timeout_remain);
668 if (timeout_remain == 0) {
669 ret = -ETIME;
670 break;
671 }
672
673 if (intel_wait_complete(&wait))
674 break;
675
676 set_current_state(state);
677
678wakeup:
679 /* Carefully check if the request is complete, giving time
680 * for the seqno to be visible following the interrupt.
681 * We also have to check in case we are kicked by the GPU
682 * reset in order to drop the struct_mutex.
683 */
684 if (__i915_request_irq_complete(req))
685 break;
686
687 /* Only spin if we know the GPU is processing this request */
688 if (i915_spin_request(req, state, 2))
689 break;
690 }
691 remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
692
693 intel_engine_remove_wait(req->engine, &wait);
694 __set_current_state(TASK_RUNNING);
695complete:
696 trace_i915_gem_request_wait_end(req);
697
698 if (timeout) {
699 *timeout -= ktime_get_raw_ns();
700 if (*timeout < 0)
701 *timeout = 0;
702
703 /*
704 * Apparently ktime isn't accurate enough and occasionally has a
705 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
706 * things up to make the test happy. We allow up to 1 jiffy.
707 *
708		 * This is a regression from the timespec->ktime conversion.
709 */
710 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
711 *timeout = 0;
712 }
713
714 if (IS_RPS_USER(rps) &&
715 req->fence.seqno == req->engine->last_submitted_seqno) {
716 /* The GPU is now idle and this client has stalled.
717 * Since no other client has submitted a request in the
718 * meantime, assume that this client is the only one
719 * supplying work to the GPU but is unable to keep that
720 * work supplied because it is waiting. Since the GPU is
721 * then never kept fully busy, RPS autoclocking will
722 * keep the clocks relatively low, causing further delays.
723 * Compensate by giving the synchronous client credit for
724 * a waitboost next time.
725 */
726 spin_lock(&req->i915->rps.client_lock);
727 list_del_init(&rps->link);
728 spin_unlock(&req->i915->rps.client_lock);
729 }
730
731 return ret;
732}
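
/*
 * Illustrative sketch only (example_timed_wait() is hypothetical): a
 * bounded, interruptible wait of up to 10ms on a request, declining the
 * waitboost by passing NO_WAITBOOST. As described above, the timeout is
 * expressed in nanoseconds and is updated with the time remaining.
 */
static int example_timed_wait(struct drm_i915_gem_request *req)
{
	s64 timeout = 10 * NSEC_PER_MSEC;

	return i915_wait_request(req, true, &timeout, NO_WAITBOOST);
}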
733
734static void engine_retire_requests(struct intel_engine_cs *engine)
735{
736 struct drm_i915_gem_request *request, *next;
737
738 list_for_each_entry_safe(request, next, &engine->request_list, link) {
739 if (!i915_gem_request_completed(request))
740 break;
741
742 i915_gem_request_retire(request);
743 }
744}
745
746void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
747{
748 struct intel_engine_cs *engine;
749
750 lockdep_assert_held(&dev_priv->drm.struct_mutex);
751
752 if (dev_priv->gt.active_engines == 0)
753 return;
754
755 GEM_BUG_ON(!dev_priv->gt.awake);
756
757 for_each_engine(engine, dev_priv) {
758 engine_retire_requests(engine);
759 if (!intel_engine_is_active(engine))
760 dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
761 }
762
763 if (dev_priv->gt.active_engines == 0)
764 queue_delayed_work(dev_priv->wq,
765 &dev_priv->gt.idle_work,
766 msecs_to_jiffies(100));
767}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
new file mode 100644
index 000000000000..3496e28785e7
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -0,0 +1,676 @@
1/*
2 * Copyright © 2008-2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#ifndef I915_GEM_REQUEST_H
26#define I915_GEM_REQUEST_H
27
28#include <linux/fence.h>
29
30#include "i915_gem.h"
31
32struct intel_wait {
33 struct rb_node node;
34 struct task_struct *tsk;
35 u32 seqno;
36};
37
38struct intel_signal_node {
39 struct rb_node node;
40 struct intel_wait wait;
41};
42
43/**
44 * Request queue structure.
45 *
46 * The request queue allows us to note sequence numbers that have been emitted
47 * and may be associated with active buffers to be retired.
48 *
49 * By keeping this list, we can avoid having to do questionable sequence
50 * number comparisons on buffer last_read|write_seqno. It also allows an
51 * emission time to be associated with the request for tracking how far ahead
52 * of the GPU the submission is.
53 *
54 * The requests are reference counted.
55 */
56struct drm_i915_gem_request {
57 struct fence fence;
58 spinlock_t lock;
59
60	/** On which ring this request was generated */
61 struct drm_i915_private *i915;
62
63 /**
64 * Context and ring buffer related to this request
65 * Contexts are refcounted, so when this request is associated with a
66 * context, we must increment the context's refcount, to guarantee that
67 * it persists while any request is linked to it. Requests themselves
68 * are also refcounted, so the request will only be freed when the last
69 * reference to it is dismissed, and the code in
70 * i915_gem_request_free() will then decrement the refcount on the
71 * context.
72 */
73 struct i915_gem_context *ctx;
74 struct intel_engine_cs *engine;
75 struct intel_ring *ring;
76 struct intel_signal_node signaling;
77
78 /** GEM sequence number associated with the previous request,
79	 * when the HWS breadcrumb is equal to this, the GPU is processing
80 * this request.
81 */
82 u32 previous_seqno;
83
84 /** Position in the ringbuffer of the start of the request */
85 u32 head;
86
87 /**
88 * Position in the ringbuffer of the start of the postfix.
89 * This is required to calculate the maximum available ringbuffer
90 * space without overwriting the postfix.
91 */
92 u32 postfix;
93
94 /** Position in the ringbuffer of the end of the whole request */
95 u32 tail;
96
97	/** Preallocated space in the ringbuffer for emitting the request */
98 u32 reserved_space;
99
100 /**
101 * Context related to the previous request.
102 * As the contexts are accessed by the hardware until the switch is
103 * completed to a new context, the hardware may still be writing
104 * to the context object after the breadcrumb is visible. We must
105 * not unpin/unbind/prune that object whilst still active and so
106 * we keep the previous context pinned until the following (this)
107 * request is retired.
108 */
109 struct i915_gem_context *previous_context;
110
111 /** Batch buffer related to this request if any (used for
112 * error state dump only).
113 */
114 struct drm_i915_gem_object *batch_obj;
115 struct list_head active_list;
116
117 /** Time at which this request was emitted, in jiffies. */
118 unsigned long emitted_jiffies;
119
120 /** engine->request_list entry for this request */
121 struct list_head link;
122
123 /** ring->request_list entry for this request */
124 struct list_head ring_link;
125
126 struct drm_i915_file_private *file_priv;
127 /** file_priv list entry for this request */
128 struct list_head client_list;
129
130	/** identifier of the process that submitted this request */
131 struct pid *pid;
132
133 /**
134 * The ELSP only accepts two elements at a time, so we queue
135 * context/tail pairs on a given queue (ring->execlist_queue) until the
136 * hardware is available. The queue serves a double purpose: we also use
137 * it to keep track of the up to 2 contexts currently in the hardware
138 * (usually one in execution and the other queued up by the GPU): We
139 * only remove elements from the head of the queue when the hardware
140 * informs us that an element has been completed.
141 *
142 * All accesses to the queue are mediated by a spinlock
143 * (ring->execlist_lock).
144 */
145
146	/** Execlist link in the submission queue. */
147 struct list_head execlist_link;
148
150	/** Number of times this request has been sent to the ELSP (execlists) */
150 int elsp_submitted;
151
152 /** Execlists context hardware id. */
153 unsigned int ctx_hw_id;
154};
155
156extern const struct fence_ops i915_fence_ops;
157
158static inline bool fence_is_i915(struct fence *fence)
159{
160 return fence->ops == &i915_fence_ops;
161}
162
163struct drm_i915_gem_request * __must_check
164i915_gem_request_alloc(struct intel_engine_cs *engine,
165 struct i915_gem_context *ctx);
166int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
167 struct drm_file *file);
168void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
169
170static inline u32
171i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
172{
173 return req ? req->fence.seqno : 0;
174}
175
176static inline struct intel_engine_cs *
177i915_gem_request_get_engine(struct drm_i915_gem_request *req)
178{
179 return req ? req->engine : NULL;
180}
181
182static inline struct drm_i915_gem_request *
183to_request(struct fence *fence)
184{
185 /* We assume that NULL fence/request are interoperable */
186 BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
187 GEM_BUG_ON(fence && !fence_is_i915(fence));
188 return container_of(fence, struct drm_i915_gem_request, fence);
189}
190
191static inline struct drm_i915_gem_request *
192i915_gem_request_get(struct drm_i915_gem_request *req)
193{
194 return to_request(fence_get(&req->fence));
195}
196
197static inline struct drm_i915_gem_request *
198i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
199{
200 return to_request(fence_get_rcu(&req->fence));
201}
202
203static inline void
204i915_gem_request_put(struct drm_i915_gem_request *req)
205{
206 fence_put(&req->fence);
207}
208
209static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
210 struct drm_i915_gem_request *src)
211{
212 if (src)
213 i915_gem_request_get(src);
214
215 if (*pdst)
216 i915_gem_request_put(*pdst);
217
218 *pdst = src;
219}
220
221void __i915_add_request(struct drm_i915_gem_request *req,
222 struct drm_i915_gem_object *batch_obj,
223 bool flush_caches);
224#define i915_add_request(req) \
225 __i915_add_request(req, NULL, true)
226#define i915_add_request_no_flush(req) \
227 __i915_add_request(req, NULL, false)
228
229struct intel_rps_client;
230#define NO_WAITBOOST ERR_PTR(-1)
231#define IS_RPS_CLIENT(p) (!IS_ERR(p))
232#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
233
234int i915_wait_request(struct drm_i915_gem_request *req,
235 bool interruptible,
236 s64 *timeout,
237 struct intel_rps_client *rps)
238 __attribute__((nonnull(1)));
239
240static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
241
242/**
243 * Returns true if seq1 is later than seq2.
244 */
245static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
246{
247 return (s32)(seq1 - seq2) >= 0;
248}
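
/*
 * Worked example of the wrap-safe comparison above: with seq1 == 2 and
 * seq2 == 0xfffffffe (seq1 issued four requests later, after the 32bit
 * counter wrapped), seq1 - seq2 == 4 and (s32)4 >= 0, so seq1 is correctly
 * reported as later, whereas a naive seq1 >= seq2 check would not be.
 */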
249
250static inline bool
251i915_gem_request_started(const struct drm_i915_gem_request *req)
252{
253 return i915_seqno_passed(intel_engine_get_seqno(req->engine),
254 req->previous_seqno);
255}
256
257static inline bool
258i915_gem_request_completed(const struct drm_i915_gem_request *req)
259{
260 return i915_seqno_passed(intel_engine_get_seqno(req->engine),
261 req->fence.seqno);
262}
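
/*
 * Worked example for the two checks above: if the engine's HWS seqno
 * currently reads 41, a request with previous_seqno == 41 and
 * fence.seqno == 42 reports started (its predecessor has been passed) but
 * not yet completed; once the HWS advances to 42, both return true.
 */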
263
264bool __i915_spin_request(const struct drm_i915_gem_request *request,
265 int state, unsigned long timeout_us);
266static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
267 int state, unsigned long timeout_us)
268{
269 return (i915_gem_request_started(request) &&
270 __i915_spin_request(request, state, timeout_us));
271}
272
273/* We treat requests as fences. This is not to be confused with our
274 * "fence registers", but pipeline synchronisation objects ala GL_ARB_sync.
275 * We use the fences to synchronize access from the CPU with activity on the
276 * GPU, for example, we should not rewrite an object's PTE whilst the GPU
277 * is reading them. We also track fences at a higher level to provide
278 * implicit synchronisation around GEM objects, e.g. set-domain will wait
279 * for outstanding GPU rendering before marking the object ready for CPU
280 * access, or a pageflip will wait until the GPU is complete before showing
281 * the frame on the scanout.
282 *
283 * In order to use a fence, the object must track the fence it needs to
284 * serialise with. For example, GEM objects want to track both read and
285 * write access so that we can perform concurrent read operations between
286 * the CPU and GPU engines, as well as waiting for all rendering to
287 * complete, or waiting for the last GPU user of a "fence register". The
288 * object then embeds a #i915_gem_active to track the most recent (in
289 * retirement order) request relevant for the desired mode of access.
290 * The #i915_gem_active is updated with i915_gem_active_set() to track the
291 * most recent fence request, typically this is done as part of
292 * i915_vma_move_to_active().
293 *
294 * When the #i915_gem_active completes (is retired), it will
295 * signal its completion to the owner through a callback as well as mark
296 * itself as idle (i915_gem_active.request == NULL). The owner
297 * can then perform any action, such as delayed freeing of an active
298 * resource including itself.
299 */
300struct i915_gem_active;
301
302typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
303 struct drm_i915_gem_request *);
304
305struct i915_gem_active {
306 struct drm_i915_gem_request __rcu *request;
307 struct list_head link;
308 i915_gem_retire_fn retire;
309};
310
311void i915_gem_retire_noop(struct i915_gem_active *,
312 struct drm_i915_gem_request *request);
313
314/**
315 * init_request_active - prepares the activity tracker for use
316 * @active - the active tracker
317 * @retire - a callback invoked when the tracker is retired (becomes idle),
318 * can be NULL
319 *
320 * init_request_active() prepares the embedded @active struct for use as
321 * an activity tracker, that is for tracking the last known active request
322 * associated with it. When that request is retired after completion (and so
323 * becomes idle), the optional callback @retire is invoked.
324 */
325static inline void
326init_request_active(struct i915_gem_active *active,
327 i915_gem_retire_fn retire)
328{
329 INIT_LIST_HEAD(&active->link);
330 active->retire = retire ?: i915_gem_retire_noop;
331}
332
333/**
334 * i915_gem_active_set - updates the tracker to watch the current request
335 * @active - the active tracker
336 * @request - the request to watch
337 *
338 * i915_gem_active_set() watches the given @request for completion. Whilst
339 * that @request is busy, the @active reports busy. When that @request is
340 * retired, the @active tracker is updated to report idle.
341 */
342static inline void
343i915_gem_active_set(struct i915_gem_active *active,
344 struct drm_i915_gem_request *request)
345{
346 list_move(&active->link, &request->active_list);
347 rcu_assign_pointer(active->request, request);
348}
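
/*
 * Illustrative sketch only (struct example_resource and its callbacks are
 * hypothetical): wiring a resource to the tracker API above. The retire
 * callback runs when the tracked request is retired, by which point the
 * tracker already reports idle.
 */
struct example_resource {
	struct i915_gem_active active;
};

static void example_retire(struct i915_gem_active *active,
			   struct drm_i915_gem_request *request)
{
	/* e.g. move the owning example_resource onto an inactive list */
}

static inline void example_init(struct example_resource *res)
{
	init_request_active(&res->active, example_retire);
}

static inline void example_mark_active(struct example_resource *res,
				       struct drm_i915_gem_request *req)
{
	/* typically called under struct_mutex, alongside request emission */
	i915_gem_active_set(&res->active, req);
}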
349
350static inline struct drm_i915_gem_request *
351__i915_gem_active_peek(const struct i915_gem_active *active)
352{
353 /* Inside the error capture (running with the driver in an unknown
354 * state), we want to bend the rules slightly (a lot).
355 *
356	 * Work is in progress to make it safer; in the meantime this keeps
357 * the known issue from spamming the logs.
358 */
359 return rcu_dereference_protected(active->request, 1);
360}
361
362/**
363 * i915_gem_active_peek - report the active request being monitored
364 * @active - the active tracker
365 *
366 * i915_gem_active_peek() returns the current request being tracked if
367 * still active, or NULL. It does not obtain a reference on the request
368 * for the caller, so the caller must hold struct_mutex.
369 */
370static inline struct drm_i915_gem_request *
371i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
372{
373 struct drm_i915_gem_request *request;
374
375 request = rcu_dereference_protected(active->request,
376 lockdep_is_held(mutex));
377 if (!request || i915_gem_request_completed(request))
378 return NULL;
379
380 return request;
381}
382
383/**
384 * i915_gem_active_peek_rcu - report the active request being monitored
385 * @active - the active tracker
386 *
387 * i915_gem_active_peek_rcu() returns the current request being tracked if
388 * still active, or NULL. It does not obtain a reference on the request
389 * for the caller, and inspection of the request is only valid under
390 * the RCU lock.
391 */
392static inline struct drm_i915_gem_request *
393i915_gem_active_peek_rcu(const struct i915_gem_active *active)
394{
395 struct drm_i915_gem_request *request;
396
397 request = rcu_dereference(active->request);
398 if (!request || i915_gem_request_completed(request))
399 return NULL;
400
401 return request;
402}
403
404/**
405 * i915_gem_active_get - return a reference to the active request
406 * @active - the active tracker
407 *
408 * i915_gem_active_get() returns a reference to the active request, or NULL
409 * if the active tracker is idle. The caller must hold struct_mutex.
410 */
411static inline struct drm_i915_gem_request *
412i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
413{
414 return i915_gem_request_get(i915_gem_active_peek(active, mutex));
415}
416
417/**
418 * __i915_gem_active_get_rcu - return a reference to the active request
419 * @active - the active tracker
420 *
421 * __i915_gem_active_get() returns a reference to the active request, or NULL
422 * if the active tracker is idle. The caller must hold the RCU read lock, but
423 * the returned pointer is safe to use outside of RCU.
424 */
425static inline struct drm_i915_gem_request *
426__i915_gem_active_get_rcu(const struct i915_gem_active *active)
427{
428 /* Performing a lockless retrieval of the active request is super
429 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
430 * slab of request objects will not be freed whilst we hold the
431 * RCU read lock. It does not guarantee that the request itself
432 * will not be freed and then *reused*. Viz,
433 *
434 * Thread A Thread B
435 *
436 * req = active.request
437 * retire(req) -> free(req);
438 * (req is now first on the slab freelist)
439 * active.request = NULL
440 *
441 * req = new submission on a new object
442 * ref(req)
443 *
444 * To prevent the request from being reused whilst the caller
445 * uses it, we take a reference like normal. Whilst acquiring
446 * the reference we check that it is not in a destroyed state
447 * (refcnt == 0). That prevents the request being reallocated
448 * whilst the caller holds on to it. To check that the request
449 * was not reallocated as we acquired the reference we have to
450 * check that our request remains the active request across
451 * the lookup, in the same manner as a seqlock. The visibility
452 * of the pointer versus the reference counting is controlled
453 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
454 *
455 * In the middle of all that, we inspect whether the request is
456 * complete. Retiring is lazy so the request may be completed long
457 * before the active tracker is updated. Querying whether the
458 * request is complete is far cheaper (as it involves no locked
459 * instructions setting cachelines to exclusive) than acquiring
460 * the reference, so we do it first. The RCU read lock ensures the
461 * pointer dereference is valid, but does not ensure that the
462 * seqno nor HWS is the right one! However, if the request was
463 * reallocated, that means the active tracker's request was complete.
464 * If the new request is also complete, then both are and we can
465 * just report the active tracker is idle. If the new request is
466 * incomplete, then we acquire a reference on it and check that
467 * it remained the active request.
468 */
469 do {
470 struct drm_i915_gem_request *request;
471
472 request = rcu_dereference(active->request);
473 if (!request || i915_gem_request_completed(request))
474 return NULL;
475
476 request = i915_gem_request_get_rcu(request);
477
478 /* What stops the following rcu_access_pointer() from occurring
479 * before the above i915_gem_request_get_rcu()? If we were
480 * to read the value before pausing to get the reference to
481 * the request, we may not notice a change in the active
482 * tracker.
483 *
484 * The rcu_access_pointer() is a mere compiler barrier, which
485 * means both the CPU and compiler are free to perform the
486 * memory read without constraint. The compiler only has to
487 * ensure that any operations after the rcu_access_pointer()
488 * occur afterwards in program order. This means the read may
489 * be performed earlier by an out-of-order CPU, or adventurous
490 * compiler.
491 *
492 * The atomic operation at the heart of
493 * i915_gem_request_get_rcu(), see fence_get_rcu(), is
494 * atomic_inc_not_zero() which is only a full memory barrier
495 * when successful. That is, if i915_gem_request_get_rcu()
496 * returns the request (and so with the reference counted
497 * incremented) then the following read for rcu_access_pointer()
498 * must occur after the atomic operation and so confirm
499 * that this request is the one currently being tracked.
500 */
501 if (!request || request == rcu_access_pointer(active->request))
502 return rcu_pointer_handoff(request);
503
504 i915_gem_request_put(request);
505 } while (1);
506}
507
508/**
509 * i915_gem_active_get_unlocked - return a reference to the active request
510 * @active - the active tracker
511 *
512 * i915_gem_active_get_unlocked() returns a reference to the active request,
513 * or NULL if the active tracker is idle. The reference is obtained under RCU,
514 * so no locking is required by the caller.
515 *
516 * The reference should be freed with i915_gem_request_put().
517 */
518static inline struct drm_i915_gem_request *
519i915_gem_active_get_unlocked(const struct i915_gem_active *active)
520{
521 struct drm_i915_gem_request *request;
522
523 rcu_read_lock();
524 request = __i915_gem_active_get_rcu(active);
525 rcu_read_unlock();
526
527 return request;
528}
529
530/**
531 * i915_gem_active_isset - report whether the active tracker is assigned
532 * @active - the active tracker
533 *
534 * i915_gem_active_isset() returns true if the active tracker is currently
535 * assigned to a request. Due to the lazy retiring, that request may be idle
536 * and this may report stale information.
537 */
538static inline bool
539i915_gem_active_isset(const struct i915_gem_active *active)
540{
541 return rcu_access_pointer(active->request);
542}
543
544/**
545 * i915_gem_active_is_idle - report whether the active tracker is idle
546 * @active - the active tracker
547 *
548 * i915_gem_active_is_idle() returns true if the active tracker is currently
549 * unassigned or if the request is complete (but not yet retired). Requires
550 * the caller to hold struct_mutex (but that can be relaxed if desired).
551 */
552static inline bool
553i915_gem_active_is_idle(const struct i915_gem_active *active,
554 struct mutex *mutex)
555{
556 return !i915_gem_active_peek(active, mutex);
557}
558
559/**
560 * i915_gem_active_wait - waits until the request is completed
561 * @active - the active request on which to wait
562 *
563 * i915_gem_active_wait() waits until the request is completed before
564 * returning. Note that it does not guarantee that the request is
565 * retired first, see i915_gem_active_retire().
566 *
567 * i915_gem_active_wait() returns immediately if the active
568 * request is already complete.
569 */
570static inline int __must_check
571i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
572{
573 struct drm_i915_gem_request *request;
574
575 request = i915_gem_active_peek(active, mutex);
576 if (!request)
577 return 0;
578
579 return i915_wait_request(request, true, NULL, NULL);
580}
581
582/**
583 * i915_gem_active_wait_unlocked - waits until the request is completed
584 * @active - the active request on which to wait
585 * @interruptible - whether the wait can be woken by a userspace signal
586 * @timeout - how long to wait at most
587 * @rps - userspace client to charge for a waitboost
588 *
589 * i915_gem_active_wait_unlocked() waits until the request is completed before
590 * returning, without requiring any locks to be held. Note that it does not
591 * retire any requests before returning.
592 *
593 * This function relies on RCU in order to acquire the reference to the active
594 * request without holding any locks. See __i915_gem_active_get_rcu() for the
595 * gory details on how that is managed. Once the reference is acquired, we
596 * can then wait upon the request, and afterwards release our reference,
597 * free of any locking.
598 *
599 * This function wraps i915_wait_request(), see it for the full details on
600 * the arguments.
601 *
602 * Returns 0 if successful, or a negative error code.
603 */
604static inline int
605i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
606 bool interruptible,
607 s64 *timeout,
608 struct intel_rps_client *rps)
609{
610 struct drm_i915_gem_request *request;
611 int ret = 0;
612
613 request = i915_gem_active_get_unlocked(active);
614 if (request) {
615 ret = i915_wait_request(request, interruptible, timeout, rps);
616 i915_gem_request_put(request);
617 }
618
619 return ret;
620}
621
622/**
623 * i915_gem_active_retire - waits until the request is retired
624 * @active - the active request on which to wait
625 *
626 * i915_gem_active_retire() waits until the request is completed,
627 * and then ensures that at least the retirement handler for this
628 * @active tracker is called before returning. If the @active
629 * tracker is idle, the function returns immediately.
630 */
631static inline int __must_check
632i915_gem_active_retire(struct i915_gem_active *active,
633 struct mutex *mutex)
634{
635 struct drm_i915_gem_request *request;
636 int ret;
637
638 request = rcu_dereference_protected(active->request,
639 lockdep_is_held(mutex));
640 if (!request)
641 return 0;
642
643 ret = i915_wait_request(request, true, NULL, NULL);
644 if (ret)
645 return ret;
646
647 list_del_init(&active->link);
648 RCU_INIT_POINTER(active->request, NULL);
649
650 active->retire(active, request);
651
652 return 0;
653}
654
655/* Convenience functions for peeking at state inside active's request whilst
656 * guarded by the struct_mutex.
657 */
658
659static inline uint32_t
660i915_gem_active_get_seqno(const struct i915_gem_active *active,
661 struct mutex *mutex)
662{
663 return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex));
664}
665
666static inline struct intel_engine_cs *
667i915_gem_active_get_engine(const struct i915_gem_active *active,
668 struct mutex *mutex)
669{
670 return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex));
671}
672
673#define for_each_active(mask, idx) \
674 for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
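
/*
 * Illustrative sketch only (the last_read[] array and the engine mask are
 * assumptions modelled on callers later in this series, e.g.
 * i915_gem_userptr): walk only the engines flagged in the mask and wait on
 * each tracker without taking struct_mutex.
 */
static inline void example_wait_all_engines(struct i915_gem_active *last_read,
					    unsigned long mask)
{
	int idx;

	for_each_active(mask, idx)
		i915_gem_active_wait_unlocked(&last_read[idx],
					      false, NULL, NULL);
}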
675
676#endif /* I915_GEM_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 6f10b421487b..b80802b35353 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -48,19 +48,15 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
48#endif 48#endif
49} 49}
50 50
51static int num_vma_bound(struct drm_i915_gem_object *obj) 51static bool any_vma_pinned(struct drm_i915_gem_object *obj)
52{ 52{
53 struct i915_vma *vma; 53 struct i915_vma *vma;
54 int count = 0;
55 54
56 list_for_each_entry(vma, &obj->vma_list, obj_link) { 55 list_for_each_entry(vma, &obj->vma_list, obj_link)
57 if (drm_mm_node_allocated(&vma->node)) 56 if (i915_vma_is_pinned(vma))
58 count++; 57 return true;
59 if (vma->pin_count)
60 count++;
61 }
62 58
63 return count; 59 return false;
64} 60}
65 61
66static bool swap_available(void) 62static bool swap_available(void)
@@ -82,7 +78,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
82 * to the GPU, simply unbinding from the GPU is not going to succeed 78 * to the GPU, simply unbinding from the GPU is not going to succeed
83 * in releasing our pin count on the pages themselves. 79 * in releasing our pin count on the pages themselves.
84 */ 80 */
85 if (obj->pages_pin_count != num_vma_bound(obj)) 81 if (obj->pages_pin_count > obj->bind_count)
82 return false;
83
84 if (any_vma_pinned(obj))
86 return false; 85 return false;
87 86
88 /* We can only return physical pages to the system if we can either 87 /* We can only return physical pages to the system if we can either
@@ -163,17 +162,16 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
163 */ 162 */
164 for (phase = phases; phase->list; phase++) { 163 for (phase = phases; phase->list; phase++) {
165 struct list_head still_in_list; 164 struct list_head still_in_list;
165 struct drm_i915_gem_object *obj;
166 166
167 if ((flags & phase->bit) == 0) 167 if ((flags & phase->bit) == 0)
168 continue; 168 continue;
169 169
170 INIT_LIST_HEAD(&still_in_list); 170 INIT_LIST_HEAD(&still_in_list);
171 while (count < target && !list_empty(phase->list)) { 171 while (count < target &&
172 struct drm_i915_gem_object *obj; 172 (obj = list_first_entry_or_null(phase->list,
173 struct i915_vma *vma, *v; 173 typeof(*obj),
174 174 global_list))) {
175 obj = list_first_entry(phase->list,
176 typeof(*obj), global_list);
177 list_move_tail(&obj->global_list, &still_in_list); 175 list_move_tail(&obj->global_list, &still_in_list);
178 176
179 if (flags & I915_SHRINK_PURGEABLE && 177 if (flags & I915_SHRINK_PURGEABLE &&
@@ -184,24 +182,21 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
184 !is_vmalloc_addr(obj->mapping)) 182 !is_vmalloc_addr(obj->mapping))
185 continue; 183 continue;
186 184
187 if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active) 185 if ((flags & I915_SHRINK_ACTIVE) == 0 &&
186 i915_gem_object_is_active(obj))
188 continue; 187 continue;
189 188
190 if (!can_release_pages(obj)) 189 if (!can_release_pages(obj))
191 continue; 190 continue;
192 191
193 drm_gem_object_reference(&obj->base); 192 i915_gem_object_get(obj);
194 193
195 /* For the unbound phase, this should be a no-op! */ 194 /* For the unbound phase, this should be a no-op! */
196 list_for_each_entry_safe(vma, v, 195 i915_gem_object_unbind(obj);
197 &obj->vma_list, obj_link)
198 if (i915_vma_unbind(vma))
199 break;
200
201 if (i915_gem_object_put_pages(obj) == 0) 196 if (i915_gem_object_put_pages(obj) == 0)
202 count += obj->base.size >> PAGE_SHIFT; 197 count += obj->base.size >> PAGE_SHIFT;
203 198
204 drm_gem_object_unreference(&obj->base); 199 i915_gem_object_put(obj);
205 } 200 }
206 list_splice(&still_in_list, phase->list); 201 list_splice(&still_in_list, phase->list);
207 } 202 }
@@ -210,6 +205,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
210 intel_runtime_pm_put(dev_priv); 205 intel_runtime_pm_put(dev_priv);
211 206
212 i915_gem_retire_requests(dev_priv); 207 i915_gem_retire_requests(dev_priv);
208 /* expedite the RCU grace period to free some request slabs */
209 synchronize_rcu_expedited();
213 210
214 return count; 211 return count;
215} 212}
@@ -230,10 +227,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
230 */ 227 */
231unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) 228unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
232{ 229{
233 return i915_gem_shrink(dev_priv, -1UL, 230 unsigned long freed;
234 I915_SHRINK_BOUND | 231
235 I915_SHRINK_UNBOUND | 232 freed = i915_gem_shrink(dev_priv, -1UL,
236 I915_SHRINK_ACTIVE); 233 I915_SHRINK_BOUND |
234 I915_SHRINK_UNBOUND |
235 I915_SHRINK_ACTIVE);
236 rcu_barrier(); /* wait until our RCU delayed slab frees are completed */
237
238 return freed;
237} 239}
238 240
239static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) 241static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
@@ -242,9 +244,6 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
242 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 244 if (!mutex_is_locked_by(&dev->struct_mutex, current))
243 return false; 245 return false;
244 246
245 if (to_i915(dev)->mm.shrinker_no_lock_stealing)
246 return false;
247
248 *unlock = false; 247 *unlock = false;
249 } else 248 } else
250 *unlock = true; 249 *unlock = true;
@@ -273,7 +272,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
273 count += obj->base.size >> PAGE_SHIFT; 272 count += obj->base.size >> PAGE_SHIFT;
274 273
275 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 274 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
276 if (!obj->active && can_release_pages(obj)) 275 if (!i915_gem_object_is_active(obj) && can_release_pages(obj))
277 count += obj->base.size >> PAGE_SHIFT; 276 count += obj->base.size >> PAGE_SHIFT;
278 } 277 }
279 278
@@ -321,17 +320,22 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
321 struct shrinker_lock_uninterruptible *slu, 320 struct shrinker_lock_uninterruptible *slu,
322 int timeout_ms) 321 int timeout_ms)
323{ 322{
324 unsigned long timeout = msecs_to_jiffies(timeout_ms) + 1; 323 unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
324
325 do {
326 if (i915_gem_wait_for_idle(dev_priv, false) == 0 &&
327 i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock))
328 break;
325 329
326 while (!i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) {
327 schedule_timeout_killable(1); 330 schedule_timeout_killable(1);
328 if (fatal_signal_pending(current)) 331 if (fatal_signal_pending(current))
329 return false; 332 return false;
330 if (--timeout == 0) { 333
334 if (time_after(jiffies, timeout)) {
331 pr_err("Unable to lock GPU to purge memory.\n"); 335 pr_err("Unable to lock GPU to purge memory.\n");
332 return false; 336 return false;
333 } 337 }
334 } 338 } while (1);
335 339
336 slu->was_interruptible = dev_priv->mm.interruptible; 340 slu->was_interruptible = dev_priv->mm.interruptible;
337 dev_priv->mm.interruptible = false; 341 dev_priv->mm.interruptible = false;
@@ -410,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
410 return NOTIFY_DONE; 414 return NOTIFY_DONE;
411 415
412 /* Force everything onto the inactive lists */ 416 /* Force everything onto the inactive lists */
413 ret = i915_gem_wait_for_idle(dev_priv); 417 ret = i915_gem_wait_for_idle(dev_priv, false);
414 if (ret) 418 if (ret)
415 goto out; 419 goto out;
416 420
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 66be299a1486..13279610eeec 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -698,24 +698,24 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
698 */ 698 */
699 vma->node.start = gtt_offset; 699 vma->node.start = gtt_offset;
700 vma->node.size = size; 700 vma->node.size = size;
701 if (drm_mm_initialized(&ggtt->base.mm)) {
702 ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
703 if (ret) {
704 DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
705 goto err;
706 }
707 701
708 vma->bound |= GLOBAL_BIND; 702 ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
709 __i915_vma_set_map_and_fenceable(vma); 703 if (ret) {
710 list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); 704 DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
705 goto err;
711 } 706 }
712 707
708 vma->flags |= I915_VMA_GLOBAL_BIND;
709 __i915_vma_set_map_and_fenceable(vma);
710 list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
711 obj->bind_count++;
712
713 list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); 713 list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
714 i915_gem_object_pin_pages(obj); 714 i915_gem_object_pin_pages(obj);
715 715
716 return obj; 716 return obj;
717 717
718err: 718err:
719 drm_gem_object_unreference(&obj->base); 719 i915_gem_object_put(obj);
720 return NULL; 720 return NULL;
721} 721}
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 8030199731db..f4b984de83b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -68,6 +68,9 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
68 if (tiling_mode == I915_TILING_NONE) 68 if (tiling_mode == I915_TILING_NONE)
69 return true; 69 return true;
70 70
71 if (tiling_mode > I915_TILING_LAST)
72 return false;
73
71 if (IS_GEN2(dev) || 74 if (IS_GEN2(dev) ||
72 (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) 75 (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
73 tile_width = 128; 76 tile_width = 128;
@@ -117,15 +120,16 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
117static bool 120static bool
118i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) 121i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
119{ 122{
123 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
120 u32 size; 124 u32 size;
121 125
122 if (tiling_mode == I915_TILING_NONE) 126 if (tiling_mode == I915_TILING_NONE)
123 return true; 127 return true;
124 128
125 if (INTEL_INFO(obj->base.dev)->gen >= 4) 129 if (INTEL_GEN(dev_priv) >= 4)
126 return true; 130 return true;
127 131
128 if (IS_GEN3(obj->base.dev)) { 132 if (IS_GEN3(dev_priv)) {
129 if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) 133 if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK)
130 return false; 134 return false;
131 } else { 135 } else {
@@ -133,7 +137,7 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
133 return false; 137 return false;
134 } 138 }
135 139
136 size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode); 140 size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, tiling_mode);
137 if (i915_gem_obj_ggtt_size(obj) != size) 141 if (i915_gem_obj_ggtt_size(obj) != size)
138 return false; 142 return false;
139 143
@@ -166,13 +170,16 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
166 struct drm_i915_gem_object *obj; 170 struct drm_i915_gem_object *obj;
167 int ret = 0; 171 int ret = 0;
168 172
169 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 173 /* Make sure we don't cross-contaminate obj->tiling_and_stride */
170 if (&obj->base == NULL) 174 BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);
175
176 obj = i915_gem_object_lookup(file, args->handle);
177 if (!obj)
171 return -ENOENT; 178 return -ENOENT;
172 179
173 if (!i915_tiling_ok(dev, 180 if (!i915_tiling_ok(dev,
174 args->stride, obj->base.size, args->tiling_mode)) { 181 args->stride, obj->base.size, args->tiling_mode)) {
175 drm_gem_object_unreference_unlocked(&obj->base); 182 i915_gem_object_put_unlocked(obj);
176 return -EINVAL; 183 return -EINVAL;
177 } 184 }
178 185
@@ -213,8 +220,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
213 } 220 }
214 } 221 }
215 222
216 if (args->tiling_mode != obj->tiling_mode || 223 if (args->tiling_mode != i915_gem_object_get_tiling(obj) ||
217 args->stride != obj->stride) { 224 args->stride != i915_gem_object_get_stride(obj)) {
218 /* We need to rebind the object if its current allocation 225 /* We need to rebind the object if its current allocation
219 * no longer meets the alignment restrictions for its new 226 * no longer meets the alignment restrictions for its new
220 * tiling mode. Otherwise we can just leave it alone, but 227 * tiling mode. Otherwise we can just leave it alone, but
@@ -237,24 +244,25 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
237 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 244 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
238 if (args->tiling_mode == I915_TILING_NONE) 245 if (args->tiling_mode == I915_TILING_NONE)
239 i915_gem_object_unpin_pages(obj); 246 i915_gem_object_unpin_pages(obj);
240 if (obj->tiling_mode == I915_TILING_NONE) 247 if (!i915_gem_object_is_tiled(obj))
241 i915_gem_object_pin_pages(obj); 248 i915_gem_object_pin_pages(obj);
242 } 249 }
243 250
244 obj->fence_dirty = 251 obj->fence_dirty =
245 obj->last_fenced_req || 252 !i915_gem_active_is_idle(&obj->last_fence,
253 &dev->struct_mutex) ||
246 obj->fence_reg != I915_FENCE_REG_NONE; 254 obj->fence_reg != I915_FENCE_REG_NONE;
247 255
248 obj->tiling_mode = args->tiling_mode; 256 obj->tiling_and_stride =
249 obj->stride = args->stride; 257 args->stride | args->tiling_mode;
250 258
251 /* Force the fence to be reacquired for GTT access */ 259 /* Force the fence to be reacquired for GTT access */
252 i915_gem_release_mmap(obj); 260 i915_gem_release_mmap(obj);
253 } 261 }
254 } 262 }
255 /* we have to maintain this existing ABI... */ 263 /* we have to maintain this existing ABI... */
256 args->stride = obj->stride; 264 args->stride = i915_gem_object_get_stride(obj);
257 args->tiling_mode = obj->tiling_mode; 265 args->tiling_mode = i915_gem_object_get_tiling(obj);
258 266
259 /* Try to preallocate memory required to save swizzling on put-pages */ 267 /* Try to preallocate memory required to save swizzling on put-pages */
260 if (i915_gem_object_needs_bit17_swizzle(obj)) { 268 if (i915_gem_object_needs_bit17_swizzle(obj)) {
@@ -268,7 +276,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
268 } 276 }
269 277
270err: 278err:
271 drm_gem_object_unreference(&obj->base); 279 i915_gem_object_put(obj);
272 mutex_unlock(&dev->struct_mutex); 280 mutex_unlock(&dev->struct_mutex);
273 281
274 intel_runtime_pm_put(dev_priv); 282 intel_runtime_pm_put(dev_priv);
@@ -297,14 +305,12 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
297 struct drm_i915_private *dev_priv = to_i915(dev); 305 struct drm_i915_private *dev_priv = to_i915(dev);
298 struct drm_i915_gem_object *obj; 306 struct drm_i915_gem_object *obj;
299 307
300 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 308 obj = i915_gem_object_lookup(file, args->handle);
301 if (&obj->base == NULL) 309 if (!obj)
302 return -ENOENT; 310 return -ENOENT;
303 311
304 mutex_lock(&dev->struct_mutex); 312 args->tiling_mode = READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
305 313 switch (args->tiling_mode) {
306 args->tiling_mode = obj->tiling_mode;
307 switch (obj->tiling_mode) {
308 case I915_TILING_X: 314 case I915_TILING_X:
309 args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; 315 args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
310 break; 316 break;
@@ -328,8 +334,6 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
328 if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) 334 if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
329 args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; 335 args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
330 336
331 drm_gem_object_unreference(&obj->base); 337 i915_gem_object_put_unlocked(obj);
332 mutex_unlock(&dev->struct_mutex);
333
334 return 0; 338 return 0;
335} 339}
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2314c88323e3..57218cca7e05 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -63,33 +63,12 @@ struct i915_mmu_object {
63 63
64static void wait_rendering(struct drm_i915_gem_object *obj) 64static void wait_rendering(struct drm_i915_gem_object *obj)
65{ 65{
66 struct drm_device *dev = obj->base.dev; 66 unsigned long active = __I915_BO_ACTIVE(obj);
67 struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; 67 int idx;
68 int i, n;
69
70 if (!obj->active)
71 return;
72
73 n = 0;
74 for (i = 0; i < I915_NUM_ENGINES; i++) {
75 struct drm_i915_gem_request *req;
76
77 req = obj->last_read_req[i];
78 if (req == NULL)
79 continue;
80
81 requests[n++] = i915_gem_request_reference(req);
82 }
83
84 mutex_unlock(&dev->struct_mutex);
85 68
86 for (i = 0; i < n; i++) 69 for_each_active(active, idx)
87 __i915_wait_request(requests[i], false, NULL, NULL); 70 i915_gem_active_wait_unlocked(&obj->last_read[idx],
88 71 false, NULL, NULL);
89 mutex_lock(&dev->struct_mutex);
90
91 for (i = 0; i < n; i++)
92 i915_gem_request_unreference(requests[i]);
93} 72}
94 73
95static void cancel_userptr(struct work_struct *work) 74static void cancel_userptr(struct work_struct *work)
@@ -98,28 +77,19 @@ static void cancel_userptr(struct work_struct *work)
98 struct drm_i915_gem_object *obj = mo->obj; 77 struct drm_i915_gem_object *obj = mo->obj;
99 struct drm_device *dev = obj->base.dev; 78 struct drm_device *dev = obj->base.dev;
100 79
80 wait_rendering(obj);
81
101 mutex_lock(&dev->struct_mutex); 82 mutex_lock(&dev->struct_mutex);
102 /* Cancel any active worker and force us to re-evaluate gup */ 83 /* Cancel any active worker and force us to re-evaluate gup */
103 obj->userptr.work = NULL; 84 obj->userptr.work = NULL;
104 85
105 if (obj->pages != NULL) { 86 if (obj->pages != NULL) {
106 struct drm_i915_private *dev_priv = to_i915(dev); 87 /* We are inside a kthread context and can't be interrupted */
107 struct i915_vma *vma, *tmp; 88 WARN_ON(i915_gem_object_unbind(obj));
108 bool was_interruptible;
109
110 wait_rendering(obj);
111
112 was_interruptible = dev_priv->mm.interruptible;
113 dev_priv->mm.interruptible = false;
114
115 list_for_each_entry_safe(vma, tmp, &obj->vma_list, obj_link)
116 WARN_ON(i915_vma_unbind(vma));
117 WARN_ON(i915_gem_object_put_pages(obj)); 89 WARN_ON(i915_gem_object_put_pages(obj));
118
119 dev_priv->mm.interruptible = was_interruptible;
120 } 90 }
121 91
122 drm_gem_object_unreference(&obj->base); 92 i915_gem_object_put(obj);
123 mutex_unlock(&dev->struct_mutex); 93 mutex_unlock(&dev->struct_mutex);
124} 94}
125 95
@@ -577,7 +547,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
577 } 547 }
578 548
579 obj->userptr.workers--; 549 obj->userptr.workers--;
580 drm_gem_object_unreference(&obj->base); 550 i915_gem_object_put(obj);
581 mutex_unlock(&dev->struct_mutex); 551 mutex_unlock(&dev->struct_mutex);
582 552
583 release_pages(pvec, pinned, 0); 553 release_pages(pvec, pinned, 0);
@@ -622,8 +592,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
622 obj->userptr.work = &work->work; 592 obj->userptr.work = &work->work;
623 obj->userptr.workers++; 593 obj->userptr.workers++;
624 594
625 work->obj = obj; 595 work->obj = i915_gem_object_get(obj);
626 drm_gem_object_reference(&obj->base);
627 596
628 work->task = current; 597 work->task = current;
629 get_task_struct(work->task); 598 get_task_struct(work->task);
@@ -846,7 +815,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
846 ret = drm_gem_handle_create(file, &obj->base, &handle); 815 ret = drm_gem_handle_create(file, &obj->base, &handle);
847 816
848 /* drop reference from allocate - handle holds it now */ 817 /* drop reference from allocate - handle holds it now */
849 drm_gem_object_unreference_unlocked(&obj->base); 818 i915_gem_object_put_unlocked(obj);
850 if (ret) 819 if (ret)
851 return ret; 820 return ret;
852 821
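
The rewritten wait_rendering() snapshots the object's per-engine activity bitmask (__I915_BO_ACTIVE) and then waits on each still-active engine without holding struct_mutex, instead of collecting and referencing requests by hand. A toy version of the walk-the-set-bits loop, with made-up names:

#include <stdio.h>

#define NUM_ENGINES 4

/* Stand-in for the per-engine unlocked wait; purely illustrative. */
static void wait_on_engine(int idx)
{
        printf("waiting for engine %d to finish with the object\n", idx);
}

/* Visit only the engines whose bit is set in the snapshotted mask. */
static void wait_rendering(unsigned int active)
{
        int idx;

        for (idx = 0; idx < NUM_ENGINES; idx++)
                if (active & (1u << idx))
                        wait_on_engine(idx);
}

int main(void)
{
        wait_rendering(0x5);    /* engines 0 and 2 were active at snapshot time */
        return 0;
}
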
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9d73d2216adc..eecb87063c88 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,9 +30,9 @@
30#include <generated/utsrelease.h> 30#include <generated/utsrelease.h>
31#include "i915_drv.h" 31#include "i915_drv.h"
32 32
33static const char *ring_str(int ring) 33static const char *engine_str(int engine)
34{ 34{
35 switch (ring) { 35 switch (engine) {
36 case RCS: return "render"; 36 case RCS: return "render";
37 case VCS: return "bsd"; 37 case VCS: return "bsd";
38 case BCS: return "blt"; 38 case BCS: return "blt";
@@ -207,8 +207,8 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
207 err_puts(m, dirty_flag(err->dirty)); 207 err_puts(m, dirty_flag(err->dirty));
208 err_puts(m, purgeable_flag(err->purgeable)); 208 err_puts(m, purgeable_flag(err->purgeable));
209 err_puts(m, err->userptr ? " userptr" : ""); 209 err_puts(m, err->userptr ? " userptr" : "");
210 err_puts(m, err->ring != -1 ? " " : ""); 210 err_puts(m, err->engine != -1 ? " " : "");
211 err_puts(m, ring_str(err->ring)); 211 err_puts(m, engine_str(err->engine));
212 err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); 212 err_puts(m, i915_cache_level_str(m->i915, err->cache_level));
213 213
214 if (err->name) 214 if (err->name)
@@ -221,7 +221,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
221 } 221 }
222} 222}
223 223
224static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a) 224static const char *hangcheck_action_to_str(enum intel_engine_hangcheck_action a)
225{ 225{
226 switch (a) { 226 switch (a) {
227 case HANGCHECK_IDLE: 227 case HANGCHECK_IDLE:
@@ -239,70 +239,65 @@ static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a)
239 return "unknown"; 239 return "unknown";
240} 240}
241 241
242static void i915_ring_error_state(struct drm_i915_error_state_buf *m, 242static void error_print_engine(struct drm_i915_error_state_buf *m,
243 struct drm_device *dev, 243 struct drm_i915_error_engine *ee)
244 struct drm_i915_error_state *error,
245 int ring_idx)
246{ 244{
247 struct drm_i915_error_ring *ring = &error->ring[ring_idx]; 245 err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
248 246 err_printf(m, " START: 0x%08x\n", ee->start);
249 if (!ring->valid) 247 err_printf(m, " HEAD: 0x%08x\n", ee->head);
250 return; 248 err_printf(m, " TAIL: 0x%08x\n", ee->tail);
251 249 err_printf(m, " CTL: 0x%08x\n", ee->ctl);
252 err_printf(m, "%s command stream:\n", ring_str(ring_idx)); 250 err_printf(m, " HWS: 0x%08x\n", ee->hws);
253 err_printf(m, " START: 0x%08x\n", ring->start); 251 err_printf(m, " ACTHD: 0x%08x %08x\n",
254 err_printf(m, " HEAD: 0x%08x\n", ring->head); 252 (u32)(ee->acthd>>32), (u32)ee->acthd);
255 err_printf(m, " TAIL: 0x%08x\n", ring->tail); 253 err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir);
256 err_printf(m, " CTL: 0x%08x\n", ring->ctl); 254 err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr);
257 err_printf(m, " HWS: 0x%08x\n", ring->hws); 255 err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone);
258 err_printf(m, " ACTHD: 0x%08x %08x\n", (u32)(ring->acthd>>32), (u32)ring->acthd); 256 if (INTEL_GEN(m->i915) >= 4) {
259 err_printf(m, " IPEIR: 0x%08x\n", ring->ipeir); 257 err_printf(m, " BBADDR: 0x%08x %08x\n",
260 err_printf(m, " IPEHR: 0x%08x\n", ring->ipehr); 258 (u32)(ee->bbaddr>>32), (u32)ee->bbaddr);
261 err_printf(m, " INSTDONE: 0x%08x\n", ring->instdone); 259 err_printf(m, " BB_STATE: 0x%08x\n", ee->bbstate);
262 if (INTEL_INFO(dev)->gen >= 4) { 260 err_printf(m, " INSTPS: 0x%08x\n", ee->instps);
263 err_printf(m, " BBADDR: 0x%08x %08x\n", (u32)(ring->bbaddr>>32), (u32)ring->bbaddr);
264 err_printf(m, " BB_STATE: 0x%08x\n", ring->bbstate);
265 err_printf(m, " INSTPS: 0x%08x\n", ring->instps);
266 } 261 }
267 err_printf(m, " INSTPM: 0x%08x\n", ring->instpm); 262 err_printf(m, " INSTPM: 0x%08x\n", ee->instpm);
268 err_printf(m, " FADDR: 0x%08x %08x\n", upper_32_bits(ring->faddr), 263 err_printf(m, " FADDR: 0x%08x %08x\n", upper_32_bits(ee->faddr),
269 lower_32_bits(ring->faddr)); 264 lower_32_bits(ee->faddr));
270 if (INTEL_INFO(dev)->gen >= 6) { 265 if (INTEL_GEN(m->i915) >= 6) {
271 err_printf(m, " RC PSMI: 0x%08x\n", ring->rc_psmi); 266 err_printf(m, " RC PSMI: 0x%08x\n", ee->rc_psmi);
272 err_printf(m, " FAULT_REG: 0x%08x\n", ring->fault_reg); 267 err_printf(m, " FAULT_REG: 0x%08x\n", ee->fault_reg);
273 err_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n", 268 err_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n",
274 ring->semaphore_mboxes[0], 269 ee->semaphore_mboxes[0],
275 ring->semaphore_seqno[0]); 270 ee->semaphore_seqno[0]);
276 err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n", 271 err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n",
277 ring->semaphore_mboxes[1], 272 ee->semaphore_mboxes[1],
278 ring->semaphore_seqno[1]); 273 ee->semaphore_seqno[1]);
279 if (HAS_VEBOX(dev)) { 274 if (HAS_VEBOX(m->i915)) {
280 err_printf(m, " SYNC_2: 0x%08x [last synced 0x%08x]\n", 275 err_printf(m, " SYNC_2: 0x%08x [last synced 0x%08x]\n",
281 ring->semaphore_mboxes[2], 276 ee->semaphore_mboxes[2],
282 ring->semaphore_seqno[2]); 277 ee->semaphore_seqno[2]);
283 } 278 }
284 } 279 }
285 if (USES_PPGTT(dev)) { 280 if (USES_PPGTT(m->i915)) {
286 err_printf(m, " GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode); 281 err_printf(m, " GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode);
287 282
288 if (INTEL_INFO(dev)->gen >= 8) { 283 if (INTEL_GEN(m->i915) >= 8) {
289 int i; 284 int i;
290 for (i = 0; i < 4; i++) 285 for (i = 0; i < 4; i++)
291 err_printf(m, " PDP%d: 0x%016llx\n", 286 err_printf(m, " PDP%d: 0x%016llx\n",
292 i, ring->vm_info.pdp[i]); 287 i, ee->vm_info.pdp[i]);
293 } else { 288 } else {
294 err_printf(m, " PP_DIR_BASE: 0x%08x\n", 289 err_printf(m, " PP_DIR_BASE: 0x%08x\n",
295 ring->vm_info.pp_dir_base); 290 ee->vm_info.pp_dir_base);
296 } 291 }
297 } 292 }
298 err_printf(m, " seqno: 0x%08x\n", ring->seqno); 293 err_printf(m, " seqno: 0x%08x\n", ee->seqno);
299 err_printf(m, " last_seqno: 0x%08x\n", ring->last_seqno); 294 err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno);
300 err_printf(m, " waiting: %s\n", yesno(ring->waiting)); 295 err_printf(m, " waiting: %s\n", yesno(ee->waiting));
301 err_printf(m, " ring->head: 0x%08x\n", ring->cpu_ring_head); 296 err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head);
302 err_printf(m, " ring->tail: 0x%08x\n", ring->cpu_ring_tail); 297 err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail);
303 err_printf(m, " hangcheck: %s [%d]\n", 298 err_printf(m, " hangcheck: %s [%d]\n",
304 hangcheck_action_to_str(ring->hangcheck_action), 299 hangcheck_action_to_str(ee->hangcheck_action),
305 ring->hangcheck_score); 300 ee->hangcheck_score);
306} 301}
307 302
308void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) 303void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
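
error_print_engine() collapses the old four-argument i915_ring_error_state() down to the error buffer plus one drm_i915_error_engine: the device is reachable through m->i915 for the INTEL_GEN()/HAS_VEBOX() checks, and everything printed lives in the per-engine snapshot. A simplified sketch of that calling shape; the struct names here are stand-ins:

#include <stdio.h>

/* Trimmed-down stand-ins for the structures involved. */
struct device_info { int gen; };

struct error_buf {
        struct device_info *i915;       /* lets helpers reach the device, like m->i915 */
};

struct engine_error {
        const char *name;
        unsigned int head, tail;
        unsigned long long acthd;
};

static void print_engine(struct error_buf *m, const struct engine_error *ee)
{
        printf("%s command stream:\n", ee->name);
        printf("  HEAD: 0x%08x\n", ee->head);
        printf("  TAIL: 0x%08x\n", ee->tail);
        if (m->i915->gen >= 4)  /* gen checks go through the buffer, not a dev argument */
                printf("  ACTHD: 0x%016llx\n", ee->acthd);
}

int main(void)
{
        struct device_info info = { .gen = 9 };
        struct error_buf m = { .i915 = &info };
        struct engine_error ee = { "render", 0x30, 0x40, 0x1234 };

        print_engine(&m, &ee);
        return 0;
}
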
@@ -348,17 +343,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
348 error->time.tv_usec); 343 error->time.tv_usec);
349 err_printf(m, "Kernel: " UTS_RELEASE "\n"); 344 err_printf(m, "Kernel: " UTS_RELEASE "\n");
350 max_hangcheck_score = 0; 345 max_hangcheck_score = 0;
351 for (i = 0; i < ARRAY_SIZE(error->ring); i++) { 346 for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
352 if (error->ring[i].hangcheck_score > max_hangcheck_score) 347 if (error->engine[i].hangcheck_score > max_hangcheck_score)
353 max_hangcheck_score = error->ring[i].hangcheck_score; 348 max_hangcheck_score = error->engine[i].hangcheck_score;
354 } 349 }
355 for (i = 0; i < ARRAY_SIZE(error->ring); i++) { 350 for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
356 if (error->ring[i].hangcheck_score == max_hangcheck_score && 351 if (error->engine[i].hangcheck_score == max_hangcheck_score &&
357 error->ring[i].pid != -1) { 352 error->engine[i].pid != -1) {
358 err_printf(m, "Active process (on ring %s): %s [%d]\n", 353 err_printf(m, "Active process (on ring %s): %s [%d]\n",
359 ring_str(i), 354 engine_str(i),
360 error->ring[i].comm, 355 error->engine[i].comm,
361 error->ring[i].pid); 356 error->engine[i].pid);
362 } 357 }
363 } 358 }
364 err_printf(m, "Reset count: %u\n", error->reset_count); 359 err_printf(m, "Reset count: %u\n", error->reset_count);
@@ -414,8 +409,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
414 if (IS_GEN7(dev)) 409 if (IS_GEN7(dev))
415 err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); 410 err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
416 411
417 for (i = 0; i < ARRAY_SIZE(error->ring); i++) 412 for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
418 i915_ring_error_state(m, dev, error, i); 413 if (error->engine[i].engine_id != -1)
414 error_print_engine(m, &error->engine[i]);
415 }
419 416
420 for (i = 0; i < error->vm_count; i++) { 417 for (i = 0; i < error->vm_count; i++) {
421 err_printf(m, "vm[%d]\n", i); 418 err_printf(m, "vm[%d]\n", i);
@@ -429,21 +426,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
429 error->pinned_bo_count[i]); 426 error->pinned_bo_count[i]);
430 } 427 }
431 428
432 for (i = 0; i < ARRAY_SIZE(error->ring); i++) { 429 for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
433 obj = error->ring[i].batchbuffer; 430 struct drm_i915_error_engine *ee = &error->engine[i];
431
432 obj = ee->batchbuffer;
434 if (obj) { 433 if (obj) {
435 err_puts(m, dev_priv->engine[i].name); 434 err_puts(m, dev_priv->engine[i].name);
436 if (error->ring[i].pid != -1) 435 if (ee->pid != -1)
437 err_printf(m, " (submitted by %s [%d])", 436 err_printf(m, " (submitted by %s [%d])",
438 error->ring[i].comm, 437 ee->comm,
439 error->ring[i].pid); 438 ee->pid);
440 err_printf(m, " --- gtt_offset = 0x%08x %08x\n", 439 err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
441 upper_32_bits(obj->gtt_offset), 440 upper_32_bits(obj->gtt_offset),
442 lower_32_bits(obj->gtt_offset)); 441 lower_32_bits(obj->gtt_offset));
443 print_error_obj(m, obj); 442 print_error_obj(m, obj);
444 } 443 }
445 444
446 obj = error->ring[i].wa_batchbuffer; 445 obj = ee->wa_batchbuffer;
447 if (obj) { 446 if (obj) {
448 err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n", 447 err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n",
449 dev_priv->engine[i].name, 448 dev_priv->engine[i].name,
@@ -451,38 +450,38 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
451 print_error_obj(m, obj); 450 print_error_obj(m, obj);
452 } 451 }
453 452
454 if (error->ring[i].num_requests) { 453 if (ee->num_requests) {
455 err_printf(m, "%s --- %d requests\n", 454 err_printf(m, "%s --- %d requests\n",
456 dev_priv->engine[i].name, 455 dev_priv->engine[i].name,
457 error->ring[i].num_requests); 456 ee->num_requests);
458 for (j = 0; j < error->ring[i].num_requests; j++) { 457 for (j = 0; j < ee->num_requests; j++) {
459 err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n", 458 err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n",
460 error->ring[i].requests[j].seqno, 459 ee->requests[j].seqno,
461 error->ring[i].requests[j].jiffies, 460 ee->requests[j].jiffies,
462 error->ring[i].requests[j].tail); 461 ee->requests[j].tail);
463 } 462 }
464 } 463 }
465 464
466 if (error->ring[i].num_waiters) { 465 if (ee->num_waiters) {
467 err_printf(m, "%s --- %d waiters\n", 466 err_printf(m, "%s --- %d waiters\n",
468 dev_priv->engine[i].name, 467 dev_priv->engine[i].name,
469 error->ring[i].num_waiters); 468 ee->num_waiters);
470 for (j = 0; j < error->ring[i].num_waiters; j++) { 469 for (j = 0; j < ee->num_waiters; j++) {
471 err_printf(m, " seqno 0x%08x for %s [%d]\n", 470 err_printf(m, " seqno 0x%08x for %s [%d]\n",
472 error->ring[i].waiters[j].seqno, 471 ee->waiters[j].seqno,
473 error->ring[i].waiters[j].comm, 472 ee->waiters[j].comm,
474 error->ring[i].waiters[j].pid); 473 ee->waiters[j].pid);
475 } 474 }
476 } 475 }
477 476
478 if ((obj = error->ring[i].ringbuffer)) { 477 if ((obj = ee->ringbuffer)) {
479 err_printf(m, "%s --- ringbuffer = 0x%08x\n", 478 err_printf(m, "%s --- ringbuffer = 0x%08x\n",
480 dev_priv->engine[i].name, 479 dev_priv->engine[i].name,
481 lower_32_bits(obj->gtt_offset)); 480 lower_32_bits(obj->gtt_offset));
482 print_error_obj(m, obj); 481 print_error_obj(m, obj);
483 } 482 }
484 483
485 if ((obj = error->ring[i].hws_page)) { 484 if ((obj = ee->hws_page)) {
486 u64 hws_offset = obj->gtt_offset; 485 u64 hws_offset = obj->gtt_offset;
487 u32 *hws_page = &obj->pages[0][0]; 486 u32 *hws_page = &obj->pages[0][0];
488 487
@@ -504,7 +503,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
504 } 503 }
505 } 504 }
506 505
507 obj = error->ring[i].wa_ctx; 506 obj = ee->wa_ctx;
508 if (obj) { 507 if (obj) {
509 u64 wa_ctx_offset = obj->gtt_offset; 508 u64 wa_ctx_offset = obj->gtt_offset;
510 u32 *wa_ctx_page = &obj->pages[0][0]; 509 u32 *wa_ctx_page = &obj->pages[0][0];
@@ -526,7 +525,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
526 } 525 }
527 } 526 }
528 527
529 if ((obj = error->ring[i].ctx)) { 528 if ((obj = ee->ctx)) {
530 err_printf(m, "%s --- HW Context = 0x%08x\n", 529 err_printf(m, "%s --- HW Context = 0x%08x\n",
531 dev_priv->engine[i].name, 530 dev_priv->engine[i].name,
532 lower_32_bits(obj->gtt_offset)); 531 lower_32_bits(obj->gtt_offset));
@@ -611,15 +610,18 @@ static void i915_error_state_free(struct kref *error_ref)
611 typeof(*error), ref); 610 typeof(*error), ref);
612 int i; 611 int i;
613 612
614 for (i = 0; i < ARRAY_SIZE(error->ring); i++) { 613 for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
615 i915_error_object_free(error->ring[i].batchbuffer); 614 struct drm_i915_error_engine *ee = &error->engine[i];
616 i915_error_object_free(error->ring[i].wa_batchbuffer); 615
617 i915_error_object_free(error->ring[i].ringbuffer); 616 i915_error_object_free(ee->batchbuffer);
618 i915_error_object_free(error->ring[i].hws_page); 617 i915_error_object_free(ee->wa_batchbuffer);
619 i915_error_object_free(error->ring[i].ctx); 618 i915_error_object_free(ee->ringbuffer);
620 i915_error_object_free(error->ring[i].wa_ctx); 619 i915_error_object_free(ee->hws_page);
621 kfree(error->ring[i].requests); 620 i915_error_object_free(ee->ctx);
622 kfree(error->ring[i].waiters); 621 i915_error_object_free(ee->wa_ctx);
622
623 kfree(ee->requests);
624 kfree(ee->waiters);
623 } 625 }
624 626
625 i915_error_object_free(error->semaphore_obj); 627 i915_error_object_free(error->semaphore_obj);
@@ -667,14 +669,14 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
667 if (i915_is_ggtt(vm)) 669 if (i915_is_ggtt(vm))
668 vma = i915_gem_obj_to_ggtt(src); 670 vma = i915_gem_obj_to_ggtt(src);
669 use_ggtt = (src->cache_level == I915_CACHE_NONE && 671 use_ggtt = (src->cache_level == I915_CACHE_NONE &&
670 vma && (vma->bound & GLOBAL_BIND) && 672 vma && (vma->flags & I915_VMA_GLOBAL_BIND) &&
671 reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); 673 reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
672 674
673 /* Cannot access stolen address directly, try to use the aperture */ 675 /* Cannot access stolen address directly, try to use the aperture */
674 if (src->stolen) { 676 if (src->stolen) {
675 use_ggtt = true; 677 use_ggtt = true;
676 678
677 if (!(vma && vma->bound & GLOBAL_BIND)) 679 if (!(vma && vma->flags & I915_VMA_GLOBAL_BIND))
678 goto unwind; 680 goto unwind;
679 681
680 reloc_offset = i915_gem_obj_ggtt_offset(src); 682 reloc_offset = i915_gem_obj_ggtt_offset(src);
@@ -740,6 +742,24 @@ unwind:
740#define i915_error_ggtt_object_create(dev_priv, src) \ 742#define i915_error_ggtt_object_create(dev_priv, src) \
741 i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base) 743 i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base)
742 744
745/* The error capture is special as tries to run underneath the normal
746 * locking rules - so we use the raw version of the i915_gem_active lookup.
747 */
748static inline uint32_t
749__active_get_seqno(struct i915_gem_active *active)
750{
751 return i915_gem_request_get_seqno(__i915_gem_active_peek(active));
752}
753
754static inline int
755__active_get_engine_id(struct i915_gem_active *active)
756{
757 struct intel_engine_cs *engine;
758
759 engine = i915_gem_request_get_engine(__i915_gem_active_peek(active));
760 return engine ? engine->id : -1;
761}
762
743static void capture_bo(struct drm_i915_error_buffer *err, 763static void capture_bo(struct drm_i915_error_buffer *err,
744 struct i915_vma *vma) 764 struct i915_vma *vma)
745{ 765{
@@ -748,9 +768,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
748 768
749 err->size = obj->base.size; 769 err->size = obj->base.size;
750 err->name = obj->base.name; 770 err->name = obj->base.name;
771
751 for (i = 0; i < I915_NUM_ENGINES; i++) 772 for (i = 0; i < I915_NUM_ENGINES; i++)
752 err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]); 773 err->rseqno[i] = __active_get_seqno(&obj->last_read[i]);
753 err->wseqno = i915_gem_request_get_seqno(obj->last_write_req); 774 err->wseqno = __active_get_seqno(&obj->last_write);
775 err->engine = __active_get_engine_id(&obj->last_write);
776
754 err->gtt_offset = vma->node.start; 777 err->gtt_offset = vma->node.start;
755 err->read_domains = obj->base.read_domains; 778 err->read_domains = obj->base.read_domains;
756 err->write_domain = obj->base.write_domain; 779 err->write_domain = obj->base.write_domain;
@@ -758,12 +781,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
758 err->pinned = 0; 781 err->pinned = 0;
759 if (i915_gem_obj_is_pinned(obj)) 782 if (i915_gem_obj_is_pinned(obj))
760 err->pinned = 1; 783 err->pinned = 1;
761 err->tiling = obj->tiling_mode; 784 err->tiling = i915_gem_object_get_tiling(obj);
762 err->dirty = obj->dirty; 785 err->dirty = obj->dirty;
763 err->purgeable = obj->madv != I915_MADV_WILLNEED; 786 err->purgeable = obj->madv != I915_MADV_WILLNEED;
764 err->userptr = obj->userptr.mm != NULL; 787 err->userptr = obj->userptr.mm != NULL;
765 err->ring = obj->last_write_req ?
766 i915_gem_request_get_engine(obj->last_write_req)->id : -1;
767 err->cache_level = obj->cache_level; 788 err->cache_level = obj->cache_level;
768} 789}
769 790
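
capture_bo() now goes through __active_get_seqno()/__active_get_engine_id(), which use the raw __i915_gem_active_peek() because error capture runs outside the normal locking rules; the helpers fold the idle case into 0 and -1, so the caller no longer open-codes the last_write_req NULL check. A userspace sketch of that NULL-tolerant accessor pattern, with simplified types:

#include <stdio.h>

struct request { unsigned int seqno; int engine_id; };

/* NULL-tolerant accessors: capture code can call them unconditionally
 * and gets 0 / -1 for an idle slot (names here are illustrative).
 */
static unsigned int request_get_seqno(const struct request *rq)
{
        return rq ? rq->seqno : 0;
}

static int request_get_engine_id(const struct request *rq)
{
        return rq ? rq->engine_id : -1;
}

int main(void)
{
        struct request rq = { .seqno = 0x1234, .engine_id = 2 };

        printf("active: seqno=0x%x engine=%d\n",
               request_get_seqno(&rq), request_get_engine_id(&rq));
        printf("idle:   seqno=0x%x engine=%d\n",
               request_get_seqno(NULL), request_get_engine_id(NULL));
        return 0;
}
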
@@ -797,7 +818,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
797 break; 818 break;
798 819
799 list_for_each_entry(vma, &obj->vma_list, obj_link) 820 list_for_each_entry(vma, &obj->vma_list, obj_link)
800 if (vma->vm == vm && vma->pin_count > 0) 821 if (vma->vm == vm && i915_vma_is_pinned(vma))
801 capture_bo(err++, vma); 822 capture_bo(err++, vma);
802 } 823 }
803 824
@@ -815,7 +836,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
815 */ 836 */
816static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, 837static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
817 struct drm_i915_error_state *error, 838 struct drm_i915_error_state *error,
818 int *ring_id) 839 int *engine_id)
819{ 840{
820 uint32_t error_code = 0; 841 uint32_t error_code = 0;
821 int i; 842 int i;
@@ -826,11 +847,11 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
826 * strictly a client bug. Use instdone to differentiate those some. 847 * strictly a client bug. Use instdone to differentiate those some.
827 */ 848 */
828 for (i = 0; i < I915_NUM_ENGINES; i++) { 849 for (i = 0; i < I915_NUM_ENGINES; i++) {
829 if (error->ring[i].hangcheck_action == HANGCHECK_HUNG) { 850 if (error->engine[i].hangcheck_action == HANGCHECK_HUNG) {
830 if (ring_id) 851 if (engine_id)
831 *ring_id = i; 852 *engine_id = i;
832 853
833 return error->ring[i].ipehr ^ error->ring[i].instdone; 854 return error->engine[i].ipehr ^ error->engine[i].instdone;
834 } 855 }
835 } 856 }
836 857
@@ -855,21 +876,16 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
855} 876}
856 877
857 878
858static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv, 879static void gen8_record_semaphore_state(struct drm_i915_error_state *error,
859 struct drm_i915_error_state *error,
860 struct intel_engine_cs *engine, 880 struct intel_engine_cs *engine,
861 struct drm_i915_error_ring *ering) 881 struct drm_i915_error_engine *ee)
862{ 882{
883 struct drm_i915_private *dev_priv = engine->i915;
863 struct intel_engine_cs *to; 884 struct intel_engine_cs *to;
864 enum intel_engine_id id; 885 enum intel_engine_id id;
865 886
866 if (!i915_semaphore_is_enabled(dev_priv))
867 return;
868
869 if (!error->semaphore_obj) 887 if (!error->semaphore_obj)
870 error->semaphore_obj = 888 return;
871 i915_error_ggtt_object_create(dev_priv,
872 dev_priv->semaphore_obj);
873 889
874 for_each_engine_id(to, dev_priv, id) { 890 for_each_engine_id(to, dev_priv, id) {
875 int idx; 891 int idx;
@@ -879,42 +895,43 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv,
879 if (engine == to) 895 if (engine == to)
880 continue; 896 continue;
881 897
882 signal_offset = (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) 898 signal_offset =
883 / 4; 899 (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
884 tmp = error->semaphore_obj->pages[0]; 900 tmp = error->semaphore_obj->pages[0];
885 idx = intel_ring_sync_index(engine, to); 901 idx = intel_engine_sync_index(engine, to);
886 902
887 ering->semaphore_mboxes[idx] = tmp[signal_offset]; 903 ee->semaphore_mboxes[idx] = tmp[signal_offset];
888 ering->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx]; 904 ee->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx];
889 } 905 }
890} 906}
891 907
892static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv, 908static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
893 struct intel_engine_cs *engine, 909 struct drm_i915_error_engine *ee)
894 struct drm_i915_error_ring *ering)
895{ 910{
896 ering->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); 911 struct drm_i915_private *dev_priv = engine->i915;
897 ering->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); 912
898 ering->semaphore_seqno[0] = engine->semaphore.sync_seqno[0]; 913 ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base));
899 ering->semaphore_seqno[1] = engine->semaphore.sync_seqno[1]; 914 ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base));
915 ee->semaphore_seqno[0] = engine->semaphore.sync_seqno[0];
916 ee->semaphore_seqno[1] = engine->semaphore.sync_seqno[1];
900 917
901 if (HAS_VEBOX(dev_priv)) { 918 if (HAS_VEBOX(dev_priv)) {
902 ering->semaphore_mboxes[2] = 919 ee->semaphore_mboxes[2] =
903 I915_READ(RING_SYNC_2(engine->mmio_base)); 920 I915_READ(RING_SYNC_2(engine->mmio_base));
904 ering->semaphore_seqno[2] = engine->semaphore.sync_seqno[2]; 921 ee->semaphore_seqno[2] = engine->semaphore.sync_seqno[2];
905 } 922 }
906} 923}
907 924
908static void engine_record_waiters(struct intel_engine_cs *engine, 925static void error_record_engine_waiters(struct intel_engine_cs *engine,
909 struct drm_i915_error_ring *ering) 926 struct drm_i915_error_engine *ee)
910{ 927{
911 struct intel_breadcrumbs *b = &engine->breadcrumbs; 928 struct intel_breadcrumbs *b = &engine->breadcrumbs;
912 struct drm_i915_error_waiter *waiter; 929 struct drm_i915_error_waiter *waiter;
913 struct rb_node *rb; 930 struct rb_node *rb;
914 int count; 931 int count;
915 932
916 ering->num_waiters = 0; 933 ee->num_waiters = 0;
917 ering->waiters = NULL; 934 ee->waiters = NULL;
918 935
919 spin_lock(&b->lock); 936 spin_lock(&b->lock);
920 count = 0; 937 count = 0;
@@ -930,7 +947,7 @@ static void engine_record_waiters(struct intel_engine_cs *engine,
930 if (!waiter) 947 if (!waiter)
931 return; 948 return;
932 949
933 ering->waiters = waiter; 950 ee->waiters = waiter;
934 951
935 spin_lock(&b->lock); 952 spin_lock(&b->lock);
936 for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { 953 for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@@ -941,55 +958,55 @@ static void engine_record_waiters(struct intel_engine_cs *engine,
941 waiter->seqno = w->seqno; 958 waiter->seqno = w->seqno;
942 waiter++; 959 waiter++;
943 960
944 if (++ering->num_waiters == count) 961 if (++ee->num_waiters == count)
945 break; 962 break;
946 } 963 }
947 spin_unlock(&b->lock); 964 spin_unlock(&b->lock);
948} 965}
949 966
950static void i915_record_ring_state(struct drm_i915_private *dev_priv, 967static void error_record_engine_registers(struct drm_i915_error_state *error,
951 struct drm_i915_error_state *error, 968 struct intel_engine_cs *engine,
952 struct intel_engine_cs *engine, 969 struct drm_i915_error_engine *ee)
953 struct drm_i915_error_ring *ering)
954{ 970{
971 struct drm_i915_private *dev_priv = engine->i915;
972
955 if (INTEL_GEN(dev_priv) >= 6) { 973 if (INTEL_GEN(dev_priv) >= 6) {
956 ering->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); 974 ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
957 ering->fault_reg = I915_READ(RING_FAULT_REG(engine)); 975 ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
958 if (INTEL_GEN(dev_priv) >= 8) 976 if (INTEL_GEN(dev_priv) >= 8)
959 gen8_record_semaphore_state(dev_priv, error, engine, 977 gen8_record_semaphore_state(error, engine, ee);
960 ering);
961 else 978 else
962 gen6_record_semaphore_state(dev_priv, engine, ering); 979 gen6_record_semaphore_state(engine, ee);
963 } 980 }
964 981
965 if (INTEL_GEN(dev_priv) >= 4) { 982 if (INTEL_GEN(dev_priv) >= 4) {
966 ering->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base)); 983 ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base));
967 ering->ipeir = I915_READ(RING_IPEIR(engine->mmio_base)); 984 ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base));
968 ering->ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); 985 ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
969 ering->instdone = I915_READ(RING_INSTDONE(engine->mmio_base)); 986 ee->instdone = I915_READ(RING_INSTDONE(engine->mmio_base));
970 ering->instps = I915_READ(RING_INSTPS(engine->mmio_base)); 987 ee->instps = I915_READ(RING_INSTPS(engine->mmio_base));
971 ering->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); 988 ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));
972 if (INTEL_GEN(dev_priv) >= 8) { 989 if (INTEL_GEN(dev_priv) >= 8) {
973 ering->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32; 990 ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32;
974 ering->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32; 991 ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32;
975 } 992 }
976 ering->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base)); 993 ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base));
977 } else { 994 } else {
978 ering->faddr = I915_READ(DMA_FADD_I8XX); 995 ee->faddr = I915_READ(DMA_FADD_I8XX);
979 ering->ipeir = I915_READ(IPEIR); 996 ee->ipeir = I915_READ(IPEIR);
980 ering->ipehr = I915_READ(IPEHR); 997 ee->ipehr = I915_READ(IPEHR);
981 ering->instdone = I915_READ(GEN2_INSTDONE); 998 ee->instdone = I915_READ(GEN2_INSTDONE);
982 } 999 }
983 1000
984 ering->waiting = intel_engine_has_waiter(engine); 1001 ee->waiting = intel_engine_has_waiter(engine);
985 ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); 1002 ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
986 ering->acthd = intel_ring_get_active_head(engine); 1003 ee->acthd = intel_engine_get_active_head(engine);
987 ering->seqno = intel_engine_get_seqno(engine); 1004 ee->seqno = intel_engine_get_seqno(engine);
988 ering->last_seqno = engine->last_submitted_seqno; 1005 ee->last_seqno = engine->last_submitted_seqno;
989 ering->start = I915_READ_START(engine); 1006 ee->start = I915_READ_START(engine);
990 ering->head = I915_READ_HEAD(engine); 1007 ee->head = I915_READ_HEAD(engine);
991 ering->tail = I915_READ_TAIL(engine); 1008 ee->tail = I915_READ_TAIL(engine);
992 ering->ctl = I915_READ_CTL(engine); 1009 ee->ctl = I915_READ_CTL(engine);
993 1010
994 if (I915_NEED_GFX_HWS(dev_priv)) { 1011 if (I915_NEED_GFX_HWS(dev_priv)) {
995 i915_reg_t mmio; 1012 i915_reg_t mmio;
@@ -1017,29 +1034,29 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv,
1017 mmio = RING_HWS_PGA(engine->mmio_base); 1034 mmio = RING_HWS_PGA(engine->mmio_base);
1018 } 1035 }
1019 1036
1020 ering->hws = I915_READ(mmio); 1037 ee->hws = I915_READ(mmio);
1021 } 1038 }
1022 1039
1023 ering->hangcheck_score = engine->hangcheck.score; 1040 ee->hangcheck_score = engine->hangcheck.score;
1024 ering->hangcheck_action = engine->hangcheck.action; 1041 ee->hangcheck_action = engine->hangcheck.action;
1025 1042
1026 if (USES_PPGTT(dev_priv)) { 1043 if (USES_PPGTT(dev_priv)) {
1027 int i; 1044 int i;
1028 1045
1029 ering->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); 1046 ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine));
1030 1047
1031 if (IS_GEN6(dev_priv)) 1048 if (IS_GEN6(dev_priv))
1032 ering->vm_info.pp_dir_base = 1049 ee->vm_info.pp_dir_base =
1033 I915_READ(RING_PP_DIR_BASE_READ(engine)); 1050 I915_READ(RING_PP_DIR_BASE_READ(engine));
1034 else if (IS_GEN7(dev_priv)) 1051 else if (IS_GEN7(dev_priv))
1035 ering->vm_info.pp_dir_base = 1052 ee->vm_info.pp_dir_base =
1036 I915_READ(RING_PP_DIR_BASE(engine)); 1053 I915_READ(RING_PP_DIR_BASE(engine));
1037 else if (INTEL_GEN(dev_priv) >= 8) 1054 else if (INTEL_GEN(dev_priv) >= 8)
1038 for (i = 0; i < 4; i++) { 1055 for (i = 0; i < 4; i++) {
1039 ering->vm_info.pdp[i] = 1056 ee->vm_info.pdp[i] =
1040 I915_READ(GEN8_RING_PDP_UDW(engine, i)); 1057 I915_READ(GEN8_RING_PDP_UDW(engine, i));
1041 ering->vm_info.pdp[i] <<= 32; 1058 ee->vm_info.pdp[i] <<= 32;
1042 ering->vm_info.pdp[i] |= 1059 ee->vm_info.pdp[i] |=
1043 I915_READ(GEN8_RING_PDP_LDW(engine, i)); 1060 I915_READ(GEN8_RING_PDP_LDW(engine, i));
1044 } 1061 }
1045 } 1062 }
@@ -1048,7 +1065,7 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv,
1048 1065
1049static void i915_gem_record_active_context(struct intel_engine_cs *engine, 1066static void i915_gem_record_active_context(struct intel_engine_cs *engine,
1050 struct drm_i915_error_state *error, 1067 struct drm_i915_error_state *error,
1051 struct drm_i915_error_ring *ering) 1068 struct drm_i915_error_engine *ee)
1052{ 1069{
1053 struct drm_i915_private *dev_priv = engine->i915; 1070 struct drm_i915_private *dev_priv = engine->i915;
1054 struct drm_i915_gem_object *obj; 1071 struct drm_i915_gem_object *obj;
@@ -1062,7 +1079,7 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine,
1062 continue; 1079 continue;
1063 1080
1064 if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) { 1081 if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
1065 ering->ctx = i915_error_ggtt_object_create(dev_priv, obj); 1082 ee->ctx = i915_error_ggtt_object_create(dev_priv, obj);
1066 break; 1083 break;
1067 } 1084 }
1068 } 1085 }
@@ -1075,23 +1092,31 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1075 struct drm_i915_gem_request *request; 1092 struct drm_i915_gem_request *request;
1076 int i, count; 1093 int i, count;
1077 1094
1095 if (dev_priv->semaphore_obj) {
1096 error->semaphore_obj =
1097 i915_error_ggtt_object_create(dev_priv,
1098 dev_priv->semaphore_obj);
1099 }
1100
1078 for (i = 0; i < I915_NUM_ENGINES; i++) { 1101 for (i = 0; i < I915_NUM_ENGINES; i++) {
1079 struct intel_engine_cs *engine = &dev_priv->engine[i]; 1102 struct intel_engine_cs *engine = &dev_priv->engine[i];
1103 struct drm_i915_error_engine *ee = &error->engine[i];
1080 1104
1081 error->ring[i].pid = -1; 1105 ee->pid = -1;
1106 ee->engine_id = -1;
1082 1107
1083 if (!intel_engine_initialized(engine)) 1108 if (!intel_engine_initialized(engine))
1084 continue; 1109 continue;
1085 1110
1086 error->ring[i].valid = true; 1111 ee->engine_id = i;
1087 1112
1088 i915_record_ring_state(dev_priv, error, engine, &error->ring[i]); 1113 error_record_engine_registers(error, engine, ee);
1089 engine_record_waiters(engine, &error->ring[i]); 1114 error_record_engine_waiters(engine, ee);
1090 1115
1091 request = i915_gem_find_active_request(engine); 1116 request = i915_gem_find_active_request(engine);
1092 if (request) { 1117 if (request) {
1093 struct i915_address_space *vm; 1118 struct i915_address_space *vm;
1094 struct intel_ringbuffer *rb; 1119 struct intel_ring *ring;
1095 1120
1096 vm = request->ctx->ppgtt ? 1121 vm = request->ctx->ppgtt ?
1097 &request->ctx->ppgtt->base : &ggtt->base; 1122 &request->ctx->ppgtt->base : &ggtt->base;
@@ -1100,15 +1125,15 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1100 * as the simplest method to avoid being overwritten 1125 * as the simplest method to avoid being overwritten
1101 * by userspace. 1126 * by userspace.
1102 */ 1127 */
1103 error->ring[i].batchbuffer = 1128 ee->batchbuffer =
1104 i915_error_object_create(dev_priv, 1129 i915_error_object_create(dev_priv,
1105 request->batch_obj, 1130 request->batch_obj,
1106 vm); 1131 vm);
1107 1132
1108 if (HAS_BROKEN_CS_TLB(dev_priv)) 1133 if (HAS_BROKEN_CS_TLB(dev_priv))
1109 error->ring[i].wa_batchbuffer = 1134 ee->wa_batchbuffer =
1110 i915_error_ggtt_object_create(dev_priv, 1135 i915_error_ggtt_object_create(dev_priv,
1111 engine->scratch.obj); 1136 engine->scratch.obj);
1112 1137
1113 if (request->pid) { 1138 if (request->pid) {
1114 struct task_struct *task; 1139 struct task_struct *task;
@@ -1116,8 +1141,8 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1116 rcu_read_lock(); 1141 rcu_read_lock();
1117 task = pid_task(request->pid, PIDTYPE_PID); 1142 task = pid_task(request->pid, PIDTYPE_PID);
1118 if (task) { 1143 if (task) {
1119 strcpy(error->ring[i].comm, task->comm); 1144 strcpy(ee->comm, task->comm);
1120 error->ring[i].pid = task->pid; 1145 ee->pid = task->pid;
1121 } 1146 }
1122 rcu_read_unlock(); 1147 rcu_read_unlock();
1123 } 1148 }
@@ -1125,44 +1150,40 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1125 error->simulated |= 1150 error->simulated |=
1126 request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE; 1151 request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE;
1127 1152
1128 rb = request->ringbuf; 1153 ring = request->ring;
1129 error->ring[i].cpu_ring_head = rb->head; 1154 ee->cpu_ring_head = ring->head;
1130 error->ring[i].cpu_ring_tail = rb->tail; 1155 ee->cpu_ring_tail = ring->tail;
1131 error->ring[i].ringbuffer = 1156 ee->ringbuffer =
1132 i915_error_ggtt_object_create(dev_priv, 1157 i915_error_ggtt_object_create(dev_priv,
1133 rb->obj); 1158 ring->obj);
1134 } 1159 }
1135 1160
1136 error->ring[i].hws_page = 1161 ee->hws_page =
1137 i915_error_ggtt_object_create(dev_priv, 1162 i915_error_ggtt_object_create(dev_priv,
1138 engine->status_page.obj); 1163 engine->status_page.obj);
1139 1164
1140 if (engine->wa_ctx.obj) { 1165 ee->wa_ctx = i915_error_ggtt_object_create(dev_priv,
1141 error->ring[i].wa_ctx = 1166 engine->wa_ctx.obj);
1142 i915_error_ggtt_object_create(dev_priv,
1143 engine->wa_ctx.obj);
1144 }
1145 1167
1146 i915_gem_record_active_context(engine, error, &error->ring[i]); 1168 i915_gem_record_active_context(engine, error, ee);
1147 1169
1148 count = 0; 1170 count = 0;
1149 list_for_each_entry(request, &engine->request_list, list) 1171 list_for_each_entry(request, &engine->request_list, link)
1150 count++; 1172 count++;
1151 1173
1152 error->ring[i].num_requests = count; 1174 ee->num_requests = count;
1153 error->ring[i].requests = 1175 ee->requests =
1154 kcalloc(count, sizeof(*error->ring[i].requests), 1176 kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
1155 GFP_ATOMIC); 1177 if (!ee->requests) {
1156 if (error->ring[i].requests == NULL) { 1178 ee->num_requests = 0;
1157 error->ring[i].num_requests = 0;
1158 continue; 1179 continue;
1159 } 1180 }
1160 1181
1161 count = 0; 1182 count = 0;
1162 list_for_each_entry(request, &engine->request_list, list) { 1183 list_for_each_entry(request, &engine->request_list, link) {
1163 struct drm_i915_error_request *erq; 1184 struct drm_i915_error_request *erq;
1164 1185
1165 if (count >= error->ring[i].num_requests) { 1186 if (count >= ee->num_requests) {
1166 /* 1187 /*
1167 * If the ring request list was changed in 1188 * If the ring request list was changed in
1168 * between the point where the error request 1189 * between the point where the error request
@@ -1181,8 +1202,8 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1181 break; 1202 break;
1182 } 1203 }
1183 1204
1184 erq = &error->ring[i].requests[count++]; 1205 erq = &ee->requests[count++];
1185 erq->seqno = request->seqno; 1206 erq->seqno = request->fence.seqno;
1186 erq->jiffies = request->emitted_jiffies; 1207 erq->jiffies = request->emitted_jiffies;
1187 erq->tail = request->postfix; 1208 erq->tail = request->postfix;
1188 } 1209 }
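
The request dump keeps its two-pass shape: count the engine's request list, kcalloc() the array with GFP_ATOMIC (capture may run in atomic context), then copy entries with an explicit bound check because the list can change between the two walks. A userspace sketch of the same count-then-fill pattern:

#include <stdio.h>
#include <stdlib.h>

struct req { unsigned int seqno; struct req *next; };
struct snapshot { int num; unsigned int *seqnos; };

/* Count first, allocate, then copy with a bound check: in the driver the
 * list can be modified concurrently between the two walks, so the second
 * pass never trusts the first count blindly.
 */
static void capture_requests(struct req *head, struct snapshot *snap)
{
        struct req *r;
        int count = 0;

        for (r = head; r; r = r->next)
                count++;

        snap->num = count;
        snap->seqnos = calloc(count, sizeof(*snap->seqnos));
        if (!snap->seqnos) {
                snap->num = 0;
                return;
        }

        count = 0;
        for (r = head; r; r = r->next) {
                if (count >= snap->num)
                        break;          /* list grew under us: keep what fits */
                snap->seqnos[count++] = r->seqno;
        }
        snap->num = count;              /* list may also have shrunk */
}

int main(void)
{
        struct req c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct snapshot snap = { 0, NULL };
        int i;

        capture_requests(&a, &snap);
        for (i = 0; i < snap.num; i++)
                printf("seqno %u\n", snap.seqnos[i]);
        free(snap.seqnos);
        return 0;
}
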
@@ -1209,7 +1230,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
1209 1230
1210 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 1231 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
1211 list_for_each_entry(vma, &obj->vma_list, obj_link) 1232 list_for_each_entry(vma, &obj->vma_list, obj_link)
1212 if (vma->vm == vm && vma->pin_count > 0) 1233 if (vma->vm == vm && i915_vma_is_pinned(vma))
1213 i++; 1234 i++;
1214 } 1235 }
1215 error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx]; 1236 error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
@@ -1352,20 +1373,20 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
1352 const char *error_msg) 1373 const char *error_msg)
1353{ 1374{
1354 u32 ecode; 1375 u32 ecode;
1355 int ring_id = -1, len; 1376 int engine_id = -1, len;
1356 1377
1357 ecode = i915_error_generate_code(dev_priv, error, &ring_id); 1378 ecode = i915_error_generate_code(dev_priv, error, &engine_id);
1358 1379
1359 len = scnprintf(error->error_msg, sizeof(error->error_msg), 1380 len = scnprintf(error->error_msg, sizeof(error->error_msg),
1360 "GPU HANG: ecode %d:%d:0x%08x", 1381 "GPU HANG: ecode %d:%d:0x%08x",
1361 INTEL_GEN(dev_priv), ring_id, ecode); 1382 INTEL_GEN(dev_priv), engine_id, ecode);
1362 1383
1363 if (ring_id != -1 && error->ring[ring_id].pid != -1) 1384 if (engine_id != -1 && error->engine[engine_id].pid != -1)
1364 len += scnprintf(error->error_msg + len, 1385 len += scnprintf(error->error_msg + len,
1365 sizeof(error->error_msg) - len, 1386 sizeof(error->error_msg) - len,
1366 ", in %s [%d]", 1387 ", in %s [%d]",
1367 error->ring[ring_id].comm, 1388 error->engine[engine_id].comm,
1368 error->ring[ring_id].pid); 1389 error->engine[engine_id].pid);
1369 1390
1370 scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, 1391 scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
1371 ", reason: %s, action: %s", 1392 ", reason: %s, action: %s",
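
i915_error_capture_msg() builds the one-line hang summary with scnprintf(), whose return value never exceeds the space it was given, so the buf + len / size - len chaining stays in bounds even for long process names. A sketch of that clamped-append idiom, approximating scnprintf() with vsnprintf():

#include <stdarg.h>
#include <stdio.h>

/* scnprintf-like helper: returns the number of characters actually
 * written (never more than size - 1), so appends can be chained safely.
 */
static size_t append(char *buf, size_t size, const char *fmt, ...)
{
        va_list ap;
        int n;

        if (size == 0)
                return 0;
        va_start(ap, fmt);
        n = vsnprintf(buf, size, fmt, ap);
        va_end(ap);
        if (n < 0)
                return 0;
        return (size_t)n < size ? (size_t)n : size - 1;
}

int main(void)
{
        char msg[64];
        size_t len;

        len = append(msg, sizeof(msg), "GPU HANG: ecode %d:%d:0x%08x", 9, 0, 0xdeadbeef);
        len += append(msg + len, sizeof(msg) - len, ", in %s [%d]", "someclient", 1234);
        /* This last append overflows the 64-byte buffer and gets clamped. */
        append(msg + len, sizeof(msg) - len, ", reason: %s", "hang on render");
        printf("%s\n", msg);
        return 0;
}
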
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 2112e029db6a..03a5cef353eb 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -363,7 +363,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
363 lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | 363 lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
364 (engine->guc_id << GUC_ELC_ENGINE_OFFSET); 364 (engine->guc_id << GUC_ELC_ENGINE_OFFSET);
365 365
366 obj = ce->ringbuf->obj; 366 obj = ce->ring->obj;
367 gfx_addr = i915_gem_obj_ggtt_offset(obj); 367 gfx_addr = i915_gem_obj_ggtt_offset(obj);
368 368
369 lrc->ring_begin = gfx_addr; 369 lrc->ring_begin = gfx_addr;
@@ -506,7 +506,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
506 rq->engine); 506 rq->engine);
507 507
508 wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; 508 wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
509 wqi->fence_id = rq->seqno; 509 wqi->fence_id = rq->fence.seqno;
510 510
511 kunmap_atomic(base); 511 kunmap_atomic(base);
512} 512}
@@ -585,7 +585,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
585 * The only error here arises if the doorbell hardware isn't functioning 585 * The only error here arises if the doorbell hardware isn't functioning
586 * as expected, which really shouln't happen. 586 * as expected, which really shouln't happen.
587 */ 587 */
588int i915_guc_submit(struct drm_i915_gem_request *rq) 588static void i915_guc_submit(struct drm_i915_gem_request *rq)
589{ 589{
590 unsigned int engine_id = rq->engine->id; 590 unsigned int engine_id = rq->engine->id;
591 struct intel_guc *guc = &rq->i915->guc; 591 struct intel_guc *guc = &rq->i915->guc;
@@ -601,9 +601,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
601 client->b_fail += 1; 601 client->b_fail += 1;
602 602
603 guc->submissions[engine_id] += 1; 603 guc->submissions[engine_id] += 1;
604 guc->last_seqno[engine_id] = rq->seqno; 604 guc->last_seqno[engine_id] = rq->fence.seqno;
605
606 return b_ret;
607} 605}
608 606
609/* 607/*
@@ -633,13 +631,13 @@ gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size)
633 return NULL; 631 return NULL;
634 632
635 if (i915_gem_object_get_pages(obj)) { 633 if (i915_gem_object_get_pages(obj)) {
636 drm_gem_object_unreference(&obj->base); 634 i915_gem_object_put(obj);
637 return NULL; 635 return NULL;
638 } 636 }
639 637
640 if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 638 if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
641 PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { 639 PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
642 drm_gem_object_unreference(&obj->base); 640 i915_gem_object_put(obj);
643 return NULL; 641 return NULL;
644 } 642 }
645 643
@@ -661,7 +659,7 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
661 if (i915_gem_obj_is_pinned(obj)) 659 if (i915_gem_obj_is_pinned(obj))
662 i915_gem_object_ggtt_unpin(obj); 660 i915_gem_object_ggtt_unpin(obj);
663 661
664 drm_gem_object_unreference(&obj->base); 662 i915_gem_object_put(obj);
665} 663}
666 664
667static void 665static void
@@ -992,6 +990,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
992{ 990{
993 struct intel_guc *guc = &dev_priv->guc; 991 struct intel_guc *guc = &dev_priv->guc;
994 struct i915_guc_client *client; 992 struct i915_guc_client *client;
993 struct intel_engine_cs *engine;
995 994
996 /* client for execbuf submission */ 995 /* client for execbuf submission */
997 client = guc_client_alloc(dev_priv, 996 client = guc_client_alloc(dev_priv,
@@ -1006,6 +1005,10 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
1006 host2guc_sample_forcewake(guc, client); 1005 host2guc_sample_forcewake(guc, client);
1007 guc_init_doorbell_hw(guc); 1006 guc_init_doorbell_hw(guc);
1008 1007
1008 /* Take over from manual control of ELSP (execlists) */
1009 for_each_engine(engine, dev_priv)
1010 engine->submit_request = i915_guc_submit;
1011
1009 return 0; 1012 return 0;
1010} 1013}
1011 1014
@@ -1013,6 +1016,12 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
1013{ 1016{
1014 struct intel_guc *guc = &dev_priv->guc; 1017 struct intel_guc *guc = &dev_priv->guc;
1015 1018
1019 if (!guc->execbuf_client)
1020 return;
1021
1022 /* Revert back to manual ELSP submission */
1023 intel_execlists_enable_submission(dev_priv);
1024
1016 guc_client_free(dev_priv, guc->execbuf_client); 1025 guc_client_free(dev_priv, guc->execbuf_client);
1017 guc->execbuf_client = NULL; 1026 guc->execbuf_client = NULL;
1018} 1027}
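
i915_guc_submit() loses its return value and goes static because enabling GuC submission is now just a matter of repointing every engine's submit_request hook at it, and i915_guc_submission_disable() reverts the hook to execlists. A toy model of that backend swap; the names are made up:

#include <stdio.h>

struct request { int id; };

struct engine {
        const char *name;
        void (*submit_request)(struct engine *e, struct request *rq);
};

/* Default (execlists-style) backend. */
static void execlists_submit(struct engine *e, struct request *rq)
{
        printf("%s: execlists submit %d\n", e->name, rq->id);
}

/* Alternate (GuC-style) backend, installed by swapping the pointer. */
static void guc_submit(struct engine *e, struct request *rq)
{
        printf("%s: guc submit %d\n", e->name, rq->id);
}

static void guc_submission_enable(struct engine *engines, int n)
{
        int i;

        for (i = 0; i < n; i++)
                engines[i].submit_request = guc_submit;
}

static void guc_submission_disable(struct engine *engines, int n)
{
        int i;

        for (i = 0; i < n; i++)
                engines[i].submit_request = execlists_submit;   /* revert */
}

int main(void)
{
        struct engine engines[2] = {
                { "render", execlists_submit },
                { "bsd", execlists_submit },
        };
        struct request rq = { 1 };

        guc_submission_enable(engines, 2);
        engines[0].submit_request(&engines[0], &rq);
        guc_submission_disable(engines, 2);
        engines[0].submit_request(&engines[0], &rq);
        return 0;
}
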
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1c2aec392412..591f452ece68 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -656,12 +656,6 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv)
656 * of horizontal active on the first line of vertical active 656 * of horizontal active on the first line of vertical active
657 */ 657 */
658 658
659static u32 i8xx_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
660{
661 /* Gen2 doesn't have a hardware frame counter */
662 return 0;
663}
664
665/* Called from drm generic code, passed a 'crtc', which 659/* Called from drm generic code, passed a 'crtc', which
666 * we use as a pipe index 660 * we use as a pipe index
667 */ 661 */
@@ -1105,9 +1099,10 @@ static void gen6_pm_rps_work(struct work_struct *work)
1105 new_delay = dev_priv->rps.cur_freq; 1099 new_delay = dev_priv->rps.cur_freq;
1106 min = dev_priv->rps.min_freq_softlimit; 1100 min = dev_priv->rps.min_freq_softlimit;
1107 max = dev_priv->rps.max_freq_softlimit; 1101 max = dev_priv->rps.max_freq_softlimit;
1108 1102 if (client_boost || any_waiters(dev_priv))
1109 if (client_boost) { 1103 max = dev_priv->rps.max_freq;
1110 new_delay = dev_priv->rps.max_freq_softlimit; 1104 if (client_boost && new_delay < dev_priv->rps.boost_freq) {
1105 new_delay = dev_priv->rps.boost_freq;
1111 adj = 0; 1106 adj = 0;
1112 } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { 1107 } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
1113 if (adj > 0) 1108 if (adj > 0)
@@ -1122,7 +1117,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
1122 new_delay = dev_priv->rps.efficient_freq; 1117 new_delay = dev_priv->rps.efficient_freq;
1123 adj = 0; 1118 adj = 0;
1124 } 1119 }
1125 } else if (any_waiters(dev_priv)) { 1120 } else if (client_boost || any_waiters(dev_priv)) {
1126 adj = 0; 1121 adj = 0;
1127 } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { 1122 } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
1128 if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) 1123 if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
@@ -2804,13 +2799,6 @@ static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe)
2804} 2799}
2805 2800
2806static bool 2801static bool
2807ring_idle(struct intel_engine_cs *engine, u32 seqno)
2808{
2809 return i915_seqno_passed(seqno,
2810 READ_ONCE(engine->last_submitted_seqno));
2811}
2812
2813static bool
2814ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) 2802ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
2815{ 2803{
2816 if (INTEL_GEN(engine->i915) >= 8) { 2804 if (INTEL_GEN(engine->i915) >= 8) {
@@ -2859,6 +2847,7 @@ static struct intel_engine_cs *
2859semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) 2847semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
2860{ 2848{
2861 struct drm_i915_private *dev_priv = engine->i915; 2849 struct drm_i915_private *dev_priv = engine->i915;
2850 void __iomem *vaddr;
2862 u32 cmd, ipehr, head; 2851 u32 cmd, ipehr, head;
2863 u64 offset = 0; 2852 u64 offset = 0;
2864 int i, backwards; 2853 int i, backwards;
@@ -2897,6 +2886,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
2897 */ 2886 */
2898 head = I915_READ_HEAD(engine) & HEAD_ADDR; 2887 head = I915_READ_HEAD(engine) & HEAD_ADDR;
2899 backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4; 2888 backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4;
2889 vaddr = (void __iomem *)engine->buffer->vaddr;
2900 2890
2901 for (i = backwards; i; --i) { 2891 for (i = backwards; i; --i) {
2902 /* 2892 /*
@@ -2907,7 +2897,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
2907 head &= engine->buffer->size - 1; 2897 head &= engine->buffer->size - 1;
2908 2898
2909 /* This here seems to blow up */ 2899 /* This here seems to blow up */
2910 cmd = ioread32(engine->buffer->virtual_start + head); 2900 cmd = ioread32(vaddr + head);
2911 if (cmd == ipehr) 2901 if (cmd == ipehr)
2912 break; 2902 break;
2913 2903
@@ -2917,11 +2907,11 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
2917 if (!i) 2907 if (!i)
2918 return NULL; 2908 return NULL;
2919 2909
2920 *seqno = ioread32(engine->buffer->virtual_start + head + 4) + 1; 2910 *seqno = ioread32(vaddr + head + 4) + 1;
2921 if (INTEL_GEN(dev_priv) >= 8) { 2911 if (INTEL_GEN(dev_priv) >= 8) {
2922 offset = ioread32(engine->buffer->virtual_start + head + 12); 2912 offset = ioread32(vaddr + head + 12);
2923 offset <<= 32; 2913 offset <<= 32;
2924 offset = ioread32(engine->buffer->virtual_start + head + 8); 2914 offset |= ioread32(vaddr + head + 8);
2925 } 2915 }
2926 return semaphore_wait_to_signaller_ring(engine, ipehr, offset); 2916 return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
2927} 2917}
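
Beyond caching the ring's vaddr once, the gen8 branch above fixes a genuine bug: the low dword of the semaphore offset was assigned with '=' and clobbered the high dword that had just been shifted into place; it is now OR-ed in. In isolation the corrected combine is:

#include <stdint.h>
#include <stdio.h>

/* Combine two 32-bit MMIO-style reads into a 64-bit value. Using '='
 * for the low half (as the old code did) would discard the high half;
 * the '|=' keeps it.
 */
static uint64_t combine_hi_lo(uint32_t hi, uint32_t lo)
{
        uint64_t offset;

        offset = hi;
        offset <<= 32;
        offset |= lo;
        return offset;
}

int main(void)
{
        printf("0x%016llx\n",
               (unsigned long long)combine_hi_lo(0x00000001, 0xdeadbeef));
        return 0;
}
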
@@ -2990,7 +2980,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine)
2990 return stuck; 2980 return stuck;
2991} 2981}
2992 2982
2993static enum intel_ring_hangcheck_action 2983static enum intel_engine_hangcheck_action
2994head_stuck(struct intel_engine_cs *engine, u64 acthd) 2984head_stuck(struct intel_engine_cs *engine, u64 acthd)
2995{ 2985{
2996 if (acthd != engine->hangcheck.acthd) { 2986 if (acthd != engine->hangcheck.acthd) {
@@ -3008,11 +2998,11 @@ head_stuck(struct intel_engine_cs *engine, u64 acthd)
3008 return HANGCHECK_HUNG; 2998 return HANGCHECK_HUNG;
3009} 2999}
3010 3000
3011static enum intel_ring_hangcheck_action 3001static enum intel_engine_hangcheck_action
3012ring_stuck(struct intel_engine_cs *engine, u64 acthd) 3002engine_stuck(struct intel_engine_cs *engine, u64 acthd)
3013{ 3003{
3014 struct drm_i915_private *dev_priv = engine->i915; 3004 struct drm_i915_private *dev_priv = engine->i915;
3015 enum intel_ring_hangcheck_action ha; 3005 enum intel_engine_hangcheck_action ha;
3016 u32 tmp; 3006 u32 tmp;
3017 3007
3018 ha = head_stuck(engine, acthd); 3008 ha = head_stuck(engine, acthd);
@@ -3121,14 +3111,14 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
3121 if (engine->irq_seqno_barrier) 3111 if (engine->irq_seqno_barrier)
3122 engine->irq_seqno_barrier(engine); 3112 engine->irq_seqno_barrier(engine);
3123 3113
3124 acthd = intel_ring_get_active_head(engine); 3114 acthd = intel_engine_get_active_head(engine);
3125 seqno = intel_engine_get_seqno(engine); 3115 seqno = intel_engine_get_seqno(engine);
3126 3116
3127 /* Reset stuck interrupts between batch advances */ 3117 /* Reset stuck interrupts between batch advances */
3128 user_interrupts = 0; 3118 user_interrupts = 0;
3129 3119
3130 if (engine->hangcheck.seqno == seqno) { 3120 if (engine->hangcheck.seqno == seqno) {
3131 if (ring_idle(engine, seqno)) { 3121 if (!intel_engine_is_active(engine)) {
3132 engine->hangcheck.action = HANGCHECK_IDLE; 3122 engine->hangcheck.action = HANGCHECK_IDLE;
3133 if (busy) { 3123 if (busy) {
3134 /* Safeguard against driver failure */ 3124 /* Safeguard against driver failure */
@@ -3137,13 +3127,13 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
3137 } 3127 }
3138 } else { 3128 } else {
3139 /* We always increment the hangcheck score 3129 /* We always increment the hangcheck score
3140 * if the ring is busy and still processing 3130 * if the engine is busy and still processing
3141 * the same request, so that no single request 3131 * the same request, so that no single request
3142 * can run indefinitely (such as a chain of 3132 * can run indefinitely (such as a chain of
3143 * batches). The only time we do not increment 3133 * batches). The only time we do not increment
3144 * the hangcheck score on this ring, if this 3134 * the hangcheck score on this ring, if this
3145 * ring is in a legitimate wait for another 3135 * engine is in a legitimate wait for another
3146 * ring. In that case the waiting ring is a 3136 * engine. In that case the waiting engine is a
3147 * victim and we want to be sure we catch the 3137 * victim and we want to be sure we catch the
3148 * right culprit. Then every time we do kick 3138 * right culprit. Then every time we do kick
3149 * the ring, add a small increment to the 3139 * the ring, add a small increment to the
@@ -3151,8 +3141,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
3151 * being repeatedly kicked and so responsible 3141 * being repeatedly kicked and so responsible
3152 * for stalling the machine. 3142 * for stalling the machine.
3153 */ 3143 */
3154 engine->hangcheck.action = ring_stuck(engine, 3144 engine->hangcheck.action =
3155 acthd); 3145 engine_stuck(engine, acthd);
3156 3146
3157 switch (engine->hangcheck.action) { 3147 switch (engine->hangcheck.action) {
3158 case HANGCHECK_IDLE: 3148 case HANGCHECK_IDLE:
@@ -4542,8 +4532,9 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
4542 i915_hangcheck_elapsed); 4532 i915_hangcheck_elapsed);
4543 4533
4544 if (IS_GEN2(dev_priv)) { 4534 if (IS_GEN2(dev_priv)) {
4535 /* Gen2 doesn't have a hardware frame counter */
4545 dev->max_vblank_count = 0; 4536 dev->max_vblank_count = 0;
4546 dev->driver->get_vblank_counter = i8xx_get_vblank_counter; 4537 dev->driver->get_vblank_counter = drm_vblank_no_hw_counter;
4547 } else if (IS_G4X(dev_priv) || INTEL_INFO(dev_priv)->gen >= 5) { 4538 } else if (IS_G4X(dev_priv) || INTEL_INFO(dev_priv)->gen >= 5) {
4548 dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ 4539 dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */
4549 dev->driver->get_vblank_counter = g4x_get_vblank_counter; 4540 dev->driver->get_vblank_counter = g4x_get_vblank_counter;
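
Earlier in this file, ring_idle() is dropped in favour of intel_engine_is_active(); the old helper leaned on i915_seqno_passed(), the usual wraparound-safe way to compare 32-bit seqnos. A generic sketch of that comparison, not the driver's exact helper:

#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "has seqno a passed seqno b?" test: the signed
 * difference stays correct as the 32-bit counter wraps.
 */
static int seqno_passed(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) >= 0;
}

int main(void)
{
        printf("%d\n", seqno_passed(5, 3));                     /* 1 */
        printf("%d\n", seqno_passed(3, 5));                     /* 0 */
        printf("%d\n", seqno_passed(0x00000002, 0xfffffffe));   /* 1: wrapped */
        return 0;
}
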
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 949c01686a66..2587b1bd41f4 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -173,6 +173,7 @@ static const struct intel_device_info intel_pineview_info = {
173 .gen = 3, .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .num_pipes = 2, 173 .gen = 3, .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .num_pipes = 2,
174 .need_gfx_hws = 1, .has_hotplug = 1, 174 .need_gfx_hws = 1, .has_hotplug = 1,
175 .has_overlay = 1, 175 .has_overlay = 1,
176 .ring_mask = RENDER_RING,
176 GEN_DEFAULT_PIPEOFFSETS, 177 GEN_DEFAULT_PIPEOFFSETS,
177 CURSOR_OFFSETS, 178 CURSOR_OFFSETS,
178}; 179};
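
The Pineview entry gains an explicit .ring_mask = RENDER_RING, so engine setup can be driven from the static device description like on the other platforms. A toy illustration of keying init off such a mask; the bit values below are made up for the sketch:

#include <stdio.h>

#define RENDER_RING (1 << 0)
#define BSD_RING    (1 << 1)
#define BLT_RING    (1 << 2)

struct device_info {
        const char *name;
        unsigned int ring_mask;         /* one bit per engine present */
};

static void init_engines(const struct device_info *info)
{
        if (info->ring_mask & RENDER_RING)
                printf("%s: init render engine\n", info->name);
        if (info->ring_mask & BSD_RING)
                printf("%s: init bsd engine\n", info->name);
        if (info->ring_mask & BLT_RING)
                printf("%s: init blt engine\n", info->name);
}

int main(void)
{
        struct device_info pineview = { "pineview", RENDER_RING };

        init_engines(&pineview);
        return 0;
}
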
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ce14fe09d962..f38a5e20bbee 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -186,13 +186,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
186#define GEN9_GRDOM_GUC (1 << 5) 186#define GEN9_GRDOM_GUC (1 << 5)
187#define GEN8_GRDOM_MEDIA2 (1 << 7) 187#define GEN8_GRDOM_MEDIA2 (1 << 7)
188 188
189#define RING_PP_DIR_BASE(ring) _MMIO((ring)->mmio_base+0x228) 189#define RING_PP_DIR_BASE(engine) _MMIO((engine)->mmio_base+0x228)
190#define RING_PP_DIR_BASE_READ(ring) _MMIO((ring)->mmio_base+0x518) 190#define RING_PP_DIR_BASE_READ(engine) _MMIO((engine)->mmio_base+0x518)
191#define RING_PP_DIR_DCLV(ring) _MMIO((ring)->mmio_base+0x220) 191#define RING_PP_DIR_DCLV(engine) _MMIO((engine)->mmio_base+0x220)
192#define PP_DIR_DCLV_2G 0xffffffff 192#define PP_DIR_DCLV_2G 0xffffffff
193 193
194#define GEN8_RING_PDP_UDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8 + 4) 194#define GEN8_RING_PDP_UDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8 + 4)
195#define GEN8_RING_PDP_LDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8) 195#define GEN8_RING_PDP_LDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8)
196 196
197#define GEN8_R_PWR_CLK_STATE _MMIO(0x20C8) 197#define GEN8_R_PWR_CLK_STATE _MMIO(0x20C8)
198#define GEN8_RPCS_ENABLE (1 << 31) 198#define GEN8_RPCS_ENABLE (1 << 31)
@@ -1536,6 +1536,7 @@ enum skl_disp_power_wells {
1536#define BALANCE_LEG_MASK(port) (7<<(8+3*(port))) 1536#define BALANCE_LEG_MASK(port) (7<<(8+3*(port)))
1537/* Balance leg disable bits */ 1537/* Balance leg disable bits */
1538#define BALANCE_LEG_DISABLE_SHIFT 23 1538#define BALANCE_LEG_DISABLE_SHIFT 23
1539#define BALANCE_LEG_DISABLE(port) (1 << (23 + (port)))
1539 1540
1540/* 1541/*
1541 * Fence registers 1542 * Fence registers
@@ -1647,7 +1648,7 @@ enum skl_disp_power_wells {
1647#define ARB_MODE_BWGTLB_DISABLE (1<<9) 1648#define ARB_MODE_BWGTLB_DISABLE (1<<9)
1648#define ARB_MODE_SWIZZLE_BDW (1<<1) 1649#define ARB_MODE_SWIZZLE_BDW (1<<1)
1649#define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) 1650#define RENDER_HWS_PGA_GEN7 _MMIO(0x04080)
1650#define RING_FAULT_REG(ring) _MMIO(0x4094 + 0x100*(ring)->id) 1651#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100*(engine)->hw_id)
1651#define RING_FAULT_GTTSEL_MASK (1<<11) 1652#define RING_FAULT_GTTSEL_MASK (1<<11)
1652#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) 1653#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff)
1653#define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) 1654#define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3)
@@ -1845,7 +1846,7 @@ enum skl_disp_power_wells {
1845 1846
1846#define GFX_MODE _MMIO(0x2520) 1847#define GFX_MODE _MMIO(0x2520)
1847#define GFX_MODE_GEN7 _MMIO(0x229c) 1848#define GFX_MODE_GEN7 _MMIO(0x229c)
1848#define RING_MODE_GEN7(ring) _MMIO((ring)->mmio_base+0x29c) 1849#define RING_MODE_GEN7(engine) _MMIO((engine)->mmio_base+0x29c)
1849#define GFX_RUN_LIST_ENABLE (1<<15) 1850#define GFX_RUN_LIST_ENABLE (1<<15)
1850#define GFX_INTERRUPT_STEERING (1<<14) 1851#define GFX_INTERRUPT_STEERING (1<<14)
1851#define GFX_TLB_INVALIDATE_EXPLICIT (1<<13) 1852#define GFX_TLB_INVALIDATE_EXPLICIT (1<<13)
@@ -6132,6 +6133,7 @@ enum {
6132# define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26)) 6133# define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26))
6133# define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14) 6134# define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14)
6134#define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) 6135#define COMMON_SLICE_CHICKEN2 _MMIO(0x7014)
6136# define GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE (1<<12)
6135# define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8) 6137# define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8)
6136# define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0) 6138# define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0)
6137 6139
@@ -6958,6 +6960,9 @@ enum {
6958#define ECOBUS _MMIO(0xa180) 6960#define ECOBUS _MMIO(0xa180)
6959#define FORCEWAKE_MT_ENABLE (1<<5) 6961#define FORCEWAKE_MT_ENABLE (1<<5)
6960#define VLV_SPAREG2H _MMIO(0xA194) 6962#define VLV_SPAREG2H _MMIO(0xA194)
6963#define GEN9_PWRGT_DOMAIN_STATUS _MMIO(0xA2A0)
6964#define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0)
6965#define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1)
6961 6966
6962#define GTFIFODBG _MMIO(0x120000) 6967#define GTFIFODBG _MMIO(0x120000)
6963#define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) 6968#define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20)
@@ -7485,6 +7490,7 @@ enum {
7485#define _DDI_BUF_TRANS_A 0x64E00 7490#define _DDI_BUF_TRANS_A 0x64E00
7486#define _DDI_BUF_TRANS_B 0x64E60 7491#define _DDI_BUF_TRANS_B 0x64E60
7487#define DDI_BUF_TRANS_LO(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8) 7492#define DDI_BUF_TRANS_LO(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8)
7493#define DDI_BUF_BALANCE_LEG_ENABLE (1 << 31)
7488#define DDI_BUF_TRANS_HI(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8 + 4) 7494#define DDI_BUF_TRANS_HI(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8 + 4)
7489 7495
7490/* Sideband Interface (SBI) is programmed indirectly, via 7496/* Sideband Interface (SBI) is programmed indirectly, via
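The per-engine register macros updated above (RING_PP_DIR_BASE and friends) now take the engine, or its hw_id, instead of a ring, but the computation is unchanged: each register is a fixed offset from the engine's mmio_base. A stand-alone sketch of that pattern (the struct and the example base address are invented; only the 0x228 offset mirrors RING_PP_DIR_BASE):

    #include <stdint.h>
    #include <stdio.h>

    struct toy_engine {
        uint32_t mmio_base;
    };

    /* Per-engine register: offset relative to the engine's MMIO base. */
    #define TOY_RING_PP_DIR_BASE(engine) ((engine)->mmio_base + 0x228)

    int main(void)
    {
        struct toy_engine rcs = { .mmio_base = 0x2000 };  /* example base */

        printf("PP_DIR_BASE at 0x%x\n", (unsigned int)TOY_RING_PP_DIR_BASE(&rcs));
        return 0;
    }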
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index d61829e54f93..f1ffde7f7c0b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -271,8 +271,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
271 struct drm_i915_private *dev_priv = to_i915(dev); 271 struct drm_i915_private *dev_priv = to_i915(dev);
272 int ret; 272 int ret;
273 273
274 flush_delayed_work(&dev_priv->rps.delayed_resume_work);
275
276 intel_runtime_pm_get(dev_priv); 274 intel_runtime_pm_get(dev_priv);
277 275
278 mutex_lock(&dev_priv->rps.hw_lock); 276 mutex_lock(&dev_priv->rps.hw_lock);
@@ -303,19 +301,46 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
303 struct drm_minor *minor = dev_to_drm_minor(kdev); 301 struct drm_minor *minor = dev_to_drm_minor(kdev);
304 struct drm_device *dev = minor->dev; 302 struct drm_device *dev = minor->dev;
305 struct drm_i915_private *dev_priv = to_i915(dev); 303 struct drm_i915_private *dev_priv = to_i915(dev);
306 int ret;
307 304
308 flush_delayed_work(&dev_priv->rps.delayed_resume_work); 305 return snprintf(buf, PAGE_SIZE, "%d\n",
306 intel_gpu_freq(dev_priv,
307 dev_priv->rps.cur_freq));
308}
309 309
310 intel_runtime_pm_get(dev_priv); 310static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
311{
312 struct drm_minor *minor = dev_to_drm_minor(kdev);
313 struct drm_i915_private *dev_priv = to_i915(minor->dev);
314
315 return snprintf(buf, PAGE_SIZE, "%d\n",
316 intel_gpu_freq(dev_priv,
317 dev_priv->rps.boost_freq));
318}
319
320static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
321 struct device_attribute *attr,
322 const char *buf, size_t count)
323{
324 struct drm_minor *minor = dev_to_drm_minor(kdev);
325 struct drm_device *dev = minor->dev;
326 struct drm_i915_private *dev_priv = to_i915(dev);
327 u32 val;
328 ssize_t ret;
329
330 ret = kstrtou32(buf, 0, &val);
331 if (ret)
332 return ret;
333
334 /* Validate against (static) hardware limits */
335 val = intel_freq_opcode(dev_priv, val);
336 if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq)
337 return -EINVAL;
311 338
312 mutex_lock(&dev_priv->rps.hw_lock); 339 mutex_lock(&dev_priv->rps.hw_lock);
313 ret = intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq); 340 dev_priv->rps.boost_freq = val;
314 mutex_unlock(&dev_priv->rps.hw_lock); 341 mutex_unlock(&dev_priv->rps.hw_lock);
315 342
316 intel_runtime_pm_put(dev_priv); 343 return count;
317
318 return snprintf(buf, PAGE_SIZE, "%d\n", ret);
319} 344}
320 345
321static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, 346static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
@@ -325,9 +350,9 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
325 struct drm_device *dev = minor->dev; 350 struct drm_device *dev = minor->dev;
326 struct drm_i915_private *dev_priv = to_i915(dev); 351 struct drm_i915_private *dev_priv = to_i915(dev);
327 352
328 return snprintf(buf, PAGE_SIZE, 353 return snprintf(buf, PAGE_SIZE, "%d\n",
329 "%d\n", 354 intel_gpu_freq(dev_priv,
330 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); 355 dev_priv->rps.efficient_freq));
331} 356}
332 357
333static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) 358static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
@@ -335,15 +360,10 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute
335 struct drm_minor *minor = dev_to_drm_minor(kdev); 360 struct drm_minor *minor = dev_to_drm_minor(kdev);
336 struct drm_device *dev = minor->dev; 361 struct drm_device *dev = minor->dev;
337 struct drm_i915_private *dev_priv = to_i915(dev); 362 struct drm_i915_private *dev_priv = to_i915(dev);
338 int ret;
339
340 flush_delayed_work(&dev_priv->rps.delayed_resume_work);
341
342 mutex_lock(&dev_priv->rps.hw_lock);
343 ret = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit);
344 mutex_unlock(&dev_priv->rps.hw_lock);
345 363
346 return snprintf(buf, PAGE_SIZE, "%d\n", ret); 364 return snprintf(buf, PAGE_SIZE, "%d\n",
365 intel_gpu_freq(dev_priv,
366 dev_priv->rps.max_freq_softlimit));
347} 367}
348 368
349static ssize_t gt_max_freq_mhz_store(struct device *kdev, 369static ssize_t gt_max_freq_mhz_store(struct device *kdev,
@@ -360,8 +380,6 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
360 if (ret) 380 if (ret)
361 return ret; 381 return ret;
362 382
363 flush_delayed_work(&dev_priv->rps.delayed_resume_work);
364
365 intel_runtime_pm_get(dev_priv); 383 intel_runtime_pm_get(dev_priv);
366 384
367 mutex_lock(&dev_priv->rps.hw_lock); 385 mutex_lock(&dev_priv->rps.hw_lock);
@@ -403,15 +421,10 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute
403 struct drm_minor *minor = dev_to_drm_minor(kdev); 421 struct drm_minor *minor = dev_to_drm_minor(kdev);
404 struct drm_device *dev = minor->dev; 422 struct drm_device *dev = minor->dev;
405 struct drm_i915_private *dev_priv = to_i915(dev); 423 struct drm_i915_private *dev_priv = to_i915(dev);
406 int ret;
407
408 flush_delayed_work(&dev_priv->rps.delayed_resume_work);
409 424
410 mutex_lock(&dev_priv->rps.hw_lock); 425 return snprintf(buf, PAGE_SIZE, "%d\n",
411 ret = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); 426 intel_gpu_freq(dev_priv,
412 mutex_unlock(&dev_priv->rps.hw_lock); 427 dev_priv->rps.min_freq_softlimit));
413
414 return snprintf(buf, PAGE_SIZE, "%d\n", ret);
415} 428}
416 429
417static ssize_t gt_min_freq_mhz_store(struct device *kdev, 430static ssize_t gt_min_freq_mhz_store(struct device *kdev,
@@ -428,8 +441,6 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
428 if (ret) 441 if (ret)
429 return ret; 442 return ret;
430 443
431 flush_delayed_work(&dev_priv->rps.delayed_resume_work);
432
433 intel_runtime_pm_get(dev_priv); 444 intel_runtime_pm_get(dev_priv);
434 445
435 mutex_lock(&dev_priv->rps.hw_lock); 446 mutex_lock(&dev_priv->rps.hw_lock);
@@ -465,6 +476,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
465 476
466static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); 477static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL);
467static DEVICE_ATTR(gt_cur_freq_mhz, S_IRUGO, gt_cur_freq_mhz_show, NULL); 478static DEVICE_ATTR(gt_cur_freq_mhz, S_IRUGO, gt_cur_freq_mhz_show, NULL);
479static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO, gt_boost_freq_mhz_show, gt_boost_freq_mhz_store);
468static DEVICE_ATTR(gt_max_freq_mhz, S_IRUGO | S_IWUSR, gt_max_freq_mhz_show, gt_max_freq_mhz_store); 480static DEVICE_ATTR(gt_max_freq_mhz, S_IRUGO | S_IWUSR, gt_max_freq_mhz_show, gt_max_freq_mhz_store);
469static DEVICE_ATTR(gt_min_freq_mhz, S_IRUGO | S_IWUSR, gt_min_freq_mhz_show, gt_min_freq_mhz_store); 481static DEVICE_ATTR(gt_min_freq_mhz, S_IRUGO | S_IWUSR, gt_min_freq_mhz_show, gt_min_freq_mhz_store);
470 482
@@ -498,6 +510,7 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr
498static const struct attribute *gen6_attrs[] = { 510static const struct attribute *gen6_attrs[] = {
499 &dev_attr_gt_act_freq_mhz.attr, 511 &dev_attr_gt_act_freq_mhz.attr,
500 &dev_attr_gt_cur_freq_mhz.attr, 512 &dev_attr_gt_cur_freq_mhz.attr,
513 &dev_attr_gt_boost_freq_mhz.attr,
501 &dev_attr_gt_max_freq_mhz.attr, 514 &dev_attr_gt_max_freq_mhz.attr,
502 &dev_attr_gt_min_freq_mhz.attr, 515 &dev_attr_gt_min_freq_mhz.attr,
503 &dev_attr_gt_RP0_freq_mhz.attr, 516 &dev_attr_gt_RP0_freq_mhz.attr,
@@ -509,6 +522,7 @@ static const struct attribute *gen6_attrs[] = {
509static const struct attribute *vlv_attrs[] = { 522static const struct attribute *vlv_attrs[] = {
510 &dev_attr_gt_act_freq_mhz.attr, 523 &dev_attr_gt_act_freq_mhz.attr,
511 &dev_attr_gt_cur_freq_mhz.attr, 524 &dev_attr_gt_cur_freq_mhz.attr,
525 &dev_attr_gt_boost_freq_mhz.attr,
512 &dev_attr_gt_max_freq_mhz.attr, 526 &dev_attr_gt_max_freq_mhz.attr,
513 &dev_attr_gt_min_freq_mhz.attr, 527 &dev_attr_gt_min_freq_mhz.attr,
514 &dev_attr_gt_RP0_freq_mhz.attr, 528 &dev_attr_gt_RP0_freq_mhz.attr,
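The new gt_boost_freq_mhz attribute reads back rps.boost_freq, and its store hook converts the value with intel_freq_opcode() and rejects anything outside the hardware min/max. A user-space sketch of poking it (the sysfs path is assumed to sit next to the existing gt_max_freq_mhz/gt_min_freq_mhz files; the attribute is registered with S_IRUGO only in this hunk, so the write may be refused depending on permissions):

    #include <stdio.h>

    int main(void)
    {
        const char *path = "/sys/class/drm/card0/gt_boost_freq_mhz";  /* assumed path */
        FILE *f = fopen(path, "r");
        int mhz = -1;

        if (!f) {
            perror(path);
            return 1;
        }
        if (fscanf(f, "%d", &mhz) != 1)
            mhz = -1;
        fclose(f);
        printf("current boost: %d MHz\n", mhz);

        /* Writes are validated against rps.min_freq..rps.max_freq by the
         * store hook above; here we just write the same value back. */
        f = fopen(path, "w");
        if (f) {
            fprintf(f, "%d\n", mhz);
            fclose(f);
        }
        return 0;
    }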
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 534154e05fbe..178798002a73 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -394,25 +394,27 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
394); 394);
395 395
396TRACE_EVENT(i915_gem_evict, 396TRACE_EVENT(i915_gem_evict,
397 TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags), 397 TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags),
398 TP_ARGS(dev, size, align, flags), 398 TP_ARGS(vm, size, align, flags),
399 399
400 TP_STRUCT__entry( 400 TP_STRUCT__entry(
401 __field(u32, dev) 401 __field(u32, dev)
402 __field(struct i915_address_space *, vm)
402 __field(u32, size) 403 __field(u32, size)
403 __field(u32, align) 404 __field(u32, align)
404 __field(unsigned, flags) 405 __field(unsigned int, flags)
405 ), 406 ),
406 407
407 TP_fast_assign( 408 TP_fast_assign(
408 __entry->dev = dev->primary->index; 409 __entry->dev = vm->dev->primary->index;
410 __entry->vm = vm;
409 __entry->size = size; 411 __entry->size = size;
410 __entry->align = align; 412 __entry->align = align;
411 __entry->flags = flags; 413 __entry->flags = flags;
412 ), 414 ),
413 415
414 TP_printk("dev=%d, size=%d, align=%d %s", 416 TP_printk("dev=%d, vm=%p, size=%d, align=%d %s",
415 __entry->dev, __entry->size, __entry->align, 417 __entry->dev, __entry->vm, __entry->size, __entry->align,
416 __entry->flags & PIN_MAPPABLE ? ", mappable" : "") 418 __entry->flags & PIN_MAPPABLE ? ", mappable" : "")
417); 419);
418 420
@@ -449,10 +451,9 @@ TRACE_EVENT(i915_gem_evict_vm,
449); 451);
450 452
451TRACE_EVENT(i915_gem_ring_sync_to, 453TRACE_EVENT(i915_gem_ring_sync_to,
452 TP_PROTO(struct drm_i915_gem_request *to_req, 454 TP_PROTO(struct drm_i915_gem_request *to,
453 struct intel_engine_cs *from, 455 struct drm_i915_gem_request *from),
454 struct drm_i915_gem_request *req), 456 TP_ARGS(to, from),
455 TP_ARGS(to_req, from, req),
456 457
457 TP_STRUCT__entry( 458 TP_STRUCT__entry(
458 __field(u32, dev) 459 __field(u32, dev)
@@ -463,9 +464,9 @@ TRACE_EVENT(i915_gem_ring_sync_to,
463 464
464 TP_fast_assign( 465 TP_fast_assign(
465 __entry->dev = from->i915->drm.primary->index; 466 __entry->dev = from->i915->drm.primary->index;
466 __entry->sync_from = from->id; 467 __entry->sync_from = from->engine->id;
467 __entry->sync_to = to_req->engine->id; 468 __entry->sync_to = to->engine->id;
468 __entry->seqno = i915_gem_request_get_seqno(req); 469 __entry->seqno = from->fence.seqno;
469 ), 470 ),
470 471
471 TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u", 472 TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
@@ -488,9 +489,9 @@ TRACE_EVENT(i915_gem_ring_dispatch,
488 TP_fast_assign( 489 TP_fast_assign(
489 __entry->dev = req->i915->drm.primary->index; 490 __entry->dev = req->i915->drm.primary->index;
490 __entry->ring = req->engine->id; 491 __entry->ring = req->engine->id;
491 __entry->seqno = req->seqno; 492 __entry->seqno = req->fence.seqno;
492 __entry->flags = flags; 493 __entry->flags = flags;
493 intel_engine_enable_signaling(req); 494 fence_enable_sw_signaling(&req->fence);
494 ), 495 ),
495 496
496 TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", 497 TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
@@ -533,7 +534,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
533 TP_fast_assign( 534 TP_fast_assign(
534 __entry->dev = req->i915->drm.primary->index; 535 __entry->dev = req->i915->drm.primary->index;
535 __entry->ring = req->engine->id; 536 __entry->ring = req->engine->id;
536 __entry->seqno = req->seqno; 537 __entry->seqno = req->fence.seqno;
537 ), 538 ),
538 539
539 TP_printk("dev=%u, ring=%u, seqno=%u", 540 TP_printk("dev=%u, ring=%u, seqno=%u",
@@ -595,7 +596,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
595 TP_fast_assign( 596 TP_fast_assign(
596 __entry->dev = req->i915->drm.primary->index; 597 __entry->dev = req->i915->drm.primary->index;
597 __entry->ring = req->engine->id; 598 __entry->ring = req->engine->id;
598 __entry->seqno = req->seqno; 599 __entry->seqno = req->fence.seqno;
599 __entry->blocking = 600 __entry->blocking =
600 mutex_is_locked(&req->i915->drm.struct_mutex); 601 mutex_is_locked(&req->i915->drm.struct_mutex);
601 ), 602 ),
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index f6acb5a0e701..142bac976919 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -97,6 +97,7 @@ static struct _balloon_info_ bl_info;
97 97
98/** 98/**
99 * intel_vgt_deballoon - deballoon reserved graphics address trunks 99 * intel_vgt_deballoon - deballoon reserved graphics address trunks
100 * @dev_priv: i915 device private data
100 * 101 *
101 * This function is called to deallocate the ballooned-out graphic memory, when 102 * This function is called to deallocate the ballooned-out graphic memory, when
102 * driver is unloaded or when ballooning fails. 103 * driver is unloaded or when ballooning fails.
@@ -138,7 +139,7 @@ static int vgt_balloon_space(struct drm_mm *mm,
138 139
139/** 140/**
140 * intel_vgt_balloon - balloon out reserved graphics address trunks 141 * intel_vgt_balloon - balloon out reserved graphics address trunks
141 * @dev: drm device 142 * @dev_priv: i915 device private data
142 * 143 *
143 * This function is called at the initialization stage, to balloon out the 144 * This function is called at the initialization stage, to balloon out the
144 * graphic address space allocated to other vGPUs, by marking these spaces as 145 * graphic address space allocated to other vGPUs, by marking these spaces as
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 6700a7be7f78..d32f586f9c05 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -600,6 +600,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
600 if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) 600 if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
601 return; 601 return;
602 602
603 i915_audio_component_get_power(dev);
604
603 /* 605 /*
604 * Enable/disable generating the codec wake signal, overriding the 606 * Enable/disable generating the codec wake signal, overriding the
605 * internal logic to generate the codec wake to controller. 607 * internal logic to generate the codec wake to controller.
@@ -615,6 +617,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
615 I915_WRITE(HSW_AUD_CHICKENBIT, tmp); 617 I915_WRITE(HSW_AUD_CHICKENBIT, tmp);
616 usleep_range(1000, 1500); 618 usleep_range(1000, 1500);
617 } 619 }
620
621 i915_audio_component_put_power(dev);
618} 622}
619 623
620/* Get CDCLK in kHz */ 624/* Get CDCLK in kHz */
@@ -648,6 +652,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
648 !IS_HASWELL(dev_priv)) 652 !IS_HASWELL(dev_priv))
649 return 0; 653 return 0;
650 654
655 i915_audio_component_get_power(dev);
651 mutex_lock(&dev_priv->av_mutex); 656 mutex_lock(&dev_priv->av_mutex);
652 /* 1. get the pipe */ 657 /* 1. get the pipe */
653 intel_encoder = dev_priv->dig_port_map[port]; 658 intel_encoder = dev_priv->dig_port_map[port];
@@ -698,6 +703,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
698 703
699 unlock: 704 unlock:
700 mutex_unlock(&dev_priv->av_mutex); 705 mutex_unlock(&dev_priv->av_mutex);
706 i915_audio_component_put_power(dev);
701 return err; 707 return err;
702} 708}
703 709
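The hunks above bracket the codec-wake override and sync_audio_rate paths with i915_audio_component_get_power()/put_power(), taking the reference before touching hardware state and dropping it on every exit path. A minimal stand-alone sketch of that bracketing pattern (the toy_* names are invented for illustration):

    #include <stdio.h>

    static int toy_power_refs;

    static void toy_get_power(void) { toy_power_refs++; }
    static void toy_put_power(void) { toy_power_refs--; }

    static int toy_sync_audio_rate(int port)
    {
        int err = 0;

        toy_get_power();          /* mirrors i915_audio_component_get_power() */

        if (port < 0) {           /* the error path still drops the reference */
            err = -1;
            goto out;
        }
        printf("programming audio rate on port %d\n", port);

    out:
        toy_put_power();          /* mirrors i915_audio_component_put_power() */
        return err;
    }

    int main(void)
    {
        toy_sync_audio_rate(0);
        toy_sync_audio_rate(-1);
        printf("power refs after: %d\n", toy_power_refs);
        return 0;
    }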
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index b074f3d6d127..90867446f1a5 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -51,6 +51,13 @@ static void irq_enable(struct intel_engine_cs *engine)
51 */ 51 */
52 engine->breadcrumbs.irq_posted = true; 52 engine->breadcrumbs.irq_posted = true;
53 53
54 /* Make sure the current hangcheck doesn't falsely accuse a just
55 * started irq handler from missing an interrupt (because the
56 * interrupt count still matches the stale value from when
57 * the irq handler was disabled, many hangchecks ago).
58 */
59 engine->breadcrumbs.irq_wakeups++;
60
54 spin_lock_irq(&engine->i915->irq_lock); 61 spin_lock_irq(&engine->i915->irq_lock);
55 engine->irq_enable(engine); 62 engine->irq_enable(engine);
56 spin_unlock_irq(&engine->i915->irq_lock); 63 spin_unlock_irq(&engine->i915->irq_lock);
@@ -436,6 +443,7 @@ static int intel_breadcrumbs_signaler(void *arg)
436 */ 443 */
437 intel_engine_remove_wait(engine, 444 intel_engine_remove_wait(engine,
438 &request->signaling.wait); 445 &request->signaling.wait);
446 fence_signal(&request->fence);
439 447
440 /* Find the next oldest signal. Note that as we have 448 /* Find the next oldest signal. Note that as we have
441 * not been holding the lock, another client may 449 * not been holding the lock, another client may
@@ -452,7 +460,7 @@ static int intel_breadcrumbs_signaler(void *arg)
452 rb_erase(&request->signaling.node, &b->signals); 460 rb_erase(&request->signaling.node, &b->signals);
453 spin_unlock(&b->lock); 461 spin_unlock(&b->lock);
454 462
455 i915_gem_request_unreference(request); 463 i915_gem_request_put(request);
456 } else { 464 } else {
457 if (kthread_should_stop()) 465 if (kthread_should_stop())
458 break; 466 break;
@@ -472,18 +480,14 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
472 struct rb_node *parent, **p; 480 struct rb_node *parent, **p;
473 bool first, wakeup; 481 bool first, wakeup;
474 482
475 if (unlikely(READ_ONCE(request->signaling.wait.tsk))) 483 /* locked by fence_enable_sw_signaling() */
476 return; 484 assert_spin_locked(&request->lock);
477
478 spin_lock(&b->lock);
479 if (unlikely(request->signaling.wait.tsk)) {
480 wakeup = false;
481 goto unlock;
482 }
483 485
484 request->signaling.wait.tsk = b->signaler; 486 request->signaling.wait.tsk = b->signaler;
485 request->signaling.wait.seqno = request->seqno; 487 request->signaling.wait.seqno = request->fence.seqno;
486 i915_gem_request_reference(request); 488 i915_gem_request_get(request);
489
490 spin_lock(&b->lock);
487 491
488 /* First add ourselves into the list of waiters, but register our 492 /* First add ourselves into the list of waiters, but register our
489 * bottom-half as the signaller thread. As per usual, only the oldest 493 * bottom-half as the signaller thread. As per usual, only the oldest
@@ -504,8 +508,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
504 p = &b->signals.rb_node; 508 p = &b->signals.rb_node;
505 while (*p) { 509 while (*p) {
506 parent = *p; 510 parent = *p;
507 if (i915_seqno_passed(request->seqno, 511 if (i915_seqno_passed(request->fence.seqno,
508 to_signaler(parent)->seqno)) { 512 to_signaler(parent)->fence.seqno)) {
509 p = &parent->rb_right; 513 p = &parent->rb_right;
510 first = false; 514 first = false;
511 } else { 515 } else {
@@ -517,7 +521,6 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
517 if (first) 521 if (first)
518 smp_store_mb(b->first_signal, request); 522 smp_store_mb(b->first_signal, request);
519 523
520unlock:
521 spin_unlock(&b->lock); 524 spin_unlock(&b->lock);
522 525
523 if (wakeup) 526 if (wakeup)
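The signaler above keeps requests ordered in the rbtree by comparing fence.seqno with i915_seqno_passed(). A stand-alone sketch of that wrap-safe comparison (the helper body follows the usual signed-difference idiom; the values are invented):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* true if seq1 is at or after seq2, even across a u32 wrap */
    static bool toy_seqno_passed(uint32_t seq1, uint32_t seq2)
    {
        return (int32_t)(seq1 - seq2) >= 0;
    }

    int main(void)
    {
        printf("%d\n", toy_seqno_passed(10, 5));           /* 1 */
        printf("%d\n", toy_seqno_passed(5, 10));           /* 0 */
        printf("%d\n", toy_seqno_passed(3, 0xfffffff0u));  /* 1: seqno wrapped */
        return 0;
    }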
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 3edb9580928e..fb27d187876c 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -32,13 +32,6 @@
32 * onwards to drive newly added DMC (Display microcontroller) in display 32 * onwards to drive newly added DMC (Display microcontroller) in display
33 * engine to save and restore the state of display engine when it enter into 33 * engine to save and restore the state of display engine when it enter into
34 * low-power state and comes back to normal. 34 * low-power state and comes back to normal.
35 *
36 * Firmware loading status will be one of the below states: FW_UNINITIALIZED,
37 * FW_LOADED, FW_FAILED.
38 *
39 * Once the firmware is written into the registers status will be moved from
40 * FW_UNINITIALIZED to FW_LOADED and for any erroneous condition status will
41 * be moved to FW_FAILED.
42 */ 35 */
43 36
44#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin" 37#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin"
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index dd1d6fe12297..c2df4e429b19 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = {
145static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { 145static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
146 { 0x0000201B, 0x000000A2, 0x0 }, 146 { 0x0000201B, 0x000000A2, 0x0 },
147 { 0x00005012, 0x00000088, 0x0 }, 147 { 0x00005012, 0x00000088, 0x0 },
148 { 0x80007011, 0x000000CD, 0x0 }, 148 { 0x80007011, 0x000000CD, 0x1 },
149 { 0x80009010, 0x000000C0, 0x1 }, 149 { 0x80009010, 0x000000C0, 0x1 },
150 { 0x0000201B, 0x0000009D, 0x0 }, 150 { 0x0000201B, 0x0000009D, 0x0 },
151 { 0x80005012, 0x000000C0, 0x1 }, 151 { 0x80005012, 0x000000C0, 0x1 },
@@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
158static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = { 158static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = {
159 { 0x00000018, 0x000000A2, 0x0 }, 159 { 0x00000018, 0x000000A2, 0x0 },
160 { 0x00005012, 0x00000088, 0x0 }, 160 { 0x00005012, 0x00000088, 0x0 },
161 { 0x80007011, 0x000000CD, 0x0 }, 161 { 0x80007011, 0x000000CD, 0x3 },
162 { 0x80009010, 0x000000C0, 0x3 }, 162 { 0x80009010, 0x000000C0, 0x3 },
163 { 0x00000018, 0x0000009D, 0x0 }, 163 { 0x00000018, 0x0000009D, 0x0 },
164 { 0x80005012, 0x000000C0, 0x3 }, 164 { 0x80005012, 0x000000C0, 0x3 },
@@ -301,45 +301,34 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = {
301 { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */ 301 { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */
302}; 302};
303 303
304static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, 304enum port intel_ddi_get_encoder_port(struct intel_encoder *encoder)
305 u32 level, enum port port, int type);
306
307static void ddi_get_encoder_port(struct intel_encoder *intel_encoder,
308 struct intel_digital_port **dig_port,
309 enum port *port)
310{ 305{
311 struct drm_encoder *encoder = &intel_encoder->base; 306 switch (encoder->type) {
312
313 switch (intel_encoder->type) {
314 case INTEL_OUTPUT_DP_MST: 307 case INTEL_OUTPUT_DP_MST:
315 *dig_port = enc_to_mst(encoder)->primary; 308 return enc_to_mst(&encoder->base)->primary->port;
316 *port = (*dig_port)->port;
317 break;
318 default:
319 WARN(1, "Invalid DDI encoder type %d\n", intel_encoder->type);
320 /* fallthrough and treat as unknown */
321 case INTEL_OUTPUT_DP: 309 case INTEL_OUTPUT_DP:
322 case INTEL_OUTPUT_EDP: 310 case INTEL_OUTPUT_EDP:
323 case INTEL_OUTPUT_HDMI: 311 case INTEL_OUTPUT_HDMI:
324 case INTEL_OUTPUT_UNKNOWN: 312 case INTEL_OUTPUT_UNKNOWN:
325 *dig_port = enc_to_dig_port(encoder); 313 return enc_to_dig_port(&encoder->base)->port;
326 *port = (*dig_port)->port;
327 break;
328 case INTEL_OUTPUT_ANALOG: 314 case INTEL_OUTPUT_ANALOG:
329 *dig_port = NULL; 315 return PORT_E;
330 *port = PORT_E; 316 default:
331 break; 317 MISSING_CASE(encoder->type);
318 return PORT_A;
332 } 319 }
333} 320}
334 321
335enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder) 322static const struct ddi_buf_trans *
323bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
336{ 324{
337 struct intel_digital_port *dig_port; 325 if (dev_priv->vbt.edp.low_vswing) {
338 enum port port; 326 *n_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
339 327 return bdw_ddi_translations_edp;
340 ddi_get_encoder_port(intel_encoder, &dig_port, &port); 328 } else {
341 329 *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
342 return port; 330 return bdw_ddi_translations_dp;
331 }
343} 332}
344 333
345static const struct ddi_buf_trans * 334static const struct ddi_buf_trans *
@@ -388,39 +377,58 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries)
388 } 377 }
389} 378}
390 379
380static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port)
381{
382 int n_hdmi_entries;
383 int hdmi_level;
384 int hdmi_default_entry;
385
386 hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
387
388 if (IS_BROXTON(dev_priv))
389 return hdmi_level;
390
391 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
392 skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
393 hdmi_default_entry = 8;
394 } else if (IS_BROADWELL(dev_priv)) {
395 n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
396 hdmi_default_entry = 7;
397 } else if (IS_HASWELL(dev_priv)) {
398 n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
399 hdmi_default_entry = 6;
400 } else {
401 WARN(1, "ddi translation table missing\n");
402 n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
403 hdmi_default_entry = 7;
404 }
405
406 /* Choose a good default if VBT is badly populated */
407 if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
408 hdmi_level >= n_hdmi_entries)
409 hdmi_level = hdmi_default_entry;
410
411 return hdmi_level;
412}
413
391/* 414/*
392 * Starting with Haswell, DDI port buffers must be programmed with correct 415 * Starting with Haswell, DDI port buffers must be programmed with correct
393 * values in advance. The buffer values are different for FDI and DP modes, 416 * values in advance. This function programs the correct values for
394 * but the HDMI/DVI fields are shared among those. So we program the DDI 417 * DP/eDP/FDI use cases.
395 * in either FDI or DP modes only, as HDMI connections will work with both
396 * of those
397 */ 418 */
398void intel_prepare_ddi_buffer(struct intel_encoder *encoder) 419void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder)
399{ 420{
400 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 421 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
401 u32 iboost_bit = 0; 422 u32 iboost_bit = 0;
402 int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry, 423 int i, n_dp_entries, n_edp_entries, size;
403 size; 424 enum port port = intel_ddi_get_encoder_port(encoder);
404 int hdmi_level;
405 enum port port;
406 const struct ddi_buf_trans *ddi_translations_fdi; 425 const struct ddi_buf_trans *ddi_translations_fdi;
407 const struct ddi_buf_trans *ddi_translations_dp; 426 const struct ddi_buf_trans *ddi_translations_dp;
408 const struct ddi_buf_trans *ddi_translations_edp; 427 const struct ddi_buf_trans *ddi_translations_edp;
409 const struct ddi_buf_trans *ddi_translations_hdmi;
410 const struct ddi_buf_trans *ddi_translations; 428 const struct ddi_buf_trans *ddi_translations;
411 429
412 port = intel_ddi_get_encoder_port(encoder); 430 if (IS_BROXTON(dev_priv))
413 hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
414
415 if (IS_BROXTON(dev_priv)) {
416 if (encoder->type != INTEL_OUTPUT_HDMI)
417 return;
418
419 /* Vswing programming for HDMI */
420 bxt_ddi_vswing_sequence(dev_priv, hdmi_level, port,
421 INTEL_OUTPUT_HDMI);
422 return; 431 return;
423 }
424 432
425 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 433 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
426 ddi_translations_fdi = NULL; 434 ddi_translations_fdi = NULL;
@@ -428,13 +436,10 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
428 skl_get_buf_trans_dp(dev_priv, &n_dp_entries); 436 skl_get_buf_trans_dp(dev_priv, &n_dp_entries);
429 ddi_translations_edp = 437 ddi_translations_edp =
430 skl_get_buf_trans_edp(dev_priv, &n_edp_entries); 438 skl_get_buf_trans_edp(dev_priv, &n_edp_entries);
431 ddi_translations_hdmi = 439
432 skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
433 hdmi_default_entry = 8;
434 /* If we're boosting the current, set bit 31 of trans1 */ 440 /* If we're boosting the current, set bit 31 of trans1 */
435 if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || 441 if (dev_priv->vbt.ddi_port_info[port].dp_boost_level)
436 dev_priv->vbt.ddi_port_info[port].dp_boost_level) 442 iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE;
437 iboost_bit = 1<<31;
438 443
439 if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && 444 if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP &&
440 port != PORT_A && port != PORT_E && 445 port != PORT_A && port != PORT_E &&
@@ -443,38 +448,20 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
443 } else if (IS_BROADWELL(dev_priv)) { 448 } else if (IS_BROADWELL(dev_priv)) {
444 ddi_translations_fdi = bdw_ddi_translations_fdi; 449 ddi_translations_fdi = bdw_ddi_translations_fdi;
445 ddi_translations_dp = bdw_ddi_translations_dp; 450 ddi_translations_dp = bdw_ddi_translations_dp;
446 451 ddi_translations_edp = bdw_get_buf_trans_edp(dev_priv, &n_edp_entries);
447 if (dev_priv->vbt.edp.low_vswing) {
448 ddi_translations_edp = bdw_ddi_translations_edp;
449 n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
450 } else {
451 ddi_translations_edp = bdw_ddi_translations_dp;
452 n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
453 }
454
455 ddi_translations_hdmi = bdw_ddi_translations_hdmi;
456
457 n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); 452 n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
458 n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
459 hdmi_default_entry = 7;
460 } else if (IS_HASWELL(dev_priv)) { 453 } else if (IS_HASWELL(dev_priv)) {
461 ddi_translations_fdi = hsw_ddi_translations_fdi; 454 ddi_translations_fdi = hsw_ddi_translations_fdi;
462 ddi_translations_dp = hsw_ddi_translations_dp; 455 ddi_translations_dp = hsw_ddi_translations_dp;
463 ddi_translations_edp = hsw_ddi_translations_dp; 456 ddi_translations_edp = hsw_ddi_translations_dp;
464 ddi_translations_hdmi = hsw_ddi_translations_hdmi;
465 n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); 457 n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp);
466 n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
467 hdmi_default_entry = 6;
468 } else { 458 } else {
469 WARN(1, "ddi translation table missing\n"); 459 WARN(1, "ddi translation table missing\n");
470 ddi_translations_edp = bdw_ddi_translations_dp; 460 ddi_translations_edp = bdw_ddi_translations_dp;
471 ddi_translations_fdi = bdw_ddi_translations_fdi; 461 ddi_translations_fdi = bdw_ddi_translations_fdi;
472 ddi_translations_dp = bdw_ddi_translations_dp; 462 ddi_translations_dp = bdw_ddi_translations_dp;
473 ddi_translations_hdmi = bdw_ddi_translations_hdmi;
474 n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); 463 n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
475 n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); 464 n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
476 n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
477 hdmi_default_entry = 7;
478 } 465 }
479 466
480 switch (encoder->type) { 467 switch (encoder->type) {
@@ -483,7 +470,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
483 size = n_edp_entries; 470 size = n_edp_entries;
484 break; 471 break;
485 case INTEL_OUTPUT_DP: 472 case INTEL_OUTPUT_DP:
486 case INTEL_OUTPUT_HDMI:
487 ddi_translations = ddi_translations_dp; 473 ddi_translations = ddi_translations_dp;
488 size = n_dp_entries; 474 size = n_dp_entries;
489 break; 475 break;
@@ -501,19 +487,48 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
501 I915_WRITE(DDI_BUF_TRANS_HI(port, i), 487 I915_WRITE(DDI_BUF_TRANS_HI(port, i),
502 ddi_translations[i].trans2); 488 ddi_translations[i].trans2);
503 } 489 }
490}
491
492/*
493 * Starting with Haswell, DDI port buffers must be programmed with correct
494 * values in advance. This function programs the correct values for
495 * HDMI/DVI use cases.
496 */
497static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder)
498{
499 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
500 u32 iboost_bit = 0;
501 int n_hdmi_entries, hdmi_level;
502 enum port port = intel_ddi_get_encoder_port(encoder);
503 const struct ddi_buf_trans *ddi_translations_hdmi;
504 504
505 if (encoder->type != INTEL_OUTPUT_HDMI) 505 if (IS_BROXTON(dev_priv))
506 return; 506 return;
507 507
508 /* Choose a good default if VBT is badly populated */ 508 hdmi_level = intel_ddi_hdmi_level(dev_priv, port);
509 if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || 509
510 hdmi_level >= n_hdmi_entries) 510 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
511 hdmi_level = hdmi_default_entry; 511 ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
512
513 /* If we're boosting the current, set bit 31 of trans1 */
514 if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level)
515 iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE;
516 } else if (IS_BROADWELL(dev_priv)) {
517 ddi_translations_hdmi = bdw_ddi_translations_hdmi;
518 n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
519 } else if (IS_HASWELL(dev_priv)) {
520 ddi_translations_hdmi = hsw_ddi_translations_hdmi;
521 n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
522 } else {
523 WARN(1, "ddi translation table missing\n");
524 ddi_translations_hdmi = bdw_ddi_translations_hdmi;
525 n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
526 }
512 527
513 /* Entry 9 is for HDMI: */ 528 /* Entry 9 is for HDMI: */
514 I915_WRITE(DDI_BUF_TRANS_LO(port, i), 529 I915_WRITE(DDI_BUF_TRANS_LO(port, 9),
515 ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); 530 ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit);
516 I915_WRITE(DDI_BUF_TRANS_HI(port, i), 531 I915_WRITE(DDI_BUF_TRANS_HI(port, 9),
517 ddi_translations_hdmi[hdmi_level].trans2); 532 ddi_translations_hdmi[hdmi_level].trans2);
518} 533}
519 534
@@ -550,7 +565,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc)
550 565
551 for_each_encoder_on_crtc(dev, crtc, encoder) { 566 for_each_encoder_on_crtc(dev, crtc, encoder) {
552 WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); 567 WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG);
553 intel_prepare_ddi_buffer(encoder); 568 intel_prepare_dp_ddi_buffers(encoder);
554 } 569 }
555 570
556 /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the 571 /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the
@@ -1111,7 +1126,6 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc)
1111{ 1126{
1112 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 1127 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1113 struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); 1128 struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
1114 struct drm_encoder *encoder = &intel_encoder->base;
1115 struct drm_device *dev = crtc->dev; 1129 struct drm_device *dev = crtc->dev;
1116 struct drm_i915_private *dev_priv = to_i915(dev); 1130 struct drm_i915_private *dev_priv = to_i915(dev);
1117 enum pipe pipe = intel_crtc->pipe; 1131 enum pipe pipe = intel_crtc->pipe;
@@ -1177,29 +1191,15 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc)
1177 temp |= TRANS_DDI_MODE_SELECT_HDMI; 1191 temp |= TRANS_DDI_MODE_SELECT_HDMI;
1178 else 1192 else
1179 temp |= TRANS_DDI_MODE_SELECT_DVI; 1193 temp |= TRANS_DDI_MODE_SELECT_DVI;
1180
1181 } else if (type == INTEL_OUTPUT_ANALOG) { 1194 } else if (type == INTEL_OUTPUT_ANALOG) {
1182 temp |= TRANS_DDI_MODE_SELECT_FDI; 1195 temp |= TRANS_DDI_MODE_SELECT_FDI;
1183 temp |= (intel_crtc->config->fdi_lanes - 1) << 1; 1196 temp |= (intel_crtc->config->fdi_lanes - 1) << 1;
1184
1185 } else if (type == INTEL_OUTPUT_DP || 1197 } else if (type == INTEL_OUTPUT_DP ||
1186 type == INTEL_OUTPUT_EDP) { 1198 type == INTEL_OUTPUT_EDP) {
1187 struct intel_dp *intel_dp = enc_to_intel_dp(encoder); 1199 temp |= TRANS_DDI_MODE_SELECT_DP_SST;
1188
1189 if (intel_dp->is_mst) {
1190 temp |= TRANS_DDI_MODE_SELECT_DP_MST;
1191 } else
1192 temp |= TRANS_DDI_MODE_SELECT_DP_SST;
1193
1194 temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); 1200 temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count);
1195 } else if (type == INTEL_OUTPUT_DP_MST) { 1201 } else if (type == INTEL_OUTPUT_DP_MST) {
1196 struct intel_dp *intel_dp = &enc_to_mst(encoder)->primary->dp; 1202 temp |= TRANS_DDI_MODE_SELECT_DP_MST;
1197
1198 if (intel_dp->is_mst) {
1199 temp |= TRANS_DDI_MODE_SELECT_DP_MST;
1200 } else
1201 temp |= TRANS_DDI_MODE_SELECT_DP_SST;
1202
1203 temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); 1203 temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count);
1204 } else { 1204 } else {
1205 WARN(1, "Invalid encoder type %d for pipe %c\n", 1205 WARN(1, "Invalid encoder type %d for pipe %c\n",
@@ -1379,14 +1379,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc)
1379 TRANS_CLK_SEL_DISABLED); 1379 TRANS_CLK_SEL_DISABLED);
1380} 1380}
1381 1381
1382static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, 1382static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
1383 u32 level, enum port port, int type) 1383 enum port port, uint8_t iboost)
1384{ 1384{
1385 u32 tmp;
1386
1387 tmp = I915_READ(DISPIO_CR_TX_BMU_CR0);
1388 tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port));
1389 if (iboost)
1390 tmp |= iboost << BALANCE_LEG_SHIFT(port);
1391 else
1392 tmp |= BALANCE_LEG_DISABLE(port);
1393 I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp);
1394}
1395
1396static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level)
1397{
1398 struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
1399 struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
1400 enum port port = intel_dig_port->port;
1401 int type = encoder->type;
1385 const struct ddi_buf_trans *ddi_translations; 1402 const struct ddi_buf_trans *ddi_translations;
1386 uint8_t iboost; 1403 uint8_t iboost;
1387 uint8_t dp_iboost, hdmi_iboost; 1404 uint8_t dp_iboost, hdmi_iboost;
1388 int n_entries; 1405 int n_entries;
1389 u32 reg;
1390 1406
1391 /* VBT may override standard boost values */ 1407 /* VBT may override standard boost values */
1392 dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; 1408 dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level;
@@ -1428,16 +1444,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
1428 return; 1444 return;
1429 } 1445 }
1430 1446
1431 reg = I915_READ(DISPIO_CR_TX_BMU_CR0); 1447 _skl_ddi_set_iboost(dev_priv, port, iboost);
1432 reg &= ~BALANCE_LEG_MASK(port);
1433 reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port));
1434 1448
1435 if (iboost) 1449 if (port == PORT_A && intel_dig_port->max_lanes == 4)
1436 reg |= iboost << BALANCE_LEG_SHIFT(port); 1450 _skl_ddi_set_iboost(dev_priv, PORT_E, iboost);
1437 else
1438 reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port);
1439
1440 I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg);
1441} 1451}
1442 1452
1443static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, 1453static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv,
@@ -1568,7 +1578,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
1568 level = translate_signal_level(signal_levels); 1578 level = translate_signal_level(signal_levels);
1569 1579
1570 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) 1580 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
1571 skl_ddi_set_iboost(dev_priv, level, port, encoder->type); 1581 skl_ddi_set_iboost(encoder, level);
1572 else if (IS_BROXTON(dev_priv)) 1582 else if (IS_BROXTON(dev_priv))
1573 bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); 1583 bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type);
1574 1584
@@ -1615,8 +1625,6 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
1615 intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); 1625 intel_dp_dual_mode_set_tmds_output(intel_hdmi, true);
1616 } 1626 }
1617 1627
1618 intel_prepare_ddi_buffer(intel_encoder);
1619
1620 if (type == INTEL_OUTPUT_EDP) { 1628 if (type == INTEL_OUTPUT_EDP) {
1621 struct intel_dp *intel_dp = enc_to_intel_dp(encoder); 1629 struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
1622 intel_edp_panel_on(intel_dp); 1630 intel_edp_panel_on(intel_dp);
@@ -1627,6 +1635,8 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
1627 if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { 1635 if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) {
1628 struct intel_dp *intel_dp = enc_to_intel_dp(encoder); 1636 struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
1629 1637
1638 intel_prepare_dp_ddi_buffers(intel_encoder);
1639
1630 intel_dp_set_link_params(intel_dp, crtc->config); 1640 intel_dp_set_link_params(intel_dp, crtc->config);
1631 1641
1632 intel_ddi_init_dp_buf_reg(intel_encoder); 1642 intel_ddi_init_dp_buf_reg(intel_encoder);
@@ -1637,6 +1647,15 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
1637 intel_dp_stop_link_train(intel_dp); 1647 intel_dp_stop_link_train(intel_dp);
1638 } else if (type == INTEL_OUTPUT_HDMI) { 1648 } else if (type == INTEL_OUTPUT_HDMI) {
1639 struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); 1649 struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
1650 int level = intel_ddi_hdmi_level(dev_priv, port);
1651
1652 intel_prepare_hdmi_ddi_buffers(intel_encoder);
1653
1654 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
1655 skl_ddi_set_iboost(intel_encoder, level);
1656 else if (IS_BROXTON(dev_priv))
1657 bxt_ddi_vswing_sequence(dev_priv, level, port,
1658 INTEL_OUTPUT_HDMI);
1640 1659
1641 intel_hdmi->set_infoframes(encoder, 1660 intel_hdmi->set_infoframes(encoder,
1642 crtc->config->has_hdmi_sink, 1661 crtc->config->has_hdmi_sink,
@@ -2105,7 +2124,7 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
2105 2124
2106 val = DP_TP_CTL_ENABLE | 2125 val = DP_TP_CTL_ENABLE |
2107 DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE; 2126 DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE;
2108 if (intel_dp->is_mst) 2127 if (intel_dp->link_mst)
2109 val |= DP_TP_CTL_MODE_MST; 2128 val |= DP_TP_CTL_MODE_MST;
2110 else { 2129 else {
2111 val |= DP_TP_CTL_MODE_SST; 2130 val |= DP_TP_CTL_MODE_SST;
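The factored-out _skl_ddi_set_iboost() above clears the port's balance-leg field together with its disable bit, then either programs the boost value or disables the leg. A stand-alone sketch of that register update (the 8 + 3*port shift is assumed from BALANCE_LEG_MASK(port) in the i915_reg.h hunk; the starting register value is invented):

    #include <stdint.h>
    #include <stdio.h>

    #define TOY_BALANCE_LEG_SHIFT(port)   (8 + 3 * (port))
    #define TOY_BALANCE_LEG_MASK(port)    (7u << TOY_BALANCE_LEG_SHIFT(port))
    #define TOY_BALANCE_LEG_DISABLE(port) (1u << (23 + (port)))

    static uint32_t toy_set_iboost(uint32_t reg, int port, uint8_t iboost)
    {
        reg &= ~(TOY_BALANCE_LEG_MASK(port) | TOY_BALANCE_LEG_DISABLE(port));
        if (iboost)
            reg |= (uint32_t)iboost << TOY_BALANCE_LEG_SHIFT(port);
        else
            reg |= TOY_BALANCE_LEG_DISABLE(port);
        return reg;
    }

    int main(void)
    {
        uint32_t reg = 0xffffffffu;  /* example starting value */

        printf("port B, iboost=3: 0x%08x\n", (unsigned int)toy_set_iboost(reg, 1, 3));
        printf("port B, iboost=0: 0x%08x\n", (unsigned int)toy_set_iboost(reg, 1, 0));
        return 0;
    }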
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 8cc361114112..c6f27ab99e8f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -34,6 +34,7 @@
34#include <drm/drm_edid.h> 34#include <drm/drm_edid.h>
35#include <drm/drmP.h> 35#include <drm/drmP.h>
36#include "intel_drv.h" 36#include "intel_drv.h"
37#include "intel_frontbuffer.h"
37#include <drm/i915_drm.h> 38#include <drm/i915_drm.h>
38#include "i915_drv.h" 39#include "i915_drv.h"
39#include "i915_gem_dmabuf.h" 40#include "i915_gem_dmabuf.h"
@@ -2465,9 +2466,8 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
2465 return false; 2466 return false;
2466 } 2467 }
2467 2468
2468 obj->tiling_mode = plane_config->tiling; 2469 if (plane_config->tiling == I915_TILING_X)
2469 if (obj->tiling_mode == I915_TILING_X) 2470 obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X;
2470 obj->stride = fb->pitches[0];
2471 2471
2472 mode_cmd.pixel_format = fb->pixel_format; 2472 mode_cmd.pixel_format = fb->pixel_format;
2473 mode_cmd.width = fb->width; 2473 mode_cmd.width = fb->width;
@@ -2488,7 +2488,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
2488 return true; 2488 return true;
2489 2489
2490out_unref_obj: 2490out_unref_obj:
2491 drm_gem_object_unreference(&obj->base); 2491 i915_gem_object_put(obj);
2492 mutex_unlock(&dev->struct_mutex); 2492 mutex_unlock(&dev->struct_mutex);
2493 return false; 2493 return false;
2494} 2494}
@@ -2593,14 +2593,15 @@ valid_fb:
2593 intel_state->base.dst.y2 = plane_state->crtc_y + plane_state->crtc_h; 2593 intel_state->base.dst.y2 = plane_state->crtc_y + plane_state->crtc_h;
2594 2594
2595 obj = intel_fb_obj(fb); 2595 obj = intel_fb_obj(fb);
2596 if (obj->tiling_mode != I915_TILING_NONE) 2596 if (i915_gem_object_is_tiled(obj))
2597 dev_priv->preserve_bios_swizzle = true; 2597 dev_priv->preserve_bios_swizzle = true;
2598 2598
2599 drm_framebuffer_reference(fb); 2599 drm_framebuffer_reference(fb);
2600 primary->fb = primary->state->fb = fb; 2600 primary->fb = primary->state->fb = fb;
2601 primary->crtc = primary->state->crtc = &intel_crtc->base; 2601 primary->crtc = primary->state->crtc = &intel_crtc->base;
2602 intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary)); 2602 intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary));
2603 obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit; 2603 atomic_or(to_intel_plane(primary)->frontbuffer_bit,
2604 &obj->frontbuffer_bits);
2604} 2605}
2605 2606
2606static void i9xx_update_primary_plane(struct drm_plane *primary, 2607static void i9xx_update_primary_plane(struct drm_plane *primary,
@@ -2670,8 +2671,7 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
2670 BUG(); 2671 BUG();
2671 } 2672 }
2672 2673
2673 if (INTEL_INFO(dev)->gen >= 4 && 2674 if (INTEL_INFO(dev)->gen >= 4 && i915_gem_object_is_tiled(obj))
2674 obj->tiling_mode != I915_TILING_NONE)
2675 dspcntr |= DISPPLANE_TILED; 2675 dspcntr |= DISPPLANE_TILED;
2676 2676
2677 if (IS_G4X(dev)) 2677 if (IS_G4X(dev))
@@ -2780,7 +2780,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
2780 BUG(); 2780 BUG();
2781 } 2781 }
2782 2782
2783 if (obj->tiling_mode != I915_TILING_NONE) 2783 if (i915_gem_object_is_tiled(obj))
2784 dspcntr |= DISPPLANE_TILED; 2784 dspcntr |= DISPPLANE_TILED;
2785 2785
2786 if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) 2786 if (!IS_HASWELL(dev) && !IS_BROADWELL(dev))
@@ -4564,12 +4564,11 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
4564 struct drm_atomic_state *old_state = old_crtc_state->base.state; 4564 struct drm_atomic_state *old_state = old_crtc_state->base.state;
4565 struct intel_crtc_state *pipe_config = 4565 struct intel_crtc_state *pipe_config =
4566 to_intel_crtc_state(crtc->base.state); 4566 to_intel_crtc_state(crtc->base.state);
4567 struct drm_device *dev = crtc->base.dev;
4568 struct drm_plane *primary = crtc->base.primary; 4567 struct drm_plane *primary = crtc->base.primary;
4569 struct drm_plane_state *old_pri_state = 4568 struct drm_plane_state *old_pri_state =
4570 drm_atomic_get_existing_plane_state(old_state, primary); 4569 drm_atomic_get_existing_plane_state(old_state, primary);
4571 4570
4572 intel_frontbuffer_flip(dev, pipe_config->fb_bits); 4571 intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits);
4573 4572
4574 crtc->wm.cxsr_allowed = true; 4573 crtc->wm.cxsr_allowed = true;
4575 4574
@@ -4692,7 +4691,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask
4692 * to compute the mask of flip planes precisely. For the time being 4691 * to compute the mask of flip planes precisely. For the time being
4693 * consider this a flip to a NULL plane. 4692 * consider this a flip to a NULL plane.
4694 */ 4693 */
4695 intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe)); 4694 intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe));
4696} 4695}
4697 4696
4698static void ironlake_crtc_enable(struct drm_crtc *crtc) 4697static void ironlake_crtc_enable(struct drm_crtc *crtc)
@@ -10434,7 +10433,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev,
10434 10433
10435 fb = intel_framebuffer_create(dev, &mode_cmd, obj); 10434 fb = intel_framebuffer_create(dev, &mode_cmd, obj);
10436 if (IS_ERR(fb)) 10435 if (IS_ERR(fb))
10437 drm_gem_object_unreference_unlocked(&obj->base); 10436 i915_gem_object_put_unlocked(obj);
10438 10437
10439 return fb; 10438 return fb;
10440} 10439}
@@ -10945,13 +10944,13 @@ static void intel_unpin_work_fn(struct work_struct *__work)
10945 10944
10946 mutex_lock(&dev->struct_mutex); 10945 mutex_lock(&dev->struct_mutex);
10947 intel_unpin_fb_obj(work->old_fb, primary->state->rotation); 10946 intel_unpin_fb_obj(work->old_fb, primary->state->rotation);
10948 drm_gem_object_unreference(&work->pending_flip_obj->base); 10947 i915_gem_object_put(work->pending_flip_obj);
10949
10950 if (work->flip_queued_req)
10951 i915_gem_request_assign(&work->flip_queued_req, NULL);
10952 mutex_unlock(&dev->struct_mutex); 10948 mutex_unlock(&dev->struct_mutex);
10953 10949
10954 intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit); 10950 i915_gem_request_put(work->flip_queued_req);
10951
10952 intel_frontbuffer_flip_complete(to_i915(dev),
10953 to_intel_plane(primary)->frontbuffer_bit);
10955 intel_fbc_post_update(crtc); 10954 intel_fbc_post_update(crtc);
10956 drm_framebuffer_unreference(work->old_fb); 10955 drm_framebuffer_unreference(work->old_fb);
10957 10956
@@ -11116,7 +11115,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
11116 struct drm_i915_gem_request *req, 11115 struct drm_i915_gem_request *req,
11117 uint32_t flags) 11116 uint32_t flags)
11118{ 11117{
11119 struct intel_engine_cs *engine = req->engine; 11118 struct intel_ring *ring = req->ring;
11120 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 11119 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
11121 u32 flip_mask; 11120 u32 flip_mask;
11122 int ret; 11121 int ret;
@@ -11132,13 +11131,13 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
11132 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; 11131 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
11133 else 11132 else
11134 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; 11133 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
11135 intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); 11134 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
11136 intel_ring_emit(engine, MI_NOOP); 11135 intel_ring_emit(ring, MI_NOOP);
11137 intel_ring_emit(engine, MI_DISPLAY_FLIP | 11136 intel_ring_emit(ring, MI_DISPLAY_FLIP |
11138 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); 11137 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
11139 intel_ring_emit(engine, fb->pitches[0]); 11138 intel_ring_emit(ring, fb->pitches[0]);
11140 intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); 11139 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
11141 intel_ring_emit(engine, 0); /* aux display base address, unused */ 11140 intel_ring_emit(ring, 0); /* aux display base address, unused */
11142 11141
11143 return 0; 11142 return 0;
11144} 11143}
@@ -11150,7 +11149,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
11150 struct drm_i915_gem_request *req, 11149 struct drm_i915_gem_request *req,
11151 uint32_t flags) 11150 uint32_t flags)
11152{ 11151{
11153 struct intel_engine_cs *engine = req->engine; 11152 struct intel_ring *ring = req->ring;
11154 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 11153 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
11155 u32 flip_mask; 11154 u32 flip_mask;
11156 int ret; 11155 int ret;
@@ -11163,13 +11162,13 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
11163 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; 11162 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
11164 else 11163 else
11165 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; 11164 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
11166 intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); 11165 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
11167 intel_ring_emit(engine, MI_NOOP); 11166 intel_ring_emit(ring, MI_NOOP);
11168 intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | 11167 intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 |
11169 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); 11168 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
11170 intel_ring_emit(engine, fb->pitches[0]); 11169 intel_ring_emit(ring, fb->pitches[0]);
11171 intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); 11170 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
11172 intel_ring_emit(engine, MI_NOOP); 11171 intel_ring_emit(ring, MI_NOOP);
11173 11172
11174 return 0; 11173 return 0;
11175} 11174}
@@ -11181,7 +11180,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
11181 struct drm_i915_gem_request *req, 11180 struct drm_i915_gem_request *req,
11182 uint32_t flags) 11181 uint32_t flags)
11183{ 11182{
11184 struct intel_engine_cs *engine = req->engine; 11183 struct intel_ring *ring = req->ring;
11185 struct drm_i915_private *dev_priv = to_i915(dev); 11184 struct drm_i915_private *dev_priv = to_i915(dev);
11186 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 11185 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
11187 uint32_t pf, pipesrc; 11186 uint32_t pf, pipesrc;
@@ -11195,11 +11194,11 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
11195 * Display Registers (which do not change across a page-flip) 11194 * Display Registers (which do not change across a page-flip)
11196 * so we need only reprogram the base address. 11195 * so we need only reprogram the base address.
11197 */ 11196 */
11198 intel_ring_emit(engine, MI_DISPLAY_FLIP | 11197 intel_ring_emit(ring, MI_DISPLAY_FLIP |
11199 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); 11198 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
11200 intel_ring_emit(engine, fb->pitches[0]); 11199 intel_ring_emit(ring, fb->pitches[0]);
11201 intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset | 11200 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset |
11202 obj->tiling_mode); 11201 i915_gem_object_get_tiling(obj));
11203 11202
11204 /* XXX Enabling the panel-fitter across page-flip is so far 11203 /* XXX Enabling the panel-fitter across page-flip is so far
11205 * untested on non-native modes, so ignore it for now. 11204 * untested on non-native modes, so ignore it for now.
@@ -11207,7 +11206,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
11207 */ 11206 */
11208 pf = 0; 11207 pf = 0;
11209 pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; 11208 pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
11210 intel_ring_emit(engine, pf | pipesrc); 11209 intel_ring_emit(ring, pf | pipesrc);
11211 11210
11212 return 0; 11211 return 0;
11213} 11212}
@@ -11219,7 +11218,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
11219 struct drm_i915_gem_request *req, 11218 struct drm_i915_gem_request *req,
11220 uint32_t flags) 11219 uint32_t flags)
11221{ 11220{
11222 struct intel_engine_cs *engine = req->engine; 11221 struct intel_ring *ring = req->ring;
11223 struct drm_i915_private *dev_priv = to_i915(dev); 11222 struct drm_i915_private *dev_priv = to_i915(dev);
11224 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 11223 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
11225 uint32_t pf, pipesrc; 11224 uint32_t pf, pipesrc;
@@ -11229,10 +11228,10 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
11229 if (ret) 11228 if (ret)
11230 return ret; 11229 return ret;
11231 11230
11232 intel_ring_emit(engine, MI_DISPLAY_FLIP | 11231 intel_ring_emit(ring, MI_DISPLAY_FLIP |
11233 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); 11232 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
11234 intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode); 11233 intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj));
11235 intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); 11234 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
11236 11235
11237 /* Contrary to the suggestions in the documentation, 11236 /* Contrary to the suggestions in the documentation,
11238 * "Enable Panel Fitter" does not seem to be required when page 11237 * "Enable Panel Fitter" does not seem to be required when page
@@ -11242,7 +11241,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
11242 */ 11241 */
11243 pf = 0; 11242 pf = 0;
11244 pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; 11243 pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
11245 intel_ring_emit(engine, pf | pipesrc); 11244 intel_ring_emit(ring, pf | pipesrc);
11246 11245
11247 return 0; 11246 return 0;
11248} 11247}
@@ -11254,7 +11253,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
11254 struct drm_i915_gem_request *req, 11253 struct drm_i915_gem_request *req,
11255 uint32_t flags) 11254 uint32_t flags)
11256{ 11255{
11257 struct intel_engine_cs *engine = req->engine; 11256 struct intel_ring *ring = req->ring;
11258 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 11257 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
11259 uint32_t plane_bit = 0; 11258 uint32_t plane_bit = 0;
11260 int len, ret; 11259 int len, ret;
@@ -11275,7 +11274,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
11275 } 11274 }
11276 11275
11277 len = 4; 11276 len = 4;
11278 if (engine->id == RCS) { 11277 if (req->engine->id == RCS) {
11279 len += 6; 11278 len += 6;
11280 /* 11279 /*
11281 * On Gen 8, SRM is now taking an extra dword to accommodate 11280 * On Gen 8, SRM is now taking an extra dword to accommodate
@@ -11313,30 +11312,30 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
11313 * for the RCS also doesn't appear to drop events. Setting the DERRMR 11312 * for the RCS also doesn't appear to drop events. Setting the DERRMR
11314 * to zero does lead to lockups within MI_DISPLAY_FLIP. 11313 * to zero does lead to lockups within MI_DISPLAY_FLIP.
11315 */ 11314 */
11316 if (engine->id == RCS) { 11315 if (req->engine->id == RCS) {
11317 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 11316 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
11318 intel_ring_emit_reg(engine, DERRMR); 11317 intel_ring_emit_reg(ring, DERRMR);
11319 intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE | 11318 intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
11320 DERRMR_PIPEB_PRI_FLIP_DONE | 11319 DERRMR_PIPEB_PRI_FLIP_DONE |
11321 DERRMR_PIPEC_PRI_FLIP_DONE)); 11320 DERRMR_PIPEC_PRI_FLIP_DONE));
11322 if (IS_GEN8(dev)) 11321 if (IS_GEN8(dev))
11323 intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 | 11322 intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 |
11324 MI_SRM_LRM_GLOBAL_GTT); 11323 MI_SRM_LRM_GLOBAL_GTT);
11325 else 11324 else
11326 intel_ring_emit(engine, MI_STORE_REGISTER_MEM | 11325 intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
11327 MI_SRM_LRM_GLOBAL_GTT); 11326 MI_SRM_LRM_GLOBAL_GTT);
11328 intel_ring_emit_reg(engine, DERRMR); 11327 intel_ring_emit_reg(ring, DERRMR);
11329 intel_ring_emit(engine, engine->scratch.gtt_offset + 256); 11328 intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256);
11330 if (IS_GEN8(dev)) { 11329 if (IS_GEN8(dev)) {
11331 intel_ring_emit(engine, 0); 11330 intel_ring_emit(ring, 0);
11332 intel_ring_emit(engine, MI_NOOP); 11331 intel_ring_emit(ring, MI_NOOP);
11333 } 11332 }
11334 } 11333 }
11335 11334
11336 intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit); 11335 intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
11337 intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode)); 11336 intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj));
11338 intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); 11337 intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
11339 intel_ring_emit(engine, (MI_NOOP)); 11338 intel_ring_emit(ring, (MI_NOOP));
11340 11339
11341 return 0; 11340 return 0;
11342} 11341}
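The gen2 through gen7 queue_flip hunks above switch the emitters from the engine pointer to the request's ring. As a rough, standalone model of what the emit calls amount to (appending dwords at a ring tail), assuming invented toy_ring names rather than the driver's types:

#include <stdint.h>
#include <stdio.h>

struct toy_ring {
    uint32_t dwords[64];
    unsigned int tail;      /* next free slot, in dwords */
};

/* Write one dword at the tail and advance, wrapping at the ring size. */
static void toy_ring_emit(struct toy_ring *ring, uint32_t dw)
{
    ring->dwords[ring->tail++ % 64] = dw;
}

int main(void)
{
    struct toy_ring ring = { .tail = 0 };

    /* A flip packet is just a few consecutive dwords: command, pitch,
     * surface base, much like the gen4 helper above. Values are made up. */
    toy_ring_emit(&ring, 0x1u << 23);   /* pretend MI_DISPLAY_FLIP opcode */
    toy_ring_emit(&ring, 4096);         /* pitch */
    toy_ring_emit(&ring, 0x10000);      /* surface offset */

    printf("emitted %u dwords\n", ring.tail);
    return 0;
}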
@@ -11371,7 +11370,8 @@ static bool use_mmio_flip(struct intel_engine_cs *engine,
11371 if (resv && !reservation_object_test_signaled_rcu(resv, false)) 11370 if (resv && !reservation_object_test_signaled_rcu(resv, false))
11372 return true; 11371 return true;
11373 11372
11374 return engine != i915_gem_request_get_engine(obj->last_write_req); 11373 return engine != i915_gem_active_get_engine(&obj->last_write,
11374 &obj->base.dev->struct_mutex);
11375} 11375}
11376 11376
11377static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, 11377static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
@@ -11440,7 +11440,7 @@ static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc,
11440 11440
11441 dspcntr = I915_READ(reg); 11441 dspcntr = I915_READ(reg);
11442 11442
11443 if (obj->tiling_mode != I915_TILING_NONE) 11443 if (i915_gem_object_is_tiled(obj))
11444 dspcntr |= DISPPLANE_TILED; 11444 dspcntr |= DISPPLANE_TILED;
11445 else 11445 else
11446 dspcntr &= ~DISPPLANE_TILED; 11446 dspcntr &= ~DISPPLANE_TILED;
@@ -11463,9 +11463,9 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
11463 struct reservation_object *resv; 11463 struct reservation_object *resv;
11464 11464
11465 if (work->flip_queued_req) 11465 if (work->flip_queued_req)
11466 WARN_ON(__i915_wait_request(work->flip_queued_req, 11466 WARN_ON(i915_wait_request(work->flip_queued_req,
11467 false, NULL, 11467 false, NULL,
11468 &dev_priv->rps.mmioflips)); 11468 NO_WAITBOOST));
11469 11469
11470 /* For framebuffer backed by dmabuf, wait for fence */ 11470 /* For framebuffer backed by dmabuf, wait for fence */
11471 resv = i915_gem_object_get_dmabuf_resv(obj); 11471 resv = i915_gem_object_get_dmabuf_resv(obj);
@@ -11576,7 +11576,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11576 struct intel_flip_work *work; 11576 struct intel_flip_work *work;
11577 struct intel_engine_cs *engine; 11577 struct intel_engine_cs *engine;
11578 bool mmio_flip; 11578 bool mmio_flip;
11579 struct drm_i915_gem_request *request = NULL; 11579 struct drm_i915_gem_request *request;
11580 int ret; 11580 int ret;
11581 11581
11582 /* 11582 /*
@@ -11642,7 +11642,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11642 11642
11643 /* Reference the objects for the scheduled work. */ 11643 /* Reference the objects for the scheduled work. */
11644 drm_framebuffer_reference(work->old_fb); 11644 drm_framebuffer_reference(work->old_fb);
11645 drm_gem_object_reference(&obj->base);
11646 11645
11647 crtc->primary->fb = fb; 11646 crtc->primary->fb = fb;
11648 update_state_fb(crtc->primary); 11647 update_state_fb(crtc->primary);
@@ -11650,7 +11649,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11650 intel_fbc_pre_update(intel_crtc, intel_crtc->config, 11649 intel_fbc_pre_update(intel_crtc, intel_crtc->config,
11651 to_intel_plane_state(primary->state)); 11650 to_intel_plane_state(primary->state));
11652 11651
11653 work->pending_flip_obj = obj; 11652 work->pending_flip_obj = i915_gem_object_get(obj);
11654 11653
11655 ret = i915_mutex_lock_interruptible(dev); 11654 ret = i915_mutex_lock_interruptible(dev);
11656 if (ret) 11655 if (ret)
@@ -11669,13 +11668,15 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11669 11668
11670 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { 11669 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
11671 engine = &dev_priv->engine[BCS]; 11670 engine = &dev_priv->engine[BCS];
11672 if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode) 11671 if (i915_gem_object_get_tiling(obj) !=
11672 i915_gem_object_get_tiling(intel_fb_obj(work->old_fb)))
11673 /* vlv: DISPLAY_FLIP fails to change tiling */ 11673 /* vlv: DISPLAY_FLIP fails to change tiling */
11674 engine = NULL; 11674 engine = NULL;
11675 } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { 11675 } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
11676 engine = &dev_priv->engine[BCS]; 11676 engine = &dev_priv->engine[BCS];
11677 } else if (INTEL_INFO(dev)->gen >= 7) { 11677 } else if (INTEL_INFO(dev)->gen >= 7) {
11678 engine = i915_gem_request_get_engine(obj->last_write_req); 11678 engine = i915_gem_active_get_engine(&obj->last_write,
11679 &obj->base.dev->struct_mutex);
11679 if (engine == NULL || engine->id != RCS) 11680 if (engine == NULL || engine->id != RCS)
11680 engine = &dev_priv->engine[BCS]; 11681 engine = &dev_priv->engine[BCS];
11681 } else { 11682 } else {
@@ -11684,22 +11685,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11684 11685
11685 mmio_flip = use_mmio_flip(engine, obj); 11686 mmio_flip = use_mmio_flip(engine, obj);
11686 11687
11687 /* When using CS flips, we want to emit semaphores between rings.
11688 * However, when using mmio flips we will create a task to do the
11689 * synchronisation, so all we want here is to pin the framebuffer
11690 * into the display plane and skip any waits.
11691 */
11692 if (!mmio_flip) {
11693 ret = i915_gem_object_sync(obj, engine, &request);
11694 if (!ret && !request) {
11695 request = i915_gem_request_alloc(engine, NULL);
11696 ret = PTR_ERR_OR_ZERO(request);
11697 }
11698
11699 if (ret)
11700 goto cleanup_pending;
11701 }
11702
11703 ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); 11688 ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
11704 if (ret) 11689 if (ret)
11705 goto cleanup_pending; 11690 goto cleanup_pending;
@@ -11712,19 +11697,28 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11712 if (mmio_flip) { 11697 if (mmio_flip) {
11713 INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); 11698 INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
11714 11699
11715 i915_gem_request_assign(&work->flip_queued_req, 11700 work->flip_queued_req = i915_gem_active_get(&obj->last_write,
11716 obj->last_write_req); 11701 &obj->base.dev->struct_mutex);
11717
11718 schedule_work(&work->mmio_work); 11702 schedule_work(&work->mmio_work);
11719 } else { 11703 } else {
11720 i915_gem_request_assign(&work->flip_queued_req, request); 11704 request = i915_gem_request_alloc(engine, engine->last_context);
11705 if (IS_ERR(request)) {
11706 ret = PTR_ERR(request);
11707 goto cleanup_unpin;
11708 }
11709
11710 ret = i915_gem_object_sync(obj, request);
11711 if (ret)
11712 goto cleanup_request;
11713
11721 ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request, 11714 ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
11722 page_flip_flags); 11715 page_flip_flags);
11723 if (ret) 11716 if (ret)
11724 goto cleanup_unpin; 11717 goto cleanup_request;
11725 11718
11726 intel_mark_page_flip_active(intel_crtc, work); 11719 intel_mark_page_flip_active(intel_crtc, work);
11727 11720
11721 work->flip_queued_req = i915_gem_request_get(request);
11728 i915_add_request_no_flush(request); 11722 i915_add_request_no_flush(request);
11729 } 11723 }
11730 11724
@@ -11732,25 +11726,25 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
11732 to_intel_plane(primary)->frontbuffer_bit); 11726 to_intel_plane(primary)->frontbuffer_bit);
11733 mutex_unlock(&dev->struct_mutex); 11727 mutex_unlock(&dev->struct_mutex);
11734 11728
11735 intel_frontbuffer_flip_prepare(dev, 11729 intel_frontbuffer_flip_prepare(to_i915(dev),
11736 to_intel_plane(primary)->frontbuffer_bit); 11730 to_intel_plane(primary)->frontbuffer_bit);
11737 11731
11738 trace_i915_flip_request(intel_crtc->plane, obj); 11732 trace_i915_flip_request(intel_crtc->plane, obj);
11739 11733
11740 return 0; 11734 return 0;
11741 11735
11736cleanup_request:
11737 i915_add_request_no_flush(request);
11742cleanup_unpin: 11738cleanup_unpin:
11743 intel_unpin_fb_obj(fb, crtc->primary->state->rotation); 11739 intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
11744cleanup_pending: 11740cleanup_pending:
11745 if (!IS_ERR_OR_NULL(request))
11746 i915_add_request_no_flush(request);
11747 atomic_dec(&intel_crtc->unpin_work_count); 11741 atomic_dec(&intel_crtc->unpin_work_count);
11748 mutex_unlock(&dev->struct_mutex); 11742 mutex_unlock(&dev->struct_mutex);
11749cleanup: 11743cleanup:
11750 crtc->primary->fb = old_fb; 11744 crtc->primary->fb = old_fb;
11751 update_state_fb(crtc->primary); 11745 update_state_fb(crtc->primary);
11752 11746
11753 drm_gem_object_unreference_unlocked(&obj->base); 11747 i915_gem_object_put_unlocked(obj);
11754 drm_framebuffer_unreference(work->old_fb); 11748 drm_framebuffer_unreference(work->old_fb);
11755 11749
11756 spin_lock_irq(&dev->event_lock); 11750 spin_lock_irq(&dev->event_lock);
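The restructured CS path above allocates the request before syncing and queueing, and unwinds through the new cleanup_request label so a failed flip still retires the request it allocated. A compact standalone sketch of that goto-based unwind ordering, with stand-in functions rather than the i915 API:

#include <stdio.h>

static int alloc_request(void)   { return 0; }   /* 0 = success */
static int sync_object(void)     { return 0; }
static int queue_flip(void)      { return -1; }  /* force the error path */
static void commit_request(void) { puts("request committed"); }
static void unpin_fb(void)       { puts("fb unpinned"); }

static int toy_page_flip(void)
{
    int ret;

    ret = alloc_request();
    if (ret)
        goto cleanup_unpin;        /* nothing to retire yet */

    ret = sync_object();
    if (ret)
        goto cleanup_request;

    ret = queue_flip();
    if (ret)
        goto cleanup_request;

    commit_request();
    return 0;

cleanup_request:
    /* The request was allocated, so it still has to be committed/retired
     * even though the flip itself failed. */
    commit_request();
cleanup_unpin:
    unpin_fb();
    return ret;
}

int main(void)
{
    return toy_page_flip() ? 1 : 0;
}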
@@ -12298,6 +12292,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state)
12298 struct drm_device *dev = state->dev; 12292 struct drm_device *dev = state->dev;
12299 struct drm_connector *connector; 12293 struct drm_connector *connector;
12300 unsigned int used_ports = 0; 12294 unsigned int used_ports = 0;
12295 unsigned int used_mst_ports = 0;
12301 12296
12302 /* 12297 /*
12303 * Walk the connector list instead of the encoder 12298 * Walk the connector list instead of the encoder
@@ -12334,11 +12329,20 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state)
12334 return false; 12329 return false;
12335 12330
12336 used_ports |= port_mask; 12331 used_ports |= port_mask;
12332 break;
12333 case INTEL_OUTPUT_DP_MST:
12334 used_mst_ports |=
12335 1 << enc_to_mst(&encoder->base)->primary->port;
12336 break;
12337 default: 12337 default:
12338 break; 12338 break;
12339 } 12339 }
12340 } 12340 }
12341 12341
12342 /* can't mix MST and SST/HDMI on the same port */
12343 if (used_ports & used_mst_ports)
12344 return false;
12345
12342 return true; 12346 return true;
12343} 12347}
12344 12348
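The hunk above starts tracking MST streams in a second bitmask and rejects configurations where one digital port carries both MST and SST/HDMI. A minimal standalone model of that bitmask rule, with invented port names:

#include <stdbool.h>
#include <stdio.h>

enum { PORT_B, PORT_C, PORT_D };

static bool ports_ok(unsigned int used_ports, unsigned int used_mst_ports)
{
    /* can't mix MST and SST/HDMI on the same port */
    return (used_ports & used_mst_ports) == 0;
}

int main(void)
{
    unsigned int sst = 1u << PORT_B;    /* SST/HDMI stream on port B */
    unsigned int mst = 1u << PORT_B;    /* MST stream on port B as well */

    printf("B as SST + B as MST: %s\n", ports_ok(sst, mst) ? "ok" : "conflict");

    mst = 1u << PORT_C;                 /* move the MST stream to port C */
    printf("B as SST + C as MST: %s\n", ports_ok(sst, mst) ? "ok" : "conflict");
    return 0;
}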
@@ -13506,8 +13510,8 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
13506 if (!intel_plane_state->wait_req) 13510 if (!intel_plane_state->wait_req)
13507 continue; 13511 continue;
13508 13512
13509 ret = __i915_wait_request(intel_plane_state->wait_req, 13513 ret = i915_wait_request(intel_plane_state->wait_req,
13510 true, NULL, NULL); 13514 true, NULL, NULL);
13511 if (ret) { 13515 if (ret) {
13512 /* Any hang should be swallowed by the wait */ 13516 /* Any hang should be swallowed by the wait */
13513 WARN_ON(ret == -EIO); 13517 WARN_ON(ret == -EIO);
@@ -13619,8 +13623,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
13619 if (!intel_plane_state->wait_req) 13623 if (!intel_plane_state->wait_req)
13620 continue; 13624 continue;
13621 13625
13622 ret = __i915_wait_request(intel_plane_state->wait_req, 13626 ret = i915_wait_request(intel_plane_state->wait_req,
13623 true, NULL, NULL); 13627 true, NULL, NULL);
13624 /* EIO should be eaten, and we can't get interrupted in the 13628 /* EIO should be eaten, and we can't get interrupted in the
13625 * worker, and blocking commits have waited already. */ 13629 * worker, and blocking commits have waited already. */
13626 WARN_ON(ret); 13630 WARN_ON(ret);
@@ -13797,19 +13801,12 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state)
13797{ 13801{
13798 struct drm_plane_state *old_plane_state; 13802 struct drm_plane_state *old_plane_state;
13799 struct drm_plane *plane; 13803 struct drm_plane *plane;
13800 struct drm_i915_gem_object *obj, *old_obj;
13801 struct intel_plane *intel_plane;
13802 int i; 13804 int i;
13803 13805
13804 mutex_lock(&state->dev->struct_mutex); 13806 for_each_plane_in_state(state, plane, old_plane_state, i)
13805 for_each_plane_in_state(state, plane, old_plane_state, i) { 13807 i915_gem_track_fb(intel_fb_obj(old_plane_state->fb),
13806 obj = intel_fb_obj(plane->state->fb); 13808 intel_fb_obj(plane->state->fb),
13807 old_obj = intel_fb_obj(old_plane_state->fb); 13809 to_intel_plane(plane)->frontbuffer_bit);
13808 intel_plane = to_intel_plane(plane);
13809
13810 i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
13811 }
13812 mutex_unlock(&state->dev->struct_mutex);
13813} 13810}
13814 13811
13815/** 13812/**
@@ -14038,11 +14035,9 @@ intel_prepare_plane_fb(struct drm_plane *plane,
14038 } 14035 }
14039 14036
14040 if (ret == 0) { 14037 if (ret == 0) {
14041 struct intel_plane_state *plane_state = 14038 to_intel_plane_state(new_state)->wait_req =
14042 to_intel_plane_state(new_state); 14039 i915_gem_active_get(&obj->last_write,
14043 14040 &obj->base.dev->struct_mutex);
14044 i915_gem_request_assign(&plane_state->wait_req,
14045 obj->last_write_req);
14046 } 14041 }
14047 14042
14048 return ret; 14043 return ret;
@@ -14063,6 +14058,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
14063{ 14058{
14064 struct drm_device *dev = plane->dev; 14059 struct drm_device *dev = plane->dev;
14065 struct intel_plane_state *old_intel_state; 14060 struct intel_plane_state *old_intel_state;
14061 struct intel_plane_state *intel_state = to_intel_plane_state(plane->state);
14066 struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); 14062 struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
14067 struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); 14063 struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
14068 14064
@@ -14075,6 +14071,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
14075 !INTEL_INFO(dev)->cursor_needs_physical)) 14071 !INTEL_INFO(dev)->cursor_needs_physical))
14076 intel_unpin_fb_obj(old_state->fb, old_state->rotation); 14072 intel_unpin_fb_obj(old_state->fb, old_state->rotation);
14077 14073
14074 i915_gem_request_assign(&intel_state->wait_req, NULL);
14078 i915_gem_request_assign(&old_intel_state->wait_req, NULL); 14075 i915_gem_request_assign(&old_intel_state->wait_req, NULL);
14079} 14076}
14080 14077
@@ -14831,7 +14828,7 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
14831 drm_framebuffer_cleanup(fb); 14828 drm_framebuffer_cleanup(fb);
14832 mutex_lock(&dev->struct_mutex); 14829 mutex_lock(&dev->struct_mutex);
14833 WARN_ON(!intel_fb->obj->framebuffer_references--); 14830 WARN_ON(!intel_fb->obj->framebuffer_references--);
14834 drm_gem_object_unreference(&intel_fb->obj->base); 14831 i915_gem_object_put(intel_fb->obj);
14835 mutex_unlock(&dev->struct_mutex); 14832 mutex_unlock(&dev->struct_mutex);
14836 kfree(intel_fb); 14833 kfree(intel_fb);
14837} 14834}
@@ -14920,15 +14917,15 @@ static int intel_framebuffer_init(struct drm_device *dev,
14920 if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { 14917 if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
14921 /* Enforce that fb modifier and tiling mode match, but only for 14918 /* Enforce that fb modifier and tiling mode match, but only for
14922 * X-tiled. This is needed for FBC. */ 14919 * X-tiled. This is needed for FBC. */
14923 if (!!(obj->tiling_mode == I915_TILING_X) != 14920 if (!!(i915_gem_object_get_tiling(obj) == I915_TILING_X) !=
14924 !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) { 14921 !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) {
14925 DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); 14922 DRM_DEBUG("tiling_mode doesn't match fb modifier\n");
14926 return -EINVAL; 14923 return -EINVAL;
14927 } 14924 }
14928 } else { 14925 } else {
14929 if (obj->tiling_mode == I915_TILING_X) 14926 if (i915_gem_object_get_tiling(obj) == I915_TILING_X)
14930 mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; 14927 mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
14931 else if (obj->tiling_mode == I915_TILING_Y) { 14928 else if (i915_gem_object_get_tiling(obj) == I915_TILING_Y) {
14932 DRM_DEBUG("No Y tiling for legacy addfb\n"); 14929 DRM_DEBUG("No Y tiling for legacy addfb\n");
14933 return -EINVAL; 14930 return -EINVAL;
14934 } 14931 }
@@ -14972,9 +14969,10 @@ static int intel_framebuffer_init(struct drm_device *dev,
14972 } 14969 }
14973 14970
14974 if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED && 14971 if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED &&
14975 mode_cmd->pitches[0] != obj->stride) { 14972 mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) {
14976 DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", 14973 DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n",
14977 mode_cmd->pitches[0], obj->stride); 14974 mode_cmd->pitches[0],
14975 i915_gem_object_get_stride(obj));
14978 return -EINVAL; 14976 return -EINVAL;
14979 } 14977 }
14980 14978
@@ -15068,13 +15066,13 @@ intel_user_framebuffer_create(struct drm_device *dev,
15068 struct drm_i915_gem_object *obj; 15066 struct drm_i915_gem_object *obj;
15069 struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; 15067 struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
15070 15068
15071 obj = to_intel_bo(drm_gem_object_lookup(filp, mode_cmd.handles[0])); 15069 obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]);
15072 if (&obj->base == NULL) 15070 if (!obj)
15073 return ERR_PTR(-ENOENT); 15071 return ERR_PTR(-ENOENT);
15074 15072
15075 fb = intel_framebuffer_create(dev, &mode_cmd, obj); 15073 fb = intel_framebuffer_create(dev, &mode_cmd, obj);
15076 if (IS_ERR(fb)) 15074 if (IS_ERR(fb))
15077 drm_gem_object_unreference_unlocked(&obj->base); 15075 i915_gem_object_put_unlocked(obj);
15078 15076
15079 return fb; 15077 return fb;
15080} 15078}
@@ -15482,7 +15480,6 @@ void intel_modeset_init_hw(struct drm_device *dev)
15482 dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; 15480 dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq;
15483 15481
15484 intel_init_clock_gating(dev); 15482 intel_init_clock_gating(dev);
15485 intel_enable_gt_powersave(dev_priv);
15486} 15483}
15487 15484
15488/* 15485/*
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 21b04c3eda41..8fe2afa5439e 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1041,10 +1041,10 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
1041 if (WARN_ON(txsize > 20)) 1041 if (WARN_ON(txsize > 20))
1042 return -E2BIG; 1042 return -E2BIG;
1043 1043
1044 WARN_ON(!msg->buffer != !msg->size);
1045
1044 if (msg->buffer) 1046 if (msg->buffer)
1045 memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); 1047 memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size);
1046 else
1047 WARN_ON(msg->size);
1048 1048
1049 ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize); 1049 ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize);
1050 if (ret > 0) { 1050 if (ret > 0) {
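The WARN_ON added in intel_dp_aux_transfer() asserts that a NULL buffer is always paired with a zero size and a non-NULL buffer with a non-zero size. A small truth-table demo of the !buffer != !size expression, using plain asserts and invented names:

#include <assert.h>
#include <stddef.h>

static int mismatched(const void *buffer, size_t size)
{
    /* True exactly when one of the pair is "empty" and the other is not. */
    return !buffer != !size;
}

int main(void)
{
    char byte;

    assert(!mismatched(NULL, 0));    /* no payload at all: fine */
    assert(!mismatched(&byte, 1));   /* payload with a size: fine */
    assert(mismatched(NULL, 1));     /* size but no buffer: flagged */
    assert(mismatched(&byte, 0));    /* buffer but no size: flagged */
    return 0;
}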
@@ -1447,7 +1447,7 @@ intel_dp_max_link_rate(struct intel_dp *intel_dp)
1447 if (WARN_ON(len <= 0)) 1447 if (WARN_ON(len <= 0))
1448 return 162000; 1448 return 162000;
1449 1449
1450 return rates[rate_to_index(0, rates) - 1]; 1450 return rates[len - 1];
1451} 1451}
1452 1452
1453int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) 1453int intel_dp_rate_select(struct intel_dp *intel_dp, int rate)
@@ -1651,6 +1651,7 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp,
1651{ 1651{
1652 intel_dp->link_rate = pipe_config->port_clock; 1652 intel_dp->link_rate = pipe_config->port_clock;
1653 intel_dp->lane_count = pipe_config->lane_count; 1653 intel_dp->lane_count = pipe_config->lane_count;
1654 intel_dp->link_mst = intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST);
1654} 1655}
1655 1656
1656static void intel_dp_prepare(struct intel_encoder *encoder) 1657static void intel_dp_prepare(struct intel_encoder *encoder)
@@ -3395,84 +3396,67 @@ intel_dp_link_down(struct intel_dp *intel_dp)
3395} 3396}
3396 3397
3397static bool 3398static bool
3398intel_dp_get_dpcd(struct intel_dp *intel_dp) 3399intel_dp_read_dpcd(struct intel_dp *intel_dp)
3399{ 3400{
3400 struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
3401 struct drm_device *dev = dig_port->base.base.dev;
3402 struct drm_i915_private *dev_priv = to_i915(dev);
3403
3404 if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd, 3401 if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
3405 sizeof(intel_dp->dpcd)) < 0) 3402 sizeof(intel_dp->dpcd)) < 0)
3406 return false; /* aux transfer failed */ 3403 return false; /* aux transfer failed */
3407 3404
3408 DRM_DEBUG_KMS("DPCD: %*ph\n", (int) sizeof(intel_dp->dpcd), intel_dp->dpcd); 3405 DRM_DEBUG_KMS("DPCD: %*ph\n", (int) sizeof(intel_dp->dpcd), intel_dp->dpcd);
3409 3406
3410 if (intel_dp->dpcd[DP_DPCD_REV] == 0) 3407 return intel_dp->dpcd[DP_DPCD_REV] != 0;
3411 return false; /* DPCD not present */ 3408}
3412 3409
3413 if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, 3410static bool
3414 &intel_dp->sink_count, 1) < 0) 3411intel_edp_init_dpcd(struct intel_dp *intel_dp)
3415 return false; 3412{
3413 struct drm_i915_private *dev_priv =
3414 to_i915(dp_to_dig_port(intel_dp)->base.base.dev);
3416 3415
3417 /* 3416 /* this function is meant to be called only once */
3418 * Sink count can change between short pulse hpd hence 3417 WARN_ON(intel_dp->dpcd[DP_DPCD_REV] != 0);
3419 * a member variable in intel_dp will track any changes
3420 * between short pulse interrupts.
3421 */
3422 intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count);
3423 3418
3424 /* 3419 if (!intel_dp_read_dpcd(intel_dp))
3425 * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that
3426 * a dongle is present but no display. Unless we require to know
3427 * if a dongle is present or not, we don't need to update
3428 * downstream port information. So, an early return here saves
3429 * time from performing other operations which are not required.
3430 */
3431 if (!is_edp(intel_dp) && !intel_dp->sink_count)
3432 return false; 3420 return false;
3433 3421
3434 /* Check if the panel supports PSR */ 3422 if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
3435 memset(intel_dp->psr_dpcd, 0, sizeof(intel_dp->psr_dpcd)); 3423 dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
3436 if (is_edp(intel_dp)) { 3424 DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
3437 drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT,
3438 intel_dp->psr_dpcd,
3439 sizeof(intel_dp->psr_dpcd));
3440 if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) {
3441 dev_priv->psr.sink_support = true;
3442 DRM_DEBUG_KMS("Detected EDP PSR Panel.\n");
3443 }
3444
3445 if (INTEL_INFO(dev)->gen >= 9 &&
3446 (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) {
3447 uint8_t frame_sync_cap;
3448
3449 dev_priv->psr.sink_support = true;
3450 drm_dp_dpcd_read(&intel_dp->aux,
3451 DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
3452 &frame_sync_cap, 1);
3453 dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false;
3454 /* PSR2 needs frame sync as well */
3455 dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
3456 DRM_DEBUG_KMS("PSR2 %s on sink",
3457 dev_priv->psr.psr2_support ? "supported" : "not supported");
3458 }
3459 3425
3460 /* Read the eDP Display control capabilities registers */ 3426 /* Check if the panel supports PSR */
3461 memset(intel_dp->edp_dpcd, 0, sizeof(intel_dp->edp_dpcd)); 3427 drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT,
3462 if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && 3428 intel_dp->psr_dpcd,
3463 (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, 3429 sizeof(intel_dp->psr_dpcd));
3464 intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == 3430 if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) {
3465 sizeof(intel_dp->edp_dpcd))) 3431 dev_priv->psr.sink_support = true;
3466 DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), 3432 DRM_DEBUG_KMS("Detected EDP PSR Panel.\n");
3467 intel_dp->edp_dpcd); 3433 }
3468 } 3434
3469 3435 if (INTEL_GEN(dev_priv) >= 9 &&
3470 DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n", 3436 (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) {
3471 yesno(intel_dp_source_supports_hbr2(intel_dp)), 3437 uint8_t frame_sync_cap;
3472 yesno(drm_dp_tps3_supported(intel_dp->dpcd))); 3438
3439 dev_priv->psr.sink_support = true;
3440 drm_dp_dpcd_read(&intel_dp->aux,
3441 DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
3442 &frame_sync_cap, 1);
3443 dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false;
3444 /* PSR2 needs frame sync as well */
3445 dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
3446 DRM_DEBUG_KMS("PSR2 %s on sink",
3447 dev_priv->psr.psr2_support ? "supported" : "not supported");
3448 }
3449
3450 /* Read the eDP Display control capabilities registers */
3451 if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) &&
3452 drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV,
3453 intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd) ==
3454 sizeof(intel_dp->edp_dpcd)))
3455 DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd),
3456 intel_dp->edp_dpcd);
3473 3457
3474 /* Intermediate frequency support */ 3458 /* Intermediate frequency support */
3475 if (is_edp(intel_dp) && (intel_dp->edp_dpcd[0] >= 0x03)) { /* eDp v1.4 or higher */ 3459 if (intel_dp->edp_dpcd[0] >= 0x03) { /* eDp v1.4 or higher */
3476 __le16 sink_rates[DP_MAX_SUPPORTED_RATES]; 3460 __le16 sink_rates[DP_MAX_SUPPORTED_RATES];
3477 int i; 3461 int i;
3478 3462
@@ -3491,7 +3475,36 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
3491 intel_dp->num_sink_rates = i; 3475 intel_dp->num_sink_rates = i;
3492 } 3476 }
3493 3477
3494 intel_dp_print_rates(intel_dp); 3478 return true;
3479}
3480
3481
3482static bool
3483intel_dp_get_dpcd(struct intel_dp *intel_dp)
3484{
3485 if (!intel_dp_read_dpcd(intel_dp))
3486 return false;
3487
3488 if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT,
3489 &intel_dp->sink_count, 1) < 0)
3490 return false;
3491
3492 /*
3493 * Sink count can change between short pulse hpd hence
3494 * a member variable in intel_dp will track any changes
3495 * between short pulse interrupts.
3496 */
3497 intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count);
3498
3499 /*
3500 * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that
3501 * a dongle is present but no display. Unless we require to know
3502 * if a dongle is present or not, we don't need to update
3503 * downstream port information. So, an early return here saves
3504 * time from performing other operations which are not required.
3505 */
3506 if (!is_edp(intel_dp) && !intel_dp->sink_count)
3507 return false;
3495 3508
3496 if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & 3509 if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
3497 DP_DWN_STRM_PORT_PRESENT)) 3510 DP_DWN_STRM_PORT_PRESENT))
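The refactor above splits the raw DPCD read out of intel_dp_get_dpcd() and keeps the sink-count handling there, including the early return for a dongle with no display behind it. A rough standalone model of that probe order; the toy_sink structure and helper are stand-ins, not the DRM types:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_sink {
    uint8_t dpcd_rev;     /* 0 means the DPCD read failed / is absent */
    uint8_t sink_count;   /* displays behind the connector or dongle */
    bool is_edp;
};

static bool toy_get_dpcd(const struct toy_sink *s)
{
    if (s->dpcd_rev == 0)
        return false;     /* aux transfer failed or DPCD not present */

    /* SINK_COUNT == 0 on a non-eDP port means a dongle with no display
     * attached: treat it as disconnected and skip further probing. */
    if (!s->is_edp && s->sink_count == 0)
        return false;

    return true;
}

int main(void)
{
    struct toy_sink bare_dongle = { .dpcd_rev = 0x12, .sink_count = 0 };
    struct toy_sink monitor     = { .dpcd_rev = 0x12, .sink_count = 1 };

    printf("bare dongle: %s\n", toy_get_dpcd(&bare_dongle) ? "connected" : "not connected");
    printf("monitor:     %s\n", toy_get_dpcd(&monitor) ? "connected" : "not connected");
    return 0;
}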
@@ -3526,7 +3539,7 @@ intel_dp_probe_oui(struct intel_dp *intel_dp)
3526} 3539}
3527 3540
3528static bool 3541static bool
3529intel_dp_probe_mst(struct intel_dp *intel_dp) 3542intel_dp_can_mst(struct intel_dp *intel_dp)
3530{ 3543{
3531 u8 buf[1]; 3544 u8 buf[1];
3532 3545
@@ -3539,18 +3552,30 @@ intel_dp_probe_mst(struct intel_dp *intel_dp)
3539 if (intel_dp->dpcd[DP_DPCD_REV] < 0x12) 3552 if (intel_dp->dpcd[DP_DPCD_REV] < 0x12)
3540 return false; 3553 return false;
3541 3554
3542 if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1)) { 3555 if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1)
3543 if (buf[0] & DP_MST_CAP) { 3556 return false;
3544 DRM_DEBUG_KMS("Sink is MST capable\n");
3545 intel_dp->is_mst = true;
3546 } else {
3547 DRM_DEBUG_KMS("Sink is not MST capable\n");
3548 intel_dp->is_mst = false;
3549 }
3550 }
3551 3557
3552 drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); 3558 return buf[0] & DP_MST_CAP;
3553 return intel_dp->is_mst; 3559}
3560
3561static void
3562intel_dp_configure_mst(struct intel_dp *intel_dp)
3563{
3564 if (!i915.enable_dp_mst)
3565 return;
3566
3567 if (!intel_dp->can_mst)
3568 return;
3569
3570 intel_dp->is_mst = intel_dp_can_mst(intel_dp);
3571
3572 if (intel_dp->is_mst)
3573 DRM_DEBUG_KMS("Sink is MST capable\n");
3574 else
3575 DRM_DEBUG_KMS("Sink is not MST capable\n");
3576
3577 drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr,
3578 intel_dp->is_mst);
3554} 3579}
3555 3580
3556static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) 3581static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
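intel_dp_configure_mst() above gates MST on three inputs: the i915.enable_dp_mst module parameter, the encoder's can_mst flag, and the sink's DP_MST_CAP bit. A condensed model of that decision, with invented names:

#include <stdbool.h>
#include <stdio.h>

static bool decide_mst(bool module_param, bool source_can_mst, bool sink_mst_cap)
{
    if (!module_param || !source_can_mst)
        return false;          /* MST disabled on the source side */
    return sink_mst_cap;       /* otherwise follow the sink's capability */
}

int main(void)
{
    printf("param+source+sink -> %d\n", decide_mst(true, true, true));   /* 1 */
    printf("sink cap only      -> %d\n", decide_mst(false, true, true)); /* 0 */
    return 0;
}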
@@ -3909,7 +3934,7 @@ static bool
3909intel_dp_short_pulse(struct intel_dp *intel_dp) 3934intel_dp_short_pulse(struct intel_dp *intel_dp)
3910{ 3935{
3911 struct drm_device *dev = intel_dp_to_dev(intel_dp); 3936 struct drm_device *dev = intel_dp_to_dev(intel_dp);
3912 u8 sink_irq_vector; 3937 u8 sink_irq_vector = 0;
3913 u8 old_sink_count = intel_dp->sink_count; 3938 u8 old_sink_count = intel_dp->sink_count;
3914 bool ret; 3939 bool ret;
3915 3940
@@ -3936,7 +3961,8 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
3936 3961
3937 /* Try to read the source of the interrupt */ 3962 /* Try to read the source of the interrupt */
3938 if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && 3963 if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 &&
3939 intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) { 3964 intel_dp_get_sink_irq(intel_dp, &sink_irq_vector) &&
3965 sink_irq_vector != 0) {
3940 /* Clear interrupt source */ 3966 /* Clear interrupt source */
3941 drm_dp_dpcd_writeb(&intel_dp->aux, 3967 drm_dp_dpcd_writeb(&intel_dp->aux,
3942 DP_DEVICE_SERVICE_IRQ_VECTOR, 3968 DP_DEVICE_SERVICE_IRQ_VECTOR,
@@ -3980,6 +4006,9 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp)
3980 connector_status_connected : connector_status_disconnected; 4006 connector_status_connected : connector_status_disconnected;
3981 } 4007 }
3982 4008
4009 if (intel_dp_can_mst(intel_dp))
4010 return connector_status_connected;
4011
3983 /* If no HPD, poke DDC gently */ 4012 /* If no HPD, poke DDC gently */
3984 if (drm_probe_ddc(&intel_dp->aux.ddc)) 4013 if (drm_probe_ddc(&intel_dp->aux.ddc))
3985 return connector_status_connected; 4014 return connector_status_connected;
@@ -4217,8 +4246,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
4217 struct drm_device *dev = connector->dev; 4246 struct drm_device *dev = connector->dev;
4218 enum drm_connector_status status; 4247 enum drm_connector_status status;
4219 enum intel_display_power_domain power_domain; 4248 enum intel_display_power_domain power_domain;
4220 bool ret; 4249 u8 sink_irq_vector = 0;
4221 u8 sink_irq_vector;
4222 4250
4223 power_domain = intel_display_port_aux_power_domain(intel_encoder); 4251 power_domain = intel_display_port_aux_power_domain(intel_encoder);
4224 intel_display_power_get(to_i915(dev), power_domain); 4252 intel_display_power_get(to_i915(dev), power_domain);
@@ -4252,10 +4280,17 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
4252 if (intel_encoder->type != INTEL_OUTPUT_EDP) 4280 if (intel_encoder->type != INTEL_OUTPUT_EDP)
4253 intel_encoder->type = INTEL_OUTPUT_DP; 4281 intel_encoder->type = INTEL_OUTPUT_DP;
4254 4282
4283 DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n",
4284 yesno(intel_dp_source_supports_hbr2(intel_dp)),
4285 yesno(drm_dp_tps3_supported(intel_dp->dpcd)));
4286
4287 intel_dp_print_rates(intel_dp);
4288
4255 intel_dp_probe_oui(intel_dp); 4289 intel_dp_probe_oui(intel_dp);
4256 4290
4257 ret = intel_dp_probe_mst(intel_dp); 4291 intel_dp_configure_mst(intel_dp);
4258 if (ret) { 4292
4293 if (intel_dp->is_mst) {
4259 /* 4294 /*
4260 * If we are in MST mode then this connector 4295 * If we are in MST mode then this connector
4261 * won't appear connected or have anything 4296 * won't appear connected or have anything
@@ -4290,7 +4325,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
4290 4325
4291 /* Try to read the source of the interrupt */ 4326 /* Try to read the source of the interrupt */
4292 if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && 4327 if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 &&
4293 intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) { 4328 intel_dp_get_sink_irq(intel_dp, &sink_irq_vector) &&
4329 sink_irq_vector != 0) {
4294 /* Clear interrupt source */ 4330 /* Clear interrupt source */
4295 drm_dp_dpcd_writeb(&intel_dp->aux, 4331 drm_dp_dpcd_writeb(&intel_dp->aux,
4296 DP_DEVICE_SERVICE_IRQ_VECTOR, 4332 DP_DEVICE_SERVICE_IRQ_VECTOR,
@@ -5186,7 +5222,7 @@ unlock:
5186 5222
5187/** 5223/**
5188 * intel_edp_drrs_invalidate - Disable Idleness DRRS 5224 * intel_edp_drrs_invalidate - Disable Idleness DRRS
5189 * @dev: DRM device 5225 * @dev_priv: i915 device
5190 * @frontbuffer_bits: frontbuffer plane tracking bits 5226 * @frontbuffer_bits: frontbuffer plane tracking bits
5191 * 5227 *
5192 * This function gets called everytime rendering on the given planes start. 5228 * This function gets called everytime rendering on the given planes start.
@@ -5194,10 +5230,9 @@ unlock:
5194 * 5230 *
5195 * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits. 5231 * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
5196 */ 5232 */
5197void intel_edp_drrs_invalidate(struct drm_device *dev, 5233void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
5198 unsigned frontbuffer_bits) 5234 unsigned int frontbuffer_bits)
5199{ 5235{
5200 struct drm_i915_private *dev_priv = to_i915(dev);
5201 struct drm_crtc *crtc; 5236 struct drm_crtc *crtc;
5202 enum pipe pipe; 5237 enum pipe pipe;
5203 5238
@@ -5229,7 +5264,7 @@ void intel_edp_drrs_invalidate(struct drm_device *dev,
5229 5264
5230/** 5265/**
5231 * intel_edp_drrs_flush - Restart Idleness DRRS 5266 * intel_edp_drrs_flush - Restart Idleness DRRS
5232 * @dev: DRM device 5267 * @dev_priv: i915 device
5233 * @frontbuffer_bits: frontbuffer plane tracking bits 5268 * @frontbuffer_bits: frontbuffer plane tracking bits
5234 * 5269 *
5235 * This function gets called every time rendering on the given planes has 5270 * This function gets called every time rendering on the given planes has
@@ -5239,10 +5274,9 @@ void intel_edp_drrs_invalidate(struct drm_device *dev,
5239 * 5274 *
5240 * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits. 5275 * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
5241 */ 5276 */
5242void intel_edp_drrs_flush(struct drm_device *dev, 5277void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
5243 unsigned frontbuffer_bits) 5278 unsigned int frontbuffer_bits)
5244{ 5279{
5245 struct drm_i915_private *dev_priv = to_i915(dev);
5246 struct drm_crtc *crtc; 5280 struct drm_crtc *crtc;
5247 enum pipe pipe; 5281 enum pipe pipe;
5248 5282
@@ -5413,14 +5447,9 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
5413 pps_unlock(intel_dp); 5447 pps_unlock(intel_dp);
5414 5448
5415 /* Cache DPCD and EDID for edp. */ 5449 /* Cache DPCD and EDID for edp. */
5416 has_dpcd = intel_dp_get_dpcd(intel_dp); 5450 has_dpcd = intel_edp_init_dpcd(intel_dp);
5417 5451
5418 if (has_dpcd) { 5452 if (!has_dpcd) {
5419 if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
5420 dev_priv->no_aux_handshake =
5421 intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
5422 DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
5423 } else {
5424 /* if this fails, presume the device is a ghost */ 5453 /* if this fails, presume the device is a ghost */
5425 DRM_INFO("failed to retrieve link info, disabling eDP\n"); 5454 DRM_INFO("failed to retrieve link info, disabling eDP\n");
5426 goto out_vdd_off; 5455 goto out_vdd_off;
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 68a005d729e9..629337dbca3d 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -170,10 +170,10 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder)
170 intel_mst->connector = found; 170 intel_mst->connector = found;
171 171
172 if (intel_dp->active_mst_links == 0) { 172 if (intel_dp->active_mst_links == 0) {
173 intel_prepare_ddi_buffer(&intel_dig_port->base);
174
175 intel_ddi_clk_select(&intel_dig_port->base, intel_crtc->config); 173 intel_ddi_clk_select(&intel_dig_port->base, intel_crtc->config);
176 174
175 intel_prepare_dp_ddi_buffers(&intel_dig_port->base);
176
177 intel_dp_set_link_params(intel_dp, intel_crtc->config); 177 intel_dp_set_link_params(intel_dp, intel_crtc->config);
178 178
179 intel_ddi_init_dp_buf_reg(&intel_dig_port->base); 179 intel_ddi_init_dp_buf_reg(&intel_dig_port->base);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 9c59521afb18..c29a429cbc45 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -849,6 +849,7 @@ struct intel_dp {
849 int link_rate; 849 int link_rate;
850 uint8_t lane_count; 850 uint8_t lane_count;
851 uint8_t sink_count; 851 uint8_t sink_count;
852 bool link_mst;
852 bool has_audio; 853 bool has_audio;
853 bool detect_done; 854 bool detect_done;
854 enum hdmi_force_audio force_audio; 855 enum hdmi_force_audio force_audio;
@@ -1104,7 +1105,7 @@ void intel_crt_reset(struct drm_encoder *encoder);
1104/* intel_ddi.c */ 1105/* intel_ddi.c */
1105void intel_ddi_clk_select(struct intel_encoder *encoder, 1106void intel_ddi_clk_select(struct intel_encoder *encoder,
1106 const struct intel_crtc_state *pipe_config); 1107 const struct intel_crtc_state *pipe_config);
1107void intel_prepare_ddi_buffer(struct intel_encoder *encoder); 1108void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder);
1108void hsw_fdi_link_train(struct drm_crtc *crtc); 1109void hsw_fdi_link_train(struct drm_crtc *crtc);
1109void intel_ddi_init(struct drm_device *dev, enum port port); 1110void intel_ddi_init(struct drm_device *dev, enum port port);
1110enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); 1111enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder);
@@ -1131,21 +1132,10 @@ void intel_ddi_clock_get(struct intel_encoder *encoder,
1131void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); 1132void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
1132uint32_t ddi_signal_levels(struct intel_dp *intel_dp); 1133uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
1133 1134
1134/* intel_frontbuffer.c */
1135void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
1136 enum fb_op_origin origin);
1137void intel_frontbuffer_flip_prepare(struct drm_device *dev,
1138 unsigned frontbuffer_bits);
1139void intel_frontbuffer_flip_complete(struct drm_device *dev,
1140 unsigned frontbuffer_bits);
1141void intel_frontbuffer_flip(struct drm_device *dev,
1142 unsigned frontbuffer_bits);
1143unsigned int intel_fb_align_height(struct drm_device *dev, 1135unsigned int intel_fb_align_height(struct drm_device *dev,
1144 unsigned int height, 1136 unsigned int height,
1145 uint32_t pixel_format, 1137 uint32_t pixel_format,
1146 uint64_t fb_format_modifier); 1138 uint64_t fb_format_modifier);
1147void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire,
1148 enum fb_op_origin origin);
1149u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, 1139u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
1150 uint64_t fb_modifier, uint32_t pixel_format); 1140 uint64_t fb_modifier, uint32_t pixel_format);
1151 1141
@@ -1381,11 +1371,12 @@ uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes);
1381void intel_plane_destroy(struct drm_plane *plane); 1371void intel_plane_destroy(struct drm_plane *plane);
1382void intel_edp_drrs_enable(struct intel_dp *intel_dp); 1372void intel_edp_drrs_enable(struct intel_dp *intel_dp);
1383void intel_edp_drrs_disable(struct intel_dp *intel_dp); 1373void intel_edp_drrs_disable(struct intel_dp *intel_dp);
1384void intel_edp_drrs_invalidate(struct drm_device *dev, 1374void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
1385 unsigned frontbuffer_bits); 1375 unsigned int frontbuffer_bits);
1386void intel_edp_drrs_flush(struct drm_device *dev, unsigned frontbuffer_bits); 1376void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
1377 unsigned int frontbuffer_bits);
1387bool intel_digital_port_connected(struct drm_i915_private *dev_priv, 1378bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
1388 struct intel_digital_port *port); 1379 struct intel_digital_port *port);
1389 1380
1390void 1381void
1391intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, 1382intel_dp_program_link_training_pattern(struct intel_dp *intel_dp,
@@ -1558,13 +1549,13 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con
1558/* intel_psr.c */ 1549/* intel_psr.c */
1559void intel_psr_enable(struct intel_dp *intel_dp); 1550void intel_psr_enable(struct intel_dp *intel_dp);
1560void intel_psr_disable(struct intel_dp *intel_dp); 1551void intel_psr_disable(struct intel_dp *intel_dp);
1561void intel_psr_invalidate(struct drm_device *dev, 1552void intel_psr_invalidate(struct drm_i915_private *dev_priv,
1562 unsigned frontbuffer_bits); 1553 unsigned frontbuffer_bits);
1563void intel_psr_flush(struct drm_device *dev, 1554void intel_psr_flush(struct drm_i915_private *dev_priv,
1564 unsigned frontbuffer_bits, 1555 unsigned frontbuffer_bits,
1565 enum fb_op_origin origin); 1556 enum fb_op_origin origin);
1566void intel_psr_init(struct drm_device *dev); 1557void intel_psr_init(struct drm_device *dev);
1567void intel_psr_single_frame_update(struct drm_device *dev, 1558void intel_psr_single_frame_update(struct drm_i915_private *dev_priv,
1568 unsigned frontbuffer_bits); 1559 unsigned frontbuffer_bits);
1569 1560
1570/* intel_runtime_pm.c */ 1561/* intel_runtime_pm.c */
@@ -1664,13 +1655,6 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
1664 atomic_dec(&dev_priv->pm.wakeref_count); 1655 atomic_dec(&dev_priv->pm.wakeref_count);
1665} 1656}
1666 1657
1667/* TODO: convert users of these to rely instead on proper RPM refcounting */
1668#define DISABLE_RPM_WAKEREF_ASSERTS(dev_priv) \
1669 disable_rpm_wakeref_asserts(dev_priv)
1670
1671#define ENABLE_RPM_WAKEREF_ASSERTS(dev_priv) \
1672 enable_rpm_wakeref_asserts(dev_priv)
1673
1674void intel_runtime_pm_get(struct drm_i915_private *dev_priv); 1658void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
1675bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv); 1659bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
1676void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv); 1660void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
@@ -1696,11 +1680,11 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
1696void intel_gpu_ips_teardown(void); 1680void intel_gpu_ips_teardown(void);
1697void intel_init_gt_powersave(struct drm_i915_private *dev_priv); 1681void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
1698void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); 1682void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
1683void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
1699void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); 1684void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
1685void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv);
1700void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); 1686void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
1701void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); 1687void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
1702void intel_reset_gt_powersave(struct drm_i915_private *dev_priv);
1703void gen6_update_ring_freq(struct drm_i915_private *dev_priv);
1704void gen6_rps_busy(struct drm_i915_private *dev_priv); 1688void gen6_rps_busy(struct drm_i915_private *dev_priv);
1705void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); 1689void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
1706void gen6_rps_idle(struct drm_i915_private *dev_priv); 1690void gen6_rps_idle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
new file mode 100644
index 000000000000..e9b301ae2d0c
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -0,0 +1,231 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include "i915_drv.h"
26#include "intel_ringbuffer.h"
27#include "intel_lrc.h"
28
29static const struct engine_info {
30 const char *name;
31 unsigned exec_id;
32 unsigned guc_id;
33 u32 mmio_base;
34 unsigned irq_shift;
35 int (*init_legacy)(struct intel_engine_cs *engine);
36 int (*init_execlists)(struct intel_engine_cs *engine);
37} intel_engines[] = {
38 [RCS] = {
39 .name = "render ring",
40 .exec_id = I915_EXEC_RENDER,
41 .guc_id = GUC_RENDER_ENGINE,
42 .mmio_base = RENDER_RING_BASE,
43 .irq_shift = GEN8_RCS_IRQ_SHIFT,
44 .init_execlists = logical_render_ring_init,
45 .init_legacy = intel_init_render_ring_buffer,
46 },
47 [BCS] = {
48 .name = "blitter ring",
49 .exec_id = I915_EXEC_BLT,
50 .guc_id = GUC_BLITTER_ENGINE,
51 .mmio_base = BLT_RING_BASE,
52 .irq_shift = GEN8_BCS_IRQ_SHIFT,
53 .init_execlists = logical_xcs_ring_init,
54 .init_legacy = intel_init_blt_ring_buffer,
55 },
56 [VCS] = {
57 .name = "bsd ring",
58 .exec_id = I915_EXEC_BSD,
59 .guc_id = GUC_VIDEO_ENGINE,
60 .mmio_base = GEN6_BSD_RING_BASE,
61 .irq_shift = GEN8_VCS1_IRQ_SHIFT,
62 .init_execlists = logical_xcs_ring_init,
63 .init_legacy = intel_init_bsd_ring_buffer,
64 },
65 [VCS2] = {
66 .name = "bsd2 ring",
67 .exec_id = I915_EXEC_BSD,
68 .guc_id = GUC_VIDEO_ENGINE2,
69 .mmio_base = GEN8_BSD2_RING_BASE,
70 .irq_shift = GEN8_VCS2_IRQ_SHIFT,
71 .init_execlists = logical_xcs_ring_init,
72 .init_legacy = intel_init_bsd2_ring_buffer,
73 },
74 [VECS] = {
75 .name = "video enhancement ring",
76 .exec_id = I915_EXEC_VEBOX,
77 .guc_id = GUC_VIDEOENHANCE_ENGINE,
78 .mmio_base = VEBOX_RING_BASE,
79 .irq_shift = GEN8_VECS_IRQ_SHIFT,
80 .init_execlists = logical_xcs_ring_init,
81 .init_legacy = intel_init_vebox_ring_buffer,
82 },
83};
84
85static struct intel_engine_cs *
86intel_engine_setup(struct drm_i915_private *dev_priv,
87 enum intel_engine_id id)
88{
89 const struct engine_info *info = &intel_engines[id];
90 struct intel_engine_cs *engine = &dev_priv->engine[id];
91
92 engine->id = id;
93 engine->i915 = dev_priv;
94 engine->name = info->name;
95 engine->exec_id = info->exec_id;
96 engine->hw_id = engine->guc_id = info->guc_id;
97 engine->mmio_base = info->mmio_base;
98 engine->irq_shift = info->irq_shift;
99
100 return engine;
101}
102
103/**
104 * intel_engines_init() - allocate, populate and init the Engine Command Streamers
105 * @dev: DRM device.
106 *
107 * Return: non-zero if the initialization failed.
108 */
109int intel_engines_init(struct drm_device *dev)
110{
111 struct drm_i915_private *dev_priv = to_i915(dev);
112 unsigned int mask = 0;
113 int (*init)(struct intel_engine_cs *engine);
114 unsigned int i;
115 int ret;
116
117 WARN_ON(INTEL_INFO(dev_priv)->ring_mask == 0);
118 WARN_ON(INTEL_INFO(dev_priv)->ring_mask &
119 GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES));
120
121 for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
122 if (!HAS_ENGINE(dev_priv, i))
123 continue;
124
125 if (i915.enable_execlists)
126 init = intel_engines[i].init_execlists;
127 else
128 init = intel_engines[i].init_legacy;
129
130 if (!init)
131 continue;
132
133 ret = init(intel_engine_setup(dev_priv, i));
134 if (ret)
135 goto cleanup;
136
137 mask |= ENGINE_MASK(i);
138 }
139
140 /*
141 * Catch failures to update the intel_engines table when new engines
142 * are added to the driver: warn and disable the forgotten engines
143 * instead of leaving the device info mask out of sync.
144 */
145 if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) {
146 struct intel_device_info *info =
147 (struct intel_device_info *)&dev_priv->info;
148 info->ring_mask = mask;
149 }
150
151 return 0;
152
153cleanup:
154 for (i = 0; i < I915_NUM_ENGINES; i++) {
155 if (i915.enable_execlists)
156 intel_logical_ring_cleanup(&dev_priv->engine[i]);
157 else
158 intel_engine_cleanup(&dev_priv->engine[i]);
159 }
160
161 return ret;
162}
163
164void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
165{
166 memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
167}
168
169static void intel_engine_init_requests(struct intel_engine_cs *engine)
170{
171 init_request_active(&engine->last_request, NULL);
172 INIT_LIST_HEAD(&engine->request_list);
173}
174
175/**
176 * intel_engine_setup_common - set up engine state not requiring hw access
177 * @engine: Engine to setup.
178 *
179 * Initializes @engine structure members shared between legacy and execlists
180 * submission modes which do not require hardware access.
181 *
182 * Typically done early in the submission mode specific engine setup stage.
183 */
184void intel_engine_setup_common(struct intel_engine_cs *engine)
185{
186 INIT_LIST_HEAD(&engine->buffers);
187 INIT_LIST_HEAD(&engine->execlist_queue);
188 spin_lock_init(&engine->execlist_lock);
189
190 engine->fence_context = fence_context_alloc(1);
191
192 intel_engine_init_requests(engine);
193 intel_engine_init_hangcheck(engine);
194 i915_gem_batch_pool_init(engine, &engine->batch_pool);
195}
196
197/**
198 * intel_engines_init_common - initialize cengine state which might require hw access
199 * @engine: Engine to initialize.
200 *
201 * Initializes @engine@ structure members shared between legacy and execlists
202 * submission modes which do require hardware access.
203 *
204 * Typically done at later stages of submission mode specific engine setup.
205 *
206 * Returns zero on success or an error code on failure.
207 */
208int intel_engine_init_common(struct intel_engine_cs *engine)
209{
210 int ret;
211
212 ret = intel_engine_init_breadcrumbs(engine);
213 if (ret)
214 return ret;
215
216 return intel_engine_init_cmd_parser(engine);
217}
218
219/**
220 * intel_engine_cleanup_common - cleans up the engine state created by
221 * the common initializers.
222 * @engine: Engine to cleanup.
223 *
224 * This cleans up everything created by the common helpers.
225 */
226void intel_engine_cleanup_common(struct intel_engine_cs *engine)
227{
228 intel_engine_cleanup_cmd_parser(engine);
229 intel_engine_fini_breadcrumbs(engine);
230 i915_gem_batch_pool_fini(&engine->batch_pool);
231}
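
Taken together, the three *_common helpers documented above split engine bring-up into a software-only phase (setup), a phase that may touch hardware (init), and a teardown that undoes both. A minimal sketch of the intended call order from a submission backend; my_backend_init_hw() is a hypothetical placeholder for the execlists- or legacy-specific part:

/* Sketch only: expected ordering of the common engine helpers.
 * my_backend_init_hw() is hypothetical. */
static int my_engine_init(struct intel_engine_cs *engine)
{
	int ret;

	intel_engine_setup_common(engine);       /* lists, locks, hangcheck: no hw access */

	ret = intel_engine_init_common(engine);  /* breadcrumbs + cmd parser: may touch hw */
	if (ret)
		return ret;

	ret = my_backend_init_hw(engine);        /* hypothetical submission-specific step */
	if (ret)
		intel_engine_cleanup_common(engine);

	return ret;
}
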
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 3f4e32f8baae..e67b09a3328c 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -741,7 +741,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
741 cache->fb.pixel_format = fb->pixel_format; 741 cache->fb.pixel_format = fb->pixel_format;
742 cache->fb.stride = fb->pitches[0]; 742 cache->fb.stride = fb->pitches[0];
743 cache->fb.fence_reg = obj->fence_reg; 743 cache->fb.fence_reg = obj->fence_reg;
744 cache->fb.tiling_mode = obj->tiling_mode; 744 cache->fb.tiling_mode = i915_gem_object_get_tiling(obj);
745} 745}
746 746
747static bool intel_fbc_can_activate(struct intel_crtc *crtc) 747static bool intel_fbc_can_activate(struct intel_crtc *crtc)
@@ -1075,6 +1075,8 @@ out:
1075/** 1075/**
1076 * intel_fbc_enable: tries to enable FBC on the CRTC 1076 * intel_fbc_enable: tries to enable FBC on the CRTC
1077 * @crtc: the CRTC 1077 * @crtc: the CRTC
1078 * @crtc_state: corresponding &drm_crtc_state for @crtc
1079 * @plane_state: corresponding &drm_plane_state for the primary plane of @crtc
1078 * 1080 *
1079 * This function checks if the given CRTC was chosen for FBC, then enables it if 1081 * This function checks if the given CRTC was chosen for FBC, then enables it if
1080 * possible. Notice that it doesn't activate FBC. It is valid to call 1082 * possible. Notice that it doesn't activate FBC. It is valid to call
@@ -1163,11 +1165,8 @@ void intel_fbc_disable(struct intel_crtc *crtc)
1163 return; 1165 return;
1164 1166
1165 mutex_lock(&fbc->lock); 1167 mutex_lock(&fbc->lock);
1166 if (fbc->crtc == crtc) { 1168 if (fbc->crtc == crtc)
1167 WARN_ON(!fbc->enabled);
1168 WARN_ON(fbc->active);
1169 __intel_fbc_disable(dev_priv); 1169 __intel_fbc_disable(dev_priv);
1170 }
1171 mutex_unlock(&fbc->lock); 1170 mutex_unlock(&fbc->lock);
1172 1171
1173 cancel_work_sync(&fbc->work.work); 1172 cancel_work_sync(&fbc->work.work);
@@ -1230,12 +1229,29 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
1230 if (i915.enable_fbc >= 0) 1229 if (i915.enable_fbc >= 0)
1231 return !!i915.enable_fbc; 1230 return !!i915.enable_fbc;
1232 1231
1232 if (!HAS_FBC(dev_priv))
1233 return 0;
1234
1233 if (IS_BROADWELL(dev_priv)) 1235 if (IS_BROADWELL(dev_priv))
1234 return 1; 1236 return 1;
1235 1237
1236 return 0; 1238 return 0;
1237} 1239}
1238 1240
1241static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
1242{
1243#ifdef CONFIG_INTEL_IOMMU
1244 /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
1245 if (intel_iommu_gfx_mapped &&
1246 (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) {
1247 DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
1248 return true;
1249 }
1250#endif
1251
1252 return false;
1253}
1254
1239/** 1255/**
1240 * intel_fbc_init - Initialize FBC 1256 * intel_fbc_init - Initialize FBC
1241 * @dev_priv: the i915 device 1257 * @dev_priv: the i915 device
@@ -1253,6 +1269,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv)
1253 fbc->active = false; 1269 fbc->active = false;
1254 fbc->work.scheduled = false; 1270 fbc->work.scheduled = false;
1255 1271
1272 if (need_fbc_vtd_wa(dev_priv))
1273 mkwrite_device_info(dev_priv)->has_fbc = false;
1274
1256 i915.enable_fbc = intel_sanitize_fbc_option(dev_priv); 1275 i915.enable_fbc = intel_sanitize_fbc_option(dev_priv);
1257 DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc); 1276 DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc);
1258 1277
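
With this change the module parameter still takes precedence, but platforms without FBC now bail out before the Broadwell default is consulted, and the VT-d workaround simply clears has_fbc up front. A standalone sketch of the resulting decision order, with the platform predicates reduced to plain booleans for illustration:

/* Sketch only: decision order behind the sanitized enable_fbc value.
 * enable_fbc_param stands in for i915.enable_fbc (negative means "auto"). */
static int sketch_sanitize_fbc_option(int enable_fbc_param, int has_fbc, int is_broadwell)
{
	if (enable_fbc_param >= 0)   /* explicit user choice always wins */
		return !!enable_fbc_param;

	if (!has_fbc)                /* new: no point defaulting on without the feature */
		return 0;

	if (is_broadwell)            /* default-on only where it is known to behave */
		return 1;

	return 0;
}
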
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 69d7ea576baa..2c14dfc5e4f0 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -41,6 +41,7 @@
41#include <drm/drm_crtc.h> 41#include <drm/drm_crtc.h>
42#include <drm/drm_fb_helper.h> 42#include <drm/drm_fb_helper.h>
43#include "intel_drv.h" 43#include "intel_drv.h"
44#include "intel_frontbuffer.h"
44#include <drm/i915_drm.h> 45#include <drm/i915_drm.h>
45#include "i915_drv.h" 46#include "i915_drv.h"
46 47
@@ -158,7 +159,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
158 159
159 fb = __intel_framebuffer_create(dev, &mode_cmd, obj); 160 fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
160 if (IS_ERR(fb)) { 161 if (IS_ERR(fb)) {
161 drm_gem_object_unreference(&obj->base); 162 i915_gem_object_put(obj);
162 ret = PTR_ERR(fb); 163 ret = PTR_ERR(fb);
163 goto out; 164 goto out;
164 } 165 }
@@ -188,7 +189,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
188 struct i915_vma *vma; 189 struct i915_vma *vma;
189 struct drm_i915_gem_object *obj; 190 struct drm_i915_gem_object *obj;
190 bool prealloc = false; 191 bool prealloc = false;
191 void *vaddr; 192 void __iomem *vaddr;
192 int ret; 193 int ret;
193 194
194 if (intel_fb && 195 if (intel_fb &&
@@ -767,7 +768,7 @@ void intel_fbdev_fini(struct drm_device *dev)
767 if (!ifbdev) 768 if (!ifbdev)
768 return; 769 return;
769 770
770 flush_work(&dev_priv->fbdev_suspend_work); 771 cancel_work_sync(&dev_priv->fbdev_suspend_work);
771 if (!current_is_async()) 772 if (!current_is_async())
772 intel_fbdev_sync(ifbdev); 773 intel_fbdev_sync(ifbdev);
773 774
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index ac85357010b4..966de4c7c7a2 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -63,47 +63,30 @@
63#include <drm/drmP.h> 63#include <drm/drmP.h>
64 64
65#include "intel_drv.h" 65#include "intel_drv.h"
66#include "intel_frontbuffer.h"
66#include "i915_drv.h" 67#include "i915_drv.h"
67 68
68/** 69void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
69 * intel_fb_obj_invalidate - invalidate frontbuffer object 70 enum fb_op_origin origin,
70 * @obj: GEM object to invalidate 71 unsigned int frontbuffer_bits)
71 * @origin: which operation caused the invalidation
72 *
73 * This function gets called every time rendering on the given object starts and
74 * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must
75 * be invalidated. For ORIGIN_CS any subsequent invalidation will be delayed
76 * until the rendering completes or a flip on this frontbuffer plane is
77 * scheduled.
78 */
79void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
80 enum fb_op_origin origin)
81{ 72{
82 struct drm_device *dev = obj->base.dev; 73 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
83 struct drm_i915_private *dev_priv = to_i915(dev);
84
85 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
86
87 if (!obj->frontbuffer_bits)
88 return;
89 74
90 if (origin == ORIGIN_CS) { 75 if (origin == ORIGIN_CS) {
91 mutex_lock(&dev_priv->fb_tracking.lock); 76 spin_lock(&dev_priv->fb_tracking.lock);
92 dev_priv->fb_tracking.busy_bits 77 dev_priv->fb_tracking.busy_bits |= frontbuffer_bits;
93 |= obj->frontbuffer_bits; 78 dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
94 dev_priv->fb_tracking.flip_bits 79 spin_unlock(&dev_priv->fb_tracking.lock);
95 &= ~obj->frontbuffer_bits;
96 mutex_unlock(&dev_priv->fb_tracking.lock);
97 } 80 }
98 81
99 intel_psr_invalidate(dev, obj->frontbuffer_bits); 82 intel_psr_invalidate(dev_priv, frontbuffer_bits);
100 intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits); 83 intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits);
101 intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin); 84 intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
102} 85}
103 86
104/** 87/**
105 * intel_frontbuffer_flush - flush frontbuffer 88 * intel_frontbuffer_flush - flush frontbuffer
106 * @dev: DRM device 89 * @dev_priv: i915 device
107 * @frontbuffer_bits: frontbuffer plane tracking bits 90 * @frontbuffer_bits: frontbuffer plane tracking bits
108 * @origin: which operation caused the flush 91 * @origin: which operation caused the flush
109 * 92 *
@@ -113,64 +96,45 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
113 * 96 *
114 * Can be called without any locks held. 97 * Can be called without any locks held.
115 */ 98 */
116static void intel_frontbuffer_flush(struct drm_device *dev, 99static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv,
117 unsigned frontbuffer_bits, 100 unsigned frontbuffer_bits,
118 enum fb_op_origin origin) 101 enum fb_op_origin origin)
119{ 102{
120 struct drm_i915_private *dev_priv = to_i915(dev);
121
122 /* Delay flushing when rings are still busy.*/ 103 /* Delay flushing when rings are still busy.*/
123 mutex_lock(&dev_priv->fb_tracking.lock); 104 spin_lock(&dev_priv->fb_tracking.lock);
124 frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits; 105 frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
125 mutex_unlock(&dev_priv->fb_tracking.lock); 106 spin_unlock(&dev_priv->fb_tracking.lock);
126 107
127 if (!frontbuffer_bits) 108 if (!frontbuffer_bits)
128 return; 109 return;
129 110
130 intel_edp_drrs_flush(dev, frontbuffer_bits); 111 intel_edp_drrs_flush(dev_priv, frontbuffer_bits);
131 intel_psr_flush(dev, frontbuffer_bits, origin); 112 intel_psr_flush(dev_priv, frontbuffer_bits, origin);
132 intel_fbc_flush(dev_priv, frontbuffer_bits, origin); 113 intel_fbc_flush(dev_priv, frontbuffer_bits, origin);
133} 114}
134 115
135/** 116void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
136 * intel_fb_obj_flush - flush frontbuffer object 117 bool retire,
137 * @obj: GEM object to flush 118 enum fb_op_origin origin,
138 * @retire: set when retiring asynchronous rendering 119 unsigned int frontbuffer_bits)
139 * @origin: which operation caused the flush
140 *
141 * This function gets called every time rendering on the given object has
142 * completed and frontbuffer caching can be started again. If @retire is true
143 * then any delayed flushes will be unblocked.
144 */
145void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
146 bool retire, enum fb_op_origin origin)
147{ 120{
148 struct drm_device *dev = obj->base.dev; 121 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
149 struct drm_i915_private *dev_priv = to_i915(dev);
150 unsigned frontbuffer_bits;
151
152 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
153
154 if (!obj->frontbuffer_bits)
155 return;
156
157 frontbuffer_bits = obj->frontbuffer_bits;
158 122
159 if (retire) { 123 if (retire) {
160 mutex_lock(&dev_priv->fb_tracking.lock); 124 spin_lock(&dev_priv->fb_tracking.lock);
161 /* Filter out new bits since rendering started. */ 125 /* Filter out new bits since rendering started. */
162 frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; 126 frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
163
164 dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; 127 dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
165 mutex_unlock(&dev_priv->fb_tracking.lock); 128 spin_unlock(&dev_priv->fb_tracking.lock);
166 } 129 }
167 130
168 intel_frontbuffer_flush(dev, frontbuffer_bits, origin); 131 if (frontbuffer_bits)
132 intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin);
169} 133}
170 134
171/** 135/**
172 * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip 136 * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip
173 * @dev: DRM device 137 * @dev_priv: i915 device
174 * @frontbuffer_bits: frontbuffer plane tracking bits 138 * @frontbuffer_bits: frontbuffer plane tracking bits
175 * 139 *
176 * This function gets called after scheduling a flip on @obj. The actual 140 * This function gets called after scheduling a flip on @obj. The actual
@@ -180,23 +144,21 @@ void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
180 * 144 *
181 * Can be called without any locks held. 145 * Can be called without any locks held.
182 */ 146 */
183void intel_frontbuffer_flip_prepare(struct drm_device *dev, 147void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
184 unsigned frontbuffer_bits) 148 unsigned frontbuffer_bits)
185{ 149{
186 struct drm_i915_private *dev_priv = to_i915(dev); 150 spin_lock(&dev_priv->fb_tracking.lock);
187
188 mutex_lock(&dev_priv->fb_tracking.lock);
189 dev_priv->fb_tracking.flip_bits |= frontbuffer_bits; 151 dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
190 /* Remove stale busy bits due to the old buffer. */ 152 /* Remove stale busy bits due to the old buffer. */
191 dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; 153 dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
192 mutex_unlock(&dev_priv->fb_tracking.lock); 154 spin_unlock(&dev_priv->fb_tracking.lock);
193 155
194 intel_psr_single_frame_update(dev, frontbuffer_bits); 156 intel_psr_single_frame_update(dev_priv, frontbuffer_bits);
195} 157}
196 158
197/** 159/**
198 * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip 160 * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip
199 * @dev: DRM device 161 * @dev_priv: i915 device
200 * @frontbuffer_bits: frontbuffer plane tracking bits 162 * @frontbuffer_bits: frontbuffer plane tracking bits
201 * 163 *
202 * This function gets called after the flip has been latched and will complete 164 * This function gets called after the flip has been latched and will complete
@@ -204,23 +166,23 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev,
204 * 166 *
205 * Can be called without any locks held. 167 * Can be called without any locks held.
206 */ 168 */
207void intel_frontbuffer_flip_complete(struct drm_device *dev, 169void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
208 unsigned frontbuffer_bits) 170 unsigned frontbuffer_bits)
209{ 171{
210 struct drm_i915_private *dev_priv = to_i915(dev); 172 spin_lock(&dev_priv->fb_tracking.lock);
211
212 mutex_lock(&dev_priv->fb_tracking.lock);
213 /* Mask any cancelled flips. */ 173 /* Mask any cancelled flips. */
214 frontbuffer_bits &= dev_priv->fb_tracking.flip_bits; 174 frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
215 dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; 175 dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
216 mutex_unlock(&dev_priv->fb_tracking.lock); 176 spin_unlock(&dev_priv->fb_tracking.lock);
217 177
218 intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); 178 if (frontbuffer_bits)
179 intel_frontbuffer_flush(dev_priv,
180 frontbuffer_bits, ORIGIN_FLIP);
219} 181}
220 182
221/** 183/**
222 * intel_frontbuffer_flip - synchronous frontbuffer flip 184 * intel_frontbuffer_flip - synchronous frontbuffer flip
223 * @dev: DRM device 185 * @dev_priv: i915 device
224 * @frontbuffer_bits: frontbuffer plane tracking bits 186 * @frontbuffer_bits: frontbuffer plane tracking bits
225 * 187 *
226 * This function gets called after scheduling a flip on @obj. This is for 188 * This function gets called after scheduling a flip on @obj. This is for
@@ -229,15 +191,13 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
229 * 191 *
230 * Can be called without any locks held. 192 * Can be called without any locks held.
231 */ 193 */
232void intel_frontbuffer_flip(struct drm_device *dev, 194void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
233 unsigned frontbuffer_bits) 195 unsigned frontbuffer_bits)
234{ 196{
235 struct drm_i915_private *dev_priv = to_i915(dev); 197 spin_lock(&dev_priv->fb_tracking.lock);
236
237 mutex_lock(&dev_priv->fb_tracking.lock);
238 /* Remove stale busy bits due to the old buffer. */ 198 /* Remove stale busy bits due to the old buffer. */
239 dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; 199 dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
240 mutex_unlock(&dev_priv->fb_tracking.lock); 200 spin_unlock(&dev_priv->fb_tracking.lock);
241 201
242 intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); 202 intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP);
243} 203}
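
The rework above trades struct_mutex for a dedicated spinlock and has callers pass the frontbuffer bits in explicitly instead of re-reading obj->frontbuffer_bits under a lock. A minimal sketch of the busy/flip bookkeeping that the flip prepare/complete pair implements, with the locking and the flush call left out:

/* Sketch only: the flip_bits/busy_bits accounting done under fb_tracking.lock. */
struct sketch_fb_tracking {
	unsigned int busy_bits;  /* planes with outstanding rendering */
	unsigned int flip_bits;  /* planes with a flip scheduled but not yet latched */
};

static void sketch_flip_prepare(struct sketch_fb_tracking *t, unsigned int bits)
{
	t->flip_bits |= bits;    /* remember the pending flip */
	t->busy_bits &= ~bits;   /* stale busy bits refer to the old buffer */
}

static unsigned int sketch_flip_complete(struct sketch_fb_tracking *t, unsigned int bits)
{
	bits &= t->flip_bits;    /* mask out flips that were cancelled */
	t->flip_bits &= ~bits;
	return bits;             /* non-zero: caller flushes these with ORIGIN_FLIP */
}
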
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h
new file mode 100644
index 000000000000..76ceb539f9f0
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.h
@@ -0,0 +1,91 @@
1/*
2 * Copyright (c) 2014-2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef __INTEL_FRONTBUFFER_H__
25#define __INTEL_FRONTBUFFER_H__
26
27struct drm_i915_private;
28struct drm_i915_gem_object;
29
30void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
31 unsigned frontbuffer_bits);
32void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
33 unsigned frontbuffer_bits);
34void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
35 unsigned frontbuffer_bits);
36
37void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
38 enum fb_op_origin origin,
39 unsigned int frontbuffer_bits);
40void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
41 bool retire,
42 enum fb_op_origin origin,
43 unsigned int frontbuffer_bits);
44
45/**
46 * intel_fb_obj_invalidate - invalidate frontbuffer object
47 * @obj: GEM object to invalidate
48 * @origin: which operation caused the invalidation
49 *
50 * This function gets called every time rendering on the given object starts and
51 * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must
52 * be invalidated. For ORIGIN_CS any subsequent invalidation will be delayed
53 * until the rendering completes or a flip on this frontbuffer plane is
54 * scheduled.
55 */
56static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
57 enum fb_op_origin origin)
58{
59 unsigned int frontbuffer_bits;
60
61 frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
62 if (!frontbuffer_bits)
63 return;
64
65 __intel_fb_obj_invalidate(obj, origin, frontbuffer_bits);
66}
67
68/**
69 * intel_fb_obj_flush - flush frontbuffer object
70 * @obj: GEM object to flush
71 * @retire: set when retiring asynchronous rendering
72 * @origin: which operation caused the flush
73 *
74 * This function gets called every time rendering on the given object has
75 * completed and frontbuffer caching can be started again. If @retire is true
76 * then any delayed flushes will be unblocked.
77 */
78static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
79 bool retire,
80 enum fb_op_origin origin)
81{
82 unsigned int frontbuffer_bits;
83
84 frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
85 if (!frontbuffer_bits)
86 return;
87
88 __intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits);
89}
90
91#endif /* __INTEL_FRONTBUFFER_H__ */
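
The inline wrappers above make the common case (an object that never scans out) a single atomic_read with no call into intel_frontbuffer.c. A hedged usage sketch for a direct CPU write path; cpu_write_pixels() is a hypothetical stand-in for the actual drawing code:

/* Sketch only: bracketing CPU rendering with the new inline wrappers.
 * cpu_write_pixels() is hypothetical. */
static void sketch_draw(struct drm_i915_gem_object *obj)
{
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);  /* early-outs if obj has no frontbuffer bits */

	cpu_write_pixels(obj);                     /* hypothetical drawing */

	/* retire=false: a direct CPU write, not GPU rendering being retired */
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
}
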
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 3e3e743740c0..623cf26cd784 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -160,7 +160,6 @@ extern int intel_guc_resume(struct drm_device *dev);
160int i915_guc_submission_init(struct drm_i915_private *dev_priv); 160int i915_guc_submission_init(struct drm_i915_private *dev_priv);
161int i915_guc_submission_enable(struct drm_i915_private *dev_priv); 161int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
162int i915_guc_wq_check_space(struct drm_i915_gem_request *rq); 162int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
163int i915_guc_submit(struct drm_i915_gem_request *rq);
164void i915_guc_submission_disable(struct drm_i915_private *dev_priv); 163void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
165void i915_guc_submission_fini(struct drm_i915_private *dev_priv); 164void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
166 165
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 605c69658d2c..3763e30cc165 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -323,7 +323,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
323 return ret; 323 return ret;
324 } 324 }
325 325
326 ret = i915_gem_obj_ggtt_pin(guc_fw->guc_fw_obj, 0, 0); 326 ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0);
327 if (ret) { 327 if (ret) {
328 DRM_DEBUG_DRIVER("pin failed %d\n", ret); 328 DRM_DEBUG_DRIVER("pin failed %d\n", ret);
329 return ret; 329 return ret;
@@ -349,7 +349,9 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
349 } 349 }
350 350
351 /* WaC6DisallowByGfxPause*/ 351 /* WaC6DisallowByGfxPause*/
352 I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); 352 if (IS_SKL_REVID(dev, 0, SKL_REVID_C0) ||
353 IS_BXT_REVID(dev, 0, BXT_REVID_B0))
354 I915_WRITE(GEN6_GFXPAUSE, 0x30FFF);
353 355
354 if (IS_BROXTON(dev)) 356 if (IS_BROXTON(dev))
355 I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); 357 I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE);
@@ -662,7 +664,7 @@ fail:
662 mutex_lock(&dev->struct_mutex); 664 mutex_lock(&dev->struct_mutex);
663 obj = guc_fw->guc_fw_obj; 665 obj = guc_fw->guc_fw_obj;
664 if (obj) 666 if (obj)
665 drm_gem_object_unreference(&obj->base); 667 i915_gem_object_put(obj);
666 guc_fw->guc_fw_obj = NULL; 668 guc_fw->guc_fw_obj = NULL;
667 mutex_unlock(&dev->struct_mutex); 669 mutex_unlock(&dev->struct_mutex);
668 670
@@ -743,7 +745,7 @@ void intel_guc_fini(struct drm_device *dev)
743 i915_guc_submission_fini(dev_priv); 745 i915_guc_submission_fini(dev_priv);
744 746
745 if (guc_fw->guc_fw_obj) 747 if (guc_fw->guc_fw_obj)
746 drm_gem_object_unreference(&guc_fw->guc_fw_obj->base); 748 i915_gem_object_put(guc_fw->guc_fw_obj);
747 guc_fw->guc_fw_obj = NULL; 749 guc_fw->guc_fw_obj = NULL;
748 mutex_unlock(&dev->struct_mutex); 750 mutex_unlock(&dev->struct_mutex);
749 751
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index f48957ea100d..5dc2c20f6ca1 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -525,7 +525,6 @@ void i915_hpd_poll_init_work(struct work_struct *work) {
525/** 525/**
526 * intel_hpd_poll_init - enables/disables polling for connectors with hpd 526 * intel_hpd_poll_init - enables/disables polling for connectors with hpd
527 * @dev_priv: i915 device instance 527 * @dev_priv: i915 device instance
528 * @enabled: Whether to enable or disable polling
529 * 528 *
530 * This function enables polling for all connectors, regardless of whether or 529 * This function enables polling for all connectors, regardless of whether or
531 * not they support hotplug detection. Under certain conditions HPD may not be 530 * not they support hotplug detection. Under certain conditions HPD may not be
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 414ddda43922..309c5d9b1c57 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -288,7 +288,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
288/** 288/**
289 * intel_lr_context_descriptor_update() - calculate & cache the descriptor 289 * intel_lr_context_descriptor_update() - calculate & cache the descriptor
290 * descriptor for a pinned context 290 * descriptor for a pinned context
291 *
292 * @ctx: Context to work on 291 * @ctx: Context to work on
293 * @engine: Engine the descriptor will be used with 292 * @engine: Engine the descriptor will be used with
294 * 293 *
@@ -297,12 +296,13 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
297 * expensive to calculate, we'll just do it once and cache the result, 296 * expensive to calculate, we'll just do it once and cache the result,
298 * which remains valid until the context is unpinned. 297 * which remains valid until the context is unpinned.
299 * 298 *
300 * This is what a descriptor looks like, from LSB to MSB: 299 * This is what a descriptor looks like, from LSB to MSB::
301 * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) 300 *
302 * bits 12-31: LRCA, GTT address of (the HWSP of) this context 301 * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template)
303 * bits 32-52: ctx ID, a globally unique tag 302 * bits 12-31: LRCA, GTT address of (the HWSP of) this context
304 * bits 53-54: mbz, reserved for use by hardware 303 * bits 32-52: ctx ID, a globally unique tag
305 * bits 55-63: group ID, currently unused and set to 0 304 * bits 53-54: mbz, reserved for use by hardware
305 * bits 55-63: group ID, currently unused and set to 0
306 */ 306 */
307static void 307static void
308intel_lr_context_descriptor_update(struct i915_gem_context *ctx, 308intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
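
The reflowed comment above spells out the descriptor layout: flags in bits 0-11, the LRCA in bits 12-31, and the context ID in bits 32-52, with the top bits reserved. A worked sketch of packing such a descriptor, with masks derived only from those documented bit ranges rather than from the driver's actual helpers:

/* Sketch only: pack a context descriptor from the documented bit layout. */
static inline u64 sketch_pack_lrc_desc(u64 ctx_flags, u64 lrca, u64 ctx_id)
{
	u64 desc = 0;

	desc |= ctx_flags & 0xfffULL;            /* bits 0-11:  GEN8_CTX_* flags       */
	desc |= lrca & 0xfffff000ULL;            /* bits 12-31: page-aligned LRCA      */
	desc |= (ctx_id & 0x1fffffULL) << 32;    /* bits 32-52: globally unique ctx ID */
	/* bits 53-63: mbz / group ID, left as zero */

	return desc;
}
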
@@ -373,7 +373,7 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
373 struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; 373 struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
374 uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state; 374 uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
375 375
376 reg_state[CTX_RING_TAIL+1] = rq->tail; 376 reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
377 377
378 /* True 32b PPGTT with dynamic page allocation: update PDP 378 /* True 32b PPGTT with dynamic page allocation: update PDP
379 * registers and point the unallocated PDPs to scratch page. 379 * registers and point the unallocated PDPs to scratch page.
@@ -384,8 +384,8 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
384 execlists_update_context_pdps(ppgtt, reg_state); 384 execlists_update_context_pdps(ppgtt, reg_state);
385} 385}
386 386
387static void execlists_submit_requests(struct drm_i915_gem_request *rq0, 387static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
388 struct drm_i915_gem_request *rq1) 388 struct drm_i915_gem_request *rq1)
389{ 389{
390 struct drm_i915_private *dev_priv = rq0->i915; 390 struct drm_i915_private *dev_priv = rq0->i915;
391 unsigned int fw_domains = rq0->engine->fw_domains; 391 unsigned int fw_domains = rq0->engine->fw_domains;
@@ -418,7 +418,7 @@ static inline void execlists_context_status_change(
418 atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq); 418 atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
419} 419}
420 420
421static void execlists_context_unqueue(struct intel_engine_cs *engine) 421static void execlists_unqueue(struct intel_engine_cs *engine)
422{ 422{
423 struct drm_i915_gem_request *req0 = NULL, *req1 = NULL; 423 struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
424 struct drm_i915_gem_request *cursor, *tmp; 424 struct drm_i915_gem_request *cursor, *tmp;
@@ -441,7 +441,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *engine)
441 * will update tail past first request's workload */ 441 * will update tail past first request's workload */
442 cursor->elsp_submitted = req0->elsp_submitted; 442 cursor->elsp_submitted = req0->elsp_submitted;
443 list_del(&req0->execlist_link); 443 list_del(&req0->execlist_link);
444 i915_gem_request_unreference(req0); 444 i915_gem_request_put(req0);
445 req0 = cursor; 445 req0 = cursor;
446 } else { 446 } else {
447 if (IS_ENABLED(CONFIG_DRM_I915_GVT)) { 447 if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
@@ -482,14 +482,11 @@ static void execlists_context_unqueue(struct intel_engine_cs *engine)
482 * resubmit the request. See gen8_emit_request() for where we 482 * resubmit the request. See gen8_emit_request() for where we
483 * prepare the padding after the end of the request. 483 * prepare the padding after the end of the request.
484 */ 484 */
485 struct intel_ringbuffer *ringbuf;
486
487 ringbuf = req0->ctx->engine[engine->id].ringbuf;
488 req0->tail += 8; 485 req0->tail += 8;
489 req0->tail &= ringbuf->size - 1; 486 req0->tail &= req0->ring->size - 1;
490 } 487 }
491 488
492 execlists_submit_requests(req0, req1); 489 execlists_elsp_submit_contexts(req0, req1);
493} 490}
494 491
495static unsigned int 492static unsigned int
@@ -514,7 +511,7 @@ execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id)
514 execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT); 511 execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
515 512
516 list_del(&head_req->execlist_link); 513 list_del(&head_req->execlist_link);
517 i915_gem_request_unreference(head_req); 514 i915_gem_request_put(head_req);
518 515
519 return 1; 516 return 1;
520} 517}
@@ -539,10 +536,7 @@ get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
539 return status; 536 return status;
540} 537}
541 538
542/** 539/*
543 * intel_lrc_irq_handler() - handle Context Switch interrupts
544 * @data: tasklet handler passed in unsigned long
545 *
546 * Check the unread Context Status Buffers and manage the submission of new 540 * Check the unread Context Status Buffers and manage the submission of new
547 * contexts to the ELSP accordingly. 541 * contexts to the ELSP accordingly.
548 */ 542 */
@@ -603,7 +597,7 @@ static void intel_lrc_irq_handler(unsigned long data)
603 if (submit_contexts) { 597 if (submit_contexts) {
604 if (!engine->disable_lite_restore_wa || 598 if (!engine->disable_lite_restore_wa ||
605 (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE)) 599 (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE))
606 execlists_context_unqueue(engine); 600 execlists_unqueue(engine);
607 } 601 }
608 602
609 spin_unlock(&engine->execlist_lock); 603 spin_unlock(&engine->execlist_lock);
@@ -612,7 +606,7 @@ static void intel_lrc_irq_handler(unsigned long data)
612 DRM_ERROR("More than two context complete events?\n"); 606 DRM_ERROR("More than two context complete events?\n");
613} 607}
614 608
615static void execlists_context_queue(struct drm_i915_gem_request *request) 609static void execlists_submit_request(struct drm_i915_gem_request *request)
616{ 610{
617 struct intel_engine_cs *engine = request->engine; 611 struct intel_engine_cs *engine = request->engine;
618 struct drm_i915_gem_request *cursor; 612 struct drm_i915_gem_request *cursor;
@@ -635,70 +629,19 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
635 WARN(tail_req->elsp_submitted != 0, 629 WARN(tail_req->elsp_submitted != 0,
636 "More than 2 already-submitted reqs queued\n"); 630 "More than 2 already-submitted reqs queued\n");
637 list_del(&tail_req->execlist_link); 631 list_del(&tail_req->execlist_link);
638 i915_gem_request_unreference(tail_req); 632 i915_gem_request_put(tail_req);
639 } 633 }
640 } 634 }
641 635
642 i915_gem_request_reference(request); 636 i915_gem_request_get(request);
643 list_add_tail(&request->execlist_link, &engine->execlist_queue); 637 list_add_tail(&request->execlist_link, &engine->execlist_queue);
644 request->ctx_hw_id = request->ctx->hw_id; 638 request->ctx_hw_id = request->ctx->hw_id;
645 if (num_elements == 0) 639 if (num_elements == 0)
646 execlists_context_unqueue(engine); 640 execlists_unqueue(engine);
647 641
648 spin_unlock_bh(&engine->execlist_lock); 642 spin_unlock_bh(&engine->execlist_lock);
649} 643}
650 644
651static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
652{
653 struct intel_engine_cs *engine = req->engine;
654 uint32_t flush_domains;
655 int ret;
656
657 flush_domains = 0;
658 if (engine->gpu_caches_dirty)
659 flush_domains = I915_GEM_GPU_DOMAINS;
660
661 ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
662 if (ret)
663 return ret;
664
665 engine->gpu_caches_dirty = false;
666 return 0;
667}
668
669static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
670 struct list_head *vmas)
671{
672 const unsigned other_rings = ~intel_engine_flag(req->engine);
673 struct i915_vma *vma;
674 uint32_t flush_domains = 0;
675 bool flush_chipset = false;
676 int ret;
677
678 list_for_each_entry(vma, vmas, exec_list) {
679 struct drm_i915_gem_object *obj = vma->obj;
680
681 if (obj->active & other_rings) {
682 ret = i915_gem_object_sync(obj, req->engine, &req);
683 if (ret)
684 return ret;
685 }
686
687 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
688 flush_chipset |= i915_gem_clflush_object(obj, false);
689
690 flush_domains |= obj->base.write_domain;
691 }
692
693 if (flush_domains & I915_GEM_DOMAIN_GTT)
694 wmb();
695
696 /* Unconditionally invalidate gpu caches and ensure that we do flush
697 * any residual writes from the previous batch.
698 */
699 return logical_ring_invalidate_all_caches(req);
700}
701
702int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) 645int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
703{ 646{
704 struct intel_engine_cs *engine = request->engine; 647 struct intel_engine_cs *engine = request->engine;
@@ -717,7 +660,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
717 return ret; 660 return ret;
718 } 661 }
719 662
720 request->ringbuf = ce->ringbuf; 663 request->ring = ce->ring;
721 664
722 if (i915.enable_guc_submission) { 665 if (i915.enable_guc_submission) {
723 /* 666 /*
@@ -762,7 +705,7 @@ err_unpin:
762} 705}
763 706
764/* 707/*
765 * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload 708 * intel_logical_ring_advance() - advance the tail and prepare for submission
766 * @request: Request to advance the logical ringbuffer of. 709 * @request: Request to advance the logical ringbuffer of.
767 * 710 *
768 * The tail is updated in our logical ringbuffer struct, not in the actual context. What 711 * The tail is updated in our logical ringbuffer struct, not in the actual context. What
@@ -771,13 +714,13 @@ err_unpin:
771 * point, the tail *inside* the context is updated and the ELSP written to. 714 * point, the tail *inside* the context is updated and the ELSP written to.
772 */ 715 */
773static int 716static int
774intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) 717intel_logical_ring_advance(struct drm_i915_gem_request *request)
775{ 718{
776 struct intel_ringbuffer *ringbuf = request->ringbuf; 719 struct intel_ring *ring = request->ring;
777 struct intel_engine_cs *engine = request->engine; 720 struct intel_engine_cs *engine = request->engine;
778 721
779 intel_logical_ring_advance(ringbuf); 722 intel_ring_advance(ring);
780 request->tail = ringbuf->tail; 723 request->tail = ring->tail;
781 724
782 /* 725 /*
783 * Here we add two extra NOOPs as padding to avoid 726 * Here we add two extra NOOPs as padding to avoid
@@ -785,9 +728,9 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
785 * 728 *
786 * Caller must reserve WA_TAIL_DWORDS for us! 729 * Caller must reserve WA_TAIL_DWORDS for us!
787 */ 730 */
788 intel_logical_ring_emit(ringbuf, MI_NOOP); 731 intel_ring_emit(ring, MI_NOOP);
789 intel_logical_ring_emit(ringbuf, MI_NOOP); 732 intel_ring_emit(ring, MI_NOOP);
790 intel_logical_ring_advance(ringbuf); 733 intel_ring_advance(ring);
791 734
792 /* We keep the previous context alive until we retire the following 735 /* We keep the previous context alive until we retire the following
793 * request. This ensures that the context object is still pinned 736
@@ -797,100 +740,6 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
797 */ 740 */
798 request->previous_context = engine->last_context; 741 request->previous_context = engine->last_context;
799 engine->last_context = request->ctx; 742 engine->last_context = request->ctx;
800
801 if (i915.enable_guc_submission)
802 i915_guc_submit(request);
803 else
804 execlists_context_queue(request);
805
806 return 0;
807}
808
809/**
810 * execlists_submission() - submit a batchbuffer for execution, Execlists style
811 * @params: execbuffer call parameters.
812 * @args: execbuffer call arguments.
813 * @vmas: list of vmas.
814 *
815 * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
816 * away the submission details of the execbuffer ioctl call.
817 *
818 * Return: non-zero if the submission fails.
819 */
820int intel_execlists_submission(struct i915_execbuffer_params *params,
821 struct drm_i915_gem_execbuffer2 *args,
822 struct list_head *vmas)
823{
824 struct drm_device *dev = params->dev;
825 struct intel_engine_cs *engine = params->engine;
826 struct drm_i915_private *dev_priv = to_i915(dev);
827 struct intel_ringbuffer *ringbuf = params->ctx->engine[engine->id].ringbuf;
828 u64 exec_start;
829 int instp_mode;
830 u32 instp_mask;
831 int ret;
832
833 instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
834 instp_mask = I915_EXEC_CONSTANTS_MASK;
835 switch (instp_mode) {
836 case I915_EXEC_CONSTANTS_REL_GENERAL:
837 case I915_EXEC_CONSTANTS_ABSOLUTE:
838 case I915_EXEC_CONSTANTS_REL_SURFACE:
839 if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) {
840 DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
841 return -EINVAL;
842 }
843
844 if (instp_mode != dev_priv->relative_constants_mode) {
845 if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
846 DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
847 return -EINVAL;
848 }
849
850 /* The HW changed the meaning on this bit on gen6 */
851 instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
852 }
853 break;
854 default:
855 DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
856 return -EINVAL;
857 }
858
859 if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
860 DRM_DEBUG("sol reset is gen7 only\n");
861 return -EINVAL;
862 }
863
864 ret = execlists_move_to_gpu(params->request, vmas);
865 if (ret)
866 return ret;
867
868 if (engine == &dev_priv->engine[RCS] &&
869 instp_mode != dev_priv->relative_constants_mode) {
870 ret = intel_ring_begin(params->request, 4);
871 if (ret)
872 return ret;
873
874 intel_logical_ring_emit(ringbuf, MI_NOOP);
875 intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
876 intel_logical_ring_emit_reg(ringbuf, INSTPM);
877 intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
878 intel_logical_ring_advance(ringbuf);
879
880 dev_priv->relative_constants_mode = instp_mode;
881 }
882
883 exec_start = params->batch_obj_vm_offset +
884 args->batch_start_offset;
885
886 ret = engine->emit_bb_start(params->request, exec_start, params->dispatch_flags);
887 if (ret)
888 return ret;
889
890 trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
891
892 i915_gem_execbuffer_move_to_active(vmas, params->request);
893
894 return 0; 743 return 0;
895} 744}
896 745
@@ -907,51 +756,10 @@ void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
907 756
908 list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) { 757 list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) {
909 list_del(&req->execlist_link); 758 list_del(&req->execlist_link);
910 i915_gem_request_unreference(req); 759 i915_gem_request_put(req);
911 } 760 }
912} 761}
913 762
914void intel_logical_ring_stop(struct intel_engine_cs *engine)
915{
916 struct drm_i915_private *dev_priv = engine->i915;
917 int ret;
918
919 if (!intel_engine_initialized(engine))
920 return;
921
922 ret = intel_engine_idle(engine);
923 if (ret)
924 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
925 engine->name, ret);
926
927 /* TODO: Is this correct with Execlists enabled? */
928 I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
929 if (intel_wait_for_register(dev_priv,
930 RING_MI_MODE(engine->mmio_base),
931 MODE_IDLE, MODE_IDLE,
932 1000)) {
933 DRM_ERROR("%s :timed out trying to stop ring\n", engine->name);
934 return;
935 }
936 I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
937}
938
939int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
940{
941 struct intel_engine_cs *engine = req->engine;
942 int ret;
943
944 if (!engine->gpu_caches_dirty)
945 return 0;
946
947 ret = engine->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
948 if (ret)
949 return ret;
950
951 engine->gpu_caches_dirty = false;
952 return 0;
953}
954
955static int intel_lr_context_pin(struct i915_gem_context *ctx, 763static int intel_lr_context_pin(struct i915_gem_context *ctx,
956 struct intel_engine_cs *engine) 764 struct intel_engine_cs *engine)
957{ 765{
@@ -966,8 +774,9 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
966 if (ce->pin_count++) 774 if (ce->pin_count++)
967 return 0; 775 return 0;
968 776
969 ret = i915_gem_obj_ggtt_pin(ce->state, GEN8_LR_CONTEXT_ALIGN, 777 ret = i915_gem_object_ggtt_pin(ce->state, NULL,
970 PIN_OFFSET_BIAS | GUC_WOPCM_TOP); 778 0, GEN8_LR_CONTEXT_ALIGN,
779 PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
971 if (ret) 780 if (ret)
972 goto err; 781 goto err;
973 782
@@ -979,15 +788,14 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
979 788
980 lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; 789 lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
981 790
982 ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ce->ringbuf); 791 ret = intel_ring_pin(ce->ring);
983 if (ret) 792 if (ret)
984 goto unpin_map; 793 goto unpin_map;
985 794
986 i915_gem_context_reference(ctx);
987 ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state); 795 ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state);
988 intel_lr_context_descriptor_update(ctx, engine); 796 intel_lr_context_descriptor_update(ctx, engine);
989 797
990 lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ringbuf->vma->node.start; 798 lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start;
991 ce->lrc_reg_state = lrc_reg_state; 799 ce->lrc_reg_state = lrc_reg_state;
992 ce->state->dirty = true; 800 ce->state->dirty = true;
993 801
@@ -995,6 +803,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
995 if (i915.enable_guc_submission) 803 if (i915.enable_guc_submission)
996 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); 804 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
997 805
806 i915_gem_context_get(ctx);
998 return 0; 807 return 0;
999 808
1000unpin_map: 809unpin_map:
@@ -1017,7 +826,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
1017 if (--ce->pin_count) 826 if (--ce->pin_count)
1018 return; 827 return;
1019 828
1020 intel_unpin_ringbuffer_obj(ce->ringbuf); 829 intel_ring_unpin(ce->ring);
1021 830
1022 i915_gem_object_unpin_map(ce->state); 831 i915_gem_object_unpin_map(ce->state);
1023 i915_gem_object_ggtt_unpin(ce->state); 832 i915_gem_object_ggtt_unpin(ce->state);
@@ -1026,21 +835,19 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
1026 ce->lrc_desc = 0; 835 ce->lrc_desc = 0;
1027 ce->lrc_reg_state = NULL; 836 ce->lrc_reg_state = NULL;
1028 837
1029 i915_gem_context_unreference(ctx); 838 i915_gem_context_put(ctx);
1030} 839}
1031 840
1032static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) 841static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
1033{ 842{
1034 int ret, i; 843 int ret, i;
1035 struct intel_engine_cs *engine = req->engine; 844 struct intel_ring *ring = req->ring;
1036 struct intel_ringbuffer *ringbuf = req->ringbuf;
1037 struct i915_workarounds *w = &req->i915->workarounds; 845 struct i915_workarounds *w = &req->i915->workarounds;
1038 846
1039 if (w->count == 0) 847 if (w->count == 0)
1040 return 0; 848 return 0;
1041 849
1042 engine->gpu_caches_dirty = true; 850 ret = req->engine->emit_flush(req, EMIT_BARRIER);
1043 ret = logical_ring_flush_all_caches(req);
1044 if (ret) 851 if (ret)
1045 return ret; 852 return ret;
1046 853
@@ -1048,17 +855,16 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
1048 if (ret) 855 if (ret)
1049 return ret; 856 return ret;
1050 857
1051 intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count)); 858 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
1052 for (i = 0; i < w->count; i++) { 859 for (i = 0; i < w->count; i++) {
1053 intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr); 860 intel_ring_emit_reg(ring, w->reg[i].addr);
1054 intel_logical_ring_emit(ringbuf, w->reg[i].value); 861 intel_ring_emit(ring, w->reg[i].value);
1055 } 862 }
1056 intel_logical_ring_emit(ringbuf, MI_NOOP); 863 intel_ring_emit(ring, MI_NOOP);
1057 864
1058 intel_logical_ring_advance(ringbuf); 865 intel_ring_advance(ring);
1059 866
1060 engine->gpu_caches_dirty = true; 867 ret = req->engine->emit_flush(req, EMIT_BARRIER);
1061 ret = logical_ring_flush_all_caches(req);
1062 if (ret) 868 if (ret)
1063 return ret; 869 return ret;
1064 870
@@ -1094,7 +900,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
1094 * code duplication. 900 * code duplication.
1095 */ 901 */
1096static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, 902static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
1097 uint32_t *const batch, 903 uint32_t *batch,
1098 uint32_t index) 904 uint32_t index)
1099{ 905{
1100 struct drm_i915_private *dev_priv = engine->i915; 906 struct drm_i915_private *dev_priv = engine->i915;
@@ -1156,37 +962,24 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
1156 return 0; 962 return 0;
1157} 963}
1158 964
1159/** 965/*
1160 * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA 966 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
1161 * 967 * initialized at the beginning and shared across all contexts but this field
1162 * @engine: only applicable for RCS 968 * helps us to have multiple batches at different offsets and select them based
1163 * @wa_ctx: structure representing wa_ctx 969 * on a criteria. At the moment this batch always start at the beginning of the page
1164 * offset: specifies start of the batch, should be cache-aligned. This is updated 970 * and at this point we don't have multiple wa_ctx batch buffers.
1165 * with the offset value received as input.
1166 * size: size of the batch in DWORDS but HW expects in terms of cachelines
1167 * @batch: page in which WA are loaded
1168 * @offset: This field specifies the start of the batch, it should be
1169 * cache-aligned otherwise it is adjusted accordingly.
1170 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
1171 * initialized at the beginning and shared across all contexts but this field
1172 * helps us to have multiple batches at different offsets and select them based
1173 * on a criteria. At the moment this batch always start at the beginning of the page
1174 * and at this point we don't have multiple wa_ctx batch buffers.
1175 *
1176 * The number of WA applied are not known at the beginning; we use this field
1177 * to return the no of DWORDS written.
1178 * 971 *
1179 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END 972 * The number of WA applied are not known at the beginning; we use this field
1180 * so it adds NOOPs as padding to make it cacheline aligned. 973 * to return the no of DWORDS written.
1181 * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
1182 * makes a complete batch buffer.
1183 * 974 *
1184 * Return: non-zero if we exceed the PAGE_SIZE limit. 975 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
976 * so it adds NOOPs as padding to make it cacheline aligned.
977 * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
978 * makes a complete batch buffer.
1185 */ 979 */
1186
1187static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, 980static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
1188 struct i915_wa_ctx_bb *wa_ctx, 981 struct i915_wa_ctx_bb *wa_ctx,
1189 uint32_t *const batch, 982 uint32_t *batch,
1190 uint32_t *offset) 983 uint32_t *offset)
1191{ 984{
1192 uint32_t scratch_addr; 985 uint32_t scratch_addr;
@@ -1230,26 +1023,18 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
1230 return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); 1023 return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
1231} 1024}
1232 1025
1233/** 1026/*
1234 * gen8_init_perctx_bb() - initialize per ctx batch with WA 1027 * This batch is started immediately after indirect_ctx batch. Since we ensure
1235 * 1028 * that indirect_ctx ends on a cacheline this batch is aligned automatically.
1236 * @engine: only applicable for RCS
1237 * @wa_ctx: structure representing wa_ctx
1238 * offset: specifies start of the batch, should be cache-aligned.
1239 * size: size of the batch in DWORDS but HW expects in terms of cachelines
1240 * @batch: page in which WA are loaded
1241 * @offset: This field specifies the start of this batch.
1242 * This batch is started immediately after indirect_ctx batch. Since we ensure
1243 * that indirect_ctx ends on a cacheline this batch is aligned automatically.
1244 * 1029 *
1245 * The number of DWORDS written are returned using this field. 1030 * The number of DWORDS written are returned using this field.
1246 * 1031 *
1247 * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding 1032 * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
1248 * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. 1033 * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
1249 */ 1034 */
1250static int gen8_init_perctx_bb(struct intel_engine_cs *engine, 1035static int gen8_init_perctx_bb(struct intel_engine_cs *engine,
1251 struct i915_wa_ctx_bb *wa_ctx, 1036 struct i915_wa_ctx_bb *wa_ctx,
1252 uint32_t *const batch, 1037 uint32_t *batch,
1253 uint32_t *offset) 1038 uint32_t *offset)
1254{ 1039{
1255 uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); 1040 uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
@@ -1264,7 +1049,7 @@ static int gen8_init_perctx_bb(struct intel_engine_cs *engine,
1264 1049
1265static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, 1050static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
1266 struct i915_wa_ctx_bb *wa_ctx, 1051 struct i915_wa_ctx_bb *wa_ctx,
1267 uint32_t *const batch, 1052 uint32_t *batch,
1268 uint32_t *offset) 1053 uint32_t *offset)
1269{ 1054{
1270 int ret; 1055 int ret;
@@ -1282,6 +1067,13 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
1282 return ret; 1067 return ret;
1283 index = ret; 1068 index = ret;
1284 1069
1070 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */
1071 wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
1072 wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2);
1073 wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(
1074 GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE));
1075 wa_ctx_emit(batch, index, MI_NOOP);
1076
1285 /* WaClearSlmSpaceAtContextSwitch:kbl */ 1077 /* WaClearSlmSpaceAtContextSwitch:kbl */
1286 /* Actual scratch location is at 128 bytes offset */ 1078 /* Actual scratch location is at 128 bytes offset */
1287 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { 1079 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
@@ -1332,7 +1124,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
1332 1124
1333static int gen9_init_perctx_bb(struct intel_engine_cs *engine, 1125static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
1334 struct i915_wa_ctx_bb *wa_ctx, 1126 struct i915_wa_ctx_bb *wa_ctx,
1335 uint32_t *const batch, 1127 uint32_t *batch,
1336 uint32_t *offset) 1128 uint32_t *offset)
1337{ 1129{
1338 uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); 1130 uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
@@ -1389,11 +1181,12 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
1389 return ret; 1181 return ret;
1390 } 1182 }
1391 1183
1392 ret = i915_gem_obj_ggtt_pin(engine->wa_ctx.obj, PAGE_SIZE, 0); 1184 ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
1185 0, PAGE_SIZE, 0);
1393 if (ret) { 1186 if (ret) {
1394 DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n", 1187 DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
1395 ret); 1188 ret);
1396 drm_gem_object_unreference(&engine->wa_ctx.obj->base); 1189 i915_gem_object_put(engine->wa_ctx.obj);
1397 return ret; 1190 return ret;
1398 } 1191 }
1399 1192
@@ -1404,7 +1197,7 @@ static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine)
1404{ 1197{
1405 if (engine->wa_ctx.obj) { 1198 if (engine->wa_ctx.obj) {
1406 i915_gem_object_ggtt_unpin(engine->wa_ctx.obj); 1199 i915_gem_object_ggtt_unpin(engine->wa_ctx.obj);
1407 drm_gem_object_unreference(&engine->wa_ctx.obj->base); 1200 i915_gem_object_put(engine->wa_ctx.obj);
1408 engine->wa_ctx.obj = NULL; 1201 engine->wa_ctx.obj = NULL;
1409 } 1202 }
1410} 1203}
@@ -1572,8 +1365,8 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
1572static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) 1365static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
1573{ 1366{
1574 struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; 1367 struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
1368 struct intel_ring *ring = req->ring;
1575 struct intel_engine_cs *engine = req->engine; 1369 struct intel_engine_cs *engine = req->engine;
1576 struct intel_ringbuffer *ringbuf = req->ringbuf;
1577 const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; 1370 const int num_lri_cmds = GEN8_LEGACY_PDPES * 2;
1578 int i, ret; 1371 int i, ret;
1579 1372
@@ -1581,28 +1374,27 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
1581 if (ret) 1374 if (ret)
1582 return ret; 1375 return ret;
1583 1376
1584 intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds)); 1377 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds));
1585 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 1378 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
1586 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 1379 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
1587 1380
1588 intel_logical_ring_emit_reg(ringbuf, 1381 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i));
1589 GEN8_RING_PDP_UDW(engine, i)); 1382 intel_ring_emit(ring, upper_32_bits(pd_daddr));
1590 intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr)); 1383 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i));
1591 intel_logical_ring_emit_reg(ringbuf, 1384 intel_ring_emit(ring, lower_32_bits(pd_daddr));
1592 GEN8_RING_PDP_LDW(engine, i));
1593 intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr));
1594 } 1385 }
1595 1386
1596 intel_logical_ring_emit(ringbuf, MI_NOOP); 1387 intel_ring_emit(ring, MI_NOOP);
1597 intel_logical_ring_advance(ringbuf); 1388 intel_ring_advance(ring);
1598 1389
1599 return 0; 1390 return 0;
1600} 1391}
1601 1392
1602static int gen8_emit_bb_start(struct drm_i915_gem_request *req, 1393static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
1603 u64 offset, unsigned dispatch_flags) 1394 u64 offset, u32 len,
1395 unsigned int dispatch_flags)
1604{ 1396{
1605 struct intel_ringbuffer *ringbuf = req->ringbuf; 1397 struct intel_ring *ring = req->ring;
1606 bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); 1398 bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
1607 int ret; 1399 int ret;
1608 1400
@@ -1629,14 +1421,14 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
1629 return ret; 1421 return ret;
1630 1422
1631 /* FIXME(BDW): Address space and security selectors. */ 1423 /* FIXME(BDW): Address space and security selectors. */
1632 intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 1424 intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 |
1633 (ppgtt<<8) | 1425 (ppgtt<<8) |
1634 (dispatch_flags & I915_DISPATCH_RS ? 1426 (dispatch_flags & I915_DISPATCH_RS ?
1635 MI_BATCH_RESOURCE_STREAMER : 0)); 1427 MI_BATCH_RESOURCE_STREAMER : 0));
1636 intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); 1428 intel_ring_emit(ring, lower_32_bits(offset));
1637 intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); 1429 intel_ring_emit(ring, upper_32_bits(offset));
1638 intel_logical_ring_emit(ringbuf, MI_NOOP); 1430 intel_ring_emit(ring, MI_NOOP);
1639 intel_logical_ring_advance(ringbuf); 1431 intel_ring_advance(ring);
1640 1432
1641 return 0; 1433 return 0;
1642} 1434}
@@ -1655,14 +1447,10 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
1655 I915_WRITE_IMR(engine, ~engine->irq_keep_mask); 1447 I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
1656} 1448}
1657 1449
1658static int gen8_emit_flush(struct drm_i915_gem_request *request, 1450static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
1659 u32 invalidate_domains,
1660 u32 unused)
1661{ 1451{
1662 struct intel_ringbuffer *ringbuf = request->ringbuf; 1452 struct intel_ring *ring = request->ring;
1663 struct intel_engine_cs *engine = ringbuf->engine; 1453 u32 cmd;
1664 struct drm_i915_private *dev_priv = request->i915;
1665 uint32_t cmd;
1666 int ret; 1454 int ret;
1667 1455
1668 ret = intel_ring_begin(request, 4); 1456 ret = intel_ring_begin(request, 4);
@@ -1678,29 +1466,28 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request,
1678 */ 1466 */
1679 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 1467 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1680 1468
1681 if (invalidate_domains & I915_GEM_GPU_DOMAINS) { 1469 if (mode & EMIT_INVALIDATE) {
1682 cmd |= MI_INVALIDATE_TLB; 1470 cmd |= MI_INVALIDATE_TLB;
1683 if (engine == &dev_priv->engine[VCS]) 1471 if (request->engine->id == VCS)
1684 cmd |= MI_INVALIDATE_BSD; 1472 cmd |= MI_INVALIDATE_BSD;
1685 } 1473 }
1686 1474
1687 intel_logical_ring_emit(ringbuf, cmd); 1475 intel_ring_emit(ring, cmd);
1688 intel_logical_ring_emit(ringbuf, 1476 intel_ring_emit(ring,
1689 I915_GEM_HWS_SCRATCH_ADDR | 1477 I915_GEM_HWS_SCRATCH_ADDR |
1690 MI_FLUSH_DW_USE_GTT); 1478 MI_FLUSH_DW_USE_GTT);
1691 intel_logical_ring_emit(ringbuf, 0); /* upper addr */ 1479 intel_ring_emit(ring, 0); /* upper addr */
1692 intel_logical_ring_emit(ringbuf, 0); /* value */ 1480 intel_ring_emit(ring, 0); /* value */
1693 intel_logical_ring_advance(ringbuf); 1481 intel_ring_advance(ring);
1694 1482
1695 return 0; 1483 return 0;
1696} 1484}
1697 1485
1698static int gen8_emit_flush_render(struct drm_i915_gem_request *request, 1486static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
1699 u32 invalidate_domains, 1487 u32 mode)
1700 u32 flush_domains)
1701{ 1488{
1702 struct intel_ringbuffer *ringbuf = request->ringbuf; 1489 struct intel_ring *ring = request->ring;
1703 struct intel_engine_cs *engine = ringbuf->engine; 1490 struct intel_engine_cs *engine = request->engine;
1704 u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 1491 u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
1705 bool vf_flush_wa = false, dc_flush_wa = false; 1492 bool vf_flush_wa = false, dc_flush_wa = false;
1706 u32 flags = 0; 1493 u32 flags = 0;
@@ -1709,14 +1496,14 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
1709 1496
1710 flags |= PIPE_CONTROL_CS_STALL; 1497 flags |= PIPE_CONTROL_CS_STALL;
1711 1498
1712 if (flush_domains) { 1499 if (mode & EMIT_FLUSH) {
1713 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 1500 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
1714 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 1501 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
1715 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 1502 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
1716 flags |= PIPE_CONTROL_FLUSH_ENABLE; 1503 flags |= PIPE_CONTROL_FLUSH_ENABLE;
1717 } 1504 }
1718 1505
1719 if (invalidate_domains) { 1506 if (mode & EMIT_INVALIDATE) {
1720 flags |= PIPE_CONTROL_TLB_INVALIDATE; 1507 flags |= PIPE_CONTROL_TLB_INVALIDATE;
1721 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 1508 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
1722 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 1509 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -1751,40 +1538,40 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
1751 return ret; 1538 return ret;
1752 1539
1753 if (vf_flush_wa) { 1540 if (vf_flush_wa) {
1754 intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); 1541 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
1755 intel_logical_ring_emit(ringbuf, 0); 1542 intel_ring_emit(ring, 0);
1756 intel_logical_ring_emit(ringbuf, 0); 1543 intel_ring_emit(ring, 0);
1757 intel_logical_ring_emit(ringbuf, 0); 1544 intel_ring_emit(ring, 0);
1758 intel_logical_ring_emit(ringbuf, 0); 1545 intel_ring_emit(ring, 0);
1759 intel_logical_ring_emit(ringbuf, 0); 1546 intel_ring_emit(ring, 0);
1760 } 1547 }
1761 1548
1762 if (dc_flush_wa) { 1549 if (dc_flush_wa) {
1763 intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); 1550 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
1764 intel_logical_ring_emit(ringbuf, PIPE_CONTROL_DC_FLUSH_ENABLE); 1551 intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE);
1765 intel_logical_ring_emit(ringbuf, 0); 1552 intel_ring_emit(ring, 0);
1766 intel_logical_ring_emit(ringbuf, 0); 1553 intel_ring_emit(ring, 0);
1767 intel_logical_ring_emit(ringbuf, 0); 1554 intel_ring_emit(ring, 0);
1768 intel_logical_ring_emit(ringbuf, 0); 1555 intel_ring_emit(ring, 0);
1769 } 1556 }
1770 1557
1771 intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); 1558 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
1772 intel_logical_ring_emit(ringbuf, flags); 1559 intel_ring_emit(ring, flags);
1773 intel_logical_ring_emit(ringbuf, scratch_addr); 1560 intel_ring_emit(ring, scratch_addr);
1774 intel_logical_ring_emit(ringbuf, 0); 1561 intel_ring_emit(ring, 0);
1775 intel_logical_ring_emit(ringbuf, 0); 1562 intel_ring_emit(ring, 0);
1776 intel_logical_ring_emit(ringbuf, 0); 1563 intel_ring_emit(ring, 0);
1777 1564
1778 if (dc_flush_wa) { 1565 if (dc_flush_wa) {
1779 intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); 1566 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
1780 intel_logical_ring_emit(ringbuf, PIPE_CONTROL_CS_STALL); 1567 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL);
1781 intel_logical_ring_emit(ringbuf, 0); 1568 intel_ring_emit(ring, 0);
1782 intel_logical_ring_emit(ringbuf, 0); 1569 intel_ring_emit(ring, 0);
1783 intel_logical_ring_emit(ringbuf, 0); 1570 intel_ring_emit(ring, 0);
1784 intel_logical_ring_emit(ringbuf, 0); 1571 intel_ring_emit(ring, 0);
1785 } 1572 }
1786 1573
1787 intel_logical_ring_advance(ringbuf); 1574 intel_ring_advance(ring);
1788 1575
1789 return 0; 1576 return 0;
1790} 1577}
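
Both flush emitters above now take a single mode word, with EMIT_FLUSH and EMIT_INVALIDATE bits, instead of separate invalidate/flush domain arguments. The stand-alone sketch below only models that calling convention; the placeholder flag bits and the build_flush_flags() helper are invented for illustration and are not the i915 definitions.

/*
 * Toy model of the single-mode flush convention: one bitmask selects
 * invalidate and/or flush behaviour. Names and bit values are invented.
 */
#include <stdio.h>

#define EMIT_INVALIDATE (1u << 0)   /* invalidate caches/TLBs before the batch */
#define EMIT_FLUSH      (1u << 1)   /* flush caches after the batch */

static unsigned int build_flush_flags(unsigned int mode)
{
        unsigned int flags = 0;

        if (mode & EMIT_FLUSH)
                flags |= 0x1;   /* stand-in for the *_CACHE_FLUSH bits */
        if (mode & EMIT_INVALIDATE)
                flags |= 0x2;   /* stand-in for the *_INVALIDATE bits */

        return flags;
}

int main(void)
{
        printf("flush only:      %#x\n", build_flush_flags(EMIT_FLUSH));
        printf("invalidate only: %#x\n", build_flush_flags(EMIT_INVALIDATE));
        printf("both:            %#x\n",
               build_flush_flags(EMIT_FLUSH | EMIT_INVALIDATE));
        return 0;
}
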
@@ -1813,7 +1600,7 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
1813 1600
1814static int gen8_emit_request(struct drm_i915_gem_request *request) 1601static int gen8_emit_request(struct drm_i915_gem_request *request)
1815{ 1602{
1816 struct intel_ringbuffer *ringbuf = request->ringbuf; 1603 struct intel_ring *ring = request->ring;
1817 int ret; 1604 int ret;
1818 1605
1819 ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS); 1606 ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS);
@@ -1823,21 +1610,20 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
1823 /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ 1610 /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
1824 BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); 1611 BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
1825 1612
1826 intel_logical_ring_emit(ringbuf, 1613 intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
1827 (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); 1614 intel_ring_emit(ring,
1828 intel_logical_ring_emit(ringbuf, 1615 intel_hws_seqno_address(request->engine) |
1829 intel_hws_seqno_address(request->engine) | 1616 MI_FLUSH_DW_USE_GTT);
1830 MI_FLUSH_DW_USE_GTT); 1617 intel_ring_emit(ring, 0);
1831 intel_logical_ring_emit(ringbuf, 0); 1618 intel_ring_emit(ring, request->fence.seqno);
1832 intel_logical_ring_emit(ringbuf, request->seqno); 1619 intel_ring_emit(ring, MI_USER_INTERRUPT);
1833 intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); 1620 intel_ring_emit(ring, MI_NOOP);
1834 intel_logical_ring_emit(ringbuf, MI_NOOP); 1621 return intel_logical_ring_advance(request);
1835 return intel_logical_ring_advance_and_submit(request);
1836} 1622}
1837 1623
1838static int gen8_emit_request_render(struct drm_i915_gem_request *request) 1624static int gen8_emit_request_render(struct drm_i915_gem_request *request)
1839{ 1625{
1840 struct intel_ringbuffer *ringbuf = request->ringbuf; 1626 struct intel_ring *ring = request->ring;
1841 int ret; 1627 int ret;
1842 1628
1843 ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS); 1629 ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS);
@@ -1851,50 +1637,19 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
1851 * need a prior CS_STALL, which is emitted by the flush 1637 * need a prior CS_STALL, which is emitted by the flush
1852 * following the batch. 1638 * following the batch.
1853 */ 1639 */
1854 intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); 1640 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
1855 intel_logical_ring_emit(ringbuf, 1641 intel_ring_emit(ring,
1856 (PIPE_CONTROL_GLOBAL_GTT_IVB | 1642 (PIPE_CONTROL_GLOBAL_GTT_IVB |
1857 PIPE_CONTROL_CS_STALL | 1643 PIPE_CONTROL_CS_STALL |
1858 PIPE_CONTROL_QW_WRITE)); 1644 PIPE_CONTROL_QW_WRITE));
1859 intel_logical_ring_emit(ringbuf, 1645 intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
1860 intel_hws_seqno_address(request->engine)); 1646 intel_ring_emit(ring, 0);
1861 intel_logical_ring_emit(ringbuf, 0); 1647 intel_ring_emit(ring, i915_gem_request_get_seqno(request));
1862 intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
1863 /* We're thrashing one dword of HWS. */ 1648 /* We're thrashing one dword of HWS. */
1864 intel_logical_ring_emit(ringbuf, 0); 1649 intel_ring_emit(ring, 0);
1865 intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); 1650 intel_ring_emit(ring, MI_USER_INTERRUPT);
1866 intel_logical_ring_emit(ringbuf, MI_NOOP); 1651 intel_ring_emit(ring, MI_NOOP);
1867 return intel_logical_ring_advance_and_submit(request); 1652 return intel_logical_ring_advance(request);
1868}
1869
1870static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
1871{
1872 struct render_state so;
1873 int ret;
1874
1875 ret = i915_gem_render_state_prepare(req->engine, &so);
1876 if (ret)
1877 return ret;
1878
1879 if (so.rodata == NULL)
1880 return 0;
1881
1882 ret = req->engine->emit_bb_start(req, so.ggtt_offset,
1883 I915_DISPATCH_SECURE);
1884 if (ret)
1885 goto out;
1886
1887 ret = req->engine->emit_bb_start(req,
1888 (so.ggtt_offset + so.aux_batch_offset),
1889 I915_DISPATCH_SECURE);
1890 if (ret)
1891 goto out;
1892
1893 i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
1894
1895out:
1896 i915_gem_render_state_fini(&so);
1897 return ret;
1898} 1653}
1899 1654
1900static int gen8_init_rcs_context(struct drm_i915_gem_request *req) 1655static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
@@ -1913,14 +1668,12 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
1913 if (ret) 1668 if (ret)
1914 DRM_ERROR("MOCS failed to program: expect performance issues.\n"); 1669 DRM_ERROR("MOCS failed to program: expect performance issues.\n");
1915 1670
1916 return intel_lr_context_render_state_init(req); 1671 return i915_gem_render_state_init(req);
1917} 1672}
1918 1673
1919/** 1674/**
1920 * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer 1675 * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
1921 *
1922 * @engine: Engine Command Streamer. 1676 * @engine: Engine Command Streamer.
1923 *
1924 */ 1677 */
1925void intel_logical_ring_cleanup(struct intel_engine_cs *engine) 1678void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
1926{ 1679{
@@ -1939,17 +1692,13 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
1939 dev_priv = engine->i915; 1692 dev_priv = engine->i915;
1940 1693
1941 if (engine->buffer) { 1694 if (engine->buffer) {
1942 intel_logical_ring_stop(engine);
1943 WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0); 1695 WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
1944 } 1696 }
1945 1697
1946 if (engine->cleanup) 1698 if (engine->cleanup)
1947 engine->cleanup(engine); 1699 engine->cleanup(engine);
1948 1700
1949 i915_cmd_parser_fini_ring(engine); 1701 intel_engine_cleanup_common(engine);
1950 i915_gem_batch_pool_fini(&engine->batch_pool);
1951
1952 intel_engine_fini_breadcrumbs(engine);
1953 1702
1954 if (engine->status_page.obj) { 1703 if (engine->status_page.obj) {
1955 i915_gem_object_unpin_map(engine->status_page.obj); 1704 i915_gem_object_unpin_map(engine->status_page.obj);
@@ -1965,13 +1714,23 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
1965 engine->i915 = NULL; 1714 engine->i915 = NULL;
1966} 1715}
1967 1716
1717void intel_execlists_enable_submission(struct drm_i915_private *dev_priv)
1718{
1719 struct intel_engine_cs *engine;
1720
1721 for_each_engine(engine, dev_priv)
1722 engine->submit_request = execlists_submit_request;
1723}
1724
1968static void 1725static void
1969logical_ring_default_vfuncs(struct intel_engine_cs *engine) 1726logical_ring_default_vfuncs(struct intel_engine_cs *engine)
1970{ 1727{
1971 /* Default vfuncs which can be overriden by each engine. */ 1728 /* Default vfuncs which can be overriden by each engine. */
1972 engine->init_hw = gen8_init_common_ring; 1729 engine->init_hw = gen8_init_common_ring;
1973 engine->emit_request = gen8_emit_request;
1974 engine->emit_flush = gen8_emit_flush; 1730 engine->emit_flush = gen8_emit_flush;
1731 engine->emit_request = gen8_emit_request;
1732 engine->submit_request = execlists_submit_request;
1733
1975 engine->irq_enable = gen8_logical_ring_enable_irq; 1734 engine->irq_enable = gen8_logical_ring_enable_irq;
1976 engine->irq_disable = gen8_logical_ring_disable_irq; 1735 engine->irq_disable = gen8_logical_ring_disable_irq;
1977 engine->emit_bb_start = gen8_emit_bb_start; 1736 engine->emit_bb_start = gen8_emit_bb_start;
@@ -1980,8 +1739,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
1980} 1739}
1981 1740
1982static inline void 1741static inline void
1983logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift) 1742logical_ring_default_irqs(struct intel_engine_cs *engine)
1984{ 1743{
1744 unsigned shift = engine->irq_shift;
1985 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; 1745 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
1986 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; 1746 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
1987} 1747}
@@ -2004,17 +1764,46 @@ lrc_setup_hws(struct intel_engine_cs *engine,
2004 return 0; 1764 return 0;
2005} 1765}
2006 1766
1767static void
1768logical_ring_setup(struct intel_engine_cs *engine)
1769{
1770 struct drm_i915_private *dev_priv = engine->i915;
1771 enum forcewake_domains fw_domains;
1772
1773 intel_engine_setup_common(engine);
1774
1775 /* Intentionally left blank. */
1776 engine->buffer = NULL;
1777
1778 fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
1779 RING_ELSP(engine),
1780 FW_REG_WRITE);
1781
1782 fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
1783 RING_CONTEXT_STATUS_PTR(engine),
1784 FW_REG_READ | FW_REG_WRITE);
1785
1786 fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
1787 RING_CONTEXT_STATUS_BUF_BASE(engine),
1788 FW_REG_READ);
1789
1790 engine->fw_domains = fw_domains;
1791
1792 tasklet_init(&engine->irq_tasklet,
1793 intel_lrc_irq_handler, (unsigned long)engine);
1794
1795 logical_ring_init_platform_invariants(engine);
1796 logical_ring_default_vfuncs(engine);
1797 logical_ring_default_irqs(engine);
1798}
1799
2007static int 1800static int
2008logical_ring_init(struct intel_engine_cs *engine) 1801logical_ring_init(struct intel_engine_cs *engine)
2009{ 1802{
2010 struct i915_gem_context *dctx = engine->i915->kernel_context; 1803 struct i915_gem_context *dctx = engine->i915->kernel_context;
2011 int ret; 1804 int ret;
2012 1805
2013 ret = intel_engine_init_breadcrumbs(engine); 1806 ret = intel_engine_init_common(engine);
2014 if (ret)
2015 goto error;
2016
2017 ret = i915_cmd_parser_init_ring(engine);
2018 if (ret) 1807 if (ret)
2019 goto error; 1808 goto error;
2020 1809
@@ -2044,11 +1833,13 @@ error:
2044 return ret; 1833 return ret;
2045} 1834}
2046 1835
2047static int logical_render_ring_init(struct intel_engine_cs *engine) 1836int logical_render_ring_init(struct intel_engine_cs *engine)
2048{ 1837{
2049 struct drm_i915_private *dev_priv = engine->i915; 1838 struct drm_i915_private *dev_priv = engine->i915;
2050 int ret; 1839 int ret;
2051 1840
1841 logical_ring_setup(engine);
1842
2052 if (HAS_L3_DPF(dev_priv)) 1843 if (HAS_L3_DPF(dev_priv))
2053 engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; 1844 engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
2054 1845
@@ -2085,160 +1876,11 @@ static int logical_render_ring_init(struct intel_engine_cs *engine)
2085 return ret; 1876 return ret;
2086} 1877}
2087 1878
2088static const struct logical_ring_info { 1879int logical_xcs_ring_init(struct intel_engine_cs *engine)
2089 const char *name;
2090 unsigned exec_id;
2091 unsigned guc_id;
2092 u32 mmio_base;
2093 unsigned irq_shift;
2094 int (*init)(struct intel_engine_cs *engine);
2095} logical_rings[] = {
2096 [RCS] = {
2097 .name = "render ring",
2098 .exec_id = I915_EXEC_RENDER,
2099 .guc_id = GUC_RENDER_ENGINE,
2100 .mmio_base = RENDER_RING_BASE,
2101 .irq_shift = GEN8_RCS_IRQ_SHIFT,
2102 .init = logical_render_ring_init,
2103 },
2104 [BCS] = {
2105 .name = "blitter ring",
2106 .exec_id = I915_EXEC_BLT,
2107 .guc_id = GUC_BLITTER_ENGINE,
2108 .mmio_base = BLT_RING_BASE,
2109 .irq_shift = GEN8_BCS_IRQ_SHIFT,
2110 .init = logical_ring_init,
2111 },
2112 [VCS] = {
2113 .name = "bsd ring",
2114 .exec_id = I915_EXEC_BSD,
2115 .guc_id = GUC_VIDEO_ENGINE,
2116 .mmio_base = GEN6_BSD_RING_BASE,
2117 .irq_shift = GEN8_VCS1_IRQ_SHIFT,
2118 .init = logical_ring_init,
2119 },
2120 [VCS2] = {
2121 .name = "bsd2 ring",
2122 .exec_id = I915_EXEC_BSD,
2123 .guc_id = GUC_VIDEO_ENGINE2,
2124 .mmio_base = GEN8_BSD2_RING_BASE,
2125 .irq_shift = GEN8_VCS2_IRQ_SHIFT,
2126 .init = logical_ring_init,
2127 },
2128 [VECS] = {
2129 .name = "video enhancement ring",
2130 .exec_id = I915_EXEC_VEBOX,
2131 .guc_id = GUC_VIDEOENHANCE_ENGINE,
2132 .mmio_base = VEBOX_RING_BASE,
2133 .irq_shift = GEN8_VECS_IRQ_SHIFT,
2134 .init = logical_ring_init,
2135 },
2136};
2137
2138static struct intel_engine_cs *
2139logical_ring_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id)
2140{ 1880{
2141 const struct logical_ring_info *info = &logical_rings[id]; 1881 logical_ring_setup(engine);
2142 struct intel_engine_cs *engine = &dev_priv->engine[id];
2143 enum forcewake_domains fw_domains;
2144 1882
2145 engine->id = id; 1883 return logical_ring_init(engine);
2146 engine->name = info->name;
2147 engine->exec_id = info->exec_id;
2148 engine->guc_id = info->guc_id;
2149 engine->mmio_base = info->mmio_base;
2150
2151 engine->i915 = dev_priv;
2152
2153 /* Intentionally left blank. */
2154 engine->buffer = NULL;
2155
2156 fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
2157 RING_ELSP(engine),
2158 FW_REG_WRITE);
2159
2160 fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
2161 RING_CONTEXT_STATUS_PTR(engine),
2162 FW_REG_READ | FW_REG_WRITE);
2163
2164 fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
2165 RING_CONTEXT_STATUS_BUF_BASE(engine),
2166 FW_REG_READ);
2167
2168 engine->fw_domains = fw_domains;
2169
2170 INIT_LIST_HEAD(&engine->active_list);
2171 INIT_LIST_HEAD(&engine->request_list);
2172 INIT_LIST_HEAD(&engine->buffers);
2173 INIT_LIST_HEAD(&engine->execlist_queue);
2174 spin_lock_init(&engine->execlist_lock);
2175
2176 tasklet_init(&engine->irq_tasklet,
2177 intel_lrc_irq_handler, (unsigned long)engine);
2178
2179 logical_ring_init_platform_invariants(engine);
2180 logical_ring_default_vfuncs(engine);
2181 logical_ring_default_irqs(engine, info->irq_shift);
2182
2183 intel_engine_init_hangcheck(engine);
2184 i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool);
2185
2186 return engine;
2187}
2188
2189/**
2190 * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
2191 * @dev: DRM device.
2192 *
2193 * This function inits the engines for an Execlists submission style (the
2194 * equivalent in the legacy ringbuffer submission world would be
2195 * i915_gem_init_engines). It does it only for those engines that are present in
2196 * the hardware.
2197 *
2198 * Return: non-zero if the initialization failed.
2199 */
2200int intel_logical_rings_init(struct drm_device *dev)
2201{
2202 struct drm_i915_private *dev_priv = to_i915(dev);
2203 unsigned int mask = 0;
2204 unsigned int i;
2205 int ret;
2206
2207 WARN_ON(INTEL_INFO(dev_priv)->ring_mask &
2208 GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES));
2209
2210 for (i = 0; i < ARRAY_SIZE(logical_rings); i++) {
2211 if (!HAS_ENGINE(dev_priv, i))
2212 continue;
2213
2214 if (!logical_rings[i].init)
2215 continue;
2216
2217 ret = logical_rings[i].init(logical_ring_setup(dev_priv, i));
2218 if (ret)
2219 goto cleanup;
2220
2221 mask |= ENGINE_MASK(i);
2222 }
2223
2224 /*
2225 * Catch failures to update logical_rings table when the new engines
2226 * are added to the driver by a warning and disabling the forgotten
2227 * engines.
2228 */
2229 if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) {
2230 struct intel_device_info *info =
2231 (struct intel_device_info *)&dev_priv->info;
2232 info->ring_mask = mask;
2233 }
2234
2235 return 0;
2236
2237cleanup:
2238 for (i = 0; i < I915_NUM_ENGINES; i++)
2239 intel_logical_ring_cleanup(&dev_priv->engine[i]);
2240
2241 return ret;
2242} 1884}
2243 1885
2244static u32 1886static u32
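
The static logical_rings[] table and intel_logical_rings_init() above are removed; each engine is now brought up through logical_render_ring_init() or logical_xcs_ring_init(), with the per-engine bookkeeping handled by the caller. As a rough model of the mask check the old loop performed (initialised engines versus the advertised ring mask), here is a stand-alone sketch with invented engine ids and masks:

/*
 * Toy model of the "did we initialise every advertised engine?" check
 * from the removed intel_logical_rings_init(). Ids and masks are invented.
 */
#include <stdio.h>

enum toy_engine_id { RCS, BCS, VCS, VCS2, VECS, NUM_ENGINES };

#define ENGINE_BIT(id) (1u << (id))

int main(void)
{
        unsigned int advertised = ENGINE_BIT(RCS) | ENGINE_BIT(BCS) |
                                  ENGINE_BIT(VCS) | ENGINE_BIT(VECS);
        unsigned int initialised = 0;

        /* Pretend only three engines came up successfully. */
        initialised |= ENGINE_BIT(RCS);
        initialised |= ENGINE_BIT(BCS);
        initialised |= ENGINE_BIT(VCS);

        if (initialised != advertised)
                printf("warning: engine mask mismatch (have %#x, want %#x)\n",
                       initialised, advertised);
        else
                printf("all advertised engines initialised\n");

        return 0;
}
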
@@ -2309,7 +1951,7 @@ static int
2309populate_lr_context(struct i915_gem_context *ctx, 1951populate_lr_context(struct i915_gem_context *ctx,
2310 struct drm_i915_gem_object *ctx_obj, 1952 struct drm_i915_gem_object *ctx_obj,
2311 struct intel_engine_cs *engine, 1953 struct intel_engine_cs *engine,
2312 struct intel_ringbuffer *ringbuf) 1954 struct intel_ring *ring)
2313{ 1955{
2314 struct drm_i915_private *dev_priv = ctx->i915; 1956 struct drm_i915_private *dev_priv = ctx->i915;
2315 struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; 1957 struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
@@ -2362,7 +2004,7 @@ populate_lr_context(struct i915_gem_context *ctx,
2362 RING_START(engine->mmio_base), 0); 2004 RING_START(engine->mmio_base), 0);
2363 ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, 2005 ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL,
2364 RING_CTL(engine->mmio_base), 2006 RING_CTL(engine->mmio_base),
2365 ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); 2007 ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
2366 ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, 2008 ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U,
2367 RING_BBADDR_UDW(engine->mmio_base), 0); 2009 RING_BBADDR_UDW(engine->mmio_base), 0);
2368 ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, 2010 ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L,
@@ -2484,26 +2126,13 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
2484 return ret; 2126 return ret;
2485} 2127}
2486 2128
2487/**
2488 * execlists_context_deferred_alloc() - create the LRC specific bits of a context
2489 * @ctx: LR context to create.
2490 * @engine: engine to be used with the context.
2491 *
2492 * This function can be called more than once, with different engines, if we plan
2493 * to use the context with them. The context backing objects and the ringbuffers
2494 * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why
2495 * the creation is a deferred call: it's better to make sure first that we need to use
2496 * a given ring with the context.
2497 *
2498 * Return: non-zero on error.
2499 */
2500static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, 2129static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
2501 struct intel_engine_cs *engine) 2130 struct intel_engine_cs *engine)
2502{ 2131{
2503 struct drm_i915_gem_object *ctx_obj; 2132 struct drm_i915_gem_object *ctx_obj;
2504 struct intel_context *ce = &ctx->engine[engine->id]; 2133 struct intel_context *ce = &ctx->engine[engine->id];
2505 uint32_t context_size; 2134 uint32_t context_size;
2506 struct intel_ringbuffer *ringbuf; 2135 struct intel_ring *ring;
2507 int ret; 2136 int ret;
2508 2137
2509 WARN_ON(ce->state); 2138 WARN_ON(ce->state);
@@ -2519,29 +2148,29 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
2519 return PTR_ERR(ctx_obj); 2148 return PTR_ERR(ctx_obj);
2520 } 2149 }
2521 2150
2522 ringbuf = intel_engine_create_ringbuffer(engine, ctx->ring_size); 2151 ring = intel_engine_create_ring(engine, ctx->ring_size);
2523 if (IS_ERR(ringbuf)) { 2152 if (IS_ERR(ring)) {
2524 ret = PTR_ERR(ringbuf); 2153 ret = PTR_ERR(ring);
2525 goto error_deref_obj; 2154 goto error_deref_obj;
2526 } 2155 }
2527 2156
2528 ret = populate_lr_context(ctx, ctx_obj, engine, ringbuf); 2157 ret = populate_lr_context(ctx, ctx_obj, engine, ring);
2529 if (ret) { 2158 if (ret) {
2530 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); 2159 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
2531 goto error_ringbuf; 2160 goto error_ring_free;
2532 } 2161 }
2533 2162
2534 ce->ringbuf = ringbuf; 2163 ce->ring = ring;
2535 ce->state = ctx_obj; 2164 ce->state = ctx_obj;
2536 ce->initialised = engine->init_context == NULL; 2165 ce->initialised = engine->init_context == NULL;
2537 2166
2538 return 0; 2167 return 0;
2539 2168
2540error_ringbuf: 2169error_ring_free:
2541 intel_ringbuffer_free(ringbuf); 2170 intel_ring_free(ring);
2542error_deref_obj: 2171error_deref_obj:
2543 drm_gem_object_unreference(&ctx_obj->base); 2172 i915_gem_object_put(ctx_obj);
2544 ce->ringbuf = NULL; 2173 ce->ring = NULL;
2545 ce->state = NULL; 2174 ce->state = NULL;
2546 return ret; 2175 return ret;
2547} 2176}
@@ -2572,7 +2201,7 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv,
2572 2201
2573 i915_gem_object_unpin_map(ctx_obj); 2202 i915_gem_object_unpin_map(ctx_obj);
2574 2203
2575 ce->ringbuf->head = 0; 2204 ce->ring->head = 0;
2576 ce->ringbuf->tail = 0; 2205 ce->ring->tail = 0;
2577 } 2206 }
2578} 2207}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 2b8255c19dcc..a52cf57dbd40 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -29,17 +29,17 @@
29#define GEN8_LR_CONTEXT_ALIGN 4096 29#define GEN8_LR_CONTEXT_ALIGN 4096
30 30
31/* Execlists regs */ 31/* Execlists regs */
32#define RING_ELSP(ring) _MMIO((ring)->mmio_base + 0x230) 32#define RING_ELSP(engine) _MMIO((engine)->mmio_base + 0x230)
33#define RING_EXECLIST_STATUS_LO(ring) _MMIO((ring)->mmio_base + 0x234) 33#define RING_EXECLIST_STATUS_LO(engine) _MMIO((engine)->mmio_base + 0x234)
34#define RING_EXECLIST_STATUS_HI(ring) _MMIO((ring)->mmio_base + 0x234 + 4) 34#define RING_EXECLIST_STATUS_HI(engine) _MMIO((engine)->mmio_base + 0x234 + 4)
35#define RING_CONTEXT_CONTROL(ring) _MMIO((ring)->mmio_base + 0x244) 35#define RING_CONTEXT_CONTROL(engine) _MMIO((engine)->mmio_base + 0x244)
36#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) 36#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3)
37#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) 37#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0)
38#define CTX_CTRL_RS_CTX_ENABLE (1 << 1) 38#define CTX_CTRL_RS_CTX_ENABLE (1 << 1)
39#define RING_CONTEXT_STATUS_BUF_BASE(ring) _MMIO((ring)->mmio_base + 0x370) 39#define RING_CONTEXT_STATUS_BUF_BASE(engine) _MMIO((engine)->mmio_base + 0x370)
40#define RING_CONTEXT_STATUS_BUF_LO(ring, i) _MMIO((ring)->mmio_base + 0x370 + (i) * 8) 40#define RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8)
41#define RING_CONTEXT_STATUS_BUF_HI(ring, i) _MMIO((ring)->mmio_base + 0x370 + (i) * 8 + 4) 41#define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4)
42#define RING_CONTEXT_STATUS_PTR(ring) _MMIO((ring)->mmio_base + 0x3a0) 42#define RING_CONTEXT_STATUS_PTR(engine) _MMIO((engine)->mmio_base + 0x3a0)
43 43
44/* The docs specify that the write pointer wraps around after 5h, "After status 44/* The docs specify that the write pointer wraps around after 5h, "After status
45 * is written out to the last available status QW at offset 5h, this pointer 45 * is written out to the last available status QW at offset 5h, this pointer
@@ -67,35 +67,10 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
67int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request); 67int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request);
68void intel_logical_ring_stop(struct intel_engine_cs *engine); 68void intel_logical_ring_stop(struct intel_engine_cs *engine);
69void intel_logical_ring_cleanup(struct intel_engine_cs *engine); 69void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
70int intel_logical_rings_init(struct drm_device *dev); 70int logical_render_ring_init(struct intel_engine_cs *engine);
71int logical_xcs_ring_init(struct intel_engine_cs *engine);
71 72
72int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); 73int intel_engines_init(struct drm_device *dev);
73/**
74 * intel_logical_ring_advance() - advance the ringbuffer tail
75 * @ringbuf: Ringbuffer to advance.
76 *
77 * The tail is only updated in our logical ringbuffer struct.
78 */
79static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
80{
81 ringbuf->tail &= ringbuf->size - 1;
82}
83/**
84 * intel_logical_ring_emit() - write a DWORD to the ringbuffer.
85 * @ringbuf: Ringbuffer to write to.
86 * @data: DWORD to write.
87 */
88static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
89 u32 data)
90{
91 iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
92 ringbuf->tail += 4;
93}
94static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf,
95 i915_reg_t reg)
96{
97 intel_logical_ring_emit(ringbuf, i915_mmio_reg_offset(reg));
98}
99 74
100/* Logical Ring Contexts */ 75/* Logical Ring Contexts */
101 76
@@ -120,10 +95,7 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
120/* Execlists */ 95/* Execlists */
121int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, 96int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
122 int enable_execlists); 97 int enable_execlists);
123struct i915_execbuffer_params; 98void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
124int intel_execlists_submission(struct i915_execbuffer_params *params,
125 struct drm_i915_gem_execbuffer2 *args,
126 struct list_head *vmas);
127 99
128void intel_execlists_cancel_requests(struct intel_engine_cs *engine); 100void intel_execlists_cancel_requests(struct intel_engine_cs *engine);
129 101
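
The intel_logical_ring_emit()/intel_logical_ring_advance() inlines deleted above wrote a dword at the ring tail, bumped the tail by four bytes, and masked it back into range on advance; the common intel_ring_emit()/intel_ring_advance() helpers now used throughout follow the same shape. A user-space model of that behaviour, with plain memory in place of iowrite32() and invented toy_ring_* names, looks like this:

/*
 * User-space model of the emit/advance pattern: write a dword at the
 * tail, bump the tail, and mask it back into the (power-of-two sized)
 * buffer on advance. toy_ring_* names and the plain memory write stand
 * in for the iowrite32()-based kernel helpers.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_ring {
        unsigned int *vaddr;    /* backing storage, size bytes */
        unsigned int size;      /* power of two, in bytes */
        unsigned int tail;      /* byte offset of the next write */
};

static void toy_ring_emit(struct toy_ring *ring, unsigned int data)
{
        ring->vaddr[ring->tail / 4] = data;
        ring->tail += 4;
}

static void toy_ring_advance(struct toy_ring *ring)
{
        /* Keep the tail inside the buffer, as the removed inline did. */
        ring->tail &= ring->size - 1;
}

int main(void)
{
        struct toy_ring ring = {
                .vaddr = calloc(4, sizeof(unsigned int)),
                .size = 4 * sizeof(unsigned int),
                .tail = 0,
        };
        int i;

        if (!ring.vaddr)
                return 1;

        /* Emit six dwords into a four-dword ring to show the wrap. */
        for (i = 0; i < 6; i++) {
                toy_ring_emit(&ring, 0x1000 + i);
                toy_ring_advance(&ring);
        }

        printf("tail after wrap: %u bytes\n", ring.tail);
        free(ring.vaddr);
        return 0;
}
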
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 927825f5b284..80bb9247ce66 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
97 * end. 97 * end.
98 */ 98 */
99static const struct drm_i915_mocs_entry skylake_mocs_table[] = { 99static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
100 { /* 0x00000009 */ 100 [I915_MOCS_UNCACHED] = {
101 /* 0x00000009 */
101 .control_value = LE_CACHEABILITY(LE_UC) | 102 .control_value = LE_CACHEABILITY(LE_UC) |
102 LE_TGT_CACHE(LE_TC_LLC_ELLC) | 103 LE_TGT_CACHE(LE_TC_LLC_ELLC) |
103 LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | 104 LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
@@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
106 /* 0x0010 */ 107 /* 0x0010 */
107 .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), 108 .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
108 }, 109 },
109 { 110 [I915_MOCS_PTE] = {
110 /* 0x00000038 */ 111 /* 0x00000038 */
111 .control_value = LE_CACHEABILITY(LE_PAGETABLE) | 112 .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
112 LE_TGT_CACHE(LE_TC_LLC_ELLC) | 113 LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
115 /* 0x0030 */ 116 /* 0x0030 */
116 .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), 117 .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
117 }, 118 },
118 { 119 [I915_MOCS_CACHED] = {
119 /* 0x0000003b */ 120 /* 0x0000003b */
120 .control_value = LE_CACHEABILITY(LE_WB) | 121 .control_value = LE_CACHEABILITY(LE_WB) |
121 LE_TGT_CACHE(LE_TC_LLC_ELLC) | 122 LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
128 129
129/* NOTE: the LE_TGT_CACHE is not used on Broxton */ 130/* NOTE: the LE_TGT_CACHE is not used on Broxton */
130static const struct drm_i915_mocs_entry broxton_mocs_table[] = { 131static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
131 { 132 [I915_MOCS_UNCACHED] = {
132 /* 0x00000009 */ 133 /* 0x00000009 */
133 .control_value = LE_CACHEABILITY(LE_UC) | 134 .control_value = LE_CACHEABILITY(LE_UC) |
134 LE_TGT_CACHE(LE_TC_LLC_ELLC) | 135 LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
138 /* 0x0010 */ 139 /* 0x0010 */
139 .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), 140 .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
140 }, 141 },
141 { 142 [I915_MOCS_PTE] = {
142 /* 0x00000038 */ 143 /* 0x00000038 */
143 .control_value = LE_CACHEABILITY(LE_PAGETABLE) | 144 .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
144 LE_TGT_CACHE(LE_TC_LLC_ELLC) | 145 LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
148 /* 0x0030 */ 149 /* 0x0030 */
149 .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), 150 .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
150 }, 151 },
151 { 152 [I915_MOCS_CACHED] = {
152 /* 0x00000039 */ 153 /* 0x00000039 */
153 .control_value = LE_CACHEABILITY(LE_UC) | 154 .control_value = LE_CACHEABILITY(LE_UC) |
154 LE_TGT_CACHE(LE_TC_LLC_ELLC) | 155 LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -203,9 +204,9 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
203 return result; 204 return result;
204} 205}
205 206
206static i915_reg_t mocs_register(enum intel_engine_id ring, int index) 207static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
207{ 208{
208 switch (ring) { 209 switch (engine_id) {
209 case RCS: 210 case RCS:
210 return GEN9_GFX_MOCS(index); 211 return GEN9_GFX_MOCS(index);
211 case VCS: 212 case VCS:
@@ -217,7 +218,7 @@ static i915_reg_t mocs_register(enum intel_engine_id ring, int index)
217 case VCS2: 218 case VCS2:
218 return GEN9_MFX1_MOCS(index); 219 return GEN9_MFX1_MOCS(index);
219 default: 220 default:
220 MISSING_CASE(ring); 221 MISSING_CASE(engine_id);
221 return INVALID_MMIO_REG; 222 return INVALID_MMIO_REG;
222 } 223 }
223} 224}
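
The Skylake and Broxton MOCS tables above switch from positional to designated initializers so that each entry is visibly tied to the I915_MOCS_* index userspace selects. A stand-alone illustration of the idiom, using a toy enum and made-up values rather than the real MOCS settings:

/*
 * Designated initializers keep each table slot tied to its enum index
 * even if the entries are reordered in the source. Toy enum and values
 * only; these are not the real MOCS settings.
 */
#include <stdio.h>

enum toy_mocs_index {
        TOY_MOCS_UNCACHED,
        TOY_MOCS_PTE,
        TOY_MOCS_CACHED,
        TOY_MOCS_COUNT,
};

struct toy_mocs_entry {
        unsigned int control_value;
        unsigned int l3cc_value;
};

static const struct toy_mocs_entry toy_mocs_table[TOY_MOCS_COUNT] = {
        /* Initializer order does not matter; the index does. */
        [TOY_MOCS_CACHED]   = { .control_value = 0x3b, .l3cc_value = 0x30 },
        [TOY_MOCS_UNCACHED] = { .control_value = 0x09, .l3cc_value = 0x10 },
        [TOY_MOCS_PTE]      = { .control_value = 0x38, .l3cc_value = 0x30 },
};

int main(void)
{
        printf("uncached control = %#x\n",
               toy_mocs_table[TOY_MOCS_UNCACHED].control_value);
        printf("cached   control = %#x\n",
               toy_mocs_table[TOY_MOCS_CACHED].control_value);
        return 0;
}
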
@@ -275,7 +276,7 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine)
275static int emit_mocs_control_table(struct drm_i915_gem_request *req, 276static int emit_mocs_control_table(struct drm_i915_gem_request *req,
276 const struct drm_i915_mocs_table *table) 277 const struct drm_i915_mocs_table *table)
277{ 278{
278 struct intel_ringbuffer *ringbuf = req->ringbuf; 279 struct intel_ring *ring = req->ring;
279 enum intel_engine_id engine = req->engine->id; 280 enum intel_engine_id engine = req->engine->id;
280 unsigned int index; 281 unsigned int index;
281 int ret; 282 int ret;
@@ -287,14 +288,11 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req,
287 if (ret) 288 if (ret)
288 return ret; 289 return ret;
289 290
290 intel_logical_ring_emit(ringbuf, 291 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES));
291 MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES));
292 292
293 for (index = 0; index < table->size; index++) { 293 for (index = 0; index < table->size; index++) {
294 intel_logical_ring_emit_reg(ringbuf, 294 intel_ring_emit_reg(ring, mocs_register(engine, index));
295 mocs_register(engine, index)); 295 intel_ring_emit(ring, table->table[index].control_value);
296 intel_logical_ring_emit(ringbuf,
297 table->table[index].control_value);
298 } 296 }
299 297
300 /* 298 /*
@@ -306,14 +304,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req,
306 * that value to all the used entries. 304 * that value to all the used entries.
307 */ 305 */
308 for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { 306 for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
309 intel_logical_ring_emit_reg(ringbuf, 307 intel_ring_emit_reg(ring, mocs_register(engine, index));
310 mocs_register(engine, index)); 308 intel_ring_emit(ring, table->table[0].control_value);
311 intel_logical_ring_emit(ringbuf,
312 table->table[0].control_value);
313 } 309 }
314 310
315 intel_logical_ring_emit(ringbuf, MI_NOOP); 311 intel_ring_emit(ring, MI_NOOP);
316 intel_logical_ring_advance(ringbuf); 312 intel_ring_advance(ring);
317 313
318 return 0; 314 return 0;
319} 315}
@@ -340,7 +336,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
340static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, 336static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req,
341 const struct drm_i915_mocs_table *table) 337 const struct drm_i915_mocs_table *table)
342{ 338{
343 struct intel_ringbuffer *ringbuf = req->ringbuf; 339 struct intel_ring *ring = req->ring;
344 unsigned int i; 340 unsigned int i;
345 int ret; 341 int ret;
346 342
@@ -351,19 +347,18 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req,
351 if (ret) 347 if (ret)
352 return ret; 348 return ret;
353 349
354 intel_logical_ring_emit(ringbuf, 350 intel_ring_emit(ring,
355 MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); 351 MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2));
356 352
357 for (i = 0; i < table->size/2; i++) { 353 for (i = 0; i < table->size/2; i++) {
358 intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); 354 intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i));
359 intel_logical_ring_emit(ringbuf, 355 intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1));
360 l3cc_combine(table, 2*i, 2*i+1));
361 } 356 }
362 357
363 if (table->size & 0x01) { 358 if (table->size & 0x01) {
364 /* Odd table size - 1 left over */ 359 /* Odd table size - 1 left over */
365 intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); 360 intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i));
366 intel_logical_ring_emit(ringbuf, l3cc_combine(table, 2*i, 0)); 361 intel_ring_emit(ring, l3cc_combine(table, 2*i, 0));
367 i++; 362 i++;
368 } 363 }
369 364
@@ -373,12 +368,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req,
373 * they are reserved by the hardware. 368 * they are reserved by the hardware.
374 */ 369 */
375 for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { 370 for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
376 intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); 371 intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i));
377 intel_logical_ring_emit(ringbuf, l3cc_combine(table, 0, 0)); 372 intel_ring_emit(ring, l3cc_combine(table, 0, 0));
378 } 373 }
379 374
380 intel_logical_ring_emit(ringbuf, MI_NOOP); 375 intel_ring_emit(ring, MI_NOOP);
381 intel_logical_ring_advance(ringbuf); 376 intel_ring_advance(ring);
382 377
383 return 0; 378 return 0;
384} 379}
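
emit_mocs_l3cc_table() above programs two table entries per GEN9_LNCFCMOCS register, pairing entries 2*i and 2*i+1 with l3cc_combine() and padding an odd-sized table with entry 0. The sketch below models that pairing; the 16-bit low/high packing is an assumption for illustration and the names are invented, not the kernel implementation.

/*
 * Toy model of pairing two l3cc entries per 32-bit register write, with
 * entry 0 used to pad an odd-sized table. The 16-bit low/high packing is
 * assumed for illustration; this is not the kernel's l3cc_combine().
 */
#include <stdio.h>
#include <stdint.h>

static unsigned int toy_l3cc_combine(const uint16_t *table, int low, int high)
{
        return (unsigned int)table[low] | ((unsigned int)table[high] << 16);
}

int main(void)
{
        const uint16_t table[] = { 0x0010, 0x0030, 0x003b };    /* odd size */
        const int size = 3;
        int i;

        for (i = 0; i < size / 2; i++)
                printf("reg[%d] = %#010x\n", i,
                       toy_l3cc_combine(table, 2 * i, 2 * i + 1));

        if (size & 1) {
                /* One entry left over: pair it with entry 0. */
                printf("reg[%d] = %#010x\n", i,
                       toy_l3cc_combine(table, 2 * i, 0));
                i++;
        }

        return 0;
}
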
diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/intel_mocs.h
index 4640299e04ec..a8bd9f7bfece 100644
--- a/drivers/gpu/drm/i915/intel_mocs.h
+++ b/drivers/gpu/drm/i915/intel_mocs.h
@@ -54,6 +54,6 @@
54 54
55int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req); 55int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req);
56void intel_mocs_init_l3cc_table(struct drm_device *dev); 56void intel_mocs_init_l3cc_table(struct drm_device *dev);
57int intel_mocs_init_engine(struct intel_engine_cs *ring); 57int intel_mocs_init_engine(struct intel_engine_cs *engine);
58 58
59#endif 59#endif
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 3212d8806b5a..90f3ab424e01 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -30,6 +30,7 @@
30#include "i915_drv.h" 30#include "i915_drv.h"
31#include "i915_reg.h" 31#include "i915_reg.h"
32#include "intel_drv.h" 32#include "intel_drv.h"
33#include "intel_frontbuffer.h"
33 34
34/* Limits for overlay size. According to intel doc, the real limits are: 35/* Limits for overlay size. According to intel doc, the real limits are:
35 * Y width: 4095, UV width (planar): 2047, Y height: 2047, 36 * Y width: 4095, UV width (planar): 2047, Y height: 2047,
@@ -183,8 +184,7 @@ struct intel_overlay {
183 u32 flip_addr; 184 u32 flip_addr;
184 struct drm_i915_gem_object *reg_bo; 185 struct drm_i915_gem_object *reg_bo;
185 /* flip handling */ 186 /* flip handling */
186 struct drm_i915_gem_request *last_flip_req; 187 struct i915_gem_active last_flip;
187 void (*flip_tail)(struct intel_overlay *);
188}; 188};
189 189
190static struct overlay_registers __iomem * 190static struct overlay_registers __iomem *
@@ -210,37 +210,46 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
210 io_mapping_unmap(regs); 210 io_mapping_unmap(regs);
211} 211}
212 212
213static int intel_overlay_do_wait_request(struct intel_overlay *overlay, 213static void intel_overlay_submit_request(struct intel_overlay *overlay,
214 struct drm_i915_gem_request *req, 214 struct drm_i915_gem_request *req,
215 void (*tail)(struct intel_overlay *)) 215 i915_gem_retire_fn retire)
216{ 216{
217 int ret; 217 GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip,
218 218 &overlay->i915->drm.struct_mutex));
219 WARN_ON(overlay->last_flip_req); 219 overlay->last_flip.retire = retire;
220 i915_gem_request_assign(&overlay->last_flip_req, req); 220 i915_gem_active_set(&overlay->last_flip, req);
221 i915_add_request(req); 221 i915_add_request(req);
222}
222 223
223 overlay->flip_tail = tail; 224static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
224 ret = i915_wait_request(overlay->last_flip_req); 225 struct drm_i915_gem_request *req,
225 if (ret) 226 i915_gem_retire_fn retire)
226 return ret; 227{
228 intel_overlay_submit_request(overlay, req, retire);
229 return i915_gem_active_retire(&overlay->last_flip,
230 &overlay->i915->drm.struct_mutex);
231}
227 232
228 i915_gem_request_assign(&overlay->last_flip_req, NULL); 233static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay)
229 return 0; 234{
235 struct drm_i915_private *dev_priv = overlay->i915;
236 struct intel_engine_cs *engine = &dev_priv->engine[RCS];
237
238 return i915_gem_request_alloc(engine, dev_priv->kernel_context);
230} 239}
231 240
232/* overlay needs to be disable in OCMD reg */ 241/* overlay needs to be disable in OCMD reg */
233static int intel_overlay_on(struct intel_overlay *overlay) 242static int intel_overlay_on(struct intel_overlay *overlay)
234{ 243{
235 struct drm_i915_private *dev_priv = overlay->i915; 244 struct drm_i915_private *dev_priv = overlay->i915;
236 struct intel_engine_cs *engine = &dev_priv->engine[RCS];
237 struct drm_i915_gem_request *req; 245 struct drm_i915_gem_request *req;
246 struct intel_ring *ring;
238 int ret; 247 int ret;
239 248
240 WARN_ON(overlay->active); 249 WARN_ON(overlay->active);
241 WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); 250 WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
242 251
243 req = i915_gem_request_alloc(engine, NULL); 252 req = alloc_request(overlay);
244 if (IS_ERR(req)) 253 if (IS_ERR(req))
245 return PTR_ERR(req); 254 return PTR_ERR(req);
246 255
@@ -252,11 +261,12 @@ static int intel_overlay_on(struct intel_overlay *overlay)
252 261
253 overlay->active = true; 262 overlay->active = true;
254 263
255 intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_ON); 264 ring = req->ring;
256 intel_ring_emit(engine, overlay->flip_addr | OFC_UPDATE); 265 intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
257 intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); 266 intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
258 intel_ring_emit(engine, MI_NOOP); 267 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
259 intel_ring_advance(engine); 268 intel_ring_emit(ring, MI_NOOP);
269 intel_ring_advance(ring);
260 270
261 return intel_overlay_do_wait_request(overlay, req, NULL); 271 return intel_overlay_do_wait_request(overlay, req, NULL);
262} 272}
@@ -266,8 +276,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
266 bool load_polyphase_filter) 276 bool load_polyphase_filter)
267{ 277{
268 struct drm_i915_private *dev_priv = overlay->i915; 278 struct drm_i915_private *dev_priv = overlay->i915;
269 struct intel_engine_cs *engine = &dev_priv->engine[RCS];
270 struct drm_i915_gem_request *req; 279 struct drm_i915_gem_request *req;
280 struct intel_ring *ring;
271 u32 flip_addr = overlay->flip_addr; 281 u32 flip_addr = overlay->flip_addr;
272 u32 tmp; 282 u32 tmp;
273 int ret; 283 int ret;
@@ -282,7 +292,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
282 if (tmp & (1 << 17)) 292 if (tmp & (1 << 17))
283 DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); 293 DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
284 294
285 req = i915_gem_request_alloc(engine, NULL); 295 req = alloc_request(overlay);
286 if (IS_ERR(req)) 296 if (IS_ERR(req))
287 return PTR_ERR(req); 297 return PTR_ERR(req);
288 298
@@ -292,29 +302,37 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
292 return ret; 302 return ret;
293 } 303 }
294 304
295 intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); 305 ring = req->ring;
296 intel_ring_emit(engine, flip_addr); 306 intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
297 intel_ring_advance(engine); 307 intel_ring_emit(ring, flip_addr);
308 intel_ring_advance(ring);
298 309
299 WARN_ON(overlay->last_flip_req); 310 intel_overlay_submit_request(overlay, req, NULL);
300 i915_gem_request_assign(&overlay->last_flip_req, req);
301 i915_add_request(req);
302 311
303 return 0; 312 return 0;
304} 313}
305 314
306static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) 315static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
316 struct drm_i915_gem_request *req)
307{ 317{
318 struct intel_overlay *overlay =
319 container_of(active, typeof(*overlay), last_flip);
308 struct drm_i915_gem_object *obj = overlay->old_vid_bo; 320 struct drm_i915_gem_object *obj = overlay->old_vid_bo;
309 321
322 i915_gem_track_fb(obj, NULL,
323 INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
324
310 i915_gem_object_ggtt_unpin(obj); 325 i915_gem_object_ggtt_unpin(obj);
311 drm_gem_object_unreference(&obj->base); 326 i915_gem_object_put(obj);
312 327
313 overlay->old_vid_bo = NULL; 328 overlay->old_vid_bo = NULL;
314} 329}
315 330
316static void intel_overlay_off_tail(struct intel_overlay *overlay) 331static void intel_overlay_off_tail(struct i915_gem_active *active,
332 struct drm_i915_gem_request *req)
317{ 333{
334 struct intel_overlay *overlay =
335 container_of(active, typeof(*overlay), last_flip);
318 struct drm_i915_gem_object *obj = overlay->vid_bo; 336 struct drm_i915_gem_object *obj = overlay->vid_bo;
319 337
320 /* never have the overlay hw on without showing a frame */ 338 /* never have the overlay hw on without showing a frame */
@@ -322,7 +340,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
322 return; 340 return;
323 341
324 i915_gem_object_ggtt_unpin(obj); 342 i915_gem_object_ggtt_unpin(obj);
325 drm_gem_object_unreference(&obj->base); 343 i915_gem_object_put(obj);
326 overlay->vid_bo = NULL; 344 overlay->vid_bo = NULL;
327 345
328 overlay->crtc->overlay = NULL; 346 overlay->crtc->overlay = NULL;
@@ -334,8 +352,8 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
334static int intel_overlay_off(struct intel_overlay *overlay) 352static int intel_overlay_off(struct intel_overlay *overlay)
335{ 353{
336 struct drm_i915_private *dev_priv = overlay->i915; 354 struct drm_i915_private *dev_priv = overlay->i915;
337 struct intel_engine_cs *engine = &dev_priv->engine[RCS];
338 struct drm_i915_gem_request *req; 355 struct drm_i915_gem_request *req;
356 struct intel_ring *ring;
339 u32 flip_addr = overlay->flip_addr; 357 u32 flip_addr = overlay->flip_addr;
340 int ret; 358 int ret;
341 359
@@ -347,7 +365,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
347 * of the hw. Do it in both cases */ 365 * of the hw. Do it in both cases */
348 flip_addr |= OFC_UPDATE; 366 flip_addr |= OFC_UPDATE;
349 367
350 req = i915_gem_request_alloc(engine, NULL); 368 req = alloc_request(overlay);
351 if (IS_ERR(req)) 369 if (IS_ERR(req))
352 return PTR_ERR(req); 370 return PTR_ERR(req);
353 371
@@ -357,46 +375,36 @@ static int intel_overlay_off(struct intel_overlay *overlay)
357 return ret; 375 return ret;
358 } 376 }
359 377
378 ring = req->ring;
360 /* wait for overlay to go idle */ 379 /* wait for overlay to go idle */
361 intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); 380 intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
362 intel_ring_emit(engine, flip_addr); 381 intel_ring_emit(ring, flip_addr);
363 intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); 382 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
364 /* turn overlay off */ 383 /* turn overlay off */
365 if (IS_I830(dev_priv)) { 384 if (IS_I830(dev_priv)) {
366 /* Workaround: Don't disable the overlay fully, since otherwise 385 /* Workaround: Don't disable the overlay fully, since otherwise
367 * it dies on the next OVERLAY_ON cmd. */ 386 * it dies on the next OVERLAY_ON cmd. */
368 intel_ring_emit(engine, MI_NOOP); 387 intel_ring_emit(ring, MI_NOOP);
369 intel_ring_emit(engine, MI_NOOP); 388 intel_ring_emit(ring, MI_NOOP);
370 intel_ring_emit(engine, MI_NOOP); 389 intel_ring_emit(ring, MI_NOOP);
371 } else { 390 } else {
372 intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); 391 intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
373 intel_ring_emit(engine, flip_addr); 392 intel_ring_emit(ring, flip_addr);
374 intel_ring_emit(engine, 393 intel_ring_emit(ring,
375 MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); 394 MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
376 } 395 }
377 intel_ring_advance(engine); 396 intel_ring_advance(ring);
378 397
379 return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail); 398 return intel_overlay_do_wait_request(overlay, req,
399 intel_overlay_off_tail);
380} 400}
381 401
382/* recover from an interruption due to a signal 402/* recover from an interruption due to a signal
383 * We have to be careful not to repeat work forever an make forward progess. */ 403 * We have to be careful not to repeat work forever an make forward progess. */
384static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) 404static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
385{ 405{
386 int ret; 406 return i915_gem_active_retire(&overlay->last_flip,
387 407 &overlay->i915->drm.struct_mutex);
388 if (overlay->last_flip_req == NULL)
389 return 0;
390
391 ret = i915_wait_request(overlay->last_flip_req);
392 if (ret)
393 return ret;
394
395 if (overlay->flip_tail)
396 overlay->flip_tail(overlay);
397
398 i915_gem_request_assign(&overlay->last_flip_req, NULL);
399 return 0;
400} 408}
401 409
402/* Wait for pending overlay flip and release old frame. 410/* Wait for pending overlay flip and release old frame.
@@ -406,7 +414,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
406static int intel_overlay_release_old_vid(struct intel_overlay *overlay) 414static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
407{ 415{
408 struct drm_i915_private *dev_priv = overlay->i915; 416 struct drm_i915_private *dev_priv = overlay->i915;
409 struct intel_engine_cs *engine = &dev_priv->engine[RCS];
410 int ret; 417 int ret;
411 418
412 lockdep_assert_held(&dev_priv->drm.struct_mutex); 419 lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -420,8 +427,9 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
420 if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { 427 if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
421 /* synchronous slowpath */ 428 /* synchronous slowpath */
422 struct drm_i915_gem_request *req; 429 struct drm_i915_gem_request *req;
430 struct intel_ring *ring;
423 431
424 req = i915_gem_request_alloc(engine, NULL); 432 req = alloc_request(overlay);
425 if (IS_ERR(req)) 433 if (IS_ERR(req))
426 return PTR_ERR(req); 434 return PTR_ERR(req);
427 435
@@ -431,22 +439,19 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
431 return ret; 439 return ret;
432 } 440 }
433 441
434 intel_ring_emit(engine, 442 ring = req->ring;
443 intel_ring_emit(ring,
435 MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); 444 MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
436 intel_ring_emit(engine, MI_NOOP); 445 intel_ring_emit(ring, MI_NOOP);
437 intel_ring_advance(engine); 446 intel_ring_advance(ring);
438 447
439 ret = intel_overlay_do_wait_request(overlay, req, 448 ret = intel_overlay_do_wait_request(overlay, req,
440 intel_overlay_release_old_vid_tail); 449 intel_overlay_release_old_vid_tail);
441 if (ret) 450 if (ret)
442 return ret; 451 return ret;
443 } 452 } else
444 453 intel_overlay_release_old_vid_tail(&overlay->last_flip, NULL);
445 intel_overlay_release_old_vid_tail(overlay);
446 454
447
448 i915_gem_track_fb(overlay->old_vid_bo, NULL,
449 INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
450 return 0; 455 return 0;
451} 456}
452 457
@@ -459,7 +464,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv)
459 464
460 intel_overlay_release_old_vid(overlay); 465 intel_overlay_release_old_vid(overlay);
461 466
462 overlay->last_flip_req = NULL;
463 overlay->old_xscale = 0; 467 overlay->old_xscale = 0;
464 overlay->old_yscale = 0; 468 overlay->old_yscale = 0;
465 overlay->crtc = NULL; 469 overlay->crtc = NULL;
@@ -836,8 +840,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
836 overlay->old_vid_bo = overlay->vid_bo; 840 overlay->old_vid_bo = overlay->vid_bo;
837 overlay->vid_bo = new_bo; 841 overlay->vid_bo = new_bo;
838 842
839 intel_frontbuffer_flip(&dev_priv->drm, 843 intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe));
840 INTEL_FRONTBUFFER_OVERLAY(pipe));
841 844
842 return 0; 845 return 0;
843 846
@@ -870,12 +873,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
870 iowrite32(0, &regs->OCMD); 873 iowrite32(0, &regs->OCMD);
871 intel_overlay_unmap_regs(overlay, regs); 874 intel_overlay_unmap_regs(overlay, regs);
872 875
873 ret = intel_overlay_off(overlay); 876 return intel_overlay_off(overlay);
874 if (ret != 0)
875 return ret;
876
877 intel_overlay_off_tail(overlay);
878 return 0;
879} 877}
880 878
881static int check_overlay_possible_on_crtc(struct intel_overlay *overlay, 879static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
@@ -1122,9 +1120,8 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
1122 } 1120 }
1123 crtc = to_intel_crtc(drmmode_crtc); 1121 crtc = to_intel_crtc(drmmode_crtc);
1124 1122
1125 new_bo = to_intel_bo(drm_gem_object_lookup(file_priv, 1123 new_bo = i915_gem_object_lookup(file_priv, put_image_rec->bo_handle);
1126 put_image_rec->bo_handle)); 1124 if (!new_bo) {
1127 if (&new_bo->base == NULL) {
1128 ret = -ENOENT; 1125 ret = -ENOENT;
1129 goto out_free; 1126 goto out_free;
1130 } 1127 }
@@ -1132,7 +1129,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
1132 drm_modeset_lock_all(dev); 1129 drm_modeset_lock_all(dev);
1133 mutex_lock(&dev->struct_mutex); 1130 mutex_lock(&dev->struct_mutex);
1134 1131
1135 if (new_bo->tiling_mode) { 1132 if (i915_gem_object_is_tiled(new_bo)) {
1136 DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); 1133 DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
1137 ret = -EINVAL; 1134 ret = -EINVAL;
1138 goto out_unlock; 1135 goto out_unlock;
@@ -1220,7 +1217,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
1220out_unlock: 1217out_unlock:
1221 mutex_unlock(&dev->struct_mutex); 1218 mutex_unlock(&dev->struct_mutex);
1222 drm_modeset_unlock_all(dev); 1219 drm_modeset_unlock_all(dev);
1223 drm_gem_object_unreference_unlocked(&new_bo->base); 1220 i915_gem_object_put_unlocked(new_bo);
1224out_free: 1221out_free:
1225 kfree(params); 1222 kfree(params);
1226 1223
@@ -1404,7 +1401,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv)
1404 } 1401 }
1405 overlay->flip_addr = reg_bo->phys_handle->busaddr; 1402 overlay->flip_addr = reg_bo->phys_handle->busaddr;
1406 } else { 1403 } else {
1407 ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE); 1404 ret = i915_gem_object_ggtt_pin(reg_bo, NULL,
1405 0, PAGE_SIZE, PIN_MAPPABLE);
1408 if (ret) { 1406 if (ret) {
1409 DRM_ERROR("failed to pin overlay register bo\n"); 1407 DRM_ERROR("failed to pin overlay register bo\n");
1410 goto out_free_bo; 1408 goto out_free_bo;
@@ -1444,7 +1442,7 @@ out_unpin_bo:
1444 if (!OVERLAY_NEEDS_PHYSICAL(dev_priv)) 1442 if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
1445 i915_gem_object_ggtt_unpin(reg_bo); 1443 i915_gem_object_ggtt_unpin(reg_bo);
1446out_free_bo: 1444out_free_bo:
1447 drm_gem_object_unreference(&reg_bo->base); 1445 i915_gem_object_put(reg_bo);
1448out_free: 1446out_free:
1449 mutex_unlock(&dev_priv->drm.struct_mutex); 1447 mutex_unlock(&dev_priv->drm.struct_mutex);
1450 kfree(overlay); 1448 kfree(overlay);
@@ -1461,7 +1459,7 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
1461 * hardware should be off already */ 1459 * hardware should be off already */
1462 WARN_ON(dev_priv->overlay->active); 1460 WARN_ON(dev_priv->overlay->active);
1463 1461
1464 drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base); 1462 i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo);
1465 kfree(dev_priv->overlay); 1463 kfree(dev_priv->overlay);
1466} 1464}
1467 1465
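
For reference, the reference-counting churn in this file follows the new i915 GEM wrappers used throughout the series (i915_gem_object_lookup(), i915_gem_object_put() and its _unlocked variant). A minimal sketch of the pattern, assuming a hypothetical ioctl-style caller; locking and the remaining error handling are omitted:

    static int example_lookup_and_put(struct drm_file *file_priv, u32 handle)
    {
            struct drm_i915_gem_object *obj;

            obj = i915_gem_object_lookup(file_priv, handle);
            if (!obj)       /* the lookup now returns NULL rather than an embedded base pointer */
                    return -ENOENT;

            /* ... use obj under the appropriate locks ... */

            i915_gem_object_put_unlocked(obj);      /* was drm_gem_object_unreference_unlocked(&obj->base) */
            return 0;
    }
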
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ba062d70a548..81ab11934d85 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -340,6 +340,11 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
340 I915_WRITE(FW_BLC_SELF, val); 340 I915_WRITE(FW_BLC_SELF, val);
341 POSTING_READ(FW_BLC_SELF); 341 POSTING_READ(FW_BLC_SELF);
342 } else if (IS_I915GM(dev)) { 342 } else if (IS_I915GM(dev)) {
343 /*
344 * FIXME can't find a bit like this for 915G, and
 345 * yet it does have the related watermark in
346 * FW_BLC_SELF. What's going on?
347 */
343 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) : 348 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
344 _MASKED_BIT_DISABLE(INSTPM_SELF_EN); 349 _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
345 I915_WRITE(INSTPM, val); 350 I915_WRITE(INSTPM, val);
@@ -1580,7 +1585,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1580 obj = intel_fb_obj(enabled->primary->state->fb); 1585 obj = intel_fb_obj(enabled->primary->state->fb);
1581 1586
1582 /* self-refresh seems busted with untiled */ 1587 /* self-refresh seems busted with untiled */
1583 if (obj->tiling_mode == I915_TILING_NONE) 1588 if (!i915_gem_object_is_tiled(obj))
1584 enabled = NULL; 1589 enabled = NULL;
1585 } 1590 }
1586 1591
@@ -1604,6 +1609,9 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1604 unsigned long line_time_us; 1609 unsigned long line_time_us;
1605 int entries; 1610 int entries;
1606 1611
1612 if (IS_I915GM(dev) || IS_I945GM(dev))
1613 cpp = 4;
1614
1607 line_time_us = max(htotal * 1000 / clock, 1); 1615 line_time_us = max(htotal * 1000 / clock, 1);
1608 1616
1609 /* Use ns/us then divide to preserve precision */ 1617 /* Use ns/us then divide to preserve precision */
@@ -1618,7 +1626,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1618 if (IS_I945G(dev) || IS_I945GM(dev)) 1626 if (IS_I945G(dev) || IS_I945GM(dev))
1619 I915_WRITE(FW_BLC_SELF, 1627 I915_WRITE(FW_BLC_SELF,
1620 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); 1628 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1621 else if (IS_I915GM(dev)) 1629 else
1622 I915_WRITE(FW_BLC_SELF, srwm & 0x3f); 1630 I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1623 } 1631 }
1624 1632
@@ -3344,6 +3352,8 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
3344 plane_bytes_per_line *= 4; 3352 plane_bytes_per_line *= 4;
3345 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3353 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3346 plane_blocks_per_line /= 4; 3354 plane_blocks_per_line /= 4;
3355 } else if (tiling == DRM_FORMAT_MOD_NONE) {
3356 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
3347 } else { 3357 } else {
3348 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3358 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3349 } 3359 }
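
As a rough illustration of the DRM_FORMAT_MOD_NONE branch added above, with made-up plane parameters (not taken from the patch):

    /*
     * Hypothetical 1920-wide linear plane at 4 bytes per pixel:
     *   plane_bytes_per_line    = 1920 * 4 = 7680
     *   DIV_ROUND_UP(7680, 512) = 15
     *   plane_blocks_per_line   = 15 + 1 = 16   (one extra block budgeted for linear surfaces)
     */
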
@@ -4912,7 +4922,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv,
4912 */ 4922 */
4913 if (!(dev_priv->gt.awake && 4923 if (!(dev_priv->gt.awake &&
4914 dev_priv->rps.enabled && 4924 dev_priv->rps.enabled &&
4915 dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) 4925 dev_priv->rps.cur_freq < dev_priv->rps.boost_freq))
4916 return; 4926 return;
4917 4927
4918 /* Force a RPS boost (and don't count it against the client) if 4928 /* Force a RPS boost (and don't count it against the client) if
@@ -5103,35 +5113,31 @@ int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6)
5103 5113
5104static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) 5114static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
5105{ 5115{
5106 uint32_t rp_state_cap;
5107 u32 ddcc_status = 0;
5108 int ret;
5109
5110 /* All of these values are in units of 50MHz */ 5116 /* All of these values are in units of 50MHz */
5111 dev_priv->rps.cur_freq = 0; 5117
5112 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 5118 /* static values from HW: RP0 > RP1 > RPn (min_freq) */
5113 if (IS_BROXTON(dev_priv)) { 5119 if (IS_BROXTON(dev_priv)) {
5114 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); 5120 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
5115 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; 5121 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
5116 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 5122 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
5117 dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; 5123 dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff;
5118 } else { 5124 } else {
5119 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 5125 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
5120 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; 5126 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff;
5121 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 5127 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
5122 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; 5128 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
5123 } 5129 }
5124
5125 /* hw_max = RP0 until we check for overclocking */ 5130 /* hw_max = RP0 until we check for overclocking */
5126 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; 5131 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq;
5127 5132
5128 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; 5133 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
5129 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || 5134 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
5130 IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 5135 IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
5131 ret = sandybridge_pcode_read(dev_priv, 5136 u32 ddcc_status = 0;
5132 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 5137
5133 &ddcc_status); 5138 if (sandybridge_pcode_read(dev_priv,
5134 if (0 == ret) 5139 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
5140 &ddcc_status) == 0)
5135 dev_priv->rps.efficient_freq = 5141 dev_priv->rps.efficient_freq =
5136 clamp_t(u8, 5142 clamp_t(u8,
5137 ((ddcc_status >> 8) & 0xff), 5143 ((ddcc_status >> 8) & 0xff),
@@ -5141,29 +5147,26 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
5141 5147
5142 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 5148 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
5143 /* Store the frequency values in 16.66 MHZ units, which is 5149 /* Store the frequency values in 16.66 MHZ units, which is
5144 the natural hardware unit for SKL */ 5150 * the natural hardware unit for SKL
5151 */
5145 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; 5152 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
5146 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; 5153 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
5147 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; 5154 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
5148 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; 5155 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
5149 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; 5156 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
5150 } 5157 }
5158}
5151 5159
5152 dev_priv->rps.idle_freq = dev_priv->rps.min_freq; 5160static void reset_rps(struct drm_i915_private *dev_priv,
5161 void (*set)(struct drm_i915_private *, u8))
5162{
5163 u8 freq = dev_priv->rps.cur_freq;
5153 5164
5154 /* Preserve min/max settings in case of re-init */ 5165 /* force a reset */
5155 if (dev_priv->rps.max_freq_softlimit == 0) 5166 dev_priv->rps.power = -1;
5156 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 5167 dev_priv->rps.cur_freq = -1;
5157 5168
5158 if (dev_priv->rps.min_freq_softlimit == 0) { 5169 set(dev_priv, freq);
5159 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5160 dev_priv->rps.min_freq_softlimit =
5161 max_t(int, dev_priv->rps.efficient_freq,
5162 intel_freq_opcode(dev_priv, 450));
5163 else
5164 dev_priv->rps.min_freq_softlimit =
5165 dev_priv->rps.min_freq;
5166 }
5167} 5170}
5168 5171
5169/* See the Gen9_GT_PM_Programming_Guide doc for the below */ 5172/* See the Gen9_GT_PM_Programming_Guide doc for the below */
@@ -5171,8 +5174,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv)
5171{ 5174{
5172 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5175 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5173 5176
5174 gen6_init_rps_frequencies(dev_priv);
5175
5176 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ 5177 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
5177 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { 5178 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
5178 /* 5179 /*
@@ -5202,8 +5203,7 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv)
5202 /* Leaning on the below call to gen6_set_rps to program/setup the 5203 /* Leaning on the below call to gen6_set_rps to program/setup the
5203 * Up/Down EI & threshold registers, as well as the RP_CONTROL, 5204 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
5204 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ 5205 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
5205 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 5206 reset_rps(dev_priv, gen6_set_rps);
5206 gen6_set_rps(dev_priv, dev_priv->rps.idle_freq);
5207 5207
5208 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5208 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5209} 5209}
@@ -5290,9 +5290,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
5290 /* 2a: Disable RC states. */ 5290 /* 2a: Disable RC states. */
5291 I915_WRITE(GEN6_RC_CONTROL, 0); 5291 I915_WRITE(GEN6_RC_CONTROL, 0);
5292 5292
5293 /* Initialize rps frequencies */
5294 gen6_init_rps_frequencies(dev_priv);
5295
5296 /* 2b: Program RC6 thresholds.*/ 5293 /* 2b: Program RC6 thresholds.*/
5297 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 5294 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
5298 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 5295 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
@@ -5349,8 +5346,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
5349 5346
 5350 /* 6: Ring frequency + overclocking (our driver does this later) */ 5347
5351 5348
5352 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 5349 reset_rps(dev_priv, gen6_set_rps);
5353 gen6_set_rps(dev_priv, dev_priv->rps.idle_freq);
5354 5350
5355 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5351 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5356} 5352}
@@ -5358,7 +5354,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
5358static void gen6_enable_rps(struct drm_i915_private *dev_priv) 5354static void gen6_enable_rps(struct drm_i915_private *dev_priv)
5359{ 5355{
5360 struct intel_engine_cs *engine; 5356 struct intel_engine_cs *engine;
5361 u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; 5357 u32 rc6vids, rc6_mask = 0;
5362 u32 gtfifodbg; 5358 u32 gtfifodbg;
5363 int rc6_mode; 5359 int rc6_mode;
5364 int ret; 5360 int ret;
@@ -5382,9 +5378,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
5382 5378
5383 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5379 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5384 5380
5385 /* Initialize rps frequencies */
5386 gen6_init_rps_frequencies(dev_priv);
5387
5388 /* disable the counters and set deterministic thresholds */ 5381 /* disable the counters and set deterministic thresholds */
5389 I915_WRITE(GEN6_RC_CONTROL, 0); 5382 I915_WRITE(GEN6_RC_CONTROL, 0);
5390 5383
@@ -5435,16 +5428,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
5435 if (ret) 5428 if (ret)
5436 DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); 5429 DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
5437 5430
5438 ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); 5431 reset_rps(dev_priv, gen6_set_rps);
5439 if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
5440 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
5441 (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
5442 (pcu_mbox & 0xff) * 50);
5443 dev_priv->rps.max_freq = pcu_mbox & 0xff;
5444 }
5445
5446 dev_priv->rps.power = HIGH_POWER; /* force a reset */
5447 gen6_set_rps(dev_priv, dev_priv->rps.idle_freq);
5448 5432
5449 rc6vids = 0; 5433 rc6vids = 0;
5450 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 5434 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
@@ -5463,7 +5447,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
5463 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5447 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5464} 5448}
5465 5449
5466static void __gen6_update_ring_freq(struct drm_i915_private *dev_priv) 5450static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
5467{ 5451{
5468 int min_freq = 15; 5452 int min_freq = 15;
5469 unsigned int gpu_freq; 5453 unsigned int gpu_freq;
@@ -5547,16 +5531,6 @@ static void __gen6_update_ring_freq(struct drm_i915_private *dev_priv)
5547 } 5531 }
5548} 5532}
5549 5533
5550void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
5551{
5552 if (!HAS_CORE_RING_FREQ(dev_priv))
5553 return;
5554
5555 mutex_lock(&dev_priv->rps.hw_lock);
5556 __gen6_update_ring_freq(dev_priv);
5557 mutex_unlock(&dev_priv->rps.hw_lock);
5558}
5559
5560static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) 5534static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
5561{ 5535{
5562 u32 val, rp0; 5536 u32 val, rp0;
@@ -5746,7 +5720,7 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
5746 if (WARN_ON(!dev_priv->vlv_pctx)) 5720 if (WARN_ON(!dev_priv->vlv_pctx))
5747 return; 5721 return;
5748 5722
5749 drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base); 5723 i915_gem_object_put_unlocked(dev_priv->vlv_pctx);
5750 dev_priv->vlv_pctx = NULL; 5724 dev_priv->vlv_pctx = NULL;
5751} 5725}
5752 5726
@@ -5769,8 +5743,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
5769 5743
5770 vlv_init_gpll_ref_freq(dev_priv); 5744 vlv_init_gpll_ref_freq(dev_priv);
5771 5745
5772 mutex_lock(&dev_priv->rps.hw_lock);
5773
5774 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5746 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5775 switch ((val >> 6) & 3) { 5747 switch ((val >> 6) & 3) {
5776 case 0: 5748 case 0:
@@ -5806,17 +5778,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
5806 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 5778 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5807 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 5779 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5808 dev_priv->rps.min_freq); 5780 dev_priv->rps.min_freq);
5809
5810 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5811
5812 /* Preserve min/max settings in case of re-init */
5813 if (dev_priv->rps.max_freq_softlimit == 0)
5814 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5815
5816 if (dev_priv->rps.min_freq_softlimit == 0)
5817 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5818
5819 mutex_unlock(&dev_priv->rps.hw_lock);
5820} 5781}
5821 5782
5822static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) 5783static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
@@ -5827,8 +5788,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
5827 5788
5828 vlv_init_gpll_ref_freq(dev_priv); 5789 vlv_init_gpll_ref_freq(dev_priv);
5829 5790
5830 mutex_lock(&dev_priv->rps.hw_lock);
5831
5832 mutex_lock(&dev_priv->sb_lock); 5791 mutex_lock(&dev_priv->sb_lock);
5833 val = vlv_cck_read(dev_priv, CCK_FUSE_REG); 5792 val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
5834 mutex_unlock(&dev_priv->sb_lock); 5793 mutex_unlock(&dev_priv->sb_lock);
@@ -5870,17 +5829,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
5870 dev_priv->rps.rp1_freq | 5829 dev_priv->rps.rp1_freq |
5871 dev_priv->rps.min_freq) & 1, 5830 dev_priv->rps.min_freq) & 1,
5872 "Odd GPU freq values\n"); 5831 "Odd GPU freq values\n");
5873
5874 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5875
5876 /* Preserve min/max settings in case of re-init */
5877 if (dev_priv->rps.max_freq_softlimit == 0)
5878 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5879
5880 if (dev_priv->rps.min_freq_softlimit == 0)
5881 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5882
5883 mutex_unlock(&dev_priv->rps.hw_lock);
5884} 5832}
5885 5833
5886static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) 5834static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
@@ -5971,16 +5919,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
5971 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 5919 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5972 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 5920 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5973 5921
5974 dev_priv->rps.cur_freq = (val >> 8) & 0xff; 5922 reset_rps(dev_priv, valleyview_set_rps);
5975 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5976 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5977 dev_priv->rps.cur_freq);
5978
5979 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5980 intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq),
5981 dev_priv->rps.idle_freq);
5982
5983 valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq);
5984 5923
5985 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5924 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5986} 5925}
@@ -6060,16 +5999,7 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
6060 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 5999 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
6061 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 6000 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
6062 6001
6063 dev_priv->rps.cur_freq = (val >> 8) & 0xff; 6002 reset_rps(dev_priv, valleyview_set_rps);
6064 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
6065 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
6066 dev_priv->rps.cur_freq);
6067
6068 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
6069 intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq),
6070 dev_priv->rps.idle_freq);
6071
6072 valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq);
6073 6003
6074 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6004 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6075} 6005}
@@ -6398,19 +6328,11 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
6398 */ 6328 */
6399bool i915_gpu_busy(void) 6329bool i915_gpu_busy(void)
6400{ 6330{
6401 struct drm_i915_private *dev_priv;
6402 struct intel_engine_cs *engine;
6403 bool ret = false; 6331 bool ret = false;
6404 6332
6405 spin_lock_irq(&mchdev_lock); 6333 spin_lock_irq(&mchdev_lock);
6406 if (!i915_mch_dev) 6334 if (i915_mch_dev)
6407 goto out_unlock; 6335 ret = i915_mch_dev->gt.awake;
6408 dev_priv = i915_mch_dev;
6409
6410 for_each_engine(engine, dev_priv)
6411 ret |= !list_empty(&engine->request_list);
6412
6413out_unlock:
6414 spin_unlock_irq(&mchdev_lock); 6336 spin_unlock_irq(&mchdev_lock);
6415 6337
6416 return ret; 6338 return ret;
@@ -6566,30 +6488,60 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
6566 intel_runtime_pm_get(dev_priv); 6488 intel_runtime_pm_get(dev_priv);
6567 } 6489 }
6568 6490
6491 mutex_lock(&dev_priv->rps.hw_lock);
6492
6493 /* Initialize RPS limits (for userspace) */
6569 if (IS_CHERRYVIEW(dev_priv)) 6494 if (IS_CHERRYVIEW(dev_priv))
6570 cherryview_init_gt_powersave(dev_priv); 6495 cherryview_init_gt_powersave(dev_priv);
6571 else if (IS_VALLEYVIEW(dev_priv)) 6496 else if (IS_VALLEYVIEW(dev_priv))
6572 valleyview_init_gt_powersave(dev_priv); 6497 valleyview_init_gt_powersave(dev_priv);
6498 else if (INTEL_GEN(dev_priv) >= 6)
6499 gen6_init_rps_frequencies(dev_priv);
6500
6501 /* Derive initial user preferences/limits from the hardware limits */
6502 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
6503 dev_priv->rps.cur_freq = dev_priv->rps.idle_freq;
6504
6505 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
6506 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
6507
6508 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6509 dev_priv->rps.min_freq_softlimit =
6510 max_t(int,
6511 dev_priv->rps.efficient_freq,
6512 intel_freq_opcode(dev_priv, 450));
6513
6514 /* After setting max-softlimit, find the overclock max freq */
6515 if (IS_GEN6(dev_priv) ||
6516 IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
6517 u32 params = 0;
6518
6519 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
6520 if (params & BIT(31)) { /* OC supported */
6521 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
6522 (dev_priv->rps.max_freq & 0xff) * 50,
6523 (params & 0xff) * 50);
6524 dev_priv->rps.max_freq = params & 0xff;
6525 }
6526 }
6527
6528 /* Finally allow us to boost to max by default */
6529 dev_priv->rps.boost_freq = dev_priv->rps.max_freq;
6530
6531 mutex_unlock(&dev_priv->rps.hw_lock);
6532
6533 intel_autoenable_gt_powersave(dev_priv);
6573} 6534}
6574 6535
6575void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) 6536void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
6576{ 6537{
6577 if (IS_CHERRYVIEW(dev_priv)) 6538 if (IS_VALLEYVIEW(dev_priv))
6578 return;
6579 else if (IS_VALLEYVIEW(dev_priv))
6580 valleyview_cleanup_gt_powersave(dev_priv); 6539 valleyview_cleanup_gt_powersave(dev_priv);
6581 6540
6582 if (!i915.enable_rc6) 6541 if (!i915.enable_rc6)
6583 intel_runtime_pm_put(dev_priv); 6542 intel_runtime_pm_put(dev_priv);
6584} 6543}
6585 6544
6586static void gen6_suspend_rps(struct drm_i915_private *dev_priv)
6587{
6588 flush_delayed_work(&dev_priv->rps.delayed_resume_work);
6589
6590 gen6_disable_rps_interrupts(dev_priv);
6591}
6592
6593/** 6545/**
6594 * intel_suspend_gt_powersave - suspend PM work and helper threads 6546 * intel_suspend_gt_powersave - suspend PM work and helper threads
6595 * @dev_priv: i915 device 6547 * @dev_priv: i915 device
@@ -6603,60 +6555,76 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
6603 if (INTEL_GEN(dev_priv) < 6) 6555 if (INTEL_GEN(dev_priv) < 6)
6604 return; 6556 return;
6605 6557
6606 gen6_suspend_rps(dev_priv); 6558 if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
6559 intel_runtime_pm_put(dev_priv);
6607 6560
6608 /* Force GPU to min freq during suspend */ 6561 /* gen6_rps_idle() will be called later to disable interrupts */
6609 gen6_rps_idle(dev_priv); 6562}
6563
6564void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
6565{
6566 dev_priv->rps.enabled = true; /* force disabling */
6567 intel_disable_gt_powersave(dev_priv);
6568
6569 gen6_reset_rps_interrupts(dev_priv);
6610} 6570}
6611 6571
6612void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) 6572void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
6613{ 6573{
6614 if (IS_IRONLAKE_M(dev_priv)) { 6574 if (!READ_ONCE(dev_priv->rps.enabled))
6615 ironlake_disable_drps(dev_priv); 6575 return;
6616 } else if (INTEL_INFO(dev_priv)->gen >= 6) {
6617 intel_suspend_gt_powersave(dev_priv);
6618 6576
6619 mutex_lock(&dev_priv->rps.hw_lock); 6577 mutex_lock(&dev_priv->rps.hw_lock);
6620 if (INTEL_INFO(dev_priv)->gen >= 9) {
6621 gen9_disable_rc6(dev_priv);
6622 gen9_disable_rps(dev_priv);
6623 } else if (IS_CHERRYVIEW(dev_priv))
6624 cherryview_disable_rps(dev_priv);
6625 else if (IS_VALLEYVIEW(dev_priv))
6626 valleyview_disable_rps(dev_priv);
6627 else
6628 gen6_disable_rps(dev_priv);
6629 6578
6630 dev_priv->rps.enabled = false; 6579 if (INTEL_GEN(dev_priv) >= 9) {
6631 mutex_unlock(&dev_priv->rps.hw_lock); 6580 gen9_disable_rc6(dev_priv);
6581 gen9_disable_rps(dev_priv);
6582 } else if (IS_CHERRYVIEW(dev_priv)) {
6583 cherryview_disable_rps(dev_priv);
6584 } else if (IS_VALLEYVIEW(dev_priv)) {
6585 valleyview_disable_rps(dev_priv);
6586 } else if (INTEL_GEN(dev_priv) >= 6) {
6587 gen6_disable_rps(dev_priv);
6588 } else if (IS_IRONLAKE_M(dev_priv)) {
6589 ironlake_disable_drps(dev_priv);
6632 } 6590 }
6591
6592 dev_priv->rps.enabled = false;
6593 mutex_unlock(&dev_priv->rps.hw_lock);
6633} 6594}
6634 6595
6635static void intel_gen6_powersave_work(struct work_struct *work) 6596void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
6636{ 6597{
6637 struct drm_i915_private *dev_priv = 6598 /* We shouldn't be disabling as we submit, so this should be less
6638 container_of(work, struct drm_i915_private, 6599 * racy than it appears!
6639 rps.delayed_resume_work.work); 6600 */
6601 if (READ_ONCE(dev_priv->rps.enabled))
6602 return;
6640 6603
6641 mutex_lock(&dev_priv->rps.hw_lock); 6604 /* Powersaving is controlled by the host when inside a VM */
6605 if (intel_vgpu_active(dev_priv))
6606 return;
6642 6607
6643 gen6_reset_rps_interrupts(dev_priv); 6608 mutex_lock(&dev_priv->rps.hw_lock);
6644 6609
6645 if (IS_CHERRYVIEW(dev_priv)) { 6610 if (IS_CHERRYVIEW(dev_priv)) {
6646 cherryview_enable_rps(dev_priv); 6611 cherryview_enable_rps(dev_priv);
6647 } else if (IS_VALLEYVIEW(dev_priv)) { 6612 } else if (IS_VALLEYVIEW(dev_priv)) {
6648 valleyview_enable_rps(dev_priv); 6613 valleyview_enable_rps(dev_priv);
6649 } else if (INTEL_INFO(dev_priv)->gen >= 9) { 6614 } else if (INTEL_GEN(dev_priv) >= 9) {
6650 gen9_enable_rc6(dev_priv); 6615 gen9_enable_rc6(dev_priv);
6651 gen9_enable_rps(dev_priv); 6616 gen9_enable_rps(dev_priv);
6652 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) 6617 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
6653 __gen6_update_ring_freq(dev_priv); 6618 gen6_update_ring_freq(dev_priv);
6654 } else if (IS_BROADWELL(dev_priv)) { 6619 } else if (IS_BROADWELL(dev_priv)) {
6655 gen8_enable_rps(dev_priv); 6620 gen8_enable_rps(dev_priv);
6656 __gen6_update_ring_freq(dev_priv); 6621 gen6_update_ring_freq(dev_priv);
6657 } else { 6622 } else if (INTEL_GEN(dev_priv) >= 6) {
6658 gen6_enable_rps(dev_priv); 6623 gen6_enable_rps(dev_priv);
6659 __gen6_update_ring_freq(dev_priv); 6624 gen6_update_ring_freq(dev_priv);
6625 } else if (IS_IRONLAKE_M(dev_priv)) {
6626 ironlake_enable_drps(dev_priv);
6627 intel_init_emon(dev_priv);
6660 } 6628 }
6661 6629
6662 WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); 6630 WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
@@ -6666,18 +6634,47 @@ static void intel_gen6_powersave_work(struct work_struct *work)
6666 WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); 6634 WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
6667 6635
6668 dev_priv->rps.enabled = true; 6636 dev_priv->rps.enabled = true;
6637 mutex_unlock(&dev_priv->rps.hw_lock);
6638}
6669 6639
6670 gen6_enable_rps_interrupts(dev_priv); 6640static void __intel_autoenable_gt_powersave(struct work_struct *work)
6641{
6642 struct drm_i915_private *dev_priv =
6643 container_of(work, typeof(*dev_priv), rps.autoenable_work.work);
6644 struct intel_engine_cs *rcs;
6645 struct drm_i915_gem_request *req;
6671 6646
6672 mutex_unlock(&dev_priv->rps.hw_lock); 6647 if (READ_ONCE(dev_priv->rps.enabled))
6648 goto out;
6649
6650 rcs = &dev_priv->engine[RCS];
6651 if (rcs->last_context)
6652 goto out;
6653
6654 if (!rcs->init_context)
6655 goto out;
6673 6656
6657 mutex_lock(&dev_priv->drm.struct_mutex);
6658
6659 req = i915_gem_request_alloc(rcs, dev_priv->kernel_context);
6660 if (IS_ERR(req))
6661 goto unlock;
6662
6663 if (!i915.enable_execlists && i915_switch_context(req) == 0)
6664 rcs->init_context(req);
6665
6666 /* Mark the device busy, calling intel_enable_gt_powersave() */
6667 i915_add_request_no_flush(req);
6668
6669unlock:
6670 mutex_unlock(&dev_priv->drm.struct_mutex);
6671out:
6674 intel_runtime_pm_put(dev_priv); 6672 intel_runtime_pm_put(dev_priv);
6675} 6673}
6676 6674
6677void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) 6675void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
6678{ 6676{
6679 /* Powersaving is controlled by the host when inside a VM */ 6677 if (READ_ONCE(dev_priv->rps.enabled))
6680 if (intel_vgpu_active(dev_priv))
6681 return; 6678 return;
6682 6679
6683 if (IS_IRONLAKE_M(dev_priv)) { 6680 if (IS_IRONLAKE_M(dev_priv)) {
@@ -6698,21 +6695,13 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
6698 * paths, so the _noresume version is enough (and in case of 6695 * paths, so the _noresume version is enough (and in case of
6699 * runtime resume it's necessary). 6696 * runtime resume it's necessary).
6700 */ 6697 */
6701 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, 6698 if (queue_delayed_work(dev_priv->wq,
6702 round_jiffies_up_relative(HZ))) 6699 &dev_priv->rps.autoenable_work,
6700 round_jiffies_up_relative(HZ)))
6703 intel_runtime_pm_get_noresume(dev_priv); 6701 intel_runtime_pm_get_noresume(dev_priv);
6704 } 6702 }
6705} 6703}
6706 6704
6707void intel_reset_gt_powersave(struct drm_i915_private *dev_priv)
6708{
6709 if (INTEL_INFO(dev_priv)->gen < 6)
6710 return;
6711
6712 gen6_suspend_rps(dev_priv);
6713 dev_priv->rps.enabled = false;
6714}
6715
6716static void ibx_init_clock_gating(struct drm_device *dev) 6705static void ibx_init_clock_gating(struct drm_device *dev)
6717{ 6706{
6718 struct drm_i915_private *dev_priv = to_i915(dev); 6707 struct drm_i915_private *dev_priv = to_i915(dev);
@@ -7787,7 +7776,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
7787 if (!i915_gem_request_completed(req)) 7776 if (!i915_gem_request_completed(req))
7788 gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); 7777 gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
7789 7778
7790 i915_gem_request_unreference(req); 7779 i915_gem_request_put(req);
7791 kfree(boost); 7780 kfree(boost);
7792} 7781}
7793 7782
@@ -7805,8 +7794,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
7805 if (boost == NULL) 7794 if (boost == NULL)
7806 return; 7795 return;
7807 7796
7808 i915_gem_request_reference(req); 7797 boost->req = i915_gem_request_get(req);
7809 boost->req = req;
7810 7798
7811 INIT_WORK(&boost->work, __intel_rps_boost_work); 7799 INIT_WORK(&boost->work, __intel_rps_boost_work);
7812 queue_work(req->i915->wq, &boost->work); 7800 queue_work(req->i915->wq, &boost->work);
@@ -7819,11 +7807,9 @@ void intel_pm_setup(struct drm_device *dev)
7819 mutex_init(&dev_priv->rps.hw_lock); 7807 mutex_init(&dev_priv->rps.hw_lock);
7820 spin_lock_init(&dev_priv->rps.client_lock); 7808 spin_lock_init(&dev_priv->rps.client_lock);
7821 7809
7822 INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, 7810 INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
7823 intel_gen6_powersave_work); 7811 __intel_autoenable_gt_powersave);
7824 INIT_LIST_HEAD(&dev_priv->rps.clients); 7812 INIT_LIST_HEAD(&dev_priv->rps.clients);
7825 INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
7826 INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
7827 7813
7828 dev_priv->pm.suspended = false; 7814 dev_priv->pm.suspended = false;
7829 atomic_set(&dev_priv->pm.wakeref_count, 0); 7815 atomic_set(&dev_priv->pm.wakeref_count, 0);
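
Taken together, the intel_pm.c hunks above rearrange the RPS/RC6 entry points. The sketch below only orders the calls for illustration; the function names come from this diff, but the real call sites live in the driver load, reset and suspend paths outside this file:

    static void example_gt_powersave_lifecycle(struct drm_i915_private *dev_priv)
    {
            intel_init_gt_powersave(dev_priv);      /* read HW limits, derive softlimits/boost_freq, queue autoenable_work */
            intel_enable_gt_powersave(dev_priv);    /* idempotent: returns early once rps.enabled is set */
            intel_disable_gt_powersave(dev_priv);   /* tears down RPS/RC6 and clears rps.enabled */
            intel_cleanup_gt_powersave(dev_priv);   /* on VLV, also releases the pinned power context */
    }
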
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 2b0d1baf15b3..59a21c9d2e43 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -645,9 +645,8 @@ unlock:
645 mutex_unlock(&dev_priv->psr.lock); 645 mutex_unlock(&dev_priv->psr.lock);
646} 646}
647 647
648static void intel_psr_exit(struct drm_device *dev) 648static void intel_psr_exit(struct drm_i915_private *dev_priv)
649{ 649{
650 struct drm_i915_private *dev_priv = to_i915(dev);
651 struct intel_dp *intel_dp = dev_priv->psr.enabled; 650 struct intel_dp *intel_dp = dev_priv->psr.enabled;
652 struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc; 651 struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
653 enum pipe pipe = to_intel_crtc(crtc)->pipe; 652 enum pipe pipe = to_intel_crtc(crtc)->pipe;
@@ -656,7 +655,7 @@ static void intel_psr_exit(struct drm_device *dev)
656 if (!dev_priv->psr.active) 655 if (!dev_priv->psr.active)
657 return; 656 return;
658 657
659 if (HAS_DDI(dev)) { 658 if (HAS_DDI(dev_priv)) {
660 val = I915_READ(EDP_PSR_CTL); 659 val = I915_READ(EDP_PSR_CTL);
661 660
662 WARN_ON(!(val & EDP_PSR_ENABLE)); 661 WARN_ON(!(val & EDP_PSR_ENABLE));
@@ -691,7 +690,7 @@ static void intel_psr_exit(struct drm_device *dev)
691 690
692/** 691/**
693 * intel_psr_single_frame_update - Single Frame Update 692 * intel_psr_single_frame_update - Single Frame Update
694 * @dev: DRM device 693 * @dev_priv: i915 device
695 * @frontbuffer_bits: frontbuffer plane tracking bits 694 * @frontbuffer_bits: frontbuffer plane tracking bits
696 * 695 *
697 * Some platforms support a single frame update feature that is used to 696 * Some platforms support a single frame update feature that is used to
@@ -699,10 +698,9 @@ static void intel_psr_exit(struct drm_device *dev)
699 * So far it is only implemented for Valleyview and Cherryview because 698 * So far it is only implemented for Valleyview and Cherryview because
700 * hardware requires this to be done before a page flip. 699 * hardware requires this to be done before a page flip.
701 */ 700 */
702void intel_psr_single_frame_update(struct drm_device *dev, 701void intel_psr_single_frame_update(struct drm_i915_private *dev_priv,
703 unsigned frontbuffer_bits) 702 unsigned frontbuffer_bits)
704{ 703{
705 struct drm_i915_private *dev_priv = to_i915(dev);
706 struct drm_crtc *crtc; 704 struct drm_crtc *crtc;
707 enum pipe pipe; 705 enum pipe pipe;
708 u32 val; 706 u32 val;
@@ -711,7 +709,7 @@ void intel_psr_single_frame_update(struct drm_device *dev,
711 * Single frame update is already supported on BDW+ but it requires 709 * Single frame update is already supported on BDW+ but it requires
712 * many W/A and it isn't really needed. 710 * many W/A and it isn't really needed.
713 */ 711 */
714 if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) 712 if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
715 return; 713 return;
716 714
717 mutex_lock(&dev_priv->psr.lock); 715 mutex_lock(&dev_priv->psr.lock);
@@ -737,7 +735,7 @@ void intel_psr_single_frame_update(struct drm_device *dev,
737 735
738/** 736/**
 739 * intel_psr_invalidate - Invalidate PSR 737
740 * @dev: DRM device 738 * @dev_priv: i915 device
741 * @frontbuffer_bits: frontbuffer plane tracking bits 739 * @frontbuffer_bits: frontbuffer plane tracking bits
742 * 740 *
743 * Since the hardware frontbuffer tracking has gaps we need to integrate 741 * Since the hardware frontbuffer tracking has gaps we need to integrate
@@ -747,10 +745,9 @@ void intel_psr_single_frame_update(struct drm_device *dev,
747 * 745 *
748 * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits." 746 * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits."
749 */ 747 */
750void intel_psr_invalidate(struct drm_device *dev, 748void intel_psr_invalidate(struct drm_i915_private *dev_priv,
751 unsigned frontbuffer_bits) 749 unsigned frontbuffer_bits)
752{ 750{
753 struct drm_i915_private *dev_priv = to_i915(dev);
754 struct drm_crtc *crtc; 751 struct drm_crtc *crtc;
755 enum pipe pipe; 752 enum pipe pipe;
756 753
@@ -767,14 +764,14 @@ void intel_psr_invalidate(struct drm_device *dev,
767 dev_priv->psr.busy_frontbuffer_bits |= frontbuffer_bits; 764 dev_priv->psr.busy_frontbuffer_bits |= frontbuffer_bits;
768 765
769 if (frontbuffer_bits) 766 if (frontbuffer_bits)
770 intel_psr_exit(dev); 767 intel_psr_exit(dev_priv);
771 768
772 mutex_unlock(&dev_priv->psr.lock); 769 mutex_unlock(&dev_priv->psr.lock);
773} 770}
774 771
775/** 772/**
776 * intel_psr_flush - Flush PSR 773 * intel_psr_flush - Flush PSR
777 * @dev: DRM device 774 * @dev_priv: i915 device
778 * @frontbuffer_bits: frontbuffer plane tracking bits 775 * @frontbuffer_bits: frontbuffer plane tracking bits
779 * @origin: which operation caused the flush 776 * @origin: which operation caused the flush
780 * 777 *
@@ -785,10 +782,9 @@ void intel_psr_invalidate(struct drm_device *dev,
785 * 782 *
786 * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits. 783 * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits.
787 */ 784 */
788void intel_psr_flush(struct drm_device *dev, 785void intel_psr_flush(struct drm_i915_private *dev_priv,
789 unsigned frontbuffer_bits, enum fb_op_origin origin) 786 unsigned frontbuffer_bits, enum fb_op_origin origin)
790{ 787{
791 struct drm_i915_private *dev_priv = to_i915(dev);
792 struct drm_crtc *crtc; 788 struct drm_crtc *crtc;
793 enum pipe pipe; 789 enum pipe pipe;
794 790
@@ -806,7 +802,7 @@ void intel_psr_flush(struct drm_device *dev,
806 802
807 /* By definition flush = invalidate + flush */ 803 /* By definition flush = invalidate + flush */
808 if (frontbuffer_bits) 804 if (frontbuffer_bits)
809 intel_psr_exit(dev); 805 intel_psr_exit(dev_priv);
810 806
811 if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits) 807 if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits)
812 if (!work_busy(&dev_priv->psr.work.work)) 808 if (!work_busy(&dev_priv->psr.work.work))
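
The PSR entry points now take the i915 private pointer directly. A small caller-side sketch; the wrapper function and its arguments are hypothetical, only the intel_psr_* signatures come from this diff:

    static void example_psr_frontbuffer_dirty(struct drm_device *dev,
                                              unsigned int frontbuffer_bits,
                                              enum fb_op_origin origin)
    {
            struct drm_i915_private *dev_priv = to_i915(dev);

            /* callers that used to pass a drm_device now convert once at the boundary */
            intel_psr_invalidate(dev_priv, frontbuffer_bits);
            /* ... rendering dirties the frontbuffer ... */
            intel_psr_flush(dev_priv, frontbuffer_bits, origin);
    }
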
diff --git a/drivers/gpu/drm/i915/intel_renderstate.h b/drivers/gpu/drm/i915/intel_renderstate.h
index 5bd69852752c..08f6fea05a2c 100644
--- a/drivers/gpu/drm/i915/intel_renderstate.h
+++ b/drivers/gpu/drm/i915/intel_renderstate.h
@@ -24,12 +24,13 @@
24#ifndef _INTEL_RENDERSTATE_H 24#ifndef _INTEL_RENDERSTATE_H
25#define _INTEL_RENDERSTATE_H 25#define _INTEL_RENDERSTATE_H
26 26
27#include "i915_drv.h" 27#include <linux/types.h>
28 28
29extern const struct intel_renderstate_rodata gen6_null_state; 29struct intel_renderstate_rodata {
30extern const struct intel_renderstate_rodata gen7_null_state; 30 const u32 *reloc;
31extern const struct intel_renderstate_rodata gen8_null_state; 31 const u32 *batch;
32extern const struct intel_renderstate_rodata gen9_null_state; 32 const u32 batch_items;
33};
33 34
34#define RO_RENDERSTATE(_g) \ 35#define RO_RENDERSTATE(_g) \
35 const struct intel_renderstate_rodata gen ## _g ## _null_state = { \ 36 const struct intel_renderstate_rodata gen ## _g ## _null_state = { \
@@ -38,4 +39,9 @@ extern const struct intel_renderstate_rodata gen9_null_state;
38 .batch_items = sizeof(gen ## _g ## _null_state_batch)/4, \ 39 .batch_items = sizeof(gen ## _g ## _null_state_batch)/4, \
39 } 40 }
40 41
42extern const struct intel_renderstate_rodata gen6_null_state;
43extern const struct intel_renderstate_rodata gen7_null_state;
44extern const struct intel_renderstate_rodata gen8_null_state;
45extern const struct intel_renderstate_rodata gen9_null_state;
46
41#endif /* INTEL_RENDERSTATE_H */ 47#endif /* INTEL_RENDERSTATE_H */
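
With the header now self-contained, RO_RENDERSTATE() is what the generated intel_renderstate_gen*.c files use to define these tables. Its gen6 expansion looks roughly like the following; the _relocs/_batch array names are assumed from the generated files and are not visible in this hunk:

    const struct intel_renderstate_rodata gen6_null_state = {
            .reloc       = gen6_null_state_relocs,
            .batch       = gen6_null_state_batch,
            .batch_items = sizeof(gen6_null_state_batch) / 4,
    };
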
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cca7792f26d5..e08a1e1b04e4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -47,57 +47,44 @@ int __intel_ring_space(int head, int tail, int size)
47 return space - I915_RING_FREE_SPACE; 47 return space - I915_RING_FREE_SPACE;
48} 48}
49 49
50void intel_ring_update_space(struct intel_ringbuffer *ringbuf) 50void intel_ring_update_space(struct intel_ring *ring)
51{ 51{
52 if (ringbuf->last_retired_head != -1) { 52 if (ring->last_retired_head != -1) {
53 ringbuf->head = ringbuf->last_retired_head; 53 ring->head = ring->last_retired_head;
54 ringbuf->last_retired_head = -1; 54 ring->last_retired_head = -1;
55 } 55 }
56 56
57 ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR, 57 ring->space = __intel_ring_space(ring->head & HEAD_ADDR,
58 ringbuf->tail, ringbuf->size); 58 ring->tail, ring->size);
59}
60
61static void __intel_ring_advance(struct intel_engine_cs *engine)
62{
63 struct intel_ringbuffer *ringbuf = engine->buffer;
64 ringbuf->tail &= ringbuf->size - 1;
65 engine->write_tail(engine, ringbuf->tail);
66} 59}
67 60
68static int 61static int
69gen2_render_ring_flush(struct drm_i915_gem_request *req, 62gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
70 u32 invalidate_domains,
71 u32 flush_domains)
72{ 63{
73 struct intel_engine_cs *engine = req->engine; 64 struct intel_ring *ring = req->ring;
74 u32 cmd; 65 u32 cmd;
75 int ret; 66 int ret;
76 67
77 cmd = MI_FLUSH; 68 cmd = MI_FLUSH;
78 if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
79 cmd |= MI_NO_WRITE_FLUSH;
80 69
81 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) 70 if (mode & EMIT_INVALIDATE)
82 cmd |= MI_READ_FLUSH; 71 cmd |= MI_READ_FLUSH;
83 72
84 ret = intel_ring_begin(req, 2); 73 ret = intel_ring_begin(req, 2);
85 if (ret) 74 if (ret)
86 return ret; 75 return ret;
87 76
88 intel_ring_emit(engine, cmd); 77 intel_ring_emit(ring, cmd);
89 intel_ring_emit(engine, MI_NOOP); 78 intel_ring_emit(ring, MI_NOOP);
90 intel_ring_advance(engine); 79 intel_ring_advance(ring);
91 80
92 return 0; 81 return 0;
93} 82}
94 83
95static int 84static int
96gen4_render_ring_flush(struct drm_i915_gem_request *req, 85gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
97 u32 invalidate_domains,
98 u32 flush_domains)
99{ 86{
100 struct intel_engine_cs *engine = req->engine; 87 struct intel_ring *ring = req->ring;
101 u32 cmd; 88 u32 cmd;
102 int ret; 89 int ret;
103 90
@@ -129,23 +116,20 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req,
129 * are flushed at any MI_FLUSH. 116 * are flushed at any MI_FLUSH.
130 */ 117 */
131 118
132 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 119 cmd = MI_FLUSH;
133 if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) 120 if (mode & EMIT_INVALIDATE) {
134 cmd &= ~MI_NO_WRITE_FLUSH;
135 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
136 cmd |= MI_EXE_FLUSH; 121 cmd |= MI_EXE_FLUSH;
137 122 if (IS_G4X(req->i915) || IS_GEN5(req->i915))
138 if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && 123 cmd |= MI_INVALIDATE_ISP;
139 (IS_G4X(req->i915) || IS_GEN5(req->i915))) 124 }
140 cmd |= MI_INVALIDATE_ISP;
141 125
142 ret = intel_ring_begin(req, 2); 126 ret = intel_ring_begin(req, 2);
143 if (ret) 127 if (ret)
144 return ret; 128 return ret;
145 129
146 intel_ring_emit(engine, cmd); 130 intel_ring_emit(ring, cmd);
147 intel_ring_emit(engine, MI_NOOP); 131 intel_ring_emit(ring, MI_NOOP);
148 intel_ring_advance(engine); 132 intel_ring_advance(ring);
149 133
150 return 0; 134 return 0;
151} 135}
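
The flush vfuncs above now take a single mode bitmask instead of separate invalidate/flush domain arguments. A hedged caller sketch using the EMIT_* flags as they appear in this file (EMIT_BARRIER is used further down before emitting the workaround list and presumably combines both bits):

    static int example_emit_flushes(struct drm_i915_gem_request *req)
    {
            int ret;

            ret = req->engine->emit_flush(req, EMIT_FLUSH);         /* write caches only */
            if (ret)
                    return ret;

            ret = req->engine->emit_flush(req, EMIT_INVALIDATE);    /* read caches / TLBs only */
            if (ret)
                    return ret;

            /* both at once, as used before the per-engine workaround list */
            return req->engine->emit_flush(req, EMIT_BARRIER);
    }
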
@@ -190,45 +174,46 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req,
190static int 174static int
191intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) 175intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
192{ 176{
193 struct intel_engine_cs *engine = req->engine; 177 struct intel_ring *ring = req->ring;
194 u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 178 u32 scratch_addr =
179 req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
195 int ret; 180 int ret;
196 181
197 ret = intel_ring_begin(req, 6); 182 ret = intel_ring_begin(req, 6);
198 if (ret) 183 if (ret)
199 return ret; 184 return ret;
200 185
201 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); 186 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
202 intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | 187 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
203 PIPE_CONTROL_STALL_AT_SCOREBOARD); 188 PIPE_CONTROL_STALL_AT_SCOREBOARD);
204 intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ 189 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
205 intel_ring_emit(engine, 0); /* low dword */ 190 intel_ring_emit(ring, 0); /* low dword */
206 intel_ring_emit(engine, 0); /* high dword */ 191 intel_ring_emit(ring, 0); /* high dword */
207 intel_ring_emit(engine, MI_NOOP); 192 intel_ring_emit(ring, MI_NOOP);
208 intel_ring_advance(engine); 193 intel_ring_advance(ring);
209 194
210 ret = intel_ring_begin(req, 6); 195 ret = intel_ring_begin(req, 6);
211 if (ret) 196 if (ret)
212 return ret; 197 return ret;
213 198
214 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); 199 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
215 intel_ring_emit(engine, PIPE_CONTROL_QW_WRITE); 200 intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
216 intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ 201 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
217 intel_ring_emit(engine, 0); 202 intel_ring_emit(ring, 0);
218 intel_ring_emit(engine, 0); 203 intel_ring_emit(ring, 0);
219 intel_ring_emit(engine, MI_NOOP); 204 intel_ring_emit(ring, MI_NOOP);
220 intel_ring_advance(engine); 205 intel_ring_advance(ring);
221 206
222 return 0; 207 return 0;
223} 208}
224 209
225static int 210static int
226gen6_render_ring_flush(struct drm_i915_gem_request *req, 211gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
227 u32 invalidate_domains, u32 flush_domains)
228{ 212{
229 struct intel_engine_cs *engine = req->engine; 213 struct intel_ring *ring = req->ring;
214 u32 scratch_addr =
215 req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
230 u32 flags = 0; 216 u32 flags = 0;
231 u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
232 int ret; 217 int ret;
233 218
234 /* Force SNB workarounds for PIPE_CONTROL flushes */ 219 /* Force SNB workarounds for PIPE_CONTROL flushes */
@@ -240,7 +225,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
240 * number of bits based on the write domains has little performance 225 * number of bits based on the write domains has little performance
241 * impact. 226 * impact.
242 */ 227 */
243 if (flush_domains) { 228 if (mode & EMIT_FLUSH) {
244 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 229 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
245 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 230 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
246 /* 231 /*
@@ -249,7 +234,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
249 */ 234 */
250 flags |= PIPE_CONTROL_CS_STALL; 235 flags |= PIPE_CONTROL_CS_STALL;
251 } 236 }
252 if (invalidate_domains) { 237 if (mode & EMIT_INVALIDATE) {
253 flags |= PIPE_CONTROL_TLB_INVALIDATE; 238 flags |= PIPE_CONTROL_TLB_INVALIDATE;
254 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 239 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
255 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 240 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -266,11 +251,11 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
266 if (ret) 251 if (ret)
267 return ret; 252 return ret;
268 253
269 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); 254 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
270 intel_ring_emit(engine, flags); 255 intel_ring_emit(ring, flags);
271 intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); 256 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
272 intel_ring_emit(engine, 0); 257 intel_ring_emit(ring, 0);
273 intel_ring_advance(engine); 258 intel_ring_advance(ring);
274 259
275 return 0; 260 return 0;
276} 261}
@@ -278,30 +263,31 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
278static int 263static int
279gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) 264gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
280{ 265{
281 struct intel_engine_cs *engine = req->engine; 266 struct intel_ring *ring = req->ring;
282 int ret; 267 int ret;
283 268
284 ret = intel_ring_begin(req, 4); 269 ret = intel_ring_begin(req, 4);
285 if (ret) 270 if (ret)
286 return ret; 271 return ret;
287 272
288 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); 273 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
289 intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | 274 intel_ring_emit(ring,
290 PIPE_CONTROL_STALL_AT_SCOREBOARD); 275 PIPE_CONTROL_CS_STALL |
291 intel_ring_emit(engine, 0); 276 PIPE_CONTROL_STALL_AT_SCOREBOARD);
292 intel_ring_emit(engine, 0); 277 intel_ring_emit(ring, 0);
293 intel_ring_advance(engine); 278 intel_ring_emit(ring, 0);
279 intel_ring_advance(ring);
294 280
295 return 0; 281 return 0;
296} 282}
297 283
298static int 284static int
299gen7_render_ring_flush(struct drm_i915_gem_request *req, 285gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
300 u32 invalidate_domains, u32 flush_domains)
301{ 286{
302 struct intel_engine_cs *engine = req->engine; 287 struct intel_ring *ring = req->ring;
288 u32 scratch_addr =
289 req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
303 u32 flags = 0; 290 u32 flags = 0;
304 u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
305 int ret; 291 int ret;
306 292
307 /* 293 /*
@@ -318,13 +304,13 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
318 * number of bits based on the write domains has little performance 304 * number of bits based on the write domains has little performance
319 * impact. 305 * impact.
320 */ 306 */
321 if (flush_domains) { 307 if (mode & EMIT_FLUSH) {
322 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 308 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
323 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 309 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
324 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 310 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
325 flags |= PIPE_CONTROL_FLUSH_ENABLE; 311 flags |= PIPE_CONTROL_FLUSH_ENABLE;
326 } 312 }
327 if (invalidate_domains) { 313 if (mode & EMIT_INVALIDATE) {
328 flags |= PIPE_CONTROL_TLB_INVALIDATE; 314 flags |= PIPE_CONTROL_TLB_INVALIDATE;
329 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 315 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
330 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 316 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -350,11 +336,11 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
350 if (ret) 336 if (ret)
351 return ret; 337 return ret;
352 338
353 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); 339 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
354 intel_ring_emit(engine, flags); 340 intel_ring_emit(ring, flags);
355 intel_ring_emit(engine, scratch_addr); 341 intel_ring_emit(ring, scratch_addr);
356 intel_ring_emit(engine, 0); 342 intel_ring_emit(ring, 0);
357 intel_ring_advance(engine); 343 intel_ring_advance(ring);
358 344
359 return 0; 345 return 0;
360} 346}
@@ -363,41 +349,40 @@ static int
363gen8_emit_pipe_control(struct drm_i915_gem_request *req, 349gen8_emit_pipe_control(struct drm_i915_gem_request *req,
364 u32 flags, u32 scratch_addr) 350 u32 flags, u32 scratch_addr)
365{ 351{
366 struct intel_engine_cs *engine = req->engine; 352 struct intel_ring *ring = req->ring;
367 int ret; 353 int ret;
368 354
369 ret = intel_ring_begin(req, 6); 355 ret = intel_ring_begin(req, 6);
370 if (ret) 356 if (ret)
371 return ret; 357 return ret;
372 358
373 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); 359 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
374 intel_ring_emit(engine, flags); 360 intel_ring_emit(ring, flags);
375 intel_ring_emit(engine, scratch_addr); 361 intel_ring_emit(ring, scratch_addr);
376 intel_ring_emit(engine, 0); 362 intel_ring_emit(ring, 0);
377 intel_ring_emit(engine, 0); 363 intel_ring_emit(ring, 0);
378 intel_ring_emit(engine, 0); 364 intel_ring_emit(ring, 0);
379 intel_ring_advance(engine); 365 intel_ring_advance(ring);
380 366
381 return 0; 367 return 0;
382} 368}
383 369
384static int 370static int
385gen8_render_ring_flush(struct drm_i915_gem_request *req, 371gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
386 u32 invalidate_domains, u32 flush_domains)
387{ 372{
388 u32 flags = 0;
389 u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 373 u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
374 u32 flags = 0;
390 int ret; 375 int ret;
391 376
392 flags |= PIPE_CONTROL_CS_STALL; 377 flags |= PIPE_CONTROL_CS_STALL;
393 378
394 if (flush_domains) { 379 if (mode & EMIT_FLUSH) {
395 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 380 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
396 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 381 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
397 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 382 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
398 flags |= PIPE_CONTROL_FLUSH_ENABLE; 383 flags |= PIPE_CONTROL_FLUSH_ENABLE;
399 } 384 }
400 if (invalidate_domains) { 385 if (mode & EMIT_INVALIDATE) {
401 flags |= PIPE_CONTROL_TLB_INVALIDATE; 386 flags |= PIPE_CONTROL_TLB_INVALIDATE;
402 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 387 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
403 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 388 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -419,14 +404,7 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req,
419 return gen8_emit_pipe_control(req, flags, scratch_addr); 404 return gen8_emit_pipe_control(req, flags, scratch_addr);
420} 405}
421 406
422static void ring_write_tail(struct intel_engine_cs *engine, 407u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
423 u32 value)
424{
425 struct drm_i915_private *dev_priv = engine->i915;
426 I915_WRITE_TAIL(engine, value);
427}
428
429u64 intel_ring_get_active_head(struct intel_engine_cs *engine)
430{ 408{
431 struct drm_i915_private *dev_priv = engine->i915; 409 struct drm_i915_private *dev_priv = engine->i915;
432 u64 acthd; 410 u64 acthd;
@@ -539,7 +517,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
539 517
540 I915_WRITE_CTL(engine, 0); 518 I915_WRITE_CTL(engine, 0);
541 I915_WRITE_HEAD(engine, 0); 519 I915_WRITE_HEAD(engine, 0);
542 engine->write_tail(engine, 0); 520 I915_WRITE_TAIL(engine, 0);
543 521
544 if (!IS_GEN2(dev_priv)) { 522 if (!IS_GEN2(dev_priv)) {
545 (void)I915_READ_CTL(engine); 523 (void)I915_READ_CTL(engine);
@@ -549,16 +527,11 @@ static bool stop_ring(struct intel_engine_cs *engine)
549 return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; 527 return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;
550} 528}
551 529
552void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
553{
554 memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
555}
556
557static int init_ring_common(struct intel_engine_cs *engine) 530static int init_ring_common(struct intel_engine_cs *engine)
558{ 531{
559 struct drm_i915_private *dev_priv = engine->i915; 532 struct drm_i915_private *dev_priv = engine->i915;
560 struct intel_ringbuffer *ringbuf = engine->buffer; 533 struct intel_ring *ring = engine->buffer;
561 struct drm_i915_gem_object *obj = ringbuf->obj; 534 struct drm_i915_gem_object *obj = ring->obj;
562 int ret = 0; 535 int ret = 0;
563 536
564 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 537 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
@@ -608,7 +581,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
608 (void)I915_READ_HEAD(engine); 581 (void)I915_READ_HEAD(engine);
609 582
610 I915_WRITE_CTL(engine, 583 I915_WRITE_CTL(engine,
611 ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) 584 ((ring->size - PAGE_SIZE) & RING_NR_PAGES)
612 | RING_VALID); 585 | RING_VALID);
613 586
614 /* If the head is still not zero, the ring is dead */ 587 /* If the head is still not zero, the ring is dead */
@@ -627,10 +600,10 @@ static int init_ring_common(struct intel_engine_cs *engine)
627 goto out; 600 goto out;
628 } 601 }
629 602
630 ringbuf->last_retired_head = -1; 603 ring->last_retired_head = -1;
631 ringbuf->head = I915_READ_HEAD(engine); 604 ring->head = I915_READ_HEAD(engine);
632 ringbuf->tail = I915_READ_TAIL(engine) & TAIL_ADDR; 605 ring->tail = I915_READ_TAIL(engine) & TAIL_ADDR;
633 intel_ring_update_space(ringbuf); 606 intel_ring_update_space(ring);
634 607
635 intel_engine_init_hangcheck(engine); 608 intel_engine_init_hangcheck(engine);
636 609
@@ -646,7 +619,7 @@ void intel_fini_pipe_control(struct intel_engine_cs *engine)
646 return; 619 return;
647 620
648 i915_gem_object_ggtt_unpin(engine->scratch.obj); 621 i915_gem_object_ggtt_unpin(engine->scratch.obj);
649 drm_gem_object_unreference(&engine->scratch.obj->base); 622 i915_gem_object_put(engine->scratch.obj);
650 engine->scratch.obj = NULL; 623 engine->scratch.obj = NULL;
651} 624}
652 625
@@ -666,7 +639,7 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
666 goto err; 639 goto err;
667 } 640 }
668 641
669 ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH); 642 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH);
670 if (ret) 643 if (ret)
671 goto err_unref; 644 goto err_unref;
672 645
@@ -677,22 +650,21 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size)
677 return 0; 650 return 0;
678 651
679err_unref: 652err_unref:
680 drm_gem_object_unreference(&engine->scratch.obj->base); 653 i915_gem_object_put(engine->scratch.obj);
681err: 654err:
682 return ret; 655 return ret;
683} 656}
684 657
685static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) 658static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
686{ 659{
687 struct intel_engine_cs *engine = req->engine; 660 struct intel_ring *ring = req->ring;
688 struct i915_workarounds *w = &req->i915->workarounds; 661 struct i915_workarounds *w = &req->i915->workarounds;
689 int ret, i; 662 int ret, i;
690 663
691 if (w->count == 0) 664 if (w->count == 0)
692 return 0; 665 return 0;
693 666
694 engine->gpu_caches_dirty = true; 667 ret = req->engine->emit_flush(req, EMIT_BARRIER);
695 ret = intel_ring_flush_all_caches(req);
696 if (ret) 668 if (ret)
697 return ret; 669 return ret;
698 670
@@ -700,17 +672,16 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
700 if (ret) 672 if (ret)
701 return ret; 673 return ret;
702 674
703 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(w->count)); 675 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
704 for (i = 0; i < w->count; i++) { 676 for (i = 0; i < w->count; i++) {
705 intel_ring_emit_reg(engine, w->reg[i].addr); 677 intel_ring_emit_reg(ring, w->reg[i].addr);
706 intel_ring_emit(engine, w->reg[i].value); 678 intel_ring_emit(ring, w->reg[i].value);
707 } 679 }
708 intel_ring_emit(engine, MI_NOOP); 680 intel_ring_emit(ring, MI_NOOP);
709 681
710 intel_ring_advance(engine); 682 intel_ring_advance(ring);
711 683
712 engine->gpu_caches_dirty = true; 684 ret = req->engine->emit_flush(req, EMIT_BARRIER);
713 ret = intel_ring_flush_all_caches(req);
714 if (ret) 685 if (ret)
715 return ret; 686 return ret;
716 687
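The workarounds hunk above also shows the file-wide switch from emitting through the engine to emitting through the request's ring (req->ring), with explicit engine->emit_flush(req, EMIT_BARRIER) calls replacing the old gpu_caches_dirty bookkeeping. A hedged sketch of the new emission pattern, mirroring the flush functions later in this diff; the helper name is invented purely for illustration:

	static int emit_two_noops(struct drm_i915_gem_request *req)
	{
		struct intel_ring *ring = req->ring;
		int ret;

		ret = intel_ring_begin(req, 2);		/* space is reserved via the request */
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_NOOP);		/* but emission targets the ring */
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);		/* advance the ring, not the engine */

		return 0;
	}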
@@ -1178,8 +1149,8 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
1178 I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | 1149 I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
1179 L3_HIGH_PRIO_CREDITS(2)); 1150 L3_HIGH_PRIO_CREDITS(2));
1180 1151
1181 /* WaInsertDummyPushConstPs:bxt */ 1152 /* WaToEnableHwFixForPushConstHWBug:bxt */
1182 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) 1153 if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
1183 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 1154 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1184 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 1155 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1185 1156
@@ -1222,8 +1193,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine)
1222 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | 1193 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1223 GEN8_LQSC_RO_PERF_DIS); 1194 GEN8_LQSC_RO_PERF_DIS);
1224 1195
1225 /* WaInsertDummyPushConstPs:kbl */ 1196 /* WaToEnableHwFixForPushConstHWBug:kbl */
1226 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 1197 if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
1227 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 1198 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1228 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 1199 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1229 1200
@@ -1331,189 +1302,195 @@ static void render_ring_cleanup(struct intel_engine_cs *engine)
1331 1302
1332 if (dev_priv->semaphore_obj) { 1303 if (dev_priv->semaphore_obj) {
1333 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); 1304 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
1334 drm_gem_object_unreference(&dev_priv->semaphore_obj->base); 1305 i915_gem_object_put(dev_priv->semaphore_obj);
1335 dev_priv->semaphore_obj = NULL; 1306 dev_priv->semaphore_obj = NULL;
1336 } 1307 }
1337 1308
1338 intel_fini_pipe_control(engine); 1309 intel_fini_pipe_control(engine);
1339} 1310}
1340 1311
-static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
-			   unsigned int num_dwords)
+static int gen8_rcs_signal(struct drm_i915_gem_request *req)
 {
-#define MBOX_UPDATE_DWORDS 8
-	struct intel_engine_cs *signaller = signaller_req->engine;
-	struct drm_i915_private *dev_priv = signaller_req->i915;
+	struct intel_ring *ring = req->ring;
+	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_engine_cs *waiter;
 	enum intel_engine_id id;
 	int ret, num_rings;
 
 	num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
-	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
-#undef MBOX_UPDATE_DWORDS
-
-	ret = intel_ring_begin(signaller_req, num_dwords);
+	ret = intel_ring_begin(req, (num_rings-1) * 8);
 	if (ret)
 		return ret;
 
 	for_each_engine_id(waiter, dev_priv, id) {
-		u64 gtt_offset = signaller->semaphore.signal_ggtt[id];
+		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
 			continue;
 
-		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
-		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
-					   PIPE_CONTROL_QW_WRITE |
-					   PIPE_CONTROL_CS_STALL);
-		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
-		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller_req->seqno);
-		intel_ring_emit(signaller, 0);
-		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
-				   MI_SEMAPHORE_TARGET(waiter->hw_id));
-		intel_ring_emit(signaller, 0);
+		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
+		intel_ring_emit(ring,
+				PIPE_CONTROL_GLOBAL_GTT_IVB |
+				PIPE_CONTROL_QW_WRITE |
+				PIPE_CONTROL_CS_STALL);
+		intel_ring_emit(ring, lower_32_bits(gtt_offset));
+		intel_ring_emit(ring, upper_32_bits(gtt_offset));
+		intel_ring_emit(ring, req->fence.seqno);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring,
+				MI_SEMAPHORE_SIGNAL |
+				MI_SEMAPHORE_TARGET(waiter->hw_id));
+		intel_ring_emit(ring, 0);
 	}
+	intel_ring_advance(ring);
 
 	return 0;
 }
1379 1348
1380static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, 1349static int gen8_xcs_signal(struct drm_i915_gem_request *req)
1381 unsigned int num_dwords)
1382{ 1350{
1383#define MBOX_UPDATE_DWORDS 6 1351 struct intel_ring *ring = req->ring;
1384 struct intel_engine_cs *signaller = signaller_req->engine; 1352 struct drm_i915_private *dev_priv = req->i915;
1385 struct drm_i915_private *dev_priv = signaller_req->i915;
1386 struct intel_engine_cs *waiter; 1353 struct intel_engine_cs *waiter;
1387 enum intel_engine_id id; 1354 enum intel_engine_id id;
1388 int ret, num_rings; 1355 int ret, num_rings;
1389 1356
1390 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); 1357 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
1391 num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; 1358 ret = intel_ring_begin(req, (num_rings-1) * 6);
1392#undef MBOX_UPDATE_DWORDS
1393
1394 ret = intel_ring_begin(signaller_req, num_dwords);
1395 if (ret) 1359 if (ret)
1396 return ret; 1360 return ret;
1397 1361
1398 for_each_engine_id(waiter, dev_priv, id) { 1362 for_each_engine_id(waiter, dev_priv, id) {
1399 u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; 1363 u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
1400 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) 1364 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1401 continue; 1365 continue;
1402 1366
1403 intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | 1367 intel_ring_emit(ring,
1404 MI_FLUSH_DW_OP_STOREDW); 1368 (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
1405 intel_ring_emit(signaller, lower_32_bits(gtt_offset) | 1369 intel_ring_emit(ring,
1406 MI_FLUSH_DW_USE_GTT); 1370 lower_32_bits(gtt_offset) |
1407 intel_ring_emit(signaller, upper_32_bits(gtt_offset)); 1371 MI_FLUSH_DW_USE_GTT);
1408 intel_ring_emit(signaller, signaller_req->seqno); 1372 intel_ring_emit(ring, upper_32_bits(gtt_offset));
1409 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | 1373 intel_ring_emit(ring, req->fence.seqno);
1410 MI_SEMAPHORE_TARGET(waiter->hw_id)); 1374 intel_ring_emit(ring,
1411 intel_ring_emit(signaller, 0); 1375 MI_SEMAPHORE_SIGNAL |
1376 MI_SEMAPHORE_TARGET(waiter->hw_id));
1377 intel_ring_emit(ring, 0);
1412 } 1378 }
1379 intel_ring_advance(ring);
1413 1380
1414 return 0; 1381 return 0;
1415} 1382}
1416 1383
1417static int gen6_signal(struct drm_i915_gem_request *signaller_req, 1384static int gen6_signal(struct drm_i915_gem_request *req)
1418 unsigned int num_dwords)
1419{ 1385{
1420 struct intel_engine_cs *signaller = signaller_req->engine; 1386 struct intel_ring *ring = req->ring;
1421 struct drm_i915_private *dev_priv = signaller_req->i915; 1387 struct drm_i915_private *dev_priv = req->i915;
1422 struct intel_engine_cs *useless; 1388 struct intel_engine_cs *useless;
1423 enum intel_engine_id id; 1389 enum intel_engine_id id;
1424 int ret, num_rings; 1390 int ret, num_rings;
1425 1391
1426#define MBOX_UPDATE_DWORDS 3
1427 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); 1392 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
1428 num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); 1393 ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2));
1429#undef MBOX_UPDATE_DWORDS
1430
1431 ret = intel_ring_begin(signaller_req, num_dwords);
1432 if (ret) 1394 if (ret)
1433 return ret; 1395 return ret;
1434 1396
1435 for_each_engine_id(useless, dev_priv, id) { 1397 for_each_engine_id(useless, dev_priv, id) {
1436 i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[id]; 1398 i915_reg_t mbox_reg = req->engine->semaphore.mbox.signal[id];
1437 1399
1438 if (i915_mmio_reg_valid(mbox_reg)) { 1400 if (i915_mmio_reg_valid(mbox_reg)) {
1439 intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); 1401 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1440 intel_ring_emit_reg(signaller, mbox_reg); 1402 intel_ring_emit_reg(ring, mbox_reg);
1441 intel_ring_emit(signaller, signaller_req->seqno); 1403 intel_ring_emit(ring, req->fence.seqno);
1442 } 1404 }
1443 } 1405 }
1444 1406
1445 /* If num_dwords was rounded, make sure the tail pointer is correct */ 1407 /* If num_dwords was rounded, make sure the tail pointer is correct */
1446 if (num_rings % 2 == 0) 1408 if (num_rings % 2 == 0)
1447 intel_ring_emit(signaller, MI_NOOP); 1409 intel_ring_emit(ring, MI_NOOP);
1410 intel_ring_advance(ring);
1411
1412 return 0;
1413}
1414
1415static void i9xx_submit_request(struct drm_i915_gem_request *request)
1416{
1417 struct drm_i915_private *dev_priv = request->i915;
1418
1419 I915_WRITE_TAIL(request->engine,
1420 intel_ring_offset(request->ring, request->tail));
1421}
1422
1423static int i9xx_emit_request(struct drm_i915_gem_request *req)
1424{
1425 struct intel_ring *ring = req->ring;
1426 int ret;
1427
1428 ret = intel_ring_begin(req, 4);
1429 if (ret)
1430 return ret;
1431
1432 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1433 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1434 intel_ring_emit(ring, req->fence.seqno);
1435 intel_ring_emit(ring, MI_USER_INTERRUPT);
1436 intel_ring_advance(ring);
1437
1438 req->tail = ring->tail;
1448 1439
1449 return 0; 1440 return 0;
1450} 1441}
1451 1442
1452/** 1443/**
1453 * gen6_add_request - Update the semaphore mailbox registers 1444 * gen6_sema_emit_request - Update the semaphore mailbox registers
1454 * 1445 *
1455 * @request - request to write to the ring 1446 * @request - request to write to the ring
1456 * 1447 *
1457 * Update the mailbox registers in the *other* rings with the current seqno. 1448 * Update the mailbox registers in the *other* rings with the current seqno.
1458 * This acts like a signal in the canonical semaphore. 1449 * This acts like a signal in the canonical semaphore.
1459 */ 1450 */
1460static int 1451static int gen6_sema_emit_request(struct drm_i915_gem_request *req)
1461gen6_add_request(struct drm_i915_gem_request *req)
1462{ 1452{
1463 struct intel_engine_cs *engine = req->engine;
1464 int ret; 1453 int ret;
1465 1454
1466 if (engine->semaphore.signal) 1455 ret = req->engine->semaphore.signal(req);
1467 ret = engine->semaphore.signal(req, 4);
1468 else
1469 ret = intel_ring_begin(req, 4);
1470
1471 if (ret) 1456 if (ret)
1472 return ret; 1457 return ret;
1473 1458
1474 intel_ring_emit(engine, MI_STORE_DWORD_INDEX); 1459 return i9xx_emit_request(req);
1475 intel_ring_emit(engine,
1476 I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1477 intel_ring_emit(engine, req->seqno);
1478 intel_ring_emit(engine, MI_USER_INTERRUPT);
1479 __intel_ring_advance(engine);
1480
1481 return 0;
1482} 1460}
1483 1461
1484static int 1462static int gen8_render_emit_request(struct drm_i915_gem_request *req)
1485gen8_render_add_request(struct drm_i915_gem_request *req)
1486{ 1463{
1487 struct intel_engine_cs *engine = req->engine; 1464 struct intel_engine_cs *engine = req->engine;
1465 struct intel_ring *ring = req->ring;
1488 int ret; 1466 int ret;
1489 1467
1490 if (engine->semaphore.signal) 1468 if (engine->semaphore.signal) {
1491 ret = engine->semaphore.signal(req, 8); 1469 ret = engine->semaphore.signal(req);
1492 else 1470 if (ret)
1493 ret = intel_ring_begin(req, 8); 1471 return ret;
1472 }
1473
1474 ret = intel_ring_begin(req, 8);
1494 if (ret) 1475 if (ret)
1495 return ret; 1476 return ret;
1496 1477
1497 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); 1478 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
1498 intel_ring_emit(engine, (PIPE_CONTROL_GLOBAL_GTT_IVB | 1479 intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB |
1499 PIPE_CONTROL_CS_STALL | 1480 PIPE_CONTROL_CS_STALL |
1500 PIPE_CONTROL_QW_WRITE)); 1481 PIPE_CONTROL_QW_WRITE));
1501 intel_ring_emit(engine, intel_hws_seqno_address(req->engine)); 1482 intel_ring_emit(ring, intel_hws_seqno_address(engine));
1502 intel_ring_emit(engine, 0); 1483 intel_ring_emit(ring, 0);
1503 intel_ring_emit(engine, i915_gem_request_get_seqno(req)); 1484 intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1504 /* We're thrashing one dword of HWS. */ 1485 /* We're thrashing one dword of HWS. */
1505 intel_ring_emit(engine, 0); 1486 intel_ring_emit(ring, 0);
1506 intel_ring_emit(engine, MI_USER_INTERRUPT); 1487 intel_ring_emit(ring, MI_USER_INTERRUPT);
1507 intel_ring_emit(engine, MI_NOOP); 1488 intel_ring_emit(ring, MI_NOOP);
1508 __intel_ring_advance(engine); 1489 intel_ring_advance(ring);
1509 1490
1510 return 0; 1491 req->tail = ring->tail;
1511}
1512 1492
1513static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, 1493 return 0;
1514 u32 seqno)
1515{
1516 return dev_priv->last_seqno < seqno;
1517} 1494}
1518 1495
1519/** 1496/**
@@ -1525,82 +1502,71 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv,
1525 */ 1502 */
1526 1503
1527static int 1504static int
1528gen8_ring_sync(struct drm_i915_gem_request *waiter_req, 1505gen8_ring_sync_to(struct drm_i915_gem_request *req,
1529 struct intel_engine_cs *signaller, 1506 struct drm_i915_gem_request *signal)
1530 u32 seqno)
1531{ 1507{
1532 struct intel_engine_cs *waiter = waiter_req->engine; 1508 struct intel_ring *ring = req->ring;
1533 struct drm_i915_private *dev_priv = waiter_req->i915; 1509 struct drm_i915_private *dev_priv = req->i915;
1534 u64 offset = GEN8_WAIT_OFFSET(waiter, signaller->id); 1510 u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id);
1535 struct i915_hw_ppgtt *ppgtt; 1511 struct i915_hw_ppgtt *ppgtt;
1536 int ret; 1512 int ret;
1537 1513
1538 ret = intel_ring_begin(waiter_req, 4); 1514 ret = intel_ring_begin(req, 4);
1539 if (ret) 1515 if (ret)
1540 return ret; 1516 return ret;
1541 1517
1542 intel_ring_emit(waiter, MI_SEMAPHORE_WAIT | 1518 intel_ring_emit(ring,
1543 MI_SEMAPHORE_GLOBAL_GTT | 1519 MI_SEMAPHORE_WAIT |
1544 MI_SEMAPHORE_SAD_GTE_SDD); 1520 MI_SEMAPHORE_GLOBAL_GTT |
1545 intel_ring_emit(waiter, seqno); 1521 MI_SEMAPHORE_SAD_GTE_SDD);
1546 intel_ring_emit(waiter, lower_32_bits(offset)); 1522 intel_ring_emit(ring, signal->fence.seqno);
1547 intel_ring_emit(waiter, upper_32_bits(offset)); 1523 intel_ring_emit(ring, lower_32_bits(offset));
1548 intel_ring_advance(waiter); 1524 intel_ring_emit(ring, upper_32_bits(offset));
1525 intel_ring_advance(ring);
1549 1526
1550 /* When the !RCS engines idle waiting upon a semaphore, they lose their 1527 /* When the !RCS engines idle waiting upon a semaphore, they lose their
1551 * pagetables and we must reload them before executing the batch. 1528 * pagetables and we must reload them before executing the batch.
1552 * We do this on the i915_switch_context() following the wait and 1529 * We do this on the i915_switch_context() following the wait and
1553 * before the dispatch. 1530 * before the dispatch.
1554 */ 1531 */
1555 ppgtt = waiter_req->ctx->ppgtt; 1532 ppgtt = req->ctx->ppgtt;
1556 if (ppgtt && waiter_req->engine->id != RCS) 1533 if (ppgtt && req->engine->id != RCS)
1557 ppgtt->pd_dirty_rings |= intel_engine_flag(waiter_req->engine); 1534 ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine);
1558 return 0; 1535 return 0;
1559} 1536}
1560 1537
1561static int 1538static int
1562gen6_ring_sync(struct drm_i915_gem_request *waiter_req, 1539gen6_ring_sync_to(struct drm_i915_gem_request *req,
1563 struct intel_engine_cs *signaller, 1540 struct drm_i915_gem_request *signal)
1564 u32 seqno)
1565{ 1541{
1566 struct intel_engine_cs *waiter = waiter_req->engine; 1542 struct intel_ring *ring = req->ring;
1567 u32 dw1 = MI_SEMAPHORE_MBOX | 1543 u32 dw1 = MI_SEMAPHORE_MBOX |
1568 MI_SEMAPHORE_COMPARE | 1544 MI_SEMAPHORE_COMPARE |
1569 MI_SEMAPHORE_REGISTER; 1545 MI_SEMAPHORE_REGISTER;
1570 u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id]; 1546 u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->id];
1571 int ret; 1547 int ret;
1572 1548
1573 /* Throughout all of the GEM code, seqno passed implies our current
1574 * seqno is >= the last seqno executed. However for hardware the
1575 * comparison is strictly greater than.
1576 */
1577 seqno -= 1;
1578
1579 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); 1549 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
1580 1550
1581 ret = intel_ring_begin(waiter_req, 4); 1551 ret = intel_ring_begin(req, 4);
1582 if (ret) 1552 if (ret)
1583 return ret; 1553 return ret;
1584 1554
1585 /* If seqno wrap happened, omit the wait with no-ops */ 1555 intel_ring_emit(ring, dw1 | wait_mbox);
1586 if (likely(!i915_gem_has_seqno_wrapped(waiter_req->i915, seqno))) { 1556 /* Throughout all of the GEM code, seqno passed implies our current
1587 intel_ring_emit(waiter, dw1 | wait_mbox); 1557 * seqno is >= the last seqno executed. However for hardware the
1588 intel_ring_emit(waiter, seqno); 1558 * comparison is strictly greater than.
1589 intel_ring_emit(waiter, 0); 1559 */
1590 intel_ring_emit(waiter, MI_NOOP); 1560 intel_ring_emit(ring, signal->fence.seqno - 1);
1591 } else { 1561 intel_ring_emit(ring, 0);
1592 intel_ring_emit(waiter, MI_NOOP); 1562 intel_ring_emit(ring, MI_NOOP);
1593 intel_ring_emit(waiter, MI_NOOP); 1563 intel_ring_advance(ring);
1594 intel_ring_emit(waiter, MI_NOOP);
1595 intel_ring_emit(waiter, MI_NOOP);
1596 }
1597 intel_ring_advance(waiter);
1598 1564
1599 return 0; 1565 return 0;
1600} 1566}
1601 1567
1602static void 1568static void
1603gen5_seqno_barrier(struct intel_engine_cs *ring) 1569gen5_seqno_barrier(struct intel_engine_cs *engine)
1604{ 1570{
1605 /* MI_STORE are internally buffered by the GPU and not flushed 1571 /* MI_STORE are internally buffered by the GPU and not flushed
1606 * either by MI_FLUSH or SyncFlush or any other combination of 1572 * either by MI_FLUSH or SyncFlush or any other combination of
@@ -1693,40 +1659,18 @@ i8xx_irq_disable(struct intel_engine_cs *engine)
1693} 1659}
1694 1660
1695static int 1661static int
1696bsd_ring_flush(struct drm_i915_gem_request *req, 1662bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
1697 u32 invalidate_domains,
1698 u32 flush_domains)
1699{ 1663{
1700 struct intel_engine_cs *engine = req->engine; 1664 struct intel_ring *ring = req->ring;
1701 int ret; 1665 int ret;
1702 1666
1703 ret = intel_ring_begin(req, 2); 1667 ret = intel_ring_begin(req, 2);
1704 if (ret) 1668 if (ret)
1705 return ret; 1669 return ret;
1706 1670
1707 intel_ring_emit(engine, MI_FLUSH); 1671 intel_ring_emit(ring, MI_FLUSH);
1708 intel_ring_emit(engine, MI_NOOP); 1672 intel_ring_emit(ring, MI_NOOP);
1709 intel_ring_advance(engine); 1673 intel_ring_advance(ring);
1710 return 0;
1711}
1712
1713static int
1714i9xx_add_request(struct drm_i915_gem_request *req)
1715{
1716 struct intel_engine_cs *engine = req->engine;
1717 int ret;
1718
1719 ret = intel_ring_begin(req, 4);
1720 if (ret)
1721 return ret;
1722
1723 intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
1724 intel_ring_emit(engine,
1725 I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1726 intel_ring_emit(engine, req->seqno);
1727 intel_ring_emit(engine, MI_USER_INTERRUPT);
1728 __intel_ring_advance(engine);
1729
1730 return 0; 1674 return 0;
1731} 1675}
1732 1676
@@ -1788,24 +1732,24 @@ gen8_irq_disable(struct intel_engine_cs *engine)
1788} 1732}
1789 1733
1790static int 1734static int
1791i965_dispatch_execbuffer(struct drm_i915_gem_request *req, 1735i965_emit_bb_start(struct drm_i915_gem_request *req,
1792 u64 offset, u32 length, 1736 u64 offset, u32 length,
1793 unsigned dispatch_flags) 1737 unsigned int dispatch_flags)
1794{ 1738{
1795 struct intel_engine_cs *engine = req->engine; 1739 struct intel_ring *ring = req->ring;
1796 int ret; 1740 int ret;
1797 1741
1798 ret = intel_ring_begin(req, 2); 1742 ret = intel_ring_begin(req, 2);
1799 if (ret) 1743 if (ret)
1800 return ret; 1744 return ret;
1801 1745
1802 intel_ring_emit(engine, 1746 intel_ring_emit(ring,
1803 MI_BATCH_BUFFER_START | 1747 MI_BATCH_BUFFER_START |
1804 MI_BATCH_GTT | 1748 MI_BATCH_GTT |
1805 (dispatch_flags & I915_DISPATCH_SECURE ? 1749 (dispatch_flags & I915_DISPATCH_SECURE ?
1806 0 : MI_BATCH_NON_SECURE_I965)); 1750 0 : MI_BATCH_NON_SECURE_I965));
1807 intel_ring_emit(engine, offset); 1751 intel_ring_emit(ring, offset);
1808 intel_ring_advance(engine); 1752 intel_ring_advance(ring);
1809 1753
1810 return 0; 1754 return 0;
1811} 1755}
@@ -1815,12 +1759,12 @@ i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
1815#define I830_TLB_ENTRIES (2) 1759#define I830_TLB_ENTRIES (2)
1816#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) 1760#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
1817static int 1761static int
1818i830_dispatch_execbuffer(struct drm_i915_gem_request *req, 1762i830_emit_bb_start(struct drm_i915_gem_request *req,
1819 u64 offset, u32 len, 1763 u64 offset, u32 len,
1820 unsigned dispatch_flags) 1764 unsigned int dispatch_flags)
1821{ 1765{
1822 struct intel_engine_cs *engine = req->engine; 1766 struct intel_ring *ring = req->ring;
1823 u32 cs_offset = engine->scratch.gtt_offset; 1767 u32 cs_offset = req->engine->scratch.gtt_offset;
1824 int ret; 1768 int ret;
1825 1769
1826 ret = intel_ring_begin(req, 6); 1770 ret = intel_ring_begin(req, 6);
@@ -1828,13 +1772,13 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
1828 return ret; 1772 return ret;
1829 1773
1830 /* Evict the invalid PTE TLBs */ 1774 /* Evict the invalid PTE TLBs */
1831 intel_ring_emit(engine, COLOR_BLT_CMD | BLT_WRITE_RGBA); 1775 intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1832 intel_ring_emit(engine, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); 1776 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1833 intel_ring_emit(engine, I830_TLB_ENTRIES << 16 | 4); /* load each page */ 1777 intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1834 intel_ring_emit(engine, cs_offset); 1778 intel_ring_emit(ring, cs_offset);
1835 intel_ring_emit(engine, 0xdeadbeef); 1779 intel_ring_emit(ring, 0xdeadbeef);
1836 intel_ring_emit(engine, MI_NOOP); 1780 intel_ring_emit(ring, MI_NOOP);
1837 intel_ring_advance(engine); 1781 intel_ring_advance(ring);
1838 1782
1839 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { 1783 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
1840 if (len > I830_BATCH_LIMIT) 1784 if (len > I830_BATCH_LIMIT)
@@ -1848,17 +1792,17 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
1848 * stable batch scratch bo area (so that the CS never 1792 * stable batch scratch bo area (so that the CS never
1849 * stumbles over its tlb invalidation bug) ... 1793 * stumbles over its tlb invalidation bug) ...
1850 */ 1794 */
1851 intel_ring_emit(engine, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); 1795 intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1852 intel_ring_emit(engine, 1796 intel_ring_emit(ring,
1853 BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); 1797 BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
1854 intel_ring_emit(engine, DIV_ROUND_UP(len, 4096) << 16 | 4096); 1798 intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
1855 intel_ring_emit(engine, cs_offset); 1799 intel_ring_emit(ring, cs_offset);
1856 intel_ring_emit(engine, 4096); 1800 intel_ring_emit(ring, 4096);
1857 intel_ring_emit(engine, offset); 1801 intel_ring_emit(ring, offset);
1858 1802
1859 intel_ring_emit(engine, MI_FLUSH); 1803 intel_ring_emit(ring, MI_FLUSH);
1860 intel_ring_emit(engine, MI_NOOP); 1804 intel_ring_emit(ring, MI_NOOP);
1861 intel_ring_advance(engine); 1805 intel_ring_advance(ring);
1862 1806
1863 /* ... and execute it. */ 1807 /* ... and execute it. */
1864 offset = cs_offset; 1808 offset = cs_offset;
@@ -1868,30 +1812,30 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
1868 if (ret) 1812 if (ret)
1869 return ret; 1813 return ret;
1870 1814
1871 intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); 1815 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1872 intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? 1816 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1873 0 : MI_BATCH_NON_SECURE)); 1817 0 : MI_BATCH_NON_SECURE));
1874 intel_ring_advance(engine); 1818 intel_ring_advance(ring);
1875 1819
1876 return 0; 1820 return 0;
1877} 1821}
1878 1822
1879static int 1823static int
1880i915_dispatch_execbuffer(struct drm_i915_gem_request *req, 1824i915_emit_bb_start(struct drm_i915_gem_request *req,
1881 u64 offset, u32 len, 1825 u64 offset, u32 len,
1882 unsigned dispatch_flags) 1826 unsigned int dispatch_flags)
1883{ 1827{
1884 struct intel_engine_cs *engine = req->engine; 1828 struct intel_ring *ring = req->ring;
1885 int ret; 1829 int ret;
1886 1830
1887 ret = intel_ring_begin(req, 2); 1831 ret = intel_ring_begin(req, 2);
1888 if (ret) 1832 if (ret)
1889 return ret; 1833 return ret;
1890 1834
1891 intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); 1835 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1892 intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? 1836 intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1893 0 : MI_BATCH_NON_SECURE)); 1837 0 : MI_BATCH_NON_SECURE));
1894 intel_ring_advance(engine); 1838 intel_ring_advance(ring);
1895 1839
1896 return 0; 1840 return 0;
1897} 1841}
@@ -1917,7 +1861,7 @@ static void cleanup_status_page(struct intel_engine_cs *engine)
1917 1861
1918 kunmap(sg_page(obj->pages->sgl)); 1862 kunmap(sg_page(obj->pages->sgl));
1919 i915_gem_object_ggtt_unpin(obj); 1863 i915_gem_object_ggtt_unpin(obj);
1920 drm_gem_object_unreference(&obj->base); 1864 i915_gem_object_put(obj);
1921 engine->status_page.obj = NULL; 1865 engine->status_page.obj = NULL;
1922} 1866}
1923 1867
@@ -1952,10 +1896,10 @@ static int init_status_page(struct intel_engine_cs *engine)
1952	 * actually map it).			 1896	 * actually map it).
1953 */ 1897 */
1954 flags |= PIN_MAPPABLE; 1898 flags |= PIN_MAPPABLE;
1955 ret = i915_gem_obj_ggtt_pin(obj, 4096, flags); 1899 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags);
1956 if (ret) { 1900 if (ret) {
1957err_unref: 1901err_unref:
1958 drm_gem_object_unreference(&obj->base); 1902 i915_gem_object_put(obj);
1959 return ret; 1903 return ret;
1960 } 1904 }
1961 1905
@@ -1989,32 +1933,17 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
1989 return 0; 1933 return 0;
1990} 1934}
1991 1935
1992void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) 1936int intel_ring_pin(struct intel_ring *ring)
1993{
1994 GEM_BUG_ON(ringbuf->vma == NULL);
1995 GEM_BUG_ON(ringbuf->virtual_start == NULL);
1996
1997 if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
1998 i915_gem_object_unpin_map(ringbuf->obj);
1999 else
2000 i915_vma_unpin_iomap(ringbuf->vma);
2001 ringbuf->virtual_start = NULL;
2002
2003 i915_gem_object_ggtt_unpin(ringbuf->obj);
2004 ringbuf->vma = NULL;
2005}
2006
2007int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv,
2008 struct intel_ringbuffer *ringbuf)
2009{ 1937{
2010 struct drm_i915_gem_object *obj = ringbuf->obj; 1938 struct drm_i915_private *dev_priv = ring->engine->i915;
1939 struct drm_i915_gem_object *obj = ring->obj;
2011 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ 1940 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
2012 unsigned flags = PIN_OFFSET_BIAS | 4096; 1941 unsigned flags = PIN_OFFSET_BIAS | 4096;
2013 void *addr; 1942 void *addr;
2014 int ret; 1943 int ret;
2015 1944
2016 if (HAS_LLC(dev_priv) && !obj->stolen) { 1945 if (HAS_LLC(dev_priv) && !obj->stolen) {
2017 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags); 1946 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
2018 if (ret) 1947 if (ret)
2019 return ret; 1948 return ret;
2020 1949
@@ -2028,8 +1957,8 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv,
2028 goto err_unpin; 1957 goto err_unpin;
2029 } 1958 }
2030 } else { 1959 } else {
2031 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 1960 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
2032 flags | PIN_MAPPABLE); 1961 flags | PIN_MAPPABLE);
2033 if (ret) 1962 if (ret)
2034 return ret; 1963 return ret;
2035 1964
@@ -2040,15 +1969,16 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv,
2040 /* Access through the GTT requires the device to be awake. */ 1969 /* Access through the GTT requires the device to be awake. */
2041 assert_rpm_wakelock_held(dev_priv); 1970 assert_rpm_wakelock_held(dev_priv);
2042 1971
2043 addr = i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); 1972 addr = (void __force *)
1973 i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
2044 if (IS_ERR(addr)) { 1974 if (IS_ERR(addr)) {
2045 ret = PTR_ERR(addr); 1975 ret = PTR_ERR(addr);
2046 goto err_unpin; 1976 goto err_unpin;
2047 } 1977 }
2048 } 1978 }
2049 1979
2050 ringbuf->virtual_start = addr; 1980 ring->vaddr = addr;
2051 ringbuf->vma = i915_gem_obj_to_ggtt(obj); 1981 ring->vma = i915_gem_obj_to_ggtt(obj);
2052 return 0; 1982 return 0;
2053 1983
2054err_unpin: 1984err_unpin:
@@ -2056,39 +1986,56 @@ err_unpin:
2056 return ret; 1986 return ret;
2057} 1987}
2058 1988
2059static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) 1989void intel_ring_unpin(struct intel_ring *ring)
2060{ 1990{
2061 drm_gem_object_unreference(&ringbuf->obj->base); 1991 GEM_BUG_ON(!ring->vma);
2062 ringbuf->obj = NULL; 1992 GEM_BUG_ON(!ring->vaddr);
1993
1994 if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen)
1995 i915_gem_object_unpin_map(ring->obj);
1996 else
1997 i915_vma_unpin_iomap(ring->vma);
1998 ring->vaddr = NULL;
1999
2000 i915_gem_object_ggtt_unpin(ring->obj);
2001 ring->vma = NULL;
2002}
2003
2004static void intel_destroy_ringbuffer_obj(struct intel_ring *ring)
2005{
2006 i915_gem_object_put(ring->obj);
2007 ring->obj = NULL;
2063} 2008}
2064 2009
2065static int intel_alloc_ringbuffer_obj(struct drm_device *dev, 2010static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2066 struct intel_ringbuffer *ringbuf) 2011 struct intel_ring *ring)
2067{ 2012{
2068 struct drm_i915_gem_object *obj; 2013 struct drm_i915_gem_object *obj;
2069 2014
2070 obj = NULL; 2015 obj = NULL;
2071 if (!HAS_LLC(dev)) 2016 if (!HAS_LLC(dev))
2072 obj = i915_gem_object_create_stolen(dev, ringbuf->size); 2017 obj = i915_gem_object_create_stolen(dev, ring->size);
2073 if (obj == NULL) 2018 if (obj == NULL)
2074 obj = i915_gem_object_create(dev, ringbuf->size); 2019 obj = i915_gem_object_create(dev, ring->size);
2075 if (IS_ERR(obj)) 2020 if (IS_ERR(obj))
2076 return PTR_ERR(obj); 2021 return PTR_ERR(obj);
2077 2022
2078 /* mark ring buffers as read-only from GPU side by default */ 2023 /* mark ring buffers as read-only from GPU side by default */
2079 obj->gt_ro = 1; 2024 obj->gt_ro = 1;
2080 2025
2081 ringbuf->obj = obj; 2026 ring->obj = obj;
2082 2027
2083 return 0; 2028 return 0;
2084} 2029}
2085 2030
2086struct intel_ringbuffer * 2031struct intel_ring *
2087intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) 2032intel_engine_create_ring(struct intel_engine_cs *engine, int size)
2088{ 2033{
2089 struct intel_ringbuffer *ring; 2034 struct intel_ring *ring;
2090 int ret; 2035 int ret;
2091 2036
2037 GEM_BUG_ON(!is_power_of_2(size));
2038
2092 ring = kzalloc(sizeof(*ring), GFP_KERNEL); 2039 ring = kzalloc(sizeof(*ring), GFP_KERNEL);
2093 if (ring == NULL) { 2040 if (ring == NULL) {
2094 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", 2041 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
@@ -2099,6 +2046,8 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
2099 ring->engine = engine; 2046 ring->engine = engine;
2100 list_add(&ring->link, &engine->buffers); 2047 list_add(&ring->link, &engine->buffers);
2101 2048
2049 INIT_LIST_HEAD(&ring->request_list);
2050
2102 ring->size = size; 2051 ring->size = size;
2103 /* Workaround an erratum on the i830 which causes a hang if 2052 /* Workaround an erratum on the i830 which causes a hang if
2104 * the TAIL pointer points to within the last 2 cachelines 2053 * the TAIL pointer points to within the last 2 cachelines
@@ -2124,7 +2073,7 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
2124} 2073}
2125 2074
2126void 2075void
2127intel_ringbuffer_free(struct intel_ringbuffer *ring) 2076intel_ring_free(struct intel_ring *ring)
2128{ 2077{
2129 intel_destroy_ringbuffer_obj(ring); 2078 intel_destroy_ringbuffer_obj(ring);
2130 list_del(&ring->link); 2079 list_del(&ring->link);
@@ -2143,7 +2092,8 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
2143 return 0; 2092 return 0;
2144 2093
2145 if (ce->state) { 2094 if (ce->state) {
2146 ret = i915_gem_obj_ggtt_pin(ce->state, ctx->ggtt_alignment, 0); 2095 ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
2096 ctx->ggtt_alignment, 0);
2147 if (ret) 2097 if (ret)
2148 goto error; 2098 goto error;
2149 } 2099 }
@@ -2158,7 +2108,7 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
2158 if (ctx == ctx->i915->kernel_context) 2108 if (ctx == ctx->i915->kernel_context)
2159 ce->initialised = true; 2109 ce->initialised = true;
2160 2110
2161 i915_gem_context_reference(ctx); 2111 i915_gem_context_get(ctx);
2162 return 0; 2112 return 0;
2163 2113
2164error: 2114error:
@@ -2179,28 +2129,23 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx,
2179 if (ce->state) 2129 if (ce->state)
2180 i915_gem_object_ggtt_unpin(ce->state); 2130 i915_gem_object_ggtt_unpin(ce->state);
2181 2131
2182 i915_gem_context_unreference(ctx); 2132 i915_gem_context_put(ctx);
2183} 2133}
2184 2134
2185static int intel_init_ring_buffer(struct drm_device *dev, 2135static int intel_init_ring_buffer(struct intel_engine_cs *engine)
2186 struct intel_engine_cs *engine)
2187{ 2136{
2188 struct drm_i915_private *dev_priv = to_i915(dev); 2137 struct drm_i915_private *dev_priv = engine->i915;
2189 struct intel_ringbuffer *ringbuf; 2138 struct intel_ring *ring;
2190 int ret; 2139 int ret;
2191 2140
2192 WARN_ON(engine->buffer); 2141 WARN_ON(engine->buffer);
2193 2142
2194 engine->i915 = dev_priv; 2143 intel_engine_setup_common(engine);
2195 INIT_LIST_HEAD(&engine->active_list); 2144
2196 INIT_LIST_HEAD(&engine->request_list);
2197 INIT_LIST_HEAD(&engine->execlist_queue);
2198 INIT_LIST_HEAD(&engine->buffers);
2199 i915_gem_batch_pool_init(dev, &engine->batch_pool);
2200 memset(engine->semaphore.sync_seqno, 0, 2145 memset(engine->semaphore.sync_seqno, 0,
2201 sizeof(engine->semaphore.sync_seqno)); 2146 sizeof(engine->semaphore.sync_seqno));
2202 2147
2203 ret = intel_engine_init_breadcrumbs(engine); 2148 ret = intel_engine_init_common(engine);
2204 if (ret) 2149 if (ret)
2205 goto error; 2150 goto error;
2206 2151
@@ -2215,12 +2160,12 @@ static int intel_init_ring_buffer(struct drm_device *dev,
2215 if (ret) 2160 if (ret)
2216 goto error; 2161 goto error;
2217 2162
2218 ringbuf = intel_engine_create_ringbuffer(engine, 32 * PAGE_SIZE); 2163 ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
2219 if (IS_ERR(ringbuf)) { 2164 if (IS_ERR(ring)) {
2220 ret = PTR_ERR(ringbuf); 2165 ret = PTR_ERR(ring);
2221 goto error; 2166 goto error;
2222 } 2167 }
2223 engine->buffer = ringbuf; 2168 engine->buffer = ring;
2224 2169
2225 if (I915_NEED_GFX_HWS(dev_priv)) { 2170 if (I915_NEED_GFX_HWS(dev_priv)) {
2226 ret = init_status_page(engine); 2171 ret = init_status_page(engine);
@@ -2233,26 +2178,22 @@ static int intel_init_ring_buffer(struct drm_device *dev,
2233 goto error; 2178 goto error;
2234 } 2179 }
2235 2180
2236 ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ringbuf); 2181 ret = intel_ring_pin(ring);
2237 if (ret) { 2182 if (ret) {
2238 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", 2183 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2239 engine->name, ret); 2184 engine->name, ret);
2240 intel_destroy_ringbuffer_obj(ringbuf); 2185 intel_destroy_ringbuffer_obj(ring);
2241 goto error; 2186 goto error;
2242 } 2187 }
2243 2188
2244 ret = i915_cmd_parser_init_ring(engine);
2245 if (ret)
2246 goto error;
2247
2248 return 0; 2189 return 0;
2249 2190
2250error: 2191error:
2251 intel_cleanup_engine(engine); 2192 intel_engine_cleanup(engine);
2252 return ret; 2193 return ret;
2253} 2194}
2254 2195
2255void intel_cleanup_engine(struct intel_engine_cs *engine) 2196void intel_engine_cleanup(struct intel_engine_cs *engine)
2256{ 2197{
2257 struct drm_i915_private *dev_priv; 2198 struct drm_i915_private *dev_priv;
2258 2199
@@ -2262,11 +2203,10 @@ void intel_cleanup_engine(struct intel_engine_cs *engine)
2262 dev_priv = engine->i915; 2203 dev_priv = engine->i915;
2263 2204
2264 if (engine->buffer) { 2205 if (engine->buffer) {
2265 intel_stop_engine(engine);
2266 WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); 2206 WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0);
2267 2207
2268 intel_unpin_ringbuffer_obj(engine->buffer); 2208 intel_ring_unpin(engine->buffer);
2269 intel_ringbuffer_free(engine->buffer); 2209 intel_ring_free(engine->buffer);
2270 engine->buffer = NULL; 2210 engine->buffer = NULL;
2271 } 2211 }
2272 2212
@@ -2280,33 +2220,13 @@ void intel_cleanup_engine(struct intel_engine_cs *engine)
2280 cleanup_phys_status_page(engine); 2220 cleanup_phys_status_page(engine);
2281 } 2221 }
2282 2222
2283 i915_cmd_parser_fini_ring(engine); 2223 intel_engine_cleanup_common(engine);
2284 i915_gem_batch_pool_fini(&engine->batch_pool);
2285 intel_engine_fini_breadcrumbs(engine);
2286 2224
2287 intel_ring_context_unpin(dev_priv->kernel_context, engine); 2225 intel_ring_context_unpin(dev_priv->kernel_context, engine);
2288 2226
2289 engine->i915 = NULL; 2227 engine->i915 = NULL;
2290} 2228}
2291 2229
2292int intel_engine_idle(struct intel_engine_cs *engine)
2293{
2294 struct drm_i915_gem_request *req;
2295
2296 /* Wait upon the last request to be completed */
2297 if (list_empty(&engine->request_list))
2298 return 0;
2299
2300 req = list_entry(engine->request_list.prev,
2301 struct drm_i915_gem_request,
2302 list);
2303
2304 /* Make sure we do not trigger any retires */
2305 return __i915_wait_request(req,
2306 req->i915->mm.interruptible,
2307 NULL, NULL);
2308}
2309
2310int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) 2230int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
2311{ 2231{
2312 int ret; 2232 int ret;
@@ -2317,7 +2237,7 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
2317 */ 2237 */
2318 request->reserved_space += LEGACY_REQUEST_SIZE; 2238 request->reserved_space += LEGACY_REQUEST_SIZE;
2319 2239
2320 request->ringbuf = request->engine->buffer; 2240 request->ring = request->engine->buffer;
2321 2241
2322 ret = intel_ring_begin(request, 0); 2242 ret = intel_ring_begin(request, 0);
2323 if (ret) 2243 if (ret)
@@ -2329,12 +2249,12 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
2329 2249
2330static int wait_for_space(struct drm_i915_gem_request *req, int bytes) 2250static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
2331{ 2251{
2332 struct intel_ringbuffer *ringbuf = req->ringbuf; 2252 struct intel_ring *ring = req->ring;
2333 struct intel_engine_cs *engine = req->engine;
2334 struct drm_i915_gem_request *target; 2253 struct drm_i915_gem_request *target;
2254 int ret;
2335 2255
2336 intel_ring_update_space(ringbuf); 2256 intel_ring_update_space(ring);
2337 if (ringbuf->space >= bytes) 2257 if (ring->space >= bytes)
2338 return 0; 2258 return 0;
2339 2259
2340 /* 2260 /*
@@ -2348,35 +2268,38 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
2348 */ 2268 */
2349 GEM_BUG_ON(!req->reserved_space); 2269 GEM_BUG_ON(!req->reserved_space);
2350 2270
2351 list_for_each_entry(target, &engine->request_list, list) { 2271 list_for_each_entry(target, &ring->request_list, ring_link) {
2352 unsigned space; 2272 unsigned space;
2353 2273
2354 /*
2355 * The request queue is per-engine, so can contain requests
2356 * from multiple ringbuffers. Here, we must ignore any that
2357 * aren't from the ringbuffer we're considering.
2358 */
2359 if (target->ringbuf != ringbuf)
2360 continue;
2361
2362 /* Would completion of this request free enough space? */ 2274 /* Would completion of this request free enough space? */
2363 space = __intel_ring_space(target->postfix, ringbuf->tail, 2275 space = __intel_ring_space(target->postfix, ring->tail,
2364 ringbuf->size); 2276 ring->size);
2365 if (space >= bytes) 2277 if (space >= bytes)
2366 break; 2278 break;
2367 } 2279 }
2368 2280
2369 if (WARN_ON(&target->list == &engine->request_list)) 2281 if (WARN_ON(&target->ring_link == &ring->request_list))
2370 return -ENOSPC; 2282 return -ENOSPC;
2371 2283
2372 return i915_wait_request(target); 2284 ret = i915_wait_request(target, true, NULL, NO_WAITBOOST);
2285 if (ret)
2286 return ret;
2287
2288 if (i915_reset_in_progress(&target->i915->gpu_error))
2289 return -EAGAIN;
2290
2291 i915_gem_request_retire_upto(target);
2292
2293 intel_ring_update_space(ring);
2294 GEM_BUG_ON(ring->space < bytes);
2295 return 0;
2373} 2296}
2374 2297
2375int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) 2298int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
2376{ 2299{
2377 struct intel_ringbuffer *ringbuf = req->ringbuf; 2300 struct intel_ring *ring = req->ring;
2378 int remain_actual = ringbuf->size - ringbuf->tail; 2301 int remain_actual = ring->size - ring->tail;
2379 int remain_usable = ringbuf->effective_size - ringbuf->tail; 2302 int remain_usable = ring->effective_size - ring->tail;
2380 int bytes = num_dwords * sizeof(u32); 2303 int bytes = num_dwords * sizeof(u32);
2381 int total_bytes, wait_bytes; 2304 int total_bytes, wait_bytes;
2382 bool need_wrap = false; 2305 bool need_wrap = false;
@@ -2403,37 +2326,33 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
2403 wait_bytes = total_bytes; 2326 wait_bytes = total_bytes;
2404 } 2327 }
2405 2328
2406 if (wait_bytes > ringbuf->space) { 2329 if (wait_bytes > ring->space) {
2407 int ret = wait_for_space(req, wait_bytes); 2330 int ret = wait_for_space(req, wait_bytes);
2408 if (unlikely(ret)) 2331 if (unlikely(ret))
2409 return ret; 2332 return ret;
2410
2411 intel_ring_update_space(ringbuf);
2412 if (unlikely(ringbuf->space < wait_bytes))
2413 return -EAGAIN;
2414 } 2333 }
2415 2334
2416 if (unlikely(need_wrap)) { 2335 if (unlikely(need_wrap)) {
2417 GEM_BUG_ON(remain_actual > ringbuf->space); 2336 GEM_BUG_ON(remain_actual > ring->space);
2418 GEM_BUG_ON(ringbuf->tail + remain_actual > ringbuf->size); 2337 GEM_BUG_ON(ring->tail + remain_actual > ring->size);
2419 2338
2420 /* Fill the tail with MI_NOOP */ 2339 /* Fill the tail with MI_NOOP */
2421 memset(ringbuf->virtual_start + ringbuf->tail, 2340 memset(ring->vaddr + ring->tail, 0, remain_actual);
2422 0, remain_actual); 2341 ring->tail = 0;
2423 ringbuf->tail = 0; 2342 ring->space -= remain_actual;
2424 ringbuf->space -= remain_actual;
2425 } 2343 }
2426 2344
2427 ringbuf->space -= bytes; 2345 ring->space -= bytes;
2428 GEM_BUG_ON(ringbuf->space < 0); 2346 GEM_BUG_ON(ring->space < 0);
2429 return 0; 2347 return 0;
2430} 2348}
2431 2349
2432/* Align the ring tail to a cacheline boundary */ 2350/* Align the ring tail to a cacheline boundary */
2433int intel_ring_cacheline_align(struct drm_i915_gem_request *req) 2351int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
2434{ 2352{
2435 struct intel_engine_cs *engine = req->engine; 2353 struct intel_ring *ring = req->ring;
2436 int num_dwords = (engine->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); 2354 int num_dwords =
2355 (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
2437 int ret; 2356 int ret;
2438 2357
2439 if (num_dwords == 0) 2358 if (num_dwords == 0)
@@ -2445,14 +2364,14 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
2445 return ret; 2364 return ret;
2446 2365
2447 while (num_dwords--) 2366 while (num_dwords--)
2448 intel_ring_emit(engine, MI_NOOP); 2367 intel_ring_emit(ring, MI_NOOP);
2449 2368
2450 intel_ring_advance(engine); 2369 intel_ring_advance(ring);
2451 2370
2452 return 0; 2371 return 0;
2453} 2372}
2454 2373
2455void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno) 2374void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
2456{ 2375{
2457 struct drm_i915_private *dev_priv = engine->i915; 2376 struct drm_i915_private *dev_priv = engine->i915;
2458 2377
@@ -2496,10 +2415,9 @@ void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno)
2496 rcu_read_unlock(); 2415 rcu_read_unlock();
2497} 2416}
2498 2417
2499static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, 2418static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
2500 u32 value)
2501{ 2419{
2502 struct drm_i915_private *dev_priv = engine->i915; 2420 struct drm_i915_private *dev_priv = request->i915;
2503 2421
2504 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 2422 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
2505 2423
@@ -2523,8 +2441,7 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
2523 DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); 2441 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
2524 2442
2525 /* Now that the ring is fully powered up, update the tail */ 2443 /* Now that the ring is fully powered up, update the tail */
2526 I915_WRITE_FW(RING_TAIL(engine->mmio_base), value); 2444 i9xx_submit_request(request);
2527 POSTING_READ_FW(RING_TAIL(engine->mmio_base));
2528 2445
2529 /* Let the ring send IDLE messages to the GT again, 2446 /* Let the ring send IDLE messages to the GT again,
2530 * and so let it sleep to conserve power when idle. 2447 * and so let it sleep to conserve power when idle.
@@ -2535,10 +2452,9 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
2535 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 2452 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2536} 2453}
2537 2454
2538static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, 2455static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
2539 u32 invalidate, u32 flush)
2540{ 2456{
2541 struct intel_engine_cs *engine = req->engine; 2457 struct intel_ring *ring = req->ring;
2542 uint32_t cmd; 2458 uint32_t cmd;
2543 int ret; 2459 int ret;
2544 2460
@@ -2563,30 +2479,29 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
2563 * operation is complete. This bit is only valid when the 2479 * operation is complete. This bit is only valid when the
2564 * Post-Sync Operation field is a value of 1h or 3h." 2480 * Post-Sync Operation field is a value of 1h or 3h."
2565 */ 2481 */
2566 if (invalidate & I915_GEM_GPU_DOMAINS) 2482 if (mode & EMIT_INVALIDATE)
2567 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; 2483 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
2568 2484
2569 intel_ring_emit(engine, cmd); 2485 intel_ring_emit(ring, cmd);
2570 intel_ring_emit(engine, 2486 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2571 I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2572 if (INTEL_GEN(req->i915) >= 8) { 2487 if (INTEL_GEN(req->i915) >= 8) {
2573 intel_ring_emit(engine, 0); /* upper addr */ 2488 intel_ring_emit(ring, 0); /* upper addr */
2574 intel_ring_emit(engine, 0); /* value */ 2489 intel_ring_emit(ring, 0); /* value */
2575 } else { 2490 } else {
2576 intel_ring_emit(engine, 0); 2491 intel_ring_emit(ring, 0);
2577 intel_ring_emit(engine, MI_NOOP); 2492 intel_ring_emit(ring, MI_NOOP);
2578 } 2493 }
2579 intel_ring_advance(engine); 2494 intel_ring_advance(ring);
2580 return 0; 2495 return 0;
2581} 2496}
2582 2497
2583static int 2498static int
2584gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, 2499gen8_emit_bb_start(struct drm_i915_gem_request *req,
2585 u64 offset, u32 len, 2500 u64 offset, u32 len,
2586 unsigned dispatch_flags) 2501 unsigned int dispatch_flags)
2587{ 2502{
2588 struct intel_engine_cs *engine = req->engine; 2503 struct intel_ring *ring = req->ring;
2589 bool ppgtt = USES_PPGTT(engine->dev) && 2504 bool ppgtt = USES_PPGTT(req->i915) &&
2590 !(dispatch_flags & I915_DISPATCH_SECURE); 2505 !(dispatch_flags & I915_DISPATCH_SECURE);
2591 int ret; 2506 int ret;
2592 2507
@@ -2595,71 +2510,70 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2595 return ret; 2510 return ret;
2596 2511
2597 /* FIXME(BDW): Address space and security selectors. */ 2512 /* FIXME(BDW): Address space and security selectors. */
2598 intel_ring_emit(engine, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | 2513 intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
2599 (dispatch_flags & I915_DISPATCH_RS ? 2514 (dispatch_flags & I915_DISPATCH_RS ?
2600 MI_BATCH_RESOURCE_STREAMER : 0)); 2515 MI_BATCH_RESOURCE_STREAMER : 0));
2601 intel_ring_emit(engine, lower_32_bits(offset)); 2516 intel_ring_emit(ring, lower_32_bits(offset));
2602 intel_ring_emit(engine, upper_32_bits(offset)); 2517 intel_ring_emit(ring, upper_32_bits(offset));
2603 intel_ring_emit(engine, MI_NOOP); 2518 intel_ring_emit(ring, MI_NOOP);
2604 intel_ring_advance(engine); 2519 intel_ring_advance(ring);
2605 2520
2606 return 0; 2521 return 0;
2607} 2522}
2608 2523
2609static int 2524static int
2610hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, 2525hsw_emit_bb_start(struct drm_i915_gem_request *req,
2611 u64 offset, u32 len, 2526 u64 offset, u32 len,
2612 unsigned dispatch_flags) 2527 unsigned int dispatch_flags)
2613{ 2528{
2614 struct intel_engine_cs *engine = req->engine; 2529 struct intel_ring *ring = req->ring;
2615 int ret; 2530 int ret;
2616 2531
2617 ret = intel_ring_begin(req, 2); 2532 ret = intel_ring_begin(req, 2);
2618 if (ret) 2533 if (ret)
2619 return ret; 2534 return ret;
2620 2535
2621 intel_ring_emit(engine, 2536 intel_ring_emit(ring,
2622 MI_BATCH_BUFFER_START | 2537 MI_BATCH_BUFFER_START |
2623 (dispatch_flags & I915_DISPATCH_SECURE ? 2538 (dispatch_flags & I915_DISPATCH_SECURE ?
2624 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | 2539 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
2625 (dispatch_flags & I915_DISPATCH_RS ? 2540 (dispatch_flags & I915_DISPATCH_RS ?
2626 MI_BATCH_RESOURCE_STREAMER : 0)); 2541 MI_BATCH_RESOURCE_STREAMER : 0));
2627 /* bit0-7 is the length on GEN6+ */ 2542 /* bit0-7 is the length on GEN6+ */
2628 intel_ring_emit(engine, offset); 2543 intel_ring_emit(ring, offset);
2629 intel_ring_advance(engine); 2544 intel_ring_advance(ring);
2630 2545
2631 return 0; 2546 return 0;
2632} 2547}
2633 2548
2634static int 2549static int
2635gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, 2550gen6_emit_bb_start(struct drm_i915_gem_request *req,
2636 u64 offset, u32 len, 2551 u64 offset, u32 len,
2637 unsigned dispatch_flags) 2552 unsigned int dispatch_flags)
2638{ 2553{
2639 struct intel_engine_cs *engine = req->engine; 2554 struct intel_ring *ring = req->ring;
2640 int ret; 2555 int ret;
2641 2556
2642 ret = intel_ring_begin(req, 2); 2557 ret = intel_ring_begin(req, 2);
2643 if (ret) 2558 if (ret)
2644 return ret; 2559 return ret;
2645 2560
2646 intel_ring_emit(engine, 2561 intel_ring_emit(ring,
2647 MI_BATCH_BUFFER_START | 2562 MI_BATCH_BUFFER_START |
2648 (dispatch_flags & I915_DISPATCH_SECURE ? 2563 (dispatch_flags & I915_DISPATCH_SECURE ?
2649 0 : MI_BATCH_NON_SECURE_I965)); 2564 0 : MI_BATCH_NON_SECURE_I965));
2650 /* bit0-7 is the length on GEN6+ */ 2565 /* bit0-7 is the length on GEN6+ */
2651 intel_ring_emit(engine, offset); 2566 intel_ring_emit(ring, offset);
2652 intel_ring_advance(engine); 2567 intel_ring_advance(ring);
2653 2568
2654 return 0; 2569 return 0;
2655} 2570}
2656 2571
2657/* Blitter support (SandyBridge+) */ 2572/* Blitter support (SandyBridge+) */
2658 2573
2659static int gen6_ring_flush(struct drm_i915_gem_request *req, 2574static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
2660 u32 invalidate, u32 flush)
2661{ 2575{
2662 struct intel_engine_cs *engine = req->engine; 2576 struct intel_ring *ring = req->ring;
2663 uint32_t cmd; 2577 uint32_t cmd;
2664 int ret; 2578 int ret;
2665 2579
@@ -2684,19 +2598,19 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req,
2684 * operation is complete. This bit is only valid when the 2598 * operation is complete. This bit is only valid when the
2685 * Post-Sync Operation field is a value of 1h or 3h." 2599 * Post-Sync Operation field is a value of 1h or 3h."
2686 */ 2600 */
2687 if (invalidate & I915_GEM_DOMAIN_RENDER) 2601 if (mode & EMIT_INVALIDATE)
2688 cmd |= MI_INVALIDATE_TLB; 2602 cmd |= MI_INVALIDATE_TLB;
2689 intel_ring_emit(engine, cmd); 2603 intel_ring_emit(ring, cmd);
2690 intel_ring_emit(engine, 2604 intel_ring_emit(ring,
2691 I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); 2605 I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2692 if (INTEL_GEN(req->i915) >= 8) { 2606 if (INTEL_GEN(req->i915) >= 8) {
2693 intel_ring_emit(engine, 0); /* upper addr */ 2607 intel_ring_emit(ring, 0); /* upper addr */
2694 intel_ring_emit(engine, 0); /* value */ 2608 intel_ring_emit(ring, 0); /* value */
2695 } else { 2609 } else {
2696 intel_ring_emit(engine, 0); 2610 intel_ring_emit(ring, 0);
2697 intel_ring_emit(engine, MI_NOOP); 2611 intel_ring_emit(ring, MI_NOOP);
2698 } 2612 }
2699 intel_ring_advance(engine); 2613 intel_ring_advance(ring);
2700 2614
2701 return 0; 2615 return 0;
2702} 2616}
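
The hunk above only shows the tail of gen6_ring_flush(); for orientation, here is a hedged sketch of the whole pre-gen8 shape under the new calling convention, where a single mode bitmask replaces the old (invalidate, flush) domain pair (simplified, not the commit's exact code):

	static int sketch_ring_flush(struct drm_i915_gem_request *req, u32 mode)
	{
		struct intel_ring *ring = req->ring;
		u32 cmd = MI_FLUSH_DW;
		int ret;

		if (mode & EMIT_INVALIDATE)
			cmd |= MI_INVALIDATE_TLB;

		ret = intel_ring_begin(req, 4);		/* reserve 4 dwords */
		if (ret)
			return ret;

		intel_ring_emit(ring, cmd);
		intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, MI_NOOP);		/* padding dword */
		intel_ring_advance(ring);

		return 0;
	}
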
@@ -2707,7 +2621,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
2707 struct drm_i915_gem_object *obj; 2621 struct drm_i915_gem_object *obj;
2708 int ret, i; 2622 int ret, i;
2709 2623
2710 if (!i915_semaphore_is_enabled(dev_priv)) 2624 if (!i915.semaphores)
2711 return; 2625 return;
2712 2626
2713 if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { 2627 if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) {
@@ -2717,9 +2631,9 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
2717 i915.semaphores = 0; 2631 i915.semaphores = 0;
2718 } else { 2632 } else {
2719 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); 2633 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2720 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK); 2634 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
2721 if (ret != 0) { 2635 if (ret != 0) {
2722 drm_gem_object_unreference(&obj->base); 2636 i915_gem_object_put(obj);
2723 DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); 2637 DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2724 i915.semaphores = 0; 2638 i915.semaphores = 0;
2725 } else { 2639 } else {
@@ -2728,13 +2642,13 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
2728 } 2642 }
2729 } 2643 }
2730 2644
2731 if (!i915_semaphore_is_enabled(dev_priv)) 2645 if (!i915.semaphores)
2732 return; 2646 return;
2733 2647
2734 if (INTEL_GEN(dev_priv) >= 8) { 2648 if (INTEL_GEN(dev_priv) >= 8) {
2735 u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj); 2649 u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj);
2736 2650
2737 engine->semaphore.sync_to = gen8_ring_sync; 2651 engine->semaphore.sync_to = gen8_ring_sync_to;
2738 engine->semaphore.signal = gen8_xcs_signal; 2652 engine->semaphore.signal = gen8_xcs_signal;
2739 2653
2740 for (i = 0; i < I915_NUM_ENGINES; i++) { 2654 for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -2748,7 +2662,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
2748 engine->semaphore.signal_ggtt[i] = ring_offset; 2662 engine->semaphore.signal_ggtt[i] = ring_offset;
2749 } 2663 }
2750 } else if (INTEL_GEN(dev_priv) >= 6) { 2664 } else if (INTEL_GEN(dev_priv) >= 6) {
2751 engine->semaphore.sync_to = gen6_ring_sync; 2665 engine->semaphore.sync_to = gen6_ring_sync_to;
2752 engine->semaphore.signal = gen6_signal; 2666 engine->semaphore.signal = gen6_signal;
2753 2667
2754 /* 2668 /*
@@ -2804,6 +2718,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
2804static void intel_ring_init_irq(struct drm_i915_private *dev_priv, 2718static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
2805 struct intel_engine_cs *engine) 2719 struct intel_engine_cs *engine)
2806{ 2720{
2721 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;
2722
2807 if (INTEL_GEN(dev_priv) >= 8) { 2723 if (INTEL_GEN(dev_priv) >= 8) {
2808 engine->irq_enable = gen8_irq_enable; 2724 engine->irq_enable = gen8_irq_enable;
2809 engine->irq_disable = gen8_irq_disable; 2725 engine->irq_disable = gen8_irq_disable;
@@ -2828,74 +2744,66 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
2828static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, 2744static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
2829 struct intel_engine_cs *engine) 2745 struct intel_engine_cs *engine)
2830{ 2746{
2747 intel_ring_init_irq(dev_priv, engine);
2748 intel_ring_init_semaphores(dev_priv, engine);
2749
2831 engine->init_hw = init_ring_common; 2750 engine->init_hw = init_ring_common;
2832 engine->write_tail = ring_write_tail;
2833 2751
2834 engine->add_request = i9xx_add_request; 2752 engine->emit_request = i9xx_emit_request;
2835 if (INTEL_GEN(dev_priv) >= 6) 2753 if (i915.semaphores)
2836 engine->add_request = gen6_add_request; 2754 engine->emit_request = gen6_sema_emit_request;
2755 engine->submit_request = i9xx_submit_request;
2837 2756
2838 if (INTEL_GEN(dev_priv) >= 8) 2757 if (INTEL_GEN(dev_priv) >= 8)
2839 engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; 2758 engine->emit_bb_start = gen8_emit_bb_start;
2840 else if (INTEL_GEN(dev_priv) >= 6) 2759 else if (INTEL_GEN(dev_priv) >= 6)
2841 engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; 2760 engine->emit_bb_start = gen6_emit_bb_start;
2842 else if (INTEL_GEN(dev_priv) >= 4) 2761 else if (INTEL_GEN(dev_priv) >= 4)
2843 engine->dispatch_execbuffer = i965_dispatch_execbuffer; 2762 engine->emit_bb_start = i965_emit_bb_start;
2844 else if (IS_I830(dev_priv) || IS_845G(dev_priv)) 2763 else if (IS_I830(dev_priv) || IS_845G(dev_priv))
2845 engine->dispatch_execbuffer = i830_dispatch_execbuffer; 2764 engine->emit_bb_start = i830_emit_bb_start;
2846 else 2765 else
2847 engine->dispatch_execbuffer = i915_dispatch_execbuffer; 2766 engine->emit_bb_start = i915_emit_bb_start;
2848
2849 intel_ring_init_irq(dev_priv, engine);
2850 intel_ring_init_semaphores(dev_priv, engine);
2851} 2767}
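
intel_ring_default_vfuncs() now runs intel_ring_init_irq() and intel_ring_init_semaphores() before picking the request vfuncs, plausibly because the emit_request choice keys off i915.semaphores, which init_semaphores() may clear when the semaphore object cannot be allocated or pinned (see the error paths above). The relevant ordering, with comments added for illustration only:

	intel_ring_init_irq(dev_priv, engine);
	intel_ring_init_semaphores(dev_priv, engine);	/* may zero i915.semaphores */

	engine->emit_request = i9xx_emit_request;
	if (i915.semaphores)				/* now reflects the outcome */
		engine->emit_request = gen6_sema_emit_request;
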
2852 2768
2853int intel_init_render_ring_buffer(struct drm_device *dev) 2769int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
2854{ 2770{
2855 struct drm_i915_private *dev_priv = to_i915(dev); 2771 struct drm_i915_private *dev_priv = engine->i915;
2856 struct intel_engine_cs *engine = &dev_priv->engine[RCS];
2857 int ret; 2772 int ret;
2858 2773
2859 engine->name = "render ring";
2860 engine->id = RCS;
2861 engine->exec_id = I915_EXEC_RENDER;
2862 engine->hw_id = 0;
2863 engine->mmio_base = RENDER_RING_BASE;
2864
2865 intel_ring_default_vfuncs(dev_priv, engine); 2774 intel_ring_default_vfuncs(dev_priv, engine);
2866 2775
2867 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2868 if (HAS_L3_DPF(dev_priv)) 2776 if (HAS_L3_DPF(dev_priv))
2869 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; 2777 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
2870 2778
2871 if (INTEL_GEN(dev_priv) >= 8) { 2779 if (INTEL_GEN(dev_priv) >= 8) {
2872 engine->init_context = intel_rcs_ctx_init; 2780 engine->init_context = intel_rcs_ctx_init;
2873 engine->add_request = gen8_render_add_request; 2781 engine->emit_request = gen8_render_emit_request;
2874 engine->flush = gen8_render_ring_flush; 2782 engine->emit_flush = gen8_render_ring_flush;
2875 if (i915_semaphore_is_enabled(dev_priv)) 2783 if (i915.semaphores)
2876 engine->semaphore.signal = gen8_rcs_signal; 2784 engine->semaphore.signal = gen8_rcs_signal;
2877 } else if (INTEL_GEN(dev_priv) >= 6) { 2785 } else if (INTEL_GEN(dev_priv) >= 6) {
2878 engine->init_context = intel_rcs_ctx_init; 2786 engine->init_context = intel_rcs_ctx_init;
2879 engine->flush = gen7_render_ring_flush; 2787 engine->emit_flush = gen7_render_ring_flush;
2880 if (IS_GEN6(dev_priv)) 2788 if (IS_GEN6(dev_priv))
2881 engine->flush = gen6_render_ring_flush; 2789 engine->emit_flush = gen6_render_ring_flush;
2882 } else if (IS_GEN5(dev_priv)) { 2790 } else if (IS_GEN5(dev_priv)) {
2883 engine->flush = gen4_render_ring_flush; 2791 engine->emit_flush = gen4_render_ring_flush;
2884 } else { 2792 } else {
2885 if (INTEL_GEN(dev_priv) < 4) 2793 if (INTEL_GEN(dev_priv) < 4)
2886 engine->flush = gen2_render_ring_flush; 2794 engine->emit_flush = gen2_render_ring_flush;
2887 else 2795 else
2888 engine->flush = gen4_render_ring_flush; 2796 engine->emit_flush = gen4_render_ring_flush;
2889 engine->irq_enable_mask = I915_USER_INTERRUPT; 2797 engine->irq_enable_mask = I915_USER_INTERRUPT;
2890 } 2798 }
2891 2799
2892 if (IS_HASWELL(dev_priv)) 2800 if (IS_HASWELL(dev_priv))
2893 engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; 2801 engine->emit_bb_start = hsw_emit_bb_start;
2894 2802
2895 engine->init_hw = init_render_ring; 2803 engine->init_hw = init_render_ring;
2896 engine->cleanup = render_ring_cleanup; 2804 engine->cleanup = render_ring_cleanup;
2897 2805
2898 ret = intel_init_ring_buffer(dev, engine); 2806 ret = intel_init_ring_buffer(engine);
2899 if (ret) 2807 if (ret)
2900 return ret; 2808 return ret;
2901 2809
@@ -2912,166 +2820,71 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
2912 return 0; 2820 return 0;
2913} 2821}
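
With intel_init_render_ring_buffer() and friends taking the engine directly, the per-engine identity the old code set here (name, id, exec_id, hw_id, mmio_base and now irq_shift) has to be filled in by the caller earlier in the series. A hedged call-site sketch, reusing the engine-array lookup the removed code performed:

	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
	int ret;

	ret = intel_init_render_ring_buffer(engine);
	if (ret)
		return ret;
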
2914 2822
2915int intel_init_bsd_ring_buffer(struct drm_device *dev) 2823int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
2916{ 2824{
2917 struct drm_i915_private *dev_priv = to_i915(dev); 2825 struct drm_i915_private *dev_priv = engine->i915;
2918 struct intel_engine_cs *engine = &dev_priv->engine[VCS];
2919
2920 engine->name = "bsd ring";
2921 engine->id = VCS;
2922 engine->exec_id = I915_EXEC_BSD;
2923 engine->hw_id = 1;
2924 2826
2925 intel_ring_default_vfuncs(dev_priv, engine); 2827 intel_ring_default_vfuncs(dev_priv, engine);
2926 2828
2927 if (INTEL_GEN(dev_priv) >= 6) { 2829 if (INTEL_GEN(dev_priv) >= 6) {
2928 engine->mmio_base = GEN6_BSD_RING_BASE;
2929 /* gen6 bsd needs a special wa for tail updates */ 2830 /* gen6 bsd needs a special wa for tail updates */
2930 if (IS_GEN6(dev_priv)) 2831 if (IS_GEN6(dev_priv))
2931 engine->write_tail = gen6_bsd_ring_write_tail; 2832 engine->submit_request = gen6_bsd_submit_request;
2932 engine->flush = gen6_bsd_ring_flush; 2833 engine->emit_flush = gen6_bsd_ring_flush;
2933 if (INTEL_GEN(dev_priv) >= 8) 2834 if (INTEL_GEN(dev_priv) < 8)
2934 engine->irq_enable_mask =
2935 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2936 else
2937 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; 2835 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2938 } else { 2836 } else {
2939 engine->mmio_base = BSD_RING_BASE; 2837 engine->mmio_base = BSD_RING_BASE;
2940 engine->flush = bsd_ring_flush; 2838 engine->emit_flush = bsd_ring_flush;
2941 if (IS_GEN5(dev_priv)) 2839 if (IS_GEN5(dev_priv))
2942 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; 2840 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
2943 else 2841 else
2944 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; 2842 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
2945 } 2843 }
2946 2844
2947 return intel_init_ring_buffer(dev, engine); 2845 return intel_init_ring_buffer(engine);
2948} 2846}
2949 2847
2950/** 2848/**
2951 * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3) 2849 * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
2952 */ 2850 */
2953int intel_init_bsd2_ring_buffer(struct drm_device *dev) 2851int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine)
2954{ 2852{
2955 struct drm_i915_private *dev_priv = to_i915(dev); 2853 struct drm_i915_private *dev_priv = engine->i915;
2956 struct intel_engine_cs *engine = &dev_priv->engine[VCS2];
2957
2958 engine->name = "bsd2 ring";
2959 engine->id = VCS2;
2960 engine->exec_id = I915_EXEC_BSD;
2961 engine->hw_id = 4;
2962 engine->mmio_base = GEN8_BSD2_RING_BASE;
2963 2854
2964 intel_ring_default_vfuncs(dev_priv, engine); 2855 intel_ring_default_vfuncs(dev_priv, engine);
2965 2856
2966 engine->flush = gen6_bsd_ring_flush; 2857 engine->emit_flush = gen6_bsd_ring_flush;
2967 engine->irq_enable_mask =
2968 GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2969 2858
2970 return intel_init_ring_buffer(dev, engine); 2859 return intel_init_ring_buffer(engine);
2971} 2860}
2972 2861
2973int intel_init_blt_ring_buffer(struct drm_device *dev) 2862int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
2974{ 2863{
2975 struct drm_i915_private *dev_priv = to_i915(dev); 2864 struct drm_i915_private *dev_priv = engine->i915;
2976 struct intel_engine_cs *engine = &dev_priv->engine[BCS];
2977
2978 engine->name = "blitter ring";
2979 engine->id = BCS;
2980 engine->exec_id = I915_EXEC_BLT;
2981 engine->hw_id = 2;
2982 engine->mmio_base = BLT_RING_BASE;
2983 2865
2984 intel_ring_default_vfuncs(dev_priv, engine); 2866 intel_ring_default_vfuncs(dev_priv, engine);
2985 2867
2986 engine->flush = gen6_ring_flush; 2868 engine->emit_flush = gen6_ring_flush;
2987 if (INTEL_GEN(dev_priv) >= 8) 2869 if (INTEL_GEN(dev_priv) < 8)
2988 engine->irq_enable_mask =
2989 GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2990 else
2991 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; 2870 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
2992 2871
2993 return intel_init_ring_buffer(dev, engine); 2872 return intel_init_ring_buffer(engine);
2994} 2873}
2995 2874
2996int intel_init_vebox_ring_buffer(struct drm_device *dev) 2875int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
2997{ 2876{
2998 struct drm_i915_private *dev_priv = to_i915(dev); 2877 struct drm_i915_private *dev_priv = engine->i915;
2999 struct intel_engine_cs *engine = &dev_priv->engine[VECS];
3000
3001 engine->name = "video enhancement ring";
3002 engine->id = VECS;
3003 engine->exec_id = I915_EXEC_VEBOX;
3004 engine->hw_id = 3;
3005 engine->mmio_base = VEBOX_RING_BASE;
3006 2878
3007 intel_ring_default_vfuncs(dev_priv, engine); 2879 intel_ring_default_vfuncs(dev_priv, engine);
3008 2880
3009 engine->flush = gen6_ring_flush; 2881 engine->emit_flush = gen6_ring_flush;
3010 2882
3011 if (INTEL_GEN(dev_priv) >= 8) { 2883 if (INTEL_GEN(dev_priv) < 8) {
3012 engine->irq_enable_mask =
3013 GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
3014 } else {
3015 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; 2884 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
3016 engine->irq_enable = hsw_vebox_irq_enable; 2885 engine->irq_enable = hsw_vebox_irq_enable;
3017 engine->irq_disable = hsw_vebox_irq_disable; 2886 engine->irq_disable = hsw_vebox_irq_disable;
3018 } 2887 }
3019 2888
3020 return intel_init_ring_buffer(dev, engine); 2889 return intel_init_ring_buffer(engine);
3021}
3022
3023int
3024intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
3025{
3026 struct intel_engine_cs *engine = req->engine;
3027 int ret;
3028
3029 if (!engine->gpu_caches_dirty)
3030 return 0;
3031
3032 ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS);
3033 if (ret)
3034 return ret;
3035
3036 trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
3037
3038 engine->gpu_caches_dirty = false;
3039 return 0;
3040}
3041
3042int
3043intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
3044{
3045 struct intel_engine_cs *engine = req->engine;
3046 uint32_t flush_domains;
3047 int ret;
3048
3049 flush_domains = 0;
3050 if (engine->gpu_caches_dirty)
3051 flush_domains = I915_GEM_GPU_DOMAINS;
3052
3053 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3054 if (ret)
3055 return ret;
3056
3057 trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3058
3059 engine->gpu_caches_dirty = false;
3060 return 0;
3061}
3062
3063void
3064intel_stop_engine(struct intel_engine_cs *engine)
3065{
3066 int ret;
3067
3068 if (!intel_engine_initialized(engine))
3069 return;
3070
3071 ret = intel_engine_idle(engine);
3072 if (ret)
3073 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
3074 engine->name, ret);
3075
3076 stop_ring(engine);
3077} 2890}
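
intel_ring_flush_all_caches(), intel_ring_invalidate_all_caches() and the engine->gpu_caches_dirty bookkeeping are gone; callers now request the flush explicitly through the engine's emit_flush() hook using the EMIT_* bits defined in intel_ringbuffer.h below. A hedged sketch of the replacement pattern at a call site:

	ret = req->engine->emit_flush(req, EMIT_INVALIDATE);	/* was invalidate_all_caches() */
	if (ret)
		return ret;

	ret = req->engine->emit_flush(req, EMIT_FLUSH);		/* was flush_all_caches() */
	if (ret)
		return ret;
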
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 12cb7ed90014..43e545e44352 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/hashtable.h> 4#include <linux/hashtable.h>
5#include "i915_gem_batch_pool.h" 5#include "i915_gem_batch_pool.h"
6#include "i915_gem_request.h"
6 7
7#define I915_CMD_HASH_ORDER 9 8#define I915_CMD_HASH_ORDER 9
8 9
@@ -31,23 +32,23 @@ struct intel_hw_status_page {
31 struct drm_i915_gem_object *obj; 32 struct drm_i915_gem_object *obj;
32}; 33};
33 34
34#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base)) 35#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
35#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) 36#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)
36 37
37#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base)) 38#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
38#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val) 39#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)
39 40
40#define I915_READ_HEAD(ring) I915_READ(RING_HEAD((ring)->mmio_base)) 41#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
41#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val) 42#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)
42 43
43#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base)) 44#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
44#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val) 45#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)
45 46
46#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base)) 47#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
47#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) 48#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)
48 49
49#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base)) 50#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
50#define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val) 51#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)
51 52
52/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to 53/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
53 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. 54 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
@@ -62,7 +63,7 @@ struct intel_hw_status_page {
62 (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ 63 (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
63 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) 64 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
64 65
65enum intel_ring_hangcheck_action { 66enum intel_engine_hangcheck_action {
66 HANGCHECK_IDLE = 0, 67 HANGCHECK_IDLE = 0,
67 HANGCHECK_WAIT, 68 HANGCHECK_WAIT,
68 HANGCHECK_ACTIVE, 69 HANGCHECK_ACTIVE,
@@ -72,24 +73,26 @@ enum intel_ring_hangcheck_action {
72 73
73#define HANGCHECK_SCORE_RING_HUNG 31 74#define HANGCHECK_SCORE_RING_HUNG 31
74 75
75struct intel_ring_hangcheck { 76struct intel_engine_hangcheck {
76 u64 acthd; 77 u64 acthd;
77 unsigned long user_interrupts; 78 unsigned long user_interrupts;
78 u32 seqno; 79 u32 seqno;
79 int score; 80 int score;
80 enum intel_ring_hangcheck_action action; 81 enum intel_engine_hangcheck_action action;
81 int deadlock; 82 int deadlock;
82 u32 instdone[I915_NUM_INSTDONE_REG]; 83 u32 instdone[I915_NUM_INSTDONE_REG];
83}; 84};
84 85
85struct intel_ringbuffer { 86struct intel_ring {
86 struct drm_i915_gem_object *obj; 87 struct drm_i915_gem_object *obj;
87 void __iomem *virtual_start; 88 void *vaddr;
88 struct i915_vma *vma; 89 struct i915_vma *vma;
89 90
90 struct intel_engine_cs *engine; 91 struct intel_engine_cs *engine;
91 struct list_head link; 92 struct list_head link;
92 93
94 struct list_head request_list;
95
93 u32 head; 96 u32 head;
94 u32 tail; 97 u32 tail;
95 int space; 98 int space;
@@ -146,8 +149,10 @@ struct intel_engine_cs {
146 unsigned int exec_id; 149 unsigned int exec_id;
147 unsigned int hw_id; 150 unsigned int hw_id;
148 unsigned int guc_id; /* XXX same as hw_id? */ 151 unsigned int guc_id; /* XXX same as hw_id? */
152 u64 fence_context;
149 u32 mmio_base; 153 u32 mmio_base;
150 struct intel_ringbuffer *buffer; 154 unsigned int irq_shift;
155 struct intel_ring *buffer;
151 struct list_head buffers; 156 struct list_head buffers;
152 157
153 /* Rather than have every client wait upon all user interrupts, 158 /* Rather than have every client wait upon all user interrupts,
@@ -195,33 +200,34 @@ struct intel_engine_cs {
195 200
196 u32 irq_keep_mask; /* always keep these interrupts */ 201 u32 irq_keep_mask; /* always keep these interrupts */
197 u32 irq_enable_mask; /* bitmask to enable ring interrupt */ 202 u32 irq_enable_mask; /* bitmask to enable ring interrupt */
198 void (*irq_enable)(struct intel_engine_cs *ring); 203 void (*irq_enable)(struct intel_engine_cs *engine);
199 void (*irq_disable)(struct intel_engine_cs *ring); 204 void (*irq_disable)(struct intel_engine_cs *engine);
200 205
201 int (*init_hw)(struct intel_engine_cs *ring); 206 int (*init_hw)(struct intel_engine_cs *engine);
202 207
203 int (*init_context)(struct drm_i915_gem_request *req); 208 int (*init_context)(struct drm_i915_gem_request *req);
204 209
205 void (*write_tail)(struct intel_engine_cs *ring, 210 int (*emit_flush)(struct drm_i915_gem_request *request,
206 u32 value); 211 u32 mode);
207 int __must_check (*flush)(struct drm_i915_gem_request *req, 212#define EMIT_INVALIDATE BIT(0)
208 u32 invalidate_domains, 213#define EMIT_FLUSH BIT(1)
209 u32 flush_domains); 214#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
210 int (*add_request)(struct drm_i915_gem_request *req); 215 int (*emit_bb_start)(struct drm_i915_gem_request *req,
216 u64 offset, u32 length,
217 unsigned int dispatch_flags);
218#define I915_DISPATCH_SECURE BIT(0)
219#define I915_DISPATCH_PINNED BIT(1)
220#define I915_DISPATCH_RS BIT(2)
221 int (*emit_request)(struct drm_i915_gem_request *req);
222 void (*submit_request)(struct drm_i915_gem_request *req);
211 /* Some chipsets are not quite as coherent as advertised and need 223 /* Some chipsets are not quite as coherent as advertised and need
212 * an expensive kick to force a true read of the up-to-date seqno. 224 * an expensive kick to force a true read of the up-to-date seqno.
213 * However, the up-to-date seqno is not always required and the last 225 * However, the up-to-date seqno is not always required and the last
214 * seen value is good enough. Note that the seqno will always be 226 * seen value is good enough. Note that the seqno will always be
215 * monotonic, even if not coherent. 227 * monotonic, even if not coherent.
216 */ 228 */
217 void (*irq_seqno_barrier)(struct intel_engine_cs *ring); 229 void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
218 int (*dispatch_execbuffer)(struct drm_i915_gem_request *req, 230 void (*cleanup)(struct intel_engine_cs *engine);
219 u64 offset, u32 length,
220 unsigned dispatch_flags);
221#define I915_DISPATCH_SECURE 0x1
222#define I915_DISPATCH_PINNED 0x2
223#define I915_DISPATCH_RS 0x4
224 void (*cleanup)(struct intel_engine_cs *ring);
225 231
226 /* GEN8 signal/wait table - never trust comments! 232 /* GEN8 signal/wait table - never trust comments!
227 * signal to signal to signal to signal to signal to 233 * signal to signal to signal to signal to signal to
@@ -274,12 +280,9 @@ struct intel_engine_cs {
274 }; 280 };
275 281
276 /* AKA wait() */ 282 /* AKA wait() */
277 int (*sync_to)(struct drm_i915_gem_request *to_req, 283 int (*sync_to)(struct drm_i915_gem_request *req,
278 struct intel_engine_cs *from, 284 struct drm_i915_gem_request *signal);
279 u32 seqno); 285 int (*signal)(struct drm_i915_gem_request *req);
280 int (*signal)(struct drm_i915_gem_request *signaller_req,
281 /* num_dwords needed by caller */
282 unsigned int num_dwords);
283 } semaphore; 286 } semaphore;
284 287
285 /* Execlists */ 288 /* Execlists */
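
The execlists-only emit_* hooks are folded into the common vtable above, and the old add_request()/write_tail() pair is split into emit_request(), which writes the breadcrumb commands into the request's ring, and submit_request(), which makes the new tail visible to the hardware (compare gen6_bsd_submit_request replacing gen6_bsd_ring_write_tail earlier in this patch). The semaphore sync_to() hook likewise now takes the signalling request instead of an (engine, seqno) pair. A hedged sketch of the two-stage path as a caller might drive it:

	ret = req->engine->emit_request(req);	/* breadcrumb into the ring */
	if (ret)
		return ret;

	req->engine->submit_request(req);	/* expose the new tail to the HW */
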
@@ -291,24 +294,6 @@ struct intel_engine_cs {
291 unsigned int idle_lite_restore_wa; 294 unsigned int idle_lite_restore_wa;
292 bool disable_lite_restore_wa; 295 bool disable_lite_restore_wa;
293 u32 ctx_desc_template; 296 u32 ctx_desc_template;
294 int (*emit_request)(struct drm_i915_gem_request *request);
295 int (*emit_flush)(struct drm_i915_gem_request *request,
296 u32 invalidate_domains,
297 u32 flush_domains);
298 int (*emit_bb_start)(struct drm_i915_gem_request *req,
299 u64 offset, unsigned dispatch_flags);
300
301 /**
302 * List of objects currently involved in rendering from the
303 * ringbuffer.
304 *
305 * Includes buffers having the contents of their GPU caches
306 * flushed, not necessarily primitives. last_read_req
307 * represents when the rendering involved will be completed.
308 *
309 * A reference is held on the buffer while on this list.
310 */
311 struct list_head active_list;
312 297
313 /** 298 /**
314 * List of breadcrumbs associated with GPU requests currently 299 * List of breadcrumbs associated with GPU requests currently
@@ -323,11 +308,16 @@ struct intel_engine_cs {
323 */ 308 */
324 u32 last_submitted_seqno; 309 u32 last_submitted_seqno;
325 310
326 bool gpu_caches_dirty; 311 /* An RCU guarded pointer to the last request. No reference is
312 * held to the request, users must carefully acquire a reference to
313 * the request using i915_gem_active_get_request_rcu(), or hold the
314 * struct_mutex.
315 */
316 struct i915_gem_active last_request;
327 317
328 struct i915_gem_context *last_context; 318 struct i915_gem_context *last_context;
329 319
330 struct intel_ring_hangcheck hangcheck; 320 struct intel_engine_hangcheck hangcheck;
331 321
332 struct { 322 struct {
333 struct drm_i915_gem_object *obj; 323 struct drm_i915_gem_object *obj;
@@ -338,7 +328,7 @@ struct intel_engine_cs {
338 328
339 /* 329 /*
340 * Table of commands the command parser needs to know about 330 * Table of commands the command parser needs to know about
341 * for this ring. 331 * for this engine.
342 */ 332 */
343 DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); 333 DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);
344 334
@@ -352,11 +342,11 @@ struct intel_engine_cs {
352 * Returns the bitmask for the length field of the specified command. 342 * Returns the bitmask for the length field of the specified command.
353 * Return 0 for an unrecognized/invalid command. 343 * Return 0 for an unrecognized/invalid command.
354 * 344 *
355 * If the command parser finds an entry for a command in the ring's 345 * If the command parser finds an entry for a command in the engine's
356 * cmd_tables, it gets the command's length based on the table entry. 346 * cmd_tables, it gets the command's length based on the table entry.
357 * If not, it calls this function to determine the per-ring length field 347 * If not, it calls this function to determine the per-engine length
358 * encoding for the command (i.e. certain opcode ranges use certain bits 348 * field encoding for the command (i.e. different opcode ranges use
359 * to encode the command length in the header). 349 * certain bits to encode the command length in the header).
360 */ 350 */
361 u32 (*get_cmd_length_mask)(u32 cmd_header); 351 u32 (*get_cmd_length_mask)(u32 cmd_header);
362}; 352};
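
A standalone toy (not driver code) of how a mask returned by get_cmd_length_mask() is applied to a command header; the header and mask values are made up purely for illustration:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t cmd_header  = 0x18800005;	/* illustrative opcode + length bits */
		uint32_t length_mask = 0x0000003f;	/* illustrative per-engine mask */

		/* A zero mask from the callback means unrecognized/invalid;
		 * otherwise the masked bits give the length field encoded in
		 * the header (any per-opcode bias is applied separately). */
		uint32_t len_field = cmd_header & length_mask;

		printf("length field = %u\n", len_field);
		return 0;
	}
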
@@ -374,8 +364,8 @@ intel_engine_flag(const struct intel_engine_cs *engine)
374} 364}
375 365
376static inline u32 366static inline u32
377intel_ring_sync_index(struct intel_engine_cs *engine, 367intel_engine_sync_index(struct intel_engine_cs *engine,
378 struct intel_engine_cs *other) 368 struct intel_engine_cs *other)
379{ 369{
380 int idx; 370 int idx;
381 371
@@ -437,67 +427,83 @@ intel_write_status_page(struct intel_engine_cs *engine,
437#define I915_GEM_HWS_SCRATCH_INDEX 0x40 427#define I915_GEM_HWS_SCRATCH_INDEX 0x40
438#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) 428#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
439 429
440struct intel_ringbuffer * 430struct intel_ring *
441intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size); 431intel_engine_create_ring(struct intel_engine_cs *engine, int size);
442int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, 432int intel_ring_pin(struct intel_ring *ring);
443 struct intel_ringbuffer *ringbuf); 433void intel_ring_unpin(struct intel_ring *ring);
444void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf); 434void intel_ring_free(struct intel_ring *ring);
445void intel_ringbuffer_free(struct intel_ringbuffer *ring);
446 435
447void intel_stop_engine(struct intel_engine_cs *engine); 436void intel_engine_stop(struct intel_engine_cs *engine);
448void intel_cleanup_engine(struct intel_engine_cs *engine); 437void intel_engine_cleanup(struct intel_engine_cs *engine);
449 438
450int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); 439int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
451 440
452int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); 441int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
453int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); 442int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
454static inline void intel_ring_emit(struct intel_engine_cs *engine, 443
455 u32 data) 444static inline void intel_ring_emit(struct intel_ring *ring, u32 data)
456{ 445{
457 struct intel_ringbuffer *ringbuf = engine->buffer; 446 *(uint32_t *)(ring->vaddr + ring->tail) = data;
458 iowrite32(data, ringbuf->virtual_start + ringbuf->tail); 447 ring->tail += 4;
459 ringbuf->tail += 4;
460} 448}
461static inline void intel_ring_emit_reg(struct intel_engine_cs *engine, 449
462 i915_reg_t reg) 450static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg)
463{ 451{
464 intel_ring_emit(engine, i915_mmio_reg_offset(reg)); 452 intel_ring_emit(ring, i915_mmio_reg_offset(reg));
453}
454
455static inline void intel_ring_advance(struct intel_ring *ring)
456{
457 /* Dummy function.
458 *
459 * This serves as a placeholder in the code so that the reader
460 * can compare against the preceding intel_ring_begin() and
461 * check that the number of dwords emitted matches the space
462 * reserved for the command packet (i.e. the value passed to
463 * intel_ring_begin()).
464 */
465} 465}
466static inline void intel_ring_advance(struct intel_engine_cs *engine) 466
467static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
467{ 468{
468 struct intel_ringbuffer *ringbuf = engine->buffer; 469 /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
469 ringbuf->tail &= ringbuf->size - 1; 470 return value & (ring->size - 1);
470} 471}
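
intel_ring_offset() relies on the ring size being a power of two, so masking with (size - 1) wraps a tail of exactly ring->size back to 0 instead of handing the full size to the hardware. A standalone demonstration of that arithmetic (illustrative size, not driver code):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		const uint32_t size = 4096;	/* any power-of-two ring size */

		assert((4095u       & (size - 1)) == 4095u);	/* in range: unchanged */
		assert((size        & (size - 1)) == 0u);	/* exactly size: wraps to 0 */
		assert(((size + 4u) & (size - 1)) == 4u);	/* past the end: wraps */
		return 0;
	}
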
472
471int __intel_ring_space(int head, int tail, int size); 473int __intel_ring_space(int head, int tail, int size);
472void intel_ring_update_space(struct intel_ringbuffer *ringbuf); 474void intel_ring_update_space(struct intel_ring *ring);
473 475
474int __must_check intel_engine_idle(struct intel_engine_cs *engine); 476void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
475void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno);
476int intel_ring_flush_all_caches(struct drm_i915_gem_request *req);
477int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
478 477
479int intel_init_pipe_control(struct intel_engine_cs *engine, int size); 478int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
480void intel_fini_pipe_control(struct intel_engine_cs *engine); 479void intel_fini_pipe_control(struct intel_engine_cs *engine);
481 480
482int intel_init_render_ring_buffer(struct drm_device *dev); 481void intel_engine_setup_common(struct intel_engine_cs *engine);
483int intel_init_bsd_ring_buffer(struct drm_device *dev); 482int intel_engine_init_common(struct intel_engine_cs *engine);
484int intel_init_bsd2_ring_buffer(struct drm_device *dev); 483void intel_engine_cleanup_common(struct intel_engine_cs *engine);
485int intel_init_blt_ring_buffer(struct drm_device *dev);
486int intel_init_vebox_ring_buffer(struct drm_device *dev);
487 484
488u64 intel_ring_get_active_head(struct intel_engine_cs *engine); 485static inline int intel_engine_idle(struct intel_engine_cs *engine,
489static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) 486 bool interruptible)
490{ 487{
491 return intel_read_status_page(engine, I915_GEM_HWS_INDEX); 488 /* Wait upon the last request to be completed */
489 return i915_gem_active_wait_unlocked(&engine->last_request,
490 interruptible, NULL, NULL);
492} 491}
493 492
494int init_workarounds_ring(struct intel_engine_cs *engine); 493int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
494int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
495int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
496int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
497int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
495 498
496static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) 499u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
500static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
497{ 501{
498 return ringbuf->tail; 502 return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
499} 503}
500 504
505int init_workarounds_ring(struct intel_engine_cs *engine);
506
501/* 507/*
502 * Arbitrary size for largest possible 'add request' sequence. The code paths 508 * Arbitrary size for largest possible 'add request' sequence. The code paths
503 * are complex and variable. Empirical measurement shows that the worst case 509 * are complex and variable. Empirical measurement shows that the worst case
@@ -513,17 +519,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
513} 519}
514 520
515/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ 521/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
516struct intel_wait {
517 struct rb_node node;
518 struct task_struct *tsk;
519 u32 seqno;
520};
521
522struct intel_signal_node {
523 struct rb_node node;
524 struct intel_wait wait;
525};
526
527int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); 522int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
528 523
529static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) 524static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
@@ -570,4 +565,9 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
570unsigned int intel_kick_waiters(struct drm_i915_private *i915); 565unsigned int intel_kick_waiters(struct drm_i915_private *i915);
571unsigned int intel_kick_signalers(struct drm_i915_private *i915); 566unsigned int intel_kick_signalers(struct drm_i915_private *i915);
572 567
568static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
569{
570 return i915_gem_active_isset(&engine->last_request);
571}
572
573#endif /* _INTEL_RINGBUFFER_H_ */ 573#endif /* _INTEL_RINGBUFFER_H_ */
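
intel_engine_idle() is now an inline wait on the engine's last_request tracking, and intel_engine_is_active() exposes whether that tracker is set at all. A hedged sketch combining the two declarations above (the interruptible wait is chosen arbitrarily):

	static int sketch_quiesce_engine(struct intel_engine_cs *engine)
	{
		if (!intel_engine_is_active(engine))
			return 0;		/* nothing outstanding */

		return intel_engine_idle(engine, true);
	}
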
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 1d9736b0cced..cbdca7e4d307 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -36,6 +36,7 @@
36#include <drm/drm_atomic.h> 36#include <drm/drm_atomic.h>
37#include <drm/drm_plane_helper.h> 37#include <drm/drm_plane_helper.h>
38#include "intel_drv.h" 38#include "intel_drv.h"
39#include "intel_frontbuffer.h"
39#include <drm/i915_drm.h> 40#include <drm/i915_drm.h>
40#include "i915_drv.h" 41#include "i915_drv.h"
41 42
@@ -430,7 +431,7 @@ vlv_update_plane(struct drm_plane *dplane,
430 */ 431 */
431 sprctl |= SP_GAMMA_ENABLE; 432 sprctl |= SP_GAMMA_ENABLE;
432 433
433 if (obj->tiling_mode != I915_TILING_NONE) 434 if (i915_gem_object_is_tiled(obj))
434 sprctl |= SP_TILED; 435 sprctl |= SP_TILED;
435 436
436 /* Sizes are 0 based */ 437 /* Sizes are 0 based */
@@ -467,7 +468,7 @@ vlv_update_plane(struct drm_plane *dplane,
467 I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]); 468 I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]);
468 I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x); 469 I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x);
469 470
470 if (obj->tiling_mode != I915_TILING_NONE) 471 if (i915_gem_object_is_tiled(obj))
471 I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x); 472 I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x);
472 else 473 else
473 I915_WRITE(SPLINOFF(pipe, plane), linear_offset); 474 I915_WRITE(SPLINOFF(pipe, plane), linear_offset);
@@ -552,7 +553,7 @@ ivb_update_plane(struct drm_plane *plane,
552 */ 553 */
553 sprctl |= SPRITE_GAMMA_ENABLE; 554 sprctl |= SPRITE_GAMMA_ENABLE;
554 555
555 if (obj->tiling_mode != I915_TILING_NONE) 556 if (i915_gem_object_is_tiled(obj))
556 sprctl |= SPRITE_TILED; 557 sprctl |= SPRITE_TILED;
557 558
558 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 559 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
@@ -606,7 +607,7 @@ ivb_update_plane(struct drm_plane *plane,
606 * register */ 607 * register */
607 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 608 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
608 I915_WRITE(SPROFFSET(pipe), (y << 16) | x); 609 I915_WRITE(SPROFFSET(pipe), (y << 16) | x);
609 else if (obj->tiling_mode != I915_TILING_NONE) 610 else if (i915_gem_object_is_tiled(obj))
610 I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x); 611 I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x);
611 else 612 else
612 I915_WRITE(SPRLINOFF(pipe), linear_offset); 613 I915_WRITE(SPRLINOFF(pipe), linear_offset);
@@ -693,7 +694,7 @@ ilk_update_plane(struct drm_plane *plane,
693 */ 694 */
694 dvscntr |= DVS_GAMMA_ENABLE; 695 dvscntr |= DVS_GAMMA_ENABLE;
695 696
696 if (obj->tiling_mode != I915_TILING_NONE) 697 if (i915_gem_object_is_tiled(obj))
697 dvscntr |= DVS_TILED; 698 dvscntr |= DVS_TILED;
698 699
699 if (IS_GEN6(dev)) 700 if (IS_GEN6(dev))
@@ -736,7 +737,7 @@ ilk_update_plane(struct drm_plane *plane,
736 I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]); 737 I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]);
737 I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x); 738 I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x);
738 739
739 if (obj->tiling_mode != I915_TILING_NONE) 740 if (i915_gem_object_is_tiled(obj))
740 I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x); 741 I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x);
741 else 742 else
742 I915_WRITE(DVSLINOFF(pipe), linear_offset); 743 I915_WRITE(DVSLINOFF(pipe), linear_offset);
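
The sprite hunks above mechanically replace the open-coded obj->tiling_mode != I915_TILING_NONE test with a predicate helper. Its definition is not part of this excerpt; a hedged sketch of one plausible shape (the real helper in i915_drv.h may test a repacked field instead):

	static inline bool sketch_object_is_tiled(const struct drm_i915_gem_object *obj)
	{
		return obj->tiling_mode != I915_TILING_NONE;
	}
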
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index ff80a81b1a84..43f833901b8e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -435,7 +435,7 @@ void intel_uncore_sanitize(struct drm_i915_private *dev_priv)
435 i915.enable_rc6 = sanitize_rc6_option(dev_priv, i915.enable_rc6); 435 i915.enable_rc6 = sanitize_rc6_option(dev_priv, i915.enable_rc6);
436 436
437 /* BIOS often leaves RC6 enabled, but disable it for hw init */ 437 /* BIOS often leaves RC6 enabled, but disable it for hw init */
438 intel_disable_gt_powersave(dev_priv); 438 intel_sanitize_gt_powersave(dev_priv);
439} 439}
440 440
441static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, 441static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
@@ -1618,8 +1618,10 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
1618 * @timeout_ms: timeout in millisecond 1618 * @timeout_ms: timeout in millisecond
1619 * 1619 *
1620 * This routine waits until the target register @reg contains the expected 1620 * This routine waits until the target register @reg contains the expected
1621 * @value after applying the @mask, i.e. it waits until 1621 * @value after applying the @mask, i.e. it waits until ::
1622 * (I915_READ_FW(@reg) & @mask) == @value 1622 *
1623 * (I915_READ_FW(reg) & mask) == value
1624 *
1623 * Otherwise, the wait will timeout after @timeout_ms milliseconds. 1625 * Otherwise, the wait will timeout after @timeout_ms milliseconds.
1624 * 1626 *
1625 * Note that this routine assumes the caller holds forcewake asserted, it is 1627 * Note that this routine assumes the caller holds forcewake asserted, it is
@@ -1652,8 +1654,10 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
1652 * @timeout_ms: timeout in millisecond 1654 * @timeout_ms: timeout in millisecond
1653 * 1655 *
1654 * This routine waits until the target register @reg contains the expected 1656 * This routine waits until the target register @reg contains the expected
1655 * @value after applying the @mask, i.e. it waits until 1657 * @value after applying the @mask, i.e. it waits until ::
1656 * (I915_READ(@reg) & @mask) == @value 1658 *
1659 * (I915_READ(reg) & mask) == value
1660 *
1657 * Otherwise, the wait will timeout after @timeout_ms milliseconds. 1661 * Otherwise, the wait will timeout after @timeout_ms milliseconds.
1658 * 1662 *
1659 * Returns 0 if the register matches the desired condition, or -ETIMEOUT. 1663 * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
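
The two kerneldoc hunks above only reflow the formula into a literal block; the helpers' behaviour is unchanged. A hedged usage sketch for the non-forcewake variant documented last (the register, mask, value and timeout are illustrative, not taken from the commit):

	int ret;

	ret = intel_wait_for_register(dev_priv,
				      RING_MI_MODE(engine->mmio_base),
				      MODE_IDLE, MODE_IDLE,
				      1000);
	if (ret)
		DRM_ERROR("%s: timed out waiting for ring idle\n", engine->name);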