path: root/drivers/gpu
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/i915/Kconfig.debug | 12
-rw-r--r--  drivers/gpu/drm/i915/Makefile | 2
-rw-r--r--  drivers/gpu/drm/i915/dvo_ch7017.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gvt/scheduler.c | 12
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c | 10
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c | 255
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c | 31
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h | 347
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 264
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.h | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_clflush.c | 8
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_clflush.h | 1
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_context.c | 116
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_dmabuf.c | 24
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c | 161
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_object.h | 4
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.c | 185
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.h | 14
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_shrinker.c | 71
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_stolen.c | 4
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_timeline.c | 95
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_timeline.h | 47
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c | 59
-rw-r--r--  drivers/gpu/drm/i915/i915_guc_submission.c | 96
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c | 43
-rw-r--r--  drivers/gpu/drm/i915/i915_pci.c | 5
-rw-r--r--  drivers/gpu/drm/i915/i915_perf.c | 395
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h | 42
-rw-r--r--  drivers/gpu/drm/i915/i915_sw_fence.c | 62
-rw-r--r--  drivers/gpu/drm/i915/i915_sw_fence.h | 1
-rw-r--r--  drivers/gpu/drm/i915/i915_syncmap.c | 412
-rw-r--r--  drivers/gpu/drm/i915/i915_syncmap.h | 38
-rw-r--r--  drivers/gpu/drm/i915/i915_sysfs.c | 26
-rw-r--r--  drivers/gpu/drm/i915/i915_trace.h | 49
-rw-r--r--  drivers/gpu/drm/i915/i915_utils.h | 34
-rw-r--r--  drivers/gpu/drm/i915/intel_atomic_plane.c | 24
-rw-r--r--  drivers/gpu/drm/i915/intel_audio.c | 19
-rw-r--r--  drivers/gpu/drm/i915/intel_breadcrumbs.c | 13
-rw-r--r--  drivers/gpu/drm/i915/intel_cdclk.c | 22
-rw-r--r--  drivers/gpu/drm/i915/intel_crt.c | 10
-rw-r--r--  drivers/gpu/drm/i915/intel_device_info.c | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c | 745
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c | 360
-rw-r--r--  drivers/gpu/drm/i915/intel_dp_aux_backlight.c | 45
-rw-r--r--  drivers/gpu/drm/i915/intel_dp_link_training.c | 25
-rw-r--r--  drivers/gpu/drm/i915/intel_dp_mst.c | 17
-rw-r--r--  drivers/gpu/drm/i915/intel_drv.h | 86
-rw-r--r--  drivers/gpu/drm/i915/intel_dsi.c | 7
-rw-r--r--  drivers/gpu/drm/i915/intel_dsi_vbt.c | 8
-rw-r--r--  drivers/gpu/drm/i915/intel_dvo.c | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c | 231
-rw-r--r--  drivers/gpu/drm/i915/intel_fbc.c | 4
-rw-r--r--  drivers/gpu/drm/i915/intel_guc_ct.c | 461
-rw-r--r--  drivers/gpu/drm/i915/intel_guc_ct.h | 86
-rw-r--r--  drivers/gpu/drm/i915/intel_guc_fwif.h | 47
-rw-r--r--  drivers/gpu/drm/i915/intel_guc_loader.c | 19
-rw-r--r--  drivers/gpu/drm/i915/intel_guc_log.c | 6
-rw-r--r--  drivers/gpu/drm/i915/intel_hangcheck.c | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_hdmi.c | 7
-rw-r--r--  drivers/gpu/drm/i915/intel_huc.c | 64
-rw-r--r--  drivers/gpu/drm/i915/intel_lpe_audio.c | 99
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 415
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.h | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_panel.c | 17
-rw-r--r--  drivers/gpu/drm/i915/intel_pipe_crc.c | 30
-rw-r--r--  drivers/gpu/drm/i915/intel_pm.c | 1479
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 329
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h | 105
-rw-r--r--  drivers/gpu/drm/i915/intel_sdvo.c | 9
-rw-r--r--  drivers/gpu/drm/i915/intel_sprite.c | 124
-rw-r--r--  drivers/gpu/drm/i915/intel_tv.c | 208
-rw-r--r--  drivers/gpu/drm/i915/intel_uc.c | 155
-rw-r--r--  drivers/gpu/drm/i915/intel_uc.h | 41
-rw-r--r--  drivers/gpu/drm/i915/intel_uncore.c | 384
-rw-r--r--  drivers/gpu/drm/i915/intel_uncore.h | 170
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_coherency.c | 10
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_context.c | 8
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c | 100
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_object.c | 4
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_request.c | 2
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_timeline.c | 299
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_mock_selftests.h | 3
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_random.c | 11
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_random.h | 2
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_sw_fence.c | 582
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_syncmap.c | 616
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_engine.c | 11
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_gem_device.c | 12
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_timeline.c | 45
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_timeline.h | 33
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_uncore.c | 46
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_uncore.h | 30
93 files changed, 7685 insertions, 2951 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index b00edd3b8800..78c5c049a347 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -61,6 +61,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS
61 61
62 If in doubt, say "N". 62 If in doubt, say "N".
63 63
64config DRM_I915_SW_FENCE_CHECK_DAG
65 bool "Enable additional driver debugging for detecting dependency cycles"
66 depends on DRM_I915
67 default n
68 help
69 Choose this option to turn on extra driver debugging that may affect
70 performance but will catch some internal issues.
71
72 Recommended for driver developers only.
73
74 If in doubt, say "N".
75
64config DRM_I915_SELFTEST 76config DRM_I915_SELFTEST
65 bool "Enable selftests upon driver load" 77 bool "Enable selftests upon driver load"
66 depends on DRM_I915 78 depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2cf04504e494..16dccf550412 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
16 i915_params.o \ 16 i915_params.o \
17 i915_pci.o \ 17 i915_pci.o \
18 i915_suspend.o \ 18 i915_suspend.o \
19 i915_syncmap.o \
19 i915_sw_fence.o \ 20 i915_sw_fence.o \
20 i915_sysfs.o \ 21 i915_sysfs.o \
21 intel_csr.o \ 22 intel_csr.o \
@@ -57,6 +58,7 @@ i915-y += i915_cmd_parser.o \
57 58
58# general-purpose microcontroller (GuC) support 59# general-purpose microcontroller (GuC) support
59i915-y += intel_uc.o \ 60i915-y += intel_uc.o \
61 intel_guc_ct.o \
60 intel_guc_log.o \ 62 intel_guc_log.o \
61 intel_guc_loader.o \ 63 intel_guc_loader.o \
62 intel_huc.o \ 64 intel_huc.o \
diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c
index b3c7c199200c..80b3e16cf48c 100644
--- a/drivers/gpu/drm/i915/dvo_ch7017.c
+++ b/drivers/gpu/drm/i915/dvo_ch7017.c
@@ -280,10 +280,10 @@ static void ch7017_mode_set(struct intel_dvo_device *dvo,
280 (0 << CH7017_PHASE_DETECTOR_SHIFT); 280 (0 << CH7017_PHASE_DETECTOR_SHIFT);
281 } else { 281 } else {
282 outputs_enable = CH7017_LVDS_CHANNEL_A | CH7017_CHARGE_PUMP_HIGH; 282 outputs_enable = CH7017_LVDS_CHANNEL_A | CH7017_CHARGE_PUMP_HIGH;
283 lvds_pll_feedback_div = CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED | 283 lvds_pll_feedback_div =
284 CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED |
284 (2 << CH7017_LVDS_PLL_FEED_BACK_DIVIDER_SHIFT) | 285 (2 << CH7017_LVDS_PLL_FEED_BACK_DIVIDER_SHIFT) |
285 (3 << CH7017_LVDS_PLL_FEED_FORWARD_DIVIDER_SHIFT); 286 (3 << CH7017_LVDS_PLL_FEED_FORWARD_DIVIDER_SHIFT);
286 lvds_pll_feedback_div = 35;
287 lvds_control_2 = (3 << CH7017_LOOP_FILTER_SHIFT) | 287 lvds_control_2 = (3 << CH7017_LOOP_FILTER_SHIFT) |
288 (0 << CH7017_PHASE_DETECTOR_SHIFT); 288 (0 << CH7017_PHASE_DETECTOR_SHIFT);
289 if (1) { /* XXX: dual channel panel detection. Assume yes for now. */ 289 if (1) { /* XXX: dual channel panel detection. Assume yes for now. */
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index bada32b33237..6ae286cb5804 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
69 gvt_dbg_sched("ring id %d workload lrca %x", ring_id, 69 gvt_dbg_sched("ring id %d workload lrca %x", ring_id,
70 workload->ctx_desc.lrca); 70 workload->ctx_desc.lrca);
71 71
72 context_page_num = intel_lr_context_size( 72 context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
73 gvt->dev_priv->engine[ring_id]);
74 73
75 context_page_num = context_page_num >> PAGE_SHIFT; 74 context_page_num = context_page_num >> PAGE_SHIFT;
76 75
@@ -181,6 +180,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
181 struct intel_engine_cs *engine = dev_priv->engine[ring_id]; 180 struct intel_engine_cs *engine = dev_priv->engine[ring_id];
182 struct drm_i915_gem_request *rq; 181 struct drm_i915_gem_request *rq;
183 struct intel_vgpu *vgpu = workload->vgpu; 182 struct intel_vgpu *vgpu = workload->vgpu;
183 struct intel_ring *ring;
184 int ret; 184 int ret;
185 185
186 gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", 186 gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
@@ -199,8 +199,9 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
199 * shadow_ctx pages invalid. So gvt need to pin itself. After update 199 * shadow_ctx pages invalid. So gvt need to pin itself. After update
200 * the guest context, gvt can unpin the shadow_ctx safely. 200 * the guest context, gvt can unpin the shadow_ctx safely.
201 */ 201 */
202 ret = engine->context_pin(engine, shadow_ctx); 202 ring = engine->context_pin(engine, shadow_ctx);
203 if (ret) { 203 if (IS_ERR(ring)) {
204 ret = PTR_ERR(ring);
204 gvt_vgpu_err("fail to pin shadow context\n"); 205 gvt_vgpu_err("fail to pin shadow context\n");
205 workload->status = ret; 206 workload->status = ret;
206 mutex_unlock(&dev_priv->drm.struct_mutex); 207 mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -330,8 +331,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
330 gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, 331 gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
331 workload->ctx_desc.lrca); 332 workload->ctx_desc.lrca);
332 333
333 context_page_num = intel_lr_context_size( 334 context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
334 gvt->dev_priv->engine[ring_id]);
335 335
336 context_page_num = context_page_num >> PAGE_SHIFT; 336 context_page_num = context_page_num >> PAGE_SHIFT;
337 337
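
Editor's note: the gvt/scheduler.c hunks above follow an API change in which engine->context_pin() no longer returns an int but the pinned struct intel_ring pointer, with failure reported through the pointer itself and unpacked via IS_ERR()/PTR_ERR(). The standalone sketch below only illustrates that kernel error-pointer convention; ERR_PTR()/IS_ERR()/PTR_ERR() are reimplemented locally so the example builds in userspace, and context_pin()/struct ring here are placeholders, not the i915 functions.

/*
 * Minimal userspace illustration of the ERR_PTR convention used by the
 * dispatch_workload() change above: a function returns either a valid
 * pointer or a small negative errno encoded in the pointer value.
 */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)	{ return (void *)error; }
static inline long PTR_ERR(const void *ptr)	{ return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct ring { int id; };

/* Stand-in for engine->context_pin(): pointer on success, ERR_PTR on failure. */
static struct ring *context_pin(int fail)
{
	static struct ring r = { .id = 0 };

	if (fail)
		return ERR_PTR(-ENOMEM);
	return &r;
}

int main(void)
{
	struct ring *ring = context_pin(1);

	if (IS_ERR(ring)) {
		int ret = PTR_ERR(ring);	/* recover the errno, as dispatch_workload() now does */

		printf("pin failed: %d\n", ret);
		return 1;
	}
	printf("pinned ring %d\n", ring->id);
	return 0;
}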
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 7af100f84410..f0cb22cc0dd6 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1166,8 +1166,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
1166 find_reg(engine, is_master, reg_addr); 1166 find_reg(engine, is_master, reg_addr);
1167 1167
1168 if (!reg) { 1168 if (!reg) {
1169 DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n", 1169 DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
1170 reg_addr, *cmd, engine->exec_id); 1170 reg_addr, *cmd, engine->name);
1171 return false; 1171 return false;
1172 } 1172 }
1173 1173
@@ -1222,11 +1222,11 @@ static bool check_cmd(const struct intel_engine_cs *engine,
1222 desc->bits[i].mask; 1222 desc->bits[i].mask;
1223 1223
1224 if (dword != desc->bits[i].expected) { 1224 if (dword != desc->bits[i].expected) {
1225 DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n", 1225 DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n",
1226 *cmd, 1226 *cmd,
1227 desc->bits[i].mask, 1227 desc->bits[i].mask,
1228 desc->bits[i].expected, 1228 desc->bits[i].expected,
1229 dword, engine->exec_id); 1229 dword, engine->name);
1230 return false; 1230 return false;
1231 } 1231 }
1232 } 1232 }
@@ -1284,7 +1284,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
1284 1284
1285 if (*cmd == MI_BATCH_BUFFER_END) { 1285 if (*cmd == MI_BATCH_BUFFER_END) {
1286 if (needs_clflush_after) { 1286 if (needs_clflush_after) {
1287 void *ptr = ptr_mask_bits(shadow_batch_obj->mm.mapping); 1287 void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
1288 drm_clflush_virt_range(ptr, 1288 drm_clflush_virt_range(ptr,
1289 (void *)(cmd + 1) - ptr); 1289 (void *)(cmd + 1) - ptr);
1290 } 1290 }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1c66108f4333..7e0816ccdc21 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2482,8 +2482,6 @@ static void i915_guc_client_info(struct seq_file *m,
2482 client->wq_size, client->wq_offset, client->wq_tail); 2482 client->wq_size, client->wq_offset, client->wq_tail);
2483 2483
2484 seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space); 2484 seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
2485 seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
2486 seq_printf(m, "\tLast submission result: %d\n", client->retcode);
2487 2485
2488 for_each_engine(engine, dev_priv, id) { 2486 for_each_engine(engine, dev_priv, id) {
2489 u64 submissions = client->submissions[id]; 2487 u64 submissions = client->submissions[id];
@@ -2494,42 +2492,34 @@ static void i915_guc_client_info(struct seq_file *m,
2494 seq_printf(m, "\tTotal: %llu\n", tot); 2492 seq_printf(m, "\tTotal: %llu\n", tot);
2495} 2493}
2496 2494
2497static int i915_guc_info(struct seq_file *m, void *data) 2495static bool check_guc_submission(struct seq_file *m)
2498{ 2496{
2499 struct drm_i915_private *dev_priv = node_to_i915(m->private); 2497 struct drm_i915_private *dev_priv = node_to_i915(m->private);
2500 const struct intel_guc *guc = &dev_priv->guc; 2498 const struct intel_guc *guc = &dev_priv->guc;
2501 struct intel_engine_cs *engine;
2502 enum intel_engine_id id;
2503 u64 total;
2504 2499
2505 if (!guc->execbuf_client) { 2500 if (!guc->execbuf_client) {
2506 seq_printf(m, "GuC submission %s\n", 2501 seq_printf(m, "GuC submission %s\n",
2507 HAS_GUC_SCHED(dev_priv) ? 2502 HAS_GUC_SCHED(dev_priv) ?
2508 "disabled" : 2503 "disabled" :
2509 "not supported"); 2504 "not supported");
2510 return 0; 2505 return false;
2511 } 2506 }
2512 2507
2508 return true;
2509}
2510
2511static int i915_guc_info(struct seq_file *m, void *data)
2512{
2513 struct drm_i915_private *dev_priv = node_to_i915(m->private);
2514 const struct intel_guc *guc = &dev_priv->guc;
2515
2516 if (!check_guc_submission(m))
2517 return 0;
2518
2513 seq_printf(m, "Doorbell map:\n"); 2519 seq_printf(m, "Doorbell map:\n");
2514 seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); 2520 seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
2515 seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline); 2521 seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);
2516 2522
2517 seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
2518 seq_printf(m, "GuC action failure count: %u\n", guc->action_fail);
2519 seq_printf(m, "GuC last action command: 0x%x\n", guc->action_cmd);
2520 seq_printf(m, "GuC last action status: 0x%x\n", guc->action_status);
2521 seq_printf(m, "GuC last action error code: %d\n", guc->action_err);
2522
2523 total = 0;
2524 seq_printf(m, "\nGuC submissions:\n");
2525 for_each_engine(engine, dev_priv, id) {
2526 u64 submissions = guc->submissions[id];
2527 total += submissions;
2528 seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
2529 engine->name, submissions, guc->last_seqno[id]);
2530 }
2531 seq_printf(m, "\t%s: %llu\n", "Total", total);
2532
2533 seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client); 2523 seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client);
2534 i915_guc_client_info(m, dev_priv, guc->execbuf_client); 2524 i915_guc_client_info(m, dev_priv, guc->execbuf_client);
2535 2525
@@ -2540,36 +2530,99 @@ static int i915_guc_info(struct seq_file *m, void *data)
2540 return 0; 2530 return 0;
2541} 2531}
2542 2532
2543static int i915_guc_log_dump(struct seq_file *m, void *data) 2533static int i915_guc_stage_pool(struct seq_file *m, void *data)
2544{ 2534{
2545 struct drm_i915_private *dev_priv = node_to_i915(m->private); 2535 struct drm_i915_private *dev_priv = node_to_i915(m->private);
2546 struct drm_i915_gem_object *obj; 2536 const struct intel_guc *guc = &dev_priv->guc;
2547 int i = 0, pg; 2537 struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr;
2538 struct i915_guc_client *client = guc->execbuf_client;
2539 unsigned int tmp;
2540 int index;
2548 2541
2549 if (!dev_priv->guc.log.vma) 2542 if (!check_guc_submission(m))
2550 return 0; 2543 return 0;
2551 2544
2552 obj = dev_priv->guc.log.vma->obj; 2545 for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) {
2553 for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) { 2546 struct intel_engine_cs *engine;
2554 u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg)); 2547
2548 if (!(desc->attribute & GUC_STAGE_DESC_ATTR_ACTIVE))
2549 continue;
2550
2551 seq_printf(m, "GuC stage descriptor %u:\n", index);
2552 seq_printf(m, "\tIndex: %u\n", desc->stage_id);
2553 seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute);
2554 seq_printf(m, "\tPriority: %d\n", desc->priority);
2555 seq_printf(m, "\tDoorbell id: %d\n", desc->db_id);
2556 seq_printf(m, "\tEngines used: 0x%x\n",
2557 desc->engines_used);
2558 seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n",
2559 desc->db_trigger_phy,
2560 desc->db_trigger_cpu,
2561 desc->db_trigger_uk);
2562 seq_printf(m, "\tProcess descriptor: 0x%x\n",
2563 desc->process_desc);
2564 seq_printf(m, "\tWorkqueue address: 0x%x, size: 0x%x\n",
2565 desc->wq_addr, desc->wq_size);
2566 seq_putc(m, '\n');
2567
2568 for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
2569 u32 guc_engine_id = engine->guc_id;
2570 struct guc_execlist_context *lrc =
2571 &desc->lrc[guc_engine_id];
2572
2573 seq_printf(m, "\t%s LRC:\n", engine->name);
2574 seq_printf(m, "\t\tContext desc: 0x%x\n",
2575 lrc->context_desc);
2576 seq_printf(m, "\t\tContext id: 0x%x\n", lrc->context_id);
2577 seq_printf(m, "\t\tLRCA: 0x%x\n", lrc->ring_lrca);
2578 seq_printf(m, "\t\tRing begin: 0x%x\n", lrc->ring_begin);
2579 seq_printf(m, "\t\tRing end: 0x%x\n", lrc->ring_end);
2580 seq_putc(m, '\n');
2581 }
2582 }
2583
2584 return 0;
2585}
2586
2587static int i915_guc_log_dump(struct seq_file *m, void *data)
2588{
2589 struct drm_info_node *node = m->private;
2590 struct drm_i915_private *dev_priv = node_to_i915(node);
2591 bool dump_load_err = !!node->info_ent->data;
2592 struct drm_i915_gem_object *obj = NULL;
2593 u32 *log;
2594 int i = 0;
2595
2596 if (dump_load_err)
2597 obj = dev_priv->guc.load_err_log;
2598 else if (dev_priv->guc.log.vma)
2599 obj = dev_priv->guc.log.vma->obj;
2555 2600
2556 for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) 2601 if (!obj)
2557 seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", 2602 return 0;
2558 *(log + i), *(log + i + 1),
2559 *(log + i + 2), *(log + i + 3));
2560 2603
2561 kunmap_atomic(log); 2604 log = i915_gem_object_pin_map(obj, I915_MAP_WC);
2605 if (IS_ERR(log)) {
2606 DRM_DEBUG("Failed to pin object\n");
2607 seq_puts(m, "(log data unaccessible)\n");
2608 return PTR_ERR(log);
2562 } 2609 }
2563 2610
2611 for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
2612 seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
2613 *(log + i), *(log + i + 1),
2614 *(log + i + 2), *(log + i + 3));
2615
2564 seq_putc(m, '\n'); 2616 seq_putc(m, '\n');
2565 2617
2618 i915_gem_object_unpin_map(obj);
2619
2566 return 0; 2620 return 0;
2567} 2621}
2568 2622
2569static int i915_guc_log_control_get(void *data, u64 *val) 2623static int i915_guc_log_control_get(void *data, u64 *val)
2570{ 2624{
2571 struct drm_device *dev = data; 2625 struct drm_i915_private *dev_priv = data;
2572 struct drm_i915_private *dev_priv = to_i915(dev);
2573 2626
2574 if (!dev_priv->guc.log.vma) 2627 if (!dev_priv->guc.log.vma)
2575 return -EINVAL; 2628 return -EINVAL;
@@ -2581,14 +2634,13 @@ static int i915_guc_log_control_get(void *data, u64 *val)
2581 2634
2582static int i915_guc_log_control_set(void *data, u64 val) 2635static int i915_guc_log_control_set(void *data, u64 val)
2583{ 2636{
2584 struct drm_device *dev = data; 2637 struct drm_i915_private *dev_priv = data;
2585 struct drm_i915_private *dev_priv = to_i915(dev);
2586 int ret; 2638 int ret;
2587 2639
2588 if (!dev_priv->guc.log.vma) 2640 if (!dev_priv->guc.log.vma)
2589 return -EINVAL; 2641 return -EINVAL;
2590 2642
2591 ret = mutex_lock_interruptible(&dev->struct_mutex); 2643 ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
2592 if (ret) 2644 if (ret)
2593 return ret; 2645 return ret;
2594 2646
@@ -2596,7 +2648,7 @@ static int i915_guc_log_control_set(void *data, u64 val)
2596 ret = i915_guc_log_control(dev_priv, val); 2648 ret = i915_guc_log_control(dev_priv, val);
2597 intel_runtime_pm_put(dev_priv); 2649 intel_runtime_pm_put(dev_priv);
2598 2650
2599 mutex_unlock(&dev->struct_mutex); 2651 mutex_unlock(&dev_priv->drm.struct_mutex);
2600 return ret; 2652 return ret;
2601} 2653}
2602 2654
@@ -2855,7 +2907,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
2855 seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version), 2907 seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version),
2856 CSR_VERSION_MINOR(csr->version)); 2908 CSR_VERSION_MINOR(csr->version));
2857 2909
2858 if (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6)) { 2910 if (IS_KABYLAKE(dev_priv) ||
2911 (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6))) {
2859 seq_printf(m, "DC3 -> DC5 count: %d\n", 2912 seq_printf(m, "DC3 -> DC5 count: %d\n",
2860 I915_READ(SKL_CSR_DC3_DC5_COUNT)); 2913 I915_READ(SKL_CSR_DC3_DC5_COUNT));
2861 seq_printf(m, "DC5 -> DC6 count: %d\n", 2914 seq_printf(m, "DC5 -> DC6 count: %d\n",
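
Editor's note: the i915_dmc_info hunk above extends the DC-counter printout to Kaby Lake while keeping the Skylake firmware-version requirement, relying on the packed CSR_VERSION() encoding (major << 16 | minor). A minimal sketch of that encoding and comparison follows; the macro definitions are copied from the i915_drv.h context visible later in this patch, and the 1.26 version number is just an arbitrary example value.

#include <stdint.h>
#include <stdio.h>

#define CSR_VERSION(major, minor)	((major) << 16 | (minor))
#define CSR_VERSION_MAJOR(version)	((version) >> 16)
#define CSR_VERSION_MINOR(version)	((version) & 0xffff)

int main(void)
{
	uint32_t version = CSR_VERSION(1, 26);	/* example DMC firmware version */

	printf("version: %u.%u\n",
	       CSR_VERSION_MAJOR(version), CSR_VERSION_MINOR(version));

	/* Whole versions compare correctly as plain integers. */
	if (version >= CSR_VERSION(1, 6))
		printf("DC3->DC5 / DC5->DC6 counters are available\n");

	return 0;
}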
@@ -3043,36 +3096,6 @@ static void intel_connector_info(struct seq_file *m,
3043 intel_seq_print_mode(m, 2, mode); 3096 intel_seq_print_mode(m, 2, mode);
3044} 3097}
3045 3098
3046static bool cursor_active(struct drm_i915_private *dev_priv, int pipe)
3047{
3048 u32 state;
3049
3050 if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
3051 state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
3052 else
3053 state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
3054
3055 return state;
3056}
3057
3058static bool cursor_position(struct drm_i915_private *dev_priv,
3059 int pipe, int *x, int *y)
3060{
3061 u32 pos;
3062
3063 pos = I915_READ(CURPOS(pipe));
3064
3065 *x = (pos >> CURSOR_X_SHIFT) & CURSOR_POS_MASK;
3066 if (pos & (CURSOR_POS_SIGN << CURSOR_X_SHIFT))
3067 *x = -*x;
3068
3069 *y = (pos >> CURSOR_Y_SHIFT) & CURSOR_POS_MASK;
3070 if (pos & (CURSOR_POS_SIGN << CURSOR_Y_SHIFT))
3071 *y = -*y;
3072
3073 return cursor_active(dev_priv, pipe);
3074}
3075
3076static const char *plane_type(enum drm_plane_type type) 3099static const char *plane_type(enum drm_plane_type type)
3077{ 3100{
3078 switch (type) { 3101 switch (type) {
@@ -3194,9 +3217,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
3194 seq_printf(m, "CRTC info\n"); 3217 seq_printf(m, "CRTC info\n");
3195 seq_printf(m, "---------\n"); 3218 seq_printf(m, "---------\n");
3196 for_each_intel_crtc(dev, crtc) { 3219 for_each_intel_crtc(dev, crtc) {
3197 bool active;
3198 struct intel_crtc_state *pipe_config; 3220 struct intel_crtc_state *pipe_config;
3199 int x, y;
3200 3221
3201 drm_modeset_lock(&crtc->base.mutex, NULL); 3222 drm_modeset_lock(&crtc->base.mutex, NULL);
3202 pipe_config = to_intel_crtc_state(crtc->base.state); 3223 pipe_config = to_intel_crtc_state(crtc->base.state);
@@ -3208,14 +3229,18 @@ static int i915_display_info(struct seq_file *m, void *unused)
3208 yesno(pipe_config->dither), pipe_config->pipe_bpp); 3229 yesno(pipe_config->dither), pipe_config->pipe_bpp);
3209 3230
3210 if (pipe_config->base.active) { 3231 if (pipe_config->base.active) {
3232 struct intel_plane *cursor =
3233 to_intel_plane(crtc->base.cursor);
3234
3211 intel_crtc_info(m, crtc); 3235 intel_crtc_info(m, crtc);
3212 3236
3213 active = cursor_position(dev_priv, crtc->pipe, &x, &y); 3237 seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x\n",
3214 seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x, active? %s\n", 3238 yesno(cursor->base.state->visible),
3215 yesno(crtc->cursor_base), 3239 cursor->base.state->crtc_x,
3216 x, y, crtc->base.cursor->state->crtc_w, 3240 cursor->base.state->crtc_y,
3217 crtc->base.cursor->state->crtc_h, 3241 cursor->base.state->crtc_w,
3218 crtc->cursor_addr, yesno(active)); 3242 cursor->base.state->crtc_h,
3243 cursor->cursor.base);
3219 intel_scaler_info(m, crtc); 3244 intel_scaler_info(m, crtc);
3220 intel_plane_info(m, crtc); 3245 intel_plane_info(m, crtc);
3221 } 3246 }
@@ -3316,7 +3341,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
3316 3341
3317 if (i915.enable_execlists) { 3342 if (i915.enable_execlists) {
3318 u32 ptr, read, write; 3343 u32 ptr, read, write;
3319 struct rb_node *rb; 3344 unsigned int idx;
3320 3345
3321 seq_printf(m, "\tExeclist status: 0x%08x %08x\n", 3346 seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
3322 I915_READ(RING_EXECLIST_STATUS_LO(engine)), 3347 I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3334,8 +3359,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
3334 if (read > write) 3359 if (read > write)
3335 write += GEN8_CSB_ENTRIES; 3360 write += GEN8_CSB_ENTRIES;
3336 while (read < write) { 3361 while (read < write) {
3337 unsigned int idx = ++read % GEN8_CSB_ENTRIES; 3362 idx = ++read % GEN8_CSB_ENTRIES;
3338
3339 seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n", 3363 seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
3340 idx, 3364 idx,
3341 I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), 3365 I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3343,28 +3367,30 @@ static int i915_engine_info(struct seq_file *m, void *unused)
3343 } 3367 }
3344 3368
3345 rcu_read_lock(); 3369 rcu_read_lock();
3346 rq = READ_ONCE(engine->execlist_port[0].request); 3370 for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
3347 if (rq) { 3371 unsigned int count;
3348 seq_printf(m, "\t\tELSP[0] count=%d, ", 3372
3349 engine->execlist_port[0].count); 3373 rq = port_unpack(&engine->execlist_port[idx],
3350 print_request(m, rq, "rq: "); 3374 &count);
3351 } else { 3375 if (rq) {
3352 seq_printf(m, "\t\tELSP[0] idle\n"); 3376 seq_printf(m, "\t\tELSP[%d] count=%d, ",
3353 } 3377 idx, count);
3354 rq = READ_ONCE(engine->execlist_port[1].request); 3378 print_request(m, rq, "rq: ");
3355 if (rq) { 3379 } else {
3356 seq_printf(m, "\t\tELSP[1] count=%d, ", 3380 seq_printf(m, "\t\tELSP[%d] idle\n",
3357 engine->execlist_port[1].count); 3381 idx);
3358 print_request(m, rq, "rq: "); 3382 }
3359 } else {
3360 seq_printf(m, "\t\tELSP[1] idle\n");
3361 } 3383 }
3362 rcu_read_unlock(); 3384 rcu_read_unlock();
3363 3385
3364 spin_lock_irq(&engine->timeline->lock); 3386 spin_lock_irq(&engine->timeline->lock);
3365 for (rb = engine->execlist_first; rb; rb = rb_next(rb)) { 3387 for (rb = engine->execlist_first; rb; rb = rb_next(rb)){
3366 rq = rb_entry(rb, typeof(*rq), priotree.node); 3388 struct i915_priolist *p =
3367 print_request(m, rq, "\t\tQ "); 3389 rb_entry(rb, typeof(*p), node);
3390
3391 list_for_each_entry(rq, &p->requests,
3392 priotree.link)
3393 print_request(m, rq, "\t\tQ ");
3368 } 3394 }
3369 spin_unlock_irq(&engine->timeline->lock); 3395 spin_unlock_irq(&engine->timeline->lock);
3370 } else if (INTEL_GEN(dev_priv) > 6) { 3396 } else if (INTEL_GEN(dev_priv) > 6) {
@@ -3704,16 +3730,10 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
3704 if (len == 0) 3730 if (len == 0)
3705 return 0; 3731 return 0;
3706 3732
3707 input_buffer = kmalloc(len + 1, GFP_KERNEL); 3733 input_buffer = memdup_user_nul(ubuf, len);
3708 if (!input_buffer) 3734 if (IS_ERR(input_buffer))
3709 return -ENOMEM; 3735 return PTR_ERR(input_buffer);
3710 3736
3711 if (copy_from_user(input_buffer, ubuf, len)) {
3712 status = -EFAULT;
3713 goto out;
3714 }
3715
3716 input_buffer[len] = '\0';
3717 DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len); 3737 DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len);
3718 3738
3719 drm_connector_list_iter_begin(dev, &conn_iter); 3739 drm_connector_list_iter_begin(dev, &conn_iter);
@@ -3739,7 +3759,6 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
3739 } 3759 }
3740 } 3760 }
3741 drm_connector_list_iter_end(&conn_iter); 3761 drm_connector_list_iter_end(&conn_iter);
3742out:
3743 kfree(input_buffer); 3762 kfree(input_buffer);
3744 if (status < 0) 3763 if (status < 0)
3745 return status; 3764 return status;
@@ -3900,6 +3919,8 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
3900 num_levels = 3; 3919 num_levels = 3;
3901 else if (IS_VALLEYVIEW(dev_priv)) 3920 else if (IS_VALLEYVIEW(dev_priv))
3902 num_levels = 1; 3921 num_levels = 1;
3922 else if (IS_G4X(dev_priv))
3923 num_levels = 3;
3903 else 3924 else
3904 num_levels = ilk_wm_max_level(dev_priv) + 1; 3925 num_levels = ilk_wm_max_level(dev_priv) + 1;
3905 3926
@@ -3912,8 +3933,10 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
3912 * - WM1+ latency values in 0.5us units 3933 * - WM1+ latency values in 0.5us units
3913 * - latencies are in us on gen9/vlv/chv 3934 * - latencies are in us on gen9/vlv/chv
3914 */ 3935 */
3915 if (INTEL_GEN(dev_priv) >= 9 || IS_VALLEYVIEW(dev_priv) || 3936 if (INTEL_GEN(dev_priv) >= 9 ||
3916 IS_CHERRYVIEW(dev_priv)) 3937 IS_VALLEYVIEW(dev_priv) ||
3938 IS_CHERRYVIEW(dev_priv) ||
3939 IS_G4X(dev_priv))
3917 latency *= 10; 3940 latency *= 10;
3918 else if (level > 0) 3941 else if (level > 0)
3919 latency *= 5; 3942 latency *= 5;
@@ -3974,7 +3997,7 @@ static int pri_wm_latency_open(struct inode *inode, struct file *file)
3974{ 3997{
3975 struct drm_i915_private *dev_priv = inode->i_private; 3998 struct drm_i915_private *dev_priv = inode->i_private;
3976 3999
3977 if (INTEL_GEN(dev_priv) < 5) 4000 if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
3978 return -ENODEV; 4001 return -ENODEV;
3979 4002
3980 return single_open(file, pri_wm_latency_show, dev_priv); 4003 return single_open(file, pri_wm_latency_show, dev_priv);
@@ -4016,6 +4039,8 @@ static ssize_t wm_latency_write(struct file *file, const char __user *ubuf,
4016 num_levels = 3; 4039 num_levels = 3;
4017 else if (IS_VALLEYVIEW(dev_priv)) 4040 else if (IS_VALLEYVIEW(dev_priv))
4018 num_levels = 1; 4041 num_levels = 1;
4042 else if (IS_G4X(dev_priv))
4043 num_levels = 3;
4019 else 4044 else
4020 num_levels = ilk_wm_max_level(dev_priv) + 1; 4045 num_levels = ilk_wm_max_level(dev_priv) + 1;
4021 4046
@@ -4776,6 +4801,8 @@ static const struct drm_info_list i915_debugfs_list[] = {
4776 {"i915_guc_info", i915_guc_info, 0}, 4801 {"i915_guc_info", i915_guc_info, 0},
4777 {"i915_guc_load_status", i915_guc_load_status_info, 0}, 4802 {"i915_guc_load_status", i915_guc_load_status_info, 0},
4778 {"i915_guc_log_dump", i915_guc_log_dump, 0}, 4803 {"i915_guc_log_dump", i915_guc_log_dump, 0},
4804 {"i915_guc_load_err_log_dump", i915_guc_log_dump, 0, (void *)1},
4805 {"i915_guc_stage_pool", i915_guc_stage_pool, 0},
4779 {"i915_huc_load_status", i915_huc_load_status_info, 0}, 4806 {"i915_huc_load_status", i915_huc_load_status_info, 0},
4780 {"i915_frequency_info", i915_frequency_info, 0}, 4807 {"i915_frequency_info", i915_frequency_info, 0},
4781 {"i915_hangcheck_info", i915_hangcheck_info, 0}, 4808 {"i915_hangcheck_info", i915_hangcheck_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3036d4835b0f..7b8c72776f46 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
350 case I915_PARAM_HAS_EXEC_SOFTPIN: 350 case I915_PARAM_HAS_EXEC_SOFTPIN:
351 case I915_PARAM_HAS_EXEC_ASYNC: 351 case I915_PARAM_HAS_EXEC_ASYNC:
352 case I915_PARAM_HAS_EXEC_FENCE: 352 case I915_PARAM_HAS_EXEC_FENCE:
353 case I915_PARAM_HAS_EXEC_CAPTURE:
353 /* For the time being all of these are always true; 354 /* For the time being all of these are always true;
354 * if some supported hardware does not have one of these 355 * if some supported hardware does not have one of these
355 * features this value needs to be provided from 356 * features this value needs to be provided from
@@ -834,10 +835,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
834 intel_uc_init_early(dev_priv); 835 intel_uc_init_early(dev_priv);
835 i915_memcpy_init_early(dev_priv); 836 i915_memcpy_init_early(dev_priv);
836 837
837 ret = intel_engines_init_early(dev_priv);
838 if (ret)
839 return ret;
840
841 ret = i915_workqueues_init(dev_priv); 838 ret = i915_workqueues_init(dev_priv);
842 if (ret < 0) 839 if (ret < 0)
843 goto err_engines; 840 goto err_engines;
@@ -855,7 +852,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
855 intel_init_audio_hooks(dev_priv); 852 intel_init_audio_hooks(dev_priv);
856 ret = i915_gem_load_init(dev_priv); 853 ret = i915_gem_load_init(dev_priv);
857 if (ret < 0) 854 if (ret < 0)
858 goto err_workqueues; 855 goto err_irq;
859 856
860 intel_display_crc_init(dev_priv); 857 intel_display_crc_init(dev_priv);
861 858
@@ -867,7 +864,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
867 864
868 return 0; 865 return 0;
869 866
870err_workqueues: 867err_irq:
868 intel_irq_fini(dev_priv);
871 i915_workqueues_cleanup(dev_priv); 869 i915_workqueues_cleanup(dev_priv);
872err_engines: 870err_engines:
873 i915_engines_cleanup(dev_priv); 871 i915_engines_cleanup(dev_priv);
@@ -882,6 +880,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
882{ 880{
883 i915_perf_fini(dev_priv); 881 i915_perf_fini(dev_priv);
884 i915_gem_load_cleanup(dev_priv); 882 i915_gem_load_cleanup(dev_priv);
883 intel_irq_fini(dev_priv);
885 i915_workqueues_cleanup(dev_priv); 884 i915_workqueues_cleanup(dev_priv);
886 i915_engines_cleanup(dev_priv); 885 i915_engines_cleanup(dev_priv);
887} 886}
@@ -947,14 +946,21 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
947 946
948 ret = i915_mmio_setup(dev_priv); 947 ret = i915_mmio_setup(dev_priv);
949 if (ret < 0) 948 if (ret < 0)
950 goto put_bridge; 949 goto err_bridge;
951 950
952 intel_uncore_init(dev_priv); 951 intel_uncore_init(dev_priv);
952
953 ret = intel_engines_init_mmio(dev_priv);
954 if (ret)
955 goto err_uncore;
956
953 i915_gem_init_mmio(dev_priv); 957 i915_gem_init_mmio(dev_priv);
954 958
955 return 0; 959 return 0;
956 960
957put_bridge: 961err_uncore:
962 intel_uncore_fini(dev_priv);
963err_bridge:
958 pci_dev_put(dev_priv->bridge_dev); 964 pci_dev_put(dev_priv->bridge_dev);
959 965
960 return ret; 966 return ret;
@@ -1213,9 +1219,8 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
1213 struct drm_i915_private *dev_priv; 1219 struct drm_i915_private *dev_priv;
1214 int ret; 1220 int ret;
1215 1221
1216 /* Enable nuclear pageflip on ILK+, except vlv/chv */ 1222 /* Enable nuclear pageflip on ILK+ */
1217 if (!i915.nuclear_pageflip && 1223 if (!i915.nuclear_pageflip && match_info->gen < 5)
1218 (match_info->gen < 5 || match_info->has_gmch_display))
1219 driver.driver_features &= ~DRIVER_ATOMIC; 1224 driver.driver_features &= ~DRIVER_ATOMIC;
1220 1225
1221 ret = -ENOMEM; 1226 ret = -ENOMEM;
@@ -1272,10 +1277,6 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
1272 1277
1273 dev_priv->ipc_enabled = false; 1278 dev_priv->ipc_enabled = false;
1274 1279
1275 /* Everything is in place, we can now relax! */
1276 DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
1277 driver.name, driver.major, driver.minor, driver.patchlevel,
1278 driver.date, pci_name(pdev), dev_priv->drm.primary->index);
1279 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) 1280 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
1280 DRM_INFO("DRM_I915_DEBUG enabled\n"); 1281 DRM_INFO("DRM_I915_DEBUG enabled\n");
1281 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 1282 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9b0949f6c1a..35e161b5b90e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -55,6 +55,7 @@
55#include "i915_reg.h" 55#include "i915_reg.h"
56#include "i915_utils.h" 56#include "i915_utils.h"
57 57
58#include "intel_uncore.h"
58#include "intel_bios.h" 59#include "intel_bios.h"
59#include "intel_dpll_mgr.h" 60#include "intel_dpll_mgr.h"
60#include "intel_uc.h" 61#include "intel_uc.h"
@@ -79,8 +80,8 @@
79 80
80#define DRIVER_NAME "i915" 81#define DRIVER_NAME "i915"
81#define DRIVER_DESC "Intel Graphics" 82#define DRIVER_DESC "Intel Graphics"
82#define DRIVER_DATE "20170403" 83#define DRIVER_DATE "20170529"
83#define DRIVER_TIMESTAMP 1491198738 84#define DRIVER_TIMESTAMP 1496041258
84 85
85/* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and 86/* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
86 * WARN_ON()) for hw state sanity checks to check for unexpected conditions 87 * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -114,6 +115,13 @@ typedef struct {
114 fp; \ 115 fp; \
115}) 116})
116 117
118static inline bool is_fixed16_zero(uint_fixed_16_16_t val)
119{
120 if (val.val == 0)
121 return true;
122 return false;
123}
124
117static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val) 125static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val)
118{ 126{
119 uint_fixed_16_16_t fp; 127 uint_fixed_16_16_t fp;
@@ -152,8 +160,39 @@ static inline uint_fixed_16_16_t max_fixed_16_16(uint_fixed_16_16_t max1,
152 return max; 160 return max;
153} 161}
154 162
155static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val, 163static inline uint32_t div_round_up_fixed16(uint_fixed_16_16_t val,
156 uint32_t d) 164 uint_fixed_16_16_t d)
165{
166 return DIV_ROUND_UP(val.val, d.val);
167}
168
169static inline uint32_t mul_round_up_u32_fixed16(uint32_t val,
170 uint_fixed_16_16_t mul)
171{
172 uint64_t intermediate_val;
173 uint32_t result;
174
175 intermediate_val = (uint64_t) val * mul.val;
176 intermediate_val = DIV_ROUND_UP_ULL(intermediate_val, 1 << 16);
177 WARN_ON(intermediate_val >> 32);
178 result = clamp_t(uint32_t, intermediate_val, 0, ~0);
179 return result;
180}
181
182static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val,
183 uint_fixed_16_16_t mul)
184{
185 uint64_t intermediate_val;
186 uint_fixed_16_16_t fp;
187
188 intermediate_val = (uint64_t) val.val * mul.val;
189 intermediate_val = intermediate_val >> 16;
190 WARN_ON(intermediate_val >> 32);
191 fp.val = clamp_t(uint32_t, intermediate_val, 0, ~0);
192 return fp;
193}
194
195static inline uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d)
157{ 196{
158 uint_fixed_16_16_t fp, res; 197 uint_fixed_16_16_t fp, res;
159 198
@@ -162,8 +201,7 @@ static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val,
162 return res; 201 return res;
163} 202}
164 203
165static inline uint_fixed_16_16_t fixed_16_16_div_round_up_u64(uint32_t val, 204static inline uint_fixed_16_16_t fixed_16_16_div_u64(uint32_t val, uint32_t d)
166 uint32_t d)
167{ 205{
168 uint_fixed_16_16_t res; 206 uint_fixed_16_16_t res;
169 uint64_t interm_val; 207 uint64_t interm_val;
@@ -176,6 +214,17 @@ static inline uint_fixed_16_16_t fixed_16_16_div_round_up_u64(uint32_t val,
176 return res; 214 return res;
177} 215}
178 216
217static inline uint32_t div_round_up_u32_fixed16(uint32_t val,
218 uint_fixed_16_16_t d)
219{
220 uint64_t interm_val;
221
222 interm_val = (uint64_t)val << 16;
223 interm_val = DIV_ROUND_UP_ULL(interm_val, d.val);
224 WARN_ON(interm_val >> 32);
225 return clamp_t(uint32_t, interm_val, 0, ~0);
226}
227
179static inline uint_fixed_16_16_t mul_u32_fixed_16_16(uint32_t val, 228static inline uint_fixed_16_16_t mul_u32_fixed_16_16(uint32_t val,
180 uint_fixed_16_16_t mul) 229 uint_fixed_16_16_t mul)
181{ 230{
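
Editor's note: the hunk above reworks the 16.16 fixed-point helpers used by the watermark code: multiplies go through a 64-bit intermediate and shift back by 16 (the driver versions additionally WARN and clamp on overflow, as seen above), and divisions pre-shift the numerator and round up. The userspace sketch below shows the same arithmetic; the type and helper names are local to the sketch, not the driver's uint_fixed_16_16_t API.

#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t val; } fixed16;	/* 16 integer bits . 16 fractional bits */

static fixed16 u32_to_fixed16(uint32_t v)
{
	return (fixed16){ .val = v << 16 };
}

static fixed16 mul_fixed16(fixed16 a, fixed16 b)
{
	uint64_t tmp = (uint64_t)a.val * b.val;	/* 32.32 intermediate */

	return (fixed16){ .val = (uint32_t)(tmp >> 16) };	/* back to 16.16 */
}

static uint32_t div_round_up_u32_fixed16(uint32_t v, fixed16 d)
{
	uint64_t tmp = (uint64_t)v << 16;	/* align numerator with the 16.16 divisor */

	return (uint32_t)((tmp + d.val - 1) / d.val);	/* DIV_ROUND_UP */
}

int main(void)
{
	fixed16 half = { .val = 0x8000 };	/* 0.5 in 16.16 */
	fixed16 p = mul_fixed16(u32_to_fixed16(3), half);

	printf("3.0 * 0.5 -> raw 0x%08x (integer part %u)\n", p.val, p.val >> 16);
	printf("ceil(7 / 2.0) -> %u\n", div_round_up_u32_fixed16(7, u32_to_fixed16(2)));
	return 0;
}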
@@ -676,116 +725,6 @@ struct drm_i915_display_funcs {
676 void (*load_luts)(struct drm_crtc_state *crtc_state); 725 void (*load_luts)(struct drm_crtc_state *crtc_state);
677}; 726};
678 727
679enum forcewake_domain_id {
680 FW_DOMAIN_ID_RENDER = 0,
681 FW_DOMAIN_ID_BLITTER,
682 FW_DOMAIN_ID_MEDIA,
683
684 FW_DOMAIN_ID_COUNT
685};
686
687enum forcewake_domains {
688 FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER),
689 FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER),
690 FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA),
691 FORCEWAKE_ALL = (FORCEWAKE_RENDER |
692 FORCEWAKE_BLITTER |
693 FORCEWAKE_MEDIA)
694};
695
696#define FW_REG_READ (1)
697#define FW_REG_WRITE (2)
698
699enum decoupled_power_domain {
700 GEN9_DECOUPLED_PD_BLITTER = 0,
701 GEN9_DECOUPLED_PD_RENDER,
702 GEN9_DECOUPLED_PD_MEDIA,
703 GEN9_DECOUPLED_PD_ALL
704};
705
706enum decoupled_ops {
707 GEN9_DECOUPLED_OP_WRITE = 0,
708 GEN9_DECOUPLED_OP_READ
709};
710
711enum forcewake_domains
712intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
713 i915_reg_t reg, unsigned int op);
714
715struct intel_uncore_funcs {
716 void (*force_wake_get)(struct drm_i915_private *dev_priv,
717 enum forcewake_domains domains);
718 void (*force_wake_put)(struct drm_i915_private *dev_priv,
719 enum forcewake_domains domains);
720
721 uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv,
722 i915_reg_t r, bool trace);
723 uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv,
724 i915_reg_t r, bool trace);
725 uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv,
726 i915_reg_t r, bool trace);
727 uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv,
728 i915_reg_t r, bool trace);
729
730 void (*mmio_writeb)(struct drm_i915_private *dev_priv,
731 i915_reg_t r, uint8_t val, bool trace);
732 void (*mmio_writew)(struct drm_i915_private *dev_priv,
733 i915_reg_t r, uint16_t val, bool trace);
734 void (*mmio_writel)(struct drm_i915_private *dev_priv,
735 i915_reg_t r, uint32_t val, bool trace);
736};
737
738struct intel_forcewake_range {
739 u32 start;
740 u32 end;
741
742 enum forcewake_domains domains;
743};
744
745struct intel_uncore {
746 spinlock_t lock; /** lock is also taken in irq contexts. */
747
748 const struct intel_forcewake_range *fw_domains_table;
749 unsigned int fw_domains_table_entries;
750
751 struct notifier_block pmic_bus_access_nb;
752 struct intel_uncore_funcs funcs;
753
754 unsigned fifo_count;
755
756 enum forcewake_domains fw_domains;
757 enum forcewake_domains fw_domains_active;
758
759 u32 fw_set;
760 u32 fw_clear;
761 u32 fw_reset;
762
763 struct intel_uncore_forcewake_domain {
764 enum forcewake_domain_id id;
765 enum forcewake_domains mask;
766 unsigned wake_count;
767 struct hrtimer timer;
768 i915_reg_t reg_set;
769 i915_reg_t reg_ack;
770 } fw_domain[FW_DOMAIN_ID_COUNT];
771
772 int unclaimed_mmio_check;
773};
774
775#define __mask_next_bit(mask) ({ \
776 int __idx = ffs(mask) - 1; \
777 mask &= ~BIT(__idx); \
778 __idx; \
779})
780
781/* Iterate over initialised fw domains */
782#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \
783 for (tmp__ = (mask__); \
784 tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
785
786#define for_each_fw_domain(domain__, dev_priv__, tmp__) \
787 for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__)
788
789#define CSR_VERSION(major, minor) ((major) << 16 | (minor)) 728#define CSR_VERSION(major, minor) ((major) << 16 | (minor))
790#define CSR_VERSION_MAJOR(version) ((version) >> 16) 729#define CSR_VERSION_MAJOR(version) ((version) >> 16)
791#define CSR_VERSION_MINOR(version) ((version) & 0xffff) 730#define CSR_VERSION_MINOR(version) ((version) & 0xffff)
@@ -821,8 +760,8 @@ struct intel_csr {
821 func(has_gmbus_irq); \ 760 func(has_gmbus_irq); \
822 func(has_gmch_display); \ 761 func(has_gmch_display); \
823 func(has_guc); \ 762 func(has_guc); \
763 func(has_guc_ct); \
824 func(has_hotplug); \ 764 func(has_hotplug); \
825 func(has_hw_contexts); \
826 func(has_l3_dpf); \ 765 func(has_l3_dpf); \
827 func(has_llc); \ 766 func(has_llc); \
828 func(has_logical_ring_contexts); \ 767 func(has_logical_ring_contexts); \
@@ -1025,6 +964,9 @@ struct i915_gpu_state {
1025 u32 *pages[0]; 964 u32 *pages[0];
1026 } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; 965 } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
1027 966
967 struct drm_i915_error_object **user_bo;
968 long user_bo_count;
969
1028 struct drm_i915_error_object *wa_ctx; 970 struct drm_i915_error_object *wa_ctx;
1029 971
1030 struct drm_i915_error_request { 972 struct drm_i915_error_request {
@@ -1511,11 +1453,7 @@ struct i915_gem_mm {
1511 /** LRU list of objects with fence regs on them. */ 1453 /** LRU list of objects with fence regs on them. */
1512 struct list_head fence_list; 1454 struct list_head fence_list;
1513 1455
1514 /** 1456 u64 unordered_timeline;
1515 * Are we in a non-interruptible section of code like
1516 * modesetting?
1517 */
1518 bool interruptible;
1519 1457
1520 /* the indicator for dispatch video commands on two BSD rings */ 1458 /* the indicator for dispatch video commands on two BSD rings */
1521 atomic_t bsd_engine_dispatch_index; 1459 atomic_t bsd_engine_dispatch_index;
@@ -1566,7 +1504,7 @@ struct i915_gpu_error {
1566 * 1504 *
1567 * This is a counter which gets incremented when reset is triggered, 1505 * This is a counter which gets incremented when reset is triggered,
1568 * 1506 *
1569 * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set 1507 * Before the reset commences, the I915_RESET_BACKOFF bit is set
1570 * meaning that any waiters holding onto the struct_mutex should 1508 * meaning that any waiters holding onto the struct_mutex should
1571 * relinquish the lock immediately in order for the reset to start. 1509 * relinquish the lock immediately in order for the reset to start.
1572 * 1510 *
@@ -1763,13 +1701,15 @@ struct ilk_wm_values {
1763 enum intel_ddb_partitioning partitioning; 1701 enum intel_ddb_partitioning partitioning;
1764}; 1702};
1765 1703
1766struct vlv_pipe_wm { 1704struct g4x_pipe_wm {
1767 uint16_t plane[I915_MAX_PLANES]; 1705 uint16_t plane[I915_MAX_PLANES];
1706 uint16_t fbc;
1768}; 1707};
1769 1708
1770struct vlv_sr_wm { 1709struct g4x_sr_wm {
1771 uint16_t plane; 1710 uint16_t plane;
1772 uint16_t cursor; 1711 uint16_t cursor;
1712 uint16_t fbc;
1773}; 1713};
1774 1714
1775struct vlv_wm_ddl_values { 1715struct vlv_wm_ddl_values {
@@ -1777,13 +1717,22 @@ struct vlv_wm_ddl_values {
1777}; 1717};
1778 1718
1779struct vlv_wm_values { 1719struct vlv_wm_values {
1780 struct vlv_pipe_wm pipe[3]; 1720 struct g4x_pipe_wm pipe[3];
1781 struct vlv_sr_wm sr; 1721 struct g4x_sr_wm sr;
1782 struct vlv_wm_ddl_values ddl[3]; 1722 struct vlv_wm_ddl_values ddl[3];
1783 uint8_t level; 1723 uint8_t level;
1784 bool cxsr; 1724 bool cxsr;
1785}; 1725};
1786 1726
1727struct g4x_wm_values {
1728 struct g4x_pipe_wm pipe[2];
1729 struct g4x_sr_wm sr;
1730 struct g4x_sr_wm hpll;
1731 bool cxsr;
1732 bool hpll_en;
1733 bool fbc_en;
1734};
1735
1787struct skl_ddb_entry { 1736struct skl_ddb_entry {
1788 uint16_t start, end; /* in number of blocks, 'end' is exclusive */ 1737 uint16_t start, end; /* in number of blocks, 'end' is exclusive */
1789}; 1738};
@@ -2100,7 +2049,7 @@ struct i915_oa_ops {
2100 size_t *offset); 2049 size_t *offset);
2101 2050
2102 /** 2051 /**
2103 * @oa_buffer_is_empty: Check if OA buffer empty (false positives OK) 2052 * @oa_buffer_check: Check for OA buffer data + update tail
2104 * 2053 *
2105 * This is either called via fops or the poll check hrtimer (atomic 2054 * This is either called via fops or the poll check hrtimer (atomic
2106 * ctx) without any locks taken. 2055 * ctx) without any locks taken.
@@ -2113,7 +2062,7 @@ struct i915_oa_ops {
2113 * here, which will be handled gracefully - likely resulting in an 2062 * here, which will be handled gracefully - likely resulting in an
2114 * %EAGAIN error for userspace. 2063 * %EAGAIN error for userspace.
2115 */ 2064 */
2116 bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); 2065 bool (*oa_buffer_check)(struct drm_i915_private *dev_priv);
2117}; 2066};
2118 2067
2119struct intel_cdclk_state { 2068struct intel_cdclk_state {
@@ -2127,6 +2076,7 @@ struct drm_i915_private {
2127 struct kmem_cache *vmas; 2076 struct kmem_cache *vmas;
2128 struct kmem_cache *requests; 2077 struct kmem_cache *requests;
2129 struct kmem_cache *dependencies; 2078 struct kmem_cache *dependencies;
2079 struct kmem_cache *priorities;
2130 2080
2131 const struct intel_device_info info; 2081 const struct intel_device_info info;
2132 2082
@@ -2362,7 +2312,6 @@ struct drm_i915_private {
2362 */ 2312 */
2363 struct mutex av_mutex; 2313 struct mutex av_mutex;
2364 2314
2365 uint32_t hw_context_size;
2366 struct list_head context_list; 2315 struct list_head context_list;
2367 2316
2368 u32 fdi_rx_config; 2317 u32 fdi_rx_config;
@@ -2413,6 +2362,7 @@ struct drm_i915_private {
2413 struct ilk_wm_values hw; 2362 struct ilk_wm_values hw;
2414 struct skl_wm_values skl_hw; 2363 struct skl_wm_values skl_hw;
2415 struct vlv_wm_values vlv; 2364 struct vlv_wm_values vlv;
2365 struct g4x_wm_values g4x;
2416 }; 2366 };
2417 2367
2418 uint8_t max_level; 2368 uint8_t max_level;
@@ -2454,11 +2404,14 @@ struct drm_i915_private {
2454 wait_queue_head_t poll_wq; 2404 wait_queue_head_t poll_wq;
2455 bool pollin; 2405 bool pollin;
2456 2406
2407 /**
2408 * For rate limiting any notifications of spurious
2409 * invalid OA reports
2410 */
2411 struct ratelimit_state spurious_report_rs;
2412
2457 bool periodic; 2413 bool periodic;
2458 int period_exponent; 2414 int period_exponent;
2459 int timestamp_frequency;
2460
2461 int tail_margin;
2462 2415
2463 int metrics_set; 2416 int metrics_set;
2464 2417
@@ -2472,6 +2425,70 @@ struct drm_i915_private {
2472 u8 *vaddr; 2425 u8 *vaddr;
2473 int format; 2426 int format;
2474 int format_size; 2427 int format_size;
2428
2429 /**
2430 * Locks reads and writes to all head/tail state
2431 *
2432 * Consider: the head and tail pointer state
2433 * needs to be read consistently from a hrtimer
2434 * callback (atomic context) and read() fop
2435 * (user context) with tail pointer updates
2436 * happening in atomic context and head updates
2437 * in user context and the (unlikely)
2438 * possibility of read() errors needing to
2439 * reset all head/tail state.
2440 *
2441 * Note: Contention or performance aren't
2442 * currently a significant concern here
2443 * considering the relatively low frequency of
2444 * hrtimer callbacks (5ms period) and that
2445 * reads typically only happen in response to a
2446 * hrtimer event and likely complete before the
2447 * next callback.
2448 *
2449 * Note: This lock is not held *while* reading
2450 * and copying data to userspace so the value
2451 * of head observed in htrimer callbacks won't
2452 * represent any partial consumption of data.
2453 */
2454 spinlock_t ptr_lock;
2455
2456 /**
2457 * One 'aging' tail pointer and one 'aged'
2458 * tail pointer ready to used for reading.
2459 *
2460 * Initial values of 0xffffffff are invalid
2461 * and imply that an update is required
2462 * (and should be ignored by an attempted
2463 * read)
2464 */
2465 struct {
2466 u32 offset;
2467 } tails[2];
2468
2469 /**
2470 * Index for the aged tail ready to read()
2471 * data up to.
2472 */
2473 unsigned int aged_tail_idx;
2474
2475 /**
2476 * A monotonic timestamp for when the current
2477 * aging tail pointer was read; used to
2478 * determine when it is old enough to trust.
2479 */
2480 u64 aging_timestamp;
2481
2482 /**
2483 * Although we can always read back the head
2484 * pointer register, we prefer to avoid
2485 * trusting the HW state, just to avoid any
2486 * risk that some hardware condition could
2487 * somehow bump the head pointer unpredictably
2488 * and cause us to forward the wrong OA buffer
2489 * data to userspace.
2490 */
2491 u32 head;
2475 } oa_buffer; 2492 } oa_buffer;
2476 2493
2477 u32 gen7_latched_oastatus1; 2494 u32 gen7_latched_oastatus1;
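
Editor's note: the oa_buffer comment block above describes a two-slot tail scheme: a freshly sampled tail "ages" until it has stayed stable long enough to be trusted, and only then becomes the "aged" tail that read() consumes up to, so partially written OA reports are never handed to userspace. The sketch below is one way such a scheme can work, inferred from that comment; the settle period, names and promotion rule are illustrative assumptions, not the exact i915_perf.c logic.

#include <stdint.h>
#include <stdio.h>

#define AGING_PERIOD_NS	5000000ULL	/* assumed settle time (hypothetical) */

struct oa_tails {
	uint32_t tails[2];	/* [aged_idx] is safe to read, [!aged_idx] is still aging */
	unsigned int aged_idx;
	uint64_t aging_timestamp;
};

/* Called periodically with the tail sampled from hardware and a timestamp. */
static void tail_check(struct oa_tails *t, uint32_t hw_tail, uint64_t now)
{
	unsigned int aging = !t->aged_idx;

	if (hw_tail != t->tails[aging]) {
		/* Tail moved again: restart the aging clock on the new value. */
		t->tails[aging] = hw_tail;
		t->aging_timestamp = now;
	} else if (now - t->aging_timestamp >= AGING_PERIOD_NS) {
		/* Stable long enough: promote it for read() to consume. */
		t->aged_idx = aging;
	}
}

int main(void)
{
	struct oa_tails t = { .tails = { 0, 0 } };

	tail_check(&t, 64, 0);		/* new data seen, starts aging */
	tail_check(&t, 64, 6000000);	/* stable past the period: promoted */
	printf("read() may consume up to offset %u\n", t.tails[t.aged_idx]);
	return 0;
}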
@@ -2870,7 +2887,6 @@ intel_info(const struct drm_i915_private *dev_priv)
2870 2887
2871#define HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical) 2888#define HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical)
2872 2889
2873#define HAS_HW_CONTEXTS(dev_priv) ((dev_priv)->info.has_hw_contexts)
2874#define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ 2890#define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \
2875 ((dev_priv)->info.has_logical_ring_contexts) 2891 ((dev_priv)->info.has_logical_ring_contexts)
2876#define USES_PPGTT(dev_priv) (i915.enable_ppgtt) 2892#define USES_PPGTT(dev_priv) (i915.enable_ppgtt)
@@ -2909,6 +2925,7 @@ intel_info(const struct drm_i915_private *dev_priv)
2909#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) 2925#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2)
2910#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) 2926#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr)
2911#define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) 2927#define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc)
2928#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7)
2912 2929
2913#define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) 2930#define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv))
2914 2931
@@ -2931,6 +2948,7 @@ intel_info(const struct drm_i915_private *dev_priv)
2931 * properties, so we have separate macros to test them. 2948 * properties, so we have separate macros to test them.
2932 */ 2949 */
2933#define HAS_GUC(dev_priv) ((dev_priv)->info.has_guc) 2950#define HAS_GUC(dev_priv) ((dev_priv)->info.has_guc)
2951#define HAS_GUC_CT(dev_priv) ((dev_priv)->info.has_guc_ct)
2934#define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) 2952#define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv))
2935#define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv)) 2953#define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv))
2936#define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) 2954#define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv))
@@ -2981,15 +2999,26 @@ intel_info(const struct drm_i915_private *dev_priv)
2981 2999
2982#include "i915_trace.h" 3000#include "i915_trace.h"
2983 3001
2984static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) 3002static inline bool intel_vtd_active(void)
2985{ 3003{
2986#ifdef CONFIG_INTEL_IOMMU 3004#ifdef CONFIG_INTEL_IOMMU
2987 if (INTEL_GEN(dev_priv) >= 6 && intel_iommu_gfx_mapped) 3005 if (intel_iommu_gfx_mapped)
2988 return true; 3006 return true;
2989#endif 3007#endif
2990 return false; 3008 return false;
2991} 3009}
2992 3010
3011static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
3012{
3013 return INTEL_GEN(dev_priv) >= 6 && intel_vtd_active();
3014}
3015
3016static inline bool
3017intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
3018{
3019 return IS_BROXTON(dev_priv) && intel_vtd_active();
3020}
3021
2993int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, 3022int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
2994 int enable_ppgtt); 3023 int enable_ppgtt);
2995 3024
@@ -3026,7 +3055,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
3026extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); 3055extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
3027int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); 3056int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
3028 3057
3029int intel_engines_init_early(struct drm_i915_private *dev_priv); 3058int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
3030int intel_engines_init(struct drm_i915_private *dev_priv); 3059int intel_engines_init(struct drm_i915_private *dev_priv);
3031 3060
3032/* intel_hotplug.c */ 3061/* intel_hotplug.c */
@@ -3063,43 +3092,10 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
3063 const char *fmt, ...); 3092 const char *fmt, ...);
3064 3093
3065extern void intel_irq_init(struct drm_i915_private *dev_priv); 3094extern void intel_irq_init(struct drm_i915_private *dev_priv);
3095extern void intel_irq_fini(struct drm_i915_private *dev_priv);
3066int intel_irq_install(struct drm_i915_private *dev_priv); 3096int intel_irq_install(struct drm_i915_private *dev_priv);
3067void intel_irq_uninstall(struct drm_i915_private *dev_priv); 3097void intel_irq_uninstall(struct drm_i915_private *dev_priv);
3068 3098
3069extern void intel_uncore_sanitize(struct drm_i915_private *dev_priv);
3070extern void intel_uncore_init(struct drm_i915_private *dev_priv);
3071extern bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv);
3072extern bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv);
3073extern void intel_uncore_fini(struct drm_i915_private *dev_priv);
3074extern void intel_uncore_suspend(struct drm_i915_private *dev_priv);
3075extern void intel_uncore_resume_early(struct drm_i915_private *dev_priv);
3076const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id);
3077void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
3078 enum forcewake_domains domains);
3079void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
3080 enum forcewake_domains domains);
3081/* Like above but the caller must manage the uncore.lock itself.
3082 * Must be used with I915_READ_FW and friends.
3083 */
3084void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv,
3085 enum forcewake_domains domains);
3086void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv,
3087 enum forcewake_domains domains);
3088u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
3089
3090void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
3091
3092int intel_wait_for_register(struct drm_i915_private *dev_priv,
3093 i915_reg_t reg,
3094 const u32 mask,
3095 const u32 value,
3096 const unsigned long timeout_ms);
3097int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
3098 i915_reg_t reg,
3099 const u32 mask,
3100 const u32 value,
3101 const unsigned long timeout_ms);
3102
3103static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) 3099static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
3104{ 3100{
3105 return dev_priv->gvt; 3101 return dev_priv->gvt;
@@ -3447,8 +3443,9 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
3447#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX 3443#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
3448 3444
3449int __must_check 3445int __must_check
3450i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, 3446i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
3451 bool write); 3447int __must_check
3448i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
3452int __must_check 3449int __must_check
3453i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); 3450i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
3454struct i915_vma * __must_check 3451struct i915_vma * __must_check
@@ -3711,8 +3708,8 @@ int intel_lpe_audio_init(struct drm_i915_private *dev_priv);
3711void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv); 3708void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv);
3712void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv); 3709void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv);
3713void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, 3710void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
3714 void *eld, int port, int pipe, int tmds_clk_speed, 3711 enum pipe pipe, enum port port,
3715 bool dp_output, int link_rate); 3712 const void *eld, int ls_clock, bool dp_output);
3716 3713
3717/* intel_i2c.c */ 3714/* intel_i2c.c */
3718extern int intel_setup_gmbus(struct drm_i915_private *dev_priv); 3715extern int intel_setup_gmbus(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0e07f35e270c..7ab47a84671f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,8 +46,6 @@
46#include <linux/dma-buf.h> 46#include <linux/dma-buf.h>
47 47
48static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 48static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
49static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
50static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
51 49
52static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 50static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
53{ 51{
@@ -705,6 +703,61 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
705 args->size, &args->handle); 703 args->size, &args->handle);
706} 704}
707 705
706static inline enum fb_op_origin
707fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
708{
709 return (domain == I915_GEM_DOMAIN_GTT ?
710 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
711}
712
713static void
714flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
715{
716 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
717
718 if (!(obj->base.write_domain & flush_domains))
719 return;
720
721 /* No actual flushing is required for the GTT write domain. Writes
722 * to it "immediately" go to main memory as far as we know, so there's
723 * no chipset flush. It also doesn't land in render cache.
724 *
725 * However, we do have to enforce the order so that all writes through
726 * the GTT land before any writes to the device, such as updates to
727 * the GATT itself.
728 *
729 * We also have to wait a bit for the writes to land from the GTT.
730 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
731 * timing. This issue has only been observed when switching quickly
732 * between GTT writes and CPU reads from inside the kernel on recent hw,
733 * and it appears to only affect discrete GTT blocks (i.e. on LLC
734 * system agents we cannot reproduce this behaviour).
735 */
736 wmb();
737
738 switch (obj->base.write_domain) {
739 case I915_GEM_DOMAIN_GTT:
740 if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
741 if (intel_runtime_pm_get_if_in_use(dev_priv)) {
742 spin_lock_irq(&dev_priv->uncore.lock);
743 POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
744 spin_unlock_irq(&dev_priv->uncore.lock);
745 intel_runtime_pm_put(dev_priv);
746 }
747 }
748
749 intel_fb_obj_flush(obj,
750 fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
751 break;
752
753 case I915_GEM_DOMAIN_CPU:
754 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
755 break;
756 }
757
758 obj->base.write_domain = 0;
759}
760
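
The new flush_write_domain() takes the complement of the destination domain: a pending write in any other domain gets flushed, while a write already belonging to the target domain is left alone. A small stand-alone sketch of that masking convention, with made-up domain bits instead of the real I915_GEM_DOMAIN_* values:

    /* Sketch of the call convention only, with made-up domain values: callers
     * pass ~DESTINATION so that a pending write in any *other* domain gets
     * flushed, while a write already in the destination domain is untouched.
     */
    #include <stdio.h>

    #define DOM_CPU (1u << 0)
    #define DOM_GTT (1u << 1)
    #define DOM_WC  (1u << 2)

    static unsigned int write_domain; /* at most one bit set, as on GEM objects */

    static void flush_write_domain(unsigned int flush_domains)
    {
        if (!(write_domain & flush_domains))
            return; /* no pending write, or it already lands in the target domain */

        printf("flushing writes from domain 0x%x\n", write_domain);
        write_domain = 0;
    }

    int main(void)
    {
        write_domain = DOM_GTT;
        flush_write_domain(~DOM_CPU); /* moving to CPU: GTT writes must be flushed */
        flush_write_domain(~DOM_GTT); /* moving to GTT: already clean, no-op */
        return 0;
    }
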
708static inline int 761static inline int
709__copy_to_user_swizzled(char __user *cpu_vaddr, 762__copy_to_user_swizzled(char __user *cpu_vaddr,
710 const char *gpu_vaddr, int gpu_offset, 763 const char *gpu_vaddr, int gpu_offset,
@@ -794,7 +847,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
794 goto out; 847 goto out;
795 } 848 }
796 849
797 i915_gem_object_flush_gtt_write_domain(obj); 850 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
798 851
799 /* If we're not in the cpu read domain, set ourself into the gtt 852 /* If we're not in the cpu read domain, set ourself into the gtt
800 * read domain and manually flush cachelines (if required). This 853 * read domain and manually flush cachelines (if required). This
@@ -846,7 +899,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
846 goto out; 899 goto out;
847 } 900 }
848 901
849 i915_gem_object_flush_gtt_write_domain(obj); 902 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
850 903
851 /* If we're not in the cpu write domain, set ourself into the 904 /* If we're not in the cpu write domain, set ourself into the
852 * gtt write domain and manually flush cachelines (as required). 905 * gtt write domain and manually flush cachelines (as required).
@@ -1501,13 +1554,6 @@ err:
1501 return ret; 1554 return ret;
1502} 1555}
1503 1556
1504static inline enum fb_op_origin
1505write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1506{
1507 return (domain == I915_GEM_DOMAIN_GTT ?
1508 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
1509}
1510
1511static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1557static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1512{ 1558{
1513 struct drm_i915_private *i915; 1559 struct drm_i915_private *i915;
@@ -1591,10 +1637,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1591 if (err) 1637 if (err)
1592 goto out_unpin; 1638 goto out_unpin;
1593 1639
1594 if (read_domains & I915_GEM_DOMAIN_GTT) 1640 if (read_domains & I915_GEM_DOMAIN_WC)
1595 err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1641 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1642 else if (read_domains & I915_GEM_DOMAIN_GTT)
1643 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1596 else 1644 else
1597 err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1645 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1598 1646
1599 /* And bump the LRU for this access */ 1647 /* And bump the LRU for this access */
1600 i915_gem_object_bump_inactive_ggtt(obj); 1648 i915_gem_object_bump_inactive_ggtt(obj);
@@ -1602,7 +1650,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1602 mutex_unlock(&dev->struct_mutex); 1650 mutex_unlock(&dev->struct_mutex);
1603 1651
1604 if (write_domain != 0) 1652 if (write_domain != 0)
1605 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); 1653 intel_fb_obj_invalidate(obj,
1654 fb_write_origin(obj, write_domain));
1606 1655
1607out_unpin: 1656out_unpin:
1608 i915_gem_object_unpin_pages(obj); 1657 i915_gem_object_unpin_pages(obj);
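
With the WC domain now accepted by the set-domain ioctl, userspace can move a buffer into the write-combined domain before accessing it directly. A hedged userspace-side sketch, assuming the updated uapi header exporting I915_GEM_DOMAIN_WC is installed and that fd and handle already refer to an open i915 device node and an existing GEM object:

    /* Hedged sketch, not part of the patch: request the WC read (and optionally
     * write) domain through the existing set-domain ioctl. Behaviour on kernels
     * that predate the WC domain is outside the scope of this sketch.
     */
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    static int set_to_wc_domain(int fd, unsigned int handle, int for_write)
    {
        struct drm_i915_gem_set_domain arg;

        memset(&arg, 0, sizeof(arg));
        arg.handle = handle;
        arg.read_domains = I915_GEM_DOMAIN_WC;
        arg.write_domain = for_write ? I915_GEM_DOMAIN_WC : 0;

        return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
    }
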
@@ -1737,6 +1786,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1737 * into userspace. (This view is aligned and sized appropriately for 1786 * into userspace. (This view is aligned and sized appropriately for
1738 * fenced access.) 1787 * fenced access.)
1739 * 1788 *
1789 * 2 - Recognise WC as a separate cache domain so that we can flush the
1790 * delayed writes via GTT before performing direct access via WC.
1791 *
1740 * Restrictions: 1792 * Restrictions:
1741 * 1793 *
1742 * * snoopable objects cannot be accessed via the GTT. It can cause machine 1794 * * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -1764,7 +1816,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1764 */ 1816 */
1765int i915_gem_mmap_gtt_version(void) 1817int i915_gem_mmap_gtt_version(void)
1766{ 1818{
1767 return 1; 1819 return 2;
1768} 1820}
1769 1821
1770static inline struct i915_ggtt_view 1822static inline struct i915_ggtt_view
@@ -2228,7 +2280,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2228 if (obj->mm.mapping) { 2280 if (obj->mm.mapping) {
2229 void *ptr; 2281 void *ptr;
2230 2282
2231 ptr = ptr_mask_bits(obj->mm.mapping); 2283 ptr = page_mask_bits(obj->mm.mapping);
2232 if (is_vmalloc_addr(ptr)) 2284 if (is_vmalloc_addr(ptr))
2233 vunmap(ptr); 2285 vunmap(ptr);
2234 else 2286 else
@@ -2560,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2560 } 2612 }
2561 GEM_BUG_ON(!obj->mm.pages); 2613 GEM_BUG_ON(!obj->mm.pages);
2562 2614
2563 ptr = ptr_unpack_bits(obj->mm.mapping, has_type); 2615 ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2564 if (ptr && has_type != type) { 2616 if (ptr && has_type != type) {
2565 if (pinned) { 2617 if (pinned) {
2566 ret = -EBUSY; 2618 ret = -EBUSY;
@@ -2582,7 +2634,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2582 goto err_unpin; 2634 goto err_unpin;
2583 } 2635 }
2584 2636
2585 obj->mm.mapping = ptr_pack_bits(ptr, type); 2637 obj->mm.mapping = page_pack_bits(ptr, type);
2586 } 2638 }
2587 2639
2588out_unlock: 2640out_unlock:
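
The page_pack_bits()/page_unpack_bits() helpers rely on the usual trick of stashing a small type tag in the low bits of an aligned pointer. The kernel helpers differ in naming and width; the stand-alone sketch below only illustrates the underlying idiom:

    /* Generic illustration of the low-bit tagging idiom: any pointer whose
     * alignment leaves its low bits clear (here at least 4-byte alignment for
     * two tag bits) can carry a small tag without extra storage.
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define TAG_MASK 0x3u /* two low bits available for a type tag */

    static void *pack_bits(void *ptr, unsigned int tag)
    {
        assert(((uintptr_t)ptr & TAG_MASK) == 0); /* alignment gives us the bits */
        assert((tag & ~TAG_MASK) == 0);
        return (void *)((uintptr_t)ptr | tag);
    }

    static void *unpack_bits(void *packed, unsigned int *tag)
    {
        *tag = (uintptr_t)packed & TAG_MASK;
        return (void *)((uintptr_t)packed & ~(uintptr_t)TAG_MASK);
    }

    int main(void)
    {
        unsigned int tag;
        void *mem = malloc(64); /* malloc returns suitably aligned memory */
        void *packed = pack_bits(mem, 2);
        void *plain = unpack_bits(packed, &tag);

        printf("tag=%u, pointers match=%d\n", tag, plain == mem);
        free(mem);
        return 0;
    }
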
@@ -2967,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
2967 */ 3019 */
2968 3020
2969 if (i915.enable_execlists) { 3021 if (i915.enable_execlists) {
3022 struct execlist_port *port = engine->execlist_port;
2970 unsigned long flags; 3023 unsigned long flags;
3024 unsigned int n;
2971 3025
2972 spin_lock_irqsave(&engine->timeline->lock, flags); 3026 spin_lock_irqsave(&engine->timeline->lock, flags);
2973 3027
2974 i915_gem_request_put(engine->execlist_port[0].request); 3028 for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
2975 i915_gem_request_put(engine->execlist_port[1].request); 3029 i915_gem_request_put(port_request(&port[n]));
2976 memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); 3030 memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
2977 engine->execlist_queue = RB_ROOT; 3031 engine->execlist_queue = RB_ROOT;
2978 engine->execlist_first = NULL; 3032 engine->execlist_first = NULL;
@@ -3101,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
3101 struct drm_i915_private *dev_priv = 3155 struct drm_i915_private *dev_priv =
3102 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3156 container_of(work, typeof(*dev_priv), gt.idle_work.work);
3103 struct drm_device *dev = &dev_priv->drm; 3157 struct drm_device *dev = &dev_priv->drm;
3104 struct intel_engine_cs *engine;
3105 enum intel_engine_id id;
3106 bool rearm_hangcheck; 3158 bool rearm_hangcheck;
3107 3159
3108 if (!READ_ONCE(dev_priv->gt.awake)) 3160 if (!READ_ONCE(dev_priv->gt.awake))
@@ -3140,10 +3192,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
3140 if (wait_for(intel_engines_are_idle(dev_priv), 10)) 3192 if (wait_for(intel_engines_are_idle(dev_priv), 10))
3141 DRM_ERROR("Timeout waiting for engines to idle\n"); 3193 DRM_ERROR("Timeout waiting for engines to idle\n");
3142 3194
3143 for_each_engine(engine, dev_priv, id) { 3195 intel_engines_mark_idle(dev_priv);
3144 intel_engine_disarm_breadcrumbs(engine); 3196 i915_gem_timelines_mark_idle(dev_priv);
3145 i915_gem_batch_pool_fini(&engine->batch_pool);
3146 }
3147 3197
3148 GEM_BUG_ON(!dev_priv->gt.awake); 3198 GEM_BUG_ON(!dev_priv->gt.awake);
3149 dev_priv->gt.awake = false; 3199 dev_priv->gt.awake = false;
@@ -3320,56 +3370,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
3320 return ret; 3370 return ret;
3321} 3371}
3322 3372
3323/** Flushes the GTT write domain for the object if it's dirty. */
3324static void
3325i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3326{
3327 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3328
3329 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3330 return;
3331
3332 /* No actual flushing is required for the GTT write domain. Writes
3333 * to it "immediately" go to main memory as far as we know, so there's
3334 * no chipset flush. It also doesn't land in render cache.
3335 *
3336 * However, we do have to enforce the order so that all writes through
3337 * the GTT land before any writes to the device, such as updates to
3338 * the GATT itself.
3339 *
3340 * We also have to wait a bit for the writes to land from the GTT.
3341 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
3342 * timing. This issue has only been observed when switching quickly
3343 * between GTT writes and CPU reads from inside the kernel on recent hw,
3344 * and it appears to only affect discrete GTT blocks (i.e. on LLC
3345 * system agents we cannot reproduce this behaviour).
3346 */
3347 wmb();
3348 if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
3349 if (intel_runtime_pm_get_if_in_use(dev_priv)) {
3350 spin_lock_irq(&dev_priv->uncore.lock);
3351 POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
3352 spin_unlock_irq(&dev_priv->uncore.lock);
3353 intel_runtime_pm_put(dev_priv);
3354 }
3355 }
3356
3357 intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
3358
3359 obj->base.write_domain = 0;
3360}
3361
3362/** Flushes the CPU write domain for the object if it's dirty. */
3363static void
3364i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3365{
3366 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3367 return;
3368
3369 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
3370 obj->base.write_domain = 0;
3371}
3372
3373static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3373static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
3374{ 3374{
3375 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) 3375 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
@@ -3390,6 +3390,69 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
3390} 3390}
3391 3391
3392/** 3392/**
3393 * Moves a single object to the WC read, and possibly write domain.
3394 * @obj: object to act on
3395 * @write: ask for write access or read only
3396 *
3397 * This function returns when the move is complete, including waiting on
3398 * flushes to occur.
3399 */
3400int
3401i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3402{
3403 int ret;
3404
3405 lockdep_assert_held(&obj->base.dev->struct_mutex);
3406
3407 ret = i915_gem_object_wait(obj,
3408 I915_WAIT_INTERRUPTIBLE |
3409 I915_WAIT_LOCKED |
3410 (write ? I915_WAIT_ALL : 0),
3411 MAX_SCHEDULE_TIMEOUT,
3412 NULL);
3413 if (ret)
3414 return ret;
3415
3416 if (obj->base.write_domain == I915_GEM_DOMAIN_WC)
3417 return 0;
3418
3419 /* Flush and acquire obj->pages so that we are coherent through
3420 * direct access in memory with previous cached writes through
3421 * shmemfs and that our cache domain tracking remains valid.
3422 * For example, if the obj->filp was moved to swap without us
3423 * being notified and releasing the pages, we would mistakenly
3424 * continue to assume that the obj remained out of the CPU cached
3425 * domain.
3426 */
3427 ret = i915_gem_object_pin_pages(obj);
3428 if (ret)
3429 return ret;
3430
3431 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3432
3433 /* Serialise direct access to this object with the barriers for
3434 * coherent writes from the GPU, by effectively invalidating the
3435 * WC domain upon first access.
3436 */
3437 if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0)
3438 mb();
3439
3440 /* It should now be out of any other write domains, and we can update
3441 * the domain values for our changes.
3442 */
3443 GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0);
3444 obj->base.read_domains |= I915_GEM_DOMAIN_WC;
3445 if (write) {
3446 obj->base.read_domains = I915_GEM_DOMAIN_WC;
3447 obj->base.write_domain = I915_GEM_DOMAIN_WC;
3448 obj->mm.dirty = true;
3449 }
3450
3451 i915_gem_object_unpin_pages(obj);
3452 return 0;
3453}
3454
3455/**
3393 * Moves a single object to the GTT read, and possibly write domain. 3456 * Moves a single object to the GTT read, and possibly write domain.
3394 * @obj: object to act on 3457 * @obj: object to act on
3395 * @write: ask for write access or read only 3458 * @write: ask for write access or read only
@@ -3428,7 +3491,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3428 if (ret) 3491 if (ret)
3429 return ret; 3492 return ret;
3430 3493
3431 i915_gem_object_flush_cpu_write_domain(obj); 3494 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3432 3495
3433 /* Serialise direct access to this object with the barriers for 3496 /* Serialise direct access to this object with the barriers for
3434 * coherent writes from the GPU, by effectively invalidating the 3497 * coherent writes from the GPU, by effectively invalidating the
@@ -3802,7 +3865,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3802 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3865 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3803 return 0; 3866 return 0;
3804 3867
3805 i915_gem_object_flush_gtt_write_domain(obj); 3868 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3806 3869
3807 /* Flush the CPU cache if it's still invalid. */ 3870 /* Flush the CPU cache if it's still invalid. */
3808 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3871 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
@@ -3996,7 +4059,7 @@ __busy_set_if_active(const struct dma_fence *fence,
3996 if (i915_gem_request_completed(rq)) 4059 if (i915_gem_request_completed(rq))
3997 return 0; 4060 return 0;
3998 4061
3999 return flag(rq->engine->exec_id); 4062 return flag(rq->engine->uabi_id);
4000} 4063}
4001 4064
4002static __always_inline unsigned int 4065static __always_inline unsigned int
@@ -4195,7 +4258,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
4195 * catch if we ever need to fix it. In the meantime, if you do spot 4258 * catch if we ever need to fix it. In the meantime, if you do spot
4196 * such a local variable, please consider fixing! 4259 * such a local variable, please consider fixing!
4197 */ 4260 */
4198 if (WARN_ON(size >> PAGE_SHIFT > INT_MAX)) 4261 if (size >> PAGE_SHIFT > INT_MAX)
4199 return ERR_PTR(-E2BIG); 4262 return ERR_PTR(-E2BIG);
4200 4263
4201 if (overflows_type(size, obj->base.size)) 4264 if (overflows_type(size, obj->base.size))
@@ -4302,6 +4365,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
4302 intel_runtime_pm_put(i915); 4365 intel_runtime_pm_put(i915);
4303 mutex_unlock(&i915->drm.struct_mutex); 4366 mutex_unlock(&i915->drm.struct_mutex);
4304 4367
4368 cond_resched();
4369
4305 llist_for_each_entry_safe(obj, on, freed, freed) { 4370 llist_for_each_entry_safe(obj, on, freed, freed) {
4306 GEM_BUG_ON(obj->bind_count); 4371 GEM_BUG_ON(obj->bind_count);
4307 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4372 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
@@ -4349,8 +4414,11 @@ static void __i915_gem_free_work(struct work_struct *work)
4349 * unbound now. 4414 * unbound now.
4350 */ 4415 */
4351 4416
4352 while ((freed = llist_del_all(&i915->mm.free_list))) 4417 while ((freed = llist_del_all(&i915->mm.free_list))) {
4353 __i915_gem_free_objects(i915, freed); 4418 __i915_gem_free_objects(i915, freed);
4419 if (need_resched())
4420 break;
4421 }
4354} 4422}
4355 4423
4356static void __i915_gem_free_object_rcu(struct rcu_head *head) 4424static void __i915_gem_free_object_rcu(struct rcu_head *head)
@@ -4415,10 +4483,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
4415 * try to take over. The only way to remove the earlier state 4483 * try to take over. The only way to remove the earlier state
4416 * is by resetting. However, resetting on earlier gen is tricky as 4484 * is by resetting. However, resetting on earlier gen is tricky as
4417 * it may impact the display and we are uncertain about the stability 4485 * it may impact the display and we are uncertain about the stability
4418 * of the reset, so we only reset recent machines with logical 4486 * of the reset, so this could be applied to even earlier gen.
4419 * context support (that must be reset to remove any stray contexts).
4420 */ 4487 */
4421 if (HAS_HW_CONTEXTS(i915)) { 4488 if (INTEL_GEN(i915) >= 5) {
4422 int reset = intel_gpu_reset(i915, ALL_ENGINES); 4489 int reset = intel_gpu_reset(i915, ALL_ENGINES);
4423 WARN_ON(reset && reset != -ENODEV); 4490 WARN_ON(reset && reset != -ENODEV);
4424 } 4491 }
@@ -4661,11 +4728,9 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4661 if (value >= 0) 4728 if (value >= 0)
4662 return value; 4729 return value;
4663 4730
4664#ifdef CONFIG_INTEL_IOMMU
4665 /* Enable semaphores on SNB when IO remapping is off */ 4731 /* Enable semaphores on SNB when IO remapping is off */
4666 if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) 4732 if (IS_GEN6(dev_priv) && intel_vtd_active())
4667 return false; 4733 return false;
4668#endif
4669 4734
4670 return true; 4735 return true;
4671} 4736}
@@ -4676,7 +4741,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
4676 4741
4677 mutex_lock(&dev_priv->drm.struct_mutex); 4742 mutex_lock(&dev_priv->drm.struct_mutex);
4678 4743
4679 i915_gem_clflush_init(dev_priv); 4744 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
4680 4745
4681 if (!i915.enable_execlists) { 4746 if (!i915.enable_execlists) {
4682 dev_priv->gt.resume = intel_legacy_submission_resume; 4747 dev_priv->gt.resume = intel_legacy_submission_resume;
@@ -4799,12 +4864,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
4799 if (!dev_priv->dependencies) 4864 if (!dev_priv->dependencies)
4800 goto err_requests; 4865 goto err_requests;
4801 4866
4867 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
4868 if (!dev_priv->priorities)
4869 goto err_dependencies;
4870
4802 mutex_lock(&dev_priv->drm.struct_mutex); 4871 mutex_lock(&dev_priv->drm.struct_mutex);
4803 INIT_LIST_HEAD(&dev_priv->gt.timelines); 4872 INIT_LIST_HEAD(&dev_priv->gt.timelines);
4804 err = i915_gem_timeline_init__global(dev_priv); 4873 err = i915_gem_timeline_init__global(dev_priv);
4805 mutex_unlock(&dev_priv->drm.struct_mutex); 4874 mutex_unlock(&dev_priv->drm.struct_mutex);
4806 if (err) 4875 if (err)
4807 goto err_dependencies; 4876 goto err_priorities;
4808 4877
4809 INIT_LIST_HEAD(&dev_priv->context_list); 4878 INIT_LIST_HEAD(&dev_priv->context_list);
4810 INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); 4879 INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
@@ -4822,14 +4891,14 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
4822 4891
4823 init_waitqueue_head(&dev_priv->pending_flip_queue); 4892 init_waitqueue_head(&dev_priv->pending_flip_queue);
4824 4893
4825 dev_priv->mm.interruptible = true;
4826
4827 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 4894 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4828 4895
4829 spin_lock_init(&dev_priv->fb_tracking.lock); 4896 spin_lock_init(&dev_priv->fb_tracking.lock);
4830 4897
4831 return 0; 4898 return 0;
4832 4899
4900err_priorities:
4901 kmem_cache_destroy(dev_priv->priorities);
4833err_dependencies: 4902err_dependencies:
4834 kmem_cache_destroy(dev_priv->dependencies); 4903 kmem_cache_destroy(dev_priv->dependencies);
4835err_requests: 4904err_requests:
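
The new priorities cache simply slots into the existing goto-unwind ladder: each allocation adds one label, and teardown runs in reverse order of setup. A generic stand-alone sketch of that shape, with invented resources in place of the kmem caches:

    /* Generic sketch of the goto-unwind pattern the new err_priorities label
     * extends; resource names are invented for illustration.
     */
    #include <stdlib.h>

    struct state {
        void *requests;
        void *dependencies;
        void *priorities;
    };

    static int state_init(struct state *s)
    {
        s->requests = malloc(32);
        if (!s->requests)
            goto err;

        s->dependencies = malloc(32);
        if (!s->dependencies)
            goto err_requests;

        s->priorities = malloc(32);
        if (!s->priorities)
            goto err_dependencies;

        return 0;

    err_dependencies:
        free(s->dependencies);
    err_requests:
        free(s->requests);
    err:
        return -1;
    }

    int main(void)
    {
        struct state s;

        if (state_init(&s))
            return 1;

        /* normal teardown mirrors the error path, in the same reverse order */
        free(s.priorities);
        free(s.dependencies);
        free(s.requests);
        return 0;
    }
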
@@ -4853,6 +4922,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
4853 WARN_ON(!list_empty(&dev_priv->gt.timelines)); 4922 WARN_ON(!list_empty(&dev_priv->gt.timelines));
4854 mutex_unlock(&dev_priv->drm.struct_mutex); 4923 mutex_unlock(&dev_priv->drm.struct_mutex);
4855 4924
4925 kmem_cache_destroy(dev_priv->priorities);
4856 kmem_cache_destroy(dev_priv->dependencies); 4926 kmem_cache_destroy(dev_priv->dependencies);
4857 kmem_cache_destroy(dev_priv->requests); 4927 kmem_cache_destroy(dev_priv->requests);
4858 kmem_cache_destroy(dev_priv->vmas); 4928 kmem_cache_destroy(dev_priv->vmas);
@@ -4864,9 +4934,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
4864 4934
4865int i915_gem_freeze(struct drm_i915_private *dev_priv) 4935int i915_gem_freeze(struct drm_i915_private *dev_priv)
4866{ 4936{
4867 mutex_lock(&dev_priv->drm.struct_mutex); 4937 /* Discard all purgeable objects, let userspace recover those as
4938 * required after resuming.
4939 */
4868 i915_gem_shrink_all(dev_priv); 4940 i915_gem_shrink_all(dev_priv);
4869 mutex_unlock(&dev_priv->drm.struct_mutex);
4870 4941
4871 return 0; 4942 return 0;
4872} 4943}
@@ -4891,12 +4962,13 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4891 * we update that state just before writing out the image. 4962 * we update that state just before writing out the image.
4892 * 4963 *
4893 * To try and reduce the hibernation image, we manually shrink 4964 * To try and reduce the hibernation image, we manually shrink
4894 * the objects as well. 4965 * the objects as well, see i915_gem_freeze()
4895 */ 4966 */
4896 4967
4897 mutex_lock(&dev_priv->drm.struct_mutex);
4898 i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); 4968 i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
4969 i915_gem_drain_freed_objects(dev_priv);
4899 4970
4971 mutex_lock(&dev_priv->drm.struct_mutex);
4900 for (p = phases; *p; p++) { 4972 for (p = phases; *p; p++) {
4901 list_for_each_entry(obj, *p, global_link) { 4973 list_for_each_entry(obj, *p, global_link) {
4902 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4974 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 5a49487368ca..ee54597465b6 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -25,6 +25,8 @@
25#ifndef __I915_GEM_H__ 25#ifndef __I915_GEM_H__
26#define __I915_GEM_H__ 26#define __I915_GEM_H__
27 27
28#include <linux/bug.h>
29
28#ifdef CONFIG_DRM_I915_DEBUG_GEM 30#ifdef CONFIG_DRM_I915_DEBUG_GEM
29#define GEM_BUG_ON(expr) BUG_ON(expr) 31#define GEM_BUG_ON(expr) BUG_ON(expr)
30#define GEM_WARN_ON(expr) WARN_ON(expr) 32#define GEM_WARN_ON(expr) WARN_ON(expr)
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index ffd01e02fe94..ffac7a1f0caf 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -27,7 +27,6 @@
27#include "i915_gem_clflush.h" 27#include "i915_gem_clflush.h"
28 28
29static DEFINE_SPINLOCK(clflush_lock); 29static DEFINE_SPINLOCK(clflush_lock);
30static u64 clflush_context;
31 30
32struct clflush { 31struct clflush {
33 struct dma_fence dma; /* Must be first for dma_fence_free() */ 32 struct dma_fence dma; /* Must be first for dma_fence_free() */
@@ -157,7 +156,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
157 dma_fence_init(&clflush->dma, 156 dma_fence_init(&clflush->dma,
158 &i915_clflush_ops, 157 &i915_clflush_ops,
159 &clflush_lock, 158 &clflush_lock,
160 clflush_context, 159 to_i915(obj->base.dev)->mm.unordered_timeline,
161 0); 160 0);
162 i915_sw_fence_init(&clflush->wait, i915_clflush_notify); 161 i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
163 162
@@ -182,8 +181,3 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
182 GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); 181 GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
183 } 182 }
184} 183}
185
186void i915_gem_clflush_init(struct drm_i915_private *i915)
187{
188 clflush_context = dma_fence_context_alloc(1);
189}
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
index b62d61a2d15f..2455a7820937 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.h
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.h
@@ -28,7 +28,6 @@
28struct drm_i915_private; 28struct drm_i915_private;
29struct drm_i915_gem_object; 29struct drm_i915_gem_object;
30 30
31void i915_gem_clflush_init(struct drm_i915_private *i915);
32void i915_gem_clflush_object(struct drm_i915_gem_object *obj, 31void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
33 unsigned int flags); 32 unsigned int flags);
34#define I915_CLFLUSH_FORCE BIT(0) 33#define I915_CLFLUSH_FORCE BIT(0)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8bd0c4966913..c5d1666d7071 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -92,33 +92,6 @@
92 92
93#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 93#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
94 94
95static int get_context_size(struct drm_i915_private *dev_priv)
96{
97 int ret;
98 u32 reg;
99
100 switch (INTEL_GEN(dev_priv)) {
101 case 6:
102 reg = I915_READ(CXT_SIZE);
103 ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
104 break;
105 case 7:
106 reg = I915_READ(GEN7_CXT_SIZE);
107 if (IS_HASWELL(dev_priv))
108 ret = HSW_CXT_TOTAL_SIZE;
109 else
110 ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
111 break;
112 case 8:
113 ret = GEN8_CXT_TOTAL_SIZE;
114 break;
115 default:
116 BUG();
117 }
118
119 return ret;
120}
121
122void i915_gem_context_free(struct kref *ctx_ref) 95void i915_gem_context_free(struct kref *ctx_ref)
123{ 96{
124 struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); 97 struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -151,45 +124,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
151 kfree(ctx); 124 kfree(ctx);
152} 125}
153 126
154static struct drm_i915_gem_object *
155alloc_context_obj(struct drm_i915_private *dev_priv, u64 size)
156{
157 struct drm_i915_gem_object *obj;
158 int ret;
159
160 lockdep_assert_held(&dev_priv->drm.struct_mutex);
161
162 obj = i915_gem_object_create(dev_priv, size);
163 if (IS_ERR(obj))
164 return obj;
165
166 /*
167 * Try to make the context utilize L3 as well as LLC.
168 *
169 * On VLV we don't have L3 controls in the PTEs so we
170 * shouldn't touch the cache level, especially as that
171 * would make the object snooped which might have a
172 * negative performance impact.
173 *
174 * Snooping is required on non-llc platforms in execlist
175 * mode, but since all GGTT accesses use PAT entry 0 we
176 * get snooping anyway regardless of cache_level.
177 *
178 * This is only applicable for Ivy Bridge devices since
179 * later platforms don't have L3 control bits in the PTE.
180 */
181 if (IS_IVYBRIDGE(dev_priv)) {
182 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
183 /* Failure shouldn't ever happen this early */
184 if (WARN_ON(ret)) {
185 i915_gem_object_put(obj);
186 return ERR_PTR(ret);
187 }
188 }
189
190 return obj;
191}
192
193static void context_close(struct i915_gem_context *ctx) 127static void context_close(struct i915_gem_context *ctx)
194{ 128{
195 i915_gem_context_set_closed(ctx); 129 i915_gem_context_set_closed(ctx);
@@ -265,26 +199,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
265 kref_init(&ctx->ref); 199 kref_init(&ctx->ref);
266 list_add_tail(&ctx->link, &dev_priv->context_list); 200 list_add_tail(&ctx->link, &dev_priv->context_list);
267 ctx->i915 = dev_priv; 201 ctx->i915 = dev_priv;
268 202 ctx->priority = I915_PRIORITY_NORMAL;
269 if (dev_priv->hw_context_size) {
270 struct drm_i915_gem_object *obj;
271 struct i915_vma *vma;
272
273 obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size);
274 if (IS_ERR(obj)) {
275 ret = PTR_ERR(obj);
276 goto err_out;
277 }
278
279 vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
280 if (IS_ERR(vma)) {
281 i915_gem_object_put(obj);
282 ret = PTR_ERR(vma);
283 goto err_out;
284 }
285
286 ctx->engine[RCS].state = vma;
287 }
288 203
289 /* Default context will never have a file_priv */ 204 /* Default context will never have a file_priv */
290 ret = DEFAULT_CONTEXT_HANDLE; 205 ret = DEFAULT_CONTEXT_HANDLE;
@@ -443,21 +358,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
443 BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); 358 BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
444 ida_init(&dev_priv->context_hw_ida); 359 ida_init(&dev_priv->context_hw_ida);
445 360
446 if (i915.enable_execlists) {
447 /* NB: intentionally left blank. We will allocate our own
448 * backing objects as we need them, thank you very much */
449 dev_priv->hw_context_size = 0;
450 } else if (HAS_HW_CONTEXTS(dev_priv)) {
451 dev_priv->hw_context_size =
452 round_up(get_context_size(dev_priv),
453 I915_GTT_PAGE_SIZE);
454 if (dev_priv->hw_context_size > (1<<20)) {
455 DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
456 dev_priv->hw_context_size);
457 dev_priv->hw_context_size = 0;
458 }
459 }
460
461 ctx = i915_gem_create_context(dev_priv, NULL); 361 ctx = i915_gem_create_context(dev_priv, NULL);
462 if (IS_ERR(ctx)) { 362 if (IS_ERR(ctx)) {
463 DRM_ERROR("Failed to create default global context (error %ld)\n", 363 DRM_ERROR("Failed to create default global context (error %ld)\n",
@@ -477,8 +377,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
477 GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); 377 GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
478 378
479 DRM_DEBUG_DRIVER("%s context support initialized\n", 379 DRM_DEBUG_DRIVER("%s context support initialized\n",
480 i915.enable_execlists ? "LR" : 380 dev_priv->engine[RCS]->context_size ? "logical" :
481 dev_priv->hw_context_size ? "HW" : "fake"); 381 "fake");
482 return 0; 382 return 0;
483} 383}
484 384
@@ -941,11 +841,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
941 return 0; 841 return 0;
942} 842}
943 843
944static bool contexts_enabled(struct drm_device *dev)
945{
946 return i915.enable_execlists || to_i915(dev)->hw_context_size;
947}
948
949static bool client_is_banned(struct drm_i915_file_private *file_priv) 844static bool client_is_banned(struct drm_i915_file_private *file_priv)
950{ 845{
951 return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS; 846 return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
@@ -954,12 +849,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv)
954int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, 849int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
955 struct drm_file *file) 850 struct drm_file *file)
956{ 851{
852 struct drm_i915_private *dev_priv = to_i915(dev);
957 struct drm_i915_gem_context_create *args = data; 853 struct drm_i915_gem_context_create *args = data;
958 struct drm_i915_file_private *file_priv = file->driver_priv; 854 struct drm_i915_file_private *file_priv = file->driver_priv;
959 struct i915_gem_context *ctx; 855 struct i915_gem_context *ctx;
960 int ret; 856 int ret;
961 857
962 if (!contexts_enabled(dev)) 858 if (!dev_priv->engine[RCS]->context_size)
963 return -ENODEV; 859 return -ENODEV;
964 860
965 if (args->pad != 0) 861 if (args->pad != 0)
@@ -977,7 +873,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
977 if (ret) 873 if (ret)
978 return ret; 874 return ret;
979 875
980 ctx = i915_gem_create_context(to_i915(dev), file_priv); 876 ctx = i915_gem_create_context(dev_priv, file_priv);
981 mutex_unlock(&dev->struct_mutex); 877 mutex_unlock(&dev->struct_mutex);
982 if (IS_ERR(ctx)) 878 if (IS_ERR(ctx))
983 return PTR_ERR(ctx); 879 return PTR_ERR(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index f225bf680b6d..6176e589cf09 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -122,12 +122,36 @@ static void i915_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long
122} 122}
123static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) 123static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
124{ 124{
125 struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
126 struct page *page;
127
128 if (page_num >= obj->base.size >> PAGE_SHIFT)
129 return NULL;
130
131 if (!i915_gem_object_has_struct_page(obj))
132 return NULL;
133
134 if (i915_gem_object_pin_pages(obj))
135 return NULL;
136
137 /* Synchronisation is left to the caller (via .begin_cpu_access()) */
138 page = i915_gem_object_get_page(obj, page_num);
139 if (IS_ERR(page))
140 goto err_unpin;
141
142 return kmap(page);
143
144err_unpin:
145 i915_gem_object_unpin_pages(obj);
125 return NULL; 146 return NULL;
126} 147}
127 148
128static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) 149static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
129{ 150{
151 struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
130 152
153 kunmap(virt_to_page(addr));
154 i915_gem_object_unpin_pages(obj);
131} 155}
132 156
133static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) 157static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4ee2dc38b7c9..04211c970b9f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1114,6 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
1114 list_for_each_entry(vma, vmas, exec_list) { 1114 list_for_each_entry(vma, vmas, exec_list) {
1115 struct drm_i915_gem_object *obj = vma->obj; 1115 struct drm_i915_gem_object *obj = vma->obj;
1116 1116
1117 if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) {
1118 struct i915_gem_capture_list *capture;
1119
1120 capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1121 if (unlikely(!capture))
1122 return -ENOMEM;
1123
1124 capture->next = req->capture_list;
1125 capture->vma = vma;
1126 req->capture_list = capture;
1127 }
1128
1117 if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) 1129 if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
1118 continue; 1130 continue;
1119 1131
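
EXEC_OBJECT_CAPTURE prepends each flagged vma to a NULL-terminated singly linked list on the request, which free_capture_list() in i915_gem_request.c later walks and frees at retire time. A stand-alone sketch of that prepend-then-free idiom, with an invented payload standing in for the vma:

    /* Stand-alone sketch of the singly-linked capture list idiom: prepend each
     * flagged entry to a NULL-terminated list, then walk-and-free it when the
     * request is retired. Types here are invented stand-ins.
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct capture_node {
        struct capture_node *next;
        int payload; /* stands in for the captured vma */
    };

    static int capture_add(struct capture_node **list, int payload)
    {
        struct capture_node *node = malloc(sizeof(*node));

        if (!node)
            return -1;

        node->payload = payload;
        node->next = *list; /* prepend: order is the reverse of submission */
        *list = node;
        return 0;
    }

    static void capture_free(struct capture_node *list)
    {
        while (list) {
            struct capture_node *next = list->next;

            free(list);
            list = next;
        }
    }

    int main(void)
    {
        struct capture_node *list = NULL;

        if (capture_add(&list, 1) || capture_add(&list, 2)) {
            capture_free(list);
            return 1;
        }
        printf("head payload: %d\n", list->payload); /* 2: most recent first */
        capture_free(list);
        return 0;
    }
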
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7e3193aa7da1..0c1008a2bbda 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -168,13 +168,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
168 if (enable_ppgtt == 3 && has_full_48bit_ppgtt) 168 if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
169 return 3; 169 return 3;
170 170
171#ifdef CONFIG_INTEL_IOMMU
172 /* Disable ppgtt on SNB if VT-d is on. */ 171 /* Disable ppgtt on SNB if VT-d is on. */
173 if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) { 172 if (IS_GEN6(dev_priv) && intel_vtd_active()) {
174 DRM_INFO("Disabling PPGTT because VT-d is on\n"); 173 DRM_INFO("Disabling PPGTT because VT-d is on\n");
175 return 0; 174 return 0;
176 } 175 }
177#endif
178 176
179 /* Early VLV doesn't have this */ 177 /* Early VLV doesn't have this */
180 if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) { 178 if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
@@ -195,9 +193,12 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
195 u32 pte_flags; 193 u32 pte_flags;
196 int ret; 194 int ret;
197 195
198 ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); 196 if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
199 if (ret) 197 ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
200 return ret; 198 vma->size);
199 if (ret)
200 return ret;
201 }
201 202
202 vma->pages = vma->obj->mm.pages; 203 vma->pages = vma->obj->mm.pages;
203 204
@@ -1989,14 +1990,10 @@ void i915_ppgtt_release(struct kref *kref)
1989 */ 1990 */
1990static bool needs_idle_maps(struct drm_i915_private *dev_priv) 1991static bool needs_idle_maps(struct drm_i915_private *dev_priv)
1991{ 1992{
1992#ifdef CONFIG_INTEL_IOMMU
1993 /* Query intel_iommu to see if we need the workaround. Presumably that 1993 /* Query intel_iommu to see if we need the workaround. Presumably that
1994 * was loaded first. 1994 * was loaded first.
1995 */ 1995 */
1996 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) 1996 return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
1997 return true;
1998#endif
1999 return false;
2000} 1997}
2001 1998
2002void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 1999void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
@@ -2188,6 +2185,101 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2188 gen8_set_pte(&gtt_base[i], scratch_pte); 2185 gen8_set_pte(&gtt_base[i], scratch_pte);
2189} 2186}
2190 2187
2188static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2189{
2190 struct drm_i915_private *dev_priv = vm->i915;
2191
2192 /*
2193 * Make sure the internal GAM fifo has been cleared of all GTT
2194 * writes before exiting stop_machine(). This guarantees that
2195 * any aperture accesses waiting to start in another process
2196 * cannot back up behind the GTT writes causing a hang.
2197 * The register can be any arbitrary GAM register.
2198 */
2199 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2200}
2201
2202struct insert_page {
2203 struct i915_address_space *vm;
2204 dma_addr_t addr;
2205 u64 offset;
2206 enum i915_cache_level level;
2207};
2208
2209static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2210{
2211 struct insert_page *arg = _arg;
2212
2213 gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2214 bxt_vtd_ggtt_wa(arg->vm);
2215
2216 return 0;
2217}
2218
2219static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2220 dma_addr_t addr,
2221 u64 offset,
2222 enum i915_cache_level level,
2223 u32 unused)
2224{
2225 struct insert_page arg = { vm, addr, offset, level };
2226
2227 stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2228}
2229
2230struct insert_entries {
2231 struct i915_address_space *vm;
2232 struct sg_table *st;
2233 u64 start;
2234 enum i915_cache_level level;
2235};
2236
2237static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2238{
2239 struct insert_entries *arg = _arg;
2240
2241 gen8_ggtt_insert_entries(arg->vm, arg->st, arg->start, arg->level, 0);
2242 bxt_vtd_ggtt_wa(arg->vm);
2243
2244 return 0;
2245}
2246
2247static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2248 struct sg_table *st,
2249 u64 start,
2250 enum i915_cache_level level,
2251 u32 unused)
2252{
2253 struct insert_entries arg = { vm, st, start, level };
2254
2255 stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2256}
2257
2258struct clear_range {
2259 struct i915_address_space *vm;
2260 u64 start;
2261 u64 length;
2262};
2263
2264static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2265{
2266 struct clear_range *arg = _arg;
2267
2268 gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2269 bxt_vtd_ggtt_wa(arg->vm);
2270
2271 return 0;
2272}
2273
2274static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2275 u64 start,
2276 u64 length)
2277{
2278 struct clear_range arg = { vm, start, length };
2279
2280 stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2281}
2282
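
Because stop_machine() hands its callback a single void * argument, each of the __BKL wrappers above bundles the operation's parameters into a small struct first. The sketch below shows only that argument-struct shape; run_serialised() is an invented stand-in for stop_machine(), not a reimplementation of it:

    /* Generic illustration of the single-void*-callback pattern. */
    #include <stdio.h>

    struct insert_page_args {
        unsigned long addr;
        unsigned long offset;
        int level;
    };

    static int run_serialised(int (*fn)(void *), void *arg)
    {
        /* stop_machine() would halt all other CPUs around this call */
        return fn(arg);
    }

    static int insert_page_cb(void *_arg)
    {
        struct insert_page_args *arg = _arg;

        printf("insert addr=%lx at offset=%lx level=%d\n",
               arg->addr, arg->offset, arg->level);
        /* the real callbacks follow up with a posting read to drain the GAM fifo */
        return 0;
    }

    int main(void)
    {
        struct insert_page_args args = { 0x1000, 0x2000, 0 };

        return run_serialised(insert_page_cb, &args);
    }
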
2191static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2283static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2192 u64 start, u64 length) 2284 u64 start, u64 length)
2193{ 2285{
@@ -2306,10 +2398,11 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2306 if (flags & I915_VMA_LOCAL_BIND) { 2398 if (flags & I915_VMA_LOCAL_BIND) {
2307 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2399 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2308 2400
2309 if (appgtt->base.allocate_va_range) { 2401 if (!(vma->flags & I915_VMA_LOCAL_BIND) &&
2402 appgtt->base.allocate_va_range) {
2310 ret = appgtt->base.allocate_va_range(&appgtt->base, 2403 ret = appgtt->base.allocate_va_range(&appgtt->base,
2311 vma->node.start, 2404 vma->node.start,
2312 vma->node.size); 2405 vma->size);
2313 if (ret) 2406 if (ret)
2314 goto err_pages; 2407 goto err_pages;
2315 } 2408 }
@@ -2579,14 +2672,14 @@ static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2579{ 2672{
2580 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2673 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2581 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2674 snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2582 return snb_gmch_ctl << 25; /* 32 MB units */ 2675 return (size_t)snb_gmch_ctl << 25; /* 32 MB units */
2583} 2676}
2584 2677
2585static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2678static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2586{ 2679{
2587 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2680 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2588 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2681 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2589 return bdw_gmch_ctl << 25; /* 32 MB units */ 2682 return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */
2590} 2683}
2591 2684
2592static size_t chv_get_stolen_size(u16 gmch_ctrl) 2685static size_t chv_get_stolen_size(u16 gmch_ctrl)
@@ -2600,11 +2693,11 @@ static size_t chv_get_stolen_size(u16 gmch_ctrl)
2600 * 0x17 to 0x1d: 4MB increments start at 36MB 2693 * 0x17 to 0x1d: 4MB increments start at 36MB
2601 */ 2694 */
2602 if (gmch_ctrl < 0x11) 2695 if (gmch_ctrl < 0x11)
2603 return gmch_ctrl << 25; 2696 return (size_t)gmch_ctrl << 25;
2604 else if (gmch_ctrl < 0x17) 2697 else if (gmch_ctrl < 0x17)
2605 return (gmch_ctrl - 0x11 + 2) << 22; 2698 return (size_t)(gmch_ctrl - 0x11 + 2) << 22;
2606 else 2699 else
2607 return (gmch_ctrl - 0x17 + 9) << 22; 2700 return (size_t)(gmch_ctrl - 0x17 + 9) << 22;
2608} 2701}
2609 2702
2610static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2703static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
@@ -2613,10 +2706,10 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2613 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2706 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2614 2707
2615 if (gen9_gmch_ctl < 0xf0) 2708 if (gen9_gmch_ctl < 0xf0)
2616 return gen9_gmch_ctl << 25; /* 32 MB units */ 2709 return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */
2617 else 2710 else
2618 /* 4MB increments starting at 0xf0 for 4MB */ 2711 /* 4MB increments starting at 0xf0 for 4MB */
2619 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2712 return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22;
2620} 2713}
2621 2714
2622static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2715static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
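
The (size_t) casts matter because the GMCH control field is a 16-bit value that promotes to a 32-bit int before the shift, so the result overflows once the encoded stolen size reaches 2 GiB. A worked stand-alone example on a 64-bit build:

    /* Worked example (not kernel code) of the overflow the (size_t) casts avoid. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t gmch_ctl = 0x80; /* would encode 0x80 * 32 MB = 4 GiB of stolen */

        /* Promoted to int: 0x80 << 25 does not fit in 32 bits (undefined
         * behaviour for a signed shift; typical compilers truncate the value). */
        size_t wrong = gmch_ctl << 25;

        /* Widen first, then shift: the full value survives. */
        size_t right = (size_t)gmch_ctl << 25;

        printf("without cast: %zu\nwith cast:    %zu\n", wrong, right);
        return 0;
    }
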
@@ -2743,13 +2836,17 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
2743 struct pci_dev *pdev = dev_priv->drm.pdev; 2836 struct pci_dev *pdev = dev_priv->drm.pdev;
2744 unsigned int size; 2837 unsigned int size;
2745 u16 snb_gmch_ctl; 2838 u16 snb_gmch_ctl;
2839 int err;
2746 2840
2747 /* TODO: We're not aware of mappable constraints on gen8 yet */ 2841 /* TODO: We're not aware of mappable constraints on gen8 yet */
2748 ggtt->mappable_base = pci_resource_start(pdev, 2); 2842 ggtt->mappable_base = pci_resource_start(pdev, 2);
2749 ggtt->mappable_end = pci_resource_len(pdev, 2); 2843 ggtt->mappable_end = pci_resource_len(pdev, 2);
2750 2844
2751 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) 2845 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
2752 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 2846 if (!err)
2847 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
2848 if (err)
2849 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
2753 2850
2754 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 2851 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2755 2852
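
pci_set_dma_mask() and pci_set_consistent_dma_mask() return 0 on success, and the probe now reports a failure instead of silently ignoring it; DMA_BIT_MASK(39) is simply a mask of the low 39 address bits. A hedged sketch with stand-in set_mask()/set_coherent_mask() helpers in place of the PCI calls:

    /* Sketch only: what DMA_BIT_MASK(39) expands to and the check-both-calls
     * shape. set_mask()/set_coherent_mask() are invented stand-ins that return
     * 0 on success and a negative errno on failure.
     */
    #include <stdint.h>
    #include <stdio.h>

    /* The kernel macro is defined along these lines. */
    #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

    static int set_mask(uint64_t mask)          { (void)mask; return 0; }
    static int set_coherent_mask(uint64_t mask) { (void)mask; return 0; }

    int main(void)
    {
        int err;

        printf("DMA_BIT_MASK(39) = 0x%llx\n",
               (unsigned long long)DMA_BIT_MASK(39));

        /* Only try the second mask if the first one was accepted. */
        err = set_mask(DMA_BIT_MASK(39));
        if (!err)
            err = set_coherent_mask(DMA_BIT_MASK(39));
        if (err)
            fprintf(stderr, "can't set DMA masks (%d)\n", err);

        return 0;
    }
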
@@ -2781,6 +2878,14 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
2781 2878
2782 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 2879 ggtt->base.insert_entries = gen8_ggtt_insert_entries;
2783 2880
2881 /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
2882 if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
2883 ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
2884 ggtt->base.insert_page = bxt_vtd_ggtt_insert_page__BKL;
2885 if (ggtt->base.clear_range != nop_clear_range)
2886 ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
2887 }
2888
2784 ggtt->invalidate = gen6_ggtt_invalidate; 2889 ggtt->invalidate = gen6_ggtt_invalidate;
2785 2890
2786 return ggtt_probe_common(ggtt, size); 2891 return ggtt_probe_common(ggtt, size);
@@ -2792,6 +2897,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
2792 struct pci_dev *pdev = dev_priv->drm.pdev; 2897 struct pci_dev *pdev = dev_priv->drm.pdev;
2793 unsigned int size; 2898 unsigned int size;
2794 u16 snb_gmch_ctl; 2899 u16 snb_gmch_ctl;
2900 int err;
2795 2901
2796 ggtt->mappable_base = pci_resource_start(pdev, 2); 2902 ggtt->mappable_base = pci_resource_start(pdev, 2);
2797 ggtt->mappable_end = pci_resource_len(pdev, 2); 2903 ggtt->mappable_end = pci_resource_len(pdev, 2);
@@ -2804,8 +2910,11 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
2804 return -ENXIO; 2910 return -ENXIO;
2805 } 2911 }
2806 2912
2807 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) 2913 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
2808 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 2914 if (!err)
2915 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
2916 if (err)
2917 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
2809 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 2918 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2810 2919
2811 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 2920 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
@@ -2924,10 +3033,8 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
2924 ggtt->base.total >> 20); 3033 ggtt->base.total >> 20);
2925 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3034 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
2926 DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20); 3035 DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20);
2927#ifdef CONFIG_INTEL_IOMMU 3036 if (intel_vtd_active())
2928 if (intel_iommu_gfx_mapped)
2929 DRM_INFO("VT-d active for gfx access\n"); 3037 DRM_INFO("VT-d active for gfx access\n");
2930#endif
2931 3038
2932 return 0; 3039 return 0;
2933} 3040}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 174cf923c236..35e1a27729dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -37,8 +37,8 @@
37 37
38struct drm_i915_gem_object_ops { 38struct drm_i915_gem_object_ops {
39 unsigned int flags; 39 unsigned int flags;
40#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 40#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
41#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 41#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
42 42
43 /* Interface between the GEM object and its backing storage. 43 /* Interface between the GEM object and its backing storage.
44 * get_pages() is called once prior to the use of the associated set 44 * get_pages() is called once prior to the use of the associated set
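
BIT(0)/BIT(1) keep the flag values 0x1 and 0x2 but spell them as bit positions via the kernel's (1UL << n) helper; a trivial stand-alone check:

    #include <stdio.h>

    #define BIT(nr) (1UL << (nr))

    int main(void)
    {
        printf("BIT(0)=0x%lx BIT(1)=0x%lx\n", BIT(0), BIT(1)); /* 0x1, 0x2 */
        return 0;
    }
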
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5ddbc9499775..0d1e0d8873ef 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)
61 if (i915_fence_signaled(fence)) 61 if (i915_fence_signaled(fence))
62 return false; 62 return false;
63 63
64 intel_engine_enable_signaling(to_request(fence)); 64 intel_engine_enable_signaling(to_request(fence), true);
65 return true; 65 return true;
66} 66}
67 67
@@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
159{ 159{
160 struct i915_dependency *dep, *next; 160 struct i915_dependency *dep, *next;
161 161
162 GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node)); 162 GEM_BUG_ON(!list_empty(&pt->link));
163 163
164 /* Everyone we depended upon (the fences we wait to be signaled) 164 /* Everyone we depended upon (the fences we wait to be signaled)
165 * should retire before us and remove themselves from our list. 165 * should retire before us and remove themselves from our list.
@@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
185{ 185{
186 INIT_LIST_HEAD(&pt->signalers_list); 186 INIT_LIST_HEAD(&pt->signalers_list);
187 INIT_LIST_HEAD(&pt->waiters_list); 187 INIT_LIST_HEAD(&pt->waiters_list);
188 RB_CLEAR_NODE(&pt->node); 188 INIT_LIST_HEAD(&pt->link);
189 pt->priority = INT_MIN; 189 pt->priority = INT_MIN;
190} 190}
191 191
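
Switching the priotree from an rbtree node to a list_head works because an initialised but unlinked list_head points at itself, so list_empty() doubles as the "not queued" test that RB_EMPTY_NODE() used to provide. A minimal userspace rendition of that idiom (the real <linux/list.h> has much more to it):

    #include <stdbool.h>
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void INIT_LIST_HEAD(struct list_head *h) { h->next = h; h->prev = h; }
    static bool list_empty(const struct list_head *h) { return h->next == h; }

    static void list_add_tail(struct list_head *n, struct list_head *h)
    {
        n->prev = h->prev;
        n->next = h;
        h->prev->next = n;
        h->prev = n;
    }

    int main(void)
    {
        struct list_head queue, node;

        INIT_LIST_HEAD(&queue);
        INIT_LIST_HEAD(&node);
        printf("queued? %d\n", !list_empty(&node)); /* 0: initialised, not linked */
        list_add_tail(&node, &queue);
        printf("queued? %d\n", !list_empty(&node)); /* 1: now on the queue */
        return 0;
    }
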
@@ -214,12 +214,12 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
214 } 214 }
215 215
216 /* Finally reset hw state */ 216 /* Finally reset hw state */
217 tl->seqno = seqno;
218 intel_engine_init_global_seqno(engine, seqno); 217 intel_engine_init_global_seqno(engine, seqno);
218 tl->seqno = seqno;
219 219
220 list_for_each_entry(timeline, &i915->gt.timelines, link) 220 list_for_each_entry(timeline, &i915->gt.timelines, link)
221 memset(timeline->engine[id].sync_seqno, 0, 221 memset(timeline->engine[id].global_sync, 0,
222 sizeof(timeline->engine[id].sync_seqno)); 222 sizeof(timeline->engine[id].global_sync));
223 } 223 }
224 224
225 return 0; 225 return 0;
@@ -271,6 +271,48 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
271 /* Space left intentionally blank */ 271 /* Space left intentionally blank */
272} 272}
273 273
274static void advance_ring(struct drm_i915_gem_request *request)
275{
276 unsigned int tail;
277
278 /* We know the GPU must have read the request to have
279 * sent us the seqno + interrupt, so use the position
280 * of tail of the request to update the last known position
281 * of the GPU head.
282 *
283 * Note this requires that we are always called in request
284 * completion order.
285 */
286 if (list_is_last(&request->ring_link, &request->ring->request_list)) {
287 /* We may race here with execlists resubmitting this request
288 * as we retire it. The resubmission will move the ring->tail
289 * forwards (to request->wa_tail). We either read the
290 * current value that was written to hw, or the value that
291 * is just about to be. Either works, if we miss the last two
292 * noops - they are safe to be replayed on a reset.
293 */
294 tail = READ_ONCE(request->ring->tail);
295 } else {
296 tail = request->postfix;
297 }
298 list_del(&request->ring_link);
299
300 request->ring->head = tail;
301}
302
303static void free_capture_list(struct drm_i915_gem_request *request)
304{
305 struct i915_gem_capture_list *capture;
306
307 capture = request->capture_list;
308 while (capture) {
309 struct i915_gem_capture_list *next = capture->next;
310
311 kfree(capture);
312 capture = next;
313 }
314}
315
274static void i915_gem_request_retire(struct drm_i915_gem_request *request) 316static void i915_gem_request_retire(struct drm_i915_gem_request *request)
275{ 317{
276 struct intel_engine_cs *engine = request->engine; 318 struct intel_engine_cs *engine = request->engine;
@@ -287,16 +329,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
287 list_del_init(&request->link); 329 list_del_init(&request->link);
288 spin_unlock_irq(&engine->timeline->lock); 330 spin_unlock_irq(&engine->timeline->lock);
289 331
290 /* We know the GPU must have read the request to have
291 * sent us the seqno + interrupt, so use the position
292 * of tail of the request to update the last known position
293 * of the GPU head.
294 *
295 * Note this requires that we are always called in request
296 * completion order.
297 */
298 list_del(&request->ring_link);
299 request->ring->head = request->postfix;
300 if (!--request->i915->gt.active_requests) { 332 if (!--request->i915->gt.active_requests) {
301 GEM_BUG_ON(!request->i915->gt.awake); 333 GEM_BUG_ON(!request->i915->gt.awake);
302 mod_delayed_work(request->i915->wq, 334 mod_delayed_work(request->i915->wq,
@@ -304,6 +336,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
304 msecs_to_jiffies(100)); 336 msecs_to_jiffies(100));
305 } 337 }
306 unreserve_seqno(request->engine); 338 unreserve_seqno(request->engine);
339 advance_ring(request);
340
341 free_capture_list(request);
307 342
308 /* Walk through the active list, calling retire on each. This allows 343 /* Walk through the active list, calling retire on each. This allows
309 * objects to track their GPU activity and mark themselves as idle 344 * objects to track their GPU activity and mark themselves as idle
@@ -402,7 +437,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
402 spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); 437 spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
403 request->global_seqno = seqno; 438 request->global_seqno = seqno;
404 if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) 439 if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
405 intel_engine_enable_signaling(request); 440 intel_engine_enable_signaling(request, false);
406 spin_unlock(&request->lock); 441 spin_unlock(&request->lock);
407 442
408 engine->emit_breadcrumb(request, 443 engine->emit_breadcrumb(request,
@@ -503,9 +538,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
503 * 538 *
504 * @engine: engine that we wish to issue the request on. 539 * @engine: engine that we wish to issue the request on.
505 * @ctx: context that the request will be associated with. 540 * @ctx: context that the request will be associated with.
506 * This can be NULL if the request is not directly related to
507 * any specific user context, in which case this function will
508 * choose an appropriate context to use.
509 * 541 *
510 * Returns a pointer to the allocated request if successful, 542 * Returns a pointer to the allocated request if successful,
511 * or an error code if not. 543 * or an error code if not.
@@ -516,6 +548,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
516{ 548{
517 struct drm_i915_private *dev_priv = engine->i915; 549 struct drm_i915_private *dev_priv = engine->i915;
518 struct drm_i915_gem_request *req; 550 struct drm_i915_gem_request *req;
551 struct intel_ring *ring;
519 int ret; 552 int ret;
520 553
521 lockdep_assert_held(&dev_priv->drm.struct_mutex); 554 lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -530,9 +563,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
530 * GGTT space, so do this first before we reserve a seqno for 563 * GGTT space, so do this first before we reserve a seqno for
531 * ourselves. 564 * ourselves.
532 */ 565 */
533 ret = engine->context_pin(engine, ctx); 566 ring = engine->context_pin(engine, ctx);
534 if (ret) 567 if (IS_ERR(ring))
535 return ERR_PTR(ret); 568 return ERR_CAST(ring);
569 GEM_BUG_ON(!ring);
536 570
537 ret = reserve_seqno(engine); 571 ret = reserve_seqno(engine);
538 if (ret) 572 if (ret)
@@ -598,11 +632,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
598 req->i915 = dev_priv; 632 req->i915 = dev_priv;
599 req->engine = engine; 633 req->engine = engine;
600 req->ctx = ctx; 634 req->ctx = ctx;
635 req->ring = ring;
601 636
602 /* No zalloc, must clear what we need by hand */ 637 /* No zalloc, must clear what we need by hand */
603 req->global_seqno = 0; 638 req->global_seqno = 0;
604 req->file_priv = NULL; 639 req->file_priv = NULL;
605 req->batch = NULL; 640 req->batch = NULL;
641 req->capture_list = NULL;
606 642
607 /* 643 /*
608 * Reserve space in the ring buffer for all the commands required to 644 * Reserve space in the ring buffer for all the commands required to
@@ -623,7 +659,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
623 * GPU processing the request, we never over-estimate the 659 * GPU processing the request, we never over-estimate the
624 * position of the head. 660 * position of the head.
625 */ 661 */
626 req->head = req->ring->tail; 662 req->head = req->ring->emit;
627 663
628 /* Check that we didn't interrupt ourselves with a new request */ 664 /* Check that we didn't interrupt ourselves with a new request */
629 GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); 665 GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
@@ -651,6 +687,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
651 int ret; 687 int ret;
652 688
653 GEM_BUG_ON(to == from); 689 GEM_BUG_ON(to == from);
690 GEM_BUG_ON(to->timeline == from->timeline);
654 691
655 if (i915_gem_request_completed(from)) 692 if (i915_gem_request_completed(from))
656 return 0; 693 return 0;
@@ -663,9 +700,6 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
663 return ret; 700 return ret;
664 } 701 }
665 702
666 if (to->timeline == from->timeline)
667 return 0;
668
669 if (to->engine == from->engine) { 703 if (to->engine == from->engine) {
670 ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, 704 ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
671 &from->submit, 705 &from->submit,
@@ -674,55 +708,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
674 } 708 }
675 709
676 seqno = i915_gem_request_global_seqno(from); 710 seqno = i915_gem_request_global_seqno(from);
677 if (!seqno) { 711 if (!seqno)
678 ret = i915_sw_fence_await_dma_fence(&to->submit, 712 goto await_dma_fence;
679 &from->fence, 0,
680 GFP_KERNEL);
681 return ret < 0 ? ret : 0;
682 }
683 713
684 if (seqno <= to->timeline->sync_seqno[from->engine->id]) 714 if (!to->engine->semaphore.sync_to) {
685 return 0; 715 if (!__i915_gem_request_started(from, seqno))
716 goto await_dma_fence;
686 717
687 trace_i915_gem_ring_sync_to(to, from); 718 if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2))
688 if (!i915.semaphores) { 719 goto await_dma_fence;
689 if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
690 ret = i915_sw_fence_await_dma_fence(&to->submit,
691 &from->fence, 0,
692 GFP_KERNEL);
693 if (ret < 0)
694 return ret;
695 }
696 } else { 720 } else {
721 GEM_BUG_ON(!from->engine->semaphore.signal);
722
723 if (seqno <= to->timeline->global_sync[from->engine->id])
724 return 0;
725
726 trace_i915_gem_ring_sync_to(to, from);
697 ret = to->engine->semaphore.sync_to(to, from); 727 ret = to->engine->semaphore.sync_to(to, from);
698 if (ret) 728 if (ret)
699 return ret; 729 return ret;
730
731 to->timeline->global_sync[from->engine->id] = seqno;
700 } 732 }
701 733
702 to->timeline->sync_seqno[from->engine->id] = seqno;
703 return 0; 734 return 0;
735
736await_dma_fence:
737 ret = i915_sw_fence_await_dma_fence(&to->submit,
738 &from->fence, 0,
739 GFP_KERNEL);
740 return ret < 0 ? ret : 0;
704} 741}
705 742
706int 743int
707i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, 744i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
708 struct dma_fence *fence) 745 struct dma_fence *fence)
709{ 746{
710 struct dma_fence_array *array; 747 struct dma_fence **child = &fence;
748 unsigned int nchild = 1;
711 int ret; 749 int ret;
712 int i;
713
714 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
715 return 0;
716
717 if (dma_fence_is_i915(fence))
718 return i915_gem_request_await_request(req, to_request(fence));
719
720 if (!dma_fence_is_array(fence)) {
721 ret = i915_sw_fence_await_dma_fence(&req->submit,
722 fence, I915_FENCE_TIMEOUT,
723 GFP_KERNEL);
724 return ret < 0 ? ret : 0;
725 }
726 750
727 /* Note that if the fence-array was created in signal-on-any mode, 751 /* Note that if the fence-array was created in signal-on-any mode,
728 * we should *not* decompose it into its individual fences. However, 752 * we should *not* decompose it into its individual fences. However,
@@ -731,21 +755,46 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
731 * amdgpu and we should not see any incoming fence-array from 755 * amdgpu and we should not see any incoming fence-array from
732 * sync-file being in signal-on-any mode. 756 * sync-file being in signal-on-any mode.
733 */ 757 */
758 if (dma_fence_is_array(fence)) {
759 struct dma_fence_array *array = to_dma_fence_array(fence);
760
761 child = array->fences;
762 nchild = array->num_fences;
763 GEM_BUG_ON(!nchild);
764 }
765
766 do {
767 fence = *child++;
768 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
769 continue;
770
771 /*
772 * Requests on the same timeline are explicitly ordered, along
773 * with their dependencies, by i915_add_request() which ensures
774 * that requests are submitted in-order through each ring.
775 */
776 if (fence->context == req->fence.context)
777 continue;
734 778
735 array = to_dma_fence_array(fence); 779 /* Squash repeated waits to the same timelines */
736 for (i = 0; i < array->num_fences; i++) { 780 if (fence->context != req->i915->mm.unordered_timeline &&
737 struct dma_fence *child = array->fences[i]; 781 intel_timeline_sync_is_later(req->timeline, fence))
782 continue;
738 783
739 if (dma_fence_is_i915(child)) 784 if (dma_fence_is_i915(fence))
740 ret = i915_gem_request_await_request(req, 785 ret = i915_gem_request_await_request(req,
741 to_request(child)); 786 to_request(fence));
742 else 787 else
743 ret = i915_sw_fence_await_dma_fence(&req->submit, 788 ret = i915_sw_fence_await_dma_fence(&req->submit, fence,
744 child, I915_FENCE_TIMEOUT, 789 I915_FENCE_TIMEOUT,
745 GFP_KERNEL); 790 GFP_KERNEL);
746 if (ret < 0) 791 if (ret < 0)
747 return ret; 792 return ret;
748 } 793
794 /* Record the latest fence used against each timeline */
795 if (fence->context != req->i915->mm.unordered_timeline)
796 intel_timeline_sync_set(req->timeline, fence);
797 } while (--nchild);
749 798
750 return 0; 799 return 0;
751} 800}
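The rewritten i915_gem_request_await_dma_fence() above decomposes fence arrays, skips awaits already covered by the per-timeline sync tracking (intel_timeline_sync_is_later) and records the newest fence per timeline afterwards (intel_timeline_sync_set). The standalone sketch below models that squash-repeated-waits rule with the same wraparound-safe seqno comparison; the flat array stands in for the driver's i915_syncmap and every name in it is illustrative only.

/* Minimal userspace model of the "squash repeated waits" rule: remember
 * the latest seqno awaited per fence context and skip any await that is
 * already covered. Not the driver implementation.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct sync_entry {
	uint64_t context;	/* fence->context */
	uint32_t seqno;		/* latest fence->seqno awaited */
};

#define MAX_CONTEXTS 8
static struct sync_entry sync_map[MAX_CONTEXTS];
static unsigned int sync_count;

/* Same ordering rule as i915_seqno_passed(): signed wraparound compare. */
static bool seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

/* True if an equal or later seqno for this context was already awaited. */
static bool sync_is_later(uint64_t context, uint32_t seqno)
{
	unsigned int i;

	for (i = 0; i < sync_count; i++)
		if (sync_map[i].context == context)
			return seqno_passed(sync_map[i].seqno, seqno);

	return false;
}

/* Record the newest seqno seen for this context. */
static void sync_set(uint64_t context, uint32_t seqno)
{
	unsigned int i;

	for (i = 0; i < sync_count; i++) {
		if (sync_map[i].context == context) {
			if (seqno_passed(seqno, sync_map[i].seqno))
				sync_map[i].seqno = seqno;
			return;
		}
	}
	if (sync_count < MAX_CONTEXTS) {
		sync_map[sync_count].context = context;
		sync_map[sync_count].seqno = seqno;
		sync_count++;
	}
}

int main(void)
{
	/* First wait on (ctx 2, seqno 10) must be emitted... */
	printf("await? %d\n", !sync_is_later(2, 10));	/* 1: emit the wait */
	sync_set(2, 10);
	/* ...but an older or equal fence on the same context is redundant. */
	printf("await? %d\n", !sync_is_later(2, 8));	/* 0: squashed */
	printf("await? %d\n", !sync_is_later(2, 11));	/* 1: newer, emit */
	return 0;
}

The point of the tracking is that a second await on an older fence from the same context costs nothing: the first await already orders the request behind it.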
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 129c58bb4805..7b7c84369d78 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -67,12 +67,18 @@ struct i915_dependency {
67struct i915_priotree { 67struct i915_priotree {
68 struct list_head signalers_list; /* those before us, we depend upon */ 68 struct list_head signalers_list; /* those before us, we depend upon */
69 struct list_head waiters_list; /* those after us, they depend upon us */ 69 struct list_head waiters_list; /* those after us, they depend upon us */
70 struct rb_node node; 70 struct list_head link;
71 int priority; 71 int priority;
72#define I915_PRIORITY_MAX 1024 72#define I915_PRIORITY_MAX 1024
73#define I915_PRIORITY_NORMAL 0
73#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) 74#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
74}; 75};
75 76
77struct i915_gem_capture_list {
78 struct i915_gem_capture_list *next;
79 struct i915_vma *vma;
80};
81
76/** 82/**
77 * Request queue structure. 83 * Request queue structure.
78 * 84 *
@@ -167,6 +173,12 @@ struct drm_i915_gem_request {
167 * error state dump only). 173 * error state dump only).
168 */ 174 */
169 struct i915_vma *batch; 175 struct i915_vma *batch;
176 /** Additional buffers requested by userspace to be captured upon
177 * a GPU hang. The vma/obj on this list are protected by their
178 * active reference - all objects on this list must also be
179 * on the active_list (of their final request).
180 */
181 struct i915_gem_capture_list *capture_list;
170 struct list_head active_list; 182 struct list_head active_list;
171 183
172 /** Time at which this request was emitted, in jiffies. */ 184 /** Time at which this request was emitted, in jiffies. */
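The new capture_list is a plain singly-linked list of vma references that travels with the request and is torn down by free_capture_list() above; the code that populates it lives outside the hunks shown here. A minimal standalone sketch of that shape follows, with a hypothetical prepend helper and stand-in types.

#include <stdlib.h>

struct vma;				/* stand-in for struct i915_vma */

struct capture_list {
	struct capture_list *next;
	struct vma *vma;
};

/* Prepend a new entry; return the new head, or the old head on
 * allocation failure so capture stays best-effort. (Illustrative only.)
 */
static struct capture_list *capture_add(struct capture_list *head,
					struct vma *vma)
{
	struct capture_list *c = malloc(sizeof(*c));

	if (!c)
		return head;

	c->next = head;
	c->vma = vma;
	return c;
}

/* Mirror of free_capture_list(): walk and free every node. */
static void capture_free(struct capture_list *head)
{
	while (head) {
		struct capture_list *next = head->next;

		free(head);
		head = next;
	}
}

int main(void)
{
	struct capture_list *list = NULL;

	list = capture_add(list, NULL);
	list = capture_add(list, NULL);
	capture_free(list);
	return 0;
}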
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 129ed303a6c4..0fd2b58ce475 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -35,9 +35,9 @@
35#include "i915_drv.h" 35#include "i915_drv.h"
36#include "i915_trace.h" 36#include "i915_trace.h"
37 37
38static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) 38static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
39{ 39{
40 switch (mutex_trylock_recursive(&dev->struct_mutex)) { 40 switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) {
41 case MUTEX_TRYLOCK_FAILED: 41 case MUTEX_TRYLOCK_FAILED:
42 return false; 42 return false;
43 43
@@ -53,24 +53,29 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
53 BUG(); 53 BUG();
54} 54}
55 55
56static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock) 56static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock)
57{ 57{
58 if (!unlock) 58 if (!unlock)
59 return; 59 return;
60 60
61 mutex_unlock(&dev->struct_mutex); 61 mutex_unlock(&dev_priv->drm.struct_mutex);
62
63 /* expedite the RCU grace period to free some request slabs */
64 synchronize_rcu_expedited();
65} 62}
66 63
67static bool any_vma_pinned(struct drm_i915_gem_object *obj) 64static bool any_vma_pinned(struct drm_i915_gem_object *obj)
68{ 65{
69 struct i915_vma *vma; 66 struct i915_vma *vma;
70 67
71 list_for_each_entry(vma, &obj->vma_list, obj_link) 68 list_for_each_entry(vma, &obj->vma_list, obj_link) {
69 /* Only GGTT vmas may be permanently pinned, and they are always
70 * at the start of the list. We can stop hunting as soon
71 * as we see a ppGTT vma.
72 */
73 if (!i915_vma_is_ggtt(vma))
74 break;
75
72 if (i915_vma_is_pinned(vma)) 76 if (i915_vma_is_pinned(vma))
73 return true; 77 return true;
78 }
74 79
75 return false; 80 return false;
76} 81}
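shrinker_lock() uses mutex_trylock_recursive() so the shrinker can make progress whether or not the caller already holds struct_mutex; only the MUTEX_TRYLOCK_FAILED case and the closing BUG() fall inside the hunks above. The fragment below is a hedged sketch of the usual shape of this idiom, with the middle cases assumed rather than taken from the patch.

#include <linux/bug.h>
#include <linux/mutex.h>

/* Sketch only: the general pattern, not the driver code. */
static bool shrinker_lock_sketch(struct mutex *m, bool *unlock)
{
	switch (mutex_trylock_recursive(m)) {
	case MUTEX_TRYLOCK_FAILED:
		/* Someone else holds the mutex: do not shrink now. */
		return false;

	case MUTEX_TRYLOCK_SUCCESS:
		/* We took the lock and must drop it when unlocking. */
		*unlock = true;
		return true;

	case MUTEX_TRYLOCK_RECURSIVE:
		/* Already held further up our own call stack: reuse it. */
		*unlock = false;
		return true;
	}

	BUG();
}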
@@ -156,7 +161,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
156 unsigned long count = 0; 161 unsigned long count = 0;
157 bool unlock; 162 bool unlock;
158 163
159 if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock)) 164 if (!shrinker_lock(dev_priv, &unlock))
160 return 0; 165 return 0;
161 166
162 trace_i915_gem_shrink(dev_priv, target, flags); 167 trace_i915_gem_shrink(dev_priv, target, flags);
@@ -244,7 +249,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
244 249
245 i915_gem_retire_requests(dev_priv); 250 i915_gem_retire_requests(dev_priv);
246 251
247 i915_gem_shrinker_unlock(&dev_priv->drm, unlock); 252 shrinker_unlock(dev_priv, unlock);
248 253
249 return count; 254 return count;
250} 255}
@@ -274,8 +279,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
274 I915_SHRINK_ACTIVE); 279 I915_SHRINK_ACTIVE);
275 intel_runtime_pm_put(dev_priv); 280 intel_runtime_pm_put(dev_priv);
276 281
277 synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */
278
279 return freed; 282 return freed;
280} 283}
281 284
@@ -284,12 +287,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
284{ 287{
285 struct drm_i915_private *dev_priv = 288 struct drm_i915_private *dev_priv =
286 container_of(shrinker, struct drm_i915_private, mm.shrinker); 289 container_of(shrinker, struct drm_i915_private, mm.shrinker);
287 struct drm_device *dev = &dev_priv->drm;
288 struct drm_i915_gem_object *obj; 290 struct drm_i915_gem_object *obj;
289 unsigned long count; 291 unsigned long count;
290 bool unlock; 292 bool unlock;
291 293
292 if (!i915_gem_shrinker_lock(dev, &unlock)) 294 if (!shrinker_lock(dev_priv, &unlock))
293 return 0; 295 return 0;
294 296
295 i915_gem_retire_requests(dev_priv); 297 i915_gem_retire_requests(dev_priv);
@@ -304,7 +306,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
304 count += obj->base.size >> PAGE_SHIFT; 306 count += obj->base.size >> PAGE_SHIFT;
305 } 307 }
306 308
307 i915_gem_shrinker_unlock(dev, unlock); 309 shrinker_unlock(dev_priv, unlock);
308 310
309 return count; 311 return count;
310} 312}
@@ -314,11 +316,10 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
314{ 316{
315 struct drm_i915_private *dev_priv = 317 struct drm_i915_private *dev_priv =
316 container_of(shrinker, struct drm_i915_private, mm.shrinker); 318 container_of(shrinker, struct drm_i915_private, mm.shrinker);
317 struct drm_device *dev = &dev_priv->drm;
318 unsigned long freed; 319 unsigned long freed;
319 bool unlock; 320 bool unlock;
320 321
321 if (!i915_gem_shrinker_lock(dev, &unlock)) 322 if (!shrinker_lock(dev_priv, &unlock))
322 return SHRINK_STOP; 323 return SHRINK_STOP;
323 324
324 freed = i915_gem_shrink(dev_priv, 325 freed = i915_gem_shrink(dev_priv,
@@ -332,26 +333,20 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
332 I915_SHRINK_BOUND | 333 I915_SHRINK_BOUND |
333 I915_SHRINK_UNBOUND); 334 I915_SHRINK_UNBOUND);
334 335
335 i915_gem_shrinker_unlock(dev, unlock); 336 shrinker_unlock(dev_priv, unlock);
336 337
337 return freed; 338 return freed;
338} 339}
339 340
340struct shrinker_lock_uninterruptible {
341 bool was_interruptible;
342 bool unlock;
343};
344
345static bool 341static bool
346i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, 342shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock,
347 struct shrinker_lock_uninterruptible *slu, 343 int timeout_ms)
348 int timeout_ms)
349{ 344{
350 unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); 345 unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
351 346
352 do { 347 do {
353 if (i915_gem_wait_for_idle(dev_priv, 0) == 0 && 348 if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
354 i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) 349 shrinker_lock(dev_priv, unlock))
355 break; 350 break;
356 351
357 schedule_timeout_killable(1); 352 schedule_timeout_killable(1);
@@ -364,29 +359,19 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
364 } 359 }
365 } while (1); 360 } while (1);
366 361
367 slu->was_interruptible = dev_priv->mm.interruptible;
368 dev_priv->mm.interruptible = false;
369 return true; 362 return true;
370} 363}
371 364
372static void
373i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
374 struct shrinker_lock_uninterruptible *slu)
375{
376 dev_priv->mm.interruptible = slu->was_interruptible;
377 i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
378}
379
380static int 365static int
381i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) 366i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
382{ 367{
383 struct drm_i915_private *dev_priv = 368 struct drm_i915_private *dev_priv =
384 container_of(nb, struct drm_i915_private, mm.oom_notifier); 369 container_of(nb, struct drm_i915_private, mm.oom_notifier);
385 struct shrinker_lock_uninterruptible slu;
386 struct drm_i915_gem_object *obj; 370 struct drm_i915_gem_object *obj;
387 unsigned long unevictable, bound, unbound, freed_pages; 371 unsigned long unevictable, bound, unbound, freed_pages;
372 bool unlock;
388 373
389 if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) 374 if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
390 return NOTIFY_DONE; 375 return NOTIFY_DONE;
391 376
392 freed_pages = i915_gem_shrink_all(dev_priv); 377 freed_pages = i915_gem_shrink_all(dev_priv);
@@ -415,7 +400,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
415 bound += obj->base.size >> PAGE_SHIFT; 400 bound += obj->base.size >> PAGE_SHIFT;
416 } 401 }
417 402
418 i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu); 403 shrinker_unlock(dev_priv, unlock);
419 404
420 if (freed_pages || unbound || bound) 405 if (freed_pages || unbound || bound)
421 pr_info("Purging GPU memory, %lu pages freed, " 406 pr_info("Purging GPU memory, %lu pages freed, "
@@ -435,12 +420,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
435{ 420{
436 struct drm_i915_private *dev_priv = 421 struct drm_i915_private *dev_priv =
437 container_of(nb, struct drm_i915_private, mm.vmap_notifier); 422 container_of(nb, struct drm_i915_private, mm.vmap_notifier);
438 struct shrinker_lock_uninterruptible slu;
439 struct i915_vma *vma, *next; 423 struct i915_vma *vma, *next;
440 unsigned long freed_pages = 0; 424 unsigned long freed_pages = 0;
425 bool unlock;
441 int ret; 426 int ret;
442 427
443 if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) 428 if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
444 return NOTIFY_DONE; 429 return NOTIFY_DONE;
445 430
446 /* Force everything onto the inactive lists */ 431 /* Force everything onto the inactive lists */
@@ -465,7 +450,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
465 } 450 }
466 451
467out: 452out:
468 i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu); 453 shrinker_unlock(dev_priv, unlock);
469 454
470 *(unsigned long *)ptr += freed_pages; 455 *(unsigned long *)ptr += freed_pages;
471 return NOTIFY_DONE; 456 return NOTIFY_DONE;
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index f3abdc27c5dd..681db6083f4d 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -414,12 +414,10 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
414 return 0; 414 return 0;
415 } 415 }
416 416
417#ifdef CONFIG_INTEL_IOMMU 417 if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) {
418 if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) {
419 DRM_INFO("DMAR active, disabling use of stolen memory\n"); 418 DRM_INFO("DMAR active, disabling use of stolen memory\n");
420 return 0; 419 return 0;
421 } 420 }
422#endif
423 421
424 if (ggtt->stolen_size == 0) 422 if (ggtt->stolen_size == 0)
425 return 0; 423 return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index b596ca7ee058..c597ce277a04 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -23,6 +23,32 @@
23 */ 23 */
24 24
25#include "i915_drv.h" 25#include "i915_drv.h"
26#include "i915_syncmap.h"
27
28static void __intel_timeline_init(struct intel_timeline *tl,
29 struct i915_gem_timeline *parent,
30 u64 context,
31 struct lock_class_key *lockclass,
32 const char *lockname)
33{
34 tl->fence_context = context;
35 tl->common = parent;
36#ifdef CONFIG_DEBUG_SPINLOCK
37 __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
38#else
39 spin_lock_init(&tl->lock);
40#endif
41 init_request_active(&tl->last_request, NULL);
42 INIT_LIST_HEAD(&tl->requests);
43 i915_syncmap_init(&tl->sync);
44}
45
46static void __intel_timeline_fini(struct intel_timeline *tl)
47{
48 GEM_BUG_ON(!list_empty(&tl->requests));
49
50 i915_syncmap_free(&tl->sync);
51}
26 52
27static int __i915_gem_timeline_init(struct drm_i915_private *i915, 53static int __i915_gem_timeline_init(struct drm_i915_private *i915,
28 struct i915_gem_timeline *timeline, 54 struct i915_gem_timeline *timeline,
@@ -35,6 +61,14 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,
35 61
36 lockdep_assert_held(&i915->drm.struct_mutex); 62 lockdep_assert_held(&i915->drm.struct_mutex);
37 63
64 /*
65 * Ideally we want a set of engines on a single leaf as we expect
66 * to mostly be tracking synchronisation between engines. It is not
67 * a huge issue if this is not the case, but we may want to mitigate
68 * any page crossing penalties if they become an issue.
69 */
70 BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
71
38 timeline->i915 = i915; 72 timeline->i915 = i915;
39 timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); 73 timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL);
40 if (!timeline->name) 74 if (!timeline->name)
@@ -44,19 +78,10 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,
44 78
45 /* Called during early_init before we know how many engines there are */ 79 /* Called during early_init before we know how many engines there are */
46 fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); 80 fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine));
47 for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { 81 for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
48 struct intel_timeline *tl = &timeline->engine[i]; 82 __intel_timeline_init(&timeline->engine[i],
49 83 timeline, fences++,
50 tl->fence_context = fences++; 84 lockclass, lockname);
51 tl->common = timeline;
52#ifdef CONFIG_DEBUG_SPINLOCK
53 __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
54#else
55 spin_lock_init(&tl->lock);
56#endif
57 init_request_active(&tl->last_request, NULL);
58 INIT_LIST_HEAD(&tl->requests);
59 }
60 85
61 return 0; 86 return 0;
62} 87}
@@ -81,18 +106,52 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915)
81 &class, "&global_timeline->lock"); 106 &class, "&global_timeline->lock");
82} 107}
83 108
109/**
110 * i915_gem_timelines_mark_idle - called when the driver idles
111 * @i915: the drm_i915_private device
112 *
113 * When the driver is completely idle, we know that all of our sync points
114 * have been signaled and our tracking is then entirely redundant. Any request
115 * to wait upon an older sync point will be completed instantly as we know
116 * the fence is signaled and therefore we will not even look them up in the
117 * sync point map.
118 */
119void i915_gem_timelines_mark_idle(struct drm_i915_private *i915)
120{
121 struct i915_gem_timeline *timeline;
122 int i;
123
124 lockdep_assert_held(&i915->drm.struct_mutex);
125
126 list_for_each_entry(timeline, &i915->gt.timelines, link) {
127 for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
128 struct intel_timeline *tl = &timeline->engine[i];
129
130 /*
131 * All known fences are completed so we can scrap
132 * the current sync point tracking and start afresh;
133 * any attempt to wait upon a previous sync point
134 * will be skipped as the fence was signaled.
135 */
136 i915_syncmap_free(&tl->sync);
137 }
138 }
139}
140
84void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) 141void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
85{ 142{
86 int i; 143 int i;
87 144
88 lockdep_assert_held(&timeline->i915->drm.struct_mutex); 145 lockdep_assert_held(&timeline->i915->drm.struct_mutex);
89 146
90 for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { 147 for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
91 struct intel_timeline *tl = &timeline->engine[i]; 148 __intel_timeline_fini(&timeline->engine[i]);
92
93 GEM_BUG_ON(!list_empty(&tl->requests));
94 }
95 149
96 list_del(&timeline->link); 150 list_del(&timeline->link);
97 kfree(timeline->name); 151 kfree(timeline->name);
98} 152}
153
154#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
155#include "selftests/mock_timeline.c"
156#include "selftests/i915_gem_timeline.c"
157#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 6c53e14cab2a..bfb5eb94c64d 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -27,7 +27,9 @@
27 27
28#include <linux/list.h> 28#include <linux/list.h>
29 29
30#include "i915_utils.h"
30#include "i915_gem_request.h" 31#include "i915_gem_request.h"
32#include "i915_syncmap.h"
31 33
32struct i915_gem_timeline; 34struct i915_gem_timeline;
33 35
@@ -55,7 +57,25 @@ struct intel_timeline {
55 * struct_mutex. 57 * struct_mutex.
56 */ 58 */
57 struct i915_gem_active last_request; 59 struct i915_gem_active last_request;
58 u32 sync_seqno[I915_NUM_ENGINES]; 60
61 /**
62 * We track the most recent seqno that we wait on in every context so
63 * that we only have to emit a new await and dependency on a more
64 * recent sync point. As the contexts may be executed out-of-order, we
65 * have to track each individually and cannot rely on an absolute
66 * global_seqno. When we know that all tracked fences are completed
67 * (i.e. when the driver is idle), we know that the syncmap is
68 * redundant and we can discard it without loss of generality.
69 */
70 struct i915_syncmap *sync;
71 /**
72 * Separately to the inter-context seqno map above, we track the last
73 * barrier (e.g. semaphore wait) to the global engine timelines. Note
74 * that this tracks global_seqno rather than the context.seqno, and
75 * so it is subject to the limitations of hw wraparound and that we
76 * may need to revoke global_seqno (on pre-emption).
77 */
78 u32 global_sync[I915_NUM_ENGINES];
59 79
60 struct i915_gem_timeline *common; 80 struct i915_gem_timeline *common;
61}; 81};
@@ -73,6 +93,31 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
73 struct i915_gem_timeline *tl, 93 struct i915_gem_timeline *tl,
74 const char *name); 94 const char *name);
75int i915_gem_timeline_init__global(struct drm_i915_private *i915); 95int i915_gem_timeline_init__global(struct drm_i915_private *i915);
96void i915_gem_timelines_mark_idle(struct drm_i915_private *i915);
76void i915_gem_timeline_fini(struct i915_gem_timeline *tl); 97void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
77 98
99static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
100 u64 context, u32 seqno)
101{
102 return i915_syncmap_set(&tl->sync, context, seqno);
103}
104
105static inline int intel_timeline_sync_set(struct intel_timeline *tl,
106 const struct dma_fence *fence)
107{
108 return __intel_timeline_sync_set(tl, fence->context, fence->seqno);
109}
110
111static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl,
112 u64 context, u32 seqno)
113{
114 return i915_syncmap_is_later(&tl->sync, context, seqno);
115}
116
117static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
118 const struct dma_fence *fence)
119{
120 return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
121}
122
78#endif 123#endif
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8effc59f5cb5..e18f350bc364 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -712,6 +712,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
712 print_error_obj(m, dev_priv->engine[i], NULL, obj); 712 print_error_obj(m, dev_priv->engine[i], NULL, obj);
713 } 713 }
714 714
715 for (j = 0; j < ee->user_bo_count; j++)
716 print_error_obj(m, dev_priv->engine[i],
717 "user", ee->user_bo[j]);
718
715 if (ee->num_requests) { 719 if (ee->num_requests) {
716 err_printf(m, "%s --- %d requests\n", 720 err_printf(m, "%s --- %d requests\n",
717 dev_priv->engine[i]->name, 721 dev_priv->engine[i]->name,
@@ -825,11 +829,15 @@ void __i915_gpu_state_free(struct kref *error_ref)
825{ 829{
826 struct i915_gpu_state *error = 830 struct i915_gpu_state *error =
827 container_of(error_ref, typeof(*error), ref); 831 container_of(error_ref, typeof(*error), ref);
828 int i; 832 long i, j;
829 833
830 for (i = 0; i < ARRAY_SIZE(error->engine); i++) { 834 for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
831 struct drm_i915_error_engine *ee = &error->engine[i]; 835 struct drm_i915_error_engine *ee = &error->engine[i];
832 836
837 for (j = 0; j < ee->user_bo_count; j++)
838 i915_error_object_free(ee->user_bo[j]);
839 kfree(ee->user_bo);
840
833 i915_error_object_free(ee->batchbuffer); 841 i915_error_object_free(ee->batchbuffer);
834 i915_error_object_free(ee->wa_batchbuffer); 842 i915_error_object_free(ee->wa_batchbuffer);
835 i915_error_object_free(ee->ringbuffer); 843 i915_error_object_free(ee->ringbuffer);
@@ -1316,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
1316static void error_record_engine_execlists(struct intel_engine_cs *engine, 1324static void error_record_engine_execlists(struct intel_engine_cs *engine,
1317 struct drm_i915_error_engine *ee) 1325 struct drm_i915_error_engine *ee)
1318{ 1326{
1327 const struct execlist_port *port = engine->execlist_port;
1319 unsigned int n; 1328 unsigned int n;
1320 1329
1321 for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) 1330 for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
1322 if (engine->execlist_port[n].request) 1331 struct drm_i915_gem_request *rq = port_request(&port[n]);
1323 record_request(engine->execlist_port[n].request, 1332
1324 &ee->execlist[n]); 1333 if (!rq)
1334 break;
1335
1336 record_request(rq, &ee->execlist[n]);
1337 }
1325} 1338}
1326 1339
1327static void record_context(struct drm_i915_error_context *e, 1340static void record_context(struct drm_i915_error_context *e,
@@ -1346,6 +1359,35 @@ static void record_context(struct drm_i915_error_context *e,
1346 e->active = ctx->active_count; 1359 e->active = ctx->active_count;
1347} 1360}
1348 1361
1362static void request_record_user_bo(struct drm_i915_gem_request *request,
1363 struct drm_i915_error_engine *ee)
1364{
1365 struct i915_gem_capture_list *c;
1366 struct drm_i915_error_object **bo;
1367 long count;
1368
1369 count = 0;
1370 for (c = request->capture_list; c; c = c->next)
1371 count++;
1372
1373 bo = NULL;
1374 if (count)
1375 bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
1376 if (!bo)
1377 return;
1378
1379 count = 0;
1380 for (c = request->capture_list; c; c = c->next) {
1381 bo[count] = i915_error_object_create(request->i915, c->vma);
1382 if (!bo[count])
1383 break;
1384 count++;
1385 }
1386
1387 ee->user_bo = bo;
1388 ee->user_bo_count = count;
1389}
1390
1349static void i915_gem_record_rings(struct drm_i915_private *dev_priv, 1391static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1350 struct i915_gpu_state *error) 1392 struct i915_gpu_state *error)
1351{ 1393{
@@ -1392,6 +1434,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1392 ee->wa_batchbuffer = 1434 ee->wa_batchbuffer =
1393 i915_error_object_create(dev_priv, 1435 i915_error_object_create(dev_priv,
1394 engine->scratch); 1436 engine->scratch);
1437 request_record_user_bo(request, ee);
1395 1438
1396 ee->ctx = 1439 ee->ctx =
1397 i915_error_object_create(dev_priv, 1440 i915_error_object_create(dev_priv,
@@ -1560,6 +1603,9 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
1560 error->done_reg = I915_READ(DONE_REG); 1603 error->done_reg = I915_READ(DONE_REG);
1561 } 1604 }
1562 1605
1606 if (INTEL_GEN(dev_priv) >= 5)
1607 error->ccid = I915_READ(CCID);
1608
1563 /* 3: Feature specific registers */ 1609 /* 3: Feature specific registers */
1564 if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { 1610 if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
1565 error->gam_ecochk = I915_READ(GAM_ECOCHK); 1611 error->gam_ecochk = I915_READ(GAM_ECOCHK);
@@ -1567,9 +1613,6 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
1567 } 1613 }
1568 1614
1569 /* 4: Everything else */ 1615 /* 4: Everything else */
1570 if (HAS_HW_CONTEXTS(dev_priv))
1571 error->ccid = I915_READ(CCID);
1572
1573 if (INTEL_GEN(dev_priv) >= 8) { 1616 if (INTEL_GEN(dev_priv) >= 8) {
1574 error->ier = I915_READ(GEN8_DE_MISC_IER); 1617 error->ier = I915_READ(GEN8_DE_MISC_IER);
1575 for (i = 0; i < 4; i++) 1618 for (i = 0; i < 4; i++)
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1642fff9cf13..e6e0c6ef1084 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
480 GEM_BUG_ON(freespace < wqi_size); 480 GEM_BUG_ON(freespace < wqi_size);
481 481
482 /* The GuC firmware wants the tail index in QWords, not bytes */ 482 /* The GuC firmware wants the tail index in QWords, not bytes */
483 tail = rq->tail; 483 tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
484 assert_ring_tail_valid(rq->ring, rq->tail);
485 tail >>= 3;
486 GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); 484 GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
487 485
488 /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we 486 /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
@@ -616,12 +614,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
616 b_ret = guc_ring_doorbell(client); 614 b_ret = guc_ring_doorbell(client);
617 615
618 client->submissions[engine_id] += 1; 616 client->submissions[engine_id] += 1;
619 client->retcode = b_ret;
620 if (b_ret)
621 client->b_fail += 1;
622
623 guc->submissions[engine_id] += 1;
624 guc->last_seqno[engine_id] = rq->global_seqno;
625 617
626 spin_unlock_irqrestore(&client->wq_lock, flags); 618 spin_unlock_irqrestore(&client->wq_lock, flags);
627} 619}
@@ -651,47 +643,68 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
651 trace_dma_fence_enable_signal(&rq->fence); 643 trace_dma_fence_enable_signal(&rq->fence);
652 644
653 spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING); 645 spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
654 intel_engine_enable_signaling(rq); 646 intel_engine_enable_signaling(rq, true);
655 spin_unlock(&rq->lock); 647 spin_unlock(&rq->lock);
656} 648}
657 649
650static void port_assign(struct execlist_port *port,
651 struct drm_i915_gem_request *rq)
652{
653 GEM_BUG_ON(rq == port_request(port));
654
655 if (port_isset(port))
656 i915_gem_request_put(port_request(port));
657
658 port_set(port, i915_gem_request_get(rq));
659 nested_enable_signaling(rq);
660}
661
658static bool i915_guc_dequeue(struct intel_engine_cs *engine) 662static bool i915_guc_dequeue(struct intel_engine_cs *engine)
659{ 663{
660 struct execlist_port *port = engine->execlist_port; 664 struct execlist_port *port = engine->execlist_port;
661 struct drm_i915_gem_request *last = port[0].request; 665 struct drm_i915_gem_request *last = port_request(port);
662 struct rb_node *rb; 666 struct rb_node *rb;
663 bool submit = false; 667 bool submit = false;
664 668
665 spin_lock_irq(&engine->timeline->lock); 669 spin_lock_irq(&engine->timeline->lock);
666 rb = engine->execlist_first; 670 rb = engine->execlist_first;
671 GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
667 while (rb) { 672 while (rb) {
668 struct drm_i915_gem_request *rq = 673 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
669 rb_entry(rb, typeof(*rq), priotree.node); 674 struct drm_i915_gem_request *rq, *rn;
670 675
671 if (last && rq->ctx != last->ctx) { 676 list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
672 if (port != engine->execlist_port) 677 if (last && rq->ctx != last->ctx) {
673 break; 678 if (port != engine->execlist_port) {
674 679 __list_del_many(&p->requests,
675 i915_gem_request_assign(&port->request, last); 680 &rq->priotree.link);
676 nested_enable_signaling(last); 681 goto done;
677 port++; 682 }
683
684 if (submit)
685 port_assign(port, last);
686 port++;
687 }
688
689 INIT_LIST_HEAD(&rq->priotree.link);
690 rq->priotree.priority = INT_MAX;
691
692 i915_guc_submit(rq);
693 trace_i915_gem_request_in(rq, port_index(port, engine));
694 last = rq;
695 submit = true;
678 } 696 }
679 697
680 rb = rb_next(rb); 698 rb = rb_next(rb);
681 rb_erase(&rq->priotree.node, &engine->execlist_queue); 699 rb_erase(&p->node, &engine->execlist_queue);
682 RB_CLEAR_NODE(&rq->priotree.node); 700 INIT_LIST_HEAD(&p->requests);
683 rq->priotree.priority = INT_MAX; 701 if (p->priority != I915_PRIORITY_NORMAL)
684 702 kmem_cache_free(engine->i915->priorities, p);
685 i915_guc_submit(rq);
686 trace_i915_gem_request_in(rq, port - engine->execlist_port);
687 last = rq;
688 submit = true;
689 }
690 if (submit) {
691 i915_gem_request_assign(&port->request, last);
692 nested_enable_signaling(last);
693 engine->execlist_first = rb;
694 } 703 }
704done:
705 engine->execlist_first = rb;
706 if (submit)
707 port_assign(port, last);
695 spin_unlock_irq(&engine->timeline->lock); 708 spin_unlock_irq(&engine->timeline->lock);
696 709
697 return submit; 710 return submit;
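The reworked dequeue walks the i915_priolist nodes in priority order and coalesces consecutive requests from the same context into one execlist port, leaving the remainder queued once both ports would be claimed by different contexts. A simplified standalone model of that rule follows; the request and context values are illustrative.

#include <stdio.h>

struct req {
	int ctx;	/* submitting context id */
	int seqno;	/* just for printing */
};

#define NPORTS 2

int main(void)
{
	/* Already sorted by priority, FIFO within equal priority. */
	struct req queue[] = {
		{ .ctx = 1, .seqno = 1 },
		{ .ctx = 1, .seqno = 2 },	/* coalesced into port 0 */
		{ .ctx = 2, .seqno = 3 },	/* different ctx: port 1 */
		{ .ctx = 3, .seqno = 4 },	/* would need a third port: stop */
	};
	const struct req *port[NPORTS] = { NULL, NULL };
	unsigned int p = 0, i;

	for (i = 0; i < sizeof(queue) / sizeof(queue[0]); i++) {
		const struct req *rq = &queue[i];

		if (port[p] && rq->ctx != port[p]->ctx) {
			if (p + 1 == NPORTS)
				break;	/* both ports busy: leave it queued */
			p++;
		}
		port[p] = rq;	/* last request of the run owns the port */
	}

	for (p = 0; p < NPORTS; p++)
		if (port[p])
			printf("port %u: ctx %d, seqno %d\n",
			       p, port[p]->ctx, port[p]->seqno);
	return 0;
}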
@@ -705,17 +718,19 @@ static void i915_guc_irq_handler(unsigned long data)
705 bool submit; 718 bool submit;
706 719
707 do { 720 do {
708 rq = port[0].request; 721 rq = port_request(&port[0]);
709 while (rq && i915_gem_request_completed(rq)) { 722 while (rq && i915_gem_request_completed(rq)) {
710 trace_i915_gem_request_out(rq); 723 trace_i915_gem_request_out(rq);
711 i915_gem_request_put(rq); 724 i915_gem_request_put(rq);
712 port[0].request = port[1].request; 725
713 port[1].request = NULL; 726 port[0] = port[1];
714 rq = port[0].request; 727 memset(&port[1], 0, sizeof(port[1]));
728
729 rq = port_request(&port[0]);
715 } 730 }
716 731
717 submit = false; 732 submit = false;
718 if (!port[1].request) 733 if (!port_count(&port[1]))
719 submit = i915_guc_dequeue(engine); 734 submit = i915_guc_dequeue(engine);
720 } while (submit); 735 } while (submit);
721} 736}
@@ -1053,8 +1068,7 @@ static int guc_ads_create(struct intel_guc *guc)
1053 dev_priv->engine[RCS]->status_page.ggtt_offset; 1068 dev_priv->engine[RCS]->status_page.ggtt_offset;
1054 1069
1055 for_each_engine(engine, dev_priv, id) 1070 for_each_engine(engine, dev_priv, id)
1056 blob->ads.eng_state_size[engine->guc_id] = 1071 blob->ads.eng_state_size[engine->guc_id] = engine->context_size;
1057 intel_lr_context_size(engine);
1058 1072
1059 base = guc_ggtt_offset(vma); 1073 base = guc_ggtt_offset(vma);
1060 blob->ads.scheduler_policies = base + ptr_offset(blob, policies); 1074 blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 04493ef1d2f7..7b7f55a28eec 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1200,7 +1200,7 @@ out:
1200static void ivybridge_parity_work(struct work_struct *work) 1200static void ivybridge_parity_work(struct work_struct *work)
1201{ 1201{
1202 struct drm_i915_private *dev_priv = 1202 struct drm_i915_private *dev_priv =
1203 container_of(work, struct drm_i915_private, l3_parity.error_work); 1203 container_of(work, typeof(*dev_priv), l3_parity.error_work);
1204 u32 error_status, row, bank, subbank; 1204 u32 error_status, row, bank, subbank;
1205 char *parity_event[6]; 1205 char *parity_event[6];
1206 uint32_t misccpctl; 1206 uint32_t misccpctl;
@@ -1317,14 +1317,16 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
1317 ivybridge_parity_error_irq_handler(dev_priv, gt_iir); 1317 ivybridge_parity_error_irq_handler(dev_priv, gt_iir);
1318} 1318}
1319 1319
1320static __always_inline void 1320static void
1321gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) 1321gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
1322{ 1322{
1323 bool tasklet = false; 1323 bool tasklet = false;
1324 1324
1325 if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { 1325 if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
1326 set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); 1326 if (port_count(&engine->execlist_port[0])) {
1327 tasklet = true; 1327 __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
1328 tasklet = true;
1329 }
1328 } 1330 }
1329 1331
1330 if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { 1332 if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
@@ -2917,7 +2919,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
2917 u32 pipestat_mask; 2919 u32 pipestat_mask;
2918 u32 enable_mask; 2920 u32 enable_mask;
2919 enum pipe pipe; 2921 enum pipe pipe;
2920 u32 val;
2921 2922
2922 pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV | 2923 pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV |
2923 PIPE_CRC_DONE_INTERRUPT_STATUS; 2924 PIPE_CRC_DONE_INTERRUPT_STATUS;
@@ -2928,18 +2929,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
2928 2929
2929 enable_mask = I915_DISPLAY_PORT_INTERRUPT | 2930 enable_mask = I915_DISPLAY_PORT_INTERRUPT |
2930 I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | 2931 I915_DISPLAY_PIPE_A_EVENT_INTERRUPT |
2931 I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; 2932 I915_DISPLAY_PIPE_B_EVENT_INTERRUPT |
2933 I915_LPE_PIPE_A_INTERRUPT |
2934 I915_LPE_PIPE_B_INTERRUPT;
2935
2932 if (IS_CHERRYVIEW(dev_priv)) 2936 if (IS_CHERRYVIEW(dev_priv))
2933 enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT; 2937 enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT |
2938 I915_LPE_PIPE_C_INTERRUPT;
2934 2939
2935 WARN_ON(dev_priv->irq_mask != ~0); 2940 WARN_ON(dev_priv->irq_mask != ~0);
2936 2941
2937 val = (I915_LPE_PIPE_A_INTERRUPT |
2938 I915_LPE_PIPE_B_INTERRUPT |
2939 I915_LPE_PIPE_C_INTERRUPT);
2940
2941 enable_mask |= val;
2942
2943 dev_priv->irq_mask = ~enable_mask; 2942 dev_priv->irq_mask = ~enable_mask;
2944 2943
2945 GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask); 2944 GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask);
@@ -4197,11 +4196,15 @@ static void i965_irq_uninstall(struct drm_device * dev)
4197void intel_irq_init(struct drm_i915_private *dev_priv) 4196void intel_irq_init(struct drm_i915_private *dev_priv)
4198{ 4197{
4199 struct drm_device *dev = &dev_priv->drm; 4198 struct drm_device *dev = &dev_priv->drm;
4199 int i;
4200 4200
4201 intel_hpd_init_work(dev_priv); 4201 intel_hpd_init_work(dev_priv);
4202 4202
4203 INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); 4203 INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
4204
4204 INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); 4205 INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
4206 for (i = 0; i < MAX_L3_SLICES; ++i)
4207 dev_priv->l3_parity.remap_info[i] = NULL;
4205 4208
4206 if (HAS_GUC_SCHED(dev_priv)) 4209 if (HAS_GUC_SCHED(dev_priv))
4207 dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT; 4210 dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
@@ -4327,6 +4330,20 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
4327} 4330}
4328 4331
4329/** 4332/**
4333 * intel_irq_fini - deinitializes IRQ support
4334 * @i915: i915 device instance
4335 *
4336 * This function deinitializes all the IRQ support.
4337 */
4338void intel_irq_fini(struct drm_i915_private *i915)
4339{
4340 int i;
4341
4342 for (i = 0; i < MAX_L3_SLICES; ++i)
4343 kfree(i915->l3_parity.remap_info[i]);
4344}
4345
4346/**
4330 * intel_irq_install - enables the hardware interrupt 4347 * intel_irq_install - enables the hardware interrupt
4331 * @dev_priv: i915 device instance 4348 * @dev_priv: i915 device instance
4332 * 4349 *
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index f87b0c4e564d..f80db2ccd92f 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -220,7 +220,6 @@ static const struct intel_device_info intel_ironlake_m_info = {
220 .has_rc6 = 1, \ 220 .has_rc6 = 1, \
221 .has_rc6p = 1, \ 221 .has_rc6p = 1, \
222 .has_gmbus_irq = 1, \ 222 .has_gmbus_irq = 1, \
223 .has_hw_contexts = 1, \
224 .has_aliasing_ppgtt = 1, \ 223 .has_aliasing_ppgtt = 1, \
225 GEN_DEFAULT_PIPEOFFSETS, \ 224 GEN_DEFAULT_PIPEOFFSETS, \
226 CURSOR_OFFSETS 225 CURSOR_OFFSETS
@@ -245,7 +244,6 @@ static const struct intel_device_info intel_sandybridge_m_info = {
245 .has_rc6 = 1, \ 244 .has_rc6 = 1, \
246 .has_rc6p = 1, \ 245 .has_rc6p = 1, \
247 .has_gmbus_irq = 1, \ 246 .has_gmbus_irq = 1, \
248 .has_hw_contexts = 1, \
249 .has_aliasing_ppgtt = 1, \ 247 .has_aliasing_ppgtt = 1, \
250 .has_full_ppgtt = 1, \ 248 .has_full_ppgtt = 1, \
251 GEN_DEFAULT_PIPEOFFSETS, \ 249 GEN_DEFAULT_PIPEOFFSETS, \
@@ -280,7 +278,6 @@ static const struct intel_device_info intel_valleyview_info = {
280 .has_runtime_pm = 1, 278 .has_runtime_pm = 1,
281 .has_rc6 = 1, 279 .has_rc6 = 1,
282 .has_gmbus_irq = 1, 280 .has_gmbus_irq = 1,
283 .has_hw_contexts = 1,
284 .has_gmch_display = 1, 281 .has_gmch_display = 1,
285 .has_hotplug = 1, 282 .has_hotplug = 1,
286 .has_aliasing_ppgtt = 1, 283 .has_aliasing_ppgtt = 1,
@@ -340,7 +337,6 @@ static const struct intel_device_info intel_cherryview_info = {
340 .has_resource_streamer = 1, 337 .has_resource_streamer = 1,
341 .has_rc6 = 1, 338 .has_rc6 = 1,
342 .has_gmbus_irq = 1, 339 .has_gmbus_irq = 1,
343 .has_hw_contexts = 1,
344 .has_logical_ring_contexts = 1, 340 .has_logical_ring_contexts = 1,
345 .has_gmch_display = 1, 341 .has_gmch_display = 1,
346 .has_aliasing_ppgtt = 1, 342 .has_aliasing_ppgtt = 1,
@@ -387,7 +383,6 @@ static const struct intel_device_info intel_skylake_gt3_info = {
387 .has_rc6 = 1, \ 383 .has_rc6 = 1, \
388 .has_dp_mst = 1, \ 384 .has_dp_mst = 1, \
389 .has_gmbus_irq = 1, \ 385 .has_gmbus_irq = 1, \
390 .has_hw_contexts = 1, \
391 .has_logical_ring_contexts = 1, \ 386 .has_logical_ring_contexts = 1, \
392 .has_guc = 1, \ 387 .has_guc = 1, \
393 .has_decoupled_mmio = 1, \ 388 .has_decoupled_mmio = 1, \
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 060b171480d5..85269bcc8372 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -205,25 +205,49 @@
205 205
206#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1)) 206#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
207 207
208/* There's a HW race condition between OA unit tail pointer register updates and 208/**
209 * DOC: OA Tail Pointer Race
210 *
211 * There's a HW race condition between OA unit tail pointer register updates and
209 * writes to memory whereby the tail pointer can sometimes get ahead of what's 212 * writes to memory whereby the tail pointer can sometimes get ahead of what's
210 * been written out to the OA buffer so far. 213 * been written out to the OA buffer so far (in terms of what's visible to the
214 * CPU).
215 *
216 * Although this can be observed explicitly while copying reports to userspace
217 * by checking for a zeroed report-id field in tail reports, we want to account
218 * for this earlier, as part of the _oa_buffer_check to avoid lots of redundant
219 * read() attempts.
220 *
221 * In effect we define a tail pointer for reading that lags the real tail
222 * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
223 * time for the corresponding reports to become visible to the CPU.
224 *
225 * To manage this we actually track two tail pointers:
226 * 1) An 'aging' tail with an associated timestamp that is tracked until we
227 * can trust the corresponding data is visible to the CPU; at which point
228 * it is considered 'aged'.
229 * 2) An 'aged' tail that can be used for read()ing.
230 *
231 * The two separate pointers let us decouple read()s from tail pointer aging.
232 *
233 * The tail pointers are checked and updated at a limited rate within a hrtimer
234 * callback (the same callback that is used for delivering POLLIN events).
211 * 235 *
212 * Although this can be observed explicitly by checking for a zeroed report-id 236 * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
213 * field in tail reports, it seems preferable to account for this earlier e.g. 237 * indicates that an updated tail pointer is needed.
214 * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles
215 * in this situation.
216 * 238 *
217 * To give time for the most recent reports to land before they may be copied to 239 * Most of the implementation details for this workaround are in
218 * userspace, the driver operates as if the tail pointer effectively lags behind 240 * gen7_oa_buffer_check_unlocked() and gen7_appand_oa_reports()
219 * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated
220 * based on this constant in nanoseconds, the current OA sampling exponent
221 * and current report size.
222 * 241 *
223 * There is also a fallback check while reading to simply skip over reports with 242 * Note for posterity: previously the driver used to define an effective tail
224 * a zeroed report-id. 243 * pointer that lagged the real pointer by a 'tail margin' measured in bytes
244 * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
245 * This was flawed considering that the OA unit may also automatically generate
246 * non-periodic reports (such as on context switch) or the OA unit may be
247 * enabled without any periodic sampling.
225 */ 248 */
226#define OA_TAIL_MARGIN_NSEC 100000ULL 249#define OA_TAIL_MARGIN_NSEC 100000ULL
250#define INVALID_TAIL_PTR 0xffffffff
227 251
228/* frequency for checking whether the OA unit has written new reports to the 252/* frequency for checking whether the OA unit has written new reports to the
229 * circular OA buffer... 253 * circular OA buffer...
@@ -308,27 +332,121 @@ struct perf_open_properties {
308 int oa_period_exponent; 332 int oa_period_exponent;
309}; 333};
310 334
311/* NB: This is either called via fops or the poll check hrtimer (atomic ctx) 335/**
336 * gen7_oa_buffer_check_unlocked - check for data and update tail ptr state
337 * @dev_priv: i915 device instance
338 *
339 * This is either called via fops (for blocking reads in user ctx) or the poll
340 * check hrtimer (atomic ctx) to check the OA buffer tail pointer and see
341 * if there is data available for userspace to read.
312 * 342 *
313 * It's safe to read OA config state here unlocked, assuming that this is only 343 * This function is central to providing a workaround for the OA unit tail
314 * called while the stream is enabled, while the global OA configuration can't 344 * pointer having a race with respect to what data is visible to the CPU.
315 * be modified. 345 * It is responsible for reading tail pointers from the hardware and giving
346 * the pointers time to 'age' before they are made available for reading.
347 * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
316 * 348 *
317 * Note: we don't lock around the head/tail reads even though there's the slim 349 * Besides returning true when there is data available to read() this function
318 * possibility of read() fop errors forcing a re-init of the OA buffer 350 * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
319 * pointers. A race here could result in a false positive !empty status which 351 * and .aged_tail_idx state used for reading.
320 * is acceptable. 352 *
353 * Note: It's safe to read OA config state here unlocked, assuming that this is
354 * only called while the stream is enabled, while the global OA configuration
355 * can't be modified.
356 *
357 * Returns: %true if the OA buffer contains data, else %false
321 */ 358 */
322static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv) 359static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
323{ 360{
324 int report_size = dev_priv->perf.oa.oa_buffer.format_size; 361 int report_size = dev_priv->perf.oa.oa_buffer.format_size;
325 u32 oastatus2 = I915_READ(GEN7_OASTATUS2); 362 unsigned long flags;
326 u32 oastatus1 = I915_READ(GEN7_OASTATUS1); 363 unsigned int aged_idx;
327 u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; 364 u32 oastatus1;
328 u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; 365 u32 head, hw_tail, aged_tail, aging_tail;
366 u64 now;
367
368 /* We have to consider the (unlikely) possibility that read() errors
369 * could result in an OA buffer reset which might reset the head,
370 * tails[] and aged_tail state.
371 */
372 spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
373
374 /* NB: The head we observe here might effectively be a little out of
375 * date (between head and tails[aged_idx].offset) if there is currently
376 * a read() in progress.
377 */
378 head = dev_priv->perf.oa.oa_buffer.head;
379
380 aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
381 aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
382 aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
329 383
330 return OA_TAKEN(tail, head) < 384 oastatus1 = I915_READ(GEN7_OASTATUS1);
331 dev_priv->perf.oa.tail_margin + report_size; 385 hw_tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
386
387 /* The tail pointer increases in 64 byte increments,
388 * not in report_size steps...
389 */
390 hw_tail &= ~(report_size - 1);
391
392 now = ktime_get_mono_fast_ns();
393
394 /* Update the aged tail
395 *
396 * Flip the tail pointer available for read()s once the aging tail is
397 * old enough to trust that the corresponding data will be visible to
398 * the CPU...
399 *
400 * Do this before updating the aging pointer in case we may be able to
401 * immediately start aging a new pointer too (if new data has become
402 * available) without needing to wait for a later hrtimer callback.
403 */
404 if (aging_tail != INVALID_TAIL_PTR &&
405 ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
406 OA_TAIL_MARGIN_NSEC)) {
407 aged_idx ^= 1;
408 dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
409
410 aged_tail = aging_tail;
411
412 /* Mark that we need a new pointer to start aging... */
413 dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
414 aging_tail = INVALID_TAIL_PTR;
415 }
416
417 /* Update the aging tail
418 *
419 * We throttle aging tail updates until we have a new tail that
420 * represents >= one report more data than is already available for
421 * reading. This ensures there will be enough data for a successful
422 * read once this new pointer has aged and ensures we will give the new
423 * pointer time to age.
424 */
425 if (aging_tail == INVALID_TAIL_PTR &&
426 (aged_tail == INVALID_TAIL_PTR ||
427 OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
428 struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
429 u32 gtt_offset = i915_ggtt_offset(vma);
430
431 /* Be paranoid and do a bounds check on the pointer read back
432 * from hardware, just in case some spurious hardware condition
433 * could put the tail out of bounds...
434 */
435 if (hw_tail >= gtt_offset &&
436 hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
437 dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
438 aging_tail = hw_tail;
439 dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
440 } else {
441 DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
442 hw_tail);
443 }
444 }
445
446 spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
447
448 return aged_tail == INVALID_TAIL_PTR ?
449 false : OA_TAKEN(aged_tail, head) >= report_size;
332} 450}
333 451
334/** 452/**
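The scheme described in the kernel-doc above is a two-slot one: the tail most recently read back from hardware sits in an "aging" slot until it has been stable for OA_TAIL_MARGIN_NSEC, and only then is it promoted to the "aged" slot that read() is allowed to trust. A minimal standalone sketch of that idea, with an illustrative struct and margin rather than the driver's real state:

        #include <stdint.h>

        #define INVALID_TAIL 0xffffffffu
        #define MARGIN_NSEC  100000ull          /* illustrative aging period */

        struct tail_state {
                uint32_t tails[2];      /* [aged_idx] is readable, [!aged_idx] is aging */
                unsigned int aged_idx;
                uint64_t aging_ts;      /* when the aging slot was last refreshed */
        };

        /* Promote the aging tail once it has been stable for MARGIN_NSEC, then
         * start aging the latest hardware tail; returns the tail a reader may use.
         */
        static uint32_t update_tails(struct tail_state *s, uint32_t hw_tail, uint64_t now)
        {
                if (s->tails[!s->aged_idx] != INVALID_TAIL &&
                    now - s->aging_ts > MARGIN_NSEC) {
                        s->aged_idx ^= 1;                       /* aging slot becomes the aged one */
                        s->tails[!s->aged_idx] = INVALID_TAIL;  /* need a new pointer to age */
                }

                if (s->tails[!s->aged_idx] == INVALID_TAIL &&
                    hw_tail != s->tails[s->aged_idx]) {
                        s->tails[!s->aged_idx] = hw_tail;       /* start aging the new tail */
                        s->aging_ts = now;
                }

                return s->tails[s->aged_idx];
        }

The real function additionally bounds-checks the hardware tail against the buffer and only starts aging a new pointer once it is at least one whole report ahead of the aged one.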
@@ -421,8 +539,6 @@ static int append_oa_sample(struct i915_perf_stream *stream,
421 * @buf: destination buffer given by userspace 539 * @buf: destination buffer given by userspace
422 * @count: the number of bytes userspace wants to read 540 * @count: the number of bytes userspace wants to read
423 * @offset: (inout): the current position for writing into @buf 541 * @offset: (inout): the current position for writing into @buf
424 * @head_ptr: (inout): the current oa buffer cpu read position
425 * @tail: the current oa buffer gpu write position
426 * 542 *
427 * Notably any error condition resulting in a short read (-%ENOSPC or 543 * Notably any error condition resulting in a short read (-%ENOSPC or
428 * -%EFAULT) will be returned even though one or more records may 544 * -%EFAULT) will be returned even though one or more records may
@@ -431,7 +547,7 @@ static int append_oa_sample(struct i915_perf_stream *stream,
431 * userspace. 547 * userspace.
432 * 548 *
433 * Note: reports are consumed from the head, and appended to the 549 * Note: reports are consumed from the head, and appended to the
434 * tail, so the head chases the tail?... If you think that's mad 550 * tail, so the tail chases the head?... If you think that's mad
435 * and back-to-front you're not alone, but this follows the 551 * and back-to-front you're not alone, but this follows the
436 * Gen PRM naming convention. 552 * Gen PRM naming convention.
437 * 553 *
@@ -440,57 +556,55 @@ static int append_oa_sample(struct i915_perf_stream *stream,
440static int gen7_append_oa_reports(struct i915_perf_stream *stream, 556static int gen7_append_oa_reports(struct i915_perf_stream *stream,
441 char __user *buf, 557 char __user *buf,
442 size_t count, 558 size_t count,
443 size_t *offset, 559 size_t *offset)
444 u32 *head_ptr,
445 u32 tail)
446{ 560{
447 struct drm_i915_private *dev_priv = stream->dev_priv; 561 struct drm_i915_private *dev_priv = stream->dev_priv;
448 int report_size = dev_priv->perf.oa.oa_buffer.format_size; 562 int report_size = dev_priv->perf.oa.oa_buffer.format_size;
449 u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; 563 u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
450 int tail_margin = dev_priv->perf.oa.tail_margin;
451 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); 564 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
452 u32 mask = (OA_BUFFER_SIZE - 1); 565 u32 mask = (OA_BUFFER_SIZE - 1);
453 u32 head; 566 size_t start_offset = *offset;
567 unsigned long flags;
568 unsigned int aged_tail_idx;
569 u32 head, tail;
454 u32 taken; 570 u32 taken;
455 int ret = 0; 571 int ret = 0;
456 572
457 if (WARN_ON(!stream->enabled)) 573 if (WARN_ON(!stream->enabled))
458 return -EIO; 574 return -EIO;
459 575
460 head = *head_ptr - gtt_offset; 576 spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
461 tail -= gtt_offset;
462 577
463 /* The OA unit is expected to wrap the tail pointer according to the OA 578 head = dev_priv->perf.oa.oa_buffer.head;
464 * buffer size and since we should never write a misaligned head 579 aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
465 * pointer we don't expect to read one back either... 580 tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
466 */
467 if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE ||
468 head % report_size) {
469 DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n",
470 head, tail);
471 dev_priv->perf.oa.ops.oa_disable(dev_priv);
472 dev_priv->perf.oa.ops.oa_enable(dev_priv);
473 *head_ptr = I915_READ(GEN7_OASTATUS2) &
474 GEN7_OASTATUS2_HEAD_MASK;
475 return -EIO;
476 }
477 581
582 spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
478 583
479 /* The tail pointer increases in 64 byte increments, not in report_size 584 /* An invalid tail pointer here means we're still waiting for the poll
480 * steps... 585 * hrtimer callback to give us a pointer
481 */ 586 */
482 tail &= ~(report_size - 1); 587 if (tail == INVALID_TAIL_PTR)
588 return -EAGAIN;
483 589
484 /* Move the tail pointer back by the current tail_margin to account for 590 /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
485 * the possibility that the latest reports may not have really landed 591 * while indexing relative to oa_buf_base.
486 * in memory yet...
487 */ 592 */
593 head -= gtt_offset;
594 tail -= gtt_offset;
488 595
489 if (OA_TAKEN(tail, head) < report_size + tail_margin) 596 /* An out of bounds or misaligned head or tail pointer implies a driver
490 return -EAGAIN; 597 * bug since we validate + align the tail pointers we read from the
598 * hardware and we are in full control of the head pointer which should
599 * only be incremented by multiples of the report size (notably also
600 * all a power of two).
601 */
602 if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
603 tail > OA_BUFFER_SIZE || tail % report_size,
604 "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
605 head, tail))
606 return -EIO;
491 607
492 tail -= tail_margin;
493 tail &= mask;
494 608
495 for (/* none */; 609 for (/* none */;
496 (taken = OA_TAKEN(tail, head)); 610 (taken = OA_TAKEN(tail, head));
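OA_TAKEN() relies on the usual power-of-two ring-buffer arithmetic: with head and tail kept as byte offsets into a 2^n-sized buffer, masking their difference yields the amount of unread data even after the tail has wrapped. A tiny standalone illustration (the buffer size here is made up, not the OA buffer's):

        #include <assert.h>

        #define BUF_SIZE        (1u << 16)                      /* must be a power of two */
        #define TAKEN(tail, head)       (((tail) - (head)) & (BUF_SIZE - 1))

        int main(void)
        {
                assert(TAKEN(0x300u, 0x100u) == 0x200);         /* no wrap: plain difference */
                assert(TAKEN(0x040u, BUF_SIZE - 0x40) == 0x80); /* tail wrapped past the end */
                assert(TAKEN(0x500u, 0x500u) == 0);             /* head caught up: buffer empty */
                return 0;
        }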
@@ -518,7 +632,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
518 * copying it to userspace... 632 * copying it to userspace...
519 */ 633 */
520 if (report32[0] == 0) { 634 if (report32[0] == 0) {
521 DRM_NOTE("Skipping spurious, invalid OA report\n"); 635 if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
636 DRM_NOTE("Skipping spurious, invalid OA report\n");
522 continue; 637 continue;
523 } 638 }
524 639
@@ -535,7 +650,21 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
535 report32[0] = 0; 650 report32[0] = 0;
536 } 651 }
537 652
538 *head_ptr = gtt_offset + head; 653 if (start_offset != *offset) {
654 spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
655
656 /* We removed the gtt_offset for the copy loop above, indexing
657 * relative to oa_buf_base so put back here...
658 */
659 head += gtt_offset;
660
661 I915_WRITE(GEN7_OASTATUS2,
662 ((head & GEN7_OASTATUS2_HEAD_MASK) |
663 OA_MEM_SELECT_GGTT));
664 dev_priv->perf.oa.oa_buffer.head = head;
665
666 spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
667 }
539 668
540 return ret; 669 return ret;
541} 670}
@@ -562,22 +691,14 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
562 size_t *offset) 691 size_t *offset)
563{ 692{
564 struct drm_i915_private *dev_priv = stream->dev_priv; 693 struct drm_i915_private *dev_priv = stream->dev_priv;
565 int report_size = dev_priv->perf.oa.oa_buffer.format_size;
566 u32 oastatus2;
567 u32 oastatus1; 694 u32 oastatus1;
568 u32 head;
569 u32 tail;
570 int ret; 695 int ret;
571 696
572 if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) 697 if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
573 return -EIO; 698 return -EIO;
574 699
575 oastatus2 = I915_READ(GEN7_OASTATUS2);
576 oastatus1 = I915_READ(GEN7_OASTATUS1); 700 oastatus1 = I915_READ(GEN7_OASTATUS1);
577 701
578 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
579 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
580
581 /* XXX: On Haswell we don't have a safe way to clear oastatus1 702 /* XXX: On Haswell we don't have a safe way to clear oastatus1
582 * bits while the OA unit is enabled (while the tail pointer 703 * bits while the OA unit is enabled (while the tail pointer
583 * may be updated asynchronously) so we ignore status bits 704 * may be updated asynchronously) so we ignore status bits
@@ -616,11 +737,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
616 dev_priv->perf.oa.ops.oa_disable(dev_priv); 737 dev_priv->perf.oa.ops.oa_disable(dev_priv);
617 dev_priv->perf.oa.ops.oa_enable(dev_priv); 738 dev_priv->perf.oa.ops.oa_enable(dev_priv);
618 739
619 oastatus2 = I915_READ(GEN7_OASTATUS2);
620 oastatus1 = I915_READ(GEN7_OASTATUS1); 740 oastatus1 = I915_READ(GEN7_OASTATUS1);
621
622 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
623 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
624 } 741 }
625 742
626 if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { 743 if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
@@ -632,29 +749,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
632 GEN7_OASTATUS1_REPORT_LOST; 749 GEN7_OASTATUS1_REPORT_LOST;
633 } 750 }
634 751
635 ret = gen7_append_oa_reports(stream, buf, count, offset, 752 return gen7_append_oa_reports(stream, buf, count, offset);
636 &head, tail);
637
638 /* All the report sizes are a power of two and the
639 * head should always be incremented by some multiple
640 * of the report size.
641 *
642 * A warning here, but notably if we later read back a
643 * misaligned pointer we will treat that as a bug since
644 * it could lead to a buffer overrun.
645 */
646 WARN_ONCE(head & (report_size - 1),
647 "i915: Writing misaligned OA head pointer");
648
649 /* Note: we update the head pointer here even if an error
650 * was returned since the error may represent a short read
651 * where some some reports were successfully copied.
652 */
653 I915_WRITE(GEN7_OASTATUS2,
654 ((head & GEN7_OASTATUS2_HEAD_MASK) |
655 OA_MEM_SELECT_GGTT));
656
657 return ret;
658} 753}
659 754
660/** 755/**
@@ -679,14 +774,8 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
679 if (!dev_priv->perf.oa.periodic) 774 if (!dev_priv->perf.oa.periodic)
680 return -EIO; 775 return -EIO;
681 776
682 /* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it
683 * just performs mmio reads of the OA buffer head + tail pointers and
684 * it's assumed we're handling some operation that implies the stream
685 * can't be destroyed until completion (such as a read()) that ensures
686 * the device + OA buffer can't disappear
687 */
688 return wait_event_interruptible(dev_priv->perf.oa.poll_wq, 777 return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
689 !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)); 778 dev_priv->perf.oa.ops.oa_buffer_check(dev_priv));
690} 779}
691 780
692/** 781/**
@@ -744,6 +833,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
744{ 833{
745 struct drm_i915_private *dev_priv = stream->dev_priv; 834 struct drm_i915_private *dev_priv = stream->dev_priv;
746 struct intel_engine_cs *engine = dev_priv->engine[RCS]; 835 struct intel_engine_cs *engine = dev_priv->engine[RCS];
836 struct intel_ring *ring;
747 int ret; 837 int ret;
748 838
749 ret = i915_mutex_lock_interruptible(&dev_priv->drm); 839 ret = i915_mutex_lock_interruptible(&dev_priv->drm);
@@ -755,9 +845,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
755 * 845 *
756 * NB: implied RCS engine... 846 * NB: implied RCS engine...
757 */ 847 */
758 ret = engine->context_pin(engine, stream->ctx); 848 ring = engine->context_pin(engine, stream->ctx);
759 if (ret) 849 mutex_unlock(&dev_priv->drm.struct_mutex);
760 goto unlock; 850 if (IS_ERR(ring))
851 return PTR_ERR(ring);
761 852
762 /* Explicitly track the ID (instead of calling i915_ggtt_offset() 853 /* Explicitly track the ID (instead of calling i915_ggtt_offset()
763 * on the fly) considering the difference with gen8+ and 854 * on the fly) considering the difference with gen8+ and
@@ -766,10 +857,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
766 dev_priv->perf.oa.specific_ctx_id = 857 dev_priv->perf.oa.specific_ctx_id =
767 i915_ggtt_offset(stream->ctx->engine[engine->id].state); 858 i915_ggtt_offset(stream->ctx->engine[engine->id].state);
768 859
769unlock: 860 return 0;
770 mutex_unlock(&dev_priv->drm.struct_mutex);
771
772 return ret;
773} 861}
774 862
775/** 863/**
@@ -824,19 +912,36 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
824 oa_put_render_ctx_id(stream); 912 oa_put_render_ctx_id(stream);
825 913
826 dev_priv->perf.oa.exclusive_stream = NULL; 914 dev_priv->perf.oa.exclusive_stream = NULL;
915
916 if (dev_priv->perf.oa.spurious_report_rs.missed) {
917 DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
918 dev_priv->perf.oa.spurious_report_rs.missed);
919 }
827} 920}
828 921
829static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) 922static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
830{ 923{
831 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); 924 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
925 unsigned long flags;
926
927 spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
832 928
833 /* Pre-DevBDW: OABUFFER must be set with counters off, 929 /* Pre-DevBDW: OABUFFER must be set with counters off,
834 * before OASTATUS1, but after OASTATUS2 930 * before OASTATUS1, but after OASTATUS2
835 */ 931 */
836 I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */ 932 I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */
933 dev_priv->perf.oa.oa_buffer.head = gtt_offset;
934
837 I915_WRITE(GEN7_OABUFFER, gtt_offset); 935 I915_WRITE(GEN7_OABUFFER, gtt_offset);
936
838 I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ 937 I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
839 938
939 /* Mark that we need updated tail pointers to read from... */
940 dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
941 dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
942
943 spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
944
840 /* On Haswell we have to track which OASTATUS1 flags we've 945 /* On Haswell we have to track which OASTATUS1 flags we've
841 * already seen since they can't be cleared while periodic 946 * already seen since they can't be cleared while periodic
842 * sampling is enabled. 947 * sampling is enabled.
@@ -1094,12 +1199,6 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
1094 hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); 1199 hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
1095} 1200}
1096 1201
1097static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
1098{
1099 return div_u64(1000000000ULL * (2ULL << exponent),
1100 dev_priv->perf.oa.timestamp_frequency);
1101}
1102
1103static const struct i915_perf_stream_ops i915_oa_stream_ops = { 1202static const struct i915_perf_stream_ops i915_oa_stream_ops = {
1104 .destroy = i915_oa_stream_destroy, 1203 .destroy = i915_oa_stream_destroy,
1105 .enable = i915_oa_stream_enable, 1204 .enable = i915_oa_stream_enable,
@@ -1173,6 +1272,26 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
1173 return -EINVAL; 1272 return -EINVAL;
1174 } 1273 }
1175 1274
1275 /* We set up some ratelimit state to potentially throttle any _NOTES
1276 * about spurious, invalid OA reports which we don't forward to
1277 * userspace.
1278 *
1279 * The initialization is associated with opening the stream (not driver
1280 * init) considering we print a _NOTE about any throttling when closing
1281 * the stream instead of waiting until driver _fini which no one would
1282 * ever see.
1283 *
1284 * Using the same limiting factors as printk_ratelimit()
1285 */
1286 ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
1287 5 * HZ, 10);
1288 /* Since we use a DRM_NOTE for spurious reports it would be
1289 * inconsistent to let __ratelimit() automatically print a warning for
1290 * throttling.
1291 */
1292 ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
1293 RATELIMIT_MSG_ON_RELEASE);
1294
1176 stream->sample_size = sizeof(struct drm_i915_perf_record_header); 1295 stream->sample_size = sizeof(struct drm_i915_perf_record_header);
1177 1296
1178 format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size; 1297 format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
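The comment above uses the stock <linux/ratelimit.h> machinery: __ratelimit() returns true while the caller is within its burst budget, suppressed calls accumulate in .missed, and RATELIMIT_MSG_ON_RELEASE stops __ratelimit() from printing its own throttling warning. A stripped-down sketch of the same shape outside the driver (all names here are invented):

        #include <linux/ratelimit.h>
        #include <linux/printk.h>
        #include <linux/jiffies.h>

        static struct ratelimit_state example_rs;

        static void example_open(void)
        {
                /* Same limits as printk_ratelimit(): at most 10 messages per 5s. */
                ratelimit_state_init(&example_rs, 5 * HZ, 10);
                /* Report the suppressed count ourselves instead of letting
                 * __ratelimit() print its own warning.
                 */
                ratelimit_set_flags(&example_rs, RATELIMIT_MSG_ON_RELEASE);
        }

        static void example_spurious_event(void)
        {
                if (__ratelimit(&example_rs))
                        pr_notice("skipping spurious event\n");
        }

        static void example_close(void)
        {
                if (example_rs.missed)
                        pr_notice("%d notices suppressed by ratelimiting\n",
                                  example_rs.missed);
        }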
@@ -1190,20 +1309,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
1190 dev_priv->perf.oa.metrics_set = props->metrics_set; 1309 dev_priv->perf.oa.metrics_set = props->metrics_set;
1191 1310
1192 dev_priv->perf.oa.periodic = props->oa_periodic; 1311 dev_priv->perf.oa.periodic = props->oa_periodic;
1193 if (dev_priv->perf.oa.periodic) { 1312 if (dev_priv->perf.oa.periodic)
1194 u32 tail;
1195
1196 dev_priv->perf.oa.period_exponent = props->oa_period_exponent; 1313 dev_priv->perf.oa.period_exponent = props->oa_period_exponent;
1197 1314
1198 /* See comment for OA_TAIL_MARGIN_NSEC for details
1199 * about this tail_margin...
1200 */
1201 tail = div64_u64(OA_TAIL_MARGIN_NSEC,
1202 oa_exponent_to_ns(dev_priv,
1203 props->oa_period_exponent));
1204 dev_priv->perf.oa.tail_margin = (tail + 1) * format_size;
1205 }
1206
1207 if (stream->ctx) { 1315 if (stream->ctx) {
1208 ret = oa_get_render_ctx_id(stream); 1316 ret = oa_get_render_ctx_id(stream);
1209 if (ret) 1317 if (ret)
@@ -1352,7 +1460,15 @@ static ssize_t i915_perf_read(struct file *file,
1352 mutex_unlock(&dev_priv->perf.lock); 1460 mutex_unlock(&dev_priv->perf.lock);
1353 } 1461 }
1354 1462
1355 if (ret >= 0) { 1463 /* We allow the poll checking to sometimes report false positive POLLIN
1464 * events where we might actually report EAGAIN on read() if there's
1465 * not really any data available. In this situation though we don't
1466 * want to enter a busy loop between poll() reporting a POLLIN event
1467 * and read() returning -EAGAIN. Clearing the oa.pollin state here
1468 * effectively ensures we back off until the next hrtimer callback
1469 * before reporting another POLLIN event.
1470 */
1471 if (ret >= 0 || ret == -EAGAIN) {
1356 /* Maybe make ->pollin per-stream state if we support multiple 1472 /* Maybe make ->pollin per-stream state if we support multiple
1357 * concurrent streams in the future. 1473 * concurrent streams in the future.
1358 */ 1474 */
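The new `ret >= 0 || ret == -EAGAIN` check implements a back-off handshake: a periodic check sets a cached readiness flag and wakes poll() waiters, poll() reports POLLIN purely from that flag, and read() clears the flag even when it returns -EAGAIN so a false positive cannot turn into a poll()/read() busy loop. A condensed sketch of that shape; device_has_data() and copy_out_records() are invented stand-ins:

        #include <linux/fs.h>
        #include <linux/poll.h>
        #include <linux/wait.h>

        static bool data_ready;                 /* cached readiness, set by the periodic check */
        static DECLARE_WAIT_QUEUE_HEAD(poll_wq);

        extern bool device_has_data(void);                               /* invented */
        extern ssize_t copy_out_records(char __user *buf, size_t count); /* invented */

        static void periodic_check(void)        /* e.g. from an hrtimer callback */
        {
                if (device_has_data()) {
                        data_ready = true;
                        wake_up(&poll_wq);
                }
        }

        static unsigned int example_poll(struct file *file, poll_table *wait)
        {
                poll_wait(file, &poll_wq, wait);
                return data_ready ? POLLIN | POLLRDNORM : 0;
        }

        static ssize_t example_read(struct file *file, char __user *buf,
                                    size_t count, loff_t *ppos)
        {
                ssize_t ret = copy_out_records(buf, count);     /* may be -EAGAIN */

                /* Back off until the next periodic check, even on -EAGAIN. */
                if (ret >= 0 || ret == -EAGAIN)
                        data_ready = false;

                return ret;
        }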
@@ -1368,7 +1484,7 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
1368 container_of(hrtimer, typeof(*dev_priv), 1484 container_of(hrtimer, typeof(*dev_priv),
1369 perf.oa.poll_check_timer); 1485 perf.oa.poll_check_timer);
1370 1486
1371 if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) { 1487 if (dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)) {
1372 dev_priv->perf.oa.pollin = true; 1488 dev_priv->perf.oa.pollin = true;
1373 wake_up(&dev_priv->perf.oa.poll_wq); 1489 wake_up(&dev_priv->perf.oa.poll_wq);
1374 } 1490 }
@@ -1817,11 +1933,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
1817 break; 1933 break;
1818 case DRM_I915_PERF_PROP_OA_FORMAT: 1934 case DRM_I915_PERF_PROP_OA_FORMAT:
1819 if (value == 0 || value >= I915_OA_FORMAT_MAX) { 1935 if (value == 0 || value >= I915_OA_FORMAT_MAX) {
1820 DRM_DEBUG("Invalid OA report format\n"); 1936 DRM_DEBUG("Out-of-range OA report format %llu\n",
1937 value);
1821 return -EINVAL; 1938 return -EINVAL;
1822 } 1939 }
1823 if (!dev_priv->perf.oa.oa_formats[value].size) { 1940 if (!dev_priv->perf.oa.oa_formats[value].size) {
1824 DRM_DEBUG("Invalid OA report format\n"); 1941 DRM_DEBUG("Unsupported OA report format %llu\n",
1942 value);
1825 return -EINVAL; 1943 return -EINVAL;
1826 } 1944 }
1827 props->oa_format = value; 1945 props->oa_format = value;
@@ -2063,6 +2181,7 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
2063 INIT_LIST_HEAD(&dev_priv->perf.streams); 2181 INIT_LIST_HEAD(&dev_priv->perf.streams);
2064 mutex_init(&dev_priv->perf.lock); 2182 mutex_init(&dev_priv->perf.lock);
2065 spin_lock_init(&dev_priv->perf.hook_lock); 2183 spin_lock_init(&dev_priv->perf.hook_lock);
2184 spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
2066 2185
2067 dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; 2186 dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
2068 dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; 2187 dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
@@ -2070,10 +2189,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
2070 dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; 2189 dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
2071 dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; 2190 dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
2072 dev_priv->perf.oa.ops.read = gen7_oa_read; 2191 dev_priv->perf.oa.ops.read = gen7_oa_read;
2073 dev_priv->perf.oa.ops.oa_buffer_is_empty = 2192 dev_priv->perf.oa.ops.oa_buffer_check =
2074 gen7_oa_buffer_is_empty_fop_unlocked; 2193 gen7_oa_buffer_check_unlocked;
2075
2076 dev_priv->perf.oa.timestamp_frequency = 12500000;
2077 2194
2078 dev_priv->perf.oa.oa_formats = hsw_oa_formats; 2195 dev_priv->perf.oa.oa_formats = hsw_oa_formats;
2079 2196
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 11b12f412492..89888adb9af1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -85,6 +85,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
85#define VECS_HW 3 85#define VECS_HW 3
86#define VCS2_HW 4 86#define VCS2_HW 4
87 87
88/* Engine class */
89
90#define RENDER_CLASS 0
91#define VIDEO_DECODE_CLASS 1
92#define VIDEO_ENHANCEMENT_CLASS 2
93#define COPY_ENGINE_CLASS 3
94#define OTHER_CLASS 4
95
88/* PCI config space */ 96/* PCI config space */
89 97
90#define MCHBAR_I915 0x44 98#define MCHBAR_I915 0x44
@@ -3051,10 +3059,14 @@ enum skl_disp_power_wells {
3051#define CLKCFG_FSB_667 (3 << 0) /* hrawclk 166 */ 3059#define CLKCFG_FSB_667 (3 << 0) /* hrawclk 166 */
3052#define CLKCFG_FSB_800 (2 << 0) /* hrawclk 200 */ 3060#define CLKCFG_FSB_800 (2 << 0) /* hrawclk 200 */
3053#define CLKCFG_FSB_1067 (6 << 0) /* hrawclk 266 */ 3061#define CLKCFG_FSB_1067 (6 << 0) /* hrawclk 266 */
3062#define CLKCFG_FSB_1067_ALT (0 << 0) /* hrawclk 266 */
3054#define CLKCFG_FSB_1333 (7 << 0) /* hrawclk 333 */ 3063#define CLKCFG_FSB_1333 (7 << 0) /* hrawclk 333 */
3055/* Note, below two are guess */ 3064/*
3056#define CLKCFG_FSB_1600 (4 << 0) /* hrawclk 400 */ 3065 * Note that on at least on ELK the below value is reported for both
3057#define CLKCFG_FSB_1600_ALT (0 << 0) /* hrawclk 400 */ 3066 * 333 and 400 MHz BIOS FSB setting, but given that the gmch datasheet
3067 * lists only 200/266/333 MHz FSB as supported let's decode it as 333 MHz.
3068 */
3069#define CLKCFG_FSB_1333_ALT (4 << 0) /* hrawclk 333 */
3058#define CLKCFG_FSB_MASK (7 << 0) 3070#define CLKCFG_FSB_MASK (7 << 0)
3059#define CLKCFG_MEM_533 (1 << 4) 3071#define CLKCFG_MEM_533 (1 << 4)
3060#define CLKCFG_MEM_667 (2 << 4) 3072#define CLKCFG_MEM_667 (2 << 4)
@@ -3362,16 +3374,6 @@ enum skl_disp_power_wells {
3362#define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f) 3374#define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f)
3363#define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ 3375#define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
3364 GEN7_CXT_VFSTATE_SIZE(ctx_reg)) 3376 GEN7_CXT_VFSTATE_SIZE(ctx_reg))
3365/* Haswell does have the CXT_SIZE register however it does not appear to be
3366 * valid. Now, docs explain in dwords what is in the context object. The full
3367 * size is 70720 bytes, however, the power context and execlist context will
3368 * never be saved (power context is stored elsewhere, and execlists don't work
3369 * on HSW) - so the final size, including the extra state required for the
3370 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
3371 */
3372#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
3373/* Same as Haswell, but 72064 bytes now. */
3374#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE)
3375 3377
3376enum { 3378enum {
3377 INTEL_ADVANCED_CONTEXT = 0, 3379 INTEL_ADVANCED_CONTEXT = 0,
@@ -5437,9 +5439,7 @@ enum {
5437#define CURSOR_MODE_128_ARGB_AX ((1 << 5) | CURSOR_MODE_128_32B_AX) 5439#define CURSOR_MODE_128_ARGB_AX ((1 << 5) | CURSOR_MODE_128_32B_AX)
5438#define CURSOR_MODE_256_ARGB_AX ((1 << 5) | CURSOR_MODE_256_32B_AX) 5440#define CURSOR_MODE_256_ARGB_AX ((1 << 5) | CURSOR_MODE_256_32B_AX)
5439#define CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX) 5441#define CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX)
5440#define MCURSOR_PIPE_SELECT (1 << 28) 5442#define MCURSOR_PIPE_SELECT(pipe) ((pipe) << 28)
5441#define MCURSOR_PIPE_A 0x00
5442#define MCURSOR_PIPE_B (1 << 28)
5443#define MCURSOR_GAMMA_ENABLE (1 << 26) 5443#define MCURSOR_GAMMA_ENABLE (1 << 26)
5444#define CURSOR_ROTATE_180 (1<<15) 5444#define CURSOR_ROTATE_180 (1<<15)
5445#define CURSOR_TRICKLE_FEED_DISABLE (1 << 14) 5445#define CURSOR_TRICKLE_FEED_DISABLE (1 << 14)
@@ -5449,7 +5449,9 @@ enum {
5449#define CURSOR_POS_SIGN 0x8000 5449#define CURSOR_POS_SIGN 0x8000
5450#define CURSOR_X_SHIFT 0 5450#define CURSOR_X_SHIFT 0
5451#define CURSOR_Y_SHIFT 16 5451#define CURSOR_Y_SHIFT 16
5452#define CURSIZE _MMIO(0x700a0) 5452#define CURSIZE _MMIO(0x700a0) /* 845/865 */
5453#define _CUR_FBC_CTL_A 0x700a0 /* ivb+ */
5454#define CUR_FBC_CTL_EN (1 << 31)
5453#define _CURBCNTR 0x700c0 5455#define _CURBCNTR 0x700c0
5454#define _CURBBASE 0x700c4 5456#define _CURBBASE 0x700c4
5455#define _CURBPOS 0x700c8 5457#define _CURBPOS 0x700c8
@@ -5465,6 +5467,7 @@ enum {
5465#define CURCNTR(pipe) _CURSOR2(pipe, _CURACNTR) 5467#define CURCNTR(pipe) _CURSOR2(pipe, _CURACNTR)
5466#define CURBASE(pipe) _CURSOR2(pipe, _CURABASE) 5468#define CURBASE(pipe) _CURSOR2(pipe, _CURABASE)
5467#define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS) 5469#define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS)
5470#define CUR_FBC_CTL(pipe) _CURSOR2(pipe, _CUR_FBC_CTL_A)
5468 5471
5469#define CURSOR_A_OFFSET 0x70080 5472#define CURSOR_A_OFFSET 0x70080
5470#define CURSOR_B_OFFSET 0x700c0 5473#define CURSOR_B_OFFSET 0x700c0
@@ -5497,8 +5500,7 @@ enum {
5497#define DISPPLANE_PIPE_CSC_ENABLE (1<<24) 5500#define DISPPLANE_PIPE_CSC_ENABLE (1<<24)
5498#define DISPPLANE_SEL_PIPE_SHIFT 24 5501#define DISPPLANE_SEL_PIPE_SHIFT 24
5499#define DISPPLANE_SEL_PIPE_MASK (3<<DISPPLANE_SEL_PIPE_SHIFT) 5502#define DISPPLANE_SEL_PIPE_MASK (3<<DISPPLANE_SEL_PIPE_SHIFT)
5500#define DISPPLANE_SEL_PIPE_A 0 5503#define DISPPLANE_SEL_PIPE(pipe) ((pipe)<<DISPPLANE_SEL_PIPE_SHIFT)
5501#define DISPPLANE_SEL_PIPE_B (1<<DISPPLANE_SEL_PIPE_SHIFT)
5502#define DISPPLANE_SRC_KEY_ENABLE (1<<22) 5504#define DISPPLANE_SRC_KEY_ENABLE (1<<22)
5503#define DISPPLANE_SRC_KEY_DISABLE 0 5505#define DISPPLANE_SRC_KEY_DISABLE 0
5504#define DISPPLANE_LINE_DOUBLE (1<<20) 5506#define DISPPLANE_LINE_DOUBLE (1<<20)
@@ -8276,7 +8278,7 @@ enum {
8276 8278
8277/* MIPI DSI registers */ 8279/* MIPI DSI registers */
8278 8280
8279#define _MIPI_PORT(port, a, c) ((port) ? c : a) /* ports A and C only */ 8281#define _MIPI_PORT(port, a, c) (((port) == PORT_A) ? a : c) /* ports A and C only */
8280#define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) 8282#define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c))
8281 8283
8282#define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) 8284#define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a277f8eb7beb..474d23c0c0ce 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -12,6 +12,7 @@
12#include <linux/reservation.h> 12#include <linux/reservation.h>
13 13
14#include "i915_sw_fence.h" 14#include "i915_sw_fence.h"
15#include "i915_selftest.h"
15 16
16#define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */ 17#define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */
17 18
@@ -120,34 +121,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence)
120} 121}
121#endif 122#endif
122 123
123static void i915_sw_fence_release(struct kref *kref)
124{
125 struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
126
127 WARN_ON(atomic_read(&fence->pending) > 0);
128 debug_fence_destroy(fence);
129
130 if (fence->flags & I915_SW_FENCE_MASK) {
131 __i915_sw_fence_notify(fence, FENCE_FREE);
132 } else {
133 i915_sw_fence_fini(fence);
134 kfree(fence);
135 }
136}
137
138static void i915_sw_fence_put(struct i915_sw_fence *fence)
139{
140 debug_fence_assert(fence);
141 kref_put(&fence->kref, i915_sw_fence_release);
142}
143
144static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
145{
146 debug_fence_assert(fence);
147 kref_get(&fence->kref);
148 return fence;
149}
150
151static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, 124static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
152 struct list_head *continuation) 125 struct list_head *continuation)
153{ 126{
@@ -202,13 +175,15 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
202 175
203 debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY); 176 debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY);
204 177
205 if (fence->flags & I915_SW_FENCE_MASK && 178 if (__i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
206 __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
207 return; 179 return;
208 180
209 debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE); 181 debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE);
210 182
211 __i915_sw_fence_wake_up_all(fence, continuation); 183 __i915_sw_fence_wake_up_all(fence, continuation);
184
185 debug_fence_destroy(fence);
186 __i915_sw_fence_notify(fence, FENCE_FREE);
212} 187}
213 188
214static void i915_sw_fence_complete(struct i915_sw_fence *fence) 189static void i915_sw_fence_complete(struct i915_sw_fence *fence)
@@ -232,33 +207,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
232 const char *name, 207 const char *name,
233 struct lock_class_key *key) 208 struct lock_class_key *key)
234{ 209{
235 BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK); 210 BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);
236 211
237 debug_fence_init(fence); 212 debug_fence_init(fence);
238 213
239 __init_waitqueue_head(&fence->wait, name, key); 214 __init_waitqueue_head(&fence->wait, name, key);
240 kref_init(&fence->kref);
241 atomic_set(&fence->pending, 1); 215 atomic_set(&fence->pending, 1);
242 fence->flags = (unsigned long)fn; 216 fence->flags = (unsigned long)fn;
243} 217}
244 218
245static void __i915_sw_fence_commit(struct i915_sw_fence *fence)
246{
247 i915_sw_fence_complete(fence);
248 i915_sw_fence_put(fence);
249}
250
251void i915_sw_fence_commit(struct i915_sw_fence *fence) 219void i915_sw_fence_commit(struct i915_sw_fence *fence)
252{ 220{
253 debug_fence_activate(fence); 221 debug_fence_activate(fence);
254 __i915_sw_fence_commit(fence); 222 i915_sw_fence_complete(fence);
255} 223}
256 224
257static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key) 225static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
258{ 226{
259 list_del(&wq->task_list); 227 list_del(&wq->task_list);
260 __i915_sw_fence_complete(wq->private, key); 228 __i915_sw_fence_complete(wq->private, key);
261 i915_sw_fence_put(wq->private); 229
262 if (wq->flags & I915_SW_FENCE_FLAG_ALLOC) 230 if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
263 kfree(wq); 231 kfree(wq);
264 return 0; 232 return 0;
@@ -307,7 +275,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
307 unsigned long flags; 275 unsigned long flags;
308 bool err; 276 bool err;
309 277
310 if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG)) 278 if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
311 return false; 279 return false;
312 280
313 spin_lock_irqsave(&i915_sw_fence_lock, flags); 281 spin_lock_irqsave(&i915_sw_fence_lock, flags);
@@ -353,7 +321,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
353 INIT_LIST_HEAD(&wq->task_list); 321 INIT_LIST_HEAD(&wq->task_list);
354 wq->flags = pending; 322 wq->flags = pending;
355 wq->func = i915_sw_fence_wake; 323 wq->func = i915_sw_fence_wake;
356 wq->private = i915_sw_fence_get(fence); 324 wq->private = fence;
357 325
358 i915_sw_fence_await(fence); 326 i915_sw_fence_await(fence);
359 327
@@ -402,7 +370,7 @@ static void timer_i915_sw_fence_wake(unsigned long data)
402 dma_fence_put(cb->dma); 370 dma_fence_put(cb->dma);
403 cb->dma = NULL; 371 cb->dma = NULL;
404 372
405 __i915_sw_fence_commit(cb->fence); 373 i915_sw_fence_complete(cb->fence);
406 cb->timer.function = NULL; 374 cb->timer.function = NULL;
407} 375}
408 376
@@ -413,7 +381,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma,
413 381
414 del_timer_sync(&cb->timer); 382 del_timer_sync(&cb->timer);
415 if (cb->timer.function) 383 if (cb->timer.function)
416 __i915_sw_fence_commit(cb->fence); 384 i915_sw_fence_complete(cb->fence);
417 dma_fence_put(cb->dma); 385 dma_fence_put(cb->dma);
418 386
419 kfree(cb); 387 kfree(cb);
@@ -440,7 +408,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
440 return dma_fence_wait(dma, false); 408 return dma_fence_wait(dma, false);
441 } 409 }
442 410
443 cb->fence = i915_sw_fence_get(fence); 411 cb->fence = fence;
444 i915_sw_fence_await(fence); 412 i915_sw_fence_await(fence);
445 413
446 cb->dma = NULL; 414 cb->dma = NULL;
@@ -523,3 +491,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
523 491
524 return ret; 492 return ret;
525} 493}
494
495#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
496#include "selftests/i915_sw_fence.c"
497#endif
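With the kref gone, the lifetime rules change: a notify callback is now mandatory (note the BUG_ON(!fn) above), and the final FENCE_FREE notification, sent once the fence has completed and gone idle, replaces the old reference-counted release, so the owner frees its container from the callback. A sketch of a minimal notify hook, assuming the i915_sw_fence_notify_t signature from i915_sw_fence.h and an invented container type:

        struct my_work {
                struct i915_sw_fence fence;
                /* ... owner state ... */
        };

        static int my_fence_notify(struct i915_sw_fence *fence,
                                   enum i915_sw_fence_notify state)
        {
                struct my_work *work = container_of(fence, typeof(*work), fence);

                switch (state) {
                case FENCE_COMPLETE:
                        /* everything we awaited has signaled; kick off the work */
                        break;

                case FENCE_FREE:
                        /* last notification, no kref to drop any more */
                        kfree(work);
                        break;
                }

                return NOTIFY_DONE;
        }

        /* i915_sw_fence_init(&work->fence, my_fence_notify); */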
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index d31cefbbcc04..1d3b6051daaf 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -23,7 +23,6 @@ struct reservation_object;
23struct i915_sw_fence { 23struct i915_sw_fence {
24 wait_queue_head_t wait; 24 wait_queue_head_t wait;
25 unsigned long flags; 25 unsigned long flags;
26 struct kref kref;
27 atomic_t pending; 26 atomic_t pending;
28}; 27};
29 28
diff --git a/drivers/gpu/drm/i915/i915_syncmap.c b/drivers/gpu/drm/i915/i915_syncmap.c
new file mode 100644
index 000000000000..0087acf731a8
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_syncmap.c
@@ -0,0 +1,412 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <linux/slab.h>
26
27#include "i915_syncmap.h"
28
29#include "i915_gem.h" /* GEM_BUG_ON() */
30#include "i915_selftest.h"
31
32#define SHIFT ilog2(KSYNCMAP)
33#define MASK (KSYNCMAP - 1)
34
35/*
36 * struct i915_syncmap is a layer of a radixtree that maps a u64 fence
37 * context id to the last u32 fence seqno waited upon from that context.
38 * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
39 * the root. This allows us to access the whole tree via a single pointer
40 * to the most recently used layer. We expect fence contexts to be dense
41 * and most reuse to be on the same i915_gem_context but on neighbouring
42 * engines (i.e. on adjacent contexts) and reuse the same leaf, a very
43 * effective lookup cache. If the new lookup is not on the same leaf, we
44 * expect it to be on the neighbouring branch.
45 *
46 * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
47 * allows us to store whether a particular seqno is valid (i.e. allows us
48 * to distinguish unset from 0).
49 *
50 * A branch holds an array of layer pointers, and has height > 0, and always
51 * has at least 2 layers (either branches or leaves) below it.
52 *
53 * For example,
54 * for x in
55 * 0 1 2 0x10 0x11 0x200 0x201
56 * 0x500000 0x500001 0x503000 0x503001
57 * 0xE<<60:
58 * i915_syncmap_set(&sync, x, lower_32_bits(x));
59 * will build a tree like:
60 * 0xXXXXXXXXXXXXXXXX
61 * 0-> 0x0000000000XXXXXX
62 * | 0-> 0x0000000000000XXX
63 * | | 0-> 0x00000000000000XX
64 * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2
65 * | | | 1-> 0x000000000000001X 0:10, 1:11
66 * | | 2-> 0x000000000000020X 0:200, 1:201
67 * | 5-> 0x000000000050XXXX
68 * | 0-> 0x000000000050000X 0:500000, 1:500001
69 * | 3-> 0x000000000050300X 0:503000, 1:503001
70 * e-> 0xe00000000000000X e:e
71 */
72
73struct i915_syncmap {
74 u64 prefix;
75 unsigned int height;
76 unsigned int bitmap;
77 struct i915_syncmap *parent;
78 /*
79 * Following this header is an array of either seqno or child pointers:
80 * union {
81 * u32 seqno[KSYNCMAP];
82 * struct i915_syncmap *child[KSYNCMAP];
83 * };
84 */
85};
86
87/**
88 * i915_syncmap_init -- initialise the #i915_syncmap
89 * @root - pointer to the #i915_syncmap
90 */
91void i915_syncmap_init(struct i915_syncmap **root)
92{
93 BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
94 BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
95 BUILD_BUG_ON(KSYNCMAP > BITS_PER_BYTE * sizeof((*root)->bitmap));
96 *root = NULL;
97}
98
99static inline u32 *__sync_seqno(struct i915_syncmap *p)
100{
101 GEM_BUG_ON(p->height);
102 return (u32 *)(p + 1);
103}
104
105static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
106{
107 GEM_BUG_ON(!p->height);
108 return (struct i915_syncmap **)(p + 1);
109}
110
111static inline unsigned int
112__sync_branch_idx(const struct i915_syncmap *p, u64 id)
113{
114 return (id >> p->height) & MASK;
115}
116
117static inline unsigned int
118__sync_leaf_idx(const struct i915_syncmap *p, u64 id)
119{
120 GEM_BUG_ON(p->height);
121 return id & MASK;
122}
123
124static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
125{
126 return id >> p->height >> SHIFT;
127}
128
129static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
130{
131 GEM_BUG_ON(p->height);
132 return id >> SHIFT;
133}
134
135static inline bool seqno_later(u32 a, u32 b)
136{
137 return (s32)(a - b) >= 0;
138}
139
140/**
141 * i915_syncmap_is_later -- compare against the last known sync point
142 * @root - pointer to the #i915_syncmap
143 * @id - the context id (other timeline) we are synchronising to
144 * @seqno - the sequence number along the other timeline
145 *
146 * If we have already synchronised this @root timeline with another (@id) then
147 * we can omit any repeated or earlier synchronisation requests. If the two
148 * timelines are already coupled, we can also omit the dependency between the
149 * two as that is already known via the timeline.
150 *
151 * Returns true if the two timelines are already synchronised wrt @seqno,
152 * false if not and the synchronisation must be emitted.
153 */
154bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
155{
156 struct i915_syncmap *p;
157 unsigned int idx;
158
159 p = *root;
160 if (!p)
161 return false;
162
163 if (likely(__sync_leaf_prefix(p, id) == p->prefix))
164 goto found;
165
166 /* First climb the tree back to a parent branch */
167 do {
168 p = p->parent;
169 if (!p)
170 return false;
171
172 if (__sync_branch_prefix(p, id) == p->prefix)
173 break;
174 } while (1);
175
176 /* And then descend again until we find our leaf */
177 do {
178 if (!p->height)
179 break;
180
181 p = __sync_child(p)[__sync_branch_idx(p, id)];
182 if (!p)
183 return false;
184
185 if (__sync_branch_prefix(p, id) != p->prefix)
186 return false;
187 } while (1);
188
189 *root = p;
190found:
191 idx = __sync_leaf_idx(p, id);
192 if (!(p->bitmap & BIT(idx)))
193 return false;
194
195 return seqno_later(__sync_seqno(p)[idx], seqno);
196}
197
198static struct i915_syncmap *
199__sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
200{
201 struct i915_syncmap *p;
202
203 p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
204 if (unlikely(!p))
205 return NULL;
206
207 p->parent = parent;
208 p->height = 0;
209 p->bitmap = 0;
210 p->prefix = __sync_leaf_prefix(p, id);
211 return p;
212}
213
214static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
215{
216 unsigned int idx = __sync_leaf_idx(p, id);
217
218 p->bitmap |= BIT(idx);
219 __sync_seqno(p)[idx] = seqno;
220}
221
222static inline void __sync_set_child(struct i915_syncmap *p,
223 unsigned int idx,
224 struct i915_syncmap *child)
225{
226 p->bitmap |= BIT(idx);
227 __sync_child(p)[idx] = child;
228}
229
230static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
231{
232 struct i915_syncmap *p = *root;
233 unsigned int idx;
234
235 if (!p) {
236 p = __sync_alloc_leaf(NULL, id);
237 if (unlikely(!p))
238 return -ENOMEM;
239
240 goto found;
241 }
242
243 /* Caller handled the likely cached case */
244 GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);
245
246 /* Climb back up the tree until we find a common prefix */
247 do {
248 if (!p->parent)
249 break;
250
251 p = p->parent;
252
253 if (__sync_branch_prefix(p, id) == p->prefix)
254 break;
255 } while (1);
256
257 /*
258 * No shortcut, we have to descend the tree to find the right layer
259 * containing this fence.
260 *
261 * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
262 * or lower layers. Leaf nodes (height = 0) contain the fences, all
263 * other nodes (height > 0) are internal layers that point to a lower
264 * node. Each internal layer has at least 2 descendants.
265 *
266 * Starting at the top, we check whether the current prefix matches. If
267 * it doesn't, we have gone past our target and need to insert a join
268 * into the tree, and a new leaf node for the target as a descendant
269 * of the join, as well as the original layer.
270 *
271 * The matching prefix means we are still following the right branch
272 * of the tree. If it has height 0, we have found our leaf and just
273 * need to replace the fence slot with ourselves. If the height is
274 * not zero, our slot contains the next layer in the tree (unless
275 * it is empty, in which case we can add ourselves as a new leaf).
276 * As we descend the tree, the prefix grows (and height decreases).
277 */
278 do {
279 struct i915_syncmap *next;
280
281 if (__sync_branch_prefix(p, id) != p->prefix) {
282 unsigned int above;
283
284 /* Insert a join above the current layer */
285 next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
286 GFP_KERNEL);
287 if (unlikely(!next))
288 return -ENOMEM;
289
290 /* Compute the height at which these two diverge */
291 above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
292 above = round_up(above, SHIFT);
293 next->height = above + p->height;
294 next->prefix = __sync_branch_prefix(next, id);
295
296 /* Insert the join into the parent */
297 if (p->parent) {
298 idx = __sync_branch_idx(p->parent, id);
299 __sync_child(p->parent)[idx] = next;
300 GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
301 }
302 next->parent = p->parent;
303
304 /* Compute the idx of the other branch, not our id! */
305 idx = p->prefix >> (above - SHIFT) & MASK;
306 __sync_set_child(next, idx, p);
307 p->parent = next;
308
309 /* Ascend to the join */
310 p = next;
311 } else {
312 if (!p->height)
313 break;
314 }
315
316 /* Descend into the next layer */
317 GEM_BUG_ON(!p->height);
318 idx = __sync_branch_idx(p, id);
319 next = __sync_child(p)[idx];
320 if (!next) {
321 next = __sync_alloc_leaf(p, id);
322 if (unlikely(!next))
323 return -ENOMEM;
324
325 __sync_set_child(p, idx, next);
326 p = next;
327 break;
328 }
329
330 p = next;
331 } while (1);
332
333found:
334 GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
335 __sync_set_seqno(p, id, seqno);
336 *root = p;
337 return 0;
338}
339
340/**
341 * i915_syncmap_set -- mark the most recent syncpoint between contexts
342 * @root - pointer to the #i915_syncmap
343 * @id - the context id (other timeline) we have synchronised to
344 * @seqno - the sequence number along the other timeline
345 *
346 * When we synchronise this @root timeline with another (@id), we also know
347 * that we have synchronized with all previous seqno along that timeline. If
348 * we then have a request to synchronise with the same seqno or older, we can
349 * omit it, see i915_syncmap_is_later()
350 *
351 * Returns 0 on success, or a negative error code.
352 */
353int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
354{
355 struct i915_syncmap *p = *root;
356
357 /*
358 * We expect to be called in sequence following is_later(id), which
359 * should have preloaded the root for us.
360 */
361 if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
362 __sync_set_seqno(p, id, seqno);
363 return 0;
364 }
365
366 return __sync_set(root, id, seqno);
367}
368
369static void __sync_free(struct i915_syncmap *p)
370{
371 if (p->height) {
372 unsigned int i;
373
374 while ((i = ffs(p->bitmap))) {
375 p->bitmap &= ~0u << i;
376 __sync_free(__sync_child(p)[i - 1]);
377 }
378 }
379
380 kfree(p);
381}
382
383/**
384 * i915_syncmap_free -- free all memory associated with the syncmap
385 * @root - pointer to the #i915_syncmap
386 *
387 * Either when the timeline is to be freed and we no longer need the sync
388 * point tracking, or when the fences are all known to be signaled and the
389 * sync point tracking is redundant, we can free the #i915_syncmap to recover
390 * its allocations.
391 *
392 * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
393 * reuse.
394 */
395void i915_syncmap_free(struct i915_syncmap **root)
396{
397 struct i915_syncmap *p;
398
399 p = *root;
400 if (!p)
401 return;
402
403 while (p->parent)
404 p = p->parent;
405
406 __sync_free(p);
407 *root = NULL;
408}
409
410#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
411#include "selftests/i915_syncmap.c"
412#endif
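The calling pattern intended by the comments above is: query with i915_syncmap_is_later() first (which also preloads the leaf for the likely follow-up), record with i915_syncmap_set() only when a wait is actually emitted, and free the whole tree when the timeline is retired. A compressed usage sketch against the public API from i915_syncmap.h; the surrounding names (maybe_await, emit_wait_for) are invented:

        /* Decide whether an await on (context id, seqno) can be skipped because
         * an equal or later sync point has already been recorded.
         */
        static int maybe_await(struct i915_syncmap **sync, u64 id, u32 seqno)
        {
                int err;

                if (i915_syncmap_is_later(sync, id, seqno))
                        return 0;               /* already ordered, nothing to emit */

                err = emit_wait_for(id, seqno); /* invented stand-in for the real wait */
                if (err)
                        return err;

                /* Remember the new high-water mark for this context. */
                return i915_syncmap_set(sync, id, seqno);
        }

        /*
         * struct i915_syncmap *sync;
         * i915_syncmap_init(&sync);
         *      ... maybe_await(&sync, ctx_id, seqno) for each dependency ...
         * i915_syncmap_free(&sync);    on timeline retirement
         */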
diff --git a/drivers/gpu/drm/i915/i915_syncmap.h b/drivers/gpu/drm/i915/i915_syncmap.h
new file mode 100644
index 000000000000..0653f70bee82
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_syncmap.h
@@ -0,0 +1,38 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#ifndef __I915_SYNCMAP_H__
26#define __I915_SYNCMAP_H__
27
28#include <linux/types.h>
29
30struct i915_syncmap;
31#define KSYNCMAP 16 /* radix of the tree, how many slots in each layer */
32
33void i915_syncmap_init(struct i915_syncmap **root);
34int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno);
35bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno);
36void i915_syncmap_free(struct i915_syncmap **root);
37
38#endif /* __I915_SYNCMAP_H__ */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index f3fdfda5e558..1eef3fae4db3 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -181,13 +181,10 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
181 struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); 181 struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
182 struct drm_device *dev = &dev_priv->drm; 182 struct drm_device *dev = &dev_priv->drm;
183 struct i915_gem_context *ctx; 183 struct i915_gem_context *ctx;
184 u32 *temp = NULL; /* Just here to make handling failures easy */
185 int slice = (int)(uintptr_t)attr->private; 184 int slice = (int)(uintptr_t)attr->private;
185 u32 **remap_info;
186 int ret; 186 int ret;
187 187
188 if (!HAS_HW_CONTEXTS(dev_priv))
189 return -ENXIO;
190
191 ret = l3_access_valid(dev_priv, offset); 188 ret = l3_access_valid(dev_priv, offset);
192 if (ret) 189 if (ret)
193 return ret; 190 return ret;
@@ -196,11 +193,12 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
196 if (ret) 193 if (ret)
197 return ret; 194 return ret;
198 195
199 if (!dev_priv->l3_parity.remap_info[slice]) { 196 remap_info = &dev_priv->l3_parity.remap_info[slice];
200 temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); 197 if (!*remap_info) {
201 if (!temp) { 198 *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
202 mutex_unlock(&dev->struct_mutex); 199 if (!*remap_info) {
203 return -ENOMEM; 200 ret = -ENOMEM;
201 goto out;
204 } 202 }
205 } 203 }
206 204
@@ -208,18 +206,18 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
208 * aren't propagated. Since I cannot find a stable way to reset the GPU 206 * aren't propagated. Since I cannot find a stable way to reset the GPU
209 * at this point it is left as a TODO. 207 * at this point it is left as a TODO.
210 */ 208 */
211 if (temp) 209 memcpy(*remap_info + (offset/4), buf, count);
212 dev_priv->l3_parity.remap_info[slice] = temp;
213
214 memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
215 210
216 /* NB: We defer the remapping until we switch to the context */ 211 /* NB: We defer the remapping until we switch to the context */
217 list_for_each_entry(ctx, &dev_priv->context_list, link) 212 list_for_each_entry(ctx, &dev_priv->context_list, link)
218 ctx->remap_slice |= (1<<slice); 213 ctx->remap_slice |= (1<<slice);
219 214
215 ret = count;
216
217out:
220 mutex_unlock(&dev->struct_mutex); 218 mutex_unlock(&dev->struct_mutex);
221 219
222 return count; 220 return ret;
223} 221}
224 222
225static struct bin_attribute dpf_attrs = { 223static struct bin_attribute dpf_attrs = {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 66404c5aee82..b24a83d43559 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -89,6 +89,55 @@ TRACE_EVENT(intel_memory_cxsr,
89 __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) 89 __entry->frame[PIPE_C], __entry->scanline[PIPE_C])
90); 90);
91 91
92TRACE_EVENT(g4x_wm,
93 TP_PROTO(struct intel_crtc *crtc, const struct g4x_wm_values *wm),
94 TP_ARGS(crtc, wm),
95
96 TP_STRUCT__entry(
97 __field(enum pipe, pipe)
98 __field(u32, frame)
99 __field(u32, scanline)
100 __field(u16, primary)
101 __field(u16, sprite)
102 __field(u16, cursor)
103 __field(u16, sr_plane)
104 __field(u16, sr_cursor)
105 __field(u16, sr_fbc)
106 __field(u16, hpll_plane)
107 __field(u16, hpll_cursor)
108 __field(u16, hpll_fbc)
109 __field(bool, cxsr)
110 __field(bool, hpll)
111 __field(bool, fbc)
112 ),
113
114 TP_fast_assign(
115 __entry->pipe = crtc->pipe;
116 __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev,
117 crtc->pipe);
118 __entry->scanline = intel_get_crtc_scanline(crtc);
119 __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY];
120 __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0];
121 __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR];
122 __entry->sr_plane = wm->sr.plane;
123 __entry->sr_cursor = wm->sr.cursor;
124 __entry->sr_fbc = wm->sr.fbc;
125 __entry->hpll_plane = wm->hpll.plane;
126 __entry->hpll_cursor = wm->hpll.cursor;
127 __entry->hpll_fbc = wm->hpll.fbc;
128 __entry->cxsr = wm->cxsr;
129 __entry->hpll = wm->hpll_en;
130 __entry->fbc = wm->fbc_en;
131 ),
132
133 TP_printk("pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s",
134 pipe_name(__entry->pipe), __entry->frame, __entry->scanline,
135 __entry->primary, __entry->sprite, __entry->cursor,
136 yesno(__entry->cxsr), __entry->sr_plane, __entry->sr_cursor, __entry->sr_fbc,
137 yesno(__entry->hpll), __entry->hpll_plane, __entry->hpll_cursor, __entry->hpll_fbc,
138 yesno(__entry->fbc))
139);
140
92TRACE_EVENT(vlv_wm, 141TRACE_EVENT(vlv_wm,
93 TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), 142 TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm),
94 TP_ARGS(crtc, wm), 143 TP_ARGS(crtc, wm),
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index c5455d36b617..16ecd1ab108d 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -70,20 +70,27 @@
70#define overflows_type(x, T) \ 70#define overflows_type(x, T) \
71 (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE)) 71 (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
72 72
73#define ptr_mask_bits(ptr) ({ \ 73#define ptr_mask_bits(ptr, n) ({ \
74 unsigned long __v = (unsigned long)(ptr); \ 74 unsigned long __v = (unsigned long)(ptr); \
75 (typeof(ptr))(__v & PAGE_MASK); \ 75 (typeof(ptr))(__v & -BIT(n)); \
76}) 76})
77 77
78#define ptr_unpack_bits(ptr, bits) ({ \ 78#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1))
79
80#define ptr_unpack_bits(ptr, bits, n) ({ \
79 unsigned long __v = (unsigned long)(ptr); \ 81 unsigned long __v = (unsigned long)(ptr); \
80 (bits) = __v & ~PAGE_MASK; \ 82 *(bits) = __v & (BIT(n) - 1); \
81 (typeof(ptr))(__v & PAGE_MASK); \ 83 (typeof(ptr))(__v & -BIT(n)); \
82}) 84})
83 85
84#define ptr_pack_bits(ptr, bits) \ 86#define ptr_pack_bits(ptr, bits, n) \
85 ((typeof(ptr))((unsigned long)(ptr) | (bits))) 87 ((typeof(ptr))((unsigned long)(ptr) | (bits)))
86 88
89#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
90#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
91#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
92#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
93
87#define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member) 94#define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
88 95
89#define fetch_and_zero(ptr) ({ \ 96#define fetch_and_zero(ptr) ({ \
@@ -92,4 +99,19 @@
92 __T; \ 99 __T; \
93}) 100})
94 101
102#define __mask_next_bit(mask) ({ \
103 int __idx = ffs(mask) - 1; \
104 mask &= ~BIT(__idx); \
105 __idx; \
106})
107
108#include <linux/list.h>
109
110static inline void __list_del_many(struct list_head *head,
111 struct list_head *first)
112{
113 first->prev = head;
114 WRITE_ONCE(head->next, first);
115}
116
95#endif /* !__I915_UTILS_H */ 117#endif /* !__I915_UTILS_H */
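The pointer-packing helpers now take an explicit bit count n instead of hard-coding PAGE_MASK, ptr_unpack_bits() writes the low bits through a pointer rather than assigning to an lvalue, and the old page-granular behaviour is kept as the page_*_bits() wrappers. A minimal standalone illustration of the same trick in plain C (the names and LOW_BITS() are invented for the sketch; -BIT(n) in the header is the same mask as ~LOW_BITS(n) here):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Same idea as ptr_pack_bits()/ptr_unpack_bits() above: an object aligned to
 * 2^n bytes has n spare low bits in its address, which can carry small flags. */
#define LOW_BITS(n)		((1ul << (n)) - 1)
#define pack_bits(p, bits, n)	((void *)((uintptr_t)(p) | (bits)))
#define unpack_bits(p, bits, n)	\
	(*(bits) = (uintptr_t)(p) & LOW_BITS(n), \
	 (void *)((uintptr_t)(p) & ~LOW_BITS(n)))

int main(void)
{
	void *obj = aligned_alloc(16, 64);	/* 16-byte aligned: n = 4 spare bits */
	unsigned long flags;
	void *packed, *clean;

	packed = pack_bits(obj, 0x5ul, 4);	/* stash two flag bits */
	clean = unpack_bits(packed, &flags, 4);

	assert(clean == obj && flags == 0x5);
	printf("ptr=%p flags=%#lx\n", clean, flags);
	free(obj);
	return 0;
}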
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index a40c82c65450..4325cb0a04f5 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -102,23 +102,7 @@ void
102intel_plane_destroy_state(struct drm_plane *plane, 102intel_plane_destroy_state(struct drm_plane *plane,
103 struct drm_plane_state *state) 103 struct drm_plane_state *state)
104{ 104{
105 struct i915_vma *vma; 105 WARN_ON(to_intel_plane_state(state)->vma);
106
107 vma = fetch_and_zero(&to_intel_plane_state(state)->vma);
108
109 /*
110 * FIXME: Normally intel_cleanup_plane_fb handles destruction of vma.
111 * We currently don't clear all planes during driver unload, so we have
112 * to be able to unpin vma here for now.
113 *
114 * Normally this can only happen during unload when kmscon is disabled
115 * and userspace doesn't attempt to set a framebuffer at all.
116 */
117 if (vma) {
118 mutex_lock(&plane->dev->struct_mutex);
119 intel_unpin_fb_vma(vma);
120 mutex_unlock(&plane->dev->struct_mutex);
121 }
122 106
123 drm_atomic_helper_plane_destroy_state(plane, state); 107 drm_atomic_helper_plane_destroy_state(plane, state);
124} 108}
@@ -185,7 +169,7 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state,
185 } 169 }
186 170
187 intel_state->base.visible = false; 171 intel_state->base.visible = false;
188 ret = intel_plane->check_plane(plane, crtc_state, intel_state); 172 ret = intel_plane->check_plane(intel_plane, crtc_state, intel_state);
189 if (ret) 173 if (ret)
190 return ret; 174 return ret;
191 175
@@ -235,14 +219,14 @@ static void intel_plane_atomic_update(struct drm_plane *plane,
235 trace_intel_update_plane(plane, 219 trace_intel_update_plane(plane,
236 to_intel_crtc(crtc)); 220 to_intel_crtc(crtc));
237 221
238 intel_plane->update_plane(plane, 222 intel_plane->update_plane(intel_plane,
239 to_intel_crtc_state(crtc->state), 223 to_intel_crtc_state(crtc->state),
240 intel_state); 224 intel_state);
241 } else { 225 } else {
242 trace_intel_disable_plane(plane, 226 trace_intel_disable_plane(plane,
243 to_intel_crtc(crtc)); 227 to_intel_crtc(crtc));
244 228
245 intel_plane->disable_plane(plane, crtc); 229 intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc));
246 } 230 }
247} 231}
248 232
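With this conversion the plane hooks take intel_plane and intel_crtc directly, removing the to_intel_plane()/to_intel_crtc() upcasts at every call site. A compile-only sketch of the hook signatures as implied by the call sites in this file (the real members live in struct intel_plane in intel_drv.h, which is not part of this hunk):

struct intel_plane;
struct intel_crtc;
struct intel_crtc_state;
struct intel_plane_state;

/* Hook shapes inferred from the call sites in intel_atomic_plane.c above. */
typedef void (*intel_update_plane_fn)(struct intel_plane *plane,
				      const struct intel_crtc_state *crtc_state,
				      const struct intel_plane_state *plane_state);
typedef void (*intel_disable_plane_fn)(struct intel_plane *plane,
				       struct intel_crtc *crtc);
typedef int (*intel_check_plane_fn)(struct intel_plane *plane,
				    struct intel_crtc_state *crtc_state,
				    struct intel_plane_state *plane_state);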
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 52c207e81f41..d805b6e6fe71 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -632,20 +632,9 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder,
632 (int) port, (int) pipe); 632 (int) port, (int) pipe);
633 } 633 }
634 634
635 switch (intel_encoder->type) { 635 intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld,
636 case INTEL_OUTPUT_HDMI: 636 crtc_state->port_clock,
637 intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe, 637 intel_encoder->type == INTEL_OUTPUT_DP);
638 crtc_state->port_clock,
639 false, 0);
640 break;
641 case INTEL_OUTPUT_DP:
642 intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe,
643 adjusted_mode->crtc_clock,
644 true, crtc_state->port_clock);
645 break;
646 default:
647 break;
648 }
649} 638}
650 639
651/** 640/**
@@ -680,7 +669,7 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder)
680 (int) port, (int) pipe); 669 (int) port, (int) pipe);
681 } 670 }
682 671
683 intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false, 0); 672 intel_lpe_audio_notify(dev_priv, pipe, port, NULL, 0, false);
684} 673}
685 674
686/** 675/**
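The notifier now receives the pipe, port, ELD, link clock and a dp_output flag in a single call, so the per-encoder-type switch disappears and the disable path simply passes a NULL ELD. A compile-only sketch of the prototype as inferred from these call sites (the real declaration lives elsewhere in the driver and may differ in parameter names):

#include <stdbool.h>

struct drm_i915_private;

/* enum pipe / enum port stand-ins for this standalone sketch. */
enum pipe { PIPE_A, PIPE_B, PIPE_C };
enum port { PORT_A, PORT_B, PORT_C, PORT_D, PORT_E };

/* Prototype inferred from the enable/disable call sites above. */
void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
			    enum pipe pipe, enum port port,
			    const void *eld, int ls_clock, bool dp_output);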
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 9ccbf26124c6..183afcb036aa 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -64,10 +64,12 @@ static unsigned long wait_timeout(void)
64 64
65static noinline void missed_breadcrumb(struct intel_engine_cs *engine) 65static noinline void missed_breadcrumb(struct intel_engine_cs *engine)
66{ 66{
67 DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n", 67 DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s, current seqno=%x, last=%x\n",
68 engine->name, __builtin_return_address(0), 68 engine->name, __builtin_return_address(0),
69 yesno(test_bit(ENGINE_IRQ_BREADCRUMB, 69 yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
70 &engine->irq_posted))); 70 &engine->irq_posted)),
71 intel_engine_get_seqno(engine),
72 intel_engine_last_submit(engine));
71 73
72 set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); 74 set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
73} 75}
@@ -665,12 +667,13 @@ static int intel_breadcrumbs_signaler(void *arg)
665 return 0; 667 return 0;
666} 668}
667 669
668void intel_engine_enable_signaling(struct drm_i915_gem_request *request) 670void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
671 bool wakeup)
669{ 672{
670 struct intel_engine_cs *engine = request->engine; 673 struct intel_engine_cs *engine = request->engine;
671 struct intel_breadcrumbs *b = &engine->breadcrumbs; 674 struct intel_breadcrumbs *b = &engine->breadcrumbs;
672 struct rb_node *parent, **p; 675 struct rb_node *parent, **p;
673 bool first, wakeup; 676 bool first;
674 u32 seqno; 677 u32 seqno;
675 678
676 /* Note that we may be called from an interrupt handler on another 679 /* Note that we may be called from an interrupt handler on another
@@ -703,7 +706,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
703 * If we are the oldest waiter, enable the irq (after which we 706 * If we are the oldest waiter, enable the irq (after which we
704 * must double check that the seqno did not complete). 707 * must double check that the seqno did not complete).
705 */ 708 */
706 wakeup = __intel_engine_add_wait(engine, &request->signaling.wait); 709 wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
707 710
708 /* Now insert ourselves into the retirement ordered list of signals 711 /* Now insert ourselves into the retirement ordered list of signals
709 * on this engine. We track the oldest seqno as that will be the 712 * on this engine. We track the oldest seqno as that will be the
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index dd3ad52b7dfe..29792972d55d 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1071,9 +1071,15 @@ static int bxt_calc_cdclk(int max_pixclk)
1071 1071
1072static int glk_calc_cdclk(int max_pixclk) 1072static int glk_calc_cdclk(int max_pixclk)
1073{ 1073{
1074 if (max_pixclk > 2 * 158400) 1074 /*
1075 * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk
1076 * as a temporary workaround. Use a higher cdclk instead. (Note that
1077 * intel_compute_max_dotclk() limits the max pixel clock to 99% of max
1078 * cdclk.)
1079 */
1080 if (max_pixclk > DIV_ROUND_UP(2 * 158400 * 99, 100))
1075 return 316800; 1081 return 316800;
1076 else if (max_pixclk > 2 * 79200) 1082 else if (max_pixclk > DIV_ROUND_UP(2 * 79200 * 99, 100))
1077 return 158400; 1083 return 158400;
1078 else 1084 else
1079 return 79200; 1085 return 79200;
@@ -1664,7 +1670,11 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv)
1664 int max_cdclk_freq = dev_priv->max_cdclk_freq; 1670 int max_cdclk_freq = dev_priv->max_cdclk_freq;
1665 1671
1666 if (IS_GEMINILAKE(dev_priv)) 1672 if (IS_GEMINILAKE(dev_priv))
1667 return 2 * max_cdclk_freq; 1673 /*
1674 * FIXME: Limiting to 99% as a temporary workaround. See
1675 * glk_calc_cdclk() for details.
1676 */
1677 return 2 * max_cdclk_freq * 99 / 100;
1668 else if (INTEL_INFO(dev_priv)->gen >= 9 || 1678 else if (INTEL_INFO(dev_priv)->gen >= 9 ||
1669 IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 1679 IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
1670 return max_cdclk_freq; 1680 return max_cdclk_freq;
@@ -1798,13 +1808,11 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv)
1798 case CLKCFG_FSB_800: 1808 case CLKCFG_FSB_800:
1799 return 200000; 1809 return 200000;
1800 case CLKCFG_FSB_1067: 1810 case CLKCFG_FSB_1067:
1811 case CLKCFG_FSB_1067_ALT:
1801 return 266667; 1812 return 266667;
1802 case CLKCFG_FSB_1333: 1813 case CLKCFG_FSB_1333:
1814 case CLKCFG_FSB_1333_ALT:
1803 return 333333; 1815 return 333333;
1804 /* these two are just a guess; one of them might be right */
1805 case CLKCFG_FSB_1600:
1806 case CLKCFG_FSB_1600_ALT:
1807 return 400000;
1808 default: 1816 default:
1809 return 133333; 1817 return 133333;
1810 } 1818 }
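The DIV_ROUND_UP(... * 99, 100) thresholds in glk_calc_cdclk() above keep the selected cdclk at least 1% above twice the pixel clock, matching the 99% clamp that intel_compute_max_dotclk() now applies for Geminilake. A standalone check of the arithmetic (DIV_ROUND_UP written out since it is a kernel macro):

#include <stdio.h>

/* Same rounding as the kernel's DIV_ROUND_UP(). */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static int glk_calc_cdclk(int max_pixclk)
{
	if (max_pixclk > DIV_ROUND_UP(2 * 158400 * 99, 100))
		return 316800;
	else if (max_pixclk > DIV_ROUND_UP(2 * 79200 * 99, 100))
		return 158400;
	else
		return 79200;
}

int main(void)
{
	/* Thresholds work out to 313632 and 156816 kHz, i.e. 99% of 2 * cdclk. */
	printf("thresholds: %d %d\n",
	       DIV_ROUND_UP(2 * 158400 * 99, 100),
	       DIV_ROUND_UP(2 * 79200 * 99, 100));

	/* A 315 MHz dotclock now selects the 316.8 MHz cdclk instead of 158.4. */
	printf("glk_calc_cdclk(315000) = %d\n", glk_calc_cdclk(315000));

	/* Max dotclock reported for a 316.8 MHz max cdclk: 627264 kHz. */
	printf("max dotclk = %d\n", 2 * 316800 * 99 / 100);
	return 0;
}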
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 2797bf37c3ac..84a1f5e85153 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -777,13 +777,6 @@ out:
777 return ret; 777 return ret;
778} 778}
779 779
780static int intel_crt_set_property(struct drm_connector *connector,
781 struct drm_property *property,
782 uint64_t value)
783{
784 return 0;
785}
786
787void intel_crt_reset(struct drm_encoder *encoder) 780void intel_crt_reset(struct drm_encoder *encoder)
788{ 781{
789 struct drm_i915_private *dev_priv = to_i915(encoder->dev); 782 struct drm_i915_private *dev_priv = to_i915(encoder->dev);
@@ -814,10 +807,9 @@ static const struct drm_connector_funcs intel_crt_connector_funcs = {
814 .late_register = intel_connector_register, 807 .late_register = intel_connector_register,
815 .early_unregister = intel_connector_unregister, 808 .early_unregister = intel_connector_unregister,
816 .destroy = intel_crt_destroy, 809 .destroy = intel_crt_destroy,
817 .set_property = intel_crt_set_property, 810 .set_property = drm_atomic_helper_connector_set_property,
818 .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, 811 .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
819 .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, 812 .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
820 .atomic_get_property = intel_connector_atomic_get_property,
821}; 813};
822 814
823static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = { 815static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = {
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 7d01dfe7faac..3718341662c2 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -337,7 +337,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
337 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 337 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
338 for_each_pipe(dev_priv, pipe) 338 for_each_pipe(dev_priv, pipe)
339 info->num_sprites[pipe] = 2; 339 info->num_sprites[pipe] = 2;
340 } else if (INTEL_GEN(dev_priv) >= 5) { 340 } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
341 for_each_pipe(dev_priv, pipe) 341 for_each_pipe(dev_priv, pipe)
342 info->num_sprites[pipe] = 1; 342 info->num_sprites[pipe] = 1;
343 } 343 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 6a037b856d96..7fa21df5bcd7 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1277,7 +1277,7 @@ static void assert_sprites_disabled(struct drm_i915_private *dev_priv,
1277 I915_STATE_WARN(val & SPRITE_ENABLE, 1277 I915_STATE_WARN(val & SPRITE_ENABLE,
1278 "sprite %c assertion failure, should be off on pipe %c but is still active\n", 1278 "sprite %c assertion failure, should be off on pipe %c but is still active\n",
1279 plane_name(pipe), pipe_name(pipe)); 1279 plane_name(pipe), pipe_name(pipe));
1280 } else if (INTEL_GEN(dev_priv) >= 5) { 1280 } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
1281 u32 val = I915_READ(DVSCNTR(pipe)); 1281 u32 val = I915_READ(DVSCNTR(pipe));
1282 I915_STATE_WARN(val & DVS_ENABLE, 1282 I915_STATE_WARN(val & DVS_ENABLE,
1283 "sprite %c assertion failure, should be off on pipe %c but is still active\n", 1283 "sprite %c assertion failure, should be off on pipe %c but is still active\n",
@@ -2084,6 +2084,18 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view,
2084 } 2084 }
2085} 2085}
2086 2086
2087static unsigned int intel_cursor_alignment(const struct drm_i915_private *dev_priv)
2088{
2089 if (IS_I830(dev_priv))
2090 return 16 * 1024;
2091 else if (IS_I85X(dev_priv))
2092 return 256;
2093 else if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
2094 return 32;
2095 else
2096 return 4 * 1024;
2097}
2098
2087static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv) 2099static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv)
2088{ 2100{
2089 if (INTEL_INFO(dev_priv)->gen >= 9) 2101 if (INTEL_INFO(dev_priv)->gen >= 9)
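intel_cursor_alignment() centralises the physical-cursor base alignment that later hunks in this file stop open-coding at the i915_gem_object_attach_phys() call sites. A trivial standalone mirror of the table, with the platform checks reduced to flags for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors intel_cursor_alignment() above. */
static unsigned int cursor_alignment(bool is_i830, bool is_i85x,
				     bool is_i845g_or_i865g)
{
	if (is_i830)
		return 16 * 1024;	/* 830: 16 KiB */
	else if (is_i85x)
		return 256;		/* 85x: 256 bytes */
	else if (is_i845g_or_i865g)
		return 32;		/* 845G/865G: 32 bytes */
	else
		return 4 * 1024;	/* everything else: 4 KiB */
}

int main(void)
{
	printf("i830=%u i85x=%u i845g/i865g=%u default=%u\n",
	       cursor_alignment(true, false, false),
	       cursor_alignment(false, true, false),
	       cursor_alignment(false, false, true),
	       cursor_alignment(false, false, false));
	return 0;
}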
@@ -2386,11 +2398,17 @@ u32 intel_compute_tile_offset(int *x, int *y,
2386 const struct intel_plane_state *state, 2398 const struct intel_plane_state *state,
2387 int plane) 2399 int plane)
2388{ 2400{
2389 const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev); 2401 struct intel_plane *intel_plane = to_intel_plane(state->base.plane);
2402 struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev);
2390 const struct drm_framebuffer *fb = state->base.fb; 2403 const struct drm_framebuffer *fb = state->base.fb;
2391 unsigned int rotation = state->base.rotation; 2404 unsigned int rotation = state->base.rotation;
2392 int pitch = intel_fb_pitch(fb, plane, rotation); 2405 int pitch = intel_fb_pitch(fb, plane, rotation);
2393 u32 alignment = intel_surf_alignment(fb, plane); 2406 u32 alignment;
2407
2408 if (intel_plane->id == PLANE_CURSOR)
2409 alignment = intel_cursor_alignment(dev_priv);
2410 else
2411 alignment = intel_surf_alignment(fb, plane);
2394 2412
2395 return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch, 2413 return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch,
2396 rotation, alignment); 2414 rotation, alignment);
@@ -2750,7 +2768,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
2750 false); 2768 false);
2751 intel_pre_disable_primary_noatomic(&intel_crtc->base); 2769 intel_pre_disable_primary_noatomic(&intel_crtc->base);
2752 trace_intel_disable_plane(primary, intel_crtc); 2770 trace_intel_disable_plane(primary, intel_crtc);
2753 intel_plane->disable_plane(primary, &intel_crtc->base); 2771 intel_plane->disable_plane(intel_plane, intel_crtc);
2754 2772
2755 return; 2773 return;
2756 2774
@@ -2981,10 +2999,8 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state,
2981 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 2999 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2982 dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; 3000 dspcntr |= DISPPLANE_PIPE_CSC_ENABLE;
2983 3001
2984 if (INTEL_GEN(dev_priv) < 4) { 3002 if (INTEL_GEN(dev_priv) < 4)
2985 if (crtc->pipe == PIPE_B) 3003 dspcntr |= DISPPLANE_SEL_PIPE(crtc->pipe);
2986 dspcntr |= DISPPLANE_SEL_PIPE_B;
2987 }
2988 3004
2989 switch (fb->format->format) { 3005 switch (fb->format->format) {
2990 case DRM_FORMAT_C8: 3006 case DRM_FORMAT_C8:
@@ -3063,14 +3079,14 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
3063 return 0; 3079 return 0;
3064} 3080}
3065 3081
3066static void i9xx_update_primary_plane(struct drm_plane *primary, 3082static void i9xx_update_primary_plane(struct intel_plane *primary,
3067 const struct intel_crtc_state *crtc_state, 3083 const struct intel_crtc_state *crtc_state,
3068 const struct intel_plane_state *plane_state) 3084 const struct intel_plane_state *plane_state)
3069{ 3085{
3070 struct drm_i915_private *dev_priv = to_i915(primary->dev); 3086 struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
3071 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); 3087 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
3072 struct drm_framebuffer *fb = plane_state->base.fb; 3088 const struct drm_framebuffer *fb = plane_state->base.fb;
3073 int plane = intel_crtc->plane; 3089 enum plane plane = primary->plane;
3074 u32 linear_offset; 3090 u32 linear_offset;
3075 u32 dspcntr = plane_state->ctl; 3091 u32 dspcntr = plane_state->ctl;
3076 i915_reg_t reg = DSPCNTR(plane); 3092 i915_reg_t reg = DSPCNTR(plane);
@@ -3081,12 +3097,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
3081 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); 3097 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
3082 3098
3083 if (INTEL_GEN(dev_priv) >= 4) 3099 if (INTEL_GEN(dev_priv) >= 4)
3084 intel_crtc->dspaddr_offset = plane_state->main.offset; 3100 crtc->dspaddr_offset = plane_state->main.offset;
3085 else 3101 else
3086 intel_crtc->dspaddr_offset = linear_offset; 3102 crtc->dspaddr_offset = linear_offset;
3087 3103
3088 intel_crtc->adjusted_x = x; 3104 crtc->adjusted_x = x;
3089 intel_crtc->adjusted_y = y; 3105 crtc->adjusted_y = y;
3090 3106
3091 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 3107 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
3092 3108
@@ -3112,31 +3128,29 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
3112 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { 3128 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3113 I915_WRITE_FW(DSPSURF(plane), 3129 I915_WRITE_FW(DSPSURF(plane),
3114 intel_plane_ggtt_offset(plane_state) + 3130 intel_plane_ggtt_offset(plane_state) +
3115 intel_crtc->dspaddr_offset); 3131 crtc->dspaddr_offset);
3116 I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); 3132 I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x);
3117 } else if (INTEL_GEN(dev_priv) >= 4) { 3133 } else if (INTEL_GEN(dev_priv) >= 4) {
3118 I915_WRITE_FW(DSPSURF(plane), 3134 I915_WRITE_FW(DSPSURF(plane),
3119 intel_plane_ggtt_offset(plane_state) + 3135 intel_plane_ggtt_offset(plane_state) +
3120 intel_crtc->dspaddr_offset); 3136 crtc->dspaddr_offset);
3121 I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); 3137 I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x);
3122 I915_WRITE_FW(DSPLINOFF(plane), linear_offset); 3138 I915_WRITE_FW(DSPLINOFF(plane), linear_offset);
3123 } else { 3139 } else {
3124 I915_WRITE_FW(DSPADDR(plane), 3140 I915_WRITE_FW(DSPADDR(plane),
3125 intel_plane_ggtt_offset(plane_state) + 3141 intel_plane_ggtt_offset(plane_state) +
3126 intel_crtc->dspaddr_offset); 3142 crtc->dspaddr_offset);
3127 } 3143 }
3128 POSTING_READ_FW(reg); 3144 POSTING_READ_FW(reg);
3129 3145
3130 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); 3146 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
3131} 3147}
3132 3148
3133static void i9xx_disable_primary_plane(struct drm_plane *primary, 3149static void i9xx_disable_primary_plane(struct intel_plane *primary,
3134 struct drm_crtc *crtc) 3150 struct intel_crtc *crtc)
3135{ 3151{
3136 struct drm_device *dev = crtc->dev; 3152 struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
3137 struct drm_i915_private *dev_priv = to_i915(dev); 3153 enum plane plane = primary->plane;
3138 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3139 int plane = intel_crtc->plane;
3140 unsigned long irqflags; 3154 unsigned long irqflags;
3141 3155
3142 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 3156 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -3321,16 +3335,15 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
3321 return plane_ctl; 3335 return plane_ctl;
3322} 3336}
3323 3337
3324static void skylake_update_primary_plane(struct drm_plane *plane, 3338static void skylake_update_primary_plane(struct intel_plane *plane,
3325 const struct intel_crtc_state *crtc_state, 3339 const struct intel_crtc_state *crtc_state,
3326 const struct intel_plane_state *plane_state) 3340 const struct intel_plane_state *plane_state)
3327{ 3341{
3328 struct drm_device *dev = plane->dev; 3342 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
3329 struct drm_i915_private *dev_priv = to_i915(dev); 3343 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
3330 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); 3344 const struct drm_framebuffer *fb = plane_state->base.fb;
3331 struct drm_framebuffer *fb = plane_state->base.fb; 3345 enum plane_id plane_id = plane->id;
3332 enum plane_id plane_id = to_intel_plane(plane)->id; 3346 enum pipe pipe = plane->pipe;
3333 enum pipe pipe = to_intel_plane(plane)->pipe;
3334 u32 plane_ctl = plane_state->ctl; 3347 u32 plane_ctl = plane_state->ctl;
3335 unsigned int rotation = plane_state->base.rotation; 3348 unsigned int rotation = plane_state->base.rotation;
3336 u32 stride = skl_plane_stride(fb, 0, rotation); 3349 u32 stride = skl_plane_stride(fb, 0, rotation);
@@ -3352,10 +3365,10 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
3352 dst_w--; 3365 dst_w--;
3353 dst_h--; 3366 dst_h--;
3354 3367
3355 intel_crtc->dspaddr_offset = surf_addr; 3368 crtc->dspaddr_offset = surf_addr;
3356 3369
3357 intel_crtc->adjusted_x = src_x; 3370 crtc->adjusted_x = src_x;
3358 intel_crtc->adjusted_y = src_y; 3371 crtc->adjusted_y = src_y;
3359 3372
3360 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 3373 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
3361 3374
@@ -3394,13 +3407,12 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
3394 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); 3407 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
3395} 3408}
3396 3409
3397static void skylake_disable_primary_plane(struct drm_plane *primary, 3410static void skylake_disable_primary_plane(struct intel_plane *primary,
3398 struct drm_crtc *crtc) 3411 struct intel_crtc *crtc)
3399{ 3412{
3400 struct drm_device *dev = crtc->dev; 3413 struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
3401 struct drm_i915_private *dev_priv = to_i915(dev); 3414 enum plane_id plane_id = primary->id;
3402 enum plane_id plane_id = to_intel_plane(primary)->id; 3415 enum pipe pipe = primary->pipe;
3403 enum pipe pipe = to_intel_plane(primary)->pipe;
3404 unsigned long irqflags; 3416 unsigned long irqflags;
3405 3417
3406 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 3418 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -3433,7 +3445,7 @@ static void intel_update_primary_planes(struct drm_device *dev)
3433 trace_intel_update_plane(&plane->base, 3445 trace_intel_update_plane(&plane->base,
3434 to_intel_crtc(crtc)); 3446 to_intel_crtc(crtc));
3435 3447
3436 plane->update_plane(&plane->base, 3448 plane->update_plane(plane,
3437 to_intel_crtc_state(crtc->state), 3449 to_intel_crtc_state(crtc->state),
3438 plane_state); 3450 plane_state);
3439 } 3451 }
@@ -4861,12 +4873,9 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
4861{ 4873{
4862 if (intel_crtc->overlay) { 4874 if (intel_crtc->overlay) {
4863 struct drm_device *dev = intel_crtc->base.dev; 4875 struct drm_device *dev = intel_crtc->base.dev;
4864 struct drm_i915_private *dev_priv = to_i915(dev);
4865 4876
4866 mutex_lock(&dev->struct_mutex); 4877 mutex_lock(&dev->struct_mutex);
4867 dev_priv->mm.interruptible = false;
4868 (void) intel_overlay_switch_off(intel_crtc->overlay); 4878 (void) intel_overlay_switch_off(intel_crtc->overlay);
4869 dev_priv->mm.interruptible = true;
4870 mutex_unlock(&dev->struct_mutex); 4879 mutex_unlock(&dev->struct_mutex);
4871 } 4880 }
4872 4881
@@ -5086,7 +5095,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask
5086 intel_crtc_dpms_overlay_disable(intel_crtc); 5095 intel_crtc_dpms_overlay_disable(intel_crtc);
5087 5096
5088 drm_for_each_plane_mask(p, dev, plane_mask) 5097 drm_for_each_plane_mask(p, dev, plane_mask)
5089 to_intel_plane(p)->disable_plane(p, crtc); 5098 to_intel_plane(p)->disable_plane(to_intel_plane(p), intel_crtc);
5090 5099
5091 /* 5100 /*
5092 * FIXME: Once we grow proper nuclear flip support out of this we need 5101 * FIXME: Once we grow proper nuclear flip support out of this we need
@@ -5722,6 +5731,8 @@ static void i9xx_set_pll_dividers(struct intel_crtc *crtc)
5722static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, 5731static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
5723 struct drm_atomic_state *old_state) 5732 struct drm_atomic_state *old_state)
5724{ 5733{
5734 struct intel_atomic_state *old_intel_state =
5735 to_intel_atomic_state(old_state);
5725 struct drm_crtc *crtc = pipe_config->base.crtc; 5736 struct drm_crtc *crtc = pipe_config->base.crtc;
5726 struct drm_device *dev = crtc->dev; 5737 struct drm_device *dev = crtc->dev;
5727 struct drm_i915_private *dev_priv = to_i915(dev); 5738 struct drm_i915_private *dev_priv = to_i915(dev);
@@ -5754,7 +5765,11 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
5754 5765
5755 intel_color_load_luts(&pipe_config->base); 5766 intel_color_load_luts(&pipe_config->base);
5756 5767
5757 intel_update_watermarks(intel_crtc); 5768 if (dev_priv->display.initial_watermarks != NULL)
5769 dev_priv->display.initial_watermarks(old_intel_state,
5770 intel_crtc->config);
5771 else
5772 intel_update_watermarks(intel_crtc);
5758 intel_enable_pipe(intel_crtc); 5773 intel_enable_pipe(intel_crtc);
5759 5774
5760 assert_vblank_disabled(crtc); 5775 assert_vblank_disabled(crtc);
@@ -5920,9 +5935,10 @@ void intel_encoder_destroy(struct drm_encoder *encoder)
5920 5935
5921/* Cross check the actual hw state with our own modeset state tracking (and it's 5936/* Cross check the actual hw state with our own modeset state tracking (and it's
5922 * internal consistency). */ 5937 * internal consistency). */
5923static void intel_connector_verify_state(struct intel_connector *connector) 5938static void intel_connector_verify_state(struct drm_crtc_state *crtc_state,
5939 struct drm_connector_state *conn_state)
5924{ 5940{
5925 struct drm_crtc *crtc = connector->base.state->crtc; 5941 struct intel_connector *connector = to_intel_connector(conn_state->connector);
5926 5942
5927 DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", 5943 DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
5928 connector->base.base.id, 5944 connector->base.base.id,
@@ -5930,15 +5946,14 @@ static void intel_connector_verify_state(struct intel_connector *connector)
5930 5946
5931 if (connector->get_hw_state(connector)) { 5947 if (connector->get_hw_state(connector)) {
5932 struct intel_encoder *encoder = connector->encoder; 5948 struct intel_encoder *encoder = connector->encoder;
5933 struct drm_connector_state *conn_state = connector->base.state;
5934 5949
5935 I915_STATE_WARN(!crtc, 5950 I915_STATE_WARN(!crtc_state,
5936 "connector enabled without attached crtc\n"); 5951 "connector enabled without attached crtc\n");
5937 5952
5938 if (!crtc) 5953 if (!crtc_state)
5939 return; 5954 return;
5940 5955
5941 I915_STATE_WARN(!crtc->state->active, 5956 I915_STATE_WARN(!crtc_state->active,
5942 "connector is active, but attached crtc isn't\n"); 5957 "connector is active, but attached crtc isn't\n");
5943 5958
5944 if (!encoder || encoder->type == INTEL_OUTPUT_DP_MST) 5959 if (!encoder || encoder->type == INTEL_OUTPUT_DP_MST)
@@ -5950,9 +5965,9 @@ static void intel_connector_verify_state(struct intel_connector *connector)
5950 I915_STATE_WARN(conn_state->crtc != encoder->base.crtc, 5965 I915_STATE_WARN(conn_state->crtc != encoder->base.crtc,
5951 "attached encoder crtc differs from connector crtc\n"); 5966 "attached encoder crtc differs from connector crtc\n");
5952 } else { 5967 } else {
5953 I915_STATE_WARN(crtc && crtc->state->active, 5968 I915_STATE_WARN(crtc_state && crtc_state->active,
5954 "attached crtc is active, but connector isn't\n"); 5969 "attached crtc is active, but connector isn't\n");
5955 I915_STATE_WARN(!crtc && connector->base.state->best_encoder, 5970 I915_STATE_WARN(!crtc_state && conn_state->best_encoder,
5956 "best encoder set without crtc!\n"); 5971 "best encoder set without crtc!\n");
5957 } 5972 }
5958} 5973}
@@ -6372,8 +6387,8 @@ static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv, enum pipe
6372 vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9(1), reg_val); 6387 vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9(1), reg_val);
6373 6388
6374 reg_val = vlv_dpio_read(dev_priv, pipe, VLV_REF_DW13); 6389 reg_val = vlv_dpio_read(dev_priv, pipe, VLV_REF_DW13);
6375 reg_val &= 0x8cffffff; 6390 reg_val &= 0x00ffffff;
6376 reg_val = 0x8c000000; 6391 reg_val |= 0x8c000000;
6377 vlv_dpio_write(dev_priv, pipe, VLV_REF_DW13, reg_val); 6392 vlv_dpio_write(dev_priv, pipe, VLV_REF_DW13, reg_val);
6378 6393
6379 reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW9(1)); 6394 reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW9(1));
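The VLV_REF_DW13 change above is a real fix: the old sequence masked the readback and then overwrote it with 0x8c000000 outright, discarding the bits the mask had preserved, while the new sequence keeps the low 24 bits and replaces only the top byte. A standalone before/after with a made-up readback value:

#include <stdio.h>

int main(void)
{
	unsigned int reg = 0x12abcdefu;	/* hypothetical VLV_REF_DW13 readback */
	unsigned int old_way, new_way;

	/* Old code: the unconditional assignment clobbers what the mask kept. */
	old_way = reg & 0x8cffffff;
	old_way = 0x8c000000;

	/* New code: clear the top byte, then OR the 0x8c field back in. */
	new_way = reg & 0x00ffffff;
	new_way |= 0x8c000000;

	printf("old: 0x%08x  new: 0x%08x\n", old_way, new_way);
	/* old: 0x8c000000  new: 0x8cabcdef */
	return 0;
}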
@@ -8177,9 +8192,6 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
8177{ 8192{
8178 struct drm_device *dev = crtc->base.dev; 8193 struct drm_device *dev = crtc->base.dev;
8179 struct drm_i915_private *dev_priv = to_i915(dev); 8194 struct drm_i915_private *dev_priv = to_i915(dev);
8180 struct dpll reduced_clock;
8181 bool has_reduced_clock = false;
8182 struct intel_shared_dpll *pll;
8183 const struct intel_limit *limit; 8195 const struct intel_limit *limit;
8184 int refclk = 120000; 8196 int refclk = 120000;
8185 8197
@@ -8221,20 +8233,14 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
8221 return -EINVAL; 8233 return -EINVAL;
8222 } 8234 }
8223 8235
8224 ironlake_compute_dpll(crtc, crtc_state, 8236 ironlake_compute_dpll(crtc, crtc_state, NULL);
8225 has_reduced_clock ? &reduced_clock : NULL);
8226 8237
8227 pll = intel_get_shared_dpll(crtc, crtc_state, NULL); 8238 if (!intel_get_shared_dpll(crtc, crtc_state, NULL)) {
8228 if (pll == NULL) {
8229 DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", 8239 DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
8230 pipe_name(crtc->pipe)); 8240 pipe_name(crtc->pipe));
8231 return -EINVAL; 8241 return -EINVAL;
8232 } 8242 }
8233 8243
8234 if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS) &&
8235 has_reduced_clock)
8236 crtc->lowfreq_avail = true;
8237
8238 return 0; 8244 return 0;
8239} 8245}
8240 8246
@@ -9138,38 +9144,171 @@ out:
9138 return active; 9144 return active;
9139} 9145}
9140 9146
9147static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
9148{
9149 struct drm_i915_private *dev_priv =
9150 to_i915(plane_state->base.plane->dev);
9151 const struct drm_framebuffer *fb = plane_state->base.fb;
9152 const struct drm_i915_gem_object *obj = intel_fb_obj(fb);
9153 u32 base;
9154
9155 if (INTEL_INFO(dev_priv)->cursor_needs_physical)
9156 base = obj->phys_handle->busaddr;
9157 else
9158 base = intel_plane_ggtt_offset(plane_state);
9159
9160 base += plane_state->main.offset;
9161
9162 /* ILK+ do this automagically */
9163 if (HAS_GMCH_DISPLAY(dev_priv) &&
9164 plane_state->base.rotation & DRM_MODE_ROTATE_180)
9165 base += (plane_state->base.crtc_h *
9166 plane_state->base.crtc_w - 1) * fb->format->cpp[0];
9167
9168 return base;
9169}
9170
9171static u32 intel_cursor_position(const struct intel_plane_state *plane_state)
9172{
9173 int x = plane_state->base.crtc_x;
9174 int y = plane_state->base.crtc_y;
9175 u32 pos = 0;
9176
9177 if (x < 0) {
9178 pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
9179 x = -x;
9180 }
9181 pos |= x << CURSOR_X_SHIFT;
9182
9183 if (y < 0) {
9184 pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
9185 y = -y;
9186 }
9187 pos |= y << CURSOR_Y_SHIFT;
9188
9189 return pos;
9190}
9191
9192static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state)
9193{
9194 const struct drm_mode_config *config =
9195 &plane_state->base.plane->dev->mode_config;
9196 int width = plane_state->base.crtc_w;
9197 int height = plane_state->base.crtc_h;
9198
9199 return width > 0 && width <= config->cursor_width &&
9200 height > 0 && height <= config->cursor_height;
9201}
9202
9203static int intel_check_cursor(struct intel_crtc_state *crtc_state,
9204 struct intel_plane_state *plane_state)
9205{
9206 const struct drm_framebuffer *fb = plane_state->base.fb;
9207 int src_x, src_y;
9208 u32 offset;
9209 int ret;
9210
9211 ret = drm_plane_helper_check_state(&plane_state->base,
9212 &plane_state->clip,
9213 DRM_PLANE_HELPER_NO_SCALING,
9214 DRM_PLANE_HELPER_NO_SCALING,
9215 true, true);
9216 if (ret)
9217 return ret;
9218
9219 if (!fb)
9220 return 0;
9221
9222 if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
9223 DRM_DEBUG_KMS("cursor cannot be tiled\n");
9224 return -EINVAL;
9225 }
9226
9227 src_x = plane_state->base.src_x >> 16;
9228 src_y = plane_state->base.src_y >> 16;
9229
9230 intel_add_fb_offsets(&src_x, &src_y, plane_state, 0);
9231 offset = intel_compute_tile_offset(&src_x, &src_y, plane_state, 0);
9232
9233 if (src_x != 0 || src_y != 0) {
9234 DRM_DEBUG_KMS("Arbitrary cursor panning not supported\n");
9235 return -EINVAL;
9236 }
9237
9238 plane_state->main.offset = offset;
9239
9240 return 0;
9241}
9242
9141static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, 9243static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state,
9142 const struct intel_plane_state *plane_state) 9244 const struct intel_plane_state *plane_state)
9143{ 9245{
9144 unsigned int width = plane_state->base.crtc_w; 9246 const struct drm_framebuffer *fb = plane_state->base.fb;
9145 unsigned int stride = roundup_pow_of_two(width) * 4;
9146 9247
9147 switch (stride) { 9248 return CURSOR_ENABLE |
9148 default: 9249 CURSOR_GAMMA_ENABLE |
9149 WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", 9250 CURSOR_FORMAT_ARGB |
9150 width, stride); 9251 CURSOR_STRIDE(fb->pitches[0]);
9151 stride = 256; 9252}
9152 /* fallthrough */ 9253
9254static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state)
9255{
9256 int width = plane_state->base.crtc_w;
9257
9258 /*
9259 * 845g/865g are only limited by the width of their cursors,
9260 * the height is arbitrary up to the precision of the register.
9261 */
9262 return intel_cursor_size_ok(plane_state) && IS_ALIGNED(width, 64);
9263}
9264
9265static int i845_check_cursor(struct intel_plane *plane,
9266 struct intel_crtc_state *crtc_state,
9267 struct intel_plane_state *plane_state)
9268{
9269 const struct drm_framebuffer *fb = plane_state->base.fb;
9270 int ret;
9271
9272 ret = intel_check_cursor(crtc_state, plane_state);
9273 if (ret)
9274 return ret;
9275
9276 /* if we want to turn off the cursor ignore width and height */
9277 if (!fb)
9278 return 0;
9279
9280 /* Check for which cursor types we support */
9281 if (!i845_cursor_size_ok(plane_state)) {
9282 DRM_DEBUG("Cursor dimension %dx%d not supported\n",
9283 plane_state->base.crtc_w,
9284 plane_state->base.crtc_h);
9285 return -EINVAL;
9286 }
9287
9288 switch (fb->pitches[0]) {
9153 case 256: 9289 case 256:
9154 case 512: 9290 case 512:
9155 case 1024: 9291 case 1024:
9156 case 2048: 9292 case 2048:
9157 break; 9293 break;
9294 default:
9295 DRM_DEBUG_KMS("Invalid cursor stride (%u)\n",
9296 fb->pitches[0]);
9297 return -EINVAL;
9158 } 9298 }
9159 9299
9160 return CURSOR_ENABLE | 9300 plane_state->ctl = i845_cursor_ctl(crtc_state, plane_state);
9161 CURSOR_GAMMA_ENABLE | 9301
9162 CURSOR_FORMAT_ARGB | 9302 return 0;
9163 CURSOR_STRIDE(stride);
9164} 9303}
9165 9304
9166static void i845_update_cursor(struct drm_crtc *crtc, u32 base, 9305static void i845_update_cursor(struct intel_plane *plane,
9306 const struct intel_crtc_state *crtc_state,
9167 const struct intel_plane_state *plane_state) 9307 const struct intel_plane_state *plane_state)
9168{ 9308{
9169 struct drm_device *dev = crtc->dev; 9309 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
9170 struct drm_i915_private *dev_priv = to_i915(dev); 9310 u32 cntl = 0, base = 0, pos = 0, size = 0;
9171 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 9311 unsigned long irqflags;
9172 uint32_t cntl = 0, size = 0;
9173 9312
9174 if (plane_state && plane_state->base.visible) { 9313 if (plane_state && plane_state->base.visible) {
9175 unsigned int width = plane_state->base.crtc_w; 9314 unsigned int width = plane_state->base.crtc_w;
@@ -9177,35 +9316,41 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
9177 9316
9178 cntl = plane_state->ctl; 9317 cntl = plane_state->ctl;
9179 size = (height << 12) | width; 9318 size = (height << 12) | width;
9180 }
9181 9319
9182 if (intel_crtc->cursor_cntl != 0 && 9320 base = intel_cursor_base(plane_state);
9183 (intel_crtc->cursor_base != base || 9321 pos = intel_cursor_position(plane_state);
9184 intel_crtc->cursor_size != size ||
9185 intel_crtc->cursor_cntl != cntl)) {
9186 /* On these chipsets we can only modify the base/size/stride
9187 * whilst the cursor is disabled.
9188 */
9189 I915_WRITE_FW(CURCNTR(PIPE_A), 0);
9190 POSTING_READ_FW(CURCNTR(PIPE_A));
9191 intel_crtc->cursor_cntl = 0;
9192 } 9322 }
9193 9323
9194 if (intel_crtc->cursor_base != base) { 9324 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
9195 I915_WRITE_FW(CURBASE(PIPE_A), base);
9196 intel_crtc->cursor_base = base;
9197 }
9198 9325
9199 if (intel_crtc->cursor_size != size) { 9326 /* On these chipsets we can only modify the base/size/stride
9327 * whilst the cursor is disabled.
9328 */
9329 if (plane->cursor.base != base ||
9330 plane->cursor.size != size ||
9331 plane->cursor.cntl != cntl) {
9332 I915_WRITE_FW(CURCNTR(PIPE_A), 0);
9333 I915_WRITE_FW(CURBASE(PIPE_A), base);
9200 I915_WRITE_FW(CURSIZE, size); 9334 I915_WRITE_FW(CURSIZE, size);
9201 intel_crtc->cursor_size = size; 9335 I915_WRITE_FW(CURPOS(PIPE_A), pos);
9202 }
9203
9204 if (intel_crtc->cursor_cntl != cntl) {
9205 I915_WRITE_FW(CURCNTR(PIPE_A), cntl); 9336 I915_WRITE_FW(CURCNTR(PIPE_A), cntl);
9206 POSTING_READ_FW(CURCNTR(PIPE_A)); 9337
9207 intel_crtc->cursor_cntl = cntl; 9338 plane->cursor.base = base;
9339 plane->cursor.size = size;
9340 plane->cursor.cntl = cntl;
9341 } else {
9342 I915_WRITE_FW(CURPOS(PIPE_A), pos);
9208 } 9343 }
9344
9345 POSTING_READ_FW(CURCNTR(PIPE_A));
9346
9347 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
9348}
9349
9350static void i845_disable_cursor(struct intel_plane *plane,
9351 struct intel_crtc *crtc)
9352{
9353 i845_update_cursor(plane, NULL, NULL);
9209} 9354}
9210 9355
9211static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, 9356static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
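The cursor code is restructured so base, position and control values are computed up front (intel_cursor_base()/intel_cursor_position()/intel_check_cursor()) and the cached cursor state moves from the CRTC into plane->cursor. The CURPOS encoding used by intel_cursor_position() is sign-magnitude: a negative coordinate sets a sign bit next to the magnitude rather than being stored two's-complement. A standalone sketch of the packing; the sign/shift constants are quoted on the assumption that they match i915_reg.h:

#include <stdio.h>

/* Assumed to match i915_reg.h: x in the low 16 bits, y in the high 16 bits,
 * with a sign bit at 0x8000 within each half. */
#define CURSOR_POS_SIGN	0x8000u
#define CURSOR_X_SHIFT	0
#define CURSOR_Y_SHIFT	16

static unsigned int cursor_position(int x, int y)
{
	unsigned int pos = 0;

	if (x < 0) {
		pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
		x = -x;
	}
	pos |= (unsigned int)x << CURSOR_X_SHIFT;

	if (y < 0) {
		pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
		y = -y;
	}
	pos |= (unsigned int)y << CURSOR_Y_SHIFT;

	return pos;
}

int main(void)
{
	/* Cursor hanging off the top-left corner at (-16, -8): magnitude plus
	 * the per-axis sign bit. */
	printf("CURPOS(-16, -8) = 0x%08x\n", cursor_position(-16, -8));	/* 0x80088010 */
	printf("CURPOS(100, 50) = 0x%08x\n", cursor_position(100, 50));	/* 0x00320064 */
	return 0;
}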
@@ -9214,7 +9359,6 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
9214 struct drm_i915_private *dev_priv = 9359 struct drm_i915_private *dev_priv =
9215 to_i915(plane_state->base.plane->dev); 9360 to_i915(plane_state->base.plane->dev);
9216 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); 9361 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
9217 enum pipe pipe = crtc->pipe;
9218 u32 cntl; 9362 u32 cntl;
9219 9363
9220 cntl = MCURSOR_GAMMA_ENABLE; 9364 cntl = MCURSOR_GAMMA_ENABLE;
@@ -9222,7 +9366,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
9222 if (HAS_DDI(dev_priv)) 9366 if (HAS_DDI(dev_priv))
9223 cntl |= CURSOR_PIPE_CSC_ENABLE; 9367 cntl |= CURSOR_PIPE_CSC_ENABLE;
9224 9368
9225 cntl |= pipe << 28; /* Connect to correct pipe */ 9369 cntl |= MCURSOR_PIPE_SELECT(crtc->pipe);
9226 9370
9227 switch (plane_state->base.crtc_w) { 9371 switch (plane_state->base.crtc_w) {
9228 case 64: 9372 case 64:
@@ -9245,116 +9389,154 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
9245 return cntl; 9389 return cntl;
9246} 9390}
9247 9391
9248static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, 9392static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state)
9249 const struct intel_plane_state *plane_state)
9250{ 9393{
9251 struct drm_device *dev = crtc->dev; 9394 struct drm_i915_private *dev_priv =
9252 struct drm_i915_private *dev_priv = to_i915(dev); 9395 to_i915(plane_state->base.plane->dev);
9253 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 9396 int width = plane_state->base.crtc_w;
9254 int pipe = intel_crtc->pipe; 9397 int height = plane_state->base.crtc_h;
9255 uint32_t cntl = 0;
9256 9398
9257 if (plane_state && plane_state->base.visible) 9399 if (!intel_cursor_size_ok(plane_state))
9258 cntl = plane_state->ctl; 9400 return false;
9259 9401
9260 if (intel_crtc->cursor_cntl != cntl) { 9402 /* Cursor width is limited to a few power-of-two sizes */
9261 I915_WRITE_FW(CURCNTR(pipe), cntl); 9403 switch (width) {
9262 POSTING_READ_FW(CURCNTR(pipe)); 9404 case 256:
9263 intel_crtc->cursor_cntl = cntl; 9405 case 128:
9406 case 64:
9407 break;
9408 default:
9409 return false;
9264 } 9410 }
9265 9411
9266 /* and commit changes on next vblank */ 9412 /*
9267 I915_WRITE_FW(CURBASE(pipe), base); 9413 * IVB+ have CUR_FBC_CTL which allows an arbitrary cursor
9268 POSTING_READ_FW(CURBASE(pipe)); 9414 * height from 8 lines up to the cursor width, when the
9415 * cursor is not rotated. Everything else requires square
9416 * cursors.
9417 */
9418 if (HAS_CUR_FBC(dev_priv) &&
9419 plane_state->base.rotation & DRM_MODE_ROTATE_0) {
9420 if (height < 8 || height > width)
9421 return false;
9422 } else {
9423 if (height != width)
9424 return false;
9425 }
9269 9426
9270 intel_crtc->cursor_base = base; 9427 return true;
9271} 9428}
9272 9429
9273/* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */ 9430static int i9xx_check_cursor(struct intel_plane *plane,
9274static void intel_crtc_update_cursor(struct drm_crtc *crtc, 9431 struct intel_crtc_state *crtc_state,
9275 const struct intel_plane_state *plane_state) 9432 struct intel_plane_state *plane_state)
9276{ 9433{
9277 struct drm_device *dev = crtc->dev; 9434 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
9278 struct drm_i915_private *dev_priv = to_i915(dev); 9435 const struct drm_framebuffer *fb = plane_state->base.fb;
9279 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 9436 enum pipe pipe = plane->pipe;
9280 int pipe = intel_crtc->pipe; 9437 int ret;
9281 u32 base = intel_crtc->cursor_addr;
9282 unsigned long irqflags;
9283 u32 pos = 0;
9284
9285 if (plane_state) {
9286 int x = plane_state->base.crtc_x;
9287 int y = plane_state->base.crtc_y;
9288 9438
9289 if (x < 0) { 9439 ret = intel_check_cursor(crtc_state, plane_state);
9290 pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT; 9440 if (ret)
9291 x = -x; 9441 return ret;
9292 }
9293 pos |= x << CURSOR_X_SHIFT;
9294 9442
9295 if (y < 0) { 9443 /* if we want to turn off the cursor ignore width and height */
9296 pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT; 9444 if (!fb)
9297 y = -y; 9445 return 0;
9298 }
9299 pos |= y << CURSOR_Y_SHIFT;
9300 9446
9301 /* ILK+ do this automagically */ 9447 /* Check for which cursor types we support */
9302 if (HAS_GMCH_DISPLAY(dev_priv) && 9448 if (!i9xx_cursor_size_ok(plane_state)) {
9303 plane_state->base.rotation & DRM_MODE_ROTATE_180) { 9449 DRM_DEBUG("Cursor dimension %dx%d not supported\n",
9304 base += (plane_state->base.crtc_h * 9450 plane_state->base.crtc_w,
9305 plane_state->base.crtc_w - 1) * 4; 9451 plane_state->base.crtc_h);
9306 } 9452 return -EINVAL;
9307 } 9453 }
9308 9454
9309 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 9455 if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) {
9456 DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n",
9457 fb->pitches[0], plane_state->base.crtc_w);
9458 return -EINVAL;
9459 }
9310 9460
9311 I915_WRITE_FW(CURPOS(pipe), pos); 9461 /*
9462 * There's something wrong with the cursor on CHV pipe C.
9463 * If it straddles the left edge of the screen then
9464 * moving it away from the edge or disabling it often
9465 * results in a pipe underrun, and often that can lead to
9466 * dead pipe (constant underrun reported, and it scans
9467 * out just a solid color). To recover from that, the
9468 * display power well must be turned off and on again.
9469 * Refuse the put the cursor into that compromised position.
9470 */
9471 if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
9472 plane_state->base.visible && plane_state->base.crtc_x < 0) {
9473 DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
9474 return -EINVAL;
9475 }
9312 9476
9313 if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) 9477 plane_state->ctl = i9xx_cursor_ctl(crtc_state, plane_state);
9314 i845_update_cursor(crtc, base, plane_state);
9315 else
9316 i9xx_update_cursor(crtc, base, plane_state);
9317 9478
9318 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); 9479 return 0;
9319} 9480}
9320 9481
9321static bool cursor_size_ok(struct drm_i915_private *dev_priv, 9482static void i9xx_update_cursor(struct intel_plane *plane,
9322 uint32_t width, uint32_t height) 9483 const struct intel_crtc_state *crtc_state,
9484 const struct intel_plane_state *plane_state)
9323{ 9485{
9324 if (width == 0 || height == 0) 9486 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
9325 return false; 9487 enum pipe pipe = plane->pipe;
9488 u32 cntl = 0, base = 0, pos = 0, fbc_ctl = 0;
9489 unsigned long irqflags;
9326 9490
9327 /* 9491 if (plane_state && plane_state->base.visible) {
9328 * 845g/865g are special in that they are only limited by 9492 cntl = plane_state->ctl;
9329 * the width of their cursors, the height is arbitrary up to
9330 * the precision of the register. Everything else requires
9331 * square cursors, limited to a few power-of-two sizes.
9332 */
9333 if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
9334 if ((width & 63) != 0)
9335 return false;
9336 9493
9337 if (width > (IS_I845G(dev_priv) ? 64 : 512)) 9494 if (plane_state->base.crtc_h != plane_state->base.crtc_w)
9338 return false; 9495 fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1);
9339 9496
9340 if (height > 1023) 9497 base = intel_cursor_base(plane_state);
9341 return false; 9498 pos = intel_cursor_position(plane_state);
9499 }
9500
9501 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
9502
9503 /*
9504 * On some platforms writing CURCNTR first will also
9505 * cause CURPOS to be armed by the CURBASE write.
9506 * Without the CURCNTR write the CURPOS write would
9507 * arm itself.
9508 *
9509 * CURCNTR and CUR_FBC_CTL are always
9510 * armed by the CURBASE write only.
9511 */
9512 if (plane->cursor.base != base ||
9513 plane->cursor.size != fbc_ctl ||
9514 plane->cursor.cntl != cntl) {
9515 I915_WRITE_FW(CURCNTR(pipe), cntl);
9516 if (HAS_CUR_FBC(dev_priv))
9517 I915_WRITE_FW(CUR_FBC_CTL(pipe), fbc_ctl);
9518 I915_WRITE_FW(CURPOS(pipe), pos);
9519 I915_WRITE_FW(CURBASE(pipe), base);
9520
9521 plane->cursor.base = base;
9522 plane->cursor.size = fbc_ctl;
9523 plane->cursor.cntl = cntl;
9342 } else { 9524 } else {
9343 switch (width | height) { 9525 I915_WRITE_FW(CURPOS(pipe), pos);
9344 case 256:
9345 case 128:
9346 if (IS_GEN2(dev_priv))
9347 return false;
9348 case 64:
9349 break;
9350 default:
9351 return false;
9352 }
9353 } 9526 }
9354 9527
9355 return true; 9528 POSTING_READ_FW(CURBASE(pipe));
9529
9530 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
9531}
9532
9533static void i9xx_disable_cursor(struct intel_plane *plane,
9534 struct intel_crtc *crtc)
9535{
9536 i9xx_update_cursor(plane, NULL, NULL);
9356} 9537}
9357 9538
9539
9358/* VESA 640x480x72Hz mode to set on the pipe */ 9540/* VESA 640x480x72Hz mode to set on the pipe */
9359static struct drm_display_mode load_detect_mode = { 9541static struct drm_display_mode load_detect_mode = {
9360 DRM_MODE("640x480", DRM_MODE_TYPE_DEFAULT, 31500, 640, 664, 9542 DRM_MODE("640x480", DRM_MODE_TYPE_DEFAULT, 31500, 640, 664,
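i9xx_cursor_size_ok() folds the old cursor_size_ok() rules into the plane hook: the width must be one of 64/128/256, and when CUR_FBC_CTL is available and the cursor is unrotated the height may range from 8 lines up to the width, otherwise the cursor must be square. A standalone sketch of that rule, with the platform and rotation checks reduced to booleans and the mode_config limits passed in explicitly:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the i9xx_cursor_size_ok() logic above for illustration. */
static bool i9xx_cursor_size_ok(int width, int height,
				int max_w, int max_h,
				bool has_cur_fbc, bool rotate_0)
{
	if (width <= 0 || width > max_w || height <= 0 || height > max_h)
		return false;

	/* Cursor width is limited to a few power-of-two sizes. */
	switch (width) {
	case 256:
	case 128:
	case 64:
		break;
	default:
		return false;
	}

	/* IVB+ CUR_FBC_CTL allows 8..width lines when not rotated;
	 * everything else requires a square cursor. */
	if (has_cur_fbc && rotate_0)
		return height >= 8 && height <= width;

	return height == width;
}

int main(void)
{
	printf("%d %d %d\n",
	       i9xx_cursor_size_ok(256, 64, 256, 256, true, true),	/* 1 */
	       i9xx_cursor_size_ok(256, 64, 256, 256, false, true),	/* 0 */
	       i9xx_cursor_size_ok(100, 100, 256, 256, true, true));	/* 0 */
	return 0;
}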
@@ -9566,6 +9748,7 @@ int intel_get_load_detect_pipe(struct drm_connector *connector,
9566 */ 9748 */
9567 if (!crtc) { 9749 if (!crtc) {
9568 DRM_DEBUG_KMS("no pipe available for load-detect\n"); 9750 DRM_DEBUG_KMS("no pipe available for load-detect\n");
9751 ret = -ENODEV;
9569 goto fail; 9752 goto fail;
9570 } 9753 }
9571 9754
@@ -9622,6 +9805,7 @@ found:
9622 DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n"); 9805 DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n");
9623 if (IS_ERR(fb)) { 9806 if (IS_ERR(fb)) {
9624 DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n"); 9807 DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n");
9808 ret = PTR_ERR(fb);
9625 goto fail; 9809 goto fail;
9626 } 9810 }
9627 9811
@@ -10853,21 +11037,21 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
10853 turn_off, turn_on, mode_changed); 11037 turn_off, turn_on, mode_changed);
10854 11038
10855 if (turn_on) { 11039 if (turn_on) {
10856 if (INTEL_GEN(dev_priv) < 5) 11040 if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
10857 pipe_config->update_wm_pre = true; 11041 pipe_config->update_wm_pre = true;
10858 11042
10859 /* must disable cxsr around plane enable/disable */ 11043 /* must disable cxsr around plane enable/disable */
10860 if (plane->id != PLANE_CURSOR) 11044 if (plane->id != PLANE_CURSOR)
10861 pipe_config->disable_cxsr = true; 11045 pipe_config->disable_cxsr = true;
10862 } else if (turn_off) { 11046 } else if (turn_off) {
10863 if (INTEL_GEN(dev_priv) < 5) 11047 if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
10864 pipe_config->update_wm_post = true; 11048 pipe_config->update_wm_post = true;
10865 11049
10866 /* must disable cxsr around plane enable/disable */ 11050 /* must disable cxsr around plane enable/disable */
10867 if (plane->id != PLANE_CURSOR) 11051 if (plane->id != PLANE_CURSOR)
10868 pipe_config->disable_cxsr = true; 11052 pipe_config->disable_cxsr = true;
10869 } else if (intel_wm_need_update(&plane->base, plane_state)) { 11053 } else if (intel_wm_need_update(&plane->base, plane_state)) {
10870 if (INTEL_GEN(dev_priv) < 5) { 11054 if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) {
10871 /* FIXME bollocks */ 11055 /* FIXME bollocks */
10872 pipe_config->update_wm_pre = true; 11056 pipe_config->update_wm_pre = true;
10873 pipe_config->update_wm_post = true; 11057 pipe_config->update_wm_post = true;
@@ -11291,7 +11475,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
11291 shared_dpll = crtc_state->shared_dpll; 11475 shared_dpll = crtc_state->shared_dpll;
11292 dpll_hw_state = crtc_state->dpll_hw_state; 11476 dpll_hw_state = crtc_state->dpll_hw_state;
11293 force_thru = crtc_state->pch_pfit.force_thru; 11477 force_thru = crtc_state->pch_pfit.force_thru;
11294 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 11478 if (IS_G4X(dev_priv) ||
11479 IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
11295 wm_state = crtc_state->wm; 11480 wm_state = crtc_state->wm;
11296 11481
11297 /* Keep base drm_crtc_state intact, only clear our extended struct */ 11482 /* Keep base drm_crtc_state intact, only clear our extended struct */
@@ -11303,7 +11488,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
11303 crtc_state->shared_dpll = shared_dpll; 11488 crtc_state->shared_dpll = shared_dpll;
11304 crtc_state->dpll_hw_state = dpll_hw_state; 11489 crtc_state->dpll_hw_state = dpll_hw_state;
11305 crtc_state->pch_pfit.force_thru = force_thru; 11490 crtc_state->pch_pfit.force_thru = force_thru;
11306 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 11491 if (IS_G4X(dev_priv) ||
11492 IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
11307 crtc_state->wm = wm_state; 11493 crtc_state->wm = wm_state;
11308} 11494}
11309 11495
@@ -11865,7 +12051,7 @@ static void verify_wm_state(struct drm_crtc *crtc,
11865 * allocation. In that case since the ddb allocation will be updated 12051 * allocation. In that case since the ddb allocation will be updated
11866 * once the plane becomes visible, we can skip this check 12052 * once the plane becomes visible, we can skip this check
11867 */ 12053 */
11868 if (intel_crtc->cursor_addr) { 12054 if (1) {
11869 hw_plane_wm = &hw_wm.planes[PLANE_CURSOR]; 12055 hw_plane_wm = &hw_wm.planes[PLANE_CURSOR];
11870 sw_plane_wm = &sw_wm->planes[PLANE_CURSOR]; 12056 sw_plane_wm = &sw_wm->planes[PLANE_CURSOR];
11871 12057
@@ -11921,11 +12107,15 @@ verify_connector_state(struct drm_device *dev,
11921 12107
11922 for_each_new_connector_in_state(state, connector, new_conn_state, i) { 12108 for_each_new_connector_in_state(state, connector, new_conn_state, i) {
11923 struct drm_encoder *encoder = connector->encoder; 12109 struct drm_encoder *encoder = connector->encoder;
12110 struct drm_crtc_state *crtc_state = NULL;
11924 12111
11925 if (new_conn_state->crtc != crtc) 12112 if (new_conn_state->crtc != crtc)
11926 continue; 12113 continue;
11927 12114
11928 intel_connector_verify_state(to_intel_connector(connector)); 12115 if (crtc)
12116 crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
12117
12118 intel_connector_verify_state(crtc_state, new_conn_state);
11929 12119
11930 I915_STATE_WARN(new_conn_state->best_encoder != encoder, 12120 I915_STATE_WARN(new_conn_state->best_encoder != encoder,
11931 "connector's atomic encoder doesn't match legacy encoder\n"); 12121 "connector's atomic encoder doesn't match legacy encoder\n");
@@ -12043,7 +12233,7 @@ verify_crtc_state(struct drm_crtc *crtc,
12043 12233
12044 intel_pipe_config_sanity_check(dev_priv, pipe_config); 12234 intel_pipe_config_sanity_check(dev_priv, pipe_config);
12045 12235
12046 sw_config = to_intel_crtc_state(crtc->state); 12236 sw_config = to_intel_crtc_state(new_crtc_state);
12047 if (!intel_pipe_config_compare(dev_priv, sw_config, 12237 if (!intel_pipe_config_compare(dev_priv, sw_config,
12048 pipe_config, false)) { 12238 pipe_config, false)) {
12049 I915_STATE_WARN(1, "pipe state doesn't match!\n"); 12239 I915_STATE_WARN(1, "pipe state doesn't match!\n");
@@ -13139,7 +13329,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
13139 if (obj) { 13329 if (obj) {
13140 if (plane->type == DRM_PLANE_TYPE_CURSOR && 13330 if (plane->type == DRM_PLANE_TYPE_CURSOR &&
13141 INTEL_INFO(dev_priv)->cursor_needs_physical) { 13331 INTEL_INFO(dev_priv)->cursor_needs_physical) {
13142 const int align = IS_I830(dev_priv) ? 16 * 1024 : 256; 13332 const int align = intel_cursor_alignment(dev_priv);
13143 13333
13144 ret = i915_gem_object_attach_phys(obj, align); 13334 ret = i915_gem_object_attach_phys(obj, align);
13145 if (ret) { 13335 if (ret) {
@@ -13269,11 +13459,11 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state
13269} 13459}
13270 13460
13271static int 13461static int
13272intel_check_primary_plane(struct drm_plane *plane, 13462intel_check_primary_plane(struct intel_plane *plane,
13273 struct intel_crtc_state *crtc_state, 13463 struct intel_crtc_state *crtc_state,
13274 struct intel_plane_state *state) 13464 struct intel_plane_state *state)
13275{ 13465{
13276 struct drm_i915_private *dev_priv = to_i915(plane->dev); 13466 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
13277 struct drm_crtc *crtc = state->base.crtc; 13467 struct drm_crtc *crtc = state->base.crtc;
13278 int min_scale = DRM_PLANE_HELPER_NO_SCALING; 13468 int min_scale = DRM_PLANE_HELPER_NO_SCALING;
13279 int max_scale = DRM_PLANE_HELPER_NO_SCALING; 13469 int max_scale = DRM_PLANE_HELPER_NO_SCALING;
@@ -13452,7 +13642,7 @@ intel_legacy_cursor_update(struct drm_plane *plane,
13452 goto out_free; 13642 goto out_free;
13453 13643
13454 if (INTEL_INFO(dev_priv)->cursor_needs_physical) { 13644 if (INTEL_INFO(dev_priv)->cursor_needs_physical) {
13455 int align = IS_I830(dev_priv) ? 16 * 1024 : 256; 13645 int align = intel_cursor_alignment(dev_priv);
13456 13646
13457 ret = i915_gem_object_attach_phys(intel_fb_obj(fb), align); 13647 ret = i915_gem_object_attach_phys(intel_fb_obj(fb), align);
13458 if (ret) { 13648 if (ret) {
@@ -13488,12 +13678,12 @@ intel_legacy_cursor_update(struct drm_plane *plane,
13488 13678
13489 if (plane->state->visible) { 13679 if (plane->state->visible) {
13490 trace_intel_update_plane(plane, to_intel_crtc(crtc)); 13680 trace_intel_update_plane(plane, to_intel_crtc(crtc));
13491 intel_plane->update_plane(plane, 13681 intel_plane->update_plane(intel_plane,
13492 to_intel_crtc_state(crtc->state), 13682 to_intel_crtc_state(crtc->state),
13493 to_intel_plane_state(plane->state)); 13683 to_intel_plane_state(plane->state));
13494 } else { 13684 } else {
13495 trace_intel_disable_plane(plane, to_intel_crtc(crtc)); 13685 trace_intel_disable_plane(plane, to_intel_crtc(crtc));
13496 intel_plane->disable_plane(plane, crtc); 13686 intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc));
13497 } 13687 }
13498 13688
13499 intel_cleanup_plane_fb(plane, new_plane_state); 13689 intel_cleanup_plane_fb(plane, new_plane_state);
@@ -13636,107 +13826,9 @@ fail:
13636 return ERR_PTR(ret); 13826 return ERR_PTR(ret);
13637} 13827}
13638 13828
13639static int
13640intel_check_cursor_plane(struct drm_plane *plane,
13641 struct intel_crtc_state *crtc_state,
13642 struct intel_plane_state *state)
13643{
13644 struct drm_i915_private *dev_priv = to_i915(plane->dev);
13645 struct drm_framebuffer *fb = state->base.fb;
13646 struct drm_i915_gem_object *obj = intel_fb_obj(fb);
13647 enum pipe pipe = to_intel_plane(plane)->pipe;
13648 unsigned stride;
13649 int ret;
13650
13651 ret = drm_plane_helper_check_state(&state->base,
13652 &state->clip,
13653 DRM_PLANE_HELPER_NO_SCALING,
13654 DRM_PLANE_HELPER_NO_SCALING,
13655 true, true);
13656 if (ret)
13657 return ret;
13658
13659 /* if we want to turn off the cursor ignore width and height */
13660 if (!obj)
13661 return 0;
13662
13663 /* Check for which cursor types we support */
13664 if (!cursor_size_ok(dev_priv, state->base.crtc_w,
13665 state->base.crtc_h)) {
13666 DRM_DEBUG("Cursor dimension %dx%d not supported\n",
13667 state->base.crtc_w, state->base.crtc_h);
13668 return -EINVAL;
13669 }
13670
13671 stride = roundup_pow_of_two(state->base.crtc_w) * 4;
13672 if (obj->base.size < stride * state->base.crtc_h) {
13673 DRM_DEBUG_KMS("buffer is too small\n");
13674 return -ENOMEM;
13675 }
13676
13677 if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
13678 DRM_DEBUG_KMS("cursor cannot be tiled\n");
13679 return -EINVAL;
13680 }
13681
13682 /*
13683 * There's something wrong with the cursor on CHV pipe C.
13684 * If it straddles the left edge of the screen then
13685 * moving it away from the edge or disabling it often
13686 * results in a pipe underrun, and often that can lead to
13687 * dead pipe (constant underrun reported, and it scans
13688 * out just a solid color). To recover from that, the
13689 * display power well must be turned off and on again.
13690 * Refuse the put the cursor into that compromised position.
13691 */
13692 if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
13693 state->base.visible && state->base.crtc_x < 0) {
13694 DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
13695 return -EINVAL;
13696 }
13697
13698 if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
13699 state->ctl = i845_cursor_ctl(crtc_state, state);
13700 else
13701 state->ctl = i9xx_cursor_ctl(crtc_state, state);
13702
13703 return 0;
13704}
13705
13706static void
13707intel_disable_cursor_plane(struct drm_plane *plane,
13708 struct drm_crtc *crtc)
13709{
13710 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
13711
13712 intel_crtc->cursor_addr = 0;
13713 intel_crtc_update_cursor(crtc, NULL);
13714}
13715
13716static void
13717intel_update_cursor_plane(struct drm_plane *plane,
13718 const struct intel_crtc_state *crtc_state,
13719 const struct intel_plane_state *state)
13720{
13721 struct drm_crtc *crtc = crtc_state->base.crtc;
13722 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
13723 struct drm_i915_private *dev_priv = to_i915(plane->dev);
13724 struct drm_i915_gem_object *obj = intel_fb_obj(state->base.fb);
13725 uint32_t addr;
13726
13727 if (!obj)
13728 addr = 0;
13729 else if (!INTEL_INFO(dev_priv)->cursor_needs_physical)
13730 addr = intel_plane_ggtt_offset(state);
13731 else
13732 addr = obj->phys_handle->busaddr;
13733
13734 intel_crtc->cursor_addr = addr;
13735 intel_crtc_update_cursor(crtc, state);
13736}
13737
13738static struct intel_plane * 13829static struct intel_plane *
13739intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) 13830intel_cursor_plane_create(struct drm_i915_private *dev_priv,
13831 enum pipe pipe)
13740{ 13832{
13741 struct intel_plane *cursor = NULL; 13833 struct intel_plane *cursor = NULL;
13742 struct intel_plane_state *state = NULL; 13834 struct intel_plane_state *state = NULL;
@@ -13762,9 +13854,22 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
13762 cursor->plane = pipe; 13854 cursor->plane = pipe;
13763 cursor->id = PLANE_CURSOR; 13855 cursor->id = PLANE_CURSOR;
13764 cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe); 13856 cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe);
13765 cursor->check_plane = intel_check_cursor_plane; 13857
13766 cursor->update_plane = intel_update_cursor_plane; 13858 if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
13767 cursor->disable_plane = intel_disable_cursor_plane; 13859 cursor->update_plane = i845_update_cursor;
13860 cursor->disable_plane = i845_disable_cursor;
13861 cursor->check_plane = i845_check_cursor;
13862 } else {
13863 cursor->update_plane = i9xx_update_cursor;
13864 cursor->disable_plane = i9xx_disable_cursor;
13865 cursor->check_plane = i9xx_check_cursor;
13866 }
13867
13868 cursor->cursor.base = ~0;
13869 cursor->cursor.cntl = ~0;
13870
13871 if (IS_I845G(dev_priv) || IS_I865G(dev_priv) || HAS_CUR_FBC(dev_priv))
13872 cursor->cursor.size = ~0;
13768 13873
13769 ret = drm_universal_plane_init(&dev_priv->drm, &cursor->base, 13874 ret = drm_universal_plane_init(&dev_priv->drm, &cursor->base,
13770 0, &intel_cursor_plane_funcs, 13875 0, &intel_cursor_plane_funcs,
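
Annotation: the hunks above drop the generic cursor check/update/disable helpers and instead select per-platform hooks once, at plane creation time (i845-class vs. i9xx-class), while the cached cursor register state moves from intel_crtc into the new intel_plane->cursor sub-struct. A minimal, self-contained sketch of that vfunc-selection pattern follows; the names and the is_845_class flag are illustrative stand-ins, not the driver's real API.

#include <stdbool.h>
#include <stdio.h>

struct plane_ops {
        void (*update)(void);
        void (*disable)(void);
};

static void i845_style_update(void)  { puts("845-style update"); }
static void i845_style_disable(void) { puts("845-style disable"); }
static void i9xx_style_update(void)  { puts("9xx-style update"); }
static void i9xx_style_disable(void) { puts("9xx-style disable"); }

/* Pick the hooks once at init instead of branching in a generic handler. */
static void plane_init(struct plane_ops *ops, bool is_845_class)
{
        if (is_845_class) {
                ops->update = i845_style_update;
                ops->disable = i845_style_disable;
        } else {
                ops->update = i9xx_style_update;
                ops->disable = i9xx_style_disable;
        }
}

int main(void)
{
        struct plane_ops ops;

        plane_init(&ops, false);
        ops.update();   /* dispatches straight to the 9xx-style hook */
        ops.disable();
        return 0;
}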
@@ -13873,10 +13978,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
13873 intel_crtc->pipe = pipe; 13978 intel_crtc->pipe = pipe;
13874 intel_crtc->plane = primary->plane; 13979 intel_crtc->plane = primary->plane;
13875 13980
13876 intel_crtc->cursor_base = ~0;
13877 intel_crtc->cursor_cntl = ~0;
13878 intel_crtc->cursor_size = ~0;
13879
13880 /* initialize shared scalers */ 13981 /* initialize shared scalers */
13881 intel_crtc_init_scalers(intel_crtc, crtc_state); 13982 intel_crtc_init_scalers(intel_crtc, crtc_state);
13882 13983
@@ -14416,7 +14517,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
14416 case DRM_FORMAT_UYVY: 14517 case DRM_FORMAT_UYVY:
14417 case DRM_FORMAT_YVYU: 14518 case DRM_FORMAT_YVYU:
14418 case DRM_FORMAT_VYUY: 14519 case DRM_FORMAT_VYUY:
14419 if (INTEL_GEN(dev_priv) < 5) { 14520 if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) {
14420 DRM_DEBUG_KMS("unsupported pixel format: %s\n", 14521 DRM_DEBUG_KMS("unsupported pixel format: %s\n",
14421 drm_get_format_name(mode_cmd->pixel_format, &format_name)); 14522 drm_get_format_name(mode_cmd->pixel_format, &format_name));
14422 goto err; 14523 goto err;
@@ -14928,6 +15029,7 @@ int intel_modeset_init(struct drm_device *dev)
14928 15029
14929 dev->mode_config.funcs = &intel_mode_funcs; 15030 dev->mode_config.funcs = &intel_mode_funcs;
14930 15031
15032 init_llist_head(&dev_priv->atomic_helper.free_list);
14931 INIT_WORK(&dev_priv->atomic_helper.free_work, 15033 INIT_WORK(&dev_priv->atomic_helper.free_work,
14932 intel_atomic_helper_free_state_worker); 15034 intel_atomic_helper_free_state_worker);
14933 15035
@@ -15149,7 +15251,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
15149 continue; 15251 continue;
15150 15252
15151 trace_intel_disable_plane(&plane->base, crtc); 15253 trace_intel_disable_plane(&plane->base, crtc);
15152 plane->disable_plane(&plane->base, &crtc->base); 15254 plane->disable_plane(plane, crtc);
15153 } 15255 }
15154 } 15256 }
15155 15257
@@ -15520,7 +15622,10 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
15520 pll->on = false; 15622 pll->on = false;
15521 } 15623 }
15522 15624
15523 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 15625 if (IS_G4X(dev_priv)) {
15626 g4x_wm_get_hw_state(dev);
15627 g4x_wm_sanitize(dev_priv);
15628 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
15524 vlv_wm_get_hw_state(dev); 15629 vlv_wm_get_hw_state(dev);
15525 vlv_wm_sanitize(dev_priv); 15630 vlv_wm_sanitize(dev_priv);
15526 } else if (IS_GEN9(dev_priv)) { 15631 } else if (IS_GEN9(dev_priv)) {
@@ -15554,13 +15659,6 @@ void intel_display_resume(struct drm_device *dev)
15554 if (state) 15659 if (state)
15555 state->acquire_ctx = &ctx; 15660 state->acquire_ctx = &ctx;
15556 15661
15557 /*
15558 * This is a cludge because with real atomic modeset mode_config.mutex
15559 * won't be taken. Unfortunately some probed state like
15560 * audio_codec_enable is still protected by mode_config.mutex, so lock
15561 * it here for now.
15562 */
15563 mutex_lock(&dev->mode_config.mutex);
15564 drm_modeset_acquire_init(&ctx, 0); 15662 drm_modeset_acquire_init(&ctx, 0);
15565 15663
15566 while (1) { 15664 while (1) {
@@ -15576,7 +15674,6 @@ void intel_display_resume(struct drm_device *dev)
15576 15674
15577 drm_modeset_drop_locks(&ctx); 15675 drm_modeset_drop_locks(&ctx);
15578 drm_modeset_acquire_fini(&ctx); 15676 drm_modeset_acquire_fini(&ctx);
15579 mutex_unlock(&dev->mode_config.mutex);
15580 15677
15581 if (ret) 15678 if (ret)
15582 DRM_ERROR("Restoring old state failed with %i\n", ret); 15679 DRM_ERROR("Restoring old state failed with %i\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index ee77b519835c..4a6feb6a69bd 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -133,36 +133,55 @@ static void vlv_steal_power_sequencer(struct drm_device *dev,
133 enum pipe pipe); 133 enum pipe pipe);
134static void intel_dp_unset_edid(struct intel_dp *intel_dp); 134static void intel_dp_unset_edid(struct intel_dp *intel_dp);
135 135
136static int 136static int intel_dp_num_rates(u8 link_bw_code)
137intel_dp_max_link_bw(struct intel_dp *intel_dp)
138{ 137{
139 int max_link_bw = intel_dp->dpcd[DP_MAX_LINK_RATE]; 138 switch (link_bw_code) {
140 139 default:
141 switch (max_link_bw) { 140 WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
141 link_bw_code);
142 case DP_LINK_BW_1_62: 142 case DP_LINK_BW_1_62:
143 return 1;
143 case DP_LINK_BW_2_7: 144 case DP_LINK_BW_2_7:
145 return 2;
144 case DP_LINK_BW_5_4: 146 case DP_LINK_BW_5_4:
145 break; 147 return 3;
146 default:
147 WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
148 max_link_bw);
149 max_link_bw = DP_LINK_BW_1_62;
150 break;
151 } 148 }
152 return max_link_bw;
153} 149}
154 150
155static u8 intel_dp_max_lane_count(struct intel_dp *intel_dp) 151/* update sink rates from dpcd */
152static void intel_dp_set_sink_rates(struct intel_dp *intel_dp)
156{ 153{
157 struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); 154 int i, num_rates;
158 u8 source_max, sink_max; 155
156 num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]);
157
158 for (i = 0; i < num_rates; i++)
159 intel_dp->sink_rates[i] = default_rates[i];
159 160
160 source_max = intel_dig_port->max_lanes; 161 intel_dp->num_sink_rates = num_rates;
161 sink_max = intel_dp->max_sink_lane_count; 162}
163
164/* Theoretical max between source and sink */
165static int intel_dp_max_common_rate(struct intel_dp *intel_dp)
166{
167 return intel_dp->common_rates[intel_dp->num_common_rates - 1];
168}
169
170/* Theoretical max between source and sink */
171static int intel_dp_max_common_lane_count(struct intel_dp *intel_dp)
172{
173 struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
174 int source_max = intel_dig_port->max_lanes;
175 int sink_max = drm_dp_max_lane_count(intel_dp->dpcd);
162 176
163 return min(source_max, sink_max); 177 return min(source_max, sink_max);
164} 178}
165 179
180int intel_dp_max_lane_count(struct intel_dp *intel_dp)
181{
182 return intel_dp->max_link_lane_count;
183}
184
166int 185int
167intel_dp_link_required(int pixel_clock, int bpp) 186intel_dp_link_required(int pixel_clock, int bpp)
168{ 187{
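
Annotation: intel_dp_num_rates() above maps the sink's DP_MAX_LINK_RATE code to a count of entries, which intel_dp_set_sink_rates() then copies out of the driver's default_rates table (1.62/2.7/5.4 Gbps). A compilable sketch of that mapping, with the raw DPCD bandwidth-code values written out and the warn-and-fall-back default simplified to a plain return, is:

#include <stdio.h>

static const int default_rates[] = { 162000, 270000, 540000 };

static int num_rates_for_bw_code(unsigned char link_bw_code)
{
        switch (link_bw_code) {
        case 0x06: return 1;    /* DP_LINK_BW_1_62 */
        case 0x0a: return 2;    /* DP_LINK_BW_2_7  */
        case 0x14: return 3;    /* DP_LINK_BW_5_4  */
        default:   return 1;    /* unknown code: fall back to 1.62 Gbps */
        }
}

int main(void)
{
        int sink_rates[3], i, n = num_rates_for_bw_code(0x14);

        for (i = 0; i < n; i++)
                sink_rates[i] = default_rates[i];
        for (i = 0; i < n; i++)
                printf("sink rate %d: %d kHz\n", i, sink_rates[i]);
        return 0;
}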
@@ -205,34 +224,25 @@ intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp)
205 return max_dotclk; 224 return max_dotclk;
206} 225}
207 226
208static int 227static void
209intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates) 228intel_dp_set_source_rates(struct intel_dp *intel_dp)
210{
211 if (intel_dp->num_sink_rates) {
212 *sink_rates = intel_dp->sink_rates;
213 return intel_dp->num_sink_rates;
214 }
215
216 *sink_rates = default_rates;
217
218 return (intel_dp->max_sink_link_bw >> 3) + 1;
219}
220
221static int
222intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
223{ 229{
224 struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); 230 struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
225 struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); 231 struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
232 const int *source_rates;
226 int size; 233 int size;
227 234
235 /* This should only be done once */
236 WARN_ON(intel_dp->source_rates || intel_dp->num_source_rates);
237
228 if (IS_GEN9_LP(dev_priv)) { 238 if (IS_GEN9_LP(dev_priv)) {
229 *source_rates = bxt_rates; 239 source_rates = bxt_rates;
230 size = ARRAY_SIZE(bxt_rates); 240 size = ARRAY_SIZE(bxt_rates);
231 } else if (IS_GEN9_BC(dev_priv)) { 241 } else if (IS_GEN9_BC(dev_priv)) {
232 *source_rates = skl_rates; 242 source_rates = skl_rates;
233 size = ARRAY_SIZE(skl_rates); 243 size = ARRAY_SIZE(skl_rates);
234 } else { 244 } else {
235 *source_rates = default_rates; 245 source_rates = default_rates;
236 size = ARRAY_SIZE(default_rates); 246 size = ARRAY_SIZE(default_rates);
237 } 247 }
238 248
@@ -240,7 +250,8 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
240 if (!intel_dp_source_supports_hbr2(intel_dp)) 250 if (!intel_dp_source_supports_hbr2(intel_dp))
241 size--; 251 size--;
242 252
243 return size; 253 intel_dp->source_rates = source_rates;
254 intel_dp->num_source_rates = size;
244} 255}
245 256
246static int intersect_rates(const int *source_rates, int source_len, 257static int intersect_rates(const int *source_rates, int source_len,
@@ -266,50 +277,83 @@ static int intersect_rates(const int *source_rates, int source_len,
266 return k; 277 return k;
267} 278}
268 279
269static int intel_dp_common_rates(struct intel_dp *intel_dp, 280/* return index of rate in rates array, or -1 if not found */
270 int *common_rates) 281static int intel_dp_rate_index(const int *rates, int len, int rate)
271{ 282{
272 const int *source_rates, *sink_rates; 283 int i;
273 int source_len, sink_len;
274 284
275 sink_len = intel_dp_sink_rates(intel_dp, &sink_rates); 285 for (i = 0; i < len; i++)
276 source_len = intel_dp_source_rates(intel_dp, &source_rates); 286 if (rate == rates[i])
287 return i;
277 288
278 return intersect_rates(source_rates, source_len, 289 return -1;
279 sink_rates, sink_len,
280 common_rates);
281} 290}
282 291
283static int intel_dp_link_rate_index(struct intel_dp *intel_dp, 292static void intel_dp_set_common_rates(struct intel_dp *intel_dp)
284 int *common_rates, int link_rate)
285{ 293{
286 int common_len; 294 WARN_ON(!intel_dp->num_source_rates || !intel_dp->num_sink_rates);
287 int index; 295
296 intel_dp->num_common_rates = intersect_rates(intel_dp->source_rates,
297 intel_dp->num_source_rates,
298 intel_dp->sink_rates,
299 intel_dp->num_sink_rates,
300 intel_dp->common_rates);
288 301
289 common_len = intel_dp_common_rates(intel_dp, common_rates); 302 /* Paranoia, there should always be something in common. */
290 for (index = 0; index < common_len; index++) { 303 if (WARN_ON(intel_dp->num_common_rates == 0)) {
291 if (link_rate == common_rates[common_len - index - 1]) 304 intel_dp->common_rates[0] = default_rates[0];
292 return common_len - index - 1; 305 intel_dp->num_common_rates = 1;
293 } 306 }
307}
294 308
295 return -1; 309/* get length of common rates potentially limited by max_rate */
310static int intel_dp_common_len_rate_limit(struct intel_dp *intel_dp,
311 int max_rate)
312{
313 const int *common_rates = intel_dp->common_rates;
314 int i, common_len = intel_dp->num_common_rates;
315
316 /* Limit results by potentially reduced max rate */
317 for (i = 0; i < common_len; i++) {
318 if (common_rates[common_len - i - 1] <= max_rate)
319 return common_len - i;
320 }
321
322 return 0;
323}
324
325static bool intel_dp_link_params_valid(struct intel_dp *intel_dp)
326{
327 /*
328 * FIXME: we need to synchronize the current link parameters with
329 * hardware readout. Currently fast link training doesn't work on
330 * boot-up.
331 */
332 if (intel_dp->link_rate == 0 ||
333 intel_dp->link_rate > intel_dp->max_link_rate)
334 return false;
335
336 if (intel_dp->lane_count == 0 ||
337 intel_dp->lane_count > intel_dp_max_lane_count(intel_dp))
338 return false;
339
340 return true;
296} 341}
297 342
298int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, 343int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
299 int link_rate, uint8_t lane_count) 344 int link_rate, uint8_t lane_count)
300{ 345{
301 int common_rates[DP_MAX_SUPPORTED_RATES]; 346 int index;
302 int link_rate_index;
303 347
304 link_rate_index = intel_dp_link_rate_index(intel_dp, 348 index = intel_dp_rate_index(intel_dp->common_rates,
305 common_rates, 349 intel_dp->num_common_rates,
306 link_rate); 350 link_rate);
307 if (link_rate_index > 0) { 351 if (index > 0) {
308 intel_dp->max_sink_link_bw = drm_dp_link_rate_to_bw_code(common_rates[link_rate_index - 1]); 352 intel_dp->max_link_rate = intel_dp->common_rates[index - 1];
309 intel_dp->max_sink_lane_count = lane_count; 353 intel_dp->max_link_lane_count = lane_count;
310 } else if (lane_count > 1) { 354 } else if (lane_count > 1) {
311 intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); 355 intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
312 intel_dp->max_sink_lane_count = lane_count >> 1; 356 intel_dp->max_link_lane_count = lane_count >> 1;
313 } else { 357 } else {
314 DRM_ERROR("Link Training Unsuccessful\n"); 358 DRM_ERROR("Link Training Unsuccessful\n");
315 return -1; 359 return -1;
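
Annotation: the hunk above replaces the old "recompute common rates on every lookup" helpers with a cached common_rates[] table plus a simple index search (intel_dp_rate_index()). A self-contained sketch of the two building blocks, using example rate tables rather than values read from hardware:

#include <stdio.h>

/* Merge-style intersection; both inputs are sorted ascending, as in the driver. */
static int intersect_rates(const int *a, int a_len,
                           const int *b, int b_len, int *out)
{
        int i = 0, j = 0, k = 0;

        while (i < a_len && j < b_len) {
                if (a[i] == b[j]) {
                        out[k++] = a[i];
                        i++; j++;
                } else if (a[i] < b[j]) {
                        i++;
                } else {
                        j++;
                }
        }
        return k;
}

static int rate_index(const int *rates, int len, int rate)
{
        int i;

        for (i = 0; i < len; i++)
                if (rates[i] == rate)
                        return i;
        return -1;      /* not found */
}

int main(void)
{
        const int source[] = { 162000, 270000, 540000 };
        const int sink[]   = { 162000, 270000 };
        int common[3], n;

        n = intersect_rates(source, 3, sink, 2, common);
        printf("common rates: %d, index of 270000: %d\n",
               n, rate_index(common, n, 270000));
        return 0;
}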
@@ -1486,24 +1530,21 @@ static void snprintf_int_array(char *str, size_t len,
1486 1530
1487static void intel_dp_print_rates(struct intel_dp *intel_dp) 1531static void intel_dp_print_rates(struct intel_dp *intel_dp)
1488{ 1532{
1489 const int *source_rates, *sink_rates;
1490 int source_len, sink_len, common_len;
1491 int common_rates[DP_MAX_SUPPORTED_RATES];
1492 char str[128]; /* FIXME: too big for stack? */ 1533 char str[128]; /* FIXME: too big for stack? */
1493 1534
1494 if ((drm_debug & DRM_UT_KMS) == 0) 1535 if ((drm_debug & DRM_UT_KMS) == 0)
1495 return; 1536 return;
1496 1537
1497 source_len = intel_dp_source_rates(intel_dp, &source_rates); 1538 snprintf_int_array(str, sizeof(str),
1498 snprintf_int_array(str, sizeof(str), source_rates, source_len); 1539 intel_dp->source_rates, intel_dp->num_source_rates);
1499 DRM_DEBUG_KMS("source rates: %s\n", str); 1540 DRM_DEBUG_KMS("source rates: %s\n", str);
1500 1541
1501 sink_len = intel_dp_sink_rates(intel_dp, &sink_rates); 1542 snprintf_int_array(str, sizeof(str),
1502 snprintf_int_array(str, sizeof(str), sink_rates, sink_len); 1543 intel_dp->sink_rates, intel_dp->num_sink_rates);
1503 DRM_DEBUG_KMS("sink rates: %s\n", str); 1544 DRM_DEBUG_KMS("sink rates: %s\n", str);
1504 1545
1505 common_len = intel_dp_common_rates(intel_dp, common_rates); 1546 snprintf_int_array(str, sizeof(str),
1506 snprintf_int_array(str, sizeof(str), common_rates, common_len); 1547 intel_dp->common_rates, intel_dp->num_common_rates);
1507 DRM_DEBUG_KMS("common rates: %s\n", str); 1548 DRM_DEBUG_KMS("common rates: %s\n", str);
1508} 1549}
1509 1550
@@ -1538,39 +1579,34 @@ bool intel_dp_read_desc(struct intel_dp *intel_dp)
1538 return true; 1579 return true;
1539} 1580}
1540 1581
1541static int rate_to_index(int find, const int *rates)
1542{
1543 int i = 0;
1544
1545 for (i = 0; i < DP_MAX_SUPPORTED_RATES; ++i)
1546 if (find == rates[i])
1547 break;
1548
1549 return i;
1550}
1551
1552int 1582int
1553intel_dp_max_link_rate(struct intel_dp *intel_dp) 1583intel_dp_max_link_rate(struct intel_dp *intel_dp)
1554{ 1584{
1555 int rates[DP_MAX_SUPPORTED_RATES] = {};
1556 int len; 1585 int len;
1557 1586
1558 len = intel_dp_common_rates(intel_dp, rates); 1587 len = intel_dp_common_len_rate_limit(intel_dp, intel_dp->max_link_rate);
1559 if (WARN_ON(len <= 0)) 1588 if (WARN_ON(len <= 0))
1560 return 162000; 1589 return 162000;
1561 1590
1562 return rates[len - 1]; 1591 return intel_dp->common_rates[len - 1];
1563} 1592}
1564 1593
1565int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) 1594int intel_dp_rate_select(struct intel_dp *intel_dp, int rate)
1566{ 1595{
1567 return rate_to_index(rate, intel_dp->sink_rates); 1596 int i = intel_dp_rate_index(intel_dp->sink_rates,
1597 intel_dp->num_sink_rates, rate);
1598
1599 if (WARN_ON(i < 0))
1600 i = 0;
1601
1602 return i;
1568} 1603}
1569 1604
1570void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, 1605void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
1571 uint8_t *link_bw, uint8_t *rate_select) 1606 uint8_t *link_bw, uint8_t *rate_select)
1572{ 1607{
1573 if (intel_dp->num_sink_rates) { 1608 /* eDP 1.4 rate select method. */
1609 if (intel_dp->use_rate_select) {
1574 *link_bw = 0; 1610 *link_bw = 0;
1575 *rate_select = 1611 *rate_select =
1576 intel_dp_rate_select(intel_dp, port_clock); 1612 intel_dp_rate_select(intel_dp, port_clock);
@@ -1618,14 +1654,13 @@ intel_dp_compute_config(struct intel_encoder *encoder,
1618 /* Conveniently, the link BW constants become indices with a shift...*/ 1654 /* Conveniently, the link BW constants become indices with a shift...*/
1619 int min_clock = 0; 1655 int min_clock = 0;
1620 int max_clock; 1656 int max_clock;
1621 int link_rate_index;
1622 int bpp, mode_rate; 1657 int bpp, mode_rate;
1623 int link_avail, link_clock; 1658 int link_avail, link_clock;
1624 int common_rates[DP_MAX_SUPPORTED_RATES] = {};
1625 int common_len; 1659 int common_len;
1626 uint8_t link_bw, rate_select; 1660 uint8_t link_bw, rate_select;
1627 1661
1628 common_len = intel_dp_common_rates(intel_dp, common_rates); 1662 common_len = intel_dp_common_len_rate_limit(intel_dp,
1663 intel_dp->max_link_rate);
1629 1664
1630 /* No common link rates between source and sink */ 1665 /* No common link rates between source and sink */
1631 WARN_ON(common_len <= 0); 1666 WARN_ON(common_len <= 0);
@@ -1662,16 +1697,18 @@ intel_dp_compute_config(struct intel_encoder *encoder,
1662 1697
1663 /* Use values requested by Compliance Test Request */ 1698 /* Use values requested by Compliance Test Request */
1664 if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { 1699 if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
1665 link_rate_index = intel_dp_link_rate_index(intel_dp, 1700 int index;
1666 common_rates, 1701
1667 intel_dp->compliance.test_link_rate); 1702 index = intel_dp_rate_index(intel_dp->common_rates,
1668 if (link_rate_index >= 0) 1703 intel_dp->num_common_rates,
1669 min_clock = max_clock = link_rate_index; 1704 intel_dp->compliance.test_link_rate);
1705 if (index >= 0)
1706 min_clock = max_clock = index;
1670 min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; 1707 min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count;
1671 } 1708 }
1672 DRM_DEBUG_KMS("DP link computation with max lane count %i " 1709 DRM_DEBUG_KMS("DP link computation with max lane count %i "
1673 "max bw %d pixel clock %iKHz\n", 1710 "max bw %d pixel clock %iKHz\n",
1674 max_lane_count, common_rates[max_clock], 1711 max_lane_count, intel_dp->common_rates[max_clock],
1675 adjusted_mode->crtc_clock); 1712 adjusted_mode->crtc_clock);
1676 1713
1677 /* Walk through all bpp values. Luckily they're all nicely spaced with 2 1714 /* Walk through all bpp values. Luckily they're all nicely spaced with 2
@@ -1707,7 +1744,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
1707 lane_count <= max_lane_count; 1744 lane_count <= max_lane_count;
1708 lane_count <<= 1) { 1745 lane_count <<= 1) {
1709 1746
1710 link_clock = common_rates[clock]; 1747 link_clock = intel_dp->common_rates[clock];
1711 link_avail = intel_dp_max_data_rate(link_clock, 1748 link_avail = intel_dp_max_data_rate(link_clock,
1712 lane_count); 1749 lane_count);
1713 1750
@@ -1739,7 +1776,7 @@ found:
1739 pipe_config->lane_count = lane_count; 1776 pipe_config->lane_count = lane_count;
1740 1777
1741 pipe_config->pipe_bpp = bpp; 1778 pipe_config->pipe_bpp = bpp;
1742 pipe_config->port_clock = common_rates[clock]; 1779 pipe_config->port_clock = intel_dp->common_rates[clock];
1743 1780
1744 intel_dp_compute_rate(intel_dp, pipe_config->port_clock, 1781 intel_dp_compute_rate(intel_dp, pipe_config->port_clock,
1745 &link_bw, &rate_select); 1782 &link_bw, &rate_select);
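
Annotation: with the cached intel_dp->common_rates[] in place, the compute-config loop above indexes that table directly. A rough, standalone sketch of the clock/lane search it performs (simplified data-rate math, example mode rate; the real loop also walks bpp from high to low):

#include <stdio.h>

static int max_data_rate(int link_clock_khz, int lanes)
{
        /* 8b/10b channel coding: usable payload is 8/10 of the symbol rate per lane. */
        return link_clock_khz * lanes * 8 / 10;
}

int main(void)
{
        const int common_rates[] = { 162000, 270000, 540000 };
        const int mode_rate = 533000;   /* example required rate */
        int clock, lanes;

        for (clock = 0; clock < 3; clock++) {
                for (lanes = 1; lanes <= 4; lanes <<= 1) {
                        if (max_data_rate(common_rates[clock], lanes) >= mode_rate) {
                                printf("pick %d kHz x%d lanes\n",
                                       common_rates[clock], lanes);
                                return 0;
                        }
                }
        }
        puts("no link configuration found");
        return 0;
}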
@@ -3051,7 +3088,8 @@ static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp)
3051{ 3088{
3052 uint8_t psr_caps = 0; 3089 uint8_t psr_caps = 0;
3053 3090
3054 drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps); 3091 if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps) != 1)
3092 return false;
3055 return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED; 3093 return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED;
3056} 3094}
3057 3095
@@ -3059,9 +3097,9 @@ static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp)
3059{ 3097{
3060 uint8_t dprx = 0; 3098 uint8_t dprx = 0;
3061 3099
3062 drm_dp_dpcd_readb(&intel_dp->aux, 3100 if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST,
3063 DP_DPRX_FEATURE_ENUMERATION_LIST, 3101 &dprx) != 1)
3064 &dprx); 3102 return false;
3065 return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; 3103 return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED;
3066} 3104}
3067 3105
@@ -3069,7 +3107,9 @@ static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp)
3069{ 3107{
3070 uint8_t alpm_caps = 0; 3108 uint8_t alpm_caps = 0;
3071 3109
3072 drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, &alpm_caps); 3110 if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP,
3111 &alpm_caps) != 1)
3112 return false;
3073 return alpm_caps & DP_ALPM_CAP; 3113 return alpm_caps & DP_ALPM_CAP;
3074} 3114}
3075 3115
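
Annotation: the three hunks above all adopt the same hardening pattern: drm_dp_dpcd_readb() returns the number of bytes transferred (or a negative error), so anything other than 1 is treated as "capability not present" instead of acting on an uninitialized byte. A userspace illustration of the pattern, where read_byte() is only a stand-in for the AUX read and the register offset is a placeholder:

#include <stdbool.h>
#include <stdio.h>

#define EXAMPLE_CAP_BIT 0x01

/* Stand-in for drm_dp_dpcd_readb(): pretend one byte was read successfully. */
static int read_byte(unsigned int reg, unsigned char *val)
{
        (void)reg;
        *val = EXAMPLE_CAP_BIT;
        return 1;
}

static bool get_cap(unsigned int reg)
{
        unsigned char v = 0;

        if (read_byte(reg, &v) != 1)    /* short read or error: report no cap */
                return false;
        return v & EXAMPLE_CAP_BIT;
}

int main(void)
{
        printf("cap supported: %d\n", get_cap(0x0 /* placeholder offset */));
        return 0;
}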
@@ -3642,9 +3682,10 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
3642 uint8_t frame_sync_cap; 3682 uint8_t frame_sync_cap;
3643 3683
3644 dev_priv->psr.sink_support = true; 3684 dev_priv->psr.sink_support = true;
3645 drm_dp_dpcd_read(&intel_dp->aux, 3685 if (drm_dp_dpcd_readb(&intel_dp->aux,
3646 DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, 3686 DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
3647 &frame_sync_cap, 1); 3687 &frame_sync_cap) != 1)
3688 frame_sync_cap = 0;
3648 dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; 3689 dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false;
3649 /* PSR2 needs frame sync as well */ 3690 /* PSR2 needs frame sync as well */
3650 dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; 3691 dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
@@ -3695,6 +3736,13 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
3695 intel_dp->num_sink_rates = i; 3736 intel_dp->num_sink_rates = i;
3696 } 3737 }
3697 3738
3739 if (intel_dp->num_sink_rates)
3740 intel_dp->use_rate_select = true;
3741 else
3742 intel_dp_set_sink_rates(intel_dp);
3743
3744 intel_dp_set_common_rates(intel_dp);
3745
3698 return true; 3746 return true;
3699} 3747}
3700 3748
@@ -3702,11 +3750,18 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
3702static bool 3750static bool
3703intel_dp_get_dpcd(struct intel_dp *intel_dp) 3751intel_dp_get_dpcd(struct intel_dp *intel_dp)
3704{ 3752{
3753 u8 sink_count;
3754
3705 if (!intel_dp_read_dpcd(intel_dp)) 3755 if (!intel_dp_read_dpcd(intel_dp))
3706 return false; 3756 return false;
3707 3757
3708 if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, 3758 /* Don't clobber cached eDP rates. */
3709 &intel_dp->sink_count, 1) < 0) 3759 if (!is_edp(intel_dp)) {
3760 intel_dp_set_sink_rates(intel_dp);
3761 intel_dp_set_common_rates(intel_dp);
3762 }
3763
3764 if (drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, &sink_count) <= 0)
3710 return false; 3765 return false;
3711 3766
3712 /* 3767 /*
@@ -3714,7 +3769,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
3714 * a member variable in intel_dp will track any changes 3769 * a member variable in intel_dp will track any changes
3715 * between short pulse interrupts. 3770 * between short pulse interrupts.
3716 */ 3771 */
3717 intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count); 3772 intel_dp->sink_count = DP_GET_SINK_COUNT(sink_count);
3718 3773
3719 /* 3774 /*
3720 * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that 3775 * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that
@@ -3743,7 +3798,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
3743static bool 3798static bool
3744intel_dp_can_mst(struct intel_dp *intel_dp) 3799intel_dp_can_mst(struct intel_dp *intel_dp)
3745{ 3800{
3746 u8 buf[1]; 3801 u8 mstm_cap;
3747 3802
3748 if (!i915.enable_dp_mst) 3803 if (!i915.enable_dp_mst)
3749 return false; 3804 return false;
@@ -3754,10 +3809,10 @@ intel_dp_can_mst(struct intel_dp *intel_dp)
3754 if (intel_dp->dpcd[DP_DPCD_REV] < 0x12) 3809 if (intel_dp->dpcd[DP_DPCD_REV] < 0x12)
3755 return false; 3810 return false;
3756 3811
3757 if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1) 3812 if (drm_dp_dpcd_readb(&intel_dp->aux, DP_MSTM_CAP, &mstm_cap) != 1)
3758 return false; 3813 return false;
3759 3814
3760 return buf[0] & DP_MST_CAP; 3815 return mstm_cap & DP_MST_CAP;
3761} 3816}
3762 3817
3763static void 3818static void
@@ -3903,9 +3958,8 @@ stop:
3903static bool 3958static bool
3904intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector) 3959intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector)
3905{ 3960{
3906 return drm_dp_dpcd_read(&intel_dp->aux, 3961 return drm_dp_dpcd_readb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR,
3907 DP_DEVICE_SERVICE_IRQ_VECTOR, 3962 sink_irq_vector) == 1;
3908 sink_irq_vector, 1) == 1;
3909} 3963}
3910 3964
3911static bool 3965static bool
@@ -3926,7 +3980,6 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
3926{ 3980{
3927 int status = 0; 3981 int status = 0;
3928 int min_lane_count = 1; 3982 int min_lane_count = 1;
3929 int common_rates[DP_MAX_SUPPORTED_RATES] = {};
3930 int link_rate_index, test_link_rate; 3983 int link_rate_index, test_link_rate;
3931 uint8_t test_lane_count, test_link_bw; 3984 uint8_t test_lane_count, test_link_bw;
3932 /* (DP CTS 1.2) 3985 /* (DP CTS 1.2)
@@ -3943,7 +3996,7 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
3943 test_lane_count &= DP_MAX_LANE_COUNT_MASK; 3996 test_lane_count &= DP_MAX_LANE_COUNT_MASK;
3944 /* Validate the requested lane count */ 3997 /* Validate the requested lane count */
3945 if (test_lane_count < min_lane_count || 3998 if (test_lane_count < min_lane_count ||
3946 test_lane_count > intel_dp->max_sink_lane_count) 3999 test_lane_count > intel_dp->max_link_lane_count)
3947 return DP_TEST_NAK; 4000 return DP_TEST_NAK;
3948 4001
3949 status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE, 4002 status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE,
@@ -3954,9 +4007,9 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
3954 } 4007 }
3955 /* Validate the requested link rate */ 4008 /* Validate the requested link rate */
3956 test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw); 4009 test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw);
3957 link_rate_index = intel_dp_link_rate_index(intel_dp, 4010 link_rate_index = intel_dp_rate_index(intel_dp->common_rates,
3958 common_rates, 4011 intel_dp->num_common_rates,
3959 test_link_rate); 4012 test_link_rate);
3960 if (link_rate_index < 0) 4013 if (link_rate_index < 0)
3961 return DP_TEST_NAK; 4014 return DP_TEST_NAK;
3962 4015
@@ -3969,13 +4022,13 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
3969static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) 4022static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
3970{ 4023{
3971 uint8_t test_pattern; 4024 uint8_t test_pattern;
3972 uint16_t test_misc; 4025 uint8_t test_misc;
3973 __be16 h_width, v_height; 4026 __be16 h_width, v_height;
3974 int status = 0; 4027 int status = 0;
3975 4028
3976 /* Read the TEST_PATTERN (DP CTS 3.1.5) */ 4029 /* Read the TEST_PATTERN (DP CTS 3.1.5) */
3977 status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN, 4030 status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_PATTERN,
3978 &test_pattern, 1); 4031 &test_pattern);
3979 if (status <= 0) { 4032 if (status <= 0) {
3980 DRM_DEBUG_KMS("Test pattern read failed\n"); 4033 DRM_DEBUG_KMS("Test pattern read failed\n");
3981 return DP_TEST_NAK; 4034 return DP_TEST_NAK;
@@ -3997,8 +4050,8 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
3997 return DP_TEST_NAK; 4050 return DP_TEST_NAK;
3998 } 4051 }
3999 4052
4000 status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0, 4053 status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_MISC0,
4001 &test_misc, 1); 4054 &test_misc);
4002 if (status <= 0) { 4055 if (status <= 0) {
4003 DRM_DEBUG_KMS("TEST MISC read failed\n"); 4056 DRM_DEBUG_KMS("TEST MISC read failed\n");
4004 return DP_TEST_NAK; 4057 return DP_TEST_NAK;
@@ -4057,10 +4110,8 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp)
4057 */ 4110 */
4058 block += intel_connector->detect_edid->extensions; 4111 block += intel_connector->detect_edid->extensions;
4059 4112
4060 if (!drm_dp_dpcd_write(&intel_dp->aux, 4113 if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_EDID_CHECKSUM,
4061 DP_TEST_EDID_CHECKSUM, 4114 block->checksum) <= 0)
4062 &block->checksum,
4063 1))
4064 DRM_DEBUG_KMS("Failed to write EDID checksum\n"); 4115 DRM_DEBUG_KMS("Failed to write EDID checksum\n");
4065 4116
4066 test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE; 4117 test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE;
@@ -4224,9 +4275,11 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
4224 if (!to_intel_crtc(intel_encoder->base.crtc)->active) 4275 if (!to_intel_crtc(intel_encoder->base.crtc)->active)
4225 return; 4276 return;
4226 4277
4227 /* FIXME: we need to synchronize this sort of stuff with hardware 4278 /*
4228 * readout. Currently fast link training doesn't work on boot-up. */ 4279 * Validate the cached values of intel_dp->link_rate and
4229 if (!intel_dp->lane_count) 4280 * intel_dp->lane_count before attempting to retrain.
4281 */
4282 if (!intel_dp_link_params_valid(intel_dp))
4230 return; 4283 return;
4231 4284
4232 /* Retrain if Channel EQ or CR not ok */ 4285 /* Retrain if Channel EQ or CR not ok */
@@ -4613,11 +4666,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
4613 yesno(drm_dp_tps3_supported(intel_dp->dpcd))); 4666 yesno(drm_dp_tps3_supported(intel_dp->dpcd)));
4614 4667
4615 if (intel_dp->reset_link_params) { 4668 if (intel_dp->reset_link_params) {
4616 /* Set the max lane count for sink */ 4669 /* Initial max link lane count */
4617 intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); 4670 intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
4618 4671
4619 /* Set the max link BW for sink */ 4672 /* Initial max link rate */
4620 intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); 4673 intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
4621 4674
4622 intel_dp->reset_link_params = false; 4675 intel_dp->reset_link_params = false;
4623 } 4676 }
@@ -5127,7 +5180,7 @@ bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port)
5127 return intel_bios_is_port_edp(dev_priv, port); 5180 return intel_bios_is_port_edp(dev_priv, port);
5128} 5181}
5129 5182
5130void 5183static void
5131intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector) 5184intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector)
5132{ 5185{
5133 struct intel_connector *intel_connector = to_intel_connector(connector); 5186 struct intel_connector *intel_connector = to_intel_connector(connector);
@@ -5932,6 +5985,29 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port)
5932 } 5985 }
5933} 5986}
5934 5987
5988static void intel_dp_modeset_retry_work_fn(struct work_struct *work)
5989{
5990 struct intel_connector *intel_connector;
5991 struct drm_connector *connector;
5992
5993 intel_connector = container_of(work, typeof(*intel_connector),
5994 modeset_retry_work);
5995 connector = &intel_connector->base;
5996 DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id,
5997 connector->name);
5998
5999 /* Grab the locks before changing connector property*/
6000 mutex_lock(&connector->dev->mode_config.mutex);
6001 /* Set connector link status to BAD and send a Uevent to notify
6002 * userspace to do a modeset.
6003 */
6004 drm_mode_connector_set_link_status_property(connector,
6005 DRM_MODE_LINK_STATUS_BAD);
6006 mutex_unlock(&connector->dev->mode_config.mutex);
6007 /* Send Hotplug uevent so userspace can reprobe */
6008 drm_kms_helper_hotplug_event(connector->dev);
6009}
6010
5935bool 6011bool
5936intel_dp_init_connector(struct intel_digital_port *intel_dig_port, 6012intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
5937 struct intel_connector *intel_connector) 6013 struct intel_connector *intel_connector)
@@ -5944,11 +6020,17 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
5944 enum port port = intel_dig_port->port; 6020 enum port port = intel_dig_port->port;
5945 int type; 6021 int type;
5946 6022
6023 /* Initialize the work for modeset in case of link train failure */
6024 INIT_WORK(&intel_connector->modeset_retry_work,
6025 intel_dp_modeset_retry_work_fn);
6026
5947 if (WARN(intel_dig_port->max_lanes < 1, 6027 if (WARN(intel_dig_port->max_lanes < 1,
5948 "Not enough lanes (%d) for DP on port %c\n", 6028 "Not enough lanes (%d) for DP on port %c\n",
5949 intel_dig_port->max_lanes, port_name(port))) 6029 intel_dig_port->max_lanes, port_name(port)))
5950 return false; 6030 return false;
5951 6031
6032 intel_dp_set_source_rates(intel_dp);
6033
5952 intel_dp->reset_link_params = true; 6034 intel_dp->reset_link_params = true;
5953 intel_dp->pps_pipe = INVALID_PIPE; 6035 intel_dp->pps_pipe = INVALID_PIPE;
5954 intel_dp->active_pipe = INVALID_PIPE; 6036 intel_dp->active_pipe = INVALID_PIPE;
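
Annotation: the new modeset_retry_work above is queued when link training exhausts its fallbacks; it marks the connector's link-status property BAD and fires a hotplug uevent so userspace reprobes and modesets at the reduced parameters. A userspace sketch of the container_of trick the handler relies on to get from the embedded work item back to its connector; the structs and the direct call standing in for schedule_work() are illustrative only.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct {
        void (*func)(struct work_struct *work);
};

struct connector {
        const char *name;
        struct work_struct modeset_retry_work;
};

static void modeset_retry_work_fn(struct work_struct *work)
{
        struct connector *c = container_of(work, struct connector,
                                           modeset_retry_work);

        /* Real driver: set link-status BAD, then send a hotplug uevent. */
        printf("[CONNECTOR:%s] asking userspace to retry the modeset\n", c->name);
}

int main(void)
{
        struct connector c = { .name = "DP-1" };

        c.modeset_retry_work.func = modeset_retry_work_fn;
        c.modeset_retry_work.func(&c.modeset_retry_work);  /* simulate schedule_work() */
        return 0;
}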
diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
index 6532e226db29..a0995c00fc84 100644
--- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
@@ -28,6 +28,10 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
28{ 28{
29 uint8_t reg_val = 0; 29 uint8_t reg_val = 0;
30 30
31 /* Early return when display use other mechanism to enable backlight. */
32 if (!(intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP))
33 return;
34
31 if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER, 35 if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER,
32 &reg_val) < 0) { 36 &reg_val) < 0) {
33 DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n", 37 DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
@@ -97,15 +101,37 @@ static void intel_dp_aux_enable_backlight(struct intel_connector *connector)
97{ 101{
98 struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); 102 struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
99 uint8_t dpcd_buf = 0; 103 uint8_t dpcd_buf = 0;
104 uint8_t edp_backlight_mode = 0;
100 105
101 set_aux_backlight_enable(intel_dp, true); 106 if (drm_dp_dpcd_readb(&intel_dp->aux,
107 DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) != 1) {
108 DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
109 DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
110 return;
111 }
112
113 edp_backlight_mode = dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
114
115 switch (edp_backlight_mode) {
116 case DP_EDP_BACKLIGHT_CONTROL_MODE_PWM:
117 case DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET:
118 case DP_EDP_BACKLIGHT_CONTROL_MODE_PRODUCT:
119 dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
120 dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD;
121 if (drm_dp_dpcd_writeb(&intel_dp->aux,
122 DP_EDP_BACKLIGHT_MODE_SET_REGISTER, dpcd_buf) < 0) {
123 DRM_DEBUG_KMS("Failed to write aux backlight mode\n");
124 }
125 break;
126
127 /* Do nothing when it is already DPCD mode */
128 case DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD:
129 default:
130 break;
131 }
102 132
103 if ((drm_dp_dpcd_readb(&intel_dp->aux, 133 set_aux_backlight_enable(intel_dp, true);
104 DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) == 1) && 134 intel_dp_aux_set_backlight(connector, connector->panel.backlight.level);
105 ((dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) ==
106 DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET))
107 drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
108 (dpcd_buf | DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD));
109} 135}
110 136
111static void intel_dp_aux_disable_backlight(struct intel_connector *connector) 137static void intel_dp_aux_disable_backlight(struct intel_connector *connector)
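
Annotation: instead of blindly flipping the mode when it happens to be PRESET, the rewritten enable path above reads DP_EDP_BACKLIGHT_MODE_SET_REGISTER, and only if the current control mode is not already DPCD does a read-modify-write to switch it, before enabling the backlight and restoring the saved level. A standalone sketch of that read-modify-write, with stubbed register accessors and the eDP control-mode field values as used here:

#include <stdio.h>

#define BACKLIGHT_CONTROL_MODE_MASK     0x03
#define BACKLIGHT_CONTROL_MODE_DPCD     0x02

static unsigned char fake_dpcd = 0x00;  /* pretend the sink starts in PWM mode */

static unsigned char read_mode_set(void)     { return fake_dpcd; }
static void write_mode_set(unsigned char v)  { fake_dpcd = v; }

int main(void)
{
        unsigned char buf = read_mode_set();

        if ((buf & BACKLIGHT_CONTROL_MODE_MASK) != BACKLIGHT_CONTROL_MODE_DPCD) {
                buf &= ~BACKLIGHT_CONTROL_MODE_MASK;
                buf |= BACKLIGHT_CONTROL_MODE_DPCD;
                write_mode_set(buf);
        }
        printf("backlight mode register: 0x%02x\n", fake_dpcd);
        return 0;
}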
@@ -143,9 +169,8 @@ intel_dp_aux_display_control_capable(struct intel_connector *connector)
143 * the panel can support backlight control over the aux channel 169 * the panel can support backlight control over the aux channel
144 */ 170 */
145 if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP && 171 if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP &&
146 (intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) && 172 (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP) &&
147 !((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_PIN_ENABLE_CAP) || 173 !(intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP)) {
148 (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP))) {
149 DRM_DEBUG_KMS("AUX Backlight Control Supported!\n"); 174 DRM_DEBUG_KMS("AUX Backlight Control Supported!\n");
150 return true; 175 return true;
151 } 176 }
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
index 0048b520baf7..b79c1c0e404c 100644
--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -146,7 +146,8 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
146 link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; 146 link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
147 drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); 147 drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
148 148
149 if (intel_dp->num_sink_rates) 149 /* eDP 1.4 rate select method. */
150 if (!link_bw)
150 drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, 151 drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
151 &rate_select, 1); 152 &rate_select, 1);
152 153
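
Annotation: the "eDP 1.4 rate select method" check above keys off link_bw == 0: when the sink publishes DP_SUPPORTED_LINK_RATES, the source programs an index into that table via DP_LINK_RATE_SET instead of a bandwidth code. A hedged sketch of the two paths; the helper, flag, and rate table below are illustrative, only the BW-code arithmetic (rate in kHz divided by 27000) mirrors the DP convention.

#include <stdio.h>

static const int sink_rates[] = { 162000, 216000, 243000, 270000 };

static void compute_rate(int port_clock, int use_rate_select,
                         unsigned char *link_bw, unsigned char *rate_select)
{
        if (use_rate_select) {
                int i;

                *link_bw = 0;                   /* signals the rate-select path */
                for (i = 0; i < 4; i++)
                        if (sink_rates[i] == port_clock)
                                *rate_select = i;
        } else {
                *link_bw = port_clock / 27000;  /* e.g. 270000 kHz -> 0x0a */
                *rate_select = 0;
        }
}

int main(void)
{
        unsigned char bw, sel;

        compute_rate(216000, 1, &bw, &sel);
        printf("rate-select path: link_bw=%u rate_select=%u\n", bw, sel);
        compute_rate(270000, 0, &bw, &sel);
        printf("bw-code path:     link_bw=0x%02x rate_select=%u\n", bw, sel);
        return 0;
}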
@@ -313,6 +314,24 @@ void intel_dp_stop_link_train(struct intel_dp *intel_dp)
313void 314void
314intel_dp_start_link_train(struct intel_dp *intel_dp) 315intel_dp_start_link_train(struct intel_dp *intel_dp)
315{ 316{
316 intel_dp_link_training_clock_recovery(intel_dp); 317 struct intel_connector *intel_connector = intel_dp->attached_connector;
317 intel_dp_link_training_channel_equalization(intel_dp); 318
319 if (!intel_dp_link_training_clock_recovery(intel_dp))
320 goto failure_handling;
321 if (!intel_dp_link_training_channel_equalization(intel_dp))
322 goto failure_handling;
323
324 DRM_DEBUG_KMS("Link Training Passed at Link Rate = %d, Lane count = %d",
325 intel_dp->link_rate, intel_dp->lane_count);
326 return;
327
328 failure_handling:
329 DRM_DEBUG_KMS("Link Training failed at link rate = %d, lane count = %d",
330 intel_dp->link_rate, intel_dp->lane_count);
331 if (!intel_dp_get_link_train_fallback_values(intel_dp,
332 intel_dp->link_rate,
333 intel_dp->lane_count))
334 /* Schedule a Hotplug Uevent to userspace to start modeset */
335 schedule_work(&intel_connector->modeset_retry_work);
336 return;
318} 337}
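
Annotation: together with intel_dp_get_link_train_fallback_values() earlier in this series, the rewritten start_link_train() above implements a step-down policy: on failure, lower the link rate first, then halve the lane count (back at the top rate), and only schedule the userspace modeset-retry uevent once a 1-lane, lowest-rate link has also failed. A simplified, self-contained walk of that policy:

#include <stdio.h>

static const int rates[] = { 162000, 270000, 540000 };

static int fallback(int *rate_idx, int *lanes)
{
        if (*rate_idx > 0) {
                (*rate_idx)--;          /* lower rate, keep lane count */
                return 0;
        }
        if (*lanes > 1) {
                *rate_idx = 2;          /* back to max rate ... */
                *lanes >>= 1;           /* ... with fewer lanes */
                return 0;
        }
        return -1;                      /* nothing left to try */
}

int main(void)
{
        int rate_idx = 2, lanes = 4;

        /* Pretend every attempt fails and walk the whole fallback chain. */
        do {
                printf("try %d kHz x%d\n", rates[rate_idx], lanes);
        } while (fallback(&rate_idx, &lanes) == 0);
        puts("exhausted: schedule modeset retry uevent");
        return 0;
}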
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 1dee9933005f..3715386e4272 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -56,7 +56,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
56 * for MST we always configure max link bw - the spec doesn't 56 * for MST we always configure max link bw - the spec doesn't
57 * seem to suggest we should do otherwise. 57 * seem to suggest we should do otherwise.
58 */ 58 */
59 lane_count = drm_dp_max_lane_count(intel_dp->dpcd); 59 lane_count = intel_dp_max_lane_count(intel_dp);
60
60 pipe_config->lane_count = lane_count; 61 pipe_config->lane_count = lane_count;
61 62
62 pipe_config->pipe_bpp = bpp; 63 pipe_config->pipe_bpp = bpp;
@@ -329,14 +330,6 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force)
329 return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port); 330 return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port);
330} 331}
331 332
332static int
333intel_dp_mst_set_property(struct drm_connector *connector,
334 struct drm_property *property,
335 uint64_t val)
336{
337 return 0;
338}
339
340static void 333static void
341intel_dp_mst_connector_destroy(struct drm_connector *connector) 334intel_dp_mst_connector_destroy(struct drm_connector *connector)
342{ 335{
@@ -353,8 +346,7 @@ static const struct drm_connector_funcs intel_dp_mst_connector_funcs = {
353 .dpms = drm_atomic_helper_connector_dpms, 346 .dpms = drm_atomic_helper_connector_dpms,
354 .detect = intel_dp_mst_detect, 347 .detect = intel_dp_mst_detect,
355 .fill_modes = drm_helper_probe_single_connector_modes, 348 .fill_modes = drm_helper_probe_single_connector_modes,
356 .set_property = intel_dp_mst_set_property, 349 .set_property = drm_atomic_helper_connector_set_property,
357 .atomic_get_property = intel_connector_atomic_get_property,
358 .late_register = intel_connector_register, 350 .late_register = intel_connector_register,
359 .early_unregister = intel_connector_unregister, 351 .early_unregister = intel_connector_unregister,
360 .destroy = intel_dp_mst_connector_destroy, 352 .destroy = intel_dp_mst_connector_destroy,
@@ -378,7 +370,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
378 int max_rate, mode_rate, max_lanes, max_link_clock; 370 int max_rate, mode_rate, max_lanes, max_link_clock;
379 371
380 max_link_clock = intel_dp_max_link_rate(intel_dp); 372 max_link_clock = intel_dp_max_link_rate(intel_dp);
381 max_lanes = drm_dp_max_lane_count(intel_dp->dpcd); 373 max_lanes = intel_dp_max_lane_count(intel_dp);
382 374
383 max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes); 375 max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes);
384 mode_rate = intel_dp_link_required(mode->clock, bpp); 376 mode_rate = intel_dp_link_required(mode->clock, bpp);
@@ -495,7 +487,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
495 drm_mode_connector_attach_encoder(&intel_connector->base, 487 drm_mode_connector_attach_encoder(&intel_connector->base,
496 &intel_dp->mst_encoders[i]->base.base); 488 &intel_dp->mst_encoders[i]->base.base);
497 } 489 }
498 intel_dp_add_properties(intel_dp, connector);
499 490
500 drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); 491 drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0);
501 drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); 492 drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 48ea8d9d49fe..bd500977b3fc 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -88,7 +88,6 @@
88 int cpu, ret, timeout = (US) * 1000; \ 88 int cpu, ret, timeout = (US) * 1000; \
89 u64 base; \ 89 u64 base; \
90 _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ 90 _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
91 BUILD_BUG_ON((US) > 50000); \
92 if (!(ATOMIC)) { \ 91 if (!(ATOMIC)) { \
93 preempt_disable(); \ 92 preempt_disable(); \
94 cpu = smp_processor_id(); \ 93 cpu = smp_processor_id(); \
@@ -130,8 +129,14 @@
130 ret__; \ 129 ret__; \
131}) 130})
132 131
133#define wait_for_atomic(COND, MS) _wait_for_atomic((COND), (MS) * 1000, 1) 132#define wait_for_atomic_us(COND, US) \
134#define wait_for_atomic_us(COND, US) _wait_for_atomic((COND), (US), 1) 133({ \
134 BUILD_BUG_ON(!__builtin_constant_p(US)); \
135 BUILD_BUG_ON((US) > 50000); \
136 _wait_for_atomic((COND), (US), 1); \
137})
138
139#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000)
135 140
136#define KHz(x) (1000 * (x)) 141#define KHz(x) (1000 * (x))
137#define MHz(x) KHz(1000 * (x)) 142#define MHz(x) KHz(1000 * (x))
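
Annotation: moving the BUILD_BUG_ON out of _wait_for_atomic() and into wait_for_atomic_us() lets the macro insist, at compile time, that the timeout is a constant expression and within the 50 ms busy-wait budget, while wait_for_atomic() simply delegates in microseconds. A userspace sketch of the idea (GCC/Clang statement expressions and __builtin_constant_p; the BUILD_BUG_ON stand-in uses the classic negative-array-size trick, the kernel provides its own):

#include <stdio.h>

#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

#define wait_us(US) \
({ \
        BUILD_BUG_ON(!__builtin_constant_p(US));        /* must be a literal */ \
        BUILD_BUG_ON((US) > 50000);                     /* cap atomic busy-waits */ \
        printf("busy-wait up to %d us\n", (US)); \
})

int main(void)
{
        wait_us(100);           /* fine */
        /* wait_us(100000);        would fail to compile: over the 50 ms cap */
        return 0;
}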
@@ -321,6 +326,9 @@ struct intel_connector {
321 void *port; /* store this opaque as its illegal to dereference it */ 326 void *port; /* store this opaque as its illegal to dereference it */
322 327
323 struct intel_dp *mst_port; 328 struct intel_dp *mst_port;
329
330 /* Work struct to schedule a uevent on link train failure */
331 struct work_struct modeset_retry_work;
324}; 332};
325 333
326struct dpll { 334struct dpll {
@@ -504,8 +512,8 @@ enum vlv_wm_level {
504}; 512};
505 513
506struct vlv_wm_state { 514struct vlv_wm_state {
507 struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS]; 515 struct g4x_pipe_wm wm[NUM_VLV_WM_LEVELS];
508 struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS]; 516 struct g4x_sr_wm sr[NUM_VLV_WM_LEVELS];
509 uint8_t num_levels; 517 uint8_t num_levels;
510 bool cxsr; 518 bool cxsr;
511}; 519};
@@ -514,6 +522,22 @@ struct vlv_fifo_state {
514 u16 plane[I915_MAX_PLANES]; 522 u16 plane[I915_MAX_PLANES];
515}; 523};
516 524
525enum g4x_wm_level {
526 G4X_WM_LEVEL_NORMAL,
527 G4X_WM_LEVEL_SR,
528 G4X_WM_LEVEL_HPLL,
529 NUM_G4X_WM_LEVELS,
530};
531
532struct g4x_wm_state {
533 struct g4x_pipe_wm wm;
534 struct g4x_sr_wm sr;
535 struct g4x_sr_wm hpll;
536 bool cxsr;
537 bool hpll_en;
538 bool fbc_en;
539};
540
517struct intel_crtc_wm_state { 541struct intel_crtc_wm_state {
518 union { 542 union {
519 struct { 543 struct {
@@ -541,7 +565,7 @@ struct intel_crtc_wm_state {
541 565
542 struct { 566 struct {
543 /* "raw" watermarks (not inverted) */ 567 /* "raw" watermarks (not inverted) */
544 struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS]; 568 struct g4x_pipe_wm raw[NUM_VLV_WM_LEVELS];
545 /* intermediate watermarks (inverted) */ 569 /* intermediate watermarks (inverted) */
546 struct vlv_wm_state intermediate; 570 struct vlv_wm_state intermediate;
547 /* optimal watermarks (inverted) */ 571 /* optimal watermarks (inverted) */
@@ -549,6 +573,15 @@ struct intel_crtc_wm_state {
549 /* display FIFO split */ 573 /* display FIFO split */
550 struct vlv_fifo_state fifo_state; 574 struct vlv_fifo_state fifo_state;
551 } vlv; 575 } vlv;
576
577 struct {
578 /* "raw" watermarks */
579 struct g4x_pipe_wm raw[NUM_G4X_WM_LEVELS];
580 /* intermediate watermarks */
581 struct g4x_wm_state intermediate;
582 /* optimal watermarks */
583 struct g4x_wm_state optimal;
584 } g4x;
552 }; 585 };
553 586
554 /* 587 /*
@@ -766,11 +799,6 @@ struct intel_crtc {
766 int adjusted_x; 799 int adjusted_x;
767 int adjusted_y; 800 int adjusted_y;
768 801
769 uint32_t cursor_addr;
770 uint32_t cursor_cntl;
771 uint32_t cursor_size;
772 uint32_t cursor_base;
773
774 struct intel_crtc_state *config; 802 struct intel_crtc_state *config;
775 803
776 /* global reset count when the last flip was submitted */ 804 /* global reset count when the last flip was submitted */
@@ -786,6 +814,7 @@ struct intel_crtc {
786 union { 814 union {
787 struct intel_pipe_wm ilk; 815 struct intel_pipe_wm ilk;
788 struct vlv_wm_state vlv; 816 struct vlv_wm_state vlv;
817 struct g4x_wm_state g4x;
789 } active; 818 } active;
790 } wm; 819 } wm;
791 820
@@ -811,18 +840,22 @@ struct intel_plane {
811 int max_downscale; 840 int max_downscale;
812 uint32_t frontbuffer_bit; 841 uint32_t frontbuffer_bit;
813 842
843 struct {
844 u32 base, cntl, size;
845 } cursor;
846
814 /* 847 /*
815 * NOTE: Do not place new plane state fields here (e.g., when adding 848 * NOTE: Do not place new plane state fields here (e.g., when adding
816 * new plane properties). New runtime state should now be placed in 849 * new plane properties). New runtime state should now be placed in
817 * the intel_plane_state structure and accessed via plane_state. 850 * the intel_plane_state structure and accessed via plane_state.
818 */ 851 */
819 852
820 void (*update_plane)(struct drm_plane *plane, 853 void (*update_plane)(struct intel_plane *plane,
821 const struct intel_crtc_state *crtc_state, 854 const struct intel_crtc_state *crtc_state,
822 const struct intel_plane_state *plane_state); 855 const struct intel_plane_state *plane_state);
823 void (*disable_plane)(struct drm_plane *plane, 856 void (*disable_plane)(struct intel_plane *plane,
824 struct drm_crtc *crtc); 857 struct intel_crtc *crtc);
825 int (*check_plane)(struct drm_plane *plane, 858 int (*check_plane)(struct intel_plane *plane,
826 struct intel_crtc_state *crtc_state, 859 struct intel_crtc_state *crtc_state,
827 struct intel_plane_state *state); 860 struct intel_plane_state *state);
828}; 861};
@@ -948,13 +981,20 @@ struct intel_dp {
948 uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; 981 uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE];
949 uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; 982 uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
950 uint8_t edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; 983 uint8_t edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE];
951 /* sink rates as reported by DP_SUPPORTED_LINK_RATES */ 984 /* source rates */
952 uint8_t num_sink_rates; 985 int num_source_rates;
986 const int *source_rates;
987 /* sink rates as reported by DP_MAX_LINK_RATE/DP_SUPPORTED_LINK_RATES */
988 int num_sink_rates;
953 int sink_rates[DP_MAX_SUPPORTED_RATES]; 989 int sink_rates[DP_MAX_SUPPORTED_RATES];
954 /* Max lane count for the sink as per DPCD registers */ 990 bool use_rate_select;
955 uint8_t max_sink_lane_count; 991 /* intersection of source and sink rates */
956 /* Max link BW for the sink as per DPCD registers */ 992 int num_common_rates;
957 int max_sink_link_bw; 993 int common_rates[DP_MAX_SUPPORTED_RATES];
994 /* Max lane count for the current link */
995 int max_link_lane_count;
996 /* Max rate for the current link */
997 int max_link_rate;
958 /* sink or branch descriptor */ 998 /* sink or branch descriptor */
959 struct intel_dp_desc desc; 999 struct intel_dp_desc desc;
960 struct drm_dp_aux aux; 1000 struct drm_dp_aux aux;
@@ -1491,10 +1531,10 @@ void intel_edp_backlight_off(struct intel_dp *intel_dp);
1491void intel_edp_panel_vdd_on(struct intel_dp *intel_dp); 1531void intel_edp_panel_vdd_on(struct intel_dp *intel_dp);
1492void intel_edp_panel_on(struct intel_dp *intel_dp); 1532void intel_edp_panel_on(struct intel_dp *intel_dp);
1493void intel_edp_panel_off(struct intel_dp *intel_dp); 1533void intel_edp_panel_off(struct intel_dp *intel_dp);
1494void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector);
1495void intel_dp_mst_suspend(struct drm_device *dev); 1534void intel_dp_mst_suspend(struct drm_device *dev);
1496void intel_dp_mst_resume(struct drm_device *dev); 1535void intel_dp_mst_resume(struct drm_device *dev);
1497int intel_dp_max_link_rate(struct intel_dp *intel_dp); 1536int intel_dp_max_link_rate(struct intel_dp *intel_dp);
1537int intel_dp_max_lane_count(struct intel_dp *intel_dp);
1498int intel_dp_rate_select(struct intel_dp *intel_dp, int rate); 1538int intel_dp_rate_select(struct intel_dp *intel_dp, int rate);
1499void intel_dp_hot_plug(struct intel_encoder *intel_encoder); 1539void intel_dp_hot_plug(struct intel_encoder *intel_encoder);
1500void intel_power_sequencer_reset(struct drm_i915_private *dev_priv); 1540void intel_power_sequencer_reset(struct drm_i915_private *dev_priv);
@@ -1825,6 +1865,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv,
1825 struct intel_rps_client *rps, 1865 struct intel_rps_client *rps,
1826 unsigned long submitted); 1866 unsigned long submitted);
1827void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req); 1867void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req);
1868void g4x_wm_get_hw_state(struct drm_device *dev);
1828void vlv_wm_get_hw_state(struct drm_device *dev); 1869void vlv_wm_get_hw_state(struct drm_device *dev);
1829void ilk_wm_get_hw_state(struct drm_device *dev); 1870void ilk_wm_get_hw_state(struct drm_device *dev);
1830void skl_wm_get_hw_state(struct drm_device *dev); 1871void skl_wm_get_hw_state(struct drm_device *dev);
@@ -1832,6 +1873,7 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
1832 struct skl_ddb_allocation *ddb /* out */); 1873 struct skl_ddb_allocation *ddb /* out */);
1833void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, 1874void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
1834 struct skl_pipe_wm *out); 1875 struct skl_pipe_wm *out);
1876void g4x_wm_sanitize(struct drm_i915_private *dev_priv);
1835void vlv_wm_sanitize(struct drm_i915_private *dev_priv); 1877void vlv_wm_sanitize(struct drm_i915_private *dev_priv);
1836bool intel_can_enable_sagv(struct drm_atomic_state *state); 1878bool intel_can_enable_sagv(struct drm_atomic_state *state);
1837int intel_enable_sagv(struct drm_i915_private *dev_priv); 1879int intel_enable_sagv(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 3ffe8b1f1d48..fc0ef492252a 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -410,11 +410,10 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder)
410 val |= (ULPS_STATE_ENTER | DEVICE_READY); 410 val |= (ULPS_STATE_ENTER | DEVICE_READY);
411 I915_WRITE(MIPI_DEVICE_READY(port), val); 411 I915_WRITE(MIPI_DEVICE_READY(port), val);
412 412
413 /* Wait for ULPS Not active */ 413 /* Wait for ULPS active */
414 if (intel_wait_for_register(dev_priv, 414 if (intel_wait_for_register(dev_priv,
415 MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 415 MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 0, 20))
416 GLK_ULPS_NOT_ACTIVE, 20)) 416 DRM_ERROR("ULPS not active\n");
417 DRM_ERROR("ULPS is still active\n");
418 417
419 /* Exit ULPS */ 418 /* Exit ULPS */
420 val = I915_READ(MIPI_DEVICE_READY(port)); 419 val = I915_READ(MIPI_DEVICE_READY(port));
diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c
index 0dce7792643a..7158c7ce9c09 100644
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
@@ -694,8 +694,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
694 clk_zero_cnt << 8 | prepare_cnt; 694 clk_zero_cnt << 8 | prepare_cnt;
695 695
696 /* 696 /*
697 * LP to HS switch count = 4TLPX + PREP_COUNT * 2 + EXIT_ZERO_COUNT * 2 697 * LP to HS switch count = 4TLPX + PREP_COUNT * mul + EXIT_ZERO_COUNT *
698 * + 10UI + Extra Byte Count 698 * mul + 10UI + Extra Byte Count
699 * 699 *
700 * HS to LP switch count = THS-TRAIL + 2TLPX + Extra Byte Count 700 * HS to LP switch count = THS-TRAIL + 2TLPX + Extra Byte Count
701 * Extra Byte Count is calculated according to number of lanes. 701 * Extra Byte Count is calculated according to number of lanes.
@@ -708,8 +708,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
708 /* B044 */ 708 /* B044 */
709 /* FIXME: 709 /* FIXME:
710 * The comment above does not match with the code */ 710 * The comment above does not match with the code */
711 lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * 2 + 711 lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * mul +
712 exit_zero_cnt * 2 + 10, 8); 712 exit_zero_cnt * mul + 10, 8);
713 713
714 hs_to_lp_switch = DIV_ROUND_UP(mipi_config->ths_trail + 2 * tlpx_ui, 8); 714 hs_to_lp_switch = DIV_ROUND_UP(mipi_config->ths_trail + 2 * tlpx_ui, 8);
715 715
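With the change above, the byte-clock multiplier mul is applied to both PREP_COUNT and EXIT_ZERO_COUNT, matching the updated comment. A standalone sketch of the arithmetic, with invented sample values:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* illustrative values only */
	unsigned int tlpx_ui = 10, prepare_cnt = 3, exit_zero_cnt = 4;
	unsigned int ths_trail = 20, mul = 2;

	unsigned int lp_to_hs = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * mul +
					     exit_zero_cnt * mul + 10, 8);
	unsigned int hs_to_lp = DIV_ROUND_UP(ths_trail + 2 * tlpx_ui, 8);

	/* 4*10 + 3*2 + 4*2 + 10 = 64 -> 8; 20 + 2*10 = 40 -> 5 */
	printf("lp_to_hs=%u hs_to_lp=%u\n", lp_to_hs, hs_to_lp);
	return 0;
}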
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 6025839ed3b7..c1544a53095d 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -350,7 +350,7 @@ static const struct drm_connector_funcs intel_dvo_connector_funcs = {
350 .early_unregister = intel_connector_unregister, 350 .early_unregister = intel_connector_unregister,
351 .destroy = intel_dvo_destroy, 351 .destroy = intel_dvo_destroy,
352 .fill_modes = drm_helper_probe_single_connector_modes, 352 .fill_modes = drm_helper_probe_single_connector_modes,
353 .atomic_get_property = intel_connector_atomic_get_property, 353 .set_property = drm_atomic_helper_connector_set_property,
354 .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, 354 .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
355 .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, 355 .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
356}; 356};
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 854e8e0c836b..413bfd8d4bf4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -26,69 +26,177 @@
26#include "intel_ringbuffer.h" 26#include "intel_ringbuffer.h"
27#include "intel_lrc.h" 27#include "intel_lrc.h"
28 28
29static const struct engine_info { 29/* Haswell does have the CXT_SIZE register however it does not appear to be
30 * valid. Now, docs explain in dwords what is in the context object. The full
31 * size is 70720 bytes, however, the power context and execlist context will
32 * never be saved (power context is stored elsewhere, and execlists don't work
33 * on HSW) - so the final size, including the extra state required for the
34 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
35 */
36#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
37/* Same as Haswell, but 72064 bytes now. */
38#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE)
39
40#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
41#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
42
43#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)
44
45struct engine_class_info {
30 const char *name; 46 const char *name;
31 unsigned int exec_id; 47 int (*init_legacy)(struct intel_engine_cs *engine);
48 int (*init_execlists)(struct intel_engine_cs *engine);
49};
50
51static const struct engine_class_info intel_engine_classes[] = {
52 [RENDER_CLASS] = {
53 .name = "rcs",
54 .init_execlists = logical_render_ring_init,
55 .init_legacy = intel_init_render_ring_buffer,
56 },
57 [COPY_ENGINE_CLASS] = {
58 .name = "bcs",
59 .init_execlists = logical_xcs_ring_init,
60 .init_legacy = intel_init_blt_ring_buffer,
61 },
62 [VIDEO_DECODE_CLASS] = {
63 .name = "vcs",
64 .init_execlists = logical_xcs_ring_init,
65 .init_legacy = intel_init_bsd_ring_buffer,
66 },
67 [VIDEO_ENHANCEMENT_CLASS] = {
68 .name = "vecs",
69 .init_execlists = logical_xcs_ring_init,
70 .init_legacy = intel_init_vebox_ring_buffer,
71 },
72};
73
74struct engine_info {
32 unsigned int hw_id; 75 unsigned int hw_id;
76 unsigned int uabi_id;
77 u8 class;
78 u8 instance;
33 u32 mmio_base; 79 u32 mmio_base;
34 unsigned irq_shift; 80 unsigned irq_shift;
35 int (*init_legacy)(struct intel_engine_cs *engine); 81};
36 int (*init_execlists)(struct intel_engine_cs *engine); 82
37} intel_engines[] = { 83static const struct engine_info intel_engines[] = {
38 [RCS] = { 84 [RCS] = {
39 .name = "rcs",
40 .hw_id = RCS_HW, 85 .hw_id = RCS_HW,
41 .exec_id = I915_EXEC_RENDER, 86 .uabi_id = I915_EXEC_RENDER,
87 .class = RENDER_CLASS,
88 .instance = 0,
42 .mmio_base = RENDER_RING_BASE, 89 .mmio_base = RENDER_RING_BASE,
43 .irq_shift = GEN8_RCS_IRQ_SHIFT, 90 .irq_shift = GEN8_RCS_IRQ_SHIFT,
44 .init_execlists = logical_render_ring_init,
45 .init_legacy = intel_init_render_ring_buffer,
46 }, 91 },
47 [BCS] = { 92 [BCS] = {
48 .name = "bcs",
49 .hw_id = BCS_HW, 93 .hw_id = BCS_HW,
50 .exec_id = I915_EXEC_BLT, 94 .uabi_id = I915_EXEC_BLT,
95 .class = COPY_ENGINE_CLASS,
96 .instance = 0,
51 .mmio_base = BLT_RING_BASE, 97 .mmio_base = BLT_RING_BASE,
52 .irq_shift = GEN8_BCS_IRQ_SHIFT, 98 .irq_shift = GEN8_BCS_IRQ_SHIFT,
53 .init_execlists = logical_xcs_ring_init,
54 .init_legacy = intel_init_blt_ring_buffer,
55 }, 99 },
56 [VCS] = { 100 [VCS] = {
57 .name = "vcs",
58 .hw_id = VCS_HW, 101 .hw_id = VCS_HW,
59 .exec_id = I915_EXEC_BSD, 102 .uabi_id = I915_EXEC_BSD,
103 .class = VIDEO_DECODE_CLASS,
104 .instance = 0,
60 .mmio_base = GEN6_BSD_RING_BASE, 105 .mmio_base = GEN6_BSD_RING_BASE,
61 .irq_shift = GEN8_VCS1_IRQ_SHIFT, 106 .irq_shift = GEN8_VCS1_IRQ_SHIFT,
62 .init_execlists = logical_xcs_ring_init,
63 .init_legacy = intel_init_bsd_ring_buffer,
64 }, 107 },
65 [VCS2] = { 108 [VCS2] = {
66 .name = "vcs2",
67 .hw_id = VCS2_HW, 109 .hw_id = VCS2_HW,
68 .exec_id = I915_EXEC_BSD, 110 .uabi_id = I915_EXEC_BSD,
111 .class = VIDEO_DECODE_CLASS,
112 .instance = 1,
69 .mmio_base = GEN8_BSD2_RING_BASE, 113 .mmio_base = GEN8_BSD2_RING_BASE,
70 .irq_shift = GEN8_VCS2_IRQ_SHIFT, 114 .irq_shift = GEN8_VCS2_IRQ_SHIFT,
71 .init_execlists = logical_xcs_ring_init,
72 .init_legacy = intel_init_bsd2_ring_buffer,
73 }, 115 },
74 [VECS] = { 116 [VECS] = {
75 .name = "vecs",
76 .hw_id = VECS_HW, 117 .hw_id = VECS_HW,
77 .exec_id = I915_EXEC_VEBOX, 118 .uabi_id = I915_EXEC_VEBOX,
119 .class = VIDEO_ENHANCEMENT_CLASS,
120 .instance = 0,
78 .mmio_base = VEBOX_RING_BASE, 121 .mmio_base = VEBOX_RING_BASE,
79 .irq_shift = GEN8_VECS_IRQ_SHIFT, 122 .irq_shift = GEN8_VECS_IRQ_SHIFT,
80 .init_execlists = logical_xcs_ring_init,
81 .init_legacy = intel_init_vebox_ring_buffer,
82 }, 123 },
83}; 124};
84 125
126/**
127 * ___intel_engine_context_size() - return the size of the context for an engine
128 * @dev_priv: i915 device private
129 * @class: engine class
130 *
131 * Each engine class may require a different amount of space for a context
132 * image.
133 *
134 * Return: size (in bytes) of an engine class specific context image
135 *
136 * Note: this size includes the HWSP, which is part of the context image
137 * in LRC mode, but does not include the "shared data page" used with
138 * GuC submission. The caller should account for this if using the GuC.
139 */
140static u32
141__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
142{
143 u32 cxt_size;
144
145 BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
146
147 switch (class) {
148 case RENDER_CLASS:
149 switch (INTEL_GEN(dev_priv)) {
150 default:
151 MISSING_CASE(INTEL_GEN(dev_priv));
152 case 9:
153 return GEN9_LR_CONTEXT_RENDER_SIZE;
154 case 8:
155 return i915.enable_execlists ?
156 GEN8_LR_CONTEXT_RENDER_SIZE :
157 GEN8_CXT_TOTAL_SIZE;
158 case 7:
159 if (IS_HASWELL(dev_priv))
160 return HSW_CXT_TOTAL_SIZE;
161
162 cxt_size = I915_READ(GEN7_CXT_SIZE);
163 return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
164 PAGE_SIZE);
165 case 6:
166 cxt_size = I915_READ(CXT_SIZE);
167 return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
168 PAGE_SIZE);
169 case 5:
170 case 4:
171 case 3:
172 case 2:
173 /* For the special day when i810 gets merged. */
174 case 1:
175 return 0;
176 }
177 break;
178 default:
179 MISSING_CASE(class);
180 case VIDEO_DECODE_CLASS:
181 case VIDEO_ENHANCEMENT_CLASS:
182 case COPY_ENGINE_CLASS:
183 if (INTEL_GEN(dev_priv) < 8)
184 return 0;
185 return GEN8_LR_CONTEXT_OTHER_SIZE;
186 }
187}
188
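The fixed per-gen sizes above are documented byte counts rounded up to whole pages; a quick standalone check of the Haswell figure quoted in the comment (66944 bytes), assuming 4 KiB pages:

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define ROUND_UP(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned int hsw_bytes = 66944;	/* from the comment above */
	unsigned int total = ROUND_UP(hsw_bytes, PAGE_SIZE);

	assert(total == 17 * PAGE_SIZE);	/* HSW_CXT_TOTAL_SIZE */
	printf("%u bytes -> %u pages\n", hsw_bytes, total / PAGE_SIZE);
	return 0;
}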
85static int 189static int
86intel_engine_setup(struct drm_i915_private *dev_priv, 190intel_engine_setup(struct drm_i915_private *dev_priv,
87 enum intel_engine_id id) 191 enum intel_engine_id id)
88{ 192{
89 const struct engine_info *info = &intel_engines[id]; 193 const struct engine_info *info = &intel_engines[id];
194 const struct engine_class_info *class_info;
90 struct intel_engine_cs *engine; 195 struct intel_engine_cs *engine;
91 196
197 GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
198 class_info = &intel_engine_classes[info->class];
199
92 GEM_BUG_ON(dev_priv->engine[id]); 200 GEM_BUG_ON(dev_priv->engine[id]);
93 engine = kzalloc(sizeof(*engine), GFP_KERNEL); 201 engine = kzalloc(sizeof(*engine), GFP_KERNEL);
94 if (!engine) 202 if (!engine)
@@ -96,11 +204,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
96 204
97 engine->id = id; 205 engine->id = id;
98 engine->i915 = dev_priv; 206 engine->i915 = dev_priv;
99 engine->name = info->name; 207 WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
100 engine->exec_id = info->exec_id; 208 class_info->name, info->instance) >=
209 sizeof(engine->name));
210 engine->uabi_id = info->uabi_id;
101 engine->hw_id = engine->guc_id = info->hw_id; 211 engine->hw_id = engine->guc_id = info->hw_id;
102 engine->mmio_base = info->mmio_base; 212 engine->mmio_base = info->mmio_base;
103 engine->irq_shift = info->irq_shift; 213 engine->irq_shift = info->irq_shift;
214 engine->class = info->class;
215 engine->instance = info->instance;
216
217 engine->context_size = __intel_engine_context_size(dev_priv,
218 engine->class);
219 if (WARN_ON(engine->context_size > BIT(20)))
220 engine->context_size = 0;
104 221
105 /* Nothing to do here, execute in order of dependencies */ 222 /* Nothing to do here, execute in order of dependencies */
106 engine->schedule = NULL; 223 engine->schedule = NULL;
@@ -112,18 +229,18 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
112} 229}
113 230
114/** 231/**
115 * intel_engines_init_early() - allocate the Engine Command Streamers 232 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
116 * @dev_priv: i915 device private 233 * @dev_priv: i915 device private
117 * 234 *
118 * Return: non-zero if the initialization failed. 235 * Return: non-zero if the initialization failed.
119 */ 236 */
120int intel_engines_init_early(struct drm_i915_private *dev_priv) 237int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
121{ 238{
122 struct intel_device_info *device_info = mkwrite_device_info(dev_priv); 239 struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
123 unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; 240 const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
124 unsigned int mask = 0;
125 struct intel_engine_cs *engine; 241 struct intel_engine_cs *engine;
126 enum intel_engine_id id; 242 enum intel_engine_id id;
243 unsigned int mask = 0;
127 unsigned int i; 244 unsigned int i;
128 int err; 245 int err;
129 246
@@ -150,6 +267,12 @@ int intel_engines_init_early(struct drm_i915_private *dev_priv)
150 if (WARN_ON(mask != ring_mask)) 267 if (WARN_ON(mask != ring_mask))
151 device_info->ring_mask = mask; 268 device_info->ring_mask = mask;
152 269
270 /* We always presume we have at least RCS available for later probing */
271 if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
272 err = -ENODEV;
273 goto cleanup;
274 }
275
153 device_info->num_rings = hweight32(mask); 276 device_info->num_rings = hweight32(mask);
154 277
155 return 0; 278 return 0;
@@ -161,7 +284,7 @@ cleanup:
161} 284}
162 285
163/** 286/**
164 * intel_engines_init() - allocate, populate and init the Engine Command Streamers 287 * intel_engines_init() - init the Engine Command Streamers
165 * @dev_priv: i915 device private 288 * @dev_priv: i915 device private
166 * 289 *
167 * Return: non-zero if the initialization failed. 290 * Return: non-zero if the initialization failed.
@@ -175,12 +298,14 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
175 int err = 0; 298 int err = 0;
176 299
177 for_each_engine(engine, dev_priv, id) { 300 for_each_engine(engine, dev_priv, id) {
301 const struct engine_class_info *class_info =
302 &intel_engine_classes[engine->class];
178 int (*init)(struct intel_engine_cs *engine); 303 int (*init)(struct intel_engine_cs *engine);
179 304
180 if (i915.enable_execlists) 305 if (i915.enable_execlists)
181 init = intel_engines[id].init_execlists; 306 init = class_info->init_execlists;
182 else 307 else
183 init = intel_engines[id].init_legacy; 308 init = class_info->init_legacy;
184 if (!init) { 309 if (!init) {
185 kfree(engine); 310 kfree(engine);
186 dev_priv->engine[id] = NULL; 311 dev_priv->engine[id] = NULL;
@@ -223,6 +348,9 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
223{ 348{
224 struct drm_i915_private *dev_priv = engine->i915; 349 struct drm_i915_private *dev_priv = engine->i915;
225 350
351 GEM_BUG_ON(!intel_engine_is_idle(engine));
352 GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
353
226 /* Our semaphore implementation is strictly monotonic (i.e. we proceed 354 /* Our semaphore implementation is strictly monotonic (i.e. we proceed
227 * so long as the semaphore value in the register/page is greater 355 * so long as the semaphore value in the register/page is greater
228 * than the sync value), so whenever we reset the seqno, 356 * than the sync value), so whenever we reset the seqno,
@@ -253,13 +381,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
253 intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); 381 intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
254 clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); 382 clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
255 383
256 GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
257 engine->hangcheck.seqno = seqno;
258
259 /* After manually advancing the seqno, fake the interrupt in case 384 /* After manually advancing the seqno, fake the interrupt in case
260 * there are any waiters for that seqno. 385 * there are any waiters for that seqno.
261 */ 386 */
262 intel_engine_wakeup(engine); 387 intel_engine_wakeup(engine);
388
389 GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
263} 390}
264 391
265static void intel_engine_init_timeline(struct intel_engine_cs *engine) 392static void intel_engine_init_timeline(struct intel_engine_cs *engine)
@@ -342,6 +469,7 @@ static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
342 */ 469 */
343int intel_engine_init_common(struct intel_engine_cs *engine) 470int intel_engine_init_common(struct intel_engine_cs *engine)
344{ 471{
472 struct intel_ring *ring;
345 int ret; 473 int ret;
346 474
347 engine->set_default_submission(engine); 475 engine->set_default_submission(engine);
@@ -353,9 +481,9 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
353 * be available. To avoid this we always pin the default 481 * be available. To avoid this we always pin the default
354 * context. 482 * context.
355 */ 483 */
356 ret = engine->context_pin(engine, engine->i915->kernel_context); 484 ring = engine->context_pin(engine, engine->i915->kernel_context);
357 if (ret) 485 if (IS_ERR(ring))
358 return ret; 486 return PTR_ERR(ring);
359 487
360 ret = intel_engine_init_breadcrumbs(engine); 488 ret = intel_engine_init_breadcrumbs(engine);
361 if (ret) 489 if (ret)
@@ -723,8 +851,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
723 */ 851 */
724 } 852 }
725 853
854 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk */
726 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ 855 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
727 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, 856 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
857 GEN9_ENABLE_YV12_BUGFIX |
728 GEN9_ENABLE_GPGPU_PREEMPTION); 858 GEN9_ENABLE_GPGPU_PREEMPTION);
729 859
730 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ 860 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */
@@ -1086,17 +1216,24 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
1086{ 1216{
1087 struct drm_i915_private *dev_priv = engine->i915; 1217 struct drm_i915_private *dev_priv = engine->i915;
1088 1218
1219 /* More white lies, if wedged, hw state is inconsistent */
1220 if (i915_terminally_wedged(&dev_priv->gpu_error))
1221 return true;
1222
1089 /* Any inflight/incomplete requests? */ 1223 /* Any inflight/incomplete requests? */
1090 if (!i915_seqno_passed(intel_engine_get_seqno(engine), 1224 if (!i915_seqno_passed(intel_engine_get_seqno(engine),
1091 intel_engine_last_submit(engine))) 1225 intel_engine_last_submit(engine)))
1092 return false; 1226 return false;
1093 1227
1228 if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
1229 return true;
1230
1094 /* Interrupt/tasklet pending? */ 1231 /* Interrupt/tasklet pending? */
1095 if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) 1232 if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
1096 return false; 1233 return false;
1097 1234
1098 /* Both ports drained, no more ELSP submission? */ 1235 /* Both ports drained, no more ELSP submission? */
1099 if (engine->execlist_port[0].request) 1236 if (port_request(&engine->execlist_port[0]))
1100 return false; 1237 return false;
1101 1238
1102 /* Ring stopped? */ 1239 /* Ring stopped? */
@@ -1137,6 +1274,18 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
1137 engine->set_default_submission(engine); 1274 engine->set_default_submission(engine);
1138} 1275}
1139 1276
1277void intel_engines_mark_idle(struct drm_i915_private *i915)
1278{
1279 struct intel_engine_cs *engine;
1280 enum intel_engine_id id;
1281
1282 for_each_engine(engine, i915, id) {
1283 intel_engine_disarm_breadcrumbs(engine);
1284 i915_gem_batch_pool_fini(&engine->batch_pool);
1285 engine->no_priolist = false;
1286 }
1287}
1288
1140#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1289#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1141#include "selftests/mock_engine.c" 1290#include "selftests/mock_engine.c"
1142#endif 1291#endif
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index db7f8f0a1f36..ff2fc5bc4af4 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -1312,14 +1312,12 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
1312 1312
1313static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) 1313static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
1314{ 1314{
1315#ifdef CONFIG_INTEL_IOMMU
1316 /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ 1315 /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
1317 if (intel_iommu_gfx_mapped && 1316 if (intel_vtd_active() &&
1318 (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) { 1317 (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) {
1319 DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); 1318 DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
1320 return true; 1319 return true;
1321 } 1320 }
1322#endif
1323 1321
1324 return false; 1322 return false;
1325} 1323}
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c
new file mode 100644
index 000000000000..c4cbec140101
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_ct.c
@@ -0,0 +1,461 @@
1/*
2 * Copyright © 2016-2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "i915_drv.h"
25#include "intel_guc_ct.h"
26
27enum { CTB_SEND = 0, CTB_RECV = 1 };
28
29enum { CTB_OWNER_HOST = 0 };
30
31void intel_guc_ct_init_early(struct intel_guc_ct *ct)
32{
33 /* we're using static channel owners */
34 ct->host_channel.owner = CTB_OWNER_HOST;
35}
36
37static inline const char *guc_ct_buffer_type_to_str(u32 type)
38{
39 switch (type) {
40 case INTEL_GUC_CT_BUFFER_TYPE_SEND:
41 return "SEND";
42 case INTEL_GUC_CT_BUFFER_TYPE_RECV:
43 return "RECV";
44 default:
45 return "<invalid>";
46 }
47}
48
49static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
50 u32 cmds_addr, u32 size, u32 owner)
51{
52 DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
53 desc, cmds_addr, size, owner);
54 memset(desc, 0, sizeof(*desc));
55 desc->addr = cmds_addr;
56 desc->size = size;
57 desc->owner = owner;
58}
59
60static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
61{
62 DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
63 desc, desc->head, desc->tail);
64 desc->head = 0;
65 desc->tail = 0;
66 desc->is_in_error = 0;
67}
68
69static int guc_action_register_ct_buffer(struct intel_guc *guc,
70 u32 desc_addr,
71 u32 type)
72{
73 u32 action[] = {
74 INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER,
75 desc_addr,
76 sizeof(struct guc_ct_buffer_desc),
77 type
78 };
79 int err;
80
81 /* Can't use generic send(), CT registration must go over MMIO */
82 err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
83 if (err)
84 DRM_ERROR("CT: register %s buffer failed; err=%d\n",
85 guc_ct_buffer_type_to_str(type), err);
86 return err;
87}
88
89static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
90 u32 owner,
91 u32 type)
92{
93 u32 action[] = {
94 INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
95 owner,
96 type
97 };
98 int err;
99
100 /* Can't use generic send(), CT deregistration must go over MMIO */
101 err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
102 if (err)
103 DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n",
104 guc_ct_buffer_type_to_str(type), owner, err);
105 return err;
106}
107
108static bool ctch_is_open(struct intel_guc_ct_channel *ctch)
109{
110 return ctch->vma != NULL;
111}
112
113static int ctch_init(struct intel_guc *guc,
114 struct intel_guc_ct_channel *ctch)
115{
116 struct i915_vma *vma;
117 void *blob;
118 int err;
119 int i;
120
121 GEM_BUG_ON(ctch->vma);
122
123 /* We allocate 1 page to hold both descriptors and both buffers.
124 * ___________.....................
125 * |desc (SEND)| :
126 * |___________| PAGE/4
127 * :___________....................:
128 * |desc (RECV)| :
129 * |___________| PAGE/4
130 * :_______________________________:
131 * |cmds (SEND) |
132 * | PAGE/4
133 * |_______________________________|
134 * |cmds (RECV) |
135 * | PAGE/4
136 * |_______________________________|
137 *
138 * Each message can use a maximum of 32 dwords and we don't expect to
139 * have more than 1 in flight at any time, so we have enough space.
140 * Some logic further ahead will rely on the fact that there is only 1
141 * page and that it is always mapped, so if the size is changed the
142 * other code will need updating as well.
143 */
144
145 /* allocate vma */
146 vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
147 if (IS_ERR(vma)) {
148 err = PTR_ERR(vma);
149 goto err_out;
150 }
151 ctch->vma = vma;
152
153 /* map first page */
154 blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
155 if (IS_ERR(blob)) {
156 err = PTR_ERR(blob);
157 goto err_vma;
158 }
159 DRM_DEBUG_DRIVER("CT: vma base=%#x\n", guc_ggtt_offset(ctch->vma));
160
161 /* store pointers to desc and cmds */
162 for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
163 GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
164 ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
165 ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
166 }
167
168 return 0;
169
170err_vma:
171 i915_vma_unpin_and_release(&ctch->vma);
172err_out:
173 DRM_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n",
174 ctch->owner, err);
175 return err;
176}
177
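Following the single-page layout sketched in the comment above, the descriptor and command-buffer offsets work out as below for a 4 KiB page (standalone sketch; constants are reproduced here only for illustration):

#include <stdio.h>

#define PAGE_SIZE 4096u
enum { CTB_SEND = 0, CTB_RECV = 1 };

int main(void)
{
	for (unsigned int i = CTB_SEND; i <= CTB_RECV; i++) {
		unsigned int desc = PAGE_SIZE / 4 * i;			/* 0, 1024 */
		unsigned int cmds = PAGE_SIZE / 4 * i + PAGE_SIZE / 2;	/* 2048, 3072 */

		printf("ctb[%u]: desc at %u, cmds at %u (size %u)\n",
		       i, desc, cmds, PAGE_SIZE / 4);
	}
	return 0;
}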
178static void ctch_fini(struct intel_guc *guc,
179 struct intel_guc_ct_channel *ctch)
180{
181 GEM_BUG_ON(!ctch->vma);
182
183 i915_gem_object_unpin_map(ctch->vma->obj);
184 i915_vma_unpin_and_release(&ctch->vma);
185}
186
187static int ctch_open(struct intel_guc *guc,
188 struct intel_guc_ct_channel *ctch)
189{
190 u32 base;
191 int err;
192 int i;
193
194 DRM_DEBUG_DRIVER("CT: channel %d reopen=%s\n",
195 ctch->owner, yesno(ctch_is_open(ctch)));
196
197 if (!ctch->vma) {
198 err = ctch_init(guc, ctch);
199 if (unlikely(err))
200 goto err_out;
201 }
202
 203	/* vma should already be allocated and mapped */
204 base = guc_ggtt_offset(ctch->vma);
205
206 /* (re)initialize descriptors
207 * cmds buffers are in the second half of the blob page
208 */
209 for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
210 GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
211 guc_ct_buffer_desc_init(ctch->ctbs[i].desc,
212 base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
213 PAGE_SIZE/4,
214 ctch->owner);
215 }
216
 217	/* register buffers, starting with RECV buffer
218 * descriptors are in first half of the blob
219 */
220 err = guc_action_register_ct_buffer(guc,
221 base + PAGE_SIZE/4 * CTB_RECV,
222 INTEL_GUC_CT_BUFFER_TYPE_RECV);
223 if (unlikely(err))
224 goto err_fini;
225
226 err = guc_action_register_ct_buffer(guc,
227 base + PAGE_SIZE/4 * CTB_SEND,
228 INTEL_GUC_CT_BUFFER_TYPE_SEND);
229 if (unlikely(err))
230 goto err_deregister;
231
232 return 0;
233
234err_deregister:
235 guc_action_deregister_ct_buffer(guc,
236 ctch->owner,
237 INTEL_GUC_CT_BUFFER_TYPE_RECV);
238err_fini:
239 ctch_fini(guc, ctch);
240err_out:
241 DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err);
242 return err;
243}
244
245static void ctch_close(struct intel_guc *guc,
246 struct intel_guc_ct_channel *ctch)
247{
248 GEM_BUG_ON(!ctch_is_open(ctch));
249
250 guc_action_deregister_ct_buffer(guc,
251 ctch->owner,
252 INTEL_GUC_CT_BUFFER_TYPE_SEND);
253 guc_action_deregister_ct_buffer(guc,
254 ctch->owner,
255 INTEL_GUC_CT_BUFFER_TYPE_RECV);
256 ctch_fini(guc, ctch);
257}
258
259static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
260{
261 /* For now it's trivial */
262 return ++ctch->next_fence;
263}
264
265static int ctb_write(struct intel_guc_ct_buffer *ctb,
266 const u32 *action,
267 u32 len /* in dwords */,
268 u32 fence)
269{
270 struct guc_ct_buffer_desc *desc = ctb->desc;
271 u32 head = desc->head / 4; /* in dwords */
272 u32 tail = desc->tail / 4; /* in dwords */
273 u32 size = desc->size / 4; /* in dwords */
274 u32 used; /* in dwords */
275 u32 header;
276 u32 *cmds = ctb->cmds;
277 unsigned int i;
278
279 GEM_BUG_ON(desc->size % 4);
280 GEM_BUG_ON(desc->head % 4);
281 GEM_BUG_ON(desc->tail % 4);
282 GEM_BUG_ON(tail >= size);
283
284 /*
285 * tail == head condition indicates empty. GuC FW does not support
286 * using up the entire buffer to get tail == head meaning full.
287 */
288 if (tail < head)
289 used = (size - head) + tail;
290 else
291 used = tail - head;
292
 293	/* make sure there is space, including an extra dw for the fence */
294 if (unlikely(used + len + 1 >= size))
295 return -ENOSPC;
296
297 /* Write the message. The format is the following:
298 * DW0: header (including action code)
299 * DW1: fence
300 * DW2+: action data
301 */
302 header = (len << GUC_CT_MSG_LEN_SHIFT) |
303 (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
304 (action[0] << GUC_CT_MSG_ACTION_SHIFT);
305
306 cmds[tail] = header;
307 tail = (tail + 1) % size;
308
309 cmds[tail] = fence;
310 tail = (tail + 1) % size;
311
312 for (i = 1; i < len; i++) {
313 cmds[tail] = action[i];
314 tail = (tail + 1) % size;
315 }
316
317 /* now update desc tail (back in bytes) */
318 desc->tail = tail * 4;
319 GEM_BUG_ON(desc->tail > desc->size);
320
321 return 0;
322}
323
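The wrap-aware free-space test and the DW0 header packing used in ctb_write() can be exercised in isolation. This sketch reuses the GUC_CT_MSG_* values added to intel_guc_fwif.h later in this patch; the head/tail/len figures are invented:

#include <assert.h>
#include <stdio.h>

#define GUC_CT_MSG_LEN_SHIFT		0
#define GUC_CT_MSG_WRITE_FENCE_TO_DESC	(1 << 8)
#define GUC_CT_MSG_ACTION_SHIFT		16

int main(void)
{
	/* ring state in dwords; sample values only */
	unsigned int size = 256, head = 250, tail = 10, len = 4;
	unsigned int used = (tail < head) ? (size - head) + tail : tail - head;
	unsigned int action = 0x4505;	/* INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER */
	unsigned int header;

	assert(used == 16);		/* wrapped: (256 - 250) + 10 */
	assert(used + len + 1 < size);	/* room for header + fence + payload */

	header = (len << GUC_CT_MSG_LEN_SHIFT) |
		 GUC_CT_MSG_WRITE_FENCE_TO_DESC |
		 (action << GUC_CT_MSG_ACTION_SHIFT);
	printf("header=%#x\n", header);	/* 0x45050104 */
	return 0;
}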
324/* Wait for the response from the GuC.
325 * @fence: response fence
326 * @status: placeholder for status
327 * return: 0 response received (status is valid)
328 * -ETIMEDOUT no response within hardcoded timeout
329 * -EPROTO no response, ct buffer was in error
330 */
331static int wait_for_response(struct guc_ct_buffer_desc *desc,
332 u32 fence,
333 u32 *status)
334{
335 int err;
336
337 /*
338 * Fast commands should complete in less than 10us, so sample quickly
339 * up to that length of time, then switch to a slower sleep-wait loop.
340 * No GuC command should ever take longer than 10ms.
341 */
342#define done (READ_ONCE(desc->fence) == fence)
343 err = wait_for_us(done, 10);
344 if (err)
345 err = wait_for(done, 10);
346#undef done
347
348 if (unlikely(err)) {
349 DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
350 fence, desc->fence);
351
352 if (WARN_ON(desc->is_in_error)) {
353 /* Something went wrong with the messaging, try to reset
354 * the buffer and hope for the best
355 */
356 guc_ct_buffer_desc_reset(desc);
357 err = -EPROTO;
358 }
359 }
360
361 *status = desc->status;
362 return err;
363}
364
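The two-phase wait above (a short busy poll, then a sleeping poll bounded at ~10ms) is a common pattern. Below is a rough userspace analogue for illustration only; wait_for_us()/wait_for() in the driver are i915 macros, and the fence slot and back-off interval here are stand-ins.

#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

static int wait_two_phase(volatile uint32_t *fence_slot, uint32_t fence)
{
	int64_t start = now_ns();

	while (now_ns() - start < 10 * 1000)		/* fast: up to ~10us */
		if (*fence_slot == fence)
			return 0;

	while (now_ns() - start < 10 * 1000 * 1000) {	/* slow: up to ~10ms */
		if (*fence_slot == fence)
			return 0;
		usleep(100);				/* back off between samples */
	}
	return -1;					/* caller maps this to a timeout error */
}

int main(void)
{
	volatile uint32_t slot = 42;	/* pretend the response fence was already written */

	printf("wait result: %d\n", wait_two_phase(&slot, 42));	/* 0 */
	return 0;
}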
365static int ctch_send(struct intel_guc *guc,
366 struct intel_guc_ct_channel *ctch,
367 const u32 *action,
368 u32 len,
369 u32 *status)
370{
371 struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
372 struct guc_ct_buffer_desc *desc = ctb->desc;
373 u32 fence;
374 int err;
375
376 GEM_BUG_ON(!ctch_is_open(ctch));
377 GEM_BUG_ON(!len);
378 GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
379
380 fence = ctch_get_next_fence(ctch);
381 err = ctb_write(ctb, action, len, fence);
382 if (unlikely(err))
383 return err;
384
385 intel_guc_notify(guc);
386
387 err = wait_for_response(desc, fence, status);
388 if (unlikely(err))
389 return err;
390 if (*status != INTEL_GUC_STATUS_SUCCESS)
391 return -EIO;
392 return 0;
393}
394
395/*
396 * Command Transport (CT) buffer based GuC send function.
397 */
398static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len)
399{
400 struct intel_guc_ct_channel *ctch = &guc->ct.host_channel;
401 u32 status = ~0; /* undefined */
402 int err;
403
404 mutex_lock(&guc->send_mutex);
405
406 err = ctch_send(guc, ctch, action, len, &status);
407 if (unlikely(err)) {
408 DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
409 action[0], err, status);
410 }
411
412 mutex_unlock(&guc->send_mutex);
413 return err;
414}
415
416/**
417 * Enable buffer based command transport
418 * Shall only be called for platforms with HAS_GUC_CT.
419 * @guc: the guc
420 * return: 0 on success
421 * non-zero on failure
422 */
423int intel_guc_enable_ct(struct intel_guc *guc)
424{
425 struct drm_i915_private *dev_priv = guc_to_i915(guc);
426 struct intel_guc_ct_channel *ctch = &guc->ct.host_channel;
427 int err;
428
429 GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
430
431 err = ctch_open(guc, ctch);
432 if (unlikely(err))
433 return err;
434
435 /* Switch into cmd transport buffer based send() */
436 guc->send = intel_guc_send_ct;
437 DRM_INFO("CT: %s\n", enableddisabled(true));
438 return 0;
439}
440
441/**
442 * Disable buffer based command transport.
443 * Shall only be called for platforms with HAS_GUC_CT.
444 * @guc: the guc
445 */
446void intel_guc_disable_ct(struct intel_guc *guc)
447{
448 struct drm_i915_private *dev_priv = guc_to_i915(guc);
449 struct intel_guc_ct_channel *ctch = &guc->ct.host_channel;
450
451 GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
452
453 if (!ctch_is_open(ctch))
454 return;
455
456 ctch_close(guc, ctch);
457
458 /* Disable send */
459 guc->send = intel_guc_send_nop;
460 DRM_INFO("CT: %s\n", enableddisabled(false));
461}
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h
new file mode 100644
index 000000000000..6d97f36fcc62
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_ct.h
@@ -0,0 +1,86 @@
1/*
2 * Copyright © 2016-2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef _INTEL_GUC_CT_H_
25#define _INTEL_GUC_CT_H_
26
27struct intel_guc;
28struct i915_vma;
29
30#include "intel_guc_fwif.h"
31
32/**
33 * DOC: Command Transport (CT).
34 *
 35 * Buffer based command transport is a replacement for the MMIO based mechanism.
 36 * It can be used to perform both host-to-GuC and GuC-to-host communication.
37 */
38
 39/** Represents a single command transport buffer.
40 *
41 * A single command transport buffer consists of two parts, the header
42 * record (command transport buffer descriptor) and the actual buffer which
43 * holds the commands.
44 *
45 * @desc: pointer to the buffer descriptor
46 * @cmds: pointer to the commands buffer
47 */
48struct intel_guc_ct_buffer {
49 struct guc_ct_buffer_desc *desc;
50 u32 *cmds;
51};
52
 53/** Represents a pair of command transport buffers.
54 *
55 * Buffers go in pairs to allow bi-directional communication.
56 * To simplify the code we place both of them in the same vma.
 57 * Buffers from the same pair must share a unique owner id.
58 *
59 * @vma: pointer to the vma with pair of CT buffers
60 * @ctbs: buffers for sending(0) and receiving(1) commands
61 * @owner: unique identifier
62 * @next_fence: fence to be used with next send command
63 */
64struct intel_guc_ct_channel {
65 struct i915_vma *vma;
66 struct intel_guc_ct_buffer ctbs[2];
67 u32 owner;
68 u32 next_fence;
69};
70
71/** Holds all command transport channels.
72 *
73 * @host_channel: main channel used by the host
74 */
75struct intel_guc_ct {
76 struct intel_guc_ct_channel host_channel;
77 /* other channels are tbd */
78};
79
80void intel_guc_ct_init_early(struct intel_guc_ct *ct);
81
 82/* XXX: move to intel_uc.h? don't fit there either */
83int intel_guc_enable_ct(struct intel_guc *guc);
84void intel_guc_disable_ct(struct intel_guc *guc);
85
86#endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index cb36cbf3818f..5fa286074811 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -23,8 +23,8 @@
23#ifndef _INTEL_GUC_FWIF_H 23#ifndef _INTEL_GUC_FWIF_H
24#define _INTEL_GUC_FWIF_H 24#define _INTEL_GUC_FWIF_H
25 25
26#define GFXCORE_FAMILY_GEN9 12 26#define GUC_CORE_FAMILY_GEN9 12
27#define GFXCORE_FAMILY_UNKNOWN 0x7fffffff 27#define GUC_CORE_FAMILY_UNKNOWN 0x7fffffff
28 28
29#define GUC_CLIENT_PRIORITY_KMD_HIGH 0 29#define GUC_CLIENT_PRIORITY_KMD_HIGH 0
30#define GUC_CLIENT_PRIORITY_HIGH 1 30#define GUC_CLIENT_PRIORITY_HIGH 1
@@ -331,6 +331,47 @@ struct guc_stage_desc {
331 u64 desc_private; 331 u64 desc_private;
332} __packed; 332} __packed;
333 333
334/*
335 * Describes single command transport buffer.
336 * Used by both guc-master and clients.
337 */
338struct guc_ct_buffer_desc {
339 u32 addr; /* gfx address */
340 u64 host_private; /* host private data */
341 u32 size; /* size in bytes */
342	u32 head;		/* offset updated by GuC */
343 u32 tail; /* offset updated by owner */
344 u32 is_in_error; /* error indicator */
345 u32 fence; /* fence updated by GuC */
346 u32 status; /* status updated by GuC */
347 u32 owner; /* id of the channel owner */
348 u32 owner_sub_id; /* owner-defined field for extra tracking */
349 u32 reserved[5];
350} __packed;
351
352/* Type of command transport buffer */
353#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u
354#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u
355
356/*
357 * Definition of the command transport message header (DW0)
358 *
359 * bit[4..0] message len (in dwords)
360 * bit[7..5] reserved
361 * bit[8] write fence to desc
362 * bit[9] write status to H2G buff
363 * bit[10] send status (via G2H)
364 * bit[15..11] reserved
365 * bit[31..16] action code
366 */
367#define GUC_CT_MSG_LEN_SHIFT 0
368#define GUC_CT_MSG_LEN_MASK 0x1F
369#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8)
370#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9)
371#define GUC_CT_MSG_SEND_STATUS (1 << 10)
372#define GUC_CT_MSG_ACTION_SHIFT 16
373#define GUC_CT_MSG_ACTION_MASK 0xFFFF
374
334#define GUC_FORCEWAKE_RENDER (1 << 0) 375#define GUC_FORCEWAKE_RENDER (1 << 0)
335#define GUC_FORCEWAKE_MEDIA (1 << 1) 376#define GUC_FORCEWAKE_MEDIA (1 << 1)
336 377
@@ -515,6 +556,8 @@ enum intel_guc_action {
515 INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, 556 INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
516 INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, 557 INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
517 INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, 558 INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
559 INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
560 INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
518 INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, 561 INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000,
519 INTEL_GUC_ACTION_LIMIT 562 INTEL_GUC_ACTION_LIMIT
520}; 563};
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 8a1a023e48b2..d9045b6e897b 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -61,6 +61,9 @@
61#define KBL_FW_MAJOR 9 61#define KBL_FW_MAJOR 9
62#define KBL_FW_MINOR 14 62#define KBL_FW_MINOR 14
63 63
64#define GLK_FW_MAJOR 10
65#define GLK_FW_MINOR 56
66
64#define GUC_FW_PATH(platform, major, minor) \ 67#define GUC_FW_PATH(platform, major, minor) \
65 "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" 68 "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin"
66 69
@@ -73,6 +76,8 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
73#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) 76#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
74MODULE_FIRMWARE(I915_KBL_GUC_UCODE); 77MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
75 78
79#define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR)
80
76 81
77static u32 get_gttype(struct drm_i915_private *dev_priv) 82static u32 get_gttype(struct drm_i915_private *dev_priv)
78{ 83{
@@ -86,11 +91,11 @@ static u32 get_core_family(struct drm_i915_private *dev_priv)
86 91
87 switch (gen) { 92 switch (gen) {
88 case 9: 93 case 9:
89 return GFXCORE_FAMILY_GEN9; 94 return GUC_CORE_FAMILY_GEN9;
90 95
91 default: 96 default:
92 WARN(1, "GEN%d does not support GuC operation!\n", gen); 97 MISSING_CASE(gen);
93 return GFXCORE_FAMILY_UNKNOWN; 98 return GUC_CORE_FAMILY_UNKNOWN;
94 } 99 }
95} 100}
96 101
@@ -280,10 +285,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
280 285
281 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 286 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
282 287
283 /* init WOPCM */
284 I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
285 I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE);
286
287 /* Enable MIA caching. GuC clock gating is disabled. */ 288 /* Enable MIA caching. GuC clock gating is disabled. */
288 I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); 289 I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE);
289 290
@@ -405,6 +406,10 @@ int intel_guc_select_fw(struct intel_guc *guc)
405 guc->fw.path = I915_KBL_GUC_UCODE; 406 guc->fw.path = I915_KBL_GUC_UCODE;
406 guc->fw.major_ver_wanted = KBL_FW_MAJOR; 407 guc->fw.major_ver_wanted = KBL_FW_MAJOR;
407 guc->fw.minor_ver_wanted = KBL_FW_MINOR; 408 guc->fw.minor_ver_wanted = KBL_FW_MINOR;
409 } else if (IS_GEMINILAKE(dev_priv)) {
410 guc->fw.path = I915_GLK_GUC_UCODE;
411 guc->fw.major_ver_wanted = GLK_FW_MAJOR;
412 guc->fw.minor_ver_wanted = GLK_FW_MINOR;
408 } else { 413 } else {
409 DRM_ERROR("No GuC firmware known for platform with GuC!\n"); 414 DRM_ERROR("No GuC firmware known for platform with GuC!\n");
410 return -ENOENT; 415 return -ENOENT;
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 6fb63a3c65b0..16d3b8719cab 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -359,12 +359,16 @@ static int guc_log_runtime_create(struct intel_guc *guc)
359 void *vaddr; 359 void *vaddr;
360 struct rchan *guc_log_relay_chan; 360 struct rchan *guc_log_relay_chan;
361 size_t n_subbufs, subbuf_size; 361 size_t n_subbufs, subbuf_size;
362 int ret = 0; 362 int ret;
363 363
364 lockdep_assert_held(&dev_priv->drm.struct_mutex); 364 lockdep_assert_held(&dev_priv->drm.struct_mutex);
365 365
366 GEM_BUG_ON(guc_log_has_runtime(guc)); 366 GEM_BUG_ON(guc_log_has_runtime(guc));
367 367
368 ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true);
369 if (ret)
370 return ret;
371
368 /* Create a WC (Uncached for read) vmalloc mapping of log 372 /* Create a WC (Uncached for read) vmalloc mapping of log
369 * buffer pages, so that we can directly get the data 373 * buffer pages, so that we can directly get the data
370 * (up-to-date) from memory. 374 * (up-to-date) from memory.
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index dce742243ba6..9b0ece427bdc 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -407,7 +407,7 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915,
407 "%s, ", engine->name); 407 "%s, ", engine->name);
408 msg[len-2] = '\0'; 408 msg[len-2] = '\0';
409 409
410 return i915_handle_error(i915, hung, msg); 410 return i915_handle_error(i915, hung, "%s", msg);
411} 411}
412 412
413/* 413/*
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index c6b8207724fa..58d690393b29 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1327,6 +1327,11 @@ static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state)
1327 return false; 1327 return false;
1328 } 1328 }
1329 1329
1330 /* Display Wa #1139 */
1331 if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) &&
1332 crtc_state->base.adjusted_mode.htotal > 5460)
1333 return false;
1334
1330 return true; 1335 return true;
1331} 1336}
1332 1337
@@ -1392,7 +1397,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
1392 } 1397 }
1393 1398
1394 if (!pipe_config->bw_constrained) { 1399 if (!pipe_config->bw_constrained) {
1395 DRM_DEBUG_KMS("forcing pipe bpc to %i for HDMI\n", desired_bpp); 1400 DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp);
1396 pipe_config->pipe_bpp = desired_bpp; 1401 pipe_config->pipe_bpp = desired_bpp;
1397 } 1402 }
1398 1403
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index 9ee819666a4c..f5eb18d0e2d1 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -52,6 +52,10 @@
52#define KBL_HUC_FW_MINOR 00 52#define KBL_HUC_FW_MINOR 00
53#define KBL_BLD_NUM 1810 53#define KBL_BLD_NUM 1810
54 54
55#define GLK_HUC_FW_MAJOR 02
56#define GLK_HUC_FW_MINOR 00
57#define GLK_BLD_NUM 1748
58
55#define HUC_FW_PATH(platform, major, minor, bld_num) \ 59#define HUC_FW_PATH(platform, major, minor, bld_num) \
56 "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ 60 "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \
57 __stringify(minor) "_" __stringify(bld_num) ".bin" 61 __stringify(minor) "_" __stringify(bld_num) ".bin"
@@ -68,6 +72,9 @@ MODULE_FIRMWARE(I915_BXT_HUC_UCODE);
68 KBL_HUC_FW_MINOR, KBL_BLD_NUM) 72 KBL_HUC_FW_MINOR, KBL_BLD_NUM)
69MODULE_FIRMWARE(I915_KBL_HUC_UCODE); 73MODULE_FIRMWARE(I915_KBL_HUC_UCODE);
70 74
75#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \
76 GLK_HUC_FW_MINOR, GLK_BLD_NUM)
77
71/** 78/**
72 * huc_ucode_xfer() - DMA's the firmware 79 * huc_ucode_xfer() - DMA's the firmware
73 * @dev_priv: the drm_i915_private device 80 * @dev_priv: the drm_i915_private device
@@ -99,11 +106,6 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv)
99 106
100 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 107 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
101 108
102 /* init WOPCM */
103 I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
104 I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE |
105 HUC_LOADING_AGENT_GUC);
106
107 /* Set the source address for the uCode */ 109 /* Set the source address for the uCode */
108 offset = guc_ggtt_offset(vma) + huc_fw->header_offset; 110 offset = guc_ggtt_offset(vma) + huc_fw->header_offset;
109 I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); 111 I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
@@ -169,6 +171,10 @@ void intel_huc_select_fw(struct intel_huc *huc)
169 huc->fw.path = I915_KBL_HUC_UCODE; 171 huc->fw.path = I915_KBL_HUC_UCODE;
170 huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; 172 huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR;
171 huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; 173 huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR;
174 } else if (IS_GEMINILAKE(dev_priv)) {
175 huc->fw.path = I915_GLK_HUC_UCODE;
176 huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR;
177 huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR;
172 } else { 178 } else {
173 DRM_ERROR("No HuC firmware known for platform with HuC!\n"); 179 DRM_ERROR("No HuC firmware known for platform with HuC!\n");
174 return; 180 return;
@@ -186,68 +192,36 @@ void intel_huc_select_fw(struct intel_huc *huc)
186 * earlier call to intel_huc_init(), so here we need only check that 192 * earlier call to intel_huc_init(), so here we need only check that
187 * it succeeded, and then transfer the image to the h/w. 193
188 * 194 *
189 * Return: non-zero code on error
190 */ 195 */
191int intel_huc_init_hw(struct intel_huc *huc) 196void intel_huc_init_hw(struct intel_huc *huc)
192{ 197{
193 struct drm_i915_private *dev_priv = huc_to_i915(huc); 198 struct drm_i915_private *dev_priv = huc_to_i915(huc);
194 int err; 199 int err;
195 200
196 if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_NONE)
197 return 0;
198
199 DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", 201 DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n",
200 huc->fw.path, 202 huc->fw.path,
201 intel_uc_fw_status_repr(huc->fw.fetch_status), 203 intel_uc_fw_status_repr(huc->fw.fetch_status),
202 intel_uc_fw_status_repr(huc->fw.load_status)); 204 intel_uc_fw_status_repr(huc->fw.load_status));
203 205
204 if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_SUCCESS && 206 if (huc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
205 huc->fw.load_status == INTEL_UC_FIRMWARE_FAIL) 207 return;
206 return -ENOEXEC;
207 208
208 huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; 209 huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING;
209 210
210 switch (huc->fw.fetch_status) {
211 case INTEL_UC_FIRMWARE_FAIL:
212 /* something went wrong :( */
213 err = -EIO;
214 goto fail;
215
216 case INTEL_UC_FIRMWARE_NONE:
217 case INTEL_UC_FIRMWARE_PENDING:
218 default:
219 /* "can't happen" */
220 WARN_ONCE(1, "HuC fw %s invalid fetch_status %s [%d]\n",
221 huc->fw.path,
222 intel_uc_fw_status_repr(huc->fw.fetch_status),
223 huc->fw.fetch_status);
224 err = -ENXIO;
225 goto fail;
226
227 case INTEL_UC_FIRMWARE_SUCCESS:
228 break;
229 }
230
231 err = huc_ucode_xfer(dev_priv); 211 err = huc_ucode_xfer(dev_priv);
232 if (err)
233 goto fail;
234 212
235 huc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS; 213 huc->fw.load_status = err ?
214 INTEL_UC_FIRMWARE_FAIL : INTEL_UC_FIRMWARE_SUCCESS;
236 215
237 DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", 216 DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n",
238 huc->fw.path, 217 huc->fw.path,
239 intel_uc_fw_status_repr(huc->fw.fetch_status), 218 intel_uc_fw_status_repr(huc->fw.fetch_status),
240 intel_uc_fw_status_repr(huc->fw.load_status)); 219 intel_uc_fw_status_repr(huc->fw.load_status));
241 220
242 return 0; 221 if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
243 222 DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err);
244fail:
245 if (huc->fw.load_status == INTEL_UC_FIRMWARE_PENDING)
246 huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL;
247
248 DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err);
249 223
250 return err; 224 return;
251} 225}
252 226
253/** 227/**
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index 25d8e76489e4..3bf65288ffff 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -63,6 +63,7 @@
63#include <linux/acpi.h> 63#include <linux/acpi.h>
64#include <linux/device.h> 64#include <linux/device.h>
65#include <linux/pci.h> 65#include <linux/pci.h>
66#include <linux/pm_runtime.h>
66 67
67#include "i915_drv.h" 68#include "i915_drv.h"
68#include <linux/delay.h> 69#include <linux/delay.h>
@@ -110,6 +111,11 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
110 pinfo.size_data = sizeof(*pdata); 111 pinfo.size_data = sizeof(*pdata);
111 pinfo.dma_mask = DMA_BIT_MASK(32); 112 pinfo.dma_mask = DMA_BIT_MASK(32);
112 113
114 pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes;
115 pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 3 : 2; /* B,C,D or B,C */
116 pdata->port[0].pipe = -1;
117 pdata->port[1].pipe = -1;
118 pdata->port[2].pipe = -1;
113 spin_lock_init(&pdata->lpe_audio_slock); 119 spin_lock_init(&pdata->lpe_audio_slock);
114 120
115 platdev = platform_device_register_full(&pinfo); 121 platdev = platform_device_register_full(&pinfo);
@@ -121,6 +127,10 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
121 127
122 kfree(rsc); 128 kfree(rsc);
123 129
130 pm_runtime_forbid(&platdev->dev);
131 pm_runtime_set_active(&platdev->dev);
132 pm_runtime_enable(&platdev->dev);
133
124 return platdev; 134 return platdev;
125 135
126err: 136err:
@@ -144,44 +154,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv)
144 154
145static void lpe_audio_irq_unmask(struct irq_data *d) 155static void lpe_audio_irq_unmask(struct irq_data *d)
146{ 156{
147 struct drm_i915_private *dev_priv = d->chip_data;
148 unsigned long irqflags;
149 u32 val = (I915_LPE_PIPE_A_INTERRUPT |
150 I915_LPE_PIPE_B_INTERRUPT);
151
152 if (IS_CHERRYVIEW(dev_priv))
153 val |= I915_LPE_PIPE_C_INTERRUPT;
154
155 spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
156
157 dev_priv->irq_mask &= ~val;
158 I915_WRITE(VLV_IIR, val);
159 I915_WRITE(VLV_IIR, val);
160 I915_WRITE(VLV_IMR, dev_priv->irq_mask);
161 POSTING_READ(VLV_IMR);
162
163 spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
164} 157}
165 158
166static void lpe_audio_irq_mask(struct irq_data *d) 159static void lpe_audio_irq_mask(struct irq_data *d)
167{ 160{
168 struct drm_i915_private *dev_priv = d->chip_data;
169 unsigned long irqflags;
170 u32 val = (I915_LPE_PIPE_A_INTERRUPT |
171 I915_LPE_PIPE_B_INTERRUPT);
172
173 if (IS_CHERRYVIEW(dev_priv))
174 val |= I915_LPE_PIPE_C_INTERRUPT;
175
176 spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
177
178 dev_priv->irq_mask |= val;
179 I915_WRITE(VLV_IMR, dev_priv->irq_mask);
180 I915_WRITE(VLV_IIR, val);
181 I915_WRITE(VLV_IIR, val);
182 POSTING_READ(VLV_IIR);
183
184 spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
185} 161}
186 162
187static struct irq_chip lpe_audio_irqchip = { 163static struct irq_chip lpe_audio_irqchip = {
@@ -325,8 +301,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
325 301
326 desc = irq_to_desc(dev_priv->lpe_audio.irq); 302 desc = irq_to_desc(dev_priv->lpe_audio.irq);
327 303
328 lpe_audio_irq_mask(&desc->irq_data);
329
330 lpe_audio_platdev_destroy(dev_priv); 304 lpe_audio_platdev_destroy(dev_priv);
331 305
332 irq_free_desc(dev_priv->lpe_audio.irq); 306 irq_free_desc(dev_priv->lpe_audio.irq);
@@ -337,53 +311,47 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
337 * intel_lpe_audio_notify() - notify lpe audio event 311 * intel_lpe_audio_notify() - notify lpe audio event
338 * audio driver and i915 312 * audio driver and i915
339 * @dev_priv: the i915 drm device private data 313 * @dev_priv: the i915 drm device private data
314 * @pipe: pipe
315 * @port: port
340 * @eld : ELD data 316 * @eld : ELD data
341 * @pipe: pipe id 317 * @ls_clock: Link symbol clock in kHz
342 * @port: port id 318 * @dp_output: Driving a DP output?
343 * @tmds_clk_speed: tmds clock frequency in Hz
344 * 319 *
345 * Notify lpe audio driver of eld change. 320 * Notify lpe audio driver of eld change.
346 */ 321 */
347void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, 322void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
348 void *eld, int port, int pipe, int tmds_clk_speed, 323 enum pipe pipe, enum port port,
349 bool dp_output, int link_rate) 324 const void *eld, int ls_clock, bool dp_output)
350{ 325{
351 unsigned long irq_flags; 326 unsigned long irqflags;
352 struct intel_hdmi_lpe_audio_pdata *pdata = NULL; 327 struct intel_hdmi_lpe_audio_pdata *pdata;
328 struct intel_hdmi_lpe_audio_port_pdata *ppdata;
353 u32 audio_enable; 329 u32 audio_enable;
354 330
355 if (!HAS_LPE_AUDIO(dev_priv)) 331 if (!HAS_LPE_AUDIO(dev_priv))
356 return; 332 return;
357 333
358 pdata = dev_get_platdata( 334 pdata = dev_get_platdata(&dev_priv->lpe_audio.platdev->dev);
359 &(dev_priv->lpe_audio.platdev->dev)); 335 ppdata = &pdata->port[port - PORT_B];
360 336
361 spin_lock_irqsave(&pdata->lpe_audio_slock, irq_flags); 337 spin_lock_irqsave(&pdata->lpe_audio_slock, irqflags);
362 338
363 audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port)); 339 audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port));
364 340
365 if (eld != NULL) { 341 if (eld != NULL) {
366 memcpy(pdata->eld.eld_data, eld, 342 memcpy(ppdata->eld, eld, HDMI_MAX_ELD_BYTES);
367 HDMI_MAX_ELD_BYTES); 343 ppdata->pipe = pipe;
368 pdata->eld.port_id = port; 344 ppdata->ls_clock = ls_clock;
369 pdata->eld.pipe_id = pipe; 345 ppdata->dp_output = dp_output;
370 pdata->hdmi_connected = true;
371
372 pdata->dp_output = dp_output;
373 if (tmds_clk_speed)
374 pdata->tmds_clock_speed = tmds_clk_speed;
375 if (link_rate)
376 pdata->link_rate = link_rate;
377 346
378 /* Unmute the amp for both DP and HDMI */ 347 /* Unmute the amp for both DP and HDMI */
379 I915_WRITE(VLV_AUD_PORT_EN_DBG(port), 348 I915_WRITE(VLV_AUD_PORT_EN_DBG(port),
380 audio_enable & ~VLV_AMP_MUTE); 349 audio_enable & ~VLV_AMP_MUTE);
381
382 } else { 350 } else {
383 memset(pdata->eld.eld_data, 0, 351 memset(ppdata->eld, 0, HDMI_MAX_ELD_BYTES);
384 HDMI_MAX_ELD_BYTES); 352 ppdata->pipe = -1;
385 pdata->hdmi_connected = false; 353 ppdata->ls_clock = 0;
386 pdata->dp_output = false; 354 ppdata->dp_output = false;
387 355
388 /* Mute the amp for both DP and HDMI */ 356 /* Mute the amp for both DP and HDMI */
389 I915_WRITE(VLV_AUD_PORT_EN_DBG(port), 357 I915_WRITE(VLV_AUD_PORT_EN_DBG(port),
@@ -391,10 +359,7 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
391 } 359 }
392 360
393 if (pdata->notify_audio_lpe) 361 if (pdata->notify_audio_lpe)
394 pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev); 362 pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev, port - PORT_B);
395 else
396 pdata->notify_pending = true;
397 363
398 spin_unlock_irqrestore(&pdata->lpe_audio_slock, 364 spin_unlock_irqrestore(&pdata->lpe_audio_slock, irqflags);
399 irq_flags);
400} 365}
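Editor's note on the lpe_audio hunks above: the newly registered HDMI LPE audio child device is handed to runtime PM in the "already powered" state via pm_runtime_forbid()/pm_runtime_set_active()/pm_runtime_enable(). The following is a minimal, generic sketch of that registration pattern, not the driver's actual helper; the device name and id are placeholders.

        #include <linux/err.h>
        #include <linux/platform_device.h>
        #include <linux/pm_runtime.h>

        static struct platform_device *register_child(struct device *parent)
        {
                struct platform_device_info pinfo = {
                        .parent = parent,
                        .name = "example-child",        /* placeholder name */
                        .id = -1,
                };
                struct platform_device *pdev;

                pdev = platform_device_register_full(&pinfo);
                if (IS_ERR(pdev))
                        return pdev;

                /* The child starts out powered: mark it active and block
                 * runtime suspend before enabling runtime PM for it.
                 */
                pm_runtime_forbid(&pdev->dev);
                pm_runtime_set_active(&pdev->dev);
                pm_runtime_enable(&pdev->dev);

                return pdev;
        }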
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c8f7c631fc1f..014b30ace8a0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -138,10 +138,6 @@
138#include "i915_drv.h" 138#include "i915_drv.h"
139#include "intel_mocs.h" 139#include "intel_mocs.h"
140 140
141#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
142#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
143#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
144
145#define RING_EXECLIST_QFULL (1 << 0x2) 141#define RING_EXECLIST_QFULL (1 << 0x2)
146#define RING_EXECLIST1_VALID (1 << 0x3) 142#define RING_EXECLIST1_VALID (1 << 0x3)
147#define RING_EXECLIST0_VALID (1 << 0x4) 143#define RING_EXECLIST0_VALID (1 << 0x4)
@@ -326,8 +322,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
326 rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; 322 rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
327 u32 *reg_state = ce->lrc_reg_state; 323 u32 *reg_state = ce->lrc_reg_state;
328 324
329 assert_ring_tail_valid(rq->ring, rq->tail); 325 reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
330 reg_state[CTX_RING_TAIL+1] = rq->tail;
331 326
332 /* True 32b PPGTT with dynamic page allocation: update PDP 327 /* True 32b PPGTT with dynamic page allocation: update PDP
333 * registers and point the unallocated PDPs to scratch page. 328 * registers and point the unallocated PDPs to scratch page.
@@ -342,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
342 337
343static void execlists_submit_ports(struct intel_engine_cs *engine) 338static void execlists_submit_ports(struct intel_engine_cs *engine)
344{ 339{
345 struct drm_i915_private *dev_priv = engine->i915;
346 struct execlist_port *port = engine->execlist_port; 340 struct execlist_port *port = engine->execlist_port;
347 u32 __iomem *elsp = 341 u32 __iomem *elsp =
348 dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); 342 engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
349 u64 desc[2]; 343 unsigned int n;
350 344
351 GEM_BUG_ON(port[0].count > 1); 345 for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
352 if (!port[0].count) 346 struct drm_i915_gem_request *rq;
353 execlists_context_status_change(port[0].request, 347 unsigned int count;
354 INTEL_CONTEXT_SCHEDULE_IN); 348 u64 desc;
355 desc[0] = execlists_update_context(port[0].request); 349
356 GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); 350 rq = port_unpack(&port[n], &count);
357 port[0].count++; 351 if (rq) {
358 352 GEM_BUG_ON(count > !n);
359 if (port[1].request) { 353 if (!count++)
360 GEM_BUG_ON(port[1].count); 354 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
361 execlists_context_status_change(port[1].request, 355 port_set(&port[n], port_pack(rq, count));
362 INTEL_CONTEXT_SCHEDULE_IN); 356 desc = execlists_update_context(rq);
363 desc[1] = execlists_update_context(port[1].request); 357 GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
364 GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); 358 } else {
365 port[1].count = 1; 359 GEM_BUG_ON(!n);
366 } else { 360 desc = 0;
367 desc[1] = 0; 361 }
368 }
369 GEM_BUG_ON(desc[0] == desc[1]);
370
371 /* You must always write both descriptors in the order below. */
372 writel(upper_32_bits(desc[1]), elsp);
373 writel(lower_32_bits(desc[1]), elsp);
374 362
375 writel(upper_32_bits(desc[0]), elsp); 363 writel(upper_32_bits(desc), elsp);
376 /* The context is automatically loaded after the following */ 364 writel(lower_32_bits(desc), elsp);
377 writel(lower_32_bits(desc[0]), elsp); 365 }
378} 366}
379 367
380static bool ctx_single_port_submission(const struct i915_gem_context *ctx) 368static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -395,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
395 return true; 383 return true;
396} 384}
397 385
386static void port_assign(struct execlist_port *port,
387 struct drm_i915_gem_request *rq)
388{
389 GEM_BUG_ON(rq == port_request(port));
390
391 if (port_isset(port))
392 i915_gem_request_put(port_request(port));
393
394 port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
395}
396
398static void execlists_dequeue(struct intel_engine_cs *engine) 397static void execlists_dequeue(struct intel_engine_cs *engine)
399{ 398{
400 struct drm_i915_gem_request *last; 399 struct drm_i915_gem_request *last;
@@ -402,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
402 struct rb_node *rb; 401 struct rb_node *rb;
403 bool submit = false; 402 bool submit = false;
404 403
405 last = port->request; 404 last = port_request(port);
406 if (last) 405 if (last)
407 /* WaIdleLiteRestore:bdw,skl 406 /* WaIdleLiteRestore:bdw,skl
408 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL 407 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -412,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
412 */ 411 */
413 last->tail = last->wa_tail; 412 last->tail = last->wa_tail;
414 413
415 GEM_BUG_ON(port[1].request); 414 GEM_BUG_ON(port_isset(&port[1]));
416 415
417 /* Hardware submission is through 2 ports. Conceptually each port 416 /* Hardware submission is through 2 ports. Conceptually each port
418 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is 417 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -437,72 +436,86 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
437 436
438 spin_lock_irq(&engine->timeline->lock); 437 spin_lock_irq(&engine->timeline->lock);
439 rb = engine->execlist_first; 438 rb = engine->execlist_first;
439 GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
440 while (rb) { 440 while (rb) {
441 struct drm_i915_gem_request *cursor = 441 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
442 rb_entry(rb, typeof(*cursor), priotree.node); 442 struct drm_i915_gem_request *rq, *rn;
443 443
444 /* Can we combine this request with the current port? It has to 444 list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
445 * be the same context/ringbuffer and not have any exceptions 445 /*
446 * (e.g. GVT saying never to combine contexts). 446 * Can we combine this request with the current port?
447 * 447 * It has to be the same context/ringbuffer and not
448 * If we can combine the requests, we can execute both by 448 * have any exceptions (e.g. GVT saying never to
449 * updating the RING_TAIL to point to the end of the second 449 * combine contexts).
450 * request, and so we never need to tell the hardware about 450 *
451 * the first. 451 * If we can combine the requests, we can execute both
452 */ 452 * by updating the RING_TAIL to point to the end of the
453 if (last && !can_merge_ctx(cursor->ctx, last->ctx)) { 453 * second request, and so we never need to tell the
454 /* If we are on the second port and cannot combine 454 * hardware about the first.
455 * this request with the last, then we are done.
456 */
457 if (port != engine->execlist_port)
458 break;
459
460 /* If GVT overrides us we only ever submit port[0],
461 * leaving port[1] empty. Note that we also have
462 * to be careful that we don't queue the same
463 * context (even though a different request) to
464 * the second port.
465 */ 455 */
466 if (ctx_single_port_submission(last->ctx) || 456 if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
467 ctx_single_port_submission(cursor->ctx)) 457 /*
468 break; 458 * If we are on the second port and cannot
459 * combine this request with the last, then we
460 * are done.
461 */
462 if (port != engine->execlist_port) {
463 __list_del_many(&p->requests,
464 &rq->priotree.link);
465 goto done;
466 }
467
468 /*
469 * If GVT overrides us we only ever submit
470 * port[0], leaving port[1] empty. Note that we
471 * also have to be careful that we don't queue
472 * the same context (even though a different
473 * request) to the second port.
474 */
475 if (ctx_single_port_submission(last->ctx) ||
476 ctx_single_port_submission(rq->ctx)) {
477 __list_del_many(&p->requests,
478 &rq->priotree.link);
479 goto done;
480 }
481
482 GEM_BUG_ON(last->ctx == rq->ctx);
483
484 if (submit)
485 port_assign(port, last);
486 port++;
487 }
469 488
470 GEM_BUG_ON(last->ctx == cursor->ctx); 489 INIT_LIST_HEAD(&rq->priotree.link);
490 rq->priotree.priority = INT_MAX;
471 491
472 i915_gem_request_assign(&port->request, last); 492 __i915_gem_request_submit(rq);
473 port++; 493 trace_i915_gem_request_in(rq, port_index(port, engine));
494 last = rq;
495 submit = true;
474 } 496 }
475 497
476 rb = rb_next(rb); 498 rb = rb_next(rb);
477 rb_erase(&cursor->priotree.node, &engine->execlist_queue); 499 rb_erase(&p->node, &engine->execlist_queue);
478 RB_CLEAR_NODE(&cursor->priotree.node); 500 INIT_LIST_HEAD(&p->requests);
479 cursor->priotree.priority = INT_MAX; 501 if (p->priority != I915_PRIORITY_NORMAL)
480 502 kmem_cache_free(engine->i915->priorities, p);
481 __i915_gem_request_submit(cursor);
482 trace_i915_gem_request_in(cursor, port - engine->execlist_port);
483 last = cursor;
484 submit = true;
485 }
486 if (submit) {
487 i915_gem_request_assign(&port->request, last);
488 engine->execlist_first = rb;
489 } 503 }
504done:
505 engine->execlist_first = rb;
506 if (submit)
507 port_assign(port, last);
490 spin_unlock_irq(&engine->timeline->lock); 508 spin_unlock_irq(&engine->timeline->lock);
491 509
492 if (submit) 510 if (submit)
493 execlists_submit_ports(engine); 511 execlists_submit_ports(engine);
494} 512}
495 513
496static bool execlists_elsp_idle(struct intel_engine_cs *engine)
497{
498 return !engine->execlist_port[0].request;
499}
500
501static bool execlists_elsp_ready(const struct intel_engine_cs *engine) 514static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
502{ 515{
503 const struct execlist_port *port = engine->execlist_port; 516 const struct execlist_port *port = engine->execlist_port;
504 517
505 return port[0].count + port[1].count < 2; 518 return port_count(&port[0]) + port_count(&port[1]) < 2;
506} 519}
507 520
508/* 521/*
@@ -515,6 +528,15 @@ static void intel_lrc_irq_handler(unsigned long data)
515 struct execlist_port *port = engine->execlist_port; 528 struct execlist_port *port = engine->execlist_port;
516 struct drm_i915_private *dev_priv = engine->i915; 529 struct drm_i915_private *dev_priv = engine->i915;
517 530
531 /* We can skip acquiring intel_runtime_pm_get() here as it was taken
532 * on our behalf by the request (see i915_gem_mark_busy()) and it will
533 * not be relinquished until the device is idle (see
534 * i915_gem_idle_work_handler()). As a precaution, we make sure
535 * that all ELSP are drained i.e. we have processed the CSB,
536 * before allowing ourselves to idle and calling intel_runtime_pm_put().
537 */
538 GEM_BUG_ON(!dev_priv->gt.awake);
539
518 intel_uncore_forcewake_get(dev_priv, engine->fw_domains); 540 intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
519 541
520 /* Prefer doing test_and_clear_bit() as a two stage operation to avoid 542 /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
@@ -543,7 +565,9 @@ static void intel_lrc_irq_handler(unsigned long data)
543 tail = GEN8_CSB_WRITE_PTR(head); 565 tail = GEN8_CSB_WRITE_PTR(head);
544 head = GEN8_CSB_READ_PTR(head); 566 head = GEN8_CSB_READ_PTR(head);
545 while (head != tail) { 567 while (head != tail) {
568 struct drm_i915_gem_request *rq;
546 unsigned int status; 569 unsigned int status;
570 unsigned int count;
547 571
548 if (++head == GEN8_CSB_ENTRIES) 572 if (++head == GEN8_CSB_ENTRIES)
549 head = 0; 573 head = 0;
@@ -571,22 +595,26 @@ static void intel_lrc_irq_handler(unsigned long data)
571 595
572 /* Check the context/desc id for this event matches */ 596 /* Check the context/desc id for this event matches */
573 GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != 597 GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
574 port[0].context_id); 598 port->context_id);
575 599
576 GEM_BUG_ON(port[0].count == 0); 600 rq = port_unpack(port, &count);
577 if (--port[0].count == 0) { 601 GEM_BUG_ON(count == 0);
602 if (--count == 0) {
578 GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); 603 GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
579 GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); 604 GEM_BUG_ON(!i915_gem_request_completed(rq));
580 execlists_context_status_change(port[0].request, 605 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
581 INTEL_CONTEXT_SCHEDULE_OUT); 606
607 trace_i915_gem_request_out(rq);
608 i915_gem_request_put(rq);
582 609
583 trace_i915_gem_request_out(port[0].request);
584 i915_gem_request_put(port[0].request);
585 port[0] = port[1]; 610 port[0] = port[1];
586 memset(&port[1], 0, sizeof(port[1])); 611 memset(&port[1], 0, sizeof(port[1]));
612 } else {
613 port_set(port, port_pack(rq, count));
587 } 614 }
588 615
589 GEM_BUG_ON(port[0].count == 0 && 616 /* After the final element, the hw should be idle */
617 GEM_BUG_ON(port_count(port) == 0 &&
590 !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); 618 !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
591 } 619 }
592 620
@@ -600,28 +628,66 @@ static void intel_lrc_irq_handler(unsigned long data)
600 intel_uncore_forcewake_put(dev_priv, engine->fw_domains); 628 intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
601} 629}
602 630
603static bool insert_request(struct i915_priotree *pt, struct rb_root *root) 631static bool
632insert_request(struct intel_engine_cs *engine,
633 struct i915_priotree *pt,
634 int prio)
604{ 635{
605 struct rb_node **p, *rb; 636 struct i915_priolist *p;
637 struct rb_node **parent, *rb;
606 bool first = true; 638 bool first = true;
607 639
640 if (unlikely(engine->no_priolist))
641 prio = I915_PRIORITY_NORMAL;
642
643find_priolist:
608 /* most positive priority is scheduled first, equal priorities fifo */ 644 /* most positive priority is scheduled first, equal priorities fifo */
609 rb = NULL; 645 rb = NULL;
610 p = &root->rb_node; 646 parent = &engine->execlist_queue.rb_node;
611 while (*p) { 647 while (*parent) {
612 struct i915_priotree *pos; 648 rb = *parent;
613 649 p = rb_entry(rb, typeof(*p), node);
614 rb = *p; 650 if (prio > p->priority) {
615 pos = rb_entry(rb, typeof(*pos), node); 651 parent = &rb->rb_left;
616 if (pt->priority > pos->priority) { 652 } else if (prio < p->priority) {
617 p = &rb->rb_left; 653 parent = &rb->rb_right;
618 } else {
619 p = &rb->rb_right;
620 first = false; 654 first = false;
655 } else {
656 list_add_tail(&pt->link, &p->requests);
657 return false;
621 } 658 }
622 } 659 }
623 rb_link_node(&pt->node, rb, p); 660
624 rb_insert_color(&pt->node, root); 661 if (prio == I915_PRIORITY_NORMAL) {
662 p = &engine->default_priolist;
663 } else {
664 p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
665 /* Convert an allocation failure to a priority bump */
666 if (unlikely(!p)) {
667 prio = I915_PRIORITY_NORMAL; /* recurses just once */
668
669 /* To maintain ordering with all rendering, after an
670 * allocation failure we have to disable all scheduling.
671 * Requests will then be executed in fifo, and schedule
672 * will ensure that dependencies are emitted in fifo.
 673 * There will still be some reordering with existing
674 * requests, so if userspace lied about their
675 * dependencies that reordering may be visible.
676 */
677 engine->no_priolist = true;
678 goto find_priolist;
679 }
680 }
681
682 p->priority = prio;
683 rb_link_node(&p->node, rb, parent);
684 rb_insert_color(&p->node, &engine->execlist_queue);
685
686 INIT_LIST_HEAD(&p->requests);
687 list_add_tail(&pt->link, &p->requests);
688
689 if (first)
690 engine->execlist_first = &p->node;
625 691
626 return first; 692 return first;
627} 693}
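Editor's note on insert_request() above: requests are now grouped into one FIFO bucket (struct i915_priolist) per priority, kept in the engine's rb-tree, and an allocation failure demotes the request to the preallocated default bucket so overall FIFO ordering survives. The standalone userspace sketch below illustrates only that idea; a sorted singly linked list stands in for the rb-tree and is not the driver's data structure.

        #include <stdio.h>
        #include <stdlib.h>

        struct request { int seqno; struct request *next; };

        struct bucket {
                int prio;
                struct request *head, **tail;   /* FIFO within one priority */
                struct bucket *next;            /* buckets sorted by descending prio */
        };

        /* Preallocated default-priority bucket, always available. */
        static struct bucket default_bucket = { .prio = 0, .tail = &default_bucket.head };
        static struct bucket *queue = &default_bucket;

        static void submit(struct request *rq, int prio)
        {
                struct bucket **link = &queue, *b;

                while ((b = *link) && b->prio > prio)
                        link = &b->next;

                if (!b || b->prio != prio) {
                        struct bucket *nb = calloc(1, sizeof(*nb));

                        if (!nb) {              /* allocation failure: demote */
                                submit(rq, 0);  /* default bucket always exists */
                                return;
                        }
                        nb->prio = prio;
                        nb->tail = &nb->head;
                        nb->next = b;
                        *link = nb;
                        b = nb;
                }

                rq->next = NULL;
                *b->tail = rq;                  /* append: FIFO among equal priorities */
                b->tail = &rq->next;
        }

        int main(void)
        {
                struct request r1 = { .seqno = 1 }, r2 = { .seqno = 2 }, r3 = { .seqno = 3 };
                struct bucket *b;
                struct request *rq;

                submit(&r1, 0);
                submit(&r2, 10);        /* higher priority: new bucket ahead of default */
                submit(&r3, 0);         /* same priority as r1: stays behind it (FIFO) */

                for (b = queue; b; b = b->next)
                        for (rq = b->head; rq; rq = rq->next)
                                printf("prio %d: seqno %d\n", b->prio, rq->seqno);
                return 0;
        }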
@@ -634,12 +700,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
634 /* Will be called from irq-context when using foreign fences. */ 700 /* Will be called from irq-context when using foreign fences. */
635 spin_lock_irqsave(&engine->timeline->lock, flags); 701 spin_lock_irqsave(&engine->timeline->lock, flags);
636 702
637 if (insert_request(&request->priotree, &engine->execlist_queue)) { 703 if (insert_request(engine,
638 engine->execlist_first = &request->priotree.node; 704 &request->priotree,
705 request->priotree.priority)) {
639 if (execlists_elsp_ready(engine)) 706 if (execlists_elsp_ready(engine))
640 tasklet_hi_schedule(&engine->irq_tasklet); 707 tasklet_hi_schedule(&engine->irq_tasklet);
641 } 708 }
642 709
710 GEM_BUG_ON(!engine->execlist_first);
711 GEM_BUG_ON(list_empty(&request->priotree.link));
712
643 spin_unlock_irqrestore(&engine->timeline->lock, flags); 713 spin_unlock_irqrestore(&engine->timeline->lock, flags);
644} 714}
645 715
@@ -709,6 +779,19 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
709 list_safe_reset_next(dep, p, dfs_link); 779 list_safe_reset_next(dep, p, dfs_link);
710 } 780 }
711 781
782 /* If we didn't need to bump any existing priorities, and we haven't
783 * yet submitted this request (i.e. there is no potential race with
784 * execlists_submit_request()), we can set our own priority and skip
785 * acquiring the engine locks.
786 */
787 if (request->priotree.priority == INT_MIN) {
788 GEM_BUG_ON(!list_empty(&request->priotree.link));
789 request->priotree.priority = prio;
790 if (stack.dfs_link.next == stack.dfs_link.prev)
791 return;
792 __list_del_entry(&stack.dfs_link);
793 }
794
712 engine = request->engine; 795 engine = request->engine;
713 spin_lock_irq(&engine->timeline->lock); 796 spin_lock_irq(&engine->timeline->lock);
714 797
@@ -724,10 +807,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
724 continue; 807 continue;
725 808
726 pt->priority = prio; 809 pt->priority = prio;
727 if (!RB_EMPTY_NODE(&pt->node)) { 810 if (!list_empty(&pt->link)) {
728 rb_erase(&pt->node, &engine->execlist_queue); 811 __list_del_entry(&pt->link);
729 if (insert_request(pt, &engine->execlist_queue)) 812 insert_request(engine, pt, prio);
730 engine->execlist_first = &pt->node;
731 } 813 }
732 } 814 }
733 815
@@ -736,8 +818,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
736 /* XXX Do we need to preempt to make room for us and our deps? */ 818 /* XXX Do we need to preempt to make room for us and our deps? */
737} 819}
738 820
739static int execlists_context_pin(struct intel_engine_cs *engine, 821static struct intel_ring *
740 struct i915_gem_context *ctx) 822execlists_context_pin(struct intel_engine_cs *engine,
823 struct i915_gem_context *ctx)
741{ 824{
742 struct intel_context *ce = &ctx->engine[engine->id]; 825 struct intel_context *ce = &ctx->engine[engine->id];
743 unsigned int flags; 826 unsigned int flags;
@@ -746,8 +829,8 @@ static int execlists_context_pin(struct intel_engine_cs *engine,
746 829
747 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 830 lockdep_assert_held(&ctx->i915->drm.struct_mutex);
748 831
749 if (ce->pin_count++) 832 if (likely(ce->pin_count++))
750 return 0; 833 goto out;
751 GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ 834 GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
752 835
753 if (!ce->state) { 836 if (!ce->state) {
@@ -771,7 +854,7 @@ static int execlists_context_pin(struct intel_engine_cs *engine,
771 goto unpin_vma; 854 goto unpin_vma;
772 } 855 }
773 856
774 ret = intel_ring_pin(ce->ring, ctx->ggtt_offset_bias); 857 ret = intel_ring_pin(ce->ring, ctx->i915, ctx->ggtt_offset_bias);
775 if (ret) 858 if (ret)
776 goto unpin_map; 859 goto unpin_map;
777 860
@@ -784,7 +867,8 @@ static int execlists_context_pin(struct intel_engine_cs *engine,
784 ce->state->obj->mm.dirty = true; 867 ce->state->obj->mm.dirty = true;
785 868
786 i915_gem_context_get(ctx); 869 i915_gem_context_get(ctx);
787 return 0; 870out:
871 return ce->ring;
788 872
789unpin_map: 873unpin_map:
790 i915_gem_object_unpin_map(ce->state->obj); 874 i915_gem_object_unpin_map(ce->state->obj);
@@ -792,7 +876,7 @@ unpin_vma:
792 __i915_vma_unpin(ce->state); 876 __i915_vma_unpin(ce->state);
793err: 877err:
794 ce->pin_count = 0; 878 ce->pin_count = 0;
795 return ret; 879 return ERR_PTR(ret);
796} 880}
797 881
798static void execlists_context_unpin(struct intel_engine_cs *engine, 882static void execlists_context_unpin(struct intel_engine_cs *engine,
@@ -829,9 +913,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
829 */ 913 */
830 request->reserved_space += EXECLISTS_REQUEST_SIZE; 914 request->reserved_space += EXECLISTS_REQUEST_SIZE;
831 915
832 GEM_BUG_ON(!ce->ring);
833 request->ring = ce->ring;
834
835 if (i915.enable_guc_submission) { 916 if (i915.enable_guc_submission) {
836 /* 917 /*
837 * Check that the GuC has space for the request before 918 * Check that the GuC has space for the request before
@@ -1139,14 +1220,12 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
1139 return ret; 1220 return ret;
1140} 1221}
1141 1222
1142static u32 port_seqno(struct execlist_port *port)
1143{
1144 return port->request ? port->request->global_seqno : 0;
1145}
1146
1147static int gen8_init_common_ring(struct intel_engine_cs *engine) 1223static int gen8_init_common_ring(struct intel_engine_cs *engine)
1148{ 1224{
1149 struct drm_i915_private *dev_priv = engine->i915; 1225 struct drm_i915_private *dev_priv = engine->i915;
1226 struct execlist_port *port = engine->execlist_port;
1227 unsigned int n;
1228 bool submit;
1150 int ret; 1229 int ret;
1151 1230
1152 ret = intel_mocs_init_engine(engine); 1231 ret = intel_mocs_init_engine(engine);
@@ -1167,16 +1246,24 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
1167 1246
1168 /* After a GPU reset, we may have requests to replay */ 1247 /* After a GPU reset, we may have requests to replay */
1169 clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); 1248 clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
1170 if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) { 1249
1171 DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", 1250 submit = false;
1172 engine->name, 1251 for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
1173 port_seqno(&engine->execlist_port[0]), 1252 if (!port_isset(&port[n]))
1174 port_seqno(&engine->execlist_port[1])); 1253 break;
1175 engine->execlist_port[0].count = 0; 1254
1176 engine->execlist_port[1].count = 0; 1255 DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
1177 execlists_submit_ports(engine); 1256 engine->name, n,
1257 port_request(&port[n])->global_seqno);
1258
1259 /* Discard the current inflight count */
1260 port_set(&port[n], port_request(&port[n]));
1261 submit = true;
1178 } 1262 }
1179 1263
1264 if (submit && !i915.enable_guc_submission)
1265 execlists_submit_ports(engine);
1266
1180 return 0; 1267 return 0;
1181} 1268}
1182 1269
@@ -1252,13 +1339,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
1252 intel_ring_update_space(request->ring); 1339 intel_ring_update_space(request->ring);
1253 1340
1254 /* Catch up with any missed context-switch interrupts */ 1341 /* Catch up with any missed context-switch interrupts */
1255 if (request->ctx != port[0].request->ctx) { 1342 if (request->ctx != port_request(port)->ctx) {
1256 i915_gem_request_put(port[0].request); 1343 i915_gem_request_put(port_request(port));
1257 port[0] = port[1]; 1344 port[0] = port[1];
1258 memset(&port[1], 0, sizeof(port[1])); 1345 memset(&port[1], 0, sizeof(port[1]));
1259 } 1346 }
1260 1347
1261 GEM_BUG_ON(request->ctx != port[0].request->ctx); 1348 GEM_BUG_ON(request->ctx != port_request(port)->ctx);
1262 1349
1263 /* Reset WaIdleLiteRestore:bdw,skl as well */ 1350 /* Reset WaIdleLiteRestore:bdw,skl as well */
1264 request->tail = 1351 request->tail =
@@ -1907,44 +1994,6 @@ populate_lr_context(struct i915_gem_context *ctx,
1907 return 0; 1994 return 0;
1908} 1995}
1909 1996
1910/**
1911 * intel_lr_context_size() - return the size of the context for an engine
1912 * @engine: which engine to find the context size for
1913 *
1914 * Each engine may require a different amount of space for a context image,
1915 * so when allocating (or copying) an image, this function can be used to
1916 * find the right size for the specific engine.
1917 *
1918 * Return: size (in bytes) of an engine-specific context image
1919 *
1920 * Note: this size includes the HWSP, which is part of the context image
1921 * in LRC mode, but does not include the "shared data page" used with
1922 * GuC submission. The caller should account for this if using the GuC.
1923 */
1924uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
1925{
1926 int ret = 0;
1927
1928 WARN_ON(INTEL_GEN(engine->i915) < 8);
1929
1930 switch (engine->id) {
1931 case RCS:
1932 if (INTEL_GEN(engine->i915) >= 9)
1933 ret = GEN9_LR_CONTEXT_RENDER_SIZE;
1934 else
1935 ret = GEN8_LR_CONTEXT_RENDER_SIZE;
1936 break;
1937 case VCS:
1938 case BCS:
1939 case VECS:
1940 case VCS2:
1941 ret = GEN8_LR_CONTEXT_OTHER_SIZE;
1942 break;
1943 }
1944
1945 return ret;
1946}
1947
1948static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, 1997static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
1949 struct intel_engine_cs *engine) 1998 struct intel_engine_cs *engine)
1950{ 1999{
@@ -1957,8 +2006,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
1957 2006
1958 WARN_ON(ce->state); 2007 WARN_ON(ce->state);
1959 2008
1960 context_size = round_up(intel_lr_context_size(engine), 2009 context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
1961 I915_GTT_PAGE_SIZE);
1962 2010
1963 /* One extra page as the sharing data between driver and GuC */ 2011 /* One extra page as the sharing data between driver and GuC */
1964 context_size += PAGE_SIZE * LRC_PPHWSP_PN; 2012 context_size += PAGE_SIZE * LRC_PPHWSP_PN;
@@ -1989,7 +2037,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
1989 2037
1990 ce->ring = ring; 2038 ce->ring = ring;
1991 ce->state = vma; 2039 ce->state = vma;
1992 ce->initialised = engine->init_context == NULL; 2040 ce->initialised |= engine->init_context == NULL;
1993 2041
1994 return 0; 2042 return 0;
1995 2043
@@ -2036,8 +2084,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
2036 ce->state->obj->mm.dirty = true; 2084 ce->state->obj->mm.dirty = true;
2037 i915_gem_object_unpin_map(ce->state->obj); 2085 i915_gem_object_unpin_map(ce->state->obj);
2038 2086
2039 ce->ring->head = ce->ring->tail = 0; 2087 intel_ring_reset(ce->ring, 0);
2040 intel_ring_update_space(ce->ring);
2041 } 2088 }
2042 } 2089 }
2043} 2090}
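Editor's note on the execlist port changes above: port_pack()/port_unpack()/port_count() replace the separate request pointer and submission count with a single tagged value. The helpers themselves are defined elsewhere in this series; the standalone sketch below only illustrates the general technique of stashing a small counter in the unused low bits of an aligned pointer, and is not the driver's exact implementation.

        #include <assert.h>
        #include <stdint.h>
        #include <stdio.h>

        #define COUNT_BITS 2
        #define COUNT_MASK ((uintptr_t)((1 << COUNT_BITS) - 1))

        /* Aligning the struct guarantees its address has COUNT_BITS zero low bits. */
        struct request {
                int seqno;
        } __attribute__((aligned(1 << COUNT_BITS)));

        static uintptr_t pack(struct request *rq, unsigned int count)
        {
                assert(((uintptr_t)rq & COUNT_MASK) == 0);
                assert(count <= COUNT_MASK);
                return (uintptr_t)rq | count;
        }

        static struct request *unpack(uintptr_t packed, unsigned int *count)
        {
                *count = packed & COUNT_MASK;
                return (struct request *)(packed & ~COUNT_MASK);
        }

        int main(void)
        {
                struct request rq = { .seqno = 42 };
                unsigned int count;
                uintptr_t port;
                struct request *back;

                port = pack(&rq, 1);            /* one submission in flight */
                back = unpack(port, &count);    /* recover both pointer and count */

                printf("seqno %d, count %u\n", back->seqno, count);
                return 0;
        }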
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index e8015e7bf4e9..52b3a1fd4059 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
78struct drm_i915_private; 78struct drm_i915_private;
79struct i915_gem_context; 79struct i915_gem_context;
80 80
81uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
82
83void intel_lr_context_resume(struct drm_i915_private *dev_priv); 81void intel_lr_context_resume(struct drm_i915_private *dev_priv);
84uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, 82uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
85 struct intel_engine_cs *engine); 83 struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index cb50c527401f..c8103f8d4dfa 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -888,10 +888,14 @@ static void pch_enable_backlight(struct intel_connector *connector)
888 struct drm_i915_private *dev_priv = to_i915(connector->base.dev); 888 struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
889 struct intel_panel *panel = &connector->panel; 889 struct intel_panel *panel = &connector->panel;
890 enum pipe pipe = intel_get_pipe_from_connector(connector); 890 enum pipe pipe = intel_get_pipe_from_connector(connector);
891 enum transcoder cpu_transcoder = 891 enum transcoder cpu_transcoder;
892 intel_pipe_to_cpu_transcoder(dev_priv, pipe);
893 u32 cpu_ctl2, pch_ctl1, pch_ctl2; 892 u32 cpu_ctl2, pch_ctl1, pch_ctl2;
894 893
894 if (!WARN_ON_ONCE(pipe == INVALID_PIPE))
895 cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe);
896 else
897 cpu_transcoder = TRANSCODER_EDP;
898
895 cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2); 899 cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2);
896 if (cpu_ctl2 & BLM_PWM_ENABLE) { 900 if (cpu_ctl2 & BLM_PWM_ENABLE) {
897 DRM_DEBUG_KMS("cpu backlight already enabled\n"); 901 DRM_DEBUG_KMS("cpu backlight already enabled\n");
@@ -973,6 +977,9 @@ static void i965_enable_backlight(struct intel_connector *connector)
973 enum pipe pipe = intel_get_pipe_from_connector(connector); 977 enum pipe pipe = intel_get_pipe_from_connector(connector);
974 u32 ctl, ctl2, freq; 978 u32 ctl, ctl2, freq;
975 979
980 if (WARN_ON_ONCE(pipe == INVALID_PIPE))
981 pipe = PIPE_A;
982
976 ctl2 = I915_READ(BLC_PWM_CTL2); 983 ctl2 = I915_READ(BLC_PWM_CTL2);
977 if (ctl2 & BLM_PWM_ENABLE) { 984 if (ctl2 & BLM_PWM_ENABLE) {
978 DRM_DEBUG_KMS("backlight already enabled\n"); 985 DRM_DEBUG_KMS("backlight already enabled\n");
@@ -1037,6 +1044,9 @@ static void bxt_enable_backlight(struct intel_connector *connector)
1037 enum pipe pipe = intel_get_pipe_from_connector(connector); 1044 enum pipe pipe = intel_get_pipe_from_connector(connector);
1038 u32 pwm_ctl, val; 1045 u32 pwm_ctl, val;
1039 1046
1047 if (WARN_ON_ONCE(pipe == INVALID_PIPE))
1048 pipe = PIPE_A;
1049
1040 /* Controller 1 uses the utility pin. */ 1050 /* Controller 1 uses the utility pin. */
1041 if (panel->backlight.controller == 1) { 1051 if (panel->backlight.controller == 1) {
1042 val = I915_READ(UTIL_PIN_CTL); 1052 val = I915_READ(UTIL_PIN_CTL);
@@ -1093,7 +1103,8 @@ void intel_panel_enable_backlight(struct intel_connector *connector)
1093 if (!panel->backlight.present) 1103 if (!panel->backlight.present)
1094 return; 1104 return;
1095 1105
1096 DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); 1106 if (!WARN_ON_ONCE(pipe == INVALID_PIPE))
1107 DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe));
1097 1108
1098 mutex_lock(&dev_priv->backlight_lock); 1109 mutex_lock(&dev_priv->backlight_lock);
1099 1110
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index 206ee4f0150e..8fbd2bd0877f 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -513,16 +513,20 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv,
513 struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); 513 struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A);
514 struct intel_crtc_state *pipe_config; 514 struct intel_crtc_state *pipe_config;
515 struct drm_atomic_state *state; 515 struct drm_atomic_state *state;
516 struct drm_modeset_acquire_ctx ctx;
516 int ret = 0; 517 int ret = 0;
517 518
518 drm_modeset_lock_all(dev); 519 drm_modeset_acquire_init(&ctx, 0);
520
519 state = drm_atomic_state_alloc(dev); 521 state = drm_atomic_state_alloc(dev);
520 if (!state) { 522 if (!state) {
521 ret = -ENOMEM; 523 ret = -ENOMEM;
522 goto unlock; 524 goto unlock;
523 } 525 }
524 526
525 state->acquire_ctx = crtc->base.dev->mode_config.acquire_ctx; 527 state->acquire_ctx = &ctx;
528
529retry:
526 pipe_config = intel_atomic_get_crtc_state(state, crtc); 530 pipe_config = intel_atomic_get_crtc_state(state, crtc);
527 if (IS_ERR(pipe_config)) { 531 if (IS_ERR(pipe_config)) {
528 ret = PTR_ERR(pipe_config); 532 ret = PTR_ERR(pipe_config);
@@ -537,10 +541,17 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv,
537 ret = drm_atomic_commit(state); 541 ret = drm_atomic_commit(state);
538 542
539put_state: 543put_state:
544 if (ret == -EDEADLK) {
545 drm_atomic_state_clear(state);
546 drm_modeset_backoff(&ctx);
547 goto retry;
548 }
549
540 drm_atomic_state_put(state); 550 drm_atomic_state_put(state);
541unlock: 551unlock:
542 WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret); 552 WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret);
543 drm_modeset_unlock_all(dev); 553 drm_modeset_drop_locks(&ctx);
554 drm_modeset_acquire_fini(&ctx);
544} 555}
545 556
546static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, 557static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv,
@@ -842,19 +853,12 @@ static ssize_t display_crc_ctl_write(struct file *file, const char __user *ubuf,
842 return -E2BIG; 853 return -E2BIG;
843 } 854 }
844 855
845 tmpbuf = kmalloc(len + 1, GFP_KERNEL); 856 tmpbuf = memdup_user_nul(ubuf, len);
846 if (!tmpbuf) 857 if (IS_ERR(tmpbuf))
847 return -ENOMEM; 858 return PTR_ERR(tmpbuf);
848
849 if (copy_from_user(tmpbuf, ubuf, len)) {
850 ret = -EFAULT;
851 goto out;
852 }
853 tmpbuf[len] = '\0';
854 859
855 ret = display_crc_ctl_parse(dev_priv, tmpbuf, len); 860 ret = display_crc_ctl_parse(dev_priv, tmpbuf, len);
856 861
857out:
858 kfree(tmpbuf); 862 kfree(tmpbuf);
859 if (ret < 0) 863 if (ret < 0)
860 return ret; 864 return ret;
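Editor's note on the hsw_trans_edp_pipe_A_crc_wa hunk above: drm_modeset_lock_all() is replaced by a local acquire context with an -EDEADLK backoff-and-retry loop. The sketch below condenses that pattern into a generic helper; it uses only the calls visible in the hunk plus the core drm_atomic_get_crtc_state() in place of the i915 wrapper, trims error reporting, and leaves the actual state mutation as a placeholder.

        #include <linux/err.h>
        #include <drm/drm_atomic.h>
        #include <drm/drm_modeset_lock.h>

        static int commit_with_backoff(struct drm_device *dev, struct drm_crtc *crtc)
        {
                struct drm_modeset_acquire_ctx ctx;
                struct drm_atomic_state *state;
                struct drm_crtc_state *crtc_state;
                int ret;

                drm_modeset_acquire_init(&ctx, 0);

                state = drm_atomic_state_alloc(dev);
                if (!state) {
                        ret = -ENOMEM;
                        goto out;
                }
                state->acquire_ctx = &ctx;

        retry:
                crtc_state = drm_atomic_get_crtc_state(state, crtc);
                if (IS_ERR(crtc_state)) {
                        ret = PTR_ERR(crtc_state);
                        goto put;
                }

                /* ... modify crtc_state here ... */

                ret = drm_atomic_commit(state);
        put:
                if (ret == -EDEADLK) {
                        /* Another thread holds one of our locks: drop the
                         * built-up state, wait our turn, and retry. */
                        drm_atomic_state_clear(state);
                        drm_modeset_backoff(&ctx);
                        goto retry;
                }
                drm_atomic_state_put(state);
        out:
                drm_modeset_drop_locks(&ctx);
                drm_modeset_acquire_fini(&ctx);
                return ret;
        }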
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 570bd603f401..936eef1634c7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -386,13 +386,53 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl
386 return was_enabled; 386 return was_enabled;
387} 387}
388 388
389/**
390 * intel_set_memory_cxsr - Configure CxSR state
391 * @dev_priv: i915 device
392 * @enable: Allow vs. disallow CxSR
393 *
394 * Allow or disallow the system to enter a special CxSR
395 * (C-state self refresh) state. What typically happens in CxSR mode
396 * is that several display FIFOs may get combined into a single larger
 397 * FIFO for a particular plane (so-called max FIFO mode) to allow the
398 * system to defer memory fetches longer, and the memory will enter
399 * self refresh.
400 *
 401 * Note that enabling CxSR does not guarantee that the system enters
402 * this special mode, nor does it guarantee that the system stays
403 * in that mode once entered. So this just allows/disallows the system
404 * to autonomously utilize the CxSR mode. Other factors such as core
405 * C-states will affect when/if the system actually enters/exits the
406 * CxSR mode.
407 *
408 * Note that on VLV/CHV this actually only controls the max FIFO mode,
409 * and the system is free to enter/exit memory self refresh at any time
410 * even when the use of CxSR has been disallowed.
411 *
412 * While the system is actually in the CxSR/max FIFO mode, some plane
413 * control registers will not get latched on vblank. Thus in order to
414 * guarantee the system will respond to changes in the plane registers
415 * we must always disallow CxSR prior to making changes to those registers.
416 * Unfortunately the system will re-evaluate the CxSR conditions at
417 * frame start which happens after vblank start (which is when the plane
418 * registers would get latched), so we can't proceed with the plane update
419 * during the same frame where we disallowed CxSR.
420 *
421 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
422 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
423 * the hardware w.r.t. HPLL SR when writing to plane registers.
424 * Disallowing just CxSR is sufficient.
425 */
389bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) 426bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
390{ 427{
391 bool ret; 428 bool ret;
392 429
393 mutex_lock(&dev_priv->wm.wm_mutex); 430 mutex_lock(&dev_priv->wm.wm_mutex);
394 ret = _intel_set_memory_cxsr(dev_priv, enable); 431 ret = _intel_set_memory_cxsr(dev_priv, enable);
395 dev_priv->wm.vlv.cxsr = enable; 432 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
433 dev_priv->wm.vlv.cxsr = enable;
434 else if (IS_G4X(dev_priv))
435 dev_priv->wm.g4x.cxsr = enable;
396 mutex_unlock(&dev_priv->wm.wm_mutex); 436 mutex_unlock(&dev_priv->wm.wm_mutex);
397 437
398 return ret; 438 return ret;
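Editor's note on the kernel-doc just added above: it spells out an ordering constraint, namely that CxSR must be disallowed and a frame allowed to pass before plane registers are rewritten, because the hardware re-evaluates CxSR at frame start. The sketch below only illustrates that ordering; it is not how the driver actually sequences this (that happens through the atomic watermark pre/post code), and intel_wait_for_vblank() is used as an assumed "wait one frame" helper.

        /* Kernel-context sketch, assumes the usual i915 headers. */
        static void update_planes_safely(struct drm_i915_private *dev_priv, enum pipe pipe)
        {
                /* 1: disallow CxSR; takes effect at the next frame start. */
                intel_set_memory_cxsr(dev_priv, false);

                /* 2: wait a frame so the hardware has really left max FIFO
                 * mode (intel_wait_for_vblank() is an assumed helper here). */
                intel_wait_for_vblank(dev_priv, pipe);

                /* 3: plane registers now latch on vblank as normal. */
                /* ... write plane registers ... */
        }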
@@ -454,13 +494,6 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
454 fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start; 494 fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
455 fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start; 495 fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
456 fifo_state->plane[PLANE_CURSOR] = 63; 496 fifo_state->plane[PLANE_CURSOR] = 63;
457
458 DRM_DEBUG_KMS("Pipe %c FIFO size: %d/%d/%d/%d\n",
459 pipe_name(pipe),
460 fifo_state->plane[PLANE_PRIMARY],
461 fifo_state->plane[PLANE_SPRITE0],
462 fifo_state->plane[PLANE_SPRITE1],
463 fifo_state->plane[PLANE_CURSOR]);
464} 497}
465 498
466static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) 499static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
@@ -538,20 +571,6 @@ static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
538 .guard_size = PINEVIEW_CURSOR_GUARD_WM, 571 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
539 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 572 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
540}; 573};
541static const struct intel_watermark_params g4x_wm_info = {
542 .fifo_size = G4X_FIFO_SIZE,
543 .max_wm = G4X_MAX_WM,
544 .default_wm = G4X_MAX_WM,
545 .guard_size = 2,
546 .cacheline_size = G4X_FIFO_LINE_SIZE,
547};
548static const struct intel_watermark_params g4x_cursor_wm_info = {
549 .fifo_size = I965_CURSOR_FIFO,
550 .max_wm = I965_CURSOR_MAX_WM,
551 .default_wm = I965_CURSOR_DFT_WM,
552 .guard_size = 2,
553 .cacheline_size = G4X_FIFO_LINE_SIZE,
554};
555static const struct intel_watermark_params i965_cursor_wm_info = { 574static const struct intel_watermark_params i965_cursor_wm_info = {
556 .fifo_size = I965_CURSOR_FIFO, 575 .fifo_size = I965_CURSOR_FIFO,
557 .max_wm = I965_CURSOR_MAX_WM, 576 .max_wm = I965_CURSOR_MAX_WM,
@@ -596,8 +615,104 @@ static const struct intel_watermark_params i845_wm_info = {
596}; 615};
597 616
598/** 617/**
618 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
619 * @pixel_rate: Pipe pixel rate in kHz
620 * @cpp: Plane bytes per pixel
621 * @latency: Memory wakeup latency in 0.1us units
622 *
623 * Compute the watermark using the method 1 or "small buffer"
 624 * formula. The caller may additionally add extra cachelines
 625 * to account for TLB misses and clock crossings.
 626 *
 627 * This method is concerned with the short term drain rate
 628 * of the FIFO, i.e. it does not account for blanking periods
 629 * which would effectively reduce the average drain rate across
 630 * a longer period. The name "small" refers to the fact that the
 631 * FIFO is relatively small compared to the amount of data
632 * fetched.
633 *
634 * The FIFO level vs. time graph might look something like:
635 *
636 * |\ |\
637 * | \ | \
638 * __---__---__ (- plane active, _ blanking)
639 * -> time
640 *
641 * or perhaps like this:
642 *
643 * |\|\ |\|\
644 * __----__----__ (- plane active, _ blanking)
645 * -> time
646 *
647 * Returns:
648 * The watermark in bytes
649 */
650static unsigned int intel_wm_method1(unsigned int pixel_rate,
651 unsigned int cpp,
652 unsigned int latency)
653{
654 uint64_t ret;
655
656 ret = (uint64_t) pixel_rate * cpp * latency;
657 ret = DIV_ROUND_UP_ULL(ret, 10000);
658
659 return ret;
660}
661
662/**
663 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
664 * @pixel_rate: Pipe pixel rate in kHz
665 * @htotal: Pipe horizontal total
666 * @width: Plane width in pixels
667 * @cpp: Plane bytes per pixel
668 * @latency: Memory wakeup latency in 0.1us units
669 *
670 * Compute the watermark using the method 2 or "large buffer"
 671 * formula. The caller may additionally add extra cachelines
 672 * to account for TLB misses and clock crossings.
 673 *
 674 * This method is concerned with the long term drain rate
 675 * of the FIFO, i.e. it does account for blanking periods
 676 * which effectively reduce the average drain rate across
 677 * a longer period. The name "large" refers to the fact that the
 678 * FIFO is relatively large compared to the amount of data
679 * fetched.
680 *
681 * The FIFO level vs. time graph might look something like:
682 *
683 * |\___ |\___
684 * | \___ | \___
685 * | \ | \
686 * __ --__--__--__--__--__--__ (- plane active, _ blanking)
687 * -> time
688 *
689 * Returns:
690 * The watermark in bytes
691 */
692static unsigned int intel_wm_method2(unsigned int pixel_rate,
693 unsigned int htotal,
694 unsigned int width,
695 unsigned int cpp,
696 unsigned int latency)
697{
698 unsigned int ret;
699
700 /*
701 * FIXME remove once all users are computing
702 * watermarks in the correct place.
703 */
704 if (WARN_ON_ONCE(htotal == 0))
705 htotal = 1;
706
707 ret = (latency * pixel_rate) / (htotal * 10000);
708 ret = (ret + 1) * width * cpp;
709
710 return ret;
711}
712
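Editor's note: a quick standalone arithmetic check of the two helpers just added above, using invented but representative numbers (148.5 MHz pixel clock, htotal 2200, a 4 bpp plane 1920 pixels wide, latency 120 in 0.1 us units, i.e. 12 us). The functions below simply restate the formulas from intel_wm_method1()/intel_wm_method2() in userspace C.

        #include <stdio.h>

        static unsigned long long div_round_up(unsigned long long a, unsigned long long b)
        {
                return (a + b - 1) / b;
        }

        static unsigned int method1(unsigned int pixel_rate, unsigned int cpp,
                                    unsigned int latency)
        {
                return div_round_up((unsigned long long)pixel_rate * cpp * latency, 10000);
        }

        static unsigned int method2(unsigned int pixel_rate, unsigned int htotal,
                                    unsigned int width, unsigned int cpp,
                                    unsigned int latency)
        {
                unsigned int lines = (latency * pixel_rate) / (htotal * 10000);

                return (lines + 1) * width * cpp;
        }

        int main(void)
        {
                /* method1: 148500 * 4 * 120 / 10000 = 7128 bytes drained in 12 us */
                printf("method1: %u bytes\n", method1(148500, 4, 120));
                /* method2: 120 * 148500 / (2200 * 10000) = 0 whole lines elapse,
                 * so (0 + 1) * 1920 * 4 = 7680 bytes (one line of the plane) */
                printf("method2: %u bytes\n", method2(148500, 2200, 1920, 4, 120));
                return 0;
        }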
713/**
599 * intel_calculate_wm - calculate watermark level 714 * intel_calculate_wm - calculate watermark level
600 * @clock_in_khz: pixel clock 715 * @pixel_rate: pixel clock
601 * @wm: chip FIFO params 716 * @wm: chip FIFO params
602 * @cpp: bytes per pixel 717 * @cpp: bytes per pixel
603 * @latency_ns: memory latency for the platform 718 * @latency_ns: memory latency for the platform
@@ -613,12 +728,12 @@ static const struct intel_watermark_params i845_wm_info = {
613 * past the watermark point. If the FIFO drains completely, a FIFO underrun 728 * past the watermark point. If the FIFO drains completely, a FIFO underrun
614 * will occur, and a display engine hang could result. 729 * will occur, and a display engine hang could result.
615 */ 730 */
616static unsigned long intel_calculate_wm(unsigned long clock_in_khz, 731static unsigned int intel_calculate_wm(int pixel_rate,
617 const struct intel_watermark_params *wm, 732 const struct intel_watermark_params *wm,
618 int fifo_size, int cpp, 733 int fifo_size, int cpp,
619 unsigned long latency_ns) 734 unsigned int latency_ns)
620{ 735{
621 long entries_required, wm_size; 736 int entries, wm_size;
622 737
623 /* 738 /*
624 * Note: we need to make sure we don't overflow for various clock & 739 * Note: we need to make sure we don't overflow for various clock &
@@ -626,18 +741,17 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
626 * clocks go from a few thousand to several hundred thousand. 741 * clocks go from a few thousand to several hundred thousand.
627 * latency is usually a few thousand 742 * latency is usually a few thousand
628 */ 743 */
629 entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) / 744 entries = intel_wm_method1(pixel_rate, cpp,
630 1000; 745 latency_ns / 100);
631 entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size); 746 entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
632 747 wm->guard_size;
633 DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required); 748 DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
634
635 wm_size = fifo_size - (entries_required + wm->guard_size);
636 749
637 DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size); 750 wm_size = fifo_size - entries;
751 DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
638 752
639 /* Don't promote wm_size to unsigned... */ 753 /* Don't promote wm_size to unsigned... */
640 if (wm_size > (long)wm->max_wm) 754 if (wm_size > wm->max_wm)
641 wm_size = wm->max_wm; 755 wm_size = wm->max_wm;
642 if (wm_size <= 0) 756 if (wm_size <= 0)
643 wm_size = wm->default_wm; 757 wm_size = wm->default_wm;
@@ -655,6 +769,21 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
655 return wm_size; 769 return wm_size;
656} 770}
657 771
772static bool is_disabling(int old, int new, int threshold)
773{
774 return old >= threshold && new < threshold;
775}
776
777static bool is_enabling(int old, int new, int threshold)
778{
779 return old < threshold && new >= threshold;
780}
781
782static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
783{
784 return dev_priv->wm.max_level + 1;
785}
786
658static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state, 787static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
659 const struct intel_plane_state *plane_state) 788 const struct intel_plane_state *plane_state)
660{ 789{
@@ -699,7 +828,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
699 struct intel_crtc *crtc; 828 struct intel_crtc *crtc;
700 const struct cxsr_latency *latency; 829 const struct cxsr_latency *latency;
701 u32 reg; 830 u32 reg;
702 unsigned long wm; 831 unsigned int wm;
703 832
704 latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), 833 latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
705 dev_priv->is_ddr3, 834 dev_priv->is_ddr3,
@@ -733,7 +862,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
733 /* cursor SR */ 862 /* cursor SR */
734 wm = intel_calculate_wm(clock, &pineview_cursor_wm, 863 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
735 pineview_display_wm.fifo_size, 864 pineview_display_wm.fifo_size,
736 cpp, latency->cursor_sr); 865 4, latency->cursor_sr);
737 reg = I915_READ(DSPFW3); 866 reg = I915_READ(DSPFW3);
738 reg &= ~DSPFW_CURSOR_SR_MASK; 867 reg &= ~DSPFW_CURSOR_SR_MASK;
739 reg |= FW_WM(wm, CURSOR_SR); 868 reg |= FW_WM(wm, CURSOR_SR);
@@ -751,7 +880,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
751 /* cursor HPLL off SR */ 880 /* cursor HPLL off SR */
752 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm, 881 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
753 pineview_display_hplloff_wm.fifo_size, 882 pineview_display_hplloff_wm.fifo_size,
754 cpp, latency->cursor_hpll_disable); 883 4, latency->cursor_hpll_disable);
755 reg = I915_READ(DSPFW3); 884 reg = I915_READ(DSPFW3);
756 reg &= ~DSPFW_HPLL_CURSOR_MASK; 885 reg &= ~DSPFW_HPLL_CURSOR_MASK;
757 reg |= FW_WM(wm, HPLL_CURSOR); 886 reg |= FW_WM(wm, HPLL_CURSOR);
@@ -764,144 +893,50 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
764 } 893 }
765} 894}
766 895
767static bool g4x_compute_wm0(struct drm_i915_private *dev_priv,
768 int plane,
769 const struct intel_watermark_params *display,
770 int display_latency_ns,
771 const struct intel_watermark_params *cursor,
772 int cursor_latency_ns,
773 int *plane_wm,
774 int *cursor_wm)
775{
776 struct intel_crtc *crtc;
777 const struct drm_display_mode *adjusted_mode;
778 const struct drm_framebuffer *fb;
779 int htotal, hdisplay, clock, cpp;
780 int line_time_us, line_count;
781 int entries, tlb_miss;
782
783 crtc = intel_get_crtc_for_plane(dev_priv, plane);
784 if (!intel_crtc_active(crtc)) {
785 *cursor_wm = cursor->guard_size;
786 *plane_wm = display->guard_size;
787 return false;
788 }
789
790 adjusted_mode = &crtc->config->base.adjusted_mode;
791 fb = crtc->base.primary->state->fb;
792 clock = adjusted_mode->crtc_clock;
793 htotal = adjusted_mode->crtc_htotal;
794 hdisplay = crtc->config->pipe_src_w;
795 cpp = fb->format->cpp[0];
796
797 /* Use the small buffer method to calculate plane watermark */
798 entries = ((clock * cpp / 1000) * display_latency_ns) / 1000;
799 tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
800 if (tlb_miss > 0)
801 entries += tlb_miss;
802 entries = DIV_ROUND_UP(entries, display->cacheline_size);
803 *plane_wm = entries + display->guard_size;
804 if (*plane_wm > (int)display->max_wm)
805 *plane_wm = display->max_wm;
806
807 /* Use the large buffer method to calculate cursor watermark */
808 line_time_us = max(htotal * 1000 / clock, 1);
809 line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
810 entries = line_count * crtc->base.cursor->state->crtc_w * cpp;
811 tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
812 if (tlb_miss > 0)
813 entries += tlb_miss;
814 entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
815 *cursor_wm = entries + cursor->guard_size;
816 if (*cursor_wm > (int)cursor->max_wm)
817 *cursor_wm = (int)cursor->max_wm;
818
819 return true;
820}
821
822/* 896/*
823 * Check the wm result. 897 * Documentation says:
824 * 898 * "If the line size is small, the TLB fetches can get in the way of the
825 * If any calculated watermark values is larger than the maximum value that 899 * data fetches, causing some lag in the pixel data return which is not
826 * can be programmed into the associated watermark register, that watermark 900 * accounted for in the above formulas. The following adjustment only
827 * must be disabled. 901 * needs to be applied if eight whole lines fit in the buffer at once.
902 * The WM is adjusted upwards by the difference between the FIFO size
903 * and the size of 8 whole lines. This adjustment is always performed
904 * in the actual pixel depth regardless of whether FBC is enabled or not."
828 */ 905 */
829static bool g4x_check_srwm(struct drm_i915_private *dev_priv, 906static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
830 int display_wm, int cursor_wm,
831 const struct intel_watermark_params *display,
832 const struct intel_watermark_params *cursor)
833{ 907{
834 DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n", 908 int tlb_miss = fifo_size * 64 - width * cpp * 8;
835 display_wm, cursor_wm);
836
837 if (display_wm > display->max_wm) {
838 DRM_DEBUG_KMS("display watermark is too large(%d/%u), disabling\n",
839 display_wm, display->max_wm);
840 return false;
841 }
842 909
843 if (cursor_wm > cursor->max_wm) { 910 return max(0, tlb_miss);
844 DRM_DEBUG_KMS("cursor watermark is too large(%d/%u), disabling\n",
845 cursor_wm, cursor->max_wm);
846 return false;
847 }
848
849 if (!(display_wm || cursor_wm)) {
850 DRM_DEBUG_KMS("SR latency is 0, disabling\n");
851 return false;
852 }
853
854 return true;
855} 911}
856 912
857static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, 913static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
858 int plane, 914 const struct g4x_wm_values *wm)
859 int latency_ns,
860 const struct intel_watermark_params *display,
861 const struct intel_watermark_params *cursor,
862 int *display_wm, int *cursor_wm)
863{ 915{
864 struct intel_crtc *crtc; 916 enum pipe pipe;
865 const struct drm_display_mode *adjusted_mode;
866 const struct drm_framebuffer *fb;
867 int hdisplay, htotal, cpp, clock;
868 unsigned long line_time_us;
869 int line_count, line_size;
870 int small, large;
871 int entries;
872
873 if (!latency_ns) {
874 *display_wm = *cursor_wm = 0;
875 return false;
876 }
877
878 crtc = intel_get_crtc_for_plane(dev_priv, plane);
879 adjusted_mode = &crtc->config->base.adjusted_mode;
880 fb = crtc->base.primary->state->fb;
881 clock = adjusted_mode->crtc_clock;
882 htotal = adjusted_mode->crtc_htotal;
883 hdisplay = crtc->config->pipe_src_w;
884 cpp = fb->format->cpp[0];
885
886 line_time_us = max(htotal * 1000 / clock, 1);
887 line_count = (latency_ns / line_time_us + 1000) / 1000;
888 line_size = hdisplay * cpp;
889
890 /* Use the minimum of the small and large buffer method for primary */
891 small = ((clock * cpp / 1000) * latency_ns) / 1000;
892 large = line_count * line_size;
893 917
894 entries = DIV_ROUND_UP(min(small, large), display->cacheline_size); 918 for_each_pipe(dev_priv, pipe)
895 *display_wm = entries + display->guard_size; 919 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
896 920
897 /* calculate the self-refresh watermark for display cursor */ 921 I915_WRITE(DSPFW1,
898 entries = line_count * cpp * crtc->base.cursor->state->crtc_w; 922 FW_WM(wm->sr.plane, SR) |
899 entries = DIV_ROUND_UP(entries, cursor->cacheline_size); 923 FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
900 *cursor_wm = entries + cursor->guard_size; 924 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
925 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
926 I915_WRITE(DSPFW2,
927 (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
928 FW_WM(wm->sr.fbc, FBC_SR) |
929 FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
930 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
931 FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
932 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
933 I915_WRITE(DSPFW3,
934 (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
935 FW_WM(wm->sr.cursor, CURSOR_SR) |
936 FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
937 FW_WM(wm->hpll.plane, HPLL_SR));
901 938
902 return g4x_check_srwm(dev_priv, 939 POSTING_READ(DSPFW1);
903 *display_wm, *cursor_wm,
904 display, cursor);
905} 940}
906 941
907#define FW_WM_VLV(value, plane) \ 942#define FW_WM_VLV(value, plane) \
@@ -985,17 +1020,535 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
985 1020
986#undef FW_WM_VLV 1021#undef FW_WM_VLV
987 1022
1023static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
1024{
1025 /* all latencies in usec */
1026 dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
1027 dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
1028 dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
1029
1030 dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
1031}
1032
1033static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
1034{
1035 /*
1036 * DSPCNTR[13] supposedly controls whether the
1037 * primary plane can use the FIFO space otherwise
1038 * reserved for the sprite plane. It's not 100% clear
1039 * what the actual FIFO size is, but it looks like we
1040 * can happily set both primary and sprite watermarks
1041 * up to 127 cachelines. So that would seem to mean
1042 * that either DSPCNTR[13] doesn't do anything, or that
1043 * the total FIFO is >= 256 cachelines in size. Either
1044 * way, we don't seem to have to worry about this
1045 * repartitioning as the maximum watermark value the
1046 * register can hold for each plane is lower than the
1047 * minimum FIFO size.
1048 */
1049 switch (plane_id) {
1050 case PLANE_CURSOR:
1051 return 63;
1052 case PLANE_PRIMARY:
1053 return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
1054 case PLANE_SPRITE0:
1055 return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
1056 default:
1057 MISSING_CASE(plane_id);
1058 return 0;
1059 }
1060}
1061
1062static int g4x_fbc_fifo_size(int level)
1063{
1064 switch (level) {
1065 case G4X_WM_LEVEL_SR:
1066 return 7;
1067 case G4X_WM_LEVEL_HPLL:
1068 return 15;
1069 default:
1070 MISSING_CASE(level);
1071 return 0;
1072 }
1073}
1074
1075static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state,
1076 const struct intel_plane_state *plane_state,
1077 int level)
1078{
1079 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1080 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1081 const struct drm_display_mode *adjusted_mode =
1082 &crtc_state->base.adjusted_mode;
1083 int clock, htotal, cpp, width, wm;
1084 int latency = dev_priv->wm.pri_latency[level] * 10;
1085
1086 if (latency == 0)
1087 return USHRT_MAX;
1088
1089 if (!intel_wm_plane_visible(crtc_state, plane_state))
1090 return 0;
1091
1092 /*
1093 * Not 100% sure which way ELK should go here as the
1094 * spec only says CL/CTG should assume 32bpp and BW
1095 * doesn't need to. But as these things followed the
1096 * mobile vs. desktop lines on gen3 as well, let's
1097 * assume ELK doesn't need this.
1098 *
1099 * The spec also fails to list such a restriction for
1100 * the HPLL watermark, which seems a little strange.
1101 * Let's use 32bpp for the HPLL watermark as well.
1102 */
1103 if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
1104 level != G4X_WM_LEVEL_NORMAL)
1105 cpp = 4;
1106 else
1107 cpp = plane_state->base.fb->format->cpp[0];
1108
1109 clock = adjusted_mode->crtc_clock;
1110 htotal = adjusted_mode->crtc_htotal;
1111
1112 if (plane->id == PLANE_CURSOR)
1113 width = plane_state->base.crtc_w;
1114 else
1115 width = drm_rect_width(&plane_state->base.dst);
1116
1117 if (plane->id == PLANE_CURSOR) {
1118 wm = intel_wm_method2(clock, htotal, width, cpp, latency);
1119 } else if (plane->id == PLANE_PRIMARY &&
1120 level == G4X_WM_LEVEL_NORMAL) {
1121 wm = intel_wm_method1(clock, cpp, latency);
1122 } else {
1123 int small, large;
1124
1125 small = intel_wm_method1(clock, cpp, latency);
1126 large = intel_wm_method2(clock, htotal, width, cpp, latency);
1127
1128 wm = min(small, large);
1129 }
1130
1131 wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
1132 width, cpp);
1133
1134 wm = DIV_ROUND_UP(wm, 64) + 2;
1135
1136 return min_t(int, wm, USHRT_MAX);
1137}
1138
1139static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1140 int level, enum plane_id plane_id, u16 value)
1141{
1142 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1143 bool dirty = false;
1144
1145 for (; level < intel_wm_num_levels(dev_priv); level++) {
1146 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1147
1148 dirty |= raw->plane[plane_id] != value;
1149 raw->plane[plane_id] = value;
1150 }
1151
1152 return dirty;
1153}
1154
1155static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1156 int level, u16 value)
1157{
1158 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1159 bool dirty = false;
1160
1161 /* NORMAL level doesn't have an FBC watermark */
1162 level = max(level, G4X_WM_LEVEL_SR);
1163
1164 for (; level < intel_wm_num_levels(dev_priv); level++) {
1165 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1166
1167 dirty |= raw->fbc != value;
1168 raw->fbc = value;
1169 }
1170
1171 return dirty;
1172}
1173
1174static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1175 const struct intel_plane_state *pstate,
1176 uint32_t pri_val);
1177
1178static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1179 const struct intel_plane_state *plane_state)
1180{
1181 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1182 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1183 enum plane_id plane_id = plane->id;
1184 bool dirty = false;
1185 int level;
1186
1187 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1188 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1189 if (plane_id == PLANE_PRIMARY)
1190 dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1191 goto out;
1192 }
1193
1194 for (level = 0; level < num_levels; level++) {
1195 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1196 int wm, max_wm;
1197
1198 wm = g4x_compute_wm(crtc_state, plane_state, level);
1199 max_wm = g4x_plane_fifo_size(plane_id, level);
1200
1201 if (wm > max_wm)
1202 break;
1203
1204 dirty |= raw->plane[plane_id] != wm;
1205 raw->plane[plane_id] = wm;
1206
1207 if (plane_id != PLANE_PRIMARY ||
1208 level == G4X_WM_LEVEL_NORMAL)
1209 continue;
1210
1211 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1212 raw->plane[plane_id]);
1213 max_wm = g4x_fbc_fifo_size(level);
1214
1215 /*
1216 * FBC wm is not mandatory as we
1217 * can always just disable its use.
1218 */
1219 if (wm > max_wm)
1220 wm = USHRT_MAX;
1221
1222 dirty |= raw->fbc != wm;
1223 raw->fbc = wm;
1224 }
1225
1226 /* mark watermarks as invalid */
1227 dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1228
1229 if (plane_id == PLANE_PRIMARY)
1230 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1231
1232 out:
1233 if (dirty) {
1234 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1235 plane->base.name,
1236 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1237 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1238 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1239
1240 if (plane_id == PLANE_PRIMARY)
1241 DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1242 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1243 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1244 }
1245
1246 return dirty;
1247}
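The loop above is a simple ladder: levels are computed in order and accepted only while they fit the plane's FIFO budget; the first level that overflows breaks out, and the trailing g4x_raw_plane_wm_set()/g4x_raw_fbc_wm_set() calls poison that level and everything above it with USHRT_MAX so g4x_raw_plane_wm_is_valid() rejects them later. A minimal sketch of the same pattern, with hypothetical array names:

	#include <limits.h>

	#define NUM_LEVELS 3	/* NORMAL, SR, HPLL */

	/* computed[]: watermark per level, budget[]: FIFO limit per level */
	static void fill_plane_levels(unsigned short out[NUM_LEVELS],
				      const unsigned short computed[NUM_LEVELS],
				      const unsigned short budget[NUM_LEVELS])
	{
		int level;

		for (level = 0; level < NUM_LEVELS; level++) {
			if (computed[level] > budget[level])
				break;		/* first level that no longer fits */
			out[level] = computed[level];
		}

		for (; level < NUM_LEVELS; level++)
			out[level] = USHRT_MAX;	/* this and all higher levels invalid */
	}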
1248
1249static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1250 enum plane_id plane_id, int level)
1251{
1252 const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1253
1254 return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1255}
1256
1257static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1258 int level)
1259{
1260 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1261
1262 if (level > dev_priv->wm.max_level)
1263 return false;
1264
1265 return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1266 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1267 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1268}
1269
1270/* mark all levels starting from 'level' as invalid */
1271static void g4x_invalidate_wms(struct intel_crtc *crtc,
1272 struct g4x_wm_state *wm_state, int level)
1273{
1274 if (level <= G4X_WM_LEVEL_NORMAL) {
1275 enum plane_id plane_id;
1276
1277 for_each_plane_id_on_crtc(crtc, plane_id)
1278 wm_state->wm.plane[plane_id] = USHRT_MAX;
1279 }
1280
1281 if (level <= G4X_WM_LEVEL_SR) {
1282 wm_state->cxsr = false;
1283 wm_state->sr.cursor = USHRT_MAX;
1284 wm_state->sr.plane = USHRT_MAX;
1285 wm_state->sr.fbc = USHRT_MAX;
1286 }
1287
1288 if (level <= G4X_WM_LEVEL_HPLL) {
1289 wm_state->hpll_en = false;
1290 wm_state->hpll.cursor = USHRT_MAX;
1291 wm_state->hpll.plane = USHRT_MAX;
1292 wm_state->hpll.fbc = USHRT_MAX;
1293 }
1294}
1295
1296static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1297{
1298 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1299 struct intel_atomic_state *state =
1300 to_intel_atomic_state(crtc_state->base.state);
1301 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1302 int num_active_planes = hweight32(crtc_state->active_planes &
1303 ~BIT(PLANE_CURSOR));
1304 const struct g4x_pipe_wm *raw;
1305 struct intel_plane_state *plane_state;
1306 struct intel_plane *plane;
1307 enum plane_id plane_id;
1308 int i, level;
1309 unsigned int dirty = 0;
1310
1311 for_each_intel_plane_in_state(state, plane, plane_state, i) {
1312 const struct intel_plane_state *old_plane_state =
1313 to_intel_plane_state(plane->base.state);
1314
1315 if (plane_state->base.crtc != &crtc->base &&
1316 old_plane_state->base.crtc != &crtc->base)
1317 continue;
1318
1319 if (g4x_raw_plane_wm_compute(crtc_state, plane_state))
1320 dirty |= BIT(plane->id);
1321 }
1322
1323 if (!dirty)
1324 return 0;
1325
1326 level = G4X_WM_LEVEL_NORMAL;
1327 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1328 goto out;
1329
1330 raw = &crtc_state->wm.g4x.raw[level];
1331 for_each_plane_id_on_crtc(crtc, plane_id)
1332 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1333
1334 level = G4X_WM_LEVEL_SR;
1335
1336 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1337 goto out;
1338
1339 raw = &crtc_state->wm.g4x.raw[level];
1340 wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1341 wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1342 wm_state->sr.fbc = raw->fbc;
1343
1344 wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1345
1346 level = G4X_WM_LEVEL_HPLL;
1347
1348 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1349 goto out;
1350
1351 raw = &crtc_state->wm.g4x.raw[level];
1352 wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1353 wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1354 wm_state->hpll.fbc = raw->fbc;
1355
1356 wm_state->hpll_en = wm_state->cxsr;
1357
1358 level++;
1359
1360 out:
1361 if (level == G4X_WM_LEVEL_NORMAL)
1362 return -EINVAL;
1363
1364 /* invalidate the higher levels */
1365 g4x_invalidate_wms(crtc, wm_state, level);
1366
1367 /*
1368 * Determine if the FBC watermark(s) can be used. IF
1369 * this isn't the case we prefer to disable the FBC
1370 * watermark(s) rather than disable the SR/HPLL
1371 * level(s) entirely.
1372 */
1373 wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1374
1375 if (level >= G4X_WM_LEVEL_SR &&
1376 wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1377 wm_state->fbc_en = false;
1378 else if (level >= G4X_WM_LEVEL_HPLL &&
1379 wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1380 wm_state->fbc_en = false;
1381
1382 return 0;
1383}
1384
1385static int g4x_compute_intermediate_wm(struct drm_device *dev,
1386 struct intel_crtc *crtc,
1387 struct intel_crtc_state *crtc_state)
1388{
1389 struct g4x_wm_state *intermediate = &crtc_state->wm.g4x.intermediate;
1390 const struct g4x_wm_state *optimal = &crtc_state->wm.g4x.optimal;
1391 const struct g4x_wm_state *active = &crtc->wm.active.g4x;
1392 enum plane_id plane_id;
1393
1394 intermediate->cxsr = optimal->cxsr && active->cxsr &&
1395 !crtc_state->disable_cxsr;
1396 intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1397 !crtc_state->disable_cxsr;
1398 intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1399
1400 for_each_plane_id_on_crtc(crtc, plane_id) {
1401 intermediate->wm.plane[plane_id] =
1402 max(optimal->wm.plane[plane_id],
1403 active->wm.plane[plane_id]);
1404
1405 WARN_ON(intermediate->wm.plane[plane_id] >
1406 g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1407 }
1408
1409 intermediate->sr.plane = max(optimal->sr.plane,
1410 active->sr.plane);
1411 intermediate->sr.cursor = max(optimal->sr.cursor,
1412 active->sr.cursor);
1413 intermediate->sr.fbc = max(optimal->sr.fbc,
1414 active->sr.fbc);
1415
1416 intermediate->hpll.plane = max(optimal->hpll.plane,
1417 active->hpll.plane);
1418 intermediate->hpll.cursor = max(optimal->hpll.cursor,
1419 active->hpll.cursor);
1420 intermediate->hpll.fbc = max(optimal->hpll.fbc,
1421 active->hpll.fbc);
1422
1423 WARN_ON((intermediate->sr.plane >
1424 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1425 intermediate->sr.cursor >
1426 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1427 intermediate->cxsr);
1428 WARN_ON((intermediate->sr.plane >
1429 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1430 intermediate->sr.cursor >
1431 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1432 intermediate->hpll_en);
1433
1434 WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
1435 intermediate->fbc_en && intermediate->cxsr);
1436 WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
1437 intermediate->fbc_en && intermediate->hpll_en);
1438
1439 /*
1440 * If our intermediate WM are identical to the final WM, then we can
1441 * omit the post-vblank programming; only update if it's different.
1442 */
1443 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1444 crtc_state->wm.need_postvbl_update = true;
1445
1446 return 0;
1447}
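The merge above follows the usual two-stage watermark update: between the atomic commit and the vblank that latches it, the hardware still scans out the old plane configuration, so the intermediate state must satisfy both the currently active and the new optimal values, and cxsr/hpll/fbc survive only if both states allow them. A toy illustration with made-up numbers:

	/* Made-up numbers: the active state needs 40 cachelines for the
	 * primary plane, the new optimal state only 24. Until the vblank the
	 * intermediate value stays at max(40, 24) = 40; afterwards
	 * g4x_optimize_watermarks() drops it to the optimal 24.
	 */
	static unsigned short intermediate_wm(unsigned short active,
					      unsigned short optimal)
	{
		return active > optimal ? active : optimal;
	}

	static _Bool intermediate_cxsr(_Bool active, _Bool optimal,
				       _Bool disable_cxsr)
	{
		return active && optimal && !disable_cxsr;
	}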
1448
1449static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1450 struct g4x_wm_values *wm)
1451{
1452 struct intel_crtc *crtc;
1453 int num_active_crtcs = 0;
1454
1455 wm->cxsr = true;
1456 wm->hpll_en = true;
1457 wm->fbc_en = true;
1458
1459 for_each_intel_crtc(&dev_priv->drm, crtc) {
1460 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1461
1462 if (!crtc->active)
1463 continue;
1464
1465 if (!wm_state->cxsr)
1466 wm->cxsr = false;
1467 if (!wm_state->hpll_en)
1468 wm->hpll_en = false;
1469 if (!wm_state->fbc_en)
1470 wm->fbc_en = false;
1471
1472 num_active_crtcs++;
1473 }
1474
1475 if (num_active_crtcs != 1) {
1476 wm->cxsr = false;
1477 wm->hpll_en = false;
1478 wm->fbc_en = false;
1479 }
1480
1481 for_each_intel_crtc(&dev_priv->drm, crtc) {
1482 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1483 enum pipe pipe = crtc->pipe;
1484
1485 wm->pipe[pipe] = wm_state->wm;
1486 if (crtc->active && wm->cxsr)
1487 wm->sr = wm_state->sr;
1488 if (crtc->active && wm->hpll_en)
1489 wm->hpll = wm_state->hpll;
1490 }
1491}
1492
1493static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1494{
1495 struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1496 struct g4x_wm_values new_wm = {};
1497
1498 g4x_merge_wm(dev_priv, &new_wm);
1499
1500 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1501 return;
1502
1503 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1504 _intel_set_memory_cxsr(dev_priv, false);
1505
1506 g4x_write_wm_values(dev_priv, &new_wm);
1507
1508 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1509 _intel_set_memory_cxsr(dev_priv, true);
1510
1511 *old_wm = new_wm;
1512}
1513
1514static void g4x_initial_watermarks(struct intel_atomic_state *state,
1515 struct intel_crtc_state *crtc_state)
1516{
1517 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1518 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1519
1520 mutex_lock(&dev_priv->wm.wm_mutex);
1521 crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1522 g4x_program_watermarks(dev_priv);
1523 mutex_unlock(&dev_priv->wm.wm_mutex);
1524}
1525
1526static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1527 struct intel_crtc_state *crtc_state)
1528{
1529 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1530 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1531
1532 if (!crtc_state->wm.need_postvbl_update)
1533 return;
1534
1535 mutex_lock(&dev_priv->wm.wm_mutex);
1536 intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1537 g4x_program_watermarks(dev_priv);
1538 mutex_unlock(&dev_priv->wm.wm_mutex);
1539}
1540
988/* latency must be in 0.1us units. */ 1541/* latency must be in 0.1us units. */
989static unsigned int vlv_wm_method2(unsigned int pixel_rate, 1542static unsigned int vlv_wm_method2(unsigned int pixel_rate,
990 unsigned int pipe_htotal, 1543 unsigned int htotal,
991 unsigned int horiz_pixels, 1544 unsigned int width,
992 unsigned int cpp, 1545 unsigned int cpp,
993 unsigned int latency) 1546 unsigned int latency)
994{ 1547{
995 unsigned int ret; 1548 unsigned int ret;
996 1549
997 ret = (latency * pixel_rate) / (pipe_htotal * 10000); 1550 ret = intel_wm_method2(pixel_rate, htotal,
998 ret = (ret + 1) * horiz_pixels * cpp; 1551 width, cpp, latency);
999 ret = DIV_ROUND_UP(ret, 64); 1552 ret = DIV_ROUND_UP(ret, 64);
1000 1553
1001 return ret; 1554 return ret;
@@ -1029,17 +1582,15 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1029 if (dev_priv->wm.pri_latency[level] == 0) 1582 if (dev_priv->wm.pri_latency[level] == 0)
1030 return USHRT_MAX; 1583 return USHRT_MAX;
1031 1584
1032 if (!plane_state->base.visible) 1585 if (!intel_wm_plane_visible(crtc_state, plane_state))
1033 return 0; 1586 return 0;
1034 1587
1035 cpp = plane_state->base.fb->format->cpp[0]; 1588 cpp = plane_state->base.fb->format->cpp[0];
1036 clock = adjusted_mode->crtc_clock; 1589 clock = adjusted_mode->crtc_clock;
1037 htotal = adjusted_mode->crtc_htotal; 1590 htotal = adjusted_mode->crtc_htotal;
1038 width = crtc_state->pipe_src_w; 1591 width = crtc_state->pipe_src_w;
1039 if (WARN_ON(htotal == 0))
1040 htotal = 1;
1041 1592
1042 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { 1593 if (plane->id == PLANE_CURSOR) {
1043 /* 1594 /*
1044 * FIXME the formula gives values that are 1595 * FIXME the formula gives values that are
1045 * too big for the cursor FIFO, and hence we 1596 * too big for the cursor FIFO, and hence we
@@ -1064,7 +1615,7 @@ static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1064static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) 1615static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1065{ 1616{
1066 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); 1617 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1067 const struct vlv_pipe_wm *raw = 1618 const struct g4x_pipe_wm *raw =
1068 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; 1619 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1069 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; 1620 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1070 unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); 1621 unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
@@ -1143,18 +1694,13 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1143 return 0; 1694 return 0;
1144} 1695}
1145 1696
1146static int vlv_num_wm_levels(struct drm_i915_private *dev_priv)
1147{
1148 return dev_priv->wm.max_level + 1;
1149}
1150
1151/* mark all levels starting from 'level' as invalid */ 1697/* mark all levels starting from 'level' as invalid */
1152static void vlv_invalidate_wms(struct intel_crtc *crtc, 1698static void vlv_invalidate_wms(struct intel_crtc *crtc,
1153 struct vlv_wm_state *wm_state, int level) 1699 struct vlv_wm_state *wm_state, int level)
1154{ 1700{
1155 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); 1701 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1156 1702
1157 for (; level < vlv_num_wm_levels(dev_priv); level++) { 1703 for (; level < intel_wm_num_levels(dev_priv); level++) {
1158 enum plane_id plane_id; 1704 enum plane_id plane_id;
1159 1705
1160 for_each_plane_id_on_crtc(crtc, plane_id) 1706 for_each_plane_id_on_crtc(crtc, plane_id)
@@ -1181,11 +1727,11 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1181 int level, enum plane_id plane_id, u16 value) 1727 int level, enum plane_id plane_id, u16 value)
1182{ 1728{
1183 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); 1729 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1184 int num_levels = vlv_num_wm_levels(dev_priv); 1730 int num_levels = intel_wm_num_levels(dev_priv);
1185 bool dirty = false; 1731 bool dirty = false;
1186 1732
1187 for (; level < num_levels; level++) { 1733 for (; level < num_levels; level++) {
1188 struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; 1734 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1189 1735
1190 dirty |= raw->plane[plane_id] != value; 1736 dirty |= raw->plane[plane_id] != value;
1191 raw->plane[plane_id] = value; 1737 raw->plane[plane_id] = value;
@@ -1194,22 +1740,22 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1194 return dirty; 1740 return dirty;
1195} 1741}
1196 1742
1197static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, 1743static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1198 const struct intel_plane_state *plane_state) 1744 const struct intel_plane_state *plane_state)
1199{ 1745{
1200 struct intel_plane *plane = to_intel_plane(plane_state->base.plane); 1746 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1201 enum plane_id plane_id = plane->id; 1747 enum plane_id plane_id = plane->id;
1202 int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev)); 1748 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1203 int level; 1749 int level;
1204 bool dirty = false; 1750 bool dirty = false;
1205 1751
1206 if (!plane_state->base.visible) { 1752 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1207 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); 1753 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1208 goto out; 1754 goto out;
1209 } 1755 }
1210 1756
1211 for (level = 0; level < num_levels; level++) { 1757 for (level = 0; level < num_levels; level++) {
1212 struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; 1758 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1213 int wm = vlv_compute_wm_level(crtc_state, plane_state, level); 1759 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1214 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511; 1760 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1215 1761
@@ -1225,7 +1771,7 @@ static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state,
1225 1771
1226out: 1772out:
1227 if (dirty) 1773 if (dirty)
1228 DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", 1774 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1229 plane->base.name, 1775 plane->base.name,
1230 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], 1776 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1231 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], 1777 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
@@ -1234,10 +1780,10 @@ out:
1234 return dirty; 1780 return dirty;
1235} 1781}
1236 1782
1237static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, 1783static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1238 enum plane_id plane_id, int level) 1784 enum plane_id plane_id, int level)
1239{ 1785{
1240 const struct vlv_pipe_wm *raw = 1786 const struct g4x_pipe_wm *raw =
1241 &crtc_state->wm.vlv.raw[level]; 1787 &crtc_state->wm.vlv.raw[level];
1242 const struct vlv_fifo_state *fifo_state = 1788 const struct vlv_fifo_state *fifo_state =
1243 &crtc_state->wm.vlv.fifo_state; 1789 &crtc_state->wm.vlv.fifo_state;
@@ -1245,12 +1791,12 @@ static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1245 return raw->plane[plane_id] <= fifo_state->plane[plane_id]; 1791 return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1246} 1792}
1247 1793
1248static bool vlv_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) 1794static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1249{ 1795{
1250 return vlv_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && 1796 return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1251 vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && 1797 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1252 vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && 1798 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1253 vlv_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); 1799 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1254} 1800}
1255 1801
1256static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) 1802static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
@@ -1279,7 +1825,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1279 old_plane_state->base.crtc != &crtc->base) 1825 old_plane_state->base.crtc != &crtc->base)
1280 continue; 1826 continue;
1281 1827
1282 if (vlv_plane_wm_compute(crtc_state, plane_state)) 1828 if (vlv_raw_plane_wm_compute(crtc_state, plane_state))
1283 dirty |= BIT(plane->id); 1829 dirty |= BIT(plane->id);
1284 } 1830 }
1285 1831
@@ -1313,7 +1859,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1313 } 1859 }
1314 1860
1315 /* initially allow all levels */ 1861 /* initially allow all levels */
1316 wm_state->num_levels = vlv_num_wm_levels(dev_priv); 1862 wm_state->num_levels = intel_wm_num_levels(dev_priv);
1317 /* 1863 /*
1318 * Note that enabling cxsr with no primary/sprite planes 1864 * Note that enabling cxsr with no primary/sprite planes
1319 * enabled can wedge the pipe. Hence we only allow cxsr 1865 * enabled can wedge the pipe. Hence we only allow cxsr
@@ -1322,10 +1868,10 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1322 wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1; 1868 wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1323 1869
1324 for (level = 0; level < wm_state->num_levels; level++) { 1870 for (level = 0; level < wm_state->num_levels; level++) {
1325 const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; 1871 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1326 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; 1872 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1327 1873
1328 if (!vlv_crtc_wm_is_valid(crtc_state, level)) 1874 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1329 break; 1875 break;
1330 1876
1331 for_each_plane_id_on_crtc(crtc, plane_id) { 1877 for_each_plane_id_on_crtc(crtc, plane_id) {
@@ -1539,16 +2085,6 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv,
1539 } 2085 }
1540} 2086}
1541 2087
1542static bool is_disabling(int old, int new, int threshold)
1543{
1544 return old >= threshold && new < threshold;
1545}
1546
1547static bool is_enabling(int old, int new, int threshold)
1548{
1549 return old < threshold && new >= threshold;
1550}
1551
1552static void vlv_program_watermarks(struct drm_i915_private *dev_priv) 2088static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
1553{ 2089{
1554 struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; 2090 struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
@@ -1609,65 +2145,6 @@ static void vlv_optimize_watermarks(struct intel_atomic_state *state,
1609 mutex_unlock(&dev_priv->wm.wm_mutex); 2145 mutex_unlock(&dev_priv->wm.wm_mutex);
1610} 2146}
1611 2147
1612#define single_plane_enabled(mask) is_power_of_2(mask)
1613
1614static void g4x_update_wm(struct intel_crtc *crtc)
1615{
1616 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1617 static const int sr_latency_ns = 12000;
1618 int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1619 int plane_sr, cursor_sr;
1620 unsigned int enabled = 0;
1621 bool cxsr_enabled;
1622
1623 if (g4x_compute_wm0(dev_priv, PIPE_A,
1624 &g4x_wm_info, pessimal_latency_ns,
1625 &g4x_cursor_wm_info, pessimal_latency_ns,
1626 &planea_wm, &cursora_wm))
1627 enabled |= 1 << PIPE_A;
1628
1629 if (g4x_compute_wm0(dev_priv, PIPE_B,
1630 &g4x_wm_info, pessimal_latency_ns,
1631 &g4x_cursor_wm_info, pessimal_latency_ns,
1632 &planeb_wm, &cursorb_wm))
1633 enabled |= 1 << PIPE_B;
1634
1635 if (single_plane_enabled(enabled) &&
1636 g4x_compute_srwm(dev_priv, ffs(enabled) - 1,
1637 sr_latency_ns,
1638 &g4x_wm_info,
1639 &g4x_cursor_wm_info,
1640 &plane_sr, &cursor_sr)) {
1641 cxsr_enabled = true;
1642 } else {
1643 cxsr_enabled = false;
1644 intel_set_memory_cxsr(dev_priv, false);
1645 plane_sr = cursor_sr = 0;
1646 }
1647
1648 DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
1649 "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1650 planea_wm, cursora_wm,
1651 planeb_wm, cursorb_wm,
1652 plane_sr, cursor_sr);
1653
1654 I915_WRITE(DSPFW1,
1655 FW_WM(plane_sr, SR) |
1656 FW_WM(cursorb_wm, CURSORB) |
1657 FW_WM(planeb_wm, PLANEB) |
1658 FW_WM(planea_wm, PLANEA));
1659 I915_WRITE(DSPFW2,
1660 (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1661 FW_WM(cursora_wm, CURSORA));
1662 /* HPLL off in SR has some issues on G4x... disable it */
1663 I915_WRITE(DSPFW3,
1664 (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1665 FW_WM(cursor_sr, CURSOR_SR));
1666
1667 if (cxsr_enabled)
1668 intel_set_memory_cxsr(dev_priv, true);
1669}
1670
1671static void i965_update_wm(struct intel_crtc *unused_crtc) 2148static void i965_update_wm(struct intel_crtc *unused_crtc)
1672{ 2149{
1673 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); 2150 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
@@ -1689,14 +2166,10 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
1689 int htotal = adjusted_mode->crtc_htotal; 2166 int htotal = adjusted_mode->crtc_htotal;
1690 int hdisplay = crtc->config->pipe_src_w; 2167 int hdisplay = crtc->config->pipe_src_w;
1691 int cpp = fb->format->cpp[0]; 2168 int cpp = fb->format->cpp[0];
1692 unsigned long line_time_us;
1693 int entries; 2169 int entries;
1694 2170
1695 line_time_us = max(htotal * 1000 / clock, 1); 2171 entries = intel_wm_method2(clock, htotal,
1696 2172 hdisplay, cpp, sr_latency_ns / 100);
1697 /* Use ns/us then divide to preserve precision */
1698 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1699 cpp * hdisplay;
1700 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE); 2173 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1701 srwm = I965_FIFO_SIZE - entries; 2174 srwm = I965_FIFO_SIZE - entries;
1702 if (srwm < 0) 2175 if (srwm < 0)
@@ -1705,13 +2178,14 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
1705 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n", 2178 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1706 entries, srwm); 2179 entries, srwm);
1707 2180
1708 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * 2181 entries = intel_wm_method2(clock, htotal,
1709 cpp * crtc->base.cursor->state->crtc_w; 2182 crtc->base.cursor->state->crtc_w, 4,
2183 sr_latency_ns / 100);
1710 entries = DIV_ROUND_UP(entries, 2184 entries = DIV_ROUND_UP(entries,
1711 i965_cursor_wm_info.cacheline_size); 2185 i965_cursor_wm_info.cacheline_size) +
1712 cursor_sr = i965_cursor_wm_info.fifo_size - 2186 i965_cursor_wm_info.guard_size;
1713 (entries + i965_cursor_wm_info.guard_size);
1714 2187
2188 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
1715 if (cursor_sr > i965_cursor_wm_info.max_wm) 2189 if (cursor_sr > i965_cursor_wm_info.max_wm)
1716 cursor_sr = i965_cursor_wm_info.max_wm; 2190 cursor_sr = i965_cursor_wm_info.max_wm;
1717 2191
@@ -1848,7 +2322,6 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
1848 int htotal = adjusted_mode->crtc_htotal; 2322 int htotal = adjusted_mode->crtc_htotal;
1849 int hdisplay = enabled->config->pipe_src_w; 2323 int hdisplay = enabled->config->pipe_src_w;
1850 int cpp; 2324 int cpp;
1851 unsigned long line_time_us;
1852 int entries; 2325 int entries;
1853 2326
1854 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv)) 2327 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
@@ -1856,11 +2329,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
1856 else 2329 else
1857 cpp = fb->format->cpp[0]; 2330 cpp = fb->format->cpp[0];
1858 2331
1859 line_time_us = max(htotal * 1000 / clock, 1); 2332 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
1860 2333 sr_latency_ns / 100);
1861 /* Use ns/us then divide to preserve precision */
1862 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1863 cpp * hdisplay;
1864 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size); 2334 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1865 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries); 2335 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1866 srwm = wm_info->fifo_size - entries; 2336 srwm = wm_info->fifo_size - entries;
@@ -1917,34 +2387,31 @@ static void i845_update_wm(struct intel_crtc *unused_crtc)
1917} 2387}
1918 2388
1919/* latency must be in 0.1us units. */ 2389/* latency must be in 0.1us units. */
1920static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) 2390static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2391 unsigned int cpp,
2392 unsigned int latency)
1921{ 2393{
1922 uint64_t ret; 2394 unsigned int ret;
1923
1924 if (WARN(latency == 0, "Latency value missing\n"))
1925 return UINT_MAX;
1926 2395
1927 ret = (uint64_t) pixel_rate * cpp * latency; 2396 ret = intel_wm_method1(pixel_rate, cpp, latency);
1928 ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2; 2397 ret = DIV_ROUND_UP(ret, 64) + 2;
1929 2398
1930 return ret; 2399 return ret;
1931} 2400}
1932 2401
1933/* latency must be in 0.1us units. */ 2402/* latency must be in 0.1us units. */
1934static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, 2403static unsigned int ilk_wm_method2(unsigned int pixel_rate,
1935 uint32_t horiz_pixels, uint8_t cpp, 2404 unsigned int htotal,
1936 uint32_t latency) 2405 unsigned int width,
2406 unsigned int cpp,
2407 unsigned int latency)
1937{ 2408{
1938 uint32_t ret; 2409 unsigned int ret;
1939
1940 if (WARN(latency == 0, "Latency value missing\n"))
1941 return UINT_MAX;
1942 if (WARN_ON(!pipe_htotal))
1943 return UINT_MAX;
1944 2410
1945 ret = (latency * pixel_rate) / (pipe_htotal * 10000); 2411 ret = intel_wm_method2(pixel_rate, htotal,
1946 ret = (ret + 1) * horiz_pixels * cpp; 2412 width, cpp, latency);
1947 ret = DIV_ROUND_UP(ret, 64) + 2; 2413 ret = DIV_ROUND_UP(ret, 64) + 2;
2414
1948 return ret; 2415 return ret;
1949} 2416}
1950 2417
@@ -3360,26 +3827,27 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
3360 * Return value is provided in 16.16 fixed point form to retain fractional part. 3827 * Return value is provided in 16.16 fixed point form to retain fractional part.
3361 * Caller should take care of dividing & rounding off the value. 3828 * Caller should take care of dividing & rounding off the value.
3362 */ 3829 */
3363static uint32_t 3830static uint_fixed_16_16_t
3364skl_plane_downscale_amount(const struct intel_crtc_state *cstate, 3831skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
3365 const struct intel_plane_state *pstate) 3832 const struct intel_plane_state *pstate)
3366{ 3833{
3367 struct intel_plane *plane = to_intel_plane(pstate->base.plane); 3834 struct intel_plane *plane = to_intel_plane(pstate->base.plane);
3368 uint32_t downscale_h, downscale_w;
3369 uint32_t src_w, src_h, dst_w, dst_h; 3835 uint32_t src_w, src_h, dst_w, dst_h;
3836 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
3837 uint_fixed_16_16_t downscale_h, downscale_w;
3370 3838
3371 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) 3839 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
3372 return DRM_PLANE_HELPER_NO_SCALING; 3840 return u32_to_fixed_16_16(0);
3373 3841
3374 /* n.b., src is 16.16 fixed point, dst is whole integer */ 3842 /* n.b., src is 16.16 fixed point, dst is whole integer */
3375 if (plane->id == PLANE_CURSOR) { 3843 if (plane->id == PLANE_CURSOR) {
3376 src_w = pstate->base.src_w; 3844 src_w = pstate->base.src_w >> 16;
3377 src_h = pstate->base.src_h; 3845 src_h = pstate->base.src_h >> 16;
3378 dst_w = pstate->base.crtc_w; 3846 dst_w = pstate->base.crtc_w;
3379 dst_h = pstate->base.crtc_h; 3847 dst_h = pstate->base.crtc_h;
3380 } else { 3848 } else {
3381 src_w = drm_rect_width(&pstate->base.src); 3849 src_w = drm_rect_width(&pstate->base.src) >> 16;
3382 src_h = drm_rect_height(&pstate->base.src); 3850 src_h = drm_rect_height(&pstate->base.src) >> 16;
3383 dst_w = drm_rect_width(&pstate->base.dst); 3851 dst_w = drm_rect_width(&pstate->base.dst);
3384 dst_h = drm_rect_height(&pstate->base.dst); 3852 dst_h = drm_rect_height(&pstate->base.dst);
3385 } 3853 }
@@ -3387,11 +3855,12 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
3387 if (drm_rotation_90_or_270(pstate->base.rotation)) 3855 if (drm_rotation_90_or_270(pstate->base.rotation))
3388 swap(dst_w, dst_h); 3856 swap(dst_w, dst_h);
3389 3857
3390 downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); 3858 fp_w_ratio = fixed_16_16_div(src_w, dst_w);
3391 downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); 3859 fp_h_ratio = fixed_16_16_div(src_h, dst_h);
3860 downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1));
3861 downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1));
3392 3862
3393 /* Provide result in 16.16 fixed point */ 3863 return mul_fixed16(downscale_w, downscale_h);
3394 return (uint64_t)downscale_w * downscale_h >> 16;
3395} 3864}
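The downscale amount is now carried as a 16.16 fixed-point ratio instead of a pre-shifted u32. The fixed_16_16_* helpers themselves live in i915_drv.h and are not part of this hunk, so the snippet below only illustrates the encoding with stand-in helpers:

	#include <stdint.h>
	#include <stdio.h>

	/* 16.16 fixed point: 1.0 == 0x10000 */
	static uint32_t div_fixed16(uint32_t a, uint32_t b)	/* plain ints in */
	{
		return (uint32_t)(((uint64_t)a << 16) / b);
	}

	static uint32_t mul_fixed16(uint32_t a, uint32_t b)	/* both 16.16 */
	{
		return (uint32_t)(((uint64_t)a * b) >> 16);
	}

	int main(void)
	{
		/* 3840x2160 source scanned out through a 1920x1080 plane */
		uint32_t w_ratio = div_fixed16(3840, 1920);	/* 2.0 -> 0x20000 */
		uint32_t h_ratio = div_fixed16(2160, 1080);	/* 2.0 -> 0x20000 */
		uint32_t amount  = mul_fixed16(w_ratio, h_ratio); /* 4.0 -> 0x40000 */

		/* as in the code above, ratios below 1.0 are clamped up to 1.0 */
		printf("downscale amount = %u + %u/65536\n",
		       amount >> 16, amount & 0xffff);
		return 0;
	}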
3396 3865
3397static unsigned int 3866static unsigned int
@@ -3401,10 +3870,11 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
3401{ 3870{
3402 struct intel_plane *plane = to_intel_plane(pstate->plane); 3871 struct intel_plane *plane = to_intel_plane(pstate->plane);
3403 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); 3872 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
3404 uint32_t down_scale_amount, data_rate; 3873 uint32_t data_rate;
3405 uint32_t width = 0, height = 0; 3874 uint32_t width = 0, height = 0;
3406 struct drm_framebuffer *fb; 3875 struct drm_framebuffer *fb;
3407 u32 format; 3876 u32 format;
3877 uint_fixed_16_16_t down_scale_amount;
3408 3878
3409 if (!intel_pstate->base.visible) 3879 if (!intel_pstate->base.visible)
3410 return 0; 3880 return 0;
@@ -3438,7 +3908,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
3438 3908
3439 down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); 3909 down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
3440 3910
3441 return (uint64_t)data_rate * down_scale_amount >> 16; 3911 return mul_round_up_u32_fixed16(data_rate, down_scale_amount);
3442} 3912}
3443 3913
3444/* 3914/*
@@ -3587,6 +4057,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
3587 int num_active; 4057 int num_active;
3588 unsigned plane_data_rate[I915_MAX_PLANES] = {}; 4058 unsigned plane_data_rate[I915_MAX_PLANES] = {};
3589 unsigned plane_y_data_rate[I915_MAX_PLANES] = {}; 4059 unsigned plane_y_data_rate[I915_MAX_PLANES] = {};
4060 uint16_t total_min_blocks = 0;
3590 4061
3591 /* Clear the partitioning for disabled planes. */ 4062 /* Clear the partitioning for disabled planes. */
3592 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); 4063 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
@@ -3602,10 +4073,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
3602 4073
3603 skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); 4074 skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
3604 alloc_size = skl_ddb_entry_size(alloc); 4075 alloc_size = skl_ddb_entry_size(alloc);
3605 if (alloc_size == 0) { 4076 if (alloc_size == 0)
3606 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
3607 return 0; 4077 return 0;
3608 }
3609 4078
3610 skl_ddb_calc_min(cstate, num_active, minimum, y_minimum); 4079 skl_ddb_calc_min(cstate, num_active, minimum, y_minimum);
3611 4080
@@ -3616,10 +4085,18 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
3616 */ 4085 */
3617 4086
3618 for_each_plane_id_on_crtc(intel_crtc, plane_id) { 4087 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
3619 alloc_size -= minimum[plane_id]; 4088 total_min_blocks += minimum[plane_id];
3620 alloc_size -= y_minimum[plane_id]; 4089 total_min_blocks += y_minimum[plane_id];
3621 } 4090 }
3622 4091
4092 if (total_min_blocks > alloc_size) {
4093 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
4094 DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks,
4095 alloc_size);
4096 return -EINVAL;
4097 }
4098
4099 alloc_size -= total_min_blocks;
3623 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR]; 4100 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
3624 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; 4101 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
3625 4102
@@ -3698,7 +4175,7 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp,
3698 return FP_16_16_MAX; 4175 return FP_16_16_MAX;
3699 4176
3700 wm_intermediate_val = latency * pixel_rate * cpp; 4177 wm_intermediate_val = latency * pixel_rate * cpp;
3701 ret = fixed_16_16_div_round_up_u64(wm_intermediate_val, 1000 * 512); 4178 ret = fixed_16_16_div_u64(wm_intermediate_val, 1000 * 512);
3702 return ret; 4179 return ret;
3703} 4180}
3704 4181
@@ -3720,12 +4197,33 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
3720 return ret; 4197 return ret;
3721} 4198}
3722 4199
3723static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, 4200static uint_fixed_16_16_t
3724 struct intel_plane_state *pstate) 4201intel_get_linetime_us(struct intel_crtc_state *cstate)
4202{
4203 uint32_t pixel_rate;
4204 uint32_t crtc_htotal;
4205 uint_fixed_16_16_t linetime_us;
4206
4207 if (!cstate->base.active)
4208 return u32_to_fixed_16_16(0);
4209
4210 pixel_rate = cstate->pixel_rate;
4211
4212 if (WARN_ON(pixel_rate == 0))
4213 return u32_to_fixed_16_16(0);
4214
4215 crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
4216 linetime_us = fixed_16_16_div_u64(crtc_htotal * 1000, pixel_rate);
4217
4218 return linetime_us;
4219}
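intel_get_linetime_us() returns the scanout time of a single line as a 16.16 value, crtc_htotal * 1000 / pixel_rate (kHz). For a hypothetical 1080p@60 timing (htotal 2200, pixel rate 148500 kHz) that is about 14.81 us, and skl_compute_linetime_wm() further down rounds up 8 * linetime to get the linetime watermark. A throwaway check of that arithmetic in plain C:

	#include <stdio.h>

	int main(void)
	{
		/* hypothetical 1080p@60 timing, not taken from the patch */
		unsigned int crtc_htotal = 2200;
		unsigned int pixel_rate = 148500;	/* kHz */

		double linetime_us = (double)crtc_htotal * 1000 / pixel_rate;

		printf("linetime    = %.2f us\n", linetime_us);		/* ~14.81 */
		printf("linetime wm = %.0f\n", 8 * linetime_us + 0.5);	/* rounds to ~119 */
		return 0;
	}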
4220
4221static uint32_t
4222skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4223 const struct intel_plane_state *pstate)
3725{ 4224{
3726 uint64_t adjusted_pixel_rate; 4225 uint64_t adjusted_pixel_rate;
3727 uint64_t downscale_amount; 4226 uint_fixed_16_16_t downscale_amount;
3728 uint64_t pixel_rate;
3729 4227
3730 /* Shouldn't reach here on disabled planes... */ 4228 /* Shouldn't reach here on disabled planes... */
3731 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) 4229 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
@@ -3738,15 +4236,13 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst
3738 adjusted_pixel_rate = cstate->pixel_rate; 4236 adjusted_pixel_rate = cstate->pixel_rate;
3739 downscale_amount = skl_plane_downscale_amount(cstate, pstate); 4237 downscale_amount = skl_plane_downscale_amount(cstate, pstate);
3740 4238
3741 pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; 4239 return mul_round_up_u32_fixed16(adjusted_pixel_rate,
3742 WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0)); 4240 downscale_amount);
3743
3744 return pixel_rate;
3745} 4241}
3746 4242
3747static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, 4243static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3748 struct intel_crtc_state *cstate, 4244 struct intel_crtc_state *cstate,
3749 struct intel_plane_state *intel_pstate, 4245 const struct intel_plane_state *intel_pstate,
3750 uint16_t ddb_allocation, 4246 uint16_t ddb_allocation,
3751 int level, 4247 int level,
3752 uint16_t *out_blocks, /* out */ 4248 uint16_t *out_blocks, /* out */
@@ -3754,8 +4250,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3754 bool *enabled /* out */) 4250 bool *enabled /* out */)
3755{ 4251{
3756 struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); 4252 struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
3757 struct drm_plane_state *pstate = &intel_pstate->base; 4253 const struct drm_plane_state *pstate = &intel_pstate->base;
3758 struct drm_framebuffer *fb = pstate->fb; 4254 const struct drm_framebuffer *fb = pstate->fb;
3759 uint32_t latency = dev_priv->wm.skl_latency[level]; 4255 uint32_t latency = dev_priv->wm.skl_latency[level];
3760 uint_fixed_16_16_t method1, method2; 4256 uint_fixed_16_16_t method1, method2;
3761 uint_fixed_16_16_t plane_blocks_per_line; 4257 uint_fixed_16_16_t plane_blocks_per_line;
@@ -3834,8 +4330,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3834 if (y_tiled) { 4330 if (y_tiled) {
3835 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * 4331 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
3836 y_min_scanlines, 512); 4332 y_min_scanlines, 512);
3837 plane_blocks_per_line = 4333 plane_blocks_per_line = fixed_16_16_div(interm_pbpl,
3838 fixed_16_16_div_round_up(interm_pbpl, y_min_scanlines); 4334 y_min_scanlines);
3839 } else if (x_tiled) { 4335 } else if (x_tiled) {
3840 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); 4336 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
3841 plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl); 4337 plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl);
@@ -3856,19 +4352,25 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3856 if (y_tiled) { 4352 if (y_tiled) {
3857 selected_result = max_fixed_16_16(method2, y_tile_minimum); 4353 selected_result = max_fixed_16_16(method2, y_tile_minimum);
3858 } else { 4354 } else {
4355 uint32_t linetime_us;
4356
4357 linetime_us = fixed_16_16_to_u32_round_up(
4358 intel_get_linetime_us(cstate));
3859 if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && 4359 if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
3860 (plane_bytes_per_line / 512 < 1)) 4360 (plane_bytes_per_line / 512 < 1))
3861 selected_result = method2; 4361 selected_result = method2;
3862 else if ((ddb_allocation / 4362 else if ((ddb_allocation && ddb_allocation /
3863 fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1) 4363 fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1)
3864 selected_result = min_fixed_16_16(method1, method2); 4364 selected_result = min_fixed_16_16(method1, method2);
4365 else if (latency >= linetime_us)
4366 selected_result = min_fixed_16_16(method1, method2);
3865 else 4367 else
3866 selected_result = method1; 4368 selected_result = method1;
3867 } 4369 }
3868 4370
3869 res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1; 4371 res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1;
3870 res_lines = DIV_ROUND_UP(selected_result.val, 4372 res_lines = div_round_up_fixed16(selected_result,
3871 plane_blocks_per_line.val); 4373 plane_blocks_per_line);
3872 4374
3873 if (level >= 1 && level <= 7) { 4375 if (level >= 1 && level <= 7) {
3874 if (y_tiled) { 4376 if (y_tiled) {
@@ -3907,54 +4409,39 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3907} 4409}
3908 4410
3909static int 4411static int
3910skl_compute_wm_level(const struct drm_i915_private *dev_priv, 4412skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
3911 struct skl_ddb_allocation *ddb, 4413 struct skl_ddb_allocation *ddb,
3912 struct intel_crtc_state *cstate, 4414 struct intel_crtc_state *cstate,
3913 struct intel_plane *intel_plane, 4415 const struct intel_plane_state *intel_pstate,
3914 int level, 4416 struct skl_plane_wm *wm)
3915 struct skl_wm_level *result)
3916{ 4417{
3917 struct drm_atomic_state *state = cstate->base.state;
3918 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 4418 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3919 struct drm_plane *plane = &intel_plane->base; 4419 struct drm_plane *plane = intel_pstate->base.plane;
3920 struct intel_plane_state *intel_pstate = NULL; 4420 struct intel_plane *intel_plane = to_intel_plane(plane);
3921 uint16_t ddb_blocks; 4421 uint16_t ddb_blocks;
3922 enum pipe pipe = intel_crtc->pipe; 4422 enum pipe pipe = intel_crtc->pipe;
4423 int level, max_level = ilk_wm_max_level(dev_priv);
3923 int ret; 4424 int ret;
3924 4425
3925 if (state) 4426 if (WARN_ON(!intel_pstate->base.fb))
3926 intel_pstate = 4427 return -EINVAL;
3927 intel_atomic_get_existing_plane_state(state,
3928 intel_plane);
3929
3930 /*
3931 * Note: If we start supporting multiple pending atomic commits against
3932 * the same planes/CRTC's in the future, plane->state will no longer be
3933 * the correct pre-state to use for the calculations here and we'll
3934 * need to change where we get the 'unchanged' plane data from.
3935 *
3936 * For now this is fine because we only allow one queued commit against
3937 * a CRTC. Even if the plane isn't modified by this transaction and we
3938 * don't have a plane lock, we still have the CRTC's lock, so we know
3939 * that no other transactions are racing with us to update it.
3940 */
3941 if (!intel_pstate)
3942 intel_pstate = to_intel_plane_state(plane->state);
3943
3944 WARN_ON(!intel_pstate->base.fb);
3945 4428
3946 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); 4429 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]);
3947 4430
3948 ret = skl_compute_plane_wm(dev_priv, 4431 for (level = 0; level <= max_level; level++) {
3949 cstate, 4432 struct skl_wm_level *result = &wm->wm[level];
3950 intel_pstate, 4433
3951 ddb_blocks, 4434 ret = skl_compute_plane_wm(dev_priv,
3952 level, 4435 cstate,
3953 &result->plane_res_b, 4436 intel_pstate,
3954 &result->plane_res_l, 4437 ddb_blocks,
3955 &result->plane_en); 4438 level,
3956 if (ret) 4439 &result->plane_res_b,
3957 return ret; 4440 &result->plane_res_l,
4441 &result->plane_en);
4442 if (ret)
4443 return ret;
4444 }
3958 4445
3959 return 0; 4446 return 0;
3960} 4447}
@@ -3964,19 +4451,16 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate)
3964{ 4451{
3965 struct drm_atomic_state *state = cstate->base.state; 4452 struct drm_atomic_state *state = cstate->base.state;
3966 struct drm_i915_private *dev_priv = to_i915(state->dev); 4453 struct drm_i915_private *dev_priv = to_i915(state->dev);
3967 uint32_t pixel_rate; 4454 uint_fixed_16_16_t linetime_us;
3968 uint32_t linetime_wm; 4455 uint32_t linetime_wm;
3969 4456
3970 if (!cstate->base.active) 4457 linetime_us = intel_get_linetime_us(cstate);
3971 return 0;
3972 4458
3973 pixel_rate = cstate->pixel_rate; 4459 if (is_fixed16_zero(linetime_us))
3974
3975 if (WARN_ON(pixel_rate == 0))
3976 return 0; 4460 return 0;
3977 4461
3978 linetime_wm = DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 4462 linetime_wm = fixed_16_16_to_u32_round_up(mul_u32_fixed_16_16(8,
3979 1000, pixel_rate); 4463 linetime_us));
3980 4464
3981 /* Display WA #1135: bxt. */ 4465 /* Display WA #1135: bxt. */
3982 if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled) 4466 if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled)
@@ -4000,10 +4484,11 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
4000 struct skl_pipe_wm *pipe_wm) 4484 struct skl_pipe_wm *pipe_wm)
4001{ 4485{
4002 struct drm_device *dev = cstate->base.crtc->dev; 4486 struct drm_device *dev = cstate->base.crtc->dev;
4487 struct drm_crtc_state *crtc_state = &cstate->base;
4003 const struct drm_i915_private *dev_priv = to_i915(dev); 4488 const struct drm_i915_private *dev_priv = to_i915(dev);
4004 struct intel_plane *intel_plane; 4489 struct drm_plane *plane;
4490 const struct drm_plane_state *pstate;
4005 struct skl_plane_wm *wm; 4491 struct skl_plane_wm *wm;
4006 int level, max_level = ilk_wm_max_level(dev_priv);
4007 int ret; 4492 int ret;
4008 4493
4009 /* 4494 /*
@@ -4012,18 +4497,17 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
4012 */ 4497 */
4013 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); 4498 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
4014 4499
4015 for_each_intel_plane_mask(&dev_priv->drm, 4500 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4016 intel_plane, 4501 const struct intel_plane_state *intel_pstate =
4017 cstate->base.plane_mask) { 4502 to_intel_plane_state(pstate);
4018 wm = &pipe_wm->planes[intel_plane->id]; 4503 enum plane_id plane_id = to_intel_plane(plane)->id;
4019 4504
4020 for (level = 0; level <= max_level; level++) { 4505 wm = &pipe_wm->planes[plane_id];
4021 ret = skl_compute_wm_level(dev_priv, ddb, cstate, 4506
4022 intel_plane, level, 4507 ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
4023 &wm->wm[level]); 4508 intel_pstate, wm);
4024 if (ret) 4509 if (ret)
4025 return ret; 4510 return ret;
4026 }
4027 skl_compute_transition_wm(cstate, &wm->trans_wm); 4511 skl_compute_transition_wm(cstate, &wm->trans_wm);
4028 } 4512 }
4029 pipe_wm->linetime = skl_compute_linetime_wm(cstate); 4513 pipe_wm->linetime = skl_compute_linetime_wm(cstate);
@@ -4654,6 +5138,32 @@ static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
4654#define _FW_WM_VLV(value, plane) \ 5138#define _FW_WM_VLV(value, plane) \
4655 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT) 5139 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
4656 5140
5141static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5142 struct g4x_wm_values *wm)
5143{
5144 uint32_t tmp;
5145
5146 tmp = I915_READ(DSPFW1);
5147 wm->sr.plane = _FW_WM(tmp, SR);
5148 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5149 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5150 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5151
5152 tmp = I915_READ(DSPFW2);
5153 wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5154 wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5155 wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5156 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5157 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5158 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5159
5160 tmp = I915_READ(DSPFW3);
5161 wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5162 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5163 wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5164 wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5165}
5166
4657static void vlv_read_wm_values(struct drm_i915_private *dev_priv, 5167static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
4658 struct vlv_wm_values *wm) 5168 struct vlv_wm_values *wm)
4659{ 5169{
@@ -4730,6 +5240,147 @@ static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
4730#undef _FW_WM 5240#undef _FW_WM
4731#undef _FW_WM_VLV 5241#undef _FW_WM_VLV
4732 5242
5243void g4x_wm_get_hw_state(struct drm_device *dev)
5244{
5245 struct drm_i915_private *dev_priv = to_i915(dev);
5246 struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5247 struct intel_crtc *crtc;
5248
5249 g4x_read_wm_values(dev_priv, wm);
5250
5251 wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5252
5253 for_each_intel_crtc(dev, crtc) {
5254 struct intel_crtc_state *crtc_state =
5255 to_intel_crtc_state(crtc->base.state);
5256 struct g4x_wm_state *active = &crtc->wm.active.g4x;
5257 struct g4x_pipe_wm *raw;
5258 enum pipe pipe = crtc->pipe;
5259 enum plane_id plane_id;
5260 int level, max_level;
5261
5262 active->cxsr = wm->cxsr;
5263 active->hpll_en = wm->hpll_en;
5264 active->fbc_en = wm->fbc_en;
5265
5266 active->sr = wm->sr;
5267 active->hpll = wm->hpll;
5268
5269 for_each_plane_id_on_crtc(crtc, plane_id) {
5270 active->wm.plane[plane_id] =
5271 wm->pipe[pipe].plane[plane_id];
5272 }
5273
5274 if (wm->cxsr && wm->hpll_en)
5275 max_level = G4X_WM_LEVEL_HPLL;
5276 else if (wm->cxsr)
5277 max_level = G4X_WM_LEVEL_SR;
5278 else
5279 max_level = G4X_WM_LEVEL_NORMAL;
5280
5281 level = G4X_WM_LEVEL_NORMAL;
5282 raw = &crtc_state->wm.g4x.raw[level];
5283 for_each_plane_id_on_crtc(crtc, plane_id)
5284 raw->plane[plane_id] = active->wm.plane[plane_id];
5285
5286 if (++level > max_level)
5287 goto out;
5288
5289 raw = &crtc_state->wm.g4x.raw[level];
5290 raw->plane[PLANE_PRIMARY] = active->sr.plane;
5291 raw->plane[PLANE_CURSOR] = active->sr.cursor;
5292 raw->plane[PLANE_SPRITE0] = 0;
5293 raw->fbc = active->sr.fbc;
5294
5295 if (++level > max_level)
5296 goto out;
5297
5298 raw = &crtc_state->wm.g4x.raw[level];
5299 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
5300 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
5301 raw->plane[PLANE_SPRITE0] = 0;
5302 raw->fbc = active->hpll.fbc;
5303
5304 out:
5305 for_each_plane_id_on_crtc(crtc, plane_id)
5306 g4x_raw_plane_wm_set(crtc_state, level,
5307 plane_id, USHRT_MAX);
5308 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
5309
5310 crtc_state->wm.g4x.optimal = *active;
5311 crtc_state->wm.g4x.intermediate = *active;
5312
5313 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
5314 pipe_name(pipe),
5315 wm->pipe[pipe].plane[PLANE_PRIMARY],
5316 wm->pipe[pipe].plane[PLANE_CURSOR],
5317 wm->pipe[pipe].plane[PLANE_SPRITE0]);
5318 }
5319
5320 DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
5321 wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
5322 DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
5323 wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
5324 DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
5325 yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
5326}
5327
5328void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
5329{
5330 struct intel_plane *plane;
5331 struct intel_crtc *crtc;
5332
5333 mutex_lock(&dev_priv->wm.wm_mutex);
5334
5335 for_each_intel_plane(&dev_priv->drm, plane) {
5336 struct intel_crtc *crtc =
5337 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
5338 struct intel_crtc_state *crtc_state =
5339 to_intel_crtc_state(crtc->base.state);
5340 struct intel_plane_state *plane_state =
5341 to_intel_plane_state(plane->base.state);
5342 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
5343 enum plane_id plane_id = plane->id;
5344 int level;
5345
5346 if (plane_state->base.visible)
5347 continue;
5348
5349 for (level = 0; level < 3; level++) {
5350 struct g4x_pipe_wm *raw =
5351 &crtc_state->wm.g4x.raw[level];
5352
5353 raw->plane[plane_id] = 0;
5354 wm_state->wm.plane[plane_id] = 0;
5355 }
5356
5357 if (plane_id == PLANE_PRIMARY) {
5358 for (level = 0; level < 3; level++) {
5359 struct g4x_pipe_wm *raw =
5360 &crtc_state->wm.g4x.raw[level];
5361 raw->fbc = 0;
5362 }
5363
5364 wm_state->sr.fbc = 0;
5365 wm_state->hpll.fbc = 0;
5366 wm_state->fbc_en = false;
5367 }
5368 }
5369
5370 for_each_intel_crtc(&dev_priv->drm, crtc) {
5371 struct intel_crtc_state *crtc_state =
5372 to_intel_crtc_state(crtc->base.state);
5373
5374 crtc_state->wm.g4x.intermediate =
5375 crtc_state->wm.g4x.optimal;
5376 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
5377 }
5378
5379 g4x_program_watermarks(dev_priv);
5380
5381 mutex_unlock(&dev_priv->wm.wm_mutex);
5382}
5383
4733void vlv_wm_get_hw_state(struct drm_device *dev) 5384void vlv_wm_get_hw_state(struct drm_device *dev)
4734{ 5385{
4735 struct drm_i915_private *dev_priv = to_i915(dev); 5386 struct drm_i915_private *dev_priv = to_i915(dev);
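
The g4x_wm_get_hw_state() hunk above works out the highest watermark level the hardware currently has enabled from the CxSR and HPLL bits, copies the programmed values into the per-level raw state, and then saturates the first disabled level via g4x_raw_plane_wm_set(..., USHRT_MAX) so later fit checks cannot reject it. A standalone sketch of that derivation and saturation; the types, values and the explicit loop over the remaining levels are made up for illustration and are not the driver's own helpers:

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

enum { WM_LEVEL_NORMAL, WM_LEVEL_SR, WM_LEVEL_HPLL, WM_NUM_LEVELS };

/* Hypothetical stand-in for the per-level raw watermark state. */
struct raw_wm {
        unsigned short plane;
        unsigned short cursor;
};

int main(void)
{
        bool cxsr = true, hpll_en = false;      /* pretend hardware readout */
        struct raw_wm raw[WM_NUM_LEVELS] = {
                [WM_LEVEL_NORMAL] = { .plane = 35, .cursor = 6 },
                [WM_LEVEL_SR]     = { .plane = 50, .cursor = 8 },
        };
        int level, max_level;

        /* Highest level the hardware currently has enabled. */
        if (cxsr && hpll_en)
                max_level = WM_LEVEL_HPLL;
        else if (cxsr)
                max_level = WM_LEVEL_SR;
        else
                max_level = WM_LEVEL_NORMAL;

        /* Saturate the disabled levels so later "does it fit?" checks pass. */
        for (level = max_level + 1; level < WM_NUM_LEVELS; level++) {
                raw[level].plane = USHRT_MAX;
                raw[level].cursor = USHRT_MAX;
        }

        for (level = 0; level < WM_NUM_LEVELS; level++)
                printf("level %d: plane=%u cursor=%u\n", level,
                       (unsigned int)raw[level].plane,
                       (unsigned int)raw[level].cursor);
        return 0;
}
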
@@ -4792,7 +5443,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
4792 active->cxsr = wm->cxsr; 5443 active->cxsr = wm->cxsr;
4793 5444
4794 for (level = 0; level < active->num_levels; level++) { 5445 for (level = 0; level < active->num_levels; level++) {
4795 struct vlv_pipe_wm *raw = 5446 struct g4x_pipe_wm *raw =
4796 &crtc_state->wm.vlv.raw[level]; 5447 &crtc_state->wm.vlv.raw[level];
4797 5448
4798 active->sr[level].plane = wm->sr.plane; 5449 active->sr[level].plane = wm->sr.plane;
@@ -4852,7 +5503,7 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
4852 continue; 5503 continue;
4853 5504
4854 for (level = 0; level < wm_state->num_levels; level++) { 5505 for (level = 0; level < wm_state->num_levels; level++) {
4855 struct vlv_pipe_wm *raw = 5506 struct g4x_pipe_wm *raw =
4856 &crtc_state->wm.vlv.raw[level]; 5507 &crtc_state->wm.vlv.raw[level];
4857 5508
4858 raw->plane[plane_id] = 0; 5509 raw->plane[plane_id] = 0;
@@ -8036,6 +8687,12 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
8036 dev_priv->display.initial_watermarks = vlv_initial_watermarks; 8687 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
8037 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks; 8688 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
8038 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; 8689 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
8690 } else if (IS_G4X(dev_priv)) {
8691 g4x_setup_wm_latency(dev_priv);
8692 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
8693 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
8694 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
8695 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
8039 } else if (IS_PINEVIEW(dev_priv)) { 8696 } else if (IS_PINEVIEW(dev_priv)) {
8040 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), 8697 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
8041 dev_priv->is_ddr3, 8698 dev_priv->is_ddr3,
@@ -8051,8 +8708,6 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
8051 dev_priv->display.update_wm = NULL; 8708 dev_priv->display.update_wm = NULL;
8052 } else 8709 } else
8053 dev_priv->display.update_wm = pineview_update_wm; 8710 dev_priv->display.update_wm = pineview_update_wm;
8054 } else if (IS_G4X(dev_priv)) {
8055 dev_priv->display.update_wm = g4x_update_wm;
8056 } else if (IS_GEN4(dev_priv)) { 8711 } else if (IS_GEN4(dev_priv)) {
8057 dev_priv->display.update_wm = i965_update_wm; 8712 dev_priv->display.update_wm = i965_update_wm;
8058 } else if (IS_GEN3(dev_priv)) { 8713 } else if (IS_GEN3(dev_priv)) {
@@ -8135,9 +8790,9 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
8135 I915_WRITE_FW(GEN6_PCODE_DATA1, 0); 8790 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
8136 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); 8791 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
8137 8792
8138 if (intel_wait_for_register_fw(dev_priv, 8793 if (__intel_wait_for_register_fw(dev_priv,
8139 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, 8794 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
8140 500)) { 8795 500, 0, NULL)) {
8141 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); 8796 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
8142 return -ETIMEDOUT; 8797 return -ETIMEDOUT;
8143 } 8798 }
@@ -8180,9 +8835,9 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
8180 I915_WRITE_FW(GEN6_PCODE_DATA1, 0); 8835 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
8181 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); 8836 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
8182 8837
8183 if (intel_wait_for_register_fw(dev_priv, 8838 if (__intel_wait_for_register_fw(dev_priv,
8184 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, 8839 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
8185 500)) { 8840 500, 0, NULL)) {
8186 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); 8841 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
8187 return -ETIMEDOUT; 8842 return -ETIMEDOUT;
8188 } 8843 }
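
Both pcode waits above move to __intel_wait_for_register_fw() with three extra arguments; judging only from the call sites (500 or 1000, then 0, then NULL), these read as a microsecond busy-wait budget, a millisecond sleeping timeout and an optional out-pointer for the last register value, which is an assumption drawn from the call sites rather than something the hunk itself states. A generic two-phase poll along those lines, with a fake register in place of MMIO:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical register: a busy bit that clears after roughly 200 reads. */
static uint32_t fake_mmio_read(int *calls)
{
        return (++*calls < 200) ? 0x1 : 0x0;
}

/*
 * Poll until (reg & mask) == value: first a tight busy-wait bounded by
 * fast_timeout_us (the loop count simply stands in for a time budget),
 * then sleeping polls bounded by slow_timeout_ms.  Optionally hand back
 * the last value read.
 */
static int wait_for_register(uint32_t mask, uint32_t value,
                             unsigned int fast_timeout_us,
                             unsigned int slow_timeout_ms,
                             uint32_t *out)
{
        struct timespec ms = { .tv_nsec = 1000000L };   /* ~1 ms */
        uint32_t reg = 0;
        int calls = 0, ret = -1;
        unsigned int i;

        for (i = 0; i < fast_timeout_us && ret; i++)
                ret = ((reg = fake_mmio_read(&calls)) & mask) == value ? 0 : -1;

        for (i = 0; i < slow_timeout_ms && ret; i++) {
                nanosleep(&ms, NULL);
                ret = ((reg = fake_mmio_read(&calls)) & mask) == value ? 0 : -1;
        }

        if (out)
                *out = reg;
        return ret;     /* 0 on success, -1 on timeout */
}

int main(void)
{
        uint32_t last;
        int ret = wait_for_register(0x1, 0x0, 500, 10, &last);

        printf("ret=%d last=0x%x\n", ret, (unsigned int)last);
        return 0;
}
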
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 66a2b8b83972..acd1da9b62a3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -39,17 +39,27 @@
39 */ 39 */
40#define LEGACY_REQUEST_SIZE 200 40#define LEGACY_REQUEST_SIZE 200
41 41
42static int __intel_ring_space(int head, int tail, int size) 42static unsigned int __intel_ring_space(unsigned int head,
43 unsigned int tail,
44 unsigned int size)
43{ 45{
44 int space = head - tail; 46 /*
45 if (space <= 0) 47 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
46 space += size; 48 * same cacheline, the Head Pointer must not be greater than the Tail
47 return space - I915_RING_FREE_SPACE; 49 * Pointer."
50 */
51 GEM_BUG_ON(!is_power_of_2(size));
52 return (head - tail - CACHELINE_BYTES) & (size - 1);
48} 53}
49 54
50void intel_ring_update_space(struct intel_ring *ring) 55unsigned int intel_ring_update_space(struct intel_ring *ring)
51{ 56{
52 ring->space = __intel_ring_space(ring->head, ring->tail, ring->size); 57 unsigned int space;
58
59 space = __intel_ring_space(ring->head, ring->emit, ring->size);
60
61 ring->space = space;
62 return space;
53} 63}
54 64
55static int 65static int
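
The rewritten __intel_ring_space() above folds the old I915_RING_FREE_SPACE slack into a single unsigned subtraction and mask, which is only correct because the ring size is a power of two (hence the GEM_BUG_ON). A standalone sketch of the same arithmetic with made-up head/tail offsets:

#include <assert.h>
#include <stdio.h>

#define CACHELINE_BYTES 64u

/* Free bytes in a power-of-two ring, always leaving one cacheline spare
 * so the hardware head and the software tail never share a cacheline. */
static unsigned int ring_space(unsigned int head, unsigned int tail,
                               unsigned int size)
{
        assert((size & (size - 1)) == 0);       /* power of two */
        return (head - tail - CACHELINE_BYTES) & (size - 1);
}

int main(void)
{
        unsigned int size = 4096;

        /* Tail behind head: plain distance minus the spare cacheline. */
        printf("%u\n", ring_space(1024, 256, size));    /* 704 */
        /* Tail ahead of head: unsigned wrap-around does the modulo. */
        printf("%u\n", ring_space(256, 1024, size));    /* 3264 */
        /* Empty ring (head == tail) reports size - 64, never "full". */
        printf("%u\n", ring_space(0, 0, size));         /* 4032 */
        return 0;
}
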
@@ -538,9 +548,9 @@ static int init_ring_common(struct intel_engine_cs *engine)
538 I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID); 548 I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID);
539 549
540 /* If the head is still not zero, the ring is dead */ 550 /* If the head is still not zero, the ring is dead */
541 if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base), 551 if (intel_wait_for_register(dev_priv, RING_CTL(engine->mmio_base),
542 RING_VALID, RING_VALID, 552 RING_VALID, RING_VALID,
543 50)) { 553 50)) {
544 DRM_ERROR("%s initialization failed " 554 DRM_ERROR("%s initialization failed "
545 "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", 555 "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
546 engine->name, 556 engine->name,
@@ -774,8 +784,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
774 784
775 i915_gem_request_submit(request); 785 i915_gem_request_submit(request);
776 786
777 assert_ring_tail_valid(request->ring, request->tail); 787 I915_WRITE_TAIL(request->engine,
778 I915_WRITE_TAIL(request->engine, request->tail); 788 intel_ring_set_tail(request->ring, request->tail));
779} 789}
780 790
781static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) 791static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1259,6 +1269,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
1259{ 1269{
1260 struct drm_i915_private *dev_priv = engine->i915; 1270 struct drm_i915_private *dev_priv = engine->i915;
1261 1271
1272 GEM_BUG_ON(engine->id != RCS);
1273
1262 dev_priv->status_page_dmah = 1274 dev_priv->status_page_dmah =
1263 drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); 1275 drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
1264 if (!dev_priv->status_page_dmah) 1276 if (!dev_priv->status_page_dmah)
@@ -1270,17 +1282,18 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
1270 return 0; 1282 return 0;
1271} 1283}
1272 1284
1273int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias) 1285int intel_ring_pin(struct intel_ring *ring,
1286 struct drm_i915_private *i915,
1287 unsigned int offset_bias)
1274{ 1288{
1275 unsigned int flags; 1289 enum i915_map_type map = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
1276 enum i915_map_type map;
1277 struct i915_vma *vma = ring->vma; 1290 struct i915_vma *vma = ring->vma;
1291 unsigned int flags;
1278 void *addr; 1292 void *addr;
1279 int ret; 1293 int ret;
1280 1294
1281 GEM_BUG_ON(ring->vaddr); 1295 GEM_BUG_ON(ring->vaddr);
1282 1296
1283 map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;
1284 1297
1285 flags = PIN_GLOBAL; 1298 flags = PIN_GLOBAL;
1286 if (offset_bias) 1299 if (offset_bias)
@@ -1316,11 +1329,23 @@ err:
1316 return PTR_ERR(addr); 1329 return PTR_ERR(addr);
1317} 1330}
1318 1331
1332void intel_ring_reset(struct intel_ring *ring, u32 tail)
1333{
1334 GEM_BUG_ON(!list_empty(&ring->request_list));
1335 ring->tail = tail;
1336 ring->head = tail;
1337 ring->emit = tail;
1338 intel_ring_update_space(ring);
1339}
1340
1319void intel_ring_unpin(struct intel_ring *ring) 1341void intel_ring_unpin(struct intel_ring *ring)
1320{ 1342{
1321 GEM_BUG_ON(!ring->vma); 1343 GEM_BUG_ON(!ring->vma);
1322 GEM_BUG_ON(!ring->vaddr); 1344 GEM_BUG_ON(!ring->vaddr);
1323 1345
1346 /* Discard any unused bytes beyond that submitted to hw. */
1347 intel_ring_reset(ring, ring->tail);
1348
1324 if (i915_vma_is_map_and_fenceable(ring->vma)) 1349 if (i915_vma_is_map_and_fenceable(ring->vma))
1325 i915_vma_unpin_iomap(ring->vma); 1350 i915_vma_unpin_iomap(ring->vma);
1326 else 1351 else
@@ -1338,7 +1363,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
1338 1363
1339 obj = i915_gem_object_create_stolen(dev_priv, size); 1364 obj = i915_gem_object_create_stolen(dev_priv, size);
1340 if (!obj) 1365 if (!obj)
1341 obj = i915_gem_object_create(dev_priv, size); 1366 obj = i915_gem_object_create_internal(dev_priv, size);
1342 if (IS_ERR(obj)) 1367 if (IS_ERR(obj))
1343 return ERR_CAST(obj); 1368 return ERR_CAST(obj);
1344 1369
@@ -1369,8 +1394,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
1369 if (!ring) 1394 if (!ring)
1370 return ERR_PTR(-ENOMEM); 1395 return ERR_PTR(-ENOMEM);
1371 1396
1372 ring->engine = engine;
1373
1374 INIT_LIST_HEAD(&ring->request_list); 1397 INIT_LIST_HEAD(&ring->request_list);
1375 1398
1376 ring->size = size; 1399 ring->size = size;
@@ -1424,22 +1447,73 @@ static int context_pin(struct i915_gem_context *ctx)
1424 PIN_GLOBAL | PIN_HIGH); 1447 PIN_GLOBAL | PIN_HIGH);
1425} 1448}
1426 1449
1427static int intel_ring_context_pin(struct intel_engine_cs *engine, 1450static struct i915_vma *
1428 struct i915_gem_context *ctx) 1451alloc_context_vma(struct intel_engine_cs *engine)
1452{
1453 struct drm_i915_private *i915 = engine->i915;
1454 struct drm_i915_gem_object *obj;
1455 struct i915_vma *vma;
1456
1457 obj = i915_gem_object_create(i915, engine->context_size);
1458 if (IS_ERR(obj))
1459 return ERR_CAST(obj);
1460
1461 /*
1462 * Try to make the context utilize L3 as well as LLC.
1463 *
1464 * On VLV we don't have L3 controls in the PTEs so we
1465 * shouldn't touch the cache level, especially as that
1466 * would make the object snooped which might have a
1467 * negative performance impact.
1468 *
1469 * Snooping is required on non-llc platforms in execlist
1470 * mode, but since all GGTT accesses use PAT entry 0 we
1471 * get snooping anyway regardless of cache_level.
1472 *
1473 * This is only applicable for Ivy Bridge devices since
1474 * later platforms don't have L3 control bits in the PTE.
1475 */
1476 if (IS_IVYBRIDGE(i915)) {
1477 /* Ignore any error, regard it as a simple optimisation */
1478 i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
1479 }
1480
1481 vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
1482 if (IS_ERR(vma))
1483 i915_gem_object_put(obj);
1484
1485 return vma;
1486}
1487
1488static struct intel_ring *
1489intel_ring_context_pin(struct intel_engine_cs *engine,
1490 struct i915_gem_context *ctx)
1429{ 1491{
1430 struct intel_context *ce = &ctx->engine[engine->id]; 1492 struct intel_context *ce = &ctx->engine[engine->id];
1431 int ret; 1493 int ret;
1432 1494
1433 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 1495 lockdep_assert_held(&ctx->i915->drm.struct_mutex);
1434 1496
1435 if (ce->pin_count++) 1497 if (likely(ce->pin_count++))
1436 return 0; 1498 goto out;
1437 GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ 1499 GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
1438 1500
1501 if (!ce->state && engine->context_size) {
1502 struct i915_vma *vma;
1503
1504 vma = alloc_context_vma(engine);
1505 if (IS_ERR(vma)) {
1506 ret = PTR_ERR(vma);
1507 goto err;
1508 }
1509
1510 ce->state = vma;
1511 }
1512
1439 if (ce->state) { 1513 if (ce->state) {
1440 ret = context_pin(ctx); 1514 ret = context_pin(ctx);
1441 if (ret) 1515 if (ret)
1442 goto error; 1516 goto err;
1443 1517
1444 ce->state->obj->mm.dirty = true; 1518 ce->state->obj->mm.dirty = true;
1445 } 1519 }
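
intel_ring_context_pin() above now allocates the context state lazily on the first pin and treats every later pin as a plain refcount bump (the likely(ce->pin_count++) fast path), resetting the count if that first allocation fails. A minimal sketch of the allocate-on-first-pin pattern, with placeholder types standing in for the context and its vma:

#include <stdio.h>
#include <stdlib.h>

/* Placeholder for the lazily created per-context state (e.g. a vma). */
struct state {
        int dummy;
};

struct context {
        struct state *state;            /* NULL until the first pin */
        unsigned int pin_count;
};

static struct state *pin_context(struct context *ce)
{
        if (ce->pin_count++)            /* already pinned: just take a ref */
                return ce->state;

        if (!ce->state) {               /* first ever pin: allocate the state */
                ce->state = malloc(sizeof(*ce->state));
                if (!ce->state) {
                        ce->pin_count = 0;      /* undo the count on failure */
                        return NULL;
                }
        }
        return ce->state;
}

static void unpin_context(struct context *ce)
{
        if (--ce->pin_count == 0) {
                /* Last unpin: the real driver keeps the state cached for
                 * reuse; nothing is freed here either. */
        }
}

int main(void)
{
        struct context ce = { 0 };

        pin_context(&ce);       /* allocates */
        pin_context(&ce);       /* refcount only */
        unpin_context(&ce);
        unpin_context(&ce);
        printf("pin_count=%u state=%p\n", ce.pin_count, (void *)ce.state);
        free(ce.state);
        return 0;
}
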
@@ -1455,11 +1529,14 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
1455 ce->initialised = true; 1529 ce->initialised = true;
1456 1530
1457 i915_gem_context_get(ctx); 1531 i915_gem_context_get(ctx);
1458 return 0;
1459 1532
1460error: 1533out:
1534 /* One ringbuffer to rule them all */
1535 return engine->buffer;
1536
1537err:
1461 ce->pin_count = 0; 1538 ce->pin_count = 0;
1462 return ret; 1539 return ERR_PTR(ret);
1463} 1540}
1464 1541
1465static void intel_ring_context_unpin(struct intel_engine_cs *engine, 1542static void intel_ring_context_unpin(struct intel_engine_cs *engine,
@@ -1481,78 +1558,70 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine,
1481 1558
1482static int intel_init_ring_buffer(struct intel_engine_cs *engine) 1559static int intel_init_ring_buffer(struct intel_engine_cs *engine)
1483{ 1560{
1484 struct drm_i915_private *dev_priv = engine->i915;
1485 struct intel_ring *ring; 1561 struct intel_ring *ring;
1486 int ret; 1562 int err;
1487
1488 WARN_ON(engine->buffer);
1489 1563
1490 intel_engine_setup_common(engine); 1564 intel_engine_setup_common(engine);
1491 1565
1492 ret = intel_engine_init_common(engine); 1566 err = intel_engine_init_common(engine);
1493 if (ret) 1567 if (err)
1494 goto error; 1568 goto err;
1569
1570 if (HWS_NEEDS_PHYSICAL(engine->i915))
1571 err = init_phys_status_page(engine);
1572 else
1573 err = init_status_page(engine);
1574 if (err)
1575 goto err;
1495 1576
1496 ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); 1577 ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
1497 if (IS_ERR(ring)) { 1578 if (IS_ERR(ring)) {
1498 ret = PTR_ERR(ring); 1579 err = PTR_ERR(ring);
1499 goto error; 1580 goto err_hws;
1500 }
1501
1502 if (HWS_NEEDS_PHYSICAL(dev_priv)) {
1503 WARN_ON(engine->id != RCS);
1504 ret = init_phys_status_page(engine);
1505 if (ret)
1506 goto error;
1507 } else {
1508 ret = init_status_page(engine);
1509 if (ret)
1510 goto error;
1511 } 1581 }
1512 1582
1513 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ 1583 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
1514 ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE); 1584 err = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE);
1515 if (ret) { 1585 if (err)
1516 intel_ring_free(ring); 1586 goto err_ring;
1517 goto error; 1587
1518 } 1588 GEM_BUG_ON(engine->buffer);
1519 engine->buffer = ring; 1589 engine->buffer = ring;
1520 1590
1521 return 0; 1591 return 0;
1522 1592
1523error: 1593err_ring:
1524 intel_engine_cleanup(engine); 1594 intel_ring_free(ring);
1525 return ret; 1595err_hws:
1596 if (HWS_NEEDS_PHYSICAL(engine->i915))
1597 cleanup_phys_status_page(engine);
1598 else
1599 cleanup_status_page(engine);
1600err:
1601 intel_engine_cleanup_common(engine);
1602 return err;
1526} 1603}
1527 1604
1528void intel_engine_cleanup(struct intel_engine_cs *engine) 1605void intel_engine_cleanup(struct intel_engine_cs *engine)
1529{ 1606{
1530 struct drm_i915_private *dev_priv; 1607 struct drm_i915_private *dev_priv = engine->i915;
1531
1532 dev_priv = engine->i915;
1533 1608
1534 if (engine->buffer) { 1609 WARN_ON(INTEL_GEN(dev_priv) > 2 &&
1535 WARN_ON(INTEL_GEN(dev_priv) > 2 && 1610 (I915_READ_MODE(engine) & MODE_IDLE) == 0);
1536 (I915_READ_MODE(engine) & MODE_IDLE) == 0);
1537 1611
1538 intel_ring_unpin(engine->buffer); 1612 intel_ring_unpin(engine->buffer);
1539 intel_ring_free(engine->buffer); 1613 intel_ring_free(engine->buffer);
1540 engine->buffer = NULL;
1541 }
1542 1614
1543 if (engine->cleanup) 1615 if (engine->cleanup)
1544 engine->cleanup(engine); 1616 engine->cleanup(engine);
1545 1617
1546 if (HWS_NEEDS_PHYSICAL(dev_priv)) { 1618 if (HWS_NEEDS_PHYSICAL(dev_priv))
1547 WARN_ON(engine->id != RCS);
1548 cleanup_phys_status_page(engine); 1619 cleanup_phys_status_page(engine);
1549 } else { 1620 else
1550 cleanup_status_page(engine); 1621 cleanup_status_page(engine);
1551 }
1552 1622
1553 intel_engine_cleanup_common(engine); 1623 intel_engine_cleanup_common(engine);
1554 1624
1555 engine->i915 = NULL;
1556 dev_priv->engine[engine->id] = NULL; 1625 dev_priv->engine[engine->id] = NULL;
1557 kfree(engine); 1626 kfree(engine);
1558} 1627}
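
The reworked intel_init_ring_buffer() above adopts the usual unwind-in-reverse-order ladder: each later failure jumps to a label that tears down only what was already set up (err_ring frees the ring, err_hws drops the status page, err undoes the common setup). A tiny illustration of the idiom with hypothetical setup and teardown steps:

#include <stdio.h>

static int step_a(void) { puts("setup a"); return 0; }
static int step_b(void) { puts("setup b"); return 0; }
static int step_c(void) { puts("setup c"); return -1; }  /* pretend failure */

static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

/* Acquire a, then b, then c; on failure unwind only what succeeded. */
static int init_all(void)
{
        int err;

        err = step_a();
        if (err)
                goto err;

        err = step_b();
        if (err)
                goto err_a;

        err = step_c();
        if (err)
                goto err_b;

        return 0;

err_b:
        undo_b();
err_a:
        undo_a();
err:
        return err;
}

int main(void)
{
        printf("init_all() = %d\n", init_all());
        return 0;
}
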
@@ -1562,8 +1631,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
1562 struct intel_engine_cs *engine; 1631 struct intel_engine_cs *engine;
1563 enum intel_engine_id id; 1632 enum intel_engine_id id;
1564 1633
1634 /* Restart from the beginning of the rings for convenience */
1565 for_each_engine(engine, dev_priv, id) 1635 for_each_engine(engine, dev_priv, id)
1566 engine->buffer->head = engine->buffer->tail; 1636 intel_ring_reset(engine->buffer, 0);
1567} 1637}
1568 1638
1569static int ring_request_alloc(struct drm_i915_gem_request *request) 1639static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1578,9 +1648,6 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
1578 */ 1648 */
1579 request->reserved_space += LEGACY_REQUEST_SIZE; 1649 request->reserved_space += LEGACY_REQUEST_SIZE;
1580 1650
1581 GEM_BUG_ON(!request->engine->buffer);
1582 request->ring = request->engine->buffer;
1583
1584 cs = intel_ring_begin(request, 0); 1651 cs = intel_ring_begin(request, 0);
1585 if (IS_ERR(cs)) 1652 if (IS_ERR(cs))
1586 return PTR_ERR(cs); 1653 return PTR_ERR(cs);
@@ -1589,7 +1656,8 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
1589 return 0; 1656 return 0;
1590} 1657}
1591 1658
1592static int wait_for_space(struct drm_i915_gem_request *req, int bytes) 1659static noinline int wait_for_space(struct drm_i915_gem_request *req,
1660 unsigned int bytes)
1593{ 1661{
1594 struct intel_ring *ring = req->ring; 1662 struct intel_ring *ring = req->ring;
1595 struct drm_i915_gem_request *target; 1663 struct drm_i915_gem_request *target;
@@ -1597,8 +1665,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
1597 1665
1598 lockdep_assert_held(&req->i915->drm.struct_mutex); 1666 lockdep_assert_held(&req->i915->drm.struct_mutex);
1599 1667
1600 intel_ring_update_space(ring); 1668 if (intel_ring_update_space(ring) >= bytes)
1601 if (ring->space >= bytes)
1602 return 0; 1669 return 0;
1603 1670
1604 /* 1671 /*
@@ -1613,12 +1680,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
1613 GEM_BUG_ON(!req->reserved_space); 1680 GEM_BUG_ON(!req->reserved_space);
1614 1681
1615 list_for_each_entry(target, &ring->request_list, ring_link) { 1682 list_for_each_entry(target, &ring->request_list, ring_link) {
1616 unsigned space;
1617
1618 /* Would completion of this request free enough space? */ 1683 /* Would completion of this request free enough space? */
1619 space = __intel_ring_space(target->postfix, ring->tail, 1684 if (bytes <= __intel_ring_space(target->postfix,
1620 ring->size); 1685 ring->emit, ring->size))
1621 if (space >= bytes)
1622 break; 1686 break;
1623 } 1687 }
1624 1688
@@ -1638,59 +1702,64 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
1638 return 0; 1702 return 0;
1639} 1703}
1640 1704
1641u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) 1705u32 *intel_ring_begin(struct drm_i915_gem_request *req,
1706 unsigned int num_dwords)
1642{ 1707{
1643 struct intel_ring *ring = req->ring; 1708 struct intel_ring *ring = req->ring;
1644 int remain_actual = ring->size - ring->tail; 1709 const unsigned int remain_usable = ring->effective_size - ring->emit;
1645 int remain_usable = ring->effective_size - ring->tail; 1710 const unsigned int bytes = num_dwords * sizeof(u32);
1646 int bytes = num_dwords * sizeof(u32); 1711 unsigned int need_wrap = 0;
1647 int total_bytes, wait_bytes; 1712 unsigned int total_bytes;
1648 bool need_wrap = false;
1649 u32 *cs; 1713 u32 *cs;
1650 1714
1651 total_bytes = bytes + req->reserved_space; 1715 total_bytes = bytes + req->reserved_space;
1716 GEM_BUG_ON(total_bytes > ring->effective_size);
1652 1717
1653 if (unlikely(bytes > remain_usable)) { 1718 if (unlikely(total_bytes > remain_usable)) {
1654 /* 1719 const int remain_actual = ring->size - ring->emit;
1655 * Not enough space for the basic request. So need to flush 1720
1656 * out the remainder and then wait for base + reserved. 1721 if (bytes > remain_usable) {
1657 */ 1722 /*
1658 wait_bytes = remain_actual + total_bytes; 1723 * Not enough space for the basic request. So need to
1659 need_wrap = true; 1724 * flush out the remainder and then wait for
1660 } else if (unlikely(total_bytes > remain_usable)) { 1725 * base + reserved.
1661 /* 1726 */
1662 * The base request will fit but the reserved space 1727 total_bytes += remain_actual;
1663 * falls off the end. So we don't need an immediate wrap 1728 need_wrap = remain_actual | 1;
1664 * and only need to effectively wait for the reserved 1729 } else {
1665 * size space from the start of ringbuffer. 1730 /*
1666 */ 1731 * The base request will fit but the reserved space
1667 wait_bytes = remain_actual + req->reserved_space; 1732 * falls off the end. So we don't need an immediate
1668 } else { 1733 * wrap and only need to effectively wait for the
1669 /* No wrapping required, just waiting. */ 1734 * reserved size from the start of ringbuffer.
1670 wait_bytes = total_bytes; 1735 */
1736 total_bytes = req->reserved_space + remain_actual;
1737 }
1671 } 1738 }
1672 1739
1673 if (wait_bytes > ring->space) { 1740 if (unlikely(total_bytes > ring->space)) {
1674 int ret = wait_for_space(req, wait_bytes); 1741 int ret = wait_for_space(req, total_bytes);
1675 if (unlikely(ret)) 1742 if (unlikely(ret))
1676 return ERR_PTR(ret); 1743 return ERR_PTR(ret);
1677 } 1744 }
1678 1745
1679 if (unlikely(need_wrap)) { 1746 if (unlikely(need_wrap)) {
1680 GEM_BUG_ON(remain_actual > ring->space); 1747 need_wrap &= ~1;
1681 GEM_BUG_ON(ring->tail + remain_actual > ring->size); 1748 GEM_BUG_ON(need_wrap > ring->space);
1749 GEM_BUG_ON(ring->emit + need_wrap > ring->size);
1682 1750
1683 /* Fill the tail with MI_NOOP */ 1751 /* Fill the tail with MI_NOOP */
1684 memset(ring->vaddr + ring->tail, 0, remain_actual); 1752 memset(ring->vaddr + ring->emit, 0, need_wrap);
1685 ring->tail = 0; 1753 ring->emit = 0;
1686 ring->space -= remain_actual; 1754 ring->space -= need_wrap;
1687 } 1755 }
1688 1756
1689 GEM_BUG_ON(ring->tail > ring->size - bytes); 1757 GEM_BUG_ON(ring->emit > ring->size - bytes);
1690 cs = ring->vaddr + ring->tail; 1758 GEM_BUG_ON(ring->space < bytes);
1691 ring->tail += bytes; 1759 cs = ring->vaddr + ring->emit;
1760 GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes));
1761 ring->emit += bytes;
1692 ring->space -= bytes; 1762 ring->space -= bytes;
1693 GEM_BUG_ON(ring->space < 0);
1694 1763
1695 return cs; 1764 return cs;
1696} 1765}
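
In the new intel_ring_begin() above, need_wrap carries both a flag and a byte count: remain_actual | 1 keeps the value non-zero even when there are zero bytes left before the wrap point, and need_wrap &= ~1 later recovers the count, which is safe because emit only ever advances in whole dwords, so bit 0 is never part of a real byte count. A short sketch of that encode/decode with made-up ring offsets:

#include <assert.h>
#include <stdio.h>

int main(void)
{
        unsigned int size = 4096, emit = 4064;  /* 32 bytes to the wrap point */
        unsigned int remain_actual = size - emit;
        unsigned int need_wrap;

        /* Encode: keep the byte count but force the value non-zero so it
         * still reads as "a wrap is needed" even when remain_actual == 0.
         * Works because emit advances in dwords, leaving bit 0 free. */
        assert((remain_actual & 1) == 0);
        need_wrap = remain_actual | 1;

        if (need_wrap) {
                unsigned int pad = need_wrap & ~1u;     /* decode the count */

                printf("pad %u bytes with NOOPs, then wrap to offset 0\n", pad);
        }
        return 0;
}
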
@@ -1699,7 +1768,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
1699int intel_ring_cacheline_align(struct drm_i915_gem_request *req) 1768int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
1700{ 1769{
1701 int num_dwords = 1770 int num_dwords =
1702 (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); 1771 (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
1703 u32 *cs; 1772 u32 *cs;
1704 1773
1705 if (num_dwords == 0) 1774 if (num_dwords == 0)
@@ -1736,11 +1805,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
1736 I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0); 1805 I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);
1737 1806
1738 /* Wait for the ring not to be idle, i.e. for it to wake up. */ 1807 /* Wait for the ring not to be idle, i.e. for it to wake up. */
1739 if (intel_wait_for_register_fw(dev_priv, 1808 if (__intel_wait_for_register_fw(dev_priv,
1740 GEN6_BSD_SLEEP_PSMI_CONTROL, 1809 GEN6_BSD_SLEEP_PSMI_CONTROL,
1741 GEN6_BSD_SLEEP_INDICATOR, 1810 GEN6_BSD_SLEEP_INDICATOR,
1742 0, 1811 0,
1743 50)) 1812 1000, 0, NULL))
1744 DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); 1813 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
1745 1814
1746 /* Now that the ring is fully powered up, update the tail */ 1815 /* Now that the ring is fully powered up, update the tail */
@@ -2182,20 +2251,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
2182 return intel_init_ring_buffer(engine); 2251 return intel_init_ring_buffer(engine);
2183} 2252}
2184 2253
2185/**
2186 * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
2187 */
2188int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine)
2189{
2190 struct drm_i915_private *dev_priv = engine->i915;
2191
2192 intel_ring_default_vfuncs(dev_priv, engine);
2193
2194 engine->emit_flush = gen6_bsd_ring_flush;
2195
2196 return intel_init_ring_buffer(engine);
2197}
2198
2199int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) 2254int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
2200{ 2255{
2201 struct drm_i915_private *dev_priv = engine->i915; 2256 struct drm_i915_private *dev_priv = engine->i915;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a82a0807f64d..6aa20ac8cde3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -17,17 +17,6 @@
17#define CACHELINE_BYTES 64 17#define CACHELINE_BYTES 64
18#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t)) 18#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
19 19
20/*
21 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
22 * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
23 * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
24 *
25 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
26 * cacheline, the Head Pointer must not be greater than the Tail
27 * Pointer."
28 */
29#define I915_RING_FREE_SPACE 64
30
31struct intel_hw_status_page { 20struct intel_hw_status_page {
32 struct i915_vma *vma; 21 struct i915_vma *vma;
33 u32 *page_addr; 22 u32 *page_addr;
@@ -139,16 +128,15 @@ struct intel_ring {
139 struct i915_vma *vma; 128 struct i915_vma *vma;
140 void *vaddr; 129 void *vaddr;
141 130
142 struct intel_engine_cs *engine;
143
144 struct list_head request_list; 131 struct list_head request_list;
145 132
146 u32 head; 133 u32 head;
147 u32 tail; 134 u32 tail;
135 u32 emit;
148 136
149 int space; 137 u32 space;
150 int size; 138 u32 size;
151 int effective_size; 139 u32 effective_size;
152}; 140};
153 141
154struct i915_gem_context; 142struct i915_gem_context;
@@ -189,15 +177,28 @@ enum intel_engine_id {
189 VECS 177 VECS
190}; 178};
191 179
180struct i915_priolist {
181 struct rb_node node;
182 struct list_head requests;
183 int priority;
184};
185
186#define INTEL_ENGINE_CS_MAX_NAME 8
187
192struct intel_engine_cs { 188struct intel_engine_cs {
193 struct drm_i915_private *i915; 189 struct drm_i915_private *i915;
194 const char *name; 190 char name[INTEL_ENGINE_CS_MAX_NAME];
195 enum intel_engine_id id; 191 enum intel_engine_id id;
196 unsigned int exec_id; 192 unsigned int uabi_id;
197 unsigned int hw_id; 193 unsigned int hw_id;
198 unsigned int guc_id; 194 unsigned int guc_id;
199 u32 mmio_base; 195
196 u8 class;
197 u8 instance;
198 u32 context_size;
199 u32 mmio_base;
200 unsigned int irq_shift; 200 unsigned int irq_shift;
201
201 struct intel_ring *buffer; 202 struct intel_ring *buffer;
202 struct intel_timeline *timeline; 203 struct intel_timeline *timeline;
203 204
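
struct i915_priolist above gives each priority level its own FIFO of requests, hung off the engine's execlist_queue rbtree, with default_priolist embedded in the engine presumably so the common single-priority case avoids an allocation (no_priolist appearing to flag when that shortcut cannot be used). The sketch below keeps the same bucket-per-priority idea but, purely to stay self-contained, uses a sorted singly-linked list in place of the rbtree; the container choice is not taken from the diff:

#include <stdio.h>
#include <stdlib.h>

/* Simplified priority bucket: a FIFO of request ids per priority level. */
struct priolist {
        struct priolist *next;
        int priority;
        int requests[8];        /* small fixed FIFO, enough for the sketch */
        int count;
};

static struct priolist *get_bucket(struct priolist **head, int prio)
{
        struct priolist **p = head, *pl;

        while (*p && (*p)->priority > prio)     /* highest priority first */
                p = &(*p)->next;

        if (*p && (*p)->priority == prio)       /* bucket already exists */
                return *p;

        pl = calloc(1, sizeof(*pl));            /* new bucket for this level */
        if (!pl)
                abort();
        pl->priority = prio;
        pl->next = *p;
        *p = pl;
        return pl;
}

static void submit(struct priolist **head, int prio, int request)
{
        struct priolist *pl = get_bucket(head, prio);

        pl->requests[pl->count++] = request;    /* FIFO within a priority */
}

int main(void)
{
        struct priolist *queue = NULL, *pl;

        submit(&queue, 0, 1);
        submit(&queue, 10, 2);
        submit(&queue, 0, 3);

        for (pl = queue; pl; pl = pl->next) {
                int i;

                printf("prio %d:", pl->priority);
                for (i = 0; i < pl->count; i++)
                        printf(" req %d", pl->requests[i]);
                printf("\n");
        }
        /* buckets are leaked; fine for a sketch */
        return 0;
}
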
@@ -265,8 +266,8 @@ struct intel_engine_cs {
265 266
266 void (*set_default_submission)(struct intel_engine_cs *engine); 267 void (*set_default_submission)(struct intel_engine_cs *engine);
267 268
268 int (*context_pin)(struct intel_engine_cs *engine, 269 struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
269 struct i915_gem_context *ctx); 270 struct i915_gem_context *ctx);
270 void (*context_unpin)(struct intel_engine_cs *engine, 271 void (*context_unpin)(struct intel_engine_cs *engine,
271 struct i915_gem_context *ctx); 272 struct i915_gem_context *ctx);
272 int (*request_alloc)(struct drm_i915_gem_request *req); 273 int (*request_alloc)(struct drm_i915_gem_request *req);
@@ -372,9 +373,18 @@ struct intel_engine_cs {
372 373
373 /* Execlists */ 374 /* Execlists */
374 struct tasklet_struct irq_tasklet; 375 struct tasklet_struct irq_tasklet;
376 struct i915_priolist default_priolist;
377 bool no_priolist;
375 struct execlist_port { 378 struct execlist_port {
376 struct drm_i915_gem_request *request; 379 struct drm_i915_gem_request *request_count;
377 unsigned int count; 380#define EXECLIST_COUNT_BITS 2
381#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
382#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
383#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
384#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
385#define port_set(p, packed) ((p)->request_count = (packed))
386#define port_isset(p) ((p)->request_count)
387#define port_index(p, e) ((p) - (e)->execlist_port)
378 GEM_DEBUG_DECL(u32 context_id); 388 GEM_DEBUG_DECL(u32 context_id);
379 } execlist_port[2]; 389 } execlist_port[2];
380 struct rb_root execlist_queue; 390 struct rb_root execlist_queue;
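
The execlist port now folds the request pointer and a small resubmission count into one word: port_pack()/port_unpack() above keep the count in the low EXECLIST_COUNT_BITS of the pointer, which only works while the pointer is aligned to at least that many bits. A standalone version of the trick using plain uintptr_t arithmetic instead of the driver's ptr_pack_bits() helpers:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define COUNT_BITS 2
#define COUNT_MASK ((uintptr_t)((1 << COUNT_BITS) - 1))

struct request {
        int seqno;
};

/* Pack a small counter into the low bits of a sufficiently aligned pointer. */
static void *pack(struct request *rq, unsigned int count)
{
        assert(((uintptr_t)rq & COUNT_MASK) == 0);      /* low bits must be free */
        assert(count <= COUNT_MASK);
        return (void *)((uintptr_t)rq | count);
}

static struct request *unpack(void *packed, unsigned int *count)
{
        *count = (uintptr_t)packed & COUNT_MASK;
        return (struct request *)((uintptr_t)packed & ~COUNT_MASK);
}

int main(void)
{
        struct request *rq = malloc(sizeof(*rq));       /* malloc alignment > 4 */
        unsigned int count;
        void *port;

        if (!rq)
                return 1;
        rq->seqno = 42;

        port = pack(rq, 1);                             /* submitted once */
        port = pack(unpack(port, &count), count + 1);   /* resubmitted */

        rq = unpack(port, &count);
        printf("seqno=%d count=%u\n", rq->seqno, count);
        free(rq);
        return 0;
}
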
@@ -487,7 +497,11 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
487 497
488struct intel_ring * 498struct intel_ring *
489intel_engine_create_ring(struct intel_engine_cs *engine, int size); 499intel_engine_create_ring(struct intel_engine_cs *engine, int size);
490int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias); 500int intel_ring_pin(struct intel_ring *ring,
501 struct drm_i915_private *i915,
502 unsigned int offset_bias);
503void intel_ring_reset(struct intel_ring *ring, u32 tail);
504unsigned int intel_ring_update_space(struct intel_ring *ring);
491void intel_ring_unpin(struct intel_ring *ring); 505void intel_ring_unpin(struct intel_ring *ring);
492void intel_ring_free(struct intel_ring *ring); 506void intel_ring_free(struct intel_ring *ring);
493 507
@@ -498,7 +512,8 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
498 512
499int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); 513int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
500 514
501u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); 515u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
516 unsigned int n);
502 517
503static inline void 518static inline void
504intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) 519intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
@@ -511,7 +526,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
511 * reserved for the command packet (i.e. the value passed to 526 * reserved for the command packet (i.e. the value passed to
512 * intel_ring_begin()). 527 * intel_ring_begin()).
513 */ 528 */
514 GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); 529 GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
515} 530}
516 531
517static inline u32 532static inline u32
@@ -538,9 +553,40 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
538 */ 553 */
539 GEM_BUG_ON(!IS_ALIGNED(tail, 8)); 554 GEM_BUG_ON(!IS_ALIGNED(tail, 8));
540 GEM_BUG_ON(tail >= ring->size); 555 GEM_BUG_ON(tail >= ring->size);
556
557 /*
558 * "Ring Buffer Use"
559 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
560 * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
561 * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
562 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
563 * same cacheline, the Head Pointer must not be greater than the Tail
564 * Pointer."
565 *
566 * We use ring->head as the last known location of the actual RING_HEAD,
567 * it may have advanced but in the worst case it is still the same
568 * as ring->head and so we should never program RING_TAIL to advance
569 * into the same cacheline as ring->head.
570 */
571#define cacheline(a) round_down(a, CACHELINE_BYTES)
572 GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
573 tail < ring->head);
574#undef cacheline
541} 575}
542 576
543void intel_ring_update_space(struct intel_ring *ring); 577static inline unsigned int
578intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
579{
580 /* Whilst writes to the tail are strictly ordered, there is no
581 * serialisation between readers and the writers. The tail may be
582 * read by i915_gem_request_retire() just as it is being updated
583 * by execlists, as although the breadcrumb is complete, the context
584 * switch hasn't been seen.
585 */
586 assert_ring_tail_valid(ring, tail);
587 ring->tail = tail;
588 return tail;
589}
544 590
545void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); 591void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
546 592
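
assert_ring_tail_valid() above enforces the quoted bspec rule by rounding both the proposed tail and the last known head down to a cacheline and rejecting a tail that lands in the head's cacheline while numerically behind it. A quick standalone version of that check with a few sample offsets:

#include <stdbool.h>
#include <stdio.h>

#define CACHELINE_BYTES 64u
#define cacheline(a) ((a) & ~(CACHELINE_BYTES - 1))     /* round_down */

/* Rule: if head and tail share a cacheline, tail must not be behind head. */
static bool tail_valid(unsigned int tail, unsigned int head)
{
        return !(cacheline(tail) == cacheline(head) && tail < head);
}

int main(void)
{
        /* Same cacheline (both in [128, 192)), tail behind head: rejected. */
        printf("%d\n", tail_valid(136, 160));   /* 0 */
        /* Same cacheline, tail ahead of head: fine. */
        printf("%d\n", tail_valid(184, 160));   /* 1 */
        /* Different cachelines: always fine. */
        printf("%d\n", tail_valid(64, 160));    /* 1 */
        return 0;
}
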
@@ -551,7 +597,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine);
551 597
552int intel_init_render_ring_buffer(struct intel_engine_cs *engine); 598int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
553int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); 599int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
554int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
555int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); 600int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
556int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); 601int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
557 602
@@ -652,7 +697,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
652 struct intel_wait *wait); 697 struct intel_wait *wait);
653void intel_engine_remove_wait(struct intel_engine_cs *engine, 698void intel_engine_remove_wait(struct intel_engine_cs *engine,
654 struct intel_wait *wait); 699 struct intel_wait *wait);
655void intel_engine_enable_signaling(struct drm_i915_gem_request *request); 700void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
701 bool wakeup);
656void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); 702void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
657 703
658static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) 704static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
@@ -685,6 +731,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
685bool intel_engine_is_idle(struct intel_engine_cs *engine); 731bool intel_engine_is_idle(struct intel_engine_cs *engine);
686bool intel_engines_are_idle(struct drm_i915_private *dev_priv); 732bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
687 733
734void intel_engines_mark_idle(struct drm_i915_private *i915);
688void intel_engines_reset_default_submission(struct drm_i915_private *i915); 735void intel_engines_reset_default_submission(struct drm_i915_private *i915);
689 736
690#endif /* _INTEL_RINGBUFFER_H_ */ 737#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index ef6fa87b2f8a..6cc181203135 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -2875,11 +2875,10 @@ static bool intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo,
2875 2875
2876 BUILD_BUG_ON(sizeof(enhancements) != 2); 2876 BUILD_BUG_ON(sizeof(enhancements) != 2);
2877 2877
2878 enhancements.response = 0; 2878 if (!intel_sdvo_get_value(intel_sdvo,
2879 intel_sdvo_get_value(intel_sdvo, 2879 SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS,
2880 SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS, 2880 &enhancements, sizeof(enhancements)) ||
2881 &enhancements, sizeof(enhancements)); 2881 enhancements.response == 0) {
2882 if (enhancements.response == 0) {
2883 DRM_DEBUG_KMS("No enhancement is supported\n"); 2882 DRM_DEBUG_KMS("No enhancement is supported\n");
2884 return true; 2883 return true;
2885 } 2884 }
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 191e14ddde0c..c4bf19364e49 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -210,16 +210,14 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work
210} 210}
211 211
212static void 212static void
213skl_update_plane(struct drm_plane *drm_plane, 213skl_update_plane(struct intel_plane *plane,
214 const struct intel_crtc_state *crtc_state, 214 const struct intel_crtc_state *crtc_state,
215 const struct intel_plane_state *plane_state) 215 const struct intel_plane_state *plane_state)
216{ 216{
217 struct drm_device *dev = drm_plane->dev; 217 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
218 struct drm_i915_private *dev_priv = to_i915(dev); 218 const struct drm_framebuffer *fb = plane_state->base.fb;
219 struct intel_plane *intel_plane = to_intel_plane(drm_plane); 219 enum plane_id plane_id = plane->id;
220 struct drm_framebuffer *fb = plane_state->base.fb; 220 enum pipe pipe = plane->pipe;
221 enum plane_id plane_id = intel_plane->id;
222 enum pipe pipe = intel_plane->pipe;
223 u32 plane_ctl = plane_state->ctl; 221 u32 plane_ctl = plane_state->ctl;
224 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 222 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
225 u32 surf_addr = plane_state->main.offset; 223 u32 surf_addr = plane_state->main.offset;
@@ -288,13 +286,11 @@ skl_update_plane(struct drm_plane *drm_plane,
288} 286}
289 287
290static void 288static void
291skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) 289skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
292{ 290{
293 struct drm_device *dev = dplane->dev; 291 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
294 struct drm_i915_private *dev_priv = to_i915(dev); 292 enum plane_id plane_id = plane->id;
295 struct intel_plane *intel_plane = to_intel_plane(dplane); 293 enum pipe pipe = plane->pipe;
296 enum plane_id plane_id = intel_plane->id;
297 enum pipe pipe = intel_plane->pipe;
298 unsigned long irqflags; 294 unsigned long irqflags;
299 295
300 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 296 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -308,10 +304,10 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc)
308} 304}
309 305
310static void 306static void
311chv_update_csc(struct intel_plane *intel_plane, uint32_t format) 307chv_update_csc(struct intel_plane *plane, uint32_t format)
312{ 308{
313 struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev); 309 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
314 enum plane_id plane_id = intel_plane->id; 310 enum plane_id plane_id = plane->id;
315 311
316 /* Seems RGB data bypasses the CSC always */ 312 /* Seems RGB data bypasses the CSC always */
317 if (!format_is_yuv(format)) 313 if (!format_is_yuv(format))
@@ -411,16 +407,14 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
411} 407}
412 408
413static void 409static void
414vlv_update_plane(struct drm_plane *dplane, 410vlv_update_plane(struct intel_plane *plane,
415 const struct intel_crtc_state *crtc_state, 411 const struct intel_crtc_state *crtc_state,
416 const struct intel_plane_state *plane_state) 412 const struct intel_plane_state *plane_state)
417{ 413{
418 struct drm_device *dev = dplane->dev; 414 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
419 struct drm_i915_private *dev_priv = to_i915(dev); 415 const struct drm_framebuffer *fb = plane_state->base.fb;
420 struct intel_plane *intel_plane = to_intel_plane(dplane); 416 enum pipe pipe = plane->pipe;
421 struct drm_framebuffer *fb = plane_state->base.fb; 417 enum plane_id plane_id = plane->id;
422 enum pipe pipe = intel_plane->pipe;
423 enum plane_id plane_id = intel_plane->id;
424 u32 sprctl = plane_state->ctl; 418 u32 sprctl = plane_state->ctl;
425 u32 sprsurf_offset = plane_state->main.offset; 419 u32 sprsurf_offset = plane_state->main.offset;
426 u32 linear_offset; 420 u32 linear_offset;
@@ -442,7 +436,7 @@ vlv_update_plane(struct drm_plane *dplane,
442 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 436 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
443 437
444 if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) 438 if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B)
445 chv_update_csc(intel_plane, fb->format->format); 439 chv_update_csc(plane, fb->format->format);
446 440
447 if (key->flags) { 441 if (key->flags) {
448 I915_WRITE_FW(SPKEYMINVAL(pipe, plane_id), key->min_value); 442 I915_WRITE_FW(SPKEYMINVAL(pipe, plane_id), key->min_value);
@@ -469,13 +463,11 @@ vlv_update_plane(struct drm_plane *dplane,
469} 463}
470 464
471static void 465static void
472vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) 466vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
473{ 467{
474 struct drm_device *dev = dplane->dev; 468 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
475 struct drm_i915_private *dev_priv = to_i915(dev); 469 enum pipe pipe = plane->pipe;
476 struct intel_plane *intel_plane = to_intel_plane(dplane); 470 enum plane_id plane_id = plane->id;
477 enum pipe pipe = intel_plane->pipe;
478 enum plane_id plane_id = intel_plane->id;
479 unsigned long irqflags; 471 unsigned long irqflags;
480 472
481 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 473 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -545,15 +537,13 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
545} 537}
546 538
547static void 539static void
548ivb_update_plane(struct drm_plane *plane, 540ivb_update_plane(struct intel_plane *plane,
549 const struct intel_crtc_state *crtc_state, 541 const struct intel_crtc_state *crtc_state,
550 const struct intel_plane_state *plane_state) 542 const struct intel_plane_state *plane_state)
551{ 543{
552 struct drm_device *dev = plane->dev; 544 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
553 struct drm_i915_private *dev_priv = to_i915(dev); 545 const struct drm_framebuffer *fb = plane_state->base.fb;
554 struct intel_plane *intel_plane = to_intel_plane(plane); 546 enum pipe pipe = plane->pipe;
555 struct drm_framebuffer *fb = plane_state->base.fb;
556 enum pipe pipe = intel_plane->pipe;
557 u32 sprctl = plane_state->ctl, sprscale = 0; 547 u32 sprctl = plane_state->ctl, sprscale = 0;
558 u32 sprsurf_offset = plane_state->main.offset; 548 u32 sprsurf_offset = plane_state->main.offset;
559 u32 linear_offset; 549 u32 linear_offset;
@@ -600,7 +590,7 @@ ivb_update_plane(struct drm_plane *plane,
600 I915_WRITE_FW(SPRLINOFF(pipe), linear_offset); 590 I915_WRITE_FW(SPRLINOFF(pipe), linear_offset);
601 591
602 I915_WRITE_FW(SPRSIZE(pipe), (crtc_h << 16) | crtc_w); 592 I915_WRITE_FW(SPRSIZE(pipe), (crtc_h << 16) | crtc_w);
603 if (intel_plane->can_scale) 593 if (plane->can_scale)
604 I915_WRITE_FW(SPRSCALE(pipe), sprscale); 594 I915_WRITE_FW(SPRSCALE(pipe), sprscale);
605 I915_WRITE_FW(SPRCTL(pipe), sprctl); 595 I915_WRITE_FW(SPRCTL(pipe), sprctl);
606 I915_WRITE_FW(SPRSURF(pipe), 596 I915_WRITE_FW(SPRSURF(pipe),
@@ -611,19 +601,17 @@ ivb_update_plane(struct drm_plane *plane,
611} 601}
612 602
613static void 603static void
614ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) 604ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
615{ 605{
616 struct drm_device *dev = plane->dev; 606 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
617 struct drm_i915_private *dev_priv = to_i915(dev); 607 enum pipe pipe = plane->pipe;
618 struct intel_plane *intel_plane = to_intel_plane(plane);
619 int pipe = intel_plane->pipe;
620 unsigned long irqflags; 608 unsigned long irqflags;
621 609
622 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 610 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
623 611
624 I915_WRITE_FW(SPRCTL(pipe), 0); 612 I915_WRITE_FW(SPRCTL(pipe), 0);
625 /* Can't leave the scaler enabled... */ 613 /* Can't leave the scaler enabled... */
626 if (intel_plane->can_scale) 614 if (plane->can_scale)
627 I915_WRITE_FW(SPRSCALE(pipe), 0); 615 I915_WRITE_FW(SPRSCALE(pipe), 0);
628 616
629 I915_WRITE_FW(SPRSURF(pipe), 0); 617 I915_WRITE_FW(SPRSURF(pipe), 0);
@@ -632,7 +620,7 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
632 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); 620 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
633} 621}
634 622
635static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, 623static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
636 const struct intel_plane_state *plane_state) 624 const struct intel_plane_state *plane_state)
637{ 625{
638 struct drm_i915_private *dev_priv = 626 struct drm_i915_private *dev_priv =
@@ -686,15 +674,13 @@ static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state,
686} 674}
687 675
688static void 676static void
689ilk_update_plane(struct drm_plane *plane, 677g4x_update_plane(struct intel_plane *plane,
690 const struct intel_crtc_state *crtc_state, 678 const struct intel_crtc_state *crtc_state,
691 const struct intel_plane_state *plane_state) 679 const struct intel_plane_state *plane_state)
692{ 680{
693 struct drm_device *dev = plane->dev; 681 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
694 struct drm_i915_private *dev_priv = to_i915(dev); 682 const struct drm_framebuffer *fb = plane_state->base.fb;
695 struct intel_plane *intel_plane = to_intel_plane(plane); 683 enum pipe pipe = plane->pipe;
696 struct drm_framebuffer *fb = plane_state->base.fb;
697 int pipe = intel_plane->pipe;
698 u32 dvscntr = plane_state->ctl, dvsscale = 0; 684 u32 dvscntr = plane_state->ctl, dvsscale = 0;
699 u32 dvssurf_offset = plane_state->main.offset; 685 u32 dvssurf_offset = plane_state->main.offset;
700 u32 linear_offset; 686 u32 linear_offset;
@@ -747,12 +733,10 @@ ilk_update_plane(struct drm_plane *plane,
747} 733}
748 734
749static void 735static void
750ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) 736g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
751{ 737{
752 struct drm_device *dev = plane->dev; 738 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
753 struct drm_i915_private *dev_priv = to_i915(dev); 739 enum pipe pipe = plane->pipe;
754 struct intel_plane *intel_plane = to_intel_plane(plane);
755 int pipe = intel_plane->pipe;
756 unsigned long irqflags; 740 unsigned long irqflags;
757 741
758 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 742 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -768,14 +752,12 @@ ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
768} 752}
769 753
770static int 754static int
771intel_check_sprite_plane(struct drm_plane *plane, 755intel_check_sprite_plane(struct intel_plane *plane,
772 struct intel_crtc_state *crtc_state, 756 struct intel_crtc_state *crtc_state,
773 struct intel_plane_state *state) 757 struct intel_plane_state *state)
774{ 758{
775 struct drm_i915_private *dev_priv = to_i915(plane->dev); 759 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
776 struct drm_crtc *crtc = state->base.crtc; 760 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
777 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
778 struct intel_plane *intel_plane = to_intel_plane(plane);
779 struct drm_framebuffer *fb = state->base.fb; 761 struct drm_framebuffer *fb = state->base.fb;
780 int crtc_x, crtc_y; 762 int crtc_x, crtc_y;
781 unsigned int crtc_w, crtc_h; 763 unsigned int crtc_w, crtc_h;
@@ -797,7 +779,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
797 } 779 }
798 780
799 /* Don't modify another pipe's plane */ 781 /* Don't modify another pipe's plane */
800 if (intel_plane->pipe != intel_crtc->pipe) { 782 if (plane->pipe != crtc->pipe) {
801 DRM_DEBUG_KMS("Wrong plane <-> crtc mapping\n"); 783 DRM_DEBUG_KMS("Wrong plane <-> crtc mapping\n");
802 return -EINVAL; 784 return -EINVAL;
803 } 785 }
@@ -814,16 +796,16 @@ intel_check_sprite_plane(struct drm_plane *plane,
814 if (state->ckey.flags == I915_SET_COLORKEY_NONE) { 796 if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
815 can_scale = 1; 797 can_scale = 1;
816 min_scale = 1; 798 min_scale = 1;
817 max_scale = skl_max_scale(intel_crtc, crtc_state); 799 max_scale = skl_max_scale(crtc, crtc_state);
818 } else { 800 } else {
819 can_scale = 0; 801 can_scale = 0;
820 min_scale = DRM_PLANE_HELPER_NO_SCALING; 802 min_scale = DRM_PLANE_HELPER_NO_SCALING;
821 max_scale = DRM_PLANE_HELPER_NO_SCALING; 803 max_scale = DRM_PLANE_HELPER_NO_SCALING;
822 } 804 }
823 } else { 805 } else {
824 can_scale = intel_plane->can_scale; 806 can_scale = plane->can_scale;
825 max_scale = intel_plane->max_downscale << 16; 807 max_scale = plane->max_downscale << 16;
826 min_scale = intel_plane->can_scale ? 1 : (1 << 16); 808 min_scale = plane->can_scale ? 1 : (1 << 16);
827 } 809 }
828 810
829 /* 811 /*
@@ -967,7 +949,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
967 if (ret) 949 if (ret)
968 return ret; 950 return ret;
969 951
970 state->ctl = ilk_sprite_ctl(crtc_state, state); 952 state->ctl = g4x_sprite_ctl(crtc_state, state);
971 } 953 }
972 954
973 return 0; 955 return 0;
@@ -1027,7 +1009,7 @@ out:
1027 return ret; 1009 return ret;
1028} 1010}
1029 1011
1030static const uint32_t ilk_plane_formats[] = { 1012static const uint32_t g4x_plane_formats[] = {
1031 DRM_FORMAT_XRGB8888, 1013 DRM_FORMAT_XRGB8888,
1032 DRM_FORMAT_YUYV, 1014 DRM_FORMAT_YUYV,
1033 DRM_FORMAT_YVYU, 1015 DRM_FORMAT_YVYU,
@@ -1131,15 +1113,15 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
1131 intel_plane->can_scale = true; 1113 intel_plane->can_scale = true;
1132 intel_plane->max_downscale = 16; 1114 intel_plane->max_downscale = 16;
1133 1115
1134 intel_plane->update_plane = ilk_update_plane; 1116 intel_plane->update_plane = g4x_update_plane;
1135 intel_plane->disable_plane = ilk_disable_plane; 1117 intel_plane->disable_plane = g4x_disable_plane;
1136 1118
1137 if (IS_GEN6(dev_priv)) { 1119 if (IS_GEN6(dev_priv)) {
1138 plane_formats = snb_plane_formats; 1120 plane_formats = snb_plane_formats;
1139 num_plane_formats = ARRAY_SIZE(snb_plane_formats); 1121 num_plane_formats = ARRAY_SIZE(snb_plane_formats);
1140 } else { 1122 } else {
1141 plane_formats = ilk_plane_formats; 1123 plane_formats = g4x_plane_formats;
1142 num_plane_formats = ARRAY_SIZE(ilk_plane_formats); 1124 num_plane_formats = ARRAY_SIZE(g4x_plane_formats);
1143 } 1125 }
1144 } 1126 }
1145 1127
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index e077c2a9e694..784df024e230 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -48,41 +48,6 @@ struct intel_tv {
48 struct intel_encoder base; 48 struct intel_encoder base;
49 49
50 int type; 50 int type;
51 const char *tv_format;
52 int margin[4];
53 u32 save_TV_H_CTL_1;
54 u32 save_TV_H_CTL_2;
55 u32 save_TV_H_CTL_3;
56 u32 save_TV_V_CTL_1;
57 u32 save_TV_V_CTL_2;
58 u32 save_TV_V_CTL_3;
59 u32 save_TV_V_CTL_4;
60 u32 save_TV_V_CTL_5;
61 u32 save_TV_V_CTL_6;
62 u32 save_TV_V_CTL_7;
63 u32 save_TV_SC_CTL_1, save_TV_SC_CTL_2, save_TV_SC_CTL_3;
64
65 u32 save_TV_CSC_Y;
66 u32 save_TV_CSC_Y2;
67 u32 save_TV_CSC_U;
68 u32 save_TV_CSC_U2;
69 u32 save_TV_CSC_V;
70 u32 save_TV_CSC_V2;
71 u32 save_TV_CLR_KNOBS;
72 u32 save_TV_CLR_LEVEL;
73 u32 save_TV_WIN_POS;
74 u32 save_TV_WIN_SIZE;
75 u32 save_TV_FILTER_CTL_1;
76 u32 save_TV_FILTER_CTL_2;
77 u32 save_TV_FILTER_CTL_3;
78
79 u32 save_TV_H_LUMA[60];
80 u32 save_TV_H_CHROMA[60];
81 u32 save_TV_V_LUMA[43];
82 u32 save_TV_V_CHROMA[43];
83
84 u32 save_TV_DAC;
85 u32 save_TV_CTL;
86}; 51};
87 52
88struct video_levels { 53struct video_levels {
@@ -873,32 +838,18 @@ intel_disable_tv(struct intel_encoder *encoder,
873 I915_WRITE(TV_CTL, I915_READ(TV_CTL) & ~TV_ENC_ENABLE); 838 I915_WRITE(TV_CTL, I915_READ(TV_CTL) & ~TV_ENC_ENABLE);
874} 839}
875 840
876static const struct tv_mode * 841static const struct tv_mode *intel_tv_mode_find(struct drm_connector_state *conn_state)
877intel_tv_mode_lookup(const char *tv_format)
878{ 842{
879 int i; 843 int format = conn_state->tv.mode;
880
881 for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
882 const struct tv_mode *tv_mode = &tv_modes[i];
883 844
884 if (!strcmp(tv_format, tv_mode->name)) 845 return &tv_modes[format];
885 return tv_mode;
886 }
887 return NULL;
888}
889
890static const struct tv_mode *
891intel_tv_mode_find(struct intel_tv *intel_tv)
892{
893 return intel_tv_mode_lookup(intel_tv->tv_format);
894} 846}
895 847
896static enum drm_mode_status 848static enum drm_mode_status
897intel_tv_mode_valid(struct drm_connector *connector, 849intel_tv_mode_valid(struct drm_connector *connector,
898 struct drm_display_mode *mode) 850 struct drm_display_mode *mode)
899{ 851{
900 struct intel_tv *intel_tv = intel_attached_tv(connector); 852 const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
901 const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
902 int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; 853 int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
903 854
904 if (mode->clock > max_dotclk) 855 if (mode->clock > max_dotclk)
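
intel_tv_mode_find() above shrinks to indexing tv_modes[] with the integer mode kept in the connector's TV state, replacing the old strcmp() walk; the index comes from the DRM TV-mode property, which is built from the same table, so it is assumed to be in range. A small sketch contrasting the two lookups with a made-up mode table:

#include <stdio.h>
#include <string.h>

struct tv_mode {
        const char *name;
        int refresh;
};

static const struct tv_mode tv_modes[] = {
        { "NTSC-M", 60 },
        { "PAL",    50 },
};

/* Old style: linear search by name. */
static const struct tv_mode *lookup_by_name(const char *name)
{
        size_t i;

        for (i = 0; i < sizeof(tv_modes) / sizeof(tv_modes[0]); i++)
                if (!strcmp(name, tv_modes[i].name))
                        return &tv_modes[i];
        return NULL;
}

/* New style: the connector state already stores the table index. */
static const struct tv_mode *lookup_by_index(int mode)
{
        return &tv_modes[mode];
}

int main(void)
{
        printf("%d\n", lookup_by_name("PAL")->refresh); /* 50 */
        printf("%d\n", lookup_by_index(1)->refresh);    /* 50 */
        return 0;
}
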
@@ -925,8 +876,7 @@ intel_tv_compute_config(struct intel_encoder *encoder,
925 struct intel_crtc_state *pipe_config, 876 struct intel_crtc_state *pipe_config,
926 struct drm_connector_state *conn_state) 877 struct drm_connector_state *conn_state)
927{ 878{
928 struct intel_tv *intel_tv = enc_to_tv(encoder); 879 const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
929 const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
930 880
931 if (!tv_mode) 881 if (!tv_mode)
932 return false; 882 return false;
@@ -1032,7 +982,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
1032 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 982 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
1033 struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); 983 struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
1034 struct intel_tv *intel_tv = enc_to_tv(encoder); 984 struct intel_tv *intel_tv = enc_to_tv(encoder);
1035 const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); 985 const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
1036 u32 tv_ctl; 986 u32 tv_ctl;
1037 u32 scctl1, scctl2, scctl3; 987 u32 scctl1, scctl2, scctl3;
1038 int i, j; 988 int i, j;
@@ -1135,12 +1085,12 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
1135 else 1085 else
1136 ysize = 2*tv_mode->nbr_end + 1; 1086 ysize = 2*tv_mode->nbr_end + 1;
1137 1087
1138 xpos += intel_tv->margin[TV_MARGIN_LEFT]; 1088 xpos += conn_state->tv.margins.left;
1139 ypos += intel_tv->margin[TV_MARGIN_TOP]; 1089 ypos += conn_state->tv.margins.top;
1140 xsize -= (intel_tv->margin[TV_MARGIN_LEFT] + 1090 xsize -= (conn_state->tv.margins.left +
1141 intel_tv->margin[TV_MARGIN_RIGHT]); 1091 conn_state->tv.margins.right);
1142 ysize -= (intel_tv->margin[TV_MARGIN_TOP] + 1092 ysize -= (conn_state->tv.margins.top +
1143 intel_tv->margin[TV_MARGIN_BOTTOM]); 1093 conn_state->tv.margins.bottom);
1144 I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos); 1094 I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos);
1145 I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); 1095 I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize);
1146 1096
@@ -1288,7 +1238,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv,
1288static void intel_tv_find_better_format(struct drm_connector *connector) 1238static void intel_tv_find_better_format(struct drm_connector *connector)
1289{ 1239{
1290 struct intel_tv *intel_tv = intel_attached_tv(connector); 1240 struct intel_tv *intel_tv = intel_attached_tv(connector);
1291 const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); 1241 const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
1292 int i; 1242 int i;
1293 1243
1294 if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == 1244 if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) ==
@@ -1304,9 +1254,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
1304 break; 1254 break;
1305 } 1255 }
1306 1256
1307 intel_tv->tv_format = tv_mode->name; 1257 connector->state->tv.mode = i;
1308 drm_object_property_set_value(&connector->base,
1309 connector->dev->mode_config.tv_mode_property, i);
1310} 1258}
1311 1259
1312/** 1260/**
@@ -1347,16 +1295,15 @@ intel_tv_detect(struct drm_connector *connector,
1347 connector_status_connected; 1295 connector_status_connected;
1348 } else 1296 } else
1349 status = connector_status_unknown; 1297 status = connector_status_unknown;
1350 } else
1351 return connector->status;
1352 1298
1353 if (status != connector_status_connected) 1299 if (status == connector_status_connected) {
1354 return status; 1300 intel_tv->type = type;
1355 1301 intel_tv_find_better_format(connector);
1356 intel_tv->type = type; 1302 }
1357 intel_tv_find_better_format(connector);
1358 1303
1359 return connector_status_connected; 1304 return status;
1305 } else
1306 return connector->status;
1360} 1307}
1361 1308
1362static const struct input_res { 1309static const struct input_res {
@@ -1376,12 +1323,9 @@ static const struct input_res {
1376 * Choose preferred mode according to line number of TV format 1323
1377 */ 1324 */
1378static void 1325static void
1379intel_tv_chose_preferred_modes(struct drm_connector *connector, 1326intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode,
1380 struct drm_display_mode *mode_ptr) 1327 struct drm_display_mode *mode_ptr)
1381{ 1328{
1382 struct intel_tv *intel_tv = intel_attached_tv(connector);
1383 const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
1384
1385 if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) 1329 if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480)
1386 mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; 1330 mode_ptr->type |= DRM_MODE_TYPE_PREFERRED;
1387 else if (tv_mode->nbr_end > 480) { 1331 else if (tv_mode->nbr_end > 480) {
@@ -1404,8 +1348,7 @@ static int
1404intel_tv_get_modes(struct drm_connector *connector) 1348intel_tv_get_modes(struct drm_connector *connector)
1405{ 1349{
1406 struct drm_display_mode *mode_ptr; 1350 struct drm_display_mode *mode_ptr;
1407 struct intel_tv *intel_tv = intel_attached_tv(connector); 1351 const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
1408 const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
1409 int j, count = 0; 1352 int j, count = 0;
1410 u64 tmp; 1353 u64 tmp;
1411 1354
@@ -1448,7 +1391,7 @@ intel_tv_get_modes(struct drm_connector *connector)
1448 mode_ptr->clock = (int) tmp; 1391 mode_ptr->clock = (int) tmp;
1449 1392
1450 mode_ptr->type = DRM_MODE_TYPE_DRIVER; 1393 mode_ptr->type = DRM_MODE_TYPE_DRIVER;
1451 intel_tv_chose_preferred_modes(connector, mode_ptr); 1394 intel_tv_choose_preferred_modes(tv_mode, mode_ptr);
1452 drm_mode_probed_add(connector, mode_ptr); 1395 drm_mode_probed_add(connector, mode_ptr);
1453 count++; 1396 count++;
1454 } 1397 }
@@ -1463,74 +1406,47 @@ intel_tv_destroy(struct drm_connector *connector)
1463 kfree(connector); 1406 kfree(connector);
1464} 1407}
1465 1408
1466
1467static int
1468intel_tv_set_property(struct drm_connector *connector, struct drm_property *property,
1469 uint64_t val)
1470{
1471 struct drm_device *dev = connector->dev;
1472 struct intel_tv *intel_tv = intel_attached_tv(connector);
1473 struct drm_crtc *crtc = intel_tv->base.base.crtc;
1474 int ret = 0;
1475 bool changed = false;
1476
1477 ret = drm_object_property_set_value(&connector->base, property, val);
1478 if (ret < 0)
1479 goto out;
1480
1481 if (property == dev->mode_config.tv_left_margin_property &&
1482 intel_tv->margin[TV_MARGIN_LEFT] != val) {
1483 intel_tv->margin[TV_MARGIN_LEFT] = val;
1484 changed = true;
1485 } else if (property == dev->mode_config.tv_right_margin_property &&
1486 intel_tv->margin[TV_MARGIN_RIGHT] != val) {
1487 intel_tv->margin[TV_MARGIN_RIGHT] = val;
1488 changed = true;
1489 } else if (property == dev->mode_config.tv_top_margin_property &&
1490 intel_tv->margin[TV_MARGIN_TOP] != val) {
1491 intel_tv->margin[TV_MARGIN_TOP] = val;
1492 changed = true;
1493 } else if (property == dev->mode_config.tv_bottom_margin_property &&
1494 intel_tv->margin[TV_MARGIN_BOTTOM] != val) {
1495 intel_tv->margin[TV_MARGIN_BOTTOM] = val;
1496 changed = true;
1497 } else if (property == dev->mode_config.tv_mode_property) {
1498 if (val >= ARRAY_SIZE(tv_modes)) {
1499 ret = -EINVAL;
1500 goto out;
1501 }
1502 if (!strcmp(intel_tv->tv_format, tv_modes[val].name))
1503 goto out;
1504
1505 intel_tv->tv_format = tv_modes[val].name;
1506 changed = true;
1507 } else {
1508 ret = -EINVAL;
1509 goto out;
1510 }
1511
1512 if (changed && crtc)
1513 intel_crtc_restore_mode(crtc);
1514out:
1515 return ret;
1516}
1517
1518static const struct drm_connector_funcs intel_tv_connector_funcs = { 1409static const struct drm_connector_funcs intel_tv_connector_funcs = {
1519 .dpms = drm_atomic_helper_connector_dpms, 1410 .dpms = drm_atomic_helper_connector_dpms,
1520 .late_register = intel_connector_register, 1411 .late_register = intel_connector_register,
1521 .early_unregister = intel_connector_unregister, 1412 .early_unregister = intel_connector_unregister,
1522 .destroy = intel_tv_destroy, 1413 .destroy = intel_tv_destroy,
1523 .set_property = intel_tv_set_property, 1414 .set_property = drm_atomic_helper_connector_set_property,
1524 .atomic_get_property = intel_connector_atomic_get_property,
1525 .fill_modes = drm_helper_probe_single_connector_modes, 1415 .fill_modes = drm_helper_probe_single_connector_modes,
1526 .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, 1416 .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
1527 .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, 1417 .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
1528}; 1418};
1529 1419
1420static int intel_tv_atomic_check(struct drm_connector *connector,
1421 struct drm_connector_state *new_state)
1422{
1423 struct drm_crtc_state *new_crtc_state;
1424 struct drm_connector_state *old_state;
1425
1426 if (!new_state->crtc)
1427 return 0;
1428
1429 old_state = drm_atomic_get_old_connector_state(new_state->state, connector);
1430 new_crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc);
1431
1432 if (old_state->tv.mode != new_state->tv.mode ||
1433 old_state->tv.margins.left != new_state->tv.margins.left ||
1434 old_state->tv.margins.right != new_state->tv.margins.right ||
1435 old_state->tv.margins.top != new_state->tv.margins.top ||
1436 old_state->tv.margins.bottom != new_state->tv.margins.bottom) {
1437 /* Force a modeset. */
1438
1439 new_crtc_state->connectors_changed = true;
1440 }
1441
1442 return 0;
1443}
1444
1530static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = { 1445static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = {
1531 .detect_ctx = intel_tv_detect, 1446 .detect_ctx = intel_tv_detect,
1532 .mode_valid = intel_tv_mode_valid, 1447 .mode_valid = intel_tv_mode_valid,
1533 .get_modes = intel_tv_get_modes, 1448 .get_modes = intel_tv_get_modes,
1449 .atomic_check = intel_tv_atomic_check,
1534}; 1450};
1535 1451
1536static const struct drm_encoder_funcs intel_tv_enc_funcs = { 1452static const struct drm_encoder_funcs intel_tv_enc_funcs = {
@@ -1548,6 +1464,7 @@ intel_tv_init(struct drm_i915_private *dev_priv)
1548 u32 tv_dac_on, tv_dac_off, save_tv_dac; 1464 u32 tv_dac_on, tv_dac_off, save_tv_dac;
1549 const char *tv_format_names[ARRAY_SIZE(tv_modes)]; 1465 const char *tv_format_names[ARRAY_SIZE(tv_modes)];
1550 int i, initial_mode = 0; 1466 int i, initial_mode = 0;
1467 struct drm_connector_state *state;
1551 1468
1552 if ((I915_READ(TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED) 1469 if ((I915_READ(TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED)
1553 return; 1470 return;
@@ -1593,6 +1510,7 @@ intel_tv_init(struct drm_i915_private *dev_priv)
1593 1510
1594 intel_encoder = &intel_tv->base; 1511 intel_encoder = &intel_tv->base;
1595 connector = &intel_connector->base; 1512 connector = &intel_connector->base;
1513 state = connector->state;
1596 1514
1597 /* The documentation, for the older chipsets at least, recommends 1515
1598 * using a polling method rather than hotplug detection for TVs. 1516 * using a polling method rather than hotplug detection for TVs.
@@ -1630,12 +1548,12 @@ intel_tv_init(struct drm_i915_private *dev_priv)
1630 intel_tv->type = DRM_MODE_CONNECTOR_Unknown; 1548 intel_tv->type = DRM_MODE_CONNECTOR_Unknown;
1631 1549
1632 /* BIOS margin values */ 1550 /* BIOS margin values */
1633 intel_tv->margin[TV_MARGIN_LEFT] = 54; 1551 state->tv.margins.left = 54;
1634 intel_tv->margin[TV_MARGIN_TOP] = 36; 1552 state->tv.margins.top = 36;
1635 intel_tv->margin[TV_MARGIN_RIGHT] = 46; 1553 state->tv.margins.right = 46;
1636 intel_tv->margin[TV_MARGIN_BOTTOM] = 37; 1554 state->tv.margins.bottom = 37;
1637 1555
1638 intel_tv->tv_format = tv_modes[initial_mode].name; 1556 state->tv.mode = initial_mode;
1639 1557
1640 drm_connector_helper_add(connector, &intel_tv_connector_helper_funcs); 1558 drm_connector_helper_add(connector, &intel_tv_connector_helper_funcs);
1641 connector->interlace_allowed = false; 1559 connector->interlace_allowed = false;
@@ -1649,17 +1567,17 @@ intel_tv_init(struct drm_i915_private *dev_priv)
1649 tv_format_names); 1567 tv_format_names);
1650 1568
1651 drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property, 1569 drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property,
1652 initial_mode); 1570 state->tv.mode);
1653 drm_object_attach_property(&connector->base, 1571 drm_object_attach_property(&connector->base,
1654 dev->mode_config.tv_left_margin_property, 1572 dev->mode_config.tv_left_margin_property,
1655 intel_tv->margin[TV_MARGIN_LEFT]); 1573 state->tv.margins.left);
1656 drm_object_attach_property(&connector->base, 1574 drm_object_attach_property(&connector->base,
1657 dev->mode_config.tv_top_margin_property, 1575 dev->mode_config.tv_top_margin_property,
1658 intel_tv->margin[TV_MARGIN_TOP]); 1576 state->tv.margins.top);
1659 drm_object_attach_property(&connector->base, 1577 drm_object_attach_property(&connector->base,
1660 dev->mode_config.tv_right_margin_property, 1578 dev->mode_config.tv_right_margin_property,
1661 intel_tv->margin[TV_MARGIN_RIGHT]); 1579 state->tv.margins.right);
1662 drm_object_attach_property(&connector->base, 1580 drm_object_attach_property(&connector->base,
1663 dev->mode_config.tv_bottom_margin_property, 1581 dev->mode_config.tv_bottom_margin_property,
1664 intel_tv->margin[TV_MARGIN_BOTTOM]); 1582 state->tv.margins.bottom);
1665} 1583}
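
The intel_tv.c hunks above move the TV format and margin properties out of driver-private fields and into the shared drm_connector_state, with the new atomic_check hook forcing a modeset whenever one of those properties changes. A minimal, self-contained C sketch of that check; the struct and function names here are illustrative, not the driver's:

	#include <stdbool.h>
	#include <stdio.h>

	struct tv_state {
		int mode;
		int left, right, top, bottom;	/* margins */
	};

	static bool tv_state_needs_modeset(const struct tv_state *old,
					   const struct tv_state *new)
	{
		/* Any visible property change requires reprogramming the TV
		 * window registers, so the connector is flagged as changed. */
		return old->mode != new->mode ||
		       old->left != new->left || old->right != new->right ||
		       old->top != new->top || old->bottom != new->bottom;
	}

	int main(void)
	{
		struct tv_state old = { .mode = 0, .left = 54, .right = 46,
					.top = 36, .bottom = 37 };
		struct tv_state new = old;

		new.left = 60;	/* user adjusts the left margin property */
		printf("modeset needed: %d\n", tv_state_needs_modeset(&old, &new));
		return 0;
	}
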
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index c117424f1f50..7a7b07de28a3 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -94,12 +94,22 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)
94 i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv); 94 i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv);
95} 95}
96 96
97static void guc_write_irq_trigger(struct intel_guc *guc)
98{
99 struct drm_i915_private *dev_priv = guc_to_i915(guc);
100
101 I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
102}
103
97void intel_uc_init_early(struct drm_i915_private *dev_priv) 104void intel_uc_init_early(struct drm_i915_private *dev_priv)
98{ 105{
99 struct intel_guc *guc = &dev_priv->guc; 106 struct intel_guc *guc = &dev_priv->guc;
100 107
108 intel_guc_ct_init_early(&guc->ct);
109
101 mutex_init(&guc->send_mutex); 110 mutex_init(&guc->send_mutex);
102 guc->send = intel_guc_send_mmio; 111 guc->send = intel_guc_send_nop;
112 guc->notify = guc_write_irq_trigger;
103} 113}
104 114
105static void fetch_uc_fw(struct drm_i915_private *dev_priv, 115static void fetch_uc_fw(struct drm_i915_private *dev_priv,
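
With intel_uc_init_early() above, both the send path and the doorbell notification become per-firmware function pointers: guc->send starts out as intel_guc_send_nop and is only switched to a real backend once communication is enabled, while guc->notify wraps the interrupt trigger. A rough stand-alone model of that indirection; all names and values are illustrative:

	#include <stdio.h>

	struct guc {
		int (*send)(struct guc *guc, const unsigned int *action, unsigned int len);
		void (*notify)(struct guc *guc);
	};

	static int send_nop(struct guc *guc, const unsigned int *action, unsigned int len)
	{
		/* Default until communication is enabled: reject any send. */
		fprintf(stderr, "unexpected send: action=%#x\n", action[0]);
		return -1;
	}

	static void notify_irq(struct guc *guc)
	{
		/* Stands in for writing GUC_SEND_TRIGGER to the doorbell register. */
		puts("ring GuC doorbell");
	}

	int main(void)
	{
		struct guc guc = { .send = send_nop, .notify = notify_irq };
		unsigned int action[] = { 0x10 };

		guc.send(&guc, action, 1);	/* fails: communication not enabled yet */
		guc.notify(&guc);
		return 0;
	}
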
@@ -252,13 +262,81 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
252 __intel_uc_fw_fini(&dev_priv->huc.fw); 262 __intel_uc_fw_fini(&dev_priv->huc.fw);
253} 263}
254 264
265static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
266{
267 GEM_BUG_ON(!guc->send_regs.base);
268 GEM_BUG_ON(!guc->send_regs.count);
269 GEM_BUG_ON(i >= guc->send_regs.count);
270
271 return _MMIO(guc->send_regs.base + 4 * i);
272}
273
274static void guc_init_send_regs(struct intel_guc *guc)
275{
276 struct drm_i915_private *dev_priv = guc_to_i915(guc);
277 enum forcewake_domains fw_domains = 0;
278 unsigned int i;
279
280 guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0));
281 guc->send_regs.count = SOFT_SCRATCH_COUNT - 1;
282
283 for (i = 0; i < guc->send_regs.count; i++) {
284 fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
285 guc_send_reg(guc, i),
286 FW_REG_READ | FW_REG_WRITE);
287 }
288 guc->send_regs.fw_domains = fw_domains;
289}
290
291static void guc_capture_load_err_log(struct intel_guc *guc)
292{
293 if (!guc->log.vma || i915.guc_log_level < 0)
294 return;
295
296 if (!guc->load_err_log)
297 guc->load_err_log = i915_gem_object_get(guc->log.vma->obj);
298
299 return;
300}
301
302static void guc_free_load_err_log(struct intel_guc *guc)
303{
304 if (guc->load_err_log)
305 i915_gem_object_put(guc->load_err_log);
306}
307
308static int guc_enable_communication(struct intel_guc *guc)
309{
310 struct drm_i915_private *dev_priv = guc_to_i915(guc);
311
312 guc_init_send_regs(guc);
313
314 if (HAS_GUC_CT(dev_priv))
315 return intel_guc_enable_ct(guc);
316
317 guc->send = intel_guc_send_mmio;
318 return 0;
319}
320
321static void guc_disable_communication(struct intel_guc *guc)
322{
323 struct drm_i915_private *dev_priv = guc_to_i915(guc);
324
325 if (HAS_GUC_CT(dev_priv))
326 intel_guc_disable_ct(guc);
327
328 guc->send = intel_guc_send_nop;
329}
330
255int intel_uc_init_hw(struct drm_i915_private *dev_priv) 331int intel_uc_init_hw(struct drm_i915_private *dev_priv)
256{ 332{
333 struct intel_guc *guc = &dev_priv->guc;
257 int ret, attempts; 334 int ret, attempts;
258 335
259 if (!i915.enable_guc_loading) 336 if (!i915.enable_guc_loading)
260 return 0; 337 return 0;
261 338
339 guc_disable_communication(guc);
262 gen9_reset_guc_interrupts(dev_priv); 340 gen9_reset_guc_interrupts(dev_priv);
263 341
264 /* We need to notify the guc whenever we change the GGTT */ 342 /* We need to notify the guc whenever we change the GGTT */
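
guc_send_reg() above simply maps action word i to the 32-bit scratch register at send_regs.base + 4 * i, and guc_init_send_regs() records which forcewake domains cover that range. A small sketch of the index-to-offset mapping; the base offset and count below are assumed example values, not taken from the driver:

	#include <assert.h>
	#include <stdio.h>

	#define SEND_REGS_BASE	0xc180u	/* assumed example offset for SOFT_SCRATCH(0) */
	#define SEND_REGS_COUNT	15u	/* SOFT_SCRATCH_COUNT - 1: last slot carries the response */

	static unsigned int guc_send_reg_offset(unsigned int i)
	{
		assert(i < SEND_REGS_COUNT);	/* mirrors the GEM_BUG_ON range check */
		return SEND_REGS_BASE + 4 * i;
	}

	int main(void)
	{
		for (unsigned int i = 0; i < 4; i++)
			printf("action[%u] -> MMIO 0x%04x\n", i, guc_send_reg_offset(i));
		return 0;
	}
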
@@ -274,6 +352,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
274 goto err_guc; 352 goto err_guc;
275 } 353 }
276 354
355 /* init WOPCM */
356 I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
357 I915_WRITE(DMA_GUC_WOPCM_OFFSET,
358 GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC);
359
277 /* WaEnableuKernelHeaderValidFix:skl */ 360 /* WaEnableuKernelHeaderValidFix:skl */
278 /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ 361 /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
279 if (IS_GEN9(dev_priv)) 362 if (IS_GEN9(dev_priv))
@@ -301,7 +384,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
301 384
302 /* Did we succeed or run out of retries? */ 385
303 if (ret) 386 if (ret)
304 goto err_submission; 387 goto err_log_capture;
388
389 ret = guc_enable_communication(guc);
390 if (ret)
391 goto err_log_capture;
305 392
306 intel_guc_auth_huc(dev_priv); 393 intel_guc_auth_huc(dev_priv);
307 if (i915.enable_guc_submission) { 394 if (i915.enable_guc_submission) {
@@ -325,7 +412,10 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
325 * marks the GPU as wedged until reset). 412 * marks the GPU as wedged until reset).
326 */ 413 */
327err_interrupts: 414err_interrupts:
415 guc_disable_communication(guc);
328 gen9_disable_guc_interrupts(dev_priv); 416 gen9_disable_guc_interrupts(dev_priv);
417err_log_capture:
418 guc_capture_load_err_log(guc);
329err_submission: 419err_submission:
330 if (i915.enable_guc_submission) 420 if (i915.enable_guc_submission)
331 i915_guc_submission_fini(dev_priv); 421 i915_guc_submission_fini(dev_priv);
@@ -351,25 +441,25 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
351 if (!i915.enable_guc_loading) 441 if (!i915.enable_guc_loading)
352 return; 442 return;
353 443
354 if (i915.enable_guc_submission) { 444 guc_free_load_err_log(&dev_priv->guc);
445
446 if (i915.enable_guc_submission)
355 i915_guc_submission_disable(dev_priv); 447 i915_guc_submission_disable(dev_priv);
448
449 guc_disable_communication(&dev_priv->guc);
450
451 if (i915.enable_guc_submission) {
356 gen9_disable_guc_interrupts(dev_priv); 452 gen9_disable_guc_interrupts(dev_priv);
357 i915_guc_submission_fini(dev_priv); 453 i915_guc_submission_fini(dev_priv);
358 } 454 }
455
359 i915_ggtt_disable_guc(dev_priv); 456 i915_ggtt_disable_guc(dev_priv);
360} 457}
361 458
362/* 459int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len)
363 * Read GuC command/status register (SOFT_SCRATCH_0)
364 * Return true if it contains a response rather than a command
365 */
366static bool guc_recv(struct intel_guc *guc, u32 *status)
367{ 460{
368 struct drm_i915_private *dev_priv = guc_to_i915(guc); 461 WARN(1, "Unexpected send: action=%#x\n", *action);
369 462 return -ENODEV;
370 u32 val = I915_READ(SOFT_SCRATCH(0));
371 *status = val;
372 return INTEL_GUC_RECV_IS_RESPONSE(val);
373} 463}
374 464
375/* 465/*
@@ -382,30 +472,33 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
382 int i; 472 int i;
383 int ret; 473 int ret;
384 474
385 if (WARN_ON(len < 1 || len > 15)) 475 GEM_BUG_ON(!len);
386 return -EINVAL; 476 GEM_BUG_ON(len > guc->send_regs.count);
387 477
388 mutex_lock(&guc->send_mutex); 478 /* If CT is available, we expect to use MMIO only during init/fini */
389 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER); 479 GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
480 *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
481 *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
390 482
391 dev_priv->guc.action_count += 1; 483 mutex_lock(&guc->send_mutex);
392 dev_priv->guc.action_cmd = action[0]; 484 intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains);
393 485
394 for (i = 0; i < len; i++) 486 for (i = 0; i < len; i++)
395 I915_WRITE(SOFT_SCRATCH(i), action[i]); 487 I915_WRITE(guc_send_reg(guc, i), action[i]);
396 488
397 POSTING_READ(SOFT_SCRATCH(i - 1)); 489 POSTING_READ(guc_send_reg(guc, i - 1));
398 490
399 I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); 491 intel_guc_notify(guc);
400 492
401 /* 493 /*
402 * Fast commands should complete in less than 10us, so sample quickly 494 * No GuC command should ever take longer than 10ms.
403 * up to that length of time, then switch to a slower sleep-wait loop. 495 * Fast commands should still complete in 10us.
404 * No inte_guc_send command should ever take longer than 10ms.
405 */ 496 */
406 ret = wait_for_us(guc_recv(guc, &status), 10); 497 ret = __intel_wait_for_register_fw(dev_priv,
407 if (ret) 498 guc_send_reg(guc, 0),
408 ret = wait_for(guc_recv(guc, &status), 10); 499 INTEL_GUC_RECV_MASK,
500 INTEL_GUC_RECV_MASK,
501 10, 10, &status);
409 if (status != INTEL_GUC_STATUS_SUCCESS) { 502 if (status != INTEL_GUC_STATUS_SUCCESS) {
410 /* 503 /*
411 * Either the GuC explicitly returned an error (which 504 * Either the GuC explicitly returned an error (which
@@ -418,13 +511,9 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
418 DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" 511 DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;"
419 " ret=%d status=0x%08X response=0x%08X\n", 512 " ret=%d status=0x%08X response=0x%08X\n",
420 action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); 513 action[0], ret, status, I915_READ(SOFT_SCRATCH(15)));
421
422 dev_priv->guc.action_fail += 1;
423 dev_priv->guc.action_err = ret;
424 } 514 }
425 dev_priv->guc.action_status = status;
426 515
427 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); 516 intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains);
428 mutex_unlock(&guc->send_mutex); 517 mutex_unlock(&guc->send_mutex);
429 518
430 return ret; 519 return ret;
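
The reworked intel_guc_send_mmio() above writes the action words into the scratch registers, rings the doorbell via intel_guc_notify(), and then waits on the first scratch register with a fast poll followed by a slower wait. A toy model of that completion wait, with an invented mask, a fake register and loop counts standing in for the real timings:

	#include <stdbool.h>
	#include <stdio.h>

	#define RECV_MASK 0xf0000000u		/* stand-in for INTEL_GUC_RECV_MASK */

	static unsigned int scratch0;		/* pretend SOFT_SCRATCH(0) */

	static bool response_ready(void)
	{
		return (scratch0 & RECV_MASK) != 0;
	}

	static int wait_for_response(int fast_polls, int slow_polls)
	{
		for (int i = 0; i < fast_polls; i++)	/* tight 10us-style polling */
			if (response_ready())
				return 0;

		for (int i = 0; i < slow_polls; i++) {	/* slower 10ms-style wait */
			scratch0 = 0x10000000u;		/* simulate the GuC replying */
			if (response_ready())
				return 0;
		}
		return -1;				/* the driver returns -ETIMEDOUT */
	}

	int main(void)
	{
		int ret = wait_for_response(10, 10);

		printf("ret=%d status=%#x\n", ret, scratch0);
		return 0;
	}
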
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 4b7f73aeddac..69daf4c01cd0 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -27,7 +27,7 @@
27#include "intel_guc_fwif.h" 27#include "intel_guc_fwif.h"
28#include "i915_guc_reg.h" 28#include "i915_guc_reg.h"
29#include "intel_ringbuffer.h" 29#include "intel_ringbuffer.h"
30 30#include "intel_guc_ct.h"
31#include "i915_vma.h" 31#include "i915_vma.h"
32 32
33struct drm_i915_gem_request; 33struct drm_i915_gem_request;
@@ -59,12 +59,6 @@ struct drm_i915_gem_request;
59 * available in the work queue (note, the queue is shared, 59 * available in the work queue (note, the queue is shared,
60 * not per-engine). It is OK for this to be nonzero, but 60 * not per-engine). It is OK for this to be nonzero, but
61 * it should not be huge! 61 * it should not be huge!
62 * q_fail: failed to enqueue a work item. This should never happen,
63 * because we check for space beforehand.
64 * b_fail: failed to ring the doorbell. This should never happen, unless
65 * somehow the hardware misbehaves, or maybe if the GuC firmware
66 * crashes? We probably need to reset the GPU to recover.
67 * retcode: errno from last guc_submit()
68 */ 62 */
69struct i915_guc_client { 63struct i915_guc_client {
70 struct i915_vma *vma; 64 struct i915_vma *vma;
@@ -87,8 +81,6 @@ struct i915_guc_client {
87 uint32_t wq_tail; 81 uint32_t wq_tail;
88 uint32_t wq_rsvd; 82 uint32_t wq_rsvd;
89 uint32_t no_wq_space; 83 uint32_t no_wq_space;
90 uint32_t b_fail;
91 int retcode;
92 84
93 /* Per-engine counts of GuC submissions */ 85 /* Per-engine counts of GuC submissions */
94 uint64_t submissions[I915_NUM_ENGINES]; 86 uint64_t submissions[I915_NUM_ENGINES];
@@ -181,6 +173,10 @@ struct intel_guc_log {
181struct intel_guc { 173struct intel_guc {
182 struct intel_uc_fw fw; 174 struct intel_uc_fw fw;
183 struct intel_guc_log log; 175 struct intel_guc_log log;
176 struct intel_guc_ct ct;
177
178 /* Log snapshot if GuC errors during load */
179 struct drm_i915_gem_object *load_err_log;
184 180
185 /* intel_guc_recv interrupt related state */ 181 /* intel_guc_recv interrupt related state */
186 bool interrupts_enabled; 182 bool interrupts_enabled;
@@ -195,21 +191,21 @@ struct intel_guc {
195 DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); 191 DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
196 uint32_t db_cacheline; /* Cyclic counter mod pagesize */ 192 uint32_t db_cacheline; /* Cyclic counter mod pagesize */
197 193
198 /* Action status & statistics */ 194 /* GuC's FW specific registers used in MMIO send */
199 uint64_t action_count; /* Total commands issued */ 195 struct {
200 uint32_t action_cmd; /* Last command word */ 196 u32 base;
201 uint32_t action_status; /* Last return status */ 197 unsigned int count;
202 uint32_t action_fail; /* Total number of failures */ 198 enum forcewake_domains fw_domains;
203 int32_t action_err; /* Last error code */ 199 } send_regs;
204
205 uint64_t submissions[I915_NUM_ENGINES];
206 uint32_t last_seqno[I915_NUM_ENGINES];
207 200
208 /* To serialize the intel_guc_send actions */ 201 /* To serialize the intel_guc_send actions */
209 struct mutex send_mutex; 202 struct mutex send_mutex;
210 203
211 /* GuC's FW specific send function */ 204 /* GuC's FW specific send function */
212 int (*send)(struct intel_guc *guc, const u32 *data, u32 len); 205 int (*send)(struct intel_guc *guc, const u32 *data, u32 len);
206
207 /* GuC's FW specific notify function */
208 void (*notify)(struct intel_guc *guc);
213}; 209};
214 210
215struct intel_huc { 211struct intel_huc {
@@ -227,12 +223,19 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
227int intel_uc_init_hw(struct drm_i915_private *dev_priv); 223int intel_uc_init_hw(struct drm_i915_private *dev_priv);
228void intel_uc_fini_hw(struct drm_i915_private *dev_priv); 224void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
229int intel_guc_sample_forcewake(struct intel_guc *guc); 225int intel_guc_sample_forcewake(struct intel_guc *guc);
226int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
230int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); 227int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
228
231static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) 229static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
232{ 230{
233 return guc->send(guc, action, len); 231 return guc->send(guc, action, len);
234} 232}
235 233
234static inline void intel_guc_notify(struct intel_guc *guc)
235{
236 guc->notify(guc);
237}
238
236/* intel_guc_loader.c */ 239/* intel_guc_loader.c */
237int intel_guc_select_fw(struct intel_guc *guc); 240int intel_guc_select_fw(struct intel_guc *guc);
238int intel_guc_init_hw(struct intel_guc *guc); 241int intel_guc_init_hw(struct intel_guc *guc);
@@ -266,7 +269,7 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
266 269
267/* intel_huc.c */ 270/* intel_huc.c */
268void intel_huc_select_fw(struct intel_huc *huc); 271void intel_huc_select_fw(struct intel_huc *huc);
269int intel_huc_init_hw(struct intel_huc *huc); 272void intel_huc_init_hw(struct intel_huc *huc);
270void intel_guc_auth_huc(struct drm_i915_private *dev_priv); 273void intel_guc_auth_huc(struct drm_i915_private *dev_priv);
271 274
272#endif 275#endif
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 6d1ea26b2493..47d7ee1b5d86 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -29,6 +29,7 @@
29#include <linux/pm_runtime.h> 29#include <linux/pm_runtime.h>
30 30
31#define FORCEWAKE_ACK_TIMEOUT_MS 50 31#define FORCEWAKE_ACK_TIMEOUT_MS 50
32#define GT_FIFO_TIMEOUT_MS 10
32 33
33#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32((dev_priv__), (reg__)) 34#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32((dev_priv__), (reg__))
34 35
@@ -172,22 +173,6 @@ static void fw_domains_get_with_thread_status(struct drm_i915_private *dev_priv,
172 __gen6_gt_wait_for_thread_c0(dev_priv); 173 __gen6_gt_wait_for_thread_c0(dev_priv);
173} 174}
174 175
175static void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv)
176{
177 u32 gtfifodbg;
178
179 gtfifodbg = __raw_i915_read32(dev_priv, GTFIFODBG);
180 if (WARN(gtfifodbg, "GT wake FIFO error 0x%x\n", gtfifodbg))
181 __raw_i915_write32(dev_priv, GTFIFODBG, gtfifodbg);
182}
183
184static void fw_domains_put_with_fifo(struct drm_i915_private *dev_priv,
185 enum forcewake_domains fw_domains)
186{
187 fw_domains_put(dev_priv, fw_domains);
188 gen6_gt_check_fifodbg(dev_priv);
189}
190
191static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv) 176static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv)
192{ 177{
193 u32 count = __raw_i915_read32(dev_priv, GTFIFOCTL); 178 u32 count = __raw_i915_read32(dev_priv, GTFIFOCTL);
@@ -195,30 +180,27 @@ static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv)
195 return count & GT_FIFO_FREE_ENTRIES_MASK; 180 return count & GT_FIFO_FREE_ENTRIES_MASK;
196} 181}
197 182
198static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) 183static void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
199{ 184{
200 int ret = 0; 185 u32 n;
201 186
202 /* On VLV, FIFO will be shared by both SW and HW. 187 /* On VLV, FIFO will be shared by both SW and HW.
203 * So, we need to read the FREE_ENTRIES every time */ 188
204 if (IS_VALLEYVIEW(dev_priv)) 189 if (IS_VALLEYVIEW(dev_priv))
205 dev_priv->uncore.fifo_count = fifo_free_entries(dev_priv); 190 n = fifo_free_entries(dev_priv);
206 191 else
207 if (dev_priv->uncore.fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) { 192 n = dev_priv->uncore.fifo_count;
208 int loop = 500; 193
209 u32 fifo = fifo_free_entries(dev_priv); 194 if (n <= GT_FIFO_NUM_RESERVED_ENTRIES) {
210 195 if (wait_for_atomic((n = fifo_free_entries(dev_priv)) >
211 while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { 196 GT_FIFO_NUM_RESERVED_ENTRIES,
212 udelay(10); 197 GT_FIFO_TIMEOUT_MS)) {
213 fifo = fifo_free_entries(dev_priv); 198 DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n);
199 return;
214 } 200 }
215 if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES))
216 ++ret;
217 dev_priv->uncore.fifo_count = fifo;
218 } 201 }
219 dev_priv->uncore.fifo_count--;
220 202
221 return ret; 203 dev_priv->uncore.fifo_count = n - 1;
222} 204}
223 205
224static enum hrtimer_restart 206static enum hrtimer_restart
@@ -232,6 +214,9 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
232 214
233 assert_rpm_device_not_suspended(dev_priv); 215 assert_rpm_device_not_suspended(dev_priv);
234 216
217 if (xchg(&domain->active, false))
218 return HRTIMER_RESTART;
219
235 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 220 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
236 if (WARN_ON(domain->wake_count == 0)) 221 if (WARN_ON(domain->wake_count == 0))
237 domain->wake_count++; 222 domain->wake_count++;
@@ -262,6 +247,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv,
262 active_domains = 0; 247 active_domains = 0;
263 248
264 for_each_fw_domain(domain, dev_priv, tmp) { 249 for_each_fw_domain(domain, dev_priv, tmp) {
250 smp_store_mb(domain->active, false);
265 if (hrtimer_cancel(&domain->timer) == 0) 251 if (hrtimer_cancel(&domain->timer) == 0)
266 continue; 252 continue;
267 253
@@ -384,15 +370,35 @@ vlv_check_for_unclaimed_mmio(struct drm_i915_private *dev_priv)
384} 370}
385 371
386static bool 372static bool
373gen6_check_for_fifo_debug(struct drm_i915_private *dev_priv)
374{
375 u32 fifodbg;
376
377 fifodbg = __raw_i915_read32(dev_priv, GTFIFODBG);
378
379 if (unlikely(fifodbg)) {
380 DRM_DEBUG_DRIVER("GTFIFODBG = 0x%08x\n", fifodbg);
381 __raw_i915_write32(dev_priv, GTFIFODBG, fifodbg);
382 }
383
384 return fifodbg;
385}
386
387static bool
387check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) 388check_for_unclaimed_mmio(struct drm_i915_private *dev_priv)
388{ 389{
390 bool ret = false;
391
389 if (HAS_FPGA_DBG_UNCLAIMED(dev_priv)) 392 if (HAS_FPGA_DBG_UNCLAIMED(dev_priv))
390 return fpga_check_for_unclaimed_mmio(dev_priv); 393 ret |= fpga_check_for_unclaimed_mmio(dev_priv);
391 394
392 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 395 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
393 return vlv_check_for_unclaimed_mmio(dev_priv); 396 ret |= vlv_check_for_unclaimed_mmio(dev_priv);
394 397
395 return false; 398 if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv))
399 ret |= gen6_check_for_fifo_debug(dev_priv);
400
401 return ret;
396} 402}
397 403
398static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, 404static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
@@ -404,11 +410,6 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
404 if (check_for_unclaimed_mmio(dev_priv)) 410 if (check_for_unclaimed_mmio(dev_priv))
405 DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); 411 DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
406 412
407 /* clear out old GT FIFO errors */
408 if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv))
409 __raw_i915_write32(dev_priv, GTFIFODBG,
410 __raw_i915_read32(dev_priv, GTFIFODBG));
411
412 /* WaDisableShadowRegForCpd:chv */ 413 /* WaDisableShadowRegForCpd:chv */
413 if (IS_CHERRYVIEW(dev_priv)) { 414 if (IS_CHERRYVIEW(dev_priv)) {
414 __raw_i915_write32(dev_priv, GTFIFOCTL, 415 __raw_i915_write32(dev_priv, GTFIFOCTL,
@@ -454,9 +455,12 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
454 455
455 fw_domains &= dev_priv->uncore.fw_domains; 456 fw_domains &= dev_priv->uncore.fw_domains;
456 457
457 for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) 458 for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) {
458 if (domain->wake_count++) 459 if (domain->wake_count++) {
459 fw_domains &= ~domain->mask; 460 fw_domains &= ~domain->mask;
461 domain->active = true;
462 }
463 }
460 464
461 if (fw_domains) 465 if (fw_domains)
462 dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); 466 dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
@@ -521,8 +525,10 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
521 if (WARN_ON(domain->wake_count == 0)) 525 if (WARN_ON(domain->wake_count == 0))
522 continue; 526 continue;
523 527
524 if (--domain->wake_count) 528 if (--domain->wake_count) {
529 domain->active = true;
525 continue; 530 continue;
531 }
526 532
527 fw_domain_arm_timer(domain); 533 fw_domain_arm_timer(domain);
528 } 534 }
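
Together with the hunks above, forcewake domains now carry an "active" flag: a put that leaves other holders, or a get on an already-held domain, marks the domain active, and the release timer re-arms instead of dropping forcewake while that flag is set. A loose stand-alone model of that keepalive; the timer reference counting of the real driver is simplified away:

	#include <stdbool.h>
	#include <stdio.h>

	struct fw_domain {
		unsigned int wake_count;
		bool active;
		bool hw_awake;
	};

	static void fw_get(struct fw_domain *d)
	{
		if (d->wake_count++)
			d->active = true;	/* already held: just note recent use */
		else
			d->hw_awake = true;	/* first user: wake the hardware */
	}

	static void fw_put(struct fw_domain *d)
	{
		if (--d->wake_count)
			d->active = true;	/* other users remain, keep it marked busy */
		/* last user gone: the release timer decides when to drop forcewake */
	}

	static bool release_timer(struct fw_domain *d)
	{
		if (d->active) {		/* xchg(&domain->active, false) in the driver */
			d->active = false;
			return true;		/* HRTIMER_RESTART: hold forcewake a bit longer */
		}
		if (d->wake_count == 0)
			d->hw_awake = false;	/* finally release the hardware */
		return false;
	}

	int main(void)
	{
		struct fw_domain d = { 0 };

		fw_get(&d);
		fw_get(&d);	/* second user while held marks the domain active */
		fw_put(&d);
		fw_put(&d);
		printf("timer #1 restarts: %d\n", release_timer(&d));	/* 1 */
		printf("timer #2 restarts: %d\n", release_timer(&d));	/* 0 */
		printf("hardware still awake: %d\n", d.hw_awake);	/* 0 */
		return 0;
	}
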
@@ -804,6 +810,18 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
804 __unclaimed_reg_debug(dev_priv, reg, read, before); 810 __unclaimed_reg_debug(dev_priv, reg, read, before);
805} 811}
806 812
813enum decoupled_power_domain {
814 GEN9_DECOUPLED_PD_BLITTER = 0,
815 GEN9_DECOUPLED_PD_RENDER,
816 GEN9_DECOUPLED_PD_MEDIA,
817 GEN9_DECOUPLED_PD_ALL
818};
819
820enum decoupled_ops {
821 GEN9_DECOUPLED_OP_WRITE = 0,
822 GEN9_DECOUPLED_OP_READ
823};
824
807static const enum decoupled_power_domain fw2dpd_domain[] = { 825static const enum decoupled_power_domain fw2dpd_domain[] = {
808 GEN9_DECOUPLED_PD_RENDER, 826 GEN9_DECOUPLED_PD_RENDER,
809 GEN9_DECOUPLED_PD_BLITTER, 827 GEN9_DECOUPLED_PD_BLITTER,
@@ -1047,15 +1065,10 @@ __gen2_write(32)
1047#define __gen6_write(x) \ 1065#define __gen6_write(x) \
1048static void \ 1066static void \
1049gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ 1067gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
1050 u32 __fifo_ret = 0; \
1051 GEN6_WRITE_HEADER; \ 1068 GEN6_WRITE_HEADER; \
1052 if (NEEDS_FORCE_WAKE(offset)) { \ 1069 if (NEEDS_FORCE_WAKE(offset)) \
1053 __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \ 1070 __gen6_gt_wait_for_fifo(dev_priv); \
1054 } \
1055 __raw_i915_write##x(dev_priv, reg, val); \ 1071 __raw_i915_write##x(dev_priv, reg, val); \
1056 if (unlikely(__fifo_ret)) { \
1057 gen6_gt_check_fifodbg(dev_priv); \
1058 } \
1059 GEN6_WRITE_FOOTER; \ 1072 GEN6_WRITE_FOOTER; \
1060} 1073}
1061 1074
@@ -1108,19 +1121,19 @@ __gen6_write(32)
1108#undef GEN6_WRITE_FOOTER 1121#undef GEN6_WRITE_FOOTER
1109#undef GEN6_WRITE_HEADER 1122#undef GEN6_WRITE_HEADER
1110 1123
1111#define ASSIGN_WRITE_MMIO_VFUNCS(x) \ 1124#define ASSIGN_WRITE_MMIO_VFUNCS(i915, x) \
1112do { \ 1125do { \
1113 dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ 1126 (i915)->uncore.funcs.mmio_writeb = x##_write8; \
1114 dev_priv->uncore.funcs.mmio_writew = x##_write16; \ 1127 (i915)->uncore.funcs.mmio_writew = x##_write16; \
1115 dev_priv->uncore.funcs.mmio_writel = x##_write32; \ 1128 (i915)->uncore.funcs.mmio_writel = x##_write32; \
1116} while (0) 1129} while (0)
1117 1130
1118#define ASSIGN_READ_MMIO_VFUNCS(x) \ 1131#define ASSIGN_READ_MMIO_VFUNCS(i915, x) \
1119do { \ 1132do { \
1120 dev_priv->uncore.funcs.mmio_readb = x##_read8; \ 1133 (i915)->uncore.funcs.mmio_readb = x##_read8; \
1121 dev_priv->uncore.funcs.mmio_readw = x##_read16; \ 1134 (i915)->uncore.funcs.mmio_readw = x##_read16; \
1122 dev_priv->uncore.funcs.mmio_readl = x##_read32; \ 1135 (i915)->uncore.funcs.mmio_readl = x##_read32; \
1123 dev_priv->uncore.funcs.mmio_readq = x##_read64; \ 1136 (i915)->uncore.funcs.mmio_readq = x##_read64; \
1124} while (0) 1137} while (0)
1125 1138
1126 1139
@@ -1190,11 +1203,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
1190 FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9); 1203 FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9);
1191 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 1204 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
1192 dev_priv->uncore.funcs.force_wake_get = fw_domains_get; 1205 dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
1193 if (!IS_CHERRYVIEW(dev_priv)) 1206 dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
1194 dev_priv->uncore.funcs.force_wake_put =
1195 fw_domains_put_with_fifo;
1196 else
1197 dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
1198 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, 1207 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
1199 FORCEWAKE_VLV, FORCEWAKE_ACK_VLV); 1208 FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
1200 fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA, 1209 fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA,
@@ -1202,11 +1211,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
1202 } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { 1211 } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
1203 dev_priv->uncore.funcs.force_wake_get = 1212 dev_priv->uncore.funcs.force_wake_get =
1204 fw_domains_get_with_thread_status; 1213 fw_domains_get_with_thread_status;
1205 if (IS_HASWELL(dev_priv)) 1214 dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
1206 dev_priv->uncore.funcs.force_wake_put =
1207 fw_domains_put_with_fifo;
1208 else
1209 dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
1210 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, 1215 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
1211 FORCEWAKE_MT, FORCEWAKE_ACK_HSW); 1216 FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
1212 } else if (IS_IVYBRIDGE(dev_priv)) { 1217 } else if (IS_IVYBRIDGE(dev_priv)) {
@@ -1223,8 +1228,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
1223 */ 1228 */
1224 dev_priv->uncore.funcs.force_wake_get = 1229 dev_priv->uncore.funcs.force_wake_get =
1225 fw_domains_get_with_thread_status; 1230 fw_domains_get_with_thread_status;
1226 dev_priv->uncore.funcs.force_wake_put = 1231 dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
1227 fw_domains_put_with_fifo;
1228 1232
1229 /* We need to init first for ECOBUS access and then 1233 /* We need to init first for ECOBUS access and then
1230 * determine later if we want to reinit, in case of MT access is 1234 * determine later if we want to reinit, in case of MT access is
@@ -1242,7 +1246,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
1242 spin_lock_irq(&dev_priv->uncore.lock); 1246 spin_lock_irq(&dev_priv->uncore.lock);
1243 fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER); 1247 fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER);
1244 ecobus = __raw_i915_read32(dev_priv, ECOBUS); 1248 ecobus = __raw_i915_read32(dev_priv, ECOBUS);
1245 fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER); 1249 fw_domains_put(dev_priv, FORCEWAKE_RENDER);
1246 spin_unlock_irq(&dev_priv->uncore.lock); 1250 spin_unlock_irq(&dev_priv->uncore.lock);
1247 1251
1248 if (!(ecobus & FORCEWAKE_MT_ENABLE)) { 1252 if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
@@ -1254,8 +1258,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
1254 } else if (IS_GEN6(dev_priv)) { 1258 } else if (IS_GEN6(dev_priv)) {
1255 dev_priv->uncore.funcs.force_wake_get = 1259 dev_priv->uncore.funcs.force_wake_get =
1256 fw_domains_get_with_thread_status; 1260 fw_domains_get_with_thread_status;
1257 dev_priv->uncore.funcs.force_wake_put = 1261 dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
1258 fw_domains_put_with_fifo;
1259 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, 1262 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
1260 FORCEWAKE, FORCEWAKE_ACK); 1263 FORCEWAKE, FORCEWAKE_ACK);
1261 } 1264 }
@@ -1310,34 +1313,34 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
1310 i915_pmic_bus_access_notifier; 1313 i915_pmic_bus_access_notifier;
1311 1314
1312 if (IS_GEN(dev_priv, 2, 4) || intel_vgpu_active(dev_priv)) { 1315 if (IS_GEN(dev_priv, 2, 4) || intel_vgpu_active(dev_priv)) {
1313 ASSIGN_WRITE_MMIO_VFUNCS(gen2); 1316 ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen2);
1314 ASSIGN_READ_MMIO_VFUNCS(gen2); 1317 ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen2);
1315 } else if (IS_GEN5(dev_priv)) { 1318 } else if (IS_GEN5(dev_priv)) {
1316 ASSIGN_WRITE_MMIO_VFUNCS(gen5); 1319 ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen5);
1317 ASSIGN_READ_MMIO_VFUNCS(gen5); 1320 ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen5);
1318 } else if (IS_GEN(dev_priv, 6, 7)) { 1321 } else if (IS_GEN(dev_priv, 6, 7)) {
1319 ASSIGN_WRITE_MMIO_VFUNCS(gen6); 1322 ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen6);
1320 1323
1321 if (IS_VALLEYVIEW(dev_priv)) { 1324 if (IS_VALLEYVIEW(dev_priv)) {
1322 ASSIGN_FW_DOMAINS_TABLE(__vlv_fw_ranges); 1325 ASSIGN_FW_DOMAINS_TABLE(__vlv_fw_ranges);
1323 ASSIGN_READ_MMIO_VFUNCS(fwtable); 1326 ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
1324 } else { 1327 } else {
1325 ASSIGN_READ_MMIO_VFUNCS(gen6); 1328 ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6);
1326 } 1329 }
1327 } else if (IS_GEN8(dev_priv)) { 1330 } else if (IS_GEN8(dev_priv)) {
1328 if (IS_CHERRYVIEW(dev_priv)) { 1331 if (IS_CHERRYVIEW(dev_priv)) {
1329 ASSIGN_FW_DOMAINS_TABLE(__chv_fw_ranges); 1332 ASSIGN_FW_DOMAINS_TABLE(__chv_fw_ranges);
1330 ASSIGN_WRITE_MMIO_VFUNCS(fwtable); 1333 ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable);
1331 ASSIGN_READ_MMIO_VFUNCS(fwtable); 1334 ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
1332 1335
1333 } else { 1336 } else {
1334 ASSIGN_WRITE_MMIO_VFUNCS(gen8); 1337 ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8);
1335 ASSIGN_READ_MMIO_VFUNCS(gen6); 1338 ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6);
1336 } 1339 }
1337 } else { 1340 } else {
1338 ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); 1341 ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
1339 ASSIGN_WRITE_MMIO_VFUNCS(fwtable); 1342 ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable);
1340 ASSIGN_READ_MMIO_VFUNCS(fwtable); 1343 ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
1341 if (HAS_DECOUPLED_MMIO(dev_priv)) { 1344 if (HAS_DECOUPLED_MMIO(dev_priv)) {
1342 dev_priv->uncore.funcs.mmio_readl = 1345 dev_priv->uncore.funcs.mmio_readl =
1343 gen9_decoupled_read32; 1346 gen9_decoupled_read32;
@@ -1353,8 +1356,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
1353 1356
1354 i915_check_and_clear_faults(dev_priv); 1357 i915_check_and_clear_faults(dev_priv);
1355} 1358}
1356#undef ASSIGN_WRITE_MMIO_VFUNCS
1357#undef ASSIGN_READ_MMIO_VFUNCS
1358 1359
1359void intel_uncore_fini(struct drm_i915_private *dev_priv) 1360void intel_uncore_fini(struct drm_i915_private *dev_priv)
1360{ 1361{
@@ -1435,9 +1436,39 @@ out:
1435 return ret; 1436 return ret;
1436} 1437}
1437 1438
1438static int i915_reset_complete(struct pci_dev *pdev) 1439static void gen3_stop_rings(struct drm_i915_private *dev_priv)
1440{
1441 struct intel_engine_cs *engine;
1442 enum intel_engine_id id;
1443
1444 for_each_engine(engine, dev_priv, id) {
1445 const u32 base = engine->mmio_base;
1446 const i915_reg_t mode = RING_MI_MODE(base);
1447
1448 I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));
1449 if (intel_wait_for_register_fw(dev_priv,
1450 mode,
1451 MODE_IDLE,
1452 MODE_IDLE,
1453 500))
1454 DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n",
1455 engine->name);
1456
1457 I915_WRITE_FW(RING_CTL(base), 0);
1458 I915_WRITE_FW(RING_HEAD(base), 0);
1459 I915_WRITE_FW(RING_TAIL(base), 0);
1460
1461 /* Check acts as a post */
1462 if (I915_READ_FW(RING_HEAD(base)) != 0)
1463 DRM_DEBUG_DRIVER("%s: ring head not parked\n",
1464 engine->name);
1465 }
1466}
1467
1468static bool i915_reset_complete(struct pci_dev *pdev)
1439{ 1469{
1440 u8 gdrst; 1470 u8 gdrst;
1471
1441 pci_read_config_byte(pdev, I915_GDRST, &gdrst); 1472 pci_read_config_byte(pdev, I915_GDRST, &gdrst);
1442 return (gdrst & GRDOM_RESET_STATUS) == 0; 1473 return (gdrst & GRDOM_RESET_STATUS) == 0;
1443} 1474}
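
gen3_stop_rings() above asks each engine to stop, waits for MI_MODE to report idle, and then clears the ring control, head and tail registers before the actual reset is issued. An illustrative stand-alone model of that per-engine sequence; the bit positions and the fake engine struct are placeholders only:

	#include <stdio.h>

	#define STOP_RING	(1u << 8)	/* illustrative bit positions only */
	#define MODE_IDLE	(1u << 9)

	struct fake_engine {
		const char *name;
		unsigned int mi_mode;
		unsigned int ctl, head, tail;
	};

	static void stop_engine(struct fake_engine *e)
	{
		e->mi_mode |= STOP_RING;	/* _MASKED_BIT_ENABLE(STOP_RING) in the driver */
		e->mi_mode |= MODE_IDLE;	/* pretend the hardware drained and went idle */

		if (!(e->mi_mode & MODE_IDLE))
			fprintf(stderr, "%s: timed out on STOP_RING\n", e->name);

		/* Park the ring so the following reset starts from a clean state. */
		e->ctl = e->head = e->tail = 0;

		if (e->head != 0)	/* the read-back also acts as a posting read */
			fprintf(stderr, "%s: ring head not parked\n", e->name);
	}

	int main(void)
	{
		struct fake_engine rcs = { .name = "rcs", .ctl = 1, .head = 0x40, .tail = 0x80 };

		stop_engine(&rcs);
		printf("%s parked: ctl=%u head=%u tail=%u\n",
		       rcs.name, rcs.ctl, rcs.head, rcs.tail);
		return 0;
	}
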
@@ -1448,15 +1479,16 @@ static int i915_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask
1448 1479
1449 /* assert reset for at least 20 usec */ 1480 /* assert reset for at least 20 usec */
1450 pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); 1481 pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
1451 udelay(20); 1482 usleep_range(50, 200);
1452 pci_write_config_byte(pdev, I915_GDRST, 0); 1483 pci_write_config_byte(pdev, I915_GDRST, 0);
1453 1484
1454 return wait_for(i915_reset_complete(pdev), 500); 1485 return wait_for(i915_reset_complete(pdev), 500);
1455} 1486}
1456 1487
1457static int g4x_reset_complete(struct pci_dev *pdev) 1488static bool g4x_reset_complete(struct pci_dev *pdev)
1458{ 1489{
1459 u8 gdrst; 1490 u8 gdrst;
1491
1460 pci_read_config_byte(pdev, I915_GDRST, &gdrst); 1492 pci_read_config_byte(pdev, I915_GDRST, &gdrst);
1461 return (gdrst & GRDOM_RESET_ENABLE) == 0; 1493 return (gdrst & GRDOM_RESET_ENABLE) == 0;
1462} 1494}
@@ -1464,6 +1496,10 @@ static int g4x_reset_complete(struct pci_dev *pdev)
1464static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) 1496static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
1465{ 1497{
1466 struct pci_dev *pdev = dev_priv->drm.pdev; 1498 struct pci_dev *pdev = dev_priv->drm.pdev;
1499
1500 /* Stop engines before we reset; see g4x_do_reset() below for why. */
1501 gen3_stop_rings(dev_priv);
1502
1467 pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); 1503 pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
1468 return wait_for(g4x_reset_complete(pdev), 500); 1504 return wait_for(g4x_reset_complete(pdev), 500);
1469} 1505}
@@ -1473,29 +1509,41 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
1473 struct pci_dev *pdev = dev_priv->drm.pdev; 1509 struct pci_dev *pdev = dev_priv->drm.pdev;
1474 int ret; 1510 int ret;
1475 1511
1476 pci_write_config_byte(pdev, I915_GDRST,
1477 GRDOM_RENDER | GRDOM_RESET_ENABLE);
1478 ret = wait_for(g4x_reset_complete(pdev), 500);
1479 if (ret)
1480 return ret;
1481
1482 /* WaVcpClkGateDisableForMediaReset:ctg,elk */ 1512 /* WaVcpClkGateDisableForMediaReset:ctg,elk */
1483 I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); 1513 I915_WRITE(VDECCLK_GATE_D,
1514 I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
1484 POSTING_READ(VDECCLK_GATE_D); 1515 POSTING_READ(VDECCLK_GATE_D);
1485 1516
1517 /* We stop engines, otherwise we might get a failed reset and a
1518 * dead gpu (on elk).
1519 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
1520 */
1521 gen3_stop_rings(dev_priv);
1522
1486 pci_write_config_byte(pdev, I915_GDRST, 1523 pci_write_config_byte(pdev, I915_GDRST,
1487 GRDOM_MEDIA | GRDOM_RESET_ENABLE); 1524 GRDOM_MEDIA | GRDOM_RESET_ENABLE);
1488 ret = wait_for(g4x_reset_complete(pdev), 500); 1525 ret = wait_for(g4x_reset_complete(pdev), 500);
1489 if (ret) 1526 if (ret) {
1490 return ret; 1527 DRM_DEBUG_DRIVER("Wait for media reset failed\n");
1528 goto out;
1529 }
1491 1530
1492 /* WaVcpClkGateDisableForMediaReset:ctg,elk */ 1531 pci_write_config_byte(pdev, I915_GDRST,
1493 I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); 1532 GRDOM_RENDER | GRDOM_RESET_ENABLE);
1494 POSTING_READ(VDECCLK_GATE_D); 1533 ret = wait_for(g4x_reset_complete(pdev), 500);
1534 if (ret) {
1535 DRM_DEBUG_DRIVER("Wait for render reset failed\n");
1536 goto out;
1537 }
1495 1538
1539out:
1496 pci_write_config_byte(pdev, I915_GDRST, 0); 1540 pci_write_config_byte(pdev, I915_GDRST, 0);
1497 1541
1498 return 0; 1542 I915_WRITE(VDECCLK_GATE_D,
1543 I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
1544 POSTING_READ(VDECCLK_GATE_D);
1545
1546 return ret;
1499} 1547}
1500 1548
1501static int ironlake_do_reset(struct drm_i915_private *dev_priv, 1549static int ironlake_do_reset(struct drm_i915_private *dev_priv,
@@ -1503,41 +1551,51 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv,
1503{ 1551{
1504 int ret; 1552 int ret;
1505 1553
1506 I915_WRITE(ILK_GDSR, 1554 I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
1507 ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
1508 ret = intel_wait_for_register(dev_priv, 1555 ret = intel_wait_for_register(dev_priv,
1509 ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, 1556 ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
1510 500); 1557 500);
1511 if (ret) 1558 if (ret) {
1512 return ret; 1559 DRM_DEBUG_DRIVER("Wait for render reset failed\n");
1560 goto out;
1561 }
1513 1562
1514 I915_WRITE(ILK_GDSR, 1563 I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
1515 ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
1516 ret = intel_wait_for_register(dev_priv, 1564 ret = intel_wait_for_register(dev_priv,
1517 ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, 1565 ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
1518 500); 1566 500);
1519 if (ret) 1567 if (ret) {
1520 return ret; 1568 DRM_DEBUG_DRIVER("Wait for media reset failed\n");
1569 goto out;
1570 }
1521 1571
1572out:
1522 I915_WRITE(ILK_GDSR, 0); 1573 I915_WRITE(ILK_GDSR, 0);
1523 1574 POSTING_READ(ILK_GDSR);
1524 return 0; 1575 return ret;
1525} 1576}
1526 1577
1527/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ 1578/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
1528static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, 1579static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
1529 u32 hw_domain_mask) 1580 u32 hw_domain_mask)
1530{ 1581{
1582 int err;
1583
1531 /* GEN6_GDRST is not in the gt power well, no need to check 1584 /* GEN6_GDRST is not in the gt power well, no need to check
1532 * for fifo space for the write or forcewake the chip for 1585 * for fifo space for the write or forcewake the chip for
1533 * the read 1586 * the read
1534 */ 1587 */
1535 __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask); 1588 __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
1536 1589
1537 /* Spin waiting for the device to ack the reset requests */ 1590 /* Wait for the device to ack the reset requests */
1538 return intel_wait_for_register_fw(dev_priv, 1591 err = intel_wait_for_register_fw(dev_priv,
1539 GEN6_GDRST, hw_domain_mask, 0, 1592 GEN6_GDRST, hw_domain_mask, 0,
1540 500); 1593 500);
1594 if (err)
1595 DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
1596 hw_domain_mask);
1597
1598 return err;
1541} 1599}
1542 1600
1543/** 1601/**
@@ -1585,19 +1643,23 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
1585} 1643}
1586 1644
1587/** 1645/**
1588 * intel_wait_for_register_fw - wait until register matches expected state 1646 * __intel_wait_for_register_fw - wait until register matches expected state
1589 * @dev_priv: the i915 device 1647 * @dev_priv: the i915 device
1590 * @reg: the register to read 1648 * @reg: the register to read
1591 * @mask: mask to apply to register value 1649 * @mask: mask to apply to register value
1592 * @value: expected value 1650 * @value: expected value
1593 * @timeout_ms: timeout in millisecond 1651 * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait
1652 * @slow_timeout_ms: slow timeout in millisecond
1653 * @out_value: optional placeholder to hold register value
1594 * 1654 *
1595 * This routine waits until the target register @reg contains the expected 1655 * This routine waits until the target register @reg contains the expected
1596 * @value after applying the @mask, i.e. it waits until :: 1656 * @value after applying the @mask, i.e. it waits until ::
1597 * 1657 *
1598 * (I915_READ_FW(reg) & mask) == value 1658 * (I915_READ_FW(reg) & mask) == value
1599 * 1659 *
1600 * Otherwise, the wait will timeout after @timeout_ms milliseconds. 1660 * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
1661 * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
1662 * must be not larger than 20,0000 microseconds.
1601 * 1663 *
1602 * Note that this routine assumes the caller holds forcewake asserted, it is 1664 * Note that this routine assumes the caller holds forcewake asserted, it is
1603 * not suitable for very long waits. See intel_wait_for_register() if you 1665 * not suitable for very long waits. See intel_wait_for_register() if you
@@ -1606,16 +1668,31 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
1606 * 1668 *
1607 * Returns 0 if the register matches the desired condition, or -ETIMEOUT. 1669 * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
1608 */ 1670 */
1609int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, 1671int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
1610 i915_reg_t reg, 1672 i915_reg_t reg,
1611 const u32 mask, 1673 u32 mask,
1612 const u32 value, 1674 u32 value,
1613 const unsigned long timeout_ms) 1675 unsigned int fast_timeout_us,
1614{ 1676 unsigned int slow_timeout_ms,
1615#define done ((I915_READ_FW(reg) & mask) == value) 1677 u32 *out_value)
1616 int ret = wait_for_us(done, 2); 1678{
1617 if (ret) 1679 u32 uninitialized_var(reg_value);
1618 ret = wait_for(done, timeout_ms); 1680#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
1681 int ret;
1682
1683 /* Catch any overuse of this function */
1684 might_sleep_if(slow_timeout_ms);
1685 GEM_BUG_ON(fast_timeout_us > 20000);
1686
1687 ret = -ETIMEDOUT;
1688 if (fast_timeout_us && fast_timeout_us <= 20000)
1689 ret = _wait_for_atomic(done, fast_timeout_us, 0);
1690 if (ret && slow_timeout_ms)
1691 ret = wait_for(done, slow_timeout_ms);
1692
1693 if (out_value)
1694 *out_value = reg_value;
1695
1619 return ret; 1696 return ret;
1620#undef done 1697#undef done
1621} 1698}
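
__intel_wait_for_register_fw() above now takes separate fast (atomic-safe) and slow (sleeping) timeouts and can hand back the last sampled register value through @out_value. A self-contained sketch of that two-stage wait contract, using a plain variable and loop counts in place of MMIO and real timings:

	#include <stdio.h>

	static unsigned int fake_reg;

	static unsigned int read_reg(void)
	{
		/* Simulate the hardware flipping the bit after a few reads. */
		static int reads;

		if (++reads > 5)
			fake_reg |= 0x1;
		return fake_reg;
	}

	static int wait_for_register(unsigned int mask, unsigned int value,
				     int fast_polls, int slow_polls,
				     unsigned int *out_value)
	{
		unsigned int v = 0;
		int ret = -1;			/* -ETIMEDOUT in the driver */

		for (int i = 0; i < fast_polls && ret; i++)	/* atomic-safe phase */
			if (((v = read_reg()) & mask) == value)
				ret = 0;
		for (int i = 0; i < slow_polls && ret; i++)	/* sleeping phase */
			if (((v = read_reg()) & mask) == value)
				ret = 0;

		if (out_value)
			*out_value = v;		/* caller sees the last sampled value */
		return ret;
	}

	int main(void)
	{
		unsigned int status;
		int ret = wait_for_register(0x1, 0x1, 3, 10, &status);

		printf("ret=%d status=%#x\n", ret, status);
		return 0;
	}
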
@@ -1639,18 +1716,26 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
1639 */ 1716 */
1640int intel_wait_for_register(struct drm_i915_private *dev_priv, 1717int intel_wait_for_register(struct drm_i915_private *dev_priv,
1641 i915_reg_t reg, 1718 i915_reg_t reg,
1642 const u32 mask, 1719 u32 mask,
1643 const u32 value, 1720 u32 value,
1644 const unsigned long timeout_ms) 1721 unsigned int timeout_ms)
1645{ 1722{
1646
1647 unsigned fw = 1723 unsigned fw =
1648 intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); 1724 intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
1649 int ret; 1725 int ret;
1650 1726
1651 intel_uncore_forcewake_get(dev_priv, fw); 1727 might_sleep();
1652 ret = wait_for_us((I915_READ_FW(reg) & mask) == value, 2); 1728
1653 intel_uncore_forcewake_put(dev_priv, fw); 1729 spin_lock_irq(&dev_priv->uncore.lock);
1730 intel_uncore_forcewake_get__locked(dev_priv, fw);
1731
1732 ret = __intel_wait_for_register_fw(dev_priv,
1733 reg, mask, value,
1734 2, 0, NULL);
1735
1736 intel_uncore_forcewake_put__locked(dev_priv, fw);
1737 spin_unlock_irq(&dev_priv->uncore.lock);
1738
1654 if (ret) 1739 if (ret)
1655 ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value, 1740 ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value,
1656 timeout_ms); 1741 timeout_ms);
@@ -1658,7 +1743,7 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv,
1658 return ret; 1743 return ret;
1659} 1744}
1660 1745
1661static int gen8_request_engine_reset(struct intel_engine_cs *engine) 1746static int gen8_reset_engine_start(struct intel_engine_cs *engine)
1662{ 1747{
1663 struct drm_i915_private *dev_priv = engine->i915; 1748 struct drm_i915_private *dev_priv = engine->i915;
1664 int ret; 1749 int ret;
@@ -1677,7 +1762,7 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine)
1677 return ret; 1762 return ret;
1678} 1763}
1679 1764
1680static void gen8_unrequest_engine_reset(struct intel_engine_cs *engine) 1765static void gen8_reset_engine_cancel(struct intel_engine_cs *engine)
1681{ 1766{
1682 struct drm_i915_private *dev_priv = engine->i915; 1767 struct drm_i915_private *dev_priv = engine->i915;
1683 1768
@@ -1692,14 +1777,14 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
1692 unsigned int tmp; 1777 unsigned int tmp;
1693 1778
1694 for_each_engine_masked(engine, dev_priv, engine_mask, tmp) 1779 for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
1695 if (gen8_request_engine_reset(engine)) 1780 if (gen8_reset_engine_start(engine))
1696 goto not_ready; 1781 goto not_ready;
1697 1782
1698 return gen6_reset_engines(dev_priv, engine_mask); 1783 return gen6_reset_engines(dev_priv, engine_mask);
1699 1784
1700not_ready: 1785not_ready:
1701 for_each_engine_masked(engine, dev_priv, engine_mask, tmp) 1786 for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
1702 gen8_unrequest_engine_reset(engine); 1787 gen8_reset_engine_cancel(engine);
1703 1788
1704 return -EIO; 1789 return -EIO;
1705} 1790}
@@ -1730,8 +1815,11 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
1730int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) 1815int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
1731{ 1816{
1732 reset_func reset; 1817 reset_func reset;
1818 int retry;
1733 int ret; 1819 int ret;
1734 1820
1821 might_sleep();
1822
1735 reset = intel_get_gpu_reset(dev_priv); 1823 reset = intel_get_gpu_reset(dev_priv);
1736 if (reset == NULL) 1824 if (reset == NULL)
1737 return -ENODEV; 1825 return -ENODEV;
@@ -1740,7 +1828,13 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
1740 * request may be dropped and never completes (causing -EIO). 1828 * request may be dropped and never completes (causing -EIO).
1741 */ 1829 */
1742 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 1830 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
1743 ret = reset(dev_priv, engine_mask); 1831 for (retry = 0; retry < 3; retry++) {
1832 ret = reset(dev_priv, engine_mask);
1833 if (ret != -ETIMEDOUT)
1834 break;
1835
1836 cond_resched();
1837 }
1744 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 1838 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
1745 1839
1746 return ret; 1840 return ret;
@@ -1754,17 +1848,12 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
1754int intel_guc_reset(struct drm_i915_private *dev_priv) 1848int intel_guc_reset(struct drm_i915_private *dev_priv)
1755{ 1849{
1756 int ret; 1850 int ret;
1757 unsigned long irqflags;
1758 1851
1759 if (!HAS_GUC(dev_priv)) 1852 if (!HAS_GUC(dev_priv))
1760 return -EINVAL; 1853 return -EINVAL;
1761 1854
1762 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 1855 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
1763 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
1764
1765 ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); 1856 ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
1766
1767 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
1768 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 1857 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
1769 1858
1770 return ret; 1859 return ret;
@@ -1873,5 +1962,6 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
1873} 1962}
1874 1963
1875#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1964#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1965#include "selftests/mock_uncore.c"
1876#include "selftests/intel_uncore.c" 1966#include "selftests/intel_uncore.c"
1877#endif 1967#endif
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
new file mode 100644
index 000000000000..5f90278da461
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -0,0 +1,170 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#ifndef __INTEL_UNCORE_H__
26#define __INTEL_UNCORE_H__
27
28struct drm_i915_private;
29
30enum forcewake_domain_id {
31 FW_DOMAIN_ID_RENDER = 0,
32 FW_DOMAIN_ID_BLITTER,
33 FW_DOMAIN_ID_MEDIA,
34
35 FW_DOMAIN_ID_COUNT
36};
37
38enum forcewake_domains {
39 FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER),
40 FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER),
41 FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA),
42 FORCEWAKE_ALL = (FORCEWAKE_RENDER |
43 FORCEWAKE_BLITTER |
44 FORCEWAKE_MEDIA)
45};
46
47struct intel_uncore_funcs {
48 void (*force_wake_get)(struct drm_i915_private *dev_priv,
49 enum forcewake_domains domains);
50 void (*force_wake_put)(struct drm_i915_private *dev_priv,
51 enum forcewake_domains domains);
52
53 uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv,
54 i915_reg_t r, bool trace);
55 uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv,
56 i915_reg_t r, bool trace);
57 uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv,
58 i915_reg_t r, bool trace);
59 uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv,
60 i915_reg_t r, bool trace);
61
62 void (*mmio_writeb)(struct drm_i915_private *dev_priv,
63 i915_reg_t r, uint8_t val, bool trace);
64 void (*mmio_writew)(struct drm_i915_private *dev_priv,
65 i915_reg_t r, uint16_t val, bool trace);
66 void (*mmio_writel)(struct drm_i915_private *dev_priv,
67 i915_reg_t r, uint32_t val, bool trace);
68};
69
70struct intel_forcewake_range {
71 u32 start;
72 u32 end;
73
74 enum forcewake_domains domains;
75};
76
77struct intel_uncore {
78 spinlock_t lock; /** lock is also taken in irq contexts. */
79
80 const struct intel_forcewake_range *fw_domains_table;
81 unsigned int fw_domains_table_entries;
82
83 struct notifier_block pmic_bus_access_nb;
84 struct intel_uncore_funcs funcs;
85
86 unsigned int fifo_count;
87
88 enum forcewake_domains fw_domains;
89 enum forcewake_domains fw_domains_active;
90
91 u32 fw_set;
92 u32 fw_clear;
93 u32 fw_reset;
94
95 struct intel_uncore_forcewake_domain {
96 enum forcewake_domain_id id;
97 enum forcewake_domains mask;
98 unsigned int wake_count;
99 bool active;
100 struct hrtimer timer;
101 i915_reg_t reg_set;
102 i915_reg_t reg_ack;
103 } fw_domain[FW_DOMAIN_ID_COUNT];
104
105 int unclaimed_mmio_check;
106};
107
108/* Iterate over initialised fw domains */
109#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \
110 for (tmp__ = (mask__); \
111 tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
112
113#define for_each_fw_domain(domain__, dev_priv__, tmp__) \
114 for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__)
115
116
117void intel_uncore_sanitize(struct drm_i915_private *dev_priv);
118void intel_uncore_init(struct drm_i915_private *dev_priv);
119bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv);
120bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv);
121void intel_uncore_fini(struct drm_i915_private *dev_priv);
122void intel_uncore_suspend(struct drm_i915_private *dev_priv);
123void intel_uncore_resume_early(struct drm_i915_private *dev_priv);
124
125u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
126void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
127const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id);
128
129enum forcewake_domains
130intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
131 i915_reg_t reg, unsigned int op);
132#define FW_REG_READ (1)
133#define FW_REG_WRITE (2)
134
135void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
136 enum forcewake_domains domains);
137void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
138 enum forcewake_domains domains);
139/* Like above but the caller must manage the uncore.lock itself.
140 * Must be used with I915_READ_FW and friends.
141 */
142void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv,
143 enum forcewake_domains domains);
144void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv,
145 enum forcewake_domains domains);
146
147int intel_wait_for_register(struct drm_i915_private *dev_priv,
148 i915_reg_t reg,
149 u32 mask,
150 u32 value,
151 unsigned int timeout_ms);
152int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
153 i915_reg_t reg,
154 u32 mask,
155 u32 value,
156 unsigned int fast_timeout_us,
157 unsigned int slow_timeout_ms,
158 u32 *out_value);
159static inline
160int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
161 i915_reg_t reg,
162 u32 mask,
163 u32 value,
164 unsigned int timeout_ms)
165{
166 return __intel_wait_for_register_fw(dev_priv, reg, mask, value,
167 2, timeout_ms, NULL);
168}
169
170#endif /* !__INTEL_UNCORE_H__ */
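
A small illustrative sketch (not from the patch) of the iterator this new header exports: walking the initialised forcewake domains and reporting their reference counts. It assumes the usual i915 helper headers (__mask_next_bit and friends) are already included by the caller.

static void example_dump_forcewake(struct drm_i915_private *dev_priv)
{
	struct intel_uncore_forcewake_domain *domain;
	unsigned int tmp;

	/* Visit every initialised domain (render/blitter/media). */
	for_each_fw_domain(domain, dev_priv, tmp)
		pr_info("fw domain %s: wake_count=%u, active=%s\n",
			intel_uncore_forcewake_domain_to_str(domain->id),
			domain->wake_count,
			domain->active ? "yes" : "no");
}
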
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index f08d0179b3df..95d4aebc0181 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -138,10 +138,7 @@ static int wc_set(struct drm_i915_gem_object *obj,
138 typeof(v) *map; 138 typeof(v) *map;
139 int err; 139 int err;
140 140
141 /* XXX GTT write followed by WC write go missing */ 141 err = i915_gem_object_set_to_wc_domain(obj, true);
142 i915_gem_object_flush_gtt_write_domain(obj);
143
144 err = i915_gem_object_set_to_gtt_domain(obj, true);
145 if (err) 142 if (err)
146 return err; 143 return err;
147 144
@@ -162,10 +159,7 @@ static int wc_get(struct drm_i915_gem_object *obj,
162 typeof(v) map; 159 typeof(v) map;
163 int err; 160 int err;
164 161
165 /* XXX WC write followed by GTT write go missing */ 162 err = i915_gem_object_set_to_wc_domain(obj, false);
166 i915_gem_object_flush_gtt_write_domain(obj);
167
168 err = i915_gem_object_set_to_gtt_domain(obj, false);
169 if (err) 163 if (err)
170 return err; 164 return err;
171 165
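
A sketch (not part of the patch) of the full write-combined path the test now relies on, assuming i915_gem_object_pin_map() with I915_MAP_WC as used elsewhere in this selftest file; error handling is trimmed for brevity.

static int example_wc_write(struct drm_i915_gem_object *obj,
			    unsigned long offset, u32 v)
{
	u32 *map;
	int err;

	/* Move the object into the WC domain before mapping it WC. */
	err = i915_gem_object_set_to_wc_domain(obj, true);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;	/* write through the WC mapping */
	i915_gem_object_unpin_map(obj);

	return 0;
}
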
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 1afb8b06e3e1..12b85b3278cd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -320,7 +320,7 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
320static int igt_ctx_exec(void *arg) 320static int igt_ctx_exec(void *arg)
321{ 321{
322 struct drm_i915_private *i915 = arg; 322 struct drm_i915_private *i915 = arg;
323 struct drm_i915_gem_object *obj; 323 struct drm_i915_gem_object *obj = NULL;
324 struct drm_file *file; 324 struct drm_file *file;
325 IGT_TIMEOUT(end_time); 325 IGT_TIMEOUT(end_time);
326 LIST_HEAD(objects); 326 LIST_HEAD(objects);
@@ -359,7 +359,7 @@ static int igt_ctx_exec(void *arg)
359 } 359 }
360 360
361 for_each_engine(engine, i915, id) { 361 for_each_engine(engine, i915, id) {
362 if (dw == 0) { 362 if (!obj) {
363 obj = create_test_object(ctx, file, &objects); 363 obj = create_test_object(ctx, file, &objects);
364 if (IS_ERR(obj)) { 364 if (IS_ERR(obj)) {
365 err = PTR_ERR(obj); 365 err = PTR_ERR(obj);
@@ -376,8 +376,10 @@ static int igt_ctx_exec(void *arg)
376 goto out_unlock; 376 goto out_unlock;
377 } 377 }
378 378
379 if (++dw == max_dwords(obj)) 379 if (++dw == max_dwords(obj)) {
380 obj = NULL;
380 dw = 0; 381 dw = 0;
382 }
381 ndwords++; 383 ndwords++;
382 } 384 }
383 ncontexts++; 385 ncontexts++;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
index 817bef74bbcb..d15cc9d3a5cd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
@@ -271,6 +271,105 @@ err_obj:
271 return err; 271 return err;
272} 272}
273 273
274static int igt_dmabuf_export_kmap(void *arg)
275{
276 struct drm_i915_private *i915 = arg;
277 struct drm_i915_gem_object *obj;
278 struct dma_buf *dmabuf;
279 void *ptr;
280 int err;
281
282 obj = i915_gem_object_create(i915, 2*PAGE_SIZE);
283 if (IS_ERR(obj))
284 return PTR_ERR(obj);
285
286 dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
287 i915_gem_object_put(obj);
288 if (IS_ERR(dmabuf)) {
289 err = PTR_ERR(dmabuf);
290 pr_err("i915_gem_prime_export failed with err=%d\n", err);
291 return err;
292 }
293
294 ptr = dma_buf_kmap(dmabuf, 0);
295 if (!ptr) {
296 pr_err("dma_buf_kmap failed\n");
297 err = -ENOMEM;
298 goto err;
299 }
300
301 if (memchr_inv(ptr, 0, PAGE_SIZE)) {
302 dma_buf_kunmap(dmabuf, 0, ptr);
 303 pr_err("Exported page[0] not initialised to zero!\n");
304 err = -EINVAL;
305 goto err;
306 }
307
308 memset(ptr, 0xc5, PAGE_SIZE);
309 dma_buf_kunmap(dmabuf, 0, ptr);
310
311 ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
312 if (IS_ERR(ptr)) {
313 err = PTR_ERR(ptr);
314 pr_err("i915_gem_object_pin_map failed with err=%d\n", err);
315 goto err;
316 }
317 memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE);
318 i915_gem_object_unpin_map(obj);
319
320 ptr = dma_buf_kmap(dmabuf, 1);
321 if (!ptr) {
322 pr_err("dma_buf_kmap failed\n");
323 err = -ENOMEM;
324 goto err;
325 }
326
327 if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) {
328 dma_buf_kunmap(dmabuf, 1, ptr);
329 pr_err("Exported page[1] not set to 0xaa!\n");
330 err = -EINVAL;
331 goto err;
332 }
333
334 memset(ptr, 0xc5, PAGE_SIZE);
335 dma_buf_kunmap(dmabuf, 1, ptr);
336
337 ptr = dma_buf_kmap(dmabuf, 0);
338 if (!ptr) {
339 pr_err("dma_buf_kmap failed\n");
340 err = -ENOMEM;
341 goto err;
342 }
343 if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) {
344 dma_buf_kunmap(dmabuf, 0, ptr);
345 pr_err("Exported page[0] did not retain 0xc5!\n");
346 err = -EINVAL;
347 goto err;
348 }
349 dma_buf_kunmap(dmabuf, 0, ptr);
350
351 ptr = dma_buf_kmap(dmabuf, 2);
352 if (ptr) {
353 pr_err("Erroneously kmapped beyond the end of the object!\n");
354 dma_buf_kunmap(dmabuf, 2, ptr);
355 err = -EINVAL;
356 goto err;
357 }
358
359 ptr = dma_buf_kmap(dmabuf, -1);
360 if (ptr) {
361 pr_err("Erroneously kmapped before the start of the object!\n");
362 dma_buf_kunmap(dmabuf, -1, ptr);
363 err = -EINVAL;
364 goto err;
365 }
366
367 err = 0;
368err:
369 dma_buf_put(dmabuf);
370 return err;
371}
372
274int i915_gem_dmabuf_mock_selftests(void) 373int i915_gem_dmabuf_mock_selftests(void)
275{ 374{
276 static const struct i915_subtest tests[] = { 375 static const struct i915_subtest tests[] = {
@@ -279,6 +378,7 @@ int i915_gem_dmabuf_mock_selftests(void)
279 SUBTEST(igt_dmabuf_import), 378 SUBTEST(igt_dmabuf_import),
280 SUBTEST(igt_dmabuf_import_ownership), 379 SUBTEST(igt_dmabuf_import_ownership),
281 SUBTEST(igt_dmabuf_export_vmap), 380 SUBTEST(igt_dmabuf_export_vmap),
381 SUBTEST(igt_dmabuf_export_kmap),
282 }; 382 };
283 struct drm_i915_private *i915; 383 struct drm_i915_private *i915;
284 int err; 384 int err;
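
For orientation, an illustrative helper (not part of the patch) using the page-by-page dma-buf kmap interface that the new igt_dmabuf_export_kmap subtest exercises: map each page of an exported buffer, fill it, and drop the mapping again.

static int example_fill_dmabuf(struct dma_buf *dmabuf, u8 value)
{
	unsigned long page, npages = dmabuf->size >> PAGE_SHIFT;

	for (page = 0; page < npages; page++) {
		void *ptr = dma_buf_kmap(dmabuf, page);

		if (!ptr)
			return -ENOMEM;

		memset(ptr, value, PAGE_SIZE);	/* CPU write via kmap */
		dma_buf_kunmap(dmabuf, page, ptr);
	}

	return 0;
}
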
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 67d82bf1407f..8f011c447e41 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
266 if (offset >= obj->base.size) 266 if (offset >= obj->base.size)
267 continue; 267 continue;
268 268
269 i915_gem_object_flush_gtt_write_domain(obj); 269 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
270 270
271 p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); 271 p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
272 cpu = kmap(p) + offset_in_page(offset); 272 cpu = kmap(p) + offset_in_page(offset);
@@ -545,7 +545,9 @@ static int igt_mmap_offset_exhaustion(void *arg)
545 } 545 }
546 546
547 mutex_lock(&i915->drm.struct_mutex); 547 mutex_lock(&i915->drm.struct_mutex);
548 intel_runtime_pm_get(i915);
548 err = make_obj_busy(obj); 549 err = make_obj_busy(obj);
550 intel_runtime_pm_put(i915);
549 mutex_unlock(&i915->drm.struct_mutex); 551 mutex_unlock(&i915->drm.struct_mutex);
550 if (err) { 552 if (err) {
551 pr_err("[loop %d] Failed to busy the object\n", loop); 553 pr_err("[loop %d] Failed to busy the object\n", loop);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 98b7aac41eec..6664cb2eb0b8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -580,7 +580,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
580 if (err) 580 if (err)
581 goto err; 581 goto err;
582 582
583 err = i915_gem_object_set_to_gtt_domain(obj, true); 583 err = i915_gem_object_set_to_wc_domain(obj, true);
584 if (err) 584 if (err)
585 goto err; 585 goto err;
586 586
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
new file mode 100644
index 000000000000..7a44dab631b8
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
@@ -0,0 +1,299 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include "../i915_selftest.h"
26#include "i915_random.h"
27
28#include "mock_gem_device.h"
29#include "mock_timeline.h"
30
31struct __igt_sync {
32 const char *name;
33 u32 seqno;
34 bool expected;
35 bool set;
36};
37
38static int __igt_sync(struct intel_timeline *tl,
39 u64 ctx,
40 const struct __igt_sync *p,
41 const char *name)
42{
43 int ret;
44
45 if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
46 pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
47 name, p->name, ctx, p->seqno, yesno(p->expected));
48 return -EINVAL;
49 }
50
51 if (p->set) {
52 ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
53 if (ret)
54 return ret;
55 }
56
57 return 0;
58}
59
60static int igt_sync(void *arg)
61{
62 const struct __igt_sync pass[] = {
63 { "unset", 0, false, false },
64 { "new", 0, false, true },
65 { "0a", 0, true, true },
66 { "1a", 1, false, true },
67 { "1b", 1, true, true },
68 { "0b", 0, true, false },
69 { "2a", 2, false, true },
70 { "4", 4, false, true },
71 { "INT_MAX", INT_MAX, false, true },
72 { "INT_MAX-1", INT_MAX-1, true, false },
73 { "INT_MAX+1", (u32)INT_MAX+1, false, true },
74 { "INT_MAX", INT_MAX, true, false },
75 { "UINT_MAX", UINT_MAX, false, true },
76 { "wrap", 0, false, true },
77 { "unwrap", UINT_MAX, true, false },
78 {},
79 }, *p;
80 struct intel_timeline *tl;
81 int order, offset;
82 int ret;
83
84 tl = mock_timeline(0);
85 if (!tl)
86 return -ENOMEM;
87
88 for (p = pass; p->name; p++) {
89 for (order = 1; order < 64; order++) {
90 for (offset = -1; offset <= (order > 1); offset++) {
91 u64 ctx = BIT_ULL(order) + offset;
92
93 ret = __igt_sync(tl, ctx, p, "1");
94 if (ret)
95 goto out;
96 }
97 }
98 }
99 mock_timeline_destroy(tl);
100
101 tl = mock_timeline(0);
102 if (!tl)
103 return -ENOMEM;
104
105 for (order = 1; order < 64; order++) {
106 for (offset = -1; offset <= (order > 1); offset++) {
107 u64 ctx = BIT_ULL(order) + offset;
108
109 for (p = pass; p->name; p++) {
110 ret = __igt_sync(tl, ctx, p, "2");
111 if (ret)
112 goto out;
113 }
114 }
115 }
116
117out:
118 mock_timeline_destroy(tl);
119 return ret;
120}
121
122static unsigned int random_engine(struct rnd_state *rnd)
123{
124 return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32;
125}
126
127static int bench_sync(void *arg)
128{
129 struct rnd_state prng;
130 struct intel_timeline *tl;
131 unsigned long end_time, count;
132 u64 prng32_1M;
133 ktime_t kt;
134 int order, last_order;
135
136 tl = mock_timeline(0);
137 if (!tl)
138 return -ENOMEM;
139
140 /* Lookups from cache are very fast and so the random number generation
 141 * and the loop itself become a significant factor in the per-iteration
 142 * timings. We try to compensate for this by measuring the overhead
 143 * of the prng and subtracting it from the reported results.
144 */
145 prandom_seed_state(&prng, i915_selftest.random_seed);
146 count = 0;
147 kt = ktime_get();
148 end_time = jiffies + HZ/10;
149 do {
150 u32 x;
151
152 /* Make sure the compiler doesn't optimise away the prng call */
153 WRITE_ONCE(x, prandom_u32_state(&prng));
154
155 count++;
156 } while (!time_after(jiffies, end_time));
157 kt = ktime_sub(ktime_get(), kt);
158 pr_debug("%s: %lu random evaluations, %lluns/prng\n",
159 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
160 prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
161
162 /* Benchmark (only) setting random context ids */
163 prandom_seed_state(&prng, i915_selftest.random_seed);
164 count = 0;
165 kt = ktime_get();
166 end_time = jiffies + HZ/10;
167 do {
168 u64 id = i915_prandom_u64_state(&prng);
169
170 __intel_timeline_sync_set(tl, id, 0);
171 count++;
172 } while (!time_after(jiffies, end_time));
173 kt = ktime_sub(ktime_get(), kt);
174 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
175 pr_info("%s: %lu random insertions, %lluns/insert\n",
176 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
177
178 /* Benchmark looking up the exact same context ids as we just set */
179 prandom_seed_state(&prng, i915_selftest.random_seed);
180 end_time = count;
181 kt = ktime_get();
182 while (end_time--) {
183 u64 id = i915_prandom_u64_state(&prng);
184
185 if (!__intel_timeline_sync_is_later(tl, id, 0)) {
186 mock_timeline_destroy(tl);
187 pr_err("Lookup of %llu failed\n", id);
188 return -EINVAL;
189 }
190 }
191 kt = ktime_sub(ktime_get(), kt);
192 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
193 pr_info("%s: %lu random lookups, %lluns/lookup\n",
194 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
195
196 mock_timeline_destroy(tl);
197 cond_resched();
198
199 tl = mock_timeline(0);
200 if (!tl)
201 return -ENOMEM;
202
203 /* Benchmark setting the first N (in order) contexts */
204 count = 0;
205 kt = ktime_get();
206 end_time = jiffies + HZ/10;
207 do {
208 __intel_timeline_sync_set(tl, count++, 0);
209 } while (!time_after(jiffies, end_time));
210 kt = ktime_sub(ktime_get(), kt);
211 pr_info("%s: %lu in-order insertions, %lluns/insert\n",
212 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
213
214 /* Benchmark looking up the exact same context ids as we just set */
215 end_time = count;
216 kt = ktime_get();
217 while (end_time--) {
218 if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
219 pr_err("Lookup of %lu failed\n", end_time);
220 mock_timeline_destroy(tl);
221 return -EINVAL;
222 }
223 }
224 kt = ktime_sub(ktime_get(), kt);
225 pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
226 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
227
228 mock_timeline_destroy(tl);
229 cond_resched();
230
231 tl = mock_timeline(0);
232 if (!tl)
233 return -ENOMEM;
234
235 /* Benchmark searching for a random context id and maybe changing it */
236 prandom_seed_state(&prng, i915_selftest.random_seed);
237 count = 0;
238 kt = ktime_get();
239 end_time = jiffies + HZ/10;
240 do {
241 u32 id = random_engine(&prng);
242 u32 seqno = prandom_u32_state(&prng);
243
244 if (!__intel_timeline_sync_is_later(tl, id, seqno))
245 __intel_timeline_sync_set(tl, id, seqno);
246
247 count++;
248 } while (!time_after(jiffies, end_time));
249 kt = ktime_sub(ktime_get(), kt);
250 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
251 pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
252 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
253 mock_timeline_destroy(tl);
254 cond_resched();
255
256 /* Benchmark searching for a known context id and changing the seqno */
257 for (last_order = 1, order = 1; order < 32;
258 ({ int tmp = last_order; last_order = order; order += tmp; })) {
259 unsigned int mask = BIT(order) - 1;
260
261 tl = mock_timeline(0);
262 if (!tl)
263 return -ENOMEM;
264
265 count = 0;
266 kt = ktime_get();
267 end_time = jiffies + HZ/10;
268 do {
269 /* Without assuming too many details of the underlying
270 * implementation, try to identify its phase-changes
271 * (if any)!
272 */
273 u64 id = (u64)(count & mask) << order;
274
275 __intel_timeline_sync_is_later(tl, id, 0);
276 __intel_timeline_sync_set(tl, id, 0);
277
278 count++;
279 } while (!time_after(jiffies, end_time));
280 kt = ktime_sub(ktime_get(), kt);
281 pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
282 __func__, count, order,
283 (long long)div64_ul(ktime_to_ns(kt), count));
284 mock_timeline_destroy(tl);
285 cond_resched();
286 }
287
288 return 0;
289}
290
291int i915_gem_timeline_mock_selftests(void)
292{
293 static const struct i915_subtest tests[] = {
294 SUBTEST(igt_sync),
295 SUBTEST(bench_sync),
296 };
297
298 return i915_subtests(tests, NULL);
299}
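
A sketch (not from the patch) of the semantics the tests above exercise: the timeline remembers the most recent seqno seen per 64-bit context id, so a caller can skip emitting a wait that is already satisfied. The double-underscore variants are the unlocked forms, so any required locking is assumed to be handled by the caller.

static int example_maybe_wait(struct intel_timeline *tl,
			      u64 context, u32 seqno)
{
	if (__intel_timeline_sync_is_later(tl, context, seqno))
		return 0;	/* already ordered after this seqno */

	/* ... emit the actual wait here ... */

	/* Record the new high-water mark for this context. */
	return __intel_timeline_sync_set(tl, context, seqno);
}
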
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index be9a9ebf5692..fc74687501ba 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -9,9 +9,12 @@
9 * Tests are executed in order by igt/drv_selftest 9 * Tests are executed in order by igt/drv_selftest
10 */ 10 */
11selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ 11selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */
12selftest(fence, i915_sw_fence_mock_selftests)
12selftest(scatterlist, scatterlist_mock_selftests) 13selftest(scatterlist, scatterlist_mock_selftests)
14selftest(syncmap, i915_syncmap_mock_selftests)
13selftest(uncore, intel_uncore_mock_selftests) 15selftest(uncore, intel_uncore_mock_selftests)
14selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) 16selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
17selftest(timelines, i915_gem_timeline_mock_selftests)
15selftest(requests, i915_gem_request_mock_selftests) 18selftest(requests, i915_gem_request_mock_selftests)
16selftest(objects, i915_gem_object_mock_selftests) 19selftest(objects, i915_gem_object_mock_selftests)
17selftest(dmabuf, i915_gem_dmabuf_mock_selftests) 20selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c
index c17c83c30637..d044bf9a6feb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_random.c
+++ b/drivers/gpu/drm/i915/selftests/i915_random.c
@@ -30,6 +30,17 @@
30 30
31#include "i915_random.h" 31#include "i915_random.h"
32 32
33u64 i915_prandom_u64_state(struct rnd_state *rnd)
34{
35 u64 x;
36
37 x = prandom_u32_state(rnd);
38 x <<= 32;
39 x |= prandom_u32_state(rnd);
40
41 return x;
42}
43
33static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) 44static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state)
34{ 45{
35 return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); 46 return upper_32_bits((u64)prandom_u32_state(state) * ep_ro);
diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h
index b9c334ce6cd9..6c9379871384 100644
--- a/drivers/gpu/drm/i915/selftests/i915_random.h
+++ b/drivers/gpu/drm/i915/selftests/i915_random.h
@@ -41,6 +41,8 @@
41#define I915_RND_SUBSTATE(name__, parent__) \ 41#define I915_RND_SUBSTATE(name__, parent__) \
42 struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__))) 42 struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__)))
43 43
44u64 i915_prandom_u64_state(struct rnd_state *rnd);
45
44unsigned int *i915_random_order(unsigned int count, 46unsigned int *i915_random_order(unsigned int count,
45 struct rnd_state *state); 47 struct rnd_state *state);
46void i915_random_reorder(unsigned int *order, 48void i915_random_reorder(unsigned int *order,
diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
new file mode 100644
index 000000000000..19d145d6bf52
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
@@ -0,0 +1,582 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <linux/completion.h>
26#include <linux/delay.h>
27
28#include "../i915_selftest.h"
29
30static int __i915_sw_fence_call
31fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
32{
33 switch (state) {
34 case FENCE_COMPLETE:
35 break;
36
37 case FENCE_FREE:
38 /* Leave the fence for the caller to free it after testing */
39 break;
40 }
41
42 return NOTIFY_DONE;
43}
44
45static struct i915_sw_fence *alloc_fence(void)
46{
47 struct i915_sw_fence *fence;
48
49 fence = kmalloc(sizeof(*fence), GFP_KERNEL);
50 if (!fence)
51 return NULL;
52
53 i915_sw_fence_init(fence, fence_notify);
54 return fence;
55}
56
57static void free_fence(struct i915_sw_fence *fence)
58{
59 i915_sw_fence_fini(fence);
60 kfree(fence);
61}
62
63static int __test_self(struct i915_sw_fence *fence)
64{
65 if (i915_sw_fence_done(fence))
66 return -EINVAL;
67
68 i915_sw_fence_commit(fence);
69 if (!i915_sw_fence_done(fence))
70 return -EINVAL;
71
72 i915_sw_fence_wait(fence);
73 if (!i915_sw_fence_done(fence))
74 return -EINVAL;
75
76 return 0;
77}
78
79static int test_self(void *arg)
80{
81 struct i915_sw_fence *fence;
82 int ret;
83
84 /* Test i915_sw_fence signaling and completion testing */
85 fence = alloc_fence();
86 if (!fence)
87 return -ENOMEM;
88
89 ret = __test_self(fence);
90
91 free_fence(fence);
92 return ret;
93}
94
95static int test_dag(void *arg)
96{
97 struct i915_sw_fence *A, *B, *C;
98 int ret = -EINVAL;
99
100 /* Test detection of cycles within the i915_sw_fence graphs */
101 if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
102 return 0;
103
104 A = alloc_fence();
105 if (!A)
106 return -ENOMEM;
107
108 if (i915_sw_fence_await_sw_fence_gfp(A, A, GFP_KERNEL) != -EINVAL) {
109 pr_err("recursive cycle not detected (AA)\n");
110 goto err_A;
111 }
112
113 B = alloc_fence();
114 if (!B) {
115 ret = -ENOMEM;
116 goto err_A;
117 }
118
119 i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
120 if (i915_sw_fence_await_sw_fence_gfp(B, A, GFP_KERNEL) != -EINVAL) {
121 pr_err("single depth cycle not detected (BAB)\n");
122 goto err_B;
123 }
124
125 C = alloc_fence();
126 if (!C) {
127 ret = -ENOMEM;
128 goto err_B;
129 }
130
131 if (i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL) == -EINVAL) {
132 pr_err("invalid cycle detected\n");
133 goto err_C;
134 }
135 if (i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL) != -EINVAL) {
136 pr_err("single depth cycle not detected (CBC)\n");
137 goto err_C;
138 }
139 if (i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL) != -EINVAL) {
140 pr_err("cycle not detected (BA, CB, AC)\n");
141 goto err_C;
142 }
143 if (i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL) == -EINVAL) {
144 pr_err("invalid cycle detected\n");
145 goto err_C;
146 }
147
148 i915_sw_fence_commit(A);
149 i915_sw_fence_commit(B);
150 i915_sw_fence_commit(C);
151
152 ret = 0;
153 if (!i915_sw_fence_done(C)) {
154 pr_err("fence C not done\n");
155 ret = -EINVAL;
156 }
157 if (!i915_sw_fence_done(B)) {
158 pr_err("fence B not done\n");
159 ret = -EINVAL;
160 }
161 if (!i915_sw_fence_done(A)) {
162 pr_err("fence A not done\n");
163 ret = -EINVAL;
164 }
165err_C:
166 free_fence(C);
167err_B:
168 free_fence(B);
169err_A:
170 free_fence(A);
171 return ret;
172}
173
174static int test_AB(void *arg)
175{
176 struct i915_sw_fence *A, *B;
177 int ret;
178
179 /* Test i915_sw_fence (A) waiting on an event source (B) */
180 A = alloc_fence();
181 if (!A)
182 return -ENOMEM;
183 B = alloc_fence();
184 if (!B) {
185 ret = -ENOMEM;
186 goto err_A;
187 }
188
189 ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
190 if (ret < 0)
191 goto err_B;
192 if (ret == 0) {
193 pr_err("Incorrectly reported fence A was complete before await\n");
194 ret = -EINVAL;
195 goto err_B;
196 }
197
198 ret = -EINVAL;
199 i915_sw_fence_commit(A);
200 if (i915_sw_fence_done(A))
201 goto err_B;
202
203 i915_sw_fence_commit(B);
204 if (!i915_sw_fence_done(B)) {
205 pr_err("Fence B is not done\n");
206 goto err_B;
207 }
208
209 if (!i915_sw_fence_done(A)) {
210 pr_err("Fence A is not done\n");
211 goto err_B;
212 }
213
214 ret = 0;
215err_B:
216 free_fence(B);
217err_A:
218 free_fence(A);
219 return ret;
220}
221
222static int test_ABC(void *arg)
223{
224 struct i915_sw_fence *A, *B, *C;
225 int ret;
226
227 /* Test a chain of fences, A waits on B who waits on C */
228 A = alloc_fence();
229 if (!A)
230 return -ENOMEM;
231
232 B = alloc_fence();
233 if (!B) {
234 ret = -ENOMEM;
235 goto err_A;
236 }
237
238 C = alloc_fence();
239 if (!C) {
240 ret = -ENOMEM;
241 goto err_B;
242 }
243
244 ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
245 if (ret < 0)
246 goto err_C;
247 if (ret == 0) {
248 pr_err("Incorrectly reported fence B was complete before await\n");
249 goto err_C;
250 }
251
252 ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
253 if (ret < 0)
254 goto err_C;
255 if (ret == 0) {
256 pr_err("Incorrectly reported fence C was complete before await\n");
257 goto err_C;
258 }
259
260 ret = -EINVAL;
261 i915_sw_fence_commit(A);
262 if (i915_sw_fence_done(A)) {
263 pr_err("Fence A completed early\n");
264 goto err_C;
265 }
266
267 i915_sw_fence_commit(B);
268 if (i915_sw_fence_done(B)) {
269 pr_err("Fence B completed early\n");
270 goto err_C;
271 }
272
273 if (i915_sw_fence_done(A)) {
274 pr_err("Fence A completed early (after signaling B)\n");
275 goto err_C;
276 }
277
278 i915_sw_fence_commit(C);
279
280 ret = 0;
281 if (!i915_sw_fence_done(C)) {
282 pr_err("Fence C not done\n");
283 ret = -EINVAL;
284 }
285 if (!i915_sw_fence_done(B)) {
286 pr_err("Fence B not done\n");
287 ret = -EINVAL;
288 }
289 if (!i915_sw_fence_done(A)) {
290 pr_err("Fence A not done\n");
291 ret = -EINVAL;
292 }
293err_C:
294 free_fence(C);
295err_B:
296 free_fence(B);
297err_A:
298 free_fence(A);
299 return ret;
300}
301
302static int test_AB_C(void *arg)
303{
304 struct i915_sw_fence *A, *B, *C;
305 int ret = -EINVAL;
306
307 /* Test multiple fences (AB) waiting on a single event (C) */
308 A = alloc_fence();
309 if (!A)
310 return -ENOMEM;
311
312 B = alloc_fence();
313 if (!B) {
314 ret = -ENOMEM;
315 goto err_A;
316 }
317
318 C = alloc_fence();
319 if (!C) {
320 ret = -ENOMEM;
321 goto err_B;
322 }
323
324 ret = i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL);
325 if (ret < 0)
326 goto err_C;
327 if (ret == 0) {
328 ret = -EINVAL;
329 goto err_C;
330 }
331
332 ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
333 if (ret < 0)
334 goto err_C;
335 if (ret == 0) {
336 ret = -EINVAL;
337 goto err_C;
338 }
339
340 i915_sw_fence_commit(A);
341 i915_sw_fence_commit(B);
342
343 ret = 0;
344 if (i915_sw_fence_done(A)) {
345 pr_err("Fence A completed early\n");
346 ret = -EINVAL;
347 }
348
349 if (i915_sw_fence_done(B)) {
350 pr_err("Fence B completed early\n");
351 ret = -EINVAL;
352 }
353
354 i915_sw_fence_commit(C);
355 if (!i915_sw_fence_done(C)) {
356 pr_err("Fence C not done\n");
357 ret = -EINVAL;
358 }
359
360 if (!i915_sw_fence_done(B)) {
361 pr_err("Fence B not done\n");
362 ret = -EINVAL;
363 }
364
365 if (!i915_sw_fence_done(A)) {
366 pr_err("Fence A not done\n");
367 ret = -EINVAL;
368 }
369
370err_C:
371 free_fence(C);
372err_B:
373 free_fence(B);
374err_A:
375 free_fence(A);
376 return ret;
377}
378
379static int test_C_AB(void *arg)
380{
381 struct i915_sw_fence *A, *B, *C;
382 int ret;
383
384 /* Test multiple event sources (A,B) for a single fence (C) */
385 A = alloc_fence();
386 if (!A)
387 return -ENOMEM;
388
389 B = alloc_fence();
390 if (!B) {
391 ret = -ENOMEM;
392 goto err_A;
393 }
394
395 C = alloc_fence();
396 if (!C) {
397 ret = -ENOMEM;
398 goto err_B;
399 }
400
401 ret = i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL);
402 if (ret < 0)
403 goto err_C;
404 if (ret == 0) {
405 ret = -EINVAL;
406 goto err_C;
407 }
408
409 ret = i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL);
410 if (ret < 0)
411 goto err_C;
412 if (ret == 0) {
413 ret = -EINVAL;
414 goto err_C;
415 }
416
417 ret = 0;
418 i915_sw_fence_commit(C);
419 if (i915_sw_fence_done(C))
420 ret = -EINVAL;
421
422 i915_sw_fence_commit(A);
423 i915_sw_fence_commit(B);
424
425 if (!i915_sw_fence_done(A)) {
426 pr_err("Fence A not done\n");
427 ret = -EINVAL;
428 }
429
430 if (!i915_sw_fence_done(B)) {
431 pr_err("Fence B not done\n");
432 ret = -EINVAL;
433 }
434
435 if (!i915_sw_fence_done(C)) {
436 pr_err("Fence C not done\n");
437 ret = -EINVAL;
438 }
439
440err_C:
441 free_fence(C);
442err_B:
443 free_fence(B);
444err_A:
445 free_fence(A);
446 return ret;
447}
448
449static int test_chain(void *arg)
450{
451 int nfences = 4096;
452 struct i915_sw_fence **fences;
453 int ret, i;
454
455 /* Test a long chain of fences */
456 fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL);
457 if (!fences)
458 return -ENOMEM;
459
460 for (i = 0; i < nfences; i++) {
461 fences[i] = alloc_fence();
462 if (!fences[i]) {
463 nfences = i;
464 ret = -ENOMEM;
465 goto err;
466 }
467
468 if (i > 0) {
469 ret = i915_sw_fence_await_sw_fence_gfp(fences[i],
470 fences[i - 1],
471 GFP_KERNEL);
472 if (ret < 0) {
473 nfences = i + 1;
474 goto err;
475 }
476
477 i915_sw_fence_commit(fences[i]);
478 }
479 }
480
481 ret = 0;
482 for (i = nfences; --i; ) {
483 if (i915_sw_fence_done(fences[i])) {
484 if (ret == 0)
485 pr_err("Fence[%d] completed early\n", i);
486 ret = -EINVAL;
487 }
488 }
489 i915_sw_fence_commit(fences[0]);
490 for (i = 0; ret == 0 && i < nfences; i++) {
491 if (!i915_sw_fence_done(fences[i])) {
492 pr_err("Fence[%d] is not done\n", i);
493 ret = -EINVAL;
494 }
495 }
496
497err:
498 for (i = 0; i < nfences; i++)
499 free_fence(fences[i]);
500 kfree(fences);
501 return ret;
502}
503
504struct task_ipc {
505 struct work_struct work;
506 struct completion started;
507 struct i915_sw_fence *in, *out;
508 int value;
509};
510
511static void task_ipc(struct work_struct *work)
512{
513 struct task_ipc *ipc = container_of(work, typeof(*ipc), work);
514
515 complete(&ipc->started);
516
517 i915_sw_fence_wait(ipc->in);
518 smp_store_mb(ipc->value, 1);
519 i915_sw_fence_commit(ipc->out);
520}
521
522static int test_ipc(void *arg)
523{
524 struct task_ipc ipc;
525 int ret = 0;
526
527 /* Test use of i915_sw_fence as an interprocess signaling mechanism */
528 ipc.in = alloc_fence();
529 if (!ipc.in)
530 return -ENOMEM;
531 ipc.out = alloc_fence();
532 if (!ipc.out) {
533 ret = -ENOMEM;
534 goto err_in;
535 }
536
537 /* use a completion to avoid chicken-and-egg testing */
538 init_completion(&ipc.started);
539
540 ipc.value = 0;
541 INIT_WORK_ONSTACK(&ipc.work, task_ipc);
542 schedule_work(&ipc.work);
543
544 wait_for_completion(&ipc.started);
545
546 usleep_range(1000, 2000);
547 if (READ_ONCE(ipc.value)) {
548 pr_err("worker updated value before i915_sw_fence was signaled\n");
549 ret = -EINVAL;
550 }
551
552 i915_sw_fence_commit(ipc.in);
553 i915_sw_fence_wait(ipc.out);
554
555 if (!READ_ONCE(ipc.value)) {
556 pr_err("worker signaled i915_sw_fence before value was posted\n");
557 ret = -EINVAL;
558 }
559
560 flush_work(&ipc.work);
561 destroy_work_on_stack(&ipc.work);
562 free_fence(ipc.out);
563err_in:
564 free_fence(ipc.in);
565 return ret;
566}
567
568int i915_sw_fence_mock_selftests(void)
569{
570 static const struct i915_subtest tests[] = {
571 SUBTEST(test_self),
572 SUBTEST(test_dag),
573 SUBTEST(test_AB),
574 SUBTEST(test_ABC),
575 SUBTEST(test_AB_C),
576 SUBTEST(test_C_AB),
577 SUBTEST(test_chain),
578 SUBTEST(test_ipc),
579 };
580
581 return i915_subtests(tests, NULL);
582}
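
For reference, a sketch (not part of the patch) of the composition pattern checked by test_C_AB above: a gate fence that only reports completion after both of its sources have been committed. The fences are assumed to be initialised but not yet committed by the caller, and the no-op notifier mirrors the one used by the selftests.

static int __i915_sw_fence_call
example_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

static void example_gate(struct i915_sw_fence *gate,
			 struct i915_sw_fence *src_a,
			 struct i915_sw_fence *src_b)
{
	i915_sw_fence_init(gate, example_notify);

	/* gate now waits on both sources ... */
	i915_sw_fence_await_sw_fence_gfp(gate, src_a, GFP_KERNEL);
	i915_sw_fence_await_sw_fence_gfp(gate, src_b, GFP_KERNEL);
	i915_sw_fence_commit(gate);

	/* ... and only becomes done once both have been committed. */
	i915_sw_fence_commit(src_a);
	i915_sw_fence_commit(src_b);
	WARN_ON(!i915_sw_fence_done(gate));
}
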
diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
new file mode 100644
index 000000000000..bcab3d00a785
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
@@ -0,0 +1,616 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include "../i915_selftest.h"
26#include "i915_random.h"
27
28static char *
29__sync_print(struct i915_syncmap *p,
30 char *buf, unsigned long *sz,
31 unsigned int depth,
32 unsigned int last,
33 unsigned int idx)
34{
35 unsigned long len;
36 unsigned int i, X;
37
38 if (depth) {
39 unsigned int d;
40
41 for (d = 0; d < depth - 1; d++) {
42 if (last & BIT(depth - d - 1))
43 len = scnprintf(buf, *sz, "| ");
44 else
45 len = scnprintf(buf, *sz, " ");
46 buf += len;
47 *sz -= len;
48 }
49 len = scnprintf(buf, *sz, "%x-> ", idx);
50 buf += len;
51 *sz -= len;
52 }
53
54 /* We mark bits after the prefix as "X" */
55 len = scnprintf(buf, *sz, "0x%016llx", p->prefix << p->height << SHIFT);
56 buf += len;
57 *sz -= len;
58 X = (p->height + SHIFT) / 4;
59 scnprintf(buf - X, *sz + X, "%*s", X, "XXXXXXXXXXXXXXXXX");
60
61 if (!p->height) {
62 for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) {
63 len = scnprintf(buf, *sz, " %x:%x,",
64 i, __sync_seqno(p)[i]);
65 buf += len;
66 *sz -= len;
67 }
68 buf -= 1;
69 *sz += 1;
70 }
71
72 len = scnprintf(buf, *sz, "\n");
73 buf += len;
74 *sz -= len;
75
76 if (p->height) {
77 for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) {
78 buf = __sync_print(__sync_child(p)[i], buf, sz,
79 depth + 1,
80 last << 1 | !!(p->bitmap >> (i + 1)),
81 i);
82 }
83 }
84
85 return buf;
86}
87
88static bool
89i915_syncmap_print_to_buf(struct i915_syncmap *p, char *buf, unsigned long sz)
90{
91 if (!p)
92 return false;
93
94 while (p->parent)
95 p = p->parent;
96
97 __sync_print(p, buf, &sz, 0, 1, 0);
98 return true;
99}
100
101static int check_syncmap_free(struct i915_syncmap **sync)
102{
103 i915_syncmap_free(sync);
104 if (*sync) {
105 pr_err("sync not cleared after free\n");
106 return -EINVAL;
107 }
108
109 return 0;
110}
111
112static int dump_syncmap(struct i915_syncmap *sync, int err)
113{
114 char *buf;
115
116 if (!err)
117 return check_syncmap_free(&sync);
118
119 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
120 if (!buf)
121 goto skip;
122
123 if (i915_syncmap_print_to_buf(sync, buf, PAGE_SIZE))
124 pr_err("%s", buf);
125
126 kfree(buf);
127
128skip:
129 i915_syncmap_free(&sync);
130 return err;
131}
132
133static int igt_syncmap_init(void *arg)
134{
135 struct i915_syncmap *sync = (void *)~0ul;
136
137 /*
138 * Cursory check that we can initialise a random pointer and transform
139 * it into the root pointer of a syncmap.
140 */
141
142 i915_syncmap_init(&sync);
143 return check_syncmap_free(&sync);
144}
145
146static int check_seqno(struct i915_syncmap *leaf, unsigned int idx, u32 seqno)
147{
148 if (leaf->height) {
149 pr_err("%s: not a leaf, height is %d\n",
150 __func__, leaf->height);
151 return -EINVAL;
152 }
153
154 if (__sync_seqno(leaf)[idx] != seqno) {
155 pr_err("%s: seqno[%d], found %x, expected %x\n",
156 __func__, idx, __sync_seqno(leaf)[idx], seqno);
157 return -EINVAL;
158 }
159
160 return 0;
161}
162
163static int check_one(struct i915_syncmap **sync, u64 context, u32 seqno)
164{
165 int err;
166
167 err = i915_syncmap_set(sync, context, seqno);
168 if (err)
169 return err;
170
171 if ((*sync)->height) {
172 pr_err("Inserting first context=%llx did not return leaf (height=%d, prefix=%llx\n",
173 context, (*sync)->height, (*sync)->prefix);
174 return -EINVAL;
175 }
176
177 if ((*sync)->parent) {
178 pr_err("Inserting first context=%llx created branches!\n",
179 context);
180 return -EINVAL;
181 }
182
183 if (hweight32((*sync)->bitmap) != 1) {
184 pr_err("First bitmap does not contain a single entry, found %x (count=%d)!\n",
185 (*sync)->bitmap, hweight32((*sync)->bitmap));
186 return -EINVAL;
187 }
188
189 err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno);
190 if (err)
191 return err;
192
193 if (!i915_syncmap_is_later(sync, context, seqno)) {
194 pr_err("Lookup of first context=%llx/seqno=%x failed!\n",
195 context, seqno);
196 return -EINVAL;
197 }
198
199 return 0;
200}
201
202static int igt_syncmap_one(void *arg)
203{
204 I915_RND_STATE(prng);
205 IGT_TIMEOUT(end_time);
206 struct i915_syncmap *sync;
207 unsigned long max = 1;
208 int err;
209
210 /*
211 * Check that inserting a new id, creates a leaf and only that leaf.
212 */
213
214 i915_syncmap_init(&sync);
215
216 do {
217 u64 context = i915_prandom_u64_state(&prng);
218 unsigned long loop;
219
220 err = check_syncmap_free(&sync);
221 if (err)
222 goto out;
223
224 for (loop = 0; loop <= max; loop++) {
225 err = check_one(&sync, context,
226 prandom_u32_state(&prng));
227 if (err)
228 goto out;
229 }
230 max++;
231 } while (!__igt_timeout(end_time, NULL));
232 pr_debug("%s: Completed %lu single insertions\n",
233 __func__, max * (max - 1) / 2);
234out:
235 return dump_syncmap(sync, err);
236}
237
238static int check_leaf(struct i915_syncmap **sync, u64 context, u32 seqno)
239{
240 int err;
241
242 err = i915_syncmap_set(sync, context, seqno);
243 if (err)
244 return err;
245
246 if ((*sync)->height) {
247 pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n",
248 context, (*sync)->height, (*sync)->prefix);
249 return -EINVAL;
250 }
251
252 if (hweight32((*sync)->bitmap) != 1) {
253 pr_err("First entry into leaf (context=%llx) does not contain a single entry, found %x (count=%d)!\n",
254 context, (*sync)->bitmap, hweight32((*sync)->bitmap));
255 return -EINVAL;
256 }
257
258 err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno);
259 if (err)
260 return err;
261
262 if (!i915_syncmap_is_later(sync, context, seqno)) {
263 pr_err("Lookup of first entry context=%llx/seqno=%x failed!\n",
264 context, seqno);
265 return -EINVAL;
266 }
267
268 return 0;
269}
270
271static int igt_syncmap_join_above(void *arg)
272{
273 struct i915_syncmap *sync;
274 unsigned int pass, order;
275 int err;
276
277 i915_syncmap_init(&sync);
278
279 /*
280 * When we have a new id that doesn't fit inside the existing tree,
281 * we need to add a new layer above.
282 *
283 * 1: 0x00000001
284 * 2: 0x00000010
285 * 3: 0x00000100
286 * 4: 0x00001000
287 * ...
288 * Each pass the common prefix shrinks and we have to insert a join.
289 * Each join will only contain two branches, the latest of which
290 * is always a leaf.
291 *
292 * If we then reuse the same set of contexts, we expect to build an
293 * identical tree.
294 */
295 for (pass = 0; pass < 3; pass++) {
296 for (order = 0; order < 64; order += SHIFT) {
297 u64 context = BIT_ULL(order);
298 struct i915_syncmap *join;
299
300 err = check_leaf(&sync, context, 0);
301 if (err)
302 goto out;
303
304 join = sync->parent;
305 if (!join) /* very first insert will have no parents */
306 continue;
307
308 if (!join->height) {
309 pr_err("Parent with no height!\n");
310 err = -EINVAL;
311 goto out;
312 }
313
314 if (hweight32(join->bitmap) != 2) {
315 pr_err("Join does not have 2 children: %x (%d)\n",
316 join->bitmap, hweight32(join->bitmap));
317 err = -EINVAL;
318 goto out;
319 }
320
321 if (__sync_child(join)[__sync_branch_idx(join, context)] != sync) {
322 pr_err("Leaf misplaced in parent!\n");
323 err = -EINVAL;
324 goto out;
325 }
326 }
327 }
328out:
329 return dump_syncmap(sync, err);
330}
331
332static int igt_syncmap_join_below(void *arg)
333{
334 struct i915_syncmap *sync;
335 unsigned int step, order, idx;
336 int err;
337
338 i915_syncmap_init(&sync);
339
340 /*
341 * Check that we can split a compacted branch by replacing it with
342 * a join.
343 */
344 for (step = 0; step < KSYNCMAP; step++) {
345 for (order = 64 - SHIFT; order > 0; order -= SHIFT) {
346 u64 context = step * BIT_ULL(order);
347
348 err = i915_syncmap_set(&sync, context, 0);
349 if (err)
350 goto out;
351
352 if (sync->height) {
353 pr_err("Inserting context=%llx (order=%d, step=%d) did not return leaf (height=%d, prefix=%llx\n",
354 context, order, step, sync->height, sync->prefix);
355 err = -EINVAL;
356 goto out;
357 }
358 }
359 }
360
361 for (step = 0; step < KSYNCMAP; step++) {
362 for (order = SHIFT; order < 64; order += SHIFT) {
363 u64 context = step * BIT_ULL(order);
364
365 if (!i915_syncmap_is_later(&sync, context, 0)) {
366 pr_err("1: context %llx (order=%d, step=%d) not found\n",
367 context, order, step);
368 err = -EINVAL;
369 goto out;
370 }
371
372 for (idx = 1; idx < KSYNCMAP; idx++) {
373 if (i915_syncmap_is_later(&sync, context + idx, 0)) {
374 pr_err("1: context %llx (order=%d, step=%d) should not exist\n",
375 context + idx, order, step);
376 err = -EINVAL;
377 goto out;
378 }
379 }
380 }
381 }
382
383 for (order = SHIFT; order < 64; order += SHIFT) {
384 for (step = 0; step < KSYNCMAP; step++) {
385 u64 context = step * BIT_ULL(order);
386
387 if (!i915_syncmap_is_later(&sync, context, 0)) {
388 pr_err("2: context %llx (order=%d, step=%d) not found\n",
389 context, order, step);
390 err = -EINVAL;
391 goto out;
392 }
393 }
394 }
395
396out:
397 return dump_syncmap(sync, err);
398}
399
400static int igt_syncmap_neighbours(void *arg)
401{
402 I915_RND_STATE(prng);
403 IGT_TIMEOUT(end_time);
404 struct i915_syncmap *sync;
405 int err;
406
407 /*
408 * Each leaf holds KSYNCMAP seqno. Check that when we create KSYNCMAP
409 * neighbouring ids, they all fit into the same leaf.
410 */
411
412 i915_syncmap_init(&sync);
413 do {
414 u64 context = i915_prandom_u64_state(&prng) & ~MASK;
415 unsigned int idx;
416
417 if (i915_syncmap_is_later(&sync, context, 0)) /* Skip repeats */
418 continue;
419
420 for (idx = 0; idx < KSYNCMAP; idx++) {
421 err = i915_syncmap_set(&sync, context + idx, 0);
422 if (err)
423 goto out;
424
425 if (sync->height) {
426 pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n",
427 context, sync->height, sync->prefix);
428 err = -EINVAL;
429 goto out;
430 }
431
432 if (sync->bitmap != BIT(idx + 1) - 1) {
433 pr_err("Inserting neighbouring context=0x%llx+%d, did not fit into the same leaf bitmap=%x (%d), expected %lx (%d)\n",
434 context, idx,
435 sync->bitmap, hweight32(sync->bitmap),
436 BIT(idx + 1) - 1, idx + 1);
437 err = -EINVAL;
438 goto out;
439 }
440 }
441 } while (!__igt_timeout(end_time, NULL));
442out:
443 return dump_syncmap(sync, err);
444}
445
446static int igt_syncmap_compact(void *arg)
447{
448 struct i915_syncmap *sync;
449 unsigned int idx, order;
450 int err;
451
452 i915_syncmap_init(&sync);
453
454 /*
455 * The syncmap are "space efficient" compressed radix trees - any
456 * branch with only one child is skipped and replaced by the child.
457 *
458 * If we construct a tree with ids that are neighbouring at a non-zero
459 * height, we form a join but each child of that join is directly a
460 * leaf holding the single id.
461 */
462 for (order = SHIFT; order < 64; order += SHIFT) {
463 err = check_syncmap_free(&sync);
464 if (err)
465 goto out;
466
467 /* Create neighbours in the parent */
468 for (idx = 0; idx < KSYNCMAP; idx++) {
469 u64 context = idx * BIT_ULL(order) + idx;
470
471 err = i915_syncmap_set(&sync, context, 0);
472 if (err)
473 goto out;
474
475 if (sync->height) {
476 pr_err("Inserting context=%llx (order=%d, idx=%d) did not return leaf (height=%d, prefix=%llx\n",
477 context, order, idx,
478 sync->height, sync->prefix);
479 err = -EINVAL;
480 goto out;
481 }
482 }
483
484 sync = sync->parent;
485 if (sync->parent) {
486 pr_err("Parent (join) of last leaf was not the sync!\n");
487 err = -EINVAL;
488 goto out;
489 }
490
491 if (sync->height != order) {
492 pr_err("Join does not have the expected height, found %d, expected %d\n",
493 sync->height, order);
494 err = -EINVAL;
495 goto out;
496 }
497
498 if (sync->bitmap != BIT(KSYNCMAP) - 1) {
499 pr_err("Join is not full!, found %x (%d) expected %lx (%d)\n",
500 sync->bitmap, hweight32(sync->bitmap),
501 BIT(KSYNCMAP) - 1, KSYNCMAP);
502 err = -EINVAL;
503 goto out;
504 }
505
506 /* Each of our children should be a leaf */
507 for (idx = 0; idx < KSYNCMAP; idx++) {
508 struct i915_syncmap *leaf = __sync_child(sync)[idx];
509
510 if (leaf->height) {
 511 pr_err("Child %d is not a leaf!\n", idx);
512 err = -EINVAL;
513 goto out;
514 }
515
516 if (leaf->parent != sync) {
517 pr_err("Child %d is not attached to us!\n",
518 idx);
519 err = -EINVAL;
520 goto out;
521 }
522
523 if (!is_power_of_2(leaf->bitmap)) {
524 pr_err("Child %d holds more than one id, found %x (%d)\n",
525 idx, leaf->bitmap, hweight32(leaf->bitmap));
526 err = -EINVAL;
527 goto out;
528 }
529
530 if (leaf->bitmap != BIT(idx)) {
531 pr_err("Child %d has wrong seqno idx, found %d, expected %d\n",
532 idx, ilog2(leaf->bitmap), idx);
533 err = -EINVAL;
534 goto out;
535 }
536 }
537 }
538out:
539 return dump_syncmap(sync, err);
540}
541
542static int igt_syncmap_random(void *arg)
543{
544 I915_RND_STATE(prng);
545 IGT_TIMEOUT(end_time);
546 struct i915_syncmap *sync;
547 unsigned long count, phase, i;
548 u32 seqno;
549 int err;
550
551 i915_syncmap_init(&sync);
552
553 /*
554 * Having tried to test the individual operations within i915_syncmap,
555 * run a smoketest exploring the entire u64 space with random
556 * insertions.
557 */
558
559 count = 0;
560 phase = jiffies + HZ/100 + 1;
561 do {
562 u64 context = i915_prandom_u64_state(&prng);
563
564 err = i915_syncmap_set(&sync, context, 0);
565 if (err)
566 goto out;
567
568 count++;
569 } while (!time_after(jiffies, phase));
570 seqno = 0;
571
572 phase = 0;
573 do {
574 I915_RND_STATE(ctx);
575 u32 last_seqno = seqno;
576 bool expect;
577
578 seqno = prandom_u32_state(&prng);
579 expect = seqno_later(last_seqno, seqno);
580
581 for (i = 0; i < count; i++) {
582 u64 context = i915_prandom_u64_state(&ctx);
583
584 if (i915_syncmap_is_later(&sync, context, seqno) != expect) {
585 pr_err("context=%llu, last=%u this=%u did not match expectation (%d)\n",
586 context, last_seqno, seqno, expect);
587 err = -EINVAL;
588 goto out;
589 }
590
591 err = i915_syncmap_set(&sync, context, seqno);
592 if (err)
593 goto out;
594 }
595
596 phase++;
597 } while (!__igt_timeout(end_time, NULL));
598 pr_debug("Completed %lu passes, each of %lu contexts\n", phase, count);
599out:
600 return dump_syncmap(sync, err);
601}
602
603int i915_syncmap_mock_selftests(void)
604{
605 static const struct i915_subtest tests[] = {
606 SUBTEST(igt_syncmap_init),
607 SUBTEST(igt_syncmap_one),
608 SUBTEST(igt_syncmap_join_above),
609 SUBTEST(igt_syncmap_join_below),
610 SUBTEST(igt_syncmap_neighbours),
611 SUBTEST(igt_syncmap_compact),
612 SUBTEST(igt_syncmap_random),
613 };
614
615 return i915_subtests(tests, NULL);
616}
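A note on the shape igt_syncmap_compact expects: the comment in that test describes the syncmap as a compressed radix tree, which rests on how a context id splits into a per-level child slot and a shared prefix. Below is a minimal userspace sketch of that split, assuming the driver's 16-way fan-out (KSYNCMAP = 16, SHIFT = ilog2(KSYNCMAP) = 4); the node_idx()/node_prefix() helpers are illustrative stand-ins, not the driver's own functions.

/*
 * Standalone sketch (not driver code): how a context id decomposes into a
 * child slot and a prefix for a node sitting at a given height (in bits),
 * assuming a 16-way fan-out as in i915_syncmap.
 */
#include <stdint.h>
#include <stdio.h>

#define SHIFT 4			/* assumed: ilog2(KSYNCMAP) */
#define KSYNCMAP (1u << SHIFT)	/* assumed: 16-way fan-out */
#define MASK (KSYNCMAP - 1)

/* Child slot selected for @id inside a node at @height. */
static unsigned int node_idx(uint64_t id, unsigned int height)
{
	return (id >> height) & MASK;
}

/* Prefix shared by every id stored underneath a node at @height. */
static uint64_t node_prefix(uint64_t id, unsigned int height)
{
	return id >> (height + SHIFT);
}

int main(void)
{
	unsigned int order = SHIFT, idx;

	/* Same construction as the selftest: neighbours in the parent. */
	for (idx = 0; idx < 4; idx++) {
		uint64_t context = (uint64_t)idx * (1ull << order) + idx;

		printf("context=%llx: leaf prefix=%llx slot=%u, join slot at height %u = %u\n",
		       (unsigned long long)context,
		       (unsigned long long)node_prefix(context, 0),
		       node_idx(context, 0),
		       order, node_idx(context, order));
	}
	return 0;
}

With order == SHIFT each context lands in its own leaf (the leaf prefixes all differ), while those leaves are neighbouring children of one join at height SHIFT - the layout the selftest asserts on via sync->height, sync->bitmap and __sync_child(sync)[idx].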
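Similarly for igt_syncmap_random: the smoketest revisits the same set of random contexts on every pass without ever storing them, because each pass re-declares I915_RND_STATE(ctx) and so replays an identical id stream from the same per-run seed. A small userspace sketch of that replay idea, with xorshift64 as a stand-in for the kernel's prandom_u32_state() and an arbitrary seed chosen purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* Toy PRNG standing in for prandom_u32_state()/i915_prandom_u64_state(). */
static uint64_t xorshift64(uint64_t *state)
{
	uint64_t x = *state;

	x ^= x << 13;
	x ^= x >> 7;
	x ^= x << 17;
	return *state = x;
}

int main(void)
{
	const uint64_t seed = 0x9E3779B97F4A7C15ull;	/* fixed for the run */
	unsigned long count = 4, pass, i;

	for (pass = 0; pass < 2; pass++) {
		uint64_t ctx = seed;	/* fresh state => identical id stream */

		for (i = 0; i < count; i++)
			printf("pass %lu: context[%lu] = %016llx\n", pass, i,
			       (unsigned long long)xorshift64(&ctx));
	}
	return 0;
}

Re-seeding instead of recording keeps the later phases' memory footprint constant regardless of how many contexts the timed first phase managed to insert.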
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 0ad624a1db90..5b18a2dc19a8 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -52,11 +52,12 @@ static void hw_delay_complete(unsigned long data)
 	spin_unlock(&engine->hw_lock);
 }
 
-static int mock_context_pin(struct intel_engine_cs *engine,
-			    struct i915_gem_context *ctx)
+static struct intel_ring *
+mock_context_pin(struct intel_engine_cs *engine,
+		 struct i915_gem_context *ctx)
 {
 	i915_gem_context_get(ctx);
-	return 0;
+	return engine->buffer;
 }
 
 static void mock_context_unpin(struct intel_engine_cs *engine,
@@ -72,7 +73,6 @@ static int mock_request_alloc(struct drm_i915_gem_request *request)
 	INIT_LIST_HEAD(&mock->link);
 	mock->delay = 0;
 
-	request->ring = request->engine->buffer;
 	return 0;
 }
 
@@ -112,7 +112,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	if (!ring)
 		return NULL;
 
-	ring->engine = engine;
 	ring->size = sz;
 	ring->effective_size = sz;
 	ring->vaddr = (void *)(ring + 1);
@@ -141,7 +140,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 
 	/* minimal engine setup for requests */
 	engine->base.i915 = i915;
-	engine->base.name = name;
+	snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
 	engine->base.id = id++;
 	engine->base.status_page.page_addr = (void *)(engine + 1);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 9f24c5da3f8d..627e2aa09766 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -30,6 +30,7 @@
 #include "mock_gem_device.h"
 #include "mock_gem_object.h"
 #include "mock_gtt.h"
+#include "mock_uncore.h"
 
 void mock_device_flush(struct drm_i915_private *i915)
 {
@@ -73,6 +74,7 @@ static void mock_device_release(struct drm_device *dev)
 
 	destroy_workqueue(i915->wq);
 
+	kmem_cache_destroy(i915->priorities);
 	kmem_cache_destroy(i915->dependencies);
 	kmem_cache_destroy(i915->requests);
 	kmem_cache_destroy(i915->vmas);
@@ -119,6 +121,7 @@ struct drm_i915_private *mock_gem_device(void)
 		goto err;
 
 	device_initialize(&pdev->dev);
+	pdev->class = PCI_BASE_CLASS_DISPLAY << 16;
 	pdev->dev.release = release_dev;
 	dev_set_name(&pdev->dev, "mock");
 	dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
@@ -143,6 +146,7 @@ struct drm_i915_private *mock_gem_device(void)
 	mkwrite_device_info(i915)->gen = -1;
 
 	spin_lock_init(&i915->mm.object_stat_lock);
+	mock_uncore_init(i915);
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
@@ -184,12 +188,16 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->dependencies)
 		goto err_requests;
 
+	i915->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
+	if (!i915->priorities)
+		goto err_dependencies;
+
 	mutex_lock(&i915->drm.struct_mutex);
 	INIT_LIST_HEAD(&i915->gt.timelines);
 	err = i915_gem_timeline_init__global(i915);
 	if (err) {
 		mutex_unlock(&i915->drm.struct_mutex);
-		goto err_dependencies;
+		goto err_priorities;
 	}
 
 	mock_init_ggtt(i915);
@@ -209,6 +217,8 @@ struct drm_i915_private *mock_gem_device(void)
 err_engine:
 	for_each_engine(engine, i915, id)
 		mock_engine_free(engine);
+err_priorities:
+	kmem_cache_destroy(i915->priorities);
 err_dependencies:
 	kmem_cache_destroy(i915->dependencies);
 err_requests:
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
new file mode 100644
index 000000000000..47b1f47c5812
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -0,0 +1,45 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include "mock_timeline.h"
26
27struct intel_timeline *mock_timeline(u64 context)
28{
29 static struct lock_class_key class;
30 struct intel_timeline *tl;
31
32 tl = kzalloc(sizeof(*tl), GFP_KERNEL);
33 if (!tl)
34 return NULL;
35
36 __intel_timeline_init(tl, NULL, context, &class, "mock");
37
38 return tl;
39}
40
41void mock_timeline_destroy(struct intel_timeline *tl)
42{
43 __intel_timeline_fini(tl);
44 kfree(tl);
45}
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h
new file mode 100644
index 000000000000..c27ff4639b8b
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h
@@ -0,0 +1,33 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#ifndef __MOCK_TIMELINE__
26#define __MOCK_TIMELINE__
27
28#include "../i915_gem_timeline.h"
29
30struct intel_timeline *mock_timeline(u64 context);
31void mock_timeline_destroy(struct intel_timeline *tl);
32
33#endif /* !__MOCK_TIMELINE__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c
new file mode 100644
index 000000000000..8ef14c7e5e38
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c
@@ -0,0 +1,46 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include "mock_uncore.h"
26
27#define __nop_write(x) \
28static void \
29nop_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { }
30__nop_write(8)
31__nop_write(16)
32__nop_write(32)
33
34#define __nop_read(x) \
35static u##x \
36nop_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { return 0; }
37__nop_read(8)
38__nop_read(16)
39__nop_read(32)
40__nop_read(64)
41
42void mock_uncore_init(struct drm_i915_private *i915)
43{
44 ASSIGN_WRITE_MMIO_VFUNCS(i915, nop);
45 ASSIGN_READ_MMIO_VFUNCS(i915, nop);
46}
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h
new file mode 100644
index 000000000000..d79aa3ca4d51
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h
@@ -0,0 +1,30 @@
1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#ifndef __MOCK_UNCORE_H
26#define __MOCK_UNCORE_H
27
28void mock_uncore_init(struct drm_i915_private *i915);
29
30#endif /* !__MOCK_UNCORE_H */