aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2018-03-15 22:51:35 -0400
committerDave Airlie <airlied@redhat.com>2018-03-15 22:51:35 -0400
commit3a1b5de36fdf403d1b004e537dc13997633d65df (patch)
treecfddd7bc6a5b7bab371e2944d7e03a85148053d7
parentd4487b57a4594ffa1d28911706144329096a6de7 (diff)
parent05b429a8eed83084a8a7fc04c97120a5ba07b5be (diff)
Merge tag 'drm-intel-fixes-2018-03-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes
Only GVT fixes: - Two warnings fix for runtime pm and usr copy (Xiong, Zhenyu) - OA context fix for vGPU profiling (Min) - privilege batch buffer reloc fix (Fred) * tag 'drm-intel-fixes-2018-03-15' of git://anongit.freedesktop.org/drm/drm-intel: drm/i915/gvt: fix user copy warning by whitelist workload rb_tail field drm/i915/gvt: Correct the privilege shadow batch buffer address drm/i915/gvt: keep oa config in shadow ctx drm/i915/gvt: Add runtime_pm_get/put into gvt_switch_mmio
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c8
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c71
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.h5
4 files changed, 82 insertions, 4 deletions
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index c8454ac43fae..db6b94dda5df 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -471,6 +471,7 @@ struct parser_exec_state {
471 * used when ret from 2nd level batch buffer 471 * used when ret from 2nd level batch buffer
472 */ 472 */
473 int saved_buf_addr_type; 473 int saved_buf_addr_type;
474 bool is_ctx_wa;
474 475
475 struct cmd_info *info; 476 struct cmd_info *info;
476 477
@@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s)
1715 bb->accessing = true; 1716 bb->accessing = true;
1716 bb->bb_start_cmd_va = s->ip_va; 1717 bb->bb_start_cmd_va = s->ip_va;
1717 1718
1719 if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
1720 bb->bb_offset = s->ip_va - s->rb_va;
1721 else
1722 bb->bb_offset = 0;
1723
1718 /* 1724 /*
1719 * ip_va saves the virtual address of the shadow batch buffer, while 1725 * ip_va saves the virtual address of the shadow batch buffer, while
1720 * ip_gma saves the graphics address of the original batch buffer. 1726 * ip_gma saves the graphics address of the original batch buffer.
@@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload)
2571 s.ring_tail = gma_tail; 2577 s.ring_tail = gma_tail;
2572 s.rb_va = workload->shadow_ring_buffer_va; 2578 s.rb_va = workload->shadow_ring_buffer_va;
2573 s.workload = workload; 2579 s.workload = workload;
2580 s.is_ctx_wa = false;
2574 2581
2575 if ((bypass_scan_mask & (1 << workload->ring_id)) || 2582 if ((bypass_scan_mask & (1 << workload->ring_id)) ||
2576 gma_head == gma_tail) 2583 gma_head == gma_tail)
@@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
2624 s.ring_tail = gma_tail; 2631 s.ring_tail = gma_tail;
2625 s.rb_va = wa_ctx->indirect_ctx.shadow_va; 2632 s.rb_va = wa_ctx->indirect_ctx.shadow_va;
2626 s.workload = workload; 2633 s.workload = workload;
2634 s.is_ctx_wa = true;
2627 2635
2628 if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { 2636 if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
2629 ret = -EINVAL; 2637 ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 256f1bb522b7..152df3d0291e 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -394,9 +394,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
394 * performace for batch mmio read/write, so we need 394 * performace for batch mmio read/write, so we need
395 * handle forcewake mannually. 395 * handle forcewake mannually.
396 */ 396 */
397 intel_runtime_pm_get(dev_priv);
397 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 398 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
398 switch_mmio(pre, next, ring_id); 399 switch_mmio(pre, next, ring_id);
399 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 400 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
401 intel_runtime_pm_put(dev_priv);
400} 402}
401 403
402/** 404/**
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index b55b3580ca1d..d74d6f05c62c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -52,6 +52,54 @@ static void set_context_pdp_root_pointer(
52 pdp_pair[i].val = pdp[7 - i]; 52 pdp_pair[i].val = pdp[7 - i];
53} 53}
54 54
55/*
56 * when populating shadow ctx from guest, we should not overrride oa related
57 * registers, so that they will not be overlapped by guest oa configs. Thus
58 * made it possible to capture oa data from host for both host and guests.
59 */
60static void sr_oa_regs(struct intel_vgpu_workload *workload,
61 u32 *reg_state, bool save)
62{
63 struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
64 u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
65 u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
66 int i = 0;
67 u32 flex_mmio[] = {
68 i915_mmio_reg_offset(EU_PERF_CNTL0),
69 i915_mmio_reg_offset(EU_PERF_CNTL1),
70 i915_mmio_reg_offset(EU_PERF_CNTL2),
71 i915_mmio_reg_offset(EU_PERF_CNTL3),
72 i915_mmio_reg_offset(EU_PERF_CNTL4),
73 i915_mmio_reg_offset(EU_PERF_CNTL5),
74 i915_mmio_reg_offset(EU_PERF_CNTL6),
75 };
76
77 if (!workload || !reg_state || workload->ring_id != RCS)
78 return;
79
80 if (save) {
81 workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
82
83 for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
84 u32 state_offset = ctx_flexeu0 + i * 2;
85
86 workload->flex_mmio[i] = reg_state[state_offset + 1];
87 }
88 } else {
89 reg_state[ctx_oactxctrl] =
90 i915_mmio_reg_offset(GEN8_OACTXCONTROL);
91 reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
92
93 for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
94 u32 state_offset = ctx_flexeu0 + i * 2;
95 u32 mmio = flex_mmio[i];
96
97 reg_state[state_offset] = mmio;
98 reg_state[state_offset + 1] = workload->flex_mmio[i];
99 }
100 }
101}
102
55static int populate_shadow_context(struct intel_vgpu_workload *workload) 103static int populate_shadow_context(struct intel_vgpu_workload *workload)
56{ 104{
57 struct intel_vgpu *vgpu = workload->vgpu; 105 struct intel_vgpu *vgpu = workload->vgpu;
@@ -98,6 +146,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
98 page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); 146 page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
99 shadow_ring_context = kmap(page); 147 shadow_ring_context = kmap(page);
100 148
149 sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
101#define COPY_REG(name) \ 150#define COPY_REG(name) \
102 intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ 151 intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
103 + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) 152 + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
@@ -122,6 +171,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
122 sizeof(*shadow_ring_context), 171 sizeof(*shadow_ring_context),
123 I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); 172 I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
124 173
174 sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
125 kunmap(page); 175 kunmap(page);
126 return 0; 176 return 0;
127} 177}
@@ -376,6 +426,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
376 goto err; 426 goto err;
377 } 427 }
378 428
429 /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va
430 * is only updated into ring_scan_buffer, not real ring address
431 * allocated in later copy_workload_to_ring_buffer. pls be noted
432 * shadow_ring_buffer_va is now pointed to real ring buffer va
433 * in copy_workload_to_ring_buffer.
434 */
435
436 if (bb->bb_offset)
437 bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
438 + bb->bb_offset;
439
379 /* relocate shadow batch buffer */ 440 /* relocate shadow batch buffer */
380 bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); 441 bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
381 if (gmadr_bytes == 8) 442 if (gmadr_bytes == 8)
@@ -1044,10 +1105,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
1044 1105
1045 bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); 1106 bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
1046 1107
1047 s->workloads = kmem_cache_create("gvt-g_vgpu_workload", 1108 s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
1048 sizeof(struct intel_vgpu_workload), 0, 1109 sizeof(struct intel_vgpu_workload), 0,
1049 SLAB_HWCACHE_ALIGN, 1110 SLAB_HWCACHE_ALIGN,
1050 NULL); 1111 offsetof(struct intel_vgpu_workload, rb_tail),
1112 sizeof_field(struct intel_vgpu_workload, rb_tail),
1113 NULL);
1051 1114
1052 if (!s->workloads) { 1115 if (!s->workloads) {
1053 ret = -ENOMEM; 1116 ret = -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index ff175a98b19e..a79a4f60637e 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -110,6 +110,10 @@ struct intel_vgpu_workload {
110 /* shadow batch buffer */ 110 /* shadow batch buffer */
111 struct list_head shadow_bb; 111 struct list_head shadow_bb;
112 struct intel_shadow_wa_ctx wa_ctx; 112 struct intel_shadow_wa_ctx wa_ctx;
113
114 /* oa registers */
115 u32 oactxctrl;
116 u32 flex_mmio[7];
113}; 117};
114 118
115struct intel_vgpu_shadow_bb { 119struct intel_vgpu_shadow_bb {
@@ -120,6 +124,7 @@ struct intel_vgpu_shadow_bb {
120 u32 *bb_start_cmd_va; 124 u32 *bb_start_cmd_va;
121 unsigned int clflush; 125 unsigned int clflush;
122 bool accessing; 126 bool accessing;
127 unsigned long bb_offset;
123}; 128};
124 129
125#define workload_q_head(vgpu, ring_id) \ 130#define workload_q_head(vgpu, ring_id) \