diff options
author | Dave Airlie <airlied@redhat.com> | 2018-03-15 22:51:35 -0400 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2018-03-15 22:51:35 -0400 |
commit | 3a1b5de36fdf403d1b004e537dc13997633d65df (patch) | |
tree | cfddd7bc6a5b7bab371e2944d7e03a85148053d7 | |
parent | d4487b57a4594ffa1d28911706144329096a6de7 (diff) | |
parent | 05b429a8eed83084a8a7fc04c97120a5ba07b5be (diff) |
Merge tag 'drm-intel-fixes-2018-03-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes
Only GVT fixes:
- Two warnings fix for runtime pm and usr copy (Xiong, Zhenyu)
- OA context fix for vGPU profiling (Min)
- privilege batch buffer reloc fix (Fred)
* tag 'drm-intel-fixes-2018-03-15' of git://anongit.freedesktop.org/drm/drm-intel:
drm/i915/gvt: fix user copy warning by whitelist workload rb_tail field
drm/i915/gvt: Correct the privilege shadow batch buffer address
drm/i915/gvt: keep oa config in shadow ctx
drm/i915/gvt: Add runtime_pm_get/put into gvt_switch_mmio
-rw-r--r-- | drivers/gpu/drm/i915/gvt/cmd_parser.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/mmio_context.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/scheduler.c | 71 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/scheduler.h | 5 |
4 files changed, 82 insertions, 4 deletions
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index c8454ac43fae..db6b94dda5df 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c | |||
@@ -471,6 +471,7 @@ struct parser_exec_state { | |||
471 | * used when ret from 2nd level batch buffer | 471 | * used when ret from 2nd level batch buffer |
472 | */ | 472 | */ |
473 | int saved_buf_addr_type; | 473 | int saved_buf_addr_type; |
474 | bool is_ctx_wa; | ||
474 | 475 | ||
475 | struct cmd_info *info; | 476 | struct cmd_info *info; |
476 | 477 | ||
@@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s) | |||
1715 | bb->accessing = true; | 1716 | bb->accessing = true; |
1716 | bb->bb_start_cmd_va = s->ip_va; | 1717 | bb->bb_start_cmd_va = s->ip_va; |
1717 | 1718 | ||
1719 | if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa)) | ||
1720 | bb->bb_offset = s->ip_va - s->rb_va; | ||
1721 | else | ||
1722 | bb->bb_offset = 0; | ||
1723 | |||
1718 | /* | 1724 | /* |
1719 | * ip_va saves the virtual address of the shadow batch buffer, while | 1725 | * ip_va saves the virtual address of the shadow batch buffer, while |
1720 | * ip_gma saves the graphics address of the original batch buffer. | 1726 | * ip_gma saves the graphics address of the original batch buffer. |
@@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload) | |||
2571 | s.ring_tail = gma_tail; | 2577 | s.ring_tail = gma_tail; |
2572 | s.rb_va = workload->shadow_ring_buffer_va; | 2578 | s.rb_va = workload->shadow_ring_buffer_va; |
2573 | s.workload = workload; | 2579 | s.workload = workload; |
2580 | s.is_ctx_wa = false; | ||
2574 | 2581 | ||
2575 | if ((bypass_scan_mask & (1 << workload->ring_id)) || | 2582 | if ((bypass_scan_mask & (1 << workload->ring_id)) || |
2576 | gma_head == gma_tail) | 2583 | gma_head == gma_tail) |
@@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) | |||
2624 | s.ring_tail = gma_tail; | 2631 | s.ring_tail = gma_tail; |
2625 | s.rb_va = wa_ctx->indirect_ctx.shadow_va; | 2632 | s.rb_va = wa_ctx->indirect_ctx.shadow_va; |
2626 | s.workload = workload; | 2633 | s.workload = workload; |
2634 | s.is_ctx_wa = true; | ||
2627 | 2635 | ||
2628 | if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { | 2636 | if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { |
2629 | ret = -EINVAL; | 2637 | ret = -EINVAL; |
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 256f1bb522b7..152df3d0291e 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c | |||
@@ -394,9 +394,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, | |||
394 | * performace for batch mmio read/write, so we need | 394 | * performace for batch mmio read/write, so we need |
395 | * handle forcewake mannually. | 395 | * handle forcewake mannually. |
396 | */ | 396 | */ |
397 | intel_runtime_pm_get(dev_priv); | ||
397 | intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); | 398 | intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); |
398 | switch_mmio(pre, next, ring_id); | 399 | switch_mmio(pre, next, ring_id); |
399 | intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); | 400 | intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); |
401 | intel_runtime_pm_put(dev_priv); | ||
400 | } | 402 | } |
401 | 403 | ||
402 | /** | 404 | /** |
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index b55b3580ca1d..d74d6f05c62c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c | |||
@@ -52,6 +52,54 @@ static void set_context_pdp_root_pointer( | |||
52 | pdp_pair[i].val = pdp[7 - i]; | 52 | pdp_pair[i].val = pdp[7 - i]; |
53 | } | 53 | } |
54 | 54 | ||
55 | /* | ||
56 | * when populating shadow ctx from guest, we should not overrride oa related | ||
57 | * registers, so that they will not be overlapped by guest oa configs. Thus | ||
58 | * made it possible to capture oa data from host for both host and guests. | ||
59 | */ | ||
60 | static void sr_oa_regs(struct intel_vgpu_workload *workload, | ||
61 | u32 *reg_state, bool save) | ||
62 | { | ||
63 | struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; | ||
64 | u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; | ||
65 | u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; | ||
66 | int i = 0; | ||
67 | u32 flex_mmio[] = { | ||
68 | i915_mmio_reg_offset(EU_PERF_CNTL0), | ||
69 | i915_mmio_reg_offset(EU_PERF_CNTL1), | ||
70 | i915_mmio_reg_offset(EU_PERF_CNTL2), | ||
71 | i915_mmio_reg_offset(EU_PERF_CNTL3), | ||
72 | i915_mmio_reg_offset(EU_PERF_CNTL4), | ||
73 | i915_mmio_reg_offset(EU_PERF_CNTL5), | ||
74 | i915_mmio_reg_offset(EU_PERF_CNTL6), | ||
75 | }; | ||
76 | |||
77 | if (!workload || !reg_state || workload->ring_id != RCS) | ||
78 | return; | ||
79 | |||
80 | if (save) { | ||
81 | workload->oactxctrl = reg_state[ctx_oactxctrl + 1]; | ||
82 | |||
83 | for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { | ||
84 | u32 state_offset = ctx_flexeu0 + i * 2; | ||
85 | |||
86 | workload->flex_mmio[i] = reg_state[state_offset + 1]; | ||
87 | } | ||
88 | } else { | ||
89 | reg_state[ctx_oactxctrl] = | ||
90 | i915_mmio_reg_offset(GEN8_OACTXCONTROL); | ||
91 | reg_state[ctx_oactxctrl + 1] = workload->oactxctrl; | ||
92 | |||
93 | for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { | ||
94 | u32 state_offset = ctx_flexeu0 + i * 2; | ||
95 | u32 mmio = flex_mmio[i]; | ||
96 | |||
97 | reg_state[state_offset] = mmio; | ||
98 | reg_state[state_offset + 1] = workload->flex_mmio[i]; | ||
99 | } | ||
100 | } | ||
101 | } | ||
102 | |||
55 | static int populate_shadow_context(struct intel_vgpu_workload *workload) | 103 | static int populate_shadow_context(struct intel_vgpu_workload *workload) |
56 | { | 104 | { |
57 | struct intel_vgpu *vgpu = workload->vgpu; | 105 | struct intel_vgpu *vgpu = workload->vgpu; |
@@ -98,6 +146,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) | |||
98 | page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); | 146 | page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); |
99 | shadow_ring_context = kmap(page); | 147 | shadow_ring_context = kmap(page); |
100 | 148 | ||
149 | sr_oa_regs(workload, (u32 *)shadow_ring_context, true); | ||
101 | #define COPY_REG(name) \ | 150 | #define COPY_REG(name) \ |
102 | intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ | 151 | intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ |
103 | + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) | 152 | + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) |
@@ -122,6 +171,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) | |||
122 | sizeof(*shadow_ring_context), | 171 | sizeof(*shadow_ring_context), |
123 | I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); | 172 | I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); |
124 | 173 | ||
174 | sr_oa_regs(workload, (u32 *)shadow_ring_context, false); | ||
125 | kunmap(page); | 175 | kunmap(page); |
126 | return 0; | 176 | return 0; |
127 | } | 177 | } |
@@ -376,6 +426,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) | |||
376 | goto err; | 426 | goto err; |
377 | } | 427 | } |
378 | 428 | ||
429 | /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va | ||
430 | * is only updated into ring_scan_buffer, not real ring address | ||
431 | * allocated in later copy_workload_to_ring_buffer. pls be noted | ||
432 | * shadow_ring_buffer_va is now pointed to real ring buffer va | ||
433 | * in copy_workload_to_ring_buffer. | ||
434 | */ | ||
435 | |||
436 | if (bb->bb_offset) | ||
437 | bb->bb_start_cmd_va = workload->shadow_ring_buffer_va | ||
438 | + bb->bb_offset; | ||
439 | |||
379 | /* relocate shadow batch buffer */ | 440 | /* relocate shadow batch buffer */ |
380 | bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); | 441 | bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); |
381 | if (gmadr_bytes == 8) | 442 | if (gmadr_bytes == 8) |
@@ -1044,10 +1105,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) | |||
1044 | 1105 | ||
1045 | bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); | 1106 | bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); |
1046 | 1107 | ||
1047 | s->workloads = kmem_cache_create("gvt-g_vgpu_workload", | 1108 | s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload", |
1048 | sizeof(struct intel_vgpu_workload), 0, | 1109 | sizeof(struct intel_vgpu_workload), 0, |
1049 | SLAB_HWCACHE_ALIGN, | 1110 | SLAB_HWCACHE_ALIGN, |
1050 | NULL); | 1111 | offsetof(struct intel_vgpu_workload, rb_tail), |
1112 | sizeof_field(struct intel_vgpu_workload, rb_tail), | ||
1113 | NULL); | ||
1051 | 1114 | ||
1052 | if (!s->workloads) { | 1115 | if (!s->workloads) { |
1053 | ret = -ENOMEM; | 1116 | ret = -ENOMEM; |
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index ff175a98b19e..a79a4f60637e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h | |||
@@ -110,6 +110,10 @@ struct intel_vgpu_workload { | |||
110 | /* shadow batch buffer */ | 110 | /* shadow batch buffer */ |
111 | struct list_head shadow_bb; | 111 | struct list_head shadow_bb; |
112 | struct intel_shadow_wa_ctx wa_ctx; | 112 | struct intel_shadow_wa_ctx wa_ctx; |
113 | |||
114 | /* oa registers */ | ||
115 | u32 oactxctrl; | ||
116 | u32 flex_mmio[7]; | ||
113 | }; | 117 | }; |
114 | 118 | ||
115 | struct intel_vgpu_shadow_bb { | 119 | struct intel_vgpu_shadow_bb { |
@@ -120,6 +124,7 @@ struct intel_vgpu_shadow_bb { | |||
120 | u32 *bb_start_cmd_va; | 124 | u32 *bb_start_cmd_va; |
121 | unsigned int clflush; | 125 | unsigned int clflush; |
122 | bool accessing; | 126 | bool accessing; |
127 | unsigned long bb_offset; | ||
123 | }; | 128 | }; |
124 | 129 | ||
125 | #define workload_q_head(vgpu, ring_id) \ | 130 | #define workload_q_head(vgpu, ring_id) \ |