author		Dave Airlie <airlied@redhat.com>	2019-05-10 00:28:36 -0400
committer	Dave Airlie <airlied@redhat.com>	2019-05-10 00:28:36 -0400
commit		b1c19fa16c82f45032f6c1d715dbaf250bdd824c (patch)
tree		d6369c38e740535df0fd2c4af70f772dc0e1526f
parent		eb85d03e01c3e9f3b0ba7282b2e3515a635decb2 (diff)
parent		23372cce8fe7ee98a6458fd3d035a55b87f0c6fe (diff)
Merge tag 'drm-intel-next-fixes-2019-05-09' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
- Two fixes for the freshly enabled semaphore ordering code
- Includes gvt-next-fixes-2019-05-07
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190509100109.GA14820@jlahtine-desk.ger.corp.intel.com
-rw-r--r--	drivers/gpu/drm/i915/gvt/debugfs.c		 4
-rw-r--r--	drivers/gpu/drm/i915/gvt/dmabuf.c		19
-rw-r--r--	drivers/gpu/drm/i915/gvt/gtt.c			15
-rw-r--r--	drivers/gpu/drm/i915/gvt/gtt.h			16
-rw-r--r--	drivers/gpu/drm/i915/gvt/handlers.c		 4
-rw-r--r--	drivers/gpu/drm/i915/gvt/mmio_context.c		 1
-rw-r--r--	drivers/gpu/drm/i915/gvt/reg.h			 3
-rw-r--r--	drivers/gpu/drm/i915/gvt/scheduler.c		 2
-rw-r--r--	drivers/gpu/drm/i915/i915_request.c		59
-rw-r--r--	drivers/gpu/drm/i915/intel_context.c		 1
-rw-r--r--	drivers/gpu/drm/i915/intel_context_types.h	 3
11 files changed, 93 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index 2ec89bcb59f1..8a9606f91e68 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -196,9 +196,9 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
 int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
 {
 	struct dentry *ent;
-	char name[10] = "";
+	char name[16] = "";
 
-	sprintf(name, "vgpu%d", vgpu->id);
+	snprintf(name, 16, "vgpu%d", vgpu->id);
 	vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root);
 	if (!vgpu->debugfs)
 		return -ENOMEM;
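
An aside on why the buffer grows from 10 to 16 bytes: "vgpu" is 4 characters and a positive int can print as up to 10 digits, so the worst case needs 4 + 10 + 1 = 15 bytes including the NUL. Below is a minimal standalone sketch (not part of the patch; the id value is hypothetical) of how snprintf() bounds the write where sprintf() would overflow:

#include <stdio.h>

int main(void)
{
	char name[16];
	int id = 2000000000;	/* hypothetical worst-case id: 10 digits */

	/* "vgpu" (4) + 10 digits + NUL = 15 bytes; fits in 16, overflows 10 */
	snprintf(name, sizeof(name), "vgpu%d", id); /* truncates rather than overflowing */
	printf("%s\n", name);
	return 0;
}
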
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 4e1e425189ba..41c8ebc60c63 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -45,6 +45,7 @@ static int vgpu_gem_get_pages(
 	int i, ret;
 	gen8_pte_t __iomem *gtt_entries;
 	struct intel_vgpu_fb_info *fb_info;
+	u32 page_num;
 
 	fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info;
 	if (WARN_ON(!fb_info))
@@ -54,14 +55,15 @@ static int vgpu_gem_get_pages(
 	if (unlikely(!st))
 		return -ENOMEM;
 
-	ret = sg_alloc_table(st, fb_info->size, GFP_KERNEL);
+	page_num = obj->base.size >> PAGE_SHIFT;
+	ret = sg_alloc_table(st, page_num, GFP_KERNEL);
 	if (ret) {
 		kfree(st);
 		return ret;
 	}
 	gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
 		(fb_info->start >> PAGE_SHIFT);
-	for_each_sg(st->sgl, sg, fb_info->size, i) {
+	for_each_sg(st->sgl, sg, page_num, i) {
 		sg->offset = 0;
 		sg->length = PAGE_SIZE;
 		sg_dma_address(sg) =
@@ -158,7 +160,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
 		return NULL;
 
 	drm_gem_private_object_init(dev, &obj->base,
-		info->size << PAGE_SHIFT);
+		roundup(info->size, PAGE_SIZE));
 	i915_gem_object_init(obj, &intel_vgpu_gem_ops);
 
 	obj->read_domains = I915_GEM_DOMAIN_GTT;
@@ -206,11 +208,12 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		struct intel_vgpu_fb_info *info,
 		int plane_id)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_vgpu_primary_plane_format p;
 	struct intel_vgpu_cursor_plane_format c;
 	int ret, tile_height = 1;
 
+	memset(info, 0, sizeof(*info));
+
 	if (plane_id == DRM_PLANE_TYPE_PRIMARY) {
 		ret = intel_vgpu_decode_primary_plane(vgpu, &p);
 		if (ret)
@@ -267,8 +270,7 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	info->size = (info->stride * roundup(info->height, tile_height)
-		+ PAGE_SIZE - 1) >> PAGE_SHIFT;
+	info->size = info->stride * roundup(info->height, tile_height);
 	if (info->size == 0) {
 		gvt_vgpu_err("fb size is zero\n");
 		return -EINVAL;
@@ -278,11 +280,6 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start);
 		return -EFAULT;
 	}
-	if (((info->start >> PAGE_SHIFT) + info->size) >
-		ggtt_total_entries(&dev_priv->ggtt)) {
-		gvt_vgpu_err("Invalid GTT offset or size\n");
-		return -EFAULT;
-	}
 
 	if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) {
 		gvt_vgpu_err("invalid gma addr\n");
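
This change flips the unit of info->size from pages to bytes: vgpu_get_plane_info() now stores stride times the rounded-up height directly, vgpu_create_gem() rounds that byte count up to a whole page, and vgpu_gem_get_pages() derives the page count from the object size. A minimal standalone sketch of the arithmetic (values illustrative, PAGE_SHIFT assumed 12):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	uint64_t stride = 4096, height = 1080, tile_height = 8;

	/* what vgpu_get_plane_info() stores now: a byte count */
	uint64_t size = stride * ROUNDUP(height, tile_height);
	/* what vgpu_create_gem() hands to drm_gem_private_object_init() */
	uint64_t obj_size = ROUNDUP(size, PAGE_SIZE);
	/* what vgpu_gem_get_pages() derives for sg_alloc_table() */
	uint64_t page_num = obj_size >> PAGE_SHIFT;

	printf("%llu bytes -> %llu bytes -> %llu pages\n",
	       (unsigned long long)size,
	       (unsigned long long)obj_size,
	       (unsigned long long)page_num);
	return 0;
}
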
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index c2f7d20f6346..08c74e65836b 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -811,7 +811,7 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
 
 /* Allocate shadow page table without guest page. */
 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
-		struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type)
+		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
 {
 	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
 	struct intel_vgpu_ppgtt_spt *spt = NULL;
@@ -861,7 +861,7 @@ err_free_spt:
 
 /* Allocate shadow page table associated with specific gfn. */
 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
-		struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type,
+		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
 		unsigned long gfn, bool guest_pde_ips)
 {
 	struct intel_vgpu_ppgtt_spt *spt;
@@ -936,7 +936,7 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
 {
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 	struct intel_vgpu_ppgtt_spt *s;
-	intel_gvt_gtt_type_t cur_pt_type;
+	enum intel_gvt_gtt_type cur_pt_type;
 
 	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
 
@@ -1076,6 +1076,9 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
 	} else {
 		int type = get_next_pt_type(we->type);
 
+		if (!gtt_type_is_pt(type))
+			goto err;
+
 		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
 		if (IS_ERR(spt)) {
 			ret = PTR_ERR(spt);
@@ -1855,7 +1858,7 @@ static void vgpu_free_mm(struct intel_vgpu_mm *mm)
  * Zero on success, negative error code in pointer if failed.
  */
 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_vgpu_mm *mm;
@@ -2309,7 +2312,7 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
 }
 
 static int alloc_scratch_pages(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t type)
+		enum intel_gvt_gtt_type type)
 {
 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
@@ -2594,7 +2597,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
  * Zero on success, negative error code if failed.
  */
 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
 {
 	struct intel_vgpu_mm *mm;
 
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 32c573aea494..42d0394f0de2 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -95,8 +95,8 @@ struct intel_gvt_gtt {
 	unsigned long scratch_mfn;
 };
 
-typedef enum {
-	GTT_TYPE_INVALID = -1,
+enum intel_gvt_gtt_type {
+	GTT_TYPE_INVALID = 0,
 
 	GTT_TYPE_GGTT_PTE,
 
@@ -124,7 +124,7 @@ typedef enum {
 	GTT_TYPE_PPGTT_PML4_PT,
 
 	GTT_TYPE_MAX,
-} intel_gvt_gtt_type_t;
+};
 
 enum intel_gvt_mm_type {
 	INTEL_GVT_MM_GGTT,
@@ -148,7 +148,7 @@ struct intel_vgpu_mm {
 
 	union {
 		struct {
-			intel_gvt_gtt_type_t root_entry_type;
+			enum intel_gvt_gtt_type root_entry_type;
 			/*
 			 * The 4 PDPs in ring context. For 48bit addressing,
 			 * only PDP0 is valid and point to PML4. For 32bit
@@ -169,7 +169,7 @@ struct intel_vgpu_mm {
 };
 
 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
 
 static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm)
 {
@@ -233,7 +233,7 @@ struct intel_vgpu_ppgtt_spt {
 	struct intel_vgpu *vgpu;
 
 	struct {
-		intel_gvt_gtt_type_t type;
+		enum intel_gvt_gtt_type type;
 		bool pde_ips; /* for 64KB PTEs */
 		void *vaddr;
 		struct page *page;
@@ -241,7 +241,7 @@ struct intel_vgpu_ppgtt_spt {
 	} shadow_page;
 
 	struct {
-		intel_gvt_gtt_type_t type;
+		enum intel_gvt_gtt_type type;
 		bool pde_ips; /* for 64KB PTEs */
 		unsigned long gfn;
 		unsigned long write_cnt;
@@ -267,7 +267,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
 		u64 pdps[]);
 
 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
 
 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]);
 
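
Besides replacing the typedef with a named enum (the kernel's preferred style), the header change moves GTT_TYPE_INVALID from -1 to 0, presumably so that zero-filled memory decodes as an invalid type rather than as GTT_TYPE_GGTT_PTE (the old value 0), which pairs with the new gtt_type_is_pt() guard in ppgtt_populate_spt_by_guest_entry(). A minimal standalone sketch of that design choice (enum values mirror the header; the rest is illustrative, with calloc() standing in for kzalloc()):

#include <stdio.h>
#include <stdlib.h>

enum intel_gvt_gtt_type {
	GTT_TYPE_INVALID = 0,	/* zero-filled memory now reads as invalid */
	GTT_TYPE_GGTT_PTE,
	/* ... */
	GTT_TYPE_MAX,
};

struct entry {
	enum intel_gvt_gtt_type type;
};

int main(void)
{
	/* calloc() stands in for kzalloc(): the type field starts zeroed */
	struct entry *e = calloc(1, sizeof(*e));

	if (e && e->type == GTT_TYPE_INVALID)
		printf("zero-initialised entry is invalid, not GGTT_PTE\n");
	free(e);
	return 0;
}
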
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 18f01eeb2510..90673fca792f 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1206,7 +1206,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
 
 static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
 {
-	intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
+	enum intel_gvt_gtt_type root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
 	struct intel_vgpu_mm *mm;
 	u64 *pdps;
 
@@ -3303,7 +3303,7 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt)
 /* Special MMIO blocks. */
 static struct gvt_mmio_block mmio_blocks[] = {
 	{D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL},
-	{D_ALL, MCHBAR_MIRROR_REG_BASE, 0x4000, NULL, NULL},
+	{D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL},
 	{D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE,
 		pvinfo_mmio_read, pvinfo_mmio_write},
 	{D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL},
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index e7e14c842be4..edf6d646eb25 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -132,6 +132,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
 
 	{RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
 	{RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
+	{RCS0, _MMIO(0x20D8), 0xffff, true}, /* 0x20d8 */
 
 	{RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
 	{RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h
index 3de5b643b266..33aaa14bfdde 100644
--- a/drivers/gpu/drm/i915/gvt/reg.h
+++ b/drivers/gpu/drm/i915/gvt/reg.h
@@ -126,7 +126,4 @@
 #define RING_GFX_MODE(base)	_MMIO((base) + 0x29c)
 #define VF_GUARDBAND	_MMIO(0x83a4)
 
-/* define the effective range of MCHBAR register on Sandybridge+ */
-#define MCHBAR_MIRROR_REG_BASE	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000)
-
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 8998fa5ab198..7c99bbc3e2b8 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1343,7 +1343,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
 	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
 	struct intel_vgpu_mm *mm;
 	struct intel_vgpu *vgpu = workload->vgpu;
-	intel_gvt_gtt_type_t root_entry_type;
+	enum intel_gvt_gtt_type root_entry_type;
 	u64 pdps[GVT_RING_CTX_NR_PDPS];
 
 	switch (desc->addressing_mode) {
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b836721d3b13..ce342f7f7ddb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request)
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
 
+	/*
+	 * Are we using semaphores when the gpu is already saturated?
+	 *
+	 * Using semaphores incurs a cost in having the GPU poll a
+	 * memory location, busywaiting for it to change. The continual
+	 * memory reads can have a noticeable impact on the rest of the
+	 * system with the extra bus traffic, stalling the cpu as it too
+	 * tries to access memory across the bus (perf stat -e bus-cycles).
+	 *
+	 * If we installed a semaphore on this request and we only submit
+	 * the request after the signaler completed, that indicates the
+	 * system is overloaded and using semaphores at this time only
+	 * increases the amount of work we are doing. If so, we disable
+	 * further use of semaphores until we are idle again, whence we
+	 * optimistically try again.
+	 */
+	if (request->sched.semaphores &&
+	    i915_sw_fence_signaled(&request->semaphore))
+		request->hw_context->saturated |= request->sched.semaphores;
+
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
@@ -799,6 +819,39 @@ err_unreserve:
 }
 
 static int
+i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
+{
+	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+		return 0;
+
+	signal = list_prev_entry(signal, ring_link);
+	if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
+		return 0;
+
+	return i915_sw_fence_await_dma_fence(&rq->submit,
+					     &signal->fence, 0,
+					     I915_FENCE_GFP);
+}
+
+static intel_engine_mask_t
+already_busywaiting(struct i915_request *rq)
+{
+	/*
+	 * Polling a semaphore causes bus traffic, delaying other users of
+	 * both the GPU and CPU. We want to limit the impact on others,
+	 * while taking advantage of early submission to reduce GPU
+	 * latency. Therefore we restrict ourselves to not using more
+	 * than one semaphore from each source, and not using a semaphore
+	 * if we have detected the engine is saturated (i.e. would not be
+	 * submitted early and cause bus traffic reading an already passed
+	 * semaphore).
+	 *
+	 * See the are-we-too-late? check in __i915_request_submit().
+	 */
+	return rq->sched.semaphores | rq->hw_context->saturated;
+}
+
+static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
 		    gfp_t gfp)
@@ -811,11 +864,15 @@ emit_semaphore_wait(struct i915_request *to,
 	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
 
 	/* Just emit the first semaphore we see as request space is limited. */
-	if (to->sched.semaphores & from->engine->mask)
+	if (already_busywaiting(to) & from->engine->mask)
 		return i915_sw_fence_await_dma_fence(&to->submit,
 						     &from->fence, 0,
 						     I915_FENCE_GFP);
 
+	err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
 	err = i915_sw_fence_await_dma_fence(&to->semaphore,
 					    &from->fence, 0,
 					    I915_FENCE_GFP);
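
The saturation tracking above is a per-context bitmask protocol: each bit is an engine, rq->sched.semaphores records which engines this request busywaits on, and when __i915_request_submit() notices the semaphore target had already signalled (so the busywait bought nothing), it folds those bits into the context's saturated mask; already_busywaiting() then steers emit_semaphore_wait() to a plain fence wait for those engines. A minimal standalone sketch of the mask logic (types and names simplified from the patch; the "already done" flag is illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t engine_mask_t;	/* stand-in for intel_engine_mask_t */

struct ctx { engine_mask_t saturated; };
struct req {
	struct ctx *ctx;
	engine_mask_t semaphores;	/* engines this request busywaits on */
};

/* mirrors already_busywaiting(): engines we must not add semaphores for */
static engine_mask_t already_busywaiting(const struct req *rq)
{
	return rq->semaphores | rq->ctx->saturated;
}

int main(void)
{
	struct ctx c = { .saturated = 0 };
	struct req rq = { .ctx = &c, .semaphores = 1u << 2 /* engine 2 */ };
	bool signaler_already_done = true;	/* illustrative condition */

	/* the are-we-too-late? check from __i915_request_submit() */
	if (rq.semaphores && signaler_already_done)
		c.saturated |= rq.semaphores;

	/* a later request on this context now skips the busywait */
	printf("semaphore busywait on engine 2 allowed: %s\n",
	       (already_busywaiting(&rq) & (1u << 2)) ? "no" : "yes");
	return 0;
}
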
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 8931e0fee873..924cc556223a 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce,
 	ce->gem_context = ctx;
 	ce->engine = engine;
 	ce->ops = engine->cops;
+	ce->saturated = 0;
 
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 68b4ca1611e0..339c7437fe82 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "intel_engine_types.h"
 
 struct i915_gem_context;
 struct i915_vma;
@@ -58,6 +59,8 @@ struct intel_context {
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
+	intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
 	/**
 	 * active_tracker: Active tracker for the external rq activity
 	 * on this intel_context object.