diff options
author | Ben Widawsky <benjamin.widawsky@intel.com> | 2013-11-03 00:07:18 -0400 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-11-08 12:09:44 -0500 |
commit | 94ec8f6130ef4fdce1c80ca6bdeeef103a239a7c (patch) | |
tree | 05997ac327a28054414536ced2b2186c00f6b542 | |
parent | d31eb10e6c9f0f040c82ab710f93ce95e6f14d89 (diff) |
drm/i915/bdw: Add GTT functions
With the PTE clarifications, the bind and clear functions can now be
added for gen8.
v2: Use for_each_sg_pages in gen8_ggtt_insert_entries.
v3: Drop dev argument to pte encode functions, upstream lost it. Also
rebase on top of the scratch page movement.
v4: Rebase on top of the new address space vfuncs.
v5: Add the bool use_scratch argument to clear_range and the bool valid argument
to the PTE encode function to follow upstream changes.
v6: Add a FIXME(BDW) about the size mismatch of the readback check
that Jon Bloomfield spotted.
v7: Squash in fixup patch from Ben for the posting read to match the
64bit ptes and so shut up the WARN.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.c | 89 |
1 files changed, 86 insertions, 3 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b66284e10032..cf539a6a5a22 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c | |||
@@ -58,6 +58,15 @@ typedef uint64_t gen8_gtt_pte_t; | |||
58 | #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) | 58 | #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) |
59 | #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) | 59 | #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) |
60 | 60 | ||
61 | static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, | ||
62 | enum i915_cache_level level, | ||
63 | bool valid) | ||
64 | { | ||
65 | gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0; | ||
66 | pte |= addr; | ||
67 | return pte; | ||
68 | } | ||
69 | |||
61 | static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, | 70 | static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, |
62 | enum i915_cache_level level, | 71 | enum i915_cache_level level, |
63 | bool valid) | 72 | bool valid) |
@@ -576,6 +585,57 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) | |||
576 | return 0; | 585 | return 0; |
577 | } | 586 | } |
578 | 587 | ||
/*
 * Write one 64-bit gen8 PTE into the MMIO-mapped GSM.
 *
 * Where the platform provides writeq the entry is updated with a single
 * 64-bit store; otherwise it is emitted as two 32-bit iowrite32 calls
 * (low dword first, high dword at +4).  NOTE(review): the split write is
 * not atomic, so on the fallback path the GPU could observe a half-written
 * PTE — presumably callers guarantee the range is not in use while being
 * rewritten; confirm against the callers.
 */
static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}
597 | |||
598 | static void gen8_ggtt_insert_entries(struct i915_address_space *vm, | ||
599 | struct sg_table *st, | ||
600 | unsigned int first_entry, | ||
601 | enum i915_cache_level level) | ||
602 | { | ||
603 | struct drm_i915_private *dev_priv = vm->dev->dev_private; | ||
604 | gen8_gtt_pte_t __iomem *gtt_entries = | ||
605 | (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; | ||
606 | int i = 0; | ||
607 | struct sg_page_iter sg_iter; | ||
608 | dma_addr_t addr; | ||
609 | |||
610 | for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { | ||
611 | addr = sg_dma_address(sg_iter.sg) + | ||
612 | (sg_iter.sg_pgoffset << PAGE_SHIFT); | ||
613 | gen8_set_pte(>t_entries[i], | ||
614 | gen8_pte_encode(addr, level, true)); | ||
615 | i++; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * XXX: This serves as a posting read to make sure that the PTE has | ||
620 | * actually been updated. There is some concern that even though | ||
621 | * registers and PTEs are within the same BAR that they are potentially | ||
622 | * of NUMA access patterns. Therefore, even with the way we assume | ||
623 | * hardware should work, we must keep this posting read for paranoia. | ||
624 | */ | ||
625 | if (i != 0) | ||
626 | WARN_ON(readq(>t_entries[i-1]) | ||
627 | != gen8_pte_encode(addr, level, true)); | ||
628 | |||
629 | #if 0 /* TODO: Still needed on GEN8? */ | ||
630 | /* This next bit makes the above posting read even more important. We | ||
631 | * want to flush the TLBs only after we're certain all the PTE updates | ||
632 | * have finished. | ||
633 | */ | ||
634 | I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); | ||
635 | POSTING_READ(GFX_FLSH_CNTL_GEN6); | ||
636 | #endif | ||
637 | } | ||
638 | |||
579 | /* | 639 | /* |
580 | * Binds an object into the global gtt with the specified cache level. The object | 640 | * Binds an object into the global gtt with the specified cache level. The object |
581 | * will be accessible to the GPU via commands whose operands reference offsets | 641 | * will be accessible to the GPU via commands whose operands reference offsets |
@@ -618,6 +678,30 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, | |||
618 | POSTING_READ(GFX_FLSH_CNTL_GEN6); | 678 | POSTING_READ(GFX_FLSH_CNTL_GEN6); |
619 | } | 679 | } |
620 | 680 | ||
681 | static void gen8_ggtt_clear_range(struct i915_address_space *vm, | ||
682 | unsigned int first_entry, | ||
683 | unsigned int num_entries, | ||
684 | bool use_scratch) | ||
685 | { | ||
686 | struct drm_i915_private *dev_priv = vm->dev->dev_private; | ||
687 | gen8_gtt_pte_t scratch_pte, __iomem *gtt_base = | ||
688 | (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; | ||
689 | const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; | ||
690 | int i; | ||
691 | |||
692 | if (WARN(num_entries > max_entries, | ||
693 | "First entry = %d; Num entries = %d (max=%d)\n", | ||
694 | first_entry, num_entries, max_entries)) | ||
695 | num_entries = max_entries; | ||
696 | |||
697 | scratch_pte = gen8_pte_encode(vm->scratch.addr, | ||
698 | I915_CACHE_LLC, | ||
699 | use_scratch); | ||
700 | for (i = 0; i < num_entries; i++) | ||
701 | gen8_set_pte(>t_base[i], scratch_pte); | ||
702 | readl(gtt_base); | ||
703 | } | ||
704 | |||
621 | static void gen6_ggtt_clear_range(struct i915_address_space *vm, | 705 | static void gen6_ggtt_clear_range(struct i915_address_space *vm, |
622 | unsigned int first_entry, | 706 | unsigned int first_entry, |
623 | unsigned int num_entries, | 707 | unsigned int num_entries, |
@@ -641,7 +725,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, | |||
641 | readl(gtt_base); | 725 | readl(gtt_base); |
642 | } | 726 | } |
643 | 727 | ||
644 | |||
645 | static void i915_ggtt_insert_entries(struct i915_address_space *vm, | 728 | static void i915_ggtt_insert_entries(struct i915_address_space *vm, |
646 | struct sg_table *st, | 729 | struct sg_table *st, |
647 | unsigned int pg_start, | 730 | unsigned int pg_start, |
@@ -947,8 +1030,8 @@ static int gen8_gmch_probe(struct drm_device *dev, | |||
947 | 1030 | ||
948 | ret = ggtt_probe_common(dev, gtt_size); | 1031 | ret = ggtt_probe_common(dev, gtt_size); |
949 | 1032 | ||
950 | dev_priv->gtt.base.clear_range = NULL; | 1033 | dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; |
951 | dev_priv->gtt.base.insert_entries = NULL; | 1034 | dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries; |
952 | 1035 | ||
953 | return ret; | 1036 | return ret; |
954 | } | 1037 | } |