diff options
author | Ben Widawsky <benjamin.widawsky@intel.com> | 2013-11-03 00:07:18 -0400 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-11-08 12:09:44 -0500 |
commit | 94ec8f6130ef4fdce1c80ca6bdeeef103a239a7c (patch) | |
tree | 05997ac327a28054414536ced2b2186c00f6b542 | |
parent | d31eb10e6c9f0f040c82ab710f93ce95e6f14d89 (diff) |
drm/i915/bdw: Add GTT functions
With the PTE clarifications, the bind and clear functions can now be
added for gen8.
v2: Use for_each_sg_pages in gen8_ggtt_insert_entries.
v3: Drop dev argument to pte encode functions, upstream lost it. Also
rebase on top of the scratch page movement.
v4: Rebase on top of the new address space vfuncs.
v5: Add the bool use_scratch argument to clear_range and the bool valid argument
to the PTE encode function to follow upstream changes.
v6: Add a FIXME(BDW) about the size mismatch of the readback check
that Jon Bloomfield spotted.
v7: Squash in fixup patch from Ben for the posting read to match the
64bit ptes and so shut up the WARN.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.c | 89 |
1 files changed, 86 insertions, 3 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b66284e10032..cf539a6a5a22 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c | |||
@@ -58,6 +58,15 @@ typedef uint64_t gen8_gtt_pte_t; | |||
58 | #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) | 58 | #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) |
59 | #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) | 59 | #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) |
60 | 60 | ||
61 | static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, | ||
62 | enum i915_cache_level level, | ||
63 | bool valid) | ||
64 | { | ||
65 | gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0; | ||
66 | pte |= addr; | ||
67 | return pte; | ||
68 | } | ||
69 | |||
61 | static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, | 70 | static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, |
62 | enum i915_cache_level level, | 71 | enum i915_cache_level level, |
63 | bool valid) | 72 | bool valid) |
@@ -576,6 +585,57 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) | |||
576 | return 0; | 585 | return 0; |
577 | } | 586 | } |
578 | 587 | ||
/*
 * Write one 64-bit gen8 PTE into the MMIO-mapped GSM.
 *
 * Where the platform provides writeq the entry is updated with a single
 * 64-bit store; otherwise it is emitted as two 32-bit iowrite32 calls
 * (low dword first, high dword at +4).  NOTE(review): the split write is
 * not atomic, so on the fallback path the GPU could observe a half-written
 * PTE — presumably callers guarantee the range is not in use while being
 * rewritten; confirm against the callers.
 */
static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}
597 | |||
598 | static void gen8_ggtt_insert_entries(struct i915_address_space *vm, | ||
599 | struct sg_table *st, | ||
600 | unsigned int first_entry, | ||
601 | enum i915_cache_level level) | ||
602 | { | ||
603 | struct drm_i915_private *dev_priv = vm->dev->dev_private; | ||
604 | gen8_gtt_pte_t __iomem *gtt_entries = | ||
605 | (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; | ||
606 | int i = 0; | ||
607 | struct sg_page_iter sg_iter; | ||
608 | dma_addr_t addr; | ||
609 | |||
610 | for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { | ||
611 | addr = sg_dma_address(sg_iter.sg) + | ||
612 | (sg_iter.sg_pgoffset << PAGE_SHIFT); | ||
613 | gen8_set_pte(>t_entries[i], | ||
614 | gen8_pte_encode(addr, level, true)); | ||
615 | i++; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * XXX: This serves as a posting read to make sure that the PTE has | ||
620 | * actually been updated. There is some concern that even though | ||
621 | * registers and PTEs are within the same BAR that they are potentially | ||
622 | * of NUMA access patterns. Therefore, even with the way we assume | ||
623 | * hardware should work, we must keep this posting read for paranoia. | ||
624 | */ | ||
625 | if (i != 0) | ||
626 | WARN_ON(readq(>t_entries[i-1]) | ||
627 | != gen8_pte_encode(addr, level, true)); | ||
628 | |||
629 | #if 0 /* TODO: Still needed on GEN8? */ | ||
630 | /* This next bit makes the above posting read even more important. We | ||
631 | * want to flush the TLBs only after we're certain all the PTE updates | ||
632 | * have finished. | ||
633 | */ | ||
634 | I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); | ||
635 | POSTING_READ(GFX_FLSH_CNTL_GEN6); | ||
636 | #endif | ||
637 | } | ||
638 | |||
579 | /* | 639 | /* |
580 | * Binds an object into the global gtt with the specified cache level. The object | 640 | * Binds an object into the global gtt with the specified cache level. The object |
581 | * will be accessible to the GPU via commands whose operands reference offsets | 641 | * will be accessible to the GPU via commands whose operands reference offsets |
@@ -618,6 +678,30 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, | |||
618 | POSTING_READ(GFX_FLSH_CNTL_GEN6); | 678 | POSTING_READ(GFX_FLSH_CNTL_GEN6); |
619 | } | 679 | } |
620 | 680 | ||
681 | static void gen8_ggtt_clear_range(struct i915_address_space *vm, | ||
682 | unsigned int first_entry, | ||
683 | unsigned int num_entries, | ||
684 | bool use_scratch) | ||
685 | { | ||
686 | struct drm_i915_private *dev_priv = vm->dev->dev_private; | ||
687 | gen8_gtt_pte_t scratch_pte, __iomem *gtt_base = | ||
688 | (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; | ||
689 | const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; | ||
690 | int i; | ||
691 | |||
692 | if (WARN(num_entries > max_entries, | ||
693 | "First entry = %d; Num entries = %d (max=%d)\n", | ||
694 | first_entry, num_entries, max_entries)) | ||
695 | num_entries = max_entries; | ||
696 | |||
697 | scratch_pte = gen8_pte_encode(vm->scratch.addr, | ||
698 | I915_CACHE_LLC, | ||
699 | use_scratch); | ||
700 | for (i = 0; i < num_entries; i++) | ||
701 | gen8_set_pte(>t_base[i], scratch_pte); | ||
702 | readl(gtt_base); | ||
703 | } | ||
704 | |||
621 | static void gen6_ggtt_clear_range(struct i915_address_space *vm, | 705 | static void gen6_ggtt_clear_range(struct i915_address_space *vm, |
622 | unsigned int first_entry, | 706 | unsigned int first_entry, |
623 | unsigned int num_entries, | 707 | unsigned int num_entries, |
@@ -641,7 +725,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, | |||
641 | readl(gtt_base); | 725 | readl(gtt_base); |
642 | } | 726 | } |
643 | 727 | ||
644 | |||
645 | static void i915_ggtt_insert_entries(struct i915_address_space *vm, | 728 | static void i915_ggtt_insert_entries(struct i915_address_space *vm, |
646 | struct sg_table *st, | 729 | struct sg_table *st, |
647 | unsigned int pg_start, | 730 | unsigned int pg_start, |
@@ -947,8 +1030,8 @@ static int gen8_gmch_probe(struct drm_device *dev, | |||
947 | 1030 | ||
948 | ret = ggtt_probe_common(dev, gtt_size); | 1031 | ret = ggtt_probe_common(dev, gtt_size); |
949 | 1032 | ||
950 | dev_priv->gtt.base.clear_range = NULL; | 1033 | dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; |
951 | dev_priv->gtt.base.insert_entries = NULL; | 1034 | dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries; |
952 | 1035 | ||
953 | return ret; | 1036 | return ret; |
954 | } | 1037 | } |