diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2018-10-29 14:27:20 -0400 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2018-10-30 05:10:20 -0400 |
commit | daf3dc0f9b0044e25ac96d1b9c65dca4df1aa99f (patch) | |
tree | fcb1011819af08b72c0b3780711522f75e569aac | |
parent | 4c8d351d0b4936ae93ed0febb7bf0ee3044e49ba (diff) |
drm/i915/gtt: Record the scratch pte
Record the scratch PTE encoding upon creation rather than recomputing
the bits every time. This is important for the next patch where we forgo
having a valid scratch page with which we may compute the bits and so
require keeping the PTE value instead.
v2: Fix up scrub_64K to use scratch_pte as well.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181029182721.29568-1-chris@chris-wilson.co.uk
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.c | 85 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.h | 11 |
2 files changed, 50 insertions, 46 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 19b2d991b5d8..afe45cbcd762 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c | |||
@@ -186,9 +186,9 @@ static void clear_pages(struct i915_vma *vma) | |||
186 | memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); | 186 | memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); |
187 | } | 187 | } |
188 | 188 | ||
189 | static gen8_pte_t gen8_pte_encode(dma_addr_t addr, | 189 | static u64 gen8_pte_encode(dma_addr_t addr, |
190 | enum i915_cache_level level, | 190 | enum i915_cache_level level, |
191 | u32 flags) | 191 | u32 flags) |
192 | { | 192 | { |
193 | gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; | 193 | gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; |
194 | 194 | ||
@@ -225,9 +225,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, | |||
225 | #define gen8_pdpe_encode gen8_pde_encode | 225 | #define gen8_pdpe_encode gen8_pde_encode |
226 | #define gen8_pml4e_encode gen8_pde_encode | 226 | #define gen8_pml4e_encode gen8_pde_encode |
227 | 227 | ||
228 | static gen6_pte_t snb_pte_encode(dma_addr_t addr, | 228 | static u64 snb_pte_encode(dma_addr_t addr, |
229 | enum i915_cache_level level, | 229 | enum i915_cache_level level, |
230 | u32 unused) | 230 | u32 flags) |
231 | { | 231 | { |
232 | gen6_pte_t pte = GEN6_PTE_VALID; | 232 | gen6_pte_t pte = GEN6_PTE_VALID; |
233 | pte |= GEN6_PTE_ADDR_ENCODE(addr); | 233 | pte |= GEN6_PTE_ADDR_ENCODE(addr); |
@@ -247,9 +247,9 @@ static gen6_pte_t snb_pte_encode(dma_addr_t addr, | |||
247 | return pte; | 247 | return pte; |
248 | } | 248 | } |
249 | 249 | ||
250 | static gen6_pte_t ivb_pte_encode(dma_addr_t addr, | 250 | static u64 ivb_pte_encode(dma_addr_t addr, |
251 | enum i915_cache_level level, | 251 | enum i915_cache_level level, |
252 | u32 unused) | 252 | u32 flags) |
253 | { | 253 | { |
254 | gen6_pte_t pte = GEN6_PTE_VALID; | 254 | gen6_pte_t pte = GEN6_PTE_VALID; |
255 | pte |= GEN6_PTE_ADDR_ENCODE(addr); | 255 | pte |= GEN6_PTE_ADDR_ENCODE(addr); |
@@ -271,9 +271,9 @@ static gen6_pte_t ivb_pte_encode(dma_addr_t addr, | |||
271 | return pte; | 271 | return pte; |
272 | } | 272 | } |
273 | 273 | ||
274 | static gen6_pte_t byt_pte_encode(dma_addr_t addr, | 274 | static u64 byt_pte_encode(dma_addr_t addr, |
275 | enum i915_cache_level level, | 275 | enum i915_cache_level level, |
276 | u32 flags) | 276 | u32 flags) |
277 | { | 277 | { |
278 | gen6_pte_t pte = GEN6_PTE_VALID; | 278 | gen6_pte_t pte = GEN6_PTE_VALID; |
279 | pte |= GEN6_PTE_ADDR_ENCODE(addr); | 279 | pte |= GEN6_PTE_ADDR_ENCODE(addr); |
@@ -287,9 +287,9 @@ static gen6_pte_t byt_pte_encode(dma_addr_t addr, | |||
287 | return pte; | 287 | return pte; |
288 | } | 288 | } |
289 | 289 | ||
290 | static gen6_pte_t hsw_pte_encode(dma_addr_t addr, | 290 | static u64 hsw_pte_encode(dma_addr_t addr, |
291 | enum i915_cache_level level, | 291 | enum i915_cache_level level, |
292 | u32 unused) | 292 | u32 flags) |
293 | { | 293 | { |
294 | gen6_pte_t pte = GEN6_PTE_VALID; | 294 | gen6_pte_t pte = GEN6_PTE_VALID; |
295 | pte |= HSW_PTE_ADDR_ENCODE(addr); | 295 | pte |= HSW_PTE_ADDR_ENCODE(addr); |
@@ -300,9 +300,9 @@ static gen6_pte_t hsw_pte_encode(dma_addr_t addr, | |||
300 | return pte; | 300 | return pte; |
301 | } | 301 | } |
302 | 302 | ||
303 | static gen6_pte_t iris_pte_encode(dma_addr_t addr, | 303 | static u64 iris_pte_encode(dma_addr_t addr, |
304 | enum i915_cache_level level, | 304 | enum i915_cache_level level, |
305 | u32 unused) | 305 | u32 flags) |
306 | { | 306 | { |
307 | gen6_pte_t pte = GEN6_PTE_VALID; | 307 | gen6_pte_t pte = GEN6_PTE_VALID; |
308 | pte |= HSW_PTE_ADDR_ENCODE(addr); | 308 | pte |= HSW_PTE_ADDR_ENCODE(addr); |
@@ -666,14 +666,13 @@ static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) | |||
666 | static void gen8_initialize_pt(struct i915_address_space *vm, | 666 | static void gen8_initialize_pt(struct i915_address_space *vm, |
667 | struct i915_page_table *pt) | 667 | struct i915_page_table *pt) |
668 | { | 668 | { |
669 | fill_px(vm, pt, | 669 | fill_px(vm, pt, vm->scratch_pte); |
670 | gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0)); | ||
671 | } | 670 | } |
672 | 671 | ||
673 | static void gen6_initialize_pt(struct gen6_hw_ppgtt *ppgtt, | 672 | static void gen6_initialize_pt(struct i915_address_space *vm, |
674 | struct i915_page_table *pt) | 673 | struct i915_page_table *pt) |
675 | { | 674 | { |
676 | fill32_px(&ppgtt->base.vm, pt, ppgtt->scratch_pte); | 675 | fill32_px(vm, pt, vm->scratch_pte); |
677 | } | 676 | } |
678 | 677 | ||
679 | static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) | 678 | static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) |
@@ -807,15 +806,13 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) | |||
807 | /* Removes entries from a single page table, releasing it if it's empty. | 806 | /* Removes entries from a single page table, releasing it if it's empty. |
808 | * Caller can use the return value to update higher-level entries. | 807 | * Caller can use the return value to update higher-level entries. |
809 | */ | 808 | */ |
810 | static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, | 809 | static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm, |
811 | struct i915_page_table *pt, | 810 | struct i915_page_table *pt, |
812 | u64 start, u64 length) | 811 | u64 start, u64 length) |
813 | { | 812 | { |
814 | unsigned int num_entries = gen8_pte_count(start, length); | 813 | unsigned int num_entries = gen8_pte_count(start, length); |
815 | unsigned int pte = gen8_pte_index(start); | 814 | unsigned int pte = gen8_pte_index(start); |
816 | unsigned int pte_end = pte + num_entries; | 815 | unsigned int pte_end = pte + num_entries; |
817 | const gen8_pte_t scratch_pte = | ||
818 | gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); | ||
819 | gen8_pte_t *vaddr; | 816 | gen8_pte_t *vaddr; |
820 | 817 | ||
821 | GEM_BUG_ON(num_entries > pt->used_ptes); | 818 | GEM_BUG_ON(num_entries > pt->used_ptes); |
@@ -826,7 +823,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, | |||
826 | 823 | ||
827 | vaddr = kmap_atomic_px(pt); | 824 | vaddr = kmap_atomic_px(pt); |
828 | while (pte < pte_end) | 825 | while (pte < pte_end) |
829 | vaddr[pte++] = scratch_pte; | 826 | vaddr[pte++] = vm->scratch_pte; |
830 | kunmap_atomic(vaddr); | 827 | kunmap_atomic(vaddr); |
831 | 828 | ||
832 | return false; | 829 | return false; |
@@ -1159,7 +1156,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, | |||
1159 | if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { | 1156 | if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { |
1160 | u16 i; | 1157 | u16 i; |
1161 | 1158 | ||
1162 | encode = pte_encode | vma->vm->scratch_page.daddr; | 1159 | encode = vma->vm->scratch_pte; |
1163 | vaddr = kmap_atomic_px(pd->page_table[idx.pde]); | 1160 | vaddr = kmap_atomic_px(pd->page_table[idx.pde]); |
1164 | 1161 | ||
1165 | for (i = 1; i < index; i += 16) | 1162 | for (i = 1; i < index; i += 16) |
@@ -1216,6 +1213,11 @@ static int gen8_init_scratch(struct i915_address_space *vm) | |||
1216 | if (ret) | 1213 | if (ret) |
1217 | return ret; | 1214 | return ret; |
1218 | 1215 | ||
1216 | vm->scratch_pte = | ||
1217 | gen8_pte_encode(vm->scratch_page.daddr, | ||
1218 | I915_CACHE_LLC, | ||
1219 | PTE_READ_ONLY); | ||
1220 | |||
1219 | vm->scratch_pt = alloc_pt(vm); | 1221 | vm->scratch_pt = alloc_pt(vm); |
1220 | if (IS_ERR(vm->scratch_pt)) { | 1222 | if (IS_ERR(vm->scratch_pt)) { |
1221 | ret = PTR_ERR(vm->scratch_pt); | 1223 | ret = PTR_ERR(vm->scratch_pt); |
@@ -1524,8 +1526,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, | |||
1524 | static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) | 1526 | static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) |
1525 | { | 1527 | { |
1526 | struct i915_address_space *vm = &ppgtt->vm; | 1528 | struct i915_address_space *vm = &ppgtt->vm; |
1527 | const gen8_pte_t scratch_pte = | 1529 | const gen8_pte_t scratch_pte = vm->scratch_pte; |
1528 | gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); | ||
1529 | u64 start = 0, length = ppgtt->vm.total; | 1530 | u64 start = 0, length = ppgtt->vm.total; |
1530 | 1531 | ||
1531 | if (use_4lvl(vm)) { | 1532 | if (use_4lvl(vm)) { |
@@ -1672,7 +1673,7 @@ err_free: | |||
1672 | static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m) | 1673 | static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m) |
1673 | { | 1674 | { |
1674 | struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base); | 1675 | struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base); |
1675 | const gen6_pte_t scratch_pte = ppgtt->scratch_pte; | 1676 | const gen6_pte_t scratch_pte = base->vm.scratch_pte; |
1676 | struct i915_page_table *pt; | 1677 | struct i915_page_table *pt; |
1677 | u32 pte, pde; | 1678 | u32 pte, pde; |
1678 | 1679 | ||
@@ -1785,7 +1786,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, | |||
1785 | unsigned int pde = first_entry / GEN6_PTES; | 1786 | unsigned int pde = first_entry / GEN6_PTES; |
1786 | unsigned int pte = first_entry % GEN6_PTES; | 1787 | unsigned int pte = first_entry % GEN6_PTES; |
1787 | unsigned int num_entries = length / I915_GTT_PAGE_SIZE; | 1788 | unsigned int num_entries = length / I915_GTT_PAGE_SIZE; |
1788 | const gen6_pte_t scratch_pte = ppgtt->scratch_pte; | 1789 | const gen6_pte_t scratch_pte = vm->scratch_pte; |
1789 | 1790 | ||
1790 | while (num_entries) { | 1791 | while (num_entries) { |
1791 | struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++]; | 1792 | struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++]; |
@@ -1876,7 +1877,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, | |||
1876 | if (IS_ERR(pt)) | 1877 | if (IS_ERR(pt)) |
1877 | goto unwind_out; | 1878 | goto unwind_out; |
1878 | 1879 | ||
1879 | gen6_initialize_pt(ppgtt, pt); | 1880 | gen6_initialize_pt(vm, pt); |
1880 | ppgtt->base.pd.page_table[pde] = pt; | 1881 | ppgtt->base.pd.page_table[pde] = pt; |
1881 | 1882 | ||
1882 | if (i915_vma_is_bound(ppgtt->vma, | 1883 | if (i915_vma_is_bound(ppgtt->vma, |
@@ -1914,9 +1915,9 @@ static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt) | |||
1914 | if (ret) | 1915 | if (ret) |
1915 | return ret; | 1916 | return ret; |
1916 | 1917 | ||
1917 | ppgtt->scratch_pte = | 1918 | vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr, |
1918 | vm->pte_encode(vm->scratch_page.daddr, | 1919 | I915_CACHE_NONE, |
1919 | I915_CACHE_NONE, PTE_READ_ONLY); | 1920 | PTE_READ_ONLY); |
1920 | 1921 | ||
1921 | vm->scratch_pt = alloc_pt(vm); | 1922 | vm->scratch_pt = alloc_pt(vm); |
1922 | if (IS_ERR(vm->scratch_pt)) { | 1923 | if (IS_ERR(vm->scratch_pt)) { |
@@ -1924,7 +1925,7 @@ static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt) | |||
1924 | return PTR_ERR(vm->scratch_pt); | 1925 | return PTR_ERR(vm->scratch_pt); |
1925 | } | 1926 | } |
1926 | 1927 | ||
1927 | gen6_initialize_pt(ppgtt, vm->scratch_pt); | 1928 | gen6_initialize_pt(vm, vm->scratch_pt); |
1928 | gen6_for_all_pdes(unused, &ppgtt->base.pd, pde) | 1929 | gen6_for_all_pdes(unused, &ppgtt->base.pd, pde) |
1929 | ppgtt->base.pd.page_table[pde] = vm->scratch_pt; | 1930 | ppgtt->base.pd.page_table[pde] = vm->scratch_pt; |
1930 | 1931 | ||
@@ -2469,8 +2470,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, | |||
2469 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); | 2470 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
2470 | unsigned first_entry = start / I915_GTT_PAGE_SIZE; | 2471 | unsigned first_entry = start / I915_GTT_PAGE_SIZE; |
2471 | unsigned num_entries = length / I915_GTT_PAGE_SIZE; | 2472 | unsigned num_entries = length / I915_GTT_PAGE_SIZE; |
2472 | const gen8_pte_t scratch_pte = | 2473 | const gen8_pte_t scratch_pte = vm->scratch_pte; |
2473 | gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); | ||
2474 | gen8_pte_t __iomem *gtt_base = | 2474 | gen8_pte_t __iomem *gtt_base = |
2475 | (gen8_pte_t __iomem *)ggtt->gsm + first_entry; | 2475 | (gen8_pte_t __iomem *)ggtt->gsm + first_entry; |
2476 | const int max_entries = ggtt_total_entries(ggtt) - first_entry; | 2476 | const int max_entries = ggtt_total_entries(ggtt) - first_entry; |
@@ -2595,8 +2595,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, | |||
2595 | first_entry, num_entries, max_entries)) | 2595 | first_entry, num_entries, max_entries)) |
2596 | num_entries = max_entries; | 2596 | num_entries = max_entries; |
2597 | 2597 | ||
2598 | scratch_pte = vm->pte_encode(vm->scratch_page.daddr, | 2598 | scratch_pte = vm->scratch_pte; |
2599 | I915_CACHE_LLC, 0); | ||
2600 | 2599 | ||
2601 | for (i = 0; i < num_entries; i++) | 2600 | for (i = 0; i < num_entries; i++) |
2602 | iowrite32(scratch_pte, &gtt_base[i]); | 2601 | iowrite32(scratch_pte, &gtt_base[i]); |
@@ -3002,6 +3001,10 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) | |||
3002 | return ret; | 3001 | return ret; |
3003 | } | 3002 | } |
3004 | 3003 | ||
3004 | ggtt->vm.scratch_pte = | ||
3005 | ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr, | ||
3006 | I915_CACHE_NONE, 0); | ||
3007 | |||
3005 | return 0; | 3008 | return 0; |
3006 | } | 3009 | } |
3007 | 3010 | ||
@@ -3348,6 +3351,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) | |||
3348 | ggtt->vm.vma_ops.set_pages = ggtt_set_pages; | 3351 | ggtt->vm.vma_ops.set_pages = ggtt_set_pages; |
3349 | ggtt->vm.vma_ops.clear_pages = clear_pages; | 3352 | ggtt->vm.vma_ops.clear_pages = clear_pages; |
3350 | 3353 | ||
3354 | ggtt->vm.pte_encode = gen8_pte_encode; | ||
3355 | |||
3351 | setup_private_pat(dev_priv); | 3356 | setup_private_pat(dev_priv); |
3352 | 3357 | ||
3353 | return ggtt_probe_common(ggtt, size); | 3358 | return ggtt_probe_common(ggtt, size); |
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 5d2c5ba55ad8..a030299b79cd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h | |||
@@ -289,6 +289,7 @@ struct i915_address_space { | |||
289 | 289 | ||
290 | struct mutex mutex; /* protects vma and our lists */ | 290 | struct mutex mutex; /* protects vma and our lists */ |
291 | 291 | ||
292 | u64 scratch_pte; | ||
292 | struct i915_page_dma scratch_page; | 293 | struct i915_page_dma scratch_page; |
293 | struct i915_page_table *scratch_pt; | 294 | struct i915_page_table *scratch_pt; |
294 | struct i915_page_directory *scratch_pd; | 295 | struct i915_page_directory *scratch_pd; |
@@ -335,12 +336,11 @@ struct i915_address_space { | |||
335 | /* Some systems support read-only mappings for GGTT and/or PPGTT */ | 336 | /* Some systems support read-only mappings for GGTT and/or PPGTT */ |
336 | bool has_read_only:1; | 337 | bool has_read_only:1; |
337 | 338 | ||
338 | /* FIXME: Need a more generic return type */ | 339 | u64 (*pte_encode)(dma_addr_t addr, |
339 | gen6_pte_t (*pte_encode)(dma_addr_t addr, | 340 | enum i915_cache_level level, |
340 | enum i915_cache_level level, | 341 | u32 flags); /* Create a valid PTE */ |
341 | u32 flags); /* Create a valid PTE */ | ||
342 | /* flags for pte_encode */ | ||
343 | #define PTE_READ_ONLY (1<<0) | 342 | #define PTE_READ_ONLY (1<<0) |
343 | |||
344 | int (*allocate_va_range)(struct i915_address_space *vm, | 344 | int (*allocate_va_range)(struct i915_address_space *vm, |
345 | u64 start, u64 length); | 345 | u64 start, u64 length); |
346 | void (*clear_range)(struct i915_address_space *vm, | 346 | void (*clear_range)(struct i915_address_space *vm, |
@@ -422,7 +422,6 @@ struct gen6_hw_ppgtt { | |||
422 | 422 | ||
423 | struct i915_vma *vma; | 423 | struct i915_vma *vma; |
424 | gen6_pte_t __iomem *pd_addr; | 424 | gen6_pte_t __iomem *pd_addr; |
425 | gen6_pte_t scratch_pte; | ||
426 | 425 | ||
427 | unsigned int pin_count; | 426 | unsigned int pin_count; |
428 | bool scan_for_unused_pt; | 427 | bool scan_for_unused_pt; |