[XTENSA] Add support for cache-aliasing

Add support for processors that have cache-aliasing issues, such as the Stretch S5000 processor. Cache-aliasing means that the size of the cache (for one way) is larger than the page size; thus, a page can end up in several places in the cache depending on the virtual-to-physical translation. The method used here is to map a user page temporarily through auto-refill way 0 of the DTLB. We will probably want to revisit this issue and use a better approach with kmap/kunmap. Signed-off-by: Chris Zankel <chris@zankel.net>
author: Chris Zankel <chris@zankel.net> 2007-08-22 13:14:51 -0400
committer: Chris Zankel <chris@zankel.net> 2007-08-27 16:54:16 -0400
commit: 6656920b0b50beacb6cb64cf55273cbb686e436e (patch)
tree: dab9fdb81821b455a29779de6ca3306dbdf05dbd /arch/xtensa/mm/init.c
parent: ff6fd469885aafa5ec387babcb6537f3c00d6df0 (diff)
1 files changed, 15 insertions, 237 deletions
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 8415c76f11c2..b3086f34a8e7 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -15,40 +15,24 @@
 * Kevin Chea
 */
-#include <linux/init.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
 #include <linux/bootmem.h>
 #include <linux/swap.h>
+#include <linux/mman.h>
+#include <linux/nodemask.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
 #include <asm/pgtable.h>
 #include <asm/bootparam.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
-#include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#define DEBUG 0
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-//static DEFINE_SPINLOCK(tlb_lock);
-/*
- * This flag is used to indicate that the page was mapped and modified in
- * kernel space, so the cache is probably dirty at that address.
- * If cache aliasing is enabled and the page color mismatches, update_mmu_cache
- * synchronizes the caches if this bit is set.
- */
-#define PG_cache_clean PG_arch_1
 /* References to section boundaries */
@@ -323,228 +307,22 @@ void show_mem(void)
        printk("%d free pages\n", free);
 }
-/* ------------------------------------------------------------------------- */
+struct kmem_cache *pgtable_cache __read_mostly;
-#if (DCACHE_WAY_SIZE > PAGE_SIZE)
-/*
- * With cache aliasing, the page color of the page in kernel space and user
- * space might mismatch. We temporarily map the page to a different virtual
- * address with the same color and clear the page there.
- */
-void clear_user_page(void *kaddr, unsigned long vaddr, struct page* page)
-{
-        /*  There shouldn't be any entries for this page. */
-        __flush_invalidate_dcache_page_phys(__pa(page_address(page)));
-        if (!PAGE_COLOR_EQ(vaddr, kaddr)) {
-                unsigned long v, p;
-                /* Temporarily map page to DTLB_WAY_DCACHE_ALIAS0. */
-                spin_lock(&tlb_lock);
-                p = (unsigned long)pte_val((mk_pte(page,PAGE_KERNEL)));
-                kaddr = (void*)PAGE_COLOR_MAP0(vaddr);
-                v = (unsigned long)kaddr | DTLB_WAY_DCACHE_ALIAS0;
-                __asm__ __volatile__("wdtlb %0,%1; dsync" : :"a" (p), "a" (v));
-                clear_page(kaddr);
-                spin_unlock(&tlb_lock);
-        } else {
-                clear_page(kaddr);
-        }
-        /* We need to make sure that i$ and d$ are coherent. */
-        clear_bit(PG_cache_clean, &page->flags);
-}
-/*
- * With cache aliasing, we have to make sure that the page color of the page
- * in kernel space matches that of the virtual user address before we read
- * the page. If the page color differ, we create a temporary DTLB entry with
- * the corrent page color and use this 'temporary' address as the source.
- * We then use the same approach as in clear_user_page and copy the data
- * to the kernel space and clear the PG_cache_clean bit to synchronize caches
- * later.
- *
- * Note:
- * Instead of using another 'way' for the temporary DTLB entry, we could
- * probably use the same entry that points to the kernel address (after
- * saving the original value and restoring it when we are done).
- */
-void copy_user_page(void* to, void* from, unsigned long vaddr,
+static void pgd_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
-                    struct page* to_page)
 {
-        /* There shouldn't be any entries for the new page. */
+        pte_t* ptep = (pte_t*)addr;
+        int i;
-        __flush_invalidate_dcache_page_phys(__pa(page_address(to_page)));
-        spin_lock(&tlb_lock);
-        if (!PAGE_COLOR_EQ(vaddr, from)) {
-                unsigned long v, p, t;
-                __asm__ __volatile__ ("pdtlb %1,%2; rdtlb1 %0,%1"
-                                      : "=a"(p), "=a"(t) : "a"(from));
-                from = (void*)PAGE_COLOR_MAP0(vaddr);
-                v = (unsigned long)from | DTLB_WAY_DCACHE_ALIAS0;
-                __asm__ __volatile__ ("wdtlb %0,%1; dsync" ::"a" (p), "a" (v));
-        }
-        if (!PAGE_COLOR_EQ(vaddr, to)) {
-                unsigned long v, p;
-                p = (unsigned long)pte_val((mk_pte(to_page,PAGE_KERNEL)));
-                to = (void*)PAGE_COLOR_MAP1(vaddr);
-                v = (unsigned long)to | DTLB_WAY_DCACHE_ALIAS1;
-                __asm__ __volatile__ ("wdtlb %0,%1; dsync" ::"a" (p), "a" (v));
-        }
-        copy_page(to, from);
-        spin_unlock(&tlb_lock);
-        /* We need to make sure that i$ and d$ are coherent. */
-        clear_bit(PG_cache_clean, &to_page->flags);
-}
-/*
- * Any time the kernel writes to a user page cache page, or it is about to
- * read from a page cache page this routine is called.
- *
- * Note:
- * The kernel currently only provides one architecture bit in the page
- * flags that we use for I$/D$ coherency. Maybe, in future, we can
- * use a sepearte bit for deferred dcache aliasing:
- * If the page is not mapped yet, we only need to set a flag,
- * if mapped, we need to invalidate the page.
- */
-// FIXME: we probably need this for WB caches not only for Page Coloring..
-void flush_dcache_page(struct page *page)
-{
-        unsigned long addr = __pa(page_address(page));
-        struct address_space *mapping = page_mapping(page);
-        __flush_invalidate_dcache_page_phys(addr);
-        if (!test_bit(PG_cache_clean, &page->flags))
-                return;
-        /* If this page hasn't been mapped, yet, handle I$/D$ coherency later.*/
-#if 0
-        if (mapping && !mapping_mapped(mapping))
-                clear_bit(PG_cache_clean, &page->flags);
-        else
-#endif
-                __invalidate_icache_page_phys(addr);
-}
-void flush_cache_range(struct vm_area_struct* vma, unsigned long s,
-                       unsigned long e)
-{
-        __flush_invalidate_cache_all();
-}
-void flush_cache_page(struct vm_area_struct* vma, unsigned long address,
-                      unsigned long pfn)
-{
-        struct page *page = pfn_to_page(pfn);
-        /* Remove any entry for the old mapping. */
-        if (current->active_mm == vma->vm_mm) {
-                unsigned long addr = __pa(page_address(page));
-                __flush_invalidate_dcache_page_phys(addr);
-                if ((vma->vm_flags & VM_EXEC) != 0)
-                        __invalidate_icache_page_phys(addr);
-        } else {
-                BUG();
-        }
-}
-#endif  /* (DCACHE_WAY_SIZE > PAGE_SIZE) */
-pte_t* pte_alloc_one_kernel (struct mm_struct* mm, unsigned long addr)
-{
-        pte_t* pte = (pte_t*)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 0);
-        if (likely(pte)) {
-                pte_t* ptep = (pte_t*)(pte_val(*pte) + PAGE_OFFSET);
-                int i;
-                for (i = 0; i < 1024; i++, ptep++)
-                        pte_clear(mm, addr, ptep);
-        }
-        return pte;
-}
-struct page* pte_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-        struct page *page;
-        page = alloc_pages(GFP_KERNEL | __GFP_REPEAT, 0);
-        if (likely(page)) {
-                pte_t* ptep = kmap_atomic(page, KM_USER0);
-                int i;
-                for (i = 0; i < 1024; i++, ptep++)
+        for (i = 0; i < 1024; i++, ptep++)
-                        pte_clear(mm, addr, ptep);
+                pte_clear(NULL, 0, ptep);
-                kunmap_atomic(ptep, KM_USER0);
-        }
-        return page;
 }
+void __init pgtable_cache_init(void)
-/*
- * Handle D$/I$ coherency.
- *
- * Note:
- * We only have one architecture bit for the page flags, so we cannot handle
- * cache aliasing, yet.
- */
-void
-update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t pte)
 {
-        unsigned long pfn = pte_pfn(pte);
+        pgtable_cache = kmem_cache_create("pgd",
-        struct page *page;
+                        PAGE_SIZE, PAGE_SIZE,
-        unsigned long vaddr = addr & PAGE_MASK;
+                        SLAB_HWCACHE_ALIGN,
+                        pgd_ctor);
-        if (!pfn_valid(pfn))
-                return;
-        page = pfn_to_page(pfn);
-        invalidate_itlb_mapping(addr);
-        invalidate_dtlb_mapping(addr);
-        /* We have a new mapping. Use it. */
-        write_dtlb_entry(pte, dtlb_probe(addr));
-        /* If the processor can execute from this page, synchronize D$/I$. */
-        if ((vma->vm_flags & VM_EXEC) != 0) {
-                write_itlb_entry(pte, itlb_probe(addr));
-                /* Synchronize caches, if not clean. */
-                if (!test_and_set_bit(PG_cache_clean, &page->flags)) {
-                        __flush_dcache_page(vaddr);
-                        __invalidate_icache_page(vaddr);
-                }
-        }
 }
author	Chris Zankel <chris@zankel.net>	2007-08-22 13:14:51 -0400
committer	Chris Zankel <chris@zankel.net>	2007-08-27 16:54:16 -0400
commit	6656920b0b50beacb6cb64cf55273cbb686e436e (patch)
tree	dab9fdb81821b455a29779de6ca3306dbdf05dbd /arch/xtensa/mm/init.c
parent	ff6fd469885aafa5ec387babcb6537f3c00d6df0 (diff)

diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 8415c76f11c2..b3086f34a8e7 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c
@@ -15,40 +15,24 @@
15	* Kevin Chea	15	* Kevin Chea
16	*/	16	*/
17		17
18	#include <linux/init.h>
19	#include <linux/signal.h>
20	#include <linux/sched.h>
21	#include <linux/kernel.h>	18	#include <linux/kernel.h>
22	#include <linux/errno.h>	19	#include <linux/errno.h>
23	#include <linux/string.h>
24	#include <linux/types.h>
25	#include <linux/ptrace.h>
26	#include <linux/bootmem.h>	20	#include <linux/bootmem.h>
27	#include <linux/swap.h>	21	#include <linux/swap.h>
		22	#include <linux/mman.h>
		23	#include <linux/nodemask.h>
		24	#include <linux/mm.h>
		25	#include <linux/slab.h>
28		26
29	#include <asm/pgtable.h>	27	#include <asm/pgtable.h>
30	#include <asm/bootparam.h>	28	#include <asm/bootparam.h>
31	#include <asm/mmu_context.h>	29	#include <asm/mmu_context.h>
32	#include <asm/tlb.h>	30	#include <asm/tlb.h>
33	#include <asm/tlbflush.h>
34	#include <asm/page.h>	31	#include <asm/page.h>
35	#include <asm/pgalloc.h>	32	#include <asm/pgalloc.h>
36	#include <asm/pgtable.h>
37
38		33
39	#define DEBUG 0
40		34
41	DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);	35	DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
42	//static DEFINE_SPINLOCK(tlb_lock);
43
44	/*
45	* This flag is used to indicate that the page was mapped and modified in
46	* kernel space, so the cache is probably dirty at that address.
47	* If cache aliasing is enabled and the page color mismatches, update_mmu_cache
48	* synchronizes the caches if this bit is set.
49	*/
50
51	#define PG_cache_clean PG_arch_1
52		36
53	/* References to section boundaries */	37	/* References to section boundaries */
54		38
@@ -323,228 +307,22 @@ void show_mem(void)
323	printk("%d free pages\n", free);	307	printk("%d free pages\n", free);
324	}	308	}
325		309
326	/* ------------------------------------------------------------------------- */	310	struct kmem_cache *pgtable_cache __read_mostly;
327
328	#if (DCACHE_WAY_SIZE > PAGE_SIZE)
329
330	/*
331	* With cache aliasing, the page color of the page in kernel space and user
332	* space might mismatch. We temporarily map the page to a different virtual
333	* address with the same color and clear the page there.
334	*/
335
336	void clear_user_page(void *kaddr, unsigned long vaddr, struct page* page)
337	{
338
339	/* There shouldn't be any entries for this page. */
340
341	__flush_invalidate_dcache_page_phys(__pa(page_address(page)));
342
343	if (!PAGE_COLOR_EQ(vaddr, kaddr)) {
344	unsigned long v, p;
345
346	/* Temporarily map page to DTLB_WAY_DCACHE_ALIAS0. */
347
348	spin_lock(&tlb_lock);
349
350	p = (unsigned long)pte_val((mk_pte(page,PAGE_KERNEL)));
351	kaddr = (void*)PAGE_COLOR_MAP0(vaddr);
352	v = (unsigned long)kaddr | DTLB_WAY_DCACHE_ALIAS0;
353	__asm__ __volatile__("wdtlb %0,%1; dsync" : :"a" (p), "a" (v));
354
355	clear_page(kaddr);
356
357	spin_unlock(&tlb_lock);
358	} else {
359	clear_page(kaddr);
360	}
361
362	/* We need to make sure that i$ and d$ are coherent. */
363
364	clear_bit(PG_cache_clean, &page->flags);
365	}
366
367	/*
368	* With cache aliasing, we have to make sure that the page color of the page
369	* in kernel space matches that of the virtual user address before we read
370	* the page. If the page color differ, we create a temporary DTLB entry with
371	* the corrent page color and use this 'temporary' address as the source.
372	* We then use the same approach as in clear_user_page and copy the data
373	* to the kernel space and clear the PG_cache_clean bit to synchronize caches
374	* later.
375	*
376	* Note:
377	* Instead of using another 'way' for the temporary DTLB entry, we could
378	* probably use the same entry that points to the kernel address (after
379	* saving the original value and restoring it when we are done).
380	*/
381		311
382	void copy_user_page(void* to, void* from, unsigned long vaddr,	312	static void pgd_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
383	struct page* to_page)
384	{	313	{
385	/* There shouldn't be any entries for the new page. */	314	pte_t* ptep = (pte_t*)addr;
386		315	int i;
387	__flush_invalidate_dcache_page_phys(__pa(page_address(to_page)));
388
389	spin_lock(&tlb_lock);
390
391	if (!PAGE_COLOR_EQ(vaddr, from)) {
392	unsigned long v, p, t;
393
394	__asm__ __volatile__ ("pdtlb %1,%2; rdtlb1 %0,%1"
395	: "=a"(p), "=a"(t) : "a"(from));
396	from = (void*)PAGE_COLOR_MAP0(vaddr);
397	v = (unsigned long)from | DTLB_WAY_DCACHE_ALIAS0;
398	__asm__ __volatile__ ("wdtlb %0,%1; dsync" ::"a" (p), "a" (v));
399	}
400
401	if (!PAGE_COLOR_EQ(vaddr, to)) {
402	unsigned long v, p;
403
404	p = (unsigned long)pte_val((mk_pte(to_page,PAGE_KERNEL)));
405	to = (void*)PAGE_COLOR_MAP1(vaddr);
406	v = (unsigned long)to | DTLB_WAY_DCACHE_ALIAS1;
407	__asm__ __volatile__ ("wdtlb %0,%1; dsync" ::"a" (p), "a" (v));
408	}
409	copy_page(to, from);
410
411	spin_unlock(&tlb_lock);
412
413	/* We need to make sure that i$ and d$ are coherent. */
414
415	clear_bit(PG_cache_clean, &to_page->flags);
416	}
417
418
419
420	/*
421	* Any time the kernel writes to a user page cache page, or it is about to
422	* read from a page cache page this routine is called.
423	*
424	* Note:
425	* The kernel currently only provides one architecture bit in the page
426	* flags that we use for I$/D$ coherency. Maybe, in future, we can
427	* use a sepearte bit for deferred dcache aliasing:
428	* If the page is not mapped yet, we only need to set a flag,
429	* if mapped, we need to invalidate the page.
430	*/
431	// FIXME: we probably need this for WB caches not only for Page Coloring..
432
433	void flush_dcache_page(struct page *page)
434	{
435	unsigned long addr = __pa(page_address(page));
436	struct address_space *mapping = page_mapping(page);
437
438	__flush_invalidate_dcache_page_phys(addr);
439
440	if (!test_bit(PG_cache_clean, &page->flags))
441	return;
442
443	/* If this page hasn't been mapped, yet, handle I$/D$ coherency later.*/
444	#if 0
445	if (mapping && !mapping_mapped(mapping))
446	clear_bit(PG_cache_clean, &page->flags);
447	else
448	#endif
449	__invalidate_icache_page_phys(addr);
450	}
451
452	void flush_cache_range(struct vm_area_struct* vma, unsigned long s,
453	unsigned long e)
454	{
455	__flush_invalidate_cache_all();
456	}
457
458	void flush_cache_page(struct vm_area_struct* vma, unsigned long address,
459	unsigned long pfn)
460	{
461	struct page *page = pfn_to_page(pfn);
462
463	/* Remove any entry for the old mapping. */
464
465	if (current->active_mm == vma->vm_mm) {
466	unsigned long addr = __pa(page_address(page));
467	__flush_invalidate_dcache_page_phys(addr);
468	if ((vma->vm_flags & VM_EXEC) != 0)
469	__invalidate_icache_page_phys(addr);
470	} else {
471	BUG();
472	}
473	}
474
475	#endif /* (DCACHE_WAY_SIZE > PAGE_SIZE) */
476
477
478	pte_t* pte_alloc_one_kernel (struct mm_struct* mm, unsigned long addr)
479	{
480	pte_t* pte = (pte_t*)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 0);
481	if (likely(pte)) {
482	pte_t* ptep = (pte_t*)(pte_val(*pte) + PAGE_OFFSET);
483	int i;
484	for (i = 0; i < 1024; i++, ptep++)
485	pte_clear(mm, addr, ptep);
486	}
487	return pte;
488	}
489
490	struct page* pte_alloc_one(struct mm_struct *mm, unsigned long addr)
491	{
492	struct page *page;
493
494	page = alloc_pages(GFP_KERNEL | __GFP_REPEAT, 0);
495
496	if (likely(page)) {
497	pte_t* ptep = kmap_atomic(page, KM_USER0);
498	int i;
499		316
500	for (i = 0; i < 1024; i++, ptep++)	317	for (i = 0; i < 1024; i++, ptep++)
501	pte_clear(mm, addr, ptep);	318	pte_clear(NULL, 0, ptep);
502		319
503	kunmap_atomic(ptep, KM_USER0);
504	}
505	return page;
506	}	320	}
507		321
508		322	void __init pgtable_cache_init(void)
509	/*
510	* Handle D$/I$ coherency.
511	*
512	* Note:
513	* We only have one architecture bit for the page flags, so we cannot handle
514	* cache aliasing, yet.
515	*/
516
517	void
518	update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t pte)
519	{	323	{
520	unsigned long pfn = pte_pfn(pte);	324	pgtable_cache = kmem_cache_create("pgd",
521	struct page *page;	325	PAGE_SIZE, PAGE_SIZE,
522	unsigned long vaddr = addr & PAGE_MASK;	326	SLAB_HWCACHE_ALIGN,
523		327	pgd_ctor);
524	if (!pfn_valid(pfn))
525	return;
526
527	page = pfn_to_page(pfn);
528
529	invalidate_itlb_mapping(addr);
530	invalidate_dtlb_mapping(addr);
531
532	/* We have a new mapping. Use it. */
533
534	write_dtlb_entry(pte, dtlb_probe(addr));
535
536	/* If the processor can execute from this page, synchronize D$/I$. */
537
538	if ((vma->vm_flags & VM_EXEC) != 0) {
539
540	write_itlb_entry(pte, itlb_probe(addr));
541
542	/* Synchronize caches, if not clean. */
543
544	if (!test_and_set_bit(PG_cache_clean, &page->flags)) {
545	__flush_dcache_page(vaddr);
546	__invalidate_icache_page(vaddr);
547	}
548	}
549	}	328	}
550