Diffstat (limited to 'arch/tile/mm')
-rw-r--r--   arch/tile/mm/fault.c       |   8
-rw-r--r--   arch/tile/mm/homecache.c   |  38
-rw-r--r--   arch/tile/mm/init.c        |  34
-rw-r--r--   arch/tile/mm/migrate_32.S  |   1
-rw-r--r--   arch/tile/mm/pgtable.c     | 181
5 files changed, 189 insertions(+), 73 deletions(-)
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index dcebfc831cd6..758f597f488c 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -655,14 +655,6 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
         }
 
         /*
-         * NOTE: the one other type of access that might bring us here
-         * are the memory ops in __tns_atomic_acquire/__tns_atomic_release,
-         * but we don't have to check specially for them since we can
-         * always safely return to the address of the fault and retry,
-         * since no separate atomic locks are involved.
-         */
-
-        /*
          * Now that we have released the atomic lock (if necessary),
          * it's safe to spin if the PTE that caused the fault was migrating.
          */
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index d78df3a6ee15..cbe6f4f9eca3 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -179,23 +179,46 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
         panic("Unsafe to continue.");
 }
 
+void flush_remote_page(struct page *page, int order)
+{
+        int i, pages = (1 << order);
+        for (i = 0; i < pages; ++i, ++page) {
+                void *p = kmap_atomic(page);
+                int hfh = 0;
+                int home = page_home(page);
+#if CHIP_HAS_CBOX_HOME_MAP()
+                if (home == PAGE_HOME_HASH)
+                        hfh = 1;
+                else
+#endif
+                        BUG_ON(home < 0 || home >= NR_CPUS);
+                finv_buffer_remote(p, PAGE_SIZE, hfh);
+                kunmap_atomic(p);
+        }
+}
+
 void homecache_evict(const struct cpumask *mask)
 {
         flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
 }
 
-/* Return a mask of the cpus whose caches currently own these pages. */
-static void homecache_mask(struct page *page, int pages,
-                           struct cpumask *home_mask)
+/*
+ * Return a mask of the cpus whose caches currently own these pages.
+ * The return value is whether the pages are all coherently cached
+ * (i.e. none are immutable, incoherent, or uncached).
+ */
+static int homecache_mask(struct page *page, int pages,
+                          struct cpumask *home_mask)
 {
         int i;
+        int cached_coherently = 1;
         cpumask_clear(home_mask);
         for (i = 0; i < pages; ++i) {
                 int home = page_home(&page[i]);
                 if (home == PAGE_HOME_IMMUTABLE ||
                     home == PAGE_HOME_INCOHERENT) {
                         cpumask_copy(home_mask, cpu_possible_mask);
-                        return;
+                        return 0;
                 }
 #if CHIP_HAS_CBOX_HOME_MAP()
                 if (home == PAGE_HOME_HASH) {
@@ -203,11 +226,14 @@ static void homecache_mask(struct page *page, int pages,
                         continue;
                 }
 #endif
-                if (home == PAGE_HOME_UNCACHED)
+                if (home == PAGE_HOME_UNCACHED) {
+                        cached_coherently = 0;
                         continue;
+                }
                 BUG_ON(home < 0 || home >= NR_CPUS);
                 cpumask_set_cpu(home, home_mask);
         }
+        return cached_coherently;
 }
 
 /*
@@ -386,7 +412,7 @@ void homecache_change_page_home(struct page *page, int order, int home)
                 pte_t *ptep = virt_to_pte(NULL, kva);
                 pte_t pteval = *ptep;
                 BUG_ON(!pte_present(pteval) || pte_huge(pteval));
-                *ptep = pte_set_home(pteval, home);
+                __set_pte(ptep, pte_set_home(pteval, home));
         }
 }
 
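For context on the homecache.c change: the new int return from homecache_mask() tells the caller whether every page is coherently cached somewhere, so a targeted flush of just the homing cpus suffices; a zero return means at least one page was uncached, immutable, or incoherent. A minimal sketch of how a caller inside homecache.c might use it (example_finv_pages is a hypothetical name, not part of this patch; flush_remote, homecache_evict, and cpu_possible_mask are used as they appear above):

static void example_finv_pages(struct page *page, int npages)
{
        struct cpumask home_mask;

        if (homecache_mask(page, npages, &home_mask)) {
                /* Every page is coherently cached: flushing the L2s of
                 * just the homing cpus is enough. */
                flush_remote(0, HV_FLUSH_EVICT_L2, &home_mask,
                             0, 0, 0, NULL, NULL, 0);
        } else {
                /* Some page was uncached, immutable, or incoherent, so
                 * the mask alone isn't a safe summary; evict everywhere. */
                homecache_evict(cpu_possible_mask);
        }
}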
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 0b9ce69b0ee5..d6e87fda2fb2 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -53,22 +53,11 @@
 
 #include "migrate.h"
 
-/*
- * We could set FORCE_MAX_ZONEORDER to "(HPAGE_SHIFT - PAGE_SHIFT + 1)"
- * in the Tile Kconfig, but this generates configure warnings.
- * Do it here and force people to get it right to compile this file.
- * The problem is that with 4KB small pages and 16MB huge pages,
- * the default value doesn't allow us to group enough small pages
- * together to make up a huge page.
- */
-#if CONFIG_FORCE_MAX_ZONEORDER < HPAGE_SHIFT - PAGE_SHIFT + 1
-# error "Change FORCE_MAX_ZONEORDER in arch/tile/Kconfig to match page size"
-#endif
-
 #define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0))
 
 #ifndef __tilegx__
 unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE;
+EXPORT_SYMBOL(VMALLOC_RESERVE);
 #endif
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -445,7 +434,7 @@ static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
 
 /* Temporary page table we use for staging. */
 static pgd_t pgtables[PTRS_PER_PGD]
- __attribute__((section(".init.page")));
+ __attribute__((aligned(HV_PAGE_TABLE_ALIGN)));
 
 /*
  * This maps the physical memory to kernel virtual address space, a total
@@ -653,6 +642,17 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
         memcpy(pgd_base, pgtables, sizeof(pgtables));
         __install_page_table(pgd_base, __get_cpu_var(current_asid),
                              swapper_pgprot);
+
+        /*
+         * We just read swapper_pgprot and thus brought it into the cache,
+         * with its new home & caching mode.  When we start the other CPUs,
+         * they're going to reference swapper_pgprot via their initial fake
+         * VA-is-PA mappings, which cache everything locally.  At that
+         * time, if it's in our cache with a conflicting home, the
+         * simulator's coherence checker will complain.  So, flush it out
+         * of our cache; we're not going to ever use it again anyway.
+         */
+        __insn_finv(&swapper_pgprot);
 }
 
 /*
@@ -950,11 +950,7 @@ struct kmem_cache *pgd_cache;
 
 void __init pgtable_cache_init(void)
 {
-        pgd_cache = kmem_cache_create("pgd",
-                                PTRS_PER_PGD*sizeof(pgd_t),
-                                PTRS_PER_PGD*sizeof(pgd_t),
-                                0,
-                                NULL);
+        pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL);
         if (!pgd_cache)
                 panic("pgtable_cache_init(): Cannot create pgd cache");
 }
@@ -989,7 +985,7 @@ static long __write_once initfree = 1;
 static int __init set_initfree(char *str)
 {
         long val;
-        if (strict_strtol(str, 0, &val)) {
+        if (strict_strtol(str, 0, &val) == 0) {
                 initfree = val;
                 pr_info("initfree: %s free init pages\n",
                         initfree ? "will" : "won't");
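One note on the set_initfree() fix above: strict_strtol() returns 0 on success and a negative errno on a parse failure, so the old check `if (strict_strtol(str, 0, &val))` only took the update path when parsing had failed. A small standalone sketch of the corrected pattern (example_parse_long is a hypothetical helper, not kernel API):

static int __init example_parse_long(const char *str, long *out)
{
        long val;

        /* strict_strtol() returns 0 on success, negative errno on error. */
        if (strict_strtol(str, 0, &val) == 0) {
                *out = val;
                return 0;       /* parsed and stored */
        }
        return -EINVAL;         /* leave *out at its previous value */
}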
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S
index f738765cd1e6..ac01a7cdf77f 100644
--- a/arch/tile/mm/migrate_32.S
+++ b/arch/tile/mm/migrate_32.S
@@ -18,6 +18,7 @@
 #include <linux/linkage.h>
 #include <linux/threads.h>
 #include <asm/page.h>
+#include <asm/thread_info.h>
 #include <asm/types.h>
 #include <asm/asm-offsets.h>
 #include <hv/hypervisor.h>
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 1f5430c53d0d..1a2b36f8866d 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -142,6 +142,76 @@ pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
 }
 #endif
 
+/**
+ * shatter_huge_page() - ensure a given address is mapped by a small page.
+ *
+ * This function converts a huge PTE mapping kernel LOWMEM into a bunch
+ * of small PTEs with the same caching.  No cache flush required, but we
+ * must do a global TLB flush.
+ *
+ * Any caller that wishes to modify a kernel mapping that might
+ * have been made with a huge page should call this function,
+ * since doing so properly avoids race conditions with installing the
+ * newly-shattered page and then flushing all the TLB entries.
+ *
+ * @addr: Address at which to shatter any existing huge page.
+ */
+void shatter_huge_page(unsigned long addr)
+{
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        unsigned long flags = 0;  /* happy compiler */
+#ifdef __PAGETABLE_PMD_FOLDED
+        struct list_head *pos;
+#endif
+
+        /* Get a pointer to the pmd entry that we need to change. */
+        addr &= HPAGE_MASK;
+        BUG_ON(pgd_addr_invalid(addr));
+        BUG_ON(addr < PAGE_OFFSET);  /* only for kernel LOWMEM */
+        pgd = swapper_pg_dir + pgd_index(addr);
+        pud = pud_offset(pgd, addr);
+        BUG_ON(!pud_present(*pud));
+        pmd = pmd_offset(pud, addr);
+        BUG_ON(!pmd_present(*pmd));
+        if (!pmd_huge_page(*pmd))
+                return;
+
+        /*
+         * Grab the pgd_lock, since we may need it to walk the pgd_list,
+         * and since we need some kind of lock here to avoid races.
+         */
+        spin_lock_irqsave(&pgd_lock, flags);
+        if (!pmd_huge_page(*pmd)) {
+                /* Lost the race to convert the huge page. */
+                spin_unlock_irqrestore(&pgd_lock, flags);
+                return;
+        }
+
+        /* Shatter the huge page into the preallocated L2 page table. */
+        pmd_populate_kernel(&init_mm, pmd,
+                            get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));
+
+#ifdef __PAGETABLE_PMD_FOLDED
+        /* Walk every pgd on the system and update the pmd there. */
+        list_for_each(pos, &pgd_list) {
+                pmd_t *copy_pmd;
+                pgd = list_to_pgd(pos) + pgd_index(addr);
+                pud = pud_offset(pgd, addr);
+                copy_pmd = pmd_offset(pud, addr);
+                __set_pmd(copy_pmd, *pmd);
+        }
+#endif
+
+        /* Tell every cpu to notice the change. */
+        flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
+                     cpu_possible_mask, NULL, 0);
+
+        /* Hold the lock until the TLB flush is finished to avoid races. */
+        spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 /*
  * List of all pgd's needed so it can invalidate entries in both cached
  * and uncached pgd's. This is essentially codepath-based locking
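The comment block above spells out the calling discipline for shatter_huge_page(): shatter first, then edit the now-small kernel PTE. A hypothetical caller illustrating that order, built from helpers visible elsewhere in this patch (virt_to_pte, __set_pte, pte_set_home); example_set_kernel_page_home and the trailing flush_tlb_kernel_range() call are illustrative assumptions, not something this patch adds:

static void example_set_kernel_page_home(unsigned long kva, int home)
{
        pte_t *ptep, pteval;

        /* Make sure kva is mapped by a small PTE; shatter_huge_page()
         * itself performs the global TLB flush for the shattering. */
        shatter_huge_page(kva);

        ptep = virt_to_pte(NULL, kva);
        pteval = *ptep;
        BUG_ON(!pte_present(pteval) || pte_huge(pteval));
        __set_pte(ptep, pte_set_home(pteval, home));

        /* Flush the stale small-page translation on all cpus. */
        flush_tlb_kernel_range(kva, kva + PAGE_SIZE);
}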
@@ -184,9 +254,9 @@ static void pgd_ctor(pgd_t *pgd)
         BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
 #endif
 
-        clone_pgd_range(pgd + KERNEL_PGD_INDEX_START,
-                        swapper_pg_dir + KERNEL_PGD_INDEX_START,
-                        KERNEL_PGD_PTRS);
+        memcpy(pgd + KERNEL_PGD_INDEX_START,
+               swapper_pg_dir + KERNEL_PGD_INDEX_START,
+               KERNEL_PGD_PTRS * sizeof(pgd_t));
 
         pgd_list_add(pgd);
         spin_unlock_irqrestore(&pgd_lock, flags);
@@ -220,8 +290,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-        gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP;
+        gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
         struct page *p;
+#if L2_USER_PGTABLE_ORDER > 0
+        int i;
+#endif
 
 #ifdef CONFIG_HIGHPTE
         flags |= __GFP_HIGHMEM;
@@ -231,6 +304,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
         if (p == NULL)
                 return NULL;
 
+#if L2_USER_PGTABLE_ORDER > 0
+        /*
+         * Make every page have a page_count() of one, not just the first.
+         * We don't use __GFP_COMP since it doesn't look like it works
+         * correctly with tlb_remove_page().
+         */
+        for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+                init_page_count(p+i);
+                inc_zone_page_state(p+i, NR_PAGETABLE);
+        }
+#endif
+
         pgtable_page_ctor(p);
         return p;
 }
@@ -242,8 +327,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
  */
 void pte_free(struct mm_struct *mm, struct page *p)
 {
+        int i;
+
         pgtable_page_dtor(p);
-        __free_pages(p, L2_USER_PGTABLE_ORDER);
+        __free_page(p);
+
+        for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+                __free_page(p+i);
+                dec_zone_page_state(p+i, NR_PAGETABLE);
+        }
 }
 
 void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
@@ -252,18 +344,11 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
         int i;
 
         pgtable_page_dtor(pte);
-        tlb->need_flush = 1;
-        if (tlb_fast_mode(tlb)) {
-                struct page *pte_pages[L2_USER_PGTABLE_PAGES];
-                for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i)
-                        pte_pages[i] = pte + i;
-                free_pages_and_swap_cache(pte_pages, L2_USER_PGTABLE_PAGES);
-                return;
-        }
-        for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) {
-                tlb->pages[tlb->nr++] = pte + i;
-                if (tlb->nr >= FREE_PTE_NR)
-                        tlb_flush_mmu(tlb, 0, 0);
+        tlb_remove_page(tlb, pte);
+
+        for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+                tlb_remove_page(tlb, pte + i);
+                dec_zone_page_state(pte + i, NR_PAGETABLE);
         }
 }
 
@@ -346,35 +431,51 @@ int get_remote_cache_cpu(pgprot_t prot)
         return x + y * smp_width;
 }
 
-void set_pte_order(pte_t *ptep, pte_t pte, int order)
+/*
+ * Convert a kernel VA to a PA and homing information.
+ */
+int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte)
 {
-        unsigned long pfn = pte_pfn(pte);
-        struct page *page = pfn_to_page(pfn);
+        struct page *page = virt_to_page(va);
+        pte_t null_pte = { 0 };
 
-        /* Update the home of a PTE if necessary */
-        pte = pte_set_home(pte, page_home(page));
+        *cpa = __pa(va);
 
+        /* Note that this is not writing a page table, just returning a pte. */
+        *pte = pte_set_home(null_pte, page_home(page));
+
+        return 0; /* return non-zero if not hfh? */
+}
+EXPORT_SYMBOL(va_to_cpa_and_pte);
+
+void __set_pte(pte_t *ptep, pte_t pte)
+{
 #ifdef __tilegx__
         *ptep = pte;
 #else
-        /*
-         * When setting a PTE, write the high bits first, then write
-         * the low bits.  This sets the "present" bit only after the
-         * other bits are in place.  If a particular PTE update
-         * involves transitioning from one valid PTE to another, it
-         * may be necessary to call set_pte_order() more than once,
-         * transitioning via a suitable intermediate state.
-         * Note that this sequence also means that if we are transitioning
-         * from any migrating PTE to a non-migrating one, we will not
-         * see a half-updated PTE with the migrating bit off.
-         */
-#if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
-# error Must write the present and migrating bits last
-#endif
-        ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
-        barrier();
-        ((u32 *)ptep)[0] = (u32)(pte_val(pte));
-#endif
+# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
+# error Must write the present and migrating bits last
+# endif
+        if (pte_present(pte)) {
+                ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
+                barrier();
+                ((u32 *)ptep)[0] = (u32)(pte_val(pte));
+        } else {
+                ((u32 *)ptep)[0] = (u32)(pte_val(pte));
+                barrier();
+                ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
+        }
+#endif /* __tilegx__ */
+}
+
+void set_pte(pte_t *ptep, pte_t pte)
+{
+        struct page *page = pfn_to_page(pte_pfn(pte));
+
+        /* Update the home of a PTE if necessary */
+        pte = pte_set_home(pte, page_home(page));
+
+        __set_pte(ptep, pte);
 }
 
 /* Can this mm load a PTE with cached_priority set? */
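The 32-bit __set_pte() path above rests on a single invariant: the word that holds the PRESENT and MIGRATING bits (bit indices below 32, per the # error check) must be written last when a PTE becomes valid and first when it becomes invalid, so a racing walker never sees those bits set alongside a half-updated partner word. A rough standalone C model of that ordering argument, assuming ((u32 *)ptep)[0] is the half that carries the present bit; fake_pte and FAKE_PRESENT are made up for illustration and the compiler barrier stands in for the kernel's barrier():

#include <stdint.h>

/* Toy model: a 64-bit PTE stored as two 32-bit halves; PRESENT is in 'lo'. */
struct fake_pte { volatile uint32_t lo, hi; };
#define FAKE_PRESENT 0x1u

static void fake_set_pte(struct fake_pte *ptep, uint64_t pteval)
{
        if (pteval & FAKE_PRESENT) {
                /* Becoming valid: install the high half first, so PRESENT
                 * only appears once both halves describe the new mapping. */
                ptep->hi = (uint32_t)(pteval >> 32);
                __asm__ __volatile__("" ::: "memory");  /* compile-time barrier */
                ptep->lo = (uint32_t)pteval;
        } else {
                /* Becoming non-present: write the low half (clearing PRESENT)
                 * first, so nobody sees a valid-looking entry paired with a
                 * stale high half. */
                ptep->lo = (uint32_t)pteval;
                __asm__ __volatile__("" ::: "memory");
                ptep->hi = (uint32_t)(pteval >> 32);
        }
}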