Diffstat (limited to 'arch/tile/mm')

-rw-r--r--	arch/tile/mm/fault.c		 53
-rw-r--r--	arch/tile/mm/highmem.c		 88
-rw-r--r--	arch/tile/mm/homecache.c	 49
-rw-r--r--	arch/tile/mm/hugetlbpage.c	  3
-rw-r--r--	arch/tile/mm/init.c		 44
-rw-r--r--	arch/tile/mm/migrate_32.S	  1
-rw-r--r--	arch/tile/mm/migrate_64.S	187
-rw-r--r--	arch/tile/mm/pgtable.c		187

8 files changed, 440 insertions(+), 172 deletions(-)
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 704f3e8a4385..25b7b90fd620 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -24,7 +24,6 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/tty.h>
@@ -44,8 +43,11 @@
 
 #include <arch/interrupts.h>
 
-static noinline void force_sig_info_fault(int si_signo, int si_code,
-	unsigned long address, int fault_num, struct task_struct *tsk)
+static noinline void force_sig_info_fault(const char *type, int si_signo,
+					  int si_code, unsigned long address,
+					  int fault_num,
+					  struct task_struct *tsk,
+					  struct pt_regs *regs)
 {
 	siginfo_t info;
 
@@ -60,23 +62,25 @@ static noinline void force_sig_info_fault(int si_signo, int si_code,
 	info.si_code = si_code;
 	info.si_addr = (void __user *)address;
 	info.si_trapno = fault_num;
+	trace_unhandled_signal(type, regs, address, si_signo);
 	force_sig_info(si_signo, &info, tsk);
 }
 
 #ifndef __tilegx__
 /*
  * Synthesize the fault a PL0 process would get by doing a word-load of
- * an unaligned address or a high kernel address.  Called indirectly
- * from sys_cmpxchg() in kernel/intvec.S.
+ * an unaligned address or a high kernel address.
  */
-int _sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *regs)
+SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address,
+		struct pt_regs *, regs)
 {
 	if (address >= PAGE_OFFSET)
-		force_sig_info_fault(SIGSEGV, SEGV_MAPERR, address,
-				     INT_DTLB_MISS, current);
+		force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR,
+				     address, INT_DTLB_MISS, current, regs);
 	else
-		force_sig_info_fault(SIGBUS, BUS_ADRALN, address,
-				     INT_UNALIGN_DATA, current);
+		force_sig_info_fault("atomic alignment fault", SIGBUS,
+				     BUS_ADRALN, address,
+				     INT_UNALIGN_DATA, current, regs);
 
 	/*
 	 * Adjust pc to point at the actual instruction, which is unusual
@@ -291,7 +295,7 @@ static int handle_page_fault(struct pt_regs *regs,
 	/*
 	 * Early on, we need to check for migrating PTE entries;
 	 * see homecache.c.  If we find a migrating PTE, we wait until
-	 * the backing page claims to be done migrating, then we procede.
+	 * the backing page claims to be done migrating, then we proceed.
 	 * For kernel PTEs, we rewrite the PTE and return and retry.
 	 * Otherwise, we treat the fault like a normal "no PTE" fault,
 	 * rather than trying to patch up the existing PTE.
@@ -472,8 +476,8 @@ bad_area_nosemaphore:
 	 */
 	local_irq_enable();
 
-	force_sig_info_fault(SIGSEGV, si_code, address,
-			     fault_num, tsk);
+	force_sig_info_fault("segfault", SIGSEGV, si_code, address,
+			     fault_num, tsk, regs);
 	return 0;
 }
 
@@ -548,7 +552,8 @@ do_sigbus:
 	if (is_kernel_mode)
 		goto no_context;
 
-	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, fault_num, tsk);
+	force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address,
+			     fault_num, tsk, regs);
 	return 0;
 }
 
@@ -563,10 +568,10 @@ do_sigbus:
 /*
  * When we take an ITLB or DTLB fault or access violation in the
  * supervisor while the critical section bit is set, the hypervisor is
- * reluctant to write new values into the EX_CONTEXT_1_x registers,
+ * reluctant to write new values into the EX_CONTEXT_K_x registers,
  * since that might indicate we have not yet squirreled the SPR
  * contents away and can thus safely take a recursive interrupt.
- * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2.
+ * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_K_2.
  *
  * Note that this routine is called before homecache_tlb_defer_enter(),
  * which means that we can properly unlock any atomics that might
@@ -610,7 +615,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 	 * fault.  We didn't set up a kernel stack on initial entry to
 	 * sys_cmpxchg, but instead had one set up by the fault, which
 	 * (because sys_cmpxchg never releases ICS) came to us via the
-	 * SYSTEM_SAVE_1_2 mechanism, and thus EX_CONTEXT_1_[01] are
+	 * SYSTEM_SAVE_K_2 mechanism, and thus EX_CONTEXT_K_[01] are
 	 * still referencing the original user code.  We release the
 	 * atomic lock and rewrite pt_regs so that it appears that we
 	 * came from user-space directly, and after we finish the
@@ -656,14 +661,6 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 	}
 
 	/*
-	 * NOTE: the one other type of access that might bring us here
-	 * are the memory ops in __tns_atomic_acquire/__tns_atomic_release,
-	 * but we don't have to check specially for them since we can
-	 * always safely return to the address of the fault and retry,
-	 * since no separate atomic locks are involved.
-	 */
-
-	/*
 	 * Now that we have released the atomic lock (if necessary),
 	 * it's safe to spin if the PTE that caused the fault was migrating.
 	 */
@@ -741,6 +738,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 		panic("Bad fault number %d in do_page_fault", fault_num);
 	}
 
+#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
 	if (EX1_PL(regs->ex1) != USER_PL) {
 		struct async_tlb *async;
 		switch (fault_num) {
@@ -784,6 +782,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 			return;
 		}
 	}
+#endif
 
 	handle_page_fault(regs, fault_num, is_page_fault, address, write);
 }
@@ -810,8 +809,6 @@ static void handle_async_page_fault(struct pt_regs *regs,
 				  async->address, async->is_write);
 	}
 }
-#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
-
 
 /*
  * This routine effectively re-issues asynchronous page faults
@@ -833,6 +830,8 @@ void do_async_page_fault(struct pt_regs *regs)
 	handle_async_page_fault(regs, &current->thread.sn_async_tlb);
 #endif
 }
+#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
+
 
 void vmalloc_sync_all(void)
 {
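A note on the force_sig_info_fault() rework above: every signal-raising path in the fault handler now funnels through one helper that traces before it delivers, so no call site can forget the trace hook. A minimal userspace sketch of the same pattern follows; the trace format and the address are invented for the demo, not taken from the kernel.

/* Sketch: funnel all signal raising through one tracing helper. */
#include <signal.h>
#include <stdio.h>

static void trace_unhandled(const char *type, unsigned long addr, int sig)
{
	fprintf(stderr, "unhandled %s: sig=%d addr=%#lx\n", type, sig, addr);
}

static void raise_fault(const char *type, int sig, unsigned long addr)
{
	trace_unhandled(type, addr, sig);	/* trace first... */
	raise(sig);				/* ...then deliver */
}

int main(void)
{
	signal(SIGSEGV, SIG_IGN);		/* keep the demo alive */
	raise_fault("segfault", SIGSEGV, 0x1000);
	return 0;
}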
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
index 12ab137e7d4f..31dbbd9afe47 100644
--- a/arch/tile/mm/highmem.c
+++ b/arch/tile/mm/highmem.c
@@ -56,50 +56,6 @@ void kunmap(struct page *page)
 }
 EXPORT_SYMBOL(kunmap);
 
-static void debug_kmap_atomic_prot(enum km_type type)
-{
-#ifdef CONFIG_DEBUG_HIGHMEM
-	static unsigned warn_count = 10;
-
-	if (unlikely(warn_count == 0))
-		return;
-
-	if (unlikely(in_interrupt())) {
-		if (in_irq()) {
-			if (type != KM_IRQ0 && type != KM_IRQ1 &&
-			    type != KM_BIO_SRC_IRQ &&
-			    /* type != KM_BIO_DST_IRQ && */
-			    type != KM_BOUNCE_READ) {
-				WARN_ON(1);
-				warn_count--;
-			}
-		} else if (!irqs_disabled()) {	/* softirq */
-			if (type != KM_IRQ0 && type != KM_IRQ1 &&
-			    type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
-			    type != KM_SKB_SUNRPC_DATA &&
-			    type != KM_SKB_DATA_SOFTIRQ &&
-			    type != KM_BOUNCE_READ) {
-				WARN_ON(1);
-				warn_count--;
-			}
-		}
-	}
-
-	if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
-	    type == KM_BIO_SRC_IRQ /* || type == KM_BIO_DST_IRQ */) {
-		if (!irqs_disabled()) {
-			WARN_ON(1);
-			warn_count--;
-		}
-	} else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
-		if (irq_count() == 0 && !irqs_disabled()) {
-			WARN_ON(1);
-			warn_count--;
-		}
-	}
-#endif
-}
-
 /*
  * Describe a single atomic mapping of a page on a given cpu at a
  * given address, and allow it to be linked into a list.
@@ -240,10 +196,10 @@ void kmap_atomic_fix_kpte(struct page *page, int finished)
  * When holding an atomic kmap is is not legal to sleep, so atomic
  * kmaps are appropriate for short, tight code paths only.
  */
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 {
-	enum fixed_addresses idx;
 	unsigned long vaddr;
+	int idx, type;
 	pte_t *pte;
 
 	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
@@ -255,8 +211,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 	if (!PageHighMem(page))
 		return page_address(page);
 
-	debug_kmap_atomic_prot(type);
-
+	type = kmap_atomic_idx_push();
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	pte = kmap_get_pte(vaddr);
@@ -269,28 +224,35 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 }
 EXPORT_SYMBOL(kmap_atomic_prot);
 
-void *kmap_atomic(struct page *page, enum km_type type)
+void *__kmap_atomic(struct page *page)
 {
 	/* PAGE_NONE is a magic value that tells us to check immutability. */
-	return kmap_atomic_prot(page, type, PAGE_NONE);
+	return kmap_atomic_prot(page, PAGE_NONE);
 }
-EXPORT_SYMBOL(kmap_atomic);
+EXPORT_SYMBOL(__kmap_atomic);
 
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)
+void __kunmap_atomic(void *kvaddr)
 {
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
-	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 
-	/*
-	 * Force other mappings to Oops if they try to access this pte without
-	 * first remapping it.  Keeping stale mappings around is a bad idea.
-	 */
-	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) {
+	if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+	    vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
 		pte_t *pte = kmap_get_pte(vaddr);
 		pte_t pteval = *pte;
+		int idx, type;
+
+		type = kmap_atomic_idx();
+		idx = type + KM_TYPE_NR*smp_processor_id();
+
+		/*
+		 * Force other mappings to Oops if they try to access this pte
+		 * without first remapping it.  Keeping stale mappings around
+		 * is a bad idea.
+		 */
 		BUG_ON(!pte_present(pteval) && !pte_migrating(pteval));
 		kmap_atomic_unregister(pte_page(pteval), vaddr);
 		kpte_clear_flush(pte, vaddr);
+		kmap_atomic_idx_pop();
 	} else {
 		/* Must be a lowmem page */
 		BUG_ON(vaddr < PAGE_OFFSET);
@@ -300,19 +262,19 @@ void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)
 	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
-EXPORT_SYMBOL(kunmap_atomic_notypecheck);
+EXPORT_SYMBOL(__kunmap_atomic);
 
 /*
  * This API is supposed to allow us to map memory without a "struct page".
  * Currently we don't support this, though this may change in the future.
  */
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+void *kmap_atomic_pfn(unsigned long pfn)
 {
-	return kmap_atomic(pfn_to_page(pfn), type);
+	return kmap_atomic(pfn_to_page(pfn));
 }
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
 {
-	return kmap_atomic_prot(pfn_to_page(pfn), type, prot);
+	return kmap_atomic_prot(pfn_to_page(pfn), prot);
 }
 
 struct page *kmap_atomic_to_page(void *ptr)
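The kmap_atomic rework above drops the caller-supplied enum km_type in favor of a per-cpu nesting index managed by kmap_atomic_idx_push()/kmap_atomic_idx_pop(): the nesting depth, not a caller-chosen slot name, picks the fixmap slot. A minimal single-threaded model of that discipline; the slot count is invented, and the kernel keeps this counter per cpu rather than in a global.

/* Model of the kmap_atomic index-stack discipline. */
#include <assert.h>
#include <stdio.h>

#define KM_TYPE_NR 8		/* slots per cpu; value invented for the demo */

static int kmap_depth;		/* per-cpu in the kernel; one thread here */

static int kmap_atomic_idx_push(void)
{
	assert(kmap_depth < KM_TYPE_NR);	/* nested too deeply? */
	return kmap_depth++;
}

static int kmap_atomic_idx(void)
{
	return kmap_depth - 1;		/* index of the innermost mapping */
}

static void kmap_atomic_idx_pop(void)
{
	assert(kmap_depth > 0);
	--kmap_depth;
}

int main(void)
{
	int outer = kmap_atomic_idx_push();	/* outer kmap_atomic() */
	int inner = kmap_atomic_idx_push();	/* nested map gets the next slot */
	printf("fixmap slots: outer=%d inner=%d\n", outer, inner);
	assert(kmap_atomic_idx() == inner);
	kmap_atomic_idx_pop();			/* unmaps must nest LIFO */
	kmap_atomic_idx_pop();
	return 0;
}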
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index fb3b4a55cec4..cbe6f4f9eca3 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -37,6 +37,8 @@
 #include <asm/pgalloc.h>
 #include <asm/homecache.h>
 
+#include <arch/sim.h>
+
 #include "migrate.h"
 
 
@@ -177,23 +179,46 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
 	panic("Unsafe to continue.");
 }
 
+void flush_remote_page(struct page *page, int order)
+{
+	int i, pages = (1 << order);
+	for (i = 0; i < pages; ++i, ++page) {
+		void *p = kmap_atomic(page);
+		int hfh = 0;
+		int home = page_home(page);
+#if CHIP_HAS_CBOX_HOME_MAP()
+		if (home == PAGE_HOME_HASH)
+			hfh = 1;
+		else
+#endif
+			BUG_ON(home < 0 || home >= NR_CPUS);
+		finv_buffer_remote(p, PAGE_SIZE, hfh);
+		kunmap_atomic(p);
+	}
+}
+
 void homecache_evict(const struct cpumask *mask)
 {
 	flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
 }
 
-/* Return a mask of the cpus whose caches currently own these pages. */
-static void homecache_mask(struct page *page, int pages,
-			   struct cpumask *home_mask)
+/*
+ * Return a mask of the cpus whose caches currently own these pages.
+ * The return value is whether the pages are all coherently cached
+ * (i.e. none are immutable, incoherent, or uncached).
+ */
+static int homecache_mask(struct page *page, int pages,
+			  struct cpumask *home_mask)
 {
 	int i;
+	int cached_coherently = 1;
 	cpumask_clear(home_mask);
 	for (i = 0; i < pages; ++i) {
 		int home = page_home(&page[i]);
 		if (home == PAGE_HOME_IMMUTABLE ||
 		    home == PAGE_HOME_INCOHERENT) {
 			cpumask_copy(home_mask, cpu_possible_mask);
-			return;
+			return 0;
 		}
 #if CHIP_HAS_CBOX_HOME_MAP()
 		if (home == PAGE_HOME_HASH) {
@@ -201,11 +226,14 @@ static void homecache_mask(struct page *page, int pages,
 			continue;
 		}
 #endif
-		if (home == PAGE_HOME_UNCACHED)
+		if (home == PAGE_HOME_UNCACHED) {
+			cached_coherently = 0;
 			continue;
+		}
 		BUG_ON(home < 0 || home >= NR_CPUS);
 		cpumask_set_cpu(home, home_mask);
 	}
+	return cached_coherently;
 }
 
 /*
@@ -217,13 +245,6 @@ static unsigned long cache_flush_length(unsigned long length)
 	return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length;
 }
 
-/* On the simulator, confirm lines have been evicted everywhere. */
-static void validate_lines_evicted(unsigned long pfn, size_t length)
-{
-	sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED,
-		    (HV_PhysAddr)pfn << PAGE_SHIFT, length);
-}
-
 /* Flush a page out of whatever cache(s) it is in. */
 void homecache_flush_cache(struct page *page, int order)
 {
@@ -234,7 +255,7 @@ void homecache_flush_cache(struct page *page, int order)
 
 	homecache_mask(page, pages, &home_mask);
 	flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0);
-	validate_lines_evicted(pfn, pages * PAGE_SIZE);
+	sim_validate_lines_evicted(PFN_PHYS(pfn), pages * PAGE_SIZE);
 }
 
 
@@ -391,7 +412,7 @@ void homecache_change_page_home(struct page *page, int order, int home)
 		pte_t *ptep = virt_to_pte(NULL, kva);
 		pte_t pteval = *ptep;
 		BUG_ON(!pte_present(pteval) || pte_huge(pteval));
-		*ptep = pte_set_home(pteval, home);
+		__set_pte(ptep, pte_set_home(pteval, home));
 	}
 }
 
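homecache_mask() above now reports whether every page was coherently cached, in addition to filling in the owner mask. A userspace model of that logic, using invented PAGE_HOME_* values and a 64-bit word standing in for struct cpumask:

/* Model of homecache_mask(): owner mask plus a coherency verdict. */
#include <assert.h>
#include <stdio.h>

#define NR_CPUS 64			/* so one 64-bit word can model the cpumask */
enum {					/* stand-in values; the real ones live in the tile headers */
	PAGE_HOME_IMMUTABLE = -1,
	PAGE_HOME_INCOHERENT = -2,
	PAGE_HOME_UNCACHED = -3,
	PAGE_HOME_HASH = -4,
};

static int homecache_mask_model(const int *home, int pages,
				unsigned long long *home_mask)
{
	int i, cached_coherently = 1;
	*home_mask = 0;
	for (i = 0; i < pages; ++i) {
		if (home[i] == PAGE_HOME_IMMUTABLE ||
		    home[i] == PAGE_HOME_INCOHERENT) {
			*home_mask = ~0ULL;	/* must assume every cpu owns lines */
			return 0;
		}
		if (home[i] == PAGE_HOME_HASH)	/* hash-for-home: no single owner */
			continue;
		if (home[i] == PAGE_HOME_UNCACHED) {
			cached_coherently = 0;
			continue;
		}
		assert(home[i] >= 0 && home[i] < NR_CPUS);
		*home_mask |= 1ULL << home[i];
	}
	return cached_coherently;
}

int main(void)
{
	int homes[] = { 3, 7, PAGE_HOME_UNCACHED };
	unsigned long long mask;
	int coherent = homecache_mask_model(homes, 3, &mask);
	printf("owners=%#llx coherent=%d\n", mask, coherent);
	return 0;
}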
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 24688b697a8d..42cfcba4e1ef 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -21,7 +21,6 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/sysctl.h>
@@ -220,7 +219,7 @@ try_again:
 	if (mm->free_area_cache < len)
 		goto fail;
 
-	/* either no address requested or cant fit in requested address hole */
+	/* either no address requested or can't fit in requested address hole */
 	addr = (mm->free_area_cache - len) & huge_page_mask(h);
 	do {
 		/*
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index d89c9eacd162..4e10c4023028 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -53,26 +53,13 @@
 
 #include "migrate.h"
 
-/*
- * We could set FORCE_MAX_ZONEORDER to "(HPAGE_SHIFT - PAGE_SHIFT + 1)"
- * in the Tile Kconfig, but this generates configure warnings.
- * Do it here and force people to get it right to compile this file.
- * The problem is that with 4KB small pages and 16MB huge pages,
- * the default value doesn't allow us to group enough small pages
- * together to make up a huge page.
- */
-#if CONFIG_FORCE_MAX_ZONEORDER < HPAGE_SHIFT - PAGE_SHIFT + 1
-# error "Change FORCE_MAX_ZONEORDER in arch/tile/Kconfig to match page size"
-#endif
-
 #define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0))
 
 #ifndef __tilegx__
 unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE;
+EXPORT_SYMBOL(VMALLOC_RESERVE);
 #endif
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 /* Create an L2 page table */
 static pte_t * __init alloc_pte(void)
 {
@@ -445,7 +432,7 @@ static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
 
 /* Temporary page table we use for staging. */
 static pgd_t pgtables[PTRS_PER_PGD]
- __attribute__((section(".init.page")));
+ __attribute__((aligned(HV_PAGE_TABLE_ALIGN)));
 
 /*
  * This maps the physical memory to kernel virtual address space, a total
@@ -653,6 +640,17 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 	memcpy(pgd_base, pgtables, sizeof(pgtables));
 	__install_page_table(pgd_base, __get_cpu_var(current_asid),
 			     swapper_pgprot);
+
+	/*
+	 * We just read swapper_pgprot and thus brought it into the cache,
+	 * with its new home & caching mode.  When we start the other CPUs,
+	 * they're going to reference swapper_pgprot via their initial fake
+	 * VA-is-PA mappings, which cache everything locally.  At that
+	 * time, if it's in our cache with a conflicting home, the
+	 * simulator's coherence checker will complain.  So, flush it out
+	 * of our cache; we're not going to ever use it again anyway.
+	 */
+	__insn_finv(&swapper_pgprot);
 }
 
 /*
@@ -950,11 +948,7 @@ struct kmem_cache *pgd_cache;
 
 void __init pgtable_cache_init(void)
 {
-	pgd_cache = kmem_cache_create("pgd",
-				PTRS_PER_PGD*sizeof(pgd_t),
-				PTRS_PER_PGD*sizeof(pgd_t),
-				0,
-				NULL);
+	pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL);
 	if (!pgd_cache)
 		panic("pgtable_cache_init(): Cannot create pgd cache");
 }
@@ -988,8 +982,12 @@ static long __write_once initfree = 1;
 /* Select whether to free (1) or mark unusable (0) the __init pages. */
 static int __init set_initfree(char *str)
 {
-	strict_strtol(str, 0, &initfree);
-	pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't");
+	long val;
+	if (strict_strtol(str, 0, &val) == 0) {
+		initfree = val;
+		pr_info("initfree: %s free init pages\n",
+			initfree ? "will" : "won't");
+	}
 	return 1;
 }
 __setup("initfree=", set_initfree);
@@ -1060,7 +1058,7 @@ void free_initmem(void)
 
 	/*
 	 * Free the pages mapped from 0xc0000000 that correspond to code
-	 * pages from 0xfd000000 that we won't use again after init.
+	 * pages from MEM_SV_INTRPT that we won't use again after init.
 	 */
 	free_init_pages("unused kernel text",
 			(unsigned long)_sinittext - text_delta,
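The set_initfree() fix above only commits the parsed value (and prints the decision) when strict_strtol() succeeds; previously a malformed initfree= boot argument could clobber the flag. The same guard in portable C, with strtol() standing in for strict_strtol():

/* Sketch: commit a parsed boot-style parameter only on a clean parse. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static long initfree = 1;

static void set_initfree(const char *str)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(str, &end, 0);	/* strict_strtol() stand-in */
	if (errno == 0 && end != str && *end == '\0') {
		initfree = val;
		printf("initfree: %s free init pages\n",
		       initfree ? "will" : "won't");
	}
	/* on parse failure, initfree keeps its previous value */
}

int main(void)
{
	set_initfree("0");	/* accepted: prints "won't" */
	set_initfree("bogus");	/* rejected: flag untouched */
	printf("final initfree=%ld\n", initfree);
	return 0;
}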
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S
index f738765cd1e6..ac01a7cdf77f 100644
--- a/arch/tile/mm/migrate_32.S
+++ b/arch/tile/mm/migrate_32.S
@@ -18,6 +18,7 @@
 #include <linux/linkage.h>
 #include <linux/threads.h>
 #include <asm/page.h>
+#include <asm/thread_info.h>
 #include <asm/types.h>
 #include <asm/asm-offsets.h>
 #include <hv/hypervisor.h>
diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S
new file mode 100644
index 000000000000..e76fea688beb
--- /dev/null
+++ b/arch/tile/mm/migrate_64.S
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for
+ * more details.
+ *
+ * This routine is a helper for migrating the home of a set of pages to
+ * a new cpu.  See the documentation in homecache.c for more information.
+ */
+
+#include <linux/linkage.h>
+#include <linux/threads.h>
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/types.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+
+	.text
+
+/*
+ * First, some definitions that apply to all the code in the file.
+ */
+
+/* Locals (caller-save) */
+#define r_tmp		r10
+#define r_save_sp	r11
+
+/* What we save where in the stack frame; must include all callee-saves. */
+#define FRAME_SP	8
+#define FRAME_R30	16
+#define FRAME_R31	24
+#define FRAME_R32	32
+#define FRAME_R33	40
+#define FRAME_SIZE	48
+
+
+
+
+
+/*
+ * On entry:
+ *
+ *   r0 the new context PA to install (moved to r_context)
+ *   r1 PTE to use for context access (moved to r_access)
+ *   r2 ASID to use for new context (moved to r_asid)
+ *   r3 pointer to cpumask with just this cpu set in it (r_my_cpumask)
+ */
+
+/* Arguments (caller-save) */
+#define r_context_in	r0
+#define r_access_in	r1
+#define r_asid_in	r2
+#define r_my_cpumask	r3
+
+/* Locals (callee-save); must not be more than FRAME_xxx above. */
+#define r_save_ics	r30
+#define r_context	r31
+#define r_access	r32
+#define r_asid		r33
+
+/*
+ * Caller-save locals and frame constants are the same as
+ * for homecache_migrate_stack_and_flush.
+ */
+
+STD_ENTRY(flush_and_install_context)
+	/*
+	 * Create a stack frame; we can't touch it once we flush the
+	 * cache until we install the new page table and flush the TLB.
+	 */
+	{
+	 move r_save_sp, sp
+	 st sp, lr
+	 addi sp, sp, -FRAME_SIZE
+	}
+	addi r_tmp, sp, FRAME_SP
+	{
+	 st r_tmp, r_save_sp
+	 addi r_tmp, sp, FRAME_R30
+	}
+	{
+	 st r_tmp, r30
+	 addi r_tmp, sp, FRAME_R31
+	}
+	{
+	 st r_tmp, r31
+	 addi r_tmp, sp, FRAME_R32
+	}
+	{
+	 st r_tmp, r32
+	 addi r_tmp, sp, FRAME_R33
+	}
+	st r_tmp, r33
+
+	/* Move some arguments to callee-save registers. */
+	{
+	 move r_context, r_context_in
+	 move r_access, r_access_in
+	}
+	move r_asid, r_asid_in
+
+	/* Disable interrupts, since we can't use our stack. */
+	{
+	 mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
+	 movei r_tmp, 1
+	}
+	mtspr INTERRUPT_CRITICAL_SECTION, r_tmp
+
+	/* First, flush our L2 cache. */
+	{
+	 move r0, zero  /* cache_pa */
+	 moveli r1, hw2_last(HV_FLUSH_EVICT_L2)  /* cache_control */
+	}
+	{
+	 shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2)
+	 move r2, r_my_cpumask  /* cache_cpumask */
+	}
+	{
+	 shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2)
+	 move r3, zero  /* tlb_va */
+	}
+	{
+	 move r4, zero  /* tlb_length */
+	 move r5, zero  /* tlb_pgsize */
+	}
+	{
+	 move r6, zero  /* tlb_cpumask */
+	 move r7, zero  /* asids */
+	}
+	{
+	 move r8, zero  /* asidcount */
+	 jal hv_flush_remote
+	}
+	bnez r0, 1f
+
+	/* Now install the new page table. */
+	{
+	 move r0, r_context
+	 move r1, r_access
+	}
+	{
+	 move r2, r_asid
+	 movei r3, HV_CTX_DIRECTIO
+	}
+	jal hv_install_context
+	bnez r0, 1f
+
+	/* Finally, flush the TLB. */
+	{
+	 movei r0, 0  /* preserve_global */
+	 jal hv_flush_all
+	}
+
+1:	/* Reset interrupts back how they were before. */
+	mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics
+
+	/* Restore the callee-saved registers and return. */
+	addli lr, sp, FRAME_SIZE
+	{
+	 ld lr, lr
+	 addli r_tmp, sp, FRAME_R30
+	}
+	{
+	 ld r30, r_tmp
+	 addli r_tmp, sp, FRAME_R31
+	}
+	{
+	 ld r31, r_tmp
+	 addli r_tmp, sp, FRAME_R32
+	}
+	{
+	 ld r32, r_tmp
+	 addli r_tmp, sp, FRAME_R33
+	}
+	{
+	 ld r33, r_tmp
+	 addi sp, sp, FRAME_SIZE
+	}
+	jrp lr
+	STD_ENDPROC(flush_and_install_context)
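For readers who don't follow tilegx assembly, the new helper above enforces a strict order: raise the interrupt critical section (the stack may not be touched once its cache lines are gone), evict the local L2, install the new page table, then flush the TLB. A rough, runnable C model of that control flow; the hv_* functions here are stubs with simplified signatures (the real hypervisor ABI takes more arguments), and the ICS handling is modeled by a plain variable.

/* Model of flush_and_install_context's ordering; stubs, not the real ABI. */
#include <stdio.h>

#define HV_FLUSH_EVICT_L2 1UL	/* stand-in value */
#define HV_CTX_DIRECTIO   1	/* stand-in value */

static int ics;			/* models INTERRUPT_CRITICAL_SECTION */

static int hv_flush_remote_stub(unsigned long ctl, const void *cpumask)
{
	printf("1. evict local L2 (ctl=%#lx)\n", ctl);
	return 0;
}
static int hv_install_context_stub(unsigned long pa, int asid, int flags)
{
	printf("2. install page table pa=%#lx asid=%d\n", pa, asid);
	return 0;
}
static void hv_flush_all_stub(int preserve_global)
{
	printf("3. flush TLB (preserve_global=%d)\n", preserve_global);
}

static int flush_and_install_context_model(unsigned long ctx_pa, int asid,
					   const void *my_cpumask)
{
	int rc, save_ics = ics;

	/* Stack lines vanish with the L2 flush, so nothing below may touch
	 * the stack until the new context and TLB are in place; the
	 * assembly enforces that by raising ICS first. */
	ics = 1;

	rc = hv_flush_remote_stub(HV_FLUSH_EVICT_L2, my_cpumask);
	if (rc == 0) {
		rc = hv_install_context_stub(ctx_pa, asid, HV_CTX_DIRECTIO);
		if (rc == 0)
			hv_flush_all_stub(0);	/* drop non-global entries */
	}

	ics = save_ics;		/* restore interrupts as they were */
	return rc;
}

int main(void)
{
	return flush_and_install_context_model(0x100000, 3, 0);
}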
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 335c24621c41..de7d8e21e01d 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -41,7 +41,7 @@
  * The normal show_free_areas() is too verbose on Tile, with dozens
  * of processors and often four NUMA zones each with high and lowmem.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	struct zone *zone;
 
@@ -134,14 +134,84 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 }
 
 #if defined(CONFIG_HIGHPTE)
-pte_t *_pte_offset_map(pmd_t *dir, unsigned long address, enum km_type type)
+pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
 {
-	pte_t *pte = kmap_atomic(pmd_page(*dir), type) +
+	pte_t *pte = kmap_atomic(pmd_page(*dir)) +
 	     (pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK;
 	return &pte[pte_index(address)];
 }
 #endif
 
+/**
+ * shatter_huge_page() - ensure a given address is mapped by a small page.
+ *
+ * This function converts a huge PTE mapping kernel LOWMEM into a bunch
+ * of small PTEs with the same caching.  No cache flush required, but we
+ * must do a global TLB flush.
+ *
+ * Any caller that wishes to modify a kernel mapping that might
+ * have been made with a huge page should call this function,
+ * since doing so properly avoids race conditions with installing the
+ * newly-shattered page and then flushing all the TLB entries.
+ *
+ * @addr: Address at which to shatter any existing huge page.
+ */
+void shatter_huge_page(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long flags = 0;  /* happy compiler */
+#ifdef __PAGETABLE_PMD_FOLDED
+	struct list_head *pos;
+#endif
+
+	/* Get a pointer to the pmd entry that we need to change. */
+	addr &= HPAGE_MASK;
+	BUG_ON(pgd_addr_invalid(addr));
+	BUG_ON(addr < PAGE_OFFSET);  /* only for kernel LOWMEM */
+	pgd = swapper_pg_dir + pgd_index(addr);
+	pud = pud_offset(pgd, addr);
+	BUG_ON(!pud_present(*pud));
+	pmd = pmd_offset(pud, addr);
+	BUG_ON(!pmd_present(*pmd));
+	if (!pmd_huge_page(*pmd))
+		return;
+
+	/*
+	 * Grab the pgd_lock, since we may need it to walk the pgd_list,
+	 * and since we need some kind of lock here to avoid races.
+	 */
+	spin_lock_irqsave(&pgd_lock, flags);
+	if (!pmd_huge_page(*pmd)) {
+		/* Lost the race to convert the huge page. */
+		spin_unlock_irqrestore(&pgd_lock, flags);
+		return;
+	}
+
+	/* Shatter the huge page into the preallocated L2 page table. */
+	pmd_populate_kernel(&init_mm, pmd,
+			    get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));
+
+#ifdef __PAGETABLE_PMD_FOLDED
+	/* Walk every pgd on the system and update the pmd there. */
+	list_for_each(pos, &pgd_list) {
+		pmd_t *copy_pmd;
+		pgd = list_to_pgd(pos) + pgd_index(addr);
+		pud = pud_offset(pgd, addr);
+		copy_pmd = pmd_offset(pud, addr);
+		__set_pmd(copy_pmd, *pmd);
+	}
+#endif
+
+	/* Tell every cpu to notice the change. */
+	flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
+		     cpu_possible_mask, NULL, 0);
+
+	/* Hold the lock until the TLB flush is finished to avoid races. */
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 /*
  * List of all pgd's needed so it can invalidate entries in both cached
  * and uncached pgd's. This is essentially codepath-based locking
@@ -184,9 +254,9 @@ static void pgd_ctor(pgd_t *pgd)
 	BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
 #endif
 
-	clone_pgd_range(pgd + KERNEL_PGD_INDEX_START,
-			swapper_pg_dir + KERNEL_PGD_INDEX_START,
-			KERNEL_PGD_PTRS);
+	memcpy(pgd + KERNEL_PGD_INDEX_START,
+	       swapper_pg_dir + KERNEL_PGD_INDEX_START,
+	       KERNEL_PGD_PTRS * sizeof(pgd_t));
 
 	pgd_list_add(pgd);
 	spin_unlock_irqrestore(&pgd_lock, flags);
@@ -220,8 +290,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP;
+	gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
 	struct page *p;
+#if L2_USER_PGTABLE_ORDER > 0
+	int i;
+#endif
 
 #ifdef CONFIG_HIGHPTE
 	flags |= __GFP_HIGHMEM;
@@ -231,6 +304,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	if (p == NULL)
 		return NULL;
 
+#if L2_USER_PGTABLE_ORDER > 0
+	/*
+	 * Make every page have a page_count() of one, not just the first.
+	 * We don't use __GFP_COMP since it doesn't look like it works
+	 * correctly with tlb_remove_page().
+	 */
+	for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+		init_page_count(p+i);
+		inc_zone_page_state(p+i, NR_PAGETABLE);
+	}
+#endif
+
 	pgtable_page_ctor(p);
 	return p;
 }
@@ -242,8 +327,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 */
 void pte_free(struct mm_struct *mm, struct page *p)
 {
+	int i;
+
 	pgtable_page_dtor(p);
-	__free_pages(p, L2_USER_PGTABLE_ORDER);
+	__free_page(p);
+
+	for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+		__free_page(p+i);
+		dec_zone_page_state(p+i, NR_PAGETABLE);
+	}
 }
 
 void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
@@ -252,18 +344,11 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
 	int i;
 
 	pgtable_page_dtor(pte);
-	tlb->need_flush = 1;
-	if (tlb_fast_mode(tlb)) {
-		struct page *pte_pages[L2_USER_PGTABLE_PAGES];
-		for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i)
-			pte_pages[i] = pte + i;
-		free_pages_and_swap_cache(pte_pages, L2_USER_PGTABLE_PAGES);
-		return;
-	}
-	for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) {
-		tlb->pages[tlb->nr++] = pte + i;
-		if (tlb->nr >= FREE_PTE_NR)
-			tlb_flush_mmu(tlb, 0, 0);
+	tlb_remove_page(tlb, pte);
+
+	for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+		tlb_remove_page(tlb, pte + i);
+		dec_zone_page_state(pte + i, NR_PAGETABLE);
 	}
 }
 
@@ -346,35 +431,51 @@ int get_remote_cache_cpu(pgprot_t prot)
 	return x + y * smp_width;
 }
 
-void set_pte_order(pte_t *ptep, pte_t pte, int order)
+/*
+ * Convert a kernel VA to a PA and homing information.
+ */
+int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte)
 {
-	unsigned long pfn = pte_pfn(pte);
-	struct page *page = pfn_to_page(pfn);
+	struct page *page = virt_to_page(va);
+	pte_t null_pte = { 0 };
 
-	/* Update the home of a PTE if necessary */
-	pte = pte_set_home(pte, page_home(page));
+	*cpa = __pa(va);
 
+	/* Note that this is not writing a page table, just returning a pte. */
+	*pte = pte_set_home(null_pte, page_home(page));
+
+	return 0; /* return non-zero if not hfh? */
+}
+EXPORT_SYMBOL(va_to_cpa_and_pte);
+
+void __set_pte(pte_t *ptep, pte_t pte)
+{
 #ifdef __tilegx__
 	*ptep = pte;
 #else
-	/*
-	 * When setting a PTE, write the high bits first, then write
-	 * the low bits.  This sets the "present" bit only after the
-	 * other bits are in place.  If a particular PTE update
-	 * involves transitioning from one valid PTE to another, it
-	 * may be necessary to call set_pte_order() more than once,
-	 * transitioning via a suitable intermediate state.
-	 * Note that this sequence also means that if we are transitioning
-	 * from any migrating PTE to a non-migrating one, we will not
-	 * see a half-updated PTE with the migrating bit off.
-	 */
-#if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
-# error Must write the present and migrating bits last
-#endif
-	((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
-	barrier();
-	((u32 *)ptep)[0] = (u32)(pte_val(pte));
-#endif
+# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
+#  error Must write the present and migrating bits last
+# endif
+	if (pte_present(pte)) {
+		((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
+		barrier();
+		((u32 *)ptep)[0] = (u32)(pte_val(pte));
+	} else {
+		((u32 *)ptep)[0] = (u32)(pte_val(pte));
+		barrier();
+		((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
+	}
+#endif /* __tilegx__ */
+}
+
+void set_pte(pte_t *ptep, pte_t pte)
+{
+	struct page *page = pfn_to_page(pte_pfn(pte));
+
+	/* Update the home of a PTE if necessary */
+	pte = pte_set_home(pte, page_home(page));
+
+	__set_pte(ptep, pte);
 }
 
 /* Can this mm load a PTE with cached_priority set? */
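The __set_pte() hunk above encodes a subtle ordering rule for 32-bit tilepro, where a 64-bit PTE takes two stores: the word holding the present and migrating bits is written last when making a PTE valid, and first when making it invalid, so a concurrent walker never observes a half-written valid entry. A runnable userspace model of that rule; the bit position is invented (the real one comes from HV_PTE_INDEX_PRESENT), and barrier() is a compiler barrier as in the kernel.

/* Model of the two-word PTE store ordering in __set_pte(). */
#include <stdint.h>
#include <stdio.h>

#define PTE_PRESENT 1u		/* hypothetical bit position in the low word */

static void barrier(void)
{
	__asm__ __volatile__("" ::: "memory");	/* compiler barrier */
}

/* ptep[0] is the low word (carries present/migrating), ptep[1] the high word. */
static void set_pte_model(volatile uint32_t ptep[2], uint64_t pte)
{
	if (pte & PTE_PRESENT) {
		ptep[1] = (uint32_t)(pte >> 32);	/* payload first */
		barrier();
		ptep[0] = (uint32_t)pte;		/* present bit last */
	} else {
		ptep[0] = (uint32_t)pte;		/* clear present first */
		barrier();
		ptep[1] = (uint32_t)(pte >> 32);
	}
}

int main(void)
{
	volatile uint32_t pte[2] = { 0, 0 };

	set_pte_model(pte, ((uint64_t)0x12345 << 32) | PTE_PRESENT);
	printf("valid:   lo=%#x hi=%#x\n", pte[0], pte[1]);

	set_pte_model(pte, 0);		/* invalidate: low word goes first */
	printf("invalid: lo=%#x hi=%#x\n", pte[0], pte[1]);
	return 0;
}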