[S390] tlb flush fix.

The current tlb flushing code for page table entries violates the s390 architecture in a small detail. The relevant section from the principles of operation (SA22-7832-02 page 3-47): "A valid table entry must not be changed while it is attached to any CPU and may be used for translation by that CPU except to (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page-table entry, or (3) make a change by means of a COMPARE AND SWAP AND PURGE instruction that purges the TLB." That means if one thread of a multithreaded application uses a vma while another thread does an unmap on it, the page table entries of that vma need to get removed with IPTE, IDTE or CSP. In some strange and rare situations a cpu could check-stop (die) because an entry has been pushed out of the TLB that is still needed to complete a (milli-coded) instruction. I've never seen it happen with the current code on any of the supported machines, so right now this is a theoretical problem. But I want to fix it nevertheless, to avoid headaches in the future. To get this implemented correctly without changing common code the primitives ptep_get_and_clear, ptep_get_and_clear_full and ptep_set_wrprotect need to use the IPTE instruction to invalidate the pte before the new pte value gets stored. If IPTE is always used for the three primitives three important operations will have a performance hit: fork, mprotect and exit_mmap. Time for some workarounds: * 1: ptep_get_and_clear_full is used in unmap_vmas to remove page table entries in a batched tlb gather operation. If the mmu_gather context passed to unmap_vmas has been started with full_mm_flush==1 or if only one cpu is online or if the only user of a mm_struct is the current process then the fullmm indication in the mmu_gather context is set to one. All TLBs for mm_struct are flushed by the tlb_gather_mmu call. No new TLBs can be created while the unmap is in progress. 
In this case ptep_get_and_clear_full clears the ptes with a simple store. * 2: ptep_get_and_clear is used in change_protection to clear the ptes from the page tables before they are reentered with the new access flags. At the end of the update flush_tlb_range clears the remaining TLBs. In general the ptep_get_and_clear has to issue IPTE for each pte and flush_tlb_range is a nop. But if there is only one user of the mm_struct then ptep_get_and_clear uses simple stores to do the update and flush_tlb_range will flush the TLBs. * 3: Similar to 2, ptep_set_wrprotect is used in copy_page_range for a fork to make all ptes of a cow mapping read-only. At the end of copy_page_range dup_mmap will flush the TLBs with a call to flush_tlb_mm. Check for mm->mm_users and if there is only one user avoid using IPTE in ptep_set_wrprotect and let flush_tlb_mm clear the TLBs. Overall for single threaded programs the tlb flush code now performs better, for multi threaded programs it is slightly worse. In particular exit_mmap() now does a single IDTE for the mm and then just frees every page cache reference and every page table page directly without a delay over the mmu_gather structure. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
author: Martin Schwidefsky <schwidefsky@de.ibm.com> 2007-10-22 06:52:44 -0400
committer: Martin Schwidefsky <schwidefsky@de.ibm.com> 2007-10-22 06:52:48 -0400
commit: ba8a9229ab9e80278c28ad68b15053f65b2b0a7c (patch)
tree: d73e4f7d352d3b3edf8888973528cb7dd3e953f9 /include
parent: e3d3683d1402c1737687cb698451d545f57c32a7 (diff)
4 files changed, 244 insertions, 149 deletions
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index e45d3c9a4b7e..6cbbfe4f6749 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -82,7 +82,6 @@ static inline void pgd_free(pgd_t *pgd)
 */
 #define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); })
 #define pmd_free(x)                     do { } while (0)
-#define __pmd_free_tlb(tlb,x)           do { } while (0)
 #define pgd_populate(mm, pmd, pte)      BUG()
 #define pgd_populate_kernel(mm, pmd, pte)       BUG()
 #else /* __s390x__ */
@@ -118,12 +117,6 @@ static inline void pmd_free (pmd_t *pmd)
        free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);
 }
-#define __pmd_free_tlb(tlb,pmd)                 \
-        do {                                    \
-                tlb_flush_mmu(tlb, 0, 0);       \
-                pmd_free(pmd);                  \
-         } while (0)
 static inline void
 pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
 {
@@ -224,14 +217,4 @@ static inline void pte_free(struct page *pte)
        __free_page(pte);
 }
-#define __pte_free_tlb(tlb, pte)                                        \
-({                                                                      \
-        struct mmu_gather *__tlb = (tlb);                               \
-        struct page *__pte = (pte);                                     \
-        struct page *shadow_page = get_shadow_page(__pte);              \
-        if (shadow_page)                                                \
-                tlb_remove_page(__tlb, shadow_page);                    \
-        tlb_remove_page(__tlb, __pte);                                  \
-})
 #endif /* _S390_PGALLOC_H */
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 39bb5192dc31..b424ab21f8bd 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -424,7 +424,8 @@ static inline pgd_t *get_shadow_pgd(pgd_t *pgdp)
 * within a page table are directly modified.  Thus, the following
 * hook is made available.
 */
-static inline void set_pte(pte_t *pteptr, pte_t pteval)
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+                              pte_t *pteptr, pte_t pteval)
 {
        pte_t *shadow_pte = get_shadow_pte(pteptr);
@@ -437,7 +438,6 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
                        pte_val(*shadow_pte) = _PAGE_TYPE_EMPTY;
        }
 }
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 /*
 * pgd/pmd/pte query functions
@@ -508,7 +508,8 @@ static inline int pte_file(pte_t pte)
        return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;
 }
-#define pte_same(a,b)   (pte_val(a) == pte_val(b))
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(a,b)  (pte_val(a) == pte_val(b))
 /*
 * query functions pte_write/pte_dirty/pte_young only work if
@@ -663,24 +664,19 @@ static inline pte_t pte_mkyoung(pte_t pte)
        return pte;
 }
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+                                            unsigned long addr, pte_t *ptep)
 {
        return 0;
 }
-static inline int
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-ptep_clear_flush_young(struct vm_area_struct *vma,
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
-                        unsigned long address, pte_t *ptep)
+                                         unsigned long address, pte_t *ptep)
 {
        /* No need to flush TLB; bits are in storage key */
-        return ptep_test_and_clear_young(vma, address, ptep);
+        return 0;
-}
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-        pte_t pte = *ptep;
-        pte_clear(mm, addr, ptep);
-        return pte;
 }
 static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
@@ -709,6 +705,32 @@ static inline void ptep_invalidate(unsigned long address, pte_t *ptep)
                __ptep_ipte(address, ptep);
 }
+/*
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ *   1) ptep_get_and_clear
+ *   2) set_pte_at
+ *   3) flush_tlb_range
+ * On s390 the tlb needs to get flushed with the modification of the pte
+ * if the pte is active. The only way how this can be implemented is to
+ * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
+ * is a nop.
+ */
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define ptep_get_and_clear(__mm, __address, __ptep)                     \
+({                                                                      \
+        pte_t __pte = *(__ptep);                                        \
+        if (atomic_read(&(__mm)->mm_users) > 1 ||                       \
+            (__mm) != current->active_mm)                               \
+                ptep_invalidate(__address, __ptep);                     \
+        else                                                            \
+                pte_clear((__mm), (__address), (__ptep));               \
+        __pte;                                                          \
+})
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
                                     unsigned long address, pte_t *ptep)
 {
@@ -717,12 +739,40 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
        return pte;
 }
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+/*
+ * The batched pte unmap code uses ptep_get_and_clear_full to clear the
+ * ptes. Here an optimization is possible. tlb_gather_mmu flushes all
+ * tlbs of an mm if it can guarantee that the ptes of the mm_struct
+ * cannot be accessed while the batched unmap is running. In this case
+ * full==1 and a simple pte_clear is enough. See tlb.h.
+ */
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+                                            unsigned long addr,
+                                            pte_t *ptep, int full)
 {
-        pte_t old_pte = *ptep;
+        pte_t pte = *ptep;
-        set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
+        if (full)
+                pte_clear(mm, addr, ptep);
+        else
+                ptep_invalidate(addr, ptep);
+        return pte;
 }
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define ptep_set_wrprotect(__mm, __addr, __ptep)                        \
+({                                                                      \
+        pte_t __pte = *(__ptep);                                        \
+        if (pte_write(__pte)) {                                         \
+                if (atomic_read(&(__mm)->mm_users) > 1 ||               \
+                    (__mm) != current->active_mm)                       \
+                        ptep_invalidate(__addr, __ptep);                \
+                set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \
+        }                                                               \
+})
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 #define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty)  \
 ({                                                                      \
        int __changed = !pte_same(*(__ptep), __entry);                  \
@@ -740,11 +790,13 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 * should therefore only be called if it is not mapped in any
 * address space.
 */
+#define __HAVE_ARCH_PAGE_TEST_DIRTY
 static inline int page_test_dirty(struct page *page)
 {
        return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0;
 }
+#define __HAVE_ARCH_PAGE_CLEAR_DIRTY
 static inline void page_clear_dirty(struct page *page)
 {
        page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY);
@@ -753,6 +805,7 @@ static inline void page_clear_dirty(struct page *page)
 /*
 * Test and clear referenced bit in storage key.
 */
+#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
 static inline int page_test_and_clear_young(struct page *page)
 {
        unsigned long physpage = page_to_phys(page);
@@ -930,16 +983,6 @@ extern int remove_shared_memory(unsigned long start, unsigned long size);
 #define __HAVE_ARCH_MEMMAP_INIT
 extern void memmap_init(unsigned long, int, unsigned long, unsigned long);
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTE_SAME
-#define __HAVE_ARCH_PAGE_TEST_DIRTY
-#define __HAVE_ARCH_PAGE_CLEAR_DIRTY
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
 #include <asm-generic/pgtable.h>
 #endif /* _S390_PAGE_H */
diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h
index 51bd957b85bd..55ae45ef31b5 100644
--- a/include/asm-s390/tlb.h
+++ b/include/asm-s390/tlb.h
@@ -2,19 +2,128 @@
 #define _S390_TLB_H
 /*
- * s390 doesn't need any special per-pte or
+ * TLB flushing on s390 is complicated. The following requirement
- * per-vma handling..
+ * from the principles of operation is the most arduous:
+ *
+ * "A valid table entry must not be changed while it is attached
+ * to any CPU and may be used for translation by that CPU except to
+ * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY,
+ * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page
+ * table entry, or (3) make a change by means of a COMPARE AND SWAP
+ * AND PURGE instruction that purges the TLB."
+ *
+ * The modification of a pte of an active mm struct therefore is
+ * a two step process: i) invalidate the pte, ii) store the new pte.
+ * This is true for the page protection bit as well.
+ * The only possible optimization is to flush at the beginning of
+ * a tlb_gather_mmu cycle if the mm_struct is currently not in use.
+ *
+ * Pages used for the page tables are a different story. FIXME: more
 */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+#include <linux/mm.h>
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#include <linux/swap.h>
+#include <asm/processor.h>
+#include <asm/pgalloc.h>
+#include <asm/smp.h>
+#include <asm/tlbflush.h>
+#ifndef CONFIG_SMP
+#define TLB_NR_PTRS     1
+#else
+#define TLB_NR_PTRS     508
+#endif
+struct mmu_gather {
+        struct mm_struct *mm;           /* mm this gather operates on; set by tlb_gather_mmu */
+        unsigned int fullmm;            /* nonzero: mm was flushed up front, tables are freed directly */
+        unsigned int nr_ptes;           /* number of pte pages queued in array[0 .. nr_ptes) */
+        unsigned int nr_pmds;           /* lowest used pmd slot; pmds sit in array[nr_pmds .. TLB_NR_PTRS) */
+        void *array[TLB_NR_PTRS];       /* shared queue: ptes fill upward from 0, pmds downward from the end */
+};
+DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm,
+                                                unsigned int full_mm_flush)
+{      /* Begin a gather cycle for mm on this cpu and return the per-cpu context. */
+        struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);    /* paired with put_cpu_var in tlb_finish_mmu */
+        tlb->mm = mm;
+        tlb->fullmm = full_mm_flush || (num_online_cpus() == 1) ||     /* full mode if requested, or only one cpu is online, ... */
+                (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm);  /* ... or mm has at most one user and is the current active mm */
+        tlb->nr_ptes = 0;               /* pte queue starts empty */
+        tlb->nr_pmds = TLB_NR_PTRS;     /* pmd queue starts empty (it grows downward) */
+        if (tlb->fullmm)
+                __tlb_flush_mm(mm);     /* flush once now; tables are then freed without queueing */
+        return tlb;
+}
+static inline void tlb_flush_mmu(struct mmu_gather *tlb,
+                                 unsigned long start, unsigned long end)
+{      /* Flush the mm if any page tables are queued, then free them; start/end are not used here. */
+        if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pmds < TLB_NR_PTRS))  /* something is queued and no up-front flush was done */
+                __tlb_flush_mm(tlb->mm);        /* flush before the queued tables are freed */
+        while (tlb->nr_ptes > 0)
+                pte_free(tlb->array[--tlb->nr_ptes]);   /* drain pte pages from the low end of the array */
+        while (tlb->nr_pmds < TLB_NR_PTRS)
+                pmd_free((pmd_t *) tlb->array[tlb->nr_pmds++]); /* drain pmd tables from the high end of the array */
+}
+static inline void tlb_finish_mmu(struct mmu_gather *tlb,
+                                  unsigned long start, unsigned long end)
+{
+        tlb_flush_mmu(tlb, start, end);
+        /* keep the page table cache within bounds */
+        check_pgt_cache();
+        put_cpu_var(mmu_gathers);
+}
 /*
- * .. because we flush the whole mm when it
+ * Release the page cache reference for a pte removed by
- * fills up.
+ * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
+ * has already been freed, so just do free_page_and_swap_cache.
 */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+        free_page_and_swap_cache(page);
+}
-#include <asm-generic/tlb.h>
+/*
+ * pte_free_tlb frees a pte table and clears the CRSTE for the
+ * page table from the tlb.
+ */
+static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page)
+{
+        if (!tlb->fullmm) {
+                tlb->array[tlb->nr_ptes++] = page;      /* queue the pte page; tlb_flush_mmu frees it after flushing */
+                if (tlb->nr_ptes >= tlb->nr_pmds)       /* pte and pmd regions have met: the array is full */
+                        tlb_flush_mmu(tlb, 0, 0);
+        } else
+                pte_free(page);         /* full mode: mm was flushed by tlb_gather_mmu, free right away */
+}
+/*
+ * pmd_free_tlb frees a pmd table and clears the CRSTE for the
+ * segment table entry from the tlb.
+ */
+static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+{
+#ifdef __s390x__       /* the whole body is conditional; without __s390x__ this function does nothing */
+        if (!tlb->fullmm) {
+                tlb->array[--tlb->nr_pmds] = (struct page *) pmd;       /* queue the pmd at the high end of the array */
+                if (tlb->nr_ptes >= tlb->nr_pmds)       /* pte and pmd regions have met: the array is full */
+                        tlb_flush_mmu(tlb, 0, 0);
+        } else
+                pmd_free(pmd);          /* full mode: mm was flushed by tlb_gather_mmu, free right away */
 #endif
+}
+#define tlb_start_vma(tlb, vma)                 do { } while (0)
+#define tlb_end_vma(tlb, vma)                   do { } while (0)
+#define tlb_remove_tlb_entry(tlb, ptep, addr)   do { } while (0)
+#define tlb_migrate_finish(mm)                  do { } while (0)
+#endif /* _S390_TLB_H */
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 6de2632a3e4f..3a9985fbc8af 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -6,68 +6,19 @@
 #include <asm/pgalloc.h>
 /*
- * TLB flushing:
+ * Flush all tlb entries on the local cpu.
- *
- *  - flush_tlb() flushes the current mm struct TLBs
- *  - flush_tlb_all() flushes all processes TLBs 
- *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
- *  - flush_tlb_page(vma, vmaddr) flushes one page
- *  - flush_tlb_range(vma, start, end) flushes a range of pages
- *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- */
-/*
- * S/390 has three ways of flushing TLBs
- * 'ptlb' does a flush of the local processor
- * 'csp' flushes the TLBs on all PUs of a SMP
- * 'ipte' invalidates a pte in a page table and flushes that out of
- * the TLBs of all PUs of a SMP
- */
-#define local_flush_tlb() \
-do {  asm volatile("ptlb": : :"memory"); } while (0)
-#ifndef CONFIG_SMP
-/*
- * We always need to flush, since s390 does not flush tlb
- * on each context switch
 */
+static inline void __tlb_flush_local(void)
-static inline void flush_tlb(void)
 {
-        local_flush_tlb();
+        asm volatile("ptlb" : : : "memory");
 }
-static inline void flush_tlb_all(void)
-{
-        local_flush_tlb();
-}
-static inline void flush_tlb_mm(struct mm_struct *mm) 
-{
-        local_flush_tlb();
-}
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-                                  unsigned long addr)
-{
-        local_flush_tlb();
-}
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-                                   unsigned long start, unsigned long end)
-{
-        local_flush_tlb();
-}
-#define flush_tlb_kernel_range(start, end) \
-        local_flush_tlb();
-#else
-#include <asm/smp.h>
+/*
+ * Flush all tlb entries on all cpus.
-extern void smp_ptlb_all(void);
+ */
+static inline void __tlb_flush_global(void)
-static inline void global_flush_tlb(void)
 {
+        extern void smp_ptlb_all(void);
        register unsigned long reg2 asm("2");
        register unsigned long reg3 asm("3");
        register unsigned long reg4 asm("4");
@@ -89,66 +40,75 @@ static inline void global_flush_tlb(void)
 }
 /*
- * We only have to do global flush of tlb if process run since last
+ * Flush all tlb entries of a page table on all cpus.
- * flush on any other pu than current. 
- * If we have threads (mm->count > 1) we always do a global flush, 
- * since the process runs on more than one processor at the same time.
 */
+static inline void __tlb_flush_idte(pgd_t *pgd)
+{
+        asm volatile(
+                "       .insn   rrf,0xb98e0000,0,%0,%1,0"
+                : : "a" (2048), "a" (__pa(pgd) & PAGE_MASK) : "cc" );
+}
-static inline void __flush_tlb_mm(struct mm_struct * mm)
+static inline void __tlb_flush_mm(struct mm_struct * mm)
 {
        cpumask_t local_cpumask;
        if (unlikely(cpus_empty(mm->cpu_vm_mask)))
                return;
+        /*
+         * If the machine has IDTE we prefer to do a per mm flush
+         * on all cpus instead of doing a local flush if the mm
+         * only ran on the local cpu.
+         */
        if (MACHINE_HAS_IDTE) {
                pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd);
-                if (shadow_pgd) {
+                if (shadow_pgd)
-                        asm volatile(
+                        __tlb_flush_idte(shadow_pgd);
-                                "       .insn   rrf,0xb98e0000,0,%0,%1,0"
+                __tlb_flush_idte(mm->pgd);
-                                : : "a" (2048),
-                                "a" (__pa(shadow_pgd) & PAGE_MASK) : "cc" );
-                }
-                asm volatile(
-                        "       .insn   rrf,0xb98e0000,0,%0,%1,0"
-                        : : "a" (2048), "a" (__pa(mm->pgd)&PAGE_MASK) : "cc");
                return;
        }
        preempt_disable();
+        /*
+         * If the process only ran on the local cpu, do a local flush.
+         */
        local_cpumask = cpumask_of_cpu(smp_processor_id());
        if (cpus_equal(mm->cpu_vm_mask, local_cpumask))
-                local_flush_tlb();
+                __tlb_flush_local();
        else
-                global_flush_tlb();
+                __tlb_flush_global();
        preempt_enable();
 }
-static inline void flush_tlb(void)
+static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
-{
-        __flush_tlb_mm(current->mm);
-}
-static inline void flush_tlb_all(void)
-{
-        global_flush_tlb();
-}
-static inline void flush_tlb_mm(struct mm_struct *mm) 
-{
-        __flush_tlb_mm(mm); 
-}
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-                                  unsigned long addr)
-{
-        __flush_tlb_mm(vma->vm_mm);
-}
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-                                   unsigned long start, unsigned long end)
 {
-        __flush_tlb_mm(vma->vm_mm); 
+        if (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm)
+                __tlb_flush_mm(mm);
 }
-#define flush_tlb_kernel_range(start, end) global_flush_tlb()
+/*
+ * TLB flushing:
+ *  flush_tlb() - flushes the current mm struct TLBs
+ *  flush_tlb_all() - flushes all processes TLBs
+ *  flush_tlb_mm(mm) - flushes the specified mm context TLB's
+ *  flush_tlb_page(vma, vmaddr) - flushes one page
+ *  flush_tlb_range(vma, start, end) - flushes a range of pages
+ *  flush_tlb_kernel_range(start, end) - flushes a range of kernel pages
+ */
-#endif
+/*
+ * flush_tlb_mm goes together with ptep_set_wrprotect for the
+ * copy_page_range operation and flush_tlb_range is related to
+ * ptep_get_and_clear for change_protection. ptep_set_wrprotect and
+ * ptep_get_and_clear do not flush the TLBs directly if the mm has
+ * only one user. At the end of the update the flush_tlb_mm and
+ * flush_tlb_range functions need to do the flush.
+ */
+#define flush_tlb()                             do { } while (0)
+#define flush_tlb_all()                         do { } while (0)
+#define flush_tlb_mm(mm)                        __tlb_flush_mm_cond(mm)
+#define flush_tlb_page(vma, addr)               do { } while (0)
+/*
+ * Flush the mm that owns the vma. The mm is taken from the vma argument
+ * so the macro does not depend on a variable named 'mm' existing in the
+ * caller's scope. start and end are ignored.
+ */
+#define flush_tlb_range(vma, start, end)        __tlb_flush_mm_cond((vma)->vm_mm)
+#define flush_tlb_kernel_range(start, end)      __tlb_flush_mm(&init_mm)
 #endif /* _S390_TLBFLUSH_H */
author	Martin Schwidefsky <schwidefsky@de.ibm.com>	2007-10-22 06:52:44 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2007-10-22 06:52:48 -0400
commit	ba8a9229ab9e80278c28ad68b15053f65b2b0a7c (patch)
tree	d73e4f7d352d3b3edf8888973528cb7dd3e953f9 /include
parent	e3d3683d1402c1737687cb698451d545f57c32a7 (diff)

diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index e45d3c9a4b7e..6cbbfe4f6749 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h
@@ -82,7 +82,6 @@ static inline void pgd_free(pgd_t *pgd)
82	*/	82	*/
83	#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })	83	#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })
84	#define pmd_free(x) do { } while (0)	84	#define pmd_free(x) do { } while (0)
85	#define __pmd_free_tlb(tlb,x) do { } while (0)
86	#define pgd_populate(mm, pmd, pte) BUG()	85	#define pgd_populate(mm, pmd, pte) BUG()
87	#define pgd_populate_kernel(mm, pmd, pte) BUG()	86	#define pgd_populate_kernel(mm, pmd, pte) BUG()
88	#else /* __s390x__ */	87	#else /* __s390x__ */
@@ -118,12 +117,6 @@ static inline void pmd_free (pmd_t *pmd)
118	free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);	117	free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);
119	}	118	}
120		119
121	#define __pmd_free_tlb(tlb,pmd) \
122	do { \
123	tlb_flush_mmu(tlb, 0, 0); \
124	pmd_free(pmd); \
125	} while (0)
126
127	static inline void	120	static inline void
128	pgd_populate_kernel(struct mm_struct mm, pgd_t pgd, pmd_t *pmd)	121	pgd_populate_kernel(struct mm_struct mm, pgd_t pgd, pmd_t *pmd)
129	{	122	{
@@ -224,14 +217,4 @@ static inline void pte_free(struct page *pte)
224	__free_page(pte);	217	__free_page(pte);
225	}	218	}
226		219
227	#define __pte_free_tlb(tlb, pte) \
228	({ \
229	struct mmu_gather *__tlb = (tlb); \
230	struct page *__pte = (pte); \
231	struct page *shadow_page = get_shadow_page(__pte); \
232	if (shadow_page) \
233	tlb_remove_page(__tlb, shadow_page); \
234	tlb_remove_page(__tlb, __pte); \
235	})
236
237	#endif /* _S390_PGALLOC_H */	220	#endif /* _S390_PGALLOC_H */


diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 39bb5192dc31..b424ab21f8bd 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h
@@ -424,7 +424,8 @@ static inline pgd_t get_shadow_pgd(pgd_t pgdp)
424	* within a page table are directly modified. Thus, the following	424	* within a page table are directly modified. Thus, the following
425	* hook is made available.	425	* hook is made available.
426	*/	426	*/
427	static inline void set_pte(pte_t *pteptr, pte_t pteval)	427	static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
		428	pte_t *pteptr, pte_t pteval)
428	{	429	{
429	pte_t *shadow_pte = get_shadow_pte(pteptr);	430	pte_t *shadow_pte = get_shadow_pte(pteptr);
430		431
@@ -437,7 +438,6 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
437	pte_val(*shadow_pte) = _PAGE_TYPE_EMPTY;	438	pte_val(*shadow_pte) = _PAGE_TYPE_EMPTY;
438	}	439	}
439	}	440	}
440	#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
441		441
442	/*	442	/*
443	* pgd/pmd/pte query functions	443	* pgd/pmd/pte query functions
@@ -508,7 +508,8 @@ static inline int pte_file(pte_t pte)
508	return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;	508	return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;
509	}	509	}
510		510
511	#define pte_same(a,b) (pte_val(a) == pte_val(b))	511	#define __HAVE_ARCH_PTE_SAME
		512	#define pte_same(a,b) (pte_val(a) == pte_val(b))
512		513
513	/*	514	/*
514	* query functions pte_write/pte_dirty/pte_young only work if	515	* query functions pte_write/pte_dirty/pte_young only work if
@@ -663,24 +664,19 @@ static inline pte_t pte_mkyoung(pte_t pte)
663	return pte;	664	return pte;
664	}	665	}
665		666
666	static inline int ptep_test_and_clear_young(struct vm_area_struct vma, unsigned long addr, pte_t ptep)	667	#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
		668	static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
		669	unsigned long addr, pte_t *ptep)
667	{	670	{
668	return 0;	671	return 0;
669	}	672	}
670		673
671	static inline int	674	#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
672	ptep_clear_flush_young(struct vm_area_struct *vma,	675	static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
673	unsigned long address, pte_t *ptep)	676	unsigned long address, pte_t *ptep)
674	{	677	{
675	/* No need to flush TLB; bits are in storage key */	678	/* No need to flush TLB; bits are in storage key */
676	return ptep_test_and_clear_young(vma, address, ptep);	679	return 0;
677	}
678
679	static inline pte_t ptep_get_and_clear(struct mm_struct mm, unsigned long addr, pte_t ptep)
680	{
681	pte_t pte = *ptep;
682	pte_clear(mm, addr, ptep);
683	return pte;
684	}	680	}
685		681
686	static inline void __ptep_ipte(unsigned long address, pte_t *ptep)	682	static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
@@ -709,6 +705,32 @@ static inline void ptep_invalidate(unsigned long address, pte_t *ptep)
709	__ptep_ipte(address, ptep);	705	__ptep_ipte(address, ptep);
710	}	706	}
711		707
		708	/*
		709	* This is hard to understand. ptep_get_and_clear and ptep_clear_flush
		710	* both clear the TLB for the unmapped pte. The reason is that
		711	* ptep_get_and_clear is used in common code (e.g. change_pte_range)
		712	* to modify an active pte. The sequence is
		713	* 1) ptep_get_and_clear
		714	* 2) set_pte_at
		715	* 3) flush_tlb_range
		716	* On s390 the tlb needs to get flushed with the modification of the pte
		717	* if the pte is active. The only way how this can be implemented is to
		718	* have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
		719	* is a nop.
		720	*/
		721	#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
		722	#define ptep_get_and_clear(__mm, __address, __ptep) \
		723	({ \
		724	pte_t __pte = *(__ptep); \
		725	if (atomic_read(&(__mm)->mm_users) > 1 \|\| \
		726	(__mm) != current->active_mm) \
		727	ptep_invalidate(__address, __ptep); \
		728	else \
		729	pte_clear((__mm), (__address), (__ptep)); \
		730	__pte; \
		731	})
		732
		733	#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
712	static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,	734	static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
713	unsigned long address, pte_t *ptep)	735	unsigned long address, pte_t *ptep)
714	{	736	{
@@ -717,12 +739,40 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
717	return pte;	739	return pte;
718	}	740	}
719		741
720	static inline void ptep_set_wrprotect(struct mm_struct mm, unsigned long addr, pte_t ptep)	742	/*
		743	* The batched pte unmap code uses ptep_get_and_clear_full to clear the
		744	* ptes. Here an optimization is possible. tlb_gather_mmu flushes all
		745	* tlbs of an mm if it can guarantee that the ptes of the mm_struct
		746	* cannot be accessed while the batched unmap is running. In this case
		747	* full==1 and a simple pte_clear is enough. See tlb.h.
		748	*/
		749	#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
		750	static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
		751	unsigned long addr,
		752	pte_t *ptep, int full)
721	{	753	{
722	pte_t old_pte = *ptep;	754	pte_t pte = *ptep;
723	set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));	755
		756	if (full)
		757	pte_clear(mm, addr, ptep);
		758	else
		759	ptep_invalidate(addr, ptep);
		760	return pte;
724	}	761	}
725		762
		763	#define __HAVE_ARCH_PTEP_SET_WRPROTECT
		764	#define ptep_set_wrprotect(__mm, __addr, __ptep) \
		765	({ \
		766	pte_t __pte = *(__ptep); \
		767	if (pte_write(__pte)) { \
		768	if (atomic_read(&(__mm)->mm_users) > 1 \|\| \
		769	(__mm) != current->active_mm) \
		770	ptep_invalidate(__addr, __ptep); \
		771	set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \
		772	} \
		773	})
		774
		775	#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
726	#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \	776	#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
727	({ \	777	({ \
728	int __changed = !pte_same(*(__ptep), __entry); \	778	int __changed = !pte_same(*(__ptep), __entry); \
@@ -740,11 +790,13 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
740	* should therefore only be called if it is not mapped in any	790	* should therefore only be called if it is not mapped in any
741	* address space.	791	* address space.
742	*/	792	*/
		793	#define __HAVE_ARCH_PAGE_TEST_DIRTY
743	static inline int page_test_dirty(struct page *page)	794	static inline int page_test_dirty(struct page *page)
744	{	795	{
745	return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0;	796	return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0;
746	}	797	}
747		798
		799	#define __HAVE_ARCH_PAGE_CLEAR_DIRTY
748	static inline void page_clear_dirty(struct page *page)	800	static inline void page_clear_dirty(struct page *page)
749	{	801	{
750	page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY);	802	page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY);
@@ -753,6 +805,7 @@ static inline void page_clear_dirty(struct page *page)
753	/*	805	/*
754	* Test and clear referenced bit in storage key.	806	* Test and clear referenced bit in storage key.
755	*/	807	*/
		808	#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
756	static inline int page_test_and_clear_young(struct page *page)	809	static inline int page_test_and_clear_young(struct page *page)
757	{	810	{
758	unsigned long physpage = page_to_phys(page);	811	unsigned long physpage = page_to_phys(page);
@@ -930,16 +983,6 @@ extern int remove_shared_memory(unsigned long start, unsigned long size);
930	#define __HAVE_ARCH_MEMMAP_INIT	983	#define __HAVE_ARCH_MEMMAP_INIT
931	extern void memmap_init(unsigned long, int, unsigned long, unsigned long);	984	extern void memmap_init(unsigned long, int, unsigned long, unsigned long);
932		985
933	#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
934	#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
935	#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
936	#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
937	#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
938	#define __HAVE_ARCH_PTEP_SET_WRPROTECT
939	#define __HAVE_ARCH_PTE_SAME
940	#define __HAVE_ARCH_PAGE_TEST_DIRTY
941	#define __HAVE_ARCH_PAGE_CLEAR_DIRTY
942	#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
943	#include <asm-generic/pgtable.h>	986	#include <asm-generic/pgtable.h>
944		987
945	#endif /* _S390_PAGE_H */	988	#endif /* _S390_PAGE_H */


diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h index 51bd957b85bd..55ae45ef31b5 100644 --- a/include/asm-s390/tlb.h +++ b/include/asm-s390/tlb.h
@@ -2,19 +2,128 @@
2	#define _S390_TLB_H	2	#define _S390_TLB_H
3		3
4	/*	4	/*
5	* s390 doesn't need any special per-pte or	5	* TLB flushing on s390 is complicated. The following requirement
6	* per-vma handling..	6	* from the principles of operation is the most arduous:
		7	*
		8	* "A valid table entry must not be changed while it is attached
		9	* to any CPU and may be used for translation by that CPU except to
		10	* (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY,
		11	* or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page
		12	* table entry, or (3) make a change by means of a COMPARE AND SWAP
		13	* AND PURGE instruction that purges the TLB."
		14	*
		15	* The modification of a pte of an active mm struct therefore is
		16	* a two step process: i) invalidate the pte, ii) store the new pte.
		17	* This is true for the page protection bit as well.
		18	* The only possible optimization is to flush at the beginning of
		19	* a tlb_gather_mmu cycle if the mm_struct is currently not in use.
		20	*
		21	* Pages used for the page tables are a different story. FIXME: more
7	*/	22	*/
8	#define tlb_start_vma(tlb, vma) do { } while (0)	23
9	#define tlb_end_vma(tlb, vma) do { } while (0)	24	#include <linux/mm.h>
10	#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)	25	#include <linux/swap.h>
		26	#include <asm/processor.h>
		27	#include <asm/pgalloc.h>
		28	#include <asm/smp.h>
		29	#include <asm/tlbflush.h>
		30
		31	#ifndef CONFIG_SMP
		32	#define TLB_NR_PTRS 1
		33	#else
		34	#define TLB_NR_PTRS 508
		35	#endif
		36
		37	struct mmu_gather {
		38	struct mm_struct *mm;
		39	unsigned int fullmm;
		40	unsigned int nr_ptes;
		41	unsigned int nr_pmds;
		42	void *array[TLB_NR_PTRS];
		43	};
		44
		45	DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
		46
		47	static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm,
		48	unsigned int full_mm_flush)
		49	{
		50	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
		51
		52	tlb->mm = mm;
		53	tlb->fullmm = full_mm_flush || (num_online_cpus() == 1) ||
		54	(atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm);
		55	tlb->nr_ptes = 0;
		56	tlb->nr_pmds = TLB_NR_PTRS;
		57	if (tlb->fullmm)
		58	__tlb_flush_mm(mm);
		59	return tlb;
		60	}
		61
		62	static inline void tlb_flush_mmu(struct mmu_gather *tlb,
		63	unsigned long start, unsigned long end)
		64	{
		65	if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pmds < TLB_NR_PTRS))
		66	__tlb_flush_mm(tlb->mm);
		67	while (tlb->nr_ptes > 0)
		68	pte_free(tlb->array[--tlb->nr_ptes]);
		69	while (tlb->nr_pmds < TLB_NR_PTRS)
		70	pmd_free((pmd_t *) tlb->array[tlb->nr_pmds++]);
		71	}
		72
		73	static inline void tlb_finish_mmu(struct mmu_gather *tlb,
		74	unsigned long start, unsigned long end)
		75	{
		76	tlb_flush_mmu(tlb, start, end);
		77
		78	/* keep the page table cache within bounds */
		79	check_pgt_cache();
		80
		81	put_cpu_var(mmu_gathers);
		82	}
11		83
12	/*	84	/*
13	* .. because we flush the whole mm when it	85	* Release the page cache reference for a pte removed by
14	* fills up.	86	* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
		87	* has already been freed, so just do free_page_and_swap_cache.
15	*/	88	*/
16	#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)	89	static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
		90	{
		91	free_page_and_swap_cache(page);
		92	}
17		93
18	#include <asm-generic/tlb.h>	94	/*
		95	* pte_free_tlb frees a pte table and clears the CRSTE for the
		96	* page table from the tlb.
		97	*/
		98	static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page)
		99	{
		100	if (!tlb->fullmm) {
		101	tlb->array[tlb->nr_ptes++] = page;
		102	if (tlb->nr_ptes >= tlb->nr_pmds)
		103	tlb_flush_mmu(tlb, 0, 0);
		104	} else
		105	pte_free(page);
		106	}
19		107
		108	/*
		109	* pmd_free_tlb frees a pmd table and clears the CRSTE for the
		110	* segment table entry from the tlb.
		111	*/
		112	static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
		113	{
		114	#ifdef __s390x__
		115	if (!tlb->fullmm) {
		116	tlb->array[--tlb->nr_pmds] = (struct page *) pmd;
		117	if (tlb->nr_ptes >= tlb->nr_pmds)
		118	tlb_flush_mmu(tlb, 0, 0);
		119	} else
		120	pmd_free(pmd);
20	#endif	121	#endif
		122	}
		123
		124	#define tlb_start_vma(tlb, vma) do { } while (0)
		125	#define tlb_end_vma(tlb, vma) do { } while (0)
		126	#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
		127	#define tlb_migrate_finish(mm) do { } while (0)
		128
		129	#endif /* _S390_TLB_H */


diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h index 6de2632a3e4f..3a9985fbc8af 100644 --- a/include/asm-s390/tlbflush.h +++ b/include/asm-s390/tlbflush.h
@@ -6,68 +6,19 @@
6	#include <asm/pgalloc.h>	6	#include <asm/pgalloc.h>
7		7
8	/*	8	/*
9	* TLB flushing:	9	* Flush all tlb entries on the local cpu.
10	*
11	* - flush_tlb() flushes the current mm struct TLBs
12	* - flush_tlb_all() flushes all processes TLBs
13	* - flush_tlb_mm(mm) flushes the specified mm context TLB's
14	* - flush_tlb_page(vma, vmaddr) flushes one page
15	* - flush_tlb_range(vma, start, end) flushes a range of pages
16	* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
17	*/
18
19	/*
20	* S/390 has three ways of flushing TLBs
21	* 'ptlb' does a flush of the local processor
22	* 'csp' flushes the TLBs on all PUs of a SMP
23	* 'ipte' invalidates a pte in a page table and flushes that out of
24	* the TLBs of all PUs of a SMP
25	*/
26
27	#define local_flush_tlb() \
28	do { asm volatile("ptlb": : :"memory"); } while (0)
29
30	#ifndef CONFIG_SMP
31
32	/*
33	* We always need to flush, since s390 does not flush tlb
34	* on each context switch
35	*/	10	*/
36		11	static inline void __tlb_flush_local(void)
37	static inline void flush_tlb(void)
38	{	12	{
39	local_flush_tlb();	13	asm volatile("ptlb" : : : "memory");
40	}	14	}
41	static inline void flush_tlb_all(void)
42	{
43	local_flush_tlb();
44	}
45	static inline void flush_tlb_mm(struct mm_struct *mm)
46	{
47	local_flush_tlb();
48	}
49	static inline void flush_tlb_page(struct vm_area_struct *vma,
50	unsigned long addr)
51	{
52	local_flush_tlb();
53	}
54	static inline void flush_tlb_range(struct vm_area_struct *vma,
55	unsigned long start, unsigned long end)
56	{
57	local_flush_tlb();
58	}
59
60	#define flush_tlb_kernel_range(start, end) \
61	local_flush_tlb();
62
63	#else
64		15
65	#include <asm/smp.h>	16	/*
66		17	* Flush all tlb entries on all cpus.
67	extern void smp_ptlb_all(void);	18	*/
68		19	static inline void __tlb_flush_global(void)
69	static inline void global_flush_tlb(void)
70	{	20	{
		21	extern void smp_ptlb_all(void);
71	register unsigned long reg2 asm("2");	22	register unsigned long reg2 asm("2");
72	register unsigned long reg3 asm("3");	23	register unsigned long reg3 asm("3");
73	register unsigned long reg4 asm("4");	24	register unsigned long reg4 asm("4");
@@ -89,66 +40,75 @@ static inline void global_flush_tlb(void)
89	}	40	}
90		41
91	/*	42	/*
92	* We only have to do global flush of tlb if process run since last	43	* Flush all tlb entries of a page table on all cpus.
93	* flush on any other pu than current.
94	* If we have threads (mm->count > 1) we always do a global flush,
95	* since the process runs on more than one processor at the same time.
96	*/	44	*/
		45	static inline void __tlb_flush_idte(pgd_t *pgd)
		46	{
		47	asm volatile(
		48	" .insn rrf,0xb98e0000,0,%0,%1,0"
		49	: : "a" (2048), "a" (__pa(pgd) & PAGE_MASK) : "cc" );
		50	}
97		51
98	static inline void __flush_tlb_mm(struct mm_struct * mm)	52	static inline void __tlb_flush_mm(struct mm_struct * mm)
99	{	53	{
100	cpumask_t local_cpumask;	54	cpumask_t local_cpumask;
101		55
102	if (unlikely(cpus_empty(mm->cpu_vm_mask)))	56	if (unlikely(cpus_empty(mm->cpu_vm_mask)))
103	return;	57	return;
		58	/*
		59	* If the machine has IDTE we prefer to do a per mm flush
		60	* on all cpus instead of doing a local flush if the mm
		61	* only ran on the local cpu.
		62	*/
104	if (MACHINE_HAS_IDTE) {	63	if (MACHINE_HAS_IDTE) {
105	pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd);	64	pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd);
106		65
107	if (shadow_pgd) {	66	if (shadow_pgd)
108	asm volatile(	67	__tlb_flush_idte(shadow_pgd);
109	" .insn rrf,0xb98e0000,0,%0,%1,0"	68	__tlb_flush_idte(mm->pgd);
110	: : "a" (2048),
111	"a" (__pa(shadow_pgd) & PAGE_MASK) : "cc" );
112	}
113	asm volatile(
114	" .insn rrf,0xb98e0000,0,%0,%1,0"
115	: : "a" (2048), "a" (__pa(mm->pgd)&PAGE_MASK) : "cc");
116	return;	69	return;
117	}	70	}
118	preempt_disable();	71	preempt_disable();
		72	/*
		73	* If the process only ran on the local cpu, do a local flush.
		74	*/
119	local_cpumask = cpumask_of_cpu(smp_processor_id());	75	local_cpumask = cpumask_of_cpu(smp_processor_id());
120	if (cpus_equal(mm->cpu_vm_mask, local_cpumask))	76	if (cpus_equal(mm->cpu_vm_mask, local_cpumask))
121	local_flush_tlb();	77	__tlb_flush_local();
122	else	78	else
123	global_flush_tlb();	79	__tlb_flush_global();
124	preempt_enable();	80	preempt_enable();
125	}	81	}
126		82
127	static inline void flush_tlb(void)	83	static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
128	{
129	__flush_tlb_mm(current->mm);
130	}
131	static inline void flush_tlb_all(void)
132	{
133	global_flush_tlb();
134	}
135	static inline void flush_tlb_mm(struct mm_struct *mm)
136	{
137	__flush_tlb_mm(mm);
138	}
139	static inline void flush_tlb_page(struct vm_area_struct *vma,
140	unsigned long addr)
141	{
142	__flush_tlb_mm(vma->vm_mm);
143	}
144	static inline void flush_tlb_range(struct vm_area_struct *vma,
145	unsigned long start, unsigned long end)
146	{	84	{
147	__flush_tlb_mm(vma->vm_mm);	85	if (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm)
		86	__tlb_flush_mm(mm);
148	}	87	}
149		88
150	#define flush_tlb_kernel_range(start, end) global_flush_tlb()	89	/*
		90	* TLB flushing:
		91	* flush_tlb() - flushes the current mm struct TLBs
		92	* flush_tlb_all() - flushes all processes TLBs
		93	* flush_tlb_mm(mm) - flushes the specified mm context TLB's
		94	* flush_tlb_page(vma, vmaddr) - flushes one page
		95	* flush_tlb_range(vma, start, end) - flushes a range of pages
		96	* flush_tlb_kernel_range(start, end) - flushes a range of kernel pages
		97	*/
151		98
152	#endif	99	/*
		100	* flush_tlb_mm goes together with ptep_set_wrprotect for the
		101	* copy_page_range operation and flush_tlb_range is related to
		102	* ptep_get_and_clear for change_protection. ptep_set_wrprotect and
		103	* ptep_get_and_clear do not flush the TLBs directly if the mm has
		104	* only one user. At the end of the update the flush_tlb_mm and
		105	* flush_tlb_range functions need to do the flush.
		106	*/
		107	#define flush_tlb() do { } while (0)
		108	#define flush_tlb_all() do { } while (0)
		109	#define flush_tlb_mm(mm) __tlb_flush_mm_cond(mm)
		110	#define flush_tlb_page(vma, addr) do { } while (0)
		111	#define flush_tlb_range(vma, start, end) __tlb_flush_mm_cond(mm)
		112	#define flush_tlb_kernel_range(start, end) __tlb_flush_mm(&init_mm)
153		113
154	#endif /* _S390_TLBFLUSH_H */	114	#endif /* _S390_TLBFLUSH_H */