aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2007-10-22 06:52:44 -0400
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2007-10-22 06:52:48 -0400
commitba8a9229ab9e80278c28ad68b15053f65b2b0a7c (patch)
treed73e4f7d352d3b3edf8888973528cb7dd3e953f9 /include
parente3d3683d1402c1737687cb698451d545f57c32a7 (diff)
[S390] tlb flush fix.
The current tlb flushing code for page table entries violates the s390 architecture in a small detail. The relevant section from the principles of operation (SA22-7832-02 page 3-47): "A valid table entry must not be changed while it is attached to any CPU and may be used for translation by that CPU except to (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page-table entry, or (3) make a change by means of a COMPARE AND SWAP AND PURGE instruction that purges the TLB." That means if one thread of a multithreaded application uses a vma while another thread does an unmap on it, the page table entries of that vma need to get removed with IPTE, IDTE or CSP. In some strange and rare situations a cpu could check-stop (die) because an entry has been pushed out of the TLB that is still needed to complete a (milli-coded) instruction. I've never seen it happen with the current code on any of the supported machines, so right now this is a theoretical problem. But I want to fix it nevertheless, to avoid headaches in the future. To get this implemented correctly without changing common code the primitives ptep_get_and_clear, ptep_get_and_clear_full and ptep_set_wrprotect need to use the IPTE instruction to invalidate the pte before the new pte value gets stored. If IPTE is always used for the three primitives three important operations will have a performance hit: fork, mprotect and exit_mmap. Time for some workarounds: * 1: ptep_get_and_clear_full is used in unmap_vmas to remove page table entries in a batched tlb gather operation. If the mmu_gather context passed to unmap_vmas has been started with full_mm_flush==1 or if only one cpu is online or if the only user of a mm_struct is the current process then the fullmm indication in the mmu_gather context is set to one. All TLBs for mm_struct are flushed by the tlb_gather_mmu call. No new TLBs can be created while the unmap is in progress. 
In this case ptep_get_and_clear_full clears the ptes with a simple store. * 2: ptep_get_and_clear is used in change_protection to clear the ptes from the page tables before they are reentered with the new access flags. At the end of the update flush_tlb_range clears the remaining TLBs. In general the ptep_get_and_clear has to issue IPTE for each pte and flush_tlb_range is a nop. But if there is only one user of the mm_struct then ptep_get_and_clear uses simple stores to do the update and flush_tlb_range will flush the TLBs. * 3: Similar to 2, ptep_set_wrprotect is used in copy_page_range for a fork to make all ptes of a cow mapping read-only. At the end of copy_page_range dup_mmap will flush the TLBs with a call to flush_tlb_mm. Check for mm->mm_users and if there is only one user avoid using IPTE in ptep_set_wrprotect and let flush_tlb_mm clear the TLBs. Overall for single threaded programs the tlb flush code now performs better, for multi threaded programs it is slightly worse. In particular exit_mmap() now does a single IDTE for the mm and then just frees every page cache reference and every page table page directly without a delay over the mmu_gather structure. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'include')
-rw-r--r--include/asm-s390/pgalloc.h17
-rw-r--r--include/asm-s390/pgtable.h99
-rw-r--r--include/asm-s390/tlb.h127
-rw-r--r--include/asm-s390/tlbflush.h150
4 files changed, 244 insertions, 149 deletions
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index e45d3c9a4b7e..6cbbfe4f6749 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -82,7 +82,6 @@ static inline void pgd_free(pgd_t *pgd)
82 */ 82 */
83#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) 83#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })
84#define pmd_free(x) do { } while (0) 84#define pmd_free(x) do { } while (0)
85#define __pmd_free_tlb(tlb,x) do { } while (0)
86#define pgd_populate(mm, pmd, pte) BUG() 85#define pgd_populate(mm, pmd, pte) BUG()
87#define pgd_populate_kernel(mm, pmd, pte) BUG() 86#define pgd_populate_kernel(mm, pmd, pte) BUG()
88#else /* __s390x__ */ 87#else /* __s390x__ */
@@ -118,12 +117,6 @@ static inline void pmd_free (pmd_t *pmd)
118 free_pages((unsigned long) pmd, PMD_ALLOC_ORDER); 117 free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);
119} 118}
120 119
121#define __pmd_free_tlb(tlb,pmd) \
122 do { \
123 tlb_flush_mmu(tlb, 0, 0); \
124 pmd_free(pmd); \
125 } while (0)
126
127static inline void 120static inline void
128pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) 121pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
129{ 122{
@@ -224,14 +217,4 @@ static inline void pte_free(struct page *pte)
224 __free_page(pte); 217 __free_page(pte);
225} 218}
226 219
227#define __pte_free_tlb(tlb, pte) \
228({ \
229 struct mmu_gather *__tlb = (tlb); \
230 struct page *__pte = (pte); \
231 struct page *shadow_page = get_shadow_page(__pte); \
232 if (shadow_page) \
233 tlb_remove_page(__tlb, shadow_page); \
234 tlb_remove_page(__tlb, __pte); \
235})
236
237#endif /* _S390_PGALLOC_H */ 220#endif /* _S390_PGALLOC_H */
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 39bb5192dc31..b424ab21f8bd 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -424,7 +424,8 @@ static inline pgd_t *get_shadow_pgd(pgd_t *pgdp)
424 * within a page table are directly modified. Thus, the following 424 * within a page table are directly modified. Thus, the following
425 * hook is made available. 425 * hook is made available.
426 */ 426 */
427static inline void set_pte(pte_t *pteptr, pte_t pteval) 427static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
428 pte_t *pteptr, pte_t pteval)
428{ 429{
429 pte_t *shadow_pte = get_shadow_pte(pteptr); 430 pte_t *shadow_pte = get_shadow_pte(pteptr);
430 431
@@ -437,7 +438,6 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
437 pte_val(*shadow_pte) = _PAGE_TYPE_EMPTY; 438 pte_val(*shadow_pte) = _PAGE_TYPE_EMPTY;
438 } 439 }
439} 440}
440#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
441 441
442/* 442/*
443 * pgd/pmd/pte query functions 443 * pgd/pmd/pte query functions
@@ -508,7 +508,8 @@ static inline int pte_file(pte_t pte)
508 return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; 508 return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;
509} 509}
510 510
511#define pte_same(a,b) (pte_val(a) == pte_val(b)) 511#define __HAVE_ARCH_PTE_SAME
512#define pte_same(a,b) (pte_val(a) == pte_val(b))
512 513
513/* 514/*
514 * query functions pte_write/pte_dirty/pte_young only work if 515 * query functions pte_write/pte_dirty/pte_young only work if
@@ -663,24 +664,19 @@ static inline pte_t pte_mkyoung(pte_t pte)
663 return pte; 664 return pte;
664} 665}
665 666
666static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) 667#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
668static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
669 unsigned long addr, pte_t *ptep)
667{ 670{
668 return 0; 671 return 0;
669} 672}
670 673
671static inline int 674#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
672ptep_clear_flush_young(struct vm_area_struct *vma, 675static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
673 unsigned long address, pte_t *ptep) 676 unsigned long address, pte_t *ptep)
674{ 677{
675 /* No need to flush TLB; bits are in storage key */ 678 /* No need to flush TLB; bits are in storage key */
676 return ptep_test_and_clear_young(vma, address, ptep); 679 return 0;
677}
678
679static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
680{
681 pte_t pte = *ptep;
682 pte_clear(mm, addr, ptep);
683 return pte;
684} 680}
685 681
686static inline void __ptep_ipte(unsigned long address, pte_t *ptep) 682static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
@@ -709,6 +705,32 @@ static inline void ptep_invalidate(unsigned long address, pte_t *ptep)
709 __ptep_ipte(address, ptep); 705 __ptep_ipte(address, ptep);
710} 706}
711 707
708/*
709 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
710 * both clear the TLB for the unmapped pte. The reason is that
711 * ptep_get_and_clear is used in common code (e.g. change_pte_range)
712 * to modify an active pte. The sequence is
713 * 1) ptep_get_and_clear
714 * 2) set_pte_at
715 * 3) flush_tlb_range
716 * On s390 the tlb needs to get flushed with the modification of the pte
717 * if the pte is active. The only way how this can be implemented is to
718 * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
719 * is a nop.
720 */
721#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
722#define ptep_get_and_clear(__mm, __address, __ptep) \
723({ \
724 pte_t __pte = *(__ptep); \
725 if (atomic_read(&(__mm)->mm_users) > 1 || \
726 (__mm) != current->active_mm) \
727 ptep_invalidate(__address, __ptep); \
728 else \
729 pte_clear((__mm), (__address), (__ptep)); \
730 __pte; \
731})
732
733#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
712static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, 734static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
713 unsigned long address, pte_t *ptep) 735 unsigned long address, pte_t *ptep)
714{ 736{
@@ -717,12 +739,40 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
717 return pte; 739 return pte;
718} 740}
719 741
720static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 742/*
743 * The batched pte unmap code uses ptep_get_and_clear_full to clear the
744 * ptes. Here an optimization is possible. tlb_gather_mmu flushes all
745 * tlbs of an mm if it can guarantee that the ptes of the mm_struct
746 * cannot be accessed while the batched unmap is running. In this case
747 * full==1 and a simple pte_clear is enough. See tlb.h.
748 */
749#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
750static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
751 unsigned long addr,
752 pte_t *ptep, int full)
721{ 753{
722 pte_t old_pte = *ptep; 754 pte_t pte = *ptep;
723 set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); 755
756 if (full)
757 pte_clear(mm, addr, ptep);
758 else
759 ptep_invalidate(addr, ptep);
760 return pte;
724} 761}
725 762
763#define __HAVE_ARCH_PTEP_SET_WRPROTECT
764#define ptep_set_wrprotect(__mm, __addr, __ptep) \
765({ \
766 pte_t __pte = *(__ptep); \
767 if (pte_write(__pte)) { \
768 if (atomic_read(&(__mm)->mm_users) > 1 || \
769 (__mm) != current->active_mm) \
770 ptep_invalidate(__addr, __ptep); \
771 set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \
772 } \
773})
774
775#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
726#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ 776#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
727({ \ 777({ \
728 int __changed = !pte_same(*(__ptep), __entry); \ 778 int __changed = !pte_same(*(__ptep), __entry); \
@@ -740,11 +790,13 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
740 * should therefore only be called if it is not mapped in any 790 * should therefore only be called if it is not mapped in any
741 * address space. 791 * address space.
742 */ 792 */
793#define __HAVE_ARCH_PAGE_TEST_DIRTY
743static inline int page_test_dirty(struct page *page) 794static inline int page_test_dirty(struct page *page)
744{ 795{
745 return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; 796 return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0;
746} 797}
747 798
799#define __HAVE_ARCH_PAGE_CLEAR_DIRTY
748static inline void page_clear_dirty(struct page *page) 800static inline void page_clear_dirty(struct page *page)
749{ 801{
750 page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY); 802 page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY);
@@ -753,6 +805,7 @@ static inline void page_clear_dirty(struct page *page)
753/* 805/*
754 * Test and clear referenced bit in storage key. 806 * Test and clear referenced bit in storage key.
755 */ 807 */
808#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
756static inline int page_test_and_clear_young(struct page *page) 809static inline int page_test_and_clear_young(struct page *page)
757{ 810{
758 unsigned long physpage = page_to_phys(page); 811 unsigned long physpage = page_to_phys(page);
@@ -930,16 +983,6 @@ extern int remove_shared_memory(unsigned long start, unsigned long size);
930#define __HAVE_ARCH_MEMMAP_INIT 983#define __HAVE_ARCH_MEMMAP_INIT
931extern void memmap_init(unsigned long, int, unsigned long, unsigned long); 984extern void memmap_init(unsigned long, int, unsigned long, unsigned long);
932 985
933#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
934#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
935#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
936#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
937#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
938#define __HAVE_ARCH_PTEP_SET_WRPROTECT
939#define __HAVE_ARCH_PTE_SAME
940#define __HAVE_ARCH_PAGE_TEST_DIRTY
941#define __HAVE_ARCH_PAGE_CLEAR_DIRTY
942#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
943#include <asm-generic/pgtable.h> 986#include <asm-generic/pgtable.h>
944 987
945#endif /* _S390_PAGE_H */ 988#endif /* _S390_PAGE_H */
diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h
index 51bd957b85bd..55ae45ef31b5 100644
--- a/include/asm-s390/tlb.h
+++ b/include/asm-s390/tlb.h
@@ -2,19 +2,128 @@
2#define _S390_TLB_H 2#define _S390_TLB_H
3 3
4/* 4/*
5 * s390 doesn't need any special per-pte or 5 * TLB flushing on s390 is complicated. The following requirement
6 * per-vma handling.. 6 * from the principles of operation is the most arduous:
7 *
8 * "A valid table entry must not be changed while it is attached
9 * to any CPU and may be used for translation by that CPU except to
10 * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY,
11 * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page
12 * table entry, or (3) make a change by means of a COMPARE AND SWAP
13 * AND PURGE instruction that purges the TLB."
14 *
15 * The modification of a pte of an active mm struct therefore is
16 * a two step process: i) invalidate the pte, ii) store the new pte.
17 * This is true for the page protection bit as well.
18 * The only possible optimization is to flush at the beginning of
19 * a tlb_gather_mmu cycle if the mm_struct is currently not in use.
20 *
21 * Pages used for the page tables are a different story. FIXME: more
7 */ 22 */
8#define tlb_start_vma(tlb, vma) do { } while (0) 23
9#define tlb_end_vma(tlb, vma) do { } while (0) 24#include <linux/mm.h>
10#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) 25#include <linux/swap.h>
26#include <asm/processor.h>
27#include <asm/pgalloc.h>
28#include <asm/smp.h>
29#include <asm/tlbflush.h>
30
31#ifndef CONFIG_SMP
32#define TLB_NR_PTRS 1
33#else
34#define TLB_NR_PTRS 508
35#endif
36
37struct mmu_gather {
38 struct mm_struct *mm;
39 unsigned int fullmm;
40 unsigned int nr_ptes;
41 unsigned int nr_pmds;
42 void *array[TLB_NR_PTRS];
43};
44
45DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
46
47static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm,
48 unsigned int full_mm_flush)
49{
50 struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
51
52 tlb->mm = mm;
53 tlb->fullmm = full_mm_flush || (num_online_cpus() == 1) ||
54 (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm);
55 tlb->nr_ptes = 0;
56 tlb->nr_pmds = TLB_NR_PTRS;
57 if (tlb->fullmm)
58 __tlb_flush_mm(mm);
59 return tlb;
60}
61
62static inline void tlb_flush_mmu(struct mmu_gather *tlb,
63 unsigned long start, unsigned long end)
64{
65 if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pmds < TLB_NR_PTRS))
66 __tlb_flush_mm(tlb->mm);
67 while (tlb->nr_ptes > 0)
68 pte_free(tlb->array[--tlb->nr_ptes]);
69 while (tlb->nr_pmds < TLB_NR_PTRS)
70 pmd_free((pmd_t *) tlb->array[tlb->nr_pmds++]);
71}
72
73static inline void tlb_finish_mmu(struct mmu_gather *tlb,
74 unsigned long start, unsigned long end)
75{
76 tlb_flush_mmu(tlb, start, end);
77
78 /* keep the page table cache within bounds */
79 check_pgt_cache();
80
81 put_cpu_var(mmu_gathers);
82}
11 83
12/* 84/*
13 * .. because we flush the whole mm when it 85 * Release the page cache reference for a pte removed by
14 * fills up. 86 * tlb_ptep_clear_flush. In both flush modes the tlb of a page cache page
87 * has already been freed, so just do free_page_and_swap_cache.
15 */ 88 */
16#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) 89static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
90{
91 free_page_and_swap_cache(page);
92}
17 93
18#include <asm-generic/tlb.h> 94/*
95 * pte_free_tlb frees a pte table and clears the CRSTE for the
96 * page table from the tlb.
97 */
98static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page)
99{
100 if (!tlb->fullmm) {
101 tlb->array[tlb->nr_ptes++] = page;
102 if (tlb->nr_ptes >= tlb->nr_pmds)
103 tlb_flush_mmu(tlb, 0, 0);
104 } else
105 pte_free(page);
106}
19 107
108/*
109 * pmd_free_tlb frees a pmd table and clears the CRSTE for the
110 * segment table entry from the tlb.
111 */
112static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
113{
114#ifdef __s390x__
115 if (!tlb->fullmm) {
116 tlb->array[--tlb->nr_pmds] = (struct page *) pmd;
117 if (tlb->nr_ptes >= tlb->nr_pmds)
118 tlb_flush_mmu(tlb, 0, 0);
119 } else
120 pmd_free(pmd);
20#endif 121#endif
122}
123
124#define tlb_start_vma(tlb, vma) do { } while (0)
125#define tlb_end_vma(tlb, vma) do { } while (0)
126#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
127#define tlb_migrate_finish(mm) do { } while (0)
128
129#endif /* _S390_TLB_H */
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 6de2632a3e4f..3a9985fbc8af 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -6,68 +6,19 @@
6#include <asm/pgalloc.h> 6#include <asm/pgalloc.h>
7 7
8/* 8/*
9 * TLB flushing: 9 * Flush all tlb entries on the local cpu.
10 *
11 * - flush_tlb() flushes the current mm struct TLBs
12 * - flush_tlb_all() flushes all processes TLBs
13 * - flush_tlb_mm(mm) flushes the specified mm context TLB's
14 * - flush_tlb_page(vma, vmaddr) flushes one page
15 * - flush_tlb_range(vma, start, end) flushes a range of pages
16 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
17 */
18
19/*
20 * S/390 has three ways of flushing TLBs
21 * 'ptlb' does a flush of the local processor
22 * 'csp' flushes the TLBs on all PUs of a SMP
23 * 'ipte' invalidates a pte in a page table and flushes that out of
24 * the TLBs of all PUs of a SMP
25 */
26
27#define local_flush_tlb() \
28do { asm volatile("ptlb": : :"memory"); } while (0)
29
30#ifndef CONFIG_SMP
31
32/*
33 * We always need to flush, since s390 does not flush tlb
34 * on each context switch
35 */ 10 */
36 11static inline void __tlb_flush_local(void)
37static inline void flush_tlb(void)
38{ 12{
39 local_flush_tlb(); 13 asm volatile("ptlb" : : : "memory");
40} 14}
41static inline void flush_tlb_all(void)
42{
43 local_flush_tlb();
44}
45static inline void flush_tlb_mm(struct mm_struct *mm)
46{
47 local_flush_tlb();
48}
49static inline void flush_tlb_page(struct vm_area_struct *vma,
50 unsigned long addr)
51{
52 local_flush_tlb();
53}
54static inline void flush_tlb_range(struct vm_area_struct *vma,
55 unsigned long start, unsigned long end)
56{
57 local_flush_tlb();
58}
59
60#define flush_tlb_kernel_range(start, end) \
61 local_flush_tlb();
62
63#else
64 15
65#include <asm/smp.h> 16/*
66 17 * Flush all tlb entries on all cpus.
67extern void smp_ptlb_all(void); 18 */
68 19static inline void __tlb_flush_global(void)
69static inline void global_flush_tlb(void)
70{ 20{
21 extern void smp_ptlb_all(void);
71 register unsigned long reg2 asm("2"); 22 register unsigned long reg2 asm("2");
72 register unsigned long reg3 asm("3"); 23 register unsigned long reg3 asm("3");
73 register unsigned long reg4 asm("4"); 24 register unsigned long reg4 asm("4");
@@ -89,66 +40,75 @@ static inline void global_flush_tlb(void)
89} 40}
90 41
91/* 42/*
92 * We only have to do global flush of tlb if process run since last 43 * Flush all tlb entries of a page table on all cpus.
93 * flush on any other pu than current.
94 * If we have threads (mm->count > 1) we always do a global flush,
95 * since the process runs on more than one processor at the same time.
96 */ 44 */
45static inline void __tlb_flush_idte(pgd_t *pgd)
46{
47 asm volatile(
48 " .insn rrf,0xb98e0000,0,%0,%1,0"
49 : : "a" (2048), "a" (__pa(pgd) & PAGE_MASK) : "cc" );
50}
97 51
98static inline void __flush_tlb_mm(struct mm_struct * mm) 52static inline void __tlb_flush_mm(struct mm_struct * mm)
99{ 53{
100 cpumask_t local_cpumask; 54 cpumask_t local_cpumask;
101 55
102 if (unlikely(cpus_empty(mm->cpu_vm_mask))) 56 if (unlikely(cpus_empty(mm->cpu_vm_mask)))
103 return; 57 return;
58 /*
59 * If the machine has IDTE we prefer to do a per mm flush
60 * on all cpus instead of doing a local flush if the mm
61 * only ran on the local cpu.
62 */
104 if (MACHINE_HAS_IDTE) { 63 if (MACHINE_HAS_IDTE) {
105 pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd); 64 pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd);
106 65
107 if (shadow_pgd) { 66 if (shadow_pgd)
108 asm volatile( 67 __tlb_flush_idte(shadow_pgd);
109 " .insn rrf,0xb98e0000,0,%0,%1,0" 68 __tlb_flush_idte(mm->pgd);
110 : : "a" (2048),
111 "a" (__pa(shadow_pgd) & PAGE_MASK) : "cc" );
112 }
113 asm volatile(
114 " .insn rrf,0xb98e0000,0,%0,%1,0"
115 : : "a" (2048), "a" (__pa(mm->pgd)&PAGE_MASK) : "cc");
116 return; 69 return;
117 } 70 }
118 preempt_disable(); 71 preempt_disable();
72 /*
73 * If the process only ran on the local cpu, do a local flush.
74 */
119 local_cpumask = cpumask_of_cpu(smp_processor_id()); 75 local_cpumask = cpumask_of_cpu(smp_processor_id());
120 if (cpus_equal(mm->cpu_vm_mask, local_cpumask)) 76 if (cpus_equal(mm->cpu_vm_mask, local_cpumask))
121 local_flush_tlb(); 77 __tlb_flush_local();
122 else 78 else
123 global_flush_tlb(); 79 __tlb_flush_global();
124 preempt_enable(); 80 preempt_enable();
125} 81}
126 82
127static inline void flush_tlb(void) 83static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
128{
129 __flush_tlb_mm(current->mm);
130}
131static inline void flush_tlb_all(void)
132{
133 global_flush_tlb();
134}
135static inline void flush_tlb_mm(struct mm_struct *mm)
136{
137 __flush_tlb_mm(mm);
138}
139static inline void flush_tlb_page(struct vm_area_struct *vma,
140 unsigned long addr)
141{
142 __flush_tlb_mm(vma->vm_mm);
143}
144static inline void flush_tlb_range(struct vm_area_struct *vma,
145 unsigned long start, unsigned long end)
146{ 84{
147 __flush_tlb_mm(vma->vm_mm); 85 if (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm)
86 __tlb_flush_mm(mm);
148} 87}
149 88
150#define flush_tlb_kernel_range(start, end) global_flush_tlb() 89/*
90 * TLB flushing:
91 * flush_tlb() - flushes the current mm struct TLBs
92 * flush_tlb_all() - flushes all processes TLBs
93 * flush_tlb_mm(mm) - flushes the specified mm context TLB's
94 * flush_tlb_page(vma, vmaddr) - flushes one page
95 * flush_tlb_range(vma, start, end) - flushes a range of pages
96 * flush_tlb_kernel_range(start, end) - flushes a range of kernel pages
97 */
151 98
152#endif 99/*
100 * flush_tlb_mm goes together with ptep_set_wrprotect for the
101 * copy_page_range operation and flush_tlb_range is related to
102 * ptep_get_and_clear for change_protection. ptep_set_wrprotect and
103 * ptep_get_and_clear do not flush the TLBs directly if the mm has
104 * only one user. At the end of the update the flush_tlb_mm and
105 * flush_tlb_range functions need to do the flush.
106 */
107#define flush_tlb() do { } while (0)
108#define flush_tlb_all() do { } while (0)
109#define flush_tlb_mm(mm) __tlb_flush_mm_cond(mm)
110#define flush_tlb_page(vma, addr) do { } while (0)
111#define flush_tlb_range(vma, start, end) __tlb_flush_mm_cond(mm)
112#define flush_tlb_kernel_range(start, end) __tlb_flush_mm(&init_mm)
153 113
154#endif /* _S390_TLBFLUSH_H */ 114#endif /* _S390_TLBFLUSH_H */