aboutsummaryrefslogtreecommitdiffstats
path: root/include/asm-s390/tlb.h
diff options
context:
space:
mode:
author: Martin Schwidefsky <schwidefsky@de.ibm.com> 2007-10-22 06:52:44 -0400
committer: Martin Schwidefsky <schwidefsky@de.ibm.com> 2007-10-22 06:52:48 -0400
commit: ba8a9229ab9e80278c28ad68b15053f65b2b0a7c (patch)
tree: d73e4f7d352d3b3edf8888973528cb7dd3e953f9 /include/asm-s390/tlb.h
parent: e3d3683d1402c1737687cb698451d545f57c32a7 (diff)
[S390] tlb flush fix.
The current tlb flushing code for page table entries violates the s390 architecture in a small detail. The relevant section from the principles of operation (SA22-7832-02 page 3-47): "A valid table entry must not be changed while it is attached to any CPU and may be used for translation by that CPU except to (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page-table entry, or (3) make a change by means of a COMPARE AND SWAP AND PURGE instruction that purges the TLB." That means if one thread of a multithreaded application uses a vma while another thread does an unmap on it, the page table entries of that vma need to get removed with IPTE, IDTE or CSP. In some strange and rare situations a cpu could check-stop (die) because an entry has been pushed out of the TLB that is still needed to complete a (milli-coded) instruction. I've never seen it happen with the current code on any of the supported machines, so right now this is a theoretical problem. But I want to fix it nevertheless, to avoid headaches in the future. To get this implemented correctly without changing common code the primitives ptep_get_and_clear, ptep_get_and_clear_full and ptep_set_wrprotect need to use the IPTE instruction to invalidate the pte before the new pte value gets stored. If IPTE is always used for the three primitives three important operations will have a performance hit: fork, mprotect and exit_mmap. Time for some workarounds: * 1: ptep_get_and_clear_full is used in unmap_vmas to remove page table entries in a batched tlb gather operation. If the mmu_gather context passed to unmap_vmas has been started with full_mm_flush==1 or if only one cpu is online or if the only user of a mm_struct is the current process then the fullmm indication in the mmu_gather context is set to one. All TLBs for mm_struct are flushed by the tlb_gather_mmu call. No new TLBs can be created while the unmap is in progress. 
In this case ptep_get_and_clear_full clears the ptes with a simple store. * 2: ptep_get_and_clear is used in change_protection to clear the ptes from the page tables before they are reentered with the new access flags. At the end of the update flush_tlb_range clears the remaining TLBs. In general the ptep_get_and_clear has to issue IPTE for each pte and flush_tlb_range is a nop. But if there is only one user of the mm_struct then ptep_get_and_clear uses simple stores to do the update and flush_tlb_range will flush the TLBs. * 3: Similar to 2, ptep_set_wrprotect is used in copy_page_range for a fork to make all ptes of a cow mapping read-only. At the end of copy_page_range dup_mmap will flush the TLBs with a call to flush_tlb_mm. Check for mm->mm_users and if there is only one user avoid using IPTE in ptep_set_wrprotect and let flush_tlb_mm clear the TLBs. Overall for single-threaded programs the tlb flush code now performs better; for multi-threaded programs it is slightly worse. In particular exit_mmap() now does a single IDTE for the mm and then just frees every page cache reference and every page table page directly without a delay over the mmu_gather structure. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'include/asm-s390/tlb.h')
-rw-r--r-- include/asm-s390/tlb.h 127
1 files changed, 118 insertions, 9 deletions
diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h
index 51bd957b85bd..55ae45ef31b5 100644
--- a/include/asm-s390/tlb.h
+++ b/include/asm-s390/tlb.h
@@ -2,19 +2,128 @@
2#define _S390_TLB_H 2#define _S390_TLB_H
3 3
4/* 4/*
5 * s390 doesn't need any special per-pte or 5 * TLB flushing on s390 is complicated. The following requirement
6 * per-vma handling.. 6 * from the principles of operation is the most arduous:
7 *
8 * "A valid table entry must not be changed while it is attached
9 * to any CPU and may be used for translation by that CPU except to
10 * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY,
11 * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page
12 * table entry, or (3) make a change by means of a COMPARE AND SWAP
13 * AND PURGE instruction that purges the TLB."
14 *
15 * The modification of a pte of an active mm struct therefore is
16 * a two step process: i) invalidate the pte, ii) store the new pte.
17 * This is true for the page protection bit as well.
18 * The only possible optimization is to flush at the beginning of
19 * a tlb_gather_mmu cycle if the mm_struct is currently not in use.
20 *
21 * Pages used for the page tables are a different story. FIXME: more
7 */ 22 */
8#define tlb_start_vma(tlb, vma) do { } while (0) 23
9#define tlb_end_vma(tlb, vma) do { } while (0) 24#include <linux/mm.h>
10#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) 25#include <linux/swap.h>
26#include <asm/processor.h>
27#include <asm/pgalloc.h>
28#include <asm/smp.h>
29#include <asm/tlbflush.h>
30
31#ifndef CONFIG_SMP
32#define TLB_NR_PTRS 1
33#else
34#define TLB_NR_PTRS 508
35#endif
36
37struct mmu_gather {
38 struct mm_struct *mm;
39 unsigned int fullmm;
40 unsigned int nr_ptes;
41 unsigned int nr_pmds;
42 void *array[TLB_NR_PTRS];
43};
44
45DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
46
47static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm,
48 unsigned int full_mm_flush)
49{
50 struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
51
52 tlb->mm = mm;
53 tlb->fullmm = full_mm_flush || (num_online_cpus() == 1) ||
54 (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm);
55 tlb->nr_ptes = 0;
56 tlb->nr_pmds = TLB_NR_PTRS;
57 if (tlb->fullmm)
58 __tlb_flush_mm(mm);
59 return tlb;
60}
61
62static inline void tlb_flush_mmu(struct mmu_gather *tlb,
63 unsigned long start, unsigned long end)
64{
65 if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pmds < TLB_NR_PTRS))
66 __tlb_flush_mm(tlb->mm);
67 while (tlb->nr_ptes > 0)
68 pte_free(tlb->array[--tlb->nr_ptes]);
69 while (tlb->nr_pmds < TLB_NR_PTRS)
70 pmd_free((pmd_t *) tlb->array[tlb->nr_pmds++]);
71}
72
73static inline void tlb_finish_mmu(struct mmu_gather *tlb,
74 unsigned long start, unsigned long end)
75{
76 tlb_flush_mmu(tlb, start, end);
77
78 /* keep the page table cache within bounds */
79 check_pgt_cache();
80
81 put_cpu_var(mmu_gathers);
82}
11 83
12/* 84/*
13 * .. because we flush the whole mm when it 85 * Release the page cache reference for a pte removed by
14 * fills up. 86 * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
87 * has already been freed, so just do free_page_and_swap_cache.
15 */ 88 */
16#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) 89static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
90{
91 free_page_and_swap_cache(page);
92}
17 93
18#include <asm-generic/tlb.h> 94/*
95 * pte_free_tlb frees a pte table and clears the CRSTE for the
96 * page table from the tlb.
97 */
98static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page)
99{
100 if (!tlb->fullmm) {
101 tlb->array[tlb->nr_ptes++] = page;
102 if (tlb->nr_ptes >= tlb->nr_pmds)
103 tlb_flush_mmu(tlb, 0, 0);
104 } else
105 pte_free(page);
106}
19 107
108/*
109 * pmd_free_tlb frees a pmd table and clears the CRSTE for the
110 * segment table entry from the tlb.
111 */
112static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
113{
114#ifdef __s390x__
115 if (!tlb->fullmm) {
116 tlb->array[--tlb->nr_pmds] = (struct page *) pmd;
117 if (tlb->nr_ptes >= tlb->nr_pmds)
118 tlb_flush_mmu(tlb, 0, 0);
119 } else
120 pmd_free(pmd);
20#endif 121#endif
122}
123
124#define tlb_start_vma(tlb, vma) do { } while (0)
125#define tlb_end_vma(tlb, vma) do { } while (0)
126#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
127#define tlb_migrate_finish(mm) do { } while (0)
128
129#endif /* _S390_TLB_H */