author     Will Deacon <will.deacon@arm.com>  2014-10-29 06:03:09 -0400
committer  Will Deacon <will.deacon@arm.com>  2014-11-17 05:12:42 -0500
commit     fb7332a9fedfd62b1ba6530c86f39f0fa38afd49 (patch)
tree       5e77bd4944da750634c4438df64257cdeaa58888
parent     63648dd20fa0780ab6c1e923b5c276d257422cb3 (diff)
mmu_gather: move minimal range calculations into generic code
On architectures with hardware broadcasting of TLB invalidation messages,
it makes sense to reduce the range of the mmu_gather structure when
unmapping page ranges based on the dirty address information passed to
tlb_remove_tlb_entry.

arm64 already does this by directly manipulating the start/end fields of
the gather structure, but this confuses the generic code, which does not
expect these fields to change and can end up calculating invalid, negative
ranges when forcing a flush in zap_pte_range.

This patch moves the minimal range calculation out of the arm64 code and
into the generic implementation, simplifying zap_pte_range in the process
(which no longer needs to care about start/end, since they will point to
the appropriate ranges already). With the range being tracked by core
code, the need_flush flag is dropped in favour of checking that the end
of the range has actually been set.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
Cc: Michal Simek <monstr@monstr.eu>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
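For illustration only, the following stand-alone user-space sketch (not kernel code; PAGE_SIZE, TASK_SIZE, the struct and the helper names are simplified stand-ins loosely mirroring the __tlb_adjust_range/__tlb_reset_range helpers this patch adds) shows the intended behaviour: each unmapped address widens the [start, end) window, the flush is skipped entirely while end is still zero, and the window is reset once a flush has been done.

/*
 * Illustrative user-space sketch (NOT the kernel implementation):
 * PAGE_SIZE, TASK_SIZE and this struct mmu_gather are simplified stand-ins.
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define TASK_SIZE	(~0UL)

struct mmu_gather {
	unsigned long start;
	unsigned long end;
};

/* Forget the accumulated range; end == 0 means "nothing to flush". */
static void tlb_reset_range(struct mmu_gather *tlb)
{
	tlb->start = TASK_SIZE;
	tlb->end = 0;
}

/* Grow the [start, end) window to cover one more unmapped page. */
static void tlb_adjust_range(struct mmu_gather *tlb, unsigned long addr)
{
	if (addr < tlb->start)
		tlb->start = addr;
	if (addr + PAGE_SIZE > tlb->end)
		tlb->end = addr + PAGE_SIZE;
}

/* Flush only if something was actually unmapped, then reset the range. */
static void tlb_flush_range(struct mmu_gather *tlb)
{
	if (!tlb->end)
		return;
	printf("flush [%#lx, %#lx)\n", tlb->start, tlb->end);
	tlb_reset_range(tlb);
}

int main(void)
{
	struct mmu_gather tlb;

	tlb_reset_range(&tlb);
	tlb_adjust_range(&tlb, 0x1000);
	tlb_adjust_range(&tlb, 0x5000);
	tlb_flush_range(&tlb);	/* prints: flush [0x1000, 0x6000) */
	tlb_flush_range(&tlb);	/* no output: the range was reset */
	return 0;
}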
-rw-r--r--  arch/arm64/include/asm/tlb.h       | 67
-rw-r--r--  arch/microblaze/include/asm/tlb.h  |  3
-rw-r--r--  arch/powerpc/include/asm/pgalloc.h |  3
-rw-r--r--  arch/powerpc/include/asm/tlb.h     |  1
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c      |  2
-rw-r--r--  include/asm-generic/tlb.h          | 57
-rw-r--r--  mm/memory.c                        | 30
7 files changed, 63 insertions(+), 100 deletions(-)
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index a82c0c5c8b52..c028fe37456f 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -19,10 +19,6 @@
 #ifndef __ASM_TLB_H
 #define __ASM_TLB_H
 
-#define __tlb_remove_pmd_tlb_entry __tlb_remove_pmd_tlb_entry
-
-#include <asm-generic/tlb.h>
-
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 
@@ -37,71 +33,22 @@ static inline void __tlb_remove_table(void *_table)
 #define tlb_remove_entry(tlb, entry)	tlb_remove_page(tlb, entry)
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 
-/*
- * There's three ways the TLB shootdown code is used:
- *  1. Unmapping a range of vmas.  See zap_page_range(), unmap_region().
- *     tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
- *  2. Unmapping all vmas.  See exit_mmap().
- *     tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
- *     Page tables will be freed.
- *  3. Unmapping argument pages.  See shift_arg_pages().
- *     tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
- */
+#include <asm-generic/tlb.h>
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	if (tlb->fullmm) {
 		flush_tlb_mm(tlb->mm);
-	} else if (tlb->end > 0) {
+	} else {
 		struct vm_area_struct vma = { .vm_mm = tlb->mm, };
 		flush_tlb_range(&vma, tlb->start, tlb->end);
-		tlb->start = TASK_SIZE;
-		tlb->end = 0;
-	}
-}
-
-static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
-{
-	if (!tlb->fullmm) {
-		tlb->start = min(tlb->start, addr);
-		tlb->end = max(tlb->end, addr + PAGE_SIZE);
-	}
-}
-
-/*
- * Memorize the range for the TLB flush.
- */
-static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
-					  unsigned long addr)
-{
-	tlb_add_flush(tlb, addr);
-}
-
-/*
- * In the case of tlb vma handling, we can optimise these away in the
- * case where we're doing a full MM flush.  When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
- */
-static inline void tlb_start_vma(struct mmu_gather *tlb,
-				 struct vm_area_struct *vma)
-{
-	if (!tlb->fullmm) {
-		tlb->start = TASK_SIZE;
-		tlb->end = 0;
 	}
 }
 
-static inline void tlb_end_vma(struct mmu_gather *tlb,
-			       struct vm_area_struct *vma)
-{
-	if (!tlb->fullmm)
-		tlb_flush(tlb);
-}
-
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				  unsigned long addr)
 {
 	pgtable_page_dtor(pte);
-	tlb_add_flush(tlb, addr);
 	tlb_remove_entry(tlb, pte);
 }
 
@@ -109,7 +56,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 				  unsigned long addr)
 {
-	tlb_add_flush(tlb, addr);
 	tlb_remove_entry(tlb, virt_to_page(pmdp));
 }
 #endif
@@ -118,15 +64,8 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
 				  unsigned long addr)
 {
-	tlb_add_flush(tlb, addr);
 	tlb_remove_entry(tlb, virt_to_page(pudp));
 }
 #endif
 
-static inline void __tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp,
-					      unsigned long address)
-{
-	tlb_add_flush(tlb, address);
-}
-
 #endif
diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h
index 8aa97817cc8c..99b6ded54849 100644
--- a/arch/microblaze/include/asm/tlb.h
+++ b/arch/microblaze/include/asm/tlb.h
@@ -14,7 +14,6 @@
 #define tlb_flush(tlb)	flush_tlb_mm((tlb)->mm)
 
 #include <linux/pagemap.h>
-#include <asm-generic/tlb.h>
 
 #ifdef CONFIG_MMU
 #define tlb_start_vma(tlb, vma)	do { } while (0)
@@ -22,4 +21,6 @@
 #define __tlb_remove_tlb_entry(tlb, pte, address)	do { } while (0)
 #endif
 
+#include <asm-generic/tlb.h>
+
 #endif /* _ASM_MICROBLAZE_TLB_H */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index e9a9f60e596d..fc3ee06eab87 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -3,7 +3,6 @@
 #ifdef __KERNEL__
 
 #include <linux/mm.h>
-#include <asm-generic/tlb.h>
 
 #ifdef CONFIG_PPC_BOOK3E
 extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
@@ -14,6 +13,8 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
 }
 #endif /* !CONFIG_PPC_BOOK3E */
 
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
 #ifdef CONFIG_PPC64
 #include <asm/pgalloc-64.h>
 #else
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index e2b428b0f7ba..20733fa518ae 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -27,6 +27,7 @@
 
 #define tlb_start_vma(tlb, vma)	do { } while (0)
 #define tlb_end_vma(tlb, vma)	do { } while (0)
+#define __tlb_remove_tlb_entry	__tlb_remove_tlb_entry
 
 extern void tlb_flush(struct mmu_gather *tlb);
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7e70ae968e5f..6a4a5fcb9730 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -517,8 +517,6 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
 	for (i = 0; i < num_hugepd; i++, hpdp++)
 		hpdp->pd = 0;
 
-	tlb->need_flush = 1;
-
 #ifdef CONFIG_PPC_FSL_BOOK3E
 	hugepd_free(tlb, hugepte);
 #else
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 5672d7ea1fa0..08848050922e 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -96,10 +96,9 @@ struct mmu_gather {
 #endif
 	unsigned long		start;
 	unsigned long		end;
-	unsigned int		need_flush : 1,	/* Did free PTEs */
 	/* we are in the middle of an operation to clear
 	 * a full mm and can make some optimizations */
-				fullmm : 1,
+	unsigned int		fullmm : 1,
 	/* we have performed an operation which
 	 * requires a complete flush of the tlb */
 				need_flush_all : 1;
@@ -128,16 +127,54 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 	tlb_flush_mmu(tlb);
 }
 
+static inline void __tlb_adjust_range(struct mmu_gather *tlb,
+				      unsigned long address)
+{
+	tlb->start = min(tlb->start, address);
+	tlb->end = max(tlb->end, address + PAGE_SIZE);
+}
+
+static inline void __tlb_reset_range(struct mmu_gather *tlb)
+{
+	tlb->start = TASK_SIZE;
+	tlb->end = 0;
+}
+
+/*
+ * In the case of tlb vma handling, we can optimise these away in the
+ * case where we're doing a full MM flush.  When we're doing a munmap,
+ * the vmas are adjusted to only cover the region to be torn down.
+ */
+#ifndef tlb_start_vma
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#endif
+
+#define __tlb_end_vma(tlb, vma)					\
+	do {							\
+		if (!tlb->fullmm && tlb->end) {			\
+			tlb_flush(tlb);				\
+			__tlb_reset_range(tlb);			\
+		}						\
+	} while (0)
+
+#ifndef tlb_end_vma
+#define tlb_end_vma	__tlb_end_vma
+#endif
+
+#ifndef __tlb_remove_tlb_entry
+#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#endif
+
 /**
  * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
  *
- * Record the fact that pte's were really umapped in ->need_flush, so we can
- * later optimise away the tlb invalidate. This helps when userspace is
- * unmapping already-unmapped pages, which happens quite a lot.
+ * Record the fact that pte's were really unmapped by updating the range,
+ * so we can later optimise away the tlb invalidate. This helps when
+ * userspace is unmapping already-unmapped pages, which happens quite a lot.
  */
 #define tlb_remove_tlb_entry(tlb, ptep, address)		\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
@@ -151,27 +188,27 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 
 #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)		\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);	\
 	} while (0)
 
 #define pte_free_tlb(tlb, ptep, address)			\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__pte_free_tlb(tlb, ptep, address);		\
 	} while (0)
 
 #ifndef __ARCH_HAS_4LEVEL_HACK
 #define pud_free_tlb(tlb, pudp, address)			\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__pud_free_tlb(tlb, pudp, address);		\
 	} while (0)
 #endif
 
 #define pmd_free_tlb(tlb, pmdp, address)			\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__pmd_free_tlb(tlb, pmdp, address);		\
 	} while (0)
 
diff --git a/mm/memory.c b/mm/memory.c
index 1cc6bfbd872e..c71edae9ba44 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -220,9 +220,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 	/* Is it from 0 to ~0? */
 	tlb->fullmm = !(start | (end+1));
 	tlb->need_flush_all = 0;
-	tlb->start = start;
-	tlb->end = end;
-	tlb->need_flush = 0;
 	tlb->local.next = NULL;
 	tlb->local.nr = 0;
 	tlb->local.max = ARRAY_SIZE(tlb->__pages);
@@ -232,15 +229,20 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb->batch = NULL;
 #endif
+
+	__tlb_reset_range(tlb);
 }
 
 static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
-	tlb->need_flush = 0;
+	if (!tlb->end)
+		return;
+
 	tlb_flush(tlb);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
 #endif
+	__tlb_reset_range(tlb);
 }
 
 static void tlb_flush_mmu_free(struct mmu_gather *tlb)
@@ -256,8 +258,6 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb)
 
 void tlb_flush_mmu(struct mmu_gather *tlb)
 {
-	if (!tlb->need_flush)
-		return;
 	tlb_flush_mmu_tlbonly(tlb);
 	tlb_flush_mmu_free(tlb);
 }
@@ -292,7 +292,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	struct mmu_gather_batch *batch;
 
-	VM_BUG_ON(!tlb->need_flush);
+	VM_BUG_ON(!tlb->end);
 
 	batch = tlb->active;
 	batch->pages[batch->nr++] = page;
@@ -359,8 +359,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
 	struct mmu_table_batch **batch = &tlb->batch;
 
-	tlb->need_flush = 1;
-
 	/*
 	 * When there's less then two users of this mm there cannot be a
 	 * concurrent page-table walk.
@@ -1185,20 +1183,8 @@ again:
 	arch_leave_lazy_mmu_mode();
 
 	/* Do the actual TLB flush before dropping ptl */
-	if (force_flush) {
-		unsigned long old_end;
-
-		/*
-		 * Flush the TLB just for the previous segment,
-		 * then update the range to be the remaining
-		 * TLB range.
-		 */
-		old_end = tlb->end;
-		tlb->end = addr;
+	if (force_flush)
 		tlb_flush_mmu_tlbonly(tlb);
-		tlb->start = addr;
-		tlb->end = old_end;
-	}
 	pte_unmap_unlock(start_pte, ptl);
 
 	/*