author    Peter Zijlstra <a.p.zijlstra@chello.nl>    2011-05-24 20:12:00 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2011-05-25 11:39:16 -0400
commit    267239116987d64850ad2037d8e0f3071dc3b5ce (patch)
tree      142595897f7fc7bb673b791891dcc2fab31f6e91
parent    1c395176962176660bb108f90e97e1686cfe0d85 (diff)
mm, powerpc: move the RCU page-table freeing into generic code
In case other architectures require RCU-freed page tables to implement
gup_fast() and software-filled hashes and similar things, provide the
means to do so by moving the logic into generic code.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Requested-by: David Miller <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 arch/Kconfig                       |  3
 arch/powerpc/Kconfig               |  1
 arch/powerpc/include/asm/pgalloc.h | 21
 arch/powerpc/include/asm/tlb.h     | 10
 arch/powerpc/mm/pgtable.c          | 98
 arch/powerpc/mm/tlb_hash32.c       |  3
 arch/powerpc/mm/tlb_hash64.c       |  3
 arch/powerpc/mm/tlb_nohash.c       |  3
 include/asm-generic/tlb.h          | 56
 mm/memory.c                        | 77
 10 files changed, 150 insertions(+), 125 deletions(-)
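
The mechanism being made generic here exists for the benefit of lockless
page-table walkers such as gup_fast(). As a rough sketch of that walker side
(hypothetical code, not part of this patch; huge pages, THP and permission
checks are omitted), the pattern the comments below keep referring to is
"disable IRQs, walk, re-enable":

#include <linux/mm.h>

/*
 * Hypothetical illustration only. A lockless walker relies on IRQ
 * disabling: the freeing side in this patch waits for either an IPI or
 * an RCU-sched grace period before a table page is really freed, and
 * neither can complete while this CPU runs with IRQs off.
 */
static pte_t lockless_lookup(struct mm_struct *mm, unsigned long addr)
{
        unsigned long flags;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *ptep, pte = __pte(0);

        local_irq_save(flags);

        pgd = pgd_offset(mm, addr);
        if (pgd_none(*pgd))
                goto out;
        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                goto out;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                goto out;

        /* Safe to dereference: the pte page cannot be freed under us. */
        ptep = pte_offset_map(pmd, addr);
        pte = *ptep;
        pte_unmap(ptep);
out:
        local_irq_restore(flags);
        return pte;
}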
diff --git a/arch/Kconfig b/arch/Kconfig
index 8d24bacaa61e..26b0e2397a57 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -175,4 +175,7 @@ config HAVE_ARCH_JUMP_LABEL
 config HAVE_ARCH_MUTEX_CPU_RELAX
         bool
 
+config HAVE_RCU_TABLE_FREE
+        bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a3128ca0fe11..423145a6f7ba 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -140,6 +140,7 @@ config PPC
         select IRQ_PER_CPU
         select GENERIC_IRQ_SHOW
         select GENERIC_IRQ_SHOW_LEVEL
+        select HAVE_RCU_TABLE_FREE if SMP
 
 config EARLY_PRINTK
         bool
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index df1b4cbb2e70..bf301ac62f35 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -31,14 +31,29 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 #endif
 
 #ifdef CONFIG_SMP
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
-extern void pte_free_finish(struct mmu_gather *tlb);
+struct mmu_gather;
+extern void tlb_remove_table(struct mmu_gather *, void *);
+
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+        unsigned long pgf = (unsigned long)table;
+        BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+        pgf |= shift;
+        tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+        void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+        unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+        pgtable_free(table, shift);
+}
 #else /* CONFIG_SMP */
 static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
 {
         pgtable_free(table, shift);
 }
-static inline void pte_free_finish(struct mmu_gather *tlb) { }
 #endif /* !CONFIG_SMP */
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
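
A note on the encoding above: pgtable_free_tlb() relies on page-table
allocations being aligned to more than MAX_PGTABLE_INDEX_SIZE, so the size
class ("shift") can ride in the pointer's low bits until __tlb_remove_table()
unpacks it again. A stand-alone user-space sketch of the same tagging trick
(INDEX_MASK and the 4 KiB alignment are stand-ins, not the kernel's actual
constants):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define INDEX_MASK 0xf  /* stand-in for MAX_PGTABLE_INDEX_SIZE */

/* Pack a size class into the low bits of an aligned pointer,
 * as pgtable_free_tlb() does. */
static void *pack(void *table, unsigned shift)
{
        uintptr_t pgf = (uintptr_t)table;

        assert(shift <= INDEX_MASK);            /* mirrors the BUG_ON() */
        assert((pgf & INDEX_MASK) == 0);        /* needs aligned allocations */
        return (void *)(pgf | shift);
}

/* Recover both fields, as __tlb_remove_table() does. */
static void unpack(void *_table, void **table, unsigned *shift)
{
        *table = (void *)((uintptr_t)_table & ~(uintptr_t)INDEX_MASK);
        *shift = (uintptr_t)_table & INDEX_MASK;
}

int main(void)
{
        void *table, *out;
        unsigned shift;

        /* posix_memalign guarantees the low bits we borrow are zero. */
        if (posix_memalign(&table, 4096, 4096))
                return 1;

        unpack(pack(table, 9), &out, &shift);
        printf("table=%p shift=%u\n", out, shift);
        assert(out == table && shift == 9);
        free(table);
        return 0;
}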
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 8f0ed7adcd12..e2b428b0f7ba 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -28,16 +28,6 @@
 #define tlb_start_vma(tlb, vma)        do { } while (0)
 #define tlb_end_vma(tlb, vma)          do { } while (0)
 
-#define HAVE_ARCH_MMU_GATHER 1
-
-struct pte_freelist_batch;
-
-struct arch_mmu_gather {
-        struct pte_freelist_batch *batch;
-};
-
-#define ARCH_MMU_GATHER_INIT (struct arch_mmu_gather){ .batch = NULL, }
-
 extern void tlb_flush(struct mmu_gather *tlb);
 
 /* Get the generic bits... */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 6e72788598f8..af40c8768a78 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -33,104 +33,6 @@
 
 #include "mmu_decl.h"
 
-#ifdef CONFIG_SMP
-
-/*
- * Handle batching of page table freeing on SMP. Page tables are
- * queued up and send to be freed later by RCU in order to avoid
- * freeing a page table page that is being walked without locks
- */
-
-static unsigned long pte_freelist_forced_free;
-
-struct pte_freelist_batch
-{
-        struct rcu_head rcu;
-        unsigned int    index;
-        unsigned long   tables[0];
-};
-
-#define PTE_FREELIST_SIZE \
-        ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
-          / sizeof(unsigned long))
-
-static void pte_free_smp_sync(void *arg)
-{
-        /* Do nothing, just ensure we sync with all CPUs */
-}
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-static void pgtable_free_now(void *table, unsigned shift)
-{
-        pte_freelist_forced_free++;
-
-        smp_call_function(pte_free_smp_sync, NULL, 1);
-
-        pgtable_free(table, shift);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
-        struct pte_freelist_batch *batch =
-                container_of(head, struct pte_freelist_batch, rcu);
-        unsigned int i;
-
-        for (i = 0; i < batch->index; i++) {
-                void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
-                unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
-
-                pgtable_free(table, shift);
-        }
-
-        free_page((unsigned long)batch);
-}
-
-static void pte_free_submit(struct pte_freelist_batch *batch)
-{
-        call_rcu_sched(&batch->rcu, pte_free_rcu_callback);
-}
-
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
-{
-        struct pte_freelist_batch **batchp = &tlb->arch.batch;
-        unsigned long pgf;
-
-        if (atomic_read(&tlb->mm->mm_users) < 2) {
-                pgtable_free(table, shift);
-                return;
-        }
-
-        if (*batchp == NULL) {
-                *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
-                if (*batchp == NULL) {
-                        pgtable_free_now(table, shift);
-                        return;
-                }
-                (*batchp)->index = 0;
-        }
-        BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-        pgf = (unsigned long)table | shift;
-        (*batchp)->tables[(*batchp)->index++] = pgf;
-        if ((*batchp)->index == PTE_FREELIST_SIZE) {
-                pte_free_submit(*batchp);
-                *batchp = NULL;
-        }
-}
-
-void pte_free_finish(struct mmu_gather *tlb)
-{
-        struct pte_freelist_batch **batchp = &tlb->arch.batch;
-
-        if (*batchp == NULL)
-                return;
-        pte_free_submit(*batchp);
-        *batchp = NULL;
-}
-
-#endif /* CONFIG_SMP */
-
 static inline int is_exec_fault(void)
 {
         return current->thread.regs && TRAP(current->thread.regs) == 0x400;
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index d555cdb06bc8..27b863c14941 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -71,9 +71,6 @@ void tlb_flush(struct mmu_gather *tlb)
          */
                 _tlbia();
         }
-
-        /* Push out batch of freed page tables */
-        pte_free_finish(tlb);
 }
 
 /*
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 5c94ca34cd79..31f18207970b 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -165,9 +165,6 @@ void tlb_flush(struct mmu_gather *tlb)
                 __flush_tlb_pending(tlbbatch);
 
         put_cpu_var(ppc64_tlb_batch);
-
-        /* Push out batch of freed page tables */
-        pte_free_finish(tlb);
 }
 
 /**
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 8eaf67d32043..0bdad3aecc67 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -299,9 +299,6 @@ EXPORT_SYMBOL(flush_tlb_range);
 void tlb_flush(struct mmu_gather *tlb)
 {
         flush_tlb_mm(tlb->mm);
-
-        /* Push out batch of freed page tables */
-        pte_free_finish(tlb);
 }
 
 /*
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 2d3547c84235..74f80f6b6cf1 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -29,6 +29,49 @@
 #define tlb_fast_mode(tlb) 1
 #endif
 
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+/*
+ * Semi RCU freeing of the page directories.
+ *
+ * This is needed by some architectures to implement software pagetable
+ * walkers.
+ *
+ * gup_fast() and other software pagetable walkers do a lockless page-table
+ * walk and therefore need some synchronization with the freeing of the page
+ * directories. The chosen means to accomplish that is by disabling IRQs over
+ * the walk.
+ *
+ * Architectures that use IPIs to flush TLBs will then automagically DTRT,
+ * since we unlink the page, flush TLBs, then free the page. Since the
+ * disabling of IRQs delays the completion of the TLB flush we can never
+ * observe an already freed page.
+ *
+ * Architectures that do not have this (PPC) need to delay the freeing by
+ * some other means, and this is that means.
+ *
+ * What we do is batch the freed directory pages (tables) and RCU free them.
+ * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
+ * holds off grace periods.
+ *
+ * However, in order to batch these pages we need to allocate storage, and
+ * this allocation is deep inside the MM code and can thus easily fail on
+ * memory pressure. To guarantee progress we fall back to single table
+ * freeing, see the implementation of tlb_remove_table_one().
+ */
+struct mmu_table_batch {
+        struct rcu_head rcu;
+        unsigned int    nr;
+        void            *tables[0];
+};
+
+#define MAX_TABLE_BATCH \
+        ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+#endif
+
 /*
  * If we can't allocate a page to make a big batch of page pointers
  * to work on, then just handle a few from the on-stack structure.
@@ -40,13 +83,13 @@
  */
 struct mmu_gather {
         struct mm_struct        *mm;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+        struct mmu_table_batch  *batch;
+#endif
         unsigned int            nr;     /* set to ~0U means fast mode */
         unsigned int            max;    /* nr < max */
         unsigned int            need_flush;/* Really unmapped some ptes? */
         unsigned int            fullmm; /* non-zero means full mm flush */
-#ifdef HAVE_ARCH_MMU_GATHER
-        struct arch_mmu_gather  arch;
-#endif
         struct page             **pages;
         struct page             *local[MMU_GATHER_BUNDLE];
 };
@@ -82,8 +125,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 
         tlb->fullmm = fullmm;
 
-#ifdef HAVE_ARCH_MMU_GATHER
-        tlb->arch = ARCH_MMU_GATHER_INIT;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+        tlb->batch = NULL;
 #endif
 }
 
@@ -94,6 +137,9 @@ tlb_flush_mmu(struct mmu_gather *tlb)
                 return;
         tlb->need_flush = 0;
         tlb_flush(tlb);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+        tlb_table_flush(tlb);
+#endif
         if (!tlb_fast_mode(tlb)) {
                 free_pages_and_swap_cache(tlb->pages, tlb->nr);
                 tlb->nr = 0;
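
For a feel of the batch geometry defined by MAX_TABLE_BATCH above, here is a
small user-space model (the struct layout is an assumption for a typical
64-bit build; struct rcu_head is modelled as two pointers):

#include <stdio.h>

struct rcu_head_model { void *next, *func; };

struct mmu_table_batch_model {
        struct rcu_head_model rcu;
        unsigned int nr;
        void *tables[];
};

#define PAGE_SIZE 4096UL
#define MAX_TABLE_BATCH \
        ((PAGE_SIZE - sizeof(struct mmu_table_batch_model)) / sizeof(void *))

int main(void)
{
        /* With 4 KiB pages and 8-byte pointers this prints 509: one page
         * of batch storage queues ~509 page-table pages before an RCU
         * flush is forced. */
        printf("tables per batch: %lu\n", MAX_TABLE_BATCH);
        return 0;
}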
diff --git a/mm/memory.c b/mm/memory.c
index 19b2d44de9f0..a77fd23ee68a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -193,6 +193,83 @@ static void check_sync_rss_stat(struct task_struct *task)
 
 #endif
 
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+
+/*
+ * See the comment near struct mmu_table_batch.
+ */
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+        /* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+        /*
+         * This isn't an RCU grace period and hence the page-tables cannot be
+         * assumed to be actually RCU-freed.
+         *
+         * It is however sufficient for software page-table walkers that rely
+         * on IRQ disabling. See the comment near struct mmu_table_batch.
+         */
+        smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+        __tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+        struct mmu_table_batch *batch;
+        int i;
+
+        batch = container_of(head, struct mmu_table_batch, rcu);
+
+        for (i = 0; i < batch->nr; i++)
+                __tlb_remove_table(batch->tables[i]);
+
+        free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+        struct mmu_table_batch **batch = &tlb->batch;
+
+        if (*batch) {
+                call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+                *batch = NULL;
+        }
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+        struct mmu_table_batch **batch = &tlb->batch;
+
+        tlb->need_flush = 1;
+
+        /*
+         * When there are fewer than two users of this mm there cannot be a
+         * concurrent page-table walk.
+         */
+        if (atomic_read(&tlb->mm->mm_users) < 2) {
+                __tlb_remove_table(table);
+                return;
+        }
+
+        if (*batch == NULL) {
+                *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+                if (*batch == NULL) {
+                        tlb_remove_table_one(table);
+                        return;
+                }
+                (*batch)->nr = 0;
+        }
+        (*batch)->tables[(*batch)->nr++] = table;
+        if ((*batch)->nr == MAX_TABLE_BATCH)
+                tlb_table_flush(tlb);
+}
+
+#endif
+
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none. Usually (but
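
To summarize the control flow added in mm/memory.c, tlb_remove_table() frees
a table along one of three paths: immediately when the mm has no other users,
via a batched RCU-sched callback in the common case, or via an IPI-synced
fallback when batch storage cannot be allocated. The toy classifier below
(illustrative only, not kernel code) mirrors that decision order:

#include <stdio.h>

enum table_free_path { FREE_IMMEDIATE, FREE_BATCHED_RCU, FREE_IPI_FALLBACK };

static enum table_free_path classify(int mm_users, int batch_alloc_ok)
{
        if (mm_users < 2)
                return FREE_IMMEDIATE;    /* no concurrent walker possible */
        if (!batch_alloc_ok)
                return FREE_IPI_FALLBACK; /* tlb_remove_table_one() */
        return FREE_BATCHED_RCU;          /* queued, freed after a grace period */
}

int main(void)
{
        printf("%d %d %d\n",
               classify(1, 1),  /* 0: single-threaded mm, free right away */
               classify(2, 0),  /* 2: allocation failed, sync CPUs via IPI */
               classify(2, 1)); /* 1: normal batched RCU-sched path */
        return 0;
}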