author     Martin Schwidefsky <schwidefsky@de.ibm.com>   2011-06-06 08:14:41 -0400
committer  Martin Schwidefsky <schwidefsky@de.ibm.com>   2011-06-06 08:14:56 -0400
commit     36409f6353fc2d7b6516e631415f938eadd92ffa (patch)
tree       6348a841e76a1ddff366bc1259c1ea64685d87b2 /arch/s390/mm
parent     3ec90878bade9280dee87c9e27d759f1cee07e70 (diff)
[S390] use generic RCU page-table freeing code
Replace the s390-specific RCU page-table freeing code with the
generic variant. This requires duplicating the definition of
struct mmu_table_batch, as s390 does not use the generic TLB flush
code.

While we are at it, remove the restriction that page table fragments
cannot be reused after a single fragment has been freed with RCU,
and split out the allocation and freeing of page tables with pgstes.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--   arch/s390/mm/pgtable.c | 292
1 file changed, 119 insertions(+), 173 deletions(-)
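For orientation: the generic interface this patch switches to batches page-table pointers per mmu_gather and frees them from an RCU callback. Because s390 keeps its own TLB flush code rather than including asm-generic/tlb.h, the batch definition has to be duplicated under arch/s390, as the commit message notes. The sketch below shows roughly how that interface looked in the generic headers of this kernel generation; it is shown only for reference and is not part of the diff that follows.

```c
/* Sketch of the generic RCU table-freeing interface (include/asm-generic/tlb.h,
 * circa kernel 3.0); s390 duplicates struct mmu_table_batch because it does
 * not use the generic TLB flush code. */
struct mmu_table_batch {
	struct rcu_head	rcu;		/* defers the actual free past a grace period */
	unsigned int	nr;		/* number of tables queued in this batch */
	void		*tables[0];	/* table pointers, filling the rest of the page */
};

#define MAX_TABLE_BATCH \
	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))

/* Implemented by generic code; the architecture provides __tlb_remove_table(). */
extern void tlb_table_flush(struct mmu_gather *tlb);
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
```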
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b09763fe5da1..37a23c223705 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,94 +24,12 @@
24 | #include <asm/tlbflush.h> | 24 | #include <asm/tlbflush.h> |
25 | #include <asm/mmu_context.h> | 25 | #include <asm/mmu_context.h> |
26 | 26 | ||
27 | struct rcu_table_freelist { | ||
28 | struct rcu_head rcu; | ||
29 | struct mm_struct *mm; | ||
30 | unsigned int pgt_index; | ||
31 | unsigned int crst_index; | ||
32 | unsigned long *table[0]; | ||
33 | }; | ||
34 | |||
35 | #define RCU_FREELIST_SIZE \ | ||
36 | ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \ | ||
37 | / sizeof(unsigned long)) | ||
38 | |||
39 | static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); | ||
40 | |||
41 | static void __page_table_free(struct mm_struct *mm, unsigned long *table); | ||
42 | |||
43 | static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) | ||
44 | { | ||
45 | struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist); | ||
46 | struct rcu_table_freelist *batch = *batchp; | ||
47 | |||
48 | if (batch) | ||
49 | return batch; | ||
50 | batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC); | ||
51 | if (batch) { | ||
52 | batch->mm = mm; | ||
53 | batch->pgt_index = 0; | ||
54 | batch->crst_index = RCU_FREELIST_SIZE; | ||
55 | *batchp = batch; | ||
56 | } | ||
57 | return batch; | ||
58 | } | ||
59 | |||
60 | static void rcu_table_freelist_callback(struct rcu_head *head) | ||
61 | { | ||
62 | struct rcu_table_freelist *batch = | ||
63 | container_of(head, struct rcu_table_freelist, rcu); | ||
64 | |||
65 | while (batch->pgt_index > 0) | ||
66 | __page_table_free(batch->mm, batch->table[--batch->pgt_index]); | ||
67 | while (batch->crst_index < RCU_FREELIST_SIZE) | ||
68 | crst_table_free(batch->mm, batch->table[batch->crst_index++]); | ||
69 | free_page((unsigned long) batch); | ||
70 | } | ||
71 | |||
72 | void rcu_table_freelist_finish(void) | ||
73 | { | ||
74 | struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist); | ||
75 | struct rcu_table_freelist *batch = *batchp; | ||
76 | |||
77 | if (!batch) | ||
78 | goto out; | ||
79 | call_rcu(&batch->rcu, rcu_table_freelist_callback); | ||
80 | *batchp = NULL; | ||
81 | out: | ||
82 | put_cpu_var(rcu_table_freelist); | ||
83 | } | ||
84 | |||
85 | static void smp_sync(void *arg) | ||
86 | { | ||
87 | } | ||
88 | |||
89 | #ifndef CONFIG_64BIT | 27 | #ifndef CONFIG_64BIT |
90 | #define ALLOC_ORDER 1 | 28 | #define ALLOC_ORDER 1 |
91 | #define TABLES_PER_PAGE 4 | 29 | #define FRAG_MASK 0x0f |
92 | #define FRAG_MASK 15UL | ||
93 | #define SECOND_HALVES 10UL | ||
94 | |||
95 | void clear_table_pgstes(unsigned long *table) | ||
96 | { | ||
97 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | ||
98 | memset(table + 256, 0, PAGE_SIZE/4); | ||
99 | clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | ||
100 | memset(table + 768, 0, PAGE_SIZE/4); | ||
101 | } | ||
102 | |||
103 | #else | 30 | #else |
104 | #define ALLOC_ORDER 2 | 31 | #define ALLOC_ORDER 2 |
105 | #define TABLES_PER_PAGE 2 | 32 | #define FRAG_MASK 0x03 |
106 | #define FRAG_MASK 3UL | ||
107 | #define SECOND_HALVES 2UL | ||
108 | |||
109 | void clear_table_pgstes(unsigned long *table) | ||
110 | { | ||
111 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | ||
112 | memset(table + 256, 0, PAGE_SIZE/2); | ||
113 | } | ||
114 | |||
115 | #endif | 33 | #endif |
116 | 34 | ||
117 | unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; | 35 | unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; |
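The hunk above collapses the old TABLES_PER_PAGE/SECOND_HALVES bookkeeping into a single fragment mask: on 31-bit s390 a page table occupies 1K, so a 4K page holds four fragments (FRAG_MASK 0x0f); on 64-bit a page table occupies 2K, so a page holds two (FRAG_MASK 0x03). Later hunks track these fragments as bits in page->_mapcount. The helper below is purely illustrative, with a hypothetical name; the patch open-codes the same computation in page_table_free() and page_table_free_rcu().

```c
/* Illustration only (hypothetical helper): which bit of the low nibble of
 * page->_mapcount corresponds to a given page-table fragment.
 * PTRS_PER_PTE * sizeof(pte_t) is 1K on 31-bit and 2K on 64-bit s390. */
static inline unsigned int pgtable_frag_bit(unsigned long *table)
{
	unsigned long offset = __pa(table) & ~PAGE_MASK;

	return 1U << (offset / (PTRS_PER_PTE * sizeof(pte_t)));
}
```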
@@ -140,29 +58,6 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
140 | free_pages((unsigned long) table, ALLOC_ORDER); | 58 | free_pages((unsigned long) table, ALLOC_ORDER); |
141 | } | 59 | } |
142 | 60 | ||
143 | void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) | ||
144 | { | ||
145 | struct rcu_table_freelist *batch; | ||
146 | |||
147 | preempt_disable(); | ||
148 | if (atomic_read(&mm->mm_users) < 2 && | ||
149 | cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { | ||
150 | crst_table_free(mm, table); | ||
151 | goto out; | ||
152 | } | ||
153 | batch = rcu_table_freelist_get(mm); | ||
154 | if (!batch) { | ||
155 | smp_call_function(smp_sync, NULL, 1); | ||
156 | crst_table_free(mm, table); | ||
157 | goto out; | ||
158 | } | ||
159 | batch->table[--batch->crst_index] = table; | ||
160 | if (batch->pgt_index >= batch->crst_index) | ||
161 | rcu_table_freelist_finish(); | ||
162 | out: | ||
163 | preempt_enable(); | ||
164 | } | ||
165 | |||
166 | #ifdef CONFIG_64BIT | 61 | #ifdef CONFIG_64BIT |
167 | int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) | 62 | int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) |
168 | { | 63 | { |
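This hunk removes crst_table_free_rcu() and with it the hand-rolled smp_call_function(smp_sync, ...) fallback; region and segment (CRST) tables are now queued through the generic tlb_remove_table() and reach free_pages() via __tlb_remove_table(), which is added further down. The generic side batches the pointers and, when it cannot allocate a batch page, falls back to an IPI broadcast, the same trick the deleted code used. A rough sketch of that generic path (mm/memory.c of this kernel generation, recalled for context, not part of this diff):

```c
/* Rough sketch of the generic CONFIG_HAVE_RCU_TABLE_FREE path that now
 * does the work the deleted freelist/smp_sync() code did by hand. */
static void tlb_remove_table_smp_sync(void *arg)
{
	/* The IPI is the synchronization: once every CPU has taken it,
	 * no CPU can still be inside a lockless page-table walk. */
}

static void tlb_remove_table_one(void *table)
{
	/* No batch page available: synchronize by IPI and free directly. */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);	/* arch hook, provided by this patch for s390 */
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch =
		container_of(head, struct mmu_table_batch, rcu);
	unsigned int i;

	/* Grace period has passed: hand every queued table back to the arch. */
	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);
	free_page((unsigned long) batch);
}
```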
@@ -238,124 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
238 | } | 133 | } |
239 | #endif | 134 | #endif |
240 | 135 | ||
136 | static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) | ||
137 | { | ||
138 | unsigned int old, new; | ||
139 | |||
140 | do { | ||
141 | old = atomic_read(v); | ||
142 | new = old ^ bits; | ||
143 | } while (atomic_cmpxchg(v, old, new) != old); | ||
144 | return new; | ||
145 | } | ||
146 | |||
241 | /* | 147 | /* |
242 | * page table entry allocation/free routines. | 148 | * page table entry allocation/free routines. |
243 | */ | 149 | */ |
150 | #ifdef CONFIG_PGSTE | ||
151 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) | ||
152 | { | ||
153 | struct page *page; | ||
154 | unsigned long *table; | ||
155 | |||
156 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | ||
157 | if (!page) | ||
158 | return NULL; | ||
159 | pgtable_page_ctor(page); | ||
160 | atomic_set(&page->_mapcount, 3); | ||
161 | table = (unsigned long *) page_to_phys(page); | ||
162 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | ||
163 | clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); | ||
164 | return table; | ||
165 | } | ||
166 | |||
167 | static inline void page_table_free_pgste(unsigned long *table) | ||
168 | { | ||
169 | struct page *page; | ||
170 | |||
171 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
172 | pgtable_page_ctor(page); | ||
173 | atomic_set(&page->_mapcount, -1); | ||
174 | __free_page(page); | ||
175 | } | ||
176 | #endif | ||
177 | |||
244 | unsigned long *page_table_alloc(struct mm_struct *mm) | 178 | unsigned long *page_table_alloc(struct mm_struct *mm) |
245 | { | 179 | { |
246 | struct page *page; | 180 | struct page *page; |
247 | unsigned long *table; | 181 | unsigned long *table; |
248 | unsigned long bits; | 182 | unsigned int mask, bit; |
249 | 183 | ||
250 | bits = (mm->context.has_pgste) ? 3UL : 1UL; | 184 | #ifdef CONFIG_PGSTE |
185 | if (mm_has_pgste(mm)) | ||
186 | return page_table_alloc_pgste(mm); | ||
187 | #endif | ||
188 | /* Allocate fragments of a 4K page as 1K/2K page table */ | ||
251 | spin_lock_bh(&mm->context.list_lock); | 189 | spin_lock_bh(&mm->context.list_lock); |
252 | page = NULL; | 190 | mask = FRAG_MASK; |
253 | if (!list_empty(&mm->context.pgtable_list)) { | 191 | if (!list_empty(&mm->context.pgtable_list)) { |
254 | page = list_first_entry(&mm->context.pgtable_list, | 192 | page = list_first_entry(&mm->context.pgtable_list, |
255 | struct page, lru); | 193 | struct page, lru); |
256 | if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) | 194 | table = (unsigned long *) page_to_phys(page); |
257 | page = NULL; | 195 | mask = atomic_read(&page->_mapcount); |
196 | mask = mask | (mask >> 4); | ||
258 | } | 197 | } |
259 | if (!page) { | 198 | if ((mask & FRAG_MASK) == FRAG_MASK) { |
260 | spin_unlock_bh(&mm->context.list_lock); | 199 | spin_unlock_bh(&mm->context.list_lock); |
261 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | 200 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); |
262 | if (!page) | 201 | if (!page) |
263 | return NULL; | 202 | return NULL; |
264 | pgtable_page_ctor(page); | 203 | pgtable_page_ctor(page); |
265 | page->flags &= ~FRAG_MASK; | 204 | atomic_set(&page->_mapcount, 1); |
266 | table = (unsigned long *) page_to_phys(page); | 205 | table = (unsigned long *) page_to_phys(page); |
267 | if (mm->context.has_pgste) | 206 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); |
268 | clear_table_pgstes(table); | ||
269 | else | ||
270 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | ||
271 | spin_lock_bh(&mm->context.list_lock); | 207 | spin_lock_bh(&mm->context.list_lock); |
272 | list_add(&page->lru, &mm->context.pgtable_list); | 208 | list_add(&page->lru, &mm->context.pgtable_list); |
209 | } else { | ||
210 | for (bit = 1; mask & bit; bit <<= 1) | ||
211 | table += PTRS_PER_PTE; | ||
212 | mask = atomic_xor_bits(&page->_mapcount, bit); | ||
213 | if ((mask & FRAG_MASK) == FRAG_MASK) | ||
214 | list_del(&page->lru); | ||
273 | } | 215 | } |
274 | table = (unsigned long *) page_to_phys(page); | ||
275 | while (page->flags & bits) { | ||
276 | table += 256; | ||
277 | bits <<= 1; | ||
278 | } | ||
279 | page->flags |= bits; | ||
280 | if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) | ||
281 | list_move_tail(&page->lru, &mm->context.pgtable_list); | ||
282 | spin_unlock_bh(&mm->context.list_lock); | 216 | spin_unlock_bh(&mm->context.list_lock); |
283 | return table; | 217 | return table; |
284 | } | 218 | } |
285 | 219 | ||
286 | static void __page_table_free(struct mm_struct *mm, unsigned long *table) | 220 | void page_table_free(struct mm_struct *mm, unsigned long *table) |
287 | { | 221 | { |
288 | struct page *page; | 222 | struct page *page; |
289 | unsigned long bits; | 223 | unsigned int bit, mask; |
290 | 224 | ||
291 | bits = ((unsigned long) table) & 15; | 225 | #ifdef CONFIG_PGSTE |
292 | table = (unsigned long *)(((unsigned long) table) ^ bits); | 226 | if (mm_has_pgste(mm)) |
227 | return page_table_free_pgste(table); | ||
228 | #endif | ||
229 | /* Free 1K/2K page table fragment of a 4K page */ | ||
293 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 230 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
294 | page->flags ^= bits; | 231 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); |
295 | if (!(page->flags & FRAG_MASK)) { | 232 | spin_lock_bh(&mm->context.list_lock); |
233 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) | ||
234 | list_del(&page->lru); | ||
235 | mask = atomic_xor_bits(&page->_mapcount, bit); | ||
236 | if (mask & FRAG_MASK) | ||
237 | list_add(&page->lru, &mm->context.pgtable_list); | ||
238 | spin_unlock_bh(&mm->context.list_lock); | ||
239 | if (mask == 0) { | ||
296 | pgtable_page_dtor(page); | 240 | pgtable_page_dtor(page); |
241 | atomic_set(&page->_mapcount, -1); | ||
297 | __free_page(page); | 242 | __free_page(page); |
298 | } | 243 | } |
299 | } | 244 | } |
300 | 245 | ||
301 | void page_table_free(struct mm_struct *mm, unsigned long *table) | 246 | #ifdef CONFIG_HAVE_RCU_TABLE_FREE |
247 | |||
248 | static void __page_table_free_rcu(void *table, unsigned bit) | ||
302 | { | 249 | { |
303 | struct page *page; | 250 | struct page *page; |
304 | unsigned long bits; | ||
305 | 251 | ||
306 | bits = (mm->context.has_pgste) ? 3UL : 1UL; | 252 | #ifdef CONFIG_PGSTE |
307 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); | 253 | if (bit == FRAG_MASK) |
254 | return page_table_free_pgste(table); | ||
255 | #endif | ||
256 | /* Free 1K/2K page table fragment of a 4K page */ | ||
308 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 257 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
309 | spin_lock_bh(&mm->context.list_lock); | 258 | if (atomic_xor_bits(&page->_mapcount, bit) == 0) { |
310 | page->flags ^= bits; | ||
311 | if (page->flags & FRAG_MASK) { | ||
312 | /* Page now has some free pgtable fragments. */ | ||
313 | if (!list_empty(&page->lru)) | ||
314 | list_move(&page->lru, &mm->context.pgtable_list); | ||
315 | page = NULL; | ||
316 | } else | ||
317 | /* All fragments of the 4K page have been freed. */ | ||
318 | list_del(&page->lru); | ||
319 | spin_unlock_bh(&mm->context.list_lock); | ||
320 | if (page) { | ||
321 | pgtable_page_dtor(page); | 259 | pgtable_page_dtor(page); |
260 | atomic_set(&page->_mapcount, -1); | ||
322 | __free_page(page); | 261 | __free_page(page); |
323 | } | 262 | } |
324 | } | 263 | } |
325 | 264 | ||
326 | void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) | 265 | void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) |
327 | { | 266 | { |
328 | struct rcu_table_freelist *batch; | 267 | struct mm_struct *mm; |
329 | struct page *page; | 268 | struct page *page; |
330 | unsigned long bits; | 269 | unsigned int bit, mask; |
331 | 270 | ||
332 | preempt_disable(); | 271 | mm = tlb->mm; |
333 | if (atomic_read(&mm->mm_users) < 2 && | 272 | #ifdef CONFIG_PGSTE |
334 | cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { | 273 | if (mm_has_pgste(mm)) { |
335 | page_table_free(mm, table); | 274 | table = (unsigned long *) (__pa(table) | FRAG_MASK); |
336 | goto out; | 275 | tlb_remove_table(tlb, table); |
337 | } | 276 | return; |
338 | batch = rcu_table_freelist_get(mm); | ||
339 | if (!batch) { | ||
340 | smp_call_function(smp_sync, NULL, 1); | ||
341 | page_table_free(mm, table); | ||
342 | goto out; | ||
343 | } | 277 | } |
344 | bits = (mm->context.has_pgste) ? 3UL : 1UL; | 278 | #endif |
345 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); | 279 | bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); |
346 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 280 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
347 | spin_lock_bh(&mm->context.list_lock); | 281 | spin_lock_bh(&mm->context.list_lock); |
348 | /* Delayed freeing with rcu prevents reuse of pgtable fragments */ | 282 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) |
349 | list_del_init(&page->lru); | 283 | list_del(&page->lru); |
284 | mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); | ||
285 | if (mask & FRAG_MASK) | ||
286 | list_add_tail(&page->lru, &mm->context.pgtable_list); | ||
350 | spin_unlock_bh(&mm->context.list_lock); | 287 | spin_unlock_bh(&mm->context.list_lock); |
351 | table = (unsigned long *)(((unsigned long) table) | bits); | 288 | table = (unsigned long *) (__pa(table) | (bit << 4)); |
352 | batch->table[batch->pgt_index++] = table; | 289 | tlb_remove_table(tlb, table); |
353 | if (batch->pgt_index >= batch->crst_index) | ||
354 | rcu_table_freelist_finish(); | ||
355 | out: | ||
356 | preempt_enable(); | ||
357 | } | 290 | } |
358 | 291 | ||
292 | void __tlb_remove_table(void *_table) | ||
293 | { | ||
294 | void *table = (void *)((unsigned long) _table & PAGE_MASK); | ||
295 | unsigned type = (unsigned long) _table & ~PAGE_MASK; | ||
296 | |||
297 | if (type) | ||
298 | __page_table_free_rcu(table, type); | ||
299 | else | ||
300 | free_pages((unsigned long) table, ALLOC_ORDER); | ||
301 | } | ||
302 | |||
303 | #endif | ||
304 | |||
359 | /* | 305 | /* |
360 | * switch on pgstes for its userspace process (for kvm) | 306 | * switch on pgstes for its userspace process (for kvm) |
361 | */ | 307 | */ |
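Two details of the new fragment handling above are worth spelling out. First, page_table_alloc() computes mask = mask | (mask >> 4) before scanning for a free slot: the upper bits of page->_mapcount mark fragments whose RCU-delayed free has not completed yet (page_table_free_rcu() sets bit << 4, __page_table_free_rcu() clears it), so folding them into the low nibble means a still-pending fragment is skipped while the remaining fragments of the page stay allocatable. This is what lifts the old "no reuse after an RCU free" restriction mentioned in the commit message. Second, the low bits of the pointer handed to tlb_remove_table() encode the table type that __tlb_remove_table() dispatches on: 0 for a CRST table, the fragment bit shifted by 4 for a 1K/2K fragment, and FRAG_MASK for a full pgste page. A compressed sketch of the _mapcount convention, with a hypothetical helper name:

```c
/*
 * page->_mapcount for a 4K page carrying 1K/2K page-table fragments
 * (values as used in the hunk above):
 *
 *   low nibble (FRAG_MASK bits)        fragment is allocated
 *   next nibble (FRAG_MASK << 4 bits)  fragment queued for an RCU-delayed free
 *   -1                                 plain page again, ready for __free_page()
 *
 * A fragment may be handed out only when both of its bits are clear.
 */
static inline unsigned int pgtable_busy_mask(struct page *page)	/* hypothetical */
{
	unsigned int mask = atomic_read(&page->_mapcount);

	return (mask | (mask >> 4)) & FRAG_MASK;
}
```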
@@ -369,7 +315,7 @@ int s390_enable_sie(void)
369 | return -EINVAL; | 315 | return -EINVAL; |
370 | 316 | ||
371 | /* Do we have pgstes? if yes, we are done */ | 317 | /* Do we have pgstes? if yes, we are done */ |
372 | if (tsk->mm->context.has_pgste) | 318 | if (mm_has_pgste(tsk->mm)) |
373 | return 0; | 319 | return 0; |
374 | 320 | ||
375 | /* lets check if we are allowed to replace the mm */ | 321 | /* lets check if we are allowed to replace the mm */ |
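The final hunk replaces the direct test of mm->context.has_pgste with the mm_has_pgste() helper used throughout the new allocation code. The helper lives in arch/s390/include/asm/pgtable.h; its likely shape in this kernel generation (recalled from memory, shown only for reference) is:

```c
/* Probable shape of the helper referenced above; it compiles to a
 * constant 0 when CONFIG_PGSTE is not set. */
static inline int mm_has_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	if (unlikely(mm->context.has_pgste))
		return 1;
#endif
	return 0;
}
```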