aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/mm/pgtable.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r--arch/s390/mm/pgtable.c292
1 files changed, 119 insertions, 173 deletions
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b09763fe5da1..37a23c223705 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,94 +24,12 @@
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25#include <asm/mmu_context.h> 25#include <asm/mmu_context.h>
26 26
27struct rcu_table_freelist {
28 struct rcu_head rcu;
29 struct mm_struct *mm;
30 unsigned int pgt_index;
31 unsigned int crst_index;
32 unsigned long *table[0];
33};
34
35#define RCU_FREELIST_SIZE \
36 ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \
37 / sizeof(unsigned long))
38
39static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
40
41static void __page_table_free(struct mm_struct *mm, unsigned long *table);
42
43static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm)
44{
45 struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist);
46 struct rcu_table_freelist *batch = *batchp;
47
48 if (batch)
49 return batch;
50 batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC);
51 if (batch) {
52 batch->mm = mm;
53 batch->pgt_index = 0;
54 batch->crst_index = RCU_FREELIST_SIZE;
55 *batchp = batch;
56 }
57 return batch;
58}
59
60static void rcu_table_freelist_callback(struct rcu_head *head)
61{
62 struct rcu_table_freelist *batch =
63 container_of(head, struct rcu_table_freelist, rcu);
64
65 while (batch->pgt_index > 0)
66 __page_table_free(batch->mm, batch->table[--batch->pgt_index]);
67 while (batch->crst_index < RCU_FREELIST_SIZE)
68 crst_table_free(batch->mm, batch->table[batch->crst_index++]);
69 free_page((unsigned long) batch);
70}
71
72void rcu_table_freelist_finish(void)
73{
74 struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist);
75 struct rcu_table_freelist *batch = *batchp;
76
77 if (!batch)
78 goto out;
79 call_rcu(&batch->rcu, rcu_table_freelist_callback);
80 *batchp = NULL;
81out:
82 put_cpu_var(rcu_table_freelist);
83}
84
85static void smp_sync(void *arg)
86{
87}
88
89#ifndef CONFIG_64BIT 27#ifndef CONFIG_64BIT
90#define ALLOC_ORDER 1 28#define ALLOC_ORDER 1
91#define TABLES_PER_PAGE 4 29#define FRAG_MASK 0x0f
92#define FRAG_MASK 15UL
93#define SECOND_HALVES 10UL
94
95void clear_table_pgstes(unsigned long *table)
96{
97 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
98 memset(table + 256, 0, PAGE_SIZE/4);
99 clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
100 memset(table + 768, 0, PAGE_SIZE/4);
101}
102
103#else 30#else
104#define ALLOC_ORDER 2 31#define ALLOC_ORDER 2
105#define TABLES_PER_PAGE 2 32#define FRAG_MASK 0x03
106#define FRAG_MASK 3UL
107#define SECOND_HALVES 2UL
108
109void clear_table_pgstes(unsigned long *table)
110{
111 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
112 memset(table + 256, 0, PAGE_SIZE/2);
113}
114
115#endif 33#endif
116 34
117unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; 35unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
@@ -140,29 +58,6 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
140 free_pages((unsigned long) table, ALLOC_ORDER); 58 free_pages((unsigned long) table, ALLOC_ORDER);
141} 59}
142 60
143void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table)
144{
145 struct rcu_table_freelist *batch;
146
147 preempt_disable();
148 if (atomic_read(&mm->mm_users) < 2 &&
149 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
150 crst_table_free(mm, table);
151 goto out;
152 }
153 batch = rcu_table_freelist_get(mm);
154 if (!batch) {
155 smp_call_function(smp_sync, NULL, 1);
156 crst_table_free(mm, table);
157 goto out;
158 }
159 batch->table[--batch->crst_index] = table;
160 if (batch->pgt_index >= batch->crst_index)
161 rcu_table_freelist_finish();
162out:
163 preempt_enable();
164}
165
166#ifdef CONFIG_64BIT 61#ifdef CONFIG_64BIT
167int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) 62int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
168{ 63{
@@ -238,124 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
238} 133}
239#endif 134#endif
240 135
136static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
137{
138 unsigned int old, new;
139
140 do {
141 old = atomic_read(v);
142 new = old ^ bits;
143 } while (atomic_cmpxchg(v, old, new) != old);
144 return new;
145}
146
241/* 147/*
242 * page table entry allocation/free routines. 148 * page table entry allocation/free routines.
243 */ 149 */
150#ifdef CONFIG_PGSTE
151static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
152{
153 struct page *page;
154 unsigned long *table;
155
156 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
157 if (!page)
158 return NULL;
159 pgtable_page_ctor(page);
160 atomic_set(&page->_mapcount, 3);
161 table = (unsigned long *) page_to_phys(page);
162 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
163 clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
164 return table;
165}
166
167static inline void page_table_free_pgste(unsigned long *table)
168{
169 struct page *page;
170
171 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
172 pgtable_page_ctor(page);
173 atomic_set(&page->_mapcount, -1);
174 __free_page(page);
175}
176#endif
177
244unsigned long *page_table_alloc(struct mm_struct *mm) 178unsigned long *page_table_alloc(struct mm_struct *mm)
245{ 179{
246 struct page *page; 180 struct page *page;
247 unsigned long *table; 181 unsigned long *table;
248 unsigned long bits; 182 unsigned int mask, bit;
249 183
250 bits = (mm->context.has_pgste) ? 3UL : 1UL; 184#ifdef CONFIG_PGSTE
185 if (mm_has_pgste(mm))
186 return page_table_alloc_pgste(mm);
187#endif
188 /* Allocate fragments of a 4K page as 1K/2K page table */
251 spin_lock_bh(&mm->context.list_lock); 189 spin_lock_bh(&mm->context.list_lock);
252 page = NULL; 190 mask = FRAG_MASK;
253 if (!list_empty(&mm->context.pgtable_list)) { 191 if (!list_empty(&mm->context.pgtable_list)) {
254 page = list_first_entry(&mm->context.pgtable_list, 192 page = list_first_entry(&mm->context.pgtable_list,
255 struct page, lru); 193 struct page, lru);
256 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) 194 table = (unsigned long *) page_to_phys(page);
257 page = NULL; 195 mask = atomic_read(&page->_mapcount);
196 mask = mask | (mask >> 4);
258 } 197 }
259 if (!page) { 198 if ((mask & FRAG_MASK) == FRAG_MASK) {
260 spin_unlock_bh(&mm->context.list_lock); 199 spin_unlock_bh(&mm->context.list_lock);
261 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 200 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
262 if (!page) 201 if (!page)
263 return NULL; 202 return NULL;
264 pgtable_page_ctor(page); 203 pgtable_page_ctor(page);
265 page->flags &= ~FRAG_MASK; 204 atomic_set(&page->_mapcount, 1);
266 table = (unsigned long *) page_to_phys(page); 205 table = (unsigned long *) page_to_phys(page);
267 if (mm->context.has_pgste) 206 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
268 clear_table_pgstes(table);
269 else
270 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
271 spin_lock_bh(&mm->context.list_lock); 207 spin_lock_bh(&mm->context.list_lock);
272 list_add(&page->lru, &mm->context.pgtable_list); 208 list_add(&page->lru, &mm->context.pgtable_list);
209 } else {
210 for (bit = 1; mask & bit; bit <<= 1)
211 table += PTRS_PER_PTE;
212 mask = atomic_xor_bits(&page->_mapcount, bit);
213 if ((mask & FRAG_MASK) == FRAG_MASK)
214 list_del(&page->lru);
273 } 215 }
274 table = (unsigned long *) page_to_phys(page);
275 while (page->flags & bits) {
276 table += 256;
277 bits <<= 1;
278 }
279 page->flags |= bits;
280 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
281 list_move_tail(&page->lru, &mm->context.pgtable_list);
282 spin_unlock_bh(&mm->context.list_lock); 216 spin_unlock_bh(&mm->context.list_lock);
283 return table; 217 return table;
284} 218}
285 219
286static void __page_table_free(struct mm_struct *mm, unsigned long *table) 220void page_table_free(struct mm_struct *mm, unsigned long *table)
287{ 221{
288 struct page *page; 222 struct page *page;
289 unsigned long bits; 223 unsigned int bit, mask;
290 224
291 bits = ((unsigned long) table) & 15; 225#ifdef CONFIG_PGSTE
292 table = (unsigned long *)(((unsigned long) table) ^ bits); 226 if (mm_has_pgste(mm))
227 return page_table_free_pgste(table);
228#endif
229 /* Free 1K/2K page table fragment of a 4K page */
293 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 230 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
294 page->flags ^= bits; 231 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
295 if (!(page->flags & FRAG_MASK)) { 232 spin_lock_bh(&mm->context.list_lock);
233 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
234 list_del(&page->lru);
235 mask = atomic_xor_bits(&page->_mapcount, bit);
236 if (mask & FRAG_MASK)
237 list_add(&page->lru, &mm->context.pgtable_list);
238 spin_unlock_bh(&mm->context.list_lock);
239 if (mask == 0) {
296 pgtable_page_dtor(page); 240 pgtable_page_dtor(page);
241 atomic_set(&page->_mapcount, -1);
297 __free_page(page); 242 __free_page(page);
298 } 243 }
299} 244}
300 245
301void page_table_free(struct mm_struct *mm, unsigned long *table) 246#ifdef CONFIG_HAVE_RCU_TABLE_FREE
247
248static void __page_table_free_rcu(void *table, unsigned bit)
302{ 249{
303 struct page *page; 250 struct page *page;
304 unsigned long bits;
305 251
306 bits = (mm->context.has_pgste) ? 3UL : 1UL; 252#ifdef CONFIG_PGSTE
307 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 253 if (bit == FRAG_MASK)
254 return page_table_free_pgste(table);
255#endif
256 /* Free 1K/2K page table fragment of a 4K page */
308 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 257 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
309 spin_lock_bh(&mm->context.list_lock); 258 if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
310 page->flags ^= bits;
311 if (page->flags & FRAG_MASK) {
312 /* Page now has some free pgtable fragments. */
313 if (!list_empty(&page->lru))
314 list_move(&page->lru, &mm->context.pgtable_list);
315 page = NULL;
316 } else
317 /* All fragments of the 4K page have been freed. */
318 list_del(&page->lru);
319 spin_unlock_bh(&mm->context.list_lock);
320 if (page) {
321 pgtable_page_dtor(page); 259 pgtable_page_dtor(page);
260 atomic_set(&page->_mapcount, -1);
322 __free_page(page); 261 __free_page(page);
323 } 262 }
324} 263}
325 264
326void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) 265void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
327{ 266{
328 struct rcu_table_freelist *batch; 267 struct mm_struct *mm;
329 struct page *page; 268 struct page *page;
330 unsigned long bits; 269 unsigned int bit, mask;
331 270
332 preempt_disable(); 271 mm = tlb->mm;
333 if (atomic_read(&mm->mm_users) < 2 && 272#ifdef CONFIG_PGSTE
334 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { 273 if (mm_has_pgste(mm)) {
335 page_table_free(mm, table); 274 table = (unsigned long *) (__pa(table) | FRAG_MASK);
336 goto out; 275 tlb_remove_table(tlb, table);
337 } 276 return;
338 batch = rcu_table_freelist_get(mm);
339 if (!batch) {
340 smp_call_function(smp_sync, NULL, 1);
341 page_table_free(mm, table);
342 goto out;
343 } 277 }
344 bits = (mm->context.has_pgste) ? 3UL : 1UL; 278#endif
345 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 279 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
346 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 280 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
347 spin_lock_bh(&mm->context.list_lock); 281 spin_lock_bh(&mm->context.list_lock);
348 /* Delayed freeing with rcu prevents reuse of pgtable fragments */ 282 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
349 list_del_init(&page->lru); 283 list_del(&page->lru);
284 mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
285 if (mask & FRAG_MASK)
286 list_add_tail(&page->lru, &mm->context.pgtable_list);
350 spin_unlock_bh(&mm->context.list_lock); 287 spin_unlock_bh(&mm->context.list_lock);
351 table = (unsigned long *)(((unsigned long) table) | bits); 288 table = (unsigned long *) (__pa(table) | (bit << 4));
352 batch->table[batch->pgt_index++] = table; 289 tlb_remove_table(tlb, table);
353 if (batch->pgt_index >= batch->crst_index)
354 rcu_table_freelist_finish();
355out:
356 preempt_enable();
357} 290}
358 291
292void __tlb_remove_table(void *_table)
293{
294 void *table = (void *)((unsigned long) _table & PAGE_MASK);
295 unsigned type = (unsigned long) _table & ~PAGE_MASK;
296
297 if (type)
298 __page_table_free_rcu(table, type);
299 else
300 free_pages((unsigned long) table, ALLOC_ORDER);
301}
302
303#endif
304
359/* 305/*
360 * switch on pgstes for its userspace process (for kvm) 306 * switch on pgstes for its userspace process (for kvm)
361 */ 307 */
@@ -369,7 +315,7 @@ int s390_enable_sie(void)
369 return -EINVAL; 315 return -EINVAL;
370 316
371 /* Do we have pgstes? if yes, we are done */ 317 /* Do we have pgstes? if yes, we are done */
372 if (tsk->mm->context.has_pgste) 318 if (mm_has_pgste(tsk->mm))
373 return 0; 319 return 0;
374 320
375 /* lets check if we are allowed to replace the mm */ 321 /* lets check if we are allowed to replace the mm */