author		Martin Schwidefsky <schwidefsky@de.ibm.com>	2010-10-25 10:10:11 -0400
committer	Martin Schwidefsky <sky@mschwide.boeblingen.de.ibm.com>	2010-10-25 10:10:15 -0400
commit		80217147a3d80c8a4e48f06e2f6e965455f3fe2a
tree		b419ae9ee3ab0e5b92c0ed2a30ff59b76d6a4978	/arch/s390/mm/pgtable.c
parent		87799ebab760dd1460f6e4193d4f71ba416d1451
[S390] lockless get_user_pages_fast()
Implement get_user_pages_fast without locking in the fastpath on s390.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
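The core of the patch is the per-CPU rcu_table_freelist batch added below: page table fragments are queued from the front of a single table[] array (pgt_index counts up) while region/segment (CRST) tables are queued from the back (crst_index counts down), and once the two indices meet the batch is handed to call_rcu() so the queued tables are only freed after a grace period. The following standalone C sketch illustrates just that two-ended batching scheme; the names used here (FREELIST_SIZE, struct freelist, freelist_add_pgt, freelist_add_crst) are illustrative stand-ins, not part of the kernel patch.

/* Toy model of the two-ended freeing batch; all identifiers here are
 * made up for the example and only mirror the shape of the patch above. */
#include <stdio.h>
#include <stdlib.h>

#define FREELIST_SIZE 8                 /* stands in for RCU_FREELIST_SIZE */

struct freelist {
        unsigned int pgt_index;         /* next free slot from the front */
        unsigned int crst_index;        /* next free slot from the back  */
        void *table[FREELIST_SIZE];
};

/* Queue a page table fragment from the front; returns 1 when the batch is full. */
static int freelist_add_pgt(struct freelist *b, void *table)
{
        b->table[b->pgt_index++] = table;
        return b->pgt_index >= b->crst_index;
}

/* Queue a CRST table from the back; returns 1 when the batch is full. */
static int freelist_add_crst(struct freelist *b, void *table)
{
        b->table[--b->crst_index] = table;
        return b->pgt_index >= b->crst_index;
}

int main(void)
{
        struct freelist batch = { .pgt_index = 0, .crst_index = FREELIST_SIZE };
        int i, full = 0;

        for (i = 0; i < 3; i++)
                full |= freelist_add_pgt(&batch, malloc(16));
        for (i = 0; i < 5; i++)
                full |= freelist_add_crst(&batch, malloc(16));
        printf("pgt_index=%u crst_index=%u full=%d\n",
               batch.pgt_index, batch.crst_index, full);

        /* Drain both ends, mirroring rcu_table_freelist_callback(). */
        while (batch.pgt_index > 0)
                free(batch.table[--batch.pgt_index]);
        while (batch.crst_index < FREELIST_SIZE)
                free(batch.table[batch.crst_index++]);
        return 0;
}

In the patch itself a full batch, or a call to rcu_table_freelist_finish(), is passed to call_rcu(&batch->rcu, rcu_table_freelist_callback), so the tables are only returned to the allocator after an RCU grace period; that deferral is what allows a lockless get_user_pages_fast walker to dereference page tables without taking mm locks.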
Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r--	arch/s390/mm/pgtable.c	171
1 file changed, 154 insertions(+), 17 deletions(-)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8d999249d357..19338d228c9b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/quicklist.h>
+#include <linux/rcupdate.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -23,6 +24,67 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
+struct rcu_table_freelist {
+        struct rcu_head rcu;
+        struct mm_struct *mm;
+        unsigned int pgt_index;
+        unsigned int crst_index;
+        unsigned long *table[0];
+};
+
+#define RCU_FREELIST_SIZE \
+        ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \
+         / sizeof(unsigned long))
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
+
+static void __page_table_free(struct mm_struct *mm, unsigned long *table);
+static void __crst_table_free(struct mm_struct *mm, unsigned long *table);
+
+static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm)
+{
+        struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist);
+        struct rcu_table_freelist *batch = *batchp;
+
+        if (batch)
+                return batch;
+        batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC);
+        if (batch) {
+                batch->mm = mm;
+                batch->pgt_index = 0;
+                batch->crst_index = RCU_FREELIST_SIZE;
+                *batchp = batch;
+        }
+        return batch;
+}
+
+static void rcu_table_freelist_callback(struct rcu_head *head)
+{
+        struct rcu_table_freelist *batch =
+                container_of(head, struct rcu_table_freelist, rcu);
+
+        while (batch->pgt_index > 0)
+                __page_table_free(batch->mm, batch->table[--batch->pgt_index]);
+        while (batch->crst_index < RCU_FREELIST_SIZE)
+                __crst_table_free(batch->mm, batch->table[batch->crst_index++]);
+        free_page((unsigned long) batch);
+}
+
+void rcu_table_freelist_finish(void)
+{
+        struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist);
+
+        if (!batch)
+                return;
+        call_rcu(&batch->rcu, rcu_table_freelist_callback);
+        __get_cpu_var(rcu_table_freelist) = NULL;
+}
+
+static void smp_sync(void *arg)
+{
+}
+
 #ifndef CONFIG_64BIT
 #define ALLOC_ORDER	1
 #define TABLES_PER_PAGE	4
@@ -78,25 +140,55 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
                 }
                 page->index = page_to_phys(shadow);
         }
-        spin_lock(&mm->context.list_lock);
+        spin_lock_bh(&mm->context.list_lock);
         list_add(&page->lru, &mm->context.crst_list);
-        spin_unlock(&mm->context.list_lock);
+        spin_unlock_bh(&mm->context.list_lock);
         return (unsigned long *) page_to_phys(page);
 }
 
-void crst_table_free(struct mm_struct *mm, unsigned long *table)
+static void __crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
         unsigned long *shadow = get_shadow_table(table);
-        struct page *page = virt_to_page(table);
 
-        spin_lock(&mm->context.list_lock);
-        list_del(&page->lru);
-        spin_unlock(&mm->context.list_lock);
         if (shadow)
                 free_pages((unsigned long) shadow, ALLOC_ORDER);
         free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+        struct page *page = virt_to_page(table);
+
+        spin_lock_bh(&mm->context.list_lock);
+        list_del(&page->lru);
+        spin_unlock_bh(&mm->context.list_lock);
+        __crst_table_free(mm, table);
+}
+
+void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table)
+{
+        struct rcu_table_freelist *batch;
+        struct page *page = virt_to_page(table);
+
+        spin_lock_bh(&mm->context.list_lock);
+        list_del(&page->lru);
+        spin_unlock_bh(&mm->context.list_lock);
+        if (atomic_read(&mm->mm_users) < 2 &&
+            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+                __crst_table_free(mm, table);
+                return;
+        }
+        batch = rcu_table_freelist_get(mm);
+        if (!batch) {
+                smp_call_function(smp_sync, NULL, 1);
+                __crst_table_free(mm, table);
+                return;
+        }
+        batch->table[--batch->crst_index] = table;
+        if (batch->pgt_index >= batch->crst_index)
+                rcu_table_freelist_finish();
+}
+
 #ifdef CONFIG_64BIT
 int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
 {
@@ -108,7 +200,7 @@ repeat:
         table = crst_table_alloc(mm, mm->context.noexec);
         if (!table)
                 return -ENOMEM;
-        spin_lock(&mm->page_table_lock);
+        spin_lock_bh(&mm->page_table_lock);
         if (mm->context.asce_limit < limit) {
                 pgd = (unsigned long *) mm->pgd;
                 if (mm->context.asce_limit <= (1UL << 31)) {
@@ -130,7 +222,7 @@ repeat:
                 mm->task_size = mm->context.asce_limit;
                 table = NULL;
         }
-        spin_unlock(&mm->page_table_lock);
+        spin_unlock_bh(&mm->page_table_lock);
         if (table)
                 crst_table_free(mm, table);
         if (mm->context.asce_limit < limit)
@@ -182,7 +274,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
         unsigned long bits;
 
         bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
-        spin_lock(&mm->context.list_lock);
+        spin_lock_bh(&mm->context.list_lock);
         page = NULL;
         if (!list_empty(&mm->context.pgtable_list)) {
                 page = list_first_entry(&mm->context.pgtable_list,
@@ -191,7 +283,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
                         page = NULL;
         }
         if (!page) {
-                spin_unlock(&mm->context.list_lock);
+                spin_unlock_bh(&mm->context.list_lock);
                 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
                 if (!page)
                         return NULL;
@@ -202,7 +294,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
                         clear_table_pgstes(table);
                 else
                         clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
-                spin_lock(&mm->context.list_lock);
+                spin_lock_bh(&mm->context.list_lock);
                 list_add(&page->lru, &mm->context.pgtable_list);
         }
         table = (unsigned long *) page_to_phys(page);
@@ -213,10 +305,25 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
         page->flags |= bits;
         if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
                 list_move_tail(&page->lru, &mm->context.pgtable_list);
-        spin_unlock(&mm->context.list_lock);
+        spin_unlock_bh(&mm->context.list_lock);
         return table;
 }
 
+static void __page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+        struct page *page;
+        unsigned long bits;
+
+        bits = ((unsigned long) table) & 15;
+        table = (unsigned long *)(((unsigned long) table) ^ bits);
+        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+        page->flags ^= bits;
+        if (!(page->flags & FRAG_MASK)) {
+                pgtable_page_dtor(page);
+                __free_page(page);
+        }
+}
+
 void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
         struct page *page;
@@ -225,7 +332,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
         bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
         bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
         page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-        spin_lock(&mm->context.list_lock);
+        spin_lock_bh(&mm->context.list_lock);
         page->flags ^= bits;
         if (page->flags & FRAG_MASK) {
                 /* Page now has some free pgtable fragments. */
@@ -234,18 +341,48 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
         } else
                 /* All fragments of the 4K page have been freed. */
                 list_del(&page->lru);
-        spin_unlock(&mm->context.list_lock);
+        spin_unlock_bh(&mm->context.list_lock);
         if (page) {
                 pgtable_page_dtor(page);
                 __free_page(page);
         }
 }
 
+void page_table_free_rcu(struct mm_struct *mm, unsigned long *table)
+{
+        struct rcu_table_freelist *batch;
+        struct page *page;
+        unsigned long bits;
+
+        if (atomic_read(&mm->mm_users) < 2 &&
+            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+                page_table_free(mm, table);
+                return;
+        }
+        batch = rcu_table_freelist_get(mm);
+        if (!batch) {
+                smp_call_function(smp_sync, NULL, 1);
+                page_table_free(mm, table);
+                return;
+        }
+        bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
+        bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+        spin_lock_bh(&mm->context.list_lock);
+        /* Delayed freeing with rcu prevents reuse of pgtable fragments */
+        list_del_init(&page->lru);
+        spin_unlock_bh(&mm->context.list_lock);
+        table = (unsigned long *)(((unsigned long) table) | bits);
+        batch->table[batch->pgt_index++] = table;
+        if (batch->pgt_index >= batch->crst_index)
+                rcu_table_freelist_finish();
+}
+
 void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 {
         struct page *page;
 
-        spin_lock(&mm->context.list_lock);
+        spin_lock_bh(&mm->context.list_lock);
         /* Free shadow region and segment tables. */
         list_for_each_entry(page, &mm->context.crst_list, lru)
                 if (page->index) {
@@ -255,7 +392,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
         /* "Free" second halves of page tables. */
         list_for_each_entry(page, &mm->context.pgtable_list, lru)
                 page->flags &= ~SECOND_HALVES;
-        spin_unlock(&mm->context.list_lock);
+        spin_unlock_bh(&mm->context.list_lock);
         mm->context.noexec = 0;
         update_mm(mm, tsk);
 }