author     Martin Schwidefsky <schwidefsky@de.ibm.com>              2010-10-25 10:10:11 -0400
committer  Martin Schwidefsky <sky@mschwide.boeblingen.de.ibm.com>  2010-10-25 10:10:15 -0400
commit     80217147a3d80c8a4e48f06e2f6e965455f3fe2a (patch)
tree       b419ae9ee3ab0e5b92c0ed2a30ff59b76d6a4978 /arch/s390/mm/pgtable.c
parent     87799ebab760dd1460f6e4193d4f71ba416d1451 (diff)
[S390] lockless get_user_pages_fast()
Implement get_user_pages_fast without locking in the fastpath on s390.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
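
Why this change needs deferred freeing at all: get_user_pages_fast() pins user pages by walking the page tables without taking mmap_sem in the fast path. That walk is only safe if a page table can never be freed and reused while a walker might still be traversing it, which is the guarantee the RCU batching below provides. As a caller-side reminder of the interface, here is a minimal hypothetical sketch (pin_user_buffer and uaddr are illustrative names; the four-argument get_user_pages_fast() is the signature of this kernel generation):

#include <linux/mm.h>

/* Hypothetical caller: pin one user page for I/O, then drop the
 * reference. The fast path works without mmap_sem, which is exactly
 * why the page-table freeing below must be RCU-deferred. */
static int pin_user_buffer(unsigned long uaddr)
{
	struct page *page;
	int ret;

	ret = get_user_pages_fast(uaddr, 1, 1 /* write */, &page);
	if (ret < 1)
		return ret ? ret : -EFAULT;
	/* ... access the pinned page ... */
	put_page(page);
	return 0;
}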
Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r--  arch/s390/mm/pgtable.c  171
1 file changed, 154 insertions(+), 17 deletions(-)
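
How the deferral is organized (see the diff below): crst_table_free_rcu() and page_table_free_rcu() do not release a table immediately but stash it in a per-CPU, page-sized batch. The batch array is filled from both ends, 2K page-table fragments from the front (pgt_index counts up) and 4-page crst tables from the back (crst_index counts down); when the two indices meet, the batch is handed to call_rcu(), so everything in it is freed only after a grace period. The lock conversions to spin_lock_bh()/spin_unlock_bh() accompany this, presumably to keep the list handling safe against the softirq context in which RCU callbacks run. A small userspace-style sketch of the two-ended layout (simplified, illustrative names, not the kernel structure itself):

#include <stdbool.h>

#define FREELIST_SIZE 508	/* roughly (PAGE_SIZE - header) / sizeof(long) */

struct freelist_batch {
	unsigned int pgt_index;		/* grows up from 0 */
	unsigned int crst_index;	/* shrinks down from FREELIST_SIZE */
	void *table[FREELIST_SIZE];
};

/* Each helper returns true when the batch is full, i.e. the two
 * indices have met and the batch must be flushed via call_rcu(). */
bool batch_add_pgt(struct freelist_batch *b, void *table)
{
	b->table[b->pgt_index++] = table;
	return b->pgt_index >= b->crst_index;
}

bool batch_add_crst(struct freelist_batch *b, void *table)
{
	b->table[--b->crst_index] = table;
	return b->pgt_index >= b->crst_index;
}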
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8d999249d357..19338d228c9b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/quicklist.h>
+#include <linux/rcupdate.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -23,6 +24,67 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
+struct rcu_table_freelist {
+	struct rcu_head rcu;
+	struct mm_struct *mm;
+	unsigned int pgt_index;
+	unsigned int crst_index;
+	unsigned long *table[0];
+};
+
+#define RCU_FREELIST_SIZE \
+	((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \
+	  / sizeof(unsigned long))
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
+
+static void __page_table_free(struct mm_struct *mm, unsigned long *table);
+static void __crst_table_free(struct mm_struct *mm, unsigned long *table);
+
+static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm)
+{
+	struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist);
+	struct rcu_table_freelist *batch = *batchp;
+
+	if (batch)
+		return batch;
+	batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC);
+	if (batch) {
+		batch->mm = mm;
+		batch->pgt_index = 0;
+		batch->crst_index = RCU_FREELIST_SIZE;
+		*batchp = batch;
+	}
+	return batch;
+}
+
+static void rcu_table_freelist_callback(struct rcu_head *head)
+{
+	struct rcu_table_freelist *batch =
+		container_of(head, struct rcu_table_freelist, rcu);
+
+	while (batch->pgt_index > 0)
+		__page_table_free(batch->mm, batch->table[--batch->pgt_index]);
+	while (batch->crst_index < RCU_FREELIST_SIZE)
+		__crst_table_free(batch->mm, batch->table[batch->crst_index++]);
+	free_page((unsigned long) batch);
+}
+
+void rcu_table_freelist_finish(void)
+{
+	struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist);
+
+	if (!batch)
+		return;
+	call_rcu(&batch->rcu, rcu_table_freelist_callback);
+	__get_cpu_var(rcu_table_freelist) = NULL;
+}
+
+static void smp_sync(void *arg)
+{
+}
+
 #ifndef CONFIG_64BIT
 #define ALLOC_ORDER	1
 #define TABLES_PER_PAGE	4
@@ -78,25 +140,55 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
 		}
 		page->index = page_to_phys(shadow);
 	}
-	spin_lock(&mm->context.list_lock);
+	spin_lock_bh(&mm->context.list_lock);
 	list_add(&page->lru, &mm->context.crst_list);
-	spin_unlock(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.list_lock);
 	return (unsigned long *) page_to_phys(page);
 }
 
-void crst_table_free(struct mm_struct *mm, unsigned long *table)
+static void __crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
 	unsigned long *shadow = get_shadow_table(table);
-	struct page *page = virt_to_page(table);
 
-	spin_lock(&mm->context.list_lock);
-	list_del(&page->lru);
-	spin_unlock(&mm->context.list_lock);
 	if (shadow)
 		free_pages((unsigned long) shadow, ALLOC_ORDER);
 	free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	struct page *page = virt_to_page(table);
+
+	spin_lock_bh(&mm->context.list_lock);
+	list_del(&page->lru);
+	spin_unlock_bh(&mm->context.list_lock);
+	__crst_table_free(mm, table);
+}
+
+void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table)
+{
+	struct rcu_table_freelist *batch;
+	struct page *page = virt_to_page(table);
+
+	spin_lock_bh(&mm->context.list_lock);
+	list_del(&page->lru);
+	spin_unlock_bh(&mm->context.list_lock);
+	if (atomic_read(&mm->mm_users) < 2 &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+		__crst_table_free(mm, table);
+		return;
+	}
+	batch = rcu_table_freelist_get(mm);
+	if (!batch) {
+		smp_call_function(smp_sync, NULL, 1);
+		__crst_table_free(mm, table);
+		return;
+	}
+	batch->table[--batch->crst_index] = table;
+	if (batch->pgt_index >= batch->crst_index)
+		rcu_table_freelist_finish();
+}
+
 #ifdef CONFIG_64BIT
 int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
 {
@@ -108,7 +200,7 @@ repeat:
 	table = crst_table_alloc(mm, mm->context.noexec);
 	if (!table)
 		return -ENOMEM;
-	spin_lock(&mm->page_table_lock);
+	spin_lock_bh(&mm->page_table_lock);
 	if (mm->context.asce_limit < limit) {
 		pgd = (unsigned long *) mm->pgd;
 		if (mm->context.asce_limit <= (1UL << 31)) {
@@ -130,7 +222,7 @@ repeat:
 		mm->task_size = mm->context.asce_limit;
 		table = NULL;
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock_bh(&mm->page_table_lock);
 	if (table)
 		crst_table_free(mm, table);
 	if (mm->context.asce_limit < limit)
@@ -182,7 +274,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	unsigned long bits;
 
 	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
-	spin_lock(&mm->context.list_lock);
+	spin_lock_bh(&mm->context.list_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {
 		page = list_first_entry(&mm->context.pgtable_list,
@@ -191,7 +283,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 			page = NULL;
 	}
 	if (!page) {
-		spin_unlock(&mm->context.list_lock);
+		spin_unlock_bh(&mm->context.list_lock);
 		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
 		if (!page)
 			return NULL;
@@ -202,7 +294,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 			clear_table_pgstes(table);
 		else
 			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
-		spin_lock(&mm->context.list_lock);
+		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}
 	table = (unsigned long *) page_to_phys(page);
@@ -213,10 +305,25 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	page->flags |= bits;
 	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
 		list_move_tail(&page->lru, &mm->context.pgtable_list);
-	spin_unlock(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
+static void __page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	struct page *page;
+	unsigned long bits;
+
+	bits = ((unsigned long) table) & 15;
+	table = (unsigned long *)(((unsigned long) table) ^ bits);
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	page->flags ^= bits;
+	if (!(page->flags & FRAG_MASK)) {
+		pgtable_page_dtor(page);
+		__free_page(page);
+	}
+}
+
 void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
 	struct page *page;
@@ -225,7 +332,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	spin_lock(&mm->context.list_lock);
+	spin_lock_bh(&mm->context.list_lock);
 	page->flags ^= bits;
 	if (page->flags & FRAG_MASK) {
 		/* Page now has some free pgtable fragments. */
@@ -234,18 +341,48 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	} else
 		/* All fragments of the 4K page have been freed. */
 		list_del(&page->lru);
-	spin_unlock(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.list_lock);
 	if (page) {
 		pgtable_page_dtor(page);
 		__free_page(page);
 	}
 }
 
+void page_table_free_rcu(struct mm_struct *mm, unsigned long *table)
+{
+	struct rcu_table_freelist *batch;
+	struct page *page;
+	unsigned long bits;
+
+	if (atomic_read(&mm->mm_users) < 2 &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+		page_table_free(mm, table);
+		return;
+	}
+	batch = rcu_table_freelist_get(mm);
+	if (!batch) {
+		smp_call_function(smp_sync, NULL, 1);
+		page_table_free(mm, table);
+		return;
+	}
+	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
+	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	spin_lock_bh(&mm->context.list_lock);
+	/* Delayed freeing with rcu prevents reuse of pgtable fragments */
+	list_del_init(&page->lru);
+	spin_unlock_bh(&mm->context.list_lock);
+	table = (unsigned long *)(((unsigned long) table) | bits);
+	batch->table[batch->pgt_index++] = table;
+	if (batch->pgt_index >= batch->crst_index)
+		rcu_table_freelist_finish();
+}
+
 void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 {
 	struct page *page;
 
-	spin_lock(&mm->context.list_lock);
+	spin_lock_bh(&mm->context.list_lock);
 	/* Free shadow region and segment tables. */
 	list_for_each_entry(page, &mm->context.crst_list, lru)
 		if (page->index) {
@@ -255,7 +392,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 	/* "Free" second halves of page tables. */
 	list_for_each_entry(page, &mm->context.pgtable_list, lru)
 		page->flags &= ~SECOND_HALVES;
-	spin_unlock(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.list_lock);
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
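
Two details of the fallback logic in crst_table_free_rcu() and page_table_free_rcu() are worth spelling out. First, if the mm has at most one user and is active only on the current CPU, no other CPU can be walking its page tables, so the table is freed directly. Second, if no batch page can be allocated (the GFP_ATOMIC allocation failed), the code falls back to smp_call_function(smp_sync, NULL, 1): assuming the lockless walker runs with interrupts disabled, the usual gup_fast pattern, every other CPU can only take this IPI outside of a walk, so once the call returns no walker can still hold a reference and an immediate free is safe. A sketch of that fallback path, with table_free_now() as a hypothetical wrapper name for illustration:

/* With wait == 1, smp_call_function() returns only after all other
 * CPUs have executed smp_sync(). A CPU takes the IPI only with
 * interrupts enabled, i.e. never in the middle of an
 * interrupts-disabled gup_fast walk, so the IPI round trip acts as
 * a lightweight grace period. */
static void smp_sync(void *arg)
{
}

static void table_free_now(struct mm_struct *mm, unsigned long *table)
{
	smp_call_function(smp_sync, NULL, 1);	/* wait for all other CPUs */
	__crst_table_free(mm, table);		/* no walker can still see it */
}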