Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r--	arch/s390/mm/pgtable.c	| 238
1 file changed, 141 insertions(+), 97 deletions(-)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8d999249d357..37a23c223705 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/quicklist.h>
+#include <linux/rcupdate.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -25,30 +26,10 @@
 
 #ifndef CONFIG_64BIT
 #define ALLOC_ORDER 1
-#define TABLES_PER_PAGE 4
-#define FRAG_MASK 15UL
-#define SECOND_HALVES 10UL
-
-void clear_table_pgstes(unsigned long *table)
-{
-        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
-        memset(table + 256, 0, PAGE_SIZE/4);
-        clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
-        memset(table + 768, 0, PAGE_SIZE/4);
-}
-
+#define FRAG_MASK 0x0f
 #else
 #define ALLOC_ORDER 2
-#define TABLES_PER_PAGE 2
-#define FRAG_MASK 3UL
-#define SECOND_HALVES 2UL
-
-void clear_table_pgstes(unsigned long *table)
-{
-        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
-        memset(table + 256, 0, PAGE_SIZE/2);
-}
-
+#define FRAG_MASK 0x03
 #endif
 
 unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
@@ -63,37 +44,17 @@ static int __init parse_vmalloc(char *arg)
 }
 early_param("vmalloc", parse_vmalloc);
 
-unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
+unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
         struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
 
         if (!page)
                 return NULL;
-        page->index = 0;
-        if (noexec) {
-                struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
-                if (!shadow) {
-                        __free_pages(page, ALLOC_ORDER);
-                        return NULL;
-                }
-                page->index = page_to_phys(shadow);
-        }
-        spin_lock(&mm->context.list_lock);
-        list_add(&page->lru, &mm->context.crst_list);
-        spin_unlock(&mm->context.list_lock);
         return (unsigned long *) page_to_phys(page);
 }
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
-        unsigned long *shadow = get_shadow_table(table);
-        struct page *page = virt_to_page(table);
-
-        spin_lock(&mm->context.list_lock);
-        list_del(&page->lru);
-        spin_unlock(&mm->context.list_lock);
-        if (shadow)
-                free_pages((unsigned long) shadow, ALLOC_ORDER);
         free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
@@ -105,10 +66,10 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
 
         BUG_ON(limit > (1UL << 53));
 repeat:
-        table = crst_table_alloc(mm, mm->context.noexec);
+        table = crst_table_alloc(mm);
         if (!table)
                 return -ENOMEM;
-        spin_lock(&mm->page_table_lock);
+        spin_lock_bh(&mm->page_table_lock);
         if (mm->context.asce_limit < limit) {
                 pgd = (unsigned long *) mm->pgd;
                 if (mm->context.asce_limit <= (1UL << 31)) {
@@ -130,7 +91,7 @@ repeat:
                 mm->task_size = mm->context.asce_limit;
                 table = NULL;
         }
-        spin_unlock(&mm->page_table_lock);
+        spin_unlock_bh(&mm->page_table_lock);
         if (table)
                 crst_table_free(mm, table);
         if (mm->context.asce_limit < limit)
@@ -172,94 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 }
 #endif
 
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+        unsigned int old, new;
+
+        do {
+                old = atomic_read(v);
+                new = old ^ bits;
+        } while (atomic_cmpxchg(v, old, new) != old);
+        return new;
+}
+
 /*
  * page table entry allocation/free routines.
  */
+#ifdef CONFIG_PGSTE
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+{
+        struct page *page;
+        unsigned long *table;
+
+        page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+        if (!page)
+                return NULL;
+        pgtable_page_ctor(page);
+        atomic_set(&page->_mapcount, 3);
+        table = (unsigned long *) page_to_phys(page);
+        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+        clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+        return table;
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+        struct page *page;
+
+        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+        pgtable_page_ctor(page);
+        atomic_set(&page->_mapcount, -1);
+        __free_page(page);
+}
+#endif
+
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
         struct page *page;
         unsigned long *table;
-        unsigned long bits;
+        unsigned int mask, bit;
 
-        bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
-        spin_lock(&mm->context.list_lock);
-        page = NULL;
+#ifdef CONFIG_PGSTE
+        if (mm_has_pgste(mm))
+                return page_table_alloc_pgste(mm);
+#endif
+        /* Allocate fragments of a 4K page as 1K/2K page table */
+        spin_lock_bh(&mm->context.list_lock);
+        mask = FRAG_MASK;
         if (!list_empty(&mm->context.pgtable_list)) {
                 page = list_first_entry(&mm->context.pgtable_list,
                                         struct page, lru);
-                if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
-                        page = NULL;
+                table = (unsigned long *) page_to_phys(page);
+                mask = atomic_read(&page->_mapcount);
+                mask = mask | (mask >> 4);
         }
-        if (!page) {
-                spin_unlock(&mm->context.list_lock);
+        if ((mask & FRAG_MASK) == FRAG_MASK) {
+                spin_unlock_bh(&mm->context.list_lock);
                 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
                 if (!page)
                         return NULL;
                 pgtable_page_ctor(page);
-                page->flags &= ~FRAG_MASK;
+                atomic_set(&page->_mapcount, 1);
                 table = (unsigned long *) page_to_phys(page);
-                if (mm->context.has_pgste)
-                        clear_table_pgstes(table);
-                else
-                        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
-                spin_lock(&mm->context.list_lock);
+                clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+                spin_lock_bh(&mm->context.list_lock);
                 list_add(&page->lru, &mm->context.pgtable_list);
+        } else {
+                for (bit = 1; mask & bit; bit <<= 1)
+                        table += PTRS_PER_PTE;
+                mask = atomic_xor_bits(&page->_mapcount, bit);
+                if ((mask & FRAG_MASK) == FRAG_MASK)
+                        list_del(&page->lru);
         }
-        table = (unsigned long *) page_to_phys(page);
-        while (page->flags & bits) {
-                table += 256;
-                bits <<= 1;
-        }
-        page->flags |= bits;
-        if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
-                list_move_tail(&page->lru, &mm->context.pgtable_list);
-        spin_unlock(&mm->context.list_lock);
+        spin_unlock_bh(&mm->context.list_lock);
         return table;
 }
 
 void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
         struct page *page;
-        unsigned long bits;
+        unsigned int bit, mask;
 
-        bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
-        bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+#ifdef CONFIG_PGSTE
+        if (mm_has_pgste(mm))
+                return page_table_free_pgste(table);
+#endif
+        /* Free 1K/2K page table fragment of a 4K page */
         page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-        spin_lock(&mm->context.list_lock);
-        page->flags ^= bits;
-        if (page->flags & FRAG_MASK) {
-                /* Page now has some free pgtable fragments. */
-                list_move(&page->lru, &mm->context.pgtable_list);
-                page = NULL;
-        } else
-                /* All fragments of the 4K page have been freed. */
+        bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
+        spin_lock_bh(&mm->context.list_lock);
+        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
                 list_del(&page->lru);
-        spin_unlock(&mm->context.list_lock);
-        if (page) {
+        mask = atomic_xor_bits(&page->_mapcount, bit);
+        if (mask & FRAG_MASK)
+                list_add(&page->lru, &mm->context.pgtable_list);
+        spin_unlock_bh(&mm->context.list_lock);
+        if (mask == 0) {
                 pgtable_page_dtor(page);
+                atomic_set(&page->_mapcount, -1);
                 __free_page(page);
         }
 }
 
-void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+
+static void __page_table_free_rcu(void *table, unsigned bit)
 {
         struct page *page;
 
-        spin_lock(&mm->context.list_lock);
-        /* Free shadow region and segment tables. */
-        list_for_each_entry(page, &mm->context.crst_list, lru)
-                if (page->index) {
-                        free_pages((unsigned long) page->index, ALLOC_ORDER);
-                        page->index = 0;
-                }
-        /* "Free" second halves of page tables. */
-        list_for_each_entry(page, &mm->context.pgtable_list, lru)
-                page->flags &= ~SECOND_HALVES;
-        spin_unlock(&mm->context.list_lock);
-        mm->context.noexec = 0;
-        update_mm(mm, tsk);
+#ifdef CONFIG_PGSTE
+        if (bit == FRAG_MASK)
+                return page_table_free_pgste(table);
+#endif
+        /* Free 1K/2K page table fragment of a 4K page */
+        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+        if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
+                pgtable_page_dtor(page);
+                atomic_set(&page->_mapcount, -1);
+                __free_page(page);
+        }
 }
 
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
+{
+        struct mm_struct *mm;
+        struct page *page;
+        unsigned int bit, mask;
+
+        mm = tlb->mm;
+#ifdef CONFIG_PGSTE
+        if (mm_has_pgste(mm)) {
+                table = (unsigned long *) (__pa(table) | FRAG_MASK);
+                tlb_remove_table(tlb, table);
+                return;
+        }
+#endif
+        bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+        spin_lock_bh(&mm->context.list_lock);
+        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+                list_del(&page->lru);
+        mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
+        if (mask & FRAG_MASK)
+                list_add_tail(&page->lru, &mm->context.pgtable_list);
+        spin_unlock_bh(&mm->context.list_lock);
+        table = (unsigned long *) (__pa(table) | (bit << 4));
+        tlb_remove_table(tlb, table);
+}
+
+void __tlb_remove_table(void *_table)
+{
+        void *table = (void *)((unsigned long) _table & PAGE_MASK);
+        unsigned type = (unsigned long) _table & ~PAGE_MASK;
+
+        if (type)
+                __page_table_free_rcu(table, type);
+        else
+                free_pages((unsigned long) table, ALLOC_ORDER);
+}
+
+#endif
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
@@ -273,7 +315,7 @@ int s390_enable_sie(void)
                 return -EINVAL;
 
         /* Do we have pgstes? if yes, we are done */
-        if (tsk->mm->context.has_pgste)
+        if (mm_has_pgste(tsk->mm))
                 return 0;
 
         /* lets check if we are allowed to replace the mm */
@@ -312,6 +354,8 @@ int s390_enable_sie(void)
         tsk->mm = tsk->active_mm = mm;
         preempt_disable();
         update_mm(mm, tsk);
+        atomic_inc(&mm->context.attach_count);
+        atomic_dec(&old_mm->context.attach_count);
         cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
         preempt_enable();
         task_unlock(tsk);
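
Note on the new fragment bookkeeping (not part of the patch above): the old page->flags fragment bits are replaced by a bit mask kept in page->_mapcount and updated with atomic_xor_bits(); each set low bit marks one 1K/2K page-table fragment of a 4K page as in use. The following minimal, self-contained userspace sketch illustrates that scheme, assuming the 64-bit layout (two 2K fragments per page, FRAG_MASK 0x03); the names xor_bits, frag_alloc and frag_free are invented for the illustration only and do not exist in the kernel.

/*
 * Userspace sketch of the fragment mask handling.
 * xor_bits() stands in for atomic_xor_bits() on page->_mapcount.
 */
#include <stdio.h>

#define FRAG_MASK 0x03          /* two 2K fragments per 4K page (64-bit layout) */

static unsigned int xor_bits(unsigned int *v, unsigned int bits)
{
        *v ^= bits;
        return *v;
}

/* Pick the first free fragment, mark it used, return its index (or -1). */
static int frag_alloc(unsigned int *mask)
{
        unsigned int bit;
        int index = 0;

        if ((*mask & FRAG_MASK) == FRAG_MASK)
                return -1;              /* page full: caller would allocate a fresh page */
        for (bit = 1; *mask & bit; bit <<= 1)
                index++;                /* skip fragments that are already in use */
        xor_bits(mask, bit);            /* set the bit for this fragment */
        return index;
}

/* Release fragment 'index'; a result of 0 means the whole 4K page is empty. */
static unsigned int frag_free(unsigned int *mask, int index)
{
        return xor_bits(mask, 1U << index);
}

int main(void)
{
        unsigned int mask = 0;          /* fresh page, no fragments used */
        int a = frag_alloc(&mask);      /* 0, mask becomes 0x01 */
        int b = frag_alloc(&mask);      /* 1, mask becomes 0x03 (full) */

        printf("allocated %d and %d, mask 0x%02x\n", a, b, mask);
        printf("freed %d, mask 0x%02x\n", a, frag_free(&mask, a));
        printf("freed %d, mask 0x%02x\n", b, frag_free(&mask, b));
        return 0;
}

In the RCU variant shown in the patch, the fragment's bit (shifted into the upper nibble) is additionally encoded in the low bits of the table address handed to tlb_remove_table(), which is how __tlb_remove_table() later distinguishes page-table fragments from full CRST pages.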