Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r--	arch/s390/mm/pgtable.c	238
1 file changed, 141 insertions(+), 97 deletions(-)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8d999249d357..37a23c223705 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/quicklist.h>
+#include <linux/rcupdate.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -25,30 +26,10 @@
 
 #ifndef CONFIG_64BIT
 #define ALLOC_ORDER 1
-#define TABLES_PER_PAGE 4
-#define FRAG_MASK 15UL
-#define SECOND_HALVES 10UL
-
-void clear_table_pgstes(unsigned long *table)
-{
-	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
-	memset(table + 256, 0, PAGE_SIZE/4);
-	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
-	memset(table + 768, 0, PAGE_SIZE/4);
-}
-
+#define FRAG_MASK 0x0f
 #else
 #define ALLOC_ORDER 2
-#define TABLES_PER_PAGE 2
-#define FRAG_MASK 3UL
-#define SECOND_HALVES 2UL
-
-void clear_table_pgstes(unsigned long *table)
-{
-	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
-	memset(table + 256, 0, PAGE_SIZE/2);
-}
-
+#define FRAG_MASK 0x03
 #endif
 
 unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
@@ -63,37 +44,17 @@ static int __init parse_vmalloc(char *arg)
 }
 early_param("vmalloc", parse_vmalloc);
 
-unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
+unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
 
 	if (!page)
 		return NULL;
-	page->index = 0;
-	if (noexec) {
-		struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
-		if (!shadow) {
-			__free_pages(page, ALLOC_ORDER);
-			return NULL;
-		}
-		page->index = page_to_phys(shadow);
-	}
-	spin_lock(&mm->context.list_lock);
-	list_add(&page->lru, &mm->context.crst_list);
-	spin_unlock(&mm->context.list_lock);
 	return (unsigned long *) page_to_phys(page);
 }
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
-	unsigned long *shadow = get_shadow_table(table);
-	struct page *page = virt_to_page(table);
-
-	spin_lock(&mm->context.list_lock);
-	list_del(&page->lru);
-	spin_unlock(&mm->context.list_lock);
-	if (shadow)
-		free_pages((unsigned long) shadow, ALLOC_ORDER);
 	free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
@@ -105,10 +66,10 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
 
 	BUG_ON(limit > (1UL << 53));
 repeat:
-	table = crst_table_alloc(mm, mm->context.noexec);
+	table = crst_table_alloc(mm);
 	if (!table)
 		return -ENOMEM;
-	spin_lock(&mm->page_table_lock);
+	spin_lock_bh(&mm->page_table_lock);
 	if (mm->context.asce_limit < limit) {
 		pgd = (unsigned long *) mm->pgd;
 		if (mm->context.asce_limit <= (1UL << 31)) {
@@ -130,7 +91,7 @@ repeat:
 		mm->task_size = mm->context.asce_limit;
 		table = NULL;
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock_bh(&mm->page_table_lock);
 	if (table)
 		crst_table_free(mm, table);
 	if (mm->context.asce_limit < limit)
@@ -172,94 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 }
 #endif
 
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+	unsigned int old, new;
+
+	do {
+		old = atomic_read(v);
+		new = old ^ bits;
+	} while (atomic_cmpxchg(v, old, new) != old);
+	return new;
+}
+
 /*
  * page table entry allocation/free routines.
  */
+#ifdef CONFIG_PGSTE
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+{
+	struct page *page;
+	unsigned long *table;
+
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	pgtable_page_ctor(page);
+	atomic_set(&page->_mapcount, 3);
+	table = (unsigned long *) page_to_phys(page);
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	return table;
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+	struct page *page;
+
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	pgtable_page_ctor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
+}
+#endif
+
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
 	struct page *page;
 	unsigned long *table;
-	unsigned long bits;
+	unsigned int mask, bit;
 
-	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
-	spin_lock(&mm->context.list_lock);
-	page = NULL;
+#ifdef CONFIG_PGSTE
+	if (mm_has_pgste(mm))
+		return page_table_alloc_pgste(mm);
+#endif
+	/* Allocate fragments of a 4K page as 1K/2K page table */
+	spin_lock_bh(&mm->context.list_lock);
+	mask = FRAG_MASK;
 	if (!list_empty(&mm->context.pgtable_list)) {
 		page = list_first_entry(&mm->context.pgtable_list,
 					struct page, lru);
-		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
-			page = NULL;
+		table = (unsigned long *) page_to_phys(page);
+		mask = atomic_read(&page->_mapcount);
+		mask = mask | (mask >> 4);
 	}
-	if (!page) {
-		spin_unlock(&mm->context.list_lock);
+	if ((mask & FRAG_MASK) == FRAG_MASK) {
+		spin_unlock_bh(&mm->context.list_lock);
 		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
 		if (!page)
 			return NULL;
 		pgtable_page_ctor(page);
-		page->flags &= ~FRAG_MASK;
+		atomic_set(&page->_mapcount, 1);
 		table = (unsigned long *) page_to_phys(page);
-		if (mm->context.has_pgste)
-			clear_table_pgstes(table);
-		else
-			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
-		spin_lock(&mm->context.list_lock);
+		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
+	} else {
+		for (bit = 1; mask & bit; bit <<= 1)
+			table += PTRS_PER_PTE;
+		mask = atomic_xor_bits(&page->_mapcount, bit);
+		if ((mask & FRAG_MASK) == FRAG_MASK)
+			list_del(&page->lru);
 	}
-	table = (unsigned long *) page_to_phys(page);
-	while (page->flags & bits) {
-		table += 256;
-		bits <<= 1;
-	}
-	page->flags |= bits;
-	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
-		list_move_tail(&page->lru, &mm->context.pgtable_list);
-	spin_unlock(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
 void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
 	struct page *page;
-	unsigned long bits;
+	unsigned int bit, mask;
 
-	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
-	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+#ifdef CONFIG_PGSTE
+	if (mm_has_pgste(mm))
+		return page_table_free_pgste(table);
+#endif
+	/* Free 1K/2K page table fragment of a 4K page */
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	spin_lock(&mm->context.list_lock);
-	page->flags ^= bits;
-	if (page->flags & FRAG_MASK) {
-		/* Page now has some free pgtable fragments. */
-		list_move(&page->lru, &mm->context.pgtable_list);
-		page = NULL;
-	} else
-		/* All fragments of the 4K page have been freed. */
+	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
+	spin_lock_bh(&mm->context.list_lock);
+	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
 		list_del(&page->lru);
-	spin_unlock(&mm->context.list_lock);
-	if (page) {
+	mask = atomic_xor_bits(&page->_mapcount, bit);
+	if (mask & FRAG_MASK)
+		list_add(&page->lru, &mm->context.pgtable_list);
+	spin_unlock_bh(&mm->context.list_lock);
+	if (mask == 0) {
 		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
 		__free_page(page);
 	}
 }
 
-void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+
+static void __page_table_free_rcu(void *table, unsigned bit)
 {
 	struct page *page;
 
-	spin_lock(&mm->context.list_lock);
-	/* Free shadow region and segment tables. */
-	list_for_each_entry(page, &mm->context.crst_list, lru)
-		if (page->index) {
-			free_pages((unsigned long) page->index, ALLOC_ORDER);
-			page->index = 0;
-		}
-	/* "Free" second halves of page tables. */
-	list_for_each_entry(page, &mm->context.pgtable_list, lru)
-		page->flags &= ~SECOND_HALVES;
-	spin_unlock(&mm->context.list_lock);
-	mm->context.noexec = 0;
-	update_mm(mm, tsk);
+#ifdef CONFIG_PGSTE
+	if (bit == FRAG_MASK)
+		return page_table_free_pgste(table);
+#endif
+	/* Free 1K/2K page table fragment of a 4K page */
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
+	}
 }
 
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
+{
+	struct mm_struct *mm;
+	struct page *page;
+	unsigned int bit, mask;
+
+	mm = tlb->mm;
+#ifdef CONFIG_PGSTE
+	if (mm_has_pgste(mm)) {
+		table = (unsigned long *) (__pa(table) | FRAG_MASK);
+		tlb_remove_table(tlb, table);
+		return;
+	}
+#endif
+	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	spin_lock_bh(&mm->context.list_lock);
+	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+		list_del(&page->lru);
+	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
+	if (mask & FRAG_MASK)
+		list_add_tail(&page->lru, &mm->context.pgtable_list);
+	spin_unlock_bh(&mm->context.list_lock);
+	table = (unsigned long *) (__pa(table) | (bit << 4));
+	tlb_remove_table(tlb, table);
+}
+
+void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long) _table & PAGE_MASK);
+	unsigned type = (unsigned long) _table & ~PAGE_MASK;
+
+	if (type)
+		__page_table_free_rcu(table, type);
+	else
+		free_pages((unsigned long) table, ALLOC_ORDER);
+}
+
+#endif
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
@@ -273,7 +315,7 @@ int s390_enable_sie(void)
 		return -EINVAL;
 
 	/* Do we have pgstes? if yes, we are done */
-	if (tsk->mm->context.has_pgste)
+	if (mm_has_pgste(tsk->mm))
 		return 0;
 
 	/* lets check if we are allowed to replace the mm */
@@ -312,6 +354,8 @@ int s390_enable_sie(void)
 	tsk->mm = tsk->active_mm = mm;
 	preempt_disable();
 	update_mm(mm, tsk);
+	atomic_inc(&mm->context.attach_count);
+	atomic_dec(&old_mm->context.attach_count);
 	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
 	preempt_enable();
 	task_unlock(tsk);