author     Martin Schwidefsky <schwidefsky@de.ibm.com>  2011-07-24 04:48:20 -0400
committer  Martin Schwidefsky <schwidefsky@de.ibm.com>  2011-07-24 04:48:21 -0400
commit     e5992f2e6c3829cd43dbc4438ee13dcd6506f7f3 (patch)
tree       b2d5d9fbfc610bd788532eafcd4b56e9ef7dbdd3 /arch
parent     144d634a21caff1d54cb4bb0d073774e88130045 (diff)
[S390] kvm guest address space mapping
Add code that allows KVM to control the virtual memory layout that is seen by a guest. The guest address space uses a second page table that shares the last-level pte tables with the process page table. If a page is unmapped from the process page table, it is automatically unmapped from the guest page table as well.

The guest address space mapping starts out empty; KVM can map any individual 1MB segment from the process virtual memory to any 1MB-aligned location in the guest virtual memory. If a target segment in the process virtual memory does not exist, or is unmapped while a guest mapping exists, the desired target address is stored as an invalid segment table entry in the guest page table. The population of the guest page table is fault driven.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
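The interface added by this patch can be driven roughly as sketched below. This is an illustrative usage sketch only: the wrapper function, its arguments and the error handling are assumptions, not part of the patch; only the gmap_* calls (gmap_alloc, gmap_map_segment, gmap_enable, gmap_disable, gmap_unmap_segment, gmap_free) come from the code introduced here.

/*
 * Hypothetical usage sketch of the gmap interface added by this patch.
 * The wrapper, its parameters and the flow around SIE are assumptions.
 */
static int run_guest_with_gmap(struct mm_struct *mm, unsigned long host_addr,
			       unsigned long guest_addr, unsigned long size)
{
	struct gmap *gmap;
	int rc;

	/* Create an initially empty guest address space for this mm. */
	gmap = gmap_alloc(mm);
	if (!gmap)
		return -ENOMEM;

	/*
	 * Map a chunk of process virtual memory into the guest address
	 * space; host_addr, guest_addr and size must be 1MB aligned.
	 * The mapping is recorded as invalid segment table entries and
	 * is populated later, fault driven, via gmap_fault().
	 */
	rc = gmap_map_segment(gmap, host_addr, guest_addr, size);
	if (rc)
		goto out_free;

	/* Switch the primary address space to the guest page table ... */
	gmap_enable(gmap);

	/* ... run the guest under SIE here ... */

	/* ... and switch back to the normal process page table. */
	gmap_disable(gmap);

	rc = gmap_unmap_segment(gmap, guest_addr, size);
out_free:
	gmap_free(gmap);
	return rc;
}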
Diffstat (limited to 'arch')
-rw-r--r--  arch/s390/include/asm/lowcore.h   |   2
-rw-r--r--  arch/s390/include/asm/mmu.h       |   4
-rw-r--r--  arch/s390/include/asm/pgalloc.h   |   7
-rw-r--r--  arch/s390/include/asm/pgtable.h   |  42
-rw-r--r--  arch/s390/include/asm/processor.h |   1
-rw-r--r--  arch/s390/include/asm/tlbflush.h  |   2
-rw-r--r--  arch/s390/kernel/asm-offsets.c    |   2
-rw-r--r--  arch/s390/mm/fault.c              |  18
-rw-r--r--  arch/s390/mm/hugetlbpage.c        |   2
-rw-r--r--  arch/s390/mm/pgtable.c            | 421
-rw-r--r--  arch/s390/mm/vmem.c               |   8
11 files changed, 473 insertions, 36 deletions
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 228cf0b295db..f26280d9e88d 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -268,7 +268,7 @@ struct _lowcore {
 	__u64	vdso_per_cpu_data;		/* 0x0358 */
 	__u64	machine_flags;			/* 0x0360 */
 	__u64	ftrace_func;			/* 0x0368 */
-	__u64	sie_hook;			/* 0x0370 */
+	__u64	gmap;				/* 0x0370 */
 	__u64	cmf_hpp;			/* 0x0378 */
 
 	/* Interrupt response block. */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 82d0847896a0..4506791adcd5 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -6,6 +6,7 @@ typedef struct {
 	unsigned int flush_mm;
 	spinlock_t list_lock;
 	struct list_head pgtable_list;
+	struct list_head gmap_list;
 	unsigned long asce_bits;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
@@ -17,6 +18,7 @@ typedef struct {
 
 #define INIT_MM_CONTEXT(name) \
 	.context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \
-	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list),
+	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
+	.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
 
 #endif
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 38e71ebcd3c2..8eef9b5b3cf4 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -20,7 +20,7 @@
 unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
 
-unsigned long *page_table_alloc(struct mm_struct *);
+unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
 void page_table_free(struct mm_struct *, unsigned long *);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 void page_table_free_rcu(struct mmu_gather *, unsigned long *);
@@ -115,6 +115,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	spin_lock_init(&mm->context.list_lock);
 	INIT_LIST_HEAD(&mm->context.pgtable_list);
+	INIT_LIST_HEAD(&mm->context.gmap_list);
 	return (pgd_t *) crst_table_alloc(mm);
 }
 #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
@@ -133,8 +134,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 /*
  * page table entry allocation/free routines.
  */
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
+#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
 
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 801fbe1d837d..519eb5f187ef 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -654,6 +654,48 @@ static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste)
 #endif
 }
 
+/**
+ * struct gmap_struct - guest address space
+ * @mm: pointer to the parent mm_struct
+ * @table: pointer to the page directory
+ * @crst_list: list of all crst tables used in the guest address space
+ */
+struct gmap {
+	struct list_head list;
+	struct mm_struct *mm;
+	unsigned long *table;
+	struct list_head crst_list;
+};
+
+/**
+ * struct gmap_rmap - reverse mapping for segment table entries
+ * @next: pointer to the next gmap_rmap structure in the list
+ * @entry: pointer to a segment table entry
+ */
+struct gmap_rmap {
+	struct list_head list;
+	unsigned long *entry;
+};
+
+/**
+ * struct gmap_pgtable - gmap information attached to a page table
+ * @vmaddr: address of the 1MB segment in the process virtual memory
+ * @mapper: list of segment table entries mapping a page table
+ */
+struct gmap_pgtable {
+	unsigned long vmaddr;
+	struct list_head mapper;
+};
+
+struct gmap *gmap_alloc(struct mm_struct *mm);
+void gmap_free(struct gmap *gmap);
+void gmap_enable(struct gmap *gmap);
+void gmap_disable(struct gmap *gmap);
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+		     unsigned long to, unsigned long length);
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long gmap_fault(unsigned long address, struct gmap *);
+
 /*
  * Certain architectures need to do special things when PTEs
  * within a page table are directly modified. Thus, the following
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 1300c3025334..55dfcc8bdc0d 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -80,6 +80,7 @@ struct thread_struct {
 	mm_segment_t mm_segment;
 	unsigned long prot_addr;	/* address of protection-excep. */
 	unsigned int trap_no;
+	unsigned long gmap_addr;	/* address of last gmap fault. */
 	struct per_regs per_user;	/* User specified PER registers */
 	struct per_event per_event;	/* Cause of the last PER trap */
 	/* pfault_wait is used to block the process on a pfault event */
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index b7a4f2eb0057..304445382382 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -80,7 +80,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
 	 * on all cpus instead of doing a local flush if the mm
 	 * only ran on the local cpu.
 	 */
-	if (MACHINE_HAS_IDTE)
+	if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list))
 		__tlb_flush_idte((unsigned long) mm->pgd |
 				 mm->context.asce_bits);
 	else
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index edfbd17d7082..05d8f38734ec 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -151,7 +151,7 @@ int main(void)
 	DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area));
 	DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr));
 	DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
-	DEFINE(__LC_SIE_HOOK, offsetof(struct _lowcore, sie_hook));
+	DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
 	DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp));
 #endif /* CONFIG_32BIT */
 	return 0;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 095f782a5512..9564fc779b27 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -303,9 +303,24 @@ static inline int do_exception(struct pt_regs *regs, int access,
 	flags = FAULT_FLAG_ALLOW_RETRY;
 	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
 		flags |= FAULT_FLAG_WRITE;
-retry:
 	down_read(&mm->mmap_sem);
 
+#ifdef CONFIG_PGSTE
+	if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
+		address = gmap_fault(address,
+				     (struct gmap *) S390_lowcore.gmap);
+		if (address == -EFAULT) {
+			fault = VM_FAULT_BADMAP;
+			goto out_up;
+		}
+		if (address == -ENOMEM) {
+			fault = VM_FAULT_OOM;
+			goto out_up;
+		}
+	}
+#endif
+
+retry:
 	fault = VM_FAULT_BADMAP;
 	vma = find_vma(mm, address);
 	if (!vma)
@@ -356,6 +371,7 @@ retry:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			down_read(&mm->mmap_sem);
 			goto retry;
 		}
 	}
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index a4d856db9154..597bb2d27c3c 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -35,7 +35,7 @@ int arch_prepare_hugepage(struct page *page)
 	if (MACHINE_HAS_HPAGE)
 		return 0;
 
-	ptep = (pte_t *) pte_alloc_one(&init_mm, address);
+	ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
 	if (!ptep)
 		return -ENOMEM;
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 37a23c223705..2adb23938a7f 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/quicklist.h>
 #include <linux/rcupdate.h>
+#include <linux/slab.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -133,30 +134,374 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 }
 #endif
 
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+#ifdef CONFIG_PGSTE
+
+/**
+ * gmap_alloc - allocate a guest address space
+ * @mm: pointer to the parent mm_struct
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_alloc(struct mm_struct *mm)
 {
-	unsigned int old, new;
+	struct gmap *gmap;
+	struct page *page;
+	unsigned long *table;
 
-	do {
-		old = atomic_read(v);
-		new = old ^ bits;
-	} while (atomic_cmpxchg(v, old, new) != old);
-	return new;
+	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+	if (!gmap)
+		goto out;
+	INIT_LIST_HEAD(&gmap->crst_list);
+	gmap->mm = mm;
+	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	if (!page)
+		goto out_free;
+	list_add(&page->lru, &gmap->crst_list);
+	table = (unsigned long *) page_to_phys(page);
+	crst_table_init(table, _REGION1_ENTRY_EMPTY);
+	gmap->table = table;
+	list_add(&gmap->list, &mm->context.gmap_list);
+	return gmap;
+
+out_free:
+	kfree(gmap);
+out:
+	return NULL;
 }
+EXPORT_SYMBOL_GPL(gmap_alloc);
 
-/*
- * page table entry allocation/free routines.
+static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
+{
+	struct gmap_pgtable *mp;
+	struct gmap_rmap *rmap;
+	struct page *page;
+
+	if (*table & _SEGMENT_ENTRY_INV)
+		return 0;
+	page = pfn_to_page(*table >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	list_for_each_entry(rmap, &mp->mapper, list) {
+		if (rmap->entry != table)
+			continue;
+		list_del(&rmap->list);
+		kfree(rmap);
+		break;
+	}
+	*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+	return 1;
+}
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_idte((unsigned long) gmap->table |
+				 _ASCE_TYPE_REGION1);
+	else
+		__tlb_flush_global();
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
  */
-#ifdef CONFIG_PGSTE
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+void gmap_free(struct gmap *gmap)
+{
+	struct page *page, *next;
+	unsigned long *table;
+	int i;
+
+
+	/* Flush tlb. */
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_idte((unsigned long) gmap->table |
+				 _ASCE_TYPE_REGION1);
+	else
+		__tlb_flush_global();
+
+	/* Free all segment & region tables. */
+	down_read(&gmap->mm->mmap_sem);
+	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
+		table = (unsigned long *) page_to_phys(page);
+		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
+			/* Remove gmap rmap structures for segment table. */
+			for (i = 0; i < PTRS_PER_PMD; i++, table++)
+				gmap_unlink_segment(gmap, table);
+		__free_pages(page, ALLOC_ORDER);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	list_del(&gmap->list);
+	kfree(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+	/* Load primary space page table origin. */
+	S390_lowcore.user_asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
+				 _ASCE_USER_BITS | __pa(gmap->table);
+	asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) );
+	S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+	/* Load primary space page table origin. */
+	S390_lowcore.user_asce =
+		gmap->mm->context.asce_bits | __pa(gmap->mm->pgd);
+	asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) );
+	S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
+static int gmap_alloc_table(struct gmap *gmap,
+			    unsigned long *table, unsigned long init)
+{
+	struct page *page;
+	unsigned long *new;
+
+	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	if (!page)
+		return -ENOMEM;
+	new = (unsigned long *) page_to_phys(page);
+	crst_table_init(new, init);
+	down_read(&gmap->mm->mmap_sem);
+	if (*table & _REGION_ENTRY_INV) {
+		list_add(&page->lru, &gmap->crst_list);
+		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+			(*table & _REGION_ENTRY_TYPE_MASK);
+	} else
+		__free_pages(page, ALLOC_ORDER);
+	up_read(&gmap->mm->mmap_sem);
+	return 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @addr: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
+	unsigned long *table;
+	unsigned long off;
+	int flush;
+
+	if ((to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	if (len == 0 || to + len < to)
+		return -EINVAL;
+
+	flush = 0;
+	down_read(&gmap->mm->mmap_sem);
+	for (off = 0; off < len; off += PMD_SIZE) {
+		/* Walk the guest addr space page table */
+		table = gmap->table + (((to + off) >> 53) & 0x7ff);
+		if (*table & _REGION_ENTRY_INV)
+			return 0;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 42) & 0x7ff);
+		if (*table & _REGION_ENTRY_INV)
+			return 0;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 31) & 0x7ff);
+		if (*table & _REGION_ENTRY_INV)
+			return 0;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 20) & 0x7ff);
+
+		/* Clear segment table entry in guest address space. */
+		flush |= gmap_unlink_segment(gmap, table);
+		*table = _SEGMENT_ENTRY_INV;
+	}
+	up_read(&gmap->mm->mmap_sem);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+		     unsigned long to, unsigned long len)
+{
+	unsigned long *table;
+	unsigned long off;
+	int flush;
+
+	if ((from | to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	if (len == 0 || from + len > PGDIR_SIZE ||
+	    from + len < from || to + len < to)
+		return -EINVAL;
+
+	flush = 0;
+	down_read(&gmap->mm->mmap_sem);
+	for (off = 0; off < len; off += PMD_SIZE) {
+		/* Walk the gmap address space page table */
+		table = gmap->table + (((to + off) >> 53) & 0x7ff);
+		if ((*table & _REGION_ENTRY_INV) &&
+		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
+			goto out_unmap;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 42) & 0x7ff);
+		if ((*table & _REGION_ENTRY_INV) &&
+		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
+			goto out_unmap;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 31) & 0x7ff);
+		if ((*table & _REGION_ENTRY_INV) &&
+		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
+			goto out_unmap;
+		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
+		table = table + (((to + off) >> 20) & 0x7ff);
+
+		/* Store 'from' address in an invalid segment table entry. */
+		flush |= gmap_unlink_segment(gmap, table);
+		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	return 0;
+
+out_unmap:
+	up_read(&gmap->mm->mmap_sem);
+	gmap_unmap_segment(gmap, to, len);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
+{
+	unsigned long *table, vmaddr, segment;
+	struct mm_struct *mm;
+	struct gmap_pgtable *mp;
+	struct gmap_rmap *rmap;
+	struct vm_area_struct *vma;
+	struct page *page;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	current->thread.gmap_addr = address;
+	mm = gmap->mm;
+	/* Walk the gmap address space page table */
+	table = gmap->table + ((address >> 53) & 0x7ff);
+	if (unlikely(*table & _REGION_ENTRY_INV))
+		return -EFAULT;
+	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	table = table + ((address >> 42) & 0x7ff);
+	if (unlikely(*table & _REGION_ENTRY_INV))
+		return -EFAULT;
+	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	table = table + ((address >> 31) & 0x7ff);
+	if (unlikely(*table & _REGION_ENTRY_INV))
+		return -EFAULT;
+	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	table = table + ((address >> 20) & 0x7ff);
+
+	/* Convert the gmap address to an mm address. */
+	segment = *table;
+	if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
+		page = pfn_to_page(segment >> PAGE_SHIFT);
+		mp = (struct gmap_pgtable *) page->index;
+		return mp->vmaddr | (address & ~PMD_MASK);
+	} else if (segment & _SEGMENT_ENTRY_RO) {
+		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
+		vma = find_vma(mm, vmaddr);
+		if (!vma || vma->vm_start > vmaddr)
+			return -EFAULT;
+
+		/* Walk the parent mm page table */
+		pgd = pgd_offset(mm, vmaddr);
+		pud = pud_alloc(mm, pgd, vmaddr);
+		if (!pud)
+			return -ENOMEM;
+		pmd = pmd_alloc(mm, pud, vmaddr);
+		if (!pmd)
+			return -ENOMEM;
+		if (!pmd_present(*pmd) &&
+		    __pte_alloc(mm, vma, pmd, vmaddr))
+			return -ENOMEM;
+		/* pmd now points to a valid segment table entry. */
+		rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
+		if (!rmap)
+			return -ENOMEM;
+		/* Link gmap segment table entry location to page table. */
+		page = pmd_page(*pmd);
+		mp = (struct gmap_pgtable *) page->index;
+		rmap->entry = table;
+		list_add(&rmap->list, &mp->mapper);
+		/* Set gmap segment table entry to page table. */
+		*table = pmd_val(*pmd) & PAGE_MASK;
+		return vmaddr | (address & ~PMD_MASK);
+	}
+	return -EFAULT;
+
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
+{
+	struct gmap_rmap *rmap, *next;
+	struct gmap_pgtable *mp;
+	struct page *page;
+	int flush;
+
+	flush = 0;
+	spin_lock(&mm->page_table_lock);
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
+		*rmap->entry =
+			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+		list_del(&rmap->list);
+		kfree(rmap);
+		flush = 1;
+	}
+	spin_unlock(&mm->page_table_lock);
+	if (flush)
+		__tlb_flush_global();
+}
+
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
+						    unsigned long vmaddr)
 {
 	struct page *page;
 	unsigned long *table;
+	struct gmap_pgtable *mp;
 
 	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
 	if (!page)
 		return NULL;
+	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
+	if (!mp) {
+		__free_page(page);
+		return NULL;
+	}
 	pgtable_page_ctor(page);
+	mp->vmaddr = vmaddr & PMD_MASK;
+	INIT_LIST_HEAD(&mp->mapper);
+	page->index = (unsigned long) mp;
 	atomic_set(&page->_mapcount, 3);
 	table = (unsigned long *) page_to_phys(page);
 	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
@@ -167,24 +512,57 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
 static inline void page_table_free_pgste(unsigned long *table)
 {
 	struct page *page;
+	struct gmap_pgtable *mp;
 
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	BUG_ON(!list_empty(&mp->mapper));
 	pgtable_page_ctor(page);
 	atomic_set(&page->_mapcount, -1);
+	kfree(mp);
 	__free_page(page);
 }
-#endif
 
-unsigned long *page_table_alloc(struct mm_struct *mm)
+#else /* CONFIG_PGSTE */
+
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
+						    unsigned long vmaddr)
+{
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+}
+
+static inline void gmap_unmap_notifier(struct mm_struct *mm,
+				       unsigned long *table)
+{
+}
+
+#endif /* CONFIG_PGSTE */
+
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+	unsigned int old, new;
+
+	do {
+		old = atomic_read(v);
+		new = old ^ bits;
+	} while (atomic_cmpxchg(v, old, new) != old);
+	return new;
+}
+
+/*
+ * page table entry allocation/free routines.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
 {
 	struct page *page;
 	unsigned long *table;
 	unsigned int mask, bit;
 
-#ifdef CONFIG_PGSTE
 	if (mm_has_pgste(mm))
-		return page_table_alloc_pgste(mm);
+		return page_table_alloc_pgste(mm, vmaddr);
-#endif
 	/* Allocate fragments of a 4K page as 1K/2K page table */
 	spin_lock_bh(&mm->context.list_lock);
 	mask = FRAG_MASK;
@@ -222,10 +600,10 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	struct page *page;
 	unsigned int bit, mask;
 
-#ifdef CONFIG_PGSTE
-	if (mm_has_pgste(mm))
+	if (mm_has_pgste(mm)) {
+		gmap_unmap_notifier(mm, table);
 		return page_table_free_pgste(table);
-#endif
+	}
 	/* Free 1K/2K page table fragment of a 4K page */
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
@@ -249,10 +627,8 @@ static void __page_table_free_rcu(void *table, unsigned bit)
 {
 	struct page *page;
 
-#ifdef CONFIG_PGSTE
 	if (bit == FRAG_MASK)
 		return page_table_free_pgste(table);
-#endif
 	/* Free 1K/2K page table fragment of a 4K page */
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
@@ -269,13 +645,12 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
 	unsigned int bit, mask;
 
 	mm = tlb->mm;
-#ifdef CONFIG_PGSTE
 	if (mm_has_pgste(mm)) {
+		gmap_unmap_notifier(mm, table);
 		table = (unsigned long *) (__pa(table) | FRAG_MASK);
 		tlb_remove_table(tlb, table);
 		return;
 	}
-#endif
 	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock_bh(&mm->context.list_lock);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 8c1970d1dd91..781ff5169560 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -61,12 +61,12 @@ static inline pmd_t *vmem_pmd_alloc(void)
 	return pmd;
 }
 
-static pte_t __ref *vmem_pte_alloc(void)
+static pte_t __ref *vmem_pte_alloc(unsigned long address)
 {
 	pte_t *pte;
 
 	if (slab_is_available())
-		pte = (pte_t *) page_table_alloc(&init_mm);
+		pte = (pte_t *) page_table_alloc(&init_mm, address);
 	else
 		pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
 	if (!pte)
@@ -120,7 +120,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 		}
 #endif
 		if (pmd_none(*pm_dir)) {
-			pt_dir = vmem_pte_alloc();
+			pt_dir = vmem_pte_alloc(address);
 			if (!pt_dir)
 				goto out;
 			pmd_populate(&init_mm, pm_dir, pt_dir);
@@ -205,7 +205,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
 
 		pm_dir = pmd_offset(pu_dir, address);
 		if (pmd_none(*pm_dir)) {
-			pt_dir = vmem_pte_alloc();
+			pt_dir = vmem_pte_alloc(address);
 			if (!pt_dir)
 				goto out;
 			pmd_populate(&init_mm, pm_dir, pt_dir);