aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/mm
diff options
context:
space:
mode:
authorCarsten Otte <cotte@de.ibm.com>2008-03-25 13:47:10 -0400
committerAvi Kivity <avi@qumranet.com>2008-04-27 05:00:40 -0400
commit402b08622d9ac6e32e25289573272e0f21bb58a7 (patch)
tree40d7386154cef85c9bfd2bd862db025933820776 /arch/s390/mm
parent37817f2982d0f559f90cecc66e150dd9d2c2df05 (diff)
s390: KVM preparation: provide hook to enable pgstes in user pagetable
The SIE instruction on s390 uses the 2nd half of the page table page to virtualize the storage keys of a guest. This patch offers the s390_enable_sie function, which reorganizes the page tables of a single-threaded process to reserve space in the page table: s390_enable_sie makes sure that the process is single threaded and then uses dup_mm to create a new mm with reorganized page tables. The old mm is freed and the process has now a page status extended field after every page table. Code that wants to exploit pgstes should SELECT CONFIG_PGSTE. This patch has a small common code hit, namely making dup_mm non-static. Edit (Carsten): I've modified Martin's patch, following Jeremy Fitzhardinge's review feedback. Now we do have the prototype for dup_mm in include/linux/sched.h. Following Martin's suggestion, s390_enable_sie() does now call task_lock() to prevent race against ptrace modification of mm_users. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Acked-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--arch/s390/mm/pgtable.c65
1 files changed, 62 insertions, 3 deletions
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fd072013f88c..5c1aea97cd12 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
30#define TABLES_PER_PAGE 4 30#define TABLES_PER_PAGE 4
31#define FRAG_MASK 15UL 31#define FRAG_MASK 15UL
32#define SECOND_HALVES 10UL 32#define SECOND_HALVES 10UL
33
34void clear_table_pgstes(unsigned long *table)
35{
36 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
37 memset(table + 256, 0, PAGE_SIZE/4);
38 clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
39 memset(table + 768, 0, PAGE_SIZE/4);
40}
41
33#else 42#else
34#define ALLOC_ORDER 2 43#define ALLOC_ORDER 2
35#define TABLES_PER_PAGE 2 44#define TABLES_PER_PAGE 2
36#define FRAG_MASK 3UL 45#define FRAG_MASK 3UL
37#define SECOND_HALVES 2UL 46#define SECOND_HALVES 2UL
47
48void clear_table_pgstes(unsigned long *table)
49{
50 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
51 memset(table + 256, 0, PAGE_SIZE/2);
52}
53
38#endif 54#endif
39 55
40unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) 56unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
153 unsigned long *table; 169 unsigned long *table;
154 unsigned long bits; 170 unsigned long bits;
155 171
156 bits = mm->context.noexec ? 3UL : 1UL; 172 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
157 spin_lock(&mm->page_table_lock); 173 spin_lock(&mm->page_table_lock);
158 page = NULL; 174 page = NULL;
159 if (!list_empty(&mm->context.pgtable_list)) { 175 if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
170 pgtable_page_ctor(page); 186 pgtable_page_ctor(page);
171 page->flags &= ~FRAG_MASK; 187 page->flags &= ~FRAG_MASK;
172 table = (unsigned long *) page_to_phys(page); 188 table = (unsigned long *) page_to_phys(page);
173 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); 189 if (mm->context.pgstes)
190 clear_table_pgstes(table);
191 else
192 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
174 spin_lock(&mm->page_table_lock); 193 spin_lock(&mm->page_table_lock);
175 list_add(&page->lru, &mm->context.pgtable_list); 194 list_add(&page->lru, &mm->context.pgtable_list);
176 } 195 }
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
191 struct page *page; 210 struct page *page;
192 unsigned long bits; 211 unsigned long bits;
193 212
194 bits = mm->context.noexec ? 3UL : 1UL; 213 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
195 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 214 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
196 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 215 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
197 spin_lock(&mm->page_table_lock); 216 spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
228 mm->context.noexec = 0; 247 mm->context.noexec = 0;
229 update_mm(mm, tsk); 248 update_mm(mm, tsk);
230} 249}
250
251/*
252 * switch on pgstes for its userspace process (for kvm)
253 */
254int s390_enable_sie(void)
255{
256 struct task_struct *tsk = current;
257 struct mm_struct *mm;
258 int rc;
259
260 task_lock(tsk);
261
262 rc = 0;
263 if (tsk->mm->context.pgstes)
264 goto unlock;
265
266 rc = -EINVAL;
267 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
268 tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
269 goto unlock;
270
271 tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
272 mm = dup_mm(tsk);
273 tsk->mm->context.pgstes = 0;
274
275 rc = -ENOMEM;
276 if (!mm)
277 goto unlock;
278 mmput(tsk->mm);
279 tsk->mm = tsk->active_mm = mm;
280 preempt_disable();
281 update_mm(mm, tsk);
282 cpu_set(smp_processor_id(), mm->cpu_vm_mask);
283 preempt_enable();
284 rc = 0;
285unlock:
286 task_unlock(tsk);
287 return rc;
288}
289EXPORT_SYMBOL_GPL(s390_enable_sie);