diff options
author | Carsten Otte <cotte@de.ibm.com> | 2008-03-25 13:47:10 -0400 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-04-27 05:00:40 -0400 |
commit | 402b08622d9ac6e32e25289573272e0f21bb58a7 (patch) | |
tree | 40d7386154cef85c9bfd2bd862db025933820776 /arch/s390 | |
parent | 37817f2982d0f559f90cecc66e150dd9d2c2df05 (diff) |
s390: KVM preparation: provide hook to enable pgstes in user pagetable
The SIE instruction on s390 uses the 2nd half of the page table page to
virtualize the storage keys of a guest. This patch offers the s390_enable_sie
function, which reorganizes the page tables of a single-threaded process to
reserve space in the page table:
s390_enable_sie makes sure that the process is single threaded and then uses
dup_mm to create a new mm with reorganized page tables. The old mm is freed
and the process has now a page status extended field after every page table.
Code that wants to exploit pgstes should SELECT CONFIG_PGSTE.
This patch has a small common code hit, namely making dup_mm non-static.
Edit (Carsten): I've modified Martin's patch, following Jeremy Fitzhardinge's
review feedback. Now we do have the prototype for dup_mm in
include/linux/sched.h. Following Martin's suggestion, s390_enable_sie() does now
call task_lock() to prevent race against ptrace modification of mm_users.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/Kconfig | 4 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 4 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 65 |
3 files changed, 70 insertions, 3 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f6a68e178fc5..513a0589e81d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK | |||
62 | default y | 62 | default y |
63 | depends on SMP && PREEMPT | 63 | depends on SMP && PREEMPT |
64 | 64 | ||
65 | config PGSTE | ||
66 | bool | ||
67 | default y if KVM | ||
68 | |||
65 | mainmenu "Linux Kernel Configuration" | 69 | mainmenu "Linux Kernel Configuration" |
66 | 70 | ||
67 | config S390 | 71 | config S390 |
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7141147e6b63..2f35133ebc18 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c | |||
@@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p) | |||
316 | early_param("ipldelay", early_parse_ipldelay); | 316 | early_param("ipldelay", early_parse_ipldelay); |
317 | 317 | ||
318 | #ifdef CONFIG_S390_SWITCH_AMODE | 318 | #ifdef CONFIG_S390_SWITCH_AMODE |
319 | #ifdef CONFIG_PGSTE | ||
320 | unsigned int switch_amode = 1; | ||
321 | #else | ||
319 | unsigned int switch_amode = 0; | 322 | unsigned int switch_amode = 0; |
323 | #endif | ||
320 | EXPORT_SYMBOL_GPL(switch_amode); | 324 | EXPORT_SYMBOL_GPL(switch_amode); |
321 | 325 | ||
322 | static void set_amode_and_uaccess(unsigned long user_amode, | 326 | static void set_amode_and_uaccess(unsigned long user_amode, |
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index fd072013f88c..5c1aea97cd12 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -30,11 +30,27 @@ | |||
30 | #define TABLES_PER_PAGE 4 | 30 | #define TABLES_PER_PAGE 4 |
31 | #define FRAG_MASK 15UL | 31 | #define FRAG_MASK 15UL |
32 | #define SECOND_HALVES 10UL | 32 | #define SECOND_HALVES 10UL |
33 | |||
34 | void clear_table_pgstes(unsigned long *table) | ||
35 | { | ||
36 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | ||
37 | memset(table + 256, 0, PAGE_SIZE/4); | ||
38 | clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | ||
39 | memset(table + 768, 0, PAGE_SIZE/4); | ||
40 | } | ||
41 | |||
33 | #else | 42 | #else |
34 | #define ALLOC_ORDER 2 | 43 | #define ALLOC_ORDER 2 |
35 | #define TABLES_PER_PAGE 2 | 44 | #define TABLES_PER_PAGE 2 |
36 | #define FRAG_MASK 3UL | 45 | #define FRAG_MASK 3UL |
37 | #define SECOND_HALVES 2UL | 46 | #define SECOND_HALVES 2UL |
47 | |||
48 | void clear_table_pgstes(unsigned long *table) | ||
49 | { | ||
50 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | ||
51 | memset(table + 256, 0, PAGE_SIZE/2); | ||
52 | } | ||
53 | |||
38 | #endif | 54 | #endif |
39 | 55 | ||
40 | unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) | 56 | unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) |
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
153 | unsigned long *table; | 169 | unsigned long *table; |
154 | unsigned long bits; | 170 | unsigned long bits; |
155 | 171 | ||
156 | bits = mm->context.noexec ? 3UL : 1UL; | 172 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; |
157 | spin_lock(&mm->page_table_lock); | 173 | spin_lock(&mm->page_table_lock); |
158 | page = NULL; | 174 | page = NULL; |
159 | if (!list_empty(&mm->context.pgtable_list)) { | 175 | if (!list_empty(&mm->context.pgtable_list)) { |
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
170 | pgtable_page_ctor(page); | 186 | pgtable_page_ctor(page); |
171 | page->flags &= ~FRAG_MASK; | 187 | page->flags &= ~FRAG_MASK; |
172 | table = (unsigned long *) page_to_phys(page); | 188 | table = (unsigned long *) page_to_phys(page); |
173 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | 189 | if (mm->context.pgstes) |
190 | clear_table_pgstes(table); | ||
191 | else | ||
192 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | ||
174 | spin_lock(&mm->page_table_lock); | 193 | spin_lock(&mm->page_table_lock); |
175 | list_add(&page->lru, &mm->context.pgtable_list); | 194 | list_add(&page->lru, &mm->context.pgtable_list); |
176 | } | 195 | } |
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
191 | struct page *page; | 210 | struct page *page; |
192 | unsigned long bits; | 211 | unsigned long bits; |
193 | 212 | ||
194 | bits = mm->context.noexec ? 3UL : 1UL; | 213 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; |
195 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); | 214 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); |
196 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 215 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
197 | spin_lock(&mm->page_table_lock); | 216 | spin_lock(&mm->page_table_lock); |
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) | |||
228 | mm->context.noexec = 0; | 247 | mm->context.noexec = 0; |
229 | update_mm(mm, tsk); | 248 | update_mm(mm, tsk); |
230 | } | 249 | } |
250 | |||
251 | /* | ||
252 | * switch on pgstes for its userspace process (for kvm) | ||
253 | */ | ||
254 | int s390_enable_sie(void) | ||
255 | { | ||
256 | struct task_struct *tsk = current; | ||
257 | struct mm_struct *mm; | ||
258 | int rc; | ||
259 | |||
260 | task_lock(tsk); | ||
261 | |||
262 | rc = 0; | ||
263 | if (tsk->mm->context.pgstes) | ||
264 | goto unlock; | ||
265 | |||
266 | rc = -EINVAL; | ||
267 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | ||
268 | tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) | ||
269 | goto unlock; | ||
270 | |||
271 | tsk->mm->context.pgstes = 1; /* dirty little tricks .. */ | ||
272 | mm = dup_mm(tsk); | ||
273 | tsk->mm->context.pgstes = 0; | ||
274 | |||
275 | rc = -ENOMEM; | ||
276 | if (!mm) | ||
277 | goto unlock; | ||
278 | mmput(tsk->mm); | ||
279 | tsk->mm = tsk->active_mm = mm; | ||
280 | preempt_disable(); | ||
281 | update_mm(mm, tsk); | ||
282 | cpu_set(smp_processor_id(), mm->cpu_vm_mask); | ||
283 | preempt_enable(); | ||
284 | rc = 0; | ||
285 | unlock: | ||
286 | task_unlock(tsk); | ||
287 | return rc; | ||
288 | } | ||
289 | EXPORT_SYMBOL_GPL(s390_enable_sie); | ||