aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCarsten Otte <cotte@de.ibm.com>2008-03-25 13:47:10 -0400
committerAvi Kivity <avi@qumranet.com>2008-04-27 05:00:40 -0400
commit402b08622d9ac6e32e25289573272e0f21bb58a7 (patch)
tree40d7386154cef85c9bfd2bd862db025933820776
parent37817f2982d0f559f90cecc66e150dd9d2c2df05 (diff)
s390: KVM preparation: provide hook to enable pgstes in user pagetable
The SIE instruction on s390 uses the second half of the page table page to virtualize the storage keys of a guest. This patch offers the s390_enable_sie function, which reorganizes the page tables of a single-threaded process to reserve space in the page table: s390_enable_sie makes sure that the process is single-threaded and then uses dup_mm to create a new mm with reorganized page tables. The old mm is freed, and the process now has a page status extended field after every page table. Code that wants to exploit pgstes should SELECT CONFIG_PGSTE. This patch has a small common code hit, namely making dup_mm non-static. Edit (Carsten): I've modified Martin's patch, following Jeremy Fitzhardinge's review feedback. We now have the prototype for dup_mm in include/linux/sched.h. Following Martin's suggestion, s390_enable_sie() now calls task_lock() to prevent a race against ptrace modification of mm_users. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Acked-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r--arch/s390/Kconfig4
-rw-r--r--arch/s390/kernel/setup.c4
-rw-r--r--arch/s390/mm/pgtable.c65
-rw-r--r--include/asm-s390/mmu.h1
-rw-r--r--include/asm-s390/mmu_context.h8
-rw-r--r--include/asm-s390/pgtable.h1
-rw-r--r--include/linux/sched.h2
-rw-r--r--kernel/fork.c2
8 files changed, 82 insertions, 5 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f6a68e178fc5..513a0589e81d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK
62 default y 62 default y
63 depends on SMP && PREEMPT 63 depends on SMP && PREEMPT
64 64
65config PGSTE
66 bool
67 default y if KVM
68
65mainmenu "Linux Kernel Configuration" 69mainmenu "Linux Kernel Configuration"
66 70
67config S390 71config S390
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7141147e6b63..2f35133ebc18 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p)
316early_param("ipldelay", early_parse_ipldelay); 316early_param("ipldelay", early_parse_ipldelay);
317 317
318#ifdef CONFIG_S390_SWITCH_AMODE 318#ifdef CONFIG_S390_SWITCH_AMODE
319#ifdef CONFIG_PGSTE
320unsigned int switch_amode = 1;
321#else
319unsigned int switch_amode = 0; 322unsigned int switch_amode = 0;
323#endif
320EXPORT_SYMBOL_GPL(switch_amode); 324EXPORT_SYMBOL_GPL(switch_amode);
321 325
322static void set_amode_and_uaccess(unsigned long user_amode, 326static void set_amode_and_uaccess(unsigned long user_amode,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fd072013f88c..5c1aea97cd12 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
30#define TABLES_PER_PAGE 4 30#define TABLES_PER_PAGE 4
31#define FRAG_MASK 15UL 31#define FRAG_MASK 15UL
32#define SECOND_HALVES 10UL 32#define SECOND_HALVES 10UL
33
34void clear_table_pgstes(unsigned long *table)
35{
36 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
37 memset(table + 256, 0, PAGE_SIZE/4);
38 clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
39 memset(table + 768, 0, PAGE_SIZE/4);
40}
41
33#else 42#else
34#define ALLOC_ORDER 2 43#define ALLOC_ORDER 2
35#define TABLES_PER_PAGE 2 44#define TABLES_PER_PAGE 2
36#define FRAG_MASK 3UL 45#define FRAG_MASK 3UL
37#define SECOND_HALVES 2UL 46#define SECOND_HALVES 2UL
47
48void clear_table_pgstes(unsigned long *table)
49{
50 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
51 memset(table + 256, 0, PAGE_SIZE/2);
52}
53
38#endif 54#endif
39 55
40unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) 56unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
153 unsigned long *table; 169 unsigned long *table;
154 unsigned long bits; 170 unsigned long bits;
155 171
156 bits = mm->context.noexec ? 3UL : 1UL; 172 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
157 spin_lock(&mm->page_table_lock); 173 spin_lock(&mm->page_table_lock);
158 page = NULL; 174 page = NULL;
159 if (!list_empty(&mm->context.pgtable_list)) { 175 if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
170 pgtable_page_ctor(page); 186 pgtable_page_ctor(page);
171 page->flags &= ~FRAG_MASK; 187 page->flags &= ~FRAG_MASK;
172 table = (unsigned long *) page_to_phys(page); 188 table = (unsigned long *) page_to_phys(page);
173 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); 189 if (mm->context.pgstes)
190 clear_table_pgstes(table);
191 else
192 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
174 spin_lock(&mm->page_table_lock); 193 spin_lock(&mm->page_table_lock);
175 list_add(&page->lru, &mm->context.pgtable_list); 194 list_add(&page->lru, &mm->context.pgtable_list);
176 } 195 }
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
191 struct page *page; 210 struct page *page;
192 unsigned long bits; 211 unsigned long bits;
193 212
194 bits = mm->context.noexec ? 3UL : 1UL; 213 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
195 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 214 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
196 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 215 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
197 spin_lock(&mm->page_table_lock); 216 spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
228 mm->context.noexec = 0; 247 mm->context.noexec = 0;
229 update_mm(mm, tsk); 248 update_mm(mm, tsk);
230} 249}
250
251/*
252 * switch on pgstes for its userspace process (for kvm)
253 */
254int s390_enable_sie(void)
255{
256 struct task_struct *tsk = current;
257 struct mm_struct *mm;
258 int rc;
259
260 task_lock(tsk);
261
262 rc = 0;
263 if (tsk->mm->context.pgstes)
264 goto unlock;
265
266 rc = -EINVAL;
267 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
268 tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
269 goto unlock;
270
271 tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
272 mm = dup_mm(tsk);
273 tsk->mm->context.pgstes = 0;
274
275 rc = -ENOMEM;
276 if (!mm)
277 goto unlock;
278 mmput(tsk->mm);
279 tsk->mm = tsk->active_mm = mm;
280 preempt_disable();
281 update_mm(mm, tsk);
282 cpu_set(smp_processor_id(), mm->cpu_vm_mask);
283 preempt_enable();
284 rc = 0;
285unlock:
286 task_unlock(tsk);
287 return rc;
288}
289EXPORT_SYMBOL_GPL(s390_enable_sie);
diff --git a/include/asm-s390/mmu.h b/include/asm-s390/mmu.h
index 1698e29c5b20..5dd5e7b3476f 100644
--- a/include/asm-s390/mmu.h
+++ b/include/asm-s390/mmu.h
@@ -7,6 +7,7 @@ typedef struct {
7 unsigned long asce_bits; 7 unsigned long asce_bits;
8 unsigned long asce_limit; 8 unsigned long asce_limit;
9 int noexec; 9 int noexec;
10 int pgstes;
10} mm_context_t; 11} mm_context_t;
11 12
12#endif 13#endif
diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h
index b5a34c6f91a9..4c2fbf48c9c4 100644
--- a/include/asm-s390/mmu_context.h
+++ b/include/asm-s390/mmu_context.h
@@ -20,7 +20,13 @@ static inline int init_new_context(struct task_struct *tsk,
20#ifdef CONFIG_64BIT 20#ifdef CONFIG_64BIT
21 mm->context.asce_bits |= _ASCE_TYPE_REGION3; 21 mm->context.asce_bits |= _ASCE_TYPE_REGION3;
22#endif 22#endif
23 mm->context.noexec = s390_noexec; 23 if (current->mm->context.pgstes) {
24 mm->context.noexec = 0;
25 mm->context.pgstes = 1;
26 } else {
27 mm->context.noexec = s390_noexec;
28 mm->context.pgstes = 0;
29 }
24 mm->context.asce_limit = STACK_TOP_MAX; 30 mm->context.asce_limit = STACK_TOP_MAX;
25 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); 31 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
26 return 0; 32 return 0;
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 65154dc9a9e5..8e9a629dc199 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -966,6 +966,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
966 966
967extern int add_shared_memory(unsigned long start, unsigned long size); 967extern int add_shared_memory(unsigned long start, unsigned long size);
968extern int remove_shared_memory(unsigned long start, unsigned long size); 968extern int remove_shared_memory(unsigned long start, unsigned long size);
969extern int s390_enable_sie(void);
969 970
970/* 971/*
971 * No page table caches to initialise 972 * No page table caches to initialise
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d0bd97044abd..9a4f3e63e3bf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1798,6 +1798,8 @@ extern void mmput(struct mm_struct *);
1798extern struct mm_struct *get_task_mm(struct task_struct *task); 1798extern struct mm_struct *get_task_mm(struct task_struct *task);
1799/* Remove the current tasks stale references to the old mm_struct */ 1799/* Remove the current tasks stale references to the old mm_struct */
1800extern void mm_release(struct task_struct *, struct mm_struct *); 1800extern void mm_release(struct task_struct *, struct mm_struct *);
1801/* Allocate a new mm structure and copy contents from tsk->mm */
1802extern struct mm_struct *dup_mm(struct task_struct *tsk);
1801 1803
1802extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); 1804extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
1803extern void flush_thread(void); 1805extern void flush_thread(void);
diff --git a/kernel/fork.c b/kernel/fork.c
index cb46befdd3a0..c674aa8d3c31 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -521,7 +521,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
521 * Allocate a new mm structure and copy contents from the 521 * Allocate a new mm structure and copy contents from the
522 * mm structure of the passed in task structure. 522 * mm structure of the passed in task structure.
523 */ 523 */
524static struct mm_struct *dup_mm(struct task_struct *tsk) 524struct mm_struct *dup_mm(struct task_struct *tsk)
525{ 525{
526 struct mm_struct *mm, *oldmm = current->mm; 526 struct mm_struct *mm, *oldmm = current->mm;
527 int err; 527 int err;