aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 21:15:06 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 21:15:06 -0400
commit ae7a835cc546fc67df90edaaa0c48ae2b22a29fe (patch)
tree b1235437fde066ab0f272f164d75dc1b98a244cf /arch/s390
parent cf39c8e5352b4fb9efedfe7e9acb566a85ed847c (diff)
parent 6b9e4fa07443f5baf5bbd7ab043abd6976f8d7bc (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Gleb Natapov:
 "The highlights of the release are nested EPT and pv-ticketlocks support (hypervisor part, guest part, which is most of the code, goes through tip tree). Apart of that there are many fixes for all arches"

Fix up semantic conflicts as discussed in the pull request thread..

* 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (88 commits)
  ARM: KVM: Add newlines to panic strings
  ARM: KVM: Work around older compiler bug
  ARM: KVM: Simplify tracepoint text
  ARM: KVM: Fix kvm_set_pte assignment
  ARM: KVM: vgic: Bump VGIC_NR_IRQS to 256
  ARM: KVM: Bugfix: vgic_bytemap_get_reg per cpu regs
  ARM: KVM: vgic: fix GICD_ICFGRn access
  ARM: KVM: vgic: simplify vgic_get_target_reg
  KVM: MMU: remove unused parameter
  KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate()
  KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls
  KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX
  KVM: x86: update masterclock when kvmclock_offset is calculated (v2)
  KVM: PPC: Book3S: Fix compile error in XICS emulation
  KVM: PPC: Book3S PR: return appropriate error when allocation fails
  arch: powerpc: kvm: add signed type cast for comparation
  KVM: x86: add comments where MMIO does not return to the emulator
  KVM: vmx: count exits to userspace during invalid guest emulation
  KVM: rename __kvm_io_bus_sort_cmp to kvm_io_bus_cmp
  kvm: optimize away THP checks in kvm_is_mmio_pfn()
  ...
Diffstat (limited to 'arch/s390')
-rw-r--r-- arch/s390/include/asm/kvm_host.h 8
-rw-r--r-- arch/s390/include/asm/mmu.h 2
-rw-r--r-- arch/s390/include/asm/mmu_context.h 19
-rw-r--r-- arch/s390/include/asm/pgtable.h 11
-rw-r--r-- arch/s390/include/asm/processor.h 2
-rw-r--r-- arch/s390/kvm/diag.c 17
-rw-r--r-- arch/s390/kvm/kvm-s390.c 27
-rw-r--r-- arch/s390/kvm/kvm-s390.h 10
-rw-r--r-- arch/s390/kvm/priv.c 32
-rw-r--r-- arch/s390/mm/pgtable.c 183
10 files changed, 195 insertions, 116 deletions
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3238d4004e84..e87ecaa2c569 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -274,6 +274,14 @@ struct kvm_arch{
274 int css_support; 274 int css_support;
275}; 275};
276 276
277#define KVM_HVA_ERR_BAD (-1UL)
278#define KVM_HVA_ERR_RO_BAD (-2UL)
279
280static inline bool kvm_is_error_hva(unsigned long addr)
281{
282 return IS_ERR_VALUE(addr);
283}
284
277extern int sie64a(struct kvm_s390_sie_block *, u64 *); 285extern int sie64a(struct kvm_s390_sie_block *, u64 *);
278extern char sie_exit; 286extern char sie_exit;
279#endif 287#endif
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 6340178748bf..ff132ac64ddd 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -12,8 +12,6 @@ typedef struct {
12 unsigned long asce_bits; 12 unsigned long asce_bits;
13 unsigned long asce_limit; 13 unsigned long asce_limit;
14 unsigned long vdso_base; 14 unsigned long vdso_base;
15 /* Cloned contexts will be created with extended page tables. */
16 unsigned int alloc_pgste:1;
17 /* The mmu context has extended page tables. */ 15 /* The mmu context has extended page tables. */
18 unsigned int has_pgste:1; 16 unsigned int has_pgste:1;
19} mm_context_t; 17} mm_context_t;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 7b7fce4e8469..9f973d8de90e 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -21,24 +21,7 @@ static inline int init_new_context(struct task_struct *tsk,
21#ifdef CONFIG_64BIT 21#ifdef CONFIG_64BIT
22 mm->context.asce_bits |= _ASCE_TYPE_REGION3; 22 mm->context.asce_bits |= _ASCE_TYPE_REGION3;
23#endif 23#endif
24 if (current->mm && current->mm->context.alloc_pgste) { 24 mm->context.has_pgste = 0;
25 /*
26 * alloc_pgste indicates, that any NEW context will be created
27 * with extended page tables. The old context is unchanged. The
28 * page table allocation and the page table operations will
29 * look at has_pgste to distinguish normal and extended page
30 * tables. The only way to create extended page tables is to
31 * set alloc_pgste and then create a new context (e.g. dup_mm).
32 * The page table allocation is called after init_new_context
33 * and if has_pgste is set, it will create extended page
34 * tables.
35 */
36 mm->context.has_pgste = 1;
37 mm->context.alloc_pgste = 1;
38 } else {
39 mm->context.has_pgste = 0;
40 mm->context.alloc_pgste = 0;
41 }
42 mm->context.asce_limit = STACK_TOP_MAX; 25 mm->context.asce_limit = STACK_TOP_MAX;
43 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); 26 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
44 return 0; 27 return 0;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 9f215b40109e..9b60a36c348d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1442,6 +1442,17 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
1442} 1442}
1443#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ 1443#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
1444 1444
1445static inline void pmdp_flush_lazy(struct mm_struct *mm,
1446 unsigned long address, pmd_t *pmdp)
1447{
1448 int active = (mm == current->active_mm) ? 1 : 0;
1449
1450 if ((atomic_read(&mm->context.attach_count) & 0xffff) > active)
1451 __pmd_idte(address, pmdp);
1452 else
1453 mm->context.flush_mm = 1;
1454}
1455
1445#ifdef CONFIG_TRANSPARENT_HUGEPAGE 1456#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1446 1457
1447#define __HAVE_ARCH_PGTABLE_DEPOSIT 1458#define __HAVE_ARCH_PGTABLE_DEPOSIT
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index b0e6435b2f02..0eb37505cab1 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -43,6 +43,7 @@ extern void execve_tail(void);
43#ifndef CONFIG_64BIT 43#ifndef CONFIG_64BIT
44 44
45#define TASK_SIZE (1UL << 31) 45#define TASK_SIZE (1UL << 31)
46#define TASK_MAX_SIZE (1UL << 31)
46#define TASK_UNMAPPED_BASE (1UL << 30) 47#define TASK_UNMAPPED_BASE (1UL << 30)
47 48
48#else /* CONFIG_64BIT */ 49#else /* CONFIG_64BIT */
@@ -51,6 +52,7 @@ extern void execve_tail(void);
51#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ 52#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
52 (1UL << 30) : (1UL << 41)) 53 (1UL << 30) : (1UL << 41))
53#define TASK_SIZE TASK_SIZE_OF(current) 54#define TASK_SIZE TASK_SIZE_OF(current)
55#define TASK_MAX_SIZE (1UL << 53)
54 56
55#endif /* CONFIG_64BIT */ 57#endif /* CONFIG_64BIT */
56 58
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 3074475c8ae0..3a74d8af0d69 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -119,12 +119,21 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
119 * The layout is as follows: 119 * The layout is as follows:
120 * - gpr 2 contains the subchannel id (passed as addr) 120 * - gpr 2 contains the subchannel id (passed as addr)
121 * - gpr 3 contains the virtqueue index (passed as datamatch) 121 * - gpr 3 contains the virtqueue index (passed as datamatch)
122 * - gpr 4 contains the index on the bus (optionally)
122 */ 123 */
123 ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, 124 ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
124 vcpu->run->s.regs.gprs[2], 125 vcpu->run->s.regs.gprs[2],
125 8, &vcpu->run->s.regs.gprs[3]); 126 8, &vcpu->run->s.regs.gprs[3],
127 vcpu->run->s.regs.gprs[4]);
126 srcu_read_unlock(&vcpu->kvm->srcu, idx); 128 srcu_read_unlock(&vcpu->kvm->srcu, idx);
127 /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ 129
130 /*
131 * Return cookie in gpr 2, but don't overwrite the register if the
132 * diagnose will be handled by userspace.
133 */
134 if (ret != -EOPNOTSUPP)
135 vcpu->run->s.regs.gprs[2] = ret;
136 /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */
128 return ret < 0 ? ret : 0; 137 return ret < 0 ? ret : 0;
129} 138}
130 139
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 34c1c9a90be2..776dafe918db 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
28#include <asm/pgtable.h> 28#include <asm/pgtable.h>
29#include <asm/nmi.h> 29#include <asm/nmi.h>
30#include <asm/switch_to.h> 30#include <asm/switch_to.h>
31#include <asm/facility.h>
31#include <asm/sclp.h> 32#include <asm/sclp.h>
32#include "kvm-s390.h" 33#include "kvm-s390.h"
33#include "gaccess.h" 34#include "gaccess.h"
@@ -84,9 +85,15 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
84 { NULL } 85 { NULL }
85}; 86};
86 87
87static unsigned long long *facilities; 88unsigned long *vfacilities;
88static struct gmap_notifier gmap_notifier; 89static struct gmap_notifier gmap_notifier;
89 90
91/* test availability of vfacility */
92static inline int test_vfacility(unsigned long nr)
93{
94 return __test_facility(nr, (void *) vfacilities);
95}
96
90/* Section: not file related */ 97/* Section: not file related */
91int kvm_arch_hardware_enable(void *garbage) 98int kvm_arch_hardware_enable(void *garbage)
92{ 99{
@@ -387,7 +394,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
387 vcpu->arch.sie_block->ecb = 6; 394 vcpu->arch.sie_block->ecb = 6;
388 vcpu->arch.sie_block->ecb2 = 8; 395 vcpu->arch.sie_block->ecb2 = 8;
389 vcpu->arch.sie_block->eca = 0xC1002001U; 396 vcpu->arch.sie_block->eca = 0xC1002001U;
390 vcpu->arch.sie_block->fac = (int) (long) facilities; 397 vcpu->arch.sie_block->fac = (int) (long) vfacilities;
391 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 398 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
392 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, 399 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
393 (unsigned long) vcpu); 400 (unsigned long) vcpu);
@@ -1063,6 +1070,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
1063 return 0; 1070 return 0;
1064} 1071}
1065 1072
1073void kvm_arch_memslots_updated(struct kvm *kvm)
1074{
1075}
1076
1066/* Section: memory related */ 1077/* Section: memory related */
1067int kvm_arch_prepare_memory_region(struct kvm *kvm, 1078int kvm_arch_prepare_memory_region(struct kvm *kvm,
1068 struct kvm_memory_slot *memslot, 1079 struct kvm_memory_slot *memslot,
@@ -1129,20 +1140,20 @@ static int __init kvm_s390_init(void)
1129 * to hold the maximum amount of facilities. On the other hand, we 1140 * to hold the maximum amount of facilities. On the other hand, we
1130 * only set facilities that are known to work in KVM. 1141 * only set facilities that are known to work in KVM.
1131 */ 1142 */
1132 facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); 1143 vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
1133 if (!facilities) { 1144 if (!vfacilities) {
1134 kvm_exit(); 1145 kvm_exit();
1135 return -ENOMEM; 1146 return -ENOMEM;
1136 } 1147 }
1137 memcpy(facilities, S390_lowcore.stfle_fac_list, 16); 1148 memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
1138 facilities[0] &= 0xff82fff3f47c0000ULL; 1149 vfacilities[0] &= 0xff82fff3f47c0000UL;
1139 facilities[1] &= 0x001c000000000000ULL; 1150 vfacilities[1] &= 0x001c000000000000UL;
1140 return 0; 1151 return 0;
1141} 1152}
1142 1153
1143static void __exit kvm_s390_exit(void) 1154static void __exit kvm_s390_exit(void)
1144{ 1155{
1145 free_page((unsigned long) facilities); 1156 free_page((unsigned long) vfacilities);
1146 kvm_exit(); 1157 kvm_exit();
1147} 1158}
1148 1159
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 028ca9fd2158..dc99f1ca4267 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -24,6 +24,9 @@
24 24
25typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); 25typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
26 26
27/* declare vfacilities extern */
28extern unsigned long *vfacilities;
29
27/* negativ values are error codes, positive values for internal conditions */ 30/* negativ values are error codes, positive values for internal conditions */
28#define SIE_INTERCEPT_RERUNVCPU (1<<0) 31#define SIE_INTERCEPT_RERUNVCPU (1<<0)
29#define SIE_INTERCEPT_UCONTROL (1<<1) 32#define SIE_INTERCEPT_UCONTROL (1<<1)
@@ -112,6 +115,13 @@ static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
112 return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; 115 return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
113} 116}
114 117
118/* Set the condition code in the guest program status word */
119static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
120{
121 vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
122 vcpu->arch.sie_block->gpsw.mask |= cc << 44;
123}
124
115int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); 125int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
116enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); 126enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
117void kvm_s390_tasklet(unsigned long parm); 127void kvm_s390_tasklet(unsigned long parm);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 4cdc54e63ebc..59200ee275e5 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -164,8 +164,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
164 kfree(inti); 164 kfree(inti);
165no_interrupt: 165no_interrupt:
166 /* Set condition code and we're done. */ 166 /* Set condition code and we're done. */
167 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 167 kvm_s390_set_psw_cc(vcpu, cc);
168 vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
169 return 0; 168 return 0;
170} 169}
171 170
@@ -220,15 +219,13 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
220 * Set condition code 3 to stop the guest from issueing channel 219 * Set condition code 3 to stop the guest from issueing channel
221 * I/O instructions. 220 * I/O instructions.
222 */ 221 */
223 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 222 kvm_s390_set_psw_cc(vcpu, 3);
224 vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
225 return 0; 223 return 0;
226 } 224 }
227} 225}
228 226
229static int handle_stfl(struct kvm_vcpu *vcpu) 227static int handle_stfl(struct kvm_vcpu *vcpu)
230{ 228{
231 unsigned int facility_list;
232 int rc; 229 int rc;
233 230
234 vcpu->stat.instruction_stfl++; 231 vcpu->stat.instruction_stfl++;
@@ -236,15 +233,13 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
236 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 233 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
237 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 234 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
238 235
239 /* only pass the facility bits, which we can handle */
240 facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3;
241
242 rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), 236 rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
243 &facility_list, sizeof(facility_list)); 237 vfacilities, 4);
244 if (rc) 238 if (rc)
245 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 239 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
246 VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); 240 VCPU_EVENT(vcpu, 5, "store facility list value %x",
247 trace_kvm_s390_handle_stfl(vcpu, facility_list); 241 *(unsigned int *) vfacilities);
242 trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
248 return 0; 243 return 0;
249} 244}
250 245
@@ -387,7 +382,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
387 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 382 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
388 383
389 if (fc > 3) { 384 if (fc > 3) {
390 vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ 385 kvm_s390_set_psw_cc(vcpu, 3);
391 return 0; 386 return 0;
392 } 387 }
393 388
@@ -397,7 +392,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
397 392
398 if (fc == 0) { 393 if (fc == 0) {
399 vcpu->run->s.regs.gprs[0] = 3 << 28; 394 vcpu->run->s.regs.gprs[0] = 3 << 28;
400 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ 395 kvm_s390_set_psw_cc(vcpu, 0);
401 return 0; 396 return 0;
402 } 397 }
403 398
@@ -431,12 +426,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
431 } 426 }
432 trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); 427 trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
433 free_page(mem); 428 free_page(mem);
434 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 429 kvm_s390_set_psw_cc(vcpu, 0);
435 vcpu->run->s.regs.gprs[0] = 0; 430 vcpu->run->s.regs.gprs[0] = 0;
436 return 0; 431 return 0;
437out_no_data: 432out_no_data:
438 /* condition code 3 */ 433 kvm_s390_set_psw_cc(vcpu, 3);
439 vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
440out_exception: 434out_exception:
441 free_page(mem); 435 free_page(mem);
442 return rc; 436 return rc;
@@ -494,12 +488,12 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
494 kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); 488 kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
495 489
496 /* This basically extracts the mask half of the psw. */ 490 /* This basically extracts the mask half of the psw. */
497 vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; 491 vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
498 vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; 492 vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
499 if (reg2) { 493 if (reg2) {
500 vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; 494 vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
501 vcpu->run->s.regs.gprs[reg2] |= 495 vcpu->run->s.regs.gprs[reg2] |=
502 vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; 496 vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
503 } 497 }
504 return 0; 498 return 0;
505} 499}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 6d16132d0850..bf7c0dc64a76 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
335 335
336 if ((from | to | len) & (PMD_SIZE - 1)) 336 if ((from | to | len) & (PMD_SIZE - 1))
337 return -EINVAL; 337 return -EINVAL;
338 if (len == 0 || from + len > PGDIR_SIZE || 338 if (len == 0 || from + len > TASK_MAX_SIZE ||
339 from + len < from || to + len < to) 339 from + len < from || to + len < to)
340 return -EINVAL; 340 return -EINVAL;
341 341
@@ -732,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
732 spin_unlock(&gmap_notifier_lock); 732 spin_unlock(&gmap_notifier_lock);
733} 733}
734 734
735static inline int page_table_with_pgste(struct page *page)
736{
737 return atomic_read(&page->_mapcount) == 0;
738}
739
735static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 740static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
736 unsigned long vmaddr) 741 unsigned long vmaddr)
737{ 742{
@@ -751,7 +756,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
751 mp->vmaddr = vmaddr & PMD_MASK; 756 mp->vmaddr = vmaddr & PMD_MASK;
752 INIT_LIST_HEAD(&mp->mapper); 757 INIT_LIST_HEAD(&mp->mapper);
753 page->index = (unsigned long) mp; 758 page->index = (unsigned long) mp;
754 atomic_set(&page->_mapcount, 3); 759 atomic_set(&page->_mapcount, 0);
755 table = (unsigned long *) page_to_phys(page); 760 table = (unsigned long *) page_to_phys(page);
756 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); 761 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
757 clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, 762 clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
@@ -818,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key);
818 823
819#else /* CONFIG_PGSTE */ 824#else /* CONFIG_PGSTE */
820 825
826static inline int page_table_with_pgste(struct page *page)
827{
828 return 0;
829}
830
821static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 831static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
822 unsigned long vmaddr) 832 unsigned long vmaddr)
823{ 833{
@@ -894,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
894 struct page *page; 904 struct page *page;
895 unsigned int bit, mask; 905 unsigned int bit, mask;
896 906
897 if (mm_has_pgste(mm)) { 907 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
908 if (page_table_with_pgste(page)) {
898 gmap_disconnect_pgtable(mm, table); 909 gmap_disconnect_pgtable(mm, table);
899 return page_table_free_pgste(table); 910 return page_table_free_pgste(table);
900 } 911 }
901 /* Free 1K/2K page table fragment of a 4K page */ 912 /* Free 1K/2K page table fragment of a 4K page */
902 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
903 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); 913 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
904 spin_lock_bh(&mm->context.list_lock); 914 spin_lock_bh(&mm->context.list_lock);
905 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) 915 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -937,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
937 unsigned int bit, mask; 947 unsigned int bit, mask;
938 948
939 mm = tlb->mm; 949 mm = tlb->mm;
940 if (mm_has_pgste(mm)) { 950 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
951 if (page_table_with_pgste(page)) {
941 gmap_disconnect_pgtable(mm, table); 952 gmap_disconnect_pgtable(mm, table);
942 table = (unsigned long *) (__pa(table) | FRAG_MASK); 953 table = (unsigned long *) (__pa(table) | FRAG_MASK);
943 tlb_remove_table(tlb, table); 954 tlb_remove_table(tlb, table);
944 return; 955 return;
945 } 956 }
946 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); 957 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
947 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
948 spin_lock_bh(&mm->context.list_lock); 958 spin_lock_bh(&mm->context.list_lock);
949 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) 959 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
950 list_del(&page->lru); 960 list_del(&page->lru);
@@ -1030,36 +1040,120 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
1030} 1040}
1031 1041
1032#ifdef CONFIG_TRANSPARENT_HUGEPAGE 1042#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1033void thp_split_vma(struct vm_area_struct *vma) 1043static inline void thp_split_vma(struct vm_area_struct *vma)
1034{ 1044{
1035 unsigned long addr; 1045 unsigned long addr;
1036 struct page *page;
1037 1046
1038 for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { 1047 for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
1039 page = follow_page(vma, addr, FOLL_SPLIT); 1048 follow_page(vma, addr, FOLL_SPLIT);
1040 }
1041} 1049}
1042 1050
1043void thp_split_mm(struct mm_struct *mm) 1051static inline void thp_split_mm(struct mm_struct *mm)
1044{ 1052{
1045 struct vm_area_struct *vma = mm->mmap; 1053 struct vm_area_struct *vma;
1046 1054
1047 while (vma != NULL) { 1055 for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1048 thp_split_vma(vma); 1056 thp_split_vma(vma);
1049 vma->vm_flags &= ~VM_HUGEPAGE; 1057 vma->vm_flags &= ~VM_HUGEPAGE;
1050 vma->vm_flags |= VM_NOHUGEPAGE; 1058 vma->vm_flags |= VM_NOHUGEPAGE;
1051 vma = vma->vm_next;
1052 } 1059 }
1060 mm->def_flags |= VM_NOHUGEPAGE;
1061}
1062#else
1063static inline void thp_split_mm(struct mm_struct *mm)
1064{
1053} 1065}
1054#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1066#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1055 1067
1068static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
1069 struct mm_struct *mm, pud_t *pud,
1070 unsigned long addr, unsigned long end)
1071{
1072 unsigned long next, *table, *new;
1073 struct page *page;
1074 pmd_t *pmd;
1075
1076 pmd = pmd_offset(pud, addr);
1077 do {
1078 next = pmd_addr_end(addr, end);
1079again:
1080 if (pmd_none_or_clear_bad(pmd))
1081 continue;
1082 table = (unsigned long *) pmd_deref(*pmd);
1083 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
1084 if (page_table_with_pgste(page))
1085 continue;
1086 /* Allocate new page table with pgstes */
1087 new = page_table_alloc_pgste(mm, addr);
1088 if (!new) {
1089 mm->context.has_pgste = 0;
1090 continue;
1091 }
1092 spin_lock(&mm->page_table_lock);
1093 if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
1094 /* Nuke pmd entry pointing to the "short" page table */
1095 pmdp_flush_lazy(mm, addr, pmd);
1096 pmd_clear(pmd);
1097 /* Copy ptes from old table to new table */
1098 memcpy(new, table, PAGE_SIZE/2);
1099 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
1100 /* Establish new table */
1101 pmd_populate(mm, pmd, (pte_t *) new);
1102 /* Free old table with rcu, there might be a walker! */
1103 page_table_free_rcu(tlb, table);
1104 new = NULL;
1105 }
1106 spin_unlock(&mm->page_table_lock);
1107 if (new) {
1108 page_table_free_pgste(new);
1109 goto again;
1110 }
1111 } while (pmd++, addr = next, addr != end);
1112
1113 return addr;
1114}
1115
1116static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
1117 struct mm_struct *mm, pgd_t *pgd,
1118 unsigned long addr, unsigned long end)
1119{
1120 unsigned long next;
1121 pud_t *pud;
1122
1123 pud = pud_offset(pgd, addr);
1124 do {
1125 next = pud_addr_end(addr, end);
1126 if (pud_none_or_clear_bad(pud))
1127 continue;
1128 next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
1129 } while (pud++, addr = next, addr != end);
1130
1131 return addr;
1132}
1133
1134static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
1135 unsigned long addr, unsigned long end)
1136{
1137 unsigned long next;
1138 pgd_t *pgd;
1139
1140 pgd = pgd_offset(mm, addr);
1141 do {
1142 next = pgd_addr_end(addr, end);
1143 if (pgd_none_or_clear_bad(pgd))
1144 continue;
1145 next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
1146 } while (pgd++, addr = next, addr != end);
1147}
1148
1056/* 1149/*
1057 * switch on pgstes for its userspace process (for kvm) 1150 * switch on pgstes for its userspace process (for kvm)
1058 */ 1151 */
1059int s390_enable_sie(void) 1152int s390_enable_sie(void)
1060{ 1153{
1061 struct task_struct *tsk = current; 1154 struct task_struct *tsk = current;
1062 struct mm_struct *mm, *old_mm; 1155 struct mm_struct *mm = tsk->mm;
1156 struct mmu_gather tlb;
1063 1157
1064 /* Do we have switched amode? If no, we cannot do sie */ 1158 /* Do we have switched amode? If no, we cannot do sie */
1065 if (s390_user_mode == HOME_SPACE_MODE) 1159 if (s390_user_mode == HOME_SPACE_MODE)
@@ -1069,57 +1163,16 @@ int s390_enable_sie(void)
1069 if (mm_has_pgste(tsk->mm)) 1163 if (mm_has_pgste(tsk->mm))
1070 return 0; 1164 return 0;
1071 1165
1072 /* lets check if we are allowed to replace the mm */ 1166 down_write(&mm->mmap_sem);
1073 task_lock(tsk);
1074 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
1075#ifdef CONFIG_AIO
1076 !hlist_empty(&tsk->mm->ioctx_list) ||
1077#endif
1078 tsk->mm != tsk->active_mm) {
1079 task_unlock(tsk);
1080 return -EINVAL;
1081 }
1082 task_unlock(tsk);
1083
1084 /* we copy the mm and let dup_mm create the page tables with_pgstes */
1085 tsk->mm->context.alloc_pgste = 1;
1086 /* make sure that both mms have a correct rss state */
1087 sync_mm_rss(tsk->mm);
1088 mm = dup_mm(tsk);
1089 tsk->mm->context.alloc_pgste = 0;
1090 if (!mm)
1091 return -ENOMEM;
1092
1093#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1094 /* split thp mappings and disable thp for future mappings */ 1167 /* split thp mappings and disable thp for future mappings */
1095 thp_split_mm(mm); 1168 thp_split_mm(mm);
1096 mm->def_flags |= VM_NOHUGEPAGE; 1169 /* Reallocate the page tables with pgstes */
1097#endif 1170 mm->context.has_pgste = 1;
1098 1171 tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
1099 /* Now lets check again if something happened */ 1172 page_table_realloc(&tlb, mm, 0, TASK_SIZE);
1100 task_lock(tsk); 1173 tlb_finish_mmu(&tlb, 0, TASK_SIZE);
1101 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || 1174 up_write(&mm->mmap_sem);
1102#ifdef CONFIG_AIO 1175 return mm->context.has_pgste ? 0 : -ENOMEM;
1103 !hlist_empty(&tsk->mm->ioctx_list) ||
1104#endif
1105 tsk->mm != tsk->active_mm) {
1106 mmput(mm);
1107 task_unlock(tsk);
1108 return -EINVAL;
1109 }
1110
1111 /* ok, we are alone. No ptrace, no threads, etc. */
1112 old_mm = tsk->mm;
1113 tsk->mm = tsk->active_mm = mm;
1114 preempt_disable();
1115 update_mm(mm, tsk);
1116 atomic_inc(&mm->context.attach_count);
1117 atomic_dec(&old_mm->context.attach_count);
1118 cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
1119 preempt_enable();
1120 task_unlock(tsk);
1121 mmput(old_mm);
1122 return 0;
1123} 1176}
1124EXPORT_SYMBOL_GPL(s390_enable_sie); 1177EXPORT_SYMBOL_GPL(s390_enable_sie);
1125 1178