Diffstat (limited to 'arch/s390/include/asm')
-rw-r--r--   arch/s390/include/asm/cmpxchg.h        1
-rw-r--r--   arch/s390/include/asm/elf.h           12
-rw-r--r--   arch/s390/include/asm/hugetlb.h       17
-rw-r--r--   arch/s390/include/asm/irq.h            1
-rw-r--r--   arch/s390/include/asm/lowcore.h        4
-rw-r--r--   arch/s390/include/asm/mmu.h            9
-rw-r--r--   arch/s390/include/asm/mmu_context.h    6
-rw-r--r--   arch/s390/include/asm/page.h          60
-rw-r--r--   arch/s390/include/asm/percpu.h        68
-rw-r--r--   arch/s390/include/asm/pgalloc.h       57
-rw-r--r--   arch/s390/include/asm/pgtable.h      607
-rw-r--r--   arch/s390/include/asm/processor.h      1
-rw-r--r--   arch/s390/include/asm/tlbflush.h      13
-rw-r--r--   arch/s390/include/asm/unistd.h         1
14 files changed, 460 insertions, 397 deletions
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 7488e52efa97..81d7908416cf 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -167,7 +167,6 @@ static inline unsigned long __cmpxchg(void *ptr, unsigned long old,
 #ifdef CONFIG_64BIT
 #define cmpxchg64(ptr, o, n)					\
 ({								\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);			\
 	cmpxchg((ptr), (o), (n));				\
 })
 #else /* CONFIG_64BIT */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 10c029cfcc7d..64b61bf72e93 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -196,18 +196,6 @@ do { \
 } while (0)
 #endif /* __s390x__ */
 
-/*
- * An executable for which elf_read_implies_exec() returns TRUE will
- * have the READ_IMPLIES_EXEC personality flag set automatically.
- */
-#define elf_read_implies_exec(ex, executable_stack)	\
-({							\
-	if (current->mm->context.noexec &&		\
-	    executable_stack != EXSTACK_DISABLE_X)	\
-		disable_noexec(current->mm, current);	\
-	current->mm->context.noexec == 0;		\
-})
-
 #define STACK_RND_MASK	0x7ffUL
 
 #define ARCH_DLINFO							\
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index b56403c2df28..799ed0f1643d 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -111,21 +111,10 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm,
 {
 	pmd_t *pmdp = (pmd_t *) ptep;
 
-	if (!MACHINE_HAS_IDTE) {
-		__pmd_csp(pmdp);
-		if (mm->context.noexec) {
-			pmdp = get_shadow_table(pmdp);
-			__pmd_csp(pmdp);
-		}
-		return;
-	}
-
-	__pmd_idte(address, pmdp);
-	if (mm->context.noexec) {
-		pmdp = get_shadow_table(pmdp);
+	if (MACHINE_HAS_IDTE)
 		__pmd_idte(address, pmdp);
-	}
-	return;
+	else
+		__pmd_csp(pmdp);
 }
 
 #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index db14a311f1d2..1544b90bd6d6 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -15,6 +15,7 @@ enum interruption_class {
 	EXTINT_VRT,
 	EXTINT_SCP,
 	EXTINT_IUC,
+	EXTINT_CPM,
 	IOINT_QAI,
 	IOINT_QDI,
 	IOINT_DAS,
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 65e172f8209d..228cf0b295db 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -124,7 +124,7 @@ struct _lowcore {
 	/* Address space pointer. */
 	__u32	kernel_asce;			/* 0x02ac */
 	__u32	user_asce;			/* 0x02b0 */
-	__u32	user_exec_asce;			/* 0x02b4 */
+	__u32	current_pid;			/* 0x02b4 */
 
 	/* SMP info area */
 	__u32	cpu_nr;				/* 0x02b8 */
@@ -255,7 +255,7 @@ struct _lowcore {
 	/* Address space pointer. */
 	__u64	kernel_asce;			/* 0x0310 */
 	__u64	user_asce;			/* 0x0318 */
-	__u64	user_exec_asce;			/* 0x0320 */
+	__u64	current_pid;			/* 0x0320 */
 
 	/* SMP info area */
 	__u32	cpu_nr;				/* 0x0328 */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 78522cdefdd4..82d0847896a0 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -5,19 +5,18 @@ typedef struct {
 	atomic_t attach_count;
 	unsigned int flush_mm;
 	spinlock_t list_lock;
-	struct list_head crst_list;
 	struct list_head pgtable_list;
 	unsigned long asce_bits;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
-	int noexec;
-	int has_pgste;	 /* The mmu context has extended page tables */
-	int alloc_pgste; /* cloned contexts will have extended page tables */
+	/* Cloned contexts will be created with extended page tables. */
+	unsigned int alloc_pgste:1;
+	/* The mmu context has extended page tables. */
+	unsigned int has_pgste:1;
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)						      \
 	.context.list_lock    = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \
-	.context.crst_list    = LIST_HEAD_INIT(name.context.crst_list),       \
 	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list),
 
 #endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 8c277caa8d3a..5682f160ff82 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -35,11 +35,9 @@ static inline int init_new_context(struct task_struct *tsk,
 	 * and if has_pgste is set, it will create extended page
 	 * tables.
 	 */
-		mm->context.noexec = 0;
 		mm->context.has_pgste = 1;
 		mm->context.alloc_pgste = 1;
 	} else {
-		mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE);
 		mm->context.has_pgste = 0;
 		mm->context.alloc_pgste = 0;
 	}
@@ -63,10 +61,8 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
 	S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
 	if (user_mode != HOME_SPACE_MODE) {
 		/* Load primary space page table origin. */
-		pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd;
-		S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd);
 		asm volatile(LCTL_OPCODE" 1,1,%0\n"
-			     : : "m" (S390_lowcore.user_exec_asce) );
+			     : : "m" (S390_lowcore.user_asce) );
 	} else
 		/* Load home space page table origin. */
 		asm volatile(LCTL_OPCODE" 13,13,%0"
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 3c987e9ec8d6..accb372ddc7e 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -90,6 +90,7 @@ static inline void copy_page(void *to, void *from)
  */
 
 typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct { unsigned long pgste; } pgste_t;
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pud; } pud_t;
@@ -97,18 +98,21 @@ typedef struct { unsigned long pgd; } pgd_t;
 typedef pte_t *pgtable_t;
 
 #define pgprot_val(x)	((x).pgprot)
+#define pgste_val(x)	((x).pgste)
 #define pte_val(x)	((x).pte)
 #define pmd_val(x)	((x).pmd)
 #define pud_val(x)	((x).pud)
 #define pgd_val(x)	((x).pgd)
 
+#define __pgste(x)	((pgste_t) { (x) } )
 #define __pte(x)	((pte_t) { (x) } )
 #define __pmd(x)	((pmd_t) { (x) } )
+#define __pud(x)	((pud_t) { (x) } )
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-static inline void
-page_set_storage_key(unsigned long addr, unsigned int skey, int mapped)
+static inline void page_set_storage_key(unsigned long addr,
+					unsigned char skey, int mapped)
 {
 	if (!mapped)
 		asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0"
@@ -117,15 +121,59 @@ page_set_storage_key(unsigned long addr, unsigned int skey, int mapped)
 		asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
 }
 
-static inline unsigned int
-page_get_storage_key(unsigned long addr)
+static inline unsigned char page_get_storage_key(unsigned long addr)
 {
-	unsigned int skey;
+	unsigned char skey;
 
-	asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0));
+	asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr));
 	return skey;
 }
 
+static inline int page_reset_referenced(unsigned long addr)
+{
+	unsigned int ipm;
+
+	asm volatile(
+		"	rrbe	0,%1\n"
+		"	ipm	%0\n"
+		: "=d" (ipm) : "a" (addr) : "cc");
+	return !!(ipm & 0x20000000);
+}
+
+/* Bits int the storage key */
+#define _PAGE_CHANGED		0x02	/* HW changed bit		*/
+#define _PAGE_REFERENCED	0x04	/* HW referenced bit		*/
+#define _PAGE_FP_BIT		0x08	/* HW fetch protection bit	*/
+#define _PAGE_ACC_BITS		0xf0	/* HW access control bits	*/
+
+/*
+ * Test and clear dirty bit in storage key.
+ * We can't clear the changed bit atomically. This is a potential
+ * race against modification of the referenced bit. This function
+ * should therefore only be called if it is not mapped in any
+ * address space.
+ */
+#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
+static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped)
+{
+	unsigned char skey;
+
+	skey = page_get_storage_key(pfn << PAGE_SHIFT);
+	if (!(skey & _PAGE_CHANGED))
+		return 0;
+	page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped);
+	return 1;
+}
+
+/*
+ * Test and clear referenced bit in storage key.
+ */
+#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
+static inline int page_test_and_clear_young(unsigned long pfn)
+{
+	return page_reset_referenced(pfn << PAGE_SHIFT);
+}
+
 struct page;
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index f7ad8719d02d..5325c89a5843 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -1,6 +1,9 @@
 #ifndef __ARCH_S390_PERCPU__
 #define __ARCH_S390_PERCPU__
 
+#include <linux/preempt.h>
+#include <asm/cmpxchg.h>
+
 /*
  * s390 uses its own implementation for per cpu data, the offset of
  * the cpu local data area is cached in the cpu's lowcore memory.
@@ -16,6 +19,71 @@
 #define ARCH_NEEDS_WEAK_PER_CPU
 #endif
 
+#define arch_irqsafe_cpu_to_op(pcp, val, op)				\
+do {									\
+	typedef typeof(pcp) pcp_op_T__;					\
+	pcp_op_T__ old__, new__, prev__;				\
+	pcp_op_T__ *ptr__;						\
+	preempt_disable();						\
+	ptr__ = __this_cpu_ptr(&(pcp));					\
+	prev__ = *ptr__;						\
+	do {								\
+		old__ = prev__;						\
+		new__ = old__ op (val);					\
+		switch (sizeof(*ptr__)) {				\
+		case 8:							\
+			prev__ = cmpxchg64(ptr__, old__, new__);	\
+			break;						\
+		default:						\
+			prev__ = cmpxchg(ptr__, old__, new__);		\
+		}							\
+	} while (prev__ != old__);					\
+	preempt_enable();						\
+} while (0)
+
+#define irqsafe_cpu_add_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +)
+#define irqsafe_cpu_add_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +)
+#define irqsafe_cpu_add_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +)
+#define irqsafe_cpu_add_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +)
+
+#define irqsafe_cpu_and_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &)
+#define irqsafe_cpu_and_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &)
+#define irqsafe_cpu_and_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &)
+#define irqsafe_cpu_and_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &)
+
+#define irqsafe_cpu_or_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |)
+#define irqsafe_cpu_or_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |)
+#define irqsafe_cpu_or_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |)
+#define irqsafe_cpu_or_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |)
+
+#define irqsafe_cpu_xor_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^)
+#define irqsafe_cpu_xor_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^)
+#define irqsafe_cpu_xor_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^)
+#define irqsafe_cpu_xor_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^)
+
+#define arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)			\
+({									\
+	typedef typeof(pcp) pcp_op_T__;					\
+	pcp_op_T__ ret__;						\
+	pcp_op_T__ *ptr__;						\
+	preempt_disable();						\
+	ptr__ = __this_cpu_ptr(&(pcp));					\
+	switch (sizeof(*ptr__)) {					\
+	case 8:								\
+		ret__ = cmpxchg64(ptr__, oval, nval);			\
+		break;							\
+	default:							\
+		ret__ = cmpxchg(ptr__, oval, nval);			\
+	}								\
+	preempt_enable();						\
+	ret__;								\
+})
+
+#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)
+
 #include <asm-generic/percpu.h>
 
 #endif /* __ARCH_S390_PERCPU__ */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 082eb4e50e8b..f6314af3b354 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -19,14 +19,13 @@
 
 #define check_pgt_cache()	do {} while (0)
 
-unsigned long *crst_table_alloc(struct mm_struct *, int);
+unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
 void crst_table_free_rcu(struct mm_struct *, unsigned long *);
 
 unsigned long *page_table_alloc(struct mm_struct *);
 void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free_rcu(struct mm_struct *, unsigned long *);
-void disable_noexec(struct mm_struct *, struct task_struct *);
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
@@ -50,9 +49,6 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 static inline void crst_table_init(unsigned long *crst, unsigned long entry)
 {
 	clear_table(crst, entry, sizeof(unsigned long)*2048);
-	crst = get_shadow_table(crst);
-	if (crst)
-		clear_table(crst, entry, sizeof(unsigned long)*2048);
 }
 
 #ifndef __s390x__
@@ -69,10 +65,7 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 #define pmd_free(mm, x)				do { } while (0)
 
 #define pgd_populate(mm, pgd, pud)		BUG()
-#define pgd_populate_kernel(mm, pgd, pud)	BUG()
-
 #define pud_populate(mm, pud, pmd)		BUG()
-#define pud_populate_kernel(mm, pud, pmd)	BUG()
 
 #else /* __s390x__ */
 
@@ -90,7 +83,7 @@ void crst_table_downgrade(struct mm_struct *, unsigned long limit);
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	unsigned long *table = crst_table_alloc(mm, mm->context.noexec);
+	unsigned long *table = crst_table_alloc(mm);
 	if (table)
 		crst_table_init(table, _REGION3_ENTRY_EMPTY);
 	return (pud_t *) table;
@@ -99,43 +92,21 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
-	unsigned long *table = crst_table_alloc(mm, mm->context.noexec);
+	unsigned long *table = crst_table_alloc(mm);
 	if (table)
 		crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
 	return (pmd_t *) table;
 }
 #define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd)
 
-static inline void pgd_populate_kernel(struct mm_struct *mm,
-				       pgd_t *pgd, pud_t *pud)
-{
-	pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
-}
-
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 {
-	pgd_populate_kernel(mm, pgd, pud);
-	if (mm->context.noexec) {
-		pgd = get_shadow_table(pgd);
-		pud = get_shadow_table(pud);
-		pgd_populate_kernel(mm, pgd, pud);
-	}
-}
-
-static inline void pud_populate_kernel(struct mm_struct *mm,
-				       pud_t *pud, pmd_t *pmd)
-{
-	pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
+	pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	pud_populate_kernel(mm, pud, pmd);
-	if (mm->context.noexec) {
-		pud = get_shadow_table(pud);
-		pmd = get_shadow_table(pmd);
-		pud_populate_kernel(mm, pud, pmd);
-	}
+	pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
 }
 
 #endif /* __s390x__ */
@@ -143,29 +114,19 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	spin_lock_init(&mm->context.list_lock);
-	INIT_LIST_HEAD(&mm->context.crst_list);
 	INIT_LIST_HEAD(&mm->context.pgtable_list);
-	return (pgd_t *)
-		crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE);
+	return (pgd_t *) crst_table_alloc(mm);
 }
 #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
 
-static inline void pmd_populate_kernel(struct mm_struct *mm,
-				       pmd_t *pmd, pte_t *pte)
-{
-	pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
-}
-
 static inline void pmd_populate(struct mm_struct *mm,
 				pmd_t *pmd, pgtable_t pte)
 {
-	pmd_populate_kernel(mm, pmd, pte);
-	if (mm->context.noexec) {
-		pmd = get_shadow_table(pmd);
-		pmd_populate_kernel(mm, pmd, pte + PTRS_PER_PTE);
-	}
+	pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
 }
 
+#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
+
 #define pmd_pgtable(pmd) \
 	(pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)
 
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 02ace3491c51..c4773a2ef3d3 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -31,9 +31,8 @@
 #ifndef __ASSEMBLY__
 #include <linux/sched.h>
 #include <linux/mm_types.h>
-#include <asm/bitops.h>
 #include <asm/bug.h>
-#include <asm/processor.h>
+#include <asm/page.h>
 
 extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
 extern void paging_init(void);
@@ -243,11 +242,13 @@ extern unsigned long VMALLOC_START;
 /* Software bits in the page table entry */
 #define _PAGE_SWT	0x001		/* SW pte type bit t */
 #define _PAGE_SWX	0x002		/* SW pte type bit x */
-#define _PAGE_SPECIAL	0x004		/* SW associated with special page */
+#define _PAGE_SWC	0x004		/* SW pte changed bit (for KVM) */
+#define _PAGE_SWR	0x008		/* SW pte referenced bit (for KVM) */
+#define _PAGE_SPECIAL	0x010		/* SW associated with special page */
 #define __HAVE_ARCH_PTE_SPECIAL
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_SPECIAL)
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR)
 
 /* Six different types of pages. */
 #define _PAGE_TYPE_EMPTY	0x400
@@ -256,8 +257,6 @@ extern unsigned long VMALLOC_START;
 #define _PAGE_TYPE_FILE		0x601	/* bit 0x002 is used for offset !! */
 #define _PAGE_TYPE_RO		0x200
 #define _PAGE_TYPE_RW		0x000
-#define _PAGE_TYPE_EX_RO	0x202
-#define _PAGE_TYPE_EX_RW	0x002
 
 /*
  * Only four types for huge pages, using the invalid bit and protection bit
@@ -287,8 +286,6 @@ extern unsigned long VMALLOC_START;
  * _PAGE_TYPE_FILE	11?1   ->  11?1
  * _PAGE_TYPE_RO	0100   ->  1100
  * _PAGE_TYPE_RW	0000   ->  1000
- * _PAGE_TYPE_EX_RO	0110   ->  1110
- * _PAGE_TYPE_EX_RW	0010   ->  1010
  *
  * pte_none is true for bits combinations 1000, 1010, 1100, 1110
  * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001
@@ -297,14 +294,17 @@ extern unsigned long VMALLOC_START;
  */
 
 /* Page status table bits for virtualization */
-#define RCP_PCL_BIT	55
-#define RCP_HR_BIT	54
-#define RCP_HC_BIT	53
-#define RCP_GR_BIT	50
-#define RCP_GC_BIT	49
-
-/* User dirty bit for KVM's migration feature */
-#define KVM_UD_BIT	47
+#define RCP_ACC_BITS	0xf000000000000000UL
+#define RCP_FP_BIT	0x0800000000000000UL
+#define RCP_PCL_BIT	0x0080000000000000UL
+#define RCP_HR_BIT	0x0040000000000000UL
+#define RCP_HC_BIT	0x0020000000000000UL
+#define RCP_GR_BIT	0x0004000000000000UL
+#define RCP_GC_BIT	0x0002000000000000UL
+
+/* User dirty / referenced bit for KVM's migration feature */
+#define KVM_UR_BIT	0x0000800000000000UL
+#define KVM_UC_BIT	0x0000400000000000UL
 
 #ifndef __s390x__
 
@@ -377,85 +377,54 @@ extern unsigned long VMALLOC_START;
 #define _ASCE_USER_BITS		(_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
 				 _ASCE_ALT_EVENT)
 
-/* Bits int the storage key */
-#define _PAGE_CHANGED	0x02		/* HW changed bit		*/
-#define _PAGE_REFERENCED 0x04		/* HW referenced bit		*/
-
 /*
  * Page protection definitions.
  */
 #define PAGE_NONE	__pgprot(_PAGE_TYPE_NONE)
 #define PAGE_RO		__pgprot(_PAGE_TYPE_RO)
 #define PAGE_RW		__pgprot(_PAGE_TYPE_RW)
-#define PAGE_EX_RO	__pgprot(_PAGE_TYPE_EX_RO)
-#define PAGE_EX_RW	__pgprot(_PAGE_TYPE_EX_RW)
 
 #define PAGE_KERNEL	PAGE_RW
 #define PAGE_COPY	PAGE_RO
 
 /*
- * Dependent on the EXEC_PROTECT option s390 can do execute protection.
- * Write permission always implies read permission. In theory with a
- * primary/secondary page table execute only can be implemented but
- * it would cost an additional bit in the pte to distinguish all the
- * different pte types. To avoid that execute permission currently
- * implies read permission as well.
+ * On s390 the page table entry has an invalid bit and a read-only bit.
+ * Read permission implies execute permission and write permission
+ * implies read permission.
  */
          /*xwr*/
 #define __P000	PAGE_NONE
 #define __P001	PAGE_RO
 #define __P010	PAGE_RO
 #define __P011	PAGE_RO
-#define __P100	PAGE_EX_RO
-#define __P101	PAGE_EX_RO
-#define __P110	PAGE_EX_RO
-#define __P111	PAGE_EX_RO
+#define __P100	PAGE_RO
+#define __P101	PAGE_RO
+#define __P110	PAGE_RO
+#define __P111	PAGE_RO
 
 #define __S000	PAGE_NONE
 #define __S001	PAGE_RO
 #define __S010	PAGE_RW
 #define __S011	PAGE_RW
-#define __S100	PAGE_EX_RO
-#define __S101	PAGE_EX_RO
-#define __S110	PAGE_EX_RW
-#define __S111	PAGE_EX_RW
-
-#ifndef __s390x__
-# define PxD_SHADOW_SHIFT	1
-#else /* __s390x__ */
-# define PxD_SHADOW_SHIFT	2
-#endif /* __s390x__ */
+#define __S100	PAGE_RO
+#define __S101	PAGE_RO
+#define __S110	PAGE_RW
+#define __S111	PAGE_RW
 
-static inline void *get_shadow_table(void *table)
+static inline int mm_exclusive(struct mm_struct *mm)
 {
-	unsigned long addr, offset;
-	struct page *page;
-
-	addr = (unsigned long) table;
-	offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1);
-	page = virt_to_page((void *)(addr ^ offset));
-	return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL);
+	return likely(mm == current->active_mm &&
+		      atomic_read(&mm->context.attach_count) <= 1);
 }
 
-/*
- * Certain architectures need to do special things when PTEs
- * within a page table are directly modified. Thus, the following
- * hook is made available.
- */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t entry)
+static inline int mm_has_pgste(struct mm_struct *mm)
 {
-	*ptep = entry;
-	if (mm->context.noexec) {
-		if (!(pte_val(entry) & _PAGE_INVALID) &&
-		    (pte_val(entry) & _PAGE_SWX))
-			pte_val(entry) |= _PAGE_RO;
-		else
-			pte_val(entry) = _PAGE_TYPE_EMPTY;
-		ptep[PTRS_PER_PTE] = entry;
-	}
+#ifdef CONFIG_PGSTE
+	if (unlikely(mm->context.has_pgste))
+		return 1;
+#endif
+	return 0;
 }
-
 /*
  * pgd/pmd/pte query functions
  */
@@ -568,52 +537,127 @@ static inline int pte_special(pte_t pte)
 }
 
 #define __HAVE_ARCH_PTE_SAME
-#define pte_same(a,b)  (pte_val(a) == pte_val(b))
+static inline int pte_same(pte_t a, pte_t b)
+{
+	return pte_val(a) == pte_val(b);
+}
 
-static inline void rcp_lock(pte_t *ptep)
+static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
+	unsigned long new = 0;
 #ifdef CONFIG_PGSTE
-	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
+	unsigned long old;
+
 	preempt_disable();
-	while (test_and_set_bit(RCP_PCL_BIT, pgste))
-		;
+	asm(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	nihh	%0,0xff7f\n"	/* clear RCP_PCL_BIT in old */
+		"	oihh	%1,0x0080\n"	/* set RCP_PCL_BIT in new */
+		"	csg	%0,%1,%2\n"
+		"	jl	0b\n"
+		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+		: "Q" (ptep[PTRS_PER_PTE]) : "cc");
 #endif
+	return __pgste(new);
 }
 
-static inline void rcp_unlock(pte_t *ptep)
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
-	clear_bit(RCP_PCL_BIT, pgste);
+	asm(
+		"	nihh	%1,0xff7f\n"	/* clear RCP_PCL_BIT */
+		"	stg	%1,%0\n"
+		: "=Q" (ptep[PTRS_PER_PTE])
+		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc");
 	preempt_enable();
 #endif
 }
 
-/* forward declaration for SetPageUptodate in page-flags.h*/
-static inline void page_clear_dirty(struct page *page, int mapped);
-#include <linux/page-flags.h>
-
-static inline void ptep_rcp_copy(pte_t *ptep)
+static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	struct page *page = virt_to_page(pte_val(*ptep));
-	unsigned int skey;
-	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
-
-	skey = page_get_storage_key(page_to_phys(page));
-	if (skey & _PAGE_CHANGED) {
-		set_bit_simple(RCP_GC_BIT, pgste);
-		set_bit_simple(KVM_UD_BIT, pgste);
+	unsigned long pfn, bits;
+	unsigned char skey;
+
+	pfn = pte_val(*ptep) >> PAGE_SHIFT;
+	skey = page_get_storage_key(pfn);
+	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+	/* Clear page changed & referenced bit in the storage key */
+	if (bits) {
+		skey ^= bits;
+		page_set_storage_key(pfn, skey, 1);
 	}
-	if (skey & _PAGE_REFERENCED)
-		set_bit_simple(RCP_GR_BIT, pgste);
-	if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
-		SetPageDirty(page);
-		set_bit_simple(KVM_UD_BIT, pgste);
-	}
-	if (test_and_clear_bit_simple(RCP_HR_BIT, pgste))
-		SetPageReferenced(page);
+	/* Transfer page changed & referenced bit to guest bits in pgste */
+	pgste_val(pgste) |= bits << 48;		/* RCP_GR_BIT & RCP_GC_BIT */
+	/* Get host changed & referenced bits from pgste */
+	bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
+	/* Clear host bits in pgste. */
+	pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
+	pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
+	/* Copy page access key and fetch protection bit to pgste */
+	pgste_val(pgste) |=
+		(unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+	/* Transfer changed and referenced to kvm user bits */
+	pgste_val(pgste) |= bits << 45;		/* KVM_UR_BIT & KVM_UC_BIT */
+	/* Transfer changed & referenced to pte sofware bits */
+	pte_val(*ptep) |= bits << 1;		/* _PAGE_SWR & _PAGE_SWC */
 #endif
+	return pgste;
+
+}
+
+static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	int young;
+
+	young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
+	/* Transfer page referenced bit to pte software bit (host view) */
+	if (young || (pgste_val(pgste) & RCP_HR_BIT))
+		pte_val(*ptep) |= _PAGE_SWR;
+	/* Clear host referenced bit in pgste. */
+	pgste_val(pgste) &= ~RCP_HR_BIT;
+	/* Transfer page referenced bit to guest bit in pgste */
+	pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */
+#endif
+	return pgste;
+
+}
+
+static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long pfn;
+	unsigned long okey, nkey;
+
+	pfn = pte_val(*ptep) >> PAGE_SHIFT;
+	okey = nkey = page_get_storage_key(pfn);
+	nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT);
+	/* Set page access key and fetch protection bit from pgste */
+	nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
+	if (okey != nkey)
+		page_set_storage_key(pfn, nkey, 1);
+#endif
+}
+
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t entry)
+{
+	pgste_t pgste;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste_set_pte(ptep, pgste);
+		*ptep = entry;
+		pgste_set_unlock(ptep, pgste);
+	} else
+		*ptep = entry;
 }
 
 /*
@@ -627,19 +671,19 @@ static inline int pte_write(pte_t pte)
 
 static inline int pte_dirty(pte_t pte)
 {
-	/* A pte is neither clean nor dirty on s/390. The dirty bit
-	 * is in the storage key. See page_test_and_clear_dirty for
-	 * details.
-	 */
+#ifdef CONFIG_PGSTE
+	if (pte_val(pte) & _PAGE_SWC)
+		return 1;
+#endif
 	return 0;
 }
 
 static inline int pte_young(pte_t pte)
 {
-	/* A pte is neither young nor old on s/390. The young bit
-	 * is in the storage key. See page_test_and_clear_young for
-	 * details.
-	 */
+#ifdef CONFIG_PGSTE
+	if (pte_val(pte) & _PAGE_SWR)
+		return 1;
+#endif
 	return 0;
 }
 
@@ -647,64 +691,30 @@ static inline int pte_young(pte_t pte)
  * pgd/pmd/pte modification functions
  */
 
-#ifndef __s390x__
-
-#define pgd_clear(pgd)		do { } while (0)
-#define pud_clear(pud)		do { } while (0)
-
-#else /* __s390x__ */
-
-static inline void pgd_clear_kernel(pgd_t * pgd)
+static inline void pgd_clear(pgd_t *pgd)
 {
+#ifdef __s390x__
 	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
 		pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
+#endif
 }
 
-static inline void pgd_clear(pgd_t * pgd)
-{
-	pgd_t *shadow = get_shadow_table(pgd);
-
-	pgd_clear_kernel(pgd);
-	if (shadow)
-		pgd_clear_kernel(shadow);
-}
-
-static inline void pud_clear_kernel(pud_t *pud)
+static inline void pud_clear(pud_t *pud)
 {
+#ifdef __s390x__
 	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
 		pud_val(*pud) = _REGION3_ENTRY_EMPTY;
+#endif
 }
 
-static inline void pud_clear(pud_t *pud)
-{
-	pud_t *shadow = get_shadow_table(pud);
-
-	pud_clear_kernel(pud);
-	if (shadow)
-		pud_clear_kernel(shadow);
-}
-
-#endif /* __s390x__ */
-
-static inline void pmd_clear_kernel(pmd_t * pmdp)
+static inline void pmd_clear(pmd_t *pmdp)
 {
 	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
 }
 
-static inline void pmd_clear(pmd_t *pmd)
-{
-	pmd_t *shadow = get_shadow_table(pmd);
-
-	pmd_clear_kernel(pmd);
-	if (shadow)
-		pmd_clear_kernel(shadow);
-}
-
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
-	if (mm->context.noexec)
-		pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY;
 }
 
 /*
@@ -734,35 +744,27 @@ static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	/* The only user of pte_mkclean is the fork() code.
-	   We must *not* clear the *physical* page dirty bit
-	   just because fork() wants to clear the dirty bit in
-	   *one* of the page's mappings.  So we just do nothing. */
+#ifdef CONFIG_PGSTE
+	pte_val(pte) &= ~_PAGE_SWC;
+#endif
 	return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	/* We do not explicitly set the dirty bit because the
-	 * sske instruction is slow. It is faster to let the
-	 * next instruction set the dirty bit.
-	 */
 	return pte;
 }
 
 static inline pte_t pte_mkold(pte_t pte)
 {
-	/* S/390 doesn't keep its dirty/referenced bit in the pte.
-	 * There is no point in clearing the real referenced bit.
-	 */
+#ifdef CONFIG_PGSTE
	pte_val(pte) &= ~_PAGE_SWR;
+#endif
 	return pte;
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
 {
-	/* S/390 doesn't keep its dirty/referenced bit in the pte.
-	 * There is no point in setting the real referenced bit.
-	 */
 	return pte;
 }
 
@@ -800,62 +802,60 @@ static inline pte_t pte_mkhuge(pte_t pte)
 }
 #endif
 
-#ifdef CONFIG_PGSTE
 /*
- * Get (and clear) the user dirty bit for a PTE.
+ * Get (and clear) the user dirty bit for a pte.
  */
-static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm,
+static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
 						 pte_t *ptep)
 {
-	int dirty;
-	unsigned long *pgste;
-	struct page *page;
-	unsigned int skey;
-
-	if (!mm->context.has_pgste)
-		return -EINVAL;
-	rcp_lock(ptep);
-	pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
-	page = virt_to_page(pte_val(*ptep));
-	skey = page_get_storage_key(page_to_phys(page));
-	if (skey & _PAGE_CHANGED) {
-		set_bit_simple(RCP_GC_BIT, pgste);
-		set_bit_simple(KVM_UD_BIT, pgste);
+	pgste_t pgste;
+	int dirty = 0;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_update_all(ptep, pgste);
+		dirty = !!(pgste_val(pgste) & KVM_UC_BIT);
+		pgste_val(pgste) &= ~KVM_UC_BIT;
+		pgste_set_unlock(ptep, pgste);
+		return dirty;
 	}
-	if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
-		SetPageDirty(page);
-		set_bit_simple(KVM_UD_BIT, pgste);
-	}
-	dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste);
-	if (skey & _PAGE_CHANGED)
-		page_clear_dirty(page, 1);
-	rcp_unlock(ptep);
 	return dirty;
 }
-#endif
+
+/*
+ * Get (and clear) the user referenced bit for a pte.
+ */
+static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
+						 pte_t *ptep)
+{
+	pgste_t pgste;
+	int young = 0;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_update_young(ptep, pgste);
+		young = !!(pgste_val(pgste) & KVM_UR_BIT);
+		pgste_val(pgste) &= ~KVM_UR_BIT;
+		pgste_set_unlock(ptep, pgste);
+	}
+	return young;
+}
 
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long addr, pte_t *ptep)
 {
-#ifdef CONFIG_PGSTE
-	unsigned long physpage;
-	int young;
-	unsigned long *pgste;
+	pgste_t pgste;
+	pte_t pte;
 
-	if (!vma->vm_mm->context.has_pgste)
-		return 0;
-	physpage = pte_val(*ptep) & PAGE_MASK;
-	pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
-
-	young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0);
-	rcp_lock(ptep);
-	if (young)
-		set_bit_simple(RCP_GR_BIT, pgste);
-	young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste);
-	rcp_unlock(ptep);
-	return young;
-#endif
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_update_young(ptep, pgste);
+		pte = *ptep;
+		*ptep = pte_mkold(pte);
+		pgste_set_unlock(ptep, pgste);
+		return pte_young(pte);
+	}
 	return 0;
 }
 
@@ -867,10 +867,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
  * On s390 reference bits are in storage key and never in TLB
  * With virtualization we handle the reference bit, without we
  * we can simply return */
-#ifdef CONFIG_PGSTE
 	return ptep_test_and_clear_young(vma, address, ptep);
-#endif
-	return 0;
 }
 
 static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
@@ -890,25 +887,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
 	}
 }
 
-static inline void ptep_invalidate(struct mm_struct *mm,
-				   unsigned long address, pte_t *ptep)
-{
-	if (mm->context.has_pgste) {
-		rcp_lock(ptep);
-		__ptep_ipte(address, ptep);
-		ptep_rcp_copy(ptep);
-		pte_val(*ptep) = _PAGE_TYPE_EMPTY;
-		rcp_unlock(ptep);
-		return;
-	}
-	__ptep_ipte(address, ptep);
-	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
-	if (mm->context.noexec) {
-		__ptep_ipte(address, ptep + PTRS_PER_PTE);
-		pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY;
-	}
-}
-
 /*
  * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
  * both clear the TLB for the unmapped pte. The reason is that
@@ -923,24 +901,72 @@ static inline void ptep_invalidate(struct mm_struct *mm,
  * is a nop.
  */
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define ptep_get_and_clear(__mm, __address, __ptep)			\
-({									\
-	pte_t __pte = *(__ptep);					\
-	(__mm)->context.flush_mm = 1;					\
-	if (atomic_read(&(__mm)->context.attach_count) > 1 ||		\
-	    (__mm) != current->active_mm)				\
-		ptep_invalidate(__mm, __address, __ptep);		\
-	else								\
-		pte_clear((__mm), (__address), (__ptep));		\
-	__pte;								\
-})
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long address, pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+
+	mm->context.flush_mm = 1;
+	if (mm_has_pgste(mm))
+		pgste = pgste_get_lock(ptep);
+
+	pte = *ptep;
+	if (!mm_exclusive(mm))
+		__ptep_ipte(address, ptep);
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(&pte, pgste);
+		pgste_set_unlock(ptep, pgste);
+	}
+	return pte;
+}
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
+					   unsigned long address,
+					   pte_t *ptep)
+{
+	pte_t pte;
+
+	mm->context.flush_mm = 1;
+	if (mm_has_pgste(mm))
+		pgste_get_lock(ptep);
+
+	pte = *ptep;
+	if (!mm_exclusive(mm))
+		__ptep_ipte(address, ptep);
+	return pte;
+}
+
+static inline void ptep_modify_prot_commit(struct mm_struct *mm,
+					   unsigned long address,
+					   pte_t *ptep, pte_t pte)
+{
+	*ptep = pte;
+	if (mm_has_pgste(mm))
+		pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE));
+}
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 				     unsigned long address, pte_t *ptep)
 {
-	pte_t pte = *ptep;
-	ptep_invalidate(vma->vm_mm, address, ptep);
+	pgste_t pgste;
+	pte_t pte;
+
+	if (mm_has_pgste(vma->vm_mm))
+		pgste = pgste_get_lock(ptep);
+
+	pte = *ptep;
+	__ptep_ipte(address, ptep);
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste = pgste_update_all(&pte, pgste);
+		pgste_set_unlock(ptep, pgste);
+	}
 	return pte;
 }
 
@@ -953,76 +979,67 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
  */
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
-					    unsigned long addr,
+					    unsigned long address,
 					    pte_t *ptep, int full)
 {
-	pte_t pte = *ptep;
+	pgste_t pgste;
+	pte_t pte;
+
+	if (mm_has_pgste(mm))
+		pgste = pgste_get_lock(ptep);
+
+	pte = *ptep;
+	if (!full)
+		__ptep_ipte(address, ptep);
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
 
-	if (full)
-		pte_clear(mm, addr, ptep);
-	else
-		ptep_invalidate(mm, addr, ptep);
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(&pte, pgste);
+		pgste_set_unlock(ptep, pgste);
+	}
 	return pte;
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define ptep_set_wrprotect(__mm, __addr, __ptep)			\
-({									\
-	pte_t __pte = *(__ptep);					\
-	if (pte_write(__pte)) {						\
-		(__mm)->context.flush_mm = 1;				\
-		if (atomic_read(&(__mm)->context.attach_count) > 1 ||	\
-		    (__mm) != current->active_mm)			\
-			ptep_invalidate(__mm, __addr, __ptep);		\
-		set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte));	\
-	}								\
-})
+static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
+				       unsigned long address, pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte = *ptep;
 
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty)	\
-({									\
-	int __changed = !pte_same(*(__ptep), __entry);			\
-	if (__changed) {						\
-		ptep_invalidate((__vma)->vm_mm, __addr, __ptep);	\
-		set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry);	\
-	}								\
-	__changed;							\
-})
+	if (pte_write(pte)) {
+		mm->context.flush_mm = 1;
+		if (mm_has_pgste(mm))
+			pgste = pgste_get_lock(ptep);
 
-/*
- * Test and clear dirty bit in storage key.
- * We can't clear the changed bit atomically. This is a potential
- * race against modification of the referenced bit. This function
- * should therefore only be called if it is not mapped in any
- * address space.
- */
-#define __HAVE_ARCH_PAGE_TEST_DIRTY
-static inline int page_test_dirty(struct page *page)
-{
-	return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0;
-}
+		if (!mm_exclusive(mm))
+			__ptep_ipte(address, ptep);
+		*ptep = pte_wrprotect(pte);
 
-#define __HAVE_ARCH_PAGE_CLEAR_DIRTY
-static inline void page_clear_dirty(struct page *page, int mapped)
-{
-	page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped);
+		if (mm_has_pgste(mm))
+			pgste_set_unlock(ptep, pgste);
+	}
+	return pte;
 }
 
-/*
- * Test and clear referenced bit in storage key.
- */
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
-static inline int page_test_and_clear_young(struct page *page)
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+					unsigned long address, pte_t *ptep,
+					pte_t entry, int dirty)
 {
-	unsigned long physpage = page_to_phys(page);
-	int ccode;
-
-	asm volatile(
-		"	rrbe	0,%1\n"
-		"	ipm	%0\n"
-		"	srl	%0,28\n"
-		: "=d" (ccode) : "a" (physpage) : "cc" );
-	return ccode & 2;
+	pgste_t pgste;
+
+	if (pte_same(*ptep, entry))
+		return 0;
+	if (mm_has_pgste(vma->vm_mm))
+		pgste = pgste_get_lock(ptep);
+
+	__ptep_ipte(address, ptep);
+	*ptep = entry;
+
+	if (mm_has_pgste(vma->vm_mm))
+		pgste_set_unlock(ptep, pgste);
+	return 1;
 }
 
 /*
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 2c79b6416271..1300c3025334 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -84,6 +84,7 @@ struct thread_struct {
 	struct per_event per_event;	/* Cause of the last PER trap */
 	/* pfault_wait is used to block the process on a pfault event */
 	unsigned long pfault_wait;
+	struct list_head list;
 };
 
 typedef struct thread_struct thread_struct;
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 29d5d6d4becc..b7a4f2eb0057 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -50,7 +50,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
 	/*
 	 * If the process only ran on the local cpu, do a local flush.
 	 */
-	local_cpumask = cpumask_of_cpu(smp_processor_id());
+	cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id()));
 	if (cpumask_equal(mm_cpumask(mm), &local_cpumask))
 		__tlb_flush_local();
 	else
@@ -80,16 +80,11 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
 	 * on all cpus instead of doing a local flush if the mm
 	 * only ran on the local cpu.
 	 */
-	if (MACHINE_HAS_IDTE) {
-		if (mm->context.noexec)
-			__tlb_flush_idte((unsigned long)
-					 get_shadow_table(mm->pgd) |
-					 mm->context.asce_bits);
+	if (MACHINE_HAS_IDTE)
 		__tlb_flush_idte((unsigned long) mm->pgd |
 				 mm->context.asce_bits);
-		return;
-	}
-	__tlb_flush_full(mm);
+	else
+		__tlb_flush_full(mm);
 }
 
 static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index e82152572377..9208e69245a0 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -385,6 +385,7 @@
 
 /* Ignore system calls that are also reachable via sys_socket */
 #define __IGNORE_recvmmsg
+#define __IGNORE_sendmmsg
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR