aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-06-07 21:47:53 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-06-07 21:47:53 -0400
commitaec040e29e804b40fa2934ec7a5be9f515f23098 (patch)
treec9afca2811091ceab7362a34fb22916fc881be10 /arch
parent8ea656bdc9d3c39e18f2bd5d8ffe0301fde64f72 (diff)
parent6c61cfe91be53b444abc1da2dbab14efa77706c0 (diff)
Merge branch 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6
* 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6: [S390] fix kvm defines for 31 bit compile [S390] use generic RCU page-table freeing code [S390] qdio: Split SBAL entry flags [S390] kvm-s390: fix stfle facilities numbers >=64 [S390] kvm-s390: Fix host crash on misbehaving guests
Diffstat (limited to 'arch')
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/pgalloc.h8
-rw-r--r--arch/s390/include/asm/pgtable.h39
-rw-r--r--arch/s390/include/asm/qdio.h119
-rw-r--r--arch/s390/include/asm/tlb.h94
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/kvm/sie64a.S2
-rw-r--r--arch/s390/mm/pgtable.c292
8 files changed, 223 insertions, 333 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9fab2aa9c2c8..90d77bd078f5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -89,6 +89,7 @@ config S390
89 select HAVE_GET_USER_PAGES_FAST 89 select HAVE_GET_USER_PAGES_FAST
90 select HAVE_ARCH_MUTEX_CPU_RELAX 90 select HAVE_ARCH_MUTEX_CPU_RELAX
91 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 91 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
92 select HAVE_RCU_TABLE_FREE if SMP
92 select ARCH_INLINE_SPIN_TRYLOCK 93 select ARCH_INLINE_SPIN_TRYLOCK
93 select ARCH_INLINE_SPIN_TRYLOCK_BH 94 select ARCH_INLINE_SPIN_TRYLOCK_BH
94 select ARCH_INLINE_SPIN_LOCK 95 select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f6314af3b354..38e71ebcd3c2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -17,15 +17,15 @@
17#include <linux/gfp.h> 17#include <linux/gfp.h>
18#include <linux/mm.h> 18#include <linux/mm.h>
19 19
20#define check_pgt_cache() do {} while (0)
21
22unsigned long *crst_table_alloc(struct mm_struct *); 20unsigned long *crst_table_alloc(struct mm_struct *);
23void crst_table_free(struct mm_struct *, unsigned long *); 21void crst_table_free(struct mm_struct *, unsigned long *);
24void crst_table_free_rcu(struct mm_struct *, unsigned long *);
25 22
26unsigned long *page_table_alloc(struct mm_struct *); 23unsigned long *page_table_alloc(struct mm_struct *);
27void page_table_free(struct mm_struct *, unsigned long *); 24void page_table_free(struct mm_struct *, unsigned long *);
28void page_table_free_rcu(struct mm_struct *, unsigned long *); 25#ifdef CONFIG_HAVE_RCU_TABLE_FREE
26void page_table_free_rcu(struct mmu_gather *, unsigned long *);
27void __tlb_remove_table(void *_table);
28#endif
29 29
30static inline void clear_table(unsigned long *s, unsigned long val, size_t n) 30static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
31{ 31{
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index e4efacfe1b63..801fbe1d837d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -293,19 +293,6 @@ extern unsigned long VMALLOC_START;
293 * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. 293 * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
294 */ 294 */
295 295
296/* Page status table bits for virtualization */
297#define RCP_ACC_BITS 0xf000000000000000UL
298#define RCP_FP_BIT 0x0800000000000000UL
299#define RCP_PCL_BIT 0x0080000000000000UL
300#define RCP_HR_BIT 0x0040000000000000UL
301#define RCP_HC_BIT 0x0020000000000000UL
302#define RCP_GR_BIT 0x0004000000000000UL
303#define RCP_GC_BIT 0x0002000000000000UL
304
305/* User dirty / referenced bit for KVM's migration feature */
306#define KVM_UR_BIT 0x0000800000000000UL
307#define KVM_UC_BIT 0x0000400000000000UL
308
309#ifndef __s390x__ 296#ifndef __s390x__
310 297
311/* Bits in the segment table address-space-control-element */ 298/* Bits in the segment table address-space-control-element */
@@ -325,6 +312,19 @@ extern unsigned long VMALLOC_START;
325#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) 312#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
326#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) 313#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
327 314
315/* Page status table bits for virtualization */
316#define RCP_ACC_BITS 0xf0000000UL
317#define RCP_FP_BIT 0x08000000UL
318#define RCP_PCL_BIT 0x00800000UL
319#define RCP_HR_BIT 0x00400000UL
320#define RCP_HC_BIT 0x00200000UL
321#define RCP_GR_BIT 0x00040000UL
322#define RCP_GC_BIT 0x00020000UL
323
324/* User dirty / referenced bit for KVM's migration feature */
325#define KVM_UR_BIT 0x00008000UL
326#define KVM_UC_BIT 0x00004000UL
327
328#else /* __s390x__ */ 328#else /* __s390x__ */
329 329
330/* Bits in the segment/region table address-space-control-element */ 330/* Bits in the segment/region table address-space-control-element */
@@ -367,6 +367,19 @@ extern unsigned long VMALLOC_START;
367#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ 367#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
368#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ 368#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
369 369
370/* Page status table bits for virtualization */
371#define RCP_ACC_BITS 0xf000000000000000UL
372#define RCP_FP_BIT 0x0800000000000000UL
373#define RCP_PCL_BIT 0x0080000000000000UL
374#define RCP_HR_BIT 0x0040000000000000UL
375#define RCP_HC_BIT 0x0020000000000000UL
376#define RCP_GR_BIT 0x0004000000000000UL
377#define RCP_GC_BIT 0x0002000000000000UL
378
379/* User dirty / referenced bit for KVM's migration feature */
380#define KVM_UR_BIT 0x0000800000000000UL
381#define KVM_UC_BIT 0x0000400000000000UL
382
370#endif /* __s390x__ */ 383#endif /* __s390x__ */
371 384
372/* 385/*
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 350e7ee5952d..15c97625df8d 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -139,110 +139,47 @@ struct slib {
139 struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q]; 139 struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];
140} __attribute__ ((packed, aligned(2048))); 140} __attribute__ ((packed, aligned(2048)));
141 141
142/** 142#define SBAL_EFLAGS_LAST_ENTRY 0x40
143 * struct sbal_flags - storage block address list flags 143#define SBAL_EFLAGS_CONTIGUOUS 0x20
144 * @last: last entry 144#define SBAL_EFLAGS_FIRST_FRAG 0x04
145 * @cont: contiguous storage 145#define SBAL_EFLAGS_MIDDLE_FRAG 0x08
146 * @frag: fragmentation 146#define SBAL_EFLAGS_LAST_FRAG 0x0c
147 */ 147#define SBAL_EFLAGS_MASK 0x6f
148struct sbal_flags {
149 u8 : 1;
150 u8 last : 1;
151 u8 cont : 1;
152 u8 : 1;
153 u8 frag : 2;
154 u8 : 2;
155} __attribute__ ((packed));
156
157#define SBAL_FLAGS_FIRST_FRAG 0x04000000UL
158#define SBAL_FLAGS_MIDDLE_FRAG 0x08000000UL
159#define SBAL_FLAGS_LAST_FRAG 0x0c000000UL
160#define SBAL_FLAGS_LAST_ENTRY 0x40000000UL
161#define SBAL_FLAGS_CONTIGUOUS 0x20000000UL
162 148
163#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL 149#define SBAL_SFLAGS0_PCI_REQ 0x40
150#define SBAL_SFLAGS0_DATA_CONTINUATION 0x20
164 151
165/* Awesome OpenFCP extensions */ 152/* Awesome OpenFCP extensions */
166#define SBAL_FLAGS0_TYPE_STATUS 0x00UL 153#define SBAL_SFLAGS0_TYPE_STATUS 0x00
167#define SBAL_FLAGS0_TYPE_WRITE 0x08UL 154#define SBAL_SFLAGS0_TYPE_WRITE 0x08
168#define SBAL_FLAGS0_TYPE_READ 0x10UL 155#define SBAL_SFLAGS0_TYPE_READ 0x10
169#define SBAL_FLAGS0_TYPE_WRITE_READ 0x18UL 156#define SBAL_SFLAGS0_TYPE_WRITE_READ 0x18
170#define SBAL_FLAGS0_MORE_SBALS 0x04UL 157#define SBAL_SFLAGS0_MORE_SBALS 0x04
171#define SBAL_FLAGS0_COMMAND 0x02UL 158#define SBAL_SFLAGS0_COMMAND 0x02
172#define SBAL_FLAGS0_LAST_SBAL 0x00UL 159#define SBAL_SFLAGS0_LAST_SBAL 0x00
173#define SBAL_FLAGS0_ONLY_SBAL SBAL_FLAGS0_COMMAND 160#define SBAL_SFLAGS0_ONLY_SBAL SBAL_SFLAGS0_COMMAND
174#define SBAL_FLAGS0_MIDDLE_SBAL SBAL_FLAGS0_MORE_SBALS 161#define SBAL_SFLAGS0_MIDDLE_SBAL SBAL_SFLAGS0_MORE_SBALS
175#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND 162#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND)
176#define SBAL_FLAGS0_PCI 0x40
177
178/**
179 * struct sbal_sbalf_0 - sbal flags for sbale 0
180 * @pci: PCI indicator
181 * @cont: data continuation
182 * @sbtype: storage-block type (FCP)
183 */
184struct sbal_sbalf_0 {
185 u8 : 1;
186 u8 pci : 1;
187 u8 cont : 1;
188 u8 sbtype : 2;
189 u8 : 3;
190} __attribute__ ((packed));
191
192/**
193 * struct sbal_sbalf_1 - sbal flags for sbale 1
194 * @key: storage key
195 */
196struct sbal_sbalf_1 {
197 u8 : 4;
198 u8 key : 4;
199} __attribute__ ((packed));
200
201/**
202 * struct sbal_sbalf_14 - sbal flags for sbale 14
203 * @erridx: error index
204 */
205struct sbal_sbalf_14 {
206 u8 : 4;
207 u8 erridx : 4;
208} __attribute__ ((packed));
209
210/**
211 * struct sbal_sbalf_15 - sbal flags for sbale 15
212 * @reason: reason for error state
213 */
214struct sbal_sbalf_15 {
215 u8 reason;
216} __attribute__ ((packed));
217
218/**
219 * union sbal_sbalf - storage block address list flags
220 * @i0: sbalf0
221 * @i1: sbalf1
222 * @i14: sbalf14
223 * @i15: sblaf15
224 * @value: raw value
225 */
226union sbal_sbalf {
227 struct sbal_sbalf_0 i0;
228 struct sbal_sbalf_1 i1;
229 struct sbal_sbalf_14 i14;
230 struct sbal_sbalf_15 i15;
231 u8 value;
232};
233 163
234/** 164/**
235 * struct qdio_buffer_element - SBAL entry 165 * struct qdio_buffer_element - SBAL entry
236 * @flags: flags 166 * @eflags: SBAL entry flags
167 * @scount: SBAL count
168 * @sflags: whole SBAL flags
237 * @length: length 169 * @length: length
238 * @addr: address 170 * @addr: address
239*/ 171*/
240struct qdio_buffer_element { 172struct qdio_buffer_element {
241 u32 flags; 173 u8 eflags;
174 /* private: */
175 u8 res1;
176 /* public: */
177 u8 scount;
178 u8 sflags;
242 u32 length; 179 u32 length;
243#ifdef CONFIG_32BIT 180#ifdef CONFIG_32BIT
244 /* private: */ 181 /* private: */
245 void *reserved; 182 void *res2;
246 /* public: */ 183 /* public: */
247#endif 184#endif
248 void *addr; 185 void *addr;
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 77eee5477a52..c687a2c83462 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -26,67 +26,60 @@
26#include <linux/swap.h> 26#include <linux/swap.h>
27#include <asm/processor.h> 27#include <asm/processor.h>
28#include <asm/pgalloc.h> 28#include <asm/pgalloc.h>
29#include <asm/smp.h>
30#include <asm/tlbflush.h> 29#include <asm/tlbflush.h>
31 30
32struct mmu_gather { 31struct mmu_gather {
33 struct mm_struct *mm; 32 struct mm_struct *mm;
33#ifdef CONFIG_HAVE_RCU_TABLE_FREE
34 struct mmu_table_batch *batch;
35#endif
34 unsigned int fullmm; 36 unsigned int fullmm;
35 unsigned int nr_ptes; 37 unsigned int need_flush;
36 unsigned int nr_pxds;
37 unsigned int max;
38 void **array;
39 void *local[8];
40}; 38};
41 39
42static inline void __tlb_alloc_page(struct mmu_gather *tlb) 40#ifdef CONFIG_HAVE_RCU_TABLE_FREE
43{ 41struct mmu_table_batch {
44 unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); 42 struct rcu_head rcu;
43 unsigned int nr;
44 void *tables[0];
45};
45 46
46 if (addr) { 47#define MAX_TABLE_BATCH \
47 tlb->array = (void *) addr; 48 ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
48 tlb->max = PAGE_SIZE / sizeof(void *); 49
49 } 50extern void tlb_table_flush(struct mmu_gather *tlb);
50} 51extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
52#endif
51 53
52static inline void tlb_gather_mmu(struct mmu_gather *tlb, 54static inline void tlb_gather_mmu(struct mmu_gather *tlb,
53 struct mm_struct *mm, 55 struct mm_struct *mm,
54 unsigned int full_mm_flush) 56 unsigned int full_mm_flush)
55{ 57{
56 tlb->mm = mm; 58 tlb->mm = mm;
57 tlb->max = ARRAY_SIZE(tlb->local);
58 tlb->array = tlb->local;
59 tlb->fullmm = full_mm_flush; 59 tlb->fullmm = full_mm_flush;
60 tlb->need_flush = 0;
61#ifdef CONFIG_HAVE_RCU_TABLE_FREE
62 tlb->batch = NULL;
63#endif
60 if (tlb->fullmm) 64 if (tlb->fullmm)
61 __tlb_flush_mm(mm); 65 __tlb_flush_mm(mm);
62 else
63 __tlb_alloc_page(tlb);
64 tlb->nr_ptes = 0;
65 tlb->nr_pxds = tlb->max;
66} 66}
67 67
68static inline void tlb_flush_mmu(struct mmu_gather *tlb) 68static inline void tlb_flush_mmu(struct mmu_gather *tlb)
69{ 69{
70 if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max)) 70 if (!tlb->need_flush)
71 __tlb_flush_mm(tlb->mm); 71 return;
72 while (tlb->nr_ptes > 0) 72 tlb->need_flush = 0;
73 page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]); 73 __tlb_flush_mm(tlb->mm);
74 while (tlb->nr_pxds < tlb->max) 74#ifdef CONFIG_HAVE_RCU_TABLE_FREE
75 crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]); 75 tlb_table_flush(tlb);
76#endif
76} 77}
77 78
78static inline void tlb_finish_mmu(struct mmu_gather *tlb, 79static inline void tlb_finish_mmu(struct mmu_gather *tlb,
79 unsigned long start, unsigned long end) 80 unsigned long start, unsigned long end)
80{ 81{
81 tlb_flush_mmu(tlb); 82 tlb_flush_mmu(tlb);
82
83 rcu_table_freelist_finish();
84
85 /* keep the page table cache within bounds */
86 check_pgt_cache();
87
88 if (tlb->array != tlb->local)
89 free_pages((unsigned long) tlb->array, 0);
90} 83}
91 84
92/* 85/*
@@ -112,12 +105,11 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
112static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, 105static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
113 unsigned long address) 106 unsigned long address)
114{ 107{
115 if (!tlb->fullmm) { 108#ifdef CONFIG_HAVE_RCU_TABLE_FREE
116 tlb->array[tlb->nr_ptes++] = pte; 109 if (!tlb->fullmm)
117 if (tlb->nr_ptes >= tlb->nr_pxds) 110 return page_table_free_rcu(tlb, (unsigned long *) pte);
118 tlb_flush_mmu(tlb); 111#endif
119 } else 112 page_table_free(tlb->mm, (unsigned long *) pte);
120 page_table_free(tlb->mm, (unsigned long *) pte);
121} 113}
122 114
123/* 115/*
@@ -133,12 +125,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
133#ifdef __s390x__ 125#ifdef __s390x__
134 if (tlb->mm->context.asce_limit <= (1UL << 31)) 126 if (tlb->mm->context.asce_limit <= (1UL << 31))
135 return; 127 return;
136 if (!tlb->fullmm) { 128#ifdef CONFIG_HAVE_RCU_TABLE_FREE
137 tlb->array[--tlb->nr_pxds] = pmd; 129 if (!tlb->fullmm)
138 if (tlb->nr_ptes >= tlb->nr_pxds) 130 return tlb_remove_table(tlb, pmd);
139 tlb_flush_mmu(tlb); 131#endif
140 } else 132 crst_table_free(tlb->mm, (unsigned long *) pmd);
141 crst_table_free(tlb->mm, (unsigned long *) pmd);
142#endif 133#endif
143} 134}
144 135
@@ -155,12 +146,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
155#ifdef __s390x__ 146#ifdef __s390x__
156 if (tlb->mm->context.asce_limit <= (1UL << 42)) 147 if (tlb->mm->context.asce_limit <= (1UL << 42))
157 return; 148 return;
158 if (!tlb->fullmm) { 149#ifdef CONFIG_HAVE_RCU_TABLE_FREE
159 tlb->array[--tlb->nr_pxds] = pud; 150 if (!tlb->fullmm)
160 if (tlb->nr_ptes >= tlb->nr_pxds) 151 return tlb_remove_table(tlb, pud);
161 tlb_flush_mmu(tlb); 152#endif
162 } else 153 crst_table_free(tlb->mm, (unsigned long *) pud);
163 crst_table_free(tlb->mm, (unsigned long *) pud);
164#endif 154#endif
165} 155}
166 156
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 30ca85cce314..67345ae7ce8d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -731,6 +731,7 @@ static int __init kvm_s390_init(void)
731 } 731 }
732 memcpy(facilities, S390_lowcore.stfle_fac_list, 16); 732 memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
733 facilities[0] &= 0xff00fff3f47c0000ULL; 733 facilities[0] &= 0xff00fff3f47c0000ULL;
734 facilities[1] &= 0x201c000000000000ULL;
734 return 0; 735 return 0;
735} 736}
736 737
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
index ab0e041ac54c..5faa1b1b23fa 100644
--- a/arch/s390/kvm/sie64a.S
+++ b/arch/s390/kvm/sie64a.S
@@ -93,4 +93,6 @@ sie_err:
93 93
94 .section __ex_table,"a" 94 .section __ex_table,"a"
95 .quad sie_inst,sie_err 95 .quad sie_inst,sie_err
96 .quad sie_exit,sie_err
97 .quad sie_reenter,sie_err
96 .previous 98 .previous
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b09763fe5da1..37a23c223705 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,94 +24,12 @@
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25#include <asm/mmu_context.h> 25#include <asm/mmu_context.h>
26 26
27struct rcu_table_freelist {
28 struct rcu_head rcu;
29 struct mm_struct *mm;
30 unsigned int pgt_index;
31 unsigned int crst_index;
32 unsigned long *table[0];
33};
34
35#define RCU_FREELIST_SIZE \
36 ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \
37 / sizeof(unsigned long))
38
39static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
40
41static void __page_table_free(struct mm_struct *mm, unsigned long *table);
42
43static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm)
44{
45 struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist);
46 struct rcu_table_freelist *batch = *batchp;
47
48 if (batch)
49 return batch;
50 batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC);
51 if (batch) {
52 batch->mm = mm;
53 batch->pgt_index = 0;
54 batch->crst_index = RCU_FREELIST_SIZE;
55 *batchp = batch;
56 }
57 return batch;
58}
59
60static void rcu_table_freelist_callback(struct rcu_head *head)
61{
62 struct rcu_table_freelist *batch =
63 container_of(head, struct rcu_table_freelist, rcu);
64
65 while (batch->pgt_index > 0)
66 __page_table_free(batch->mm, batch->table[--batch->pgt_index]);
67 while (batch->crst_index < RCU_FREELIST_SIZE)
68 crst_table_free(batch->mm, batch->table[batch->crst_index++]);
69 free_page((unsigned long) batch);
70}
71
72void rcu_table_freelist_finish(void)
73{
74 struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist);
75 struct rcu_table_freelist *batch = *batchp;
76
77 if (!batch)
78 goto out;
79 call_rcu(&batch->rcu, rcu_table_freelist_callback);
80 *batchp = NULL;
81out:
82 put_cpu_var(rcu_table_freelist);
83}
84
85static void smp_sync(void *arg)
86{
87}
88
89#ifndef CONFIG_64BIT 27#ifndef CONFIG_64BIT
90#define ALLOC_ORDER 1 28#define ALLOC_ORDER 1
91#define TABLES_PER_PAGE 4 29#define FRAG_MASK 0x0f
92#define FRAG_MASK 15UL
93#define SECOND_HALVES 10UL
94
95void clear_table_pgstes(unsigned long *table)
96{
97 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
98 memset(table + 256, 0, PAGE_SIZE/4);
99 clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
100 memset(table + 768, 0, PAGE_SIZE/4);
101}
102
103#else 30#else
104#define ALLOC_ORDER 2 31#define ALLOC_ORDER 2
105#define TABLES_PER_PAGE 2 32#define FRAG_MASK 0x03
106#define FRAG_MASK 3UL
107#define SECOND_HALVES 2UL
108
109void clear_table_pgstes(unsigned long *table)
110{
111 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
112 memset(table + 256, 0, PAGE_SIZE/2);
113}
114
115#endif 33#endif
116 34
117unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; 35unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
@@ -140,29 +58,6 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
140 free_pages((unsigned long) table, ALLOC_ORDER); 58 free_pages((unsigned long) table, ALLOC_ORDER);
141} 59}
142 60
143void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table)
144{
145 struct rcu_table_freelist *batch;
146
147 preempt_disable();
148 if (atomic_read(&mm->mm_users) < 2 &&
149 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
150 crst_table_free(mm, table);
151 goto out;
152 }
153 batch = rcu_table_freelist_get(mm);
154 if (!batch) {
155 smp_call_function(smp_sync, NULL, 1);
156 crst_table_free(mm, table);
157 goto out;
158 }
159 batch->table[--batch->crst_index] = table;
160 if (batch->pgt_index >= batch->crst_index)
161 rcu_table_freelist_finish();
162out:
163 preempt_enable();
164}
165
166#ifdef CONFIG_64BIT 61#ifdef CONFIG_64BIT
167int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) 62int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
168{ 63{
@@ -238,124 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
238} 133}
239#endif 134#endif
240 135
136static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
137{
138 unsigned int old, new;
139
140 do {
141 old = atomic_read(v);
142 new = old ^ bits;
143 } while (atomic_cmpxchg(v, old, new) != old);
144 return new;
145}
146
241/* 147/*
242 * page table entry allocation/free routines. 148 * page table entry allocation/free routines.
243 */ 149 */
150#ifdef CONFIG_PGSTE
151static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
152{
153 struct page *page;
154 unsigned long *table;
155
156 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
157 if (!page)
158 return NULL;
159 pgtable_page_ctor(page);
160 atomic_set(&page->_mapcount, 3);
161 table = (unsigned long *) page_to_phys(page);
162 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
163 clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
164 return table;
165}
166
167static inline void page_table_free_pgste(unsigned long *table)
168{
169 struct page *page;
170
171 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
172 pgtable_page_ctor(page);
173 atomic_set(&page->_mapcount, -1);
174 __free_page(page);
175}
176#endif
177
244unsigned long *page_table_alloc(struct mm_struct *mm) 178unsigned long *page_table_alloc(struct mm_struct *mm)
245{ 179{
246 struct page *page; 180 struct page *page;
247 unsigned long *table; 181 unsigned long *table;
248 unsigned long bits; 182 unsigned int mask, bit;
249 183
250 bits = (mm->context.has_pgste) ? 3UL : 1UL; 184#ifdef CONFIG_PGSTE
185 if (mm_has_pgste(mm))
186 return page_table_alloc_pgste(mm);
187#endif
188 /* Allocate fragments of a 4K page as 1K/2K page table */
251 spin_lock_bh(&mm->context.list_lock); 189 spin_lock_bh(&mm->context.list_lock);
252 page = NULL; 190 mask = FRAG_MASK;
253 if (!list_empty(&mm->context.pgtable_list)) { 191 if (!list_empty(&mm->context.pgtable_list)) {
254 page = list_first_entry(&mm->context.pgtable_list, 192 page = list_first_entry(&mm->context.pgtable_list,
255 struct page, lru); 193 struct page, lru);
256 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) 194 table = (unsigned long *) page_to_phys(page);
257 page = NULL; 195 mask = atomic_read(&page->_mapcount);
196 mask = mask | (mask >> 4);
258 } 197 }
259 if (!page) { 198 if ((mask & FRAG_MASK) == FRAG_MASK) {
260 spin_unlock_bh(&mm->context.list_lock); 199 spin_unlock_bh(&mm->context.list_lock);
261 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 200 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
262 if (!page) 201 if (!page)
263 return NULL; 202 return NULL;
264 pgtable_page_ctor(page); 203 pgtable_page_ctor(page);
265 page->flags &= ~FRAG_MASK; 204 atomic_set(&page->_mapcount, 1);
266 table = (unsigned long *) page_to_phys(page); 205 table = (unsigned long *) page_to_phys(page);
267 if (mm->context.has_pgste) 206 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
268 clear_table_pgstes(table);
269 else
270 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
271 spin_lock_bh(&mm->context.list_lock); 207 spin_lock_bh(&mm->context.list_lock);
272 list_add(&page->lru, &mm->context.pgtable_list); 208 list_add(&page->lru, &mm->context.pgtable_list);
209 } else {
210 for (bit = 1; mask & bit; bit <<= 1)
211 table += PTRS_PER_PTE;
212 mask = atomic_xor_bits(&page->_mapcount, bit);
213 if ((mask & FRAG_MASK) == FRAG_MASK)
214 list_del(&page->lru);
273 } 215 }
274 table = (unsigned long *) page_to_phys(page);
275 while (page->flags & bits) {
276 table += 256;
277 bits <<= 1;
278 }
279 page->flags |= bits;
280 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
281 list_move_tail(&page->lru, &mm->context.pgtable_list);
282 spin_unlock_bh(&mm->context.list_lock); 216 spin_unlock_bh(&mm->context.list_lock);
283 return table; 217 return table;
284} 218}
285 219
286static void __page_table_free(struct mm_struct *mm, unsigned long *table) 220void page_table_free(struct mm_struct *mm, unsigned long *table)
287{ 221{
288 struct page *page; 222 struct page *page;
289 unsigned long bits; 223 unsigned int bit, mask;
290 224
291 bits = ((unsigned long) table) & 15; 225#ifdef CONFIG_PGSTE
292 table = (unsigned long *)(((unsigned long) table) ^ bits); 226 if (mm_has_pgste(mm))
227 return page_table_free_pgste(table);
228#endif
229 /* Free 1K/2K page table fragment of a 4K page */
293 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 230 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
294 page->flags ^= bits; 231 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
295 if (!(page->flags & FRAG_MASK)) { 232 spin_lock_bh(&mm->context.list_lock);
233 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
234 list_del(&page->lru);
235 mask = atomic_xor_bits(&page->_mapcount, bit);
236 if (mask & FRAG_MASK)
237 list_add(&page->lru, &mm->context.pgtable_list);
238 spin_unlock_bh(&mm->context.list_lock);
239 if (mask == 0) {
296 pgtable_page_dtor(page); 240 pgtable_page_dtor(page);
241 atomic_set(&page->_mapcount, -1);
297 __free_page(page); 242 __free_page(page);
298 } 243 }
299} 244}
300 245
301void page_table_free(struct mm_struct *mm, unsigned long *table) 246#ifdef CONFIG_HAVE_RCU_TABLE_FREE
247
248static void __page_table_free_rcu(void *table, unsigned bit)
302{ 249{
303 struct page *page; 250 struct page *page;
304 unsigned long bits;
305 251
306 bits = (mm->context.has_pgste) ? 3UL : 1UL; 252#ifdef CONFIG_PGSTE
307 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 253 if (bit == FRAG_MASK)
254 return page_table_free_pgste(table);
255#endif
256 /* Free 1K/2K page table fragment of a 4K page */
308 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 257 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
309 spin_lock_bh(&mm->context.list_lock); 258 if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
310 page->flags ^= bits;
311 if (page->flags & FRAG_MASK) {
312 /* Page now has some free pgtable fragments. */
313 if (!list_empty(&page->lru))
314 list_move(&page->lru, &mm->context.pgtable_list);
315 page = NULL;
316 } else
317 /* All fragments of the 4K page have been freed. */
318 list_del(&page->lru);
319 spin_unlock_bh(&mm->context.list_lock);
320 if (page) {
321 pgtable_page_dtor(page); 259 pgtable_page_dtor(page);
260 atomic_set(&page->_mapcount, -1);
322 __free_page(page); 261 __free_page(page);
323 } 262 }
324} 263}
325 264
326void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) 265void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
327{ 266{
328 struct rcu_table_freelist *batch; 267 struct mm_struct *mm;
329 struct page *page; 268 struct page *page;
330 unsigned long bits; 269 unsigned int bit, mask;
331 270
332 preempt_disable(); 271 mm = tlb->mm;
333 if (atomic_read(&mm->mm_users) < 2 && 272#ifdef CONFIG_PGSTE
334 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { 273 if (mm_has_pgste(mm)) {
335 page_table_free(mm, table); 274 table = (unsigned long *) (__pa(table) | FRAG_MASK);
336 goto out; 275 tlb_remove_table(tlb, table);
337 } 276 return;
338 batch = rcu_table_freelist_get(mm);
339 if (!batch) {
340 smp_call_function(smp_sync, NULL, 1);
341 page_table_free(mm, table);
342 goto out;
343 } 277 }
344 bits = (mm->context.has_pgste) ? 3UL : 1UL; 278#endif
345 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 279 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
346 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 280 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
347 spin_lock_bh(&mm->context.list_lock); 281 spin_lock_bh(&mm->context.list_lock);
348 /* Delayed freeing with rcu prevents reuse of pgtable fragments */ 282 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
349 list_del_init(&page->lru); 283 list_del(&page->lru);
284 mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
285 if (mask & FRAG_MASK)
286 list_add_tail(&page->lru, &mm->context.pgtable_list);
350 spin_unlock_bh(&mm->context.list_lock); 287 spin_unlock_bh(&mm->context.list_lock);
351 table = (unsigned long *)(((unsigned long) table) | bits); 288 table = (unsigned long *) (__pa(table) | (bit << 4));
352 batch->table[batch->pgt_index++] = table; 289 tlb_remove_table(tlb, table);
353 if (batch->pgt_index >= batch->crst_index)
354 rcu_table_freelist_finish();
355out:
356 preempt_enable();
357} 290}
358 291
292void __tlb_remove_table(void *_table)
293{
294 void *table = (void *)((unsigned long) _table & PAGE_MASK);
295 unsigned type = (unsigned long) _table & ~PAGE_MASK;
296
297 if (type)
298 __page_table_free_rcu(table, type);
299 else
300 free_pages((unsigned long) table, ALLOC_ORDER);
301}
302
303#endif
304
359/* 305/*
360 * switch on pgstes for its userspace process (for kvm) 306 * switch on pgstes for its userspace process (for kvm)
361 */ 307 */
@@ -369,7 +315,7 @@ int s390_enable_sie(void)
369 return -EINVAL; 315 return -EINVAL;
370 316
371 /* Do we have pgstes? if yes, we are done */ 317 /* Do we have pgstes? if yes, we are done */
372 if (tsk->mm->context.has_pgste) 318 if (mm_has_pgste(tsk->mm))
373 return 0; 319 return 0;
374 320
375 /* lets check if we are allowed to replace the mm */ 321 /* lets check if we are allowed to replace the mm */