author		Martin Schwidefsky <schwidefsky@de.ibm.com>	2016-08-22 06:06:21 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2016-08-29 05:05:01 -0400
commit		7f79695cc1b6aa6d80a861780d9f8ce75d3dddcb (patch)
tree		dd34f15ed90df7e9920e29b753aa2ff0cf6feff3
parent		0eab11c7e0d30de14a15ccd8269eef238321a8e1 (diff)
s390/fpu: improve kernel_fpu_[begin|end]
In case of nested use of the FPU or vector registers in the kernel
the current code uses the mask of the FPU/vector registers of the
previous contexts to decide which registers to save and restore. E.g.
if the previous context used KERNEL_VXR_V0V7 and the next context
wants to use KERNEL_VXR_V24V31, the first 8 vector registers are
stored to the FPU state structure. But this is not necessary, as the
next context does not use these registers.

Rework the FPU/vector register save and restore code. The new code
does a few things differently:

1) A lowcore field is used instead of a per-cpu variable.
2) The kernel_fpu_end function now has two parameters, just like
   kernel_fpu_begin. The register flags are required by both
   functions to save / restore the minimal register set.
3) The inline functions kernel_fpu_begin/kernel_fpu_end now do the
   update of the register masks. If the user space FPU registers have
   already been stored, neither save_fpu_regs nor the
   __kernel_fpu_begin/__kernel_fpu_end functions have to be called
   for the first context. In this case kernel_fpu_begin adds 7
   instructions and kernel_fpu_end adds 4 instructions.
4) The inline assemblies in __kernel_fpu_begin / __kernel_fpu_end
   to save / restore the vector registers are simplified a bit.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
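To make the nesting behaviour concrete, the following sketch shows two nested kernel FPU contexts under the reworked API. It is illustrative only and not part of the patch; outer() and inner() are hypothetical callers.

/*
 * Hypothetical nested kernel FPU contexts (sketch, not in the patch).
 * With the old code the inner kernel_fpu_begin() would save V0..V7,
 * the registers of the outer context. With this patch the inner call
 * computes flags & state->mask == 0, so nothing is saved or restored.
 */
static void inner(void)
{
	struct kernel_fpu vxstate;

	kernel_fpu_begin(&vxstate, KERNEL_VXR_V24V31);
	/* ... use vector registers V24..V31 ... */
	kernel_fpu_end(&vxstate, KERNEL_VXR_V24V31);
}

static void outer(void)
{
	struct kernel_fpu vxstate;

	kernel_fpu_begin(&vxstate, KERNEL_VXR_V0V7);
	/* ... use vector registers V0..V7 ... */
	inner();	/* nested context, disjoint register range */
	kernel_fpu_end(&vxstate, KERNEL_VXR_V0V7);
}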
-rw-r--r--	arch/s390/crypto/crc32-vx.c	2
-rw-r--r--	arch/s390/include/asm/fpu/api.h	32
-rw-r--r--	arch/s390/include/asm/lowcore.h	3
-rw-r--r--	arch/s390/kernel/fpu.c	317
-rw-r--r--	arch/s390/kernel/sysinfo.c	2
5 files changed, 147 insertions, 209 deletions
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 2bad9d837029..992e630c227b 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -67,7 +67,7 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
 									\
 	kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);			\
 	crc = ___crc32_vx(crc, data, aligned);				\
-	kernel_fpu_end(&vxstate);					\
+	kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);			\
 									\
 	if (remaining)							\
 		crc = ___crc32_sw(crc, data + aligned, remaining);	\
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 6aba6fc406ad..02124d66bfb5 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -64,18 +64,18 @@ static inline int test_fp_ctl(u32 fpc)
 	return rc;
 }
 
-#define KERNEL_VXR_V0V7		1
-#define KERNEL_VXR_V8V15	2
-#define KERNEL_VXR_V16V23	4
-#define KERNEL_VXR_V24V31	8
-#define KERNEL_FPR		16
-#define KERNEL_FPC		256
+#define KERNEL_FPC		1
+#define KERNEL_VXR_V0V7		2
+#define KERNEL_VXR_V8V15	4
+#define KERNEL_VXR_V16V23	8
+#define KERNEL_VXR_V24V31	16
 
 #define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
 #define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
 #define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
 
-#define KERNEL_FPU_MASK		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR)
+#define KERNEL_VXR		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
+#define KERNEL_FPR		(KERNEL_FPC|KERNEL_VXR_V0V7)
 
 struct kernel_fpu;
 
@@ -87,18 +87,28 @@ struct kernel_fpu;
  * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
  */
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
-void __kernel_fpu_end(struct kernel_fpu *state);
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
 
 
 static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
 	preempt_disable();
-	__kernel_fpu_begin(state, flags);
+	state->mask = S390_lowcore.fpu_flags;
+	if (!test_cpu_flag(CIF_FPU))
+		/* Save user space FPU state and register contents */
+		save_fpu_regs();
+	else if (state->mask & flags)
+		/* Save FPU/vector register in-use by the kernel */
+		__kernel_fpu_begin(state, flags);
+	S390_lowcore.fpu_flags |= flags;
 }
 
-static inline void kernel_fpu_end(struct kernel_fpu *state)
+static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	__kernel_fpu_end(state);
+	S390_lowcore.fpu_flags = state->mask;
+	if (state->mask & flags)
+		/* Restore FPU/vector register in-use by the kernel */
+		__kernel_fpu_end(state, flags);
 	preempt_enable();
 }
 
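The hunk above also changes the flag encoding: KERNEL_FPC moves into bit 0 and the four vector-register ranges occupy bits 1-4, so one small mask describes a whole context. A standalone user-space model of the resulting mask arithmetic (a sketch; only the #define values are taken from the hunk above, the rest is illustrative):

#include <stdio.h>

/* Mask values copied from the new asm/fpu/api.h above */
#define KERNEL_FPC        1
#define KERNEL_VXR_V0V7   2
#define KERNEL_VXR_V8V15  4
#define KERNEL_VXR_V24V31 16
#define KERNEL_VXR_LOW    (KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15)
#define KERNEL_FPR        (KERNEL_FPC | KERNEL_VXR_V0V7)

int main(void)
{
	unsigned int in_use = KERNEL_VXR_V0V7;	/* mask of the previous context */

	/* Same "flags &= state->mask" the __kernel_fpu_* functions perform */
	printf("%#x\n", KERNEL_VXR_V24V31 & in_use);	/* 0: nothing to save */
	printf("%#x\n", KERNEL_VXR_LOW & in_use);	/* 0x2: save V0..V7 only */
	printf("%#x\n", KERNEL_FPR & in_use);		/* 0x2: FPRs map to V0..V7 */
	return 0;
}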
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index d79ba7cf75b0..7b93b78f423c 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -129,7 +129,8 @@ struct lowcore {
 	__u8	pad_0x0390[0x0398-0x0390];	/* 0x0390 */
 	__u64	gmap;				/* 0x0398 */
 	__u32	spinlock_lockval;		/* 0x03a0 */
-	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
+	__u32	fpu_flags;			/* 0x03a4 */
+	__u8	pad_0x03a8[0x0400-0x03a8];	/* 0x03a8 */
 
 	/* Per cpu primary space access list */
 	__u32	paste[16];			/* 0x0400 */
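The padding arithmetic in this hunk checks out: fpu_flags is a 4-byte field at offset 0x03a4, so the pad now covers [0x03a8, 0x0400) instead of [0x03a4, 0x0400) and paste stays at 0x0400. A hypothetical compile-time assertion (a sketch, not added by the patch) would be:

/* Sketch only: the patch itself adds no such check. */
static inline void lowcore_layout_check(void)
{
	BUILD_BUG_ON(offsetof(struct lowcore, fpu_flags) != 0x03a4);
	BUILD_BUG_ON(offsetof(struct lowcore, paste) != 0x0400);
}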
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 81d1d1887507..1235b9438df4 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -10,240 +10,167 @@
 #include <asm/fpu/types.h>
 #include <asm/fpu/api.h>
 
-/*
- * Per-CPU variable to maintain FPU register ranges that are in use
- * by the kernel.
- */
-static DEFINE_PER_CPU(u32, kernel_fpu_state);
-
-#define KERNEL_FPU_STATE_MASK	(KERNEL_FPU_MASK|KERNEL_FPC)
-
+asm(".include \"asm/vx-insn.h\"\n");
 
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
-	if (!__this_cpu_read(kernel_fpu_state)) {
-		/*
-		 * Save user space FPU state and register contents. Multiple
-		 * calls because of interruptions do not matter and return
-		 * immediately. This also sets CIF_FPU to lazy restore FP/VX
-		 * register contents when returning to user space.
-		 */
-		save_fpu_regs();
-	}
-
-	/* Update flags to use the vector facility for KERNEL_FPR */
-	if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) {
-		flags |= KERNEL_VXR_LOW | KERNEL_FPC;
-		flags &= ~KERNEL_FPR;
-	}
-
-	/* Save and update current kernel VX state */
-	state->mask = __this_cpu_read(kernel_fpu_state);
-	__this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK);
-
 	/*
-	 * If this is the first call to __kernel_fpu_begin(), no additional
-	 * work is required.
+	 * Limit the save to the FPU/vector registers already
+	 * in use by the previous context
 	 */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		return;
+	flags &= state->mask;
 
-	/*
-	 * If KERNEL_FPR is still set, the vector facility is not available
-	 * and, thus, save floating-point control and registers only.
-	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("stfpc %0" : "=Q" (state->fpc));
-		asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
-		asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
-		asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
-		asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
-		asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
-		asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
-		asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
-		asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
-		asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
-		asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
-		asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
-		asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
-		asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
-		asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
-		asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
-		asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+	if (flags & KERNEL_FPC)
+		/* Save floating point control */
+		asm volatile("stfpc %0" : "=m" (state->fpc));
+
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Save floating-point registers */
+			asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
+			asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
+			asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
+			asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
+			asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
+			asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
+			asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
+			asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
+			asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
+			asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
+			asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
+			asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
+			asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
+			asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
+			asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
+			asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+		}
 		return;
 	}
 
-	/*
-	 * If this is a nested call to __kernel_fpu_begin(), check the saved
-	 * state mask to save and later restore the vector registers that
-	 * are already in use. Let's start with checking floating-point
-	 * controls.
-	 */
-	if (state->mask & KERNEL_FPC)
-		asm volatile("stfpc %0" : "=m" (state->fpc));
-
 	/* Test and save vector registers */
 	asm volatile (
 		/*
 		 * Test if any vector register must be saved and, if so,
 		 * test if all register can be saved.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
 		"	la	1,%[vxrs]\n"	/* load save area */
-		"	jo	18f\n"		/* -> save V0..V31 */
-
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> save V0..V31 */
 		/*
-		 * Test if V8..V23 can be saved at once... this speeds up
-		 * for KERNEL_fpu_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
+		 * Test for special case KERNEL_FPU_MID only. In this
+		 * case a vstm V8..V23 is the best instruction
 		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> save V8..V23 */
-
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> save V8..V23 */
+		"	VSTM	8,23,128,1\n"	/* vstm %v8,%v23,128(%r1) */
+		"	j	7f\n"
 		/* Test and save the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
 		"	jo	2f\n"		/* 11 -> save V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> save V0..V7 */
-		"	brc	2,4f\n"		/* 10 -> save V8..V15 */
-
+		"	brc	2,1f\n"		/* 10 -> save V8..V15 */
+		"	VSTM	0,7,0,1\n"	/* vstm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VSTM	8,15,128,1\n"	/* vstm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
 		/* Test and save the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> save V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> save V16..V23 */
-		"	brc	2,12f\n"	/* 10 -> save V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
-		/*
-		 * Below are the vstm combinations to save multiple vector
-		 * registers at once.
-		 */
-		"2:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x003e\n"	/* vstm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x003e\n"	/* vstm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c3e\n"	/* vstm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c3e\n"	/* vstm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x043e\n"	/* vstm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		"20:"
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> save V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> save V24..V31 */
+		"	VSTM	16,23,256,1\n"	/* vstm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VSTM	24,31,384,1\n"	/* vstm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
+		"6:	VSTM	16,31,256,1\n"	/* vstm %v16,%v31,256(%r1) */
+		"7:"
 		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
-		: [m] "d" (state->mask)
+		: [m] "d" (flags)
 		: "1", "cc");
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
 
-void __kernel_fpu_end(struct kernel_fpu *state)
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	/* Just update the per-CPU state if there is nothing to restore */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		goto update_fpu_state;
-
 	/*
-	 * If KERNEL_FPR is specified, the vector facility is not available
-	 * and, thus, restore floating-point control and registers only.
+	 * Limit the restore to the FPU/vector registers of the
+	 * previous context that have been overwritten by the
+	 * current context
 	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("lfpc %0" : : "Q" (state->fpc));
-		asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
-		asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
-		asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
-		asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
-		asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
-		asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
-		asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
-		asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
-		asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
-		asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
-		asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
-		asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
-		asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
-		asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
-		asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
-		asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
-		goto update_fpu_state;
-	}
+	flags &= state->mask;
 
-	/* Test and restore floating-point controls */
-	if (state->mask & KERNEL_FPC)
+	if (flags & KERNEL_FPC)
+		/* Restore floating-point controls */
 		asm volatile("lfpc %0" : : "Q" (state->fpc));
 
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Restore floating-point registers */
+			asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
+			asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
+			asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
+			asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
+			asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
+			asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
+			asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
+			asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
+			asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
+			asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
+			asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
+			asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
+			asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
+			asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
+			asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
+			asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
+		}
+		return;
+	}
+
 	/* Test and restore (load) vector registers */
 	asm volatile (
 		/*
-		 * Test if any vector registers must be loaded and, if so,
+		 * Test if any vector register must be loaded and, if so,
 		 * test if all registers can be loaded at once.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
-		"	la	1,%[vxrs]\n"	/* load load area */
-		"	jo	18f\n"		/* -> load V0..V31 */
-
-		/*
-		 * Test if V8..V23 can be restored at once... this speeds up
-		 * for KERNEL_VXR_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
-		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> load V8..V23 */
-
-		/* Test and load the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
-		"	jo	2f\n"		/* 11 -> load V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> load V0..V7 */
-		"	brc	2,4f\n"		/* 10 -> load V8..V15 */
-
-		/* Test and load the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> load V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> load V16..V23 */
-		"	brc	2,12f\n"	/* 10 -> load V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
+		"	la	1,%[vxrs]\n"	/* load restore area */
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> restore V0..V31 */
 		/*
-		 * Below are the vstm combinations to load multiple vector
-		 * registers at once.
+		 * Test for special case KERNEL_FPU_MID only. In this
+		 * case a vlm V8..V23 is the best instruction
 		 */
-		"2:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x0036\n"	/* vlm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x0036\n"	/* vlm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c36\n"	/* vlm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c36\n"	/* vlm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x0436\n"	/* vlm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		"20:"
-		:
-		: [vxrs] "Q" (*(struct vx_array *) &state->vxrs),
-		  [m] "d" (state->mask)
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> restore V8..V23 */
+		"	VLM	8,23,128,1\n"	/* vlm %v8,%v23,128(%r1) */
+		"	j	7f\n"
+		/* Test and restore the first half of 16 vector registers */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> restore V0..V15 */
+		"	brc	2,1f\n"		/* 10 -> restore V8..V15 */
+		"	VLM	0,7,0,1\n"	/* vlm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VLM	8,15,128,1\n"	/* vlm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		/* Test and restore the second half of 16 vector registers */
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> restore V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> restore V24..V31 */
+		"	VLM	16,23,256,1\n"	/* vlm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VLM	24,31,384,1\n"	/* vlm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		"6:	VLM	16,31,256,1\n"	/* vlm %v16,%v31,256(%r1) */
+		"7:"
+		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+		: [m] "d" (flags)
 		: "1", "cc");
-
-update_fpu_state:
-	/* Update current kernel VX state */
-	__this_cpu_write(kernel_fpu_state, state->mask);
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
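The simplified save dispatch above is easier to follow as a C model. The sketch below mirrors the branch structure of the new __kernel_fpu_begin() inline assembly; save_vxrs() is a hypothetical stand-in for a single VSTM instruction, and only the mask values are taken from the new asm/fpu/api.h:

#include <stdio.h>

/* Mask values copied from the new asm/fpu/api.h */
#define KERNEL_VXR_V0V7   2
#define KERNEL_VXR_V8V15  4
#define KERNEL_VXR_V16V23 8
#define KERNEL_VXR_V24V31 16
#define KERNEL_VXR_LOW    (KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15)
#define KERNEL_VXR_MID    (KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23)
#define KERNEL_VXR_HIGH   (KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31)
#define KERNEL_VXR        (KERNEL_VXR_LOW | KERNEL_VXR_HIGH)

/* Hypothetical stand-in for one vstm: registers first..last at offset first*16 */
static void save_vxrs(int first, int last)
{
	printf("vstm %%v%d,%%v%d,%d(%%r1)\n", first, last, first * 16);
}

static void model_save(unsigned int flags)
{
	if (!(flags & KERNEL_VXR))			/* tmll %[m],30 ; jz 7f */
		return;
	if ((flags & KERNEL_VXR) == KERNEL_VXR) {	/* jo 5f: all ranges in use */
		save_vxrs(0, 15);			/* 5: VSTM 0,15,0,1 */
		save_vxrs(16, 31);			/* 6: VSTM 16,31,256,1 */
		return;
	}
	if (flags == KERNEL_VXR_MID) {			/* chi %[m],12 special case */
		save_vxrs(8, 23);			/* VSTM 8,23,128,1 */
		return;
	}
	switch (flags & KERNEL_VXR_LOW) {		/* 0: tmll %[m],6 */
	case KERNEL_VXR_LOW:				/* jo 2f */
		save_vxrs(0, 15);
		break;
	case KERNEL_VXR_V8V15:				/* brc 2,1f */
		save_vxrs(8, 15);
		break;
	case KERNEL_VXR_V0V7:				/* fall-through: VSTM 0,7 */
		save_vxrs(0, 7);
		break;
	}
	switch (flags & KERNEL_VXR_HIGH) {		/* 3: tmll %[m],24 */
	case KERNEL_VXR_HIGH:				/* jo 6f */
		save_vxrs(16, 31);
		break;
	case KERNEL_VXR_V24V31:				/* brc 2,4f */
		save_vxrs(24, 31);
		break;
	case KERNEL_VXR_V16V23:
		save_vxrs(16, 23);
		break;
	}
}

int main(void)
{
	model_save(KERNEL_VXR_V0V7 | KERNEL_VXR_V24V31);	/* prints two vstm */
	return 0;
}

The restore path in __kernel_fpu_end() has exactly the same shape, with vlm in place of vstm.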
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 050b8d067d3b..bfda6aa40280 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -454,7 +454,7 @@ void s390_adjust_jiffies(void)
454 : "Q" (info->capability), "d" (10000000), "d" (0) 454 : "Q" (info->capability), "d" (10000000), "d" (0)
455 : "cc" 455 : "cc"
456 ); 456 );
457 kernel_fpu_end(&fpu); 457 kernel_fpu_end(&fpu, KERNEL_FPR);
458 } else 458 } else
459 /* 459 /*
460 * Really old machine without stsi block for basic 460 * Really old machine without stsi block for basic