author		Martin Schwidefsky <schwidefsky@de.ibm.com>	2016-08-22 06:06:21 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2016-08-29 05:05:01 -0400
commit		7f79695cc1b6aa6d80a861780d9f8ce75d3dddcb (patch)
tree		dd34f15ed90df7e9920e29b753aa2ff0cf6feff3
parent		0eab11c7e0d30de14a15ccd8269eef238321a8e1 (diff)
s390/fpu: improve kernel_fpu_[begin|end]
In case of nested use of the FPU or vector registers in the kernel
the current code uses the mask of the FPU/vector registers of the
previous contexts to decide which registers to save and restore.
E.g. if the previous context used KERNEL_VXR_V0V7 and the next
context wants to use KERNEL_VXR_V24V31, the first 8 vector registers
are stored to the FPU state structure, although this is unnecessary:
the next context does not use these registers.
Rework the FPU/vector register save and restore code. The new code
does a few things differently:
1) A lowcore field is used instead of a per-cpu variable.
2) The kernel_fpu_end function now has two parameters just like
kernel_fpu_begin. The register flags are required by both
functions to save / restore the minimal register set.
3) The inline functions kernel_fpu_begin/kernel_fpu_end now do the
update of the register masks. If the user space FPU registers
have already been stored, neither save_fpu_regs nor the
__kernel_fpu_begin/__kernel_fpu_end functions have to be called
for the first context. In this case kernel_fpu_begin adds 7
instructions and kernel_fpu_end adds 4 instructions.
4) The inline assemblies in __kernel_fpu_begin / __kernel_fpu_end
that save / restore the vector registers are simplified a bit.
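
To illustrate the new calling convention (a sketch, not part of this
patch; outer() and inner() are hypothetical callers): kernel_fpu_end
now takes the same flags as kernel_fpu_begin, and a nested section
that uses a disjoint register range triggers no save or restore:

	static void inner(void)
	{
		struct kernel_fpu inner_state;

		kernel_fpu_begin(&inner_state, KERNEL_VXR_V24V31);
		/* use %v24-%v31; nothing is saved here because the
		 * outer mask (KERNEL_VXR_V0V7) does not overlap */
		kernel_fpu_end(&inner_state, KERNEL_VXR_V24V31);
	}

	static void outer(void)
	{
		struct kernel_fpu outer_state;

		kernel_fpu_begin(&outer_state, KERNEL_VXR_V0V7);
		/* use %v0-%v7 */
		inner();	/* nested kernel FPU section */
		kernel_fpu_end(&outer_state, KERNEL_VXR_V0V7);
	}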
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r--	arch/s390/crypto/crc32-vx.c	|   2
-rw-r--r--	arch/s390/include/asm/fpu/api.h	|  32
-rw-r--r--	arch/s390/include/asm/lowcore.h	|   3
-rw-r--r--	arch/s390/kernel/fpu.c		| 317
-rw-r--r--	arch/s390/kernel/sysinfo.c	|   2
5 files changed, 147 insertions(+), 209 deletions(-)
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 2bad9d837029..992e630c227b 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -67,7 +67,7 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
 								\
 	kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);		\
 	crc = ___crc32_vx(crc, data, aligned);			\
-	kernel_fpu_end(&vxstate);				\
+	kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);		\
 								\
 	if (remaining)						\
 		crc = ___crc32_sw(crc, data + aligned, remaining); \
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 6aba6fc406ad..02124d66bfb5 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -64,18 +64,18 @@ static inline int test_fp_ctl(u32 fpc)
 	return rc;
 }
 
-#define KERNEL_VXR_V0V7		1
-#define KERNEL_VXR_V8V15	2
-#define KERNEL_VXR_V16V23	4
-#define KERNEL_VXR_V24V31	8
-#define KERNEL_FPR		16
-#define KERNEL_FPC		256
+#define KERNEL_FPC		1
+#define KERNEL_VXR_V0V7		2
+#define KERNEL_VXR_V8V15	4
+#define KERNEL_VXR_V16V23	8
+#define KERNEL_VXR_V24V31	16
 
 #define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
 #define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
 #define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
 
-#define KERNEL_FPU_MASK		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR)
+#define KERNEL_VXR		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
+#define KERNEL_FPR		(KERNEL_FPC|KERNEL_VXR_V0V7)
 
 struct kernel_fpu;
 
@@ -87,18 +87,28 @@ struct kernel_fpu;
  * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
  */
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
-void __kernel_fpu_end(struct kernel_fpu *state);
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
 
 
 static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
 	preempt_disable();
-	__kernel_fpu_begin(state, flags);
+	state->mask = S390_lowcore.fpu_flags;
+	if (!test_cpu_flag(CIF_FPU))
+		/* Save user space FPU state and register contents */
+		save_fpu_regs();
+	else if (state->mask & flags)
+		/* Save FPU/vector register in-use by the kernel */
+		__kernel_fpu_begin(state, flags);
+	S390_lowcore.fpu_flags |= flags;
 }
 
-static inline void kernel_fpu_end(struct kernel_fpu *state)
+static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	__kernel_fpu_end(state);
+	S390_lowcore.fpu_flags = state->mask;
+	if (state->mask & flags)
+		/* Restore FPU/vector register in-use by the kernel */
+		__kernel_fpu_end(state, flags);
 	preempt_enable();
 }
 
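With the new bit assignment above, the overlap test in
kernel_fpu_begin() reduces to a single AND. A minimal sketch of that
computation (illustrative only; fpu_nested_save_mask() is not a
function in this patch):

	/* Registers needing a save are those the previous context uses
	 * (prev_mask) and the new context will clobber (flags), e.g.
	 * prev_mask = KERNEL_VXR_V0V7 (2) with flags = KERNEL_VXR_V24V31
	 * (16) yields 0, so __kernel_fpu_begin() is skipped entirely. */
	static inline u32 fpu_nested_save_mask(u32 prev_mask, u32 flags)
	{
		return prev_mask & flags;
	}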
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index d79ba7cf75b0..7b93b78f423c 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -129,7 +129,8 @@ struct lowcore {
 	__u8	pad_0x0390[0x0398-0x0390];	/* 0x0390 */
 	__u64	gmap;				/* 0x0398 */
 	__u32	spinlock_lockval;		/* 0x03a0 */
-	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
+	__u32	fpu_flags;			/* 0x03a4 */
+	__u8	pad_0x03a8[0x0400-0x03a8];	/* 0x03a8 */
 
 	/* Per cpu primary space access list */
 	__u32	paste[16];			/* 0x0400 */
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 81d1d1887507..1235b9438df4 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -10,240 +10,167 @@
 #include <asm/fpu/types.h>
 #include <asm/fpu/api.h>
 
-/*
- * Per-CPU variable to maintain FPU register ranges that are in use
- * by the kernel.
- */
-static DEFINE_PER_CPU(u32, kernel_fpu_state);
-
-#define KERNEL_FPU_STATE_MASK	(KERNEL_FPU_MASK|KERNEL_FPC)
-
+asm(".include \"asm/vx-insn.h\"\n");
 
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
-	if (!__this_cpu_read(kernel_fpu_state)) {
-		/*
-		 * Save user space FPU state and register contents. Multiple
-		 * calls because of interruptions do not matter and return
-		 * immediately. This also sets CIF_FPU to lazy restore FP/VX
-		 * register contents when returning to user space.
-		 */
-		save_fpu_regs();
-	}
-
-	/* Update flags to use the vector facility for KERNEL_FPR */
-	if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) {
-		flags |= KERNEL_VXR_LOW | KERNEL_FPC;
-		flags &= ~KERNEL_FPR;
-	}
-
-	/* Save and update current kernel VX state */
-	state->mask = __this_cpu_read(kernel_fpu_state);
-	__this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK);
-
 	/*
-	 * If this is the first call to __kernel_fpu_begin(), no additional
-	 * work is required.
+	 * Limit the save to the FPU/vector registers already
+	 * in use by the previous context
 	 */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		return;
+	flags &= state->mask;
 
-	/*
-	 * If KERNEL_FPR is still set, the vector facility is not available
-	 * and, thus, save floating-point control and registers only.
-	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("stfpc %0" : "=Q" (state->fpc));
-		asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
-		asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
-		asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
-		asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
-		asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
-		asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
-		asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
-		asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
-		asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
-		asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
-		asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
-		asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
-		asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
-		asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
-		asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
-		asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+	if (flags & KERNEL_FPC)
+		/* Save floating point control */
+		asm volatile("stfpc %0" : "=m" (state->fpc));
+
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Save floating-point registers */
+			asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
+			asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
+			asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
+			asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
+			asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
+			asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
+			asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
+			asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
+			asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
+			asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
+			asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
+			asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
+			asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
+			asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
+			asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
+			asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+		}
 		return;
 	}
 
-	/*
-	 * If this is a nested call to __kernel_fpu_begin(), check the saved
-	 * state mask to save and later restore the vector registers that
-	 * are already in use. Let's start with checking floating-point
-	 * controls.
-	 */
-	if (state->mask & KERNEL_FPC)
-		asm volatile("stfpc %0" : "=m" (state->fpc));
-
 	/* Test and save vector registers */
 	asm volatile (
 		/*
 		 * Test if any vector register must be saved and, if so,
 		 * test if all register can be saved.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
 		"	la	1,%[vxrs]\n"	/* load save area */
-		"	jo	18f\n"		/* -> save V0..V31 */
-
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> save V0..V31 */
 		/*
-		 * Test if V8..V23 can be saved at once... this speeds up
-		 * for KERNEL_fpu_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
+		 * Test for special case KERNEL_FPU_MID only. In this
+		 * case a vstm V8..V23 is the best instruction
 		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> save V8..V23 */
-
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> save V8..V23 */
+		"	VSTM	8,23,128,1\n"	/* vstm %v8,%v23,128(%r1) */
+		"	j	7f\n"
 		/* Test and save the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
 		"	jo	2f\n"		/* 11 -> save V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> save V0..V7 */
-		"	brc	2,4f\n"		/* 10 -> save V8..V15 */
-
+		"	brc	2,1f\n"		/* 10 -> save V8..V15 */
+		"	VSTM	0,7,0,1\n"	/* vstm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VSTM	8,15,128,1\n"	/* vstm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
 		/* Test and save the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> save V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> save V16..V23 */
-		"	brc	2,12f\n"	/* 10 -> save V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
-		/*
-		 * Below are the vstm combinations to save multiple vector
-		 * registers at once.
-		 */
-		"2:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x003e\n"	/* vstm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x003e\n"	/* vstm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c3e\n"	/* vstm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c3e\n"	/* vstm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x043e\n"	/* vstm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		"20:"
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> save V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> save V24..V31 */
+		"	VSTM	16,23,256,1\n"	/* vstm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VSTM	24,31,384,1\n"	/* vstm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
+		"6:	VSTM	16,31,256,1\n"	/* vstm %v16,%v31,256(%r1) */
+		"7:"
 		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
-		: [m] "d" (state->mask)
+		: [m] "d" (flags)
 		: "1", "cc");
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
 
-void __kernel_fpu_end(struct kernel_fpu *state)
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	/* Just update the per-CPU state if there is nothing to restore */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		goto update_fpu_state;
-
 	/*
-	 * If KERNEL_FPR is specified, the vector facility is not available
-	 * and, thus, restore floating-point control and registers only.
+	 * Limit the restore to the FPU/vector registers of the
+	 * previous context that have been overwritten by the
+	 * current context
 	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("lfpc %0" : : "Q" (state->fpc));
-		asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
-		asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
-		asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
-		asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
-		asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
-		asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
-		asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
-		asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
-		asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
-		asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
-		asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
-		asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
-		asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
-		asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
-		asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
-		asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
-		goto update_fpu_state;
-	}
+	flags &= state->mask;
 
-	/* Test and restore floating-point controls */
-	if (state->mask & KERNEL_FPC)
+	if (flags & KERNEL_FPC)
+		/* Restore floating-point controls */
 		asm volatile("lfpc %0" : : "Q" (state->fpc));
 
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Restore floating-point registers */
+			asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
+			asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
+			asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
+			asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
+			asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
+			asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
+			asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
+			asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
+			asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
+			asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
+			asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
+			asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
+			asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
+			asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
+			asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
+			asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
+		}
+		return;
+	}
+
 	/* Test and restore (load) vector registers */
 	asm volatile (
 		/*
-		 * Test if any vector registers must be loaded and, if so,
+		 * Test if any vector register must be loaded and, if so,
 		 * test if all registers can be loaded at once.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
-		"	la	1,%[vxrs]\n"	/* load load area */
-		"	jo	18f\n"		/* -> load V0..V31 */
-
-		/*
-		 * Test if V8..V23 can be restored at once... this speeds up
-		 * for KERNEL_VXR_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
-		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> load V8..V23 */
-
-		/* Test and load the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
-		"	jo	2f\n"		/* 11 -> load V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> load V0..V7 */
-		"	brc	2,4f\n"		/* 10 -> load V8..V15 */
-
-		/* Test and load the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> load V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> load V16..V23 */
-		"	brc	2,12f\n"	/* 10 -> load V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
+		"	la	1,%[vxrs]\n"	/* load restore area */
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> restore V0..V31 */
 		/*
-		 * Below are the vstm combinations to load multiple vector
-		 * registers at once.
+		 * Test for special case KERNEL_FPU_MID only. In this
+		 * case a vlm V8..V23 is the best instruction
 		 */
-		"2:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x0036\n"	/* vlm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x0036\n"	/* vlm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c36\n"	/* vlm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c36\n"	/* vlm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x0436\n"	/* vlm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		"20:"
-		:
-		: [vxrs] "Q" (*(struct vx_array *) &state->vxrs),
-		  [m] "d" (state->mask)
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> restore V8..V23 */
+		"	VLM	8,23,128,1\n"	/* vlm %v8,%v23,128(%r1) */
+		"	j	7f\n"
+		/* Test and restore the first half of 16 vector registers */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> restore V0..V15 */
+		"	brc	2,1f\n"		/* 10 -> restore V8..V15 */
+		"	VLM	0,7,0,1\n"	/* vlm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VLM	8,15,128,1\n"	/* vlm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		/* Test and restore the second half of 16 vector registers */
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> restore V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> restore V24..V31 */
+		"	VLM	16,23,256,1\n"	/* vlm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VLM	24,31,384,1\n"	/* vlm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		"6:	VLM	16,31,256,1\n"	/* vlm %v16,%v31,256(%r1) */
+		"7:"
+		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+		: [m] "d" (flags)
 		: "1", "cc");
-
-update_fpu_state:
-	/* Update current kernel VX state */
-	__this_cpu_write(kernel_fpu_state, state->mask);
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
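
For reference, the tmll/brc decision tree of the save path above,
rendered in C (an illustrative sketch only; the patch keeps this in
inline assembly so the compiler cannot clobber the vector registers
while they are being saved, and vstm_range() is a hypothetical
stand-in for the VSTM macro from asm/vx-insn.h):

	static void vstm_range(int first, int last, int offset);  /* stand-in */

	static void save_vxrs_sketch(u32 flags)
	{
		if (!(flags & KERNEL_VXR))
			return;				/* tmll %[m],30; jz */
		if ((flags & KERNEL_VXR) == KERNEL_VXR) {
			vstm_range(0, 15, 0);		/* jo -> label 5, */
			vstm_range(16, 31, 256);	/* falls through to 6 */
			return;
		}
		if (flags == KERNEL_VXR_MID) {		/* chi %[m],12 */
			vstm_range(8, 23, 128);		/* single vstm */
			return;
		}
		switch (flags & KERNEL_VXR_LOW) {	/* tmll %[m],6 */
		case KERNEL_VXR_LOW:
			vstm_range(0, 15, 0);		/* 11 */
			break;
		case KERNEL_VXR_V8V15:
			vstm_range(8, 15, 128);		/* 10 */
			break;
		case KERNEL_VXR_V0V7:
			vstm_range(0, 7, 0);		/* 01 */
			break;
		}
		switch (flags & KERNEL_VXR_HIGH) {	/* tmll %[m],24 */
		case KERNEL_VXR_HIGH:
			vstm_range(16, 31, 256);	/* 11 */
			break;
		case KERNEL_VXR_V24V31:
			vstm_range(24, 31, 384);	/* 10 */
			break;
		case KERNEL_VXR_V16V23:
			vstm_range(16, 23, 256);	/* 01 */
			break;
		}
	}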
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 050b8d067d3b..bfda6aa40280 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -454,7 +454,7 @@ void s390_adjust_jiffies(void)
 			: "Q" (info->capability), "d" (10000000), "d" (0)
 			: "cc"
 			);
-		kernel_fpu_end(&fpu);
+		kernel_fpu_end(&fpu, KERNEL_FPR);
 	} else
 		/*
 		 * Really old machine without stsi block for basic