Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 2
-rw-r--r--  arch/x86/crypto/aegis128-aesni-glue.c | 38
-rw-r--r--  arch/x86/crypto/aegis128l-aesni-glue.c | 38
-rw-r--r--  arch/x86/crypto/aegis256-aesni-glue.c | 38
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 47
-rw-r--r--  arch/x86/crypto/crct10dif-pcl-asm_64.S | 782
-rw-r--r--  arch/x86/crypto/crct10dif-pclmul_glue.c | 12
-rw-r--r--  arch/x86/crypto/morus1280_glue.c | 40
-rw-r--r--  arch/x86/crypto/morus640_glue.c | 39
-rw-r--r--  arch/x86/crypto/poly1305-sse2-x86_64.S | 4
-rw-r--r--  arch/x86/events/core.c | 14
-rw-r--r--  arch/x86/events/intel/core.c | 25
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c | 4
-rw-r--r--  arch/x86/events/perf_event.h | 16
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 157
-rw-r--r--  arch/x86/include/asm/a.out-core.h | 67
-rw-r--r--  arch/x86/include/asm/hyperv-tlfs.h | 2
-rw-r--r--  arch/x86/include/asm/intel-family.h | 2
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable.h | 2
-rw-r--r--  arch/x86/include/asm/uaccess.h | 7
-rw-r--r--  arch/x86/include/asm/uv/bios.h | 8
-rw-r--r--  arch/x86/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/x86/include/uapi/asm/socket.h | 1
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c | 1
-rw-r--r--  arch/x86/kvm/cpuid.c | 4
-rw-r--r--  arch/x86/kvm/mmu.c | 18
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 12
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 29
-rw-r--r--  arch/x86/kvm/vmx/vmx.h | 10
-rw-r--r--  arch/x86/kvm/x86.c | 2
-rw-r--r--  arch/x86/mm/extable.c | 59
-rw-r--r--  arch/x86/mm/pageattr.c | 50
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 46
-rw-r--r--  arch/x86/net/bpf_jit_comp32.c | 121
-rw-r--r--  arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c | 1
-rw-r--r--  arch/x86/platform/uv/bios_uv.c | 23
-rw-r--r--  arch/x86/um/Kconfig | 1
39 files changed, 627 insertions, 1100 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 68261430fe6e..ade12ec4224b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -14,7 +14,6 @@ config X86_32
14 select ARCH_WANT_IPC_PARSE_VERSION 14 select ARCH_WANT_IPC_PARSE_VERSION
15 select CLKSRC_I8253 15 select CLKSRC_I8253
16 select CLONE_BACKWARDS 16 select CLONE_BACKWARDS
17 select HAVE_AOUT
18 select HAVE_GENERIC_DMA_COHERENT 17 select HAVE_GENERIC_DMA_COHERENT
19 select MODULES_USE_ELF_REL 18 select MODULES_USE_ELF_REL
20 select OLD_SIGACTION 19 select OLD_SIGACTION
@@ -2843,6 +2842,7 @@ config IA32_EMULATION
2843config IA32_AOUT 2842config IA32_AOUT
2844 tristate "IA32 a.out support" 2843 tristate "IA32 a.out support"
2845 depends on IA32_EMULATION 2844 depends on IA32_EMULATION
2845 depends on BROKEN
2846 ---help--- 2846 ---help---
2847 Support old a.out binaries in the 32bit emulation. 2847 Support old a.out binaries in the 32bit emulation.
2848 2848
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f105ae8651c9..f62e347862cc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -602,10 +602,12 @@ ENTRY(trampoline_32bit_src)
6023: 6023:
603 /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ 603 /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
604 pushl %ecx 604 pushl %ecx
605 pushl %edx
605 movl $MSR_EFER, %ecx 606 movl $MSR_EFER, %ecx
606 rdmsr 607 rdmsr
607 btsl $_EFER_LME, %eax 608 btsl $_EFER_LME, %eax
608 wrmsr 609 wrmsr
610 popl %edx
609 popl %ecx 611 popl %ecx
610 612
611 /* Enable PAE and LA57 (if required) paging modes */ 613 /* Enable PAE and LA57 (if required) paging modes */
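The head_64.S hunk above saves and restores %edx around the EFER update: RDMSR and WRMSR operate on the EDX:EAX register pair, so %edx is clobbered even though only the low dword in %eax is modified here. A minimal C illustration of that register contract follows; it is a hedged sketch, not part of the patch (the helper name is made up, and actually executing RDMSR requires ring 0).

#include <stdint.h>

/* Sketch only: RDMSR returns bits 63:32 of the MSR in EDX and bits 31:0 in
 * EAX, which is why the patch wraps the rdmsr/wrmsr sequence in
 * pushl/popl %edx to keep the caller's %edx intact. */
static inline uint64_t rdmsr_sketch(uint32_t msr)
{
        uint32_t lo, hi;

        asm volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
        return ((uint64_t)hi << 32) | lo;
}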
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 2a356b948720..3ea71b871813 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -119,31 +119,20 @@ static void crypto_aegis128_aesni_process_ad(
119} 119}
120 120
121static void crypto_aegis128_aesni_process_crypt( 121static void crypto_aegis128_aesni_process_crypt(
122 struct aegis_state *state, struct aead_request *req, 122 struct aegis_state *state, struct skcipher_walk *walk,
123 const struct aegis_crypt_ops *ops) 123 const struct aegis_crypt_ops *ops)
124{ 124{
125 struct skcipher_walk walk; 125 while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
126 u8 *src, *dst; 126 ops->crypt_blocks(state,
127 unsigned int chunksize, base; 127 round_down(walk->nbytes, AEGIS128_BLOCK_SIZE),
128 128 walk->src.virt.addr, walk->dst.virt.addr);
129 ops->skcipher_walk_init(&walk, req, false); 129 skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
130 130 }
131 while (walk.nbytes) {
132 src = walk.src.virt.addr;
133 dst = walk.dst.virt.addr;
134 chunksize = walk.nbytes;
135
136 ops->crypt_blocks(state, chunksize, src, dst);
137
138 base = chunksize & ~(AEGIS128_BLOCK_SIZE - 1);
139 src += base;
140 dst += base;
141 chunksize &= AEGIS128_BLOCK_SIZE - 1;
142
143 if (chunksize > 0)
144 ops->crypt_tail(state, chunksize, src, dst);
145 131
146 skcipher_walk_done(&walk, 0); 132 if (walk->nbytes) {
133 ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
134 walk->dst.virt.addr);
135 skcipher_walk_done(walk, 0);
147 } 136 }
148} 137}
149 138
@@ -186,13 +175,16 @@ static void crypto_aegis128_aesni_crypt(struct aead_request *req,
186{ 175{
187 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 176 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
188 struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm); 177 struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
178 struct skcipher_walk walk;
189 struct aegis_state state; 179 struct aegis_state state;
190 180
181 ops->skcipher_walk_init(&walk, req, true);
182
191 kernel_fpu_begin(); 183 kernel_fpu_begin();
192 184
193 crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv); 185 crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
194 crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); 186 crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
195 crypto_aegis128_aesni_process_crypt(&state, req, ops); 187 crypto_aegis128_aesni_process_crypt(&state, &walk, ops);
196 crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); 188 crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
197 189
198 kernel_fpu_end(); 190 kernel_fpu_end();
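The same restructuring is applied to all three AEGIS glue files (and, further below, to the two MORUS glue files): the skcipher walk is now set up by the caller before kernel_fpu_begin(), presumably with atomic=true because skcipher_walk_done() is afterwards invoked while preemption is disabled for the FPU section, and each walk step processes only its block-aligned prefix, handing the remainder back to the walk. A consolidated, hedged sketch of that loop is below; it mirrors the hunk above, relies on the kernel crypto types plus the file-local aegis definitions, and therefore only builds inside that source file's context.

#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>

/* Hedged consolidation of the new per-walk loop (struct aegis_state,
 * struct aegis_crypt_ops and AEGIS128_BLOCK_SIZE are the file-local
 * definitions from aegis128-aesni-glue.c). */
static void aegis_walk_sketch(struct aegis_state *state,
                              struct skcipher_walk *walk,
                              const struct aegis_crypt_ops *ops)
{
        while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
                unsigned int blocks = round_down(walk->nbytes,
                                                 AEGIS128_BLOCK_SIZE);

                /* Crypt the block-aligned prefix of this walk step... */
                ops->crypt_blocks(state, blocks, walk->src.virt.addr,
                                  walk->dst.virt.addr);
                /* ...and report the unprocessed remainder to the walk. */
                skcipher_walk_done(walk, walk->nbytes - blocks);
        }

        if (walk->nbytes) {
                /* Final partial block, if any. */
                ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
                                walk->dst.virt.addr);
                skcipher_walk_done(walk, 0);
        }
}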
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index dbe8bb980da1..1b1b39c66c5e 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -119,31 +119,20 @@ static void crypto_aegis128l_aesni_process_ad(
119} 119}
120 120
121static void crypto_aegis128l_aesni_process_crypt( 121static void crypto_aegis128l_aesni_process_crypt(
122 struct aegis_state *state, struct aead_request *req, 122 struct aegis_state *state, struct skcipher_walk *walk,
123 const struct aegis_crypt_ops *ops) 123 const struct aegis_crypt_ops *ops)
124{ 124{
125 struct skcipher_walk walk; 125 while (walk->nbytes >= AEGIS128L_BLOCK_SIZE) {
126 u8 *src, *dst; 126 ops->crypt_blocks(state, round_down(walk->nbytes,
127 unsigned int chunksize, base; 127 AEGIS128L_BLOCK_SIZE),
128 128 walk->src.virt.addr, walk->dst.virt.addr);
129 ops->skcipher_walk_init(&walk, req, false); 129 skcipher_walk_done(walk, walk->nbytes % AEGIS128L_BLOCK_SIZE);
130 130 }
131 while (walk.nbytes) {
132 src = walk.src.virt.addr;
133 dst = walk.dst.virt.addr;
134 chunksize = walk.nbytes;
135
136 ops->crypt_blocks(state, chunksize, src, dst);
137
138 base = chunksize & ~(AEGIS128L_BLOCK_SIZE - 1);
139 src += base;
140 dst += base;
141 chunksize &= AEGIS128L_BLOCK_SIZE - 1;
142
143 if (chunksize > 0)
144 ops->crypt_tail(state, chunksize, src, dst);
145 131
146 skcipher_walk_done(&walk, 0); 132 if (walk->nbytes) {
133 ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
134 walk->dst.virt.addr);
135 skcipher_walk_done(walk, 0);
147 } 136 }
148} 137}
149 138
@@ -186,13 +175,16 @@ static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
186{ 175{
187 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 176 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
188 struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm); 177 struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
178 struct skcipher_walk walk;
189 struct aegis_state state; 179 struct aegis_state state;
190 180
181 ops->skcipher_walk_init(&walk, req, true);
182
191 kernel_fpu_begin(); 183 kernel_fpu_begin();
192 184
193 crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv); 185 crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
194 crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen); 186 crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
195 crypto_aegis128l_aesni_process_crypt(&state, req, ops); 187 crypto_aegis128l_aesni_process_crypt(&state, &walk, ops);
196 crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen); 188 crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
197 189
198 kernel_fpu_end(); 190 kernel_fpu_end();
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index 8bebda2de92f..6227ca3220a0 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -119,31 +119,20 @@ static void crypto_aegis256_aesni_process_ad(
119} 119}
120 120
121static void crypto_aegis256_aesni_process_crypt( 121static void crypto_aegis256_aesni_process_crypt(
122 struct aegis_state *state, struct aead_request *req, 122 struct aegis_state *state, struct skcipher_walk *walk,
123 const struct aegis_crypt_ops *ops) 123 const struct aegis_crypt_ops *ops)
124{ 124{
125 struct skcipher_walk walk; 125 while (walk->nbytes >= AEGIS256_BLOCK_SIZE) {
126 u8 *src, *dst; 126 ops->crypt_blocks(state,
127 unsigned int chunksize, base; 127 round_down(walk->nbytes, AEGIS256_BLOCK_SIZE),
128 128 walk->src.virt.addr, walk->dst.virt.addr);
129 ops->skcipher_walk_init(&walk, req, false); 129 skcipher_walk_done(walk, walk->nbytes % AEGIS256_BLOCK_SIZE);
130 130 }
131 while (walk.nbytes) {
132 src = walk.src.virt.addr;
133 dst = walk.dst.virt.addr;
134 chunksize = walk.nbytes;
135
136 ops->crypt_blocks(state, chunksize, src, dst);
137
138 base = chunksize & ~(AEGIS256_BLOCK_SIZE - 1);
139 src += base;
140 dst += base;
141 chunksize &= AEGIS256_BLOCK_SIZE - 1;
142
143 if (chunksize > 0)
144 ops->crypt_tail(state, chunksize, src, dst);
145 131
146 skcipher_walk_done(&walk, 0); 132 if (walk->nbytes) {
133 ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
134 walk->dst.virt.addr);
135 skcipher_walk_done(walk, 0);
147 } 136 }
148} 137}
149 138
@@ -186,13 +175,16 @@ static void crypto_aegis256_aesni_crypt(struct aead_request *req,
186{ 175{
187 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 176 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
188 struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm); 177 struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
178 struct skcipher_walk walk;
189 struct aegis_state state; 179 struct aegis_state state;
190 180
181 ops->skcipher_walk_init(&walk, req, true);
182
191 kernel_fpu_begin(); 183 kernel_fpu_begin();
192 184
193 crypto_aegis256_aesni_init(&state, ctx->key, req->iv); 185 crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
194 crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen); 186 crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
195 crypto_aegis256_aesni_process_crypt(&state, req, ops); 187 crypto_aegis256_aesni_process_crypt(&state, &walk, ops);
196 crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen); 188 crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
197 189
198 kernel_fpu_end(); 190 kernel_fpu_end();
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 1321700d6647..1e3d2102033a 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -175,26 +175,18 @@ asmlinkage void aesni_gcm_finalize(void *ctx,
175 struct gcm_context_data *gdata, 175 struct gcm_context_data *gdata,
176 u8 *auth_tag, unsigned long auth_tag_len); 176 u8 *auth_tag, unsigned long auth_tag_len);
177 177
178static struct aesni_gcm_tfm_s { 178static const struct aesni_gcm_tfm_s {
179void (*init)(void *ctx, 179 void (*init)(void *ctx, struct gcm_context_data *gdata, u8 *iv,
180 struct gcm_context_data *gdata, 180 u8 *hash_subkey, const u8 *aad, unsigned long aad_len);
181 u8 *iv, 181 void (*enc_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
182 u8 *hash_subkey, const u8 *aad, 182 const u8 *in, unsigned long plaintext_len);
183 unsigned long aad_len); 183 void (*dec_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
184void (*enc_update)(void *ctx, 184 const u8 *in, unsigned long ciphertext_len);
185 struct gcm_context_data *gdata, u8 *out, 185 void (*finalize)(void *ctx, struct gcm_context_data *gdata,
186 const u8 *in, 186 u8 *auth_tag, unsigned long auth_tag_len);
187 unsigned long plaintext_len);
188void (*dec_update)(void *ctx,
189 struct gcm_context_data *gdata, u8 *out,
190 const u8 *in,
191 unsigned long ciphertext_len);
192void (*finalize)(void *ctx,
193 struct gcm_context_data *gdata,
194 u8 *auth_tag, unsigned long auth_tag_len);
195} *aesni_gcm_tfm; 187} *aesni_gcm_tfm;
196 188
197struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = { 189static const struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
198 .init = &aesni_gcm_init, 190 .init = &aesni_gcm_init,
199 .enc_update = &aesni_gcm_enc_update, 191 .enc_update = &aesni_gcm_enc_update,
200 .dec_update = &aesni_gcm_dec_update, 192 .dec_update = &aesni_gcm_dec_update,
@@ -243,7 +235,7 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx,
243 const u8 *aad, unsigned long aad_len, 235 const u8 *aad, unsigned long aad_len,
244 u8 *auth_tag, unsigned long auth_tag_len); 236 u8 *auth_tag, unsigned long auth_tag_len);
245 237
246struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = { 238static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
247 .init = &aesni_gcm_init_avx_gen2, 239 .init = &aesni_gcm_init_avx_gen2,
248 .enc_update = &aesni_gcm_enc_update_avx_gen2, 240 .enc_update = &aesni_gcm_enc_update_avx_gen2,
249 .dec_update = &aesni_gcm_dec_update_avx_gen2, 241 .dec_update = &aesni_gcm_dec_update_avx_gen2,
@@ -288,7 +280,7 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx,
288 const u8 *aad, unsigned long aad_len, 280 const u8 *aad, unsigned long aad_len,
289 u8 *auth_tag, unsigned long auth_tag_len); 281 u8 *auth_tag, unsigned long auth_tag_len);
290 282
291struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = { 283static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
292 .init = &aesni_gcm_init_avx_gen4, 284 .init = &aesni_gcm_init_avx_gen4,
293 .enc_update = &aesni_gcm_enc_update_avx_gen4, 285 .enc_update = &aesni_gcm_enc_update_avx_gen4,
294 .dec_update = &aesni_gcm_dec_update_avx_gen4, 286 .dec_update = &aesni_gcm_dec_update_avx_gen4,
@@ -778,7 +770,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
778{ 770{
779 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 771 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
780 unsigned long auth_tag_len = crypto_aead_authsize(tfm); 772 unsigned long auth_tag_len = crypto_aead_authsize(tfm);
781 struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm; 773 const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
782 struct gcm_context_data data AESNI_ALIGN_ATTR; 774 struct gcm_context_data data AESNI_ALIGN_ATTR;
783 struct scatter_walk dst_sg_walk = {}; 775 struct scatter_walk dst_sg_walk = {};
784 unsigned long left = req->cryptlen; 776 unsigned long left = req->cryptlen;
@@ -821,11 +813,14 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
821 scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0); 813 scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
822 } 814 }
823 815
824 src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen); 816 if (left) {
825 scatterwalk_start(&src_sg_walk, src_sg); 817 src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
826 if (req->src != req->dst) { 818 scatterwalk_start(&src_sg_walk, src_sg);
827 dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen); 819 if (req->src != req->dst) {
828 scatterwalk_start(&dst_sg_walk, dst_sg); 820 dst_sg = scatterwalk_ffwd(dst_start, req->dst,
821 req->assoclen);
822 scatterwalk_start(&dst_sg_walk, dst_sg);
823 }
829 } 824 }
830 825
831 kernel_fpu_begin(); 826 kernel_fpu_begin();
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
index de04d3e98d8d..3d873e67749d 100644
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -43,609 +43,291 @@
43# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 43# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
44# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 44# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46########################################################################
47# Function API:
48# UINT16 crc_t10dif_pcl(
49# UINT16 init_crc, //initial CRC value, 16 bits
50# const unsigned char *buf, //buffer pointer to calculate CRC on
51# UINT64 len //buffer length in bytes (64-bit data)
52# );
53# 46#
54# Reference paper titled "Fast CRC Computation for Generic 47# Reference paper titled "Fast CRC Computation for Generic
55# Polynomials Using PCLMULQDQ Instruction" 48# Polynomials Using PCLMULQDQ Instruction"
56# URL: http://www.intel.com/content/dam/www/public/us/en/documents 49# URL: http://www.intel.com/content/dam/www/public/us/en/documents
57# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf 50# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
58# 51#
59#
60 52
61#include <linux/linkage.h> 53#include <linux/linkage.h>
62 54
63.text 55.text
64 56
65#define arg1 %rdi 57#define init_crc %edi
66#define arg2 %rsi 58#define buf %rsi
67#define arg3 %rdx 59#define len %rdx
68 60
69#define arg1_low32 %edi 61#define FOLD_CONSTS %xmm10
62#define BSWAP_MASK %xmm11
63
64# Fold reg1, reg2 into the next 32 data bytes, storing the result back into
65# reg1, reg2.
66.macro fold_32_bytes offset, reg1, reg2
67 movdqu \offset(buf), %xmm9
68 movdqu \offset+16(buf), %xmm12
69 pshufb BSWAP_MASK, %xmm9
70 pshufb BSWAP_MASK, %xmm12
71 movdqa \reg1, %xmm8
72 movdqa \reg2, %xmm13
73 pclmulqdq $0x00, FOLD_CONSTS, \reg1
74 pclmulqdq $0x11, FOLD_CONSTS, %xmm8
75 pclmulqdq $0x00, FOLD_CONSTS, \reg2
76 pclmulqdq $0x11, FOLD_CONSTS, %xmm13
77 pxor %xmm9 , \reg1
78 xorps %xmm8 , \reg1
79 pxor %xmm12, \reg2
80 xorps %xmm13, \reg2
81.endm
82
83# Fold src_reg into dst_reg.
84.macro fold_16_bytes src_reg, dst_reg
85 movdqa \src_reg, %xmm8
86 pclmulqdq $0x11, FOLD_CONSTS, \src_reg
87 pclmulqdq $0x00, FOLD_CONSTS, %xmm8
88 pxor %xmm8, \dst_reg
89 xorps \src_reg, \dst_reg
90.endm
70 91
71ENTRY(crc_t10dif_pcl) 92#
93# u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len);
94#
95# Assumes len >= 16.
96#
72.align 16 97.align 16
98ENTRY(crc_t10dif_pcl)
73 99
74 # adjust the 16-bit initial_crc value, scale it to 32 bits 100 movdqa .Lbswap_mask(%rip), BSWAP_MASK
75 shl $16, arg1_low32 101
76 102 # For sizes less than 256 bytes, we can't fold 128 bytes at a time.
77 # Allocate Stack Space 103 cmp $256, len
78 mov %rsp, %rcx 104 jl .Lless_than_256_bytes
79 sub $16*2, %rsp 105
80 # align stack to 16 byte boundary 106 # Load the first 128 data bytes. Byte swapping is necessary to make the
81 and $~(0x10 - 1), %rsp 107 # bit order match the polynomial coefficient order.
82 108 movdqu 16*0(buf), %xmm0
83 # check if smaller than 256 109 movdqu 16*1(buf), %xmm1
84 cmp $256, arg3 110 movdqu 16*2(buf), %xmm2
85 111 movdqu 16*3(buf), %xmm3
86 # for sizes less than 128, we can't fold 64B at a time... 112 movdqu 16*4(buf), %xmm4
87 jl _less_than_128 113 movdqu 16*5(buf), %xmm5
88 114 movdqu 16*6(buf), %xmm6
89 115 movdqu 16*7(buf), %xmm7
90 # load the initial crc value 116 add $128, buf
91 movd arg1_low32, %xmm10 # initial crc 117 pshufb BSWAP_MASK, %xmm0
92 118 pshufb BSWAP_MASK, %xmm1
93 # crc value does not need to be byte-reflected, but it needs 119 pshufb BSWAP_MASK, %xmm2
94 # to be moved to the high part of the register. 120 pshufb BSWAP_MASK, %xmm3
95 # because data will be byte-reflected and will align with 121 pshufb BSWAP_MASK, %xmm4
96 # initial crc at correct place. 122 pshufb BSWAP_MASK, %xmm5
97 pslldq $12, %xmm10 123 pshufb BSWAP_MASK, %xmm6
98 124 pshufb BSWAP_MASK, %xmm7
99 movdqa SHUF_MASK(%rip), %xmm11 125
100 # receive the initial 64B data, xor the initial crc value 126 # XOR the first 16 data *bits* with the initial CRC value.
101 movdqu 16*0(arg2), %xmm0 127 pxor %xmm8, %xmm8
102 movdqu 16*1(arg2), %xmm1 128 pinsrw $7, init_crc, %xmm8
103 movdqu 16*2(arg2), %xmm2 129 pxor %xmm8, %xmm0
104 movdqu 16*3(arg2), %xmm3 130
105 movdqu 16*4(arg2), %xmm4 131 movdqa .Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS
106 movdqu 16*5(arg2), %xmm5 132
107 movdqu 16*6(arg2), %xmm6 133 # Subtract 128 for the 128 data bytes just consumed. Subtract another
108 movdqu 16*7(arg2), %xmm7 134 # 128 to simplify the termination condition of the following loop.
109 135 sub $256, len
110 pshufb %xmm11, %xmm0 136
111 # XOR the initial_crc value 137 # While >= 128 data bytes remain (not counting xmm0-7), fold the 128
112 pxor %xmm10, %xmm0 138 # bytes xmm0-7 into them, storing the result back into xmm0-7.
113 pshufb %xmm11, %xmm1 139.Lfold_128_bytes_loop:
114 pshufb %xmm11, %xmm2 140 fold_32_bytes 0, %xmm0, %xmm1
115 pshufb %xmm11, %xmm3 141 fold_32_bytes 32, %xmm2, %xmm3
116 pshufb %xmm11, %xmm4 142 fold_32_bytes 64, %xmm4, %xmm5
117 pshufb %xmm11, %xmm5 143 fold_32_bytes 96, %xmm6, %xmm7
118 pshufb %xmm11, %xmm6 144 add $128, buf
119 pshufb %xmm11, %xmm7 145 sub $128, len
120 146 jge .Lfold_128_bytes_loop
121 movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4 147
122 #imm value of pclmulqdq instruction 148 # Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7.
123 #will determine which constant to use 149
124 150 # Fold across 64 bytes.
125 ################################################################# 151 movdqa .Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS
126 # we subtract 256 instead of 128 to save one instruction from the loop 152 fold_16_bytes %xmm0, %xmm4
127 sub $256, arg3 153 fold_16_bytes %xmm1, %xmm5
128 154 fold_16_bytes %xmm2, %xmm6
129 # at this section of the code, there is 64*x+y (0<=y<64) bytes of 155 fold_16_bytes %xmm3, %xmm7
130 # buffer. The _fold_64_B_loop will fold 64B at a time 156 # Fold across 32 bytes.
131 # until we have 64+y Bytes of buffer 157 movdqa .Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS
132 158 fold_16_bytes %xmm4, %xmm6
133 159 fold_16_bytes %xmm5, %xmm7
134 # fold 64B at a time. This section of the code folds 4 xmm 160 # Fold across 16 bytes.
135 # registers in parallel 161 movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
136_fold_64_B_loop: 162 fold_16_bytes %xmm6, %xmm7
137 163
138 # update the buffer pointer 164 # Add 128 to get the correct number of data bytes remaining in 0...127
139 add $128, arg2 # buf += 64# 165 # (not counting xmm7), following the previous extra subtraction by 128.
140 166 # Then subtract 16 to simplify the termination condition of the
141 movdqu 16*0(arg2), %xmm9 167 # following loop.
142 movdqu 16*1(arg2), %xmm12 168 add $128-16, len
143 pshufb %xmm11, %xmm9 169
144 pshufb %xmm11, %xmm12 170 # While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes
145 movdqa %xmm0, %xmm8 171 # xmm7 into them, storing the result back into xmm7.
146 movdqa %xmm1, %xmm13 172 jl .Lfold_16_bytes_loop_done
147 pclmulqdq $0x0 , %xmm10, %xmm0 173.Lfold_16_bytes_loop:
148 pclmulqdq $0x11, %xmm10, %xmm8
149 pclmulqdq $0x0 , %xmm10, %xmm1
150 pclmulqdq $0x11, %xmm10, %xmm13
151 pxor %xmm9 , %xmm0
152 xorps %xmm8 , %xmm0
153 pxor %xmm12, %xmm1
154 xorps %xmm13, %xmm1
155
156 movdqu 16*2(arg2), %xmm9
157 movdqu 16*3(arg2), %xmm12
158 pshufb %xmm11, %xmm9
159 pshufb %xmm11, %xmm12
160 movdqa %xmm2, %xmm8
161 movdqa %xmm3, %xmm13
162 pclmulqdq $0x0, %xmm10, %xmm2
163 pclmulqdq $0x11, %xmm10, %xmm8
164 pclmulqdq $0x0, %xmm10, %xmm3
165 pclmulqdq $0x11, %xmm10, %xmm13
166 pxor %xmm9 , %xmm2
167 xorps %xmm8 , %xmm2
168 pxor %xmm12, %xmm3
169 xorps %xmm13, %xmm3
170
171 movdqu 16*4(arg2), %xmm9
172 movdqu 16*5(arg2), %xmm12
173 pshufb %xmm11, %xmm9
174 pshufb %xmm11, %xmm12
175 movdqa %xmm4, %xmm8
176 movdqa %xmm5, %xmm13
177 pclmulqdq $0x0, %xmm10, %xmm4
178 pclmulqdq $0x11, %xmm10, %xmm8
179 pclmulqdq $0x0, %xmm10, %xmm5
180 pclmulqdq $0x11, %xmm10, %xmm13
181 pxor %xmm9 , %xmm4
182 xorps %xmm8 , %xmm4
183 pxor %xmm12, %xmm5
184 xorps %xmm13, %xmm5
185
186 movdqu 16*6(arg2), %xmm9
187 movdqu 16*7(arg2), %xmm12
188 pshufb %xmm11, %xmm9
189 pshufb %xmm11, %xmm12
190 movdqa %xmm6 , %xmm8
191 movdqa %xmm7 , %xmm13
192 pclmulqdq $0x0 , %xmm10, %xmm6
193 pclmulqdq $0x11, %xmm10, %xmm8
194 pclmulqdq $0x0 , %xmm10, %xmm7
195 pclmulqdq $0x11, %xmm10, %xmm13
196 pxor %xmm9 , %xmm6
197 xorps %xmm8 , %xmm6
198 pxor %xmm12, %xmm7
199 xorps %xmm13, %xmm7
200
201 sub $128, arg3
202
203 # check if there is another 64B in the buffer to be able to fold
204 jge _fold_64_B_loop
205 ##################################################################
206
207
208 add $128, arg2
209 # at this point, the buffer pointer is pointing at the last y Bytes
210 # of the buffer the 64B of folded data is in 4 of the xmm
211 # registers: xmm0, xmm1, xmm2, xmm3
212
213
214 # fold the 8 xmm registers to 1 xmm register with different constants
215
216 movdqa rk9(%rip), %xmm10
217 movdqa %xmm0, %xmm8
218 pclmulqdq $0x11, %xmm10, %xmm0
219 pclmulqdq $0x0 , %xmm10, %xmm8
220 pxor %xmm8, %xmm7
221 xorps %xmm0, %xmm7
222
223 movdqa rk11(%rip), %xmm10
224 movdqa %xmm1, %xmm8
225 pclmulqdq $0x11, %xmm10, %xmm1
226 pclmulqdq $0x0 , %xmm10, %xmm8
227 pxor %xmm8, %xmm7
228 xorps %xmm1, %xmm7
229
230 movdqa rk13(%rip), %xmm10
231 movdqa %xmm2, %xmm8
232 pclmulqdq $0x11, %xmm10, %xmm2
233 pclmulqdq $0x0 , %xmm10, %xmm8
234 pxor %xmm8, %xmm7
235 pxor %xmm2, %xmm7
236
237 movdqa rk15(%rip), %xmm10
238 movdqa %xmm3, %xmm8
239 pclmulqdq $0x11, %xmm10, %xmm3
240 pclmulqdq $0x0 , %xmm10, %xmm8
241 pxor %xmm8, %xmm7
242 xorps %xmm3, %xmm7
243
244 movdqa rk17(%rip), %xmm10
245 movdqa %xmm4, %xmm8
246 pclmulqdq $0x11, %xmm10, %xmm4
247 pclmulqdq $0x0 , %xmm10, %xmm8
248 pxor %xmm8, %xmm7
249 pxor %xmm4, %xmm7
250
251 movdqa rk19(%rip), %xmm10
252 movdqa %xmm5, %xmm8
253 pclmulqdq $0x11, %xmm10, %xmm5
254 pclmulqdq $0x0 , %xmm10, %xmm8
255 pxor %xmm8, %xmm7
256 xorps %xmm5, %xmm7
257
258 movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2
259 #imm value of pclmulqdq instruction
260 #will determine which constant to use
261 movdqa %xmm6, %xmm8
262 pclmulqdq $0x11, %xmm10, %xmm6
263 pclmulqdq $0x0 , %xmm10, %xmm8
264 pxor %xmm8, %xmm7
265 pxor %xmm6, %xmm7
266
267
268 # instead of 64, we add 48 to the loop counter to save 1 instruction
269 # from the loop instead of a cmp instruction, we use the negative
270 # flag with the jl instruction
271 add $128-16, arg3
272 jl _final_reduction_for_128
273
274 # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
275 # and the rest is in memory. We can fold 16 bytes at a time if y>=16
276 # continue folding 16B at a time
277
278_16B_reduction_loop:
279 movdqa %xmm7, %xmm8 174 movdqa %xmm7, %xmm8
280 pclmulqdq $0x11, %xmm10, %xmm7 175 pclmulqdq $0x11, FOLD_CONSTS, %xmm7
281 pclmulqdq $0x0 , %xmm10, %xmm8 176 pclmulqdq $0x00, FOLD_CONSTS, %xmm8
282 pxor %xmm8, %xmm7 177 pxor %xmm8, %xmm7
283 movdqu (arg2), %xmm0 178 movdqu (buf), %xmm0
284 pshufb %xmm11, %xmm0 179 pshufb BSWAP_MASK, %xmm0
285 pxor %xmm0 , %xmm7 180 pxor %xmm0 , %xmm7
286 add $16, arg2 181 add $16, buf
287 sub $16, arg3 182 sub $16, len
288 # instead of a cmp instruction, we utilize the flags with the 183 jge .Lfold_16_bytes_loop
289 # jge instruction equivalent of: cmp arg3, 16-16 184
290 # check if there is any more 16B in the buffer to be able to fold 185.Lfold_16_bytes_loop_done:
291 jge _16B_reduction_loop 186 # Add 16 to get the correct number of data bytes remaining in 0...15
292 187 # (not counting xmm7), following the previous extra subtraction by 16.
293 #now we have 16+z bytes left to reduce, where 0<= z < 16. 188 add $16, len
294 #first, we reduce the data in the xmm7 register 189 je .Lreduce_final_16_bytes
295 190
296 191.Lhandle_partial_segment:
297_final_reduction_for_128: 192 # Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16
298 # check if any more data to fold. If not, compute the CRC of 193 # bytes are in xmm7 and the rest are the remaining data in 'buf'. To do
299 # the final 128 bits 194 # this without needing a fold constant for each possible 'len', redivide
300 add $16, arg3 195 # the bytes into a first chunk of 'len' bytes and a second chunk of 16
301 je _128_done 196 # bytes, then fold the first chunk into the second.
302 197
303 # here we are getting data that is less than 16 bytes.
304 # since we know that there was data before the pointer, we can
305 # offset the input pointer before the actual point, to receive
306 # exactly 16 bytes. after that the registers need to be adjusted.
307_get_last_two_xmms:
308 movdqa %xmm7, %xmm2 198 movdqa %xmm7, %xmm2
309 199
310 movdqu -16(arg2, arg3), %xmm1 200 # xmm1 = last 16 original data bytes
311 pshufb %xmm11, %xmm1 201 movdqu -16(buf, len), %xmm1
202 pshufb BSWAP_MASK, %xmm1
312 203
313 # get rid of the extra data that was loaded before 204 # xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes.
314 # load the shift constant 205 lea .Lbyteshift_table+16(%rip), %rax
315 lea pshufb_shf_table+16(%rip), %rax 206 sub len, %rax
316 sub arg3, %rax
317 movdqu (%rax), %xmm0 207 movdqu (%rax), %xmm0
318
319 # shift xmm2 to the left by arg3 bytes
320 pshufb %xmm0, %xmm2 208 pshufb %xmm0, %xmm2
321 209
322 # shift xmm7 to the right by 16-arg3 bytes 210 # xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes.
323 pxor mask1(%rip), %xmm0 211 pxor .Lmask1(%rip), %xmm0
324 pshufb %xmm0, %xmm7 212 pshufb %xmm0, %xmm7
213
214 # xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes),
215 # then '16-len' bytes from xmm2 (high-order bytes).
325 pblendvb %xmm2, %xmm1 #xmm0 is implicit 216 pblendvb %xmm2, %xmm1 #xmm0 is implicit
326 217
327 # fold 16 Bytes 218 # Fold the first chunk into the second chunk, storing the result in xmm7.
328 movdqa %xmm1, %xmm2
329 movdqa %xmm7, %xmm8 219 movdqa %xmm7, %xmm8
330 pclmulqdq $0x11, %xmm10, %xmm7 220 pclmulqdq $0x11, FOLD_CONSTS, %xmm7
331 pclmulqdq $0x0 , %xmm10, %xmm8 221 pclmulqdq $0x00, FOLD_CONSTS, %xmm8
332 pxor %xmm8, %xmm7 222 pxor %xmm8, %xmm7
333 pxor %xmm2, %xmm7 223 pxor %xmm1, %xmm7
334 224
335_128_done: 225.Lreduce_final_16_bytes:
336 # compute crc of a 128-bit value 226 # Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC
337 movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10
338 movdqa %xmm7, %xmm0
339 227
340 #64b fold 228 # Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
341 pclmulqdq $0x1, %xmm10, %xmm7 229 movdqa .Lfinal_fold_consts(%rip), FOLD_CONSTS
342 pslldq $8 , %xmm0
343 pxor %xmm0, %xmm7
344 230
345 #32b fold 231 # Fold the high 64 bits into the low 64 bits, while also multiplying by
232 # x^64. This produces a 128-bit value congruent to x^64 * M(x) and
233 # whose low 48 bits are 0.
346 movdqa %xmm7, %xmm0 234 movdqa %xmm7, %xmm0
235 pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x))
236 pslldq $8, %xmm0
237 pxor %xmm0, %xmm7 # + low bits * x^64
347 238
348 pand mask2(%rip), %xmm0 239 # Fold the high 32 bits into the low 96 bits. This produces a 96-bit
349 240 # value congruent to x^64 * M(x) and whose low 48 bits are 0.
350 psrldq $12, %xmm7
351 pclmulqdq $0x10, %xmm10, %xmm7
352 pxor %xmm0, %xmm7
353
354 #barrett reduction
355_barrett:
356 movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10
357 movdqa %xmm7, %xmm0 241 movdqa %xmm7, %xmm0
358 pclmulqdq $0x01, %xmm10, %xmm7 242 pand .Lmask2(%rip), %xmm0 # zero high 32 bits
359 pslldq $4, %xmm7 243 psrldq $12, %xmm7 # extract high 32 bits
360 pclmulqdq $0x11, %xmm10, %xmm7 244 pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x))
245 pxor %xmm0, %xmm7 # + low bits
361 246
362 pslldq $4, %xmm7 247 # Load G(x) and floor(x^48 / G(x)).
363 pxor %xmm0, %xmm7 248 movdqa .Lbarrett_reduction_consts(%rip), FOLD_CONSTS
364 pextrd $1, %xmm7, %eax
365 249
366_cleanup: 250 # Use Barrett reduction to compute the final CRC value.
367 # scale the result back to 16 bits 251 movdqa %xmm7, %xmm0
368 shr $16, %eax 252 pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x))
369 mov %rcx, %rsp 253 psrlq $32, %xmm7 # /= x^32
254 pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # *= G(x)
255 psrlq $48, %xmm0
256 pxor %xmm7, %xmm0 # + low 16 nonzero bits
257 # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
258
259 pextrw $0, %xmm0, %eax
370 ret 260 ret
371 261
372########################################################################
373
374.align 16 262.align 16
375_less_than_128: 263.Lless_than_256_bytes:
376 264 # Checksumming a buffer of length 16...255 bytes
377 # check if there is enough buffer to be able to fold 16B at a time
378 cmp $32, arg3
379 jl _less_than_32
380 movdqa SHUF_MASK(%rip), %xmm11
381 265
382 # now if there is, load the constants 266 # Load the first 16 data bytes.
383 movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 267 movdqu (buf), %xmm7
268 pshufb BSWAP_MASK, %xmm7
269 add $16, buf
384 270
385 movd arg1_low32, %xmm0 # get the initial crc value 271 # XOR the first 16 data *bits* with the initial CRC value.
386 pslldq $12, %xmm0 # align it to its correct place 272 pxor %xmm0, %xmm0
387 movdqu (arg2), %xmm7 # load the plaintext 273 pinsrw $7, init_crc, %xmm0
388 pshufb %xmm11, %xmm7 # byte-reflect the plaintext
389 pxor %xmm0, %xmm7 274 pxor %xmm0, %xmm7
390 275
391 276 movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
392 # update the buffer pointer 277 cmp $16, len
393 add $16, arg2 278 je .Lreduce_final_16_bytes # len == 16
394 279 sub $32, len
395 # update the counter. subtract 32 instead of 16 to save one 280 jge .Lfold_16_bytes_loop # 32 <= len <= 255
396 # instruction from the loop 281 add $16, len
397 sub $32, arg3 282 jmp .Lhandle_partial_segment # 17 <= len <= 31
398
399 jmp _16B_reduction_loop
400
401
402.align 16
403_less_than_32:
404 # mov initial crc to the return value. this is necessary for
405 # zero-length buffers.
406 mov arg1_low32, %eax
407 test arg3, arg3
408 je _cleanup
409
410 movdqa SHUF_MASK(%rip), %xmm11
411
412 movd arg1_low32, %xmm0 # get the initial crc value
413 pslldq $12, %xmm0 # align it to its correct place
414
415 cmp $16, arg3
416 je _exact_16_left
417 jl _less_than_16_left
418
419 movdqu (arg2), %xmm7 # load the plaintext
420 pshufb %xmm11, %xmm7 # byte-reflect the plaintext
421 pxor %xmm0 , %xmm7 # xor the initial crc value
422 add $16, arg2
423 sub $16, arg3
424 movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
425 jmp _get_last_two_xmms
426
427
428.align 16
429_less_than_16_left:
430 # use stack space to load data less than 16 bytes, zero-out
431 # the 16B in memory first.
432
433 pxor %xmm1, %xmm1
434 mov %rsp, %r11
435 movdqa %xmm1, (%r11)
436
437 cmp $4, arg3
438 jl _only_less_than_4
439
440 # backup the counter value
441 mov arg3, %r9
442 cmp $8, arg3
443 jl _less_than_8_left
444
445 # load 8 Bytes
446 mov (arg2), %rax
447 mov %rax, (%r11)
448 add $8, %r11
449 sub $8, arg3
450 add $8, arg2
451_less_than_8_left:
452
453 cmp $4, arg3
454 jl _less_than_4_left
455
456 # load 4 Bytes
457 mov (arg2), %eax
458 mov %eax, (%r11)
459 add $4, %r11
460 sub $4, arg3
461 add $4, arg2
462_less_than_4_left:
463
464 cmp $2, arg3
465 jl _less_than_2_left
466
467 # load 2 Bytes
468 mov (arg2), %ax
469 mov %ax, (%r11)
470 add $2, %r11
471 sub $2, arg3
472 add $2, arg2
473_less_than_2_left:
474 cmp $1, arg3
475 jl _zero_left
476
477 # load 1 Byte
478 mov (arg2), %al
479 mov %al, (%r11)
480_zero_left:
481 movdqa (%rsp), %xmm7
482 pshufb %xmm11, %xmm7
483 pxor %xmm0 , %xmm7 # xor the initial crc value
484
485 # shl r9, 4
486 lea pshufb_shf_table+16(%rip), %rax
487 sub %r9, %rax
488 movdqu (%rax), %xmm0
489 pxor mask1(%rip), %xmm0
490
491 pshufb %xmm0, %xmm7
492 jmp _128_done
493
494.align 16
495_exact_16_left:
496 movdqu (arg2), %xmm7
497 pshufb %xmm11, %xmm7
498 pxor %xmm0 , %xmm7 # xor the initial crc value
499
500 jmp _128_done
501
502_only_less_than_4:
503 cmp $3, arg3
504 jl _only_less_than_3
505
506 # load 3 Bytes
507 mov (arg2), %al
508 mov %al, (%r11)
509
510 mov 1(arg2), %al
511 mov %al, 1(%r11)
512
513 mov 2(arg2), %al
514 mov %al, 2(%r11)
515
516 movdqa (%rsp), %xmm7
517 pshufb %xmm11, %xmm7
518 pxor %xmm0 , %xmm7 # xor the initial crc value
519
520 psrldq $5, %xmm7
521
522 jmp _barrett
523_only_less_than_3:
524 cmp $2, arg3
525 jl _only_less_than_2
526
527 # load 2 Bytes
528 mov (arg2), %al
529 mov %al, (%r11)
530
531 mov 1(arg2), %al
532 mov %al, 1(%r11)
533
534 movdqa (%rsp), %xmm7
535 pshufb %xmm11, %xmm7
536 pxor %xmm0 , %xmm7 # xor the initial crc value
537
538 psrldq $6, %xmm7
539
540 jmp _barrett
541_only_less_than_2:
542
543 # load 1 Byte
544 mov (arg2), %al
545 mov %al, (%r11)
546
547 movdqa (%rsp), %xmm7
548 pshufb %xmm11, %xmm7
549 pxor %xmm0 , %xmm7 # xor the initial crc value
550
551 psrldq $7, %xmm7
552
553 jmp _barrett
554
555ENDPROC(crc_t10dif_pcl) 283ENDPROC(crc_t10dif_pcl)
556 284
557.section .rodata, "a", @progbits 285.section .rodata, "a", @progbits
558.align 16 286.align 16
559# precomputed constants
560# these constants are precomputed from the poly:
561# 0x8bb70000 (0x8bb7 scaled to 32 bits)
562# Q = 0x18BB70000
563# rk1 = 2^(32*3) mod Q << 32
564# rk2 = 2^(32*5) mod Q << 32
565# rk3 = 2^(32*15) mod Q << 32
566# rk4 = 2^(32*17) mod Q << 32
567# rk5 = 2^(32*3) mod Q << 32
568# rk6 = 2^(32*2) mod Q << 32
569# rk7 = floor(2^64/Q)
570# rk8 = Q
571rk1:
572.quad 0x2d56000000000000
573rk2:
574.quad 0x06df000000000000
575rk3:
576.quad 0x9d9d000000000000
577rk4:
578.quad 0x7cf5000000000000
579rk5:
580.quad 0x2d56000000000000
581rk6:
582.quad 0x1368000000000000
583rk7:
584.quad 0x00000001f65a57f8
585rk8:
586.quad 0x000000018bb70000
587
588rk9:
589.quad 0xceae000000000000
590rk10:
591.quad 0xbfd6000000000000
592rk11:
593.quad 0x1e16000000000000
594rk12:
595.quad 0x713c000000000000
596rk13:
597.quad 0xf7f9000000000000
598rk14:
599.quad 0x80a6000000000000
600rk15:
601.quad 0x044c000000000000
602rk16:
603.quad 0xe658000000000000
604rk17:
605.quad 0xad18000000000000
606rk18:
607.quad 0xa497000000000000
608rk19:
609.quad 0x6ee3000000000000
610rk20:
611.quad 0xe7b5000000000000
612
613 287
288# Fold constants precomputed from the polynomial 0x18bb7
289# G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
290.Lfold_across_128_bytes_consts:
291 .quad 0x0000000000006123 # x^(8*128) mod G(x)
292 .quad 0x0000000000002295 # x^(8*128+64) mod G(x)
293.Lfold_across_64_bytes_consts:
294 .quad 0x0000000000001069 # x^(4*128) mod G(x)
295 .quad 0x000000000000dd31 # x^(4*128+64) mod G(x)
296.Lfold_across_32_bytes_consts:
297 .quad 0x000000000000857d # x^(2*128) mod G(x)
298 .quad 0x0000000000007acc # x^(2*128+64) mod G(x)
299.Lfold_across_16_bytes_consts:
300 .quad 0x000000000000a010 # x^(1*128) mod G(x)
301 .quad 0x0000000000001faa # x^(1*128+64) mod G(x)
302.Lfinal_fold_consts:
303 .quad 0x1368000000000000 # x^48 * (x^48 mod G(x))
304 .quad 0x2d56000000000000 # x^48 * (x^80 mod G(x))
305.Lbarrett_reduction_consts:
306 .quad 0x0000000000018bb7 # G(x)
307 .quad 0x00000001f65a57f8 # floor(x^48 / G(x))
614 308
615.section .rodata.cst16.mask1, "aM", @progbits, 16 309.section .rodata.cst16.mask1, "aM", @progbits, 16
616.align 16 310.align 16
617mask1: 311.Lmask1:
618.octa 0x80808080808080808080808080808080 312 .octa 0x80808080808080808080808080808080
619 313
620.section .rodata.cst16.mask2, "aM", @progbits, 16 314.section .rodata.cst16.mask2, "aM", @progbits, 16
621.align 16 315.align 16
622mask2: 316.Lmask2:
623.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF 317 .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
318
319.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
320.align 16
321.Lbswap_mask:
322 .octa 0x000102030405060708090A0B0C0D0E0F
624 323
625.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 324.section .rodata.cst32.byteshift_table, "aM", @progbits, 32
626.align 16 325.align 16
627SHUF_MASK: 326# For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len]
628.octa 0x000102030405060708090A0B0C0D0E0F 327# is the index vector to shift left by 'len' bytes, and is also {0x80, ...,
629 328# 0x80} XOR the index vector to shift right by '16 - len' bytes.
630.section .rodata.cst32.pshufb_shf_table, "aM", @progbits, 32 329.Lbyteshift_table:
631.align 32 330 .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
632pshufb_shf_table: 331 .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
633# use these values for shift constants for the pshufb instruction 332 .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
634# different alignments result in values as shown: 333 .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0
635# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
636# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
637# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
638# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
639# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
640# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
641# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
642# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
643# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
644# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
645# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
646# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
647# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
648# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
649# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
650.octa 0x8f8e8d8c8b8a89888786858483828100
651.octa 0x000e0d0c0b0a09080706050403020100
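All of the new fold constants above are values of the form x^N mod G(x) for G(x) = 0x18bb7, and the Barrett constant is floor(x^48 / G(x)), as the comments state. The small, self-contained C program below is not part of the patch (names are illustrative); it recomputes such constants directly from the polynomial, which can be useful when auditing the table.

#include <stdio.h>
#include <stdint.h>

/* G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 */
#define G 0x18bb7u

/* x^n mod G(x) over GF(2): shift in one zero bit at a time, reducing
 * whenever the running remainder reaches degree 16. */
static uint32_t xpow_mod_g(unsigned int n)
{
        uint32_t r = 1;

        while (n--) {
                r <<= 1;
                if (r & 0x10000)
                        r ^= G;
        }
        return r;
}

/* floor(x^48 / G(x)) over GF(2): plain polynomial long division,
 * keeping only the quotient bits. */
static uint64_t xdiv48_g(void)
{
        uint64_t rem = 1ULL << 48, q = 0;
        int i;

        for (i = 48 - 16; i >= 0; i--) {
                if (rem & (1ULL << (i + 16))) {
                        rem ^= (uint64_t)G << i;
                        q |= 1ULL << i;
                }
        }
        return q;
}

int main(void)
{
        static const unsigned int exps[] = {
                8 * 128, 8 * 128 + 64,  /* .Lfold_across_128_bytes_consts */
                4 * 128, 4 * 128 + 64,  /* .Lfold_across_64_bytes_consts  */
                2 * 128, 2 * 128 + 64,  /* .Lfold_across_32_bytes_consts  */
                1 * 128, 1 * 128 + 64,  /* .Lfold_across_16_bytes_consts  */
                48, 80,                 /* .Lfinal_fold_consts, before the
                                         * extra x^48 scaling in the table */
        };
        unsigned int i;

        for (i = 0; i < sizeof(exps) / sizeof(exps[0]); i++)
                printf("x^%-4u mod G(x) = 0x%04x\n", exps[i],
                       xpow_mod_g(exps[i]));
        printf("floor(x^48 / G(x)) = 0x%llx\n",
               (unsigned long long)xdiv48_g());
        return 0;
}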
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index cd4df9322501..0e785c0b2354 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -33,18 +33,12 @@
33#include <asm/cpufeatures.h> 33#include <asm/cpufeatures.h>
34#include <asm/cpu_device_id.h> 34#include <asm/cpu_device_id.h>
35 35
36asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, 36asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
37 size_t len);
38 37
39struct chksum_desc_ctx { 38struct chksum_desc_ctx {
40 __u16 crc; 39 __u16 crc;
41}; 40};
42 41
43/*
44 * Steps through buffer one byte at at time, calculates reflected
45 * crc using table.
46 */
47
48static int chksum_init(struct shash_desc *desc) 42static int chksum_init(struct shash_desc *desc)
49{ 43{
50 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 44 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
@@ -59,7 +53,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data,
59{ 53{
60 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 54 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
61 55
62 if (irq_fpu_usable()) { 56 if (length >= 16 && irq_fpu_usable()) {
63 kernel_fpu_begin(); 57 kernel_fpu_begin();
64 ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); 58 ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
65 kernel_fpu_end(); 59 kernel_fpu_end();
@@ -79,7 +73,7 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
79static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, 73static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
80 u8 *out) 74 u8 *out)
81{ 75{
82 if (irq_fpu_usable()) { 76 if (len >= 16 && irq_fpu_usable()) {
83 kernel_fpu_begin(); 77 kernel_fpu_begin();
84 *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); 78 *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
85 kernel_fpu_end(); 79 kernel_fpu_end();
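The new "length >= 16" checks exist because the rewritten assembly above assumes at least one full 16-byte block (see its "Assumes len >= 16." comment); shorter updates stay on the non-SIMD path handled outside this hunk. For cross-checking the accelerated path, here is a hedged, self-contained bit-at-a-time reference for the same CRC (polynomial 0x18bb7, MSB-first, zero initial value, no final XOR); it is not kernel code.

#include <stdint.h>
#include <stddef.h>

/* Reference CRC-T10DIF, one bit at a time; intended only as an
 * independent cross-check for crc_t10dif_pcl() results. */
static uint16_t crc_t10dif_ref(uint16_t crc, const uint8_t *buf, size_t len)
{
        while (len--) {
                unsigned int i;

                crc ^= (uint16_t)(*buf++) << 8;
                for (i = 0; i < 8; i++)
                        crc = (crc & 0x8000) ? (crc << 1) ^ 0x8bb7
                                             : (crc << 1);
        }
        return crc;
}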
diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c
index 0dccdda1eb3a..7e600f8bcdad 100644
--- a/arch/x86/crypto/morus1280_glue.c
+++ b/arch/x86/crypto/morus1280_glue.c
@@ -85,31 +85,20 @@ static void crypto_morus1280_glue_process_ad(
85 85
86static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state, 86static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
87 struct morus1280_ops ops, 87 struct morus1280_ops ops,
88 struct aead_request *req) 88 struct skcipher_walk *walk)
89{ 89{
90 struct skcipher_walk walk; 90 while (walk->nbytes >= MORUS1280_BLOCK_SIZE) {
91 u8 *cursor_src, *cursor_dst; 91 ops.crypt_blocks(state, walk->src.virt.addr,
92 unsigned int chunksize, base; 92 walk->dst.virt.addr,
93 93 round_down(walk->nbytes,
94 ops.skcipher_walk_init(&walk, req, false); 94 MORUS1280_BLOCK_SIZE));
95 95 skcipher_walk_done(walk, walk->nbytes % MORUS1280_BLOCK_SIZE);
96 while (walk.nbytes) { 96 }
97 cursor_src = walk.src.virt.addr;
98 cursor_dst = walk.dst.virt.addr;
99 chunksize = walk.nbytes;
100
101 ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
102
103 base = chunksize & ~(MORUS1280_BLOCK_SIZE - 1);
104 cursor_src += base;
105 cursor_dst += base;
106 chunksize &= MORUS1280_BLOCK_SIZE - 1;
107
108 if (chunksize > 0)
109 ops.crypt_tail(state, cursor_src, cursor_dst,
110 chunksize);
111 97
112 skcipher_walk_done(&walk, 0); 98 if (walk->nbytes) {
99 ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
100 walk->nbytes);
101 skcipher_walk_done(walk, 0);
113 } 102 }
114} 103}
115 104
@@ -147,12 +136,15 @@ static void crypto_morus1280_glue_crypt(struct aead_request *req,
147 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 136 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
148 struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); 137 struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
149 struct morus1280_state state; 138 struct morus1280_state state;
139 struct skcipher_walk walk;
140
141 ops.skcipher_walk_init(&walk, req, true);
150 142
151 kernel_fpu_begin(); 143 kernel_fpu_begin();
152 144
153 ctx->ops->init(&state, &ctx->key, req->iv); 145 ctx->ops->init(&state, &ctx->key, req->iv);
154 crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); 146 crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
155 crypto_morus1280_glue_process_crypt(&state, ops, req); 147 crypto_morus1280_glue_process_crypt(&state, ops, &walk);
156 ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); 148 ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
157 149
158 kernel_fpu_end(); 150 kernel_fpu_end();
diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c
index 7b58fe4d9bd1..cb3a81732016 100644
--- a/arch/x86/crypto/morus640_glue.c
+++ b/arch/x86/crypto/morus640_glue.c
@@ -85,31 +85,19 @@ static void crypto_morus640_glue_process_ad(
85 85
86static void crypto_morus640_glue_process_crypt(struct morus640_state *state, 86static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
87 struct morus640_ops ops, 87 struct morus640_ops ops,
88 struct aead_request *req) 88 struct skcipher_walk *walk)
89{ 89{
90 struct skcipher_walk walk; 90 while (walk->nbytes >= MORUS640_BLOCK_SIZE) {
91 u8 *cursor_src, *cursor_dst; 91 ops.crypt_blocks(state, walk->src.virt.addr,
92 unsigned int chunksize, base; 92 walk->dst.virt.addr,
93 93 round_down(walk->nbytes, MORUS640_BLOCK_SIZE));
94 ops.skcipher_walk_init(&walk, req, false); 94 skcipher_walk_done(walk, walk->nbytes % MORUS640_BLOCK_SIZE);
95 95 }
96 while (walk.nbytes) {
97 cursor_src = walk.src.virt.addr;
98 cursor_dst = walk.dst.virt.addr;
99 chunksize = walk.nbytes;
100
101 ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
102
103 base = chunksize & ~(MORUS640_BLOCK_SIZE - 1);
104 cursor_src += base;
105 cursor_dst += base;
106 chunksize &= MORUS640_BLOCK_SIZE - 1;
107
108 if (chunksize > 0)
109 ops.crypt_tail(state, cursor_src, cursor_dst,
110 chunksize);
111 96
112 skcipher_walk_done(&walk, 0); 97 if (walk->nbytes) {
98 ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
99 walk->nbytes);
100 skcipher_walk_done(walk, 0);
113 } 101 }
114} 102}
115 103
@@ -143,12 +131,15 @@ static void crypto_morus640_glue_crypt(struct aead_request *req,
143 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 131 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
144 struct morus640_ctx *ctx = crypto_aead_ctx(tfm); 132 struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
145 struct morus640_state state; 133 struct morus640_state state;
134 struct skcipher_walk walk;
135
136 ops.skcipher_walk_init(&walk, req, true);
146 137
147 kernel_fpu_begin(); 138 kernel_fpu_begin();
148 139
149 ctx->ops->init(&state, &ctx->key, req->iv); 140 ctx->ops->init(&state, &ctx->key, req->iv);
150 crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); 141 crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
151 crypto_morus640_glue_process_crypt(&state, ops, req); 142 crypto_morus640_glue_process_crypt(&state, ops, &walk);
152 ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); 143 ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
153 144
154 kernel_fpu_end(); 145 kernel_fpu_end();
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
index c88c670cb5fc..e6add74d78a5 100644
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -272,6 +272,10 @@ ENTRY(poly1305_block_sse2)
272 dec %rcx 272 dec %rcx
273 jnz .Ldoblock 273 jnz .Ldoblock
274 274
275 # Zeroing of key material
276 mov %rcx,0x00(%rsp)
277 mov %rcx,0x08(%rsp)
278
275 add $0x10,%rsp 279 add $0x10,%rsp
276 pop %r12 280 pop %r12
277 pop %rbx 281 pop %rbx
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 374a19712e20..b684f0294f35 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2278,6 +2278,19 @@ void perf_check_microcode(void)
2278 x86_pmu.check_microcode(); 2278 x86_pmu.check_microcode();
2279} 2279}
2280 2280
2281static int x86_pmu_check_period(struct perf_event *event, u64 value)
2282{
2283 if (x86_pmu.check_period && x86_pmu.check_period(event, value))
2284 return -EINVAL;
2285
2286 if (value && x86_pmu.limit_period) {
2287 if (x86_pmu.limit_period(event, value) > value)
2288 return -EINVAL;
2289 }
2290
2291 return 0;
2292}
2293
2281static struct pmu pmu = { 2294static struct pmu pmu = {
2282 .pmu_enable = x86_pmu_enable, 2295 .pmu_enable = x86_pmu_enable,
2283 .pmu_disable = x86_pmu_disable, 2296 .pmu_disable = x86_pmu_disable,
@@ -2302,6 +2315,7 @@ static struct pmu pmu = {
2302 .event_idx = x86_pmu_event_idx, 2315 .event_idx = x86_pmu_event_idx,
2303 .sched_task = x86_pmu_sched_task, 2316 .sched_task = x86_pmu_sched_task,
2304 .task_ctx_size = sizeof(struct x86_perf_task_context), 2317 .task_ctx_size = sizeof(struct x86_perf_task_context),
2318 .check_period = x86_pmu_check_period,
2305}; 2319};
2306 2320
2307void arch_perf_update_userpage(struct perf_event *event, 2321void arch_perf_update_userpage(struct perf_event *event,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 40e12cfc87f6..730978dff63f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3559,6 +3559,14 @@ static void free_excl_cntrs(int cpu)
3559 3559
3560static void intel_pmu_cpu_dying(int cpu) 3560static void intel_pmu_cpu_dying(int cpu)
3561{ 3561{
3562 fini_debug_store_on_cpu(cpu);
3563
3564 if (x86_pmu.counter_freezing)
3565 disable_counter_freeze();
3566}
3567
3568static void intel_pmu_cpu_dead(int cpu)
3569{
3562 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 3570 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
3563 struct intel_shared_regs *pc; 3571 struct intel_shared_regs *pc;
3564 3572
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
3570 } 3578 }
3571 3579
3572 free_excl_cntrs(cpu); 3580 free_excl_cntrs(cpu);
3573
3574 fini_debug_store_on_cpu(cpu);
3575
3576 if (x86_pmu.counter_freezing)
3577 disable_counter_freeze();
3578} 3581}
3579 3582
3580static void intel_pmu_sched_task(struct perf_event_context *ctx, 3583static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3584,6 +3587,11 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx,
3584 intel_pmu_lbr_sched_task(ctx, sched_in); 3587 intel_pmu_lbr_sched_task(ctx, sched_in);
3585} 3588}
3586 3589
3590static int intel_pmu_check_period(struct perf_event *event, u64 value)
3591{
3592 return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
3593}
3594
3587PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); 3595PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
3588 3596
3589PMU_FORMAT_ATTR(ldlat, "config1:0-15"); 3597PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -3663,6 +3671,9 @@ static __initconst const struct x86_pmu core_pmu = {
3663 .cpu_prepare = intel_pmu_cpu_prepare, 3671 .cpu_prepare = intel_pmu_cpu_prepare,
3664 .cpu_starting = intel_pmu_cpu_starting, 3672 .cpu_starting = intel_pmu_cpu_starting,
3665 .cpu_dying = intel_pmu_cpu_dying, 3673 .cpu_dying = intel_pmu_cpu_dying,
3674 .cpu_dead = intel_pmu_cpu_dead,
3675
3676 .check_period = intel_pmu_check_period,
3666}; 3677};
3667 3678
3668static struct attribute *intel_pmu_attrs[]; 3679static struct attribute *intel_pmu_attrs[];
@@ -3703,8 +3714,12 @@ static __initconst const struct x86_pmu intel_pmu = {
3703 .cpu_prepare = intel_pmu_cpu_prepare, 3714 .cpu_prepare = intel_pmu_cpu_prepare,
3704 .cpu_starting = intel_pmu_cpu_starting, 3715 .cpu_starting = intel_pmu_cpu_starting,
3705 .cpu_dying = intel_pmu_cpu_dying, 3716 .cpu_dying = intel_pmu_cpu_dying,
3717 .cpu_dead = intel_pmu_cpu_dead,
3718
3706 .guest_get_msrs = intel_guest_get_msrs, 3719 .guest_get_msrs = intel_guest_get_msrs,
3707 .sched_task = intel_pmu_sched_task, 3720 .sched_task = intel_pmu_sched_task,
3721
3722 .check_period = intel_pmu_check_period,
3708}; 3723};
3709 3724
3710static __init void intel_clovertown_quirk(void) 3725static __init void intel_clovertown_quirk(void)
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index c07bee31abe8..b10e04387f38 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1222,6 +1222,8 @@ static struct pci_driver snbep_uncore_pci_driver = {
1222 .id_table = snbep_uncore_pci_ids, 1222 .id_table = snbep_uncore_pci_ids,
1223}; 1223};
1224 1224
1225#define NODE_ID_MASK 0x7
1226
1225/* 1227/*
1226 * build pci bus to socket mapping 1228 * build pci bus to socket mapping
1227 */ 1229 */
@@ -1243,7 +1245,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
1243 err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); 1245 err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
1244 if (err) 1246 if (err)
1245 break; 1247 break;
1246 nodeid = config; 1248 nodeid = config & NODE_ID_MASK;
1247 /* get the Node ID mapping */ 1249 /* get the Node ID mapping */
1248 err = pci_read_config_dword(ubox_dev, idmap_loc, &config); 1250 err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
1249 if (err) 1251 if (err)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78d7b7031bfc..d46fd6754d92 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -646,6 +646,11 @@ struct x86_pmu {
646 * Intel host/guest support (KVM) 646 * Intel host/guest support (KVM)
647 */ 647 */
648 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); 648 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
649
650 /*
651 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
652 */
653 int (*check_period) (struct perf_event *event, u64 period);
649}; 654};
650 655
651struct x86_perf_task_context { 656struct x86_perf_task_context {
@@ -857,7 +862,7 @@ static inline int amd_pmu_init(void)
857 862
858#ifdef CONFIG_CPU_SUP_INTEL 863#ifdef CONFIG_CPU_SUP_INTEL
859 864
860static inline bool intel_pmu_has_bts(struct perf_event *event) 865static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period)
861{ 866{
862 struct hw_perf_event *hwc = &event->hw; 867 struct hw_perf_event *hwc = &event->hw;
863 unsigned int hw_event, bts_event; 868 unsigned int hw_event, bts_event;
@@ -868,7 +873,14 @@ static inline bool intel_pmu_has_bts(struct perf_event *event)
868 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; 873 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
869 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); 874 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
870 875
871 return hw_event == bts_event && hwc->sample_period == 1; 876 return hw_event == bts_event && period == 1;
877}
878
879static inline bool intel_pmu_has_bts(struct perf_event *event)
880{
881 struct hw_perf_event *hwc = &event->hw;
882
883 return intel_pmu_has_bts_period(event, hwc->sample_period);
872} 884}
873 885
874int intel_pmu_save_and_restart(struct perf_event *event); 886int intel_pmu_save_and_restart(struct perf_event *event);
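
The perf_event.h hunk above splits the BTS predicate so a candidate period can be tested, and the new x86_pmu::check_period hook (wired up for both core_pmu and intel_pmu earlier in this diff) uses it to validate PERF_EVENT_IOC_PERIOD requests before they take effect. A minimal standalone sketch of that helper-plus-wrapper pattern, with a made-up event encoding rather than the real one:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BTS_EVENT 0xc4			/* hypothetical branch-instructions encoding */

struct fake_event {
	unsigned int	config;
	uint64_t	sample_period;
};

/* Takes the period explicitly so callers can test a proposed value,
 * not just the one already stored in the event. */
static bool has_bts_period(const struct fake_event *ev, uint64_t period)
{
	return ev->config == BTS_EVENT && period == 1;
}

static bool has_bts(const struct fake_event *ev)
{
	return has_bts_period(ev, ev->sample_period);
}

/* check_period-style hook: refuse a new period that would silently
 * move the event into or out of the BTS special case. */
static int check_period(const struct fake_event *ev, uint64_t new_period)
{
	return has_bts(ev) != has_bts_period(ev, new_period) ? -1 : 0;
}

int main(void)
{
	struct fake_event ev = { .config = BTS_EVENT, .sample_period = 1 };

	printf("switch period to 2: %s\n", check_period(&ev, 2) ? "rejected" : "ok");
	return 0;
}
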
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index f65b78d32f5e..3c135084e1eb 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -39,82 +39,10 @@
39static int load_aout_binary(struct linux_binprm *); 39static int load_aout_binary(struct linux_binprm *);
40static int load_aout_library(struct file *); 40static int load_aout_library(struct file *);
41 41
42#ifdef CONFIG_COREDUMP
43static int aout_core_dump(struct coredump_params *);
44
45static unsigned long get_dr(int n)
46{
47 struct perf_event *bp = current->thread.ptrace_bps[n];
48 return bp ? bp->hw.info.address : 0;
49}
50
51/*
52 * fill in the user structure for a core dump..
53 */
54static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
55{
56 u32 fs, gs;
57 memset(dump, 0, sizeof(*dump));
58
59/* changed the size calculations - should hopefully work better. lbt */
60 dump->magic = CMAGIC;
61 dump->start_code = 0;
62 dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
63 dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
64 dump->u_dsize = ((unsigned long)
65 (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
66 dump->u_dsize -= dump->u_tsize;
67 dump->u_debugreg[0] = get_dr(0);
68 dump->u_debugreg[1] = get_dr(1);
69 dump->u_debugreg[2] = get_dr(2);
70 dump->u_debugreg[3] = get_dr(3);
71 dump->u_debugreg[6] = current->thread.debugreg6;
72 dump->u_debugreg[7] = current->thread.ptrace_dr7;
73
74 if (dump->start_stack < 0xc0000000) {
75 unsigned long tmp;
76
77 tmp = (unsigned long) (0xc0000000 - dump->start_stack);
78 dump->u_ssize = tmp >> PAGE_SHIFT;
79 }
80
81 dump->regs.ebx = regs->bx;
82 dump->regs.ecx = regs->cx;
83 dump->regs.edx = regs->dx;
84 dump->regs.esi = regs->si;
85 dump->regs.edi = regs->di;
86 dump->regs.ebp = regs->bp;
87 dump->regs.eax = regs->ax;
88 dump->regs.ds = current->thread.ds;
89 dump->regs.es = current->thread.es;
90 savesegment(fs, fs);
91 dump->regs.fs = fs;
92 savesegment(gs, gs);
93 dump->regs.gs = gs;
94 dump->regs.orig_eax = regs->orig_ax;
95 dump->regs.eip = regs->ip;
96 dump->regs.cs = regs->cs;
97 dump->regs.eflags = regs->flags;
98 dump->regs.esp = regs->sp;
99 dump->regs.ss = regs->ss;
100
101#if 1 /* FIXME */
102 dump->u_fpvalid = 0;
103#else
104 dump->u_fpvalid = dump_fpu(regs, &dump->i387);
105#endif
106}
107
108#endif
109
110static struct linux_binfmt aout_format = { 42static struct linux_binfmt aout_format = {
111 .module = THIS_MODULE, 43 .module = THIS_MODULE,
112 .load_binary = load_aout_binary, 44 .load_binary = load_aout_binary,
113 .load_shlib = load_aout_library, 45 .load_shlib = load_aout_library,
114#ifdef CONFIG_COREDUMP
115 .core_dump = aout_core_dump,
116#endif
117 .min_coredump = PAGE_SIZE
118}; 46};
119 47
120static int set_brk(unsigned long start, unsigned long end) 48static int set_brk(unsigned long start, unsigned long end)
@@ -126,91 +54,6 @@ static int set_brk(unsigned long start, unsigned long end)
126 return vm_brk(start, end - start); 54 return vm_brk(start, end - start);
127} 55}
128 56
129#ifdef CONFIG_COREDUMP
130/*
131 * These are the only things you should do on a core-file: use only these
132 * macros to write out all the necessary info.
133 */
134
135#include <linux/coredump.h>
136
137#define START_DATA(u) (u.u_tsize << PAGE_SHIFT)
138#define START_STACK(u) (u.start_stack)
139
140/*
141 * Routine writes a core dump image in the current directory.
142 * Currently only a stub-function.
143 *
144 * Note that setuid/setgid files won't make a core-dump if the uid/gid
145 * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable"
146 * field, which also makes sure the core-dumps won't be recursive if the
147 * dumping of the process results in another error..
148 */
149
150static int aout_core_dump(struct coredump_params *cprm)
151{
152 mm_segment_t fs;
153 int has_dumped = 0;
154 unsigned long dump_start, dump_size;
155 struct user32 dump;
156
157 fs = get_fs();
158 set_fs(KERNEL_DS);
159 has_dumped = 1;
160 strncpy(dump.u_comm, current->comm, sizeof(current->comm));
161 dump.u_ar0 = offsetof(struct user32, regs);
162 dump.signal = cprm->siginfo->si_signo;
163 dump_thread32(cprm->regs, &dump);
164
165 /*
166 * If the size of the dump file exceeds the rlimit, then see
167 * what would happen if we wrote the stack, but not the data
168 * area.
169 */
170 if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
171 dump.u_dsize = 0;
172
173 /* Make sure we have enough room to write the stack and data areas. */
174 if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
175 dump.u_ssize = 0;
176
177 /* make sure we actually have a data and stack area to dump */
178 set_fs(USER_DS);
179 if (!access_ok((void *) (unsigned long)START_DATA(dump),
180 dump.u_dsize << PAGE_SHIFT))
181 dump.u_dsize = 0;
182 if (!access_ok((void *) (unsigned long)START_STACK(dump),
183 dump.u_ssize << PAGE_SHIFT))
184 dump.u_ssize = 0;
185
186 set_fs(KERNEL_DS);
187 /* struct user */
188 if (!dump_emit(cprm, &dump, sizeof(dump)))
189 goto end_coredump;
190 /* Now dump all of the user data. Include malloced stuff as well */
191 if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
192 goto end_coredump;
193 /* now we start writing out the user space info */
194 set_fs(USER_DS);
195 /* Dump the data area */
196 if (dump.u_dsize != 0) {
197 dump_start = START_DATA(dump);
198 dump_size = dump.u_dsize << PAGE_SHIFT;
199 if (!dump_emit(cprm, (void *)dump_start, dump_size))
200 goto end_coredump;
201 }
202 /* Now prepare to dump the stack area */
203 if (dump.u_ssize != 0) {
204 dump_start = START_STACK(dump);
205 dump_size = dump.u_ssize << PAGE_SHIFT;
206 if (!dump_emit(cprm, (void *)dump_start, dump_size))
207 goto end_coredump;
208 }
209end_coredump:
210 set_fs(fs);
211 return has_dumped;
212}
213#endif
214 57
215/* 58/*
216 * create_aout_tables() parses the env- and arg-strings in new user 59 * create_aout_tables() parses the env- and arg-strings in new user
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
deleted file mode 100644
index 7d3ece8bfb61..000000000000
--- a/arch/x86/include/asm/a.out-core.h
+++ /dev/null
@@ -1,67 +0,0 @@
1/* a.out coredump register dumper
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#ifndef _ASM_X86_A_OUT_CORE_H
13#define _ASM_X86_A_OUT_CORE_H
14
15#ifdef __KERNEL__
16#ifdef CONFIG_X86_32
17
18#include <linux/user.h>
19#include <linux/elfcore.h>
20#include <linux/mm_types.h>
21
22#include <asm/debugreg.h>
23
24/*
25 * fill in the user structure for an a.out core dump
26 */
27static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
28{
29/* changed the size calculations - should hopefully work better. lbt */
30 dump->magic = CMAGIC;
31 dump->start_code = 0;
32 dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
33 dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT;
34 dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1)))
35 >> PAGE_SHIFT;
36 dump->u_dsize -= dump->u_tsize;
37 dump->u_ssize = 0;
38 aout_dump_debugregs(dump);
39
40 if (dump->start_stack < TASK_SIZE)
41 dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
42 >> PAGE_SHIFT;
43
44 dump->regs.bx = regs->bx;
45 dump->regs.cx = regs->cx;
46 dump->regs.dx = regs->dx;
47 dump->regs.si = regs->si;
48 dump->regs.di = regs->di;
49 dump->regs.bp = regs->bp;
50 dump->regs.ax = regs->ax;
51 dump->regs.ds = (u16)regs->ds;
52 dump->regs.es = (u16)regs->es;
53 dump->regs.fs = (u16)regs->fs;
54 dump->regs.gs = get_user_gs(regs);
55 dump->regs.orig_ax = regs->orig_ax;
56 dump->regs.ip = regs->ip;
57 dump->regs.cs = (u16)regs->cs;
58 dump->regs.flags = regs->flags;
59 dump->regs.sp = regs->sp;
60 dump->regs.ss = (u16)regs->ss;
61
62 dump->u_fpvalid = dump_fpu(regs, &dump->i387);
63}
64
65#endif /* CONFIG_X86_32 */
66#endif /* __KERNEL__ */
67#endif /* _ASM_X86_A_OUT_CORE_H */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 705dafc2d11a..2bdbbbcfa393 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -841,7 +841,7 @@ union hv_gpa_page_range {
841 * count is equal with how many entries of union hv_gpa_page_range can 841 * count is equal with how many entries of union hv_gpa_page_range can
842 * be populated into the input parameter page. 842 * be populated into the input parameter page.
843 */ 843 */
844#define HV_MAX_FLUSH_REP_COUNT (PAGE_SIZE - 2 * sizeof(u64) / \ 844#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) / \
845 sizeof(union hv_gpa_page_range)) 845 sizeof(union hv_gpa_page_range))
846 846
847struct hv_guest_mapping_flush_list { 847struct hv_guest_mapping_flush_list {
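
The HV_MAX_FLUSH_REP_COUNT change above is a pure operator-precedence fix: without the extra parentheses, only 2 * sizeof(u64) was divided by the entry size, so the macro evaluated to roughly PAGE_SIZE instead of the number of entries that fit after the two u64 header fields. A standalone check with stand-in sizes (the entry size here is an assumption, not the real sizeof(union hv_gpa_page_range)):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define U64_SIZE	8UL
#define ENTRY_SIZE	8UL	/* stand-in for sizeof(union hv_gpa_page_range) */

int main(void)
{
	unsigned long broken = PAGE_SIZE - 2 * U64_SIZE / ENTRY_SIZE;	/* 4096 - 2 = 4094 */
	unsigned long fixed  = (PAGE_SIZE - 2 * U64_SIZE) / ENTRY_SIZE;	/* 4080 / 8 = 510 */

	printf("broken=%lu fixed=%lu\n", broken, fixed);
	return 0;
}
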
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index d9a9993af882..9f15384c504a 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -52,6 +52,8 @@
52 52
53#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 53#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66
54 54
55#define INTEL_FAM6_ICELAKE_MOBILE 0x7E
56
55/* "Small Core" Processors (Atom) */ 57/* "Small Core" Processors (Atom) */
56 58
57#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ 59#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4660ce90de7f..180373360e34 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -299,6 +299,7 @@ union kvm_mmu_extended_role {
299 unsigned int cr4_smap:1; 299 unsigned int cr4_smap:1;
300 unsigned int cr4_smep:1; 300 unsigned int cr4_smep:1;
301 unsigned int cr4_la57:1; 301 unsigned int cr4_la57:1;
302 unsigned int maxphyaddr:6;
302 }; 303 };
303}; 304};
304 305
@@ -397,6 +398,7 @@ struct kvm_mmu {
397 void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 398 void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
398 u64 *spte, const void *pte); 399 u64 *spte, const void *pte);
399 hpa_t root_hpa; 400 hpa_t root_hpa;
401 gpa_t root_cr3;
400 union kvm_mmu_role mmu_role; 402 union kvm_mmu_role mmu_role;
401 u8 root_level; 403 u8 root_level;
402 u8 shadow_root_level; 404 u8 shadow_root_level;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 40616e805292..2779ace16d23 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1065,7 +1065,7 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
1065static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, 1065static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
1066 pmd_t *pmdp, pmd_t pmd) 1066 pmd_t *pmdp, pmd_t pmd)
1067{ 1067{
1068 native_set_pmd(pmdp, pmd); 1068 set_pmd(pmdp, pmd);
1069} 1069}
1070 1070
1071static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, 1071static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 780f2b42c8ef..5e49a0acb5ee 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -25,7 +25,6 @@
25#define KERNEL_DS MAKE_MM_SEG(-1UL) 25#define KERNEL_DS MAKE_MM_SEG(-1UL)
26#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) 26#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
27 27
28#define get_ds() (KERNEL_DS)
29#define get_fs() (current->thread.addr_limit) 28#define get_fs() (current->thread.addr_limit)
30static inline void set_fs(mm_segment_t fs) 29static inline void set_fs(mm_segment_t fs)
31{ 30{
@@ -284,7 +283,7 @@ do { \
284 __put_user_goto(x, ptr, "l", "k", "ir", label); \ 283 __put_user_goto(x, ptr, "l", "k", "ir", label); \
285 break; \ 284 break; \
286 case 8: \ 285 case 8: \
287 __put_user_goto_u64((__typeof__(*ptr))(x), ptr, label); \ 286 __put_user_goto_u64(x, ptr, label); \
288 break; \ 287 break; \
289 default: \ 288 default: \
290 __put_user_bad(); \ 289 __put_user_bad(); \
@@ -431,8 +430,10 @@ do { \
431({ \ 430({ \
432 __label__ __pu_label; \ 431 __label__ __pu_label; \
433 int __pu_err = -EFAULT; \ 432 int __pu_err = -EFAULT; \
433 __typeof__(*(ptr)) __pu_val; \
434 __pu_val = x; \
434 __uaccess_begin(); \ 435 __uaccess_begin(); \
435 __put_user_size((x), (ptr), (size), __pu_label); \ 436 __put_user_size(__pu_val, (ptr), (size), __pu_label); \
436 __pu_err = 0; \ 437 __pu_err = 0; \
437__pu_label: \ 438__pu_label: \
438 __uaccess_end(); \ 439 __uaccess_end(); \
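
In the __put_user_size()/put_user path above, the user-supplied expression is now evaluated exactly once into a temporary of the destination's type before __uaccess_begin() opens the user-access window, and the dropped (__typeof__(*ptr)) cast in the 8-byte case is replaced by that single implicit conversion. A minimal userspace sketch of the pattern (plain C with the GCC __typeof__ extension, not the kernel macros):

#include <stdio.h>

static int calls;

static int next_value(void)
{
	calls++;			/* visible side effect */
	return 42;
}

/* Capture 'x' once, with the destination's type, before doing anything
 * else - the shape of the new __pu_val temporary. */
#define PUT_ONCE(x, ptr)				\
do {							\
	__typeof__(*(ptr)) __val = (x);			\
	*(ptr) = __val;					\
} while (0)

int main(void)
{
	long dst = 0;

	PUT_ONCE(next_value(), &dst);	/* int result converted to long once */
	printf("dst=%ld calls=%d\n", dst, calls);
	return 0;
}
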
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index e652a7cc6186..3f697a9e3f59 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -48,7 +48,8 @@ enum {
48 BIOS_STATUS_SUCCESS = 0, 48 BIOS_STATUS_SUCCESS = 0,
49 BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, 49 BIOS_STATUS_UNIMPLEMENTED = -ENOSYS,
50 BIOS_STATUS_EINVAL = -EINVAL, 50 BIOS_STATUS_EINVAL = -EINVAL,
51 BIOS_STATUS_UNAVAIL = -EBUSY 51 BIOS_STATUS_UNAVAIL = -EBUSY,
52 BIOS_STATUS_ABORT = -EINTR,
52}; 53};
53 54
54/* Address map parameters */ 55/* Address map parameters */
@@ -167,4 +168,9 @@ extern long system_serial_number;
167 168
168extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ 169extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
169 170
171/*
172 * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details
173 */
174extern struct semaphore __efi_uv_runtime_lock;
175
170#endif /* _ASM_X86_UV_BIOS_H */ 176#endif /* _ASM_X86_UV_BIOS_H */
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index f6648e9928b3..efe701b7c6ce 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -3,3 +3,4 @@ include include/uapi/asm-generic/Kbuild.asm
3generated-y += unistd_32.h 3generated-y += unistd_32.h
4generated-y += unistd_64.h 4generated-y += unistd_64.h
5generated-y += unistd_x32.h 5generated-y += unistd_x32.h
6generic-y += socket.h
diff --git a/arch/x86/include/uapi/asm/socket.h b/arch/x86/include/uapi/asm/socket.h
deleted file mode 100644
index 6b71384b9d8b..000000000000
--- a/arch/x86/include/uapi/asm/socket.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/socket.h>
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 672c7225cb1b..6ce290c506d9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -784,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
784 quirk_no_way_out(i, m, regs); 784 quirk_no_way_out(i, m, regs);
785 785
786 if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { 786 if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
787 m->bank = i;
787 mce_read_aux(m, i); 788 mce_read_aux(m, i);
788 *msg = tmp; 789 *msg = tmp;
789 return 1; 790 return 1;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bbffa6c54697..c07958b59f50 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -335,6 +335,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
335 unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; 335 unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
336 unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; 336 unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
337 unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; 337 unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
338 unsigned f_la57 = 0;
338 339
339 /* cpuid 1.edx */ 340 /* cpuid 1.edx */
340 const u32 kvm_cpuid_1_edx_x86_features = 341 const u32 kvm_cpuid_1_edx_x86_features =
@@ -489,7 +490,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
489 // TSC_ADJUST is emulated 490 // TSC_ADJUST is emulated
490 entry->ebx |= F(TSC_ADJUST); 491 entry->ebx |= F(TSC_ADJUST);
491 entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; 492 entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
493 f_la57 = entry->ecx & F(LA57);
492 cpuid_mask(&entry->ecx, CPUID_7_ECX); 494 cpuid_mask(&entry->ecx, CPUID_7_ECX);
495 /* Set LA57 based on hardware capability. */
496 entry->ecx |= f_la57;
493 entry->ecx |= f_umip; 497 entry->ecx |= f_umip;
494 /* PKU is not yet implemented for shadow paging. */ 498 /* PKU is not yet implemented for shadow paging. */
495 if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) 499 if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index da9c42349b1f..f2d1d230d5b8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3555,6 +3555,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
3555 &invalid_list); 3555 &invalid_list);
3556 mmu->root_hpa = INVALID_PAGE; 3556 mmu->root_hpa = INVALID_PAGE;
3557 } 3557 }
3558 mmu->root_cr3 = 0;
3558 } 3559 }
3559 3560
3560 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 3561 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -3610,6 +3611,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3610 vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); 3611 vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
3611 } else 3612 } else
3612 BUG(); 3613 BUG();
3614 vcpu->arch.mmu->root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
3613 3615
3614 return 0; 3616 return 0;
3615} 3617}
@@ -3618,10 +3620,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3618{ 3620{
3619 struct kvm_mmu_page *sp; 3621 struct kvm_mmu_page *sp;
3620 u64 pdptr, pm_mask; 3622 u64 pdptr, pm_mask;
3621 gfn_t root_gfn; 3623 gfn_t root_gfn, root_cr3;
3622 int i; 3624 int i;
3623 3625
3624 root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT; 3626 root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
3627 root_gfn = root_cr3 >> PAGE_SHIFT;
3625 3628
3626 if (mmu_check_root(vcpu, root_gfn)) 3629 if (mmu_check_root(vcpu, root_gfn))
3627 return 1; 3630 return 1;
@@ -3646,7 +3649,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3646 ++sp->root_count; 3649 ++sp->root_count;
3647 spin_unlock(&vcpu->kvm->mmu_lock); 3650 spin_unlock(&vcpu->kvm->mmu_lock);
3648 vcpu->arch.mmu->root_hpa = root; 3651 vcpu->arch.mmu->root_hpa = root;
3649 return 0; 3652 goto set_root_cr3;
3650 } 3653 }
3651 3654
3652 /* 3655 /*
@@ -3712,6 +3715,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3712 vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); 3715 vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
3713 } 3716 }
3714 3717
3718set_root_cr3:
3719 vcpu->arch.mmu->root_cr3 = root_cr3;
3720
3715 return 0; 3721 return 0;
3716} 3722}
3717 3723
@@ -4163,7 +4169,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
4163 struct kvm_mmu_root_info root; 4169 struct kvm_mmu_root_info root;
4164 struct kvm_mmu *mmu = vcpu->arch.mmu; 4170 struct kvm_mmu *mmu = vcpu->arch.mmu;
4165 4171
4166 root.cr3 = mmu->get_cr3(vcpu); 4172 root.cr3 = mmu->root_cr3;
4167 root.hpa = mmu->root_hpa; 4173 root.hpa = mmu->root_hpa;
4168 4174
4169 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { 4175 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
@@ -4176,6 +4182,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
4176 } 4182 }
4177 4183
4178 mmu->root_hpa = root.hpa; 4184 mmu->root_hpa = root.hpa;
4185 mmu->root_cr3 = root.cr3;
4179 4186
4180 return i < KVM_MMU_NUM_PREV_ROOTS; 4187 return i < KVM_MMU_NUM_PREV_ROOTS;
4181} 4188}
@@ -4770,6 +4777,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
4770 ext.cr4_pse = !!is_pse(vcpu); 4777 ext.cr4_pse = !!is_pse(vcpu);
4771 ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE); 4778 ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
4772 ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57); 4779 ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
4780 ext.maxphyaddr = cpuid_maxphyaddr(vcpu);
4773 4781
4774 ext.valid = 1; 4782 ext.valid = 1;
4775 4783
@@ -5516,11 +5524,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
5516 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; 5524 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
5517 5525
5518 vcpu->arch.root_mmu.root_hpa = INVALID_PAGE; 5526 vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
5527 vcpu->arch.root_mmu.root_cr3 = 0;
5519 vcpu->arch.root_mmu.translate_gpa = translate_gpa; 5528 vcpu->arch.root_mmu.translate_gpa = translate_gpa;
5520 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) 5529 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
5521 vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; 5530 vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
5522 5531
5523 vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE; 5532 vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
5533 vcpu->arch.guest_mmu.root_cr3 = 0;
5524 vcpu->arch.guest_mmu.translate_gpa = translate_gpa; 5534 vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
5525 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) 5535 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
5526 vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; 5536 vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
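
The mmu.c changes above make each root (current and prev_roots[]) remember the guest CR3 it was built for, so cached_root_available() compares the new CR3 against that recorded value rather than re-reading mmu->get_cr3() at lookup time. A toy model of keying a cached root by its CR3 (simplified bookkeeping, not KVM's actual list handling):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_PREV_ROOTS 3

struct root_info {
	uint64_t cr3;	/* guest CR3 this root was built for */
	uint64_t hpa;	/* root table address */
};

struct toy_mmu {
	struct root_info current;
	struct root_info prev[NUM_PREV_ROOTS];
};

/* Reuse a cached root if one was built for new_cr3; otherwise demote the
 * current root into the cache (crude eviction, for illustration only). */
static bool cached_root_available(struct toy_mmu *mmu, uint64_t new_cr3)
{
	int i;

	for (i = 0; i < NUM_PREV_ROOTS; i++) {
		if (mmu->prev[i].cr3 == new_cr3) {
			struct root_info hit = mmu->prev[i];

			mmu->prev[i] = mmu->current;
			mmu->current = hit;
			return true;
		}
	}
	mmu->prev[0] = mmu->current;
	return false;
}

int main(void)
{
	struct toy_mmu mmu = {
		.current = { .cr3 = 0x1000, .hpa = 0xa000 },
		.prev	 = { { .cr3 = 0x2000, .hpa = 0xb000 } },
	};

	printf("hit=%d hpa=%#llx\n", cached_root_available(&mmu, 0x2000),
	       (unsigned long long)mmu.current.hpa);
	return 0;
}
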
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index d8ea4ebd79e7..d737a51a53ca 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2473,6 +2473,10 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
2473 (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) 2473 (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
2474 return -EINVAL; 2474 return -EINVAL;
2475 2475
2476 if (!nested_cpu_has_preemption_timer(vmcs12) &&
2477 nested_cpu_has_save_preemption_timer(vmcs12))
2478 return -EINVAL;
2479
2476 if (nested_cpu_has_ept(vmcs12) && 2480 if (nested_cpu_has_ept(vmcs12) &&
2477 !valid_ept_address(vcpu, vmcs12->ept_pointer)) 2481 !valid_ept_address(vcpu, vmcs12->ept_pointer))
2478 return -EINVAL; 2482 return -EINVAL;
@@ -5557,9 +5561,11 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
5557 * secondary cpu-based controls. Do not include those that 5561 * secondary cpu-based controls. Do not include those that
5558 * depend on CPUID bits, they are added later by vmx_cpuid_update. 5562 * depend on CPUID bits, they are added later by vmx_cpuid_update.
5559 */ 5563 */
5560 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, 5564 if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
5561 msrs->secondary_ctls_low, 5565 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
5562 msrs->secondary_ctls_high); 5566 msrs->secondary_ctls_low,
5567 msrs->secondary_ctls_high);
5568
5563 msrs->secondary_ctls_low = 0; 5569 msrs->secondary_ctls_low = 0;
5564 msrs->secondary_ctls_high &= 5570 msrs->secondary_ctls_high &=
5565 SECONDARY_EXEC_DESC | 5571 SECONDARY_EXEC_DESC |
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 95d618045001..30a6bcd735ec 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -863,7 +863,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
863 if (!entry_only) 863 if (!entry_only)
864 j = find_msr(&m->host, msr); 864 j = find_msr(&m->host, msr);
865 865
866 if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) { 866 if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) ||
867 (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) {
867 printk_once(KERN_WARNING "Not enough msr switch entries. " 868 printk_once(KERN_WARNING "Not enough msr switch entries. "
868 "Can't add msr %x\n", msr); 869 "Can't add msr %x\n", msr);
869 return; 870 return;
@@ -1193,21 +1194,6 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
1193 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) 1194 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
1194 return; 1195 return;
1195 1196
1196 /*
1197 * First handle the simple case where no cmpxchg is necessary; just
1198 * allow posting non-urgent interrupts.
1199 *
1200 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
1201 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
1202 * expects the VCPU to be on the blocked_vcpu_list that matches
1203 * PI.NDST.
1204 */
1205 if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
1206 vcpu->cpu == cpu) {
1207 pi_clear_sn(pi_desc);
1208 return;
1209 }
1210
1211 /* The full case. */ 1197 /* The full case. */
1212 do { 1198 do {
1213 old.control = new.control = pi_desc->control; 1199 old.control = new.control = pi_desc->control;
@@ -1222,6 +1208,17 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
1222 new.sn = 0; 1208 new.sn = 0;
1223 } while (cmpxchg64(&pi_desc->control, old.control, 1209 } while (cmpxchg64(&pi_desc->control, old.control,
1224 new.control) != old.control); 1210 new.control) != old.control);
1211
1212 /*
1213 * Clear SN before reading the bitmap. The VT-d firmware
1214 * writes the bitmap and reads SN atomically (5.2.3 in the
1215 * spec), so it doesn't really have a memory barrier that
1216 * pairs with this, but we cannot do that and we need one.
1217 */
1218 smp_mb__after_atomic();
1219
1220 if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
1221 pi_set_on(pi_desc);
1225} 1222}
1226 1223
1227/* 1224/*
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 99328954c2fc..0ac0a64c7790 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -337,16 +337,16 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
337 return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); 337 return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
338} 338}
339 339
340static inline void pi_clear_sn(struct pi_desc *pi_desc) 340static inline void pi_set_sn(struct pi_desc *pi_desc)
341{ 341{
342 return clear_bit(POSTED_INTR_SN, 342 return set_bit(POSTED_INTR_SN,
343 (unsigned long *)&pi_desc->control); 343 (unsigned long *)&pi_desc->control);
344} 344}
345 345
346static inline void pi_set_sn(struct pi_desc *pi_desc) 346static inline void pi_set_on(struct pi_desc *pi_desc)
347{ 347{
348 return set_bit(POSTED_INTR_SN, 348 set_bit(POSTED_INTR_ON,
349 (unsigned long *)&pi_desc->control); 349 (unsigned long *)&pi_desc->control);
350} 350}
351 351
352static inline void pi_clear_on(struct pi_desc *pi_desc) 352static inline void pi_clear_on(struct pi_desc *pi_desc)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e67ecf25e690..941f932373d0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7801,7 +7801,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
7801 * 1) We should set ->mode before checking ->requests. Please see 7801 * 1) We should set ->mode before checking ->requests. Please see
7802 * the comment in kvm_vcpu_exiting_guest_mode(). 7802 * the comment in kvm_vcpu_exiting_guest_mode().
7803 * 7803 *
7804 * 2) For APICv, we should set ->mode before checking PIR.ON. This 7804 * 2) For APICv, we should set ->mode before checking PID.ON. This
7805 * pairs with the memory barrier implicit in pi_test_and_set_on 7805 * pairs with the memory barrier implicit in pi_test_and_set_on
7806 * (see vmx_deliver_posted_interrupt). 7806 * (see vmx_deliver_posted_interrupt).
7807 * 7807 *
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 6521134057e8..3c4568f8fb28 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -117,67 +117,12 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
117} 117}
118EXPORT_SYMBOL_GPL(ex_handler_fprestore); 118EXPORT_SYMBOL_GPL(ex_handler_fprestore);
119 119
120/* Helper to check whether a uaccess fault indicates a kernel bug. */
121static bool bogus_uaccess(struct pt_regs *regs, int trapnr,
122 unsigned long fault_addr)
123{
124 /* This is the normal case: #PF with a fault address in userspace. */
125 if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX)
126 return false;
127
128 /*
129 * This code can be reached for machine checks, but only if the #MC
130 * handler has already decided that it looks like a candidate for fixup.
131 * This e.g. happens when attempting to access userspace memory which
132 * the CPU can't access because of uncorrectable bad memory.
133 */
134 if (trapnr == X86_TRAP_MC)
135 return false;
136
137 /*
138 * There are two remaining exception types we might encounter here:
139 * - #PF for faulting accesses to kernel addresses
140 * - #GP for faulting accesses to noncanonical addresses
141 * Complain about anything else.
142 */
143 if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) {
144 WARN(1, "unexpected trap %d in uaccess\n", trapnr);
145 return false;
146 }
147
148 /*
149 * This is a faulting memory access in kernel space, on a kernel
150 * address, in a usercopy function. This can e.g. be caused by improper
151 * use of helpers like __put_user and by improper attempts to access
152 * userspace addresses in KERNEL_DS regions.
153 * The one (semi-)legitimate exception are probe_kernel_{read,write}(),
154 * which can be invoked from places like kgdb, /dev/mem (for reading)
155 * and privileged BPF code (for reading).
156 * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
157 * to tell us that faulting on kernel addresses, and even noncanonical
158 * addresses, in a userspace accessor does not necessarily imply a
159 * kernel bug, root might just be doing weird stuff.
160 */
161 if (current->kernel_uaccess_faults_ok)
162 return false;
163
164 /* This is bad. Refuse the fixup so that we go into die(). */
165 if (trapnr == X86_TRAP_PF) {
166 pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n",
167 fault_addr);
168 } else {
169 pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n");
170 }
171 return true;
172}
173
174__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup, 120__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
175 struct pt_regs *regs, int trapnr, 121 struct pt_regs *regs, int trapnr,
176 unsigned long error_code, 122 unsigned long error_code,
177 unsigned long fault_addr) 123 unsigned long fault_addr)
178{ 124{
179 if (bogus_uaccess(regs, trapnr, fault_addr)) 125 WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
180 return false;
181 regs->ip = ex_fixup_addr(fixup); 126 regs->ip = ex_fixup_addr(fixup);
182 return true; 127 return true;
183} 128}
@@ -188,8 +133,6 @@ __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
188 unsigned long error_code, 133 unsigned long error_code,
189 unsigned long fault_addr) 134 unsigned long fault_addr)
190{ 135{
191 if (bogus_uaccess(regs, trapnr, fault_addr))
192 return false;
193 /* Special hack for uaccess_err */ 136 /* Special hack for uaccess_err */
194 current->thread.uaccess_err = 1; 137 current->thread.uaccess_err = 1;
195 regs->ip = ex_fixup_addr(fixup); 138 regs->ip = ex_fixup_addr(fixup);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4f8972311a77..14e6119838a6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -230,6 +230,29 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn)
230 230
231#endif 231#endif
232 232
233/*
234 * See set_mce_nospec().
235 *
236 * Machine check recovery code needs to change cache mode of poisoned pages to
237 * UC to avoid speculative access logging another error. But passing the
238 * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
239 * speculative access. So we cheat and flip the top bit of the address. This
240 * works fine for the code that updates the page tables. But at the end of the
241 * process we need to flush the TLB and cache and the non-canonical address
242 * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
243 *
244 * But in the common case we already have a canonical address. This code
245 * will fix the top bit if needed and is a no-op otherwise.
246 */
247static inline unsigned long fix_addr(unsigned long addr)
248{
249#ifdef CONFIG_X86_64
250 return (long)(addr << 1) >> 1;
251#else
252 return addr;
253#endif
254}
255
233static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx) 256static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
234{ 257{
235 if (cpa->flags & CPA_PAGES_ARRAY) { 258 if (cpa->flags & CPA_PAGES_ARRAY) {
@@ -313,7 +336,7 @@ void __cpa_flush_tlb(void *data)
313 unsigned int i; 336 unsigned int i;
314 337
315 for (i = 0; i < cpa->numpages; i++) 338 for (i = 0; i < cpa->numpages; i++)
316 __flush_tlb_one_kernel(__cpa_addr(cpa, i)); 339 __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
317} 340}
318 341
319static void cpa_flush(struct cpa_data *data, int cache) 342static void cpa_flush(struct cpa_data *data, int cache)
@@ -347,7 +370,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
347 * Only flush present addresses: 370 * Only flush present addresses:
348 */ 371 */
349 if (pte && (pte_val(*pte) & _PAGE_PRESENT)) 372 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
350 clflush_cache_range_opt((void *)addr, PAGE_SIZE); 373 clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
351 } 374 }
352 mb(); 375 mb();
353} 376}
@@ -1627,29 +1650,6 @@ out:
1627 return ret; 1650 return ret;
1628} 1651}
1629 1652
1630/*
1631 * Machine check recovery code needs to change cache mode of poisoned
1632 * pages to UC to avoid speculative access logging another error. But
1633 * passing the address of the 1:1 mapping to set_memory_uc() is a fine
1634 * way to encourage a speculative access. So we cheat and flip the top
1635 * bit of the address. This works fine for the code that updates the
1636 * page tables. But at the end of the process we need to flush the cache
1637 * and the non-canonical address causes a #GP fault when used by the
1638 * CLFLUSH instruction.
1639 *
1640 * But in the common case we already have a canonical address. This code
1641 * will fix the top bit if needed and is a no-op otherwise.
1642 */
1643static inline unsigned long make_addr_canonical_again(unsigned long addr)
1644{
1645#ifdef CONFIG_X86_64
1646 return (long)(addr << 1) >> 1;
1647#else
1648 return addr;
1649#endif
1650}
1651
1652
1653static int change_page_attr_set_clr(unsigned long *addr, int numpages, 1653static int change_page_attr_set_clr(unsigned long *addr, int numpages,
1654 pgprot_t mask_set, pgprot_t mask_clr, 1654 pgprot_t mask_set, pgprot_t mask_clr,
1655 int force_split, int in_flag, 1655 int force_split, int in_flag,
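
fix_addr() above (relocated from the old make_addr_canonical_again() and now also applied on the TLB-flush path) undoes the deliberate top-bit flip used to hide poisoned 1:1-mapping addresses: shifting left by one and arithmetic-shifting back copies bit 62 into bit 63, which restores a flipped address and leaves an already-canonical one untouched. A standalone 64-bit demo (the addresses are made up):

#include <stdio.h>

static unsigned long fix_addr(unsigned long addr)
{
	return (long)(addr << 1) >> 1;	/* sign-extend bit 62 into bit 63 */
}

int main(void)
{
	unsigned long canonical = 0xffff888012345000UL;		/* typical direct-map address */
	unsigned long decoy	= canonical & ~(1UL << 63);	/* top bit flipped on purpose */

	printf("canonical: %#lx -> %#lx\n", canonical, fix_addr(canonical));
	printf("decoy:     %#lx -> %#lx\n", decoy, fix_addr(decoy));
	return 0;
}
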
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5542303c43d9..afabf597c855 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -881,20 +881,41 @@ xadd: if (is_imm8(insn->off))
881 case BPF_JMP | BPF_JSLT | BPF_X: 881 case BPF_JMP | BPF_JSLT | BPF_X:
882 case BPF_JMP | BPF_JSGE | BPF_X: 882 case BPF_JMP | BPF_JSGE | BPF_X:
883 case BPF_JMP | BPF_JSLE | BPF_X: 883 case BPF_JMP | BPF_JSLE | BPF_X:
884 case BPF_JMP32 | BPF_JEQ | BPF_X:
885 case BPF_JMP32 | BPF_JNE | BPF_X:
886 case BPF_JMP32 | BPF_JGT | BPF_X:
887 case BPF_JMP32 | BPF_JLT | BPF_X:
888 case BPF_JMP32 | BPF_JGE | BPF_X:
889 case BPF_JMP32 | BPF_JLE | BPF_X:
890 case BPF_JMP32 | BPF_JSGT | BPF_X:
891 case BPF_JMP32 | BPF_JSLT | BPF_X:
892 case BPF_JMP32 | BPF_JSGE | BPF_X:
893 case BPF_JMP32 | BPF_JSLE | BPF_X:
884 /* cmp dst_reg, src_reg */ 894 /* cmp dst_reg, src_reg */
885 EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39, 895 if (BPF_CLASS(insn->code) == BPF_JMP)
886 add_2reg(0xC0, dst_reg, src_reg)); 896 EMIT1(add_2mod(0x48, dst_reg, src_reg));
897 else if (is_ereg(dst_reg) || is_ereg(src_reg))
898 EMIT1(add_2mod(0x40, dst_reg, src_reg));
899 EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
887 goto emit_cond_jmp; 900 goto emit_cond_jmp;
888 901
889 case BPF_JMP | BPF_JSET | BPF_X: 902 case BPF_JMP | BPF_JSET | BPF_X:
903 case BPF_JMP32 | BPF_JSET | BPF_X:
890 /* test dst_reg, src_reg */ 904 /* test dst_reg, src_reg */
891 EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85, 905 if (BPF_CLASS(insn->code) == BPF_JMP)
892 add_2reg(0xC0, dst_reg, src_reg)); 906 EMIT1(add_2mod(0x48, dst_reg, src_reg));
907 else if (is_ereg(dst_reg) || is_ereg(src_reg))
908 EMIT1(add_2mod(0x40, dst_reg, src_reg));
909 EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
893 goto emit_cond_jmp; 910 goto emit_cond_jmp;
894 911
895 case BPF_JMP | BPF_JSET | BPF_K: 912 case BPF_JMP | BPF_JSET | BPF_K:
913 case BPF_JMP32 | BPF_JSET | BPF_K:
896 /* test dst_reg, imm32 */ 914 /* test dst_reg, imm32 */
897 EMIT1(add_1mod(0x48, dst_reg)); 915 if (BPF_CLASS(insn->code) == BPF_JMP)
916 EMIT1(add_1mod(0x48, dst_reg));
917 else if (is_ereg(dst_reg))
918 EMIT1(add_1mod(0x40, dst_reg));
898 EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); 919 EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
899 goto emit_cond_jmp; 920 goto emit_cond_jmp;
900 921
@@ -908,8 +929,21 @@ xadd: if (is_imm8(insn->off))
908 case BPF_JMP | BPF_JSLT | BPF_K: 929 case BPF_JMP | BPF_JSLT | BPF_K:
909 case BPF_JMP | BPF_JSGE | BPF_K: 930 case BPF_JMP | BPF_JSGE | BPF_K:
910 case BPF_JMP | BPF_JSLE | BPF_K: 931 case BPF_JMP | BPF_JSLE | BPF_K:
932 case BPF_JMP32 | BPF_JEQ | BPF_K:
933 case BPF_JMP32 | BPF_JNE | BPF_K:
934 case BPF_JMP32 | BPF_JGT | BPF_K:
935 case BPF_JMP32 | BPF_JLT | BPF_K:
936 case BPF_JMP32 | BPF_JGE | BPF_K:
937 case BPF_JMP32 | BPF_JLE | BPF_K:
938 case BPF_JMP32 | BPF_JSGT | BPF_K:
939 case BPF_JMP32 | BPF_JSLT | BPF_K:
940 case BPF_JMP32 | BPF_JSGE | BPF_K:
941 case BPF_JMP32 | BPF_JSLE | BPF_K:
911 /* cmp dst_reg, imm8/32 */ 942 /* cmp dst_reg, imm8/32 */
912 EMIT1(add_1mod(0x48, dst_reg)); 943 if (BPF_CLASS(insn->code) == BPF_JMP)
944 EMIT1(add_1mod(0x48, dst_reg));
945 else if (is_ereg(dst_reg))
946 EMIT1(add_1mod(0x40, dst_reg));
913 947
914 if (is_imm8(imm32)) 948 if (is_imm8(imm32))
915 EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); 949 EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
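
The new BPF_JMP32 cases above reuse the existing compare/test opcodes; what changes is the REX prefix: BPF_JMP keeps the unconditional REX.W (the 0x48-based add_*mod() byte) for 64-bit operands, while BPF_JMP32 emits a 0x40-based REX only when an extended register (r8-r15) forces one, and nothing otherwise. A simplified standalone sketch of that prefix decision (not the real emitter, which builds the byte via add_2mod()):

#include <stdbool.h>
#include <stdio.h>

/* Returns the REX prefix byte to emit before "cmp reg,reg", or 0 when
 * no prefix is needed (32-bit compare on legacy registers only). */
static unsigned char rex_for_cmp(bool is_jmp64, bool dst_ext, bool src_ext)
{
	unsigned char rex = 0x40;		/* base REX */

	if (is_jmp64)
		rex |= 0x08;			/* REX.W: 64-bit operand size */
	if (src_ext)
		rex |= 0x04;			/* REX.R: reg field uses r8-r15 */
	if (dst_ext)
		rex |= 0x01;			/* REX.B: r/m field uses r8-r15 */

	if (!is_jmp64 && !dst_ext && !src_ext)
		return 0;			/* BPF_JMP32 on legacy regs: no prefix */
	return rex;
}

int main(void)
{
	printf("jmp64 rax,rbx -> %#x\n", rex_for_cmp(true,  false, false));	/* 0x48 */
	printf("jmp32 rax,rbx -> %#x\n", rex_for_cmp(false, false, false));	/* 0 = no prefix */
	printf("jmp32 r9,rbx  -> %#x\n", rex_for_cmp(false, true,  false));	/* 0x41 */
	return 0;
}
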
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 8f6cc71e0848..0d9cdffce6ac 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2072,7 +2072,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
2072 case BPF_JMP | BPF_JSGT | BPF_X: 2072 case BPF_JMP | BPF_JSGT | BPF_X:
2073 case BPF_JMP | BPF_JSLE | BPF_X: 2073 case BPF_JMP | BPF_JSLE | BPF_X:
2074 case BPF_JMP | BPF_JSLT | BPF_X: 2074 case BPF_JMP | BPF_JSLT | BPF_X:
2075 case BPF_JMP | BPF_JSGE | BPF_X: { 2075 case BPF_JMP | BPF_JSGE | BPF_X:
2076 case BPF_JMP32 | BPF_JEQ | BPF_X:
2077 case BPF_JMP32 | BPF_JNE | BPF_X:
2078 case BPF_JMP32 | BPF_JGT | BPF_X:
2079 case BPF_JMP32 | BPF_JLT | BPF_X:
2080 case BPF_JMP32 | BPF_JGE | BPF_X:
2081 case BPF_JMP32 | BPF_JLE | BPF_X:
2082 case BPF_JMP32 | BPF_JSGT | BPF_X:
2083 case BPF_JMP32 | BPF_JSLE | BPF_X:
2084 case BPF_JMP32 | BPF_JSLT | BPF_X:
2085 case BPF_JMP32 | BPF_JSGE | BPF_X: {
2086 bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2076 u8 dreg_lo = dstk ? IA32_EAX : dst_lo; 2087 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2077 u8 dreg_hi = dstk ? IA32_EDX : dst_hi; 2088 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2078 u8 sreg_lo = sstk ? IA32_ECX : src_lo; 2089 u8 sreg_lo = sstk ? IA32_ECX : src_lo;
@@ -2081,25 +2092,35 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
2081 if (dstk) { 2092 if (dstk) {
2082 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), 2093 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2083 STACK_VAR(dst_lo)); 2094 STACK_VAR(dst_lo));
2084 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), 2095 if (is_jmp64)
2085 STACK_VAR(dst_hi)); 2096 EMIT3(0x8B,
2097 add_2reg(0x40, IA32_EBP,
2098 IA32_EDX),
2099 STACK_VAR(dst_hi));
2086 } 2100 }
2087 2101
2088 if (sstk) { 2102 if (sstk) {
2089 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), 2103 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2090 STACK_VAR(src_lo)); 2104 STACK_VAR(src_lo));
2091 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), 2105 if (is_jmp64)
2092 STACK_VAR(src_hi)); 2106 EMIT3(0x8B,
2107 add_2reg(0x40, IA32_EBP,
2108 IA32_EBX),
2109 STACK_VAR(src_hi));
2093 } 2110 }
2094 2111
2095 /* cmp dreg_hi,sreg_hi */ 2112 if (is_jmp64) {
2096 EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); 2113 /* cmp dreg_hi,sreg_hi */
2097 EMIT2(IA32_JNE, 2); 2114 EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2115 EMIT2(IA32_JNE, 2);
2116 }
2098 /* cmp dreg_lo,sreg_lo */ 2117 /* cmp dreg_lo,sreg_lo */
2099 EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); 2118 EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2100 goto emit_cond_jmp; 2119 goto emit_cond_jmp;
2101 } 2120 }
2102 case BPF_JMP | BPF_JSET | BPF_X: { 2121 case BPF_JMP | BPF_JSET | BPF_X:
2122 case BPF_JMP32 | BPF_JSET | BPF_X: {
2123 bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2103 u8 dreg_lo = dstk ? IA32_EAX : dst_lo; 2124 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2104 u8 dreg_hi = dstk ? IA32_EDX : dst_hi; 2125 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2105 u8 sreg_lo = sstk ? IA32_ECX : src_lo; 2126 u8 sreg_lo = sstk ? IA32_ECX : src_lo;
@@ -2108,15 +2129,21 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
2108 if (dstk) { 2129 if (dstk) {
2109 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), 2130 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2110 STACK_VAR(dst_lo)); 2131 STACK_VAR(dst_lo));
2111 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), 2132 if (is_jmp64)
2112 STACK_VAR(dst_hi)); 2133 EMIT3(0x8B,
2134 add_2reg(0x40, IA32_EBP,
2135 IA32_EDX),
2136 STACK_VAR(dst_hi));
2113 } 2137 }
2114 2138
2115 if (sstk) { 2139 if (sstk) {
2116 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), 2140 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2117 STACK_VAR(src_lo)); 2141 STACK_VAR(src_lo));
2118 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), 2142 if (is_jmp64)
2119 STACK_VAR(src_hi)); 2143 EMIT3(0x8B,
2144 add_2reg(0x40, IA32_EBP,
2145 IA32_EBX),
2146 STACK_VAR(src_hi));
2120 } 2147 }
2121 /* and dreg_lo,sreg_lo */ 2148 /* and dreg_lo,sreg_lo */
2122 EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); 2149 EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
@@ -2126,32 +2153,39 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
2126 EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); 2153 EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2127 goto emit_cond_jmp; 2154 goto emit_cond_jmp;
2128 } 2155 }
2129 case BPF_JMP | BPF_JSET | BPF_K: { 2156 case BPF_JMP | BPF_JSET | BPF_K:
2130 u32 hi; 2157 case BPF_JMP32 | BPF_JSET | BPF_K: {
2158 bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2131 u8 dreg_lo = dstk ? IA32_EAX : dst_lo; 2159 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2132 u8 dreg_hi = dstk ? IA32_EDX : dst_hi; 2160 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2133 u8 sreg_lo = IA32_ECX; 2161 u8 sreg_lo = IA32_ECX;
2134 u8 sreg_hi = IA32_EBX; 2162 u8 sreg_hi = IA32_EBX;
2163 u32 hi;
2135 2164
2136 if (dstk) { 2165 if (dstk) {
2137 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), 2166 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2138 STACK_VAR(dst_lo)); 2167 STACK_VAR(dst_lo));
2139 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), 2168 if (is_jmp64)
2140 STACK_VAR(dst_hi)); 2169 EMIT3(0x8B,
2170 add_2reg(0x40, IA32_EBP,
2171 IA32_EDX),
2172 STACK_VAR(dst_hi));
2141 } 2173 }
2142 hi = imm32 & (1<<31) ? (u32)~0 : 0;
2143 2174
2144 /* mov ecx,imm32 */ 2175 /* mov ecx,imm32 */
2145 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); 2176 EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32);
2146 /* mov ebx,imm32 */
2147 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2148 2177
2149 /* and dreg_lo,sreg_lo */ 2178 /* and dreg_lo,sreg_lo */
2150 EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); 2179 EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2151 /* and dreg_hi,sreg_hi */ 2180 if (is_jmp64) {
2152 EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); 2181 hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2153 /* or dreg_lo,dreg_hi */ 2182 /* mov ebx,imm32 */
2154 EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); 2183 EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi);
2184 /* and dreg_hi,sreg_hi */
2185 EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2186 /* or dreg_lo,dreg_hi */
2187 EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2188 }
2155 goto emit_cond_jmp; 2189 goto emit_cond_jmp;
2156 } 2190 }
2157 case BPF_JMP | BPF_JEQ | BPF_K: 2191 case BPF_JMP | BPF_JEQ | BPF_K:
@@ -2163,29 +2197,44 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
2163 case BPF_JMP | BPF_JSGT | BPF_K: 2197 case BPF_JMP | BPF_JSGT | BPF_K:
2164 case BPF_JMP | BPF_JSLE | BPF_K: 2198 case BPF_JMP | BPF_JSLE | BPF_K:
2165 case BPF_JMP | BPF_JSLT | BPF_K: 2199 case BPF_JMP | BPF_JSLT | BPF_K:
2166 case BPF_JMP | BPF_JSGE | BPF_K: { 2200 case BPF_JMP | BPF_JSGE | BPF_K:
2167 u32 hi; 2201 case BPF_JMP32 | BPF_JEQ | BPF_K:
2202 case BPF_JMP32 | BPF_JNE | BPF_K:
2203 case BPF_JMP32 | BPF_JGT | BPF_K:
2204 case BPF_JMP32 | BPF_JLT | BPF_K:
2205 case BPF_JMP32 | BPF_JGE | BPF_K:
2206 case BPF_JMP32 | BPF_JLE | BPF_K:
2207 case BPF_JMP32 | BPF_JSGT | BPF_K:
2208 case BPF_JMP32 | BPF_JSLE | BPF_K:
2209 case BPF_JMP32 | BPF_JSLT | BPF_K:
2210 case BPF_JMP32 | BPF_JSGE | BPF_K: {
2211 bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2168 u8 dreg_lo = dstk ? IA32_EAX : dst_lo; 2212 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2169 u8 dreg_hi = dstk ? IA32_EDX : dst_hi; 2213 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2170 u8 sreg_lo = IA32_ECX; 2214 u8 sreg_lo = IA32_ECX;
2171 u8 sreg_hi = IA32_EBX; 2215 u8 sreg_hi = IA32_EBX;
2216 u32 hi;
2172 2217
2173 if (dstk) { 2218 if (dstk) {
2174 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), 2219 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2175 STACK_VAR(dst_lo)); 2220 STACK_VAR(dst_lo));
2176 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), 2221 if (is_jmp64)
2177 STACK_VAR(dst_hi)); 2222 EMIT3(0x8B,
2223 add_2reg(0x40, IA32_EBP,
2224 IA32_EDX),
2225 STACK_VAR(dst_hi));
2178 } 2226 }
2179 2227
2180 hi = imm32 & (1<<31) ? (u32)~0 : 0;
2181 /* mov ecx,imm32 */ 2228 /* mov ecx,imm32 */
2182 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); 2229 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2183 /* mov ebx,imm32 */ 2230 if (is_jmp64) {
2184 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); 2231 hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2185 2232 /* mov ebx,imm32 */
2186 /* cmp dreg_hi,sreg_hi */ 2233 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2187 EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); 2234 /* cmp dreg_hi,sreg_hi */
2188 EMIT2(IA32_JNE, 2); 2235 EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2236 EMIT2(IA32_JNE, 2);
2237 }
2189 /* cmp dreg_lo,sreg_lo */ 2238 /* cmp dreg_lo,sreg_lo */
2190 EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); 2239 EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2191 2240
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
index 96f438d4b026..1421d5330b2c 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
@@ -44,7 +44,6 @@ static struct fixed_voltage_config bcm43xx_vmmc = {
44 */ 44 */
45 .microvolts = 2000000, /* 1.8V */ 45 .microvolts = 2000000, /* 1.8V */
46 .startup_delay = 250 * 1000, /* 250ms */ 46 .startup_delay = 250 * 1000, /* 250ms */
47 .enable_high = 1, /* active high */
48 .enabled_at_boot = 0, /* disabled at boot */ 47 .enabled_at_boot = 0, /* disabled at boot */
49 .init_data = &bcm43xx_vmmc_data, 48 .init_data = &bcm43xx_vmmc_data,
50}; 49};
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 4a6a5a26c582..eb33432f2f24 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -29,7 +29,8 @@
29 29
30struct uv_systab *uv_systab; 30struct uv_systab *uv_systab;
31 31
32s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) 32static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
33 u64 a4, u64 a5)
33{ 34{
34 struct uv_systab *tab = uv_systab; 35 struct uv_systab *tab = uv_systab;
35 s64 ret; 36 s64 ret;
@@ -51,6 +52,19 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
51 52
52 return ret; 53 return ret;
53} 54}
55
56s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
57{
58 s64 ret;
59
60 if (down_interruptible(&__efi_uv_runtime_lock))
61 return BIOS_STATUS_ABORT;
62
63 ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
64 up(&__efi_uv_runtime_lock);
65
66 return ret;
67}
54EXPORT_SYMBOL_GPL(uv_bios_call); 68EXPORT_SYMBOL_GPL(uv_bios_call);
55 69
56s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, 70s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
@@ -59,10 +73,15 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
59 unsigned long bios_flags; 73 unsigned long bios_flags;
60 s64 ret; 74 s64 ret;
61 75
76 if (down_interruptible(&__efi_uv_runtime_lock))
77 return BIOS_STATUS_ABORT;
78
62 local_irq_save(bios_flags); 79 local_irq_save(bios_flags);
63 ret = uv_bios_call(which, a1, a2, a3, a4, a5); 80 ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
64 local_irq_restore(bios_flags); 81 local_irq_restore(bios_flags);
65 82
83 up(&__efi_uv_runtime_lock);
84
66 return ret; 85 return ret;
67} 86}
68 87
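
uv_bios_call() and uv_bios_call_irqsave() above are now serialized against the EFI runtime wrappers through the shared __efi_uv_runtime_lock semaphore (declared in the uv/bios.h hunk earlier), and an interrupted wait is reported as the new BIOS_STATUS_ABORT rather than entering the firmware concurrently. A userspace analog of that wrapper shape using a POSIX semaphore (stand-ins throughout, not the kernel API):

#include <semaphore.h>
#include <stdio.h>

#define STATUS_ABORT (-4)	/* stand-in for BIOS_STATUS_ABORT (-EINTR) */

static sem_t runtime_lock;

static long firmware_call_locked(int which)
{
	return which;		/* pretend to enter the firmware here */
}

static long firmware_call(int which)
{
	long ret;

	if (sem_wait(&runtime_lock) != 0)	/* e.g. interrupted by a signal */
		return STATUS_ABORT;

	ret = firmware_call_locked(which);
	sem_post(&runtime_lock);
	return ret;
}

int main(void)
{
	sem_init(&runtime_lock, 0, 1);
	printf("ret=%ld\n", firmware_call(7));
	sem_destroy(&runtime_lock);
	return 0;
}
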
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index f518b4744ff8..494eeb51e4e1 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -16,7 +16,6 @@ config 64BIT
16 16
17config X86_32 17config X86_32
18 def_bool !64BIT 18 def_bool !64BIT
19 select HAVE_AOUT
20 select ARCH_WANT_IPC_PARSE_VERSION 19 select ARCH_WANT_IPC_PARSE_VERSION
21 select MODULES_USE_ELF_REL 20 select MODULES_USE_ELF_REL
22 select CLONE_BACKWARDS 21 select CLONE_BACKWARDS