Diffstat (limited to 'arch/x86')
39 files changed, 627 insertions(+), 1100 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 68261430fe6e..ade12ec4224b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -14,7 +14,6 @@ config X86_32
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select CLKSRC_I8253
 	select CLONE_BACKWARDS
-	select HAVE_AOUT
 	select HAVE_GENERIC_DMA_COHERENT
 	select MODULES_USE_ELF_REL
 	select OLD_SIGACTION
@@ -2843,6 +2842,7 @@ config IA32_EMULATION
 config IA32_AOUT
 	tristate "IA32 a.out support"
 	depends on IA32_EMULATION
+	depends on BROKEN
 	---help---
 	  Support old a.out binaries in the 32bit emulation.
 
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f105ae8651c9..f62e347862cc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -602,10 +602,12 @@ ENTRY(trampoline_32bit_src)
 3:
 	/* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
 	pushl	%ecx
+	pushl	%edx
 	movl	$MSR_EFER, %ecx
 	rdmsr
 	btsl	$_EFER_LME, %eax
 	wrmsr
+	popl	%edx
 	popl	%ecx
 
 	/* Enable PAE and LA57 (if required) paging modes */
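The fix above is needed because rdmsr returns the 64-bit MSR value in the edx:eax register pair, so %edx is clobbered even though only %eax is modified and written back. A minimal C sketch of the same read-modify-write under that convention (efer_set_lme() is a hypothetical helper, not the kernel's native_read_msr()/native_write_msr()):

#include <stdint.h>

#define MSR_EFER	0xc0000080
#define EFER_LME	(1u << 8)

/* Hedged sketch: rdmsr fills edx:eax, wrmsr consumes edx:eax, which is
 * why the trampoline must now preserve %edx as well as %ecx. */
static inline void efer_set_lme(void)
{
	uint32_t lo, hi;

	asm volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (MSR_EFER));
	lo |= EFER_LME;		/* set EFER.LME without touching other bits */
	asm volatile("wrmsr" : : "a" (lo), "d" (hi), "c" (MSR_EFER));
}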
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 2a356b948720..3ea71b871813 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -119,31 +119,20 @@ static void crypto_aegis128_aesni_process_ad(
 }
 
 static void crypto_aegis128_aesni_process_crypt(
-		struct aegis_state *state, struct aead_request *req,
+		struct aegis_state *state, struct skcipher_walk *walk,
 		const struct aegis_crypt_ops *ops)
 {
-	struct skcipher_walk walk;
-	u8 *src, *dst;
-	unsigned int chunksize, base;
-
-	ops->skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		src = walk.src.virt.addr;
-		dst = walk.dst.virt.addr;
-		chunksize = walk.nbytes;
-
-		ops->crypt_blocks(state, chunksize, src, dst);
-
-		base = chunksize & ~(AEGIS128_BLOCK_SIZE - 1);
-		src += base;
-		dst += base;
-		chunksize &= AEGIS128_BLOCK_SIZE - 1;
-
-		if (chunksize > 0)
-			ops->crypt_tail(state, chunksize, src, dst);
+	while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
+		ops->crypt_blocks(state,
+				  round_down(walk->nbytes, AEGIS128_BLOCK_SIZE),
+				  walk->src.virt.addr, walk->dst.virt.addr);
+		skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
+	}
 
-		skcipher_walk_done(&walk, 0);
+	if (walk->nbytes) {
+		ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
+				walk->dst.virt.addr);
+		skcipher_walk_done(walk, 0);
 	}
 }
 
@@ -186,13 +175,16 @@ static void crypto_aegis128_aesni_crypt(struct aead_request *req,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
+	struct skcipher_walk walk;
 	struct aegis_state state;
 
+	ops->skcipher_walk_init(&walk, req, true);
+
 	kernel_fpu_begin();
 
 	crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
 	crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
-	crypto_aegis128_aesni_process_crypt(&state, req, ops);
+	crypto_aegis128_aesni_process_crypt(&state, &walk, ops);
 	crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
 
 	kernel_fpu_end();
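The same restructuring recurs in the aegis128l, aegis256, morus1280, and morus640 glue code below: the skcipher walk is initialized before kernel_fpu_begin() (walk setup may allocate or sleep), atomic=true keeps later walk steps from sleeping inside the FPU section, and the walk pointer is handed down so every scatterlist chunk gets processed rather than only the first. A minimal caller-side sketch of that ordering, with hypothetical example_*() helpers (the processing loop itself is the process_crypt shown in the diff above):

#include <crypto/internal/skcipher.h>
#include <asm/fpu/api.h>

/* Hedged sketch; example_state, example_init(), example_process_crypt()
 * and example_final() are hypothetical stand-ins for the per-algorithm
 * state and routines. */
static void example_aead_crypt(struct aead_request *req, bool enc)
{
	struct skcipher_walk walk;
	struct example_state state;

	/* Walk setup happens outside the FPU region, atomic=true. */
	if (enc)
		skcipher_walk_aead_encrypt(&walk, req, true);
	else
		skcipher_walk_aead_decrypt(&walk, req, true);

	kernel_fpu_begin();
	example_init(&state, req->iv);
	example_process_crypt(&state, &walk);	/* loop as in the diff above */
	example_final(&state);
	kernel_fpu_end();
}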
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index dbe8bb980da1..1b1b39c66c5e 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -119,31 +119,20 @@ static void crypto_aegis128l_aesni_process_ad(
 }
 
 static void crypto_aegis128l_aesni_process_crypt(
-		struct aegis_state *state, struct aead_request *req,
+		struct aegis_state *state, struct skcipher_walk *walk,
 		const struct aegis_crypt_ops *ops)
 {
-	struct skcipher_walk walk;
-	u8 *src, *dst;
-	unsigned int chunksize, base;
-
-	ops->skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		src = walk.src.virt.addr;
-		dst = walk.dst.virt.addr;
-		chunksize = walk.nbytes;
-
-		ops->crypt_blocks(state, chunksize, src, dst);
-
-		base = chunksize & ~(AEGIS128L_BLOCK_SIZE - 1);
-		src += base;
-		dst += base;
-		chunksize &= AEGIS128L_BLOCK_SIZE - 1;
-
-		if (chunksize > 0)
-			ops->crypt_tail(state, chunksize, src, dst);
+	while (walk->nbytes >= AEGIS128L_BLOCK_SIZE) {
+		ops->crypt_blocks(state, round_down(walk->nbytes,
+						    AEGIS128L_BLOCK_SIZE),
+				  walk->src.virt.addr, walk->dst.virt.addr);
+		skcipher_walk_done(walk, walk->nbytes % AEGIS128L_BLOCK_SIZE);
+	}
 
-		skcipher_walk_done(&walk, 0);
+	if (walk->nbytes) {
+		ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
+				walk->dst.virt.addr);
+		skcipher_walk_done(walk, 0);
 	}
 }
 
@@ -186,13 +175,16 @@ static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
+	struct skcipher_walk walk;
 	struct aegis_state state;
 
+	ops->skcipher_walk_init(&walk, req, true);
+
 	kernel_fpu_begin();
 
 	crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
 	crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
-	crypto_aegis128l_aesni_process_crypt(&state, req, ops);
+	crypto_aegis128l_aesni_process_crypt(&state, &walk, ops);
 	crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
 
 	kernel_fpu_end();
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index 8bebda2de92f..6227ca3220a0 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -119,31 +119,20 @@ static void crypto_aegis256_aesni_process_ad(
 }
 
 static void crypto_aegis256_aesni_process_crypt(
-		struct aegis_state *state, struct aead_request *req,
+		struct aegis_state *state, struct skcipher_walk *walk,
 		const struct aegis_crypt_ops *ops)
 {
-	struct skcipher_walk walk;
-	u8 *src, *dst;
-	unsigned int chunksize, base;
-
-	ops->skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		src = walk.src.virt.addr;
-		dst = walk.dst.virt.addr;
-		chunksize = walk.nbytes;
-
-		ops->crypt_blocks(state, chunksize, src, dst);
-
-		base = chunksize & ~(AEGIS256_BLOCK_SIZE - 1);
-		src += base;
-		dst += base;
-		chunksize &= AEGIS256_BLOCK_SIZE - 1;
-
-		if (chunksize > 0)
-			ops->crypt_tail(state, chunksize, src, dst);
+	while (walk->nbytes >= AEGIS256_BLOCK_SIZE) {
+		ops->crypt_blocks(state,
+				  round_down(walk->nbytes, AEGIS256_BLOCK_SIZE),
+				  walk->src.virt.addr, walk->dst.virt.addr);
+		skcipher_walk_done(walk, walk->nbytes % AEGIS256_BLOCK_SIZE);
+	}
 
-		skcipher_walk_done(&walk, 0);
+	if (walk->nbytes) {
+		ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
+				walk->dst.virt.addr);
+		skcipher_walk_done(walk, 0);
 	}
 }
 
@@ -186,13 +175,16 @@ static void crypto_aegis256_aesni_crypt(struct aead_request *req,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
+	struct skcipher_walk walk;
 	struct aegis_state state;
 
+	ops->skcipher_walk_init(&walk, req, true);
+
 	kernel_fpu_begin();
 
 	crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
 	crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
-	crypto_aegis256_aesni_process_crypt(&state, req, ops);
+	crypto_aegis256_aesni_process_crypt(&state, &walk, ops);
 	crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
 
 	kernel_fpu_end();
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 1321700d6647..1e3d2102033a 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -175,26 +175,18 @@ asmlinkage void aesni_gcm_finalize(void *ctx,
 				   struct gcm_context_data *gdata,
 				   u8 *auth_tag, unsigned long auth_tag_len);
 
-static struct aesni_gcm_tfm_s {
-	void (*init)(void *ctx,
-		     struct gcm_context_data *gdata,
-		     u8 *iv,
-		     u8 *hash_subkey, const u8 *aad,
-		     unsigned long aad_len);
-	void (*enc_update)(void *ctx,
-			   struct gcm_context_data *gdata, u8 *out,
-			   const u8 *in,
-			   unsigned long plaintext_len);
-	void (*dec_update)(void *ctx,
-			   struct gcm_context_data *gdata, u8 *out,
-			   const u8 *in,
-			   unsigned long ciphertext_len);
-	void (*finalize)(void *ctx,
-			 struct gcm_context_data *gdata,
-			 u8 *auth_tag, unsigned long auth_tag_len);
+static const struct aesni_gcm_tfm_s {
+	void (*init)(void *ctx, struct gcm_context_data *gdata, u8 *iv,
+		     u8 *hash_subkey, const u8 *aad, unsigned long aad_len);
+	void (*enc_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
+			   const u8 *in, unsigned long plaintext_len);
+	void (*dec_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
+			   const u8 *in, unsigned long ciphertext_len);
+	void (*finalize)(void *ctx, struct gcm_context_data *gdata,
+			 u8 *auth_tag, unsigned long auth_tag_len);
 } *aesni_gcm_tfm;
 
-struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
+static const struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
 	.init = &aesni_gcm_init,
 	.enc_update = &aesni_gcm_enc_update,
 	.dec_update = &aesni_gcm_dec_update,
@@ -243,7 +235,7 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx,
 				const u8 *aad, unsigned long aad_len,
 				u8 *auth_tag, unsigned long auth_tag_len);
 
-struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
+static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
 	.init = &aesni_gcm_init_avx_gen2,
 	.enc_update = &aesni_gcm_enc_update_avx_gen2,
 	.dec_update = &aesni_gcm_dec_update_avx_gen2,
@@ -288,7 +280,7 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx,
 				const u8 *aad, unsigned long aad_len,
 				u8 *auth_tag, unsigned long auth_tag_len);
 
-struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
+static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
 	.init = &aesni_gcm_init_avx_gen4,
 	.enc_update = &aesni_gcm_enc_update_avx_gen4,
 	.dec_update = &aesni_gcm_dec_update_avx_gen4,
@@ -778,7 +770,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
+	const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
 	struct gcm_context_data data AESNI_ALIGN_ATTR;
 	struct scatter_walk dst_sg_walk = {};
 	unsigned long left = req->cryptlen;
@@ -821,11 +813,14 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 		scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
 	}
 
-	src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
-	scatterwalk_start(&src_sg_walk, src_sg);
-	if (req->src != req->dst) {
-		dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen);
-		scatterwalk_start(&dst_sg_walk, dst_sg);
+	if (left) {
+		src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
+		scatterwalk_start(&src_sg_walk, src_sg);
+		if (req->src != req->dst) {
+			dst_sg = scatterwalk_ffwd(dst_start, req->dst,
+						  req->assoclen);
+			scatterwalk_start(&dst_sg_walk, dst_sg);
+		}
 	}
 
 	kernel_fpu_begin();
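The new if (left) guard matters for requests that carry only associated data: with req->cryptlen == 0 there is no data region to walk, so the src/dst scatterwalks must not be started at all. A hedged sketch of the request shape that exercises this path, using the generic AEAD API (the function name is hypothetical; scatterlist and IV setup are elided):

#include <crypto/aead.h>

/* Authenticate-only request: everything is AAD, cryptlen == 0, and
 * crypto_aead_encrypt() emits only the authentication tag. */
static int example_authenticate_only(struct crypto_aead *tfm,
				     struct scatterlist *sg,
				     unsigned int assoclen, u8 *iv)
{
	struct aead_request *req = aead_request_alloc(tfm, GFP_KERNEL);
	int err;

	if (!req)
		return -ENOMEM;
	aead_request_set_crypt(req, sg, sg, 0, iv);	/* cryptlen == 0 */
	aead_request_set_ad(req, assoclen);
	err = crypto_aead_encrypt(req);
	aead_request_free(req);
	return err;
}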
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
index de04d3e98d8d..3d873e67749d 100644
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -43,609 +43,291 @@
 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-########################################################################
-# Function API:
-# UINT16 crc_t10dif_pcl(
-#     UINT16 init_crc, //initial CRC value, 16 bits
-#     const unsigned char *buf, //buffer pointer to calculate CRC on
-#     UINT64 len //buffer length in bytes (64-bit data)
-# );
 #
 # Reference paper titled "Fast CRC Computation for Generic
 # Polynomials Using PCLMULQDQ Instruction"
 # URL: http://www.intel.com/content/dam/www/public/us/en/documents
 # /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
 #
-#
 
 #include <linux/linkage.h>
 
 .text
 
-#define	arg1 %rdi
-#define	arg2 %rsi
-#define	arg3 %rdx
+#define	init_crc	%edi
+#define	buf		%rsi
+#define	len		%rdx
 
-#define	arg1_low32 %edi
+#define	FOLD_CONSTS	%xmm10
+#define	BSWAP_MASK	%xmm11
+
+# Fold reg1, reg2 into the next 32 data bytes, storing the result back into
+# reg1, reg2.
+.macro	fold_32_bytes	offset, reg1, reg2
+	movdqu	\offset(buf), %xmm9
+	movdqu	\offset+16(buf), %xmm12
+	pshufb	BSWAP_MASK, %xmm9
+	pshufb	BSWAP_MASK, %xmm12
+	movdqa	\reg1, %xmm8
+	movdqa	\reg2, %xmm13
+	pclmulqdq	$0x00, FOLD_CONSTS, \reg1
+	pclmulqdq	$0x11, FOLD_CONSTS, %xmm8
+	pclmulqdq	$0x00, FOLD_CONSTS, \reg2
+	pclmulqdq	$0x11, FOLD_CONSTS, %xmm13
+	pxor	%xmm9 , \reg1
+	xorps	%xmm8 , \reg1
+	pxor	%xmm12, \reg2
+	xorps	%xmm13, \reg2
+.endm
+
+# Fold src_reg into dst_reg.
+.macro	fold_16_bytes	src_reg, dst_reg
+	movdqa	\src_reg, %xmm8
+	pclmulqdq	$0x11, FOLD_CONSTS, \src_reg
+	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
+	pxor	%xmm8, \dst_reg
+	xorps	\src_reg, \dst_reg
+.endm
 
-ENTRY(crc_t10dif_pcl)
+#
+# u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
+#
+# Assumes len >= 16.
+#
 .align 16
+ENTRY(crc_t10dif_pcl)
 
-	# adjust the 16-bit initial_crc value, scale it to 32 bits
-	shl	$16, arg1_low32
-
-	# Allocate Stack Space
-	mov	%rsp, %rcx
-	sub	$16*2, %rsp
-	# align stack to 16 byte boundary
-	and	$~(0x10 - 1), %rsp
-
-	# check if smaller than 256
-	cmp	$256, arg3
-
-	# for sizes less than 128, we can't fold 64B at a time...
-	jl	_less_than_128
-
-
-	# load the initial crc value
-	movd	arg1_low32, %xmm10	# initial crc
-
-	# crc value does not need to be byte-reflected, but it needs
-	# to be moved to the high part of the register.
-	# because data will be byte-reflected and will align with
-	# initial crc at correct place.
-	pslldq	$12, %xmm10
-
-	movdqa	SHUF_MASK(%rip), %xmm11
-	# receive the initial 64B data, xor the initial crc value
-	movdqu	16*0(arg2), %xmm0
-	movdqu	16*1(arg2), %xmm1
-	movdqu	16*2(arg2), %xmm2
-	movdqu	16*3(arg2), %xmm3
-	movdqu	16*4(arg2), %xmm4
-	movdqu	16*5(arg2), %xmm5
-	movdqu	16*6(arg2), %xmm6
-	movdqu	16*7(arg2), %xmm7
-
-	pshufb	%xmm11, %xmm0
-	# XOR the initial_crc value
-	pxor	%xmm10, %xmm0
-	pshufb	%xmm11, %xmm1
-	pshufb	%xmm11, %xmm2
-	pshufb	%xmm11, %xmm3
-	pshufb	%xmm11, %xmm4
-	pshufb	%xmm11, %xmm5
-	pshufb	%xmm11, %xmm6
-	pshufb	%xmm11, %xmm7
-
-	movdqa	rk3(%rip), %xmm10	#xmm10 has rk3 and rk4
-					#imm value of pclmulqdq instruction
-					#will determine which constant to use
-
-	#################################################################
-	# we subtract 256 instead of 128 to save one instruction from the loop
-	sub	$256, arg3
-
-	# at this section of the code, there is 64*x+y (0<=y<64) bytes of
-	# buffer. The _fold_64_B_loop will fold 64B at a time
-	# until we have 64+y Bytes of buffer
-
-
-	# fold 64B at a time. This section of the code folds 4 xmm
-	# registers in parallel
-_fold_64_B_loop:
-
-	# update the buffer pointer
-	add	$128, arg2		# buf += 64#
-
-	movdqu	16*0(arg2), %xmm9
-	movdqu	16*1(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm0, %xmm8
-	movdqa	%xmm1, %xmm13
-	pclmulqdq	$0x0 , %xmm10, %xmm0
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0 , %xmm10, %xmm1
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm0
-	xorps	%xmm8 , %xmm0
-	pxor	%xmm12, %xmm1
-	xorps	%xmm13, %xmm1
-
-	movdqu	16*2(arg2), %xmm9
-	movdqu	16*3(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm2, %xmm8
-	movdqa	%xmm3, %xmm13
-	pclmulqdq	$0x0, %xmm10, %xmm2
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0, %xmm10, %xmm3
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm2
-	xorps	%xmm8 , %xmm2
-	pxor	%xmm12, %xmm3
-	xorps	%xmm13, %xmm3
-
-	movdqu	16*4(arg2), %xmm9
-	movdqu	16*5(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm4, %xmm8
-	movdqa	%xmm5, %xmm13
-	pclmulqdq	$0x0, %xmm10, %xmm4
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0, %xmm10, %xmm5
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm4
-	xorps	%xmm8 , %xmm4
-	pxor	%xmm12, %xmm5
-	xorps	%xmm13, %xmm5
-
-	movdqu	16*6(arg2), %xmm9
-	movdqu	16*7(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm6 , %xmm8
-	movdqa	%xmm7 , %xmm13
-	pclmulqdq	$0x0 , %xmm10, %xmm6
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0 , %xmm10, %xmm7
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm6
-	xorps	%xmm8 , %xmm6
-	pxor	%xmm12, %xmm7
-	xorps	%xmm13, %xmm7
-
-	sub	$128, arg3
-
-	# check if there is another 64B in the buffer to be able to fold
-	jge	_fold_64_B_loop
-	##################################################################
-
-
-	add	$128, arg2
-	# at this point, the buffer pointer is pointing at the last y Bytes
-	# of the buffer the 64B of folded data is in 4 of the xmm
-	# registers: xmm0, xmm1, xmm2, xmm3
-
-
-	# fold the 8 xmm registers to 1 xmm register with different constants
-
-	movdqa	rk9(%rip), %xmm10
-	movdqa	%xmm0, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm0
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm0, %xmm7
-
-	movdqa	rk11(%rip), %xmm10
-	movdqa	%xmm1, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm1
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm1, %xmm7
-
-	movdqa	rk13(%rip), %xmm10
-	movdqa	%xmm2, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm2
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm2, %xmm7
-
-	movdqa	rk15(%rip), %xmm10
-	movdqa	%xmm3, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm3
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm3, %xmm7
-
-	movdqa	rk17(%rip), %xmm10
-	movdqa	%xmm4, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm4
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm4, %xmm7
-
-	movdqa	rk19(%rip), %xmm10
-	movdqa	%xmm5, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm5
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm5, %xmm7
-
-	movdqa	rk1(%rip), %xmm10	#xmm10 has rk1 and rk2
-					#imm value of pclmulqdq instruction
-					#will determine which constant to use
-	movdqa	%xmm6, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm6
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm6, %xmm7
-
-
-	# instead of 64, we add 48 to the loop counter to save 1 instruction
-	# from the loop instead of a cmp instruction, we use the negative
-	# flag with the jl instruction
-	add	$128-16, arg3
-	jl	_final_reduction_for_128
-
-	# now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
-	# and the rest is in memory. We can fold 16 bytes at a time if y>=16
-	# continue folding 16B at a time
-
-_16B_reduction_loop:
+	movdqa	.Lbswap_mask(%rip), BSWAP_MASK
+
+	# For sizes less than 256 bytes, we can't fold 128 bytes at a time.
+	cmp	$256, len
+	jl	.Lless_than_256_bytes
+
+	# Load the first 128 data bytes. Byte swapping is necessary to make the
+	# bit order match the polynomial coefficient order.
+	movdqu	16*0(buf), %xmm0
+	movdqu	16*1(buf), %xmm1
+	movdqu	16*2(buf), %xmm2
+	movdqu	16*3(buf), %xmm3
+	movdqu	16*4(buf), %xmm4
+	movdqu	16*5(buf), %xmm5
+	movdqu	16*6(buf), %xmm6
+	movdqu	16*7(buf), %xmm7
+	add	$128, buf
+	pshufb	BSWAP_MASK, %xmm0
+	pshufb	BSWAP_MASK, %xmm1
+	pshufb	BSWAP_MASK, %xmm2
+	pshufb	BSWAP_MASK, %xmm3
+	pshufb	BSWAP_MASK, %xmm4
+	pshufb	BSWAP_MASK, %xmm5
+	pshufb	BSWAP_MASK, %xmm6
+	pshufb	BSWAP_MASK, %xmm7
+
+	# XOR the first 16 data *bits* with the initial CRC value.
+	pxor	%xmm8, %xmm8
+	pinsrw	$7, init_crc, %xmm8
+	pxor	%xmm8, %xmm0
+
+	movdqa	.Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS
+
+	# Subtract 128 for the 128 data bytes just consumed. Subtract another
+	# 128 to simplify the termination condition of the following loop.
+	sub	$256, len
+
+	# While >= 128 data bytes remain (not counting xmm0-7), fold the 128
+	# bytes xmm0-7 into them, storing the result back into xmm0-7.
+.Lfold_128_bytes_loop:
+	fold_32_bytes	0, %xmm0, %xmm1
+	fold_32_bytes	32, %xmm2, %xmm3
+	fold_32_bytes	64, %xmm4, %xmm5
+	fold_32_bytes	96, %xmm6, %xmm7
+	add	$128, buf
+	sub	$128, len
+	jge	.Lfold_128_bytes_loop
+
+	# Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7.
+
+	# Fold across 64 bytes.
+	movdqa	.Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS
+	fold_16_bytes	%xmm0, %xmm4
+	fold_16_bytes	%xmm1, %xmm5
+	fold_16_bytes	%xmm2, %xmm6
+	fold_16_bytes	%xmm3, %xmm7
+	# Fold across 32 bytes.
+	movdqa	.Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS
+	fold_16_bytes	%xmm4, %xmm6
+	fold_16_bytes	%xmm5, %xmm7
+	# Fold across 16 bytes.
+	movdqa	.Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
+	fold_16_bytes	%xmm6, %xmm7
+
+	# Add 128 to get the correct number of data bytes remaining in 0...127
+	# (not counting xmm7), following the previous extra subtraction by 128.
+	# Then subtract 16 to simplify the termination condition of the
+	# following loop.
+	add	$128-16, len
+
+	# While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes
+	# xmm7 into them, storing the result back into xmm7.
+	jl	.Lfold_16_bytes_loop_done
+.Lfold_16_bytes_loop:
 	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm7
-	pclmulqdq	$0x0 , %xmm10, %xmm8
+	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7
+	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
 	pxor	%xmm8, %xmm7
-	movdqu	(arg2), %xmm0
-	pshufb	%xmm11, %xmm0
+	movdqu	(buf), %xmm0
+	pshufb	BSWAP_MASK, %xmm0
 	pxor	%xmm0 , %xmm7
-	add	$16, arg2
-	sub	$16, arg3
-	# instead of a cmp instruction, we utilize the flags with the
-	# jge instruction equivalent of: cmp arg3, 16-16
-	# check if there is any more 16B in the buffer to be able to fold
-	jge	_16B_reduction_loop
-
-	#now we have 16+z bytes left to reduce, where 0<= z < 16.
-	#first, we reduce the data in the xmm7 register
+	add	$16, buf
+	sub	$16, len
+	jge	.Lfold_16_bytes_loop
+
+.Lfold_16_bytes_loop_done:
+	# Add 16 to get the correct number of data bytes remaining in 0...15
+	# (not counting xmm7), following the previous extra subtraction by 16.
+	add	$16, len
+	je	.Lreduce_final_16_bytes
 
-
-_final_reduction_for_128:
-	# check if any more data to fold. If not, compute the CRC of
-	# the final 128 bits
-	add	$16, arg3
-	je	_128_done
+.Lhandle_partial_segment:
+	# Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16
+	# bytes are in xmm7 and the rest are the remaining data in 'buf'. To do
+	# this without needing a fold constant for each possible 'len', redivide
+	# the bytes into a first chunk of 'len' bytes and a second chunk of 16
+	# bytes, then fold the first chunk into the second.
 
-	# here we are getting data that is less than 16 bytes.
-	# since we know that there was data before the pointer, we can
-	# offset the input pointer before the actual point, to receive
-	# exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_xmms:
 	movdqa	%xmm7, %xmm2
 
-	movdqu	-16(arg2, arg3), %xmm1
-	pshufb	%xmm11, %xmm1
+	# xmm1 = last 16 original data bytes
+	movdqu	-16(buf, len), %xmm1
+	pshufb	BSWAP_MASK, %xmm1
 
-	# get rid of the extra data that was loaded before
-	# load the shift constant
-	lea	pshufb_shf_table+16(%rip), %rax
-	sub	arg3, %rax
+	# xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes.
+	lea	.Lbyteshift_table+16(%rip), %rax
+	sub	len, %rax
 	movdqu	(%rax), %xmm0
-
-	# shift xmm2 to the left by arg3 bytes
 	pshufb	%xmm0, %xmm2
 
-	# shift xmm7 to the right by 16-arg3 bytes
-	pxor	mask1(%rip), %xmm0
+	# xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes.
+	pxor	.Lmask1(%rip), %xmm0
 	pshufb	%xmm0, %xmm7
+
+	# xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes),
+	# then '16-len' bytes from xmm2 (high-order bytes).
 	pblendvb	%xmm2, %xmm1	#xmm0 is implicit
 
-	# fold 16 Bytes
-	movdqa	%xmm1, %xmm2
+	# Fold the first chunk into the second chunk, storing the result in xmm7.
 	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm7
-	pclmulqdq	$0x0 , %xmm10, %xmm8
+	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7
+	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
 	pxor	%xmm8, %xmm7
-	pxor	%xmm2, %xmm7
+	pxor	%xmm1, %xmm7
 
-_128_done:
-	# compute crc of a 128-bit value
-	movdqa	rk5(%rip), %xmm10	# rk5 and rk6 in xmm10
-	movdqa	%xmm7, %xmm0
+.Lreduce_final_16_bytes:
+	# Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC
 
-	#64b fold
-	pclmulqdq	$0x1, %xmm10, %xmm7
-	pslldq	$8 , %xmm0
-	pxor	%xmm0, %xmm7
+	# Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
+	movdqa	.Lfinal_fold_consts(%rip), FOLD_CONSTS
 
-	#32b fold
+	# Fold the high 64 bits into the low 64 bits, while also multiplying by
+	# x^64. This produces a 128-bit value congruent to x^64 * M(x) and
+	# whose low 48 bits are 0.
 	movdqa	%xmm7, %xmm0
+	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x))
+	pslldq	$8, %xmm0
+	pxor	%xmm0, %xmm7	# + low bits * x^64
 
-	pand	mask2(%rip), %xmm0
-
-	psrldq	$12, %xmm7
-	pclmulqdq	$0x10, %xmm10, %xmm7
-	pxor	%xmm0, %xmm7
-
-	#barrett reduction
-_barrett:
-	movdqa	rk7(%rip), %xmm10	# rk7 and rk8 in xmm10
+	# Fold the high 32 bits into the low 96 bits. This produces a 96-bit
+	# value congruent to x^64 * M(x) and whose low 48 bits are 0.
 	movdqa	%xmm7, %xmm0
-	pclmulqdq	$0x01, %xmm10, %xmm7
-	pslldq	$4, %xmm7
-	pclmulqdq	$0x11, %xmm10, %xmm7
+	pand	.Lmask2(%rip), %xmm0	# zero high 32 bits
+	psrldq	$12, %xmm7		# extract high 32 bits
+	pclmulqdq	$0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x))
+	pxor	%xmm0, %xmm7		# + low bits
 
-	pslldq	$4, %xmm7
-	pxor	%xmm0, %xmm7
-	pextrd	$1, %xmm7, %eax
+	# Load G(x) and floor(x^48 / G(x)).
+	movdqa	.Lbarrett_reduction_consts(%rip), FOLD_CONSTS
 
-_cleanup:
-	# scale the result back to 16 bits
-	shr	$16, %eax
-	mov	%rcx, %rsp
+	# Use Barrett reduction to compute the final CRC value.
+	movdqa	%xmm7, %xmm0
+	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x))
+	psrlq	$32, %xmm7		# /= x^32
+	pclmulqdq	$0x00, FOLD_CONSTS, %xmm7 # *= G(x)
+	psrlq	$48, %xmm0
+	pxor	%xmm7, %xmm0		# + low 16 nonzero bits
+	# Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
+
+	pextrw	$0, %xmm0, %eax
 	ret
 
-########################################################################
-
 .align 16
-_less_than_128:
-
-	# check if there is enough buffer to be able to fold 16B at a time
-	cmp	$32, arg3
-	jl	_less_than_32
-	movdqa	SHUF_MASK(%rip), %xmm11
+.Lless_than_256_bytes:
+	# Checksumming a buffer of length 16...255 bytes
 
-	# now if there is, load the constants
-	movdqa	rk1(%rip), %xmm10	# rk1 and rk2 in xmm10
+	# Load the first 16 data bytes.
+	movdqu	(buf), %xmm7
+	pshufb	BSWAP_MASK, %xmm7
+	add	$16, buf
 
-	movd	arg1_low32, %xmm0	# get the initial crc value
-	pslldq	$12, %xmm0	# align it to its correct place
-	movdqu	(arg2), %xmm7	# load the plaintext
-	pshufb	%xmm11, %xmm7	# byte-reflect the plaintext
+	# XOR the first 16 data *bits* with the initial CRC value.
+	pxor	%xmm0, %xmm0
+	pinsrw	$7, init_crc, %xmm0
 	pxor	%xmm0, %xmm7
 
-
-	# update the buffer pointer
-	add	$16, arg2
-
-	# update the counter. subtract 32 instead of 16 to save one
-	# instruction from the loop
-	sub	$32, arg3
-
-	jmp	_16B_reduction_loop
-
-
-.align 16
-_less_than_32:
-	# mov initial crc to the return value. this is necessary for
-	# zero-length buffers.
-	mov	arg1_low32, %eax
-	test	arg3, arg3
-	je	_cleanup
-
-	movdqa	SHUF_MASK(%rip), %xmm11
-
-	movd	arg1_low32, %xmm0	# get the initial crc value
-	pslldq	$12, %xmm0	# align it to its correct place
-
-	cmp	$16, arg3
-	je	_exact_16_left
-	jl	_less_than_16_left
-
-	movdqu	(arg2), %xmm7	# load the plaintext
-	pshufb	%xmm11, %xmm7	# byte-reflect the plaintext
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-	add	$16, arg2
-	sub	$16, arg3
-	movdqa	rk1(%rip), %xmm10	# rk1 and rk2 in xmm10
-	jmp	_get_last_two_xmms
-
-
-.align 16
-_less_than_16_left:
-	# use stack space to load data less than 16 bytes, zero-out
-	# the 16B in memory first.
-
-	pxor	%xmm1, %xmm1
-	mov	%rsp, %r11
-	movdqa	%xmm1, (%r11)
-
-	cmp	$4, arg3
-	jl	_only_less_than_4
-
-	# backup the counter value
-	mov	arg3, %r9
-	cmp	$8, arg3
-	jl	_less_than_8_left
-
-	# load 8 Bytes
-	mov	(arg2), %rax
-	mov	%rax, (%r11)
-	add	$8, %r11
-	sub	$8, arg3
-	add	$8, arg2
-_less_than_8_left:
-
-	cmp	$4, arg3
-	jl	_less_than_4_left
-
-	# load 4 Bytes
-	mov	(arg2), %eax
-	mov	%eax, (%r11)
-	add	$4, %r11
-	sub	$4, arg3
-	add	$4, arg2
-_less_than_4_left:
-
-	cmp	$2, arg3
-	jl	_less_than_2_left
-
-	# load 2 Bytes
-	mov	(arg2), %ax
-	mov	%ax, (%r11)
-	add	$2, %r11
-	sub	$2, arg3
-	add	$2, arg2
-_less_than_2_left:
-	cmp	$1, arg3
-	jl	_zero_left
-
-	# load 1 Byte
-	mov	(arg2), %al
-	mov	%al, (%r11)
-_zero_left:
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-
-	# shl r9, 4
-	lea	pshufb_shf_table+16(%rip), %rax
-	sub	%r9, %rax
-	movdqu	(%rax), %xmm0
-	pxor	mask1(%rip), %xmm0
-
-	pshufb	%xmm0, %xmm7
-	jmp	_128_done
-
-.align 16
-_exact_16_left:
-	movdqu	(arg2), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-
-	jmp	_128_done
-
-_only_less_than_4:
-	cmp	$3, arg3
-	jl	_only_less_than_3
-
-	# load 3 Bytes
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	mov	1(arg2), %al
-	mov	%al, 1(%r11)
-
-	mov	2(arg2), %al
-	mov	%al, 2(%r11)
-
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-
-	psrldq	$5, %xmm7
-
-	jmp	_barrett
-_only_less_than_3:
-	cmp	$2, arg3
-	jl	_only_less_than_2
-
-	# load 2 Bytes
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	mov	1(arg2), %al
-	mov	%al, 1(%r11)
-
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-
-	psrldq	$6, %xmm7
-
-	jmp	_barrett
-_only_less_than_2:
-
-	# load 1 Byte
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-
-	psrldq	$7, %xmm7
-
-	jmp	_barrett
-
+	movdqa	.Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
+	cmp	$16, len
+	je	.Lreduce_final_16_bytes		# len == 16
+	sub	$32, len
+	jge	.Lfold_16_bytes_loop		# 32 <= len <= 255
+	add	$16, len
+	jmp	.Lhandle_partial_segment	# 17 <= len <= 31
 ENDPROC(crc_t10dif_pcl)
 
 .section	.rodata, "a", @progbits
 .align	16
-# precomputed constants
-# these constants are precomputed from the poly:
-# 0x8bb70000 (0x8bb7 scaled to 32 bits)
-# Q = 0x18BB70000
-# rk1 = 2^(32*3) mod Q << 32
-# rk2 = 2^(32*5) mod Q << 32
-# rk3 = 2^(32*15) mod Q << 32
-# rk4 = 2^(32*17) mod Q << 32
-# rk5 = 2^(32*3) mod Q << 32
-# rk6 = 2^(32*2) mod Q << 32
-# rk7 = floor(2^64/Q)
-# rk8 = Q
-rk1:
-	.quad 0x2d56000000000000
-rk2:
-	.quad 0x06df000000000000
-rk3:
-	.quad 0x9d9d000000000000
-rk4:
-	.quad 0x7cf5000000000000
-rk5:
-	.quad 0x2d56000000000000
-rk6:
-	.quad 0x1368000000000000
-rk7:
-	.quad 0x00000001f65a57f8
-rk8:
-	.quad 0x000000018bb70000
-
-rk9:
-	.quad 0xceae000000000000
-rk10:
-	.quad 0xbfd6000000000000
-rk11:
-	.quad 0x1e16000000000000
-rk12:
-	.quad 0x713c000000000000
-rk13:
-	.quad 0xf7f9000000000000
-rk14:
-	.quad 0x80a6000000000000
-rk15:
-	.quad 0x044c000000000000
-rk16:
-	.quad 0xe658000000000000
-rk17:
-	.quad 0xad18000000000000
-rk18:
-	.quad 0xa497000000000000
-rk19:
-	.quad 0x6ee3000000000000
-rk20:
-	.quad 0xe7b5000000000000
-
 
+# Fold constants precomputed from the polynomial 0x18bb7
+# G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
+.Lfold_across_128_bytes_consts:
+	.quad	0x0000000000006123	# x^(8*128) mod G(x)
+	.quad	0x0000000000002295	# x^(8*128+64) mod G(x)
+.Lfold_across_64_bytes_consts:
+	.quad	0x0000000000001069	# x^(4*128) mod G(x)
+	.quad	0x000000000000dd31	# x^(4*128+64) mod G(x)
+.Lfold_across_32_bytes_consts:
+	.quad	0x000000000000857d	# x^(2*128) mod G(x)
+	.quad	0x0000000000007acc	# x^(2*128+64) mod G(x)
+.Lfold_across_16_bytes_consts:
+	.quad	0x000000000000a010	# x^(1*128) mod G(x)
+	.quad	0x0000000000001faa	# x^(1*128+64) mod G(x)
+.Lfinal_fold_consts:
+	.quad	0x1368000000000000	# x^48 * (x^48 mod G(x))
+	.quad	0x2d56000000000000	# x^48 * (x^80 mod G(x))
+.Lbarrett_reduction_consts:
+	.quad	0x0000000000018bb7	# G(x)
+	.quad	0x00000001f65a57f8	# floor(x^48 / G(x))
 
 .section	.rodata.cst16.mask1, "aM", @progbits, 16
 .align	16
-mask1:
+.Lmask1:
 	.octa	0x80808080808080808080808080808080
 
 .section	.rodata.cst16.mask2, "aM", @progbits, 16
 .align	16
-mask2:
+.Lmask2:
 	.octa	0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
+
+.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
+.align	16
+.Lbswap_mask:
+	.octa	0x000102030405060708090A0B0C0D0E0F
 
-.section	.rodata.cst16.SHUF_MASK, "aM", @progbits, 16
+.section	.rodata.cst32.byteshift_table, "aM", @progbits, 32
 .align	16
-SHUF_MASK:
-	.octa	0x000102030405060708090A0B0C0D0E0F
-
-.section	.rodata.cst32.pshufb_shf_table, "aM", @progbits, 32
-.align	32
-pshufb_shf_table:
-	# use these values for shift constants for the pshufb instruction
-	# different alignments result in values as shown:
-	# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
-	# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
-	# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
-	# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
-	# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
-	# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
-	# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
-	# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
-	# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
-	# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
-	# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
-	# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
-	# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
-	# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
-	# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
-	.octa	0x8f8e8d8c8b8a89888786858483828100
-	.octa	0x000e0d0c0b0a09080706050403020100
+# For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len]
+# is the index vector to shift left by 'len' bytes, and is also {0x80, ...,
+# 0x80} XOR the index vector to shift right by '16 - len' bytes.
+.Lbyteshift_table:
+	.byte	 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
+	.byte	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
+	.byte	 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte	 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe , 0x0
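The new constants are x^N mod G(x) for the T10 DIF generator polynomial. A standalone C sketch (not kernel code) that regenerates them with plain GF(2) arithmetic; the printed values are expected to match the .Lfold_across_128_bytes_consts entries above:

#include <stdint.h>
#include <stdio.h>

/* Compute x^n mod G(x) over GF(2) for G(x) = 0x18bb7 (degree 16). */
static uint16_t xpow_mod_g(unsigned int n)
{
	uint32_t r = 1;			/* the polynomial "1" */

	while (n--) {
		r <<= 1;		/* multiply by x */
		if (r & 0x10000)	/* degree reached 16: reduce by G(x) */
			r ^= 0x18bb7;
	}
	return (uint16_t)r;
}

int main(void)
{
	/* Should print "6123 2295". */
	printf("%04x %04x\n", xpow_mod_g(8 * 128), xpow_mod_g(8 * 128 + 64));
	return 0;
}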
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index cd4df9322501..0e785c0b2354 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -33,18 +33,12 @@
 #include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 
-asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
-				size_t len);
+asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
 
 struct chksum_desc_ctx {
 	__u16 crc;
 };
 
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
 static int chksum_init(struct shash_desc *desc)
 {
 	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
@@ -59,7 +53,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data,
 {
 	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
 
-	if (irq_fpu_usable()) {
+	if (length >= 16 && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
 		kernel_fpu_end();
@@ -79,7 +73,7 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
 static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
 			  u8 *out)
 {
-	if (irq_fpu_usable()) {
+	if (len >= 16 && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
 		kernel_fpu_end();
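Buffers shorter than 16 bytes no longer reach crc_t10dif_pcl(), matching the "Assumes len >= 16" contract of the rewritten assembly. A hedged sketch of the full update path, assuming the non-FPU branch (elided in the hunk above) falls back to the table-driven crc_t10dif_generic() from lib/crc-t10dif:

static int chksum_update(struct shash_desc *desc, const u8 *data,
			 unsigned int length)
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

	if (length >= 16 && irq_fpu_usable()) {
		/* Fast path: PCLMULQDQ routine, needs >= 16 bytes. */
		kernel_fpu_begin();
		ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
		kernel_fpu_end();
	} else {
		/* Short inputs or FPU unavailable: generic table lookup. */
		ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
	}
	return 0;
}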
diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c
index 0dccdda1eb3a..7e600f8bcdad 100644
--- a/arch/x86/crypto/morus1280_glue.c
+++ b/arch/x86/crypto/morus1280_glue.c
@@ -85,31 +85,20 @@ static void crypto_morus1280_glue_process_ad(
 
 static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
 						struct morus1280_ops ops,
-						struct aead_request *req)
+						struct skcipher_walk *walk)
 {
-	struct skcipher_walk walk;
-	u8 *cursor_src, *cursor_dst;
-	unsigned int chunksize, base;
-
-	ops.skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		cursor_src = walk.src.virt.addr;
-		cursor_dst = walk.dst.virt.addr;
-		chunksize = walk.nbytes;
-
-		ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
-
-		base = chunksize & ~(MORUS1280_BLOCK_SIZE - 1);
-		cursor_src += base;
-		cursor_dst += base;
-		chunksize &= MORUS1280_BLOCK_SIZE - 1;
-
-		if (chunksize > 0)
-			ops.crypt_tail(state, cursor_src, cursor_dst,
-				       chunksize);
+	while (walk->nbytes >= MORUS1280_BLOCK_SIZE) {
+		ops.crypt_blocks(state, walk->src.virt.addr,
+				 walk->dst.virt.addr,
+				 round_down(walk->nbytes,
+					    MORUS1280_BLOCK_SIZE));
+		skcipher_walk_done(walk, walk->nbytes % MORUS1280_BLOCK_SIZE);
+	}
 
-		skcipher_walk_done(&walk, 0);
+	if (walk->nbytes) {
+		ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
+			       walk->nbytes);
+		skcipher_walk_done(walk, 0);
 	}
 }
 
@@ -147,12 +136,15 @@ static void crypto_morus1280_glue_crypt(struct aead_request *req,
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
 	struct morus1280_state state;
+	struct skcipher_walk walk;
+
+	ops.skcipher_walk_init(&walk, req, true);
 
 	kernel_fpu_begin();
 
 	ctx->ops->init(&state, &ctx->key, req->iv);
 	crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
-	crypto_morus1280_glue_process_crypt(&state, ops, req);
+	crypto_morus1280_glue_process_crypt(&state, ops, &walk);
 	ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
 
 	kernel_fpu_end();
diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c index 7b58fe4d9bd1..cb3a81732016 100644 --- a/arch/x86/crypto/morus640_glue.c +++ b/arch/x86/crypto/morus640_glue.c | |||
| @@ -85,31 +85,19 @@ static void crypto_morus640_glue_process_ad( | |||
| 85 | 85 | ||
| 86 | static void crypto_morus640_glue_process_crypt(struct morus640_state *state, | 86 | static void crypto_morus640_glue_process_crypt(struct morus640_state *state, |
| 87 | struct morus640_ops ops, | 87 | struct morus640_ops ops, |
| 88 | struct aead_request *req) | 88 | struct skcipher_walk *walk) |
| 89 | { | 89 | { |
| 90 | struct skcipher_walk walk; | 90 | while (walk->nbytes >= MORUS640_BLOCK_SIZE) { |
| 91 | u8 *cursor_src, *cursor_dst; | 91 | ops.crypt_blocks(state, walk->src.virt.addr, |
| 92 | unsigned int chunksize, base; | 92 | walk->dst.virt.addr, |
| 93 | 93 | round_down(walk->nbytes, MORUS640_BLOCK_SIZE)); | |
| 94 | ops.skcipher_walk_init(&walk, req, false); | 94 | skcipher_walk_done(walk, walk->nbytes % MORUS640_BLOCK_SIZE); |
| 95 | 95 | } | |
| 96 | while (walk.nbytes) { | ||
| 97 | cursor_src = walk.src.virt.addr; | ||
| 98 | cursor_dst = walk.dst.virt.addr; | ||
| 99 | chunksize = walk.nbytes; | ||
| 100 | |||
| 101 | ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize); | ||
| 102 | |||
| 103 | base = chunksize & ~(MORUS640_BLOCK_SIZE - 1); | ||
| 104 | cursor_src += base; | ||
| 105 | cursor_dst += base; | ||
| 106 | chunksize &= MORUS640_BLOCK_SIZE - 1; | ||
| 107 | |||
| 108 | if (chunksize > 0) | ||
| 109 | ops.crypt_tail(state, cursor_src, cursor_dst, | ||
| 110 | chunksize); | ||
| 111 | 96 | ||
| 112 | skcipher_walk_done(&walk, 0); | 97 | if (walk->nbytes) { |
| 98 | ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr, | ||
| 99 | walk->nbytes); | ||
| 100 | skcipher_walk_done(walk, 0); | ||
| 113 | } | 101 | } |
| 114 | } | 102 | } |
| 115 | 103 | ||
| @@ -143,12 +131,15 @@ static void crypto_morus640_glue_crypt(struct aead_request *req, | |||
| 143 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | 131 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); |
| 144 | struct morus640_ctx *ctx = crypto_aead_ctx(tfm); | 132 | struct morus640_ctx *ctx = crypto_aead_ctx(tfm); |
| 145 | struct morus640_state state; | 133 | struct morus640_state state; |
| 134 | struct skcipher_walk walk; | ||
| 135 | |||
| 136 | ops.skcipher_walk_init(&walk, req, true); | ||
| 146 | 137 | ||
| 147 | kernel_fpu_begin(); | 138 | kernel_fpu_begin(); |
| 148 | 139 | ||
| 149 | ctx->ops->init(&state, &ctx->key, req->iv); | 140 | ctx->ops->init(&state, &ctx->key, req->iv); |
| 150 | crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); | 141 | crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); |
| 151 | crypto_morus640_glue_process_crypt(&state, ops, req); | 142 | crypto_morus640_glue_process_crypt(&state, ops, &walk); |
| 152 | ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); | 143 | ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); |
| 153 | 144 | ||
| 154 | kernel_fpu_end(); | 145 | kernel_fpu_end(); |
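Both MORUS glue refactors above share one skcipher_walk pattern: whole blocks are consumed with round_down(), the sub-block remainder goes to crypt_tail(), and skcipher_walk_done() maps the next chunk. The walk is also now initialized with atomic=true before kernel_fpu_begin(), presumably because the walk steps inside the FPU region must not sleep. A minimal sketch of the pattern, with BLOCK_SIZE, do_blocks() and do_tail() as placeholders for the per-algorithm block size and ops callbacks:

    /* Sketch only; BLOCK_SIZE, do_blocks() and do_tail() stand in for
     * the algorithm-specific constants and ops callbacks. */
    static void process_crypt(struct skcipher_walk *walk)
    {
        while (walk->nbytes >= BLOCK_SIZE) {
            /* Handle every whole block this walk step mapped. */
            do_blocks(walk->src.virt.addr, walk->dst.virt.addr,
                      round_down(walk->nbytes, BLOCK_SIZE));
            /* Report the leftover bytes; the walk advances. */
            skcipher_walk_done(walk, walk->nbytes % BLOCK_SIZE);
        }
        if (walk->nbytes) {
            /* At most one final partial block remains. */
            do_tail(walk->src.virt.addr, walk->dst.virt.addr,
                    walk->nbytes);
            skcipher_walk_done(walk, 0);
        }
    }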
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index c88c670cb5fc..e6add74d78a5 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S | |||
| @@ -272,6 +272,10 @@ ENTRY(poly1305_block_sse2) | |||
| 272 | dec %rcx | 272 | dec %rcx |
| 273 | jnz .Ldoblock | 273 | jnz .Ldoblock |
| 274 | 274 | ||
| 275 | # Zeroing of key material | ||
| 276 | mov %rcx,0x00(%rsp) | ||
| 277 | mov %rcx,0x08(%rsp) | ||
| 278 | |||
| 275 | add $0x10,%rsp | 279 | add $0x10,%rsp |
| 276 | pop %r12 | 280 | pop %r12 |
| 277 | pop %rbx | 281 | pop %rbx |
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 374a19712e20..b684f0294f35 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c | |||
| @@ -2278,6 +2278,19 @@ void perf_check_microcode(void) | |||
| 2278 | x86_pmu.check_microcode(); | 2278 | x86_pmu.check_microcode(); |
| 2279 | } | 2279 | } |
| 2280 | 2280 | ||
| 2281 | static int x86_pmu_check_period(struct perf_event *event, u64 value) | ||
| 2282 | { | ||
| 2283 | if (x86_pmu.check_period && x86_pmu.check_period(event, value)) | ||
| 2284 | return -EINVAL; | ||
| 2285 | |||
| 2286 | if (value && x86_pmu.limit_period) { | ||
| 2287 | if (x86_pmu.limit_period(event, value) > value) | ||
| 2288 | return -EINVAL; | ||
| 2289 | } | ||
| 2290 | |||
| 2291 | return 0; | ||
| 2292 | } | ||
| 2293 | |||
| 2281 | static struct pmu pmu = { | 2294 | static struct pmu pmu = { |
| 2282 | .pmu_enable = x86_pmu_enable, | 2295 | .pmu_enable = x86_pmu_enable, |
| 2283 | .pmu_disable = x86_pmu_disable, | 2296 | .pmu_disable = x86_pmu_disable, |
| @@ -2302,6 +2315,7 @@ static struct pmu pmu = { | |||
| 2302 | .event_idx = x86_pmu_event_idx, | 2315 | .event_idx = x86_pmu_event_idx, |
| 2303 | .sched_task = x86_pmu_sched_task, | 2316 | .sched_task = x86_pmu_sched_task, |
| 2304 | .task_ctx_size = sizeof(struct x86_perf_task_context), | 2317 | .task_ctx_size = sizeof(struct x86_perf_task_context), |
| 2318 | .check_period = x86_pmu_check_period, | ||
| 2305 | }; | 2319 | }; |
| 2306 | 2320 | ||
| 2307 | void arch_perf_update_userpage(struct perf_event *event, | 2321 | void arch_perf_update_userpage(struct perf_event *event, |
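The new x86_pmu_check_period() hook rejects a PERF_EVENT_IOC_PERIOD value either when the vendor callback vetoes it or when limit_period() would silently raise it above what the user asked for. A hypothetical limit_period() makes the second branch concrete:

    /* Hypothetical callback for illustration only: a PMU whose
     * hardware cannot count periods shorter than 128. */
    static u64 example_limit_period(struct perf_event *event, u64 period)
    {
        return max_t(u64, period, 128);
    }

With this in place, x86_pmu_check_period(event, 64) sees limit_period() return 128 > 64 and fails the ioctl with -EINVAL instead of silently substituting a longer period behind the user's back.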
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 40e12cfc87f6..730978dff63f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c | |||
| @@ -3559,6 +3559,14 @@ static void free_excl_cntrs(int cpu) | |||
| 3559 | 3559 | ||
| 3560 | static void intel_pmu_cpu_dying(int cpu) | 3560 | static void intel_pmu_cpu_dying(int cpu) |
| 3561 | { | 3561 | { |
| 3562 | fini_debug_store_on_cpu(cpu); | ||
| 3563 | |||
| 3564 | if (x86_pmu.counter_freezing) | ||
| 3565 | disable_counter_freeze(); | ||
| 3566 | } | ||
| 3567 | |||
| 3568 | static void intel_pmu_cpu_dead(int cpu) | ||
| 3569 | { | ||
| 3562 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 3570 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
| 3563 | struct intel_shared_regs *pc; | 3571 | struct intel_shared_regs *pc; |
| 3564 | 3572 | ||
| @@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu) | |||
| 3570 | } | 3578 | } |
| 3571 | 3579 | ||
| 3572 | free_excl_cntrs(cpu); | 3580 | free_excl_cntrs(cpu); |
| 3573 | |||
| 3574 | fini_debug_store_on_cpu(cpu); | ||
| 3575 | |||
| 3576 | if (x86_pmu.counter_freezing) | ||
| 3577 | disable_counter_freeze(); | ||
| 3578 | } | 3581 | } |
| 3579 | 3582 | ||
| 3580 | static void intel_pmu_sched_task(struct perf_event_context *ctx, | 3583 | static void intel_pmu_sched_task(struct perf_event_context *ctx, |
| @@ -3584,6 +3587,11 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx, | |||
| 3584 | intel_pmu_lbr_sched_task(ctx, sched_in); | 3587 | intel_pmu_lbr_sched_task(ctx, sched_in); |
| 3585 | } | 3588 | } |
| 3586 | 3589 | ||
| 3590 | static int intel_pmu_check_period(struct perf_event *event, u64 value) | ||
| 3591 | { | ||
| 3592 | return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0; | ||
| 3593 | } | ||
| 3594 | |||
| 3587 | PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); | 3595 | PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); |
| 3588 | 3596 | ||
| 3589 | PMU_FORMAT_ATTR(ldlat, "config1:0-15"); | 3597 | PMU_FORMAT_ATTR(ldlat, "config1:0-15"); |
| @@ -3663,6 +3671,9 @@ static __initconst const struct x86_pmu core_pmu = { | |||
| 3663 | .cpu_prepare = intel_pmu_cpu_prepare, | 3671 | .cpu_prepare = intel_pmu_cpu_prepare, |
| 3664 | .cpu_starting = intel_pmu_cpu_starting, | 3672 | .cpu_starting = intel_pmu_cpu_starting, |
| 3665 | .cpu_dying = intel_pmu_cpu_dying, | 3673 | .cpu_dying = intel_pmu_cpu_dying, |
| 3674 | .cpu_dead = intel_pmu_cpu_dead, | ||
| 3675 | |||
| 3676 | .check_period = intel_pmu_check_period, | ||
| 3666 | }; | 3677 | }; |
| 3667 | 3678 | ||
| 3668 | static struct attribute *intel_pmu_attrs[]; | 3679 | static struct attribute *intel_pmu_attrs[]; |
| @@ -3703,8 +3714,12 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
| 3703 | .cpu_prepare = intel_pmu_cpu_prepare, | 3714 | .cpu_prepare = intel_pmu_cpu_prepare, |
| 3704 | .cpu_starting = intel_pmu_cpu_starting, | 3715 | .cpu_starting = intel_pmu_cpu_starting, |
| 3705 | .cpu_dying = intel_pmu_cpu_dying, | 3716 | .cpu_dying = intel_pmu_cpu_dying, |
| 3717 | .cpu_dead = intel_pmu_cpu_dead, | ||
| 3718 | |||
| 3706 | .guest_get_msrs = intel_guest_get_msrs, | 3719 | .guest_get_msrs = intel_guest_get_msrs, |
| 3707 | .sched_task = intel_pmu_sched_task, | 3720 | .sched_task = intel_pmu_sched_task, |
| 3721 | |||
| 3722 | .check_period = intel_pmu_check_period, | ||
| 3708 | }; | 3723 | }; |
| 3709 | 3724 | ||
| 3710 | static __init void intel_clovertown_quirk(void) | 3725 | static __init void intel_clovertown_quirk(void) |
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index c07bee31abe8..b10e04387f38 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c | |||
| @@ -1222,6 +1222,8 @@ static struct pci_driver snbep_uncore_pci_driver = { | |||
| 1222 | .id_table = snbep_uncore_pci_ids, | 1222 | .id_table = snbep_uncore_pci_ids, |
| 1223 | }; | 1223 | }; |
| 1224 | 1224 | ||
| 1225 | #define NODE_ID_MASK 0x7 | ||
| 1226 | |||
| 1225 | /* | 1227 | /* |
| 1226 | * build pci bus to socket mapping | 1228 | * build pci bus to socket mapping |
| 1227 | */ | 1229 | */ |
| @@ -1243,7 +1245,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool | |||
| 1243 | err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); | 1245 | err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); |
| 1244 | if (err) | 1246 | if (err) |
| 1245 | break; | 1247 | break; |
| 1246 | nodeid = config; | 1248 | nodeid = config & NODE_ID_MASK; |
| 1247 | /* get the Node ID mapping */ | 1249 | /* get the Node ID mapping */ |
| 1248 | err = pci_read_config_dword(ubox_dev, idmap_loc, &config); | 1250 | err = pci_read_config_dword(ubox_dev, idmap_loc, &config); |
| 1249 | if (err) | 1251 | if (err) |
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78d7b7031bfc..d46fd6754d92 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h | |||
| @@ -646,6 +646,11 @@ struct x86_pmu { | |||
| 646 | * Intel host/guest support (KVM) | 646 | * Intel host/guest support (KVM) |
| 647 | */ | 647 | */ |
| 648 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); | 648 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); |
| 649 | |||
| 650 | /* | ||
| 651 | * Check period value for PERF_EVENT_IOC_PERIOD ioctl. | ||
| 652 | */ | ||
| 653 | int (*check_period) (struct perf_event *event, u64 period); | ||
| 649 | }; | 654 | }; |
| 650 | 655 | ||
| 651 | struct x86_perf_task_context { | 656 | struct x86_perf_task_context { |
| @@ -857,7 +862,7 @@ static inline int amd_pmu_init(void) | |||
| 857 | 862 | ||
| 858 | #ifdef CONFIG_CPU_SUP_INTEL | 863 | #ifdef CONFIG_CPU_SUP_INTEL |
| 859 | 864 | ||
| 860 | static inline bool intel_pmu_has_bts(struct perf_event *event) | 865 | static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period) |
| 861 | { | 866 | { |
| 862 | struct hw_perf_event *hwc = &event->hw; | 867 | struct hw_perf_event *hwc = &event->hw; |
| 863 | unsigned int hw_event, bts_event; | 868 | unsigned int hw_event, bts_event; |
| @@ -868,7 +873,14 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) | |||
| 868 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; | 873 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
| 869 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); | 874 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
| 870 | 875 | ||
| 871 | return hw_event == bts_event && hwc->sample_period == 1; | 876 | return hw_event == bts_event && period == 1; |
| 877 | } | ||
| 878 | |||
| 879 | static inline bool intel_pmu_has_bts(struct perf_event *event) | ||
| 880 | { | ||
| 881 | struct hw_perf_event *hwc = &event->hw; | ||
| 882 | |||
| 883 | return intel_pmu_has_bts_period(event, hwc->sample_period); | ||
| 872 | } | 884 | } |
| 873 | 885 | ||
| 874 | int intel_pmu_save_and_restart(struct perf_event *event); | 886 | int intel_pmu_save_and_restart(struct perf_event *event); |
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index f65b78d32f5e..3c135084e1eb 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c | |||
| @@ -39,82 +39,10 @@ | |||
| 39 | static int load_aout_binary(struct linux_binprm *); | 39 | static int load_aout_binary(struct linux_binprm *); |
| 40 | static int load_aout_library(struct file *); | 40 | static int load_aout_library(struct file *); |
| 41 | 41 | ||
| 42 | #ifdef CONFIG_COREDUMP | ||
| 43 | static int aout_core_dump(struct coredump_params *); | ||
| 44 | |||
| 45 | static unsigned long get_dr(int n) | ||
| 46 | { | ||
| 47 | struct perf_event *bp = current->thread.ptrace_bps[n]; | ||
| 48 | return bp ? bp->hw.info.address : 0; | ||
| 49 | } | ||
| 50 | |||
| 51 | /* | ||
| 52 | * fill in the user structure for a core dump.. | ||
| 53 | */ | ||
| 54 | static void dump_thread32(struct pt_regs *regs, struct user32 *dump) | ||
| 55 | { | ||
| 56 | u32 fs, gs; | ||
| 57 | memset(dump, 0, sizeof(*dump)); | ||
| 58 | |||
| 59 | /* changed the size calculations - should hopefully work better. lbt */ | ||
| 60 | dump->magic = CMAGIC; | ||
| 61 | dump->start_code = 0; | ||
| 62 | dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); | ||
| 63 | dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; | ||
| 64 | dump->u_dsize = ((unsigned long) | ||
| 65 | (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; | ||
| 66 | dump->u_dsize -= dump->u_tsize; | ||
| 67 | dump->u_debugreg[0] = get_dr(0); | ||
| 68 | dump->u_debugreg[1] = get_dr(1); | ||
| 69 | dump->u_debugreg[2] = get_dr(2); | ||
| 70 | dump->u_debugreg[3] = get_dr(3); | ||
| 71 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
| 72 | dump->u_debugreg[7] = current->thread.ptrace_dr7; | ||
| 73 | |||
| 74 | if (dump->start_stack < 0xc0000000) { | ||
| 75 | unsigned long tmp; | ||
| 76 | |||
| 77 | tmp = (unsigned long) (0xc0000000 - dump->start_stack); | ||
| 78 | dump->u_ssize = tmp >> PAGE_SHIFT; | ||
| 79 | } | ||
| 80 | |||
| 81 | dump->regs.ebx = regs->bx; | ||
| 82 | dump->regs.ecx = regs->cx; | ||
| 83 | dump->regs.edx = regs->dx; | ||
| 84 | dump->regs.esi = regs->si; | ||
| 85 | dump->regs.edi = regs->di; | ||
| 86 | dump->regs.ebp = regs->bp; | ||
| 87 | dump->regs.eax = regs->ax; | ||
| 88 | dump->regs.ds = current->thread.ds; | ||
| 89 | dump->regs.es = current->thread.es; | ||
| 90 | savesegment(fs, fs); | ||
| 91 | dump->regs.fs = fs; | ||
| 92 | savesegment(gs, gs); | ||
| 93 | dump->regs.gs = gs; | ||
| 94 | dump->regs.orig_eax = regs->orig_ax; | ||
| 95 | dump->regs.eip = regs->ip; | ||
| 96 | dump->regs.cs = regs->cs; | ||
| 97 | dump->regs.eflags = regs->flags; | ||
| 98 | dump->regs.esp = regs->sp; | ||
| 99 | dump->regs.ss = regs->ss; | ||
| 100 | |||
| 101 | #if 1 /* FIXME */ | ||
| 102 | dump->u_fpvalid = 0; | ||
| 103 | #else | ||
| 104 | dump->u_fpvalid = dump_fpu(regs, &dump->i387); | ||
| 105 | #endif | ||
| 106 | } | ||
| 107 | |||
| 108 | #endif | ||
| 109 | |||
| 110 | static struct linux_binfmt aout_format = { | 42 | static struct linux_binfmt aout_format = { |
| 111 | .module = THIS_MODULE, | 43 | .module = THIS_MODULE, |
| 112 | .load_binary = load_aout_binary, | 44 | .load_binary = load_aout_binary, |
| 113 | .load_shlib = load_aout_library, | 45 | .load_shlib = load_aout_library, |
| 114 | #ifdef CONFIG_COREDUMP | ||
| 115 | .core_dump = aout_core_dump, | ||
| 116 | #endif | ||
| 117 | .min_coredump = PAGE_SIZE | ||
| 118 | }; | 46 | }; |
| 119 | 47 | ||
| 120 | static int set_brk(unsigned long start, unsigned long end) | 48 | static int set_brk(unsigned long start, unsigned long end) |
| @@ -126,91 +54,6 @@ static int set_brk(unsigned long start, unsigned long end) | |||
| 126 | return vm_brk(start, end - start); | 54 | return vm_brk(start, end - start); |
| 127 | } | 55 | } |
| 128 | 56 | ||
| 129 | #ifdef CONFIG_COREDUMP | ||
| 130 | /* | ||
| 131 | * These are the only things you should do on a core-file: use only these | ||
| 132 | * macros to write out all the necessary info. | ||
| 133 | */ | ||
| 134 | |||
| 135 | #include <linux/coredump.h> | ||
| 136 | |||
| 137 | #define START_DATA(u) (u.u_tsize << PAGE_SHIFT) | ||
| 138 | #define START_STACK(u) (u.start_stack) | ||
| 139 | |||
| 140 | /* | ||

| 141 | * Routine writes a core dump image in the current directory. | ||
| 142 | * Currently only a stub-function. | ||
| 143 | * | ||
| 144 | * Note that setuid/setgid files won't make a core-dump if the uid/gid | ||
| 145 | * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable" | ||
| 146 | * field, which also makes sure the core-dumps won't be recursive if the | ||
| 147 | * dumping of the process results in another error. | ||
| 148 | */ | ||
| 149 | |||
| 150 | static int aout_core_dump(struct coredump_params *cprm) | ||
| 151 | { | ||
| 152 | mm_segment_t fs; | ||
| 153 | int has_dumped = 0; | ||
| 154 | unsigned long dump_start, dump_size; | ||
| 155 | struct user32 dump; | ||
| 156 | |||
| 157 | fs = get_fs(); | ||
| 158 | set_fs(KERNEL_DS); | ||
| 159 | has_dumped = 1; | ||
| 160 | strncpy(dump.u_comm, current->comm, sizeof(current->comm)); | ||
| 161 | dump.u_ar0 = offsetof(struct user32, regs); | ||
| 162 | dump.signal = cprm->siginfo->si_signo; | ||
| 163 | dump_thread32(cprm->regs, &dump); | ||
| 164 | |||
| 165 | /* | ||
| 166 | * If the size of the dump file exceeds the rlimit, then see | ||
| 167 | * what would happen if we wrote the stack, but not the data | ||
| 168 | * area. | ||
| 169 | */ | ||
| 170 | if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > cprm->limit) | ||
| 171 | dump.u_dsize = 0; | ||
| 172 | |||
| 173 | /* Make sure we have enough room to write the stack and data areas. */ | ||
| 174 | if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit) | ||
| 175 | dump.u_ssize = 0; | ||
| 176 | |||
| 177 | /* make sure we actually have a data and stack area to dump */ | ||
| 178 | set_fs(USER_DS); | ||
| 179 | if (!access_ok((void *) (unsigned long)START_DATA(dump), | ||
| 180 | dump.u_dsize << PAGE_SHIFT)) | ||
| 181 | dump.u_dsize = 0; | ||
| 182 | if (!access_ok((void *) (unsigned long)START_STACK(dump), | ||
| 183 | dump.u_ssize << PAGE_SHIFT)) | ||
| 184 | dump.u_ssize = 0; | ||
| 185 | |||
| 186 | set_fs(KERNEL_DS); | ||
| 187 | /* struct user */ | ||
| 188 | if (!dump_emit(cprm, &dump, sizeof(dump))) | ||
| 189 | goto end_coredump; | ||
| 190 | /* Now dump all of the user data. Include malloced stuff as well */ | ||
| 191 | if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump))) | ||
| 192 | goto end_coredump; | ||
| 193 | /* now we start writing out the user space info */ | ||
| 194 | set_fs(USER_DS); | ||
| 195 | /* Dump the data area */ | ||
| 196 | if (dump.u_dsize != 0) { | ||
| 197 | dump_start = START_DATA(dump); | ||
| 198 | dump_size = dump.u_dsize << PAGE_SHIFT; | ||
| 199 | if (!dump_emit(cprm, (void *)dump_start, dump_size)) | ||
| 200 | goto end_coredump; | ||
| 201 | } | ||
| 202 | /* Now prepare to dump the stack area */ | ||
| 203 | if (dump.u_ssize != 0) { | ||
| 204 | dump_start = START_STACK(dump); | ||
| 205 | dump_size = dump.u_ssize << PAGE_SHIFT; | ||
| 206 | if (!dump_emit(cprm, (void *)dump_start, dump_size)) | ||
| 207 | goto end_coredump; | ||
| 208 | } | ||
| 209 | end_coredump: | ||
| 210 | set_fs(fs); | ||
| 211 | return has_dumped; | ||
| 212 | } | ||
| 213 | #endif | ||
| 214 | 57 | ||
| 215 | /* | 58 | /* |
| 216 | * create_aout_tables() parses the env- and arg-strings in new user | 59 | * create_aout_tables() parses the env- and arg-strings in new user |
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h deleted file mode 100644 index 7d3ece8bfb61..000000000000 --- a/arch/x86/include/asm/a.out-core.h +++ /dev/null | |||
| @@ -1,67 +0,0 @@ | |||
| 1 | /* a.out coredump register dumper | ||
| 2 | * | ||
| 3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #ifndef _ASM_X86_A_OUT_CORE_H | ||
| 13 | #define _ASM_X86_A_OUT_CORE_H | ||
| 14 | |||
| 15 | #ifdef __KERNEL__ | ||
| 16 | #ifdef CONFIG_X86_32 | ||
| 17 | |||
| 18 | #include <linux/user.h> | ||
| 19 | #include <linux/elfcore.h> | ||
| 20 | #include <linux/mm_types.h> | ||
| 21 | |||
| 22 | #include <asm/debugreg.h> | ||
| 23 | |||
| 24 | /* | ||
| 25 | * fill in the user structure for an a.out core dump | ||
| 26 | */ | ||
| 27 | static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) | ||
| 28 | { | ||
| 29 | /* changed the size calculations - should hopefully work better. lbt */ | ||
| 30 | dump->magic = CMAGIC; | ||
| 31 | dump->start_code = 0; | ||
| 32 | dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); | ||
| 33 | dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT; | ||
| 34 | dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1))) | ||
| 35 | >> PAGE_SHIFT; | ||
| 36 | dump->u_dsize -= dump->u_tsize; | ||
| 37 | dump->u_ssize = 0; | ||
| 38 | aout_dump_debugregs(dump); | ||
| 39 | |||
| 40 | if (dump->start_stack < TASK_SIZE) | ||
| 41 | dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) | ||
| 42 | >> PAGE_SHIFT; | ||
| 43 | |||
| 44 | dump->regs.bx = regs->bx; | ||
| 45 | dump->regs.cx = regs->cx; | ||
| 46 | dump->regs.dx = regs->dx; | ||
| 47 | dump->regs.si = regs->si; | ||
| 48 | dump->regs.di = regs->di; | ||
| 49 | dump->regs.bp = regs->bp; | ||
| 50 | dump->regs.ax = regs->ax; | ||
| 51 | dump->regs.ds = (u16)regs->ds; | ||
| 52 | dump->regs.es = (u16)regs->es; | ||
| 53 | dump->regs.fs = (u16)regs->fs; | ||
| 54 | dump->regs.gs = get_user_gs(regs); | ||
| 55 | dump->regs.orig_ax = regs->orig_ax; | ||
| 56 | dump->regs.ip = regs->ip; | ||
| 57 | dump->regs.cs = (u16)regs->cs; | ||
| 58 | dump->regs.flags = regs->flags; | ||
| 59 | dump->regs.sp = regs->sp; | ||
| 60 | dump->regs.ss = (u16)regs->ss; | ||
| 61 | |||
| 62 | dump->u_fpvalid = dump_fpu(regs, &dump->i387); | ||
| 63 | } | ||
| 64 | |||
| 65 | #endif /* CONFIG_X86_32 */ | ||
| 66 | #endif /* __KERNEL__ */ | ||
| 67 | #endif /* _ASM_X86_A_OUT_CORE_H */ | ||
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 705dafc2d11a..2bdbbbcfa393 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h | |||
| @@ -841,7 +841,7 @@ union hv_gpa_page_range { | |||
| 841 | * count equals the number of entries of union hv_gpa_page_range that | 841 | * count equals the number of entries of union hv_gpa_page_range that |
| 842 | * can be populated into the input parameter page. | 842 | * can be populated into the input parameter page. |
| 843 | */ | 843 | */ |
| 844 | #define HV_MAX_FLUSH_REP_COUNT (PAGE_SIZE - 2 * sizeof(u64) / \ | 844 | #define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) / \ |
| 845 | sizeof(union hv_gpa_page_range)) | 845 | sizeof(union hv_gpa_page_range)) |
| 846 | 846 | ||
| 847 | struct hv_guest_mapping_flush_list { | 847 | struct hv_guest_mapping_flush_list { |
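The added parentheses fix operator precedence: division binds tighter than subtraction, so the old macro computed PAGE_SIZE - (header / entry) and allowed far more entries than the 4 KiB input page holds. A standalone check, assuming 4096-byte pages and a 16-byte union hv_gpa_page_range:

    #include <stdio.h>

    int main(void)
    {
        unsigned long page = 4096;
        unsigned long hdr = 2 * sizeof(unsigned long long);   /* 16 */
        unsigned long entry = 16;   /* assumed size of the union */

        printf("old: %lu\n", page - hdr / entry);    /* 4095: '/' wins */
        printf("new: %lu\n", (page - hdr) / entry);  /* 255 entries fit */
        return 0;
    }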
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index d9a9993af882..9f15384c504a 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h | |||
| @@ -52,6 +52,8 @@ | |||
| 52 | 52 | ||
| 53 | #define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 | 53 | #define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 |
| 54 | 54 | ||
| 55 | #define INTEL_FAM6_ICELAKE_MOBILE 0x7E | ||
| 56 | |||
| 55 | /* "Small Core" Processors (Atom) */ | 57 | /* "Small Core" Processors (Atom) */ |
| 56 | 58 | ||
| 57 | #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ | 59 | #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4660ce90de7f..180373360e34 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -299,6 +299,7 @@ union kvm_mmu_extended_role { | |||
| 299 | unsigned int cr4_smap:1; | 299 | unsigned int cr4_smap:1; |
| 300 | unsigned int cr4_smep:1; | 300 | unsigned int cr4_smep:1; |
| 301 | unsigned int cr4_la57:1; | 301 | unsigned int cr4_la57:1; |
| 302 | unsigned int maxphyaddr:6; | ||
| 302 | }; | 303 | }; |
| 303 | }; | 304 | }; |
| 304 | 305 | ||
| @@ -397,6 +398,7 @@ struct kvm_mmu { | |||
| 397 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 398 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
| 398 | u64 *spte, const void *pte); | 399 | u64 *spte, const void *pte); |
| 399 | hpa_t root_hpa; | 400 | hpa_t root_hpa; |
| 401 | gpa_t root_cr3; | ||
| 400 | union kvm_mmu_role mmu_role; | 402 | union kvm_mmu_role mmu_role; |
| 401 | u8 root_level; | 403 | u8 root_level; |
| 402 | u8 shadow_root_level; | 404 | u8 shadow_root_level; |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 40616e805292..2779ace16d23 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
| @@ -1065,7 +1065,7 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
| 1065 | static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, | 1065 | static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, |
| 1066 | pmd_t *pmdp, pmd_t pmd) | 1066 | pmd_t *pmdp, pmd_t pmd) |
| 1067 | { | 1067 | { |
| 1068 | native_set_pmd(pmdp, pmd); | 1068 | set_pmd(pmdp, pmd); |
| 1069 | } | 1069 | } |
| 1070 | 1070 | ||
| 1071 | static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, | 1071 | static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 780f2b42c8ef..5e49a0acb5ee 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
| @@ -25,7 +25,6 @@ | |||
| 25 | #define KERNEL_DS MAKE_MM_SEG(-1UL) | 25 | #define KERNEL_DS MAKE_MM_SEG(-1UL) |
| 26 | #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) | 26 | #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) |
| 27 | 27 | ||
| 28 | #define get_ds() (KERNEL_DS) | ||
| 29 | #define get_fs() (current->thread.addr_limit) | 28 | #define get_fs() (current->thread.addr_limit) |
| 30 | static inline void set_fs(mm_segment_t fs) | 29 | static inline void set_fs(mm_segment_t fs) |
| 31 | { | 30 | { |
| @@ -284,7 +283,7 @@ do { \ | |||
| 284 | __put_user_goto(x, ptr, "l", "k", "ir", label); \ | 283 | __put_user_goto(x, ptr, "l", "k", "ir", label); \ |
| 285 | break; \ | 284 | break; \ |
| 286 | case 8: \ | 285 | case 8: \ |
| 287 | __put_user_goto_u64((__typeof__(*ptr))(x), ptr, label); \ | 286 | __put_user_goto_u64(x, ptr, label); \ |
| 288 | break; \ | 287 | break; \ |
| 289 | default: \ | 288 | default: \ |
| 290 | __put_user_bad(); \ | 289 | __put_user_bad(); \ |
| @@ -431,8 +430,10 @@ do { \ | |||
| 431 | ({ \ | 430 | ({ \ |
| 432 | __label__ __pu_label; \ | 431 | __label__ __pu_label; \ |
| 433 | int __pu_err = -EFAULT; \ | 432 | int __pu_err = -EFAULT; \ |
| 433 | __typeof__(*(ptr)) __pu_val; \ | ||
| 434 | __pu_val = x; \ | ||
| 434 | __uaccess_begin(); \ | 435 | __uaccess_begin(); \ |
| 435 | __put_user_size((x), (ptr), (size), __pu_label); \ | 436 | __put_user_size(__pu_val, (ptr), (size), __pu_label); \ |
| 436 | __pu_err = 0; \ | 437 | __pu_err = 0; \ |
| 437 | __pu_label: \ | 438 | __pu_label: \ |
| 438 | __uaccess_end(); \ | 439 | __uaccess_end(); \ |
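The temporary matters because of when the argument is evaluated: previously __put_user_size() received x inside the __uaccess_begin()/__uaccess_end() window, i.e. with SMAP's AC flag set, so an argument with side effects ran with user-space access enabled. Evaluating into the typed __pu_val first confines that window to the store itself, and also makes the case-8 cast removed above unnecessary. Illustration with a hypothetical callee:

    /* compute_value() is hypothetical.
     * Before: it executed between STAC and CLAC.
     * After:  it runs first; only the store sits inside the window. */
    __put_user(compute_value(), ptr);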
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index e652a7cc6186..3f697a9e3f59 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h | |||
| @@ -48,7 +48,8 @@ enum { | |||
| 48 | BIOS_STATUS_SUCCESS = 0, | 48 | BIOS_STATUS_SUCCESS = 0, |
| 49 | BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, | 49 | BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, |
| 50 | BIOS_STATUS_EINVAL = -EINVAL, | 50 | BIOS_STATUS_EINVAL = -EINVAL, |
| 51 | BIOS_STATUS_UNAVAIL = -EBUSY | 51 | BIOS_STATUS_UNAVAIL = -EBUSY, |
| 52 | BIOS_STATUS_ABORT = -EINTR, | ||
| 52 | }; | 53 | }; |
| 53 | 54 | ||
| 54 | /* Address map parameters */ | 55 | /* Address map parameters */ |
| @@ -167,4 +168,9 @@ extern long system_serial_number; | |||
| 167 | 168 | ||
| 168 | extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ | 169 | extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ |
| 169 | 170 | ||
| 171 | /* | ||
| 172 | * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details | ||
| 173 | */ | ||
| 174 | extern struct semaphore __efi_uv_runtime_lock; | ||
| 175 | |||
| 170 | #endif /* _ASM_X86_UV_BIOS_H */ | 176 | #endif /* _ASM_X86_UV_BIOS_H */ |
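Exporting the lock lets the UV BIOS call path serialize against other EFI runtime service users, with an interrupted sleep mapped to the new BIOS_STATUS_ABORT code. A sketch of the intended caller side, with uv_do_bios_call() standing in for the actual firmware call (the caller itself is outside this diff):

    /* Sketch only; uv_do_bios_call() is a placeholder. */
    s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
                     u64 a4, u64 a5)
    {
        s64 ret;

        if (down_interruptible(&__efi_uv_runtime_lock))
            return BIOS_STATUS_ABORT;
        ret = uv_do_bios_call(which, a1, a2, a3, a4, a5);
        up(&__efi_uv_runtime_lock);
        return ret;
    }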
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index f6648e9928b3..efe701b7c6ce 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild | |||
| @@ -3,3 +3,4 @@ include include/uapi/asm-generic/Kbuild.asm | |||
| 3 | generated-y += unistd_32.h | 3 | generated-y += unistd_32.h |
| 4 | generated-y += unistd_64.h | 4 | generated-y += unistd_64.h |
| 5 | generated-y += unistd_x32.h | 5 | generated-y += unistd_x32.h |
| 6 | generic-y += socket.h | ||
diff --git a/arch/x86/include/uapi/asm/socket.h b/arch/x86/include/uapi/asm/socket.h deleted file mode 100644 index 6b71384b9d8b..000000000000 --- a/arch/x86/include/uapi/asm/socket.h +++ /dev/null | |||
| @@ -1 +0,0 @@ | |||
| 1 | #include <asm-generic/socket.h> | ||
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 672c7225cb1b..6ce290c506d9 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c | |||
| @@ -784,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, | |||
| 784 | quirk_no_way_out(i, m, regs); | 784 | quirk_no_way_out(i, m, regs); |
| 785 | 785 | ||
| 786 | if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { | 786 | if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { |
| 787 | m->bank = i; | ||
| 787 | mce_read_aux(m, i); | 788 | mce_read_aux(m, i); |
| 788 | *msg = tmp; | 789 | *msg = tmp; |
| 789 | return 1; | 790 | return 1; |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index bbffa6c54697..c07958b59f50 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
| @@ -335,6 +335,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 335 | unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; | 335 | unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; |
| 336 | unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; | 336 | unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; |
| 337 | unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; | 337 | unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; |
| 338 | unsigned f_la57 = 0; | ||
| 338 | 339 | ||
| 339 | /* cpuid 1.edx */ | 340 | /* cpuid 1.edx */ |
| 340 | const u32 kvm_cpuid_1_edx_x86_features = | 341 | const u32 kvm_cpuid_1_edx_x86_features = |
| @@ -489,7 +490,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 489 | // TSC_ADJUST is emulated | 490 | // TSC_ADJUST is emulated |
| 490 | entry->ebx |= F(TSC_ADJUST); | 491 | entry->ebx |= F(TSC_ADJUST); |
| 491 | entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; | 492 | entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; |
| 493 | f_la57 = entry->ecx & F(LA57); | ||
| 492 | cpuid_mask(&entry->ecx, CPUID_7_ECX); | 494 | cpuid_mask(&entry->ecx, CPUID_7_ECX); |
| 495 | /* Set LA57 based on hardware capability. */ | ||
| 496 | entry->ecx |= f_la57; | ||
| 493 | entry->ecx |= f_umip; | 497 | entry->ecx |= f_umip; |
| 494 | /* PKU is not yet implemented for shadow paging. */ | 498 | /* PKU is not yet implemented for shadow paging. */ |
| 495 | if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) | 499 | if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index da9c42349b1f..f2d1d230d5b8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -3555,6 +3555,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
| 3555 | &invalid_list); | 3555 | &invalid_list); |
| 3556 | mmu->root_hpa = INVALID_PAGE; | 3556 | mmu->root_hpa = INVALID_PAGE; |
| 3557 | } | 3557 | } |
| 3558 | mmu->root_cr3 = 0; | ||
| 3558 | } | 3559 | } |
| 3559 | 3560 | ||
| 3560 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3561 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
| @@ -3610,6 +3611,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
| 3610 | vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); | 3611 | vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); |
| 3611 | } else | 3612 | } else |
| 3612 | BUG(); | 3613 | BUG(); |
| 3614 | vcpu->arch.mmu->root_cr3 = vcpu->arch.mmu->get_cr3(vcpu); | ||
| 3613 | 3615 | ||
| 3614 | return 0; | 3616 | return 0; |
| 3615 | } | 3617 | } |
| @@ -3618,10 +3620,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 3618 | { | 3620 | { |
| 3619 | struct kvm_mmu_page *sp; | 3621 | struct kvm_mmu_page *sp; |
| 3620 | u64 pdptr, pm_mask; | 3622 | u64 pdptr, pm_mask; |
| 3621 | gfn_t root_gfn; | 3623 | gfn_t root_gfn, root_cr3; |
| 3622 | int i; | 3624 | int i; |
| 3623 | 3625 | ||
| 3624 | root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT; | 3626 | root_cr3 = vcpu->arch.mmu->get_cr3(vcpu); |
| 3627 | root_gfn = root_cr3 >> PAGE_SHIFT; | ||
| 3625 | 3628 | ||
| 3626 | if (mmu_check_root(vcpu, root_gfn)) | 3629 | if (mmu_check_root(vcpu, root_gfn)) |
| 3627 | return 1; | 3630 | return 1; |
| @@ -3646,7 +3649,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 3646 | ++sp->root_count; | 3649 | ++sp->root_count; |
| 3647 | spin_unlock(&vcpu->kvm->mmu_lock); | 3650 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 3648 | vcpu->arch.mmu->root_hpa = root; | 3651 | vcpu->arch.mmu->root_hpa = root; |
| 3649 | return 0; | 3652 | goto set_root_cr3; |
| 3650 | } | 3653 | } |
| 3651 | 3654 | ||
| 3652 | /* | 3655 | /* |
| @@ -3712,6 +3715,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 3712 | vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); | 3715 | vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); |
| 3713 | } | 3716 | } |
| 3714 | 3717 | ||
| 3718 | set_root_cr3: | ||
| 3719 | vcpu->arch.mmu->root_cr3 = root_cr3; | ||
| 3720 | |||
| 3715 | return 0; | 3721 | return 0; |
| 3716 | } | 3722 | } |
| 3717 | 3723 | ||
| @@ -4163,7 +4169,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, | |||
| 4163 | struct kvm_mmu_root_info root; | 4169 | struct kvm_mmu_root_info root; |
| 4164 | struct kvm_mmu *mmu = vcpu->arch.mmu; | 4170 | struct kvm_mmu *mmu = vcpu->arch.mmu; |
| 4165 | 4171 | ||
| 4166 | root.cr3 = mmu->get_cr3(vcpu); | 4172 | root.cr3 = mmu->root_cr3; |
| 4167 | root.hpa = mmu->root_hpa; | 4173 | root.hpa = mmu->root_hpa; |
| 4168 | 4174 | ||
| 4169 | for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { | 4175 | for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { |
| @@ -4176,6 +4182,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, | |||
| 4176 | } | 4182 | } |
| 4177 | 4183 | ||
| 4178 | mmu->root_hpa = root.hpa; | 4184 | mmu->root_hpa = root.hpa; |
| 4185 | mmu->root_cr3 = root.cr3; | ||
| 4179 | 4186 | ||
| 4180 | return i < KVM_MMU_NUM_PREV_ROOTS; | 4187 | return i < KVM_MMU_NUM_PREV_ROOTS; |
| 4181 | } | 4188 | } |
| @@ -4770,6 +4777,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu) | |||
| 4770 | ext.cr4_pse = !!is_pse(vcpu); | 4777 | ext.cr4_pse = !!is_pse(vcpu); |
| 4771 | ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE); | 4778 | ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE); |
| 4772 | ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57); | 4779 | ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57); |
| 4780 | ext.maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
| 4773 | 4781 | ||
| 4774 | ext.valid = 1; | 4782 | ext.valid = 1; |
| 4775 | 4783 | ||
| @@ -5516,11 +5524,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) | |||
| 5516 | vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; | 5524 | vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; |
| 5517 | 5525 | ||
| 5518 | vcpu->arch.root_mmu.root_hpa = INVALID_PAGE; | 5526 | vcpu->arch.root_mmu.root_hpa = INVALID_PAGE; |
| 5527 | vcpu->arch.root_mmu.root_cr3 = 0; | ||
| 5519 | vcpu->arch.root_mmu.translate_gpa = translate_gpa; | 5528 | vcpu->arch.root_mmu.translate_gpa = translate_gpa; |
| 5520 | for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) | 5529 | for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) |
| 5521 | vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; | 5530 | vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; |
| 5522 | 5531 | ||
| 5523 | vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE; | 5532 | vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE; |
| 5533 | vcpu->arch.guest_mmu.root_cr3 = 0; | ||
| 5524 | vcpu->arch.guest_mmu.translate_gpa = translate_gpa; | 5534 | vcpu->arch.guest_mmu.translate_gpa = translate_gpa; |
| 5525 | for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) | 5535 | for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) |
| 5526 | vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; | 5536 | vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; |
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d8ea4ebd79e7..d737a51a53ca 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c | |||
| @@ -2473,6 +2473,10 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, | |||
| 2473 | (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) | 2473 | (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) |
| 2474 | return -EINVAL; | 2474 | return -EINVAL; |
| 2475 | 2475 | ||
| 2476 | if (!nested_cpu_has_preemption_timer(vmcs12) && | ||
| 2477 | nested_cpu_has_save_preemption_timer(vmcs12)) | ||
| 2478 | return -EINVAL; | ||
| 2479 | |||
| 2476 | if (nested_cpu_has_ept(vmcs12) && | 2480 | if (nested_cpu_has_ept(vmcs12) && |
| 2477 | !valid_ept_address(vcpu, vmcs12->ept_pointer)) | 2481 | !valid_ept_address(vcpu, vmcs12->ept_pointer)) |
| 2478 | return -EINVAL; | 2482 | return -EINVAL; |
| @@ -5557,9 +5561,11 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, | |||
| 5557 | * secondary cpu-based controls. Do not include those that | 5561 | * secondary cpu-based controls. Do not include those that |
| 5558 | * depend on CPUID bits, they are added later by vmx_cpuid_update. | 5562 | * depend on CPUID bits, they are added later by vmx_cpuid_update. |
| 5559 | */ | 5563 | */ |
| 5560 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, | 5564 | if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) |
| 5561 | msrs->secondary_ctls_low, | 5565 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, |
| 5562 | msrs->secondary_ctls_high); | 5566 | msrs->secondary_ctls_low, |
| 5567 | msrs->secondary_ctls_high); | ||
| 5568 | |||
| 5563 | msrs->secondary_ctls_low = 0; | 5569 | msrs->secondary_ctls_low = 0; |
| 5564 | msrs->secondary_ctls_high &= | 5570 | msrs->secondary_ctls_high &= |
| 5565 | SECONDARY_EXEC_DESC | | 5571 | SECONDARY_EXEC_DESC | |
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 95d618045001..30a6bcd735ec 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c | |||
| @@ -863,7 +863,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
| 863 | if (!entry_only) | 863 | if (!entry_only) |
| 864 | j = find_msr(&m->host, msr); | 864 | j = find_msr(&m->host, msr); |
| 865 | 865 | ||
| 866 | if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) { | 866 | if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) || |
| 867 | (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) { | ||
| 867 | printk_once(KERN_WARNING "Not enough msr switch entries. " | 868 | printk_once(KERN_WARNING "Not enough msr switch entries. " |
| 868 | "Can't add msr %x\n", msr); | 869 | "Can't add msr %x\n", msr); |
| 869 | return; | 870 | return; |
| @@ -1193,21 +1194,6 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 1193 | if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) | 1194 | if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) |
| 1194 | return; | 1195 | return; |
| 1195 | 1196 | ||
| 1196 | /* | ||
| 1197 | * First handle the simple case where no cmpxchg is necessary; just | ||
| 1198 | * allow posting non-urgent interrupts. | ||
| 1199 | * | ||
| 1200 | * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change | ||
| 1201 | * PI.NDST: pi_post_block will do it for us and the wakeup_handler | ||
| 1202 | * expects the VCPU to be on the blocked_vcpu_list that matches | ||
| 1203 | * PI.NDST. | ||
| 1204 | */ | ||
| 1205 | if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || | ||
| 1206 | vcpu->cpu == cpu) { | ||
| 1207 | pi_clear_sn(pi_desc); | ||
| 1208 | return; | ||
| 1209 | } | ||
| 1210 | |||
| 1211 | /* The full case. */ | 1197 | /* The full case. */ |
| 1212 | do { | 1198 | do { |
| 1213 | old.control = new.control = pi_desc->control; | 1199 | old.control = new.control = pi_desc->control; |
| @@ -1222,6 +1208,17 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 1222 | new.sn = 0; | 1208 | new.sn = 0; |
| 1223 | } while (cmpxchg64(&pi_desc->control, old.control, | 1209 | } while (cmpxchg64(&pi_desc->control, old.control, |
| 1224 | new.control) != old.control); | 1210 | new.control) != old.control); |
| 1211 | |||
| 1212 | /* | ||
| 1213 | * Clear SN before reading the bitmap. The VT-d firmware | ||
| 1214 | * writes the bitmap and reads SN atomically (5.2.3 in the | ||
| 1215 | * spec), so it doesn't really have a memory barrier that | ||
| 1216 | * pairs with this, but we cannot do that and we need one. | ||
| 1217 | */ | ||
| 1218 | smp_mb__after_atomic(); | ||
| 1219 | |||
| 1220 | if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS)) | ||
| 1221 | pi_set_on(pi_desc); | ||
| 1225 | } | 1222 | } |
| 1226 | 1223 | ||
| 1227 | /* | 1224 | /* |
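The barrier requirement around the PIR scan is easiest to see as two racing sequences; the pi_desc helpers are the ones touched in vmx.h below:

    /* Ordering sketch (not literal code):
     *
     *   CPU (vmx_vcpu_pi_load)        IOMMU / remote sender
     *   -----------------------       ----------------------
     *   clear SN (cmpxchg loop)       set vector bit in PIR
     *   smp_mb__after_atomic()        read SN; notify only if clear
     *   scan PIR; if any bit set,
     *           pi_set_on(pi_desc)
     *
     * Without the barrier the PIR read could be reordered before the
     * SN clear, and a vector posted in between would be noticed only
     * at some later event instead of on this vcpu load. */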
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 99328954c2fc..0ac0a64c7790 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h | |||
| @@ -337,16 +337,16 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) | |||
| 337 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); | 337 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); |
| 338 | } | 338 | } |
| 339 | 339 | ||
| 340 | static inline void pi_clear_sn(struct pi_desc *pi_desc) | 340 | static inline void pi_set_sn(struct pi_desc *pi_desc) |
| 341 | { | 341 | { |
| 342 | return clear_bit(POSTED_INTR_SN, | 342 | return set_bit(POSTED_INTR_SN, |
| 343 | (unsigned long *)&pi_desc->control); | 343 | (unsigned long *)&pi_desc->control); |
| 344 | } | 344 | } |
| 345 | 345 | ||
| 346 | static inline void pi_set_sn(struct pi_desc *pi_desc) | 346 | static inline void pi_set_on(struct pi_desc *pi_desc) |
| 347 | { | 347 | { |
| 348 | return set_bit(POSTED_INTR_SN, | 348 | set_bit(POSTED_INTR_ON, |
| 349 | (unsigned long *)&pi_desc->control); | 349 | (unsigned long *)&pi_desc->control); |
| 350 | } | 350 | } |
| 351 | 351 | ||
| 352 | static inline void pi_clear_on(struct pi_desc *pi_desc) | 352 | static inline void pi_clear_on(struct pi_desc *pi_desc) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e67ecf25e690..941f932373d0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -7801,7 +7801,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 7801 | * 1) We should set ->mode before checking ->requests. Please see | 7801 | * 1) We should set ->mode before checking ->requests. Please see |
| 7802 | * the comment in kvm_vcpu_exiting_guest_mode(). | 7802 | * the comment in kvm_vcpu_exiting_guest_mode(). |
| 7803 | * | 7803 | * |
| 7804 | * 2) For APICv, we should set ->mode before checking PIR.ON. This | 7804 | * 2) For APICv, we should set ->mode before checking PID.ON. This |
| 7805 | * pairs with the memory barrier implicit in pi_test_and_set_on | 7805 | * pairs with the memory barrier implicit in pi_test_and_set_on |
| 7806 | * (see vmx_deliver_posted_interrupt). | 7806 | * (see vmx_deliver_posted_interrupt). |
| 7807 | * | 7807 | * |
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 6521134057e8..3c4568f8fb28 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c | |||
| @@ -117,67 +117,12 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, | |||
| 117 | } | 117 | } |
| 118 | EXPORT_SYMBOL_GPL(ex_handler_fprestore); | 118 | EXPORT_SYMBOL_GPL(ex_handler_fprestore); |
| 119 | 119 | ||
| 120 | /* Helper to check whether a uaccess fault indicates a kernel bug. */ | ||
| 121 | static bool bogus_uaccess(struct pt_regs *regs, int trapnr, | ||
| 122 | unsigned long fault_addr) | ||
| 123 | { | ||
| 124 | /* This is the normal case: #PF with a fault address in userspace. */ | ||
| 125 | if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX) | ||
| 126 | return false; | ||
| 127 | |||
| 128 | /* | ||
| 129 | * This code can be reached for machine checks, but only if the #MC | ||
| 130 | * handler has already decided that it looks like a candidate for fixup. | ||
| 131 | * This e.g. happens when attempting to access userspace memory which | ||
| 132 | * the CPU can't access because of uncorrectable bad memory. | ||
| 133 | */ | ||
| 134 | if (trapnr == X86_TRAP_MC) | ||
| 135 | return false; | ||
| 136 | |||
| 137 | /* | ||
| 138 | * There are two remaining exception types we might encounter here: | ||
| 139 | * - #PF for faulting accesses to kernel addresses | ||
| 140 | * - #GP for faulting accesses to noncanonical addresses | ||
| 141 | * Complain about anything else. | ||
| 142 | */ | ||
| 143 | if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) { | ||
| 144 | WARN(1, "unexpected trap %d in uaccess\n", trapnr); | ||
| 145 | return false; | ||
| 146 | } | ||
| 147 | |||
| 148 | /* | ||
| 149 | * This is a faulting memory access in kernel space, on a kernel | ||
| 150 | * address, in a usercopy function. This can e.g. be caused by improper | ||
| 151 | * use of helpers like __put_user and by improper attempts to access | ||
| 152 | * userspace addresses in KERNEL_DS regions. | ||
| 153 | * The one (semi-)legitimate exception are probe_kernel_{read,write}(), | ||
| 154 | * which can be invoked from places like kgdb, /dev/mem (for reading) | ||
| 155 | * and privileged BPF code (for reading). | ||
| 156 | * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag | ||
| 157 | * to tell us that faulting on kernel addresses, and even noncanonical | ||
| 158 | * addresses, in a userspace accessor does not necessarily imply a | ||
| 159 | * kernel bug, root might just be doing weird stuff. | ||
| 160 | */ | ||
| 161 | if (current->kernel_uaccess_faults_ok) | ||
| 162 | return false; | ||
| 163 | |||
| 164 | /* This is bad. Refuse the fixup so that we go into die(). */ | ||
| 165 | if (trapnr == X86_TRAP_PF) { | ||
| 166 | pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n", | ||
| 167 | fault_addr); | ||
| 168 | } else { | ||
| 169 | pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n"); | ||
| 170 | } | ||
| 171 | return true; | ||
| 172 | } | ||
| 173 | |||
| 174 | __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup, | 120 | __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup, |
| 175 | struct pt_regs *regs, int trapnr, | 121 | struct pt_regs *regs, int trapnr, |
| 176 | unsigned long error_code, | 122 | unsigned long error_code, |
| 177 | unsigned long fault_addr) | 123 | unsigned long fault_addr) |
| 178 | { | 124 | { |
| 179 | if (bogus_uaccess(regs, trapnr, fault_addr)) | 125 | WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?"); |
| 180 | return false; | ||
| 181 | regs->ip = ex_fixup_addr(fixup); | 126 | regs->ip = ex_fixup_addr(fixup); |
| 182 | return true; | 127 | return true; |
| 183 | } | 128 | } |
| @@ -188,8 +133,6 @@ __visible bool ex_handler_ext(const struct exception_table_entry *fixup, | |||
| 188 | unsigned long error_code, | 133 | unsigned long error_code, |
| 189 | unsigned long fault_addr) | 134 | unsigned long fault_addr) |
| 190 | { | 135 | { |
| 191 | if (bogus_uaccess(regs, trapnr, fault_addr)) | ||
| 192 | return false; | ||
| 193 | /* Special hack for uaccess_err */ | 136 | /* Special hack for uaccess_err */ |
| 194 | current->thread.uaccess_err = 1; | 137 | current->thread.uaccess_err = 1; |
| 195 | regs->ip = ex_fixup_addr(fixup); | 138 | regs->ip = ex_fixup_addr(fixup); |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4f8972311a77..14e6119838a6 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
| @@ -230,6 +230,29 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn) | |||
| 230 | 230 | ||
| 231 | #endif | 231 | #endif |
| 232 | 232 | ||
| 233 | /* | ||
| 234 | * See set_mce_nospec(). | ||
| 235 | * | ||
| 236 | * Machine check recovery code needs to change cache mode of poisoned pages to | ||
| 237 | * UC to avoid speculative access logging another error. But passing the | ||
| 238 | * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a | ||
| 239 | * speculative access. So we cheat and flip the top bit of the address. This | ||
| 240 | * works fine for the code that updates the page tables. But at the end of the | ||
| 241 | * process we need to flush the TLB and cache and the non-canonical address | ||
| 242 | * causes a #GP fault when used by the INVLPG and CLFLUSH instructions. | ||
| 243 | * | ||
| 244 | * But in the common case we already have a canonical address. This code | ||
| 245 | * will fix the top bit if needed and is a no-op otherwise. | ||
| 246 | */ | ||
| 247 | static inline unsigned long fix_addr(unsigned long addr) | ||
| 248 | { | ||
| 249 | #ifdef CONFIG_X86_64 | ||
| 250 | return (long)(addr << 1) >> 1; | ||
| 251 | #else | ||
| 252 | return addr; | ||
| 253 | #endif | ||
| 254 | } | ||
| 255 | |||
| 233 | static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx) | 256 | static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx) |
| 234 | { | 257 | { |
| 235 | if (cpa->flags & CPA_PAGES_ARRAY) { | 258 | if (cpa->flags & CPA_PAGES_ARRAY) { |
| @@ -313,7 +336,7 @@ void __cpa_flush_tlb(void *data) | |||
| 313 | unsigned int i; | 336 | unsigned int i; |
| 314 | 337 | ||
| 315 | for (i = 0; i < cpa->numpages; i++) | 338 | for (i = 0; i < cpa->numpages; i++) |
| 316 | __flush_tlb_one_kernel(__cpa_addr(cpa, i)); | 339 | __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); |
| 317 | } | 340 | } |
| 318 | 341 | ||
| 319 | static void cpa_flush(struct cpa_data *data, int cache) | 342 | static void cpa_flush(struct cpa_data *data, int cache) |
| @@ -347,7 +370,7 @@ static void cpa_flush(struct cpa_data *data, int cache) | |||
| 347 | * Only flush present addresses: | 370 | * Only flush present addresses: |
| 348 | */ | 371 | */ |
| 349 | if (pte && (pte_val(*pte) & _PAGE_PRESENT)) | 372 | if (pte && (pte_val(*pte) & _PAGE_PRESENT)) |
| 350 | clflush_cache_range_opt((void *)addr, PAGE_SIZE); | 373 | clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE); |
| 351 | } | 374 | } |
| 352 | mb(); | 375 | mb(); |
| 353 | } | 376 | } |
| @@ -1627,29 +1650,6 @@ out: | |||
| 1627 | return ret; | 1650 | return ret; |
| 1628 | } | 1651 | } |
| 1629 | 1652 | ||
| 1630 | /* | ||
| 1631 | * Machine check recovery code needs to change cache mode of poisoned | ||
| 1632 | * pages to UC to avoid speculative access logging another error. But | ||
| 1633 | * passing the address of the 1:1 mapping to set_memory_uc() is a fine | ||
| 1634 | * way to encourage a speculative access. So we cheat and flip the top | ||
| 1635 | * bit of the address. This works fine for the code that updates the | ||
| 1636 | * page tables. But at the end of the process we need to flush the cache | ||
| 1637 | * and the non-canonical address causes a #GP fault when used by the | ||
| 1638 | * CLFLUSH instruction. | ||
| 1639 | * | ||
| 1640 | * But in the common case we already have a canonical address. This code | ||
| 1641 | * will fix the top bit if needed and is a no-op otherwise. | ||
| 1642 | */ | ||
| 1643 | static inline unsigned long make_addr_canonical_again(unsigned long addr) | ||
| 1644 | { | ||
| 1645 | #ifdef CONFIG_X86_64 | ||
| 1646 | return (long)(addr << 1) >> 1; | ||
| 1647 | #else | ||
| 1648 | return addr; | ||
| 1649 | #endif | ||
| 1650 | } | ||
| 1651 | |||
| 1652 | |||
| 1653 | static int change_page_attr_set_clr(unsigned long *addr, int numpages, | 1653 | static int change_page_attr_set_clr(unsigned long *addr, int numpages, |
| 1654 | pgprot_t mask_set, pgprot_t mask_clr, | 1654 | pgprot_t mask_set, pgprot_t mask_clr, |
| 1655 | int force_split, int in_flag, | 1655 | int force_split, int in_flag, |
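fix_addr() above is plain sign extension: the left shift discards bit 63 and the arithmetic right shift duplicates bit 62 back into it, so a canonical address (bits 63:47 all equal under 4-level paging) is untouched while a poison address with a flipped top bit is repaired. A quick demonstration of the identity:

    #include <stdio.h>

    static unsigned long fix_addr(unsigned long addr)
    {
        return (long)(addr << 1) >> 1;  /* sign-extend bit 62 into bit 63 */
    }

    int main(void)
    {
        unsigned long canon  = 0xffff888000001000UL;  /* canonical */
        unsigned long poison = canon & ~(1UL << 63);  /* top bit flipped */

        printf("%lx\n", fix_addr(canon));   /* ffff888000001000, unchanged */
        printf("%lx\n", fix_addr(poison));  /* ffff888000001000, restored */
        return 0;
    }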
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5542303c43d9..afabf597c855 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
| @@ -881,20 +881,41 @@ xadd: if (is_imm8(insn->off)) | |||
| 881 | case BPF_JMP | BPF_JSLT | BPF_X: | 881 | case BPF_JMP | BPF_JSLT | BPF_X: |
| 882 | case BPF_JMP | BPF_JSGE | BPF_X: | 882 | case BPF_JMP | BPF_JSGE | BPF_X: |
| 883 | case BPF_JMP | BPF_JSLE | BPF_X: | 883 | case BPF_JMP | BPF_JSLE | BPF_X: |
| 884 | case BPF_JMP32 | BPF_JEQ | BPF_X: | ||
| 885 | case BPF_JMP32 | BPF_JNE | BPF_X: | ||
| 886 | case BPF_JMP32 | BPF_JGT | BPF_X: | ||
| 887 | case BPF_JMP32 | BPF_JLT | BPF_X: | ||
| 888 | case BPF_JMP32 | BPF_JGE | BPF_X: | ||
| 889 | case BPF_JMP32 | BPF_JLE | BPF_X: | ||
| 890 | case BPF_JMP32 | BPF_JSGT | BPF_X: | ||
| 891 | case BPF_JMP32 | BPF_JSLT | BPF_X: | ||
| 892 | case BPF_JMP32 | BPF_JSGE | BPF_X: | ||
| 893 | case BPF_JMP32 | BPF_JSLE | BPF_X: | ||
| 884 | /* cmp dst_reg, src_reg */ | 894 | /* cmp dst_reg, src_reg */ |
| 885 | EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39, | 895 | if (BPF_CLASS(insn->code) == BPF_JMP) |
| 886 | add_2reg(0xC0, dst_reg, src_reg)); | 896 | EMIT1(add_2mod(0x48, dst_reg, src_reg)); |
| 897 | else if (is_ereg(dst_reg) || is_ereg(src_reg)) | ||
| 898 | EMIT1(add_2mod(0x40, dst_reg, src_reg)); | ||
| 899 | EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg)); | ||
| 887 | goto emit_cond_jmp; | 900 | goto emit_cond_jmp; |
| 888 | 901 | ||
| 889 | case BPF_JMP | BPF_JSET | BPF_X: | 902 | case BPF_JMP | BPF_JSET | BPF_X: |
| 903 | case BPF_JMP32 | BPF_JSET | BPF_X: | ||
| 890 | /* test dst_reg, src_reg */ | 904 | /* test dst_reg, src_reg */ |
| 891 | EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85, | 905 | if (BPF_CLASS(insn->code) == BPF_JMP) |
| 892 | add_2reg(0xC0, dst_reg, src_reg)); | 906 | EMIT1(add_2mod(0x48, dst_reg, src_reg)); |
| 907 | else if (is_ereg(dst_reg) || is_ereg(src_reg)) | ||
| 908 | EMIT1(add_2mod(0x40, dst_reg, src_reg)); | ||
| 909 | EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg)); | ||
| 893 | goto emit_cond_jmp; | 910 | goto emit_cond_jmp; |
| 894 | 911 | ||
| 895 | case BPF_JMP | BPF_JSET | BPF_K: | 912 | case BPF_JMP | BPF_JSET | BPF_K: |
| 913 | case BPF_JMP32 | BPF_JSET | BPF_K: | ||
| 896 | /* test dst_reg, imm32 */ | 914 | /* test dst_reg, imm32 */ |
| 897 | EMIT1(add_1mod(0x48, dst_reg)); | 915 | if (BPF_CLASS(insn->code) == BPF_JMP) |
| 916 | EMIT1(add_1mod(0x48, dst_reg)); | ||
| 917 | else if (is_ereg(dst_reg)) | ||
| 918 | EMIT1(add_1mod(0x40, dst_reg)); | ||
| 898 | EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); | 919 | EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); |
| 899 | goto emit_cond_jmp; | 920 | goto emit_cond_jmp; |
| 900 | 921 | ||
| @@ -908,8 +929,21 @@ xadd: if (is_imm8(insn->off)) | |||
| 908 | case BPF_JMP | BPF_JSLT | BPF_K: | 929 | case BPF_JMP | BPF_JSLT | BPF_K: |
| 909 | case BPF_JMP | BPF_JSGE | BPF_K: | 930 | case BPF_JMP | BPF_JSGE | BPF_K: |
| 910 | case BPF_JMP | BPF_JSLE | BPF_K: | 931 | case BPF_JMP | BPF_JSLE | BPF_K: |
| 932 | case BPF_JMP32 | BPF_JEQ | BPF_K: | ||
| 933 | case BPF_JMP32 | BPF_JNE | BPF_K: | ||
| 934 | case BPF_JMP32 | BPF_JGT | BPF_K: | ||
| 935 | case BPF_JMP32 | BPF_JLT | BPF_K: | ||
| 936 | case BPF_JMP32 | BPF_JGE | BPF_K: | ||
| 937 | case BPF_JMP32 | BPF_JLE | BPF_K: | ||
| 938 | case BPF_JMP32 | BPF_JSGT | BPF_K: | ||
| 939 | case BPF_JMP32 | BPF_JSLT | BPF_K: | ||
| 940 | case BPF_JMP32 | BPF_JSGE | BPF_K: | ||
| 941 | case BPF_JMP32 | BPF_JSLE | BPF_K: | ||
| 911 | /* cmp dst_reg, imm8/32 */ | 942 | /* cmp dst_reg, imm8/32 */ |
| 912 | EMIT1(add_1mod(0x48, dst_reg)); | 943 | if (BPF_CLASS(insn->code) == BPF_JMP) |
| 944 | EMIT1(add_1mod(0x48, dst_reg)); | ||
| 945 | else if (is_ereg(dst_reg)) | ||
| 946 | EMIT1(add_1mod(0x40, dst_reg)); | ||
| 913 | 947 | ||
| 914 | if (is_imm8(imm32)) | 948 | if (is_imm8(imm32)) |
| 915 | EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); | 949 | EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); |
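Every JMP32 case above reduces to one prefix rule: BPF_JMP compares always carry REX.W (base 0x48) for a 64-bit operation, while BPF_JMP32 emits a plain REX (base 0x40) only when an extended register r8-r15 is involved, and no prefix otherwise, since 32-bit operand size is the encoding default. Sample encodings of the cmp opcode 0x39 illustrate the three branches:

    /* cmp rax, rbx  -> 48 39 d8   BPF_JMP,   REX.W forces 64-bit
     * cmp eax, ebx  ->    39 d8   BPF_JMP32, no REX needed
     * cmp r8d, r9d  -> 45 39 c8   BPF_JMP32, REX.RB for r8/r9 only
     */

The bpf_jit_comp32.c hunks that follow solve the same problem differently: on 32-bit x86 a BPF_JMP compare needs an extra high-word cmp, which the is_jmp64 flag simply skips for JMP32.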
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 8f6cc71e0848..0d9cdffce6ac 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c | |||
| @@ -2072,7 +2072,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 2072 | case BPF_JMP | BPF_JSGT | BPF_X: | 2072 | case BPF_JMP | BPF_JSGT | BPF_X: |
| 2073 | case BPF_JMP | BPF_JSLE | BPF_X: | 2073 | case BPF_JMP | BPF_JSLE | BPF_X: |
| 2074 | case BPF_JMP | BPF_JSLT | BPF_X: | 2074 | case BPF_JMP | BPF_JSLT | BPF_X: |
| 2075 | case BPF_JMP | BPF_JSGE | BPF_X: { | 2075 | case BPF_JMP | BPF_JSGE | BPF_X: |
| 2076 | case BPF_JMP32 | BPF_JEQ | BPF_X: | ||
| 2077 | case BPF_JMP32 | BPF_JNE | BPF_X: | ||
| 2078 | case BPF_JMP32 | BPF_JGT | BPF_X: | ||
| 2079 | case BPF_JMP32 | BPF_JLT | BPF_X: | ||
| 2080 | case BPF_JMP32 | BPF_JGE | BPF_X: | ||
| 2081 | case BPF_JMP32 | BPF_JLE | BPF_X: | ||
| 2082 | case BPF_JMP32 | BPF_JSGT | BPF_X: | ||
| 2083 | case BPF_JMP32 | BPF_JSLE | BPF_X: | ||
| 2084 | case BPF_JMP32 | BPF_JSLT | BPF_X: | ||
| 2085 | case BPF_JMP32 | BPF_JSGE | BPF_X: { | ||
| 2086 | bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; | ||
| 2076 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | 2087 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; |
| 2077 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | 2088 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; |
| 2078 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | 2089 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; |
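
The ten new case labels above only change how an instruction is dispatched; from the program's side a 32-bit conditional jump is the same struct bpf_insn with the JMP32 class bits. A hypothetical example (register, immediate, and offset values are illustrative; BPF_JMP32 is assumed to come from the matching uapi change in this series), meaning "if ((u32)R1 < 100) skip the next 5 instructions":

        #include <linux/bpf.h>  /* struct bpf_insn, BPF_JMP32, BPF_JLT, ... */

        static const struct bpf_insn jlt32 = {
                .code    = BPF_JMP32 | BPF_JLT | BPF_K,
                .dst_reg = BPF_REG_1,
                .imm     = 100, /* compared against the low 32 bits of R1 */
                .off     = 5,   /* forward offset, in instructions */
        };
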
| @@ -2081,25 +2092,35 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 2081 | if (dstk) { | 2092 | if (dstk) { |
| 2082 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | 2093 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), |
| 2083 | STACK_VAR(dst_lo)); | 2094 | STACK_VAR(dst_lo)); |
| 2084 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | 2095 | if (is_jmp64) |
| 2085 | STACK_VAR(dst_hi)); | 2096 | EMIT3(0x8B, |
| 2097 | add_2reg(0x40, IA32_EBP, | ||
| 2098 | IA32_EDX), | ||
| 2099 | STACK_VAR(dst_hi)); | ||
| 2086 | } | 2100 | } |
| 2087 | 2101 | ||
| 2088 | if (sstk) { | 2102 | if (sstk) { |
| 2089 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | 2103 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), |
| 2090 | STACK_VAR(src_lo)); | 2104 | STACK_VAR(src_lo)); |
| 2091 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | 2105 | if (is_jmp64) |
| 2092 | STACK_VAR(src_hi)); | 2106 | EMIT3(0x8B, |
| 2107 | add_2reg(0x40, IA32_EBP, | ||
| 2108 | IA32_EBX), | ||
| 2109 | STACK_VAR(src_hi)); | ||
| 2093 | } | 2110 | } |
| 2094 | 2111 | ||
| 2095 | /* cmp dreg_hi,sreg_hi */ | 2112 | if (is_jmp64) { |
| 2096 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | 2113 | /* cmp dreg_hi,sreg_hi */ |
| 2097 | EMIT2(IA32_JNE, 2); | 2114 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); |
| 2115 | EMIT2(IA32_JNE, 2); | ||
| 2116 | } | ||
| 2098 | /* cmp dreg_lo,sreg_lo */ | 2117 | /* cmp dreg_lo,sreg_lo */ |
| 2099 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | 2118 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); |
| 2100 | goto emit_cond_jmp; | 2119 | goto emit_cond_jmp; |
| 2101 | } | 2120 | } |
| 2102 | case BPF_JMP | BPF_JSET | BPF_X: { | 2121 | case BPF_JMP | BPF_JSET | BPF_X: |
| 2122 | case BPF_JMP32 | BPF_JSET | BPF_X: { | ||
| 2123 | bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; | ||
| 2103 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | 2124 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; |
| 2104 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | 2125 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; |
| 2105 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | 2126 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; |
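
The two-byte jump in the 64-bit path of the previous hunk deserves a comment: EMIT2(IA32_JNE, 2) skips exactly the two-byte "cmp dreg_lo,sreg_lo" that follows it, so the flags reaching the shared emit_cond_jmp come from the high-word compare whenever the high words differ, and from the low-word compare otherwise. Modeled in plain C for the unsigned predicates (the signed ones differ only in how the high words are ordered):

        #include <stdint.h>

        /* What the "cmp hi / jne +2 / cmp lo" sequence computes for an
         * unsigned 64-bit compare split across 32-bit halves. */
        static int cmp64_unsigned(uint32_t dhi, uint32_t dlo,
                                  uint32_t shi, uint32_t slo)
        {
                if (dhi != shi)                 /* jne skips the low cmp */
                        return dhi < shi ? -1 : 1;
                return dlo < slo ? -1 : (dlo > slo ? 1 : 0);
        }
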
| @@ -2108,15 +2129,21 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 2108 | if (dstk) { | 2129 | if (dstk) { |
| 2109 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | 2130 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), |
| 2110 | STACK_VAR(dst_lo)); | 2131 | STACK_VAR(dst_lo)); |
| 2111 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | 2132 | if (is_jmp64) |
| 2112 | STACK_VAR(dst_hi)); | 2133 | EMIT3(0x8B, |
| 2134 | add_2reg(0x40, IA32_EBP, | ||
| 2135 | IA32_EDX), | ||
| 2136 | STACK_VAR(dst_hi)); | ||
| 2113 | } | 2137 | } |
| 2114 | 2138 | ||
| 2115 | if (sstk) { | 2139 | if (sstk) { |
| 2116 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | 2140 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), |
| 2117 | STACK_VAR(src_lo)); | 2141 | STACK_VAR(src_lo)); |
| 2118 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | 2142 | if (is_jmp64) |
| 2119 | STACK_VAR(src_hi)); | 2143 | EMIT3(0x8B, |
| 2144 | add_2reg(0x40, IA32_EBP, | ||
| 2145 | IA32_EBX), | ||
| 2146 | STACK_VAR(src_hi)); | ||
| 2120 | } | 2147 | } |
| 2121 | /* and dreg_lo,sreg_lo */ | 2148 | /* and dreg_lo,sreg_lo */ |
| 2122 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | 2149 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); |
| @@ -2126,32 +2153,39 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 2126 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | 2153 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); |
| 2127 | goto emit_cond_jmp; | 2154 | goto emit_cond_jmp; |
| 2128 | } | 2155 | } |
| 2129 | case BPF_JMP | BPF_JSET | BPF_K: { | 2156 | case BPF_JMP | BPF_JSET | BPF_K: |
| 2130 | u32 hi; | 2157 | case BPF_JMP32 | BPF_JSET | BPF_K: { |
| 2158 | bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; | ||
| 2131 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | 2159 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; |
| 2132 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | 2160 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; |
| 2133 | u8 sreg_lo = IA32_ECX; | 2161 | u8 sreg_lo = IA32_ECX; |
| 2134 | u8 sreg_hi = IA32_EBX; | 2162 | u8 sreg_hi = IA32_EBX; |
| 2163 | u32 hi; | ||
| 2135 | 2164 | ||
| 2136 | if (dstk) { | 2165 | if (dstk) { |
| 2137 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | 2166 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), |
| 2138 | STACK_VAR(dst_lo)); | 2167 | STACK_VAR(dst_lo)); |
| 2139 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | 2168 | if (is_jmp64) |
| 2140 | STACK_VAR(dst_hi)); | 2169 | EMIT3(0x8B, |
| 2170 | add_2reg(0x40, IA32_EBP, | ||
| 2171 | IA32_EDX), | ||
| 2172 | STACK_VAR(dst_hi)); | ||
| 2141 | } | 2173 | } |
| 2142 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
| 2143 | 2174 | ||
| 2144 | /* mov ecx,imm32 */ | 2175 | /* mov ecx,imm32 */ |
| 2145 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | 2176 | EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32); |
| 2146 | /* mov ebx,imm32 */ | ||
| 2147 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | ||
| 2148 | 2177 | ||
| 2149 | /* and dreg_lo,sreg_lo */ | 2178 | /* and dreg_lo,sreg_lo */ |
| 2150 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | 2179 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); |
| 2151 | /* and dreg_hi,sreg_hi */ | 2180 | if (is_jmp64) { |
| 2152 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | 2181 | hi = imm32 & (1 << 31) ? (u32)~0 : 0; |
| 2153 | /* or dreg_lo,dreg_hi */ | 2182 | /* mov ebx,imm32 */ |
| 2154 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | 2183 | EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi); |
| 2184 | /* and dreg_hi,sreg_hi */ | ||
| 2185 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | ||
| 2186 | /* or dreg_lo,dreg_hi */ | ||
| 2187 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 2188 | } | ||
| 2155 | goto emit_cond_jmp; | 2189 | goto emit_cond_jmp; |
| 2156 | } | 2190 | } |
| 2157 | case BPF_JMP | BPF_JEQ | BPF_K: | 2191 | case BPF_JMP | BPF_JEQ | BPF_K: |
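
Both JSET cases above reduce to the same taken-test: jump when (dst & src) is non-zero over the operand width. The 64-bit form ANDs the halves separately and ORs the results so the zero flag reflects the whole conjunction; the JMP32 form stops after the low-word AND. As a plain-C model (this ignores that the emitted sequence works on the scratch copies loaded above when the operands live on the stack):

        #include <stdbool.h>
        #include <stdint.h>

        /* Taken-test implemented by the and/and/or sequence above. */
        static bool jset_taken(uint32_t dhi, uint32_t dlo,
                               uint32_t shi, uint32_t slo, bool is_jmp64)
        {
                uint32_t lo = dlo & slo;        /* and dreg_lo,sreg_lo */

                if (!is_jmp64)
                        return lo != 0;
                return (lo | (dhi & shi)) != 0; /* or dreg_lo,dreg_hi */
        }
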
| @@ -2163,29 +2197,44 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 2163 | case BPF_JMP | BPF_JSGT | BPF_K: | 2197 | case BPF_JMP | BPF_JSGT | BPF_K: |
| 2164 | case BPF_JMP | BPF_JSLE | BPF_K: | 2198 | case BPF_JMP | BPF_JSLE | BPF_K: |
| 2165 | case BPF_JMP | BPF_JSLT | BPF_K: | 2199 | case BPF_JMP | BPF_JSLT | BPF_K: |
| 2166 | case BPF_JMP | BPF_JSGE | BPF_K: { | 2200 | case BPF_JMP | BPF_JSGE | BPF_K: |
| 2167 | u32 hi; | 2201 | case BPF_JMP32 | BPF_JEQ | BPF_K: |
| 2202 | case BPF_JMP32 | BPF_JNE | BPF_K: | ||
| 2203 | case BPF_JMP32 | BPF_JGT | BPF_K: | ||
| 2204 | case BPF_JMP32 | BPF_JLT | BPF_K: | ||
| 2205 | case BPF_JMP32 | BPF_JGE | BPF_K: | ||
| 2206 | case BPF_JMP32 | BPF_JLE | BPF_K: | ||
| 2207 | case BPF_JMP32 | BPF_JSGT | BPF_K: | ||
| 2208 | case BPF_JMP32 | BPF_JSLE | BPF_K: | ||
| 2209 | case BPF_JMP32 | BPF_JSLT | BPF_K: | ||
| 2210 | case BPF_JMP32 | BPF_JSGE | BPF_K: { | ||
| 2211 | bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; | ||
| 2168 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | 2212 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; |
| 2169 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | 2213 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; |
| 2170 | u8 sreg_lo = IA32_ECX; | 2214 | u8 sreg_lo = IA32_ECX; |
| 2171 | u8 sreg_hi = IA32_EBX; | 2215 | u8 sreg_hi = IA32_EBX; |
| 2216 | u32 hi; | ||
| 2172 | 2217 | ||
| 2173 | if (dstk) { | 2218 | if (dstk) { |
| 2174 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | 2219 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), |
| 2175 | STACK_VAR(dst_lo)); | 2220 | STACK_VAR(dst_lo)); |
| 2176 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | 2221 | if (is_jmp64) |
| 2177 | STACK_VAR(dst_hi)); | 2222 | EMIT3(0x8B, |
| 2223 | add_2reg(0x40, IA32_EBP, | ||
| 2224 | IA32_EDX), | ||
| 2225 | STACK_VAR(dst_hi)); | ||
| 2178 | } | 2226 | } |
| 2179 | 2227 | ||
| 2180 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
| 2181 | /* mov ecx,imm32 */ | 2228 | /* mov ecx,imm32 */ |
| 2182 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | 2229 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); |
| 2183 | /* mov ebx,imm32 */ | 2230 | if (is_jmp64) { |
| 2184 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | 2231 | hi = imm32 & (1 << 31) ? (u32)~0 : 0; |
| 2185 | 2232 | /* mov ebx,imm32 */ | |
| 2186 | /* cmp dreg_hi,sreg_hi */ | 2233 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); |
| 2187 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | 2234 | /* cmp dreg_hi,sreg_hi */ |
| 2188 | EMIT2(IA32_JNE, 2); | 2235 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); |
| 2236 | EMIT2(IA32_JNE, 2); | ||
| 2237 | } | ||
| 2189 | /* cmp dreg_lo,sreg_lo */ | 2238 | /* cmp dreg_lo,sreg_lo */ |
| 2190 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | 2239 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); |
| 2191 | 2240 | ||
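
One subtlety shared by the ...BPF_K hunks in this file: BPF immediates are signed 32-bit values, and the 64-bit comparisons treat them as sign-extended to 64 bits. The "hi = imm32 & (1 << 31) ? (u32)~0 : 0" line is exactly that extension, built by hand because the operand must be split across two 32-bit registers; JMP32 never materializes the high word at all. Equivalently:

        #include <stdint.h>

        /* Hand-rolled sign extension, as in the hunks above. */
        static uint32_t imm_hi_word(int32_t imm32)
        {
                return imm32 < 0 ? ~0u : 0; /* == imm32 & (1 << 31) ? ~0 : 0 */
        }
        /* i.e. the 64-bit operand is (int64_t)imm32, split as hi:lo. */
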
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c index 96f438d4b026..1421d5330b2c 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c | |||
| @@ -44,7 +44,6 @@ static struct fixed_voltage_config bcm43xx_vmmc = { | |||
| 44 | */ | 44 | */ |
| 45 | .microvolts = 2000000, /* 1.8V */ | 45 | .microvolts = 2000000, /* 1.8V */ |
| 46 | .startup_delay = 250 * 1000, /* 250ms */ | 46 | .startup_delay = 250 * 1000, /* 250ms */ |
| 47 | .enable_high = 1, /* active high */ | ||
| 48 | .enabled_at_boot = 0, /* disabled at boot */ | 47 | .enabled_at_boot = 0, /* disabled at boot */ |
| 49 | .init_data = &bcm43xx_vmmc_data, | 48 | .init_data = &bcm43xx_vmmc_data, |
| 50 | }; | 49 | }; |
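
Context for the one-line removal above: it matches the fixed-regulator conversion in which enable-pin polarity moves out of struct fixed_voltage_config and into the GPIO descriptor itself. The registration side is not shown in this hunk, so the following is only a hypothetical sketch of how the "active high" information travels once the field is gone (the dev_id, chip label, and pin number are illustrative; gpiod_lookup_table, GPIO_LOOKUP, and GPIO_ACTIVE_HIGH are the stock machine.h interfaces):

        #include <linux/gpio/machine.h>

        /* Illustrative lookup table: polarity rides along as a
         * descriptor flag instead of fixed_voltage_config.enable_high. */
        static struct gpiod_lookup_table bcm43xx_vmmc_gpio_table = {
                .dev_id = "reg-fixed-voltage.0",        /* assumed consumer */
                .table  = {
                        GPIO_LOOKUP("gpiochip0", 0, NULL, GPIO_ACTIVE_HIGH),
                        { }
                },
        };

Such a table would be registered with gpiod_add_lookup_table() before the platform device probes, so the regulator core picks the flag up when it requests its enable GPIO.
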
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 4a6a5a26c582..eb33432f2f24 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c | |||
| @@ -29,7 +29,8 @@ | |||
| 29 | 29 | ||
| 30 | struct uv_systab *uv_systab; | 30 | struct uv_systab *uv_systab; |
| 31 | 31 | ||
| 32 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | 32 | static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, |
| 33 | u64 a4, u64 a5) | ||
| 33 | { | 34 | { |
| 34 | struct uv_systab *tab = uv_systab; | 35 | struct uv_systab *tab = uv_systab; |
| 35 | s64 ret; | 36 | s64 ret; |
| @@ -51,6 +52,19 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | |||
| 51 | 52 | ||
| 52 | return ret; | 53 | return ret; |
| 53 | } | 54 | } |
| 55 | |||
| 56 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | ||
| 57 | { | ||
| 58 | s64 ret; | ||
| 59 | |||
| 60 | if (down_interruptible(&__efi_uv_runtime_lock)) | ||
| 61 | return BIOS_STATUS_ABORT; | ||
| 62 | |||
| 63 | ret = __uv_bios_call(which, a1, a2, a3, a4, a5); | ||
| 64 | up(&__efi_uv_runtime_lock); | ||
| 65 | |||
| 66 | return ret; | ||
| 67 | } | ||
| 54 | EXPORT_SYMBOL_GPL(uv_bios_call); | 68 | EXPORT_SYMBOL_GPL(uv_bios_call); |
| 55 | 69 | ||
| 56 | s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | 70 | s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, |
| @@ -59,10 +73,15 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | |||
| 59 | unsigned long bios_flags; | 73 | unsigned long bios_flags; |
| 60 | s64 ret; | 74 | s64 ret; |
| 61 | 75 | ||
| 76 | if (down_interruptible(&__efi_uv_runtime_lock)) | ||
| 77 | return BIOS_STATUS_ABORT; | ||
| 78 | |||
| 62 | local_irq_save(bios_flags); | 79 | local_irq_save(bios_flags); |
| 63 | ret = uv_bios_call(which, a1, a2, a3, a4, a5); | 80 | ret = __uv_bios_call(which, a1, a2, a3, a4, a5); |
| 64 | local_irq_restore(bios_flags); | 81 | local_irq_restore(bios_flags); |
| 65 | 82 | ||
| 83 | up(&__efi_uv_runtime_lock); | ||
| 84 | |||
| 66 | return ret; | 85 | return ret; |
| 67 | } | 86 | } |
| 68 | 87 | ||
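
The shape of this fix is worth noting: both uv_bios_call() and uv_bios_call_irqsave() now serialize on __efi_uv_runtime_lock, shared with the EFI runtime-service wrappers, while the actual firmware call moves into the unlocked __uv_bios_call(). Two constraints drive the split: a plain semaphore would self-deadlock if the irqsave wrapper still called the locked uv_bios_call(), and down_interruptible() can sleep, so it must be taken before local_irq_save(), never inside the irqs-off window. The EXPORT_SYMBOL_GPL stays on the locked wrapper, so module callers get the serialized path. The resulting ordering, abbreviated (the function name below is illustrative and argument lists are elided):

        /* Sketch of the wrapper ordering used above. */
        s64 locked_irqsave_call(/* ... */)
        {
                unsigned long flags;
                s64 ret;

                if (down_interruptible(&__efi_uv_runtime_lock))
                        return BIOS_STATUS_ABORT; /* interrupted by signal */
                local_irq_save(flags);
                ret = __uv_bios_call(/* ... */);  /* lock already held */
                local_irq_restore(flags);
                up(&__efi_uv_runtime_lock);
                return ret;
        }
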
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index f518b4744ff8..494eeb51e4e1 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig | |||
| @@ -16,7 +16,6 @@ config 64BIT | |||
| 16 | 16 | ||
| 17 | config X86_32 | 17 | config X86_32 |
| 18 | def_bool !64BIT | 18 | def_bool !64BIT |
| 19 | select HAVE_AOUT | ||
| 20 | select ARCH_WANT_IPC_PARSE_VERSION | 19 | select ARCH_WANT_IPC_PARSE_VERSION |
| 21 | select MODULES_USE_ELF_REL | 20 | select MODULES_USE_ELF_REL |
| 22 | select CLONE_BACKWARDS | 21 | select CLONE_BACKWARDS |
