diff options
| -rw-r--r-- | arch/arm64/crypto/sha2-ce-core.S | 29 | ||||
| -rw-r--r-- | arch/arm64/crypto/sha2-ce-glue.c | 227 |
2 files changed, 63 insertions, 193 deletions
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 7f29fc031ea8..5df9d9d470ad 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
| @@ -73,8 +73,8 @@ | |||
| 73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 | 73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 |
| 74 | 74 | ||
| 75 | /* | 75 | /* |
| 76 | * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 76 | * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
| 77 | * u8 *head, long bytes) | 77 | * int blocks) |
| 78 | */ | 78 | */ |
| 79 | ENTRY(sha2_ce_transform) | 79 | ENTRY(sha2_ce_transform) |
| 80 | /* load round constants */ | 80 | /* load round constants */ |
| @@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform) | |||
| 85 | ld1 {v12.4s-v15.4s}, [x8] | 85 | ld1 {v12.4s-v15.4s}, [x8] |
| 86 | 86 | ||
| 87 | /* load state */ | 87 | /* load state */ |
| 88 | ldp dga, dgb, [x2] | 88 | ldp dga, dgb, [x0] |
| 89 | 89 | ||
| 90 | /* load partial input (if supplied) */ | 90 | /* load sha256_ce_state::finalize */ |
| 91 | cbz x3, 0f | 91 | ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] |
| 92 | ld1 {v16.4s-v19.4s}, [x3] | ||
| 93 | b 1f | ||
| 94 | 92 | ||
| 95 | /* load input */ | 93 | /* load input */ |
| 96 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 | 94 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 |
| 97 | sub w0, w0, #1 | 95 | sub w2, w2, #1 |
| 98 | 96 | ||
| 99 | 1: | ||
| 100 | CPU_LE( rev32 v16.16b, v16.16b ) | 97 | CPU_LE( rev32 v16.16b, v16.16b ) |
| 101 | CPU_LE( rev32 v17.16b, v17.16b ) | 98 | CPU_LE( rev32 v17.16b, v17.16b ) |
| 102 | CPU_LE( rev32 v18.16b, v18.16b ) | 99 | CPU_LE( rev32 v18.16b, v18.16b ) |
| 103 | CPU_LE( rev32 v19.16b, v19.16b ) | 100 | CPU_LE( rev32 v19.16b, v19.16b ) |
| 104 | 101 | ||
| 105 | 2: add t0.4s, v16.4s, v0.4s | 102 | 1: add t0.4s, v16.4s, v0.4s |
| 106 | mov dg0v.16b, dgav.16b | 103 | mov dg0v.16b, dgav.16b |
| 107 | mov dg1v.16b, dgbv.16b | 104 | mov dg1v.16b, dgbv.16b |
| 108 | 105 | ||
| @@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
| 131 | add dgbv.4s, dgbv.4s, dg1v.4s | 128 | add dgbv.4s, dgbv.4s, dg1v.4s |
| 132 | 129 | ||
| 133 | /* handled all input blocks? */ | 130 | /* handled all input blocks? */ |
| 134 | cbnz w0, 0b | 131 | cbnz w2, 0b |
| 135 | 132 | ||
| 136 | /* | 133 | /* |
| 137 | * Final block: add padding and total bit count. | 134 | * Final block: add padding and total bit count. |
| 138 | * Skip if we have no total byte count in x4. In that case, the input | 135 | * Skip if the input size was not a round multiple of the block size, |
| 139 | * size was not a round multiple of the block size, and the padding is | 136 | * the padding is handled by the C code in that case. |
| 140 | * handled by the C code. | ||
| 141 | */ | 137 | */ |
| 142 | cbz x4, 3f | 138 | cbz x4, 3f |
| 139 | ldr x4, [x0, #:lo12:sha256_ce_offsetof_count] | ||
| 143 | movi v17.2d, #0 | 140 | movi v17.2d, #0 |
| 144 | mov x8, #0x80000000 | 141 | mov x8, #0x80000000 |
| 145 | movi v18.2d, #0 | 142 | movi v18.2d, #0 |
| @@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
| 148 | mov x4, #0 | 145 | mov x4, #0 |
| 149 | mov v19.d[0], xzr | 146 | mov v19.d[0], xzr |
| 150 | mov v19.d[1], x7 | 147 | mov v19.d[1], x7 |
| 151 | b 2b | 148 | b 1b |
| 152 | 149 | ||
| 153 | /* store new state */ | 150 | /* store new state */ |
| 154 | 3: stp dga, dgb, [x2] | 151 | 3: stp dga, dgb, [x0] |
| 155 | ret | 152 | ret |
| 156 | ENDPROC(sha2_ce_transform) | 153 | ENDPROC(sha2_ce_transform) |
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index ae67e88c28b9..1340e44c048b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
| @@ -12,206 +12,82 @@ | |||
| 12 | #include <asm/unaligned.h> | 12 | #include <asm/unaligned.h> |
| 13 | #include <crypto/internal/hash.h> | 13 | #include <crypto/internal/hash.h> |
| 14 | #include <crypto/sha.h> | 14 | #include <crypto/sha.h> |
| 15 | #include <crypto/sha256_base.h> | ||
| 15 | #include <linux/cpufeature.h> | 16 | #include <linux/cpufeature.h> |
| 16 | #include <linux/crypto.h> | 17 | #include <linux/crypto.h> |
| 17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
| 18 | 19 | ||
| 20 | #define ASM_EXPORT(sym, val) \ | ||
| 21 | asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); | ||
| 22 | |||
| 19 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); | 23 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); |
| 20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
| 21 | MODULE_LICENSE("GPL v2"); | 25 | MODULE_LICENSE("GPL v2"); |
| 22 | 26 | ||
| 23 | asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 27 | struct sha256_ce_state { |
| 24 | u8 *head, long bytes); | 28 | struct sha256_state sst; |
| 25 | 29 | u32 finalize; | |
| 26 | static int sha224_init(struct shash_desc *desc) | 30 | }; |
| 27 | { | ||
| 28 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
| 29 | |||
| 30 | *sctx = (struct sha256_state){ | ||
| 31 | .state = { | ||
| 32 | SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, | ||
| 33 | SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, | ||
| 34 | } | ||
| 35 | }; | ||
| 36 | return 0; | ||
| 37 | } | ||
| 38 | |||
| 39 | static int sha256_init(struct shash_desc *desc) | ||
| 40 | { | ||
| 41 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
| 42 | |||
| 43 | *sctx = (struct sha256_state){ | ||
| 44 | .state = { | ||
| 45 | SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, | ||
| 46 | SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, | ||
| 47 | } | ||
| 48 | }; | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | |||
| 52 | static int sha2_update(struct shash_desc *desc, const u8 *data, | ||
| 53 | unsigned int len) | ||
| 54 | { | ||
| 55 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
| 56 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
| 57 | |||
| 58 | sctx->count += len; | ||
| 59 | |||
| 60 | if ((partial + len) >= SHA256_BLOCK_SIZE) { | ||
| 61 | int blocks; | ||
| 62 | |||
| 63 | if (partial) { | ||
| 64 | int p = SHA256_BLOCK_SIZE - partial; | ||
| 65 | |||
| 66 | memcpy(sctx->buf + partial, data, p); | ||
| 67 | data += p; | ||
| 68 | len -= p; | ||
| 69 | } | ||
| 70 | 31 | ||
| 71 | blocks = len / SHA256_BLOCK_SIZE; | 32 | asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
| 72 | len %= SHA256_BLOCK_SIZE; | 33 | int blocks); |
| 73 | 34 | ||
| 74 | kernel_neon_begin_partial(28); | 35 | static int sha256_ce_update(struct shash_desc *desc, const u8 *data, |
| 75 | sha2_ce_transform(blocks, data, sctx->state, | 36 | unsigned int len) |
| 76 | partial ? sctx->buf : NULL, 0); | ||
| 77 | kernel_neon_end(); | ||
| 78 | |||
| 79 | data += blocks * SHA256_BLOCK_SIZE; | ||
| 80 | partial = 0; | ||
| 81 | } | ||
| 82 | if (len) | ||
| 83 | memcpy(sctx->buf + partial, data, len); | ||
| 84 | return 0; | ||
| 85 | } | ||
| 86 | |||
| 87 | static void sha2_final(struct shash_desc *desc) | ||
| 88 | { | 37 | { |
| 89 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | 38 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
| 90 | |||
| 91 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
| 92 | __be64 bits = cpu_to_be64(sctx->count << 3); | ||
| 93 | u32 padlen = SHA256_BLOCK_SIZE | ||
| 94 | - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE); | ||
| 95 | |||
| 96 | sha2_update(desc, padding, padlen); | ||
| 97 | sha2_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
| 98 | } | ||
| 99 | |||
| 100 | static int sha224_final(struct shash_desc *desc, u8 *out) | ||
| 101 | { | ||
| 102 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
| 103 | __be32 *dst = (__be32 *)out; | ||
| 104 | int i; | ||
| 105 | |||
| 106 | sha2_final(desc); | ||
| 107 | |||
| 108 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
| 109 | put_unaligned_be32(sctx->state[i], dst++); | ||
| 110 | |||
| 111 | *sctx = (struct sha256_state){}; | ||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | 39 | ||
| 115 | static int sha256_final(struct shash_desc *desc, u8 *out) | 40 | sctx->finalize = 0; |
| 116 | { | 41 | kernel_neon_begin_partial(28); |
| 117 | struct sha256_state *sctx = shash_desc_ctx(desc); | 42 | sha256_base_do_update(desc, data, len, |
| 118 | __be32 *dst = (__be32 *)out; | 43 | (sha256_block_fn *)sha2_ce_transform); |
| 119 | int i; | 44 | kernel_neon_end(); |
| 120 | |||
| 121 | sha2_final(desc); | ||
| 122 | |||
| 123 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
| 124 | put_unaligned_be32(sctx->state[i], dst++); | ||
| 125 | 45 | ||
| 126 | *sctx = (struct sha256_state){}; | ||
| 127 | return 0; | 46 | return 0; |
| 128 | } | 47 | } |
| 129 | 48 | ||
| 130 | static void sha2_finup(struct shash_desc *desc, const u8 *data, | 49 | static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, |
| 131 | unsigned int len) | 50 | unsigned int len, u8 *out) |
| 132 | { | 51 | { |
| 133 | struct sha256_state *sctx = shash_desc_ctx(desc); | 52 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
| 134 | int blocks; | 53 | bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); |
| 135 | 54 | ||
| 136 | if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { | 55 | ASM_EXPORT(sha256_ce_offsetof_count, |
| 137 | sha2_update(desc, data, len); | 56 | offsetof(struct sha256_ce_state, sst.count)); |
| 138 | sha2_final(desc); | 57 | ASM_EXPORT(sha256_ce_offsetof_finalize, |
| 139 | return; | 58 | offsetof(struct sha256_ce_state, finalize)); |
| 140 | } | ||
| 141 | 59 | ||
| 142 | /* | 60 | /* |
| 143 | * Use a fast path if the input is a multiple of 64 bytes. In | 61 | * Allow the asm code to perform the finalization if there is no |
| 144 | * this case, there is no need to copy data around, and we can | 62 | * partial data and the input is a round multiple of the block size. |
| 145 | * perform the entire digest calculation in a single invocation | ||
| 146 | * of sha2_ce_transform() | ||
| 147 | */ | 63 | */ |
| 148 | blocks = len / SHA256_BLOCK_SIZE; | 64 | sctx->finalize = finalize; |
| 149 | 65 | ||
| 150 | kernel_neon_begin_partial(28); | 66 | kernel_neon_begin_partial(28); |
| 151 | sha2_ce_transform(blocks, data, sctx->state, NULL, len); | 67 | sha256_base_do_update(desc, data, len, |
| 68 | (sha256_block_fn *)sha2_ce_transform); | ||
| 69 | if (!finalize) | ||
| 70 | sha256_base_do_finalize(desc, | ||
| 71 | (sha256_block_fn *)sha2_ce_transform); | ||
| 152 | kernel_neon_end(); | 72 | kernel_neon_end(); |
| 73 | return sha256_base_finish(desc, out); | ||
| 153 | } | 74 | } |
| 154 | 75 | ||
| 155 | static int sha224_finup(struct shash_desc *desc, const u8 *data, | 76 | static int sha256_ce_final(struct shash_desc *desc, u8 *out) |
| 156 | unsigned int len, u8 *out) | ||
| 157 | { | 77 | { |
| 158 | struct sha256_state *sctx = shash_desc_ctx(desc); | 78 | kernel_neon_begin_partial(28); |
| 159 | __be32 *dst = (__be32 *)out; | 79 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); |
| 160 | int i; | 80 | kernel_neon_end(); |
| 161 | 81 | return sha256_base_finish(desc, out); | |
| 162 | sha2_finup(desc, data, len); | ||
| 163 | |||
| 164 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
| 165 | put_unaligned_be32(sctx->state[i], dst++); | ||
| 166 | |||
| 167 | *sctx = (struct sha256_state){}; | ||
| 168 | return 0; | ||
| 169 | } | ||
| 170 | |||
| 171 | static int sha256_finup(struct shash_desc *desc, const u8 *data, | ||
| 172 | unsigned int len, u8 *out) | ||
| 173 | { | ||
| 174 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
| 175 | __be32 *dst = (__be32 *)out; | ||
| 176 | int i; | ||
| 177 | |||
| 178 | sha2_finup(desc, data, len); | ||
| 179 | |||
| 180 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
| 181 | put_unaligned_be32(sctx->state[i], dst++); | ||
| 182 | |||
| 183 | *sctx = (struct sha256_state){}; | ||
| 184 | return 0; | ||
| 185 | } | ||
| 186 | |||
| 187 | static int sha2_export(struct shash_desc *desc, void *out) | ||
| 188 | { | ||
| 189 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
| 190 | struct sha256_state *dst = out; | ||
| 191 | |||
| 192 | *dst = *sctx; | ||
| 193 | return 0; | ||
| 194 | } | ||
| 195 | |||
| 196 | static int sha2_import(struct shash_desc *desc, const void *in) | ||
| 197 | { | ||
| 198 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
| 199 | struct sha256_state const *src = in; | ||
| 200 | |||
| 201 | *sctx = *src; | ||
| 202 | return 0; | ||
| 203 | } | 82 | } |
| 204 | 83 | ||
| 205 | static struct shash_alg algs[] = { { | 84 | static struct shash_alg algs[] = { { |
| 206 | .init = sha224_init, | 85 | .init = sha224_base_init, |
| 207 | .update = sha2_update, | 86 | .update = sha256_ce_update, |
| 208 | .final = sha224_final, | 87 | .final = sha256_ce_final, |
| 209 | .finup = sha224_finup, | 88 | .finup = sha256_ce_finup, |
| 210 | .export = sha2_export, | 89 | .descsize = sizeof(struct sha256_ce_state), |
| 211 | .import = sha2_import, | ||
| 212 | .descsize = sizeof(struct sha256_state), | ||
| 213 | .digestsize = SHA224_DIGEST_SIZE, | 90 | .digestsize = SHA224_DIGEST_SIZE, |
| 214 | .statesize = sizeof(struct sha256_state), | ||
| 215 | .base = { | 91 | .base = { |
| 216 | .cra_name = "sha224", | 92 | .cra_name = "sha224", |
| 217 | .cra_driver_name = "sha224-ce", | 93 | .cra_driver_name = "sha224-ce", |
| @@ -221,15 +97,12 @@ static struct shash_alg algs[] = { { | |||
| 221 | .cra_module = THIS_MODULE, | 97 | .cra_module = THIS_MODULE, |
| 222 | } | 98 | } |
| 223 | }, { | 99 | }, { |
| 224 | .init = sha256_init, | 100 | .init = sha256_base_init, |
| 225 | .update = sha2_update, | 101 | .update = sha256_ce_update, |
| 226 | .final = sha256_final, | 102 | .final = sha256_ce_final, |
| 227 | .finup = sha256_finup, | 103 | .finup = sha256_ce_finup, |
| 228 | .export = sha2_export, | 104 | .descsize = sizeof(struct sha256_ce_state), |
| 229 | .import = sha2_import, | ||
| 230 | .descsize = sizeof(struct sha256_state), | ||
| 231 | .digestsize = SHA256_DIGEST_SIZE, | 105 | .digestsize = SHA256_DIGEST_SIZE, |
| 232 | .statesize = sizeof(struct sha256_state), | ||
| 233 | .base = { | 106 | .base = { |
| 234 | .cra_name = "sha256", | 107 | .cra_name = "sha256", |
| 235 | .cra_driver_name = "sha256-ce", | 108 | .cra_driver_name = "sha256-ce", |
