diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2014-03-06 03:23:33 -0500 |
---|---|---|
committer | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2014-05-14 13:03:17 -0400 |
commit | 2c98833a42cd194ba0f537cd21917e15e5593715 (patch) | |
tree | 922ba503863ae2e6f8908b3c2b749ee086fcc824 /arch | |
parent | 190f1ca85d071114930dd7abe6b5d103e9d5572f (diff) |
arm64/crypto: SHA-1 using ARMv8 Crypto Extensions
This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs that
implement the SHA-1 part of the ARMv8 Crypto Extensions.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm64/Kconfig | 3 | ||||
-rw-r--r-- | arch/arm64/Makefile | 1 | ||||
-rw-r--r-- | arch/arm64/crypto/Kconfig | 16 | ||||
-rw-r--r-- | arch/arm64/crypto/Makefile | 12 | ||||
-rw-r--r-- | arch/arm64/crypto/sha1-ce-core.S | 153 | ||||
-rw-r--r-- | arch/arm64/crypto/sha1-ce-glue.c | 174 |
6 files changed, 359 insertions, 0 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e6e4d3749a6e..1cefc6fe969a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig | |||
@@ -342,5 +342,8 @@ source "arch/arm64/Kconfig.debug" | |||
342 | source "security/Kconfig" | 342 | source "security/Kconfig" |
343 | 343 | ||
344 | source "crypto/Kconfig" | 344 | source "crypto/Kconfig" |
345 | if CRYPTO | ||
346 | source "arch/arm64/crypto/Kconfig" | ||
347 | endif | ||
345 | 348 | ||
346 | source "lib/Kconfig" | 349 | source "lib/Kconfig" |
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2fceb71ac3b7..8185a913c5ed 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile | |||
@@ -45,6 +45,7 @@ export TEXT_OFFSET GZFLAGS | |||
45 | core-y += arch/arm64/kernel/ arch/arm64/mm/ | 45 | core-y += arch/arm64/kernel/ arch/arm64/mm/ |
46 | core-$(CONFIG_KVM) += arch/arm64/kvm/ | 46 | core-$(CONFIG_KVM) += arch/arm64/kvm/ |
47 | core-$(CONFIG_XEN) += arch/arm64/xen/ | 47 | core-$(CONFIG_XEN) += arch/arm64/xen/ |
48 | core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ | ||
48 | libs-y := arch/arm64/lib/ $(libs-y) | 49 | libs-y := arch/arm64/lib/ $(libs-y) |
49 | libs-y += $(LIBGCC) | 50 | libs-y += $(LIBGCC) |
50 | 51 | ||
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig new file mode 100644 index 000000000000..7956881b5986 --- /dev/null +++ b/arch/arm64/crypto/Kconfig | |||
@@ -0,0 +1,16 @@ | |||
1 | |||
2 | menuconfig ARM64_CRYPTO | ||
3 | bool "ARM64 Accelerated Cryptographic Algorithms" | ||
4 | depends on ARM64 | ||
5 | help | ||
6 | Say Y here to choose from a selection of cryptographic algorithms | ||
7 | implemented using ARM64 specific CPU features or instructions. | ||
8 | |||
9 | if ARM64_CRYPTO | ||
10 | |||
11 | config CRYPTO_SHA1_ARM64_CE | ||
12 | tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" | ||
13 | depends on ARM64 && KERNEL_MODE_NEON | ||
14 | select CRYPTO_HASH | ||
15 | |||
16 | endif | ||
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile new file mode 100644 index 000000000000..0ed3caaec81b --- /dev/null +++ b/arch/arm64/crypto/Makefile | |||
@@ -0,0 +1,12 @@ | |||
1 | # | ||
2 | # linux/arch/arm64/crypto/Makefile | ||
3 | # | ||
4 | # Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | # | ||
6 | # This program is free software; you can redistribute it and/or modify | ||
7 | # it under the terms of the GNU General Public License version 2 as | ||
8 | # published by the Free Software Foundation. | ||
9 | # | ||
10 | |||
11 | obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o | ||
12 | sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o | ||
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S new file mode 100644 index 000000000000..09d57d98609c --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-core.S | |||
@@ -0,0 +1,153 @@ | |||
1 | /* | ||
2 | * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/linkage.h> | ||
12 | #include <asm/assembler.h> | ||
13 | |||
14 | .text | ||
15 | .arch armv8-a+crypto | ||
16 | |||
17 | k0 .req v0 | ||
18 | k1 .req v1 | ||
19 | k2 .req v2 | ||
20 | k3 .req v3 | ||
21 | |||
22 | t0 .req v4 | ||
23 | t1 .req v5 | ||
24 | |||
25 | dga .req q6 | ||
26 | dgav .req v6 | ||
27 | dgb .req s7 | ||
28 | dgbv .req v7 | ||
29 | |||
30 | dg0q .req q12 | ||
31 | dg0s .req s12 | ||
32 | dg0v .req v12 | ||
33 | dg1s .req s13 | ||
34 | dg1v .req v13 | ||
35 | dg2s .req s14 | ||
36 | |||
37 | .macro add_only, op, ev, rc, s0, dg1 | ||
38 | .ifc \ev, ev | ||
39 | add t1.4s, v\s0\().4s, \rc\().4s | ||
40 | sha1h dg2s, dg0s | ||
41 | .ifnb \dg1 | ||
42 | sha1\op dg0q, \dg1, t0.4s | ||
43 | .else | ||
44 | sha1\op dg0q, dg1s, t0.4s | ||
45 | .endif | ||
46 | .else | ||
47 | .ifnb \s0 | ||
48 | add t0.4s, v\s0\().4s, \rc\().4s | ||
49 | .endif | ||
50 | sha1h dg1s, dg0s | ||
51 | sha1\op dg0q, dg2s, t1.4s | ||
52 | .endif | ||
53 | .endm | ||
54 | |||
55 | .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 | ||
56 | sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s | ||
57 | add_only \op, \ev, \rc, \s1, \dg1 | ||
58 | sha1su1 v\s0\().4s, v\s3\().4s | ||
59 | .endm | ||
60 | |||
61 | /* | ||
62 | * The SHA1 round constants | ||
63 | */ | ||
64 | .align 4 | ||
65 | .Lsha1_rcon: | ||
66 | .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 | ||
67 | |||
68 | /* | ||
69 | * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, | ||
70 | * u8 *head, long bytes) | ||
71 | */ | ||
72 | ENTRY(sha1_ce_transform) | ||
73 | /* load round constants */ | ||
74 | adr x6, .Lsha1_rcon | ||
75 | ld1r {k0.4s}, [x6], #4 | ||
76 | ld1r {k1.4s}, [x6], #4 | ||
77 | ld1r {k2.4s}, [x6], #4 | ||
78 | ld1r {k3.4s}, [x6] | ||
79 | |||
80 | /* load state */ | ||
81 | ldr dga, [x2] | ||
82 | ldr dgb, [x2, #16] | ||
83 | |||
84 | /* load buffered partial block from head (if supplied) */ | ||
85 | cbz x3, 0f | ||
86 | ld1 {v8.4s-v11.4s}, [x3] | ||
87 | b 1f | ||
88 | |||
89 | /* load input */ | ||
90 | 0: ld1 {v8.4s-v11.4s}, [x1], #64 | ||
91 | sub w0, w0, #1 | ||
92 | |||
93 | 1: | ||
94 | CPU_LE( rev32 v8.16b, v8.16b ) | ||
95 | CPU_LE( rev32 v9.16b, v9.16b ) | ||
96 | CPU_LE( rev32 v10.16b, v10.16b ) | ||
97 | CPU_LE( rev32 v11.16b, v11.16b ) | ||
98 | |||
99 | 2: add t0.4s, v8.4s, k0.4s | ||
100 | mov dg0v.16b, dgav.16b | ||
101 | |||
102 | add_update c, ev, k0, 8, 9, 10, 11, dgb | ||
103 | add_update c, od, k0, 9, 10, 11, 8 | ||
104 | add_update c, ev, k0, 10, 11, 8, 9 | ||
105 | add_update c, od, k0, 11, 8, 9, 10 | ||
106 | add_update c, ev, k1, 8, 9, 10, 11 | ||
107 | |||
108 | add_update p, od, k1, 9, 10, 11, 8 | ||
109 | add_update p, ev, k1, 10, 11, 8, 9 | ||
110 | add_update p, od, k1, 11, 8, 9, 10 | ||
111 | add_update p, ev, k1, 8, 9, 10, 11 | ||
112 | add_update p, od, k2, 9, 10, 11, 8 | ||
113 | |||
114 | add_update m, ev, k2, 10, 11, 8, 9 | ||
115 | add_update m, od, k2, 11, 8, 9, 10 | ||
116 | add_update m, ev, k2, 8, 9, 10, 11 | ||
117 | add_update m, od, k2, 9, 10, 11, 8 | ||
118 | add_update m, ev, k3, 10, 11, 8, 9 | ||
119 | |||
120 | add_update p, od, k3, 11, 8, 9, 10 | ||
121 | add_only p, ev, k3, 9 | ||
122 | add_only p, od, k3, 10 | ||
123 | add_only p, ev, k3, 11 | ||
124 | add_only p, od | ||
125 | |||
126 | /* update state */ | ||
127 | add dgbv.2s, dgbv.2s, dg1v.2s | ||
128 | add dgav.4s, dgav.4s, dg0v.4s | ||
129 | |||
130 | cbnz w0, 0b | ||
131 | |||
132 | /* | ||
133 | * Final block: add padding and total bit count. | ||
134 | * Skip if we have no total byte count in x4. In that case, the caller | ||
135 | * did not take the single-call fast path (see sha1_finup()), and the | ||
136 | * padding is handled by the C code. | ||
137 | */ | ||
138 | cbz x4, 3f | ||
139 | movi v9.2d, #0 | ||
140 | mov x8, #0x80000000 | ||
141 | movi v10.2d, #0 | ||
142 | ror x7, x4, #29 // ror(lsl(x4, 3), 32) | ||
143 | fmov d8, x8 | ||
144 | mov x4, #0 | ||
145 | mov v11.d[0], xzr | ||
146 | mov v11.d[1], x7 | ||
147 | b 2b | ||
148 | |||
149 | /* store new state */ | ||
150 | 3: str dga, [x2] | ||
151 | str dgb, [x2, #16] | ||
152 | ret | ||
153 | ENDPROC(sha1_ce_transform) | ||
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c new file mode 100644 index 000000000000..6fe83f37a750 --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-glue.c | |||
@@ -0,0 +1,174 @@ | |||
1 | /* | ||
2 | * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <asm/neon.h> | ||
12 | #include <asm/unaligned.h> | ||
13 | #include <crypto/internal/hash.h> | ||
14 | #include <crypto/sha.h> | ||
15 | #include <linux/cpufeature.h> | ||
16 | #include <linux/crypto.h> | ||
17 | #include <linux/module.h> | ||
18 | |||
19 | MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); | ||
20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
21 | MODULE_LICENSE("GPL v2"); | ||
22 | |||
23 | asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, | ||
24 | u8 *head, long bytes); | ||
25 | |||
26 | static int sha1_init(struct shash_desc *desc) | ||
27 | { | ||
28 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
29 | |||
30 | *sctx = (struct sha1_state){ | ||
31 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
32 | }; | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | static int sha1_update(struct shash_desc *desc, const u8 *data, | ||
37 | unsigned int len) | ||
38 | { | ||
39 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
40 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
41 | |||
42 | sctx->count += len; | ||
43 | |||
44 | if ((partial + len) >= SHA1_BLOCK_SIZE) { | ||
45 | int blocks; | ||
46 | |||
47 | if (partial) { | ||
48 | int p = SHA1_BLOCK_SIZE - partial; | ||
49 | |||
50 | memcpy(sctx->buffer + partial, data, p); | ||
51 | data += p; | ||
52 | len -= p; | ||
53 | } | ||
54 | |||
55 | blocks = len / SHA1_BLOCK_SIZE; | ||
56 | len %= SHA1_BLOCK_SIZE; | ||
57 | |||
58 | kernel_neon_begin_partial(16); | ||
59 | sha1_ce_transform(blocks, data, sctx->state, | ||
60 | partial ? sctx->buffer : NULL, 0); | ||
61 | kernel_neon_end(); | ||
62 | |||
63 | data += blocks * SHA1_BLOCK_SIZE; | ||
64 | partial = 0; | ||
65 | } | ||
66 | if (len) | ||
67 | memcpy(sctx->buffer + partial, data, len); | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static int sha1_final(struct shash_desc *desc, u8 *out) | ||
72 | { | ||
73 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
74 | |||
75 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
76 | __be64 bits = cpu_to_be64(sctx->count << 3); | ||
77 | __be32 *dst = (__be32 *)out; | ||
78 | int i; | ||
79 | |||
80 | u32 padlen = SHA1_BLOCK_SIZE | ||
81 | - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE); | ||
82 | |||
83 | sha1_update(desc, padding, padlen); | ||
84 | sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
85 | |||
86 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) | ||
87 | put_unaligned_be32(sctx->state[i], dst++); | ||
88 | |||
89 | *sctx = (struct sha1_state){}; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | static int sha1_finup(struct shash_desc *desc, const u8 *data, | ||
94 | unsigned int len, u8 *out) | ||
95 | { | ||
96 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
97 | __be32 *dst = (__be32 *)out; | ||
98 | int blocks; | ||
99 | int i; | ||
100 | |||
101 | if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) { | ||
102 | sha1_update(desc, data, len); | ||
103 | return sha1_final(desc, out); | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * Use a fast path if no data has been hashed yet and the input is a | ||
108 | * nonzero multiple of 64 bytes. In this case, there is no need to | ||
109 | * copy data around, and we can perform the entire digest calculation, | ||
110 | * including padding, in a single invocation of sha1_ce_transform(). | ||
111 | */ | ||
112 | blocks = len / SHA1_BLOCK_SIZE; | ||
113 | |||
114 | kernel_neon_begin_partial(16); | ||
115 | sha1_ce_transform(blocks, data, sctx->state, NULL, len); | ||
116 | kernel_neon_end(); | ||
117 | |||
118 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) | ||
119 | put_unaligned_be32(sctx->state[i], dst++); | ||
120 | |||
121 | *sctx = (struct sha1_state){}; | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static int sha1_export(struct shash_desc *desc, void *out) | ||
126 | { | ||
127 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
128 | struct sha1_state *dst = out; | ||
129 | |||
130 | *dst = *sctx; | ||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static int sha1_import(struct shash_desc *desc, const void *in) | ||
135 | { | ||
136 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
137 | struct sha1_state const *src = in; | ||
138 | |||
139 | *sctx = *src; | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static struct shash_alg alg = { | ||
144 | .init = sha1_init, | ||
145 | .update = sha1_update, | ||
146 | .final = sha1_final, | ||
147 | .finup = sha1_finup, | ||
148 | .export = sha1_export, | ||
149 | .import = sha1_import, | ||
150 | .descsize = sizeof(struct sha1_state), | ||
151 | .digestsize = SHA1_DIGEST_SIZE, | ||
152 | .statesize = sizeof(struct sha1_state), | ||
153 | .base = { | ||
154 | .cra_name = "sha1", | ||
155 | .cra_driver_name = "sha1-ce", | ||
156 | .cra_priority = 200, | ||
157 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
158 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
159 | .cra_module = THIS_MODULE, | ||
160 | } | ||
161 | }; | ||
162 | |||
163 | static int __init sha1_ce_mod_init(void) | ||
164 | { | ||
165 | return crypto_register_shash(&alg); | ||
166 | } | ||
167 | |||
168 | static void __exit sha1_ce_mod_fini(void) | ||
169 | { | ||
170 | crypto_unregister_shash(&alg); | ||
171 | } | ||
172 | |||
173 | module_cpu_feature_match(SHA1, sha1_ce_mod_init); | ||
174 | module_exit(sha1_ce_mod_fini); | ||