author     Linus Torvalds <torvalds@linux-foundation.org>  2012-10-02 15:57:42 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-10-02 15:57:42 -0400
commit     a20acf99f75e49271381d65db097c9763060a1e8 (patch)
tree       3cf661125e86b7625171b96b885bf5395f62e684
parent     437589a74b6a590d175f86cf9f7b2efcee7765e7 (diff)
parent     42a4172b6ebb4a419085c6caee7c135e51cae5ea (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next
Pull sparc updates from David Miller:
 "Largely this is simply adding support for the Niagara 4 cpu.  Major
  areas are perf events (the chip now supports 4 counters and can
  monitor any event on each counter), crypto (opcodes are available for
  sha1, sha256, sha512, md5, crc32c, AES, DES, CAMELLIA, and Kasumi,
  although the last is unsupported since we lack a generic crypto layer
  Kasumi implementation), and an optimized memcpy.

  Finally some cleanups by Peter Senna Tschudin."

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next: (47 commits)
  sparc64: Fix trailing whitespace in NG4 memcpy.
  sparc64: Fix comment type in NG4 copy from user.
  sparc64: Add SPARC-T4 optimized memcpy.
  drivers/sbus/char: removes unnecessary semicolon
  arch/sparc/kernel/pci_sun4v.c: removes unnecessary semicolon
  sparc64: Fix function argument comment in camellia_sparc64_key_expand asm.
  sparc64: Fix IV handling bug in des_sparc64_cbc_decrypt
  sparc64: Add auto-loading mechanism to crypto-opcode drivers.
  sparc64: Add missing pr_fmt define to crypto opcode drivers.
  sparc64: Adjust crypto priorities.
  sparc64: Use cpu_pgsz_mask for linear kernel mapping config.
  sparc64: Probe cpu page size support more portably.
  sparc64: Support 2GB and 16GB page sizes for kernel linear mappings.
  sparc64: Fix bugs in unrolled 256-bit loops.
  sparc64: Avoid code duplication in crypto assembler.
  sparc64: Unroll CTR crypt loops in AES driver.
  sparc64: Unroll ECB decryption loops in AES driver.
  sparc64: Unroll ECB encryption loops in AES driver.
  sparc64: Add ctr mode support to AES driver.
  sparc64: Move AES driver over to a methods based implementation.
  ...
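The new drivers hook into the generic kernel crypto API, so in-kernel users pick up the acceleration without source changes once the priority adjustments above land. As a rough illustration only (a minimal sketch that is not part of this merge; the module and function names below are hypothetical), an in-kernel AES user simply asks for "aes" and algorithm selection routes it to the opcode-based driver on a Niagara 4:

#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/aes.h>

/* Hypothetical demo module, for illustration only. */
static int __init aes_opcode_demo_init(void)
{
        struct crypto_cipher *tfm;
        u8 key[AES_KEYSIZE_128] = { 0 };
        u8 in[AES_BLOCK_SIZE] = { 0 }, out[AES_BLOCK_SIZE];
        int err;

        /* Selects the highest-priority "aes" implementation registered
         * with the crypto API; with these patches applied that should be
         * the sparc64 opcode driver when the cpu supports it. */
        tfm = crypto_alloc_cipher("aes", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_cipher_setkey(tfm, key, sizeof(key));
        if (!err)
                crypto_cipher_encrypt_one(tfm, out, in);

        crypto_free_cipher(tfm);
        return err;
}

static void __exit aes_opcode_demo_exit(void)
{
}

module_init(aes_opcode_demo_init);
module_exit(aes_opcode_demo_exit);
MODULE_LICENSE("GPL");

The same pattern applies to the sha1/sha256/sha512, md5 and crc32c registrations: callers keep requesting the generic algorithm name, and the priority mechanism prefers the opcode drivers when the hardware provides the instructions.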
-rw-r--r--  arch/sparc/Kbuild   1
-rw-r--r--  arch/sparc/crypto/Makefile   25
-rw-r--r--  arch/sparc/crypto/aes_asm.S   1535
-rw-r--r--  arch/sparc/crypto/aes_glue.c   477
-rw-r--r--  arch/sparc/crypto/camellia_asm.S   563
-rw-r--r--  arch/sparc/crypto/camellia_glue.c   322
-rw-r--r--  arch/sparc/crypto/crc32c_asm.S   20
-rw-r--r--  arch/sparc/crypto/crc32c_glue.c   179
-rw-r--r--  arch/sparc/crypto/crop_devid.c   14
-rw-r--r--  arch/sparc/crypto/des_asm.S   418
-rw-r--r--  arch/sparc/crypto/des_glue.c   529
-rw-r--r--  arch/sparc/crypto/md5_asm.S   70
-rw-r--r--  arch/sparc/crypto/md5_glue.c   188
-rw-r--r--  arch/sparc/crypto/opcodes.h   99
-rw-r--r--  arch/sparc/crypto/sha1_asm.S   72
-rw-r--r--  arch/sparc/crypto/sha1_glue.c   183
-rw-r--r--  arch/sparc/crypto/sha256_asm.S   78
-rw-r--r--  arch/sparc/crypto/sha256_glue.c   241
-rw-r--r--  arch/sparc/crypto/sha512_asm.S   102
-rw-r--r--  arch/sparc/crypto/sha512_glue.c   226
-rw-r--r--  arch/sparc/include/asm/asi.h   4
-rw-r--r--  arch/sparc/include/asm/elf_64.h   9
-rw-r--r--  arch/sparc/include/asm/hypervisor.h   11
-rw-r--r--  arch/sparc/include/asm/mdesc.h   1
-rw-r--r--  arch/sparc/include/asm/pcr.h   36
-rw-r--r--  arch/sparc/include/asm/perfctr.h   30
-rw-r--r--  arch/sparc/include/asm/pstate.h   14
-rw-r--r--  arch/sparc/kernel/head_64.S   14
-rw-r--r--  arch/sparc/kernel/hvapi.c   1
-rw-r--r--  arch/sparc/kernel/hvcalls.S   16
-rw-r--r--  arch/sparc/kernel/ktlb.S   25
-rw-r--r--  arch/sparc/kernel/mdesc.c   24
-rw-r--r--  arch/sparc/kernel/nmi.c   21
-rw-r--r--  arch/sparc/kernel/pci_sun4v.c   2
-rw-r--r--  arch/sparc/kernel/pcr.c   172
-rw-r--r--  arch/sparc/kernel/perf_event.c   516
-rw-r--r--  arch/sparc/kernel/setup_64.c   67
-rw-r--r--  arch/sparc/lib/Makefile   3
-rw-r--r--  arch/sparc/lib/NG4copy_from_user.S   30
-rw-r--r--  arch/sparc/lib/NG4copy_page.S   57
-rw-r--r--  arch/sparc/lib/NG4copy_to_user.S   39
-rw-r--r--  arch/sparc/lib/NG4memcpy.S   360
-rw-r--r--  arch/sparc/lib/NG4patch.S   43
-rw-r--r--  arch/sparc/lib/NGpage.S   2
-rw-r--r--  arch/sparc/lib/ksyms.c   4
-rw-r--r--  arch/sparc/mm/init_64.c   230
-rw-r--r--  arch/sparc/mm/init_64.h   4
-rw-r--r--  crypto/Kconfig   97
-rw-r--r--  drivers/crypto/n2_core.c   2
-rw-r--r--  drivers/sbus/char/display7seg.c   2
-rw-r--r--  drivers/sbus/char/envctrl.c   8
-rw-r--r--  drivers/sbus/char/openprom.c   4
52 files changed, 6919 insertions, 271 deletions
diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild
index 5cd01161fd00..675afa285ddb 100644
--- a/arch/sparc/Kbuild
+++ b/arch/sparc/Kbuild
@@ -6,3 +6,4 @@ obj-y += kernel/
6obj-y += mm/
7obj-y += math-emu/
8obj-y += net/
9obj-y += crypto/
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
new file mode 100644
index 000000000000..6ae1ad5e502b
--- /dev/null
+++ b/arch/sparc/crypto/Makefile
@@ -0,0 +1,25 @@
1#
2# Arch-specific CryptoAPI modules.
3#
4
5obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o
6obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o
7obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o
8obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
9
10obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
11obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
12obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o
13
14obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
15
16sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o
17sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o
18sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o
19md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o
20
21aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o
22des-sparc64-y := des_asm.o des_glue.o crop_devid.o
23camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o
24
25crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
new file mode 100644
index 000000000000..23f6cbb910d3
--- /dev/null
+++ b/arch/sparc/crypto/aes_asm.S
@@ -0,0 +1,1535 @@
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3
4#include "opcodes.h"
5
6#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
7 AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
8 AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
9 AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \
10 AES_EROUND23(KEY_BASE + 6, T0, T1, I1)
11
12#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
13 AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
14 AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
15 AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \
16 AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \
17 AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \
18 AES_EROUND23(KEY_BASE + 6, T0, T1, I1) \
19 AES_EROUND01(KEY_BASE + 4, T2, T3, I2) \
20 AES_EROUND23(KEY_BASE + 6, T2, T3, I3)
21
22#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
23 AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
24 AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
25 AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \
26 AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1)
27
28#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
29 AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
30 AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
31 AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \
32 AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \
33 AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \
34 AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) \
35 AES_EROUND01_L(KEY_BASE + 4, T2, T3, I2) \
36 AES_EROUND23_L(KEY_BASE + 6, T2, T3, I3)
37
38 /* 10 rounds */
39#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \
40 ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
41 ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
42 ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
43 ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
44 ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
45
46#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
47 ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
48 ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
49 ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
50 ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
51 ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
52
53 /* 12 rounds */
54#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \
55 ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
56 ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
57 ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
58 ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
59 ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
60 ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
61
62#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
63 ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
64 ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
65 ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
66 ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
67 ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
68 ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
69
70 /* 14 rounds */
71#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \
72 ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
73 ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
74 ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
75 ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
76 ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
77 ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
78 ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
79
80#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
81 ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
82 TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
83
84#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
85 ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \
86 ldd [%o0 + 0xd0], %f56; \
87 ldd [%o0 + 0xd8], %f58; \
88 ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \
89 ldd [%o0 + 0xe0], %f60; \
90 ldd [%o0 + 0xe8], %f62; \
91 ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \
92 ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \
93 ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \
94 ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \
95 AES_EROUND01(KEY_BASE + 48, I0, I1, KEY_BASE + 0) \
96 AES_EROUND23(KEY_BASE + 50, I0, I1, KEY_BASE + 2) \
97 AES_EROUND01(KEY_BASE + 48, I2, I3, KEY_BASE + 4) \
98 AES_EROUND23(KEY_BASE + 50, I2, I3, KEY_BASE + 6) \
99 AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I0) \
100 AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I1) \
101 ldd [%o0 + 0x10], %f8; \
102 ldd [%o0 + 0x18], %f10; \
103 AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I2) \
104 AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I3) \
105 ldd [%o0 + 0x20], %f12; \
106 ldd [%o0 + 0x28], %f14;
107
108#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
109 AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
110 AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
111 AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \
112 AES_DROUND01(KEY_BASE + 6, T0, T1, I0)
113
114#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
115 AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
116 AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
117 AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \
118 AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \
119 AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \
120 AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \
121 AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \
122 AES_DROUND01(KEY_BASE + 6, T2, T3, I2)
123
124#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
125 AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
126 AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
127 AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \
128 AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0)
129
130#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
131 AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
132 AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
133 AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \
134 AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \
135 AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \
136 AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \
137 AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \
138 AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2)
139
140 /* 10 rounds */
141#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \
142 DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
143 DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
144 DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
145 DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
146 DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
147
148#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
149 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
150 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
151 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
152 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
153 DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
154
155 /* 12 rounds */
156#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \
157 DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
158 DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
159 DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
160 DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
161 DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
162 DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
163
164#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
165 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
166 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
167 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
168 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
169 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
170 DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
171
172 /* 14 rounds */
173#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \
174 DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
175 DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
176 DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
177 DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
178 DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
179 DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
180 DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
181
182#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
183 DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
184 TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
185
186#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
187 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \
188 ldd [%o0 + 0x18], %f56; \
189 ldd [%o0 + 0x10], %f58; \
190 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \
191 ldd [%o0 + 0x08], %f60; \
192 ldd [%o0 + 0x00], %f62; \
193 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \
194 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \
195 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \
196 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \
197 AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \
198 AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \
199 AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \
200 AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \
201 AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \
202 AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \
203 ldd [%o0 + 0xd8], %f8; \
204 ldd [%o0 + 0xd0], %f10; \
205 AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \
206 AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) \
207 ldd [%o0 + 0xc8], %f12; \
208 ldd [%o0 + 0xc0], %f14;
209
210 .align 32
211ENTRY(aes_sparc64_key_expand)
212 /* %o0=input_key, %o1=output_key, %o2=key_len */
213 VISEntry
214 ld [%o0 + 0x00], %f0
215 ld [%o0 + 0x04], %f1
216 ld [%o0 + 0x08], %f2
217 ld [%o0 + 0x0c], %f3
218
219 std %f0, [%o1 + 0x00]
220 std %f2, [%o1 + 0x08]
221 add %o1, 0x10, %o1
222
223 cmp %o2, 24
224 bl 2f
225 nop
226
227 be 1f
228 nop
229
230 /* 256-bit key expansion */
231 ld [%o0 + 0x10], %f4
232 ld [%o0 + 0x14], %f5
233 ld [%o0 + 0x18], %f6
234 ld [%o0 + 0x1c], %f7
235
236 std %f4, [%o1 + 0x00]
237 std %f6, [%o1 + 0x08]
238 add %o1, 0x10, %o1
239
240 AES_KEXPAND1(0, 6, 0x0, 8)
241 AES_KEXPAND2(2, 8, 10)
242 AES_KEXPAND0(4, 10, 12)
243 AES_KEXPAND2(6, 12, 14)
244 AES_KEXPAND1(8, 14, 0x1, 16)
245 AES_KEXPAND2(10, 16, 18)
246 AES_KEXPAND0(12, 18, 20)
247 AES_KEXPAND2(14, 20, 22)
248 AES_KEXPAND1(16, 22, 0x2, 24)
249 AES_KEXPAND2(18, 24, 26)
250 AES_KEXPAND0(20, 26, 28)
251 AES_KEXPAND2(22, 28, 30)
252 AES_KEXPAND1(24, 30, 0x3, 32)
253 AES_KEXPAND2(26, 32, 34)
254 AES_KEXPAND0(28, 34, 36)
255 AES_KEXPAND2(30, 36, 38)
256 AES_KEXPAND1(32, 38, 0x4, 40)
257 AES_KEXPAND2(34, 40, 42)
258 AES_KEXPAND0(36, 42, 44)
259 AES_KEXPAND2(38, 44, 46)
260 AES_KEXPAND1(40, 46, 0x5, 48)
261 AES_KEXPAND2(42, 48, 50)
262 AES_KEXPAND0(44, 50, 52)
263 AES_KEXPAND2(46, 52, 54)
264 AES_KEXPAND1(48, 54, 0x6, 56)
265 AES_KEXPAND2(50, 56, 58)
266
267 std %f8, [%o1 + 0x00]
268 std %f10, [%o1 + 0x08]
269 std %f12, [%o1 + 0x10]
270 std %f14, [%o1 + 0x18]
271 std %f16, [%o1 + 0x20]
272 std %f18, [%o1 + 0x28]
273 std %f20, [%o1 + 0x30]
274 std %f22, [%o1 + 0x38]
275 std %f24, [%o1 + 0x40]
276 std %f26, [%o1 + 0x48]
277 std %f28, [%o1 + 0x50]
278 std %f30, [%o1 + 0x58]
279 std %f32, [%o1 + 0x60]
280 std %f34, [%o1 + 0x68]
281 std %f36, [%o1 + 0x70]
282 std %f38, [%o1 + 0x78]
283 std %f40, [%o1 + 0x80]
284 std %f42, [%o1 + 0x88]
285 std %f44, [%o1 + 0x90]
286 std %f46, [%o1 + 0x98]
287 std %f48, [%o1 + 0xa0]
288 std %f50, [%o1 + 0xa8]
289 std %f52, [%o1 + 0xb0]
290 std %f54, [%o1 + 0xb8]
291 std %f56, [%o1 + 0xc0]
292 ba,pt %xcc, 80f
293 std %f58, [%o1 + 0xc8]
294
2951:
296 /* 192-bit key expansion */
297 ld [%o0 + 0x10], %f4
298 ld [%o0 + 0x14], %f5
299
300 std %f4, [%o1 + 0x00]
301 add %o1, 0x08, %o1
302
303 AES_KEXPAND1(0, 4, 0x0, 6)
304 AES_KEXPAND2(2, 6, 8)
305 AES_KEXPAND2(4, 8, 10)
306 AES_KEXPAND1(6, 10, 0x1, 12)
307 AES_KEXPAND2(8, 12, 14)
308 AES_KEXPAND2(10, 14, 16)
309 AES_KEXPAND1(12, 16, 0x2, 18)
310 AES_KEXPAND2(14, 18, 20)
311 AES_KEXPAND2(16, 20, 22)
312 AES_KEXPAND1(18, 22, 0x3, 24)
313 AES_KEXPAND2(20, 24, 26)
314 AES_KEXPAND2(22, 26, 28)
315 AES_KEXPAND1(24, 28, 0x4, 30)
316 AES_KEXPAND2(26, 30, 32)
317 AES_KEXPAND2(28, 32, 34)
318 AES_KEXPAND1(30, 34, 0x5, 36)
319 AES_KEXPAND2(32, 36, 38)
320 AES_KEXPAND2(34, 38, 40)
321 AES_KEXPAND1(36, 40, 0x6, 42)
322 AES_KEXPAND2(38, 42, 44)
323 AES_KEXPAND2(40, 44, 46)
324 AES_KEXPAND1(42, 46, 0x7, 48)
325 AES_KEXPAND2(44, 48, 50)
326
327 std %f6, [%o1 + 0x00]
328 std %f8, [%o1 + 0x08]
329 std %f10, [%o1 + 0x10]
330 std %f12, [%o1 + 0x18]
331 std %f14, [%o1 + 0x20]
332 std %f16, [%o1 + 0x28]
333 std %f18, [%o1 + 0x30]
334 std %f20, [%o1 + 0x38]
335 std %f22, [%o1 + 0x40]
336 std %f24, [%o1 + 0x48]
337 std %f26, [%o1 + 0x50]
338 std %f28, [%o1 + 0x58]
339 std %f30, [%o1 + 0x60]
340 std %f32, [%o1 + 0x68]
341 std %f34, [%o1 + 0x70]
342 std %f36, [%o1 + 0x78]
343 std %f38, [%o1 + 0x80]
344 std %f40, [%o1 + 0x88]
345 std %f42, [%o1 + 0x90]
346 std %f44, [%o1 + 0x98]
347 std %f46, [%o1 + 0xa0]
348 std %f48, [%o1 + 0xa8]
349 ba,pt %xcc, 80f
350 std %f50, [%o1 + 0xb0]
351
3522:
353 /* 128-bit key expansion */
354 AES_KEXPAND1(0, 2, 0x0, 4)
355 AES_KEXPAND2(2, 4, 6)
356 AES_KEXPAND1(4, 6, 0x1, 8)
357 AES_KEXPAND2(6, 8, 10)
358 AES_KEXPAND1(8, 10, 0x2, 12)
359 AES_KEXPAND2(10, 12, 14)
360 AES_KEXPAND1(12, 14, 0x3, 16)
361 AES_KEXPAND2(14, 16, 18)
362 AES_KEXPAND1(16, 18, 0x4, 20)
363 AES_KEXPAND2(18, 20, 22)
364 AES_KEXPAND1(20, 22, 0x5, 24)
365 AES_KEXPAND2(22, 24, 26)
366 AES_KEXPAND1(24, 26, 0x6, 28)
367 AES_KEXPAND2(26, 28, 30)
368 AES_KEXPAND1(28, 30, 0x7, 32)
369 AES_KEXPAND2(30, 32, 34)
370 AES_KEXPAND1(32, 34, 0x8, 36)
371 AES_KEXPAND2(34, 36, 38)
372 AES_KEXPAND1(36, 38, 0x9, 40)
373 AES_KEXPAND2(38, 40, 42)
374
375 std %f4, [%o1 + 0x00]
376 std %f6, [%o1 + 0x08]
377 std %f8, [%o1 + 0x10]
378 std %f10, [%o1 + 0x18]
379 std %f12, [%o1 + 0x20]
380 std %f14, [%o1 + 0x28]
381 std %f16, [%o1 + 0x30]
382 std %f18, [%o1 + 0x38]
383 std %f20, [%o1 + 0x40]
384 std %f22, [%o1 + 0x48]
385 std %f24, [%o1 + 0x50]
386 std %f26, [%o1 + 0x58]
387 std %f28, [%o1 + 0x60]
388 std %f30, [%o1 + 0x68]
389 std %f32, [%o1 + 0x70]
390 std %f34, [%o1 + 0x78]
391 std %f36, [%o1 + 0x80]
392 std %f38, [%o1 + 0x88]
393 std %f40, [%o1 + 0x90]
394 std %f42, [%o1 + 0x98]
39580:
396 retl
397 VISExit
398ENDPROC(aes_sparc64_key_expand)
399
400 .align 32
401ENTRY(aes_sparc64_encrypt_128)
402 /* %o0=key, %o1=input, %o2=output */
403 VISEntry
404 ld [%o1 + 0x00], %f4
405 ld [%o1 + 0x04], %f5
406 ld [%o1 + 0x08], %f6
407 ld [%o1 + 0x0c], %f7
408 ldd [%o0 + 0x00], %f8
409 ldd [%o0 + 0x08], %f10
410 ldd [%o0 + 0x10], %f12
411 ldd [%o0 + 0x18], %f14
412 ldd [%o0 + 0x20], %f16
413 ldd [%o0 + 0x28], %f18
414 ldd [%o0 + 0x30], %f20
415 ldd [%o0 + 0x38], %f22
416 ldd [%o0 + 0x40], %f24
417 ldd [%o0 + 0x48], %f26
418 ldd [%o0 + 0x50], %f28
419 ldd [%o0 + 0x58], %f30
420 ldd [%o0 + 0x60], %f32
421 ldd [%o0 + 0x68], %f34
422 ldd [%o0 + 0x70], %f36
423 ldd [%o0 + 0x78], %f38
424 ldd [%o0 + 0x80], %f40
425 ldd [%o0 + 0x88], %f42
426 ldd [%o0 + 0x90], %f44
427 ldd [%o0 + 0x98], %f46
428 ldd [%o0 + 0xa0], %f48
429 ldd [%o0 + 0xa8], %f50
430 fxor %f8, %f4, %f4
431 fxor %f10, %f6, %f6
432 ENCRYPT_128(12, 4, 6, 0, 2)
433 st %f4, [%o2 + 0x00]
434 st %f5, [%o2 + 0x04]
435 st %f6, [%o2 + 0x08]
436 st %f7, [%o2 + 0x0c]
437 retl
438 VISExit
439ENDPROC(aes_sparc64_encrypt_128)
440
441 .align 32
442ENTRY(aes_sparc64_encrypt_192)
443 /* %o0=key, %o1=input, %o2=output */
444 VISEntry
445 ld [%o1 + 0x00], %f4
446 ld [%o1 + 0x04], %f5
447 ld [%o1 + 0x08], %f6
448 ld [%o1 + 0x0c], %f7
449
450 ldd [%o0 + 0x00], %f8
451 ldd [%o0 + 0x08], %f10
452
453 fxor %f8, %f4, %f4
454 fxor %f10, %f6, %f6
455
456 ldd [%o0 + 0x10], %f8
457 ldd [%o0 + 0x18], %f10
458 ldd [%o0 + 0x20], %f12
459 ldd [%o0 + 0x28], %f14
460 add %o0, 0x20, %o0
461
462 ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
463
464 ldd [%o0 + 0x10], %f12
465 ldd [%o0 + 0x18], %f14
466 ldd [%o0 + 0x20], %f16
467 ldd [%o0 + 0x28], %f18
468 ldd [%o0 + 0x30], %f20
469 ldd [%o0 + 0x38], %f22
470 ldd [%o0 + 0x40], %f24
471 ldd [%o0 + 0x48], %f26
472 ldd [%o0 + 0x50], %f28
473 ldd [%o0 + 0x58], %f30
474 ldd [%o0 + 0x60], %f32
475 ldd [%o0 + 0x68], %f34
476 ldd [%o0 + 0x70], %f36
477 ldd [%o0 + 0x78], %f38
478 ldd [%o0 + 0x80], %f40
479 ldd [%o0 + 0x88], %f42
480 ldd [%o0 + 0x90], %f44
481 ldd [%o0 + 0x98], %f46
482 ldd [%o0 + 0xa0], %f48
483 ldd [%o0 + 0xa8], %f50
484
485
486 ENCRYPT_128(12, 4, 6, 0, 2)
487
488 st %f4, [%o2 + 0x00]
489 st %f5, [%o2 + 0x04]
490 st %f6, [%o2 + 0x08]
491 st %f7, [%o2 + 0x0c]
492
493 retl
494 VISExit
495ENDPROC(aes_sparc64_encrypt_192)
496
497 .align 32
498ENTRY(aes_sparc64_encrypt_256)
499 /* %o0=key, %o1=input, %o2=output */
500 VISEntry
501 ld [%o1 + 0x00], %f4
502 ld [%o1 + 0x04], %f5
503 ld [%o1 + 0x08], %f6
504 ld [%o1 + 0x0c], %f7
505
506 ldd [%o0 + 0x00], %f8
507 ldd [%o0 + 0x08], %f10
508
509 fxor %f8, %f4, %f4
510 fxor %f10, %f6, %f6
511
512 ldd [%o0 + 0x10], %f8
513
514 ldd [%o0 + 0x18], %f10
515 ldd [%o0 + 0x20], %f12
516 ldd [%o0 + 0x28], %f14
517 add %o0, 0x20, %o0
518
519 ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
520
521 ldd [%o0 + 0x10], %f8
522
523 ldd [%o0 + 0x18], %f10
524 ldd [%o0 + 0x20], %f12
525 ldd [%o0 + 0x28], %f14
526 add %o0, 0x20, %o0
527
528 ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
529
530 ldd [%o0 + 0x10], %f12
531 ldd [%o0 + 0x18], %f14
532 ldd [%o0 + 0x20], %f16
533 ldd [%o0 + 0x28], %f18
534 ldd [%o0 + 0x30], %f20
535 ldd [%o0 + 0x38], %f22
536 ldd [%o0 + 0x40], %f24
537 ldd [%o0 + 0x48], %f26
538 ldd [%o0 + 0x50], %f28
539 ldd [%o0 + 0x58], %f30
540 ldd [%o0 + 0x60], %f32
541 ldd [%o0 + 0x68], %f34
542 ldd [%o0 + 0x70], %f36
543 ldd [%o0 + 0x78], %f38
544 ldd [%o0 + 0x80], %f40
545 ldd [%o0 + 0x88], %f42
546 ldd [%o0 + 0x90], %f44
547 ldd [%o0 + 0x98], %f46
548 ldd [%o0 + 0xa0], %f48
549 ldd [%o0 + 0xa8], %f50
550
551 ENCRYPT_128(12, 4, 6, 0, 2)
552
553 st %f4, [%o2 + 0x00]
554 st %f5, [%o2 + 0x04]
555 st %f6, [%o2 + 0x08]
556 st %f7, [%o2 + 0x0c]
557
558 retl
559 VISExit
560ENDPROC(aes_sparc64_encrypt_256)
561
562 .align 32
563ENTRY(aes_sparc64_decrypt_128)
564 /* %o0=key, %o1=input, %o2=output */
565 VISEntry
566 ld [%o1 + 0x00], %f4
567 ld [%o1 + 0x04], %f5
568 ld [%o1 + 0x08], %f6
569 ld [%o1 + 0x0c], %f7
570 ldd [%o0 + 0xa0], %f8
571 ldd [%o0 + 0xa8], %f10
572 ldd [%o0 + 0x98], %f12
573 ldd [%o0 + 0x90], %f14
574 ldd [%o0 + 0x88], %f16
575 ldd [%o0 + 0x80], %f18
576 ldd [%o0 + 0x78], %f20
577 ldd [%o0 + 0x70], %f22
578 ldd [%o0 + 0x68], %f24
579 ldd [%o0 + 0x60], %f26
580 ldd [%o0 + 0x58], %f28
581 ldd [%o0 + 0x50], %f30
582 ldd [%o0 + 0x48], %f32
583 ldd [%o0 + 0x40], %f34
584 ldd [%o0 + 0x38], %f36
585 ldd [%o0 + 0x30], %f38
586 ldd [%o0 + 0x28], %f40
587 ldd [%o0 + 0x20], %f42
588 ldd [%o0 + 0x18], %f44
589 ldd [%o0 + 0x10], %f46
590 ldd [%o0 + 0x08], %f48
591 ldd [%o0 + 0x00], %f50
592 fxor %f8, %f4, %f4
593 fxor %f10, %f6, %f6
594 DECRYPT_128(12, 4, 6, 0, 2)
595 st %f4, [%o2 + 0x00]
596 st %f5, [%o2 + 0x04]
597 st %f6, [%o2 + 0x08]
598 st %f7, [%o2 + 0x0c]
599 retl
600 VISExit
601ENDPROC(aes_sparc64_decrypt_128)
602
603 .align 32
604ENTRY(aes_sparc64_decrypt_192)
605 /* %o0=key, %o1=input, %o2=output */
606 VISEntry
607 ld [%o1 + 0x00], %f4
608 ld [%o1 + 0x04], %f5
609 ld [%o1 + 0x08], %f6
610 ld [%o1 + 0x0c], %f7
611 ldd [%o0 + 0xc0], %f8
612 ldd [%o0 + 0xc8], %f10
613 ldd [%o0 + 0xb8], %f12
614 ldd [%o0 + 0xb0], %f14
615 ldd [%o0 + 0xa8], %f16
616 ldd [%o0 + 0xa0], %f18
617 fxor %f8, %f4, %f4
618 fxor %f10, %f6, %f6
619 ldd [%o0 + 0x98], %f20
620 ldd [%o0 + 0x90], %f22
621 ldd [%o0 + 0x88], %f24
622 ldd [%o0 + 0x80], %f26
623 DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2)
624 ldd [%o0 + 0x78], %f28
625 ldd [%o0 + 0x70], %f30
626 ldd [%o0 + 0x68], %f32
627 ldd [%o0 + 0x60], %f34
628 ldd [%o0 + 0x58], %f36
629 ldd [%o0 + 0x50], %f38
630 ldd [%o0 + 0x48], %f40
631 ldd [%o0 + 0x40], %f42
632 ldd [%o0 + 0x38], %f44
633 ldd [%o0 + 0x30], %f46
634 ldd [%o0 + 0x28], %f48
635 ldd [%o0 + 0x20], %f50
636 ldd [%o0 + 0x18], %f52
637 ldd [%o0 + 0x10], %f54
638 ldd [%o0 + 0x08], %f56
639 ldd [%o0 + 0x00], %f58
640 DECRYPT_128(20, 4, 6, 0, 2)
641 st %f4, [%o2 + 0x00]
642 st %f5, [%o2 + 0x04]
643 st %f6, [%o2 + 0x08]
644 st %f7, [%o2 + 0x0c]
645 retl
646 VISExit
647ENDPROC(aes_sparc64_decrypt_192)
648
649 .align 32
650ENTRY(aes_sparc64_decrypt_256)
651 /* %o0=key, %o1=input, %o2=output */
652 VISEntry
653 ld [%o1 + 0x00], %f4
654 ld [%o1 + 0x04], %f5
655 ld [%o1 + 0x08], %f6
656 ld [%o1 + 0x0c], %f7
657 ldd [%o0 + 0xe0], %f8
658 ldd [%o0 + 0xe8], %f10
659 ldd [%o0 + 0xd8], %f12
660 ldd [%o0 + 0xd0], %f14
661 ldd [%o0 + 0xc8], %f16
662 fxor %f8, %f4, %f4
663 ldd [%o0 + 0xc0], %f18
664 fxor %f10, %f6, %f6
665 ldd [%o0 + 0xb8], %f20
666 AES_DROUND23(12, 4, 6, 2)
667 ldd [%o0 + 0xb0], %f22
668 AES_DROUND01(14, 4, 6, 0)
669 ldd [%o0 + 0xa8], %f24
670 AES_DROUND23(16, 0, 2, 6)
671 ldd [%o0 + 0xa0], %f26
672 AES_DROUND01(18, 0, 2, 4)
673 ldd [%o0 + 0x98], %f12
674 AES_DROUND23(20, 4, 6, 2)
675 ldd [%o0 + 0x90], %f14
676 AES_DROUND01(22, 4, 6, 0)
677 ldd [%o0 + 0x88], %f16
678 AES_DROUND23(24, 0, 2, 6)
679 ldd [%o0 + 0x80], %f18
680 AES_DROUND01(26, 0, 2, 4)
681 ldd [%o0 + 0x78], %f20
682 AES_DROUND23(12, 4, 6, 2)
683 ldd [%o0 + 0x70], %f22
684 AES_DROUND01(14, 4, 6, 0)
685 ldd [%o0 + 0x68], %f24
686 AES_DROUND23(16, 0, 2, 6)
687 ldd [%o0 + 0x60], %f26
688 AES_DROUND01(18, 0, 2, 4)
689 ldd [%o0 + 0x58], %f28
690 AES_DROUND23(20, 4, 6, 2)
691 ldd [%o0 + 0x50], %f30
692 AES_DROUND01(22, 4, 6, 0)
693 ldd [%o0 + 0x48], %f32
694 AES_DROUND23(24, 0, 2, 6)
695 ldd [%o0 + 0x40], %f34
696 AES_DROUND01(26, 0, 2, 4)
697 ldd [%o0 + 0x38], %f36
698 AES_DROUND23(28, 4, 6, 2)
699 ldd [%o0 + 0x30], %f38
700 AES_DROUND01(30, 4, 6, 0)
701 ldd [%o0 + 0x28], %f40
702 AES_DROUND23(32, 0, 2, 6)
703 ldd [%o0 + 0x20], %f42
704 AES_DROUND01(34, 0, 2, 4)
705 ldd [%o0 + 0x18], %f44
706 AES_DROUND23(36, 4, 6, 2)
707 ldd [%o0 + 0x10], %f46
708 AES_DROUND01(38, 4, 6, 0)
709 ldd [%o0 + 0x08], %f48
710 AES_DROUND23(40, 0, 2, 6)
711 ldd [%o0 + 0x00], %f50
712 AES_DROUND01(42, 0, 2, 4)
713 AES_DROUND23(44, 4, 6, 2)
714 AES_DROUND01(46, 4, 6, 0)
715 AES_DROUND23_L(48, 0, 2, 6)
716 AES_DROUND01_L(50, 0, 2, 4)
717 st %f4, [%o2 + 0x00]
718 st %f5, [%o2 + 0x04]
719 st %f6, [%o2 + 0x08]
720 st %f7, [%o2 + 0x0c]
721 retl
722 VISExit
723ENDPROC(aes_sparc64_decrypt_256)
724
725 .align 32
726ENTRY(aes_sparc64_load_encrypt_keys_128)
727 /* %o0=key */
728 VISEntry
729 ldd [%o0 + 0x10], %f8
730 ldd [%o0 + 0x18], %f10
731 ldd [%o0 + 0x20], %f12
732 ldd [%o0 + 0x28], %f14
733 ldd [%o0 + 0x30], %f16
734 ldd [%o0 + 0x38], %f18
735 ldd [%o0 + 0x40], %f20
736 ldd [%o0 + 0x48], %f22
737 ldd [%o0 + 0x50], %f24
738 ldd [%o0 + 0x58], %f26
739 ldd [%o0 + 0x60], %f28
740 ldd [%o0 + 0x68], %f30
741 ldd [%o0 + 0x70], %f32
742 ldd [%o0 + 0x78], %f34
743 ldd [%o0 + 0x80], %f36
744 ldd [%o0 + 0x88], %f38
745 ldd [%o0 + 0x90], %f40
746 ldd [%o0 + 0x98], %f42
747 ldd [%o0 + 0xa0], %f44
748 retl
749 ldd [%o0 + 0xa8], %f46
750ENDPROC(aes_sparc64_load_encrypt_keys_128)
751
752 .align 32
753ENTRY(aes_sparc64_load_encrypt_keys_192)
754 /* %o0=key */
755 VISEntry
756 ldd [%o0 + 0x10], %f8
757 ldd [%o0 + 0x18], %f10
758 ldd [%o0 + 0x20], %f12
759 ldd [%o0 + 0x28], %f14
760 ldd [%o0 + 0x30], %f16
761 ldd [%o0 + 0x38], %f18
762 ldd [%o0 + 0x40], %f20
763 ldd [%o0 + 0x48], %f22
764 ldd [%o0 + 0x50], %f24
765 ldd [%o0 + 0x58], %f26
766 ldd [%o0 + 0x60], %f28
767 ldd [%o0 + 0x68], %f30
768 ldd [%o0 + 0x70], %f32
769 ldd [%o0 + 0x78], %f34
770 ldd [%o0 + 0x80], %f36
771 ldd [%o0 + 0x88], %f38
772 ldd [%o0 + 0x90], %f40
773 ldd [%o0 + 0x98], %f42
774 ldd [%o0 + 0xa0], %f44
775 ldd [%o0 + 0xa8], %f46
776 ldd [%o0 + 0xb0], %f48
777 ldd [%o0 + 0xb8], %f50
778 ldd [%o0 + 0xc0], %f52
779 retl
780 ldd [%o0 + 0xc8], %f54
781ENDPROC(aes_sparc64_load_encrypt_keys_192)
782
783 .align 32
784ENTRY(aes_sparc64_load_encrypt_keys_256)
785 /* %o0=key */
786 VISEntry
787 ldd [%o0 + 0x10], %f8
788 ldd [%o0 + 0x18], %f10
789 ldd [%o0 + 0x20], %f12
790 ldd [%o0 + 0x28], %f14
791 ldd [%o0 + 0x30], %f16
792 ldd [%o0 + 0x38], %f18
793 ldd [%o0 + 0x40], %f20
794 ldd [%o0 + 0x48], %f22
795 ldd [%o0 + 0x50], %f24
796 ldd [%o0 + 0x58], %f26
797 ldd [%o0 + 0x60], %f28
798 ldd [%o0 + 0x68], %f30
799 ldd [%o0 + 0x70], %f32
800 ldd [%o0 + 0x78], %f34
801 ldd [%o0 + 0x80], %f36
802 ldd [%o0 + 0x88], %f38
803 ldd [%o0 + 0x90], %f40
804 ldd [%o0 + 0x98], %f42
805 ldd [%o0 + 0xa0], %f44
806 ldd [%o0 + 0xa8], %f46
807 ldd [%o0 + 0xb0], %f48
808 ldd [%o0 + 0xb8], %f50
809 ldd [%o0 + 0xc0], %f52
810 ldd [%o0 + 0xc8], %f54
811 ldd [%o0 + 0xd0], %f56
812 ldd [%o0 + 0xd8], %f58
813 ldd [%o0 + 0xe0], %f60
814 retl
815 ldd [%o0 + 0xe8], %f62
816ENDPROC(aes_sparc64_load_encrypt_keys_256)
817
818 .align 32
819ENTRY(aes_sparc64_load_decrypt_keys_128)
820 /* %o0=key */
821 VISEntry
822 ldd [%o0 + 0x98], %f8
823 ldd [%o0 + 0x90], %f10
824 ldd [%o0 + 0x88], %f12
825 ldd [%o0 + 0x80], %f14
826 ldd [%o0 + 0x78], %f16
827 ldd [%o0 + 0x70], %f18
828 ldd [%o0 + 0x68], %f20
829 ldd [%o0 + 0x60], %f22
830 ldd [%o0 + 0x58], %f24
831 ldd [%o0 + 0x50], %f26
832 ldd [%o0 + 0x48], %f28
833 ldd [%o0 + 0x40], %f30
834 ldd [%o0 + 0x38], %f32
835 ldd [%o0 + 0x30], %f34
836 ldd [%o0 + 0x28], %f36
837 ldd [%o0 + 0x20], %f38
838 ldd [%o0 + 0x18], %f40
839 ldd [%o0 + 0x10], %f42
840 ldd [%o0 + 0x08], %f44
841 retl
842 ldd [%o0 + 0x00], %f46
843ENDPROC(aes_sparc64_load_decrypt_keys_128)
844
845 .align 32
846ENTRY(aes_sparc64_load_decrypt_keys_192)
847 /* %o0=key */
848 VISEntry
849 ldd [%o0 + 0xb8], %f8
850 ldd [%o0 + 0xb0], %f10
851 ldd [%o0 + 0xa8], %f12
852 ldd [%o0 + 0xa0], %f14
853 ldd [%o0 + 0x98], %f16
854 ldd [%o0 + 0x90], %f18
855 ldd [%o0 + 0x88], %f20
856 ldd [%o0 + 0x80], %f22
857 ldd [%o0 + 0x78], %f24
858 ldd [%o0 + 0x70], %f26
859 ldd [%o0 + 0x68], %f28
860 ldd [%o0 + 0x60], %f30
861 ldd [%o0 + 0x58], %f32
862 ldd [%o0 + 0x50], %f34
863 ldd [%o0 + 0x48], %f36
864 ldd [%o0 + 0x40], %f38
865 ldd [%o0 + 0x38], %f40
866 ldd [%o0 + 0x30], %f42
867 ldd [%o0 + 0x28], %f44
868 ldd [%o0 + 0x20], %f46
869 ldd [%o0 + 0x18], %f48
870 ldd [%o0 + 0x10], %f50
871 ldd [%o0 + 0x08], %f52
872 retl
873 ldd [%o0 + 0x00], %f54
874ENDPROC(aes_sparc64_load_decrypt_keys_192)
875
876 .align 32
877ENTRY(aes_sparc64_load_decrypt_keys_256)
878 /* %o0=key */
879 VISEntry
880 ldd [%o0 + 0xd8], %f8
881 ldd [%o0 + 0xd0], %f10
882 ldd [%o0 + 0xc8], %f12
883 ldd [%o0 + 0xc0], %f14
884 ldd [%o0 + 0xb8], %f16
885 ldd [%o0 + 0xb0], %f18
886 ldd [%o0 + 0xa8], %f20
887 ldd [%o0 + 0xa0], %f22
888 ldd [%o0 + 0x98], %f24
889 ldd [%o0 + 0x90], %f26
890 ldd [%o0 + 0x88], %f28
891 ldd [%o0 + 0x80], %f30
892 ldd [%o0 + 0x78], %f32
893 ldd [%o0 + 0x70], %f34
894 ldd [%o0 + 0x68], %f36
895 ldd [%o0 + 0x60], %f38
896 ldd [%o0 + 0x58], %f40
897 ldd [%o0 + 0x50], %f42
898 ldd [%o0 + 0x48], %f44
899 ldd [%o0 + 0x40], %f46
900 ldd [%o0 + 0x38], %f48
901 ldd [%o0 + 0x30], %f50
902 ldd [%o0 + 0x28], %f52
903 ldd [%o0 + 0x20], %f54
904 ldd [%o0 + 0x18], %f56
905 ldd [%o0 + 0x10], %f58
906 ldd [%o0 + 0x08], %f60
907 retl
908 ldd [%o0 + 0x00], %f62
909ENDPROC(aes_sparc64_load_decrypt_keys_256)
910
911 .align 32
912ENTRY(aes_sparc64_ecb_encrypt_128)
913 /* %o0=key, %o1=input, %o2=output, %o3=len */
914 ldx [%o0 + 0x00], %g1
915 subcc %o3, 0x10, %o3
916 be 10f
917 ldx [%o0 + 0x08], %g2
9181: ldx [%o1 + 0x00], %g3
919 ldx [%o1 + 0x08], %g7
920 ldx [%o1 + 0x10], %o4
921 ldx [%o1 + 0x18], %o5
922 xor %g1, %g3, %g3
923 xor %g2, %g7, %g7
924 MOVXTOD_G3_F4
925 MOVXTOD_G7_F6
926 xor %g1, %o4, %g3
927 xor %g2, %o5, %g7
928 MOVXTOD_G3_F60
929 MOVXTOD_G7_F62
930 ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
931 std %f4, [%o2 + 0x00]
932 std %f6, [%o2 + 0x08]
933 std %f60, [%o2 + 0x10]
934 std %f62, [%o2 + 0x18]
935 sub %o3, 0x20, %o3
936 add %o1, 0x20, %o1
937 brgz %o3, 1b
938 add %o2, 0x20, %o2
939 brlz,pt %o3, 11f
940 nop
94110: ldx [%o1 + 0x00], %g3
942 ldx [%o1 + 0x08], %g7
943 xor %g1, %g3, %g3
944 xor %g2, %g7, %g7
945 MOVXTOD_G3_F4
946 MOVXTOD_G7_F6
947 ENCRYPT_128(8, 4, 6, 0, 2)
948 std %f4, [%o2 + 0x00]
949 std %f6, [%o2 + 0x08]
95011: retl
951 nop
952ENDPROC(aes_sparc64_ecb_encrypt_128)
953
954 .align 32
955ENTRY(aes_sparc64_ecb_encrypt_192)
956 /* %o0=key, %o1=input, %o2=output, %o3=len */
957 ldx [%o0 + 0x00], %g1
958 subcc %o3, 0x10, %o3
959 be 10f
960 ldx [%o0 + 0x08], %g2
9611: ldx [%o1 + 0x00], %g3
962 ldx [%o1 + 0x08], %g7
963 ldx [%o1 + 0x10], %o4
964 ldx [%o1 + 0x18], %o5
965 xor %g1, %g3, %g3
966 xor %g2, %g7, %g7
967 MOVXTOD_G3_F4
968 MOVXTOD_G7_F6
969 xor %g1, %o4, %g3
970 xor %g2, %o5, %g7
971 MOVXTOD_G3_F60
972 MOVXTOD_G7_F62
973 ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
974 std %f4, [%o2 + 0x00]
975 std %f6, [%o2 + 0x08]
976 std %f60, [%o2 + 0x10]
977 std %f62, [%o2 + 0x18]
978 sub %o3, 0x20, %o3
979 add %o1, 0x20, %o1
980 brgz %o3, 1b
981 add %o2, 0x20, %o2
982 brlz,pt %o3, 11f
983 nop
98410: ldx [%o1 + 0x00], %g3
985 ldx [%o1 + 0x08], %g7
986 xor %g1, %g3, %g3
987 xor %g2, %g7, %g7
988 MOVXTOD_G3_F4
989 MOVXTOD_G7_F6
990 ENCRYPT_192(8, 4, 6, 0, 2)
991 std %f4, [%o2 + 0x00]
992 std %f6, [%o2 + 0x08]
99311: retl
994 nop
995ENDPROC(aes_sparc64_ecb_encrypt_192)
996
997 .align 32
998ENTRY(aes_sparc64_ecb_encrypt_256)
999 /* %o0=key, %o1=input, %o2=output, %o3=len */
1000 ldx [%o0 + 0x00], %g1
1001 subcc %o3, 0x10, %o3
1002 be 10f
1003 ldx [%o0 + 0x08], %g2
10041: ldx [%o1 + 0x00], %g3
1005 ldx [%o1 + 0x08], %g7
1006 ldx [%o1 + 0x10], %o4
1007 ldx [%o1 + 0x18], %o5
1008 xor %g1, %g3, %g3
1009 xor %g2, %g7, %g7
1010 MOVXTOD_G3_F4
1011 MOVXTOD_G7_F6
1012 xor %g1, %o4, %g3
1013 xor %g2, %o5, %g7
1014 MOVXTOD_G3_F0
1015 MOVXTOD_G7_F2
1016 ENCRYPT_256_2(8, 4, 6, 0, 2)
1017 std %f4, [%o2 + 0x00]
1018 std %f6, [%o2 + 0x08]
1019 std %f0, [%o2 + 0x10]
1020 std %f2, [%o2 + 0x18]
1021 sub %o3, 0x20, %o3
1022 add %o1, 0x20, %o1
1023 brgz %o3, 1b
1024 add %o2, 0x20, %o2
1025 brlz,pt %o3, 11f
1026 nop
102710: ldx [%o1 + 0x00], %g3
1028 ldx [%o1 + 0x08], %g7
1029 xor %g1, %g3, %g3
1030 xor %g2, %g7, %g7
1031 MOVXTOD_G3_F4
1032 MOVXTOD_G7_F6
1033 ENCRYPT_256(8, 4, 6, 0, 2)
1034 std %f4, [%o2 + 0x00]
1035 std %f6, [%o2 + 0x08]
103611: retl
1037 nop
1038ENDPROC(aes_sparc64_ecb_encrypt_256)
1039
1040 .align 32
1041ENTRY(aes_sparc64_ecb_decrypt_128)
1042 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
1043 ldx [%o0 - 0x10], %g1
1044 subcc %o3, 0x10, %o3
1045 be 10f
1046 ldx [%o0 - 0x08], %g2
10471: ldx [%o1 + 0x00], %g3
1048 ldx [%o1 + 0x08], %g7
1049 ldx [%o1 + 0x10], %o4
1050 ldx [%o1 + 0x18], %o5
1051 xor %g1, %g3, %g3
1052 xor %g2, %g7, %g7
1053 MOVXTOD_G3_F4
1054 MOVXTOD_G7_F6
1055 xor %g1, %o4, %g3
1056 xor %g2, %o5, %g7
1057 MOVXTOD_G3_F60
1058 MOVXTOD_G7_F62
1059 DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
1060 std %f4, [%o2 + 0x00]
1061 std %f6, [%o2 + 0x08]
1062 std %f60, [%o2 + 0x10]
1063 std %f62, [%o2 + 0x18]
1064 sub %o3, 0x20, %o3
1065 add %o1, 0x20, %o1
1066 brgz,pt %o3, 1b
1067 add %o2, 0x20, %o2
1068 brlz,pt %o3, 11f
1069 nop
107010: ldx [%o1 + 0x00], %g3
1071 ldx [%o1 + 0x08], %g7
1072 xor %g1, %g3, %g3
1073 xor %g2, %g7, %g7
1074 MOVXTOD_G3_F4
1075 MOVXTOD_G7_F6
1076 DECRYPT_128(8, 4, 6, 0, 2)
1077 std %f4, [%o2 + 0x00]
1078 std %f6, [%o2 + 0x08]
107911: retl
1080 nop
1081ENDPROC(aes_sparc64_ecb_decrypt_128)
1082
1083 .align 32
1084ENTRY(aes_sparc64_ecb_decrypt_192)
1085 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
1086 ldx [%o0 - 0x10], %g1
1087 subcc %o3, 0x10, %o3
1088 be 10f
1089 ldx [%o0 - 0x08], %g2
10901: ldx [%o1 + 0x00], %g3
1091 ldx [%o1 + 0x08], %g7
1092 ldx [%o1 + 0x10], %o4
1093 ldx [%o1 + 0x18], %o5
1094 xor %g1, %g3, %g3
1095 xor %g2, %g7, %g7
1096 MOVXTOD_G3_F4
1097 MOVXTOD_G7_F6
1098 xor %g1, %o4, %g3
1099 xor %g2, %o5, %g7
1100 MOVXTOD_G3_F60
1101 MOVXTOD_G7_F62
1102 DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
1103 std %f4, [%o2 + 0x00]
1104 std %f6, [%o2 + 0x08]
1105 std %f60, [%o2 + 0x10]
1106 std %f62, [%o2 + 0x18]
1107 sub %o3, 0x20, %o3
1108 add %o1, 0x20, %o1
1109 brgz,pt %o3, 1b
1110 add %o2, 0x20, %o2
1111 brlz,pt %o3, 11f
1112 nop
111310: ldx [%o1 + 0x00], %g3
1114 ldx [%o1 + 0x08], %g7
1115 xor %g1, %g3, %g3
1116 xor %g2, %g7, %g7
1117 MOVXTOD_G3_F4
1118 MOVXTOD_G7_F6
1119 DECRYPT_192(8, 4, 6, 0, 2)
1120 std %f4, [%o2 + 0x00]
1121 std %f6, [%o2 + 0x08]
112211: retl
1123 nop
1124ENDPROC(aes_sparc64_ecb_decrypt_192)
1125
1126 .align 32
1127ENTRY(aes_sparc64_ecb_decrypt_256)
1128 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
1129 ldx [%o0 - 0x10], %g1
1130 subcc %o3, 0x10, %o3
1131 be 10f
1132 ldx [%o0 - 0x08], %g2
1133 sub %o0, 0xf0, %o0
11341: ldx [%o1 + 0x00], %g3
1135 ldx [%o1 + 0x08], %g7
1136 ldx [%o1 + 0x10], %o4
1137 ldx [%o1 + 0x18], %o5
1138 xor %g1, %g3, %g3
1139 xor %g2, %g7, %g7
1140 MOVXTOD_G3_F4
1141 MOVXTOD_G7_F6
1142 xor %g1, %o4, %g3
1143 xor %g2, %o5, %g7
1144 MOVXTOD_G3_F0
1145 MOVXTOD_G7_F2
1146 DECRYPT_256_2(8, 4, 6, 0, 2)
1147 std %f4, [%o2 + 0x00]
1148 std %f6, [%o2 + 0x08]
1149 std %f0, [%o2 + 0x10]
1150 std %f2, [%o2 + 0x18]
1151 sub %o3, 0x20, %o3
1152 add %o1, 0x20, %o1
1153 brgz,pt %o3, 1b
1154 add %o2, 0x20, %o2
1155 brlz,pt %o3, 11f
1156 nop
115710: ldx [%o1 + 0x00], %g3
1158 ldx [%o1 + 0x08], %g7
1159 xor %g1, %g3, %g3
1160 xor %g2, %g7, %g7
1161 MOVXTOD_G3_F4
1162 MOVXTOD_G7_F6
1163 DECRYPT_256(8, 4, 6, 0, 2)
1164 std %f4, [%o2 + 0x00]
1165 std %f6, [%o2 + 0x08]
116611: retl
1167 nop
1168ENDPROC(aes_sparc64_ecb_decrypt_256)
1169
1170 .align 32
1171ENTRY(aes_sparc64_cbc_encrypt_128)
1172 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1173 ldd [%o4 + 0x00], %f4
1174 ldd [%o4 + 0x08], %f6
1175 ldx [%o0 + 0x00], %g1
1176 ldx [%o0 + 0x08], %g2
11771: ldx [%o1 + 0x00], %g3
1178 ldx [%o1 + 0x08], %g7
1179 add %o1, 0x10, %o1
1180 xor %g1, %g3, %g3
1181 xor %g2, %g7, %g7
1182 MOVXTOD_G3_F0
1183 MOVXTOD_G7_F2
1184 fxor %f4, %f0, %f4
1185 fxor %f6, %f2, %f6
1186 ENCRYPT_128(8, 4, 6, 0, 2)
1187 std %f4, [%o2 + 0x00]
1188 std %f6, [%o2 + 0x08]
1189 subcc %o3, 0x10, %o3
1190 bne,pt %xcc, 1b
1191 add %o2, 0x10, %o2
1192 std %f4, [%o4 + 0x00]
1193 std %f6, [%o4 + 0x08]
1194 retl
1195 nop
1196ENDPROC(aes_sparc64_cbc_encrypt_128)
1197
1198 .align 32
1199ENTRY(aes_sparc64_cbc_encrypt_192)
1200 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1201 ldd [%o4 + 0x00], %f4
1202 ldd [%o4 + 0x08], %f6
1203 ldx [%o0 + 0x00], %g1
1204 ldx [%o0 + 0x08], %g2
12051: ldx [%o1 + 0x00], %g3
1206 ldx [%o1 + 0x08], %g7
1207 add %o1, 0x10, %o1
1208 xor %g1, %g3, %g3
1209 xor %g2, %g7, %g7
1210 MOVXTOD_G3_F0
1211 MOVXTOD_G7_F2
1212 fxor %f4, %f0, %f4
1213 fxor %f6, %f2, %f6
1214 ENCRYPT_192(8, 4, 6, 0, 2)
1215 std %f4, [%o2 + 0x00]
1216 std %f6, [%o2 + 0x08]
1217 subcc %o3, 0x10, %o3
1218 bne,pt %xcc, 1b
1219 add %o2, 0x10, %o2
1220 std %f4, [%o4 + 0x00]
1221 std %f6, [%o4 + 0x08]
1222 retl
1223 nop
1224ENDPROC(aes_sparc64_cbc_encrypt_192)
1225
1226 .align 32
1227ENTRY(aes_sparc64_cbc_encrypt_256)
1228 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1229 ldd [%o4 + 0x00], %f4
1230 ldd [%o4 + 0x08], %f6
1231 ldx [%o0 + 0x00], %g1
1232 ldx [%o0 + 0x08], %g2
12331: ldx [%o1 + 0x00], %g3
1234 ldx [%o1 + 0x08], %g7
1235 add %o1, 0x10, %o1
1236 xor %g1, %g3, %g3
1237 xor %g2, %g7, %g7
1238 MOVXTOD_G3_F0
1239 MOVXTOD_G7_F2
1240 fxor %f4, %f0, %f4
1241 fxor %f6, %f2, %f6
1242 ENCRYPT_256(8, 4, 6, 0, 2)
1243 std %f4, [%o2 + 0x00]
1244 std %f6, [%o2 + 0x08]
1245 subcc %o3, 0x10, %o3
1246 bne,pt %xcc, 1b
1247 add %o2, 0x10, %o2
1248 std %f4, [%o4 + 0x00]
1249 std %f6, [%o4 + 0x08]
1250 retl
1251 nop
1252ENDPROC(aes_sparc64_cbc_encrypt_256)
1253
1254 .align 32
1255ENTRY(aes_sparc64_cbc_decrypt_128)
1256 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
1257 ldx [%o0 - 0x10], %g1
1258 ldx [%o0 - 0x08], %g2
1259 ldx [%o4 + 0x00], %o0
1260 ldx [%o4 + 0x08], %o5
12611: ldx [%o1 + 0x00], %g3
1262 ldx [%o1 + 0x08], %g7
1263 add %o1, 0x10, %o1
1264 xor %g1, %g3, %g3
1265 xor %g2, %g7, %g7
1266 MOVXTOD_G3_F4
1267 MOVXTOD_G7_F6
1268 DECRYPT_128(8, 4, 6, 0, 2)
1269 MOVXTOD_O0_F0
1270 MOVXTOD_O5_F2
1271 xor %g1, %g3, %o0
1272 xor %g2, %g7, %o5
1273 fxor %f4, %f0, %f4
1274 fxor %f6, %f2, %f6
1275 std %f4, [%o2 + 0x00]
1276 std %f6, [%o2 + 0x08]
1277 subcc %o3, 0x10, %o3
1278 bne,pt %xcc, 1b
1279 add %o2, 0x10, %o2
1280 stx %o0, [%o4 + 0x00]
1281 stx %o5, [%o4 + 0x08]
1282 retl
1283 nop
1284ENDPROC(aes_sparc64_cbc_decrypt_128)
1285
1286 .align 32
1287ENTRY(aes_sparc64_cbc_decrypt_192)
1288 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
1289 ldx [%o0 - 0x10], %g1
1290 ldx [%o0 - 0x08], %g2
1291 ldx [%o4 + 0x00], %o0
1292 ldx [%o4 + 0x08], %o5
12931: ldx [%o1 + 0x00], %g3
1294 ldx [%o1 + 0x08], %g7
1295 add %o1, 0x10, %o1
1296 xor %g1, %g3, %g3
1297 xor %g2, %g7, %g7
1298 MOVXTOD_G3_F4
1299 MOVXTOD_G7_F6
1300 DECRYPT_192(8, 4, 6, 0, 2)
1301 MOVXTOD_O0_F0
1302 MOVXTOD_O5_F2
1303 xor %g1, %g3, %o0
1304 xor %g2, %g7, %o5
1305 fxor %f4, %f0, %f4
1306 fxor %f6, %f2, %f6
1307 std %f4, [%o2 + 0x00]
1308 std %f6, [%o2 + 0x08]
1309 subcc %o3, 0x10, %o3
1310 bne,pt %xcc, 1b
1311 add %o2, 0x10, %o2
1312 stx %o0, [%o4 + 0x00]
1313 stx %o5, [%o4 + 0x08]
1314 retl
1315 nop
1316ENDPROC(aes_sparc64_cbc_decrypt_192)
1317
1318 .align 32
1319ENTRY(aes_sparc64_cbc_decrypt_256)
1320 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
1321 ldx [%o0 - 0x10], %g1
1322 ldx [%o0 - 0x08], %g2
1323 ldx [%o4 + 0x00], %o0
1324 ldx [%o4 + 0x08], %o5
13251: ldx [%o1 + 0x00], %g3
1326 ldx [%o1 + 0x08], %g7
1327 add %o1, 0x10, %o1
1328 xor %g1, %g3, %g3
1329 xor %g2, %g7, %g7
1330 MOVXTOD_G3_F4
1331 MOVXTOD_G7_F6
1332 DECRYPT_256(8, 4, 6, 0, 2)
1333 MOVXTOD_O0_F0
1334 MOVXTOD_O5_F2
1335 xor %g1, %g3, %o0
1336 xor %g2, %g7, %o5
1337 fxor %f4, %f0, %f4
1338 fxor %f6, %f2, %f6
1339 std %f4, [%o2 + 0x00]
1340 std %f6, [%o2 + 0x08]
1341 subcc %o3, 0x10, %o3
1342 bne,pt %xcc, 1b
1343 add %o2, 0x10, %o2
1344 stx %o0, [%o4 + 0x00]
1345 stx %o5, [%o4 + 0x08]
1346 retl
1347 nop
1348ENDPROC(aes_sparc64_cbc_decrypt_256)
1349
1350 .align 32
1351ENTRY(aes_sparc64_ctr_crypt_128)
1352 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1353 ldx [%o4 + 0x00], %g3
1354 ldx [%o4 + 0x08], %g7
1355 subcc %o3, 0x10, %o3
1356 ldx [%o0 + 0x00], %g1
1357 be 10f
1358 ldx [%o0 + 0x08], %g2
13591: xor %g1, %g3, %o5
1360 MOVXTOD_O5_F0
1361 xor %g2, %g7, %o5
1362 MOVXTOD_O5_F2
1363 add %g7, 1, %g7
1364 add %g3, 1, %o5
1365 movrz %g7, %o5, %g3
1366 xor %g1, %g3, %o5
1367 MOVXTOD_O5_F4
1368 xor %g2, %g7, %o5
1369 MOVXTOD_O5_F6
1370 add %g7, 1, %g7
1371 add %g3, 1, %o5
1372 movrz %g7, %o5, %g3
1373 ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
1374 ldd [%o1 + 0x00], %f56
1375 ldd [%o1 + 0x08], %f58
1376 ldd [%o1 + 0x10], %f60
1377 ldd [%o1 + 0x18], %f62
1378 fxor %f56, %f0, %f56
1379 fxor %f58, %f2, %f58
1380 fxor %f60, %f4, %f60
1381 fxor %f62, %f6, %f62
1382 std %f56, [%o2 + 0x00]
1383 std %f58, [%o2 + 0x08]
1384 std %f60, [%o2 + 0x10]
1385 std %f62, [%o2 + 0x18]
1386 subcc %o3, 0x20, %o3
1387 add %o1, 0x20, %o1
1388 brgz %o3, 1b
1389 add %o2, 0x20, %o2
1390 brlz,pt %o3, 11f
1391 nop
139210: xor %g1, %g3, %o5
1393 MOVXTOD_O5_F0
1394 xor %g2, %g7, %o5
1395 MOVXTOD_O5_F2
1396 add %g7, 1, %g7
1397 add %g3, 1, %o5
1398 movrz %g7, %o5, %g3
1399 ENCRYPT_128(8, 0, 2, 4, 6)
1400 ldd [%o1 + 0x00], %f4
1401 ldd [%o1 + 0x08], %f6
1402 fxor %f4, %f0, %f4
1403 fxor %f6, %f2, %f6
1404 std %f4, [%o2 + 0x00]
1405 std %f6, [%o2 + 0x08]
140611: stx %g3, [%o4 + 0x00]
1407 retl
1408 stx %g7, [%o4 + 0x08]
1409ENDPROC(aes_sparc64_ctr_crypt_128)
1410
1411 .align 32
1412ENTRY(aes_sparc64_ctr_crypt_192)
1413 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1414 ldx [%o4 + 0x00], %g3
1415 ldx [%o4 + 0x08], %g7
1416 subcc %o3, 0x10, %o3
1417 ldx [%o0 + 0x00], %g1
1418 be 10f
1419 ldx [%o0 + 0x08], %g2
14201: xor %g1, %g3, %o5
1421 MOVXTOD_O5_F0
1422 xor %g2, %g7, %o5
1423 MOVXTOD_O5_F2
1424 add %g7, 1, %g7
1425 add %g3, 1, %o5
1426 movrz %g7, %o5, %g3
1427 xor %g1, %g3, %o5
1428 MOVXTOD_O5_F4
1429 xor %g2, %g7, %o5
1430 MOVXTOD_O5_F6
1431 add %g7, 1, %g7
1432 add %g3, 1, %o5
1433 movrz %g7, %o5, %g3
1434 ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
1435 ldd [%o1 + 0x00], %f56
1436 ldd [%o1 + 0x08], %f58
1437 ldd [%o1 + 0x10], %f60
1438 ldd [%o1 + 0x18], %f62
1439 fxor %f56, %f0, %f56
1440 fxor %f58, %f2, %f58
1441 fxor %f60, %f4, %f60
1442 fxor %f62, %f6, %f62
1443 std %f56, [%o2 + 0x00]
1444 std %f58, [%o2 + 0x08]
1445 std %f60, [%o2 + 0x10]
1446 std %f62, [%o2 + 0x18]
1447 subcc %o3, 0x20, %o3
1448 add %o1, 0x20, %o1
1449 brgz %o3, 1b
1450 add %o2, 0x20, %o2
1451 brlz,pt %o3, 11f
1452 nop
145310: xor %g1, %g3, %o5
1454 MOVXTOD_O5_F0
1455 xor %g2, %g7, %o5
1456 MOVXTOD_O5_F2
1457 add %g7, 1, %g7
1458 add %g3, 1, %o5
1459 movrz %g7, %o5, %g3
1460 ENCRYPT_192(8, 0, 2, 4, 6)
1461 ldd [%o1 + 0x00], %f4
1462 ldd [%o1 + 0x08], %f6
1463 fxor %f4, %f0, %f4
1464 fxor %f6, %f2, %f6
1465 std %f4, [%o2 + 0x00]
1466 std %f6, [%o2 + 0x08]
146711: stx %g3, [%o4 + 0x00]
1468 retl
1469 stx %g7, [%o4 + 0x08]
1470ENDPROC(aes_sparc64_ctr_crypt_192)
1471
1472 .align 32
1473ENTRY(aes_sparc64_ctr_crypt_256)
1474 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1475 ldx [%o4 + 0x00], %g3
1476 ldx [%o4 + 0x08], %g7
1477 subcc %o3, 0x10, %o3
1478 ldx [%o0 + 0x00], %g1
1479 be 10f
1480 ldx [%o0 + 0x08], %g2
14811: xor %g1, %g3, %o5
1482 MOVXTOD_O5_F0
1483 xor %g2, %g7, %o5
1484 MOVXTOD_O5_F2
1485 add %g7, 1, %g7
1486 add %g3, 1, %o5
1487 movrz %g7, %o5, %g3
1488 xor %g1, %g3, %o5
1489 MOVXTOD_O5_F4
1490 xor %g2, %g7, %o5
1491 MOVXTOD_O5_F6
1492 add %g7, 1, %g7
1493 add %g3, 1, %o5
1494 movrz %g7, %o5, %g3
1495 ENCRYPT_256_2(8, 0, 2, 4, 6)
1496 ldd [%o1 + 0x00], %f56
1497 ldd [%o1 + 0x08], %f58
1498 ldd [%o1 + 0x10], %f60
1499 ldd [%o1 + 0x18], %f62
1500 fxor %f56, %f0, %f56
1501 fxor %f58, %f2, %f58
1502 fxor %f60, %f4, %f60
1503 fxor %f62, %f6, %f62
1504 std %f56, [%o2 + 0x00]
1505 std %f58, [%o2 + 0x08]
1506 std %f60, [%o2 + 0x10]
1507 std %f62, [%o2 + 0x18]
1508 subcc %o3, 0x20, %o3
1509 add %o1, 0x20, %o1
1510 brgz %o3, 1b
1511 add %o2, 0x20, %o2
1512 brlz,pt %o3, 11f
1513 nop
1514 ldd [%o0 + 0xd0], %f56
1515 ldd [%o0 + 0xd8], %f58
1516 ldd [%o0 + 0xe0], %f60
1517 ldd [%o0 + 0xe8], %f62
151810: xor %g1, %g3, %o5
1519 MOVXTOD_O5_F0
1520 xor %g2, %g7, %o5
1521 MOVXTOD_O5_F2
1522 add %g7, 1, %g7
1523 add %g3, 1, %o5
1524 movrz %g7, %o5, %g3
1525 ENCRYPT_256(8, 0, 2, 4, 6)
1526 ldd [%o1 + 0x00], %f4
1527 ldd [%o1 + 0x08], %f6
1528 fxor %f4, %f0, %f4
1529 fxor %f6, %f2, %f6
1530 std %f4, [%o2 + 0x00]
1531 std %f6, [%o2 + 0x08]
153211: stx %g3, [%o4 + 0x00]
1533 retl
1534 stx %g7, [%o4 + 0x08]
1535ENDPROC(aes_sparc64_ctr_crypt_256)
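Aside on the CTR routines above: the 128-bit big-endian counter is held in two 64-bit registers (%g3 = IV bytes 0-7, %g7 = IV bytes 8-15), and the add/add/movrz triple increments it without a branch -- the low half is bumped, a pre-incremented copy of the high half is prepared, and the conditional register move commits that copy only when the low half wrapped to zero. A small C sketch of the same arithmetic (illustration only, not part of the patch; the helper name is made up):

#include <stdint.h>

/* Equivalent of:  add %g7, 1, %g7 ; add %g3, 1, %o5 ; movrz %g7, %o5, %g3 */
static void ctr128_increment(uint64_t *hi, uint64_t *lo)
{
        uint64_t hi_plus_one = *hi + 1;  /* computed up front, like %o5 */

        *lo += 1;
        if (*lo == 0)                    /* low half wrapped: take the carry */
                *hi = hi_plus_one;
}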
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
new file mode 100644
index 000000000000..8f1c9980f637
--- /dev/null
+++ b/arch/sparc/crypto/aes_glue.c
@@ -0,0 +1,477 @@
1/* Glue code for AES encryption optimized for sparc64 crypto opcodes.
2 *
3 * This is based largely upon arch/x86/crypto/aesni-intel_glue.c
4 *
5 * Copyright (C) 2008, Intel Corp.
6 * Author: Huang Ying <ying.huang@intel.com>
7 *
8 * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
9 * interface for 64-bit kernels.
10 * Authors: Adrian Hoban <adrian.hoban@intel.com>
11 * Gabriele Paoloni <gabriele.paoloni@intel.com>
12 * Tadeusz Struk (tadeusz.struk@intel.com)
13 * Aidan O'Mahony (aidan.o.mahony@intel.com)
14 * Copyright (c) 2010, Intel Corporation.
15 */
16
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
19#include <linux/crypto.h>
20#include <linux/init.h>
21#include <linux/module.h>
22#include <linux/mm.h>
23#include <linux/types.h>
24#include <crypto/algapi.h>
25#include <crypto/aes.h>
26
27#include <asm/fpumacro.h>
28#include <asm/pstate.h>
29#include <asm/elf.h>
30
31#include "opcodes.h"
32
33struct aes_ops {
34 void (*encrypt)(const u64 *key, const u32 *input, u32 *output);
35 void (*decrypt)(const u64 *key, const u32 *input, u32 *output);
36 void (*load_encrypt_keys)(const u64 *key);
37 void (*load_decrypt_keys)(const u64 *key);
38 void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output,
39 unsigned int len);
40 void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output,
41 unsigned int len);
42 void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output,
43 unsigned int len, u64 *iv);
44 void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output,
45 unsigned int len, u64 *iv);
46 void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output,
47 unsigned int len, u64 *iv);
48};
49
50struct crypto_sparc64_aes_ctx {
51 struct aes_ops *ops;
52 u64 key[AES_MAX_KEYLENGTH / sizeof(u64)];
53 u32 key_length;
54 u32 expanded_key_length;
55};
56
57extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input,
58 u32 *output);
59extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input,
60 u32 *output);
61extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input,
62 u32 *output);
63
64extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input,
65 u32 *output);
66extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input,
67 u32 *output);
68extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input,
69 u32 *output);
70
71extern void aes_sparc64_load_encrypt_keys_128(const u64 *key);
72extern void aes_sparc64_load_encrypt_keys_192(const u64 *key);
73extern void aes_sparc64_load_encrypt_keys_256(const u64 *key);
74
75extern void aes_sparc64_load_decrypt_keys_128(const u64 *key);
76extern void aes_sparc64_load_decrypt_keys_192(const u64 *key);
77extern void aes_sparc64_load_decrypt_keys_256(const u64 *key);
78
79extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input,
80 u64 *output, unsigned int len);
81extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input,
82 u64 *output, unsigned int len);
83extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input,
84 u64 *output, unsigned int len);
85
86extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input,
87 u64 *output, unsigned int len);
88extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input,
89 u64 *output, unsigned int len);
90extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input,
91 u64 *output, unsigned int len);
92
93extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input,
94 u64 *output, unsigned int len,
95 u64 *iv);
96
97extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input,
98 u64 *output, unsigned int len,
99 u64 *iv);
100
101extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input,
102 u64 *output, unsigned int len,
103 u64 *iv);
104
105extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input,
106 u64 *output, unsigned int len,
107 u64 *iv);
108
109extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input,
110 u64 *output, unsigned int len,
111 u64 *iv);
112
113extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input,
114 u64 *output, unsigned int len,
115 u64 *iv);
116
117extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input,
118 u64 *output, unsigned int len,
119 u64 *iv);
120extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input,
121 u64 *output, unsigned int len,
122 u64 *iv);
123extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input,
124 u64 *output, unsigned int len,
125 u64 *iv);
126
127struct aes_ops aes128_ops = {
128 .encrypt = aes_sparc64_encrypt_128,
129 .decrypt = aes_sparc64_decrypt_128,
130 .load_encrypt_keys = aes_sparc64_load_encrypt_keys_128,
131 .load_decrypt_keys = aes_sparc64_load_decrypt_keys_128,
132 .ecb_encrypt = aes_sparc64_ecb_encrypt_128,
133 .ecb_decrypt = aes_sparc64_ecb_decrypt_128,
134 .cbc_encrypt = aes_sparc64_cbc_encrypt_128,
135 .cbc_decrypt = aes_sparc64_cbc_decrypt_128,
136 .ctr_crypt = aes_sparc64_ctr_crypt_128,
137};
138
139struct aes_ops aes192_ops = {
140 .encrypt = aes_sparc64_encrypt_192,
141 .decrypt = aes_sparc64_decrypt_192,
142 .load_encrypt_keys = aes_sparc64_load_encrypt_keys_192,
143 .load_decrypt_keys = aes_sparc64_load_decrypt_keys_192,
144 .ecb_encrypt = aes_sparc64_ecb_encrypt_192,
145 .ecb_decrypt = aes_sparc64_ecb_decrypt_192,
146 .cbc_encrypt = aes_sparc64_cbc_encrypt_192,
147 .cbc_decrypt = aes_sparc64_cbc_decrypt_192,
148 .ctr_crypt = aes_sparc64_ctr_crypt_192,
149};
150
151struct aes_ops aes256_ops = {
152 .encrypt = aes_sparc64_encrypt_256,
153 .decrypt = aes_sparc64_decrypt_256,
154 .load_encrypt_keys = aes_sparc64_load_encrypt_keys_256,
155 .load_decrypt_keys = aes_sparc64_load_decrypt_keys_256,
156 .ecb_encrypt = aes_sparc64_ecb_encrypt_256,
157 .ecb_decrypt = aes_sparc64_ecb_decrypt_256,
158 .cbc_encrypt = aes_sparc64_cbc_encrypt_256,
159 .cbc_decrypt = aes_sparc64_cbc_decrypt_256,
160 .ctr_crypt = aes_sparc64_ctr_crypt_256,
161};
162
163extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key,
164 unsigned int key_len);
165
166static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
167 unsigned int key_len)
168{
169 struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
170 u32 *flags = &tfm->crt_flags;
171
172 switch (key_len) {
173 case AES_KEYSIZE_128:
174 ctx->expanded_key_length = 0xb0;
175 ctx->ops = &aes128_ops;
176 break;
177
178 case AES_KEYSIZE_192:
179 ctx->expanded_key_length = 0xd0;
180 ctx->ops = &aes192_ops;
181 break;
182
183 case AES_KEYSIZE_256:
184 ctx->expanded_key_length = 0xf0;
185 ctx->ops = &aes256_ops;
186 break;
187
188 default:
189 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
190 return -EINVAL;
191 }
192
193 aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len);
194 ctx->key_length = key_len;
195
196 return 0;
197}
198
199static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
200{
201 struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
202
203 ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
204}
205
206static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
207{
208 struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
209
210 ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
211}
212
213#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
214
215static int ecb_encrypt(struct blkcipher_desc *desc,
216 struct scatterlist *dst, struct scatterlist *src,
217 unsigned int nbytes)
218{
219 struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
220 struct blkcipher_walk walk;
221 int err;
222
223 blkcipher_walk_init(&walk, dst, src, nbytes);
224 err = blkcipher_walk_virt(desc, &walk);
225
226 ctx->ops->load_encrypt_keys(&ctx->key[0]);
227 while ((nbytes = walk.nbytes)) {
228 unsigned int block_len = nbytes & AES_BLOCK_MASK;
229
230 if (likely(block_len)) {
231 ctx->ops->ecb_encrypt(&ctx->key[0],
232 (const u64 *)walk.src.virt.addr,
233 (u64 *) walk.dst.virt.addr,
234 block_len);
235 }
236 nbytes &= AES_BLOCK_SIZE - 1;
237 err = blkcipher_walk_done(desc, &walk, nbytes);
238 }
239 fprs_write(0);
240 return err;
241}
242
243static int ecb_decrypt(struct blkcipher_desc *desc,
244 struct scatterlist *dst, struct scatterlist *src,
245 unsigned int nbytes)
246{
247 struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
248 struct blkcipher_walk walk;
249 u64 *key_end;
250 int err;
251
252 blkcipher_walk_init(&walk, dst, src, nbytes);
253 err = blkcipher_walk_virt(desc, &walk);
254
255 ctx->ops->load_decrypt_keys(&ctx->key[0]);
256 key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
257 while ((nbytes = walk.nbytes)) {
258 unsigned int block_len = nbytes & AES_BLOCK_MASK;
259
260 if (likely(block_len)) {
261 ctx->ops->ecb_decrypt(key_end,
262 (const u64 *) walk.src.virt.addr,
263 (u64 *) walk.dst.virt.addr, block_len);
264 }
265 nbytes &= AES_BLOCK_SIZE - 1;
266 err = blkcipher_walk_done(desc, &walk, nbytes);
267 }
268 fprs_write(0);
269
270 return err;
271}
272
273static int cbc_encrypt(struct blkcipher_desc *desc,
274 struct scatterlist *dst, struct scatterlist *src,
275 unsigned int nbytes)
276{
277 struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
278 struct blkcipher_walk walk;
279 int err;
280
281 blkcipher_walk_init(&walk, dst, src, nbytes);
282 err = blkcipher_walk_virt(desc, &walk);
283
284 ctx->ops->load_encrypt_keys(&ctx->key[0]);
285 while ((nbytes = walk.nbytes)) {
286 unsigned int block_len = nbytes & AES_BLOCK_MASK;
287
288 if (likely(block_len)) {
289 ctx->ops->cbc_encrypt(&ctx->key[0],
290 (const u64 *)walk.src.virt.addr,
291 (u64 *) walk.dst.virt.addr,
292 block_len, (u64 *) walk.iv);
293 }
294 nbytes &= AES_BLOCK_SIZE - 1;
295 err = blkcipher_walk_done(desc, &walk, nbytes);
296 }
297 fprs_write(0);
298 return err;
299}
300
301static int cbc_decrypt(struct blkcipher_desc *desc,
302 struct scatterlist *dst, struct scatterlist *src,
303 unsigned int nbytes)
304{
305 struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
306 struct blkcipher_walk walk;
307 u64 *key_end;
308 int err;
309
310 blkcipher_walk_init(&walk, dst, src, nbytes);
311 err = blkcipher_walk_virt(desc, &walk);
312
313 ctx->ops->load_decrypt_keys(&ctx->key[0]);
314 key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
315 while ((nbytes = walk.nbytes)) {
316 unsigned int block_len = nbytes & AES_BLOCK_MASK;
317
318 if (likely(block_len)) {
319 ctx->ops->cbc_decrypt(key_end,
320 (const u64 *) walk.src.virt.addr,
321 (u64 *) walk.dst.virt.addr,
322 block_len, (u64 *) walk.iv);
323 }
324 nbytes &= AES_BLOCK_SIZE - 1;
325 err = blkcipher_walk_done(desc, &walk, nbytes);
326 }
327 fprs_write(0);
328
329 return err;
330}
331
332static int ctr_crypt(struct blkcipher_desc *desc,
333 struct scatterlist *dst, struct scatterlist *src,
334 unsigned int nbytes)
335{
336 struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
337 struct blkcipher_walk walk;
338 int err;
339
340 blkcipher_walk_init(&walk, dst, src, nbytes);
341 err = blkcipher_walk_virt(desc, &walk);
342
343 ctx->ops->load_encrypt_keys(&ctx->key[0]);
344 while ((nbytes = walk.nbytes)) {
345 unsigned int block_len = nbytes & AES_BLOCK_MASK;
346
347 if (likely(block_len)) {
348 ctx->ops->ctr_crypt(&ctx->key[0],
349 (const u64 *)walk.src.virt.addr,
350 (u64 *) walk.dst.virt.addr,
351 block_len, (u64 *) walk.iv);
352 }
353 nbytes &= AES_BLOCK_SIZE - 1;
354 err = blkcipher_walk_done(desc, &walk, nbytes);
355 }
356 fprs_write(0);
357 return err;
358}
359
360static struct crypto_alg algs[] = { {
361 .cra_name = "aes",
362 .cra_driver_name = "aes-sparc64",
363 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
364 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
365 .cra_blocksize = AES_BLOCK_SIZE,
366 .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
367 .cra_alignmask = 3,
368 .cra_module = THIS_MODULE,
369 .cra_u = {
370 .cipher = {
371 .cia_min_keysize = AES_MIN_KEY_SIZE,
372 .cia_max_keysize = AES_MAX_KEY_SIZE,
373 .cia_setkey = aes_set_key,
374 .cia_encrypt = aes_encrypt,
375 .cia_decrypt = aes_decrypt
376 }
377 }
378}, {
379 .cra_name = "ecb(aes)",
380 .cra_driver_name = "ecb-aes-sparc64",
381 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
382 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
383 .cra_blocksize = AES_BLOCK_SIZE,
384 .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
385 .cra_alignmask = 7,
386 .cra_type = &crypto_blkcipher_type,
387 .cra_module = THIS_MODULE,
388 .cra_u = {
389 .blkcipher = {
390 .min_keysize = AES_MIN_KEY_SIZE,
391 .max_keysize = AES_MAX_KEY_SIZE,
392 .setkey = aes_set_key,
393 .encrypt = ecb_encrypt,
394 .decrypt = ecb_decrypt,
395 },
396 },
397}, {
398 .cra_name = "cbc(aes)",
399 .cra_driver_name = "cbc-aes-sparc64",
400 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
401 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
402 .cra_blocksize = AES_BLOCK_SIZE,
403 .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
404 .cra_alignmask = 7,
405 .cra_type = &crypto_blkcipher_type,
406 .cra_module = THIS_MODULE,
407 .cra_u = {
408 .blkcipher = {
409 .min_keysize = AES_MIN_KEY_SIZE,
410 .max_keysize = AES_MAX_KEY_SIZE,
411 .setkey = aes_set_key,
412 .encrypt = cbc_encrypt,
413 .decrypt = cbc_decrypt,
414 },
415 },
416}, {
417 .cra_name = "ctr(aes)",
418 .cra_driver_name = "ctr-aes-sparc64",
419 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
420 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
421 .cra_blocksize = AES_BLOCK_SIZE,
422 .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
423 .cra_alignmask = 7,
424 .cra_type = &crypto_blkcipher_type,
425 .cra_module = THIS_MODULE,
426 .cra_u = {
427 .blkcipher = {
428 .min_keysize = AES_MIN_KEY_SIZE,
429 .max_keysize = AES_MAX_KEY_SIZE,
430 .setkey = aes_set_key,
431 .encrypt = ctr_crypt,
432 .decrypt = ctr_crypt,
433 },
434 },
435} };
436
437static bool __init sparc64_has_aes_opcode(void)
438{
439 unsigned long cfr;
440
441 if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
442 return false;
443
444 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
445 if (!(cfr & CFR_AES))
446 return false;
447
448 return true;
449}
450
451static int __init aes_sparc64_mod_init(void)
452{
453 int i;
454
455 for (i = 0; i < ARRAY_SIZE(algs); i++)
456 INIT_LIST_HEAD(&algs[i].cra_list);
457
458 if (sparc64_has_aes_opcode()) {
459 pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
460 return crypto_register_algs(algs, ARRAY_SIZE(algs));
461 }
462 pr_info("sparc64 aes opcodes not available.\n");
463 return -ENODEV;
464}
465
466static void __exit aes_sparc64_mod_fini(void)
467{
468 crypto_unregister_algs(algs, ARRAY_SIZE(algs));
469}
470
471module_init(aes_sparc64_mod_init);
472module_exit(aes_sparc64_mod_fini);
473
474MODULE_LICENSE("GPL");
 475MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, sparc64 aes opcode accelerated");
476
477MODULE_ALIAS("aes");
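For context, here is a minimal sketch (not part of the patch) of how another
kernel module could exercise the "cbc(aes)" transform registered above through
the blkcipher API of this kernel generation. The function and buffer names are
illustrative assumptions, and len must be a multiple of AES_BLOCK_SIZE:

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int example_cbc_aes_encrypt(const u8 *key, unsigned int keylen,
				   u8 *iv, u8 *buf, unsigned int len)
{
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc;
	struct scatterlist sg;
	int err;

	/* The crypto core picks the highest-priority "cbc(aes)" provider,
	 * e.g. cbc-aes-sparc64 when the opcodes are available.
	 */
	tfm = crypto_alloc_blkcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_blkcipher_setkey(tfm, key, keylen);
	if (!err) {
		desc.tfm = tfm;
		desc.flags = 0;
		crypto_blkcipher_set_iv(tfm, iv, crypto_blkcipher_ivsize(tfm));
		sg_init_one(&sg, buf, len);	/* in-place encryption */
		err = crypto_blkcipher_encrypt(&desc, &sg, &sg, len);
	}
	crypto_free_blkcipher(tfm);
	return err;
}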
diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S
new file mode 100644
index 000000000000..cc39553a4e43
--- /dev/null
+++ b/arch/sparc/crypto/camellia_asm.S
@@ -0,0 +1,563 @@
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3
4#include "opcodes.h"
5
6#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
7 CAMELLIA_F(KEY_BASE + 0, I1, I0, I1) \
8 CAMELLIA_F(KEY_BASE + 2, I0, I1, I0) \
9 CAMELLIA_F(KEY_BASE + 4, I1, I0, I1) \
10 CAMELLIA_F(KEY_BASE + 6, I0, I1, I0) \
11 CAMELLIA_F(KEY_BASE + 8, I1, I0, I1) \
12 CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)
13
14#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
15 CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
16 CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
17 CAMELLIA_FLI(KEY_BASE + 14, I1, I1)
18
19 .data
20
21 .align 8
22SIGMA: .xword 0xA09E667F3BCC908B
23 .xword 0xB67AE8584CAA73B2
24 .xword 0xC6EF372FE94F82BE
25 .xword 0x54FF53A5F1D36F1C
26 .xword 0x10E527FADE682D1D
27 .xword 0xB05688C2B3E6C1FD
28
29 .text
30
31 .align 32
32ENTRY(camellia_sparc64_key_expand)
33 /* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
34 VISEntry
35 ld [%o0 + 0x00], %f0 ! i0, k[0]
36 ld [%o0 + 0x04], %f1 ! i1, k[1]
37 ld [%o0 + 0x08], %f2 ! i2, k[2]
38 ld [%o0 + 0x0c], %f3 ! i3, k[3]
39 std %f0, [%o1 + 0x00] ! k[0, 1]
40 fsrc2 %f0, %f28
41 std %f2, [%o1 + 0x08] ! k[2, 3]
42 cmp %o2, 16
43 be 10f
44 fsrc2 %f2, %f30
45
46 ld [%o0 + 0x10], %f0
47 ld [%o0 + 0x14], %f1
48 std %f0, [%o1 + 0x20] ! k[8, 9]
49 cmp %o2, 24
50 fone %f10
51 be,a 1f
52 fxor %f10, %f0, %f2
53 ld [%o0 + 0x18], %f2
54 ld [%o0 + 0x1c], %f3
551:
56 std %f2, [%o1 + 0x28] ! k[10, 11]
57 fxor %f28, %f0, %f0
58 fxor %f30, %f2, %f2
59
6010:
61 sethi %hi(SIGMA), %g3
62 or %g3, %lo(SIGMA), %g3
63 ldd [%g3 + 0x00], %f16
64 ldd [%g3 + 0x08], %f18
65 ldd [%g3 + 0x10], %f20
66 ldd [%g3 + 0x18], %f22
67 ldd [%g3 + 0x20], %f24
68 ldd [%g3 + 0x28], %f26
69 CAMELLIA_F(16, 2, 0, 2)
70 CAMELLIA_F(18, 0, 2, 0)
71 fxor %f28, %f0, %f0
72 fxor %f30, %f2, %f2
73 CAMELLIA_F(20, 2, 0, 2)
74 CAMELLIA_F(22, 0, 2, 0)
75
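/* ROTL128 rotates the 128-bit value formed by S01 (upper 64 bits) and
 * S23 (lower 64 bits) left by N (1 <= N <= 63).  TMP1/TMP2 catch the N
 * bits shifted out of the top of each word, which are then OR'd into the
 * low end of the other word.
 */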
76#define ROTL128(S01, S23, TMP1, TMP2, N) \
77 srlx S01, (64 - N), TMP1; \
78 sllx S01, N, S01; \
79 srlx S23, (64 - N), TMP2; \
80 sllx S23, N, S23; \
81 or S01, TMP2, S01; \
82 or S23, TMP1, S23
83
84 cmp %o2, 16
85 bne 1f
86 nop
87 /* 128-bit key */
88 std %f0, [%o1 + 0x10] ! k[ 4, 5]
89 std %f2, [%o1 + 0x18] ! k[ 6, 7]
90 MOVDTOX_F0_O4
91 MOVDTOX_F2_O5
92 ROTL128(%o4, %o5, %g2, %g3, 15)
93 stx %o4, [%o1 + 0x30] ! k[12, 13]
94 stx %o5, [%o1 + 0x38] ! k[14, 15]
95 ROTL128(%o4, %o5, %g2, %g3, 15)
96 stx %o4, [%o1 + 0x40] ! k[16, 17]
97 stx %o5, [%o1 + 0x48] ! k[18, 19]
98 ROTL128(%o4, %o5, %g2, %g3, 15)
99 stx %o4, [%o1 + 0x60] ! k[24, 25]
100 ROTL128(%o4, %o5, %g2, %g3, 15)
101 stx %o4, [%o1 + 0x70] ! k[28, 29]
102 stx %o5, [%o1 + 0x78] ! k[30, 31]
103 ROTL128(%o4, %o5, %g2, %g3, 34)
104 stx %o4, [%o1 + 0xa0] ! k[40, 41]
105 stx %o5, [%o1 + 0xa8] ! k[42, 43]
106 ROTL128(%o4, %o5, %g2, %g3, 17)
107 stx %o4, [%o1 + 0xc0] ! k[48, 49]
108 stx %o5, [%o1 + 0xc8] ! k[50, 51]
109
110 ldx [%o1 + 0x00], %o4 ! k[ 0, 1]
111 ldx [%o1 + 0x08], %o5 ! k[ 2, 3]
112 ROTL128(%o4, %o5, %g2, %g3, 15)
113 stx %o4, [%o1 + 0x20] ! k[ 8, 9]
114 stx %o5, [%o1 + 0x28] ! k[10, 11]
115 ROTL128(%o4, %o5, %g2, %g3, 30)
116 stx %o4, [%o1 + 0x50] ! k[20, 21]
117 stx %o5, [%o1 + 0x58] ! k[22, 23]
118 ROTL128(%o4, %o5, %g2, %g3, 15)
119 stx %o5, [%o1 + 0x68] ! k[26, 27]
120 ROTL128(%o4, %o5, %g2, %g3, 17)
121 stx %o4, [%o1 + 0x80] ! k[32, 33]
122 stx %o5, [%o1 + 0x88] ! k[34, 35]
123 ROTL128(%o4, %o5, %g2, %g3, 17)
124 stx %o4, [%o1 + 0x90] ! k[36, 37]
125 stx %o5, [%o1 + 0x98] ! k[38, 39]
126 ROTL128(%o4, %o5, %g2, %g3, 17)
127 stx %o4, [%o1 + 0xb0] ! k[44, 45]
128 stx %o5, [%o1 + 0xb8] ! k[46, 47]
129
130 ba,pt %xcc, 2f
131 mov (3 * 16 * 4), %o0
132
1331:
134 /* 192-bit or 256-bit key */
135 std %f0, [%o1 + 0x30] ! k[12, 13]
136 std %f2, [%o1 + 0x38] ! k[14, 15]
137 ldd [%o1 + 0x20], %f4 ! k[ 8, 9]
138 ldd [%o1 + 0x28], %f6 ! k[10, 11]
139 fxor %f0, %f4, %f0
140 fxor %f2, %f6, %f2
141 CAMELLIA_F(24, 2, 0, 2)
142 CAMELLIA_F(26, 0, 2, 0)
143 std %f0, [%o1 + 0x10] ! k[ 4, 5]
144 std %f2, [%o1 + 0x18] ! k[ 6, 7]
145 MOVDTOX_F0_O4
146 MOVDTOX_F2_O5
147 ROTL128(%o4, %o5, %g2, %g3, 30)
148 stx %o4, [%o1 + 0x50] ! k[20, 21]
149 stx %o5, [%o1 + 0x58] ! k[22, 23]
150 ROTL128(%o4, %o5, %g2, %g3, 30)
151 stx %o4, [%o1 + 0xa0] ! k[40, 41]
152 stx %o5, [%o1 + 0xa8] ! k[42, 43]
153 ROTL128(%o4, %o5, %g2, %g3, 51)
154 stx %o4, [%o1 + 0x100] ! k[64, 65]
155 stx %o5, [%o1 + 0x108] ! k[66, 67]
156 ldx [%o1 + 0x20], %o4 ! k[ 8, 9]
157 ldx [%o1 + 0x28], %o5 ! k[10, 11]
158 ROTL128(%o4, %o5, %g2, %g3, 15)
159 stx %o4, [%o1 + 0x20] ! k[ 8, 9]
160 stx %o5, [%o1 + 0x28] ! k[10, 11]
161 ROTL128(%o4, %o5, %g2, %g3, 15)
162 stx %o4, [%o1 + 0x40] ! k[16, 17]
163 stx %o5, [%o1 + 0x48] ! k[18, 19]
164 ROTL128(%o4, %o5, %g2, %g3, 30)
165 stx %o4, [%o1 + 0x90] ! k[36, 37]
166 stx %o5, [%o1 + 0x98] ! k[38, 39]
167 ROTL128(%o4, %o5, %g2, %g3, 34)
168 stx %o4, [%o1 + 0xd0] ! k[52, 53]
169 stx %o5, [%o1 + 0xd8] ! k[54, 55]
170 ldx [%o1 + 0x30], %o4 ! k[12, 13]
171 ldx [%o1 + 0x38], %o5 ! k[14, 15]
172 ROTL128(%o4, %o5, %g2, %g3, 15)
173 stx %o4, [%o1 + 0x30] ! k[12, 13]
174 stx %o5, [%o1 + 0x38] ! k[14, 15]
175 ROTL128(%o4, %o5, %g2, %g3, 30)
176 stx %o4, [%o1 + 0x70] ! k[28, 29]
177 stx %o5, [%o1 + 0x78] ! k[30, 31]
178 srlx %o4, 32, %g2
179 srlx %o5, 32, %g3
180 stw %o4, [%o1 + 0xc0] ! k[48]
181 stw %g3, [%o1 + 0xc4] ! k[49]
182 stw %o5, [%o1 + 0xc8] ! k[50]
183 stw %g2, [%o1 + 0xcc] ! k[51]
184 ROTL128(%o4, %o5, %g2, %g3, 49)
185 stx %o4, [%o1 + 0xe0] ! k[56, 57]
186 stx %o5, [%o1 + 0xe8] ! k[58, 59]
187 ldx [%o1 + 0x00], %o4 ! k[ 0, 1]
188 ldx [%o1 + 0x08], %o5 ! k[ 2, 3]
189 ROTL128(%o4, %o5, %g2, %g3, 45)
190 stx %o4, [%o1 + 0x60] ! k[24, 25]
191 stx %o5, [%o1 + 0x68] ! k[26, 27]
192 ROTL128(%o4, %o5, %g2, %g3, 15)
193 stx %o4, [%o1 + 0x80] ! k[32, 33]
194 stx %o5, [%o1 + 0x88] ! k[34, 35]
195 ROTL128(%o4, %o5, %g2, %g3, 17)
196 stx %o4, [%o1 + 0xb0] ! k[44, 45]
197 stx %o5, [%o1 + 0xb8] ! k[46, 47]
198 ROTL128(%o4, %o5, %g2, %g3, 34)
199 stx %o4, [%o1 + 0xf0] ! k[60, 61]
200 stx %o5, [%o1 + 0xf8] ! k[62, 63]
201 mov (4 * 16 * 4), %o0
2022:
203 add %o1, %o0, %o1
204 ldd [%o1 + 0x00], %f0
205 ldd [%o1 + 0x08], %f2
206 std %f0, [%o3 + 0x00]
207 std %f2, [%o3 + 0x08]
208 add %o3, 0x10, %o3
2091:
210 sub %o1, (16 * 4), %o1
211 ldd [%o1 + 0x38], %f0
212 ldd [%o1 + 0x30], %f2
213 ldd [%o1 + 0x28], %f4
214 ldd [%o1 + 0x20], %f6
215 ldd [%o1 + 0x18], %f8
216 ldd [%o1 + 0x10], %f10
217 std %f0, [%o3 + 0x00]
218 std %f2, [%o3 + 0x08]
219 std %f4, [%o3 + 0x10]
220 std %f6, [%o3 + 0x18]
221 std %f8, [%o3 + 0x20]
222 std %f10, [%o3 + 0x28]
223
224 ldd [%o1 + 0x08], %f0
225 ldd [%o1 + 0x00], %f2
226 std %f0, [%o3 + 0x30]
227 std %f2, [%o3 + 0x38]
228 subcc %o0, (16 * 4), %o0
229 bne,pt %icc, 1b
230 add %o3, (16 * 4), %o3
231
232 std %f2, [%o3 - 0x10]
233 std %f0, [%o3 - 0x08]
234
235 retl
236 VISExit
237ENDPROC(camellia_sparc64_key_expand)
238
239 .align 32
240ENTRY(camellia_sparc64_crypt)
241 /* %o0=key, %o1=input, %o2=output, %o3=key_len */
242 VISEntry
243
244 ld [%o1 + 0x00], %f0
245 ld [%o1 + 0x04], %f1
246 ld [%o1 + 0x08], %f2
247 ld [%o1 + 0x0c], %f3
248
249 ldd [%o0 + 0x00], %f4
250 ldd [%o0 + 0x08], %f6
251
252 cmp %o3, 16
253 fxor %f4, %f0, %f0
254 be 1f
255 fxor %f6, %f2, %f2
256
257 ldd [%o0 + 0x10], %f8
258 ldd [%o0 + 0x18], %f10
259 ldd [%o0 + 0x20], %f12
260 ldd [%o0 + 0x28], %f14
261 ldd [%o0 + 0x30], %f16
262 ldd [%o0 + 0x38], %f18
263 ldd [%o0 + 0x40], %f20
264 ldd [%o0 + 0x48], %f22
265 add %o0, 0x40, %o0
266
267 CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
268
2691:
270 ldd [%o0 + 0x10], %f8
271 ldd [%o0 + 0x18], %f10
272 ldd [%o0 + 0x20], %f12
273 ldd [%o0 + 0x28], %f14
274 ldd [%o0 + 0x30], %f16
275 ldd [%o0 + 0x38], %f18
276 ldd [%o0 + 0x40], %f20
277 ldd [%o0 + 0x48], %f22
278 ldd [%o0 + 0x50], %f24
279 ldd [%o0 + 0x58], %f26
280 ldd [%o0 + 0x60], %f28
281 ldd [%o0 + 0x68], %f30
282 ldd [%o0 + 0x70], %f32
283 ldd [%o0 + 0x78], %f34
284 ldd [%o0 + 0x80], %f36
285 ldd [%o0 + 0x88], %f38
286 ldd [%o0 + 0x90], %f40
287 ldd [%o0 + 0x98], %f42
288 ldd [%o0 + 0xa0], %f44
289 ldd [%o0 + 0xa8], %f46
290 ldd [%o0 + 0xb0], %f48
291 ldd [%o0 + 0xb8], %f50
292 ldd [%o0 + 0xc0], %f52
293 ldd [%o0 + 0xc8], %f54
294
295 CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
296 CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
297 CAMELLIA_6ROUNDS(40, 0, 2)
298 fxor %f52, %f2, %f2
299 fxor %f54, %f0, %f0
300
301 st %f2, [%o2 + 0x00]
302 st %f3, [%o2 + 0x04]
303 st %f0, [%o2 + 0x08]
304 st %f1, [%o2 + 0x0c]
305
306 retl
307 VISExit
308ENDPROC(camellia_sparc64_crypt)
309
310 .align 32
311ENTRY(camellia_sparc64_load_keys)
312 /* %o0=key, %o1=key_len */
313 VISEntry
314 ldd [%o0 + 0x00], %f4
315 ldd [%o0 + 0x08], %f6
316 ldd [%o0 + 0x10], %f8
317 ldd [%o0 + 0x18], %f10
318 ldd [%o0 + 0x20], %f12
319 ldd [%o0 + 0x28], %f14
320 ldd [%o0 + 0x30], %f16
321 ldd [%o0 + 0x38], %f18
322 ldd [%o0 + 0x40], %f20
323 ldd [%o0 + 0x48], %f22
324 ldd [%o0 + 0x50], %f24
325 ldd [%o0 + 0x58], %f26
326 ldd [%o0 + 0x60], %f28
327 ldd [%o0 + 0x68], %f30
328 ldd [%o0 + 0x70], %f32
329 ldd [%o0 + 0x78], %f34
330 ldd [%o0 + 0x80], %f36
331 ldd [%o0 + 0x88], %f38
332 ldd [%o0 + 0x90], %f40
333 ldd [%o0 + 0x98], %f42
334 ldd [%o0 + 0xa0], %f44
335 ldd [%o0 + 0xa8], %f46
336 ldd [%o0 + 0xb0], %f48
337 ldd [%o0 + 0xb8], %f50
338 ldd [%o0 + 0xc0], %f52
339 retl
340 ldd [%o0 + 0xc8], %f54
341ENDPROC(camellia_sparc64_load_keys)
342
343 .align 32
344ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
345 /* %o0=input, %o1=output, %o2=len, %o3=key */
3461: ldd [%o0 + 0x00], %f0
347 ldd [%o0 + 0x08], %f2
348 add %o0, 0x10, %o0
349 fxor %f4, %f0, %f0
350 fxor %f6, %f2, %f2
351 CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
352 CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
353 CAMELLIA_6ROUNDS(40, 0, 2)
354 fxor %f52, %f2, %f2
355 fxor %f54, %f0, %f0
356 std %f2, [%o1 + 0x00]
357 std %f0, [%o1 + 0x08]
358 subcc %o2, 0x10, %o2
359 bne,pt %icc, 1b
360 add %o1, 0x10, %o1
361 retl
362 nop
363ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)
364
365 .align 32
366ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
367 /* %o0=input, %o1=output, %o2=len, %o3=key */
3681: ldd [%o0 + 0x00], %f0
369 ldd [%o0 + 0x08], %f2
370 add %o0, 0x10, %o0
371 fxor %f4, %f0, %f0
372 fxor %f6, %f2, %f2
373 CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
374 ldd [%o3 + 0xd0], %f8
375 ldd [%o3 + 0xd8], %f10
376 ldd [%o3 + 0xe0], %f12
377 ldd [%o3 + 0xe8], %f14
378 ldd [%o3 + 0xf0], %f16
379 ldd [%o3 + 0xf8], %f18
380 ldd [%o3 + 0x100], %f20
381 ldd [%o3 + 0x108], %f22
382 CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
383 CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
384 CAMELLIA_F(8, 2, 0, 2)
385 CAMELLIA_F(10, 0, 2, 0)
386 ldd [%o3 + 0x10], %f8
387 ldd [%o3 + 0x18], %f10
388 CAMELLIA_F(12, 2, 0, 2)
389 CAMELLIA_F(14, 0, 2, 0)
390 ldd [%o3 + 0x20], %f12
391 ldd [%o3 + 0x28], %f14
392 CAMELLIA_F(16, 2, 0, 2)
393 CAMELLIA_F(18, 0, 2, 0)
394 ldd [%o3 + 0x30], %f16
395 ldd [%o3 + 0x38], %f18
396 fxor %f20, %f2, %f2
397 fxor %f22, %f0, %f0
398 ldd [%o3 + 0x40], %f20
399 ldd [%o3 + 0x48], %f22
400 std %f2, [%o1 + 0x00]
401 std %f0, [%o1 + 0x08]
402 subcc %o2, 0x10, %o2
403 bne,pt %icc, 1b
404 add %o1, 0x10, %o1
405 retl
406 nop
407ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)
408
409 .align 32
410ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
411 /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
412 ldd [%o4 + 0x00], %f60
413 ldd [%o4 + 0x08], %f62
4141: ldd [%o0 + 0x00], %f0
415 ldd [%o0 + 0x08], %f2
416 add %o0, 0x10, %o0
417 fxor %f60, %f0, %f0
418 fxor %f62, %f2, %f2
419 fxor %f4, %f0, %f0
420 fxor %f6, %f2, %f2
421 CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
422 CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
423 CAMELLIA_6ROUNDS(40, 0, 2)
424 fxor %f52, %f2, %f60
425 fxor %f54, %f0, %f62
426 std %f60, [%o1 + 0x00]
427 std %f62, [%o1 + 0x08]
428 subcc %o2, 0x10, %o2
429 bne,pt %icc, 1b
430 add %o1, 0x10, %o1
431 std %f60, [%o4 + 0x00]
432 retl
433 std %f62, [%o4 + 0x08]
434ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)
435
436 .align 32
437ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
438 /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
439 ldd [%o4 + 0x00], %f60
440 ldd [%o4 + 0x08], %f62
4411: ldd [%o0 + 0x00], %f0
442 ldd [%o0 + 0x08], %f2
443 add %o0, 0x10, %o0
444 fxor %f60, %f0, %f0
445 fxor %f62, %f2, %f2
446 fxor %f4, %f0, %f0
447 fxor %f6, %f2, %f2
448 CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
449 ldd [%o3 + 0xd0], %f8
450 ldd [%o3 + 0xd8], %f10
451 ldd [%o3 + 0xe0], %f12
452 ldd [%o3 + 0xe8], %f14
453 ldd [%o3 + 0xf0], %f16
454 ldd [%o3 + 0xf8], %f18
455 ldd [%o3 + 0x100], %f20
456 ldd [%o3 + 0x108], %f22
457 CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
458 CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
459 CAMELLIA_F(8, 2, 0, 2)
460 CAMELLIA_F(10, 0, 2, 0)
461 ldd [%o3 + 0x10], %f8
462 ldd [%o3 + 0x18], %f10
463 CAMELLIA_F(12, 2, 0, 2)
464 CAMELLIA_F(14, 0, 2, 0)
465 ldd [%o3 + 0x20], %f12
466 ldd [%o3 + 0x28], %f14
467 CAMELLIA_F(16, 2, 0, 2)
468 CAMELLIA_F(18, 0, 2, 0)
469 ldd [%o3 + 0x30], %f16
470 ldd [%o3 + 0x38], %f18
471 fxor %f20, %f2, %f60
472 fxor %f22, %f0, %f62
473 ldd [%o3 + 0x40], %f20
474 ldd [%o3 + 0x48], %f22
475 std %f60, [%o1 + 0x00]
476 std %f62, [%o1 + 0x08]
477 subcc %o2, 0x10, %o2
478 bne,pt %icc, 1b
479 add %o1, 0x10, %o1
480 std %f60, [%o4 + 0x00]
481 retl
482 std %f62, [%o4 + 0x08]
483ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)
484
485 .align 32
486ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
487 /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
488 ldd [%o4 + 0x00], %f60
489 ldd [%o4 + 0x08], %f62
4901: ldd [%o0 + 0x00], %f56
491 ldd [%o0 + 0x08], %f58
492 add %o0, 0x10, %o0
493 fxor %f4, %f56, %f0
494 fxor %f6, %f58, %f2
495 CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
496 CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
497 CAMELLIA_6ROUNDS(40, 0, 2)
498 fxor %f52, %f2, %f2
499 fxor %f54, %f0, %f0
500 fxor %f60, %f2, %f2
501 fxor %f62, %f0, %f0
502 fsrc2 %f56, %f60
503 fsrc2 %f58, %f62
504 std %f2, [%o1 + 0x00]
505 std %f0, [%o1 + 0x08]
506 subcc %o2, 0x10, %o2
507 bne,pt %icc, 1b
508 add %o1, 0x10, %o1
509 std %f60, [%o4 + 0x00]
510 retl
511 std %f62, [%o4 + 0x08]
512ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)
513
514 .align 32
515ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
516 /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
517 ldd [%o4 + 0x00], %f60
518 ldd [%o4 + 0x08], %f62
5191: ldd [%o0 + 0x00], %f56
520 ldd [%o0 + 0x08], %f58
521 add %o0, 0x10, %o0
522 fxor %f4, %f56, %f0
523 fxor %f6, %f58, %f2
524 CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
525 ldd [%o3 + 0xd0], %f8
526 ldd [%o3 + 0xd8], %f10
527 ldd [%o3 + 0xe0], %f12
528 ldd [%o3 + 0xe8], %f14
529 ldd [%o3 + 0xf0], %f16
530 ldd [%o3 + 0xf8], %f18
531 ldd [%o3 + 0x100], %f20
532 ldd [%o3 + 0x108], %f22
533 CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
534 CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
535 CAMELLIA_F(8, 2, 0, 2)
536 CAMELLIA_F(10, 0, 2, 0)
537 ldd [%o3 + 0x10], %f8
538 ldd [%o3 + 0x18], %f10
539 CAMELLIA_F(12, 2, 0, 2)
540 CAMELLIA_F(14, 0, 2, 0)
541 ldd [%o3 + 0x20], %f12
542 ldd [%o3 + 0x28], %f14
543 CAMELLIA_F(16, 2, 0, 2)
544 CAMELLIA_F(18, 0, 2, 0)
545 ldd [%o3 + 0x30], %f16
546 ldd [%o3 + 0x38], %f18
547 fxor %f20, %f2, %f2
548 fxor %f22, %f0, %f0
549 ldd [%o3 + 0x40], %f20
550 ldd [%o3 + 0x48], %f22
551 fxor %f60, %f2, %f2
552 fxor %f62, %f0, %f0
553 fsrc2 %f56, %f60
554 fsrc2 %f58, %f62
555 std %f2, [%o1 + 0x00]
556 std %f0, [%o1 + 0x08]
557 subcc %o2, 0x10, %o2
558 bne,pt %icc, 1b
559 add %o1, 0x10, %o1
560 std %f60, [%o4 + 0x00]
561 retl
562 std %f62, [%o4 + 0x08]
563ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
new file mode 100644
index 000000000000..42905c084299
--- /dev/null
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -0,0 +1,322 @@
1/* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes.
2 *
3 * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
4 */
5
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#include <linux/crypto.h>
9#include <linux/init.h>
10#include <linux/module.h>
11#include <linux/mm.h>
12#include <linux/types.h>
13#include <crypto/algapi.h>
14
15#include <asm/fpumacro.h>
16#include <asm/pstate.h>
17#include <asm/elf.h>
18
19#include "opcodes.h"
20
21#define CAMELLIA_MIN_KEY_SIZE 16
22#define CAMELLIA_MAX_KEY_SIZE 32
23#define CAMELLIA_BLOCK_SIZE 16
24#define CAMELLIA_TABLE_BYTE_LEN 272
25
26struct camellia_sparc64_ctx {
27 u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
28 u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
29 int key_len;
30};
31
32extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key,
33 unsigned int key_len, u64 *decrypt_key);
34
35static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key,
36 unsigned int key_len)
37{
38 struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
39 const u32 *in_key = (const u32 *) _in_key;
40 u32 *flags = &tfm->crt_flags;
41
42 if (key_len != 16 && key_len != 24 && key_len != 32) {
43 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
44 return -EINVAL;
45 }
46
47 ctx->key_len = key_len;
48
49 camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0],
50 key_len, &ctx->decrypt_key[0]);
51 return 0;
52}
53
54extern void camellia_sparc64_crypt(const u64 *key, const u32 *input,
55 u32 *output, unsigned int key_len);
56
57static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
58{
59 struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
60
61 camellia_sparc64_crypt(&ctx->encrypt_key[0],
62 (const u32 *) src,
63 (u32 *) dst, ctx->key_len);
64}
65
66static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
67{
68 struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
69
70 camellia_sparc64_crypt(&ctx->decrypt_key[0],
71 (const u32 *) src,
72 (u32 *) dst, ctx->key_len);
73}
74
75extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len);
76
77typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len,
78 const u64 *key);
79
80extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds;
81extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds;
82
83#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1))
84
85static int __ecb_crypt(struct blkcipher_desc *desc,
86 struct scatterlist *dst, struct scatterlist *src,
87 unsigned int nbytes, bool encrypt)
88{
89 struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
90 struct blkcipher_walk walk;
91 ecb_crypt_op *op;
92 const u64 *key;
93 int err;
94
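	/* Camellia-128 uses 18 rounds (three 6-round "grand rounds");
	 * the 192- and 256-bit variants use 24, hence the two loop flavours.
	 */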
95 op = camellia_sparc64_ecb_crypt_3_grand_rounds;
96 if (ctx->key_len != 16)
97 op = camellia_sparc64_ecb_crypt_4_grand_rounds;
98
99 blkcipher_walk_init(&walk, dst, src, nbytes);
100 err = blkcipher_walk_virt(desc, &walk);
101
102 if (encrypt)
103 key = &ctx->encrypt_key[0];
104 else
105 key = &ctx->decrypt_key[0];
106 camellia_sparc64_load_keys(key, ctx->key_len);
107 while ((nbytes = walk.nbytes)) {
108 unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
109
110 if (likely(block_len)) {
111 const u64 *src64;
112 u64 *dst64;
113
114 src64 = (const u64 *)walk.src.virt.addr;
115 dst64 = (u64 *) walk.dst.virt.addr;
116 op(src64, dst64, block_len, key);
117 }
118 nbytes &= CAMELLIA_BLOCK_SIZE - 1;
119 err = blkcipher_walk_done(desc, &walk, nbytes);
120 }
121 fprs_write(0);
122 return err;
123}
124
125static int ecb_encrypt(struct blkcipher_desc *desc,
126 struct scatterlist *dst, struct scatterlist *src,
127 unsigned int nbytes)
128{
129 return __ecb_crypt(desc, dst, src, nbytes, true);
130}
131
132static int ecb_decrypt(struct blkcipher_desc *desc,
133 struct scatterlist *dst, struct scatterlist *src,
134 unsigned int nbytes)
135{
136 return __ecb_crypt(desc, dst, src, nbytes, false);
137}
138
139typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len,
140 const u64 *key, u64 *iv);
141
142extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds;
143extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds;
144extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds;
145extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds;
146
147static int cbc_encrypt(struct blkcipher_desc *desc,
148 struct scatterlist *dst, struct scatterlist *src,
149 unsigned int nbytes)
150{
151 struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
152 struct blkcipher_walk walk;
153 cbc_crypt_op *op;
154 const u64 *key;
155 int err;
156
157 op = camellia_sparc64_cbc_encrypt_3_grand_rounds;
158 if (ctx->key_len != 16)
159 op = camellia_sparc64_cbc_encrypt_4_grand_rounds;
160
161 blkcipher_walk_init(&walk, dst, src, nbytes);
162 err = blkcipher_walk_virt(desc, &walk);
163
164 key = &ctx->encrypt_key[0];
165 camellia_sparc64_load_keys(key, ctx->key_len);
166 while ((nbytes = walk.nbytes)) {
167 unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
168
169 if (likely(block_len)) {
170 const u64 *src64;
171 u64 *dst64;
172
173 src64 = (const u64 *)walk.src.virt.addr;
174 dst64 = (u64 *) walk.dst.virt.addr;
175 op(src64, dst64, block_len, key,
176 (u64 *) walk.iv);
177 }
178 nbytes &= CAMELLIA_BLOCK_SIZE - 1;
179 err = blkcipher_walk_done(desc, &walk, nbytes);
180 }
181 fprs_write(0);
182 return err;
183}
184
185static int cbc_decrypt(struct blkcipher_desc *desc,
186 struct scatterlist *dst, struct scatterlist *src,
187 unsigned int nbytes)
188{
189 struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
190 struct blkcipher_walk walk;
191 cbc_crypt_op *op;
192 const u64 *key;
193 int err;
194
195 op = camellia_sparc64_cbc_decrypt_3_grand_rounds;
196 if (ctx->key_len != 16)
197 op = camellia_sparc64_cbc_decrypt_4_grand_rounds;
198
199 blkcipher_walk_init(&walk, dst, src, nbytes);
200 err = blkcipher_walk_virt(desc, &walk);
201
202 key = &ctx->decrypt_key[0];
203 camellia_sparc64_load_keys(key, ctx->key_len);
204 while ((nbytes = walk.nbytes)) {
205 unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
206
207 if (likely(block_len)) {
208 const u64 *src64;
209 u64 *dst64;
210
211 src64 = (const u64 *)walk.src.virt.addr;
212 dst64 = (u64 *) walk.dst.virt.addr;
213 op(src64, dst64, block_len, key,
214 (u64 *) walk.iv);
215 }
216 nbytes &= CAMELLIA_BLOCK_SIZE - 1;
217 err = blkcipher_walk_done(desc, &walk, nbytes);
218 }
219 fprs_write(0);
220 return err;
221}
222
223static struct crypto_alg algs[] = { {
224 .cra_name = "camellia",
225 .cra_driver_name = "camellia-sparc64",
226 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
227 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
228 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
229 .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
230 .cra_alignmask = 3,
231 .cra_module = THIS_MODULE,
232 .cra_u = {
233 .cipher = {
234 .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
235 .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
236 .cia_setkey = camellia_set_key,
237 .cia_encrypt = camellia_encrypt,
238 .cia_decrypt = camellia_decrypt
239 }
240 }
241}, {
242 .cra_name = "ecb(camellia)",
243 .cra_driver_name = "ecb-camellia-sparc64",
244 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
245 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
246 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
247 .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
248 .cra_alignmask = 7,
249 .cra_type = &crypto_blkcipher_type,
250 .cra_module = THIS_MODULE,
251 .cra_u = {
252 .blkcipher = {
253 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
254 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
255 .setkey = camellia_set_key,
256 .encrypt = ecb_encrypt,
257 .decrypt = ecb_decrypt,
258 },
259 },
260}, {
261 .cra_name = "cbc(camellia)",
262 .cra_driver_name = "cbc-camellia-sparc64",
263 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
264 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
265 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
266 .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
267 .cra_alignmask = 7,
268 .cra_type = &crypto_blkcipher_type,
269 .cra_module = THIS_MODULE,
270 .cra_u = {
271 .blkcipher = {
272 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
273 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
274 .setkey = camellia_set_key,
275 .encrypt = cbc_encrypt,
276 .decrypt = cbc_decrypt,
277 },
278 },
279}
280};
281
282static bool __init sparc64_has_camellia_opcode(void)
283{
284 unsigned long cfr;
285
286 if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
287 return false;
288
289 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
290 if (!(cfr & CFR_CAMELLIA))
291 return false;
292
293 return true;
294}
295
296static int __init camellia_sparc64_mod_init(void)
297{
298 int i;
299
300 for (i = 0; i < ARRAY_SIZE(algs); i++)
301 INIT_LIST_HEAD(&algs[i].cra_list);
302
303 if (sparc64_has_camellia_opcode()) {
304 pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
305 return crypto_register_algs(algs, ARRAY_SIZE(algs));
306 }
307 pr_info("sparc64 camellia opcodes not available.\n");
308 return -ENODEV;
309}
310
311static void __exit camellia_sparc64_mod_fini(void)
312{
313 crypto_unregister_algs(algs, ARRAY_SIZE(algs));
314}
315
316module_init(camellia_sparc64_mod_init);
317module_exit(camellia_sparc64_mod_fini);
318
319MODULE_LICENSE("GPL");
320MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated");
321
 322MODULE_ALIAS("camellia");
diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S
new file mode 100644
index 000000000000..2b1976e765b5
--- /dev/null
+++ b/arch/sparc/crypto/crc32c_asm.S
@@ -0,0 +1,20 @@
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3#include <asm/asi.h>
4
5#include "opcodes.h"
6
7ENTRY(crc32c_sparc64)
8 /* %o0=crc32p, %o1=data_ptr, %o2=len */
9 VISEntryHalf
10 lda [%o0] ASI_PL, %f1
111: ldd [%o1], %f2
12 CRC32C(0,2,0)
13 subcc %o2, 8, %o2
14 bne,pt %icc, 1b
15 add %o1, 0x8, %o1
16 sta %f1, [%o0] ASI_PL
17 VISExitHalf
182: retl
19 nop
20ENDPROC(crc32c_sparc64)
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
new file mode 100644
index 000000000000..0bd89cea8d8e
--- /dev/null
+++ b/arch/sparc/crypto/crc32c_glue.c
@@ -0,0 +1,179 @@
1/* Glue code for CRC32C optimized for sparc64 crypto opcodes.
2 *
3 * This is based largely upon arch/x86/crypto/crc32c-intel.c
4 *
5 * Copyright (C) 2008 Intel Corporation
6 * Authors: Austin Zhang <austin_zhang@linux.intel.com>
7 * Kent Liu <kent.liu@intel.com>
8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/crc32.h>
17
18#include <crypto/internal/hash.h>
19
20#include <asm/pstate.h>
21#include <asm/elf.h>
22
23#include "opcodes.h"
24
25/*
26 * Setting the seed allows arbitrary accumulators and flexible XOR policy
27 * If your algorithm starts with ~0, then XOR with ~0 before you set
28 * the seed.
29 */
30static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key,
31 unsigned int keylen)
32{
33 u32 *mctx = crypto_shash_ctx(hash);
34
35 if (keylen != sizeof(u32)) {
36 crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
37 return -EINVAL;
38 }
39 *(__le32 *)mctx = le32_to_cpup((__le32 *)key);
40 return 0;
41}
42
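/*
 * For example (a sketch, not part of the patch): to resume a
 * conventionally finalized crc32c value "prev", pass its bitwise
 * complement as the key, since the value stored here is used verbatim as
 * the starting accumulator and final() inverts the result again:
 *
 *	__le32 seed = cpu_to_le32(~prev);
 *	err = crypto_shash_setkey(tfm, (const u8 *)&seed, sizeof(seed));
 */
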
43static int crc32c_sparc64_init(struct shash_desc *desc)
44{
45 u32 *mctx = crypto_shash_ctx(desc->tfm);
46 u32 *crcp = shash_desc_ctx(desc);
47
48 *crcp = *mctx;
49
50 return 0;
51}
52
53extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len);
54
55static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len)
56{
57 unsigned int asm_len;
58
59 asm_len = len & ~7U;
60 if (asm_len) {
61 crc32c_sparc64(crcp, data, asm_len);
62 data += asm_len / 8;
63 len -= asm_len;
64 }
65 if (len)
66 *crcp = __crc32c_le(*crcp, (const unsigned char *) data, len);
67}
68
69static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data,
70 unsigned int len)
71{
72 u32 *crcp = shash_desc_ctx(desc);
73
74 crc32c_compute(crcp, (const u64 *) data, len);
75
76 return 0;
77}
78
79static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len,
80 u8 *out)
81{
82 u32 tmp = *crcp;
83
84 crc32c_compute(&tmp, (const u64 *) data, len);
85
86 *(__le32 *) out = ~cpu_to_le32(tmp);
87 return 0;
88}
89
90static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data,
91 unsigned int len, u8 *out)
92{
93 return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out);
94}
95
96static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out)
97{
98 u32 *crcp = shash_desc_ctx(desc);
99
100 *(__le32 *) out = ~cpu_to_le32p(crcp);
101 return 0;
102}
103
104static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data,
105 unsigned int len, u8 *out)
106{
107 return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len,
108 out);
109}
110
111static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm)
112{
113 u32 *key = crypto_tfm_ctx(tfm);
114
115 *key = ~0;
116
117 return 0;
118}
119
120#define CHKSUM_BLOCK_SIZE 1
121#define CHKSUM_DIGEST_SIZE 4
122
123static struct shash_alg alg = {
124 .setkey = crc32c_sparc64_setkey,
125 .init = crc32c_sparc64_init,
126 .update = crc32c_sparc64_update,
127 .final = crc32c_sparc64_final,
128 .finup = crc32c_sparc64_finup,
129 .digest = crc32c_sparc64_digest,
130 .descsize = sizeof(u32),
131 .digestsize = CHKSUM_DIGEST_SIZE,
132 .base = {
133 .cra_name = "crc32c",
134 .cra_driver_name = "crc32c-sparc64",
135 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
136 .cra_blocksize = CHKSUM_BLOCK_SIZE,
137 .cra_ctxsize = sizeof(u32),
138 .cra_alignmask = 7,
139 .cra_module = THIS_MODULE,
140 .cra_init = crc32c_sparc64_cra_init,
141 }
142};
143
144static bool __init sparc64_has_crc32c_opcode(void)
145{
146 unsigned long cfr;
147
148 if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
149 return false;
150
151 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
152 if (!(cfr & CFR_CRC32C))
153 return false;
154
155 return true;
156}
157
158static int __init crc32c_sparc64_mod_init(void)
159{
160 if (sparc64_has_crc32c_opcode()) {
161 pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n");
162 return crypto_register_shash(&alg);
163 }
164 pr_info("sparc64 crc32c opcode not available.\n");
165 return -ENODEV;
166}
167
168static void __exit crc32c_sparc64_mod_fini(void)
169{
170 crypto_unregister_shash(&alg);
171}
172
173module_init(crc32c_sparc64_mod_init);
174module_exit(crc32c_sparc64_mod_fini);
175
176MODULE_LICENSE("GPL");
177MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");
178
179MODULE_ALIAS("crc32c");
diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c
new file mode 100644
index 000000000000..5f5724a0ae22
--- /dev/null
+++ b/arch/sparc/crypto/crop_devid.c
@@ -0,0 +1,14 @@
1#include <linux/module.h>
2#include <linux/of_device.h>
3
4/* This is a dummy device table linked into all of the crypto
5 * opcode drivers. It serves to trigger the module autoloading
6 * mechanisms in userspace which scan the OF device tree and
7 * load any modules which have device table entries that
8 * match OF device nodes.
9 */
10static const struct of_device_id crypto_opcode_match[] = {
11 { .name = "cpu", .compatible = "sun4v", },
12 {},
13};
14MODULE_DEVICE_TABLE(of, crypto_opcode_match);
diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S
new file mode 100644
index 000000000000..30b6e90b28b2
--- /dev/null
+++ b/arch/sparc/crypto/des_asm.S
@@ -0,0 +1,418 @@
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3
4#include "opcodes.h"
5
6 .align 32
7ENTRY(des_sparc64_key_expand)
8 /* %o0=input_key, %o1=output_key */
9 VISEntryHalf
10 ld [%o0 + 0x00], %f0
11 ld [%o0 + 0x04], %f1
12 DES_KEXPAND(0, 0, 0)
13 DES_KEXPAND(0, 1, 2)
14 DES_KEXPAND(2, 3, 6)
15 DES_KEXPAND(2, 2, 4)
16 DES_KEXPAND(6, 3, 10)
17 DES_KEXPAND(6, 2, 8)
18 DES_KEXPAND(10, 3, 14)
19 DES_KEXPAND(10, 2, 12)
20 DES_KEXPAND(14, 1, 16)
21 DES_KEXPAND(16, 3, 20)
22 DES_KEXPAND(16, 2, 18)
23 DES_KEXPAND(20, 3, 24)
24 DES_KEXPAND(20, 2, 22)
25 DES_KEXPAND(24, 3, 28)
26 DES_KEXPAND(24, 2, 26)
27 DES_KEXPAND(28, 1, 30)
28 std %f0, [%o1 + 0x00]
29 std %f2, [%o1 + 0x08]
30 std %f4, [%o1 + 0x10]
31 std %f6, [%o1 + 0x18]
32 std %f8, [%o1 + 0x20]
33 std %f10, [%o1 + 0x28]
34 std %f12, [%o1 + 0x30]
35 std %f14, [%o1 + 0x38]
36 std %f16, [%o1 + 0x40]
37 std %f18, [%o1 + 0x48]
38 std %f20, [%o1 + 0x50]
39 std %f22, [%o1 + 0x58]
40 std %f24, [%o1 + 0x60]
41 std %f26, [%o1 + 0x68]
42 std %f28, [%o1 + 0x70]
43 std %f30, [%o1 + 0x78]
44 retl
45 VISExitHalf
46ENDPROC(des_sparc64_key_expand)
47
48 .align 32
49ENTRY(des_sparc64_crypt)
50 /* %o0=key, %o1=input, %o2=output */
51 VISEntry
52 ldd [%o1 + 0x00], %f32
53 ldd [%o0 + 0x00], %f0
54 ldd [%o0 + 0x08], %f2
55 ldd [%o0 + 0x10], %f4
56 ldd [%o0 + 0x18], %f6
57 ldd [%o0 + 0x20], %f8
58 ldd [%o0 + 0x28], %f10
59 ldd [%o0 + 0x30], %f12
60 ldd [%o0 + 0x38], %f14
61 ldd [%o0 + 0x40], %f16
62 ldd [%o0 + 0x48], %f18
63 ldd [%o0 + 0x50], %f20
64 ldd [%o0 + 0x58], %f22
65 ldd [%o0 + 0x60], %f24
66 ldd [%o0 + 0x68], %f26
67 ldd [%o0 + 0x70], %f28
68 ldd [%o0 + 0x78], %f30
69 DES_IP(32, 32)
70 DES_ROUND(0, 2, 32, 32)
71 DES_ROUND(4, 6, 32, 32)
72 DES_ROUND(8, 10, 32, 32)
73 DES_ROUND(12, 14, 32, 32)
74 DES_ROUND(16, 18, 32, 32)
75 DES_ROUND(20, 22, 32, 32)
76 DES_ROUND(24, 26, 32, 32)
77 DES_ROUND(28, 30, 32, 32)
78 DES_IIP(32, 32)
79 std %f32, [%o2 + 0x00]
80 retl
81 VISExit
82ENDPROC(des_sparc64_crypt)
83
84 .align 32
85ENTRY(des_sparc64_load_keys)
86 /* %o0=key */
87 VISEntry
88 ldd [%o0 + 0x00], %f0
89 ldd [%o0 + 0x08], %f2
90 ldd [%o0 + 0x10], %f4
91 ldd [%o0 + 0x18], %f6
92 ldd [%o0 + 0x20], %f8
93 ldd [%o0 + 0x28], %f10
94 ldd [%o0 + 0x30], %f12
95 ldd [%o0 + 0x38], %f14
96 ldd [%o0 + 0x40], %f16
97 ldd [%o0 + 0x48], %f18
98 ldd [%o0 + 0x50], %f20
99 ldd [%o0 + 0x58], %f22
100 ldd [%o0 + 0x60], %f24
101 ldd [%o0 + 0x68], %f26
102 ldd [%o0 + 0x70], %f28
103 retl
104 ldd [%o0 + 0x78], %f30
105ENDPROC(des_sparc64_load_keys)
106
107 .align 32
108ENTRY(des_sparc64_ecb_crypt)
109 /* %o0=input, %o1=output, %o2=len */
1101: ldd [%o0 + 0x00], %f32
111 add %o0, 0x08, %o0
112 DES_IP(32, 32)
113 DES_ROUND(0, 2, 32, 32)
114 DES_ROUND(4, 6, 32, 32)
115 DES_ROUND(8, 10, 32, 32)
116 DES_ROUND(12, 14, 32, 32)
117 DES_ROUND(16, 18, 32, 32)
118 DES_ROUND(20, 22, 32, 32)
119 DES_ROUND(24, 26, 32, 32)
120 DES_ROUND(28, 30, 32, 32)
121 DES_IIP(32, 32)
122 std %f32, [%o1 + 0x00]
123 subcc %o2, 0x08, %o2
124 bne,pt %icc, 1b
125 add %o1, 0x08, %o1
126 retl
127 nop
128ENDPROC(des_sparc64_ecb_crypt)
129
130 .align 32
131ENTRY(des_sparc64_cbc_encrypt)
132 /* %o0=input, %o1=output, %o2=len, %o3=IV */
133 ldd [%o3 + 0x00], %f32
1341: ldd [%o0 + 0x00], %f34
135 fxor %f32, %f34, %f32
136 DES_IP(32, 32)
137 DES_ROUND(0, 2, 32, 32)
138 DES_ROUND(4, 6, 32, 32)
139 DES_ROUND(8, 10, 32, 32)
140 DES_ROUND(12, 14, 32, 32)
141 DES_ROUND(16, 18, 32, 32)
142 DES_ROUND(20, 22, 32, 32)
143 DES_ROUND(24, 26, 32, 32)
144 DES_ROUND(28, 30, 32, 32)
145 DES_IIP(32, 32)
146 std %f32, [%o1 + 0x00]
147 add %o0, 0x08, %o0
148 subcc %o2, 0x08, %o2
149 bne,pt %icc, 1b
150 add %o1, 0x08, %o1
151 retl
152 std %f32, [%o3 + 0x00]
153ENDPROC(des_sparc64_cbc_encrypt)
154
155 .align 32
156ENTRY(des_sparc64_cbc_decrypt)
157 /* %o0=input, %o1=output, %o2=len, %o3=IV */
158 ldd [%o3 + 0x00], %f34
1591: ldd [%o0 + 0x00], %f36
160 DES_IP(36, 32)
161 DES_ROUND(0, 2, 32, 32)
162 DES_ROUND(4, 6, 32, 32)
163 DES_ROUND(8, 10, 32, 32)
164 DES_ROUND(12, 14, 32, 32)
165 DES_ROUND(16, 18, 32, 32)
166 DES_ROUND(20, 22, 32, 32)
167 DES_ROUND(24, 26, 32, 32)
168 DES_ROUND(28, 30, 32, 32)
169 DES_IIP(32, 32)
170 fxor %f32, %f34, %f32
171 fsrc2 %f36, %f34
172 std %f32, [%o1 + 0x00]
173 add %o0, 0x08, %o0
174 subcc %o2, 0x08, %o2
175 bne,pt %icc, 1b
176 add %o1, 0x08, %o1
177 retl
178 std %f36, [%o3 + 0x00]
179ENDPROC(des_sparc64_cbc_decrypt)
180
181 .align 32
182ENTRY(des3_ede_sparc64_crypt)
183 /* %o0=key, %o1=input, %o2=output */
184 VISEntry
185 ldd [%o1 + 0x00], %f32
186 ldd [%o0 + 0x00], %f0
187 ldd [%o0 + 0x08], %f2
188 ldd [%o0 + 0x10], %f4
189 ldd [%o0 + 0x18], %f6
190 ldd [%o0 + 0x20], %f8
191 ldd [%o0 + 0x28], %f10
192 ldd [%o0 + 0x30], %f12
193 ldd [%o0 + 0x38], %f14
194 ldd [%o0 + 0x40], %f16
195 ldd [%o0 + 0x48], %f18
196 ldd [%o0 + 0x50], %f20
197 ldd [%o0 + 0x58], %f22
198 ldd [%o0 + 0x60], %f24
199 ldd [%o0 + 0x68], %f26
200 ldd [%o0 + 0x70], %f28
201 ldd [%o0 + 0x78], %f30
202 DES_IP(32, 32)
203 DES_ROUND(0, 2, 32, 32)
204 ldd [%o0 + 0x80], %f0
205 ldd [%o0 + 0x88], %f2
206 DES_ROUND(4, 6, 32, 32)
207 ldd [%o0 + 0x90], %f4
208 ldd [%o0 + 0x98], %f6
209 DES_ROUND(8, 10, 32, 32)
210 ldd [%o0 + 0xa0], %f8
211 ldd [%o0 + 0xa8], %f10
212 DES_ROUND(12, 14, 32, 32)
213 ldd [%o0 + 0xb0], %f12
214 ldd [%o0 + 0xb8], %f14
215 DES_ROUND(16, 18, 32, 32)
216 ldd [%o0 + 0xc0], %f16
217 ldd [%o0 + 0xc8], %f18
218 DES_ROUND(20, 22, 32, 32)
219 ldd [%o0 + 0xd0], %f20
220 ldd [%o0 + 0xd8], %f22
221 DES_ROUND(24, 26, 32, 32)
222 ldd [%o0 + 0xe0], %f24
223 ldd [%o0 + 0xe8], %f26
224 DES_ROUND(28, 30, 32, 32)
225 ldd [%o0 + 0xf0], %f28
226 ldd [%o0 + 0xf8], %f30
227 DES_IIP(32, 32)
228 DES_IP(32, 32)
229 DES_ROUND(0, 2, 32, 32)
230 ldd [%o0 + 0x100], %f0
231 ldd [%o0 + 0x108], %f2
232 DES_ROUND(4, 6, 32, 32)
233 ldd [%o0 + 0x110], %f4
234 ldd [%o0 + 0x118], %f6
235 DES_ROUND(8, 10, 32, 32)
236 ldd [%o0 + 0x120], %f8
237 ldd [%o0 + 0x128], %f10
238 DES_ROUND(12, 14, 32, 32)
239 ldd [%o0 + 0x130], %f12
240 ldd [%o0 + 0x138], %f14
241 DES_ROUND(16, 18, 32, 32)
242 ldd [%o0 + 0x140], %f16
243 ldd [%o0 + 0x148], %f18
244 DES_ROUND(20, 22, 32, 32)
245 ldd [%o0 + 0x150], %f20
246 ldd [%o0 + 0x158], %f22
247 DES_ROUND(24, 26, 32, 32)
248 ldd [%o0 + 0x160], %f24
249 ldd [%o0 + 0x168], %f26
250 DES_ROUND(28, 30, 32, 32)
251 ldd [%o0 + 0x170], %f28
252 ldd [%o0 + 0x178], %f30
253 DES_IIP(32, 32)
254 DES_IP(32, 32)
255 DES_ROUND(0, 2, 32, 32)
256 DES_ROUND(4, 6, 32, 32)
257 DES_ROUND(8, 10, 32, 32)
258 DES_ROUND(12, 14, 32, 32)
259 DES_ROUND(16, 18, 32, 32)
260 DES_ROUND(20, 22, 32, 32)
261 DES_ROUND(24, 26, 32, 32)
262 DES_ROUND(28, 30, 32, 32)
263 DES_IIP(32, 32)
264
265 std %f32, [%o2 + 0x00]
266 retl
267 VISExit
268ENDPROC(des3_ede_sparc64_crypt)
269
270 .align 32
271ENTRY(des3_ede_sparc64_load_keys)
272 /* %o0=key */
273 VISEntry
274 ldd [%o0 + 0x00], %f0
275 ldd [%o0 + 0x08], %f2
276 ldd [%o0 + 0x10], %f4
277 ldd [%o0 + 0x18], %f6
278 ldd [%o0 + 0x20], %f8
279 ldd [%o0 + 0x28], %f10
280 ldd [%o0 + 0x30], %f12
281 ldd [%o0 + 0x38], %f14
282 ldd [%o0 + 0x40], %f16
283 ldd [%o0 + 0x48], %f18
284 ldd [%o0 + 0x50], %f20
285 ldd [%o0 + 0x58], %f22
286 ldd [%o0 + 0x60], %f24
287 ldd [%o0 + 0x68], %f26
288 ldd [%o0 + 0x70], %f28
289 ldd [%o0 + 0x78], %f30
290 ldd [%o0 + 0x80], %f32
291 ldd [%o0 + 0x88], %f34
292 ldd [%o0 + 0x90], %f36
293 ldd [%o0 + 0x98], %f38
294 ldd [%o0 + 0xa0], %f40
295 ldd [%o0 + 0xa8], %f42
296 ldd [%o0 + 0xb0], %f44
297 ldd [%o0 + 0xb8], %f46
298 ldd [%o0 + 0xc0], %f48
299 ldd [%o0 + 0xc8], %f50
300 ldd [%o0 + 0xd0], %f52
301 ldd [%o0 + 0xd8], %f54
302 ldd [%o0 + 0xe0], %f56
303 retl
304 ldd [%o0 + 0xe8], %f58
305ENDPROC(des3_ede_sparc64_load_keys)
306
307#define DES3_LOOP_BODY(X) \
308 DES_IP(X, X) \
309 DES_ROUND(0, 2, X, X) \
310 DES_ROUND(4, 6, X, X) \
311 DES_ROUND(8, 10, X, X) \
312 DES_ROUND(12, 14, X, X) \
313 DES_ROUND(16, 18, X, X) \
314 ldd [%o0 + 0xf0], %f16; \
315 ldd [%o0 + 0xf8], %f18; \
316 DES_ROUND(20, 22, X, X) \
317 ldd [%o0 + 0x100], %f20; \
318 ldd [%o0 + 0x108], %f22; \
319 DES_ROUND(24, 26, X, X) \
320 ldd [%o0 + 0x110], %f24; \
321 ldd [%o0 + 0x118], %f26; \
322 DES_ROUND(28, 30, X, X) \
323 ldd [%o0 + 0x120], %f28; \
324 ldd [%o0 + 0x128], %f30; \
325 DES_IIP(X, X) \
326 DES_IP(X, X) \
327 DES_ROUND(32, 34, X, X) \
328 ldd [%o0 + 0x130], %f0; \
329 ldd [%o0 + 0x138], %f2; \
330 DES_ROUND(36, 38, X, X) \
331 ldd [%o0 + 0x140], %f4; \
332 ldd [%o0 + 0x148], %f6; \
333 DES_ROUND(40, 42, X, X) \
334 ldd [%o0 + 0x150], %f8; \
335 ldd [%o0 + 0x158], %f10; \
336 DES_ROUND(44, 46, X, X) \
337 ldd [%o0 + 0x160], %f12; \
338 ldd [%o0 + 0x168], %f14; \
339 DES_ROUND(48, 50, X, X) \
340 DES_ROUND(52, 54, X, X) \
341 DES_ROUND(56, 58, X, X) \
342 DES_ROUND(16, 18, X, X) \
343 ldd [%o0 + 0x170], %f16; \
344 ldd [%o0 + 0x178], %f18; \
345 DES_IIP(X, X) \
346 DES_IP(X, X) \
347 DES_ROUND(20, 22, X, X) \
348 ldd [%o0 + 0x50], %f20; \
349 ldd [%o0 + 0x58], %f22; \
350 DES_ROUND(24, 26, X, X) \
351 ldd [%o0 + 0x60], %f24; \
352 ldd [%o0 + 0x68], %f26; \
353 DES_ROUND(28, 30, X, X) \
354 ldd [%o0 + 0x70], %f28; \
355 ldd [%o0 + 0x78], %f30; \
356 DES_ROUND(0, 2, X, X) \
357 ldd [%o0 + 0x00], %f0; \
358 ldd [%o0 + 0x08], %f2; \
359 DES_ROUND(4, 6, X, X) \
360 ldd [%o0 + 0x10], %f4; \
361 ldd [%o0 + 0x18], %f6; \
362 DES_ROUND(8, 10, X, X) \
363 ldd [%o0 + 0x20], %f8; \
364 ldd [%o0 + 0x28], %f10; \
365 DES_ROUND(12, 14, X, X) \
366 ldd [%o0 + 0x30], %f12; \
367 ldd [%o0 + 0x38], %f14; \
368 DES_ROUND(16, 18, X, X) \
369 ldd [%o0 + 0x40], %f16; \
370 ldd [%o0 + 0x48], %f18; \
371 DES_IIP(X, X)
372
373 .align 32
374ENTRY(des3_ede_sparc64_ecb_crypt)
375 /* %o0=key, %o1=input, %o2=output, %o3=len */
3761: ldd [%o1 + 0x00], %f60
377 DES3_LOOP_BODY(60)
378 std %f60, [%o2 + 0x00]
379 subcc %o3, 0x08, %o3
380 bne,pt %icc, 1b
381 add %o2, 0x08, %o2
382 retl
383 nop
384ENDPROC(des3_ede_sparc64_ecb_crypt)
385
386 .align 32
387ENTRY(des3_ede_sparc64_cbc_encrypt)
388 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
389 ldd [%o4 + 0x00], %f60
3901: ldd [%o1 + 0x00], %f62
391 fxor %f60, %f62, %f60
392 DES3_LOOP_BODY(60)
393 std %f60, [%o2 + 0x00]
394 add %o1, 0x08, %o1
395 subcc %o3, 0x08, %o3
396 bne,pt %icc, 1b
397 add %o2, 0x08, %o2
398 retl
399 std %f60, [%o4 + 0x00]
400ENDPROC(des3_ede_sparc64_cbc_encrypt)
401
402 .align 32
403ENTRY(des3_ede_sparc64_cbc_decrypt)
404 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
405 ldd [%o4 + 0x00], %f62
4061: ldx [%o1 + 0x00], %g1
407 MOVXTOD_G1_F60
408 DES3_LOOP_BODY(60)
409 fxor %f62, %f60, %f60
410 MOVXTOD_G1_F62
411 std %f60, [%o2 + 0x00]
412 add %o1, 0x08, %o1
413 subcc %o3, 0x08, %o3
414 bne,pt %icc, 1b
415 add %o2, 0x08, %o2
416 retl
417 stx %g1, [%o4 + 0x00]
418ENDPROC(des3_ede_sparc64_cbc_decrypt)
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
new file mode 100644
index 000000000000..c4940c2d3073
--- /dev/null
+++ b/arch/sparc/crypto/des_glue.c
@@ -0,0 +1,529 @@
1/* Glue code for DES encryption optimized for sparc64 crypto opcodes.
2 *
3 * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
4 */
5
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#include <linux/crypto.h>
9#include <linux/init.h>
10#include <linux/module.h>
11#include <linux/mm.h>
12#include <linux/types.h>
13#include <crypto/algapi.h>
14#include <crypto/des.h>
15
16#include <asm/fpumacro.h>
17#include <asm/pstate.h>
18#include <asm/elf.h>
19
20#include "opcodes.h"
21
22struct des_sparc64_ctx {
23 u64 encrypt_expkey[DES_EXPKEY_WORDS / 2];
24 u64 decrypt_expkey[DES_EXPKEY_WORDS / 2];
25};
26
27struct des3_ede_sparc64_ctx {
28 u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
29 u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
30};
31
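/* DES decryption runs the same round function with the 16 round keys in
 * reverse order, so the decrypt schedule is simply the encrypt schedule
 * with its u64 round-key entries reversed.
 */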
32static void encrypt_to_decrypt(u64 *d, const u64 *e)
33{
34 const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1;
35 int i;
36
37 for (i = 0; i < DES_EXPKEY_WORDS / 2; i++)
38 *d++ = *s--;
39}
40
41extern void des_sparc64_key_expand(const u32 *input_key, u64 *key);
42
43static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
44 unsigned int keylen)
45{
46 struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
47 u32 *flags = &tfm->crt_flags;
48 u32 tmp[DES_EXPKEY_WORDS];
49 int ret;
50
51 /* Even though we have special instructions for key expansion,
52 * we call des_ekey() so that we don't have to write our own
53 * weak key detection code.
54 */
55 ret = des_ekey(tmp, key);
56 if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
57 *flags |= CRYPTO_TFM_RES_WEAK_KEY;
58 return -EINVAL;
59 }
60
61 des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]);
62 encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]);
63
64 return 0;
65}
66
67extern void des_sparc64_crypt(const u64 *key, const u64 *input,
68 u64 *output);
69
70static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
71{
72 struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
73 const u64 *K = ctx->encrypt_expkey;
74
75 des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
76}
77
78static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
79{
80 struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
81 const u64 *K = ctx->decrypt_expkey;
82
83 des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
84}
85
86extern void des_sparc64_load_keys(const u64 *key);
87
88extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output,
89 unsigned int len);
90
91#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1))
92
93static int __ecb_crypt(struct blkcipher_desc *desc,
94 struct scatterlist *dst, struct scatterlist *src,
95 unsigned int nbytes, bool encrypt)
96{
97 struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
98 struct blkcipher_walk walk;
99 int err;
100
101 blkcipher_walk_init(&walk, dst, src, nbytes);
102 err = blkcipher_walk_virt(desc, &walk);
103
104 if (encrypt)
105 des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
106 else
107 des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
108 while ((nbytes = walk.nbytes)) {
109 unsigned int block_len = nbytes & DES_BLOCK_MASK;
110
111 if (likely(block_len)) {
112 des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
113 (u64 *) walk.dst.virt.addr,
114 block_len);
115 }
116 nbytes &= DES_BLOCK_SIZE - 1;
117 err = blkcipher_walk_done(desc, &walk, nbytes);
118 }
119 fprs_write(0);
120 return err;
121}
122
123static int ecb_encrypt(struct blkcipher_desc *desc,
124 struct scatterlist *dst, struct scatterlist *src,
125 unsigned int nbytes)
126{
127 return __ecb_crypt(desc, dst, src, nbytes, true);
128}
129
130static int ecb_decrypt(struct blkcipher_desc *desc,
131 struct scatterlist *dst, struct scatterlist *src,
132 unsigned int nbytes)
133{
134 return __ecb_crypt(desc, dst, src, nbytes, false);
135}
136
137extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output,
138 unsigned int len, u64 *iv);
139
140static int cbc_encrypt(struct blkcipher_desc *desc,
141 struct scatterlist *dst, struct scatterlist *src,
142 unsigned int nbytes)
143{
144 struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
145 struct blkcipher_walk walk;
146 int err;
147
148 blkcipher_walk_init(&walk, dst, src, nbytes);
149 err = blkcipher_walk_virt(desc, &walk);
150
151 des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
152 while ((nbytes = walk.nbytes)) {
153 unsigned int block_len = nbytes & DES_BLOCK_MASK;
154
155 if (likely(block_len)) {
156 des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
157 (u64 *) walk.dst.virt.addr,
158 block_len, (u64 *) walk.iv);
159 }
160 nbytes &= DES_BLOCK_SIZE - 1;
161 err = blkcipher_walk_done(desc, &walk, nbytes);
162 }
163 fprs_write(0);
164 return err;
165}
166
167extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
168 unsigned int len, u64 *iv);
169
170static int cbc_decrypt(struct blkcipher_desc *desc,
171 struct scatterlist *dst, struct scatterlist *src,
172 unsigned int nbytes)
173{
174 struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
175 struct blkcipher_walk walk;
176 int err;
177
178 blkcipher_walk_init(&walk, dst, src, nbytes);
179 err = blkcipher_walk_virt(desc, &walk);
180
181 des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
182 while ((nbytes = walk.nbytes)) {
183 unsigned int block_len = nbytes & DES_BLOCK_MASK;
184
185 if (likely(block_len)) {
186 des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
187 (u64 *) walk.dst.virt.addr,
188 block_len, (u64 *) walk.iv);
189 }
190 nbytes &= DES_BLOCK_SIZE - 1;
191 err = blkcipher_walk_done(desc, &walk, nbytes);
192 }
193 fprs_write(0);
194 return err;
195}
196
197static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
198 unsigned int keylen)
199{
200 struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
201 const u32 *K = (const u32 *)key;
202 u32 *flags = &tfm->crt_flags;
203 u64 k1[DES_EXPKEY_WORDS / 2];
204 u64 k2[DES_EXPKEY_WORDS / 2];
205 u64 k3[DES_EXPKEY_WORDS / 2];
206
207 if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
208 !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
209 (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
210 *flags |= CRYPTO_TFM_RES_WEAK_KEY;
211 return -EINVAL;
212 }
213
214 des_sparc64_key_expand((const u32 *)key, k1);
215 key += DES_KEY_SIZE;
216 des_sparc64_key_expand((const u32 *)key, k2);
217 key += DES_KEY_SIZE;
218 des_sparc64_key_expand((const u32 *)key, k3);
219
220 memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1));
221 encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]);
222 memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
223 &k3[0], sizeof(k3));
224
225 encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]);
226 memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2],
227 &k2[0], sizeof(k2));
228 encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
229 &k1[0]);
230
231 return 0;
232}
233
234extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input,
235 u64 *output);
236
237static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
238{
239 struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
240 const u64 *K = ctx->encrypt_expkey;
241
242 des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
243}
244
245static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
246{
247 struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
248 const u64 *K = ctx->decrypt_expkey;
249
250 des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
251}
252
253extern void des3_ede_sparc64_load_keys(const u64 *key);
254
255extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input,
256 u64 *output, unsigned int len);
257
258static int __ecb3_crypt(struct blkcipher_desc *desc,
259 struct scatterlist *dst, struct scatterlist *src,
260 unsigned int nbytes, bool encrypt)
261{
262 struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
263 struct blkcipher_walk walk;
264 const u64 *K;
265 int err;
266
267 blkcipher_walk_init(&walk, dst, src, nbytes);
268 err = blkcipher_walk_virt(desc, &walk);
269
270 if (encrypt)
271 K = &ctx->encrypt_expkey[0];
272 else
273 K = &ctx->decrypt_expkey[0];
274 des3_ede_sparc64_load_keys(K);
275 while ((nbytes = walk.nbytes)) {
276 unsigned int block_len = nbytes & DES_BLOCK_MASK;
277
278 if (likely(block_len)) {
279 const u64 *src64 = (const u64 *)walk.src.virt.addr;
280 des3_ede_sparc64_ecb_crypt(K, src64,
281 (u64 *) walk.dst.virt.addr,
282 block_len);
283 }
284 nbytes &= DES_BLOCK_SIZE - 1;
285 err = blkcipher_walk_done(desc, &walk, nbytes);
286 }
287 fprs_write(0);
288 return err;
289}
290
291static int ecb3_encrypt(struct blkcipher_desc *desc,
292 struct scatterlist *dst, struct scatterlist *src,
293 unsigned int nbytes)
294{
295 return __ecb3_crypt(desc, dst, src, nbytes, true);
296}
297
298static int ecb3_decrypt(struct blkcipher_desc *desc,
299 struct scatterlist *dst, struct scatterlist *src,
300 unsigned int nbytes)
301{
302 return __ecb3_crypt(desc, dst, src, nbytes, false);
303}
304
305extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input,
306 u64 *output, unsigned int len,
307 u64 *iv);
308
309static int cbc3_encrypt(struct blkcipher_desc *desc,
310 struct scatterlist *dst, struct scatterlist *src,
311 unsigned int nbytes)
312{
313 struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
314 struct blkcipher_walk walk;
315 const u64 *K;
316 int err;
317
318 blkcipher_walk_init(&walk, dst, src, nbytes);
319 err = blkcipher_walk_virt(desc, &walk);
320
321 K = &ctx->encrypt_expkey[0];
322 des3_ede_sparc64_load_keys(K);
323 while ((nbytes = walk.nbytes)) {
324 unsigned int block_len = nbytes & DES_BLOCK_MASK;
325
326 if (likely(block_len)) {
327 const u64 *src64 = (const u64 *)walk.src.virt.addr;
328 des3_ede_sparc64_cbc_encrypt(K, src64,
329 (u64 *) walk.dst.virt.addr,
330 block_len,
331 (u64 *) walk.iv);
332 }
333 nbytes &= DES_BLOCK_SIZE - 1;
334 err = blkcipher_walk_done(desc, &walk, nbytes);
335 }
336 fprs_write(0);
337 return err;
338}
339
340extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input,
341 u64 *output, unsigned int len,
342 u64 *iv);
343
344static int cbc3_decrypt(struct blkcipher_desc *desc,
345 struct scatterlist *dst, struct scatterlist *src,
346 unsigned int nbytes)
347{
348 struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
349 struct blkcipher_walk walk;
350 const u64 *K;
351 int err;
352
353 blkcipher_walk_init(&walk, dst, src, nbytes);
354 err = blkcipher_walk_virt(desc, &walk);
355
356 K = &ctx->decrypt_expkey[0];
357 des3_ede_sparc64_load_keys(K);
358 while ((nbytes = walk.nbytes)) {
359 unsigned int block_len = nbytes & DES_BLOCK_MASK;
360
361 if (likely(block_len)) {
362 const u64 *src64 = (const u64 *)walk.src.virt.addr;
363 des3_ede_sparc64_cbc_decrypt(K, src64,
364 (u64 *) walk.dst.virt.addr,
365 block_len,
366 (u64 *) walk.iv);
367 }
368 nbytes &= DES_BLOCK_SIZE - 1;
369 err = blkcipher_walk_done(desc, &walk, nbytes);
370 }
371 fprs_write(0);
372 return err;
373}
374
375static struct crypto_alg algs[] = { {
376 .cra_name = "des",
377 .cra_driver_name = "des-sparc64",
378 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
379 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
380 .cra_blocksize = DES_BLOCK_SIZE,
381 .cra_ctxsize = sizeof(struct des_sparc64_ctx),
382 .cra_alignmask = 7,
383 .cra_module = THIS_MODULE,
384 .cra_u = {
385 .cipher = {
386 .cia_min_keysize = DES_KEY_SIZE,
387 .cia_max_keysize = DES_KEY_SIZE,
388 .cia_setkey = des_set_key,
389 .cia_encrypt = des_encrypt,
390 .cia_decrypt = des_decrypt
391 }
392 }
393}, {
394 .cra_name = "ecb(des)",
395 .cra_driver_name = "ecb-des-sparc64",
396 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
397 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
398 .cra_blocksize = DES_BLOCK_SIZE,
399 .cra_ctxsize = sizeof(struct des_sparc64_ctx),
400 .cra_alignmask = 7,
401 .cra_type = &crypto_blkcipher_type,
402 .cra_module = THIS_MODULE,
403 .cra_u = {
404 .blkcipher = {
405 .min_keysize = DES_KEY_SIZE,
406 .max_keysize = DES_KEY_SIZE,
407 .setkey = des_set_key,
408 .encrypt = ecb_encrypt,
409 .decrypt = ecb_decrypt,
410 },
411 },
412}, {
413 .cra_name = "cbc(des)",
414 .cra_driver_name = "cbc-des-sparc64",
415 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
416 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
417 .cra_blocksize = DES_BLOCK_SIZE,
418 .cra_ctxsize = sizeof(struct des_sparc64_ctx),
419 .cra_alignmask = 7,
420 .cra_type = &crypto_blkcipher_type,
421 .cra_module = THIS_MODULE,
422 .cra_u = {
423 .blkcipher = {
424 .min_keysize = DES_KEY_SIZE,
425 .max_keysize = DES_KEY_SIZE,
426 .setkey = des_set_key,
427 .encrypt = cbc_encrypt,
428 .decrypt = cbc_decrypt,
429 },
430 },
431}, {
432 .cra_name = "des3_ede",
433 .cra_driver_name = "des3_ede-sparc64",
434 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
435 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
436 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
437 .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
438 .cra_alignmask = 7,
439 .cra_module = THIS_MODULE,
440 .cra_u = {
441 .cipher = {
442 .cia_min_keysize = DES3_EDE_KEY_SIZE,
443 .cia_max_keysize = DES3_EDE_KEY_SIZE,
444 .cia_setkey = des3_ede_set_key,
445 .cia_encrypt = des3_ede_encrypt,
446 .cia_decrypt = des3_ede_decrypt
447 }
448 }
449}, {
450 .cra_name = "ecb(des3_ede)",
451 .cra_driver_name = "ecb-des3_ede-sparc64",
452 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
453 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
454 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
455 .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
456 .cra_alignmask = 7,
457 .cra_type = &crypto_blkcipher_type,
458 .cra_module = THIS_MODULE,
459 .cra_u = {
460 .blkcipher = {
461 .min_keysize = DES3_EDE_KEY_SIZE,
462 .max_keysize = DES3_EDE_KEY_SIZE,
463 .setkey = des3_ede_set_key,
464 .encrypt = ecb3_encrypt,
465 .decrypt = ecb3_decrypt,
466 },
467 },
468}, {
469 .cra_name = "cbc(des3_ede)",
470 .cra_driver_name = "cbc-des3_ede-sparc64",
471 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
472 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
473 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
474 .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
475 .cra_alignmask = 7,
476 .cra_type = &crypto_blkcipher_type,
477 .cra_module = THIS_MODULE,
478 .cra_u = {
479 .blkcipher = {
480 .min_keysize = DES3_EDE_KEY_SIZE,
481 .max_keysize = DES3_EDE_KEY_SIZE,
482 .setkey = des3_ede_set_key,
483 .encrypt = cbc3_encrypt,
484 .decrypt = cbc3_decrypt,
485 },
486 },
487} };
488
489static bool __init sparc64_has_des_opcode(void)
490{
491 unsigned long cfr;
492
493 if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
494 return false;
495
496 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
497 if (!(cfr & CFR_DES))
498 return false;
499
500 return true;
501}
502
503static int __init des_sparc64_mod_init(void)
504{
505 int i;
506
507 for (i = 0; i < ARRAY_SIZE(algs); i++)
508 INIT_LIST_HEAD(&algs[i].cra_list);
509
510 if (sparc64_has_des_opcode()) {
511 pr_info("Using sparc64 des opcodes optimized DES implementation\n");
512 return crypto_register_algs(algs, ARRAY_SIZE(algs));
513 }
514 pr_info("sparc64 des opcodes not available.\n");
515 return -ENODEV;
516}
517
518static void __exit des_sparc64_mod_fini(void)
519{
520 crypto_unregister_algs(algs, ARRAY_SIZE(algs));
521}
522
523module_init(des_sparc64_mod_init);
524module_exit(des_sparc64_mod_fini);
525
526MODULE_LICENSE("GPL");
527MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated");
528
529MODULE_ALIAS("des");
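Editor's note: the glue code above follows the usual blkcipher pattern: walk the scatterlists, hand whole 8-byte blocks to the assembler routines, and let blkcipher_walk_done() carry any tail bytes into the next pass. Below is a minimal sketch (not part of the commit) of how another kernel module might exercise the cbc(des3_ede) implementation through the generic API; the key, IV and buffer are placeholders for illustration only.

	#include <linux/crypto.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	static int example_cbc_des3_encrypt(void)
	{
		struct crypto_blkcipher *tfm;
		struct blkcipher_desc desc;
		struct scatterlist sg;
		/* Illustrative 24-byte key, 8-byte IV and 64-byte buffer. */
		static u8 key[24], iv[8], buf[64];
		int err;

		tfm = crypto_alloc_blkcipher("cbc(des3_ede)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_blkcipher_setkey(tfm, key, sizeof(key));
		if (err)
			goto out;

		crypto_blkcipher_set_iv(tfm, iv, sizeof(iv));

		desc.tfm = tfm;
		desc.flags = 0;

		/* Encrypt buf in place; nbytes must be a multiple of the block size. */
		sg_init_one(&sg, buf, sizeof(buf));
		err = crypto_blkcipher_encrypt(&desc, &sg, &sg, sizeof(buf));
	out:
		crypto_free_blkcipher(tfm);
		return err;
	}

Because the driver registers with cra_priority = SPARC_CR_OPCODE_PRIORITY (300), crypto_alloc_blkcipher() should prefer it over the generic C implementation whenever the DES opcodes are present.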
diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S
new file mode 100644
index 000000000000..3150404e602e
--- /dev/null
+++ b/arch/sparc/crypto/md5_asm.S
@@ -0,0 +1,70 @@
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3
4#include "opcodes.h"
5
6ENTRY(md5_sparc64_transform)
7 /* %o0 = digest, %o1 = data, %o2 = rounds */
8 VISEntryHalf
9 ld [%o0 + 0x00], %f0
10 ld [%o0 + 0x04], %f1
11 andcc %o1, 0x7, %g0
12 ld [%o0 + 0x08], %f2
13 bne,pn %xcc, 10f
14 ld [%o0 + 0x0c], %f3
15
161:
17 ldd [%o1 + 0x00], %f8
18 ldd [%o1 + 0x08], %f10
19 ldd [%o1 + 0x10], %f12
20 ldd [%o1 + 0x18], %f14
21 ldd [%o1 + 0x20], %f16
22 ldd [%o1 + 0x28], %f18
23 ldd [%o1 + 0x30], %f20
24 ldd [%o1 + 0x38], %f22
25
26 MD5
27
28 subcc %o2, 1, %o2
29 bne,pt %xcc, 1b
30 add %o1, 0x40, %o1
31
325:
33 st %f0, [%o0 + 0x00]
34 st %f1, [%o0 + 0x04]
35 st %f2, [%o0 + 0x08]
36 st %f3, [%o0 + 0x0c]
37 retl
38 VISExitHalf
3910:
40 alignaddr %o1, %g0, %o1
41
42 ldd [%o1 + 0x00], %f10
431:
44 ldd [%o1 + 0x08], %f12
45 ldd [%o1 + 0x10], %f14
46 ldd [%o1 + 0x18], %f16
47 ldd [%o1 + 0x20], %f18
48 ldd [%o1 + 0x28], %f20
49 ldd [%o1 + 0x30], %f22
50 ldd [%o1 + 0x38], %f24
51 ldd [%o1 + 0x40], %f26
52
53 faligndata %f10, %f12, %f8
54 faligndata %f12, %f14, %f10
55 faligndata %f14, %f16, %f12
56 faligndata %f16, %f18, %f14
57 faligndata %f18, %f20, %f16
58 faligndata %f20, %f22, %f18
59 faligndata %f22, %f24, %f20
60 faligndata %f24, %f26, %f22
61
62 MD5
63
64 subcc %o2, 1, %o2
65 fsrc2 %f26, %f10
66 bne,pt %xcc, 1b
67 add %o1, 0x40, %o1
68
69 ba,a,pt %xcc, 5b
70ENDPROC(md5_sparc64_transform)
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
new file mode 100644
index 000000000000..603d723038ce
--- /dev/null
+++ b/arch/sparc/crypto/md5_glue.c
@@ -0,0 +1,188 @@
1/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes.
2 *
3 * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
4 * and crypto/md5.c which are:
5 *
6 * Copyright (c) Alan Smithee.
7 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
8 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
9 * Copyright (c) Mathias Krause <minipli@googlemail.com>
10 * Copyright (c) Cryptoapi developers.
11 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
12 */
13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16#include <crypto/internal/hash.h>
17#include <linux/init.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/cryptohash.h>
21#include <linux/types.h>
22#include <crypto/md5.h>
23
24#include <asm/pstate.h>
25#include <asm/elf.h>
26
27#include "opcodes.h"
28
29asmlinkage void md5_sparc64_transform(u32 *digest, const char *data,
30 unsigned int rounds);
31
32static int md5_sparc64_init(struct shash_desc *desc)
33{
34 struct md5_state *mctx = shash_desc_ctx(desc);
35
36 mctx->hash[0] = cpu_to_le32(0x67452301);
37 mctx->hash[1] = cpu_to_le32(0xefcdab89);
38 mctx->hash[2] = cpu_to_le32(0x98badcfe);
39 mctx->hash[3] = cpu_to_le32(0x10325476);
40 mctx->byte_count = 0;
41
42 return 0;
43}
44
45static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data,
46 unsigned int len, unsigned int partial)
47{
48 unsigned int done = 0;
49
50 sctx->byte_count += len;
51 if (partial) {
52 done = MD5_HMAC_BLOCK_SIZE - partial;
53 memcpy((u8 *)sctx->block + partial, data, done);
54 md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1);
55 }
56 if (len - done >= MD5_HMAC_BLOCK_SIZE) {
57 const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE;
58
59 md5_sparc64_transform(sctx->hash, data + done, rounds);
60 done += rounds * MD5_HMAC_BLOCK_SIZE;
61 }
62
63 memcpy(sctx->block, data + done, len - done);
64}
65
66static int md5_sparc64_update(struct shash_desc *desc, const u8 *data,
67 unsigned int len)
68{
69 struct md5_state *sctx = shash_desc_ctx(desc);
70 unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
71
72 /* Handle the fast case right here */
73 if (partial + len < MD5_HMAC_BLOCK_SIZE) {
74 sctx->byte_count += len;
75 memcpy((u8 *)sctx->block + partial, data, len);
76 } else
77 __md5_sparc64_update(sctx, data, len, partial);
78
79 return 0;
80}
81
82/* Add padding and return the message digest. */
83static int md5_sparc64_final(struct shash_desc *desc, u8 *out)
84{
85 struct md5_state *sctx = shash_desc_ctx(desc);
86 unsigned int i, index, padlen;
87 u32 *dst = (u32 *)out;
88 __le64 bits;
89 static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, };
90
91 bits = cpu_to_le64(sctx->byte_count << 3);
92
93 /* Pad out to 56 mod 64 and append length */
94 index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
95 padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index);
96
97 /* We need to fill a whole block for __md5_sparc64_update() */
98 if (padlen <= 56) {
99 sctx->byte_count += padlen;
100 memcpy((u8 *)sctx->block + index, padding, padlen);
101 } else {
102 __md5_sparc64_update(sctx, padding, padlen, index);
103 }
104 __md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
105
106 /* Store state in digest */
107 for (i = 0; i < MD5_HASH_WORDS; i++)
108 dst[i] = sctx->hash[i];
109
110 /* Wipe context */
111 memset(sctx, 0, sizeof(*sctx));
112
113 return 0;
114}
115
116static int md5_sparc64_export(struct shash_desc *desc, void *out)
117{
118 struct md5_state *sctx = shash_desc_ctx(desc);
119
120 memcpy(out, sctx, sizeof(*sctx));
121
122 return 0;
123}
124
125static int md5_sparc64_import(struct shash_desc *desc, const void *in)
126{
127 struct md5_state *sctx = shash_desc_ctx(desc);
128
129 memcpy(sctx, in, sizeof(*sctx));
130
131 return 0;
132}
133
134static struct shash_alg alg = {
135 .digestsize = MD5_DIGEST_SIZE,
136 .init = md5_sparc64_init,
137 .update = md5_sparc64_update,
138 .final = md5_sparc64_final,
139 .export = md5_sparc64_export,
140 .import = md5_sparc64_import,
141 .descsize = sizeof(struct md5_state),
142 .statesize = sizeof(struct md5_state),
143 .base = {
144 .cra_name = "md5",
145 .cra_driver_name= "md5-sparc64",
146 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
147 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
148 .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
149 .cra_module = THIS_MODULE,
150 }
151};
152
153static bool __init sparc64_has_md5_opcode(void)
154{
155 unsigned long cfr;
156
157 if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
158 return false;
159
160 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
161 if (!(cfr & CFR_MD5))
162 return false;
163
164 return true;
165}
166
167static int __init md5_sparc64_mod_init(void)
168{
169 if (sparc64_has_md5_opcode()) {
170 pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n");
171 return crypto_register_shash(&alg);
172 }
173 pr_info("sparc64 md5 opcode not available.\n");
174 return -ENODEV;
175}
176
177static void __exit md5_sparc64_mod_fini(void)
178{
179 crypto_unregister_shash(&alg);
180}
181
182module_init(md5_sparc64_mod_init);
183module_exit(md5_sparc64_mod_fini);
184
185MODULE_LICENSE("GPL");
186MODULE_DESCRIPTION("MD5 Message Digest Algorithm, sparc64 md5 opcode accelerated");
187
188MODULE_ALIAS("md5");
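Editor's note: hash consumers go through the shash interface rather than blkcipher. A small sketch, not part of the commit, of one-shot hashing with whichever "md5" provider has the highest priority; the helper name and error handling are illustrative only.

	#include <crypto/hash.h>
	#include <linux/err.h>
	#include <linux/slab.h>

	static int example_md5_digest(const u8 *data, unsigned int len, u8 *out)
	{
		struct crypto_shash *tfm;
		struct shash_desc *desc;
		int err;

		tfm = crypto_alloc_shash("md5", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		/* The descriptor is sized for whichever driver was selected. */
		desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
		if (!desc) {
			crypto_free_shash(tfm);
			return -ENOMEM;
		}
		desc->tfm = tfm;
		desc->flags = 0;

		err = crypto_shash_digest(desc, data, len, out);

		kfree(desc);
		crypto_free_shash(tfm);
		return err;
	}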
diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h
new file mode 100644
index 000000000000..19cbaea6976f
--- /dev/null
+++ b/arch/sparc/crypto/opcodes.h
@@ -0,0 +1,99 @@
1#ifndef _OPCODES_H
2#define _OPCODES_H
3
4#define SPARC_CR_OPCODE_PRIORITY 300
5
6#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5))
7
8#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20)))
9
10#define RS1(x) (FPD_ENCODE(x) << 14)
11#define RS2(x) (FPD_ENCODE(x) << 0)
12#define RS3(x) (FPD_ENCODE(x) << 9)
13#define RD(x) (FPD_ENCODE(x) << 25)
14#define IMM5_0(x) ((x) << 0)
15#define IMM5_9(x) ((x) << 9)
16
17#define CRC32C(a,b,c) \
18 .word (F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c));
19
20#define MD5 \
21 .word 0x81b02800;
22#define SHA1 \
23 .word 0x81b02820;
24#define SHA256 \
25 .word 0x81b02840;
26#define SHA512 \
27 .word 0x81b02860;
28
29#define AES_EROUND01(a,b,c,d) \
30 .word (F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d));
31#define AES_EROUND23(a,b,c,d) \
32 .word (F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d));
33#define AES_DROUND01(a,b,c,d) \
34 .word (F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d));
35#define AES_DROUND23(a,b,c,d) \
36 .word (F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d));
37#define AES_EROUND01_L(a,b,c,d) \
38 .word (F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d));
39#define AES_EROUND23_L(a,b,c,d) \
40 .word (F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d));
41#define AES_DROUND01_L(a,b,c,d) \
42 .word (F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d));
43#define AES_DROUND23_L(a,b,c,d) \
44 .word (F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d));
45#define AES_KEXPAND1(a,b,c,d) \
46 .word (F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d));
47#define AES_KEXPAND0(a,b,c) \
48 .word (F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c));
49#define AES_KEXPAND2(a,b,c) \
50 .word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));
51
52#define DES_IP(a,b) \
53 .word (F3F(2, 0x36, 0x134)|RS1(a)|RD(b));
54#define DES_IIP(a,b) \
55 .word (F3F(2, 0x36, 0x135)|RS1(a)|RD(b));
56#define DES_KEXPAND(a,b,c) \
57 .word (F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c));
58#define DES_ROUND(a,b,c,d) \
59 .word (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d));
60
61#define CAMELLIA_F(a,b,c,d) \
62 .word (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
63#define CAMELLIA_FL(a,b,c) \
64 .word (F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c));
65#define CAMELLIA_FLI(a,b,c) \
66 .word (F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c));
67
68#define MOVDTOX_F0_O4 \
69 .word 0x99b02200
70#define MOVDTOX_F2_O5 \
71 .word 0x9bb02202
72#define MOVXTOD_G1_F60 \
73 .word 0xbbb02301
74#define MOVXTOD_G1_F62 \
75 .word 0xbfb02301
76#define MOVXTOD_G3_F4 \
77 .word 0x89b02303;
78#define MOVXTOD_G7_F6 \
79 .word 0x8db02307;
80#define MOVXTOD_G3_F0 \
81 .word 0x81b02303;
82#define MOVXTOD_G7_F2 \
83 .word 0x85b02307;
84#define MOVXTOD_O0_F0 \
85 .word 0x81b02308;
86#define MOVXTOD_O5_F0 \
87 .word 0x81b0230d;
88#define MOVXTOD_O5_F2 \
89 .word 0x85b0230d;
90#define MOVXTOD_O5_F4 \
91 .word 0x89b0230d;
92#define MOVXTOD_O5_F6 \
93 .word 0x8db0230d;
94#define MOVXTOD_G3_F60 \
95 .word 0xbbb02303;
96#define MOVXTOD_G7_F62 \
97 .word 0xbfb02307;
98
99#endif /* _OPCODES_H */
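Editor's note: the F3F()/RS*()/RD() macros build SPARC floating-point-operate instruction words so the crypto opcodes can be emitted as raw .word directives on assemblers that do not know them. The fixed words used for the hash opcodes are consistent with F3F(2, 0x36, opf) encodings; the opf values 0x140 through 0x143 below are inferred from those .word constants rather than stated in the commit, so treat them as an assumption. A small host-side sanity check:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Same field layout as the kernel macro, widened explicitly to 32 bits. */
	#define F3F(x, y, z)	(((uint32_t)(x) << 30) | ((uint32_t)(y) << 19) | ((uint32_t)(z) << 5))

	int main(void)
	{
		assert(F3F(2, 0x36, 0x140) == 0x81b02800u);	/* MD5    */
		assert(F3F(2, 0x36, 0x141) == 0x81b02820u);	/* SHA1   */
		assert(F3F(2, 0x36, 0x142) == 0x81b02840u);	/* SHA256 */
		assert(F3F(2, 0x36, 0x143) == 0x81b02860u);	/* SHA512 */
		printf("hash opcode words match the F3F encoding\n");
		return 0;
	}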
diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S
new file mode 100644
index 000000000000..219d10c5ae0e
--- /dev/null
+++ b/arch/sparc/crypto/sha1_asm.S
@@ -0,0 +1,72 @@
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3
4#include "opcodes.h"
5
6ENTRY(sha1_sparc64_transform)
7 /* %o0 = digest, %o1 = data, %o2 = rounds */
8 VISEntryHalf
9 ld [%o0 + 0x00], %f0
10 ld [%o0 + 0x04], %f1
11 ld [%o0 + 0x08], %f2
12 andcc %o1, 0x7, %g0
13 ld [%o0 + 0x0c], %f3
14 bne,pn %xcc, 10f
15 ld [%o0 + 0x10], %f4
16
171:
18 ldd [%o1 + 0x00], %f8
19 ldd [%o1 + 0x08], %f10
20 ldd [%o1 + 0x10], %f12
21 ldd [%o1 + 0x18], %f14
22 ldd [%o1 + 0x20], %f16
23 ldd [%o1 + 0x28], %f18
24 ldd [%o1 + 0x30], %f20
25 ldd [%o1 + 0x38], %f22
26
27 SHA1
28
29 subcc %o2, 1, %o2
30 bne,pt %xcc, 1b
31 add %o1, 0x40, %o1
32
335:
34 st %f0, [%o0 + 0x00]
35 st %f1, [%o0 + 0x04]
36 st %f2, [%o0 + 0x08]
37 st %f3, [%o0 + 0x0c]
38 st %f4, [%o0 + 0x10]
39 retl
40 VISExitHalf
4110:
42 alignaddr %o1, %g0, %o1
43
44 ldd [%o1 + 0x00], %f10
451:
46 ldd [%o1 + 0x08], %f12
47 ldd [%o1 + 0x10], %f14
48 ldd [%o1 + 0x18], %f16
49 ldd [%o1 + 0x20], %f18
50 ldd [%o1 + 0x28], %f20
51 ldd [%o1 + 0x30], %f22
52 ldd [%o1 + 0x38], %f24
53 ldd [%o1 + 0x40], %f26
54
55 faligndata %f10, %f12, %f8
56 faligndata %f12, %f14, %f10
57 faligndata %f14, %f16, %f12
58 faligndata %f16, %f18, %f14
59 faligndata %f18, %f20, %f16
60 faligndata %f20, %f22, %f18
61 faligndata %f22, %f24, %f20
62 faligndata %f24, %f26, %f22
63
64 SHA1
65
66 subcc %o2, 1, %o2
67 fsrc2 %f26, %f10
68 bne,pt %xcc, 1b
69 add %o1, 0x40, %o1
70
71 ba,a,pt %xcc, 5b
72ENDPROC(sha1_sparc64_transform)
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c
new file mode 100644
index 000000000000..2bbb20bee9f1
--- /dev/null
+++ b/arch/sparc/crypto/sha1_glue.c
@@ -0,0 +1,183 @@
1/* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes.
2 *
3 * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
4 *
5 * Copyright (c) Alan Smithee.
6 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
7 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
8 * Copyright (c) Mathias Krause <minipli@googlemail.com>
9 */
10
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13#include <crypto/internal/hash.h>
14#include <linux/init.h>
15#include <linux/module.h>
16#include <linux/mm.h>
17#include <linux/cryptohash.h>
18#include <linux/types.h>
19#include <crypto/sha.h>
20
21#include <asm/pstate.h>
22#include <asm/elf.h>
23
24#include "opcodes.h"
25
26asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data,
27 unsigned int rounds);
28
29static int sha1_sparc64_init(struct shash_desc *desc)
30{
31 struct sha1_state *sctx = shash_desc_ctx(desc);
32
33 *sctx = (struct sha1_state){
34 .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
35 };
36
37 return 0;
38}
39
40static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data,
41 unsigned int len, unsigned int partial)
42{
43 unsigned int done = 0;
44
45 sctx->count += len;
46 if (partial) {
47 done = SHA1_BLOCK_SIZE - partial;
48 memcpy(sctx->buffer + partial, data, done);
49 sha1_sparc64_transform(sctx->state, sctx->buffer, 1);
50 }
51 if (len - done >= SHA1_BLOCK_SIZE) {
52 const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
53
54 sha1_sparc64_transform(sctx->state, data + done, rounds);
55 done += rounds * SHA1_BLOCK_SIZE;
56 }
57
58 memcpy(sctx->buffer, data + done, len - done);
59}
60
61static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data,
62 unsigned int len)
63{
64 struct sha1_state *sctx = shash_desc_ctx(desc);
65 unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
66
67 /* Handle the fast case right here */
68 if (partial + len < SHA1_BLOCK_SIZE) {
69 sctx->count += len;
70 memcpy(sctx->buffer + partial, data, len);
71 } else
72 __sha1_sparc64_update(sctx, data, len, partial);
73
74 return 0;
75}
76
77/* Add padding and return the message digest. */
78static int sha1_sparc64_final(struct shash_desc *desc, u8 *out)
79{
80 struct sha1_state *sctx = shash_desc_ctx(desc);
81 unsigned int i, index, padlen;
82 __be32 *dst = (__be32 *)out;
83 __be64 bits;
84 static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
85
86 bits = cpu_to_be64(sctx->count << 3);
87
88 /* Pad out to 56 mod 64 and append length */
89 index = sctx->count % SHA1_BLOCK_SIZE;
90 padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
91
92 /* We need to fill a whole block for __sha1_sparc64_update() */
93 if (padlen <= 56) {
94 sctx->count += padlen;
95 memcpy(sctx->buffer + index, padding, padlen);
96 } else {
97 __sha1_sparc64_update(sctx, padding, padlen, index);
98 }
99 __sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
100
101 /* Store state in digest */
102 for (i = 0; i < 5; i++)
103 dst[i] = cpu_to_be32(sctx->state[i]);
104
105 /* Wipe context */
106 memset(sctx, 0, sizeof(*sctx));
107
108 return 0;
109}
110
111static int sha1_sparc64_export(struct shash_desc *desc, void *out)
112{
113 struct sha1_state *sctx = shash_desc_ctx(desc);
114
115 memcpy(out, sctx, sizeof(*sctx));
116
117 return 0;
118}
119
120static int sha1_sparc64_import(struct shash_desc *desc, const void *in)
121{
122 struct sha1_state *sctx = shash_desc_ctx(desc);
123
124 memcpy(sctx, in, sizeof(*sctx));
125
126 return 0;
127}
128
129static struct shash_alg alg = {
130 .digestsize = SHA1_DIGEST_SIZE,
131 .init = sha1_sparc64_init,
132 .update = sha1_sparc64_update,
133 .final = sha1_sparc64_final,
134 .export = sha1_sparc64_export,
135 .import = sha1_sparc64_import,
136 .descsize = sizeof(struct sha1_state),
137 .statesize = sizeof(struct sha1_state),
138 .base = {
139 .cra_name = "sha1",
140 .cra_driver_name= "sha1-sparc64",
141 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
142 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
143 .cra_blocksize = SHA1_BLOCK_SIZE,
144 .cra_module = THIS_MODULE,
145 }
146};
147
148static bool __init sparc64_has_sha1_opcode(void)
149{
150 unsigned long cfr;
151
152 if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
153 return false;
154
155 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
156 if (!(cfr & CFR_SHA1))
157 return false;
158
159 return true;
160}
161
162static int __init sha1_sparc64_mod_init(void)
163{
164 if (sparc64_has_sha1_opcode()) {
165 pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n");
166 return crypto_register_shash(&alg);
167 }
168 pr_info("sparc64 sha1 opcode not available.\n");
169 return -ENODEV;
170}
171
172static void __exit sha1_sparc64_mod_fini(void)
173{
174 crypto_unregister_shash(&alg);
175}
176
177module_init(sha1_sparc64_mod_init);
178module_exit(sha1_sparc64_mod_fini);
179
180MODULE_LICENSE("GPL");
181MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated");
182
183MODULE_ALIAS("sha1");
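Editor's note: the *_final() routines all use the same MD-style padding arithmetic: pad to 56 mod 64, then append the 8-byte bit count, so the data fed to the transform is always a whole number of 64-byte blocks (the sha512 variant pads to 112 mod 128). A tiny standalone check of that invariant, not part of the commit:

	#include <assert.h>

	int main(void)
	{
		unsigned int index, padlen;

		for (index = 0; index < 64; index++) {
			padlen = (index < 56) ? (56 - index) : ((64 + 56) - index);
			/* At least one pad byte, at most a full extra block. */
			assert(padlen >= 1 && padlen <= 64);
			/* Pad bytes plus the 8-byte length field complete a block. */
			assert((index + padlen + 8) % 64 == 0);
		}
		return 0;
	}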
diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S
new file mode 100644
index 000000000000..b5f3d5826eb4
--- /dev/null
+++ b/arch/sparc/crypto/sha256_asm.S
@@ -0,0 +1,78 @@
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3
4#include "opcodes.h"
5
6ENTRY(sha256_sparc64_transform)
7 /* %o0 = digest, %o1 = data, %o2 = rounds */
8 VISEntryHalf
9 ld [%o0 + 0x00], %f0
10 ld [%o0 + 0x04], %f1
11 ld [%o0 + 0x08], %f2
12 ld [%o0 + 0x0c], %f3
13 ld [%o0 + 0x10], %f4
14 ld [%o0 + 0x14], %f5
15 andcc %o1, 0x7, %g0
16 ld [%o0 + 0x18], %f6
17 bne,pn %xcc, 10f
18 ld [%o0 + 0x1c], %f7
19
201:
21 ldd [%o1 + 0x00], %f8
22 ldd [%o1 + 0x08], %f10
23 ldd [%o1 + 0x10], %f12
24 ldd [%o1 + 0x18], %f14
25 ldd [%o1 + 0x20], %f16
26 ldd [%o1 + 0x28], %f18
27 ldd [%o1 + 0x30], %f20
28 ldd [%o1 + 0x38], %f22
29
30 SHA256
31
32 subcc %o2, 1, %o2
33 bne,pt %xcc, 1b
34 add %o1, 0x40, %o1
35
365:
37 st %f0, [%o0 + 0x00]
38 st %f1, [%o0 + 0x04]
39 st %f2, [%o0 + 0x08]
40 st %f3, [%o0 + 0x0c]
41 st %f4, [%o0 + 0x10]
42 st %f5, [%o0 + 0x14]
43 st %f6, [%o0 + 0x18]
44 st %f7, [%o0 + 0x1c]
45 retl
46 VISExitHalf
4710:
48 alignaddr %o1, %g0, %o1
49
50 ldd [%o1 + 0x00], %f10
511:
52 ldd [%o1 + 0x08], %f12
53 ldd [%o1 + 0x10], %f14
54 ldd [%o1 + 0x18], %f16
55 ldd [%o1 + 0x20], %f18
56 ldd [%o1 + 0x28], %f20
57 ldd [%o1 + 0x30], %f22
58 ldd [%o1 + 0x38], %f24
59 ldd [%o1 + 0x40], %f26
60
61 faligndata %f10, %f12, %f8
62 faligndata %f12, %f14, %f10
63 faligndata %f14, %f16, %f12
64 faligndata %f16, %f18, %f14
65 faligndata %f18, %f20, %f16
66 faligndata %f20, %f22, %f18
67 faligndata %f22, %f24, %f20
68 faligndata %f24, %f26, %f22
69
70 SHA256
71
72 subcc %o2, 1, %o2
73 fsrc2 %f26, %f10
74 bne,pt %xcc, 1b
75 add %o1, 0x40, %o1
76
77 ba,a,pt %xcc, 5b
78ENDPROC(sha256_sparc64_transform)
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c
new file mode 100644
index 000000000000..591e656bd891
--- /dev/null
+++ b/arch/sparc/crypto/sha256_glue.c
@@ -0,0 +1,241 @@
1/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes.
2 *
3 * This is based largely upon crypto/sha256_generic.c
4 *
5 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
6 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
7 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
8 * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
9 */
10
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13#include <crypto/internal/hash.h>
14#include <linux/init.h>
15#include <linux/module.h>
16#include <linux/mm.h>
17#include <linux/cryptohash.h>
18#include <linux/types.h>
19#include <crypto/sha.h>
20
21#include <asm/pstate.h>
22#include <asm/elf.h>
23
24#include "opcodes.h"
25
26asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data,
27 unsigned int rounds);
28
29static int sha224_sparc64_init(struct shash_desc *desc)
30{
31 struct sha256_state *sctx = shash_desc_ctx(desc);
32 sctx->state[0] = SHA224_H0;
33 sctx->state[1] = SHA224_H1;
34 sctx->state[2] = SHA224_H2;
35 sctx->state[3] = SHA224_H3;
36 sctx->state[4] = SHA224_H4;
37 sctx->state[5] = SHA224_H5;
38 sctx->state[6] = SHA224_H6;
39 sctx->state[7] = SHA224_H7;
40 sctx->count = 0;
41
42 return 0;
43}
44
45static int sha256_sparc64_init(struct shash_desc *desc)
46{
47 struct sha256_state *sctx = shash_desc_ctx(desc);
48 sctx->state[0] = SHA256_H0;
49 sctx->state[1] = SHA256_H1;
50 sctx->state[2] = SHA256_H2;
51 sctx->state[3] = SHA256_H3;
52 sctx->state[4] = SHA256_H4;
53 sctx->state[5] = SHA256_H5;
54 sctx->state[6] = SHA256_H6;
55 sctx->state[7] = SHA256_H7;
56 sctx->count = 0;
57
58 return 0;
59}
60
61static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data,
62 unsigned int len, unsigned int partial)
63{
64 unsigned int done = 0;
65
66 sctx->count += len;
67 if (partial) {
68 done = SHA256_BLOCK_SIZE - partial;
69 memcpy(sctx->buf + partial, data, done);
70 sha256_sparc64_transform(sctx->state, sctx->buf, 1);
71 }
72 if (len - done >= SHA256_BLOCK_SIZE) {
73 const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
74
75 sha256_sparc64_transform(sctx->state, data + done, rounds);
76 done += rounds * SHA256_BLOCK_SIZE;
77 }
78
79 memcpy(sctx->buf, data + done, len - done);
80}
81
82static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data,
83 unsigned int len)
84{
85 struct sha256_state *sctx = shash_desc_ctx(desc);
86 unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
87
88 /* Handle the fast case right here */
89 if (partial + len < SHA256_BLOCK_SIZE) {
90 sctx->count += len;
91 memcpy(sctx->buf + partial, data, len);
92 } else
93 __sha256_sparc64_update(sctx, data, len, partial);
94
95 return 0;
96}
97
98static int sha256_sparc64_final(struct shash_desc *desc, u8 *out)
99{
100 struct sha256_state *sctx = shash_desc_ctx(desc);
101 unsigned int i, index, padlen;
102 __be32 *dst = (__be32 *)out;
103 __be64 bits;
104 static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
105
106 bits = cpu_to_be64(sctx->count << 3);
107
108 /* Pad out to 56 mod 64 and append length */
109 index = sctx->count % SHA256_BLOCK_SIZE;
110 padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index);
111
112 /* We need to fill a whole block for __sha256_sparc64_update() */
113 if (padlen <= 56) {
114 sctx->count += padlen;
115 memcpy(sctx->buf + index, padding, padlen);
116 } else {
117 __sha256_sparc64_update(sctx, padding, padlen, index);
118 }
119 __sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
120
121 /* Store state in digest */
122 for (i = 0; i < 8; i++)
123 dst[i] = cpu_to_be32(sctx->state[i]);
124
125 /* Wipe context */
126 memset(sctx, 0, sizeof(*sctx));
127
128 return 0;
129}
130
131static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash)
132{
133 u8 D[SHA256_DIGEST_SIZE];
134
135 sha256_sparc64_final(desc, D);
136
137 memcpy(hash, D, SHA224_DIGEST_SIZE);
138 memset(D, 0, SHA256_DIGEST_SIZE);
139
140 return 0;
141}
142
143static int sha256_sparc64_export(struct shash_desc *desc, void *out)
144{
145 struct sha256_state *sctx = shash_desc_ctx(desc);
146
147 memcpy(out, sctx, sizeof(*sctx));
148 return 0;
149}
150
151static int sha256_sparc64_import(struct shash_desc *desc, const void *in)
152{
153 struct sha256_state *sctx = shash_desc_ctx(desc);
154
155 memcpy(sctx, in, sizeof(*sctx));
156 return 0;
157}
158
159static struct shash_alg sha256 = {
160 .digestsize = SHA256_DIGEST_SIZE,
161 .init = sha256_sparc64_init,
162 .update = sha256_sparc64_update,
163 .final = sha256_sparc64_final,
164 .export = sha256_sparc64_export,
165 .import = sha256_sparc64_import,
166 .descsize = sizeof(struct sha256_state),
167 .statesize = sizeof(struct sha256_state),
168 .base = {
169 .cra_name = "sha256",
170 .cra_driver_name= "sha256-sparc64",
171 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
172 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
173 .cra_blocksize = SHA256_BLOCK_SIZE,
174 .cra_module = THIS_MODULE,
175 }
176};
177
178static struct shash_alg sha224 = {
179 .digestsize = SHA224_DIGEST_SIZE,
180 .init = sha224_sparc64_init,
181 .update = sha256_sparc64_update,
182 .final = sha224_sparc64_final,
183 .descsize = sizeof(struct sha256_state),
184 .base = {
185 .cra_name = "sha224",
186 .cra_driver_name= "sha224-sparc64",
187 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
188 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
189 .cra_blocksize = SHA224_BLOCK_SIZE,
190 .cra_module = THIS_MODULE,
191 }
192};
193
194static bool __init sparc64_has_sha256_opcode(void)
195{
196 unsigned long cfr;
197
198 if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
199 return false;
200
201 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
202 if (!(cfr & CFR_SHA256))
203 return false;
204
205 return true;
206}
207
208static int __init sha256_sparc64_mod_init(void)
209{
210 if (sparc64_has_sha256_opcode()) {
211 int ret = crypto_register_shash(&sha224);
212 if (ret < 0)
213 return ret;
214
215 ret = crypto_register_shash(&sha256);
216 if (ret < 0) {
217 crypto_unregister_shash(&sha224);
218 return ret;
219 }
220
221 pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n");
222 return 0;
223 }
224 pr_info("sparc64 sha256 opcode not available.\n");
225 return -ENODEV;
226}
227
228static void __exit sha256_sparc64_mod_fini(void)
229{
230 crypto_unregister_shash(&sha224);
231 crypto_unregister_shash(&sha256);
232}
233
234module_init(sha256_sparc64_mod_init);
235module_exit(sha256_sparc64_mod_fini);
236
237MODULE_LICENSE("GPL");
238MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated");
239
240MODULE_ALIAS("sha224");
241MODULE_ALIAS("sha256");
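Editor's note: nothing extra is needed for keyed use. Templates such as hmac(sha256) are composed by the crypto API on top of whichever sha256 provider wins on cra_priority, so the opcode driver should be picked up transparently once registered. A minimal sketch (not from the commit; names are illustrative):

	#include <crypto/hash.h>
	#include <linux/err.h>

	static struct crypto_shash *example_alloc_hmac_sha256(const u8 *key,
							       unsigned int keylen)
	{
		struct crypto_shash *tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
		int err;

		if (IS_ERR(tfm))
			return tfm;

		err = crypto_shash_setkey(tfm, key, keylen);
		if (err) {
			crypto_free_shash(tfm);
			return ERR_PTR(err);
		}
		return tfm;
	}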
diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S
new file mode 100644
index 000000000000..54bfba713c0e
--- /dev/null
+++ b/arch/sparc/crypto/sha512_asm.S
@@ -0,0 +1,102 @@
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3
4#include "opcodes.h"
5
6ENTRY(sha512_sparc64_transform)
7 /* %o0 = digest, %o1 = data, %o2 = rounds */
8 VISEntry
9 ldd [%o0 + 0x00], %f0
10 ldd [%o0 + 0x08], %f2
11 ldd [%o0 + 0x10], %f4
12 ldd [%o0 + 0x18], %f6
13 ldd [%o0 + 0x20], %f8
14 ldd [%o0 + 0x28], %f10
15 andcc %o1, 0x7, %g0
16 ldd [%o0 + 0x30], %f12
17 bne,pn %xcc, 10f
18 ldd [%o0 + 0x38], %f14
19
201:
21 ldd [%o1 + 0x00], %f16
22 ldd [%o1 + 0x08], %f18
23 ldd [%o1 + 0x10], %f20
24 ldd [%o1 + 0x18], %f22
25 ldd [%o1 + 0x20], %f24
26 ldd [%o1 + 0x28], %f26
27 ldd [%o1 + 0x30], %f28
28 ldd [%o1 + 0x38], %f30
29 ldd [%o1 + 0x40], %f32
30 ldd [%o1 + 0x48], %f34
31 ldd [%o1 + 0x50], %f36
32 ldd [%o1 + 0x58], %f38
33 ldd [%o1 + 0x60], %f40
34 ldd [%o1 + 0x68], %f42
35 ldd [%o1 + 0x70], %f44
36 ldd [%o1 + 0x78], %f46
37
38 SHA512
39
40 subcc %o2, 1, %o2
41 bne,pt %xcc, 1b
42 add %o1, 0x80, %o1
43
445:
45 std %f0, [%o0 + 0x00]
46 std %f2, [%o0 + 0x08]
47 std %f4, [%o0 + 0x10]
48 std %f6, [%o0 + 0x18]
49 std %f8, [%o0 + 0x20]
50 std %f10, [%o0 + 0x28]
51 std %f12, [%o0 + 0x30]
52 std %f14, [%o0 + 0x38]
53 retl
54 VISExit
5510:
56 alignaddr %o1, %g0, %o1
57
58 ldd [%o1 + 0x00], %f18
591:
60 ldd [%o1 + 0x08], %f20
61 ldd [%o1 + 0x10], %f22
62 ldd [%o1 + 0x18], %f24
63 ldd [%o1 + 0x20], %f26
64 ldd [%o1 + 0x28], %f28
65 ldd [%o1 + 0x30], %f30
66 ldd [%o1 + 0x38], %f32
67 ldd [%o1 + 0x40], %f34
68 ldd [%o1 + 0x48], %f36
69 ldd [%o1 + 0x50], %f38
70 ldd [%o1 + 0x58], %f40
71 ldd [%o1 + 0x60], %f42
72 ldd [%o1 + 0x68], %f44
73 ldd [%o1 + 0x70], %f46
74 ldd [%o1 + 0x78], %f48
75 ldd [%o1 + 0x80], %f50
76
77 faligndata %f18, %f20, %f16
78 faligndata %f20, %f22, %f18
79 faligndata %f22, %f24, %f20
80 faligndata %f24, %f26, %f22
81 faligndata %f26, %f28, %f24
82 faligndata %f28, %f30, %f26
83 faligndata %f30, %f32, %f28
84 faligndata %f32, %f34, %f30
85 faligndata %f34, %f36, %f32
86 faligndata %f36, %f38, %f34
87 faligndata %f38, %f40, %f36
88 faligndata %f40, %f42, %f38
89 faligndata %f42, %f44, %f40
90 faligndata %f44, %f46, %f42
91 faligndata %f46, %f48, %f44
92 faligndata %f48, %f50, %f46
93
94 SHA512
95
96 subcc %o2, 1, %o2
97 fsrc2 %f50, %f18
98 bne,pt %xcc, 1b
99 add %o1, 0x80, %o1
100
101 ba,a,pt %xcc, 5b
102ENDPROC(sha512_sparc64_transform)
diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c
new file mode 100644
index 000000000000..486f0a2b7001
--- /dev/null
+++ b/arch/sparc/crypto/sha512_glue.c
@@ -0,0 +1,226 @@
1/* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes.
2 *
3 * This is based largely upon crypto/sha512_generic.c
4 *
5 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
6 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
7 * Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <crypto/internal/hash.h>
13#include <linux/init.h>
14#include <linux/module.h>
15#include <linux/mm.h>
16#include <linux/cryptohash.h>
17#include <linux/types.h>
18#include <crypto/sha.h>
19
20#include <asm/pstate.h>
21#include <asm/elf.h>
22
23#include "opcodes.h"
24
25asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data,
26 unsigned int rounds);
27
28static int sha512_sparc64_init(struct shash_desc *desc)
29{
30 struct sha512_state *sctx = shash_desc_ctx(desc);
31 sctx->state[0] = SHA512_H0;
32 sctx->state[1] = SHA512_H1;
33 sctx->state[2] = SHA512_H2;
34 sctx->state[3] = SHA512_H3;
35 sctx->state[4] = SHA512_H4;
36 sctx->state[5] = SHA512_H5;
37 sctx->state[6] = SHA512_H6;
38 sctx->state[7] = SHA512_H7;
39 sctx->count[0] = sctx->count[1] = 0;
40
41 return 0;
42}
43
44static int sha384_sparc64_init(struct shash_desc *desc)
45{
46 struct sha512_state *sctx = shash_desc_ctx(desc);
47 sctx->state[0] = SHA384_H0;
48 sctx->state[1] = SHA384_H1;
49 sctx->state[2] = SHA384_H2;
50 sctx->state[3] = SHA384_H3;
51 sctx->state[4] = SHA384_H4;
52 sctx->state[5] = SHA384_H5;
53 sctx->state[6] = SHA384_H6;
54 sctx->state[7] = SHA384_H7;
55 sctx->count[0] = sctx->count[1] = 0;
56
57 return 0;
58}
59
60static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data,
61 unsigned int len, unsigned int partial)
62{
63 unsigned int done = 0;
64
65 if ((sctx->count[0] += len) < len)
66 sctx->count[1]++;
67 if (partial) {
68 done = SHA512_BLOCK_SIZE - partial;
69 memcpy(sctx->buf + partial, data, done);
70 sha512_sparc64_transform(sctx->state, sctx->buf, 1);
71 }
72 if (len - done >= SHA512_BLOCK_SIZE) {
73 const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
74
75 sha512_sparc64_transform(sctx->state, data + done, rounds);
76 done += rounds * SHA512_BLOCK_SIZE;
77 }
78
79 memcpy(sctx->buf, data + done, len - done);
80}
81
82static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data,
83 unsigned int len)
84{
85 struct sha512_state *sctx = shash_desc_ctx(desc);
86 unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
87
88 /* Handle the fast case right here */
89 if (partial + len < SHA512_BLOCK_SIZE) {
90 if ((sctx->count[0] += len) < len)
91 sctx->count[1]++;
92 memcpy(sctx->buf + partial, data, len);
93 } else
94 __sha512_sparc64_update(sctx, data, len, partial);
95
96 return 0;
97}
98
99static int sha512_sparc64_final(struct shash_desc *desc, u8 *out)
100{
101 struct sha512_state *sctx = shash_desc_ctx(desc);
102 unsigned int i, index, padlen;
103 __be64 *dst = (__be64 *)out;
104 __be64 bits[2];
105 static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
106
107 /* Save number of bits */
108 bits[1] = cpu_to_be64(sctx->count[0] << 3);
109 bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
110
111 /* Pad out to 112 mod 128 and append length */
112 index = sctx->count[0] % SHA512_BLOCK_SIZE;
113 padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index);
114
115 /* We need to fill a whole block for __sha512_sparc64_update() */
116 if (padlen <= 112) {
117 if ((sctx->count[0] += padlen) < padlen)
118 sctx->count[1]++;
119 memcpy(sctx->buf + index, padding, padlen);
120 } else {
121 __sha512_sparc64_update(sctx, padding, padlen, index);
122 }
123 __sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112);
124
125 /* Store state in digest */
126 for (i = 0; i < 8; i++)
127 dst[i] = cpu_to_be64(sctx->state[i]);
128
129 /* Wipe context */
130 memset(sctx, 0, sizeof(*sctx));
131
132 return 0;
133}
134
135static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash)
136{
137 u8 D[64];
138
139 sha512_sparc64_final(desc, D);
140
141 memcpy(hash, D, 48);
142 memset(D, 0, 64);
143
144 return 0;
145}
146
147static struct shash_alg sha512 = {
148 .digestsize = SHA512_DIGEST_SIZE,
149 .init = sha512_sparc64_init,
150 .update = sha512_sparc64_update,
151 .final = sha512_sparc64_final,
152 .descsize = sizeof(struct sha512_state),
153 .base = {
154 .cra_name = "sha512",
155 .cra_driver_name= "sha512-sparc64",
156 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
157 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
158 .cra_blocksize = SHA512_BLOCK_SIZE,
159 .cra_module = THIS_MODULE,
160 }
161};
162
163static struct shash_alg sha384 = {
164 .digestsize = SHA384_DIGEST_SIZE,
165 .init = sha384_sparc64_init,
166 .update = sha512_sparc64_update,
167 .final = sha384_sparc64_final,
168 .descsize = sizeof(struct sha512_state),
169 .base = {
170 .cra_name = "sha384",
171 .cra_driver_name= "sha384-sparc64",
172 .cra_priority = SPARC_CR_OPCODE_PRIORITY,
173 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
174 .cra_blocksize = SHA384_BLOCK_SIZE,
175 .cra_module = THIS_MODULE,
176 }
177};
178
179static bool __init sparc64_has_sha512_opcode(void)
180{
181 unsigned long cfr;
182
183 if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
184 return false;
185
186 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
187 if (!(cfr & CFR_SHA512))
188 return false;
189
190 return true;
191}
192
193static int __init sha512_sparc64_mod_init(void)
194{
195 if (sparc64_has_sha512_opcode()) {
196 int ret = crypto_register_shash(&sha384);
197 if (ret < 0)
198 return ret;
199
200 ret = crypto_register_shash(&sha512);
201 if (ret < 0) {
202 crypto_unregister_shash(&sha384);
203 return ret;
204 }
205
206 pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n");
207 return 0;
208 }
209 pr_info("sparc64 sha512 opcode not available.\n");
210 return -ENODEV;
211}
212
213static void __exit sha512_sparc64_mod_fini(void)
214{
215 crypto_unregister_shash(&sha384);
216 crypto_unregister_shash(&sha512);
217}
218
219module_init(sha512_sparc64_mod_init);
220module_exit(sha512_sparc64_mod_fini);
221
222MODULE_LICENSE("GPL");
223MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated");
224
225MODULE_ALIAS("sha384");
226MODULE_ALIAS("sha512");
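Editor's note: SHA-512 keeps a 128-bit byte count split across count[0] and count[1]; the idiom "if ((sctx->count[0] += len) < len) sctx->count[1]++;" detects wraparound of the low word and propagates the carry. A small standalone illustration of that carry check, not part of the commit:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* count[0] is the low 64 bits of the byte count. */
		uint64_t count[2] = { UINT64_MAX - 3, 0 };
		unsigned int len = 10;

		if ((count[0] += len) < len)	/* low word wrapped past 2^64 */
			count[1]++;

		assert(count[0] == 6);		/* (2^64 - 4 + 10) mod 2^64 */
		assert(count[1] == 1);
		return 0;
	}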
diff --git a/arch/sparc/include/asm/asi.h b/arch/sparc/include/asm/asi.h
index 61ebe7411ceb..cc0006dc5d4a 100644
--- a/arch/sparc/include/asm/asi.h
+++ b/arch/sparc/include/asm/asi.h
@@ -141,7 +141,8 @@
 /* SpitFire and later extended ASIs.  The "(III)" marker designates
  * UltraSparc-III and later specific ASIs.  The "(CMT)" marker designates
  * Chip Multi Threading specific ASIs.  "(NG)" designates Niagara specific
- * ASIs, "(4V)" designates SUN4V specific ASIs.
+ * ASIs, "(4V)" designates SUN4V specific ASIs.  "(NG4)" designates SPARC-T4
+ * and later ASIs.
  */
 #define ASI_PHYS_USE_EC		0x14 /* PADDR, E-cachable		*/
 #define ASI_PHYS_BYPASS_EC_E	0x15 /* PADDR, E-bit			*/
@@ -243,6 +244,7 @@
 #define ASI_UDBL_CONTROL_R	0x7f /* External UDB control regs rd low*/
 #define ASI_INTR_R		0x7f /* IRQ vector dispatch read	*/
 #define ASI_INTR_DATAN_R	0x7f /* (III) In irq vector data reg N	*/
+#define ASI_PIC			0xb0 /* (NG4) PIC registers		*/
 #define ASI_PST8_P		0xc0 /* Primary, 8 8-bit, partial	*/
 #define ASI_PST8_S		0xc1 /* Secondary, 8 8-bit, partial	*/
 #define ASI_PST16_P		0xc2 /* Primary, 4 16-bit, partial	*/
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index 7df8b7f544d4..370ca1e71ffb 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -86,6 +86,15 @@
 #define AV_SPARC_IMA		0x00400000 /* integer multiply-add	*/
 #define AV_SPARC_ASI_CACHE_SPARING \
 				0x00800000 /* cache sparing ASIs available */
+#define AV_SPARC_PAUSE		0x01000000 /* PAUSE available		*/
+#define AV_SPARC_CBCOND	0x02000000 /* CBCOND insns available	*/
+
+/* Solaris decided to enumerate every single crypto instruction type
+ * in the AT_HWCAP bits.  This is wasteful, since if crypto is present,
+ * you still need to look in the CFR register to see if the opcode is
+ * really available.  So we simply advertise only "crypto" support.
+ */
+#define HWCAP_SPARC_CRYPTO	0x04000000 /* CRYPTO insns available	*/
 
 #define CORE_DUMP_USE_REGSET
 
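Editor's note: from user space the single HWCAP_SPARC_CRYPTO bit shows up in AT_HWCAP; as the comment above says, a program still has to read the CFR (%asr26) to learn which individual opcodes exist. A hedged sketch of the auxv side of that check (assumes glibc 2.16+ for getauxval(); not part of the commit):

	#include <stdio.h>
	#include <sys/auxv.h>

	#define HWCAP_SPARC_CRYPTO	0x04000000

	int main(void)
	{
		unsigned long hwcap = getauxval(AT_HWCAP);

		if (hwcap & HWCAP_SPARC_CRYPTO)
			printf("crypto opcodes advertised; read %%asr26 (CFR) for the specific ones\n");
		else
			printf("no crypto opcode support\n");
		return 0;
	}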
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 015a761eaa32..ca121f0fa3ec 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2934,6 +2934,16 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra,
 					   unsigned long len);
 #endif
 
+#define HV_FAST_VT_GET_PERFREG		0x184
+#define HV_FAST_VT_SET_PERFREG		0x185
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_vt_get_perfreg(unsigned long reg_num,
+					  unsigned long *reg_val);
+extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
+					  unsigned long reg_val);
+#endif
+
 /* Function numbers for HV_CORE_TRAP.  */
 #define HV_CORE_SET_VER			0x00
 #define HV_CORE_PUTCHAR			0x01
@@ -2964,6 +2974,7 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra,
 #define HV_GRP_NIU			0x0204
 #define HV_GRP_VF_CPU			0x0205
 #define HV_GRP_KT_CPU			0x0209
+#define HV_GRP_VT_CPU			0x020c
 #define HV_GRP_DIAG			0x0300
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/mdesc.h b/arch/sparc/include/asm/mdesc.h
index 9faa046713fb..139097f3a67b 100644
--- a/arch/sparc/include/asm/mdesc.h
+++ b/arch/sparc/include/asm/mdesc.h
@@ -73,6 +73,7 @@ extern void mdesc_register_notifier(struct mdesc_notifier_client *client);
 
 extern void mdesc_fill_in_cpu_data(cpumask_t *mask);
 extern void mdesc_populate_present_mask(cpumask_t *mask);
+extern void mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask);
 
 extern void sun4v_mdesc_init(void);
 
diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h
index 288d7beba051..942bb17f60cd 100644
--- a/arch/sparc/include/asm/pcr.h
+++ b/arch/sparc/include/asm/pcr.h
@@ -2,8 +2,13 @@
 #define __PCR_H
 
 struct pcr_ops {
-	u64 (*read)(void);
-	void (*write)(u64);
+	u64 (*read_pcr)(unsigned long);
+	void (*write_pcr)(unsigned long, u64);
+	u64 (*read_pic)(unsigned long);
+	void (*write_pic)(unsigned long, u64);
+	u64 (*nmi_picl_value)(unsigned int nmi_hz);
+	u64 pcr_nmi_enable;
+	u64 pcr_nmi_disable;
 };
 extern const struct pcr_ops *pcr_ops;
 
@@ -27,21 +32,18 @@ extern void schedule_deferred_pcr_work(void);
 #define PCR_N2_SL1_SHIFT	27
 #define PCR_N2_OV1		0x80000000
 
-extern unsigned int picl_shift;
-
-/* In order to commonize as much of the implementation as
- * possible, we use PICH as our counter.  Mostly this is
- * to accommodate Niagara-1 which can only count insn cycles
- * in PICH.
- */
-static inline u64 picl_value(unsigned int nmi_hz)
-{
-	u32 delta = local_cpu_data().clock_tick / (nmi_hz << picl_shift);
-
-	return ((u64)((0 - delta) & 0xffffffff)) << 32;
-}
-
-extern u64 pcr_enable;
+#define PCR_N4_OV	0x00000001 /* PIC overflow             */
+#define PCR_N4_TOE	0x00000002 /* Trap On Event           */
+#define PCR_N4_UTRACE	0x00000004 /* Trace user events       */
+#define PCR_N4_STRACE	0x00000008 /* Trace supervisor events */
+#define PCR_N4_HTRACE	0x00000010 /* Trace hypervisor events */
+#define PCR_N4_MASK	0x000007e0 /* Event mask              */
+#define PCR_N4_MASK_SHIFT	5
+#define PCR_N4_SL	0x0000f800 /* Event Select            */
+#define PCR_N4_SL_SHIFT	11
+#define PCR_N4_PICNPT	0x00010000 /* PIC non-privileged trap */
+#define PCR_N4_PICNHT	0x00020000 /* PIC non-hypervisor trap */
+#define PCR_N4_NTC	0x00040000 /* Next-To-Commit wrap     */
 
 extern int pcr_arch_init(void);
 
diff --git a/arch/sparc/include/asm/perfctr.h b/arch/sparc/include/asm/perfctr.h
index 3332d2cba6c1..214feefa577c 100644
--- a/arch/sparc/include/asm/perfctr.h
+++ b/arch/sparc/include/asm/perfctr.h
@@ -54,11 +54,6 @@ enum perfctr_opcode {
 	PERFCTR_GETPCR
 };
 
-/* I don't want the kernel's namespace to be polluted with this
- * stuff when this file is included.  --DaveM
- */
-#ifndef __KERNEL__
-
 #define  PRIV 0x00000001
 #define  SYS  0x00000002
 #define  USR  0x00000004
@@ -168,29 +163,4 @@ struct vcounter_struct {
 	unsigned long long vcnt1;
 };
 
-#else /* !(__KERNEL__) */
-
-#ifndef CONFIG_SPARC32
-
-/* Performance counter register access. */
-#define read_pcr(__p)  __asm__ __volatile__("rd	%%pcr, %0" : "=r" (__p))
-#define write_pcr(__p) __asm__ __volatile__("wr	%0, 0x0, %%pcr" : : "r" (__p))
-#define read_pic(__p)  __asm__ __volatile__("rd	%%pic, %0" : "=r" (__p))
-
-/* Blackbird errata workaround.  See commentary in
- * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
- * for more information.
- */
-#define write_pic(__p)						\
-	__asm__ __volatile__("ba,pt	%%xcc, 99f\n\t"		\
-			     " nop\n\t"				\
-			     ".align	64\n"			\
-			  "99:wr	%0, 0x0, %%pic\n\t"	\
-			     "rd	%%pic, %%g0" : : "r" (__p))
-#define reset_pic()	write_pic(0)
-
-#endif /* !CONFIG_SPARC32 */
-
-#endif /* !(__KERNEL__) */
-
 #endif /* !(PERF_COUNTER_API) */
diff --git a/arch/sparc/include/asm/pstate.h b/arch/sparc/include/asm/pstate.h
index a26a53777bb0..4b6b998afd99 100644
--- a/arch/sparc/include/asm/pstate.h
+++ b/arch/sparc/include/asm/pstate.h
@@ -88,4 +88,18 @@
 #define VERS_MAXTL	_AC(0x000000000000ff00,UL) /* Max Trap Level.	*/
 #define VERS_MAXWIN	_AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/
 
+/* Compatability Feature Register (%asr26), SPARC-T4 and later  */
+#define CFR_AES		_AC(0x0000000000000001,UL) /* Supports AES opcodes	*/
+#define CFR_DES		_AC(0x0000000000000002,UL) /* Supports DES opcodes	*/
+#define CFR_KASUMI	_AC(0x0000000000000004,UL) /* Supports KASUMI opcodes	*/
+#define CFR_CAMELLIA	_AC(0x0000000000000008,UL) /* Supports CAMELLIA opcodes*/
+#define CFR_MD5		_AC(0x0000000000000010,UL) /* Supports MD5 opcodes	*/
+#define CFR_SHA1	_AC(0x0000000000000020,UL) /* Supports SHA1 opcodes	*/
+#define CFR_SHA256	_AC(0x0000000000000040,UL) /* Supports SHA256 opcodes	*/
+#define CFR_SHA512	_AC(0x0000000000000080,UL) /* Supports SHA512 opcodes	*/
+#define CFR_MPMUL	_AC(0x0000000000000100,UL) /* Supports MPMUL opcodes	*/
+#define CFR_MONTMUL	_AC(0x0000000000000200,UL) /* Supports MONTMUL opcodes	*/
+#define CFR_MONTSQR	_AC(0x0000000000000400,UL) /* Supports MONTSQR opcodes	*/
+#define CFR_CRC32C	_AC(0x0000000000000800,UL) /* Supports CRC32C opcodes	*/
+
 #endif /* !(_SPARC64_PSTATE_H) */
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index b42ddbf9651e..ee5dcced2499 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -559,10 +559,10 @@ niagara_tlb_fixup:
 	be,pt	%xcc, niagara2_patch
 	 nop
 	cmp	%g1, SUN4V_CHIP_NIAGARA4
-	be,pt	%xcc, niagara2_patch
+	be,pt	%xcc, niagara4_patch
 	 nop
 	cmp	%g1, SUN4V_CHIP_NIAGARA5
-	be,pt	%xcc, niagara2_patch
+	be,pt	%xcc, niagara4_patch
 	 nop
 
 	call	generic_patch_copyops
@@ -573,6 +573,16 @@ niagara_tlb_fixup:
 	 nop
 
 	ba,a,pt	%xcc, 80f
+niagara4_patch:
+	call	niagara4_patch_copyops
+	 nop
+	call	niagara_patch_bzero
+	 nop
+	call	niagara4_patch_pageops
+	 nop
+
+	ba,a,pt	%xcc, 80f
+
 niagara2_patch:
 	call	niagara2_patch_copyops
 	 nop
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 8593672838fd..1032df43ec95 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -45,6 +45,7 @@ static struct api_info api_table[] = {
 	{ .group = HV_GRP_NIU,				},
 	{ .group = HV_GRP_VF_CPU,			},
 	{ .group = HV_GRP_KT_CPU,			},
+	{ .group = HV_GRP_VT_CPU,			},
 	{ .group = HV_GRP_DIAG,	.flags = FLAG_PRE_API	},
 };
 
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index 58d60de4d65b..f3ab509b76a8 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -805,3 +805,19 @@ ENTRY(sun4v_reboot_data_set)
 	retl
 	 nop
 ENDPROC(sun4v_reboot_data_set)
+
+ENTRY(sun4v_vt_get_perfreg)
+	mov	%o1, %o4
+	mov	HV_FAST_VT_GET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_vt_get_perfreg)
+
+ENTRY(sun4v_vt_set_perfreg)
+	mov	HV_FAST_VT_SET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_vt_set_perfreg)
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
index 79f310364849..0746e5e32b37 100644
--- a/arch/sparc/kernel/ktlb.S
+++ b/arch/sparc/kernel/ktlb.S
@@ -188,31 +188,26 @@ valid_addr_bitmap_patch:
 	be,pn		%xcc, kvmap_dtlb_longpath
 
 2:	sethi		%hi(kpte_linear_bitmap), %g2
-	or		%g2, %lo(kpte_linear_bitmap), %g2
 
 	/* Get the 256MB physical address index. */
 	sllx		%g4, 21, %g5
-	mov		1, %g7
+	or		%g2, %lo(kpte_linear_bitmap), %g2
 	srlx		%g5, 21 + 28, %g5
+	and		%g5, (32 - 1), %g7
 
-	/* Don't try this at home kids... this depends upon srlx
-	 * only taking the low 6 bits of the shift count in %g5.
-	 */
-	sllx		%g7, %g5, %g7
-
-	/* Divide by 64 to get the offset into the bitmask.  */
-	srlx		%g5, 6, %g5
+	/* Divide by 32 to get the offset into the bitmask.  */
+	srlx		%g5, 5, %g5
+	add		%g7, %g7, %g7
 	sllx		%g5, 3, %g5
 
-	/* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */
+	/* kern_linear_pte_xor[(mask >> shift) & 3)] */
 	ldx		[%g2 + %g5], %g2
-	andcc		%g2, %g7, %g0
+	srlx		%g2, %g7, %g7
 	sethi		%hi(kern_linear_pte_xor), %g5
+	and		%g7, 3, %g7
 	or		%g5, %lo(kern_linear_pte_xor), %g5
-	bne,a,pt	%xcc, 1f
-	 add		%g5, 8, %g5
-
-1:	ldx		[%g5], %g2
+	sllx		%g7, 3, %g7
+	ldx		[%g5 + %g7], %g2
 
 	.globl		kvmap_linear_patch
 kvmap_linear_patch:
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 6dc796280589..831c001604e8 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -817,6 +817,30 @@ void __cpuinit mdesc_populate_present_mask(cpumask_t *mask)
817 mdesc_iterate_over_cpus(record_one_cpu, NULL, mask); 817 mdesc_iterate_over_cpus(record_one_cpu, NULL, mask);
818} 818}
819 819
820static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
821{
822 const u64 *pgsz_prop = mdesc_get_property(hp, mp, "mmu-page-size-list", NULL);
823 unsigned long *pgsz_mask = arg;
824 u64 val;
825
826 val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
827 HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
828 if (pgsz_prop)
829 val = *pgsz_prop;
830
831 if (!*pgsz_mask)
832 *pgsz_mask = val;
833 else
834 *pgsz_mask &= val;
835 return NULL;
836}
837
838void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask)
839{
840 *pgsz_mask = 0;
841 mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask);
842}
843
820static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) 844static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
821{ 845{
822 const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); 846 const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
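
mdesc_get_page_sizes() intersects the "mmu-page-size-list" property of every cpu in the mask, defaulting to the classic 8K/64K/512K/4MB set when the property is absent. A hypothetical boot-time caller (the real consumer is the linear-mapping setup elsewhere in this series) might look like:

    /* Hypothetical usage; only mdesc_get_page_sizes() and the
     * HV_PGSZ_MASK_* constants come from this patch set. */
    unsigned long cpu_pgsz_mask;

    mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask);
    if (cpu_pgsz_mask & HV_PGSZ_MASK_4MB)
        pr_info("4MB pages usable for the kernel linear mapping\n");
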
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index eb1c1f010a47..6479256fd5a4 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -22,7 +22,6 @@
22#include <asm/perf_event.h> 22#include <asm/perf_event.h>
23#include <asm/ptrace.h> 23#include <asm/ptrace.h>
24#include <asm/pcr.h> 24#include <asm/pcr.h>
25#include <asm/perfctr.h>
26 25
27#include "kstack.h" 26#include "kstack.h"
28 27
@@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
109 pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) 108 pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
110 touched = 1; 109 touched = 1;
111 else 110 else
112 pcr_ops->write(PCR_PIC_PRIV); 111 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
113 112
114 sum = local_cpu_data().irq0_irqs; 113 sum = local_cpu_data().irq0_irqs;
115 if (__get_cpu_var(nmi_touch)) { 114 if (__get_cpu_var(nmi_touch)) {
@@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
126 __this_cpu_write(alert_counter, 0); 125 __this_cpu_write(alert_counter, 0);
127 } 126 }
128 if (__get_cpu_var(wd_enabled)) { 127 if (__get_cpu_var(wd_enabled)) {
129 write_pic(picl_value(nmi_hz)); 128 pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
130 pcr_ops->write(pcr_enable); 129 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
131 } 130 }
132 131
133 restore_hardirq_stack(orig_sp); 132 restore_hardirq_stack(orig_sp);
@@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
166 165
167void stop_nmi_watchdog(void *unused) 166void stop_nmi_watchdog(void *unused)
168{ 167{
169 pcr_ops->write(PCR_PIC_PRIV); 168 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
170 __get_cpu_var(wd_enabled) = 0; 169 __get_cpu_var(wd_enabled) = 0;
171 atomic_dec(&nmi_active); 170 atomic_dec(&nmi_active);
172} 171}
@@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused)
223 __get_cpu_var(wd_enabled) = 1; 222 __get_cpu_var(wd_enabled) = 1;
224 atomic_inc(&nmi_active); 223 atomic_inc(&nmi_active);
225 224
226 pcr_ops->write(PCR_PIC_PRIV); 225 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
227 write_pic(picl_value(nmi_hz)); 226 pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
228 227
229 pcr_ops->write(pcr_enable); 228 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
230} 229}
231 230
232static void nmi_adjust_hz_one(void *unused) 231static void nmi_adjust_hz_one(void *unused)
@@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused)
234 if (!__get_cpu_var(wd_enabled)) 233 if (!__get_cpu_var(wd_enabled))
235 return; 234 return;
236 235
237 pcr_ops->write(PCR_PIC_PRIV); 236 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
238 write_pic(picl_value(nmi_hz)); 237 pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
239 238
240 pcr_ops->write(pcr_enable); 239 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
241} 240}
242 241
243void nmi_adjust_hz(unsigned int new_hz) 242void nmi_adjust_hz(unsigned int new_hz)
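
The watchdog reprogramming above always follows the same pattern: disable counting via pcr_nmi_disable, reload the PIC with nmi_picl_value(nmi_hz), then re-enable via pcr_nmi_enable. A worked example of the PIC value, assuming a 1 GHz clock_tick and nmi_hz = 10 on a pre-Niagara chip (direct_picl_value):

    /* delta = clock_tick / nmi_hz = 1000000000 / 10 = 100000000
     * PIC   = ((u64)(0 - delta) & 0xffffffff) << 32
     * The upper 32-bit counter therefore starts at 2^32 - 10^8 and
     * overflows, raising the level-15 interrupt, after ~0.1s of cycles. */
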
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 7661e84a05a0..051b69caeffd 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -594,7 +594,7 @@ static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
594 printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n", 594 printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
595 vdma[0], vdma[1]); 595 vdma[0], vdma[1]);
596 return -EINVAL; 596 return -EINVAL;
597 }; 597 }
598 598
599 dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL); 599 dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
600 num_tsb_entries = vdma[1] / IO_PAGE_SIZE; 600 num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 0ce0dd2332aa..269af58497aa 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -13,23 +13,14 @@
13#include <asm/pil.h> 13#include <asm/pil.h>
14#include <asm/pcr.h> 14#include <asm/pcr.h>
15#include <asm/nmi.h> 15#include <asm/nmi.h>
16#include <asm/asi.h>
16#include <asm/spitfire.h> 17#include <asm/spitfire.h>
17#include <asm/perfctr.h>
18 18
19/* This code is shared between various users of the performance 19/* This code is shared between various users of the performance
20 * counters. Users will be oprofile, pseudo-NMI watchdog, and the 20 * counters. Users will be oprofile, pseudo-NMI watchdog, and the
21 * perf_event support layer. 21 * perf_event support layer.
22 */ 22 */
23 23
24#define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE)
25#define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \
26 PCR_N2_TOE_OV1 | \
27 (2 << PCR_N2_SL1_SHIFT) | \
28 (0xff << PCR_N2_MASK1_SHIFT))
29
30u64 pcr_enable;
31unsigned int picl_shift;
32
33/* Performance counter interrupts run unmasked at PIL level 15. 24/* Performance counter interrupts run unmasked at PIL level 15.
34 * Therefore we can't do things like wakeups and other work 25 * Therefore we can't do things like wakeups and other work
35 * that expects IRQ disabling to be adhered to in locking etc. 26 * that expects IRQ disabling to be adhered to in locking etc.
@@ -60,39 +51,144 @@ void arch_irq_work_raise(void)
60const struct pcr_ops *pcr_ops; 51const struct pcr_ops *pcr_ops;
61EXPORT_SYMBOL_GPL(pcr_ops); 52EXPORT_SYMBOL_GPL(pcr_ops);
62 53
63static u64 direct_pcr_read(void) 54static u64 direct_pcr_read(unsigned long reg_num)
64{ 55{
65 u64 val; 56 u64 val;
66 57
67 read_pcr(val); 58 WARN_ON_ONCE(reg_num != 0);
59 __asm__ __volatile__("rd %%pcr, %0" : "=r" (val));
68 return val; 60 return val;
69} 61}
70 62
71static void direct_pcr_write(u64 val) 63static void direct_pcr_write(unsigned long reg_num, u64 val)
64{
65 WARN_ON_ONCE(reg_num != 0);
66 __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val));
67}
68
69static u64 direct_pic_read(unsigned long reg_num)
72{ 70{
73 write_pcr(val); 71 u64 val;
72
73 WARN_ON_ONCE(reg_num != 0);
74 __asm__ __volatile__("rd %%pic, %0" : "=r" (val));
75 return val;
76}
77
78static void direct_pic_write(unsigned long reg_num, u64 val)
79{
80 WARN_ON_ONCE(reg_num != 0);
81
82 /* Blackbird errata workaround. See commentary in
83 * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
84 * for more information.
85 */
86 __asm__ __volatile__("ba,pt %%xcc, 99f\n\t"
87 " nop\n\t"
88 ".align 64\n"
89 "99:wr %0, 0x0, %%pic\n\t"
90 "rd %%pic, %%g0" : : "r" (val));
91}
92
93static u64 direct_picl_value(unsigned int nmi_hz)
94{
95 u32 delta = local_cpu_data().clock_tick / nmi_hz;
96
97 return ((u64)((0 - delta) & 0xffffffff)) << 32;
74} 98}
75 99
76static const struct pcr_ops direct_pcr_ops = { 100static const struct pcr_ops direct_pcr_ops = {
77 .read = direct_pcr_read, 101 .read_pcr = direct_pcr_read,
78 .write = direct_pcr_write, 102 .write_pcr = direct_pcr_write,
103 .read_pic = direct_pic_read,
104 .write_pic = direct_pic_write,
105 .nmi_picl_value = direct_picl_value,
106 .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE),
107 .pcr_nmi_disable = PCR_PIC_PRIV,
79}; 108};
80 109
81static void n2_pcr_write(u64 val) 110static void n2_pcr_write(unsigned long reg_num, u64 val)
82{ 111{
83 unsigned long ret; 112 unsigned long ret;
84 113
114 WARN_ON_ONCE(reg_num != 0);
85 if (val & PCR_N2_HTRACE) { 115 if (val & PCR_N2_HTRACE) {
86 ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); 116 ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
87 if (ret != HV_EOK) 117 if (ret != HV_EOK)
88 write_pcr(val); 118 direct_pcr_write(reg_num, val);
89 } else 119 } else
90 write_pcr(val); 120 direct_pcr_write(reg_num, val);
121}
122
123static u64 n2_picl_value(unsigned int nmi_hz)
124{
125 u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
126
127 return ((u64)((0 - delta) & 0xffffffff)) << 32;
91} 128}
92 129
93static const struct pcr_ops n2_pcr_ops = { 130static const struct pcr_ops n2_pcr_ops = {
94 .read = direct_pcr_read, 131 .read_pcr = direct_pcr_read,
95 .write = n2_pcr_write, 132 .write_pcr = n2_pcr_write,
133 .read_pic = direct_pic_read,
134 .write_pic = direct_pic_write,
135 .nmi_picl_value = n2_picl_value,
136 .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE |
137 PCR_N2_TOE_OV1 |
138 (2 << PCR_N2_SL1_SHIFT) |
139 (0xff << PCR_N2_MASK1_SHIFT)),
140 .pcr_nmi_disable = PCR_PIC_PRIV,
141};
142
143static u64 n4_pcr_read(unsigned long reg_num)
144{
145 unsigned long val;
146
147 (void) sun4v_vt_get_perfreg(reg_num, &val);
148
149 return val;
150}
151
152static void n4_pcr_write(unsigned long reg_num, u64 val)
153{
154 (void) sun4v_vt_set_perfreg(reg_num, val);
155}
156
157static u64 n4_pic_read(unsigned long reg_num)
158{
159 unsigned long val;
160
161 __asm__ __volatile__("ldxa [%1] %2, %0"
162 : "=r" (val)
163 : "r" (reg_num * 0x8UL), "i" (ASI_PIC));
164
165 return val;
166}
167
168static void n4_pic_write(unsigned long reg_num, u64 val)
169{
170 __asm__ __volatile__("stxa %0, [%1] %2"
171 : /* no outputs */
172 : "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC));
173}
174
175static u64 n4_picl_value(unsigned int nmi_hz)
176{
177 u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
178
179 return ((u64)((0 - delta) & 0xffffffff));
180}
181
182static const struct pcr_ops n4_pcr_ops = {
183 .read_pcr = n4_pcr_read,
184 .write_pcr = n4_pcr_write,
185 .read_pic = n4_pic_read,
186 .write_pic = n4_pic_write,
187 .nmi_picl_value = n4_picl_value,
188 .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
189 PCR_N4_UTRACE | PCR_N4_TOE |
190 (26 << PCR_N4_SL_SHIFT)),
191 .pcr_nmi_disable = PCR_N4_PICNPT,
96}; 192};
97 193
98static unsigned long perf_hsvc_group; 194static unsigned long perf_hsvc_group;
@@ -115,6 +211,10 @@ static int __init register_perf_hsvc(void)
115 perf_hsvc_group = HV_GRP_KT_CPU; 211 perf_hsvc_group = HV_GRP_KT_CPU;
116 break; 212 break;
117 213
214 case SUN4V_CHIP_NIAGARA4:
215 perf_hsvc_group = HV_GRP_VT_CPU;
216 break;
217
118 default: 218 default:
119 return -ENODEV; 219 return -ENODEV;
120 } 220 }
@@ -139,6 +239,29 @@ static void __init unregister_perf_hsvc(void)
139 sun4v_hvapi_unregister(perf_hsvc_group); 239 sun4v_hvapi_unregister(perf_hsvc_group);
140} 240}
141 241
242static int __init setup_sun4v_pcr_ops(void)
243{
244 int ret = 0;
245
246 switch (sun4v_chip_type) {
247 case SUN4V_CHIP_NIAGARA1:
248 case SUN4V_CHIP_NIAGARA2:
249 case SUN4V_CHIP_NIAGARA3:
250 pcr_ops = &n2_pcr_ops;
251 break;
252
253 case SUN4V_CHIP_NIAGARA4:
254 pcr_ops = &n4_pcr_ops;
255 break;
256
257 default:
258 ret = -ENODEV;
259 break;
260 }
261
262 return ret;
263}
264
142int __init pcr_arch_init(void) 265int __init pcr_arch_init(void)
143{ 266{
144 int err = register_perf_hsvc(); 267 int err = register_perf_hsvc();
@@ -148,15 +271,14 @@ int __init pcr_arch_init(void)
148 271
149 switch (tlb_type) { 272 switch (tlb_type) {
150 case hypervisor: 273 case hypervisor:
151 pcr_ops = &n2_pcr_ops; 274 err = setup_sun4v_pcr_ops();
152 pcr_enable = PCR_N2_ENABLE; 275 if (err)
153 picl_shift = 2; 276 goto out_unregister;
154 break; 277 break;
155 278
156 case cheetah: 279 case cheetah:
157 case cheetah_plus: 280 case cheetah_plus:
158 pcr_ops = &direct_pcr_ops; 281 pcr_ops = &direct_pcr_ops;
159 pcr_enable = PCR_SUN4U_ENABLE;
160 break; 282 break;
161 283
162 case spitfire: 284 case spitfire:
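
The ops vector grows from a plain read/write pair into per-register accessors plus the NMI-watchdog constants used by nmi.c. Inferred from the initializers above, the updated struct pcr_ops in arch/sparc/include/asm/pcr.h presumably looks roughly like:

    struct pcr_ops {
        u64  (*read_pcr)(unsigned long reg_num);
        void (*write_pcr)(unsigned long reg_num, u64 val);
        u64  (*read_pic)(unsigned long reg_num);
        void (*write_pic)(unsigned long reg_num, u64 val);
        u64  (*nmi_picl_value)(unsigned int nmi_hz);
        u64  pcr_nmi_enable;
        u64  pcr_nmi_disable;
    };
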
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 5713957dcb8a..e48651dace1b 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -25,36 +25,48 @@
25#include <linux/atomic.h> 25#include <linux/atomic.h>
26#include <asm/nmi.h> 26#include <asm/nmi.h>
27#include <asm/pcr.h> 27#include <asm/pcr.h>
28#include <asm/perfctr.h>
29#include <asm/cacheflush.h> 28#include <asm/cacheflush.h>
30 29
31#include "kernel.h" 30#include "kernel.h"
32#include "kstack.h" 31#include "kstack.h"
33 32
 34/* Sparc64 chips have two performance counters, 32-bits each, with 33/* Two classes of sparc64 chips currently exist, all of which have
35 * overflow interrupts generated on transition from 0xffffffff to 0. 34 * 32-bit counters which can generate overflow interrupts on the
36 * The counters are accessed in one go using a 64-bit register. 35 * transition from 0xffffffff to 0.
37 * 36 *
 38 * Both counters are controlled using a single control register. The 37 * All chips up to and including SPARC-T3 have two performance
39 * only way to stop all sampling is to clear all of the context (user, 38 * counters. The two 32-bit counters are accessed in one go using a
40 * supervisor, hypervisor) sampling enable bits. But these bits apply 39 * single 64-bit register.
41 * to both counters, thus the two counters can't be enabled/disabled
42 * individually.
43 * 40 *
44 * The control register has two event fields, one for each of the two 41 * On these older chips both counters are controlled using a single
45 * counters. It's thus nearly impossible to have one counter going 42 * control register. The only way to stop all sampling is to clear
46 * while keeping the other one stopped. Therefore it is possible to 43 * all of the context (user, supervisor, hypervisor) sampling enable
47 * get overflow interrupts for counters not currently "in use" and 44 * bits. But these bits apply to both counters, thus the two counters
48 * that condition must be checked in the overflow interrupt handler. 45 * can't be enabled/disabled individually.
46 *
 47 * Furthermore, the control register on these older chips has two
48 * event fields, one for each of the two counters. It's thus nearly
49 * impossible to have one counter going while keeping the other one
50 * stopped. Therefore it is possible to get overflow interrupts for
51 * counters not currently "in use" and that condition must be checked
52 * in the overflow interrupt handler.
49 * 53 *
50 * So we use a hack, in that we program inactive counters with the 54 * So we use a hack, in that we program inactive counters with the
51 * "sw_count0" and "sw_count1" events. These count how many times 55 * "sw_count0" and "sw_count1" events. These count how many times
52 * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an 56 * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an
53 * unusual way to encode a NOP and therefore will not trigger in 57 * unusual way to encode a NOP and therefore will not trigger in
54 * normal code. 58 * normal code.
59 *
60 * Starting with SPARC-T4 we have one control register per counter.
61 * And the counters are stored in individual registers. The registers
62 * for the counters are 64-bit but only a 32-bit counter is
63 * implemented. The event selections on SPARC-T4 lack any
64 * restrictions, therefore we can elide all of the complicated
65 * conflict resolution code we have for SPARC-T3 and earlier chips.
55 */ 66 */
56 67
57#define MAX_HWEVENTS 2 68#define MAX_HWEVENTS 4
69#define MAX_PCRS 4
58#define MAX_PERIOD ((1UL << 32) - 1) 70#define MAX_PERIOD ((1UL << 32) - 1)
59 71
60#define PIC_UPPER_INDEX 0 72#define PIC_UPPER_INDEX 0
@@ -90,8 +102,8 @@ struct cpu_hw_events {
90 */ 102 */
91 int current_idx[MAX_HWEVENTS]; 103 int current_idx[MAX_HWEVENTS];
92 104
93 /* Software copy of %pcr register on this cpu. */ 105 /* Software copy of %pcr register(s) on this cpu. */
94 u64 pcr; 106 u64 pcr[MAX_HWEVENTS];
95 107
96 /* Enabled/disable state. */ 108 /* Enabled/disable state. */
97 int enabled; 109 int enabled;
@@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
103/* An event map describes the characteristics of a performance 115/* An event map describes the characteristics of a performance
104 * counter event. In particular it gives the encoding as well as 116 * counter event. In particular it gives the encoding as well as
105 * a mask telling which counters the event can be measured on. 117 * a mask telling which counters the event can be measured on.
118 *
119 * The mask is unused on SPARC-T4 and later.
106 */ 120 */
107struct perf_event_map { 121struct perf_event_map {
108 u16 encoding; 122 u16 encoding;
@@ -142,15 +156,53 @@ struct sparc_pmu {
142 const struct perf_event_map *(*event_map)(int); 156 const struct perf_event_map *(*event_map)(int);
143 const cache_map_t *cache_map; 157 const cache_map_t *cache_map;
144 int max_events; 158 int max_events;
159 u32 (*read_pmc)(int);
160 void (*write_pmc)(int, u64);
145 int upper_shift; 161 int upper_shift;
146 int lower_shift; 162 int lower_shift;
147 int event_mask; 163 int event_mask;
164 int user_bit;
165 int priv_bit;
148 int hv_bit; 166 int hv_bit;
149 int irq_bit; 167 int irq_bit;
150 int upper_nop; 168 int upper_nop;
151 int lower_nop; 169 int lower_nop;
170 unsigned int flags;
171#define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001
172#define SPARC_PMU_HAS_CONFLICTS 0x00000002
173 int max_hw_events;
174 int num_pcrs;
175 int num_pic_regs;
152}; 176};
153 177
178static u32 sparc_default_read_pmc(int idx)
179{
180 u64 val;
181
182 val = pcr_ops->read_pic(0);
183 if (idx == PIC_UPPER_INDEX)
184 val >>= 32;
185
186 return val & 0xffffffff;
187}
188
189static void sparc_default_write_pmc(int idx, u64 val)
190{
191 u64 shift, mask, pic;
192
193 shift = 0;
194 if (idx == PIC_UPPER_INDEX)
195 shift = 32;
196
197 mask = ((u64) 0xffffffff) << shift;
198 val <<= shift;
199
200 pic = pcr_ops->read_pic(0);
201 pic &= ~mask;
202 pic |= val;
203 pcr_ops->write_pic(0, pic);
204}
205
154static const struct perf_event_map ultra3_perfmon_event_map[] = { 206static const struct perf_event_map ultra3_perfmon_event_map[] = {
155 [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, 207 [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
156 [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, 208 [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
@@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = {
268 .event_map = ultra3_event_map, 320 .event_map = ultra3_event_map,
269 .cache_map = &ultra3_cache_map, 321 .cache_map = &ultra3_cache_map,
270 .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), 322 .max_events = ARRAY_SIZE(ultra3_perfmon_event_map),
323 .read_pmc = sparc_default_read_pmc,
324 .write_pmc = sparc_default_write_pmc,
271 .upper_shift = 11, 325 .upper_shift = 11,
272 .lower_shift = 4, 326 .lower_shift = 4,
273 .event_mask = 0x3f, 327 .event_mask = 0x3f,
328 .user_bit = PCR_UTRACE,
329 .priv_bit = PCR_STRACE,
274 .upper_nop = 0x1c, 330 .upper_nop = 0x1c,
275 .lower_nop = 0x14, 331 .lower_nop = 0x14,
332 .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
333 SPARC_PMU_HAS_CONFLICTS),
334 .max_hw_events = 2,
335 .num_pcrs = 1,
336 .num_pic_regs = 1,
276}; 337};
277 338
278/* Niagara1 is very limited. The upper PIC is hard-locked to count 339/* Niagara1 is very limited. The upper PIC is hard-locked to count
@@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = {
397 .event_map = niagara1_event_map, 458 .event_map = niagara1_event_map,
398 .cache_map = &niagara1_cache_map, 459 .cache_map = &niagara1_cache_map,
399 .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), 460 .max_events = ARRAY_SIZE(niagara1_perfmon_event_map),
461 .read_pmc = sparc_default_read_pmc,
462 .write_pmc = sparc_default_write_pmc,
400 .upper_shift = 0, 463 .upper_shift = 0,
401 .lower_shift = 4, 464 .lower_shift = 4,
402 .event_mask = 0x7, 465 .event_mask = 0x7,
466 .user_bit = PCR_UTRACE,
467 .priv_bit = PCR_STRACE,
403 .upper_nop = 0x0, 468 .upper_nop = 0x0,
404 .lower_nop = 0x0, 469 .lower_nop = 0x0,
470 .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
471 SPARC_PMU_HAS_CONFLICTS),
472 .max_hw_events = 2,
473 .num_pcrs = 1,
474 .num_pic_regs = 1,
405}; 475};
406 476
407static const struct perf_event_map niagara2_perfmon_event_map[] = { 477static const struct perf_event_map niagara2_perfmon_event_map[] = {
@@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = {
523 .event_map = niagara2_event_map, 593 .event_map = niagara2_event_map,
524 .cache_map = &niagara2_cache_map, 594 .cache_map = &niagara2_cache_map,
525 .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), 595 .max_events = ARRAY_SIZE(niagara2_perfmon_event_map),
596 .read_pmc = sparc_default_read_pmc,
597 .write_pmc = sparc_default_write_pmc,
526 .upper_shift = 19, 598 .upper_shift = 19,
527 .lower_shift = 6, 599 .lower_shift = 6,
528 .event_mask = 0xfff, 600 .event_mask = 0xfff,
529 .hv_bit = 0x8, 601 .user_bit = PCR_UTRACE,
602 .priv_bit = PCR_STRACE,
603 .hv_bit = PCR_N2_HTRACE,
530 .irq_bit = 0x30, 604 .irq_bit = 0x30,
531 .upper_nop = 0x220, 605 .upper_nop = 0x220,
532 .lower_nop = 0x220, 606 .lower_nop = 0x220,
607 .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
608 SPARC_PMU_HAS_CONFLICTS),
609 .max_hw_events = 2,
610 .num_pcrs = 1,
611 .num_pic_regs = 1,
612};
613
614static const struct perf_event_map niagara4_perfmon_event_map[] = {
615 [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) },
616 [PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f },
617 [PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 },
618 [PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 },
619 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 },
620 [PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f },
621};
622
623static const struct perf_event_map *niagara4_event_map(int event_id)
624{
625 return &niagara4_perfmon_event_map[event_id];
626}
627
628static const cache_map_t niagara4_cache_map = {
629[C(L1D)] = {
630 [C(OP_READ)] = {
631 [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
632 [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
633 },
634 [C(OP_WRITE)] = {
635 [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
636 [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
637 },
638 [C(OP_PREFETCH)] = {
639 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
640 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
641 },
642},
643[C(L1I)] = {
644 [C(OP_READ)] = {
645 [C(RESULT_ACCESS)] = { (3 << 6) | 0x3f },
646 [C(RESULT_MISS)] = { (11 << 6) | 0x03 },
647 },
648 [ C(OP_WRITE) ] = {
649 [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
650 [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
651 },
652 [ C(OP_PREFETCH) ] = {
653 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
654 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
655 },
656},
657[C(LL)] = {
658 [C(OP_READ)] = {
659 [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
660 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
661 },
662 [C(OP_WRITE)] = {
663 [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
664 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
665 },
666 [C(OP_PREFETCH)] = {
667 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
668 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
669 },
670},
671[C(DTLB)] = {
672 [C(OP_READ)] = {
673 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
674 [C(RESULT_MISS)] = { (17 << 6) | 0x3f },
675 },
676 [ C(OP_WRITE) ] = {
677 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
678 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
679 },
680 [ C(OP_PREFETCH) ] = {
681 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
682 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
683 },
684},
685[C(ITLB)] = {
686 [C(OP_READ)] = {
687 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
688 [C(RESULT_MISS)] = { (6 << 6) | 0x3f },
689 },
690 [ C(OP_WRITE) ] = {
691 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
692 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
693 },
694 [ C(OP_PREFETCH) ] = {
695 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
696 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
697 },
698},
699[C(BPU)] = {
700 [C(OP_READ)] = {
701 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
702 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
703 },
704 [ C(OP_WRITE) ] = {
705 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
706 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
707 },
708 [ C(OP_PREFETCH) ] = {
709 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
710 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
711 },
712},
713[C(NODE)] = {
714 [C(OP_READ)] = {
715 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
716 [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
717 },
718 [ C(OP_WRITE) ] = {
719 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
720 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
721 },
722 [ C(OP_PREFETCH) ] = {
723 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
724 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
725 },
726},
727};
728
729static u32 sparc_vt_read_pmc(int idx)
730{
731 u64 val = pcr_ops->read_pic(idx);
732
733 return val & 0xffffffff;
734}
735
736static void sparc_vt_write_pmc(int idx, u64 val)
737{
738 u64 pcr;
739
740 /* There seems to be an internal latch on the overflow event
741 * on SPARC-T4 that prevents it from triggering unless you
742 * update the PIC exactly as we do here. The requirement
743 * seems to be that you have to turn off event counting in the
744 * PCR around the PIC update.
745 *
746 * For example, after the following sequence:
747 *
748 * 1) set PIC to -1
749 * 2) enable event counting and overflow reporting in PCR
750 * 3) overflow triggers, softint 15 handler invoked
751 * 4) clear OV bit in PCR
752 * 5) write PIC to -1
753 *
754 * a subsequent overflow event will not trigger. This
755 * sequence works on SPARC-T3 and previous chips.
756 */
757 pcr = pcr_ops->read_pcr(idx);
758 pcr_ops->write_pcr(idx, PCR_N4_PICNPT);
759
760 pcr_ops->write_pic(idx, val & 0xffffffff);
761
762 pcr_ops->write_pcr(idx, pcr);
763}
764
765static const struct sparc_pmu niagara4_pmu = {
766 .event_map = niagara4_event_map,
767 .cache_map = &niagara4_cache_map,
768 .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
769 .read_pmc = sparc_vt_read_pmc,
770 .write_pmc = sparc_vt_write_pmc,
771 .upper_shift = 5,
772 .lower_shift = 5,
773 .event_mask = 0x7ff,
774 .user_bit = PCR_N4_UTRACE,
775 .priv_bit = PCR_N4_STRACE,
776
777 /* We explicitly don't support hypervisor tracing. The T4
778 * generates the overflow event for precise events via a trap
 779 * which will not be generated (i.e. it's completely lost) if
780 * we happen to be in the hypervisor when the event triggers.
781 * Essentially, the overflow event reporting is completely
782 * unusable when you have hypervisor mode tracing enabled.
783 */
784 .hv_bit = 0,
785
786 .irq_bit = PCR_N4_TOE,
787 .upper_nop = 0,
788 .lower_nop = 0,
789 .flags = 0,
790 .max_hw_events = 4,
791 .num_pcrs = 4,
792 .num_pic_regs = 4,
533}; 793};
534 794
535static const struct sparc_pmu *sparc_pmu __read_mostly; 795static const struct sparc_pmu *sparc_pmu __read_mostly;
@@ -558,55 +818,35 @@ static u64 nop_for_index(int idx)
558static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) 818static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
559{ 819{
560 u64 val, mask = mask_for_index(idx); 820 u64 val, mask = mask_for_index(idx);
821 int pcr_index = 0;
561 822
562 val = cpuc->pcr; 823 if (sparc_pmu->num_pcrs > 1)
824 pcr_index = idx;
825
826 val = cpuc->pcr[pcr_index];
563 val &= ~mask; 827 val &= ~mask;
564 val |= hwc->config; 828 val |= hwc->config;
565 cpuc->pcr = val; 829 cpuc->pcr[pcr_index] = val;
566 830
567 pcr_ops->write(cpuc->pcr); 831 pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
568} 832}
569 833
570static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) 834static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
571{ 835{
572 u64 mask = mask_for_index(idx); 836 u64 mask = mask_for_index(idx);
573 u64 nop = nop_for_index(idx); 837 u64 nop = nop_for_index(idx);
838 int pcr_index = 0;
574 u64 val; 839 u64 val;
575 840
576 val = cpuc->pcr; 841 if (sparc_pmu->num_pcrs > 1)
842 pcr_index = idx;
843
844 val = cpuc->pcr[pcr_index];
577 val &= ~mask; 845 val &= ~mask;
578 val |= nop; 846 val |= nop;
579 cpuc->pcr = val; 847 cpuc->pcr[pcr_index] = val;
580 848
581 pcr_ops->write(cpuc->pcr); 849 pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
582}
583
584static u32 read_pmc(int idx)
585{
586 u64 val;
587
588 read_pic(val);
589 if (idx == PIC_UPPER_INDEX)
590 val >>= 32;
591
592 return val & 0xffffffff;
593}
594
595static void write_pmc(int idx, u64 val)
596{
597 u64 shift, mask, pic;
598
599 shift = 0;
600 if (idx == PIC_UPPER_INDEX)
601 shift = 32;
602
603 mask = ((u64) 0xffffffff) << shift;
604 val <<= shift;
605
606 read_pic(pic);
607 pic &= ~mask;
608 pic |= val;
609 write_pic(pic);
610} 850}
611 851
612static u64 sparc_perf_event_update(struct perf_event *event, 852static u64 sparc_perf_event_update(struct perf_event *event,
@@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event,
618 858
619again: 859again:
620 prev_raw_count = local64_read(&hwc->prev_count); 860 prev_raw_count = local64_read(&hwc->prev_count);
621 new_raw_count = read_pmc(idx); 861 new_raw_count = sparc_pmu->read_pmc(idx);
622 862
623 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 863 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
624 new_raw_count) != prev_raw_count) 864 new_raw_count) != prev_raw_count)
@@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event,
658 898
659 local64_set(&hwc->prev_count, (u64)-left); 899 local64_set(&hwc->prev_count, (u64)-left);
660 900
661 write_pmc(idx, (u64)(-left) & 0xffffffff); 901 sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff);
662 902
663 perf_event_update_userpage(event); 903 perf_event_update_userpage(event);
664 904
665 return ret; 905 return ret;
666} 906}
667 907
668/* If performance event entries have been added, move existing 908static void read_in_all_counters(struct cpu_hw_events *cpuc)
669 * events around (if necessary) and then assign new entries to
670 * counters.
671 */
672static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
673{ 909{
674 int i; 910 int i;
675 911
676 if (!cpuc->n_added)
677 goto out;
678
679 /* Read in the counters which are moving. */
680 for (i = 0; i < cpuc->n_events; i++) { 912 for (i = 0; i < cpuc->n_events; i++) {
681 struct perf_event *cp = cpuc->event[i]; 913 struct perf_event *cp = cpuc->event[i];
682 914
@@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
687 cpuc->current_idx[i] = PIC_NO_INDEX; 919 cpuc->current_idx[i] = PIC_NO_INDEX;
688 } 920 }
689 } 921 }
922}
923
924/* On this PMU all PICs are programmed using a single PCR. Calculate
925 * the combined control register value.
926 *
927 * For such chips we require that all of the events have the same
928 * configuration, so just fetch the settings from the first entry.
929 */
930static void calculate_single_pcr(struct cpu_hw_events *cpuc)
931{
932 int i;
933
934 if (!cpuc->n_added)
935 goto out;
690 936
691 /* Assign to counters all unassigned events. */ 937 /* Assign to counters all unassigned events. */
692 for (i = 0; i < cpuc->n_events; i++) { 938 for (i = 0; i < cpuc->n_events; i++) {
@@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
702 cpuc->current_idx[i] = idx; 948 cpuc->current_idx[i] = idx;
703 949
704 enc = perf_event_get_enc(cpuc->events[i]); 950 enc = perf_event_get_enc(cpuc->events[i]);
705 pcr &= ~mask_for_index(idx); 951 cpuc->pcr[0] &= ~mask_for_index(idx);
706 if (hwc->state & PERF_HES_STOPPED) 952 if (hwc->state & PERF_HES_STOPPED)
707 pcr |= nop_for_index(idx); 953 cpuc->pcr[0] |= nop_for_index(idx);
708 else 954 else
709 pcr |= event_encoding(enc, idx); 955 cpuc->pcr[0] |= event_encoding(enc, idx);
710 } 956 }
711out: 957out:
712 return pcr; 958 cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
959}
960
 961/* On this PMU each PIC has its own PCR control register. */
962static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
963{
964 int i;
965
966 if (!cpuc->n_added)
967 goto out;
968
969 for (i = 0; i < cpuc->n_events; i++) {
970 struct perf_event *cp = cpuc->event[i];
971 struct hw_perf_event *hwc = &cp->hw;
972 int idx = hwc->idx;
973 u64 enc;
974
975 if (cpuc->current_idx[i] != PIC_NO_INDEX)
976 continue;
977
978 sparc_perf_event_set_period(cp, hwc, idx);
979 cpuc->current_idx[i] = idx;
980
981 enc = perf_event_get_enc(cpuc->events[i]);
982 cpuc->pcr[idx] &= ~mask_for_index(idx);
983 if (hwc->state & PERF_HES_STOPPED)
984 cpuc->pcr[idx] |= nop_for_index(idx);
985 else
986 cpuc->pcr[idx] |= event_encoding(enc, idx);
987 }
988out:
989 for (i = 0; i < cpuc->n_events; i++) {
990 struct perf_event *cp = cpuc->event[i];
991 int idx = cp->hw.idx;
992
993 cpuc->pcr[idx] |= cp->hw.config_base;
994 }
995}
996
997/* If performance event entries have been added, move existing events
998 * around (if necessary) and then assign new entries to counters.
999 */
1000static void update_pcrs_for_enable(struct cpu_hw_events *cpuc)
1001{
1002 if (cpuc->n_added)
1003 read_in_all_counters(cpuc);
1004
1005 if (sparc_pmu->num_pcrs == 1) {
1006 calculate_single_pcr(cpuc);
1007 } else {
1008 calculate_multiple_pcrs(cpuc);
1009 }
713} 1010}
714 1011
715static void sparc_pmu_enable(struct pmu *pmu) 1012static void sparc_pmu_enable(struct pmu *pmu)
716{ 1013{
717 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1014 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
718 u64 pcr; 1015 int i;
719 1016
720 if (cpuc->enabled) 1017 if (cpuc->enabled)
721 return; 1018 return;
@@ -723,26 +1020,17 @@ static void sparc_pmu_enable(struct pmu *pmu)
723 cpuc->enabled = 1; 1020 cpuc->enabled = 1;
724 barrier(); 1021 barrier();
725 1022
726 pcr = cpuc->pcr; 1023 if (cpuc->n_events)
727 if (!cpuc->n_events) { 1024 update_pcrs_for_enable(cpuc);
728 pcr = 0;
729 } else {
730 pcr = maybe_change_configuration(cpuc, pcr);
731
732 /* We require that all of the events have the same
733 * configuration, so just fetch the settings from the
734 * first entry.
735 */
736 cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
737 }
738 1025
739 pcr_ops->write(cpuc->pcr); 1026 for (i = 0; i < sparc_pmu->num_pcrs; i++)
1027 pcr_ops->write_pcr(i, cpuc->pcr[i]);
740} 1028}
741 1029
742static void sparc_pmu_disable(struct pmu *pmu) 1030static void sparc_pmu_disable(struct pmu *pmu)
743{ 1031{
744 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1032 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
745 u64 val; 1033 int i;
746 1034
747 if (!cpuc->enabled) 1035 if (!cpuc->enabled)
748 return; 1036 return;
@@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu)
750 cpuc->enabled = 0; 1038 cpuc->enabled = 0;
751 cpuc->n_added = 0; 1039 cpuc->n_added = 0;
752 1040
753 val = cpuc->pcr; 1041 for (i = 0; i < sparc_pmu->num_pcrs; i++) {
754 val &= ~(PCR_UTRACE | PCR_STRACE | 1042 u64 val = cpuc->pcr[i];
755 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
756 cpuc->pcr = val;
757 1043
758 pcr_ops->write(cpuc->pcr); 1044 val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit |
1045 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
1046 cpuc->pcr[i] = val;
1047 pcr_ops->write_pcr(i, cpuc->pcr[i]);
1048 }
759} 1049}
760 1050
761static int active_event_index(struct cpu_hw_events *cpuc, 1051static int active_event_index(struct cpu_hw_events *cpuc,
@@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex);
854static void perf_stop_nmi_watchdog(void *unused) 1144static void perf_stop_nmi_watchdog(void *unused)
855{ 1145{
856 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1146 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1147 int i;
857 1148
858 stop_nmi_watchdog(NULL); 1149 stop_nmi_watchdog(NULL);
859 cpuc->pcr = pcr_ops->read(); 1150 for (i = 0; i < sparc_pmu->num_pcrs; i++)
1151 cpuc->pcr[i] = pcr_ops->read_pcr(i);
860} 1152}
861 1153
862void perf_event_grab_pmc(void) 1154void perf_event_grab_pmc(void)
@@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts,
942 if (!n_ev) 1234 if (!n_ev)
943 return 0; 1235 return 0;
944 1236
945 if (n_ev > MAX_HWEVENTS) 1237 if (n_ev > sparc_pmu->max_hw_events)
946 return -1; 1238 return -1;
947 1239
1240 if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) {
1241 int i;
1242
1243 for (i = 0; i < n_ev; i++)
1244 evts[i]->hw.idx = i;
1245 return 0;
1246 }
1247
948 msk0 = perf_event_get_msk(events[0]); 1248 msk0 = perf_event_get_msk(events[0]);
949 if (n_ev == 1) { 1249 if (n_ev == 1) {
950 if (msk0 & PIC_LOWER) 1250 if (msk0 & PIC_LOWER)
@@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
1000 struct perf_event *event; 1300 struct perf_event *event;
1001 int i, n, first; 1301 int i, n, first;
1002 1302
1303 if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME))
1304 return 0;
1305
1003 n = n_prev + n_new; 1306 n = n_prev + n_new;
1004 if (n <= 1) 1307 if (n <= 1)
1005 return 0; 1308 return 0;
@@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
1059 perf_pmu_disable(event->pmu); 1362 perf_pmu_disable(event->pmu);
1060 1363
1061 n0 = cpuc->n_events; 1364 n0 = cpuc->n_events;
1062 if (n0 >= MAX_HWEVENTS) 1365 if (n0 >= sparc_pmu->max_hw_events)
1063 goto out; 1366 goto out;
1064 1367
1065 cpuc->event[n0] = event; 1368 cpuc->event[n0] = event;
@@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event)
1146 /* We save the enable bits in the config_base. */ 1449 /* We save the enable bits in the config_base. */
1147 hwc->config_base = sparc_pmu->irq_bit; 1450 hwc->config_base = sparc_pmu->irq_bit;
1148 if (!attr->exclude_user) 1451 if (!attr->exclude_user)
1149 hwc->config_base |= PCR_UTRACE; 1452 hwc->config_base |= sparc_pmu->user_bit;
1150 if (!attr->exclude_kernel) 1453 if (!attr->exclude_kernel)
1151 hwc->config_base |= PCR_STRACE; 1454 hwc->config_base |= sparc_pmu->priv_bit;
1152 if (!attr->exclude_hv) 1455 if (!attr->exclude_hv)
1153 hwc->config_base |= sparc_pmu->hv_bit; 1456 hwc->config_base |= sparc_pmu->hv_bit;
1154 1457
1155 n = 0; 1458 n = 0;
1156 if (event->group_leader != event) { 1459 if (event->group_leader != event) {
1157 n = collect_events(event->group_leader, 1460 n = collect_events(event->group_leader,
1158 MAX_HWEVENTS - 1, 1461 sparc_pmu->max_hw_events - 1,
1159 evts, events, current_idx_dmy); 1462 evts, events, current_idx_dmy);
1160 if (n < 0) 1463 if (n < 0)
1161 return -EINVAL; 1464 return -EINVAL;
@@ -1254,8 +1557,7 @@ static struct pmu pmu = {
1254void perf_event_print_debug(void) 1557void perf_event_print_debug(void)
1255{ 1558{
1256 unsigned long flags; 1559 unsigned long flags;
1257 u64 pcr, pic; 1560 int cpu, i;
1258 int cpu;
1259 1561
1260 if (!sparc_pmu) 1562 if (!sparc_pmu)
1261 return; 1563 return;
@@ -1264,12 +1566,13 @@ void perf_event_print_debug(void)
1264 1566
1265 cpu = smp_processor_id(); 1567 cpu = smp_processor_id();
1266 1568
1267 pcr = pcr_ops->read();
1268 read_pic(pic);
1269
1270 pr_info("\n"); 1569 pr_info("\n");
1271 pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", 1570 for (i = 0; i < sparc_pmu->num_pcrs; i++)
1272 cpu, pcr, pic); 1571 pr_info("CPU#%d: PCR%d[%016llx]\n",
1572 cpu, i, pcr_ops->read_pcr(i));
1573 for (i = 0; i < sparc_pmu->num_pic_regs; i++)
1574 pr_info("CPU#%d: PIC%d[%016llx]\n",
1575 cpu, i, pcr_ops->read_pic(i));
1273 1576
1274 local_irq_restore(flags); 1577 local_irq_restore(flags);
1275} 1578}
@@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1305 * Do this before we peek at the counters to determine 1608 * Do this before we peek at the counters to determine
1306 * overflow so we don't lose any events. 1609 * overflow so we don't lose any events.
1307 */ 1610 */
1308 if (sparc_pmu->irq_bit) 1611 if (sparc_pmu->irq_bit &&
1309 pcr_ops->write(cpuc->pcr); 1612 sparc_pmu->num_pcrs == 1)
1613 pcr_ops->write_pcr(0, cpuc->pcr[0]);
1310 1614
1311 for (i = 0; i < cpuc->n_events; i++) { 1615 for (i = 0; i < cpuc->n_events; i++) {
1312 struct perf_event *event = cpuc->event[i]; 1616 struct perf_event *event = cpuc->event[i];
@@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1314 struct hw_perf_event *hwc; 1618 struct hw_perf_event *hwc;
1315 u64 val; 1619 u64 val;
1316 1620
1621 if (sparc_pmu->irq_bit &&
1622 sparc_pmu->num_pcrs > 1)
1623 pcr_ops->write_pcr(idx, cpuc->pcr[idx]);
1624
1317 hwc = &event->hw; 1625 hwc = &event->hw;
1318 val = sparc_perf_event_update(event, hwc, idx); 1626 val = sparc_perf_event_update(event, hwc, idx);
1319 if (val & (1ULL << 31)) 1627 if (val & (1ULL << 31))
@@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void)
1352 sparc_pmu = &niagara2_pmu; 1660 sparc_pmu = &niagara2_pmu;
1353 return true; 1661 return true;
1354 } 1662 }
1663 if (!strcmp(sparc_pmu_type, "niagara4")) {
1664 sparc_pmu = &niagara4_pmu;
1665 return true;
1666 }
1355 return false; 1667 return false;
1356} 1668}
1357 1669
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 1414d16712b2..0800e71d8a88 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -340,7 +340,12 @@ static const char *hwcaps[] = {
340 */ 340 */
341 "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", 341 "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
342 "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", 342 "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
343 "ima", "cspare", 343 "ima", "cspare", "pause", "cbcond",
344};
345
346static const char *crypto_hwcaps[] = {
347 "aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256",
348 "sha512", "mpmul", "montmul", "montsqr", "crc32c",
344}; 349};
345 350
346void cpucap_info(struct seq_file *m) 351void cpucap_info(struct seq_file *m)
@@ -357,27 +362,61 @@ void cpucap_info(struct seq_file *m)
357 printed++; 362 printed++;
358 } 363 }
359 } 364 }
365 if (caps & HWCAP_SPARC_CRYPTO) {
366 unsigned long cfr;
367
368 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
369 for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
370 unsigned long bit = 1UL << i;
371 if (cfr & bit) {
372 seq_printf(m, "%s%s",
373 printed ? "," : "", crypto_hwcaps[i]);
374 printed++;
375 }
376 }
377 }
360 seq_putc(m, '\n'); 378 seq_putc(m, '\n');
361} 379}
362 380
381static void __init report_one_hwcap(int *printed, const char *name)
382{
383 if ((*printed) == 0)
384 printk(KERN_INFO "CPU CAPS: [");
385 printk(KERN_CONT "%s%s",
386 (*printed) ? "," : "", name);
387 if (++(*printed) == 8) {
388 printk(KERN_CONT "]\n");
389 *printed = 0;
390 }
391}
392
393static void __init report_crypto_hwcaps(int *printed)
394{
395 unsigned long cfr;
396 int i;
397
398 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
399
400 for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
401 unsigned long bit = 1UL << i;
402 if (cfr & bit)
403 report_one_hwcap(printed, crypto_hwcaps[i]);
404 }
405}
406
363static void __init report_hwcaps(unsigned long caps) 407static void __init report_hwcaps(unsigned long caps)
364{ 408{
365 int i, printed = 0; 409 int i, printed = 0;
366 410
367 printk(KERN_INFO "CPU CAPS: [");
368 for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { 411 for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
369 unsigned long bit = 1UL << i; 412 unsigned long bit = 1UL << i;
370 if (caps & bit) { 413 if (caps & bit)
371 printk(KERN_CONT "%s%s", 414 report_one_hwcap(&printed, hwcaps[i]);
372 printed ? "," : "", hwcaps[i]);
373 if (++printed == 8) {
374 printk(KERN_CONT "]\n");
375 printk(KERN_INFO "CPU CAPS: [");
376 printed = 0;
377 }
378 }
379 } 415 }
380 printk(KERN_CONT "]\n"); 416 if (caps & HWCAP_SPARC_CRYPTO)
417 report_crypto_hwcaps(&printed);
418 if (printed != 0)
419 printk(KERN_CONT "]\n");
381} 420}
382 421
383static unsigned long __init mdesc_cpu_hwcap_list(void) 422static unsigned long __init mdesc_cpu_hwcap_list(void)
@@ -411,6 +450,10 @@ static unsigned long __init mdesc_cpu_hwcap_list(void)
411 break; 450 break;
412 } 451 }
413 } 452 }
453 for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
454 if (!strcmp(prop, crypto_hwcaps[i]))
455 caps |= HWCAP_SPARC_CRYPTO;
456 }
414 457
415 plen = strlen(prop) + 1; 458 plen = strlen(prop) + 1;
416 prop += plen; 459 prop += plen;
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index dff4096f3dec..30f6ab51c551 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -32,6 +32,9 @@ lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o
32lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o 32lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o
33lib-$(CONFIG_SPARC64) += NG2patch.o 33lib-$(CONFIG_SPARC64) += NG2patch.o
34 34
35lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o
36lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o
37
35lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o 38lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
36lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o 39lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
37 40
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
new file mode 100644
index 000000000000..fd9f903ffa32
--- /dev/null
+++ b/arch/sparc/lib/NG4copy_from_user.S
@@ -0,0 +1,30 @@
1/* NG4copy_from_user.S: Niagara-4 optimized copy from userspace.
2 *
3 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
4 */
5
6#define EX_LD(x) \
798: x; \
8 .section __ex_table,"a";\
9 .align 4; \
10 .word 98b, __retl_one_asi;\
11 .text; \
12 .align 4;
13
14#ifndef ASI_AIUS
15#define ASI_AIUS 0x11
16#endif
17
18#define FUNC_NAME NG4copy_from_user
19#define LOAD(type,addr,dest) type##a [addr] %asi, dest
20#define EX_RETVAL(x) 0
21
22#ifdef __KERNEL__
23#define PREAMBLE \
24 rd %asi, %g1; \
25 cmp %g1, ASI_AIUS; \
26 bne,pn %icc, ___copy_in_user; \
27 nop
28#endif
29
30#include "NG4memcpy.S"
diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S
new file mode 100644
index 000000000000..f30ec10bbcac
--- /dev/null
+++ b/arch/sparc/lib/NG4copy_page.S
@@ -0,0 +1,57 @@
1/* NG4copy_page.S: Niagara-4 optimized copy page.
2 *
3 * Copyright (C) 2012 (davem@davemloft.net)
4 */
5
6#include <asm/asi.h>
7#include <asm/page.h>
8
9 .text
10 .align 32
11
12 .register %g2, #scratch
13 .register %g3, #scratch
14
15 .globl NG4copy_user_page
16NG4copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
17 prefetch [%o1 + 0x000], #n_reads_strong
18 prefetch [%o1 + 0x040], #n_reads_strong
19 prefetch [%o1 + 0x080], #n_reads_strong
20 prefetch [%o1 + 0x0c0], #n_reads_strong
21 set PAGE_SIZE, %g7
22 prefetch [%o1 + 0x100], #n_reads_strong
23 prefetch [%o1 + 0x140], #n_reads_strong
24 prefetch [%o1 + 0x180], #n_reads_strong
25 prefetch [%o1 + 0x1c0], #n_reads_strong
261:
27 ldx [%o1 + 0x00], %o2
28 subcc %g7, 0x40, %g7
29 ldx [%o1 + 0x08], %o3
30 ldx [%o1 + 0x10], %o4
31 ldx [%o1 + 0x18], %o5
32 ldx [%o1 + 0x20], %g1
33 stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P
34 add %o0, 0x08, %o0
35 ldx [%o1 + 0x28], %g2
36 stxa %o3, [%o0] ASI_BLK_INIT_QUAD_LDD_P
37 add %o0, 0x08, %o0
38 ldx [%o1 + 0x30], %g3
39 stxa %o4, [%o0] ASI_BLK_INIT_QUAD_LDD_P
40 add %o0, 0x08, %o0
41 ldx [%o1 + 0x38], %o2
42 add %o1, 0x40, %o1
43 stxa %o5, [%o0] ASI_BLK_INIT_QUAD_LDD_P
44 add %o0, 0x08, %o0
45 stxa %g1, [%o0] ASI_BLK_INIT_QUAD_LDD_P
46 add %o0, 0x08, %o0
47 stxa %g2, [%o0] ASI_BLK_INIT_QUAD_LDD_P
48 add %o0, 0x08, %o0
49 stxa %g3, [%o0] ASI_BLK_INIT_QUAD_LDD_P
50 add %o0, 0x08, %o0
51 stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P
52 add %o0, 0x08, %o0
53 bne,pt %icc, 1b
54 prefetch [%o1 + 0x200], #n_reads_strong
55 retl
56 membar #StoreLoad | #StoreStore
57 .size NG4copy_user_page,.-NG4copy_user_page
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
new file mode 100644
index 000000000000..9744c4540a8d
--- /dev/null
+++ b/arch/sparc/lib/NG4copy_to_user.S
@@ -0,0 +1,39 @@
1/* NG4copy_to_user.S: Niagara-4 optimized copy to userspace.
2 *
3 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
4 */
5
6#define EX_ST(x) \
798: x; \
8 .section __ex_table,"a";\
9 .align 4; \
10 .word 98b, __retl_one_asi;\
11 .text; \
12 .align 4;
13
14#ifndef ASI_AIUS
15#define ASI_AIUS 0x11
16#endif
17
18#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS
19#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23
20#endif
21
22#define FUNC_NAME NG4copy_to_user
23#define STORE(type,src,addr) type##a src, [addr] %asi
24#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS
25#define EX_RETVAL(x) 0
26
27#ifdef __KERNEL__
28 /* Writing to %asi is _expensive_ so we hardcode it.
29 * Reading %asi to check for KERNEL_DS is comparatively
30 * cheap.
31 */
32#define PREAMBLE \
33 rd %asi, %g1; \
34 cmp %g1, ASI_AIUS; \
35 bne,pn %icc, ___copy_in_user; \
36 nop
37#endif
38
39#include "NG4memcpy.S"
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
new file mode 100644
index 000000000000..9cf2ee01cee3
--- /dev/null
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -0,0 +1,360 @@
1/* NG4memcpy.S: Niagara-4 optimized memcpy.
2 *
3 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
4 */
5
6#ifdef __KERNEL__
7#include <asm/visasm.h>
8#include <asm/asi.h>
9#define GLOBAL_SPARE %g7
10#else
11#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
12#define FPRS_FEF 0x04
13
14/* On T4 it is very expensive to access ASRs like %fprs and
 15 * %asi; avoiding a read or a write can save ~50 cycles.
16 */
17#define FPU_ENTER \
18 rd %fprs, %o5; \
19 andcc %o5, FPRS_FEF, %g0; \
20 be,a,pn %icc, 999f; \
21 wr %g0, FPRS_FEF, %fprs; \
22 999:
23
24#ifdef MEMCPY_DEBUG
25#define VISEntryHalf FPU_ENTER; \
26 clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
27#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
28#else
29#define VISEntryHalf FPU_ENTER
30#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
31#endif
32
33#define GLOBAL_SPARE %g5
34#endif
35
36#ifndef STORE_ASI
37#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
38#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P
39#else
40#define STORE_ASI 0x80 /* ASI_P */
41#endif
42#endif
43
44#ifndef EX_LD
45#define EX_LD(x) x
46#endif
47
48#ifndef EX_ST
49#define EX_ST(x) x
50#endif
51
52#ifndef EX_RETVAL
53#define EX_RETVAL(x) x
54#endif
55
56#ifndef LOAD
57#define LOAD(type,addr,dest) type [addr], dest
58#endif
59
60#ifndef STORE
61#ifndef MEMCPY_DEBUG
62#define STORE(type,src,addr) type src, [addr]
63#else
64#define STORE(type,src,addr) type##a src, [addr] %asi
65#endif
66#endif
67
68#ifndef STORE_INIT
69#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI
70#endif
71
72#ifndef FUNC_NAME
73#define FUNC_NAME NG4memcpy
74#endif
75#ifndef PREAMBLE
76#define PREAMBLE
77#endif
78
79#ifndef XCC
80#define XCC xcc
81#endif
82
83 .register %g2,#scratch
84 .register %g3,#scratch
85
86 .text
87 .align 64
88
89 .globl FUNC_NAME
90 .type FUNC_NAME,#function
91FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
92#ifdef MEMCPY_DEBUG
93 wr %g0, 0x80, %asi
94#endif
95 srlx %o2, 31, %g2
96 cmp %g2, 0
97 tne %XCC, 5
98 PREAMBLE
99 mov %o0, %o3
100 brz,pn %o2, .Lexit
101 cmp %o2, 3
102 ble,pn %icc, .Ltiny
103 cmp %o2, 19
104 ble,pn %icc, .Lsmall
105 or %o0, %o1, %g2
106 cmp %o2, 128
107 bl,pn %icc, .Lmedium
108 nop
109
110.Llarge:/* len >= 0x80 */
111 /* First get dest 8 byte aligned. */
112 sub %g0, %o0, %g1
113 and %g1, 0x7, %g1
114 brz,pt %g1, 51f
115 sub %o2, %g1, %o2
116
1171: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
118 add %o1, 1, %o1
119 subcc %g1, 1, %g1
120 add %o0, 1, %o0
121 bne,pt %icc, 1b
122 EX_ST(STORE(stb, %g2, %o0 - 0x01))
123
12451: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
125 LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
126 LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
127 LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
128 LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
129 LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
130 LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
131 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
132
133 /* Check if we can use the straight fully aligned
 134 * loop, or whether we require the alignaddr/faligndata variant.
135 */
136 andcc %o1, 0x7, %o5
137 bne,pn %icc, .Llarge_src_unaligned
138 sub %g0, %o0, %g1
139
140 /* Legitimize the use of initializing stores by getting dest
141 * to be 64-byte aligned.
142 */
143 and %g1, 0x3f, %g1
144 brz,pt %g1, .Llarge_aligned
145 sub %o2, %g1, %o2
146
1471: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
148 add %o1, 8, %o1
149 subcc %g1, 8, %g1
150 add %o0, 8, %o0
151 bne,pt %icc, 1b
152 EX_ST(STORE(stx, %g2, %o0 - 0x08))
153
154.Llarge_aligned:
155 /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
156 andn %o2, 0x3f, %o4
157 sub %o2, %o4, %o2
158
1591: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
160 add %o1, 0x40, %o1
161 EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
162 subcc %o4, 0x40, %o4
163 EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
164 EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
+	EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
+	EX_ST(STORE_INIT(%g1, %o0))
+	add		%o0, 0x08, %o0
+	EX_ST(STORE_INIT(%g2, %o0))
+	add		%o0, 0x08, %o0
+	EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
+	EX_ST(STORE_INIT(%g3, %o0))
+	add		%o0, 0x08, %o0
+	EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
+	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+	add		%o0, 0x08, %o0
+	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
+	EX_ST(STORE_INIT(%o5, %o0))
+	add		%o0, 0x08, %o0
+	EX_ST(STORE_INIT(%g2, %o0))
+	add		%o0, 0x08, %o0
+	EX_ST(STORE_INIT(%g3, %o0))
+	add		%o0, 0x08, %o0
+	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+	add		%o0, 0x08, %o0
+	bne,pt		%icc, 1b
+	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
+
+	membar		#StoreLoad | #StoreStore
+
+	brz,pn		%o2, .Lexit
+	 cmp		%o2, 19
+	ble,pn		%icc, .Lsmall_unaligned
+	 nop
+	ba,a,pt		%icc, .Lmedium_noprefetch
+
+.Lexit:	retl
+	 mov		EX_RETVAL(%o3), %o0
+
+.Llarge_src_unaligned:
+	andn		%o2, 0x3f, %o4
+	sub		%o2, %o4, %o2
+	VISEntryHalf
+	alignaddr	%o1, %g0, %g1
+	add		%o1, %o4, %o1
+	EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
+1:	EX_LD(LOAD(ldd, %g1 + 0x08, %f2))
+	subcc		%o4, 0x40, %o4
+	EX_LD(LOAD(ldd, %g1 + 0x10, %f4))
+	EX_LD(LOAD(ldd, %g1 + 0x18, %f6))
+	EX_LD(LOAD(ldd, %g1 + 0x20, %f8))
+	EX_LD(LOAD(ldd, %g1 + 0x28, %f10))
+	EX_LD(LOAD(ldd, %g1 + 0x30, %f12))
+	EX_LD(LOAD(ldd, %g1 + 0x38, %f14))
+	faligndata	%f0, %f2, %f16
+	EX_LD(LOAD(ldd, %g1 + 0x40, %f0))
+	faligndata	%f2, %f4, %f18
+	add		%g1, 0x40, %g1
+	faligndata	%f4, %f6, %f20
+	faligndata	%f6, %f8, %f22
+	faligndata	%f8, %f10, %f24
+	faligndata	%f10, %f12, %f26
+	faligndata	%f12, %f14, %f28
+	faligndata	%f14, %f0, %f30
+	EX_ST(STORE(std, %f16, %o0 + 0x00))
+	EX_ST(STORE(std, %f18, %o0 + 0x08))
+	EX_ST(STORE(std, %f20, %o0 + 0x10))
+	EX_ST(STORE(std, %f22, %o0 + 0x18))
+	EX_ST(STORE(std, %f24, %o0 + 0x20))
+	EX_ST(STORE(std, %f26, %o0 + 0x28))
+	EX_ST(STORE(std, %f28, %o0 + 0x30))
+	EX_ST(STORE(std, %f30, %o0 + 0x38))
+	add		%o0, 0x40, %o0
+	bne,pt		%icc, 1b
+	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
+	VISExitHalf
+
+	brz,pn		%o2, .Lexit
+	 cmp		%o2, 19
+	ble,pn		%icc, .Lsmall_unaligned
+	 nop
+	ba,a,pt		%icc, .Lmedium_unaligned
+
+.Lmedium:
+	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
+	andcc		%g2, 0x7, %g0
+	bne,pn		%icc, .Lmedium_unaligned
+	 nop
+.Lmedium_noprefetch:
+	andncc		%o2, 0x20 - 1, %o5
+	be,pn		%icc, 2f
+	 sub		%o2, %o5, %o2
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+	EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
+	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
+	EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
+	add		%o1, 0x20, %o1
+	subcc		%o5, 0x20, %o5
+	EX_ST(STORE(stx, %g1, %o0 + 0x00))
+	EX_ST(STORE(stx, %g2, %o0 + 0x08))
+	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
+	EX_ST(STORE(stx, %o4, %o0 + 0x18))
+	bne,pt		%icc, 1b
+	 add		%o0, 0x20, %o0
+2:	andcc		%o2, 0x18, %o5
+	be,pt		%icc, 3f
+	 sub		%o2, %o5, %o2
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+	add		%o1, 0x08, %o1
+	add		%o0, 0x08, %o0
+	subcc		%o5, 0x08, %o5
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stx, %g1, %o0 - 0x08))
+3:	brz,pt		%o2, .Lexit
+	 cmp		%o2, 0x04
+	bl,pn		%icc, .Ltiny
+	 nop
+	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+	add		%o1, 0x04, %o1
+	add		%o0, 0x04, %o0
+	subcc		%o2, 0x04, %o2
+	bne,pn		%icc, .Ltiny
+	 EX_ST(STORE(stw, %g1, %o0 - 0x04))
+	ba,a,pt		%icc, .Lexit
+.Lmedium_unaligned:
+	/* First get dest 8 byte aligned.  */
+	sub		%g0, %o0, %g1
+	and		%g1, 0x7, %g1
+	brz,pt		%g1, 2f
+	 sub		%o2, %g1, %o2
+
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+	add		%o1, 1, %o1
+	subcc		%g1, 1, %g1
+	add		%o0, 1, %o0
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stb, %g2, %o0 - 0x01))
+2:
+	and		%o1, 0x7, %g1
+	brz,pn		%g1, .Lmedium_noprefetch
+	 sll		%g1, 3, %g1
+	mov		64, %g2
+	sub		%g2, %g1, %g2
+	andn		%o1, 0x7, %o1
+	EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
+	sllx		%o4, %g1, %o4
+	andn		%o2, 0x08 - 1, %o5
+	sub		%o2, %o5, %o2
+1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
+	add		%o1, 0x08, %o1
+	subcc		%o5, 0x08, %o5
+	srlx		%g3, %g2, GLOBAL_SPARE
+	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
+	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
+	add		%o0, 0x08, %o0
+	bne,pt		%icc, 1b
+	 sllx		%g3, %g1, %o4
+	srl		%g1, 3, %g1
+	add		%o1, %g1, %o1
+	brz,pn		%o2, .Lexit
+	 nop
+	ba,pt		%icc, .Lsmall_unaligned
+
+.Ltiny:
+	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+	subcc		%o2, 1, %o2
+	be,pn		%icc, .Lexit
+	 EX_ST(STORE(stb, %g1, %o0 + 0x00))
+	EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
+	subcc		%o2, 1, %o2
+	be,pn		%icc, .Lexit
+	 EX_ST(STORE(stb, %g1, %o0 + 0x01))
+	EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
+	ba,pt		%icc, .Lexit
+	 EX_ST(STORE(stb, %g1, %o0 + 0x02))
+
+.Lsmall:
+	andcc		%g2, 0x3, %g0
+	bne,pn		%icc, .Lsmall_unaligned
+	 andn		%o2, 0x4 - 1, %o5
+	sub		%o2, %o5, %o2
+1:
+	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+	add		%o1, 0x04, %o1
+	subcc		%o5, 0x04, %o5
+	add		%o0, 0x04, %o0
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stw, %g1, %o0 - 0x04))
+	brz,pt		%o2, .Lexit
+	 nop
+	ba,a,pt		%icc, .Ltiny
+
+.Lsmall_unaligned:
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+	add		%o1, 1, %o1
+	add		%o0, 1, %o0
+	subcc		%o2, 1, %o2
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stb, %g1, %o0 - 0x01))
+	ba,a,pt		%icc, .Lexit
+	.size		FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
new file mode 100644
index 000000000000..c21c34c61dda
--- /dev/null
+++ b/arch/sparc/lib/NG4patch.S
@@ -0,0 +1,43 @@
+/* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant.
+ *
+ * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
+ */
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define NG_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	niagara4_patch_copyops
+	.type	niagara4_patch_copyops,#function
+niagara4_patch_copyops:
+	NG_DO_PATCH(memcpy, NG4memcpy)
+	NG_DO_PATCH(___copy_from_user, NG4copy_from_user)
+	NG_DO_PATCH(___copy_to_user, NG4copy_to_user)
+	retl
+	 nop
+	.size	niagara4_patch_copyops,.-niagara4_patch_copyops
+
+	.globl	niagara4_patch_pageops
+	.type	niagara4_patch_pageops,#function
+niagara4_patch_pageops:
+	NG_DO_PATCH(copy_user_page, NG4copy_user_page)
+	NG_DO_PATCH(_clear_page, NGclear_page)
+	NG_DO_PATCH(clear_user_page, NGclear_user_page)
+	retl
+	 nop
+	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
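For readers following the patching mechanism: NG_DO_PATCH takes the byte offset from OLD to NEW, truncates it with the sll-by-11 / srl-by-13 pair into a 19-bit word displacement, OR's that into the branch-always opcode (0x10680000 looks like a "ba,pt %xcc" encoding with an empty disp19 field), and writes the branch plus a NOP over the first two instructions of OLD. A minimal user-space sketch of that encoding, not part of the patch, using hypothetical addresses in place of memcpy/NG4memcpy:

/*
 * Illustrative sketch (assumptions: plain C, hypothetical addresses);
 * it mirrors the arithmetic NG_DO_PATCH performs on the low 32 bits.
 */
#include <stdint.h>
#include <stdio.h>

#define BRANCH_ALWAYS	0x10680000u	/* branch-always opcode, disp19 = 0 */
#define NOP		0x01000000u

static uint32_t ng_patch_word(uint32_t old_addr, uint32_t new_addr)
{
	uint32_t disp = new_addr - old_addr;	/* byte offset, may wrap for backward branches */

	disp <<= 11;				/* keep the low 21 bits of the offset ...      */
	disp >>= 11 + 2;			/* ... and convert bytes to a 19-bit word count */
	return BRANCH_ALWAYS | disp;		/* fill the disp19 field of the branch         */
}

int main(void)
{
	uint32_t old_addr = 0x00469000;		/* hypothetical OLD (e.g. memcpy)    */
	uint32_t new_addr = 0x0046a400;		/* hypothetical NEW (e.g. NG4memcpy) */

	printf("patched word 0: %08x\n", ng_patch_word(old_addr, new_addr));
	printf("patched word 1: %08x\n", NOP);	/* delay slot of the written branch  */
	return 0;
}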
diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S
index b9e790b9c6b8..423d46e2258b 100644
--- a/arch/sparc/lib/NGpage.S
+++ b/arch/sparc/lib/NGpage.S
@@ -59,6 +59,8 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
 	 restore
 
 	.align		32
+	.globl		NGclear_page
+	.globl		NGclear_user_page
 NGclear_page:		/* %o0=dest */
 NGclear_user_page:	/* %o0=dest, %o1=vaddr */
 	rd		%asi, %g3
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index 3b31218cafc6..ee31b884c61b 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -134,6 +134,10 @@ EXPORT_SYMBOL(copy_user_page);
 void VISenter(void);
 EXPORT_SYMBOL(VISenter);
 
+/* CRYPTO code needs this */
+void VISenterhalf(void);
+EXPORT_SYMBOL(VISenterhalf);
+
 extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
 extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
 		unsigned long *);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index d58edf5fefdb..696bb095e0fc 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -51,22 +51,40 @@
 
 #include "init_64.h"
 
-unsigned long kern_linear_pte_xor[2] __read_mostly;
+unsigned long kern_linear_pte_xor[4] __read_mostly;
 
-/* A bitmap, one bit for every 256MB of physical memory.  If the bit
- * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else
- * if set we should use a 256MB page (via kern_linear_pte_xor[1]).
+/* A bitmap, two bits for every 256MB of physical memory.  These two
+ * bits determine what page size we use for kernel linear
+ * translations.  They form an index into kern_linear_pte_xor[].  The
+ * value in the indexed slot is XOR'd with the TLB miss virtual
+ * address to form the resulting TTE.  The mapping is:
+ *
+ *	0	==>	4MB
+ *	1	==>	256MB
+ *	2	==>	2GB
+ *	3	==>	16GB
+ *
+ * All sun4v chips support 256MB pages.  Only SPARC-T4 and later
+ * support 2GB pages, and hopefully future cpus will support the 16GB
+ * pages as well.  For slots 2 and 3, we encode a 256MB TTE xor there
+ * if these larger page sizes are not supported by the cpu.
+ *
+ * It would be nice to determine this from the machine description
+ * 'cpu' properties, but we need to have this table setup before the
+ * MDESC is initialized.
  */
 unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
-/* A special kernel TSB for 4MB and 256MB linear mappings.
- * Space is allocated for this right after the trap table
- * in arch/sparc64/kernel/head.S
+/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
+ * Space is allocated for this right after the trap table in
+ * arch/sparc64/kernel/head.S
  */
 extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 #endif
 
+static unsigned long cpu_pgsz_mask;
+
 #define MAX_BANKS	32
 
 static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata;
@@ -403,6 +421,12 @@ EXPORT_SYMBOL(flush_icache_range);
 
 void mmu_info(struct seq_file *m)
 {
+	static const char *pgsz_strings[] = {
+		"8K", "64K", "512K", "4MB", "32MB",
+		"256MB", "2GB", "16GB",
+	};
+	int i, printed;
+
 	if (tlb_type == cheetah)
 		seq_printf(m, "MMU Type\t: Cheetah\n");
 	else if (tlb_type == cheetah_plus)
@@ -414,6 +438,17 @@ void mmu_info(struct seq_file *m)
 	else
 		seq_printf(m, "MMU Type\t: ???\n");
 
+	seq_printf(m, "MMU PGSZs\t: ");
+	printed = 0;
+	for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
+		if (cpu_pgsz_mask & (1UL << i)) {
+			seq_printf(m, "%s%s",
+				   printed ? "," : "", pgsz_strings[i]);
+			printed++;
+		}
+	}
+	seq_putc(m, '\n');
+
 #ifdef CONFIG_DEBUG_DCFLUSH
 	seq_printf(m, "DCPageFlushes\t: %d\n",
 		   atomic_read(&dcpage_flushes));
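mmu_info() supplies the MMU lines of the sparc64 /proc/cpuinfo output, so the loop above adds a page-size line next to the existing "MMU Type" one. On a sun4v machine whose machine description advertises the larger sizes it would read roughly like the following (illustrative only; the exact list follows cpu_pgsz_mask):

	MMU PGSZs	: 8K,64K,4MB,256MB,2GB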
@@ -1358,32 +1393,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 extern unsigned int kvmap_linear_patch[1];
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
-static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
+static void __init kpte_set_val(unsigned long index, unsigned long val)
 {
-	const unsigned long shift_256MB = 28;
-	const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL);
-	const unsigned long size_256MB = (1UL << shift_256MB);
+	unsigned long *ptr = kpte_linear_bitmap;
 
-	while (start < end) {
-		long remains;
+	val <<= ((index % (BITS_PER_LONG / 2)) * 2);
+	ptr += (index / (BITS_PER_LONG / 2));
 
-		remains = end - start;
-		if (remains < size_256MB)
-			break;
+	*ptr |= val;
+}
 
-		if (start & mask_256MB) {
-			start = (start + size_256MB) & ~mask_256MB;
-			continue;
-		}
+static const unsigned long kpte_shift_min = 28; /* 256MB */
+static const unsigned long kpte_shift_max = 34; /* 16GB */
+static const unsigned long kpte_shift_incr = 3;
+
+static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
+					   unsigned long shift)
+{
+	unsigned long size = (1UL << shift);
+	unsigned long mask = (size - 1UL);
+	unsigned long remains = end - start;
+	unsigned long val;
+
+	if (remains < size || (start & mask))
+		return start;
+
+	/* VAL maps:
+	 *
+	 *	shift 28 --> kern_linear_pte_xor index 1
+	 *	shift 31 --> kern_linear_pte_xor index 2
+	 *	shift 34 --> kern_linear_pte_xor index 3
+	 */
+	val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
+
+	remains &= ~mask;
+	if (shift != kpte_shift_max)
+		remains = size;
 
-		while (remains >= size_256MB) {
-			unsigned long index = start >> shift_256MB;
+	while (remains) {
+		unsigned long index = start >> kpte_shift_min;
 
-			__set_bit(index, kpte_linear_bitmap);
+		kpte_set_val(index, val);
 
-			start += size_256MB;
-			remains -= size_256MB;
+		start += 1UL << kpte_shift_min;
+		remains -= 1UL << kpte_shift_min;
+	}
+
+	return start;
+}
+
+static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
+{
+	unsigned long smallest_size, smallest_mask;
+	unsigned long s;
+
+	smallest_size = (1UL << kpte_shift_min);
+	smallest_mask = (smallest_size - 1UL);
+
+	while (start < end) {
+		unsigned long orig_start = start;
+
+		for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
+			start = kpte_mark_using_shift(start, end, s);
+
+			if (start != orig_start)
+				break;
 		}
+
+		if (start == orig_start)
+			start = (start + smallest_size) & ~smallest_mask;
 	}
 }
 
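To make the new encoding concrete: every 256MB chunk of the linear mapping gets a two-bit code (0 = 4MB, 1 = 256MB, 2 = 2GB, 3 = 16GB), packed BITS_PER_LONG/2 codes per word of kpte_linear_bitmap[]. A hedged sketch of the matching read side, mirroring kpte_set_val() above (kpte_get_val is not part of this patch; the kernel's TLB-miss handlers do the equivalent lookup in assembler):

/* Hypothetical helper, shown only to illustrate the bitmap layout. */
#define BITS_PER_LONG 64	/* sparc64 */

static unsigned long kpte_get_val(const unsigned long *bitmap,
				  unsigned long index)
{
	const unsigned long *ptr = bitmap + (index / (BITS_PER_LONG / 2));
	unsigned long shift = (index % (BITS_PER_LONG / 2)) * 2;

	return (*ptr >> shift) & 0x3;	/* index into kern_linear_pte_xor[] */
}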
@@ -1577,13 +1655,16 @@ static void __init sun4v_ktsb_init(void)
 	ktsb_descr[0].resv = 0;
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
-	/* Second KTSB for 4MB/256MB mappings.  */
+	/* Second KTSB for 4MB/256MB/2GB/16GB mappings.  */
 	ktsb_pa = (kern_base +
 		   ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
 
 	ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
-	ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB |
-				   HV_PGSZ_MASK_256MB);
+	ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB |
+				    HV_PGSZ_MASK_256MB |
+				    HV_PGSZ_MASK_2GB |
+				    HV_PGSZ_MASK_16GB) &
+				    cpu_pgsz_mask);
 	ktsb_descr[1].assoc = 1;
 	ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
 	ktsb_descr[1].ctx_idx = 0;
@@ -1606,6 +1687,47 @@ void __cpuinit sun4v_ktsb_register(void)
 	}
 }
 
+static void __init sun4u_linear_pte_xor_finalize(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	/* This is where we would add Panther support for
+	 * 32MB and 256MB pages.
+	 */
+#endif
+}
+
+static void __init sun4v_linear_pte_xor_finalize(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
+		kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
+			0xfffff80000000000UL;
+		kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+					   _PAGE_P_4V | _PAGE_W_4V);
+	} else {
+		kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
+	}
+
+	if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
+		kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
+			0xfffff80000000000UL;
+		kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+					   _PAGE_P_4V | _PAGE_W_4V);
+	} else {
+		kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
+	}
+
+	if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
+		kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
+			0xfffff80000000000UL;
+		kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+					   _PAGE_P_4V | _PAGE_W_4V);
+	} else {
+		kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
+	}
+#endif
+}
+
 /* paging_init() sets up the page tables */
 
 static unsigned long last_valid_pfn;
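A brief aside on how these xor values are consumed (a sketch of the idea, not of the exact TLB-miss code): the kernel linear map places physical address PA at virtual address 0xfffff80000000000 + PA, and each kern_linear_pte_xor[] entry has that same base folded in, so XORing the faulting linear-map address with the selected entry cancels the base and leaves PA together with the valid, size and protection bits. With a made-up PA of 0x80000000:

	vaddr             = 0xfffff80000000000 + 0x80000000 = 0xfffff80080000000
	xor value         = (_PAGE_VALID | size/protection bits) ^ 0xfffff80000000000
	vaddr ^ xor value = 0x80000000 | _PAGE_VALID | size/protection bits

which is exactly the TTE the miss handler needs, as the comment added earlier in this file describes.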
@@ -1665,10 +1787,8 @@ void __init paging_init(void)
 		ktsb_phys_patch();
 	}
 
-	if (tlb_type == hypervisor) {
+	if (tlb_type == hypervisor)
 		sun4v_patch_tlb_handlers();
-		sun4v_ktsb_init();
-	}
 
 	/* Find available physical memory...
 	 *
@@ -1727,9 +1847,6 @@ void __init paging_init(void)
 
 	__flush_tlb_all();
 
-	if (tlb_type == hypervisor)
-		sun4v_ktsb_register();
-
 	prom_build_devicetree();
 	of_populate_present_mask();
 #ifndef CONFIG_SMP
@@ -1742,8 +1859,36 @@ void __init paging_init(void)
 #ifndef CONFIG_SMP
 		mdesc_fill_in_cpu_data(cpu_all_mask);
 #endif
+		mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask);
+
+		sun4v_linear_pte_xor_finalize();
+
+		sun4v_ktsb_init();
+		sun4v_ktsb_register();
+	} else {
+		unsigned long impl, ver;
+
+		cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
+				 HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
+
+		__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+		impl = ((ver >> 32) & 0xffff);
+		if (impl == PANTHER_IMPL)
+			cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB |
+					  HV_PGSZ_MASK_256MB);
+
+		sun4u_linear_pte_xor_finalize();
 	}
 
+	/* Flush the TLBs and the 4M TSB so that the updated linear
+	 * pte XOR settings are realized for all mappings.
+	 */
+	__flush_tlb_all();
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
+#endif
+	__flush_tlb_all();
+
 	/* Setup bootmem... */
 	last_valid_pfn = end_pfn = bootmem_init(phys_base);
 
@@ -2110,6 +2255,7 @@ static void __init sun4u_pgprot_init(void)
 {
 	unsigned long page_none, page_shared, page_copy, page_readonly;
 	unsigned long page_exec_bit;
+	int i;
 
 	PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
 				_PAGE_CACHE_4U | _PAGE_P_4U |
@@ -2137,8 +2283,8 @@ static void __init sun4u_pgprot_init(void)
 	kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
 				   _PAGE_P_4U | _PAGE_W_4U);
 
-	/* XXX Should use 256MB on Panther. XXX */
-	kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
+	for (i = 1; i < 4; i++)
+		kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
 
 	_PAGE_SZBITS = _PAGE_SZBITS_4U;
 	_PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
@@ -2164,6 +2310,7 @@ static void __init sun4v_pgprot_init(void)
 {
 	unsigned long page_none, page_shared, page_copy, page_readonly;
 	unsigned long page_exec_bit;
+	int i;
 
 	PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
 				_PAGE_CACHE_4V | _PAGE_P_4V |
@@ -2185,15 +2332,8 @@ static void __init sun4v_pgprot_init(void)
 	kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
 				   _PAGE_P_4V | _PAGE_W_4V);
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^
-		0xfffff80000000000UL;
-#else
-	kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
-		0xfffff80000000000UL;
-#endif
-	kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
-				   _PAGE_P_4V | _PAGE_W_4V);
+	for (i = 1; i < 4; i++)
+		kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
 
 	pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
 		     __ACCESS_BITS_4V | _PAGE_E_4V);
diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h
index 3e1ac8b96cae..0661aa606dec 100644
--- a/arch/sparc/mm/init_64.h
+++ b/arch/sparc/mm/init_64.h
@@ -8,12 +8,12 @@
 #define MAX_PHYS_ADDRESS	(1UL << 41UL)
 #define KPTE_BITMAP_CHUNK_SZ	(256UL * 1024UL * 1024UL)
 #define KPTE_BITMAP_BYTES	\
-	((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8)
+	((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
 #define VALID_ADDR_BITMAP_CHUNK_SZ	(4UL * 1024UL * 1024UL)
 #define VALID_ADDR_BITMAP_BYTES	\
 	((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
 
-extern unsigned long kern_linear_pte_xor[2];
+extern unsigned long kern_linear_pte_xor[4];
 extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 extern unsigned int sparc64_highest_unlocked_tlb_ent;
 extern unsigned long sparc64_kern_pri_context;
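A quick sanity check of the new sizing, using the constants above: MAX_PHYS_ADDRESS is 2^41 bytes and KPTE_BITMAP_CHUNK_SZ is 2^28 bytes (256MB), so there are 2^13 = 8192 chunks to describe. At the old one bit per chunk that was 8192 / 8 = 1024 bytes of bitmap; at two bits per chunk it becomes 8192 / 4 = 2048 bytes, which is what the "/ 4" divisor now encodes.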
diff --git a/crypto/Kconfig b/crypto/Kconfig
index a3238051b03e..94f232f96d03 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -336,6 +336,15 @@ config CRYPTO_CRC32C_INTEL
 	  gain performance compared with software implementation.
 	  Module will be crc32c-intel.
 
+config CRYPTO_CRC32C_SPARC64
+	tristate "CRC32c CRC algorithm (SPARC64)"
+	depends on SPARC64
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  CRC32c CRC algorithm implemented using sparc64 crypto instructions,
+	  when available.
+
 config CRYPTO_GHASH
 	tristate "GHASH digest algorithm"
 	select CRYPTO_GF128MUL
@@ -354,6 +363,15 @@ config CRYPTO_MD5
 	help
 	  MD5 message digest algorithm (RFC1321).
 
+config CRYPTO_MD5_SPARC64
+	tristate "MD5 digest algorithm (SPARC64)"
+	depends on SPARC64
+	select CRYPTO_MD5
+	select CRYPTO_HASH
+	help
+	  MD5 message digest algorithm (RFC1321) implemented
+	  using sparc64 crypto instructions, when available.
+
 config CRYPTO_MICHAEL_MIC
 	tristate "Michael MIC keyed digest algorithm"
 	select CRYPTO_HASH
@@ -433,6 +451,15 @@ config CRYPTO_SHA1_SSSE3
 	  using Supplemental SSE3 (SSSE3) instructions or Advanced Vector
 	  Extensions (AVX), when available.
 
+config CRYPTO_SHA1_SPARC64
+	tristate "SHA1 digest algorithm (SPARC64)"
+	depends on SPARC64
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using sparc64 crypto instructions, when available.
+
 config CRYPTO_SHA256
 	tristate "SHA224 and SHA256 digest algorithm"
 	select CRYPTO_HASH
@@ -445,6 +472,15 @@ config CRYPTO_SHA256
 	  This code also includes SHA-224, a 224 bit hash with 112 bits
 	  of security against collision attacks.
 
+config CRYPTO_SHA256_SPARC64
+	tristate "SHA224 and SHA256 digest algorithm (SPARC64)"
+	depends on SPARC64
+	select CRYPTO_SHA256
+	select CRYPTO_HASH
+	help
+	  SHA-256 secure hash standard (DFIPS 180-2) implemented
+	  using sparc64 crypto instructions, when available.
+
 config CRYPTO_SHA512
 	tristate "SHA384 and SHA512 digest algorithms"
 	select CRYPTO_HASH
@@ -457,6 +493,15 @@ config CRYPTO_SHA512
 	  This code also includes SHA-384, a 384 bit hash with 192 bits
 	  of security against collision attacks.
 
+config CRYPTO_SHA512_SPARC64
+	tristate "SHA384 and SHA512 digest algorithm (SPARC64)"
+	depends on SPARC64
+	select CRYPTO_SHA512
+	select CRYPTO_HASH
+	help
+	  SHA-512 secure hash standard (DFIPS 180-2) implemented
+	  using sparc64 crypto instructions, when available.
+
 config CRYPTO_TGR192
 	tristate "Tiger digest algorithms"
 	select CRYPTO_HASH
@@ -588,6 +633,34 @@ config CRYPTO_AES_NI_INTEL
 	  ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
 	  acceleration for CTR.
 
+config CRYPTO_AES_SPARC64
+	tristate "AES cipher algorithms (SPARC64)"
+	depends on SPARC64
+	select CRYPTO_CRYPTD
+	select CRYPTO_ALGAPI
+	help
+	  Use SPARC64 crypto opcodes for AES algorithm.
+
+	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
+	  algorithm.
+
+	  Rijndael appears to be consistently a very good performer in
+	  both hardware and software across a wide range of computing
+	  environments regardless of its use in feedback or non-feedback
+	  modes. Its key setup time is excellent, and its key agility is
+	  good. Rijndael's very low memory requirements make it very well
+	  suited for restricted-space environments, in which it also
+	  demonstrates excellent performance. Rijndael's operations are
+	  among the easiest to defend against power and timing attacks.
+
+	  The AES specifies three key sizes: 128, 192 and 256 bits
+
+	  See <http://csrc.nist.gov/encryption/aes/> for more information.
+
+	  In addition to AES cipher algorithm support, the acceleration
+	  for some popular block cipher mode is supported too, including
+	  ECB and CBC.
+
 config CRYPTO_ANUBIS
 	tristate "Anubis cipher algorithm"
 	select CRYPTO_ALGAPI
@@ -685,6 +758,22 @@ config CRYPTO_CAMELLIA_X86_64
 	  See also:
 	  <https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html>
 
+config CRYPTO_CAMELLIA_SPARC64
+	tristate "Camellia cipher algorithm (SPARC64)"
+	depends on SPARC64
+	depends on CRYPTO
+	select CRYPTO_ALGAPI
+	help
+	  Camellia cipher algorithm module (SPARC64).
+
+	  Camellia is a symmetric key block cipher developed jointly
+	  at NTT and Mitsubishi Electric Corporation.
+
+	  The Camellia specifies three key sizes: 128, 192 and 256 bits.
+
+	  See also:
+	  <https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html>
+
 config CRYPTO_CAST5
 	tristate "CAST5 (CAST-128) cipher algorithm"
 	select CRYPTO_ALGAPI
@@ -705,6 +794,14 @@ config CRYPTO_DES
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
+config CRYPTO_DES_SPARC64
+	tristate "DES and Triple DES EDE cipher algorithms (SPARC64)"
+	select CRYPTO_ALGAPI
+	select CRYPTO_DES
+	help
+	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3),
+	  optimized using SPARC64 crypto opcodes.
+
 config CRYPTO_FCRYPT
 	tristate "FCrypt cipher algorithm"
 	select CRYPTO_ALGAPI
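As a usage note: the new entries are enabled like any other Kconfig symbol. A hedged .config fragment for a SPARC-T4 build (module builds chosen arbitrarily; the symbol names are taken directly from the hunks above):

	CONFIG_CRYPTO_CRC32C_SPARC64=m
	CONFIG_CRYPTO_MD5_SPARC64=m
	CONFIG_CRYPTO_SHA1_SPARC64=m
	CONFIG_CRYPTO_SHA256_SPARC64=m
	CONFIG_CRYPTO_SHA512_SPARC64=m
	CONFIG_CRYPTO_AES_SPARC64=m
	CONFIG_CRYPTO_CAMELLIA_SPARC64=m
	CONFIG_CRYPTO_DES_SPARC64=m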
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index a8bd0310f8fe..aab257403b4a 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -42,7 +42,7 @@ MODULE_DESCRIPTION("Niagara2 Crypto driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
 
-#define N2_CRA_PRIORITY		300
+#define N2_CRA_PRIORITY		200
 
 static DEFINE_MUTEX(spu_lock);
 
diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c
index 4b9939726c34..b160073e54b6 100644
--- a/drivers/sbus/char/display7seg.c
+++ b/drivers/sbus/char/display7seg.c
@@ -150,7 +150,7 @@ static long d7s_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		regs |= D7S_FLIP;
 		writeb(regs, p->regs);
 		break;
-	};
+	}
 	mutex_unlock(&d7s_mutex);
 
 	return error;
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 339fd6f65eda..0bc18569f9c0 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -353,7 +353,7 @@ static int envctrl_i2c_data_translate(unsigned char data, int translate_type,
 
 	default:
 		break;
-	};
+	}
 
 	return len;
 }
@@ -644,7 +644,7 @@ envctrl_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	default:
 		break;
 
-	};
+	}
 
 	return ret;
 }
@@ -687,7 +687,7 @@ envctrl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
@@ -947,7 +947,7 @@ static void envctrl_init_i2c_child(struct device_node *dp,
 
 		default:
 			break;
-		};
+		}
 	}
 }
 
diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c
index 2236aea3ca2f..5843288f64bc 100644
--- a/drivers/sbus/char/openprom.c
+++ b/drivers/sbus/char/openprom.c
@@ -222,7 +222,7 @@ static int opromnext(void __user *argp, unsigned int cmd, struct device_node *dp
 		case OPROMSETCUR:
 		default:
 			break;
-		};
+		}
 	} else {
 		/* Sibling of node zero is the root node.  */
 		if (cmd != OPROMNEXT)
@@ -588,7 +588,7 @@ static int openprom_bsd_ioctl(struct file * file,
 	default:
 		err = -EINVAL;
 		break;
-	};
+	}
 	mutex_unlock(&openprom_mutex);
 
 	return err;