diff options
Diffstat (limited to 'arch/sparc')
84 files changed, 7867 insertions, 729 deletions
diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild index 5cd01161fd00..675afa285ddb 100644 --- a/arch/sparc/Kbuild +++ b/arch/sparc/Kbuild | |||
@@ -6,3 +6,4 @@ obj-y += kernel/ | |||
6 | obj-y += mm/ | 6 | obj-y += mm/ |
7 | obj-y += math-emu/ | 7 | obj-y += math-emu/ |
8 | obj-y += net/ | 8 | obj-y += net/ |
9 | obj-y += crypto/ | ||
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 67f1f6f5f4e1..91c780c973ba 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig | |||
@@ -18,6 +18,7 @@ config SPARC | |||
18 | select HAVE_OPROFILE | 18 | select HAVE_OPROFILE |
19 | select HAVE_ARCH_KGDB if !SMP || SPARC64 | 19 | select HAVE_ARCH_KGDB if !SMP || SPARC64 |
20 | select HAVE_ARCH_TRACEHOOK | 20 | select HAVE_ARCH_TRACEHOOK |
21 | select SYSCTL_EXCEPTION_TRACE | ||
21 | select ARCH_WANT_OPTIONAL_GPIOLIB | 22 | select ARCH_WANT_OPTIONAL_GPIOLIB |
22 | select RTC_CLASS | 23 | select RTC_CLASS |
23 | select RTC_DRV_M48T59 | 24 | select RTC_DRV_M48T59 |
@@ -32,6 +33,7 @@ config SPARC | |||
32 | select GENERIC_PCI_IOMAP | 33 | select GENERIC_PCI_IOMAP |
33 | select HAVE_NMI_WATCHDOG if SPARC64 | 34 | select HAVE_NMI_WATCHDOG if SPARC64 |
34 | select HAVE_BPF_JIT | 35 | select HAVE_BPF_JIT |
36 | select HAVE_DEBUG_BUGVERBOSE | ||
35 | select GENERIC_SMP_IDLE_THREAD | 37 | select GENERIC_SMP_IDLE_THREAD |
36 | select GENERIC_CMOS_UPDATE | 38 | select GENERIC_CMOS_UPDATE |
37 | select GENERIC_CLOCKEVENTS | 39 | select GENERIC_CLOCKEVENTS |
@@ -42,6 +44,7 @@ config SPARC32 | |||
42 | def_bool !64BIT | 44 | def_bool !64BIT |
43 | select GENERIC_ATOMIC64 | 45 | select GENERIC_ATOMIC64 |
44 | select CLZ_TAB | 46 | select CLZ_TAB |
47 | select HAVE_UID16 | ||
45 | 48 | ||
46 | config SPARC64 | 49 | config SPARC64 |
47 | def_bool 64BIT | 50 | def_bool 64BIT |
@@ -59,6 +62,7 @@ config SPARC64 | |||
59 | select HAVE_DYNAMIC_FTRACE | 62 | select HAVE_DYNAMIC_FTRACE |
60 | select HAVE_FTRACE_MCOUNT_RECORD | 63 | select HAVE_FTRACE_MCOUNT_RECORD |
61 | select HAVE_SYSCALL_TRACEPOINTS | 64 | select HAVE_SYSCALL_TRACEPOINTS |
65 | select HAVE_DEBUG_KMEMLEAK | ||
62 | select RTC_DRV_CMOS | 66 | select RTC_DRV_CMOS |
63 | select RTC_DRV_BQ4802 | 67 | select RTC_DRV_BQ4802 |
64 | select RTC_DRV_SUN4V | 68 | select RTC_DRV_SUN4V |
@@ -226,25 +230,6 @@ config EARLYFB | |||
226 | help | 230 | help |
227 | Say Y here to enable a faster early framebuffer boot console. | 231 | Say Y here to enable a faster early framebuffer boot console. |
228 | 232 | ||
229 | choice | ||
230 | prompt "Kernel page size" if SPARC64 | ||
231 | default SPARC64_PAGE_SIZE_8KB | ||
232 | |||
233 | config SPARC64_PAGE_SIZE_8KB | ||
234 | bool "8KB" | ||
235 | help | ||
236 | This lets you select the page size of the kernel. | ||
237 | |||
238 | 8KB and 64KB work quite well, since SPARC ELF sections | ||
239 | provide for up to 64KB alignment. | ||
240 | |||
241 | If you don't know what to do, choose 8KB. | ||
242 | |||
243 | config SPARC64_PAGE_SIZE_64KB | ||
244 | bool "64KB" | ||
245 | |||
246 | endchoice | ||
247 | |||
248 | config SECCOMP | 233 | config SECCOMP |
249 | bool "Enable seccomp to safely compute untrusted bytecode" | 234 | bool "Enable seccomp to safely compute untrusted bytecode" |
250 | depends on SPARC64 && PROC_FS | 235 | depends on SPARC64 && PROC_FS |
@@ -316,23 +301,6 @@ config GENERIC_LOCKBREAK | |||
316 | default y | 301 | default y |
317 | depends on SPARC64 && SMP && PREEMPT | 302 | depends on SPARC64 && SMP && PREEMPT |
318 | 303 | ||
319 | choice | ||
320 | prompt "SPARC64 Huge TLB Page Size" | ||
321 | depends on SPARC64 && HUGETLB_PAGE | ||
322 | default HUGETLB_PAGE_SIZE_4MB | ||
323 | |||
324 | config HUGETLB_PAGE_SIZE_4MB | ||
325 | bool "4MB" | ||
326 | |||
327 | config HUGETLB_PAGE_SIZE_512K | ||
328 | bool "512K" | ||
329 | |||
330 | config HUGETLB_PAGE_SIZE_64K | ||
331 | depends on !SPARC64_PAGE_SIZE_64KB | ||
332 | bool "64K" | ||
333 | |||
334 | endchoice | ||
335 | |||
336 | config NUMA | 304 | config NUMA |
337 | bool "NUMA support" | 305 | bool "NUMA support" |
338 | depends on SPARC64 && SMP | 306 | depends on SPARC64 && SMP |
@@ -571,6 +539,7 @@ config COMPAT | |||
571 | depends on SPARC64 | 539 | depends on SPARC64 |
572 | default y | 540 | default y |
573 | select COMPAT_BINFMT_ELF | 541 | select COMPAT_BINFMT_ELF |
542 | select HAVE_UID16 | ||
574 | select ARCH_WANT_OLD_COMPAT_IPC | 543 | select ARCH_WANT_OLD_COMPAT_IPC |
575 | 544 | ||
576 | config SYSVIPC_COMPAT | 545 | config SYSVIPC_COMPAT |
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile new file mode 100644 index 000000000000..6ae1ad5e502b --- /dev/null +++ b/arch/sparc/crypto/Makefile | |||
@@ -0,0 +1,25 @@ | |||
1 | # | ||
2 | # Arch-specific CryptoAPI modules. | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o | ||
6 | obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o | ||
7 | obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o | ||
8 | obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o | ||
9 | |||
10 | obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o | ||
11 | obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o | ||
12 | obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o | ||
13 | |||
14 | obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o | ||
15 | |||
16 | sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o | ||
17 | sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o | ||
18 | sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o | ||
19 | md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o | ||
20 | |||
21 | aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o | ||
22 | des-sparc64-y := des_asm.o des_glue.o crop_devid.o | ||
23 | camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o | ||
24 | |||
25 | crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o | ||
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S new file mode 100644 index 000000000000..23f6cbb910d3 --- /dev/null +++ b/arch/sparc/crypto/aes_asm.S | |||
@@ -0,0 +1,1535 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
6 | #define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ | ||
7 | AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ | ||
8 | AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ | ||
9 | AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ | ||
10 | AES_EROUND23(KEY_BASE + 6, T0, T1, I1) | ||
11 | |||
12 | #define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
13 | AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ | ||
14 | AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ | ||
15 | AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ | ||
16 | AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ | ||
17 | AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ | ||
18 | AES_EROUND23(KEY_BASE + 6, T0, T1, I1) \ | ||
19 | AES_EROUND01(KEY_BASE + 4, T2, T3, I2) \ | ||
20 | AES_EROUND23(KEY_BASE + 6, T2, T3, I3) | ||
21 | |||
22 | #define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ | ||
23 | AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ | ||
24 | AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ | ||
25 | AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ | ||
26 | AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) | ||
27 | |||
28 | #define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
29 | AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ | ||
30 | AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ | ||
31 | AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ | ||
32 | AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ | ||
33 | AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ | ||
34 | AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) \ | ||
35 | AES_EROUND01_L(KEY_BASE + 4, T2, T3, I2) \ | ||
36 | AES_EROUND23_L(KEY_BASE + 6, T2, T3, I3) | ||
37 | |||
38 | /* 10 rounds */ | ||
39 | #define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \ | ||
40 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
41 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
42 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
43 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
44 | ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) | ||
45 | |||
46 | #define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
47 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
48 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
49 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
50 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
51 | ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) | ||
52 | |||
53 | /* 12 rounds */ | ||
54 | #define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \ | ||
55 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
56 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
57 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
58 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
59 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ | ||
60 | ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) | ||
61 | |||
62 | #define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
63 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
64 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
65 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
66 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
67 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
68 | ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) | ||
69 | |||
70 | /* 14 rounds */ | ||
71 | #define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \ | ||
72 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
73 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
74 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
75 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
76 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ | ||
77 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ | ||
78 | ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) | ||
79 | |||
80 | #define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ | ||
81 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ | ||
82 | TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) | ||
83 | |||
84 | #define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ | ||
85 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ | ||
86 | ldd [%o0 + 0xd0], %f56; \ | ||
87 | ldd [%o0 + 0xd8], %f58; \ | ||
88 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
89 | ldd [%o0 + 0xe0], %f60; \ | ||
90 | ldd [%o0 + 0xe8], %f62; \ | ||
91 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
92 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
93 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
94 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
95 | AES_EROUND01(KEY_BASE + 48, I0, I1, KEY_BASE + 0) \ | ||
96 | AES_EROUND23(KEY_BASE + 50, I0, I1, KEY_BASE + 2) \ | ||
97 | AES_EROUND01(KEY_BASE + 48, I2, I3, KEY_BASE + 4) \ | ||
98 | AES_EROUND23(KEY_BASE + 50, I2, I3, KEY_BASE + 6) \ | ||
99 | AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I0) \ | ||
100 | AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I1) \ | ||
101 | ldd [%o0 + 0x10], %f8; \ | ||
102 | ldd [%o0 + 0x18], %f10; \ | ||
103 | AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I2) \ | ||
104 | AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I3) \ | ||
105 | ldd [%o0 + 0x20], %f12; \ | ||
106 | ldd [%o0 + 0x28], %f14; | ||
107 | |||
108 | #define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ | ||
109 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
110 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
111 | AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ | ||
112 | AES_DROUND01(KEY_BASE + 6, T0, T1, I0) | ||
113 | |||
114 | #define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
115 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
116 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
117 | AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ | ||
118 | AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ | ||
119 | AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ | ||
120 | AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \ | ||
121 | AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \ | ||
122 | AES_DROUND01(KEY_BASE + 6, T2, T3, I2) | ||
123 | |||
124 | #define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ | ||
125 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
126 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
127 | AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ | ||
128 | AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) | ||
129 | |||
130 | #define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
131 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
132 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
133 | AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ | ||
134 | AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ | ||
135 | AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ | ||
136 | AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \ | ||
137 | AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \ | ||
138 | AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2) | ||
139 | |||
140 | /* 10 rounds */ | ||
141 | #define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ | ||
142 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
143 | DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
144 | DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
145 | DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
146 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) | ||
147 | |||
148 | #define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
149 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
150 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
151 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
152 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
153 | DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) | ||
154 | |||
155 | /* 12 rounds */ | ||
156 | #define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ | ||
157 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
158 | DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
159 | DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
160 | DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
161 | DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ | ||
162 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) | ||
163 | |||
164 | #define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
165 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
166 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
167 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
168 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
169 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
170 | DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) | ||
171 | |||
172 | /* 14 rounds */ | ||
173 | #define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ | ||
174 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
175 | DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
176 | DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
177 | DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
178 | DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ | ||
179 | DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ | ||
180 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) | ||
181 | |||
182 | #define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ | ||
183 | DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ | ||
184 | TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) | ||
185 | |||
186 | #define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ | ||
187 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ | ||
188 | ldd [%o0 + 0x18], %f56; \ | ||
189 | ldd [%o0 + 0x10], %f58; \ | ||
190 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
191 | ldd [%o0 + 0x08], %f60; \ | ||
192 | ldd [%o0 + 0x00], %f62; \ | ||
193 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
194 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
195 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
196 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
197 | AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \ | ||
198 | AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \ | ||
199 | AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \ | ||
200 | AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \ | ||
201 | AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \ | ||
202 | AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \ | ||
203 | ldd [%o0 + 0xd8], %f8; \ | ||
204 | ldd [%o0 + 0xd0], %f10; \ | ||
205 | AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \ | ||
206 | AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) \ | ||
207 | ldd [%o0 + 0xc8], %f12; \ | ||
208 | ldd [%o0 + 0xc0], %f14; | ||
209 | |||
210 | .align 32 | ||
211 | ENTRY(aes_sparc64_key_expand) | ||
212 | /* %o0=input_key, %o1=output_key, %o2=key_len */ | ||
213 | VISEntry | ||
214 | ld [%o0 + 0x00], %f0 | ||
215 | ld [%o0 + 0x04], %f1 | ||
216 | ld [%o0 + 0x08], %f2 | ||
217 | ld [%o0 + 0x0c], %f3 | ||
218 | |||
219 | std %f0, [%o1 + 0x00] | ||
220 | std %f2, [%o1 + 0x08] | ||
221 | add %o1, 0x10, %o1 | ||
222 | |||
223 | cmp %o2, 24 | ||
224 | bl 2f | ||
225 | nop | ||
226 | |||
227 | be 1f | ||
228 | nop | ||
229 | |||
230 | /* 256-bit key expansion */ | ||
231 | ld [%o0 + 0x10], %f4 | ||
232 | ld [%o0 + 0x14], %f5 | ||
233 | ld [%o0 + 0x18], %f6 | ||
234 | ld [%o0 + 0x1c], %f7 | ||
235 | |||
236 | std %f4, [%o1 + 0x00] | ||
237 | std %f6, [%o1 + 0x08] | ||
238 | add %o1, 0x10, %o1 | ||
239 | |||
240 | AES_KEXPAND1(0, 6, 0x0, 8) | ||
241 | AES_KEXPAND2(2, 8, 10) | ||
242 | AES_KEXPAND0(4, 10, 12) | ||
243 | AES_KEXPAND2(6, 12, 14) | ||
244 | AES_KEXPAND1(8, 14, 0x1, 16) | ||
245 | AES_KEXPAND2(10, 16, 18) | ||
246 | AES_KEXPAND0(12, 18, 20) | ||
247 | AES_KEXPAND2(14, 20, 22) | ||
248 | AES_KEXPAND1(16, 22, 0x2, 24) | ||
249 | AES_KEXPAND2(18, 24, 26) | ||
250 | AES_KEXPAND0(20, 26, 28) | ||
251 | AES_KEXPAND2(22, 28, 30) | ||
252 | AES_KEXPAND1(24, 30, 0x3, 32) | ||
253 | AES_KEXPAND2(26, 32, 34) | ||
254 | AES_KEXPAND0(28, 34, 36) | ||
255 | AES_KEXPAND2(30, 36, 38) | ||
256 | AES_KEXPAND1(32, 38, 0x4, 40) | ||
257 | AES_KEXPAND2(34, 40, 42) | ||
258 | AES_KEXPAND0(36, 42, 44) | ||
259 | AES_KEXPAND2(38, 44, 46) | ||
260 | AES_KEXPAND1(40, 46, 0x5, 48) | ||
261 | AES_KEXPAND2(42, 48, 50) | ||
262 | AES_KEXPAND0(44, 50, 52) | ||
263 | AES_KEXPAND2(46, 52, 54) | ||
264 | AES_KEXPAND1(48, 54, 0x6, 56) | ||
265 | AES_KEXPAND2(50, 56, 58) | ||
266 | |||
267 | std %f8, [%o1 + 0x00] | ||
268 | std %f10, [%o1 + 0x08] | ||
269 | std %f12, [%o1 + 0x10] | ||
270 | std %f14, [%o1 + 0x18] | ||
271 | std %f16, [%o1 + 0x20] | ||
272 | std %f18, [%o1 + 0x28] | ||
273 | std %f20, [%o1 + 0x30] | ||
274 | std %f22, [%o1 + 0x38] | ||
275 | std %f24, [%o1 + 0x40] | ||
276 | std %f26, [%o1 + 0x48] | ||
277 | std %f28, [%o1 + 0x50] | ||
278 | std %f30, [%o1 + 0x58] | ||
279 | std %f32, [%o1 + 0x60] | ||
280 | std %f34, [%o1 + 0x68] | ||
281 | std %f36, [%o1 + 0x70] | ||
282 | std %f38, [%o1 + 0x78] | ||
283 | std %f40, [%o1 + 0x80] | ||
284 | std %f42, [%o1 + 0x88] | ||
285 | std %f44, [%o1 + 0x90] | ||
286 | std %f46, [%o1 + 0x98] | ||
287 | std %f48, [%o1 + 0xa0] | ||
288 | std %f50, [%o1 + 0xa8] | ||
289 | std %f52, [%o1 + 0xb0] | ||
290 | std %f54, [%o1 + 0xb8] | ||
291 | std %f56, [%o1 + 0xc0] | ||
292 | ba,pt %xcc, 80f | ||
293 | std %f58, [%o1 + 0xc8] | ||
294 | |||
295 | 1: | ||
296 | /* 192-bit key expansion */ | ||
297 | ld [%o0 + 0x10], %f4 | ||
298 | ld [%o0 + 0x14], %f5 | ||
299 | |||
300 | std %f4, [%o1 + 0x00] | ||
301 | add %o1, 0x08, %o1 | ||
302 | |||
303 | AES_KEXPAND1(0, 4, 0x0, 6) | ||
304 | AES_KEXPAND2(2, 6, 8) | ||
305 | AES_KEXPAND2(4, 8, 10) | ||
306 | AES_KEXPAND1(6, 10, 0x1, 12) | ||
307 | AES_KEXPAND2(8, 12, 14) | ||
308 | AES_KEXPAND2(10, 14, 16) | ||
309 | AES_KEXPAND1(12, 16, 0x2, 18) | ||
310 | AES_KEXPAND2(14, 18, 20) | ||
311 | AES_KEXPAND2(16, 20, 22) | ||
312 | AES_KEXPAND1(18, 22, 0x3, 24) | ||
313 | AES_KEXPAND2(20, 24, 26) | ||
314 | AES_KEXPAND2(22, 26, 28) | ||
315 | AES_KEXPAND1(24, 28, 0x4, 30) | ||
316 | AES_KEXPAND2(26, 30, 32) | ||
317 | AES_KEXPAND2(28, 32, 34) | ||
318 | AES_KEXPAND1(30, 34, 0x5, 36) | ||
319 | AES_KEXPAND2(32, 36, 38) | ||
320 | AES_KEXPAND2(34, 38, 40) | ||
321 | AES_KEXPAND1(36, 40, 0x6, 42) | ||
322 | AES_KEXPAND2(38, 42, 44) | ||
323 | AES_KEXPAND2(40, 44, 46) | ||
324 | AES_KEXPAND1(42, 46, 0x7, 48) | ||
325 | AES_KEXPAND2(44, 48, 50) | ||
326 | |||
327 | std %f6, [%o1 + 0x00] | ||
328 | std %f8, [%o1 + 0x08] | ||
329 | std %f10, [%o1 + 0x10] | ||
330 | std %f12, [%o1 + 0x18] | ||
331 | std %f14, [%o1 + 0x20] | ||
332 | std %f16, [%o1 + 0x28] | ||
333 | std %f18, [%o1 + 0x30] | ||
334 | std %f20, [%o1 + 0x38] | ||
335 | std %f22, [%o1 + 0x40] | ||
336 | std %f24, [%o1 + 0x48] | ||
337 | std %f26, [%o1 + 0x50] | ||
338 | std %f28, [%o1 + 0x58] | ||
339 | std %f30, [%o1 + 0x60] | ||
340 | std %f32, [%o1 + 0x68] | ||
341 | std %f34, [%o1 + 0x70] | ||
342 | std %f36, [%o1 + 0x78] | ||
343 | std %f38, [%o1 + 0x80] | ||
344 | std %f40, [%o1 + 0x88] | ||
345 | std %f42, [%o1 + 0x90] | ||
346 | std %f44, [%o1 + 0x98] | ||
347 | std %f46, [%o1 + 0xa0] | ||
348 | std %f48, [%o1 + 0xa8] | ||
349 | ba,pt %xcc, 80f | ||
350 | std %f50, [%o1 + 0xb0] | ||
351 | |||
352 | 2: | ||
353 | /* 128-bit key expansion */ | ||
354 | AES_KEXPAND1(0, 2, 0x0, 4) | ||
355 | AES_KEXPAND2(2, 4, 6) | ||
356 | AES_KEXPAND1(4, 6, 0x1, 8) | ||
357 | AES_KEXPAND2(6, 8, 10) | ||
358 | AES_KEXPAND1(8, 10, 0x2, 12) | ||
359 | AES_KEXPAND2(10, 12, 14) | ||
360 | AES_KEXPAND1(12, 14, 0x3, 16) | ||
361 | AES_KEXPAND2(14, 16, 18) | ||
362 | AES_KEXPAND1(16, 18, 0x4, 20) | ||
363 | AES_KEXPAND2(18, 20, 22) | ||
364 | AES_KEXPAND1(20, 22, 0x5, 24) | ||
365 | AES_KEXPAND2(22, 24, 26) | ||
366 | AES_KEXPAND1(24, 26, 0x6, 28) | ||
367 | AES_KEXPAND2(26, 28, 30) | ||
368 | AES_KEXPAND1(28, 30, 0x7, 32) | ||
369 | AES_KEXPAND2(30, 32, 34) | ||
370 | AES_KEXPAND1(32, 34, 0x8, 36) | ||
371 | AES_KEXPAND2(34, 36, 38) | ||
372 | AES_KEXPAND1(36, 38, 0x9, 40) | ||
373 | AES_KEXPAND2(38, 40, 42) | ||
374 | |||
375 | std %f4, [%o1 + 0x00] | ||
376 | std %f6, [%o1 + 0x08] | ||
377 | std %f8, [%o1 + 0x10] | ||
378 | std %f10, [%o1 + 0x18] | ||
379 | std %f12, [%o1 + 0x20] | ||
380 | std %f14, [%o1 + 0x28] | ||
381 | std %f16, [%o1 + 0x30] | ||
382 | std %f18, [%o1 + 0x38] | ||
383 | std %f20, [%o1 + 0x40] | ||
384 | std %f22, [%o1 + 0x48] | ||
385 | std %f24, [%o1 + 0x50] | ||
386 | std %f26, [%o1 + 0x58] | ||
387 | std %f28, [%o1 + 0x60] | ||
388 | std %f30, [%o1 + 0x68] | ||
389 | std %f32, [%o1 + 0x70] | ||
390 | std %f34, [%o1 + 0x78] | ||
391 | std %f36, [%o1 + 0x80] | ||
392 | std %f38, [%o1 + 0x88] | ||
393 | std %f40, [%o1 + 0x90] | ||
394 | std %f42, [%o1 + 0x98] | ||
395 | 80: | ||
396 | retl | ||
397 | VISExit | ||
398 | ENDPROC(aes_sparc64_key_expand) | ||
399 | |||
400 | .align 32 | ||
401 | ENTRY(aes_sparc64_encrypt_128) | ||
402 | /* %o0=key, %o1=input, %o2=output */ | ||
403 | VISEntry | ||
404 | ld [%o1 + 0x00], %f4 | ||
405 | ld [%o1 + 0x04], %f5 | ||
406 | ld [%o1 + 0x08], %f6 | ||
407 | ld [%o1 + 0x0c], %f7 | ||
408 | ldd [%o0 + 0x00], %f8 | ||
409 | ldd [%o0 + 0x08], %f10 | ||
410 | ldd [%o0 + 0x10], %f12 | ||
411 | ldd [%o0 + 0x18], %f14 | ||
412 | ldd [%o0 + 0x20], %f16 | ||
413 | ldd [%o0 + 0x28], %f18 | ||
414 | ldd [%o0 + 0x30], %f20 | ||
415 | ldd [%o0 + 0x38], %f22 | ||
416 | ldd [%o0 + 0x40], %f24 | ||
417 | ldd [%o0 + 0x48], %f26 | ||
418 | ldd [%o0 + 0x50], %f28 | ||
419 | ldd [%o0 + 0x58], %f30 | ||
420 | ldd [%o0 + 0x60], %f32 | ||
421 | ldd [%o0 + 0x68], %f34 | ||
422 | ldd [%o0 + 0x70], %f36 | ||
423 | ldd [%o0 + 0x78], %f38 | ||
424 | ldd [%o0 + 0x80], %f40 | ||
425 | ldd [%o0 + 0x88], %f42 | ||
426 | ldd [%o0 + 0x90], %f44 | ||
427 | ldd [%o0 + 0x98], %f46 | ||
428 | ldd [%o0 + 0xa0], %f48 | ||
429 | ldd [%o0 + 0xa8], %f50 | ||
430 | fxor %f8, %f4, %f4 | ||
431 | fxor %f10, %f6, %f6 | ||
432 | ENCRYPT_128(12, 4, 6, 0, 2) | ||
433 | st %f4, [%o2 + 0x00] | ||
434 | st %f5, [%o2 + 0x04] | ||
435 | st %f6, [%o2 + 0x08] | ||
436 | st %f7, [%o2 + 0x0c] | ||
437 | retl | ||
438 | VISExit | ||
439 | ENDPROC(aes_sparc64_encrypt_128) | ||
440 | |||
441 | .align 32 | ||
442 | ENTRY(aes_sparc64_encrypt_192) | ||
443 | /* %o0=key, %o1=input, %o2=output */ | ||
444 | VISEntry | ||
445 | ld [%o1 + 0x00], %f4 | ||
446 | ld [%o1 + 0x04], %f5 | ||
447 | ld [%o1 + 0x08], %f6 | ||
448 | ld [%o1 + 0x0c], %f7 | ||
449 | |||
450 | ldd [%o0 + 0x00], %f8 | ||
451 | ldd [%o0 + 0x08], %f10 | ||
452 | |||
453 | fxor %f8, %f4, %f4 | ||
454 | fxor %f10, %f6, %f6 | ||
455 | |||
456 | ldd [%o0 + 0x10], %f8 | ||
457 | ldd [%o0 + 0x18], %f10 | ||
458 | ldd [%o0 + 0x20], %f12 | ||
459 | ldd [%o0 + 0x28], %f14 | ||
460 | add %o0, 0x20, %o0 | ||
461 | |||
462 | ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) | ||
463 | |||
464 | ldd [%o0 + 0x10], %f12 | ||
465 | ldd [%o0 + 0x18], %f14 | ||
466 | ldd [%o0 + 0x20], %f16 | ||
467 | ldd [%o0 + 0x28], %f18 | ||
468 | ldd [%o0 + 0x30], %f20 | ||
469 | ldd [%o0 + 0x38], %f22 | ||
470 | ldd [%o0 + 0x40], %f24 | ||
471 | ldd [%o0 + 0x48], %f26 | ||
472 | ldd [%o0 + 0x50], %f28 | ||
473 | ldd [%o0 + 0x58], %f30 | ||
474 | ldd [%o0 + 0x60], %f32 | ||
475 | ldd [%o0 + 0x68], %f34 | ||
476 | ldd [%o0 + 0x70], %f36 | ||
477 | ldd [%o0 + 0x78], %f38 | ||
478 | ldd [%o0 + 0x80], %f40 | ||
479 | ldd [%o0 + 0x88], %f42 | ||
480 | ldd [%o0 + 0x90], %f44 | ||
481 | ldd [%o0 + 0x98], %f46 | ||
482 | ldd [%o0 + 0xa0], %f48 | ||
483 | ldd [%o0 + 0xa8], %f50 | ||
484 | |||
485 | |||
486 | ENCRYPT_128(12, 4, 6, 0, 2) | ||
487 | |||
488 | st %f4, [%o2 + 0x00] | ||
489 | st %f5, [%o2 + 0x04] | ||
490 | st %f6, [%o2 + 0x08] | ||
491 | st %f7, [%o2 + 0x0c] | ||
492 | |||
493 | retl | ||
494 | VISExit | ||
495 | ENDPROC(aes_sparc64_encrypt_192) | ||
496 | |||
497 | .align 32 | ||
498 | ENTRY(aes_sparc64_encrypt_256) | ||
499 | /* %o0=key, %o1=input, %o2=output */ | ||
500 | VISEntry | ||
501 | ld [%o1 + 0x00], %f4 | ||
502 | ld [%o1 + 0x04], %f5 | ||
503 | ld [%o1 + 0x08], %f6 | ||
504 | ld [%o1 + 0x0c], %f7 | ||
505 | |||
506 | ldd [%o0 + 0x00], %f8 | ||
507 | ldd [%o0 + 0x08], %f10 | ||
508 | |||
509 | fxor %f8, %f4, %f4 | ||
510 | fxor %f10, %f6, %f6 | ||
511 | |||
512 | ldd [%o0 + 0x10], %f8 | ||
513 | |||
514 | ldd [%o0 + 0x18], %f10 | ||
515 | ldd [%o0 + 0x20], %f12 | ||
516 | ldd [%o0 + 0x28], %f14 | ||
517 | add %o0, 0x20, %o0 | ||
518 | |||
519 | ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) | ||
520 | |||
521 | ldd [%o0 + 0x10], %f8 | ||
522 | |||
523 | ldd [%o0 + 0x18], %f10 | ||
524 | ldd [%o0 + 0x20], %f12 | ||
525 | ldd [%o0 + 0x28], %f14 | ||
526 | add %o0, 0x20, %o0 | ||
527 | |||
528 | ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) | ||
529 | |||
530 | ldd [%o0 + 0x10], %f12 | ||
531 | ldd [%o0 + 0x18], %f14 | ||
532 | ldd [%o0 + 0x20], %f16 | ||
533 | ldd [%o0 + 0x28], %f18 | ||
534 | ldd [%o0 + 0x30], %f20 | ||
535 | ldd [%o0 + 0x38], %f22 | ||
536 | ldd [%o0 + 0x40], %f24 | ||
537 | ldd [%o0 + 0x48], %f26 | ||
538 | ldd [%o0 + 0x50], %f28 | ||
539 | ldd [%o0 + 0x58], %f30 | ||
540 | ldd [%o0 + 0x60], %f32 | ||
541 | ldd [%o0 + 0x68], %f34 | ||
542 | ldd [%o0 + 0x70], %f36 | ||
543 | ldd [%o0 + 0x78], %f38 | ||
544 | ldd [%o0 + 0x80], %f40 | ||
545 | ldd [%o0 + 0x88], %f42 | ||
546 | ldd [%o0 + 0x90], %f44 | ||
547 | ldd [%o0 + 0x98], %f46 | ||
548 | ldd [%o0 + 0xa0], %f48 | ||
549 | ldd [%o0 + 0xa8], %f50 | ||
550 | |||
551 | ENCRYPT_128(12, 4, 6, 0, 2) | ||
552 | |||
553 | st %f4, [%o2 + 0x00] | ||
554 | st %f5, [%o2 + 0x04] | ||
555 | st %f6, [%o2 + 0x08] | ||
556 | st %f7, [%o2 + 0x0c] | ||
557 | |||
558 | retl | ||
559 | VISExit | ||
560 | ENDPROC(aes_sparc64_encrypt_256) | ||
561 | |||
562 | .align 32 | ||
563 | ENTRY(aes_sparc64_decrypt_128) | ||
564 | /* %o0=key, %o1=input, %o2=output */ | ||
565 | VISEntry | ||
566 | ld [%o1 + 0x00], %f4 | ||
567 | ld [%o1 + 0x04], %f5 | ||
568 | ld [%o1 + 0x08], %f6 | ||
569 | ld [%o1 + 0x0c], %f7 | ||
570 | ldd [%o0 + 0xa0], %f8 | ||
571 | ldd [%o0 + 0xa8], %f10 | ||
572 | ldd [%o0 + 0x98], %f12 | ||
573 | ldd [%o0 + 0x90], %f14 | ||
574 | ldd [%o0 + 0x88], %f16 | ||
575 | ldd [%o0 + 0x80], %f18 | ||
576 | ldd [%o0 + 0x78], %f20 | ||
577 | ldd [%o0 + 0x70], %f22 | ||
578 | ldd [%o0 + 0x68], %f24 | ||
579 | ldd [%o0 + 0x60], %f26 | ||
580 | ldd [%o0 + 0x58], %f28 | ||
581 | ldd [%o0 + 0x50], %f30 | ||
582 | ldd [%o0 + 0x48], %f32 | ||
583 | ldd [%o0 + 0x40], %f34 | ||
584 | ldd [%o0 + 0x38], %f36 | ||
585 | ldd [%o0 + 0x30], %f38 | ||
586 | ldd [%o0 + 0x28], %f40 | ||
587 | ldd [%o0 + 0x20], %f42 | ||
588 | ldd [%o0 + 0x18], %f44 | ||
589 | ldd [%o0 + 0x10], %f46 | ||
590 | ldd [%o0 + 0x08], %f48 | ||
591 | ldd [%o0 + 0x00], %f50 | ||
592 | fxor %f8, %f4, %f4 | ||
593 | fxor %f10, %f6, %f6 | ||
594 | DECRYPT_128(12, 4, 6, 0, 2) | ||
595 | st %f4, [%o2 + 0x00] | ||
596 | st %f5, [%o2 + 0x04] | ||
597 | st %f6, [%o2 + 0x08] | ||
598 | st %f7, [%o2 + 0x0c] | ||
599 | retl | ||
600 | VISExit | ||
601 | ENDPROC(aes_sparc64_decrypt_128) | ||
602 | |||
603 | .align 32 | ||
604 | ENTRY(aes_sparc64_decrypt_192) | ||
605 | /* %o0=key, %o1=input, %o2=output */ | ||
606 | VISEntry | ||
607 | ld [%o1 + 0x00], %f4 | ||
608 | ld [%o1 + 0x04], %f5 | ||
609 | ld [%o1 + 0x08], %f6 | ||
610 | ld [%o1 + 0x0c], %f7 | ||
611 | ldd [%o0 + 0xc0], %f8 | ||
612 | ldd [%o0 + 0xc8], %f10 | ||
613 | ldd [%o0 + 0xb8], %f12 | ||
614 | ldd [%o0 + 0xb0], %f14 | ||
615 | ldd [%o0 + 0xa8], %f16 | ||
616 | ldd [%o0 + 0xa0], %f18 | ||
617 | fxor %f8, %f4, %f4 | ||
618 | fxor %f10, %f6, %f6 | ||
619 | ldd [%o0 + 0x98], %f20 | ||
620 | ldd [%o0 + 0x90], %f22 | ||
621 | ldd [%o0 + 0x88], %f24 | ||
622 | ldd [%o0 + 0x80], %f26 | ||
623 | DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2) | ||
624 | ldd [%o0 + 0x78], %f28 | ||
625 | ldd [%o0 + 0x70], %f30 | ||
626 | ldd [%o0 + 0x68], %f32 | ||
627 | ldd [%o0 + 0x60], %f34 | ||
628 | ldd [%o0 + 0x58], %f36 | ||
629 | ldd [%o0 + 0x50], %f38 | ||
630 | ldd [%o0 + 0x48], %f40 | ||
631 | ldd [%o0 + 0x40], %f42 | ||
632 | ldd [%o0 + 0x38], %f44 | ||
633 | ldd [%o0 + 0x30], %f46 | ||
634 | ldd [%o0 + 0x28], %f48 | ||
635 | ldd [%o0 + 0x20], %f50 | ||
636 | ldd [%o0 + 0x18], %f52 | ||
637 | ldd [%o0 + 0x10], %f54 | ||
638 | ldd [%o0 + 0x08], %f56 | ||
639 | ldd [%o0 + 0x00], %f58 | ||
640 | DECRYPT_128(20, 4, 6, 0, 2) | ||
641 | st %f4, [%o2 + 0x00] | ||
642 | st %f5, [%o2 + 0x04] | ||
643 | st %f6, [%o2 + 0x08] | ||
644 | st %f7, [%o2 + 0x0c] | ||
645 | retl | ||
646 | VISExit | ||
647 | ENDPROC(aes_sparc64_decrypt_192) | ||
648 | |||
649 | .align 32 | ||
/*
 * Decrypt a single 16-byte block with a 256-bit key using the SPARC64
 * AES_DROUND crypto opcodes.  The input is loaded into %f4-%f7, XORed
 * with the key words at offsets 0xe0/0xe8 (the schedule is walked from
 * the high offsets down toward 0x00, i.e. last round key first), and
 * then run through interleaved DROUND pairs while the next round keys
 * are streamed in with ldd.  The final pair uses the _L (last-round)
 * variants before the result is stored to %o2.
 * NOTE(review): key loads are deliberately interleaved with rounds to
 * hide load latency; %f12-%f26 are reused for later schedule words once
 * the earlier rounds have consumed them.
 */
650 | ENTRY(aes_sparc64_decrypt_256) | ||
651 | /* %o0=key, %o1=input, %o2=output */ | ||
652 | VISEntry | ||
653 | ld [%o1 + 0x00], %f4 | ||
654 | ld [%o1 + 0x04], %f5 | ||
655 | ld [%o1 + 0x08], %f6 | ||
656 | ld [%o1 + 0x0c], %f7 | ||
657 | ldd [%o0 + 0xe0], %f8 | ||
658 | ldd [%o0 + 0xe8], %f10 | ||
659 | ldd [%o0 + 0xd8], %f12 | ||
660 | ldd [%o0 + 0xd0], %f14 | ||
661 | ldd [%o0 + 0xc8], %f16 | ||
/* Initial whitening: XOR the block with the first (highest-offset) key pair. */
662 | fxor %f8, %f4, %f4 | ||
663 | ldd [%o0 + 0xc0], %f18 | ||
664 | fxor %f10, %f6, %f6 | ||
665 | ldd [%o0 + 0xb8], %f20 | ||
666 | AES_DROUND23(12, 4, 6, 2) | ||
667 | ldd [%o0 + 0xb0], %f22 | ||
668 | AES_DROUND01(14, 4, 6, 0) | ||
669 | ldd [%o0 + 0xa8], %f24 | ||
670 | AES_DROUND23(16, 0, 2, 6) | ||
671 | ldd [%o0 + 0xa0], %f26 | ||
672 | AES_DROUND01(18, 0, 2, 4) | ||
/* %f12-%f18 are done with their first key words; refill them with the
 * 0x98-0x80 schedule entries while rounds continue on %f20-%f26. */
673 | ldd [%o0 + 0x98], %f12 | ||
674 | AES_DROUND23(20, 4, 6, 2) | ||
675 | ldd [%o0 + 0x90], %f14 | ||
676 | AES_DROUND01(22, 4, 6, 0) | ||
677 | ldd [%o0 + 0x88], %f16 | ||
678 | AES_DROUND23(24, 0, 2, 6) | ||
679 | ldd [%o0 + 0x80], %f18 | ||
680 | AES_DROUND01(26, 0, 2, 4) | ||
681 | ldd [%o0 + 0x78], %f20 | ||
682 | AES_DROUND23(12, 4, 6, 2) | ||
683 | ldd [%o0 + 0x70], %f22 | ||
684 | AES_DROUND01(14, 4, 6, 0) | ||
685 | ldd [%o0 + 0x68], %f24 | ||
686 | AES_DROUND23(16, 0, 2, 6) | ||
687 | ldd [%o0 + 0x60], %f26 | ||
688 | AES_DROUND01(18, 0, 2, 4) | ||
689 | ldd [%o0 + 0x58], %f28 | ||
690 | AES_DROUND23(20, 4, 6, 2) | ||
691 | ldd [%o0 + 0x50], %f30 | ||
692 | AES_DROUND01(22, 4, 6, 0) | ||
693 | ldd [%o0 + 0x48], %f32 | ||
694 | AES_DROUND23(24, 0, 2, 6) | ||
695 | ldd [%o0 + 0x40], %f34 | ||
696 | AES_DROUND01(26, 0, 2, 4) | ||
697 | ldd [%o0 + 0x38], %f36 | ||
698 | AES_DROUND23(28, 4, 6, 2) | ||
699 | ldd [%o0 + 0x30], %f38 | ||
700 | AES_DROUND01(30, 4, 6, 0) | ||
701 | ldd [%o0 + 0x28], %f40 | ||
702 | AES_DROUND23(32, 0, 2, 6) | ||
703 | ldd [%o0 + 0x20], %f42 | ||
704 | AES_DROUND01(34, 0, 2, 4) | ||
705 | ldd [%o0 + 0x18], %f44 | ||
706 | AES_DROUND23(36, 4, 6, 2) | ||
707 | ldd [%o0 + 0x10], %f46 | ||
708 | AES_DROUND01(38, 4, 6, 0) | ||
709 | ldd [%o0 + 0x08], %f48 | ||
710 | AES_DROUND23(40, 0, 2, 6) | ||
711 | ldd [%o0 + 0x00], %f50 | ||
712 | AES_DROUND01(42, 0, 2, 4) | ||
713 | AES_DROUND23(44, 4, 6, 2) | ||
714 | AES_DROUND01(46, 4, 6, 0) | ||
/* Last round: the _L variants omit the inverse MixColumns step. */
715 | AES_DROUND23_L(48, 0, 2, 6) | ||
716 | AES_DROUND01_L(50, 0, 2, 4) | ||
717 | st %f4, [%o2 + 0x00] | ||
718 | st %f5, [%o2 + 0x04] | ||
719 | st %f6, [%o2 + 0x08] | ||
720 | st %f7, [%o2 + 0x0c] | ||
/* VISExit runs in the retl delay slot, releasing the FPU on return. */
721 | retl | ||
722 | VISExit | ||
723 | ENDPROC(aes_sparc64_decrypt_256) | ||
724 | |||
725 | .align 32 | ||
/*
 * Preload the expanded AES-128 encryption key schedule (offsets
 * 0x10-0xa8) into %f8-%f46 for the bulk ECB/CBC/CTR routines, which
 * reference the schedule by FP register number and never reload it.
 * The round-0 key at offset 0x00 is not loaded here; the bulk routines
 * read it into integer registers themselves.
 * NOTE(review): VISEntry grabs the FPU with no matching VISExit in this
 * function — presumably a separate helper releases it after the bulk
 * operation; confirm against the glue code.
 */
726 | ENTRY(aes_sparc64_load_encrypt_keys_128) | ||
727 | /* %o0=key */ | ||
728 | VISEntry | ||
729 | ldd [%o0 + 0x10], %f8 | ||
730 | ldd [%o0 + 0x18], %f10 | ||
731 | ldd [%o0 + 0x20], %f12 | ||
732 | ldd [%o0 + 0x28], %f14 | ||
733 | ldd [%o0 + 0x30], %f16 | ||
734 | ldd [%o0 + 0x38], %f18 | ||
735 | ldd [%o0 + 0x40], %f20 | ||
736 | ldd [%o0 + 0x48], %f22 | ||
737 | ldd [%o0 + 0x50], %f24 | ||
738 | ldd [%o0 + 0x58], %f26 | ||
739 | ldd [%o0 + 0x60], %f28 | ||
740 | ldd [%o0 + 0x68], %f30 | ||
741 | ldd [%o0 + 0x70], %f32 | ||
742 | ldd [%o0 + 0x78], %f34 | ||
743 | ldd [%o0 + 0x80], %f36 | ||
744 | ldd [%o0 + 0x88], %f38 | ||
745 | ldd [%o0 + 0x90], %f40 | ||
746 | ldd [%o0 + 0x98], %f42 | ||
747 | ldd [%o0 + 0xa0], %f44 | ||
/* Final load sits in the retl delay slot. */
748 | retl | ||
749 | ldd [%o0 + 0xa8], %f46 | ||
750 | ENDPROC(aes_sparc64_load_encrypt_keys_128) | ||
751 | |||
752 | .align 32 | ||
/*
 * Preload the expanded AES-192 encryption key schedule (offsets
 * 0x10-0xc8) into %f8-%f54.  Same contract as the _128 variant, with
 * two extra rounds' worth of key material.
 */
753 | ENTRY(aes_sparc64_load_encrypt_keys_192) | ||
754 | /* %o0=key */ | ||
755 | VISEntry | ||
756 | ldd [%o0 + 0x10], %f8 | ||
757 | ldd [%o0 + 0x18], %f10 | ||
758 | ldd [%o0 + 0x20], %f12 | ||
759 | ldd [%o0 + 0x28], %f14 | ||
760 | ldd [%o0 + 0x30], %f16 | ||
761 | ldd [%o0 + 0x38], %f18 | ||
762 | ldd [%o0 + 0x40], %f20 | ||
763 | ldd [%o0 + 0x48], %f22 | ||
764 | ldd [%o0 + 0x50], %f24 | ||
765 | ldd [%o0 + 0x58], %f26 | ||
766 | ldd [%o0 + 0x60], %f28 | ||
767 | ldd [%o0 + 0x68], %f30 | ||
768 | ldd [%o0 + 0x70], %f32 | ||
769 | ldd [%o0 + 0x78], %f34 | ||
770 | ldd [%o0 + 0x80], %f36 | ||
771 | ldd [%o0 + 0x88], %f38 | ||
772 | ldd [%o0 + 0x90], %f40 | ||
773 | ldd [%o0 + 0x98], %f42 | ||
774 | ldd [%o0 + 0xa0], %f44 | ||
775 | ldd [%o0 + 0xa8], %f46 | ||
776 | ldd [%o0 + 0xb0], %f48 | ||
777 | ldd [%o0 + 0xb8], %f50 | ||
778 | ldd [%o0 + 0xc0], %f52 | ||
/* Final load sits in the retl delay slot. */
779 | retl | ||
780 | ldd [%o0 + 0xc8], %f54 | ||
781 | ENDPROC(aes_sparc64_load_encrypt_keys_192) | ||
782 | |||
783 | .align 32 | ||
/*
 * Preload the expanded AES-256 encryption key schedule (offsets
 * 0x10-0xe8) into %f8-%f62.  This consumes every even FP register from
 * %f8 up — the 256-bit bulk routines therefore have no spare FP key
 * registers and must juggle temporaries (see the reload in
 * aes_sparc64_ctr_crypt_256).
 */
784 | ENTRY(aes_sparc64_load_encrypt_keys_256) | ||
785 | /* %o0=key */ | ||
786 | VISEntry | ||
787 | ldd [%o0 + 0x10], %f8 | ||
788 | ldd [%o0 + 0x18], %f10 | ||
789 | ldd [%o0 + 0x20], %f12 | ||
790 | ldd [%o0 + 0x28], %f14 | ||
791 | ldd [%o0 + 0x30], %f16 | ||
792 | ldd [%o0 + 0x38], %f18 | ||
793 | ldd [%o0 + 0x40], %f20 | ||
794 | ldd [%o0 + 0x48], %f22 | ||
795 | ldd [%o0 + 0x50], %f24 | ||
796 | ldd [%o0 + 0x58], %f26 | ||
797 | ldd [%o0 + 0x60], %f28 | ||
798 | ldd [%o0 + 0x68], %f30 | ||
799 | ldd [%o0 + 0x70], %f32 | ||
800 | ldd [%o0 + 0x78], %f34 | ||
801 | ldd [%o0 + 0x80], %f36 | ||
802 | ldd [%o0 + 0x88], %f38 | ||
803 | ldd [%o0 + 0x90], %f40 | ||
804 | ldd [%o0 + 0x98], %f42 | ||
805 | ldd [%o0 + 0xa0], %f44 | ||
806 | ldd [%o0 + 0xa8], %f46 | ||
807 | ldd [%o0 + 0xb0], %f48 | ||
808 | ldd [%o0 + 0xb8], %f50 | ||
809 | ldd [%o0 + 0xc0], %f52 | ||
810 | ldd [%o0 + 0xc8], %f54 | ||
811 | ldd [%o0 + 0xd0], %f56 | ||
812 | ldd [%o0 + 0xd8], %f58 | ||
813 | ldd [%o0 + 0xe0], %f60 | ||
/* Final load sits in the retl delay slot. */
814 | retl | ||
815 | ldd [%o0 + 0xe8], %f62 | ||
816 | ENDPROC(aes_sparc64_load_encrypt_keys_256) | ||
817 | |||
818 | .align 32 | ||
819 | ENTRY(aes_sparc64_load_decrypt_keys_128) | ||
820 | /* %o0=key */ | ||
821 | VISEntry | ||
822 | ldd [%o0 + 0x98], %f8 | ||
823 | ldd [%o0 + 0x90], %f10 | ||
824 | ldd [%o0 + 0x88], %f12 | ||
825 | ldd [%o0 + 0x80], %f14 | ||
826 | ldd [%o0 + 0x78], %f16 | ||
827 | ldd [%o0 + 0x70], %f18 | ||
828 | ldd [%o0 + 0x68], %f20 | ||
829 | ldd [%o0 + 0x60], %f22 | ||
830 | ldd [%o0 + 0x58], %f24 | ||
831 | ldd [%o0 + 0x50], %f26 | ||
832 | ldd [%o0 + 0x48], %f28 | ||
833 | ldd [%o0 + 0x40], %f30 | ||
834 | ldd [%o0 + 0x38], %f32 | ||
835 | ldd [%o0 + 0x30], %f34 | ||
836 | ldd [%o0 + 0x28], %f36 | ||
837 | ldd [%o0 + 0x20], %f38 | ||
838 | ldd [%o0 + 0x18], %f40 | ||
839 | ldd [%o0 + 0x10], %f42 | ||
840 | ldd [%o0 + 0x08], %f44 | ||
841 | retl | ||
842 | ldd [%o0 + 0x00], %f46 | ||
843 | ENDPROC(aes_sparc64_load_decrypt_keys_128) | ||
844 | |||
845 | .align 32 | ||
/*
 * Preload the AES-192 schedule for decryption, offsets 0xb8 down to
 * 0x00, into %f8-%f54 (reverse order — see the _128 variant's comment).
 */
846 | ENTRY(aes_sparc64_load_decrypt_keys_192) | ||
847 | /* %o0=key */ | ||
848 | VISEntry | ||
849 | ldd [%o0 + 0xb8], %f8 | ||
850 | ldd [%o0 + 0xb0], %f10 | ||
851 | ldd [%o0 + 0xa8], %f12 | ||
852 | ldd [%o0 + 0xa0], %f14 | ||
853 | ldd [%o0 + 0x98], %f16 | ||
854 | ldd [%o0 + 0x90], %f18 | ||
855 | ldd [%o0 + 0x88], %f20 | ||
856 | ldd [%o0 + 0x80], %f22 | ||
857 | ldd [%o0 + 0x78], %f24 | ||
858 | ldd [%o0 + 0x70], %f26 | ||
859 | ldd [%o0 + 0x68], %f28 | ||
860 | ldd [%o0 + 0x60], %f30 | ||
861 | ldd [%o0 + 0x58], %f32 | ||
862 | ldd [%o0 + 0x50], %f34 | ||
863 | ldd [%o0 + 0x48], %f36 | ||
864 | ldd [%o0 + 0x40], %f38 | ||
865 | ldd [%o0 + 0x38], %f40 | ||
866 | ldd [%o0 + 0x30], %f42 | ||
867 | ldd [%o0 + 0x28], %f44 | ||
868 | ldd [%o0 + 0x20], %f46 | ||
869 | ldd [%o0 + 0x18], %f48 | ||
870 | ldd [%o0 + 0x10], %f50 | ||
871 | ldd [%o0 + 0x08], %f52 | ||
/* Final load sits in the retl delay slot. */
872 | retl | ||
873 | ldd [%o0 + 0x00], %f54 | ||
874 | ENDPROC(aes_sparc64_load_decrypt_keys_192) | ||
875 | |||
876 | .align 32 | ||
/*
 * Preload the AES-256 schedule for decryption, offsets 0xd8 down to
 * 0x00, into %f8-%f62 (reverse order — see the _128 variant's comment).
 * Uses every even FP register from %f8 up.
 */
877 | ENTRY(aes_sparc64_load_decrypt_keys_256) | ||
878 | /* %o0=key */ | ||
879 | VISEntry | ||
880 | ldd [%o0 + 0xd8], %f8 | ||
881 | ldd [%o0 + 0xd0], %f10 | ||
882 | ldd [%o0 + 0xc8], %f12 | ||
883 | ldd [%o0 + 0xc0], %f14 | ||
884 | ldd [%o0 + 0xb8], %f16 | ||
885 | ldd [%o0 + 0xb0], %f18 | ||
886 | ldd [%o0 + 0xa8], %f20 | ||
887 | ldd [%o0 + 0xa0], %f22 | ||
888 | ldd [%o0 + 0x98], %f24 | ||
889 | ldd [%o0 + 0x90], %f26 | ||
890 | ldd [%o0 + 0x88], %f28 | ||
891 | ldd [%o0 + 0x80], %f30 | ||
892 | ldd [%o0 + 0x78], %f32 | ||
893 | ldd [%o0 + 0x70], %f34 | ||
894 | ldd [%o0 + 0x68], %f36 | ||
895 | ldd [%o0 + 0x60], %f38 | ||
896 | ldd [%o0 + 0x58], %f40 | ||
897 | ldd [%o0 + 0x50], %f42 | ||
898 | ldd [%o0 + 0x48], %f44 | ||
899 | ldd [%o0 + 0x40], %f46 | ||
900 | ldd [%o0 + 0x38], %f48 | ||
901 | ldd [%o0 + 0x30], %f50 | ||
902 | ldd [%o0 + 0x28], %f52 | ||
903 | ldd [%o0 + 0x20], %f54 | ||
904 | ldd [%o0 + 0x18], %f56 | ||
905 | ldd [%o0 + 0x10], %f58 | ||
906 | ldd [%o0 + 0x08], %f60 | ||
/* Final load sits in the retl delay slot. */
907 | retl | ||
908 | ldd [%o0 + 0x00], %f62 | ||
909 | ENDPROC(aes_sparc64_load_decrypt_keys_256) | ||
910 | |||
911 | .align 32 | ||
/*
 * Bulk ECB encryption, 128-bit key.  Requires the key schedule to have
 * been preloaded into %f8+ by aes_sparc64_load_encrypt_keys_128 (this
 * routine only reads the round-0 key words into %g1/%g2).
 * len must be a non-zero multiple of 16: exactly one block takes the
 * `be 10f` shortcut; otherwise the main loop encrypts two blocks per
 * iteration with ENCRYPT_128_2, and a leftover single block falls
 * through to the same tail at label 10.
 * The round-0 whitening is done in integer registers (xor) before the
 * state is moved to FP registers with MOVXTOD.
 * NOTE(review): the loop branch here is plain `brgz` while the ECB
 * decrypt twin uses `brgz,pt` — harmless, but inconsistent prediction
 * hints; confirm intent.
 */
912 | ENTRY(aes_sparc64_ecb_encrypt_128) | ||
913 | /* %o0=key, %o1=input, %o2=output, %o3=len */ | ||
914 | ldx [%o0 + 0x00], %g1 | ||
915 | subcc %o3, 0x10, %o3 | ||
916 | be 10f | ||
917 | ldx [%o0 + 0x08], %g2 | ||
918 | 1: ldx [%o1 + 0x00], %g3 | ||
919 | ldx [%o1 + 0x08], %g7 | ||
920 | ldx [%o1 + 0x10], %o4 | ||
921 | ldx [%o1 + 0x18], %o5 | ||
922 | xor %g1, %g3, %g3 | ||
923 | xor %g2, %g7, %g7 | ||
924 | MOVXTOD_G3_F4 | ||
925 | MOVXTOD_G7_F6 | ||
926 | xor %g1, %o4, %g3 | ||
927 | xor %g2, %o5, %g7 | ||
928 | MOVXTOD_G3_F60 | ||
929 | MOVXTOD_G7_F62 | ||
930 | ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
931 | std %f4, [%o2 + 0x00] | ||
932 | std %f6, [%o2 + 0x08] | ||
933 | std %f60, [%o2 + 0x10] | ||
934 | std %f62, [%o2 + 0x18] | ||
935 | sub %o3, 0x20, %o3 | ||
936 | add %o1, 0x20, %o1 | ||
937 | brgz %o3, 1b | ||
938 | add %o2, 0x20, %o2 | ||
/* %o3 < 0 here means len was a multiple of 0x20: no tail block. */
939 | brlz,pt %o3, 11f | ||
940 | nop | ||
941 | 10: ldx [%o1 + 0x00], %g3 | ||
942 | ldx [%o1 + 0x08], %g7 | ||
943 | xor %g1, %g3, %g3 | ||
944 | xor %g2, %g7, %g7 | ||
945 | MOVXTOD_G3_F4 | ||
946 | MOVXTOD_G7_F6 | ||
947 | ENCRYPT_128(8, 4, 6, 0, 2) | ||
948 | std %f4, [%o2 + 0x00] | ||
949 | std %f6, [%o2 + 0x08] | ||
950 | 11: retl | ||
951 | nop | ||
952 | ENDPROC(aes_sparc64_ecb_encrypt_128) | ||
953 | |||
954 | .align 32 | ||
/*
 * Bulk ECB encryption, 192-bit key.  Identical structure to the _128
 * variant (two blocks per loop iteration, single-block tail at 10:),
 * but uses the ENCRYPT_192* macros against the schedule preloaded by
 * aes_sparc64_load_encrypt_keys_192.
 */
955 | ENTRY(aes_sparc64_ecb_encrypt_192) | ||
956 | /* %o0=key, %o1=input, %o2=output, %o3=len */ | ||
957 | ldx [%o0 + 0x00], %g1 | ||
958 | subcc %o3, 0x10, %o3 | ||
959 | be 10f | ||
960 | ldx [%o0 + 0x08], %g2 | ||
961 | 1: ldx [%o1 + 0x00], %g3 | ||
962 | ldx [%o1 + 0x08], %g7 | ||
963 | ldx [%o1 + 0x10], %o4 | ||
964 | ldx [%o1 + 0x18], %o5 | ||
965 | xor %g1, %g3, %g3 | ||
966 | xor %g2, %g7, %g7 | ||
967 | MOVXTOD_G3_F4 | ||
968 | MOVXTOD_G7_F6 | ||
969 | xor %g1, %o4, %g3 | ||
970 | xor %g2, %o5, %g7 | ||
971 | MOVXTOD_G3_F60 | ||
972 | MOVXTOD_G7_F62 | ||
973 | ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
974 | std %f4, [%o2 + 0x00] | ||
975 | std %f6, [%o2 + 0x08] | ||
976 | std %f60, [%o2 + 0x10] | ||
977 | std %f62, [%o2 + 0x18] | ||
978 | sub %o3, 0x20, %o3 | ||
979 | add %o1, 0x20, %o1 | ||
980 | brgz %o3, 1b | ||
981 | add %o2, 0x20, %o2 | ||
982 | brlz,pt %o3, 11f | ||
983 | nop | ||
984 | 10: ldx [%o1 + 0x00], %g3 | ||
985 | ldx [%o1 + 0x08], %g7 | ||
986 | xor %g1, %g3, %g3 | ||
987 | xor %g2, %g7, %g7 | ||
988 | MOVXTOD_G3_F4 | ||
989 | MOVXTOD_G7_F6 | ||
990 | ENCRYPT_192(8, 4, 6, 0, 2) | ||
991 | std %f4, [%o2 + 0x00] | ||
992 | std %f6, [%o2 + 0x08] | ||
993 | 11: retl | ||
994 | nop | ||
995 | ENDPROC(aes_sparc64_ecb_encrypt_192) | ||
996 | |||
997 | .align 32 | ||
/*
 * Bulk ECB encryption, 256-bit key.  Same two-blocks-per-iteration
 * structure as the smaller key sizes, but the second block's state
 * lives in %f0/%f2 rather than %f60/%f62, because the full 256-bit
 * schedule preloaded by aes_sparc64_load_encrypt_keys_256 occupies
 * %f8-%f62.
 */
998 | ENTRY(aes_sparc64_ecb_encrypt_256) | ||
999 | /* %o0=key, %o1=input, %o2=output, %o3=len */ | ||
1000 | ldx [%o0 + 0x00], %g1 | ||
1001 | subcc %o3, 0x10, %o3 | ||
1002 | be 10f | ||
1003 | ldx [%o0 + 0x08], %g2 | ||
1004 | 1: ldx [%o1 + 0x00], %g3 | ||
1005 | ldx [%o1 + 0x08], %g7 | ||
1006 | ldx [%o1 + 0x10], %o4 | ||
1007 | ldx [%o1 + 0x18], %o5 | ||
1008 | xor %g1, %g3, %g3 | ||
1009 | xor %g2, %g7, %g7 | ||
1010 | MOVXTOD_G3_F4 | ||
1011 | MOVXTOD_G7_F6 | ||
1012 | xor %g1, %o4, %g3 | ||
1013 | xor %g2, %o5, %g7 | ||
1014 | MOVXTOD_G3_F0 | ||
1015 | MOVXTOD_G7_F2 | ||
1016 | ENCRYPT_256_2(8, 4, 6, 0, 2) | ||
1017 | std %f4, [%o2 + 0x00] | ||
1018 | std %f6, [%o2 + 0x08] | ||
1019 | std %f0, [%o2 + 0x10] | ||
1020 | std %f2, [%o2 + 0x18] | ||
1021 | sub %o3, 0x20, %o3 | ||
1022 | add %o1, 0x20, %o1 | ||
1023 | brgz %o3, 1b | ||
1024 | add %o2, 0x20, %o2 | ||
1025 | brlz,pt %o3, 11f | ||
1026 | nop | ||
1027 | 10: ldx [%o1 + 0x00], %g3 | ||
1028 | ldx [%o1 + 0x08], %g7 | ||
1029 | xor %g1, %g3, %g3 | ||
1030 | xor %g2, %g7, %g7 | ||
1031 | MOVXTOD_G3_F4 | ||
1032 | MOVXTOD_G7_F6 | ||
1033 | ENCRYPT_256(8, 4, 6, 0, 2) | ||
1034 | std %f4, [%o2 + 0x00] | ||
1035 | std %f6, [%o2 + 0x08] | ||
1036 | 11: retl | ||
1037 | nop | ||
1038 | ENDPROC(aes_sparc64_ecb_encrypt_256) | ||
1039 | |||
1040 | .align 32 | ||
/*
 * Bulk ECB decryption, 128-bit key.  %o0 points one past the end of
 * the expanded key (&key[key_len]), so the last round key — used for
 * the initial whitening when decrypting — sits at negative offsets
 * -0x10/-0x08.  Requires aes_sparc64_load_decrypt_keys_128 to have
 * preloaded the reversed schedule into %f8+.  Two blocks per loop
 * iteration, single-block tail at label 10.
 */
1041 | ENTRY(aes_sparc64_ecb_decrypt_128) | ||
1042 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ | ||
1043 | ldx [%o0 - 0x10], %g1 | ||
1044 | subcc %o3, 0x10, %o3 | ||
1045 | be 10f | ||
1046 | ldx [%o0 - 0x08], %g2 | ||
1047 | 1: ldx [%o1 + 0x00], %g3 | ||
1048 | ldx [%o1 + 0x08], %g7 | ||
1049 | ldx [%o1 + 0x10], %o4 | ||
1050 | ldx [%o1 + 0x18], %o5 | ||
1051 | xor %g1, %g3, %g3 | ||
1052 | xor %g2, %g7, %g7 | ||
1053 | MOVXTOD_G3_F4 | ||
1054 | MOVXTOD_G7_F6 | ||
1055 | xor %g1, %o4, %g3 | ||
1056 | xor %g2, %o5, %g7 | ||
1057 | MOVXTOD_G3_F60 | ||
1058 | MOVXTOD_G7_F62 | ||
1059 | DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
1060 | std %f4, [%o2 + 0x00] | ||
1061 | std %f6, [%o2 + 0x08] | ||
1062 | std %f60, [%o2 + 0x10] | ||
1063 | std %f62, [%o2 + 0x18] | ||
1064 | sub %o3, 0x20, %o3 | ||
1065 | add %o1, 0x20, %o1 | ||
1066 | brgz,pt %o3, 1b | ||
1067 | add %o2, 0x20, %o2 | ||
1068 | brlz,pt %o3, 11f | ||
1069 | nop | ||
1070 | 10: ldx [%o1 + 0x00], %g3 | ||
1071 | ldx [%o1 + 0x08], %g7 | ||
1072 | xor %g1, %g3, %g3 | ||
1073 | xor %g2, %g7, %g7 | ||
1074 | MOVXTOD_G3_F4 | ||
1075 | MOVXTOD_G7_F6 | ||
1076 | DECRYPT_128(8, 4, 6, 0, 2) | ||
1077 | std %f4, [%o2 + 0x00] | ||
1078 | std %f6, [%o2 + 0x08] | ||
1079 | 11: retl | ||
1080 | nop | ||
1081 | ENDPROC(aes_sparc64_ecb_decrypt_128) | ||
1082 | |||
1083 | .align 32 | ||
/*
 * Bulk ECB decryption, 192-bit key.  Same structure as the _128
 * variant; uses DECRYPT_192* against the schedule preloaded by
 * aes_sparc64_load_decrypt_keys_192.
 */
1084 | ENTRY(aes_sparc64_ecb_decrypt_192) | ||
1085 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ | ||
1086 | ldx [%o0 - 0x10], %g1 | ||
1087 | subcc %o3, 0x10, %o3 | ||
1088 | be 10f | ||
1089 | ldx [%o0 - 0x08], %g2 | ||
1090 | 1: ldx [%o1 + 0x00], %g3 | ||
1091 | ldx [%o1 + 0x08], %g7 | ||
1092 | ldx [%o1 + 0x10], %o4 | ||
1093 | ldx [%o1 + 0x18], %o5 | ||
1094 | xor %g1, %g3, %g3 | ||
1095 | xor %g2, %g7, %g7 | ||
1096 | MOVXTOD_G3_F4 | ||
1097 | MOVXTOD_G7_F6 | ||
1098 | xor %g1, %o4, %g3 | ||
1099 | xor %g2, %o5, %g7 | ||
1100 | MOVXTOD_G3_F60 | ||
1101 | MOVXTOD_G7_F62 | ||
1102 | DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
1103 | std %f4, [%o2 + 0x00] | ||
1104 | std %f6, [%o2 + 0x08] | ||
1105 | std %f60, [%o2 + 0x10] | ||
1106 | std %f62, [%o2 + 0x18] | ||
1107 | sub %o3, 0x20, %o3 | ||
1108 | add %o1, 0x20, %o1 | ||
1109 | brgz,pt %o3, 1b | ||
1110 | add %o2, 0x20, %o2 | ||
1111 | brlz,pt %o3, 11f | ||
1112 | nop | ||
1113 | 10: ldx [%o1 + 0x00], %g3 | ||
1114 | ldx [%o1 + 0x08], %g7 | ||
1115 | xor %g1, %g3, %g3 | ||
1116 | xor %g2, %g7, %g7 | ||
1117 | MOVXTOD_G3_F4 | ||
1118 | MOVXTOD_G7_F6 | ||
1119 | DECRYPT_192(8, 4, 6, 0, 2) | ||
1120 | std %f4, [%o2 + 0x00] | ||
1121 | std %f6, [%o2 + 0x08] | ||
1122 | 11: retl | ||
1123 | nop | ||
1124 | ENDPROC(aes_sparc64_ecb_decrypt_192) | ||
1125 | |||
1126 | .align 32 | ||
/*
 * Bulk ECB decryption, 256-bit key.  As for the other 256-bit bulk
 * routines, the second block's state uses %f0/%f2 since the full
 * schedule occupies %f8-%f62.
 */
1127 | ENTRY(aes_sparc64_ecb_decrypt_256) | ||
1128 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ | ||
1129 | ldx [%o0 - 0x10], %g1 | ||
1130 | subcc %o3, 0x10, %o3 | ||
1131 | be 10f | ||
1132 | ldx [%o0 - 0x08], %g2 | ||
/* Only reached on the multi-block path (the single-block `be 10f` above
 * skips it): rewind %o0 from &key[key_len] to the key base, presumably
 * because DECRYPT_256_2 reloads schedule words from [%o0] — confirm
 * against the macro definition. */
1133 | sub %o0, 0xf0, %o0 | ||
1134 | 1: ldx [%o1 + 0x00], %g3 | ||
1135 | ldx [%o1 + 0x08], %g7 | ||
1136 | ldx [%o1 + 0x10], %o4 | ||
1137 | ldx [%o1 + 0x18], %o5 | ||
1138 | xor %g1, %g3, %g3 | ||
1139 | xor %g2, %g7, %g7 | ||
1140 | MOVXTOD_G3_F4 | ||
1141 | MOVXTOD_G7_F6 | ||
1142 | xor %g1, %o4, %g3 | ||
1143 | xor %g2, %o5, %g7 | ||
1144 | MOVXTOD_G3_F0 | ||
1145 | MOVXTOD_G7_F2 | ||
1146 | DECRYPT_256_2(8, 4, 6, 0, 2) | ||
1147 | std %f4, [%o2 + 0x00] | ||
1148 | std %f6, [%o2 + 0x08] | ||
1149 | std %f0, [%o2 + 0x10] | ||
1150 | std %f2, [%o2 + 0x18] | ||
1151 | sub %o3, 0x20, %o3 | ||
1152 | add %o1, 0x20, %o1 | ||
1153 | brgz,pt %o3, 1b | ||
1154 | add %o2, 0x20, %o2 | ||
1155 | brlz,pt %o3, 11f | ||
1156 | nop | ||
1157 | 10: ldx [%o1 + 0x00], %g3 | ||
1158 | ldx [%o1 + 0x08], %g7 | ||
1159 | xor %g1, %g3, %g3 | ||
1160 | xor %g2, %g7, %g7 | ||
1161 | MOVXTOD_G3_F4 | ||
1162 | MOVXTOD_G7_F6 | ||
1163 | DECRYPT_256(8, 4, 6, 0, 2) | ||
1164 | std %f4, [%o2 + 0x00] | ||
1165 | std %f6, [%o2 + 0x08] | ||
1166 | 11: retl | ||
1167 | nop | ||
1168 | ENDPROC(aes_sparc64_ecb_decrypt_256) | ||
1169 | |||
1170 | .align 32 | ||
/*
 * Bulk CBC encryption, 128-bit key.  The running IV lives in %f4/%f6
 * for the whole loop: each plaintext block gets the round-0 key XORed
 * in integer registers, is moved to %f0/%f2, XORed into the IV, and
 * encrypted in place — the ciphertext is simultaneously the next IV.
 * CBC encryption is inherently serial, so only one block per
 * iteration.  The final IV is written back to %o4 for chaining.
 * len must be a non-zero multiple of 16.
 */
1171 | ENTRY(aes_sparc64_cbc_encrypt_128) | ||
1172 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1173 | ldd [%o4 + 0x00], %f4 | ||
1174 | ldd [%o4 + 0x08], %f6 | ||
1175 | ldx [%o0 + 0x00], %g1 | ||
1176 | ldx [%o0 + 0x08], %g2 | ||
1177 | 1: ldx [%o1 + 0x00], %g3 | ||
1178 | ldx [%o1 + 0x08], %g7 | ||
1179 | add %o1, 0x10, %o1 | ||
1180 | xor %g1, %g3, %g3 | ||
1181 | xor %g2, %g7, %g7 | ||
1182 | MOVXTOD_G3_F0 | ||
1183 | MOVXTOD_G7_F2 | ||
1184 | fxor %f4, %f0, %f4 | ||
1185 | fxor %f6, %f2, %f6 | ||
1186 | ENCRYPT_128(8, 4, 6, 0, 2) | ||
1187 | std %f4, [%o2 + 0x00] | ||
1188 | std %f6, [%o2 + 0x08] | ||
1189 | subcc %o3, 0x10, %o3 | ||
1190 | bne,pt %xcc, 1b | ||
1191 | add %o2, 0x10, %o2 | ||
/* Store the last ciphertext block back as the updated IV. */
1192 | std %f4, [%o4 + 0x00] | ||
1193 | std %f6, [%o4 + 0x08] | ||
1194 | retl | ||
1195 | nop | ||
1196 | ENDPROC(aes_sparc64_cbc_encrypt_128) | ||
1197 | |||
1198 | .align 32 | ||
/*
 * Bulk CBC encryption, 192-bit key.  Identical structure to the _128
 * variant; see its comment.  Uses ENCRYPT_192 against the preloaded
 * schedule.
 */
1199 | ENTRY(aes_sparc64_cbc_encrypt_192) | ||
1200 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1201 | ldd [%o4 + 0x00], %f4 | ||
1202 | ldd [%o4 + 0x08], %f6 | ||
1203 | ldx [%o0 + 0x00], %g1 | ||
1204 | ldx [%o0 + 0x08], %g2 | ||
1205 | 1: ldx [%o1 + 0x00], %g3 | ||
1206 | ldx [%o1 + 0x08], %g7 | ||
1207 | add %o1, 0x10, %o1 | ||
1208 | xor %g1, %g3, %g3 | ||
1209 | xor %g2, %g7, %g7 | ||
1210 | MOVXTOD_G3_F0 | ||
1211 | MOVXTOD_G7_F2 | ||
1212 | fxor %f4, %f0, %f4 | ||
1213 | fxor %f6, %f2, %f6 | ||
1214 | ENCRYPT_192(8, 4, 6, 0, 2) | ||
1215 | std %f4, [%o2 + 0x00] | ||
1216 | std %f6, [%o2 + 0x08] | ||
1217 | subcc %o3, 0x10, %o3 | ||
1218 | bne,pt %xcc, 1b | ||
1219 | add %o2, 0x10, %o2 | ||
1220 | std %f4, [%o4 + 0x00] | ||
1221 | std %f6, [%o4 + 0x08] | ||
1222 | retl | ||
1223 | nop | ||
1224 | ENDPROC(aes_sparc64_cbc_encrypt_192) | ||
1225 | |||
1226 | .align 32 | ||
/*
 * Bulk CBC encryption, 256-bit key.  Identical structure to the _128
 * variant; see its comment.  Uses ENCRYPT_256 against the preloaded
 * schedule.
 */
1227 | ENTRY(aes_sparc64_cbc_encrypt_256) | ||
1228 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1229 | ldd [%o4 + 0x00], %f4 | ||
1230 | ldd [%o4 + 0x08], %f6 | ||
1231 | ldx [%o0 + 0x00], %g1 | ||
1232 | ldx [%o0 + 0x08], %g2 | ||
1233 | 1: ldx [%o1 + 0x00], %g3 | ||
1234 | ldx [%o1 + 0x08], %g7 | ||
1235 | add %o1, 0x10, %o1 | ||
1236 | xor %g1, %g3, %g3 | ||
1237 | xor %g2, %g7, %g7 | ||
1238 | MOVXTOD_G3_F0 | ||
1239 | MOVXTOD_G7_F2 | ||
1240 | fxor %f4, %f0, %f4 | ||
1241 | fxor %f6, %f2, %f6 | ||
1242 | ENCRYPT_256(8, 4, 6, 0, 2) | ||
1243 | std %f4, [%o2 + 0x00] | ||
1244 | std %f6, [%o2 + 0x08] | ||
1245 | subcc %o3, 0x10, %o3 | ||
1246 | bne,pt %xcc, 1b | ||
1247 | add %o2, 0x10, %o2 | ||
1248 | std %f4, [%o4 + 0x00] | ||
1249 | std %f6, [%o4 + 0x08] | ||
1250 | retl | ||
1251 | nop | ||
1252 | ENDPROC(aes_sparc64_cbc_encrypt_256) | ||
1253 | |||
1254 | .align 32 | ||
/*
 * Bulk CBC decryption, 128-bit key.  After reading the last-round key
 * words at -0x10/-0x08, %o0 is REPURPOSED to hold the IV high word
 * (the key pointer is no longer needed — the schedule is already in
 * %g1/%g2 and the preloaded FP registers).  Per block: whiten and
 * decrypt the ciphertext, XOR the previous ciphertext (IV, kept in
 * %o0/%o5) into the result, then promote this block's ciphertext
 * (still in %g3/%g7 after DECRYPT_128) to be the next IV.
 * The final IV is stored back to %o4.
 */
1255 | ENTRY(aes_sparc64_cbc_decrypt_128) | ||
1256 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ | ||
1257 | ldx [%o0 - 0x10], %g1 | ||
1258 | ldx [%o0 - 0x08], %g2 | ||
1259 | ldx [%o4 + 0x00], %o0 | ||
1260 | ldx [%o4 + 0x08], %o5 | ||
1261 | 1: ldx [%o1 + 0x00], %g3 | ||
1262 | ldx [%o1 + 0x08], %g7 | ||
1263 | add %o1, 0x10, %o1 | ||
1264 | xor %g1, %g3, %g3 | ||
1265 | xor %g2, %g7, %g7 | ||
1266 | MOVXTOD_G3_F4 | ||
1267 | MOVXTOD_G7_F6 | ||
1268 | DECRYPT_128(8, 4, 6, 0, 2) | ||
1269 | MOVXTOD_O0_F0 | ||
1270 | MOVXTOD_O5_F2 | ||
/* Undo the whitening xor to recover this block's raw ciphertext as the
 * next IV; avoids re-reading it from memory. */
1271 | xor %g1, %g3, %o0 | ||
1272 | xor %g2, %g7, %o5 | ||
1273 | fxor %f4, %f0, %f4 | ||
1274 | fxor %f6, %f2, %f6 | ||
1275 | std %f4, [%o2 + 0x00] | ||
1276 | std %f6, [%o2 + 0x08] | ||
1277 | subcc %o3, 0x10, %o3 | ||
1278 | bne,pt %xcc, 1b | ||
1279 | add %o2, 0x10, %o2 | ||
1280 | stx %o0, [%o4 + 0x00] | ||
1281 | stx %o5, [%o4 + 0x08] | ||
1282 | retl | ||
1283 | nop | ||
1284 | ENDPROC(aes_sparc64_cbc_decrypt_128) | ||
1285 | |||
1286 | .align 32 | ||
/*
 * Bulk CBC decryption, 192-bit key.  Identical structure to the _128
 * variant (including the %o0-as-IV register reuse); see its comment.
 */
1287 | ENTRY(aes_sparc64_cbc_decrypt_192) | ||
1288 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ | ||
1289 | ldx [%o0 - 0x10], %g1 | ||
1290 | ldx [%o0 - 0x08], %g2 | ||
1291 | ldx [%o4 + 0x00], %o0 | ||
1292 | ldx [%o4 + 0x08], %o5 | ||
1293 | 1: ldx [%o1 + 0x00], %g3 | ||
1294 | ldx [%o1 + 0x08], %g7 | ||
1295 | add %o1, 0x10, %o1 | ||
1296 | xor %g1, %g3, %g3 | ||
1297 | xor %g2, %g7, %g7 | ||
1298 | MOVXTOD_G3_F4 | ||
1299 | MOVXTOD_G7_F6 | ||
1300 | DECRYPT_192(8, 4, 6, 0, 2) | ||
1301 | MOVXTOD_O0_F0 | ||
1302 | MOVXTOD_O5_F2 | ||
1303 | xor %g1, %g3, %o0 | ||
1304 | xor %g2, %g7, %o5 | ||
1305 | fxor %f4, %f0, %f4 | ||
1306 | fxor %f6, %f2, %f6 | ||
1307 | std %f4, [%o2 + 0x00] | ||
1308 | std %f6, [%o2 + 0x08] | ||
1309 | subcc %o3, 0x10, %o3 | ||
1310 | bne,pt %xcc, 1b | ||
1311 | add %o2, 0x10, %o2 | ||
1312 | stx %o0, [%o4 + 0x00] | ||
1313 | stx %o5, [%o4 + 0x08] | ||
1314 | retl | ||
1315 | nop | ||
1316 | ENDPROC(aes_sparc64_cbc_decrypt_192) | ||
1317 | |||
1318 | .align 32 | ||
/*
 * Bulk CBC decryption, 256-bit key.  Identical structure to the _128
 * variant (including the %o0-as-IV register reuse); see its comment.
 */
1319 | ENTRY(aes_sparc64_cbc_decrypt_256) | ||
1320 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ | ||
1321 | ldx [%o0 - 0x10], %g1 | ||
1322 | ldx [%o0 - 0x08], %g2 | ||
1323 | ldx [%o4 + 0x00], %o0 | ||
1324 | ldx [%o4 + 0x08], %o5 | ||
1325 | 1: ldx [%o1 + 0x00], %g3 | ||
1326 | ldx [%o1 + 0x08], %g7 | ||
1327 | add %o1, 0x10, %o1 | ||
1328 | xor %g1, %g3, %g3 | ||
1329 | xor %g2, %g7, %g7 | ||
1330 | MOVXTOD_G3_F4 | ||
1331 | MOVXTOD_G7_F6 | ||
1332 | DECRYPT_256(8, 4, 6, 0, 2) | ||
1333 | MOVXTOD_O0_F0 | ||
1334 | MOVXTOD_O5_F2 | ||
1335 | xor %g1, %g3, %o0 | ||
1336 | xor %g2, %g7, %o5 | ||
1337 | fxor %f4, %f0, %f4 | ||
1338 | fxor %f6, %f2, %f6 | ||
1339 | std %f4, [%o2 + 0x00] | ||
1340 | std %f6, [%o2 + 0x08] | ||
1341 | subcc %o3, 0x10, %o3 | ||
1342 | bne,pt %xcc, 1b | ||
1343 | add %o2, 0x10, %o2 | ||
1344 | stx %o0, [%o4 + 0x00] | ||
1345 | stx %o5, [%o4 + 0x08] | ||
1346 | retl | ||
1347 | nop | ||
1348 | ENDPROC(aes_sparc64_cbc_decrypt_256) | ||
1349 | |||
1350 | .align 32 | ||
/*
 * Bulk CTR mode, 128-bit key (encrypt and decrypt are the same
 * operation).  The 128-bit counter is kept in %g3 (high)/%g7 (low) and
 * incremented as a big-endian value: %g7 gets +1, and `movrz %g7`
 * conditionally takes the pre-incremented %g3 when %g7 wrapped to
 * zero (carry into the high word).  Each counter value is whitened
 * with the round-0 key, encrypted, and XORed against the input.
 * Two counter blocks per loop iteration via ENCRYPT_128_2; a single
 * leftover block is handled at label 10.  The updated counter is
 * stored back to %o4 on exit.
 */
1351 | ENTRY(aes_sparc64_ctr_crypt_128) | ||
1352 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1353 | ldx [%o4 + 0x00], %g3 | ||
1354 | ldx [%o4 + 0x08], %g7 | ||
1355 | subcc %o3, 0x10, %o3 | ||
1356 | ldx [%o0 + 0x00], %g1 | ||
1357 | be 10f | ||
1358 | ldx [%o0 + 0x08], %g2 | ||
1359 | 1: xor %g1, %g3, %o5 | ||
1360 | MOVXTOD_O5_F0 | ||
1361 | xor %g2, %g7, %o5 | ||
1362 | MOVXTOD_O5_F2 | ||
/* 128-bit big-endian increment: bump low word, carry into high word
 * only if the low word wrapped to zero. */
1363 | add %g7, 1, %g7 | ||
1364 | add %g3, 1, %o5 | ||
1365 | movrz %g7, %o5, %g3 | ||
1366 | xor %g1, %g3, %o5 | ||
1367 | MOVXTOD_O5_F4 | ||
1368 | xor %g2, %g7, %o5 | ||
1369 | MOVXTOD_O5_F6 | ||
1370 | add %g7, 1, %g7 | ||
1371 | add %g3, 1, %o5 | ||
1372 | movrz %g7, %o5, %g3 | ||
1373 | ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62) | ||
1374 | ldd [%o1 + 0x00], %f56 | ||
1375 | ldd [%o1 + 0x08], %f58 | ||
1376 | ldd [%o1 + 0x10], %f60 | ||
1377 | ldd [%o1 + 0x18], %f62 | ||
1378 | fxor %f56, %f0, %f56 | ||
1379 | fxor %f58, %f2, %f58 | ||
1380 | fxor %f60, %f4, %f60 | ||
1381 | fxor %f62, %f6, %f62 | ||
1382 | std %f56, [%o2 + 0x00] | ||
1383 | std %f58, [%o2 + 0x08] | ||
1384 | std %f60, [%o2 + 0x10] | ||
1385 | std %f62, [%o2 + 0x18] | ||
1386 | subcc %o3, 0x20, %o3 | ||
1387 | add %o1, 0x20, %o1 | ||
1388 | brgz %o3, 1b | ||
1389 | add %o2, 0x20, %o2 | ||
1390 | brlz,pt %o3, 11f | ||
1391 | nop | ||
1392 | 10: xor %g1, %g3, %o5 | ||
1393 | MOVXTOD_O5_F0 | ||
1394 | xor %g2, %g7, %o5 | ||
1395 | MOVXTOD_O5_F2 | ||
1396 | add %g7, 1, %g7 | ||
1397 | add %g3, 1, %o5 | ||
1398 | movrz %g7, %o5, %g3 | ||
1399 | ENCRYPT_128(8, 0, 2, 4, 6) | ||
1400 | ldd [%o1 + 0x00], %f4 | ||
1401 | ldd [%o1 + 0x08], %f6 | ||
1402 | fxor %f4, %f0, %f4 | ||
1403 | fxor %f6, %f2, %f6 | ||
1404 | std %f4, [%o2 + 0x00] | ||
1405 | std %f6, [%o2 + 0x08] | ||
/* Persist the advanced counter for the next call. */
1406 | 11: stx %g3, [%o4 + 0x00] | ||
1407 | retl | ||
1408 | stx %g7, [%o4 + 0x08] | ||
1409 | ENDPROC(aes_sparc64_ctr_crypt_128) | ||
1410 | |||
1411 | .align 32 | ||
/*
 * Bulk CTR mode, 192-bit key.  Identical structure to the _128 variant
 * (big-endian counter in %g3/%g7, two blocks per iteration, tail at
 * label 10, counter written back on exit); see its comment.
 */
1412 | ENTRY(aes_sparc64_ctr_crypt_192) | ||
1413 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1414 | ldx [%o4 + 0x00], %g3 | ||
1415 | ldx [%o4 + 0x08], %g7 | ||
1416 | subcc %o3, 0x10, %o3 | ||
1417 | ldx [%o0 + 0x00], %g1 | ||
1418 | be 10f | ||
1419 | ldx [%o0 + 0x08], %g2 | ||
1420 | 1: xor %g1, %g3, %o5 | ||
1421 | MOVXTOD_O5_F0 | ||
1422 | xor %g2, %g7, %o5 | ||
1423 | MOVXTOD_O5_F2 | ||
1424 | add %g7, 1, %g7 | ||
1425 | add %g3, 1, %o5 | ||
1426 | movrz %g7, %o5, %g3 | ||
1427 | xor %g1, %g3, %o5 | ||
1428 | MOVXTOD_O5_F4 | ||
1429 | xor %g2, %g7, %o5 | ||
1430 | MOVXTOD_O5_F6 | ||
1431 | add %g7, 1, %g7 | ||
1432 | add %g3, 1, %o5 | ||
1433 | movrz %g7, %o5, %g3 | ||
1434 | ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62) | ||
1435 | ldd [%o1 + 0x00], %f56 | ||
1436 | ldd [%o1 + 0x08], %f58 | ||
1437 | ldd [%o1 + 0x10], %f60 | ||
1438 | ldd [%o1 + 0x18], %f62 | ||
1439 | fxor %f56, %f0, %f56 | ||
1440 | fxor %f58, %f2, %f58 | ||
1441 | fxor %f60, %f4, %f60 | ||
1442 | fxor %f62, %f6, %f62 | ||
1443 | std %f56, [%o2 + 0x00] | ||
1444 | std %f58, [%o2 + 0x08] | ||
1445 | std %f60, [%o2 + 0x10] | ||
1446 | std %f62, [%o2 + 0x18] | ||
1447 | subcc %o3, 0x20, %o3 | ||
1448 | add %o1, 0x20, %o1 | ||
1449 | brgz %o3, 1b | ||
1450 | add %o2, 0x20, %o2 | ||
1451 | brlz,pt %o3, 11f | ||
1452 | nop | ||
1453 | 10: xor %g1, %g3, %o5 | ||
1454 | MOVXTOD_O5_F0 | ||
1455 | xor %g2, %g7, %o5 | ||
1456 | MOVXTOD_O5_F2 | ||
1457 | add %g7, 1, %g7 | ||
1458 | add %g3, 1, %o5 | ||
1459 | movrz %g7, %o5, %g3 | ||
1460 | ENCRYPT_192(8, 0, 2, 4, 6) | ||
1461 | ldd [%o1 + 0x00], %f4 | ||
1462 | ldd [%o1 + 0x08], %f6 | ||
1463 | fxor %f4, %f0, %f4 | ||
1464 | fxor %f6, %f2, %f6 | ||
1465 | std %f4, [%o2 + 0x00] | ||
1466 | std %f6, [%o2 + 0x08] | ||
1467 | 11: stx %g3, [%o4 + 0x00] | ||
1468 | retl | ||
1469 | stx %g7, [%o4 + 0x08] | ||
1470 | ENDPROC(aes_sparc64_ctr_crypt_192) | ||
1471 | |||
1472 | .align 32 | ||
/*
 * Bulk CTR mode, 256-bit key.  Same counter handling as the _128
 * variant, but with a twist: the 256-bit schedule fills %f8-%f62, so
 * ENCRYPT_256_2 has no free FP temporaries and (unlike the smaller
 * key sizes) this loop reuses %f56-%f62 — the last four schedule
 * registers — for input/output staging.  Before falling through from
 * the two-block loop to the single-block tail, those registers are
 * restored from the key at offsets 0xd0-0xe8 (the same words
 * aes_sparc64_load_encrypt_keys_256 put there).  The `be 10f` entry
 * path skips the restore because the schedule is still intact then.
 */
1473 | ENTRY(aes_sparc64_ctr_crypt_256) | ||
1474 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1475 | ldx [%o4 + 0x00], %g3 | ||
1476 | ldx [%o4 + 0x08], %g7 | ||
1477 | subcc %o3, 0x10, %o3 | ||
1478 | ldx [%o0 + 0x00], %g1 | ||
1479 | be 10f | ||
1480 | ldx [%o0 + 0x08], %g2 | ||
1481 | 1: xor %g1, %g3, %o5 | ||
1482 | MOVXTOD_O5_F0 | ||
1483 | xor %g2, %g7, %o5 | ||
1484 | MOVXTOD_O5_F2 | ||
1485 | add %g7, 1, %g7 | ||
1486 | add %g3, 1, %o5 | ||
1487 | movrz %g7, %o5, %g3 | ||
1488 | xor %g1, %g3, %o5 | ||
1489 | MOVXTOD_O5_F4 | ||
1490 | xor %g2, %g7, %o5 | ||
1491 | MOVXTOD_O5_F6 | ||
1492 | add %g7, 1, %g7 | ||
1493 | add %g3, 1, %o5 | ||
1494 | movrz %g7, %o5, %g3 | ||
1495 | ENCRYPT_256_2(8, 0, 2, 4, 6) | ||
1496 | ldd [%o1 + 0x00], %f56 | ||
1497 | ldd [%o1 + 0x08], %f58 | ||
1498 | ldd [%o1 + 0x10], %f60 | ||
1499 | ldd [%o1 + 0x18], %f62 | ||
1500 | fxor %f56, %f0, %f56 | ||
1501 | fxor %f58, %f2, %f58 | ||
1502 | fxor %f60, %f4, %f60 | ||
1503 | fxor %f62, %f6, %f62 | ||
1504 | std %f56, [%o2 + 0x00] | ||
1505 | std %f58, [%o2 + 0x08] | ||
1506 | std %f60, [%o2 + 0x10] | ||
1507 | std %f62, [%o2 + 0x18] | ||
1508 | subcc %o3, 0x20, %o3 | ||
1509 | add %o1, 0x20, %o1 | ||
1510 | brgz %o3, 1b | ||
1511 | add %o2, 0x20, %o2 | ||
1512 | brlz,pt %o3, 11f | ||
1513 | nop | ||
/* Re-establish the clobbered tail of the key schedule (%f56-%f62)
 * before running the single-block ENCRYPT_256 below. */
1514 | ldd [%o0 + 0xd0], %f56 | ||
1515 | ldd [%o0 + 0xd8], %f58 | ||
1516 | ldd [%o0 + 0xe0], %f60 | ||
1517 | ldd [%o0 + 0xe8], %f62 | ||
1518 | 10: xor %g1, %g3, %o5 | ||
1519 | MOVXTOD_O5_F0 | ||
1520 | xor %g2, %g7, %o5 | ||
1521 | MOVXTOD_O5_F2 | ||
1522 | add %g7, 1, %g7 | ||
1523 | add %g3, 1, %o5 | ||
1524 | movrz %g7, %o5, %g3 | ||
1525 | ENCRYPT_256(8, 0, 2, 4, 6) | ||
1526 | ldd [%o1 + 0x00], %f4 | ||
1527 | ldd [%o1 + 0x08], %f6 | ||
1528 | fxor %f4, %f0, %f4 | ||
1529 | fxor %f6, %f2, %f6 | ||
1530 | std %f4, [%o2 + 0x00] | ||
1531 | std %f6, [%o2 + 0x08] | ||
1532 | 11: stx %g3, [%o4 + 0x00] | ||
1533 | retl | ||
1534 | stx %g7, [%o4 + 0x08] | ||
1535 | ENDPROC(aes_sparc64_ctr_crypt_256) | ||
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c new file mode 100644 index 000000000000..8f1c9980f637 --- /dev/null +++ b/arch/sparc/crypto/aes_glue.c | |||
@@ -0,0 +1,477 @@ | |||
1 | /* Glue code for AES encryption optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon arch/x86/crypto/aesni-intel_glue.c | ||
4 | * | ||
5 | * Copyright (C) 2008, Intel Corp. | ||
6 | * Author: Huang Ying <ying.huang@intel.com> | ||
7 | * | ||
8 | * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD | ||
9 | * interface for 64-bit kernels. | ||
10 | * Authors: Adrian Hoban <adrian.hoban@intel.com> | ||
11 | * Gabriele Paoloni <gabriele.paoloni@intel.com> | ||
12 | * Tadeusz Struk (tadeusz.struk@intel.com) | ||
13 | * Aidan O'Mahony (aidan.o.mahony@intel.com) | ||
14 | * Copyright (c) 2010, Intel Corporation. | ||
15 | */ | ||
16 | |||
17 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
18 | |||
19 | #include <linux/crypto.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/mm.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <crypto/algapi.h> | ||
25 | #include <crypto/aes.h> | ||
26 | |||
27 | #include <asm/fpumacro.h> | ||
28 | #include <asm/pstate.h> | ||
29 | #include <asm/elf.h> | ||
30 | |||
31 | #include "opcodes.h" | ||
32 | |||
/* Dispatch table of per-key-size entry points into the sparc64 AES
 * opcode assembler routines.  One instance exists for each of the
 * 128/192/256-bit key sizes; aes_set_key() selects the right one.
 * All routines take the expanded key schedule as an array of u64.
 */
struct aes_ops {
	/* Single-block primitives. */
	void (*encrypt)(const u64 *key, const u32 *input, u32 *output);
	void (*decrypt)(const u64 *key, const u32 *input, u32 *output);
	/* Preload the key schedule into FPU registers for the bulk ops. */
	void (*load_encrypt_keys)(const u64 *key);
	void (*load_decrypt_keys)(const u64 *key);
	/* Bulk mode helpers; len is in bytes, a multiple of the block size. */
	void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output,
			    unsigned int len);
	void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output,
			    unsigned int len);
	void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output,
			    unsigned int len, u64 *iv);
	void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output,
			    unsigned int len, u64 *iv);
	void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output,
			  unsigned int len, u64 *iv);
};
49 | |||
/* Per-transform context: the ops table chosen at setkey time plus the
 * expanded key schedule, stored as 64-bit words for the asm routines.
 */
struct crypto_sparc64_aes_ctx {
	struct aes_ops *ops;
	u64 key[AES_MAX_KEYLENGTH / sizeof(u64)];
	u32 key_length;			/* original key length in bytes */
	u32 expanded_key_length;	/* expanded schedule length in bytes */
};
56 | |||
57 | extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input, | ||
58 | u32 *output); | ||
59 | extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input, | ||
60 | u32 *output); | ||
61 | extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input, | ||
62 | u32 *output); | ||
63 | |||
64 | extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input, | ||
65 | u32 *output); | ||
66 | extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input, | ||
67 | u32 *output); | ||
68 | extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input, | ||
69 | u32 *output); | ||
70 | |||
71 | extern void aes_sparc64_load_encrypt_keys_128(const u64 *key); | ||
72 | extern void aes_sparc64_load_encrypt_keys_192(const u64 *key); | ||
73 | extern void aes_sparc64_load_encrypt_keys_256(const u64 *key); | ||
74 | |||
75 | extern void aes_sparc64_load_decrypt_keys_128(const u64 *key); | ||
76 | extern void aes_sparc64_load_decrypt_keys_192(const u64 *key); | ||
77 | extern void aes_sparc64_load_decrypt_keys_256(const u64 *key); | ||
78 | |||
79 | extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input, | ||
80 | u64 *output, unsigned int len); | ||
81 | extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input, | ||
82 | u64 *output, unsigned int len); | ||
83 | extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input, | ||
84 | u64 *output, unsigned int len); | ||
85 | |||
86 | extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input, | ||
87 | u64 *output, unsigned int len); | ||
88 | extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input, | ||
89 | u64 *output, unsigned int len); | ||
90 | extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input, | ||
91 | u64 *output, unsigned int len); | ||
92 | |||
93 | extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input, | ||
94 | u64 *output, unsigned int len, | ||
95 | u64 *iv); | ||
96 | |||
97 | extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input, | ||
98 | u64 *output, unsigned int len, | ||
99 | u64 *iv); | ||
100 | |||
101 | extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input, | ||
102 | u64 *output, unsigned int len, | ||
103 | u64 *iv); | ||
104 | |||
105 | extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input, | ||
106 | u64 *output, unsigned int len, | ||
107 | u64 *iv); | ||
108 | |||
109 | extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input, | ||
110 | u64 *output, unsigned int len, | ||
111 | u64 *iv); | ||
112 | |||
113 | extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input, | ||
114 | u64 *output, unsigned int len, | ||
115 | u64 *iv); | ||
116 | |||
117 | extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input, | ||
118 | u64 *output, unsigned int len, | ||
119 | u64 *iv); | ||
120 | extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input, | ||
121 | u64 *output, unsigned int len, | ||
122 | u64 *iv); | ||
123 | extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input, | ||
124 | u64 *output, unsigned int len, | ||
125 | u64 *iv); | ||
126 | |||
/* Entry points for 128-bit (10-round) AES keys. */
struct aes_ops aes128_ops = {
	.encrypt		= aes_sparc64_encrypt_128,
	.decrypt		= aes_sparc64_decrypt_128,
	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_128,
	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_128,
	.ecb_encrypt		= aes_sparc64_ecb_encrypt_128,
	.ecb_decrypt		= aes_sparc64_ecb_decrypt_128,
	.cbc_encrypt		= aes_sparc64_cbc_encrypt_128,
	.cbc_decrypt		= aes_sparc64_cbc_decrypt_128,
	.ctr_crypt		= aes_sparc64_ctr_crypt_128,
};
138 | |||
/* Entry points for 192-bit (12-round) AES keys. */
struct aes_ops aes192_ops = {
	.encrypt		= aes_sparc64_encrypt_192,
	.decrypt		= aes_sparc64_decrypt_192,
	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_192,
	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_192,
	.ecb_encrypt		= aes_sparc64_ecb_encrypt_192,
	.ecb_decrypt		= aes_sparc64_ecb_decrypt_192,
	.cbc_encrypt		= aes_sparc64_cbc_encrypt_192,
	.cbc_decrypt		= aes_sparc64_cbc_decrypt_192,
	.ctr_crypt		= aes_sparc64_ctr_crypt_192,
};
150 | |||
/* Entry points for 256-bit (14-round) AES keys. */
struct aes_ops aes256_ops = {
	.encrypt		= aes_sparc64_encrypt_256,
	.decrypt		= aes_sparc64_decrypt_256,
	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_256,
	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_256,
	.ecb_encrypt		= aes_sparc64_ecb_encrypt_256,
	.ecb_decrypt		= aes_sparc64_ecb_decrypt_256,
	.cbc_encrypt		= aes_sparc64_cbc_encrypt_256,
	.cbc_decrypt		= aes_sparc64_cbc_decrypt_256,
	.ctr_crypt		= aes_sparc64_ctr_crypt_256,
};
162 | |||
163 | extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key, | ||
164 | unsigned int key_len); | ||
165 | |||
166 | static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
167 | unsigned int key_len) | ||
168 | { | ||
169 | struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
170 | u32 *flags = &tfm->crt_flags; | ||
171 | |||
172 | switch (key_len) { | ||
173 | case AES_KEYSIZE_128: | ||
174 | ctx->expanded_key_length = 0xb0; | ||
175 | ctx->ops = &aes128_ops; | ||
176 | break; | ||
177 | |||
178 | case AES_KEYSIZE_192: | ||
179 | ctx->expanded_key_length = 0xd0; | ||
180 | ctx->ops = &aes192_ops; | ||
181 | break; | ||
182 | |||
183 | case AES_KEYSIZE_256: | ||
184 | ctx->expanded_key_length = 0xf0; | ||
185 | ctx->ops = &aes256_ops; | ||
186 | break; | ||
187 | |||
188 | default: | ||
189 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
190 | return -EINVAL; | ||
191 | } | ||
192 | |||
193 | aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len); | ||
194 | ctx->key_length = key_len; | ||
195 | |||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
200 | { | ||
201 | struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
202 | |||
203 | ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); | ||
204 | } | ||
205 | |||
206 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
207 | { | ||
208 | struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
209 | |||
210 | ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); | ||
211 | } | ||
212 | |||
213 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) | ||
214 | |||
215 | static int ecb_encrypt(struct blkcipher_desc *desc, | ||
216 | struct scatterlist *dst, struct scatterlist *src, | ||
217 | unsigned int nbytes) | ||
218 | { | ||
219 | struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
220 | struct blkcipher_walk walk; | ||
221 | int err; | ||
222 | |||
223 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
224 | err = blkcipher_walk_virt(desc, &walk); | ||
225 | |||
226 | ctx->ops->load_encrypt_keys(&ctx->key[0]); | ||
227 | while ((nbytes = walk.nbytes)) { | ||
228 | unsigned int block_len = nbytes & AES_BLOCK_MASK; | ||
229 | |||
230 | if (likely(block_len)) { | ||
231 | ctx->ops->ecb_encrypt(&ctx->key[0], | ||
232 | (const u64 *)walk.src.virt.addr, | ||
233 | (u64 *) walk.dst.virt.addr, | ||
234 | block_len); | ||
235 | } | ||
236 | nbytes &= AES_BLOCK_SIZE - 1; | ||
237 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
238 | } | ||
239 | fprs_write(0); | ||
240 | return err; | ||
241 | } | ||
242 | |||
243 | static int ecb_decrypt(struct blkcipher_desc *desc, | ||
244 | struct scatterlist *dst, struct scatterlist *src, | ||
245 | unsigned int nbytes) | ||
246 | { | ||
247 | struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
248 | struct blkcipher_walk walk; | ||
249 | u64 *key_end; | ||
250 | int err; | ||
251 | |||
252 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
253 | err = blkcipher_walk_virt(desc, &walk); | ||
254 | |||
255 | ctx->ops->load_decrypt_keys(&ctx->key[0]); | ||
256 | key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; | ||
257 | while ((nbytes = walk.nbytes)) { | ||
258 | unsigned int block_len = nbytes & AES_BLOCK_MASK; | ||
259 | |||
260 | if (likely(block_len)) { | ||
261 | ctx->ops->ecb_decrypt(key_end, | ||
262 | (const u64 *) walk.src.virt.addr, | ||
263 | (u64 *) walk.dst.virt.addr, block_len); | ||
264 | } | ||
265 | nbytes &= AES_BLOCK_SIZE - 1; | ||
266 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
267 | } | ||
268 | fprs_write(0); | ||
269 | |||
270 | return err; | ||
271 | } | ||
272 | |||
273 | static int cbc_encrypt(struct blkcipher_desc *desc, | ||
274 | struct scatterlist *dst, struct scatterlist *src, | ||
275 | unsigned int nbytes) | ||
276 | { | ||
277 | struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
278 | struct blkcipher_walk walk; | ||
279 | int err; | ||
280 | |||
281 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
282 | err = blkcipher_walk_virt(desc, &walk); | ||
283 | |||
284 | ctx->ops->load_encrypt_keys(&ctx->key[0]); | ||
285 | while ((nbytes = walk.nbytes)) { | ||
286 | unsigned int block_len = nbytes & AES_BLOCK_MASK; | ||
287 | |||
288 | if (likely(block_len)) { | ||
289 | ctx->ops->cbc_encrypt(&ctx->key[0], | ||
290 | (const u64 *)walk.src.virt.addr, | ||
291 | (u64 *) walk.dst.virt.addr, | ||
292 | block_len, (u64 *) walk.iv); | ||
293 | } | ||
294 | nbytes &= AES_BLOCK_SIZE - 1; | ||
295 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
296 | } | ||
297 | fprs_write(0); | ||
298 | return err; | ||
299 | } | ||
300 | |||
301 | static int cbc_decrypt(struct blkcipher_desc *desc, | ||
302 | struct scatterlist *dst, struct scatterlist *src, | ||
303 | unsigned int nbytes) | ||
304 | { | ||
305 | struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
306 | struct blkcipher_walk walk; | ||
307 | u64 *key_end; | ||
308 | int err; | ||
309 | |||
310 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
311 | err = blkcipher_walk_virt(desc, &walk); | ||
312 | |||
313 | ctx->ops->load_decrypt_keys(&ctx->key[0]); | ||
314 | key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; | ||
315 | while ((nbytes = walk.nbytes)) { | ||
316 | unsigned int block_len = nbytes & AES_BLOCK_MASK; | ||
317 | |||
318 | if (likely(block_len)) { | ||
319 | ctx->ops->cbc_decrypt(key_end, | ||
320 | (const u64 *) walk.src.virt.addr, | ||
321 | (u64 *) walk.dst.virt.addr, | ||
322 | block_len, (u64 *) walk.iv); | ||
323 | } | ||
324 | nbytes &= AES_BLOCK_SIZE - 1; | ||
325 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
326 | } | ||
327 | fprs_write(0); | ||
328 | |||
329 | return err; | ||
330 | } | ||
331 | |||
332 | static int ctr_crypt(struct blkcipher_desc *desc, | ||
333 | struct scatterlist *dst, struct scatterlist *src, | ||
334 | unsigned int nbytes) | ||
335 | { | ||
336 | struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
337 | struct blkcipher_walk walk; | ||
338 | int err; | ||
339 | |||
340 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
341 | err = blkcipher_walk_virt(desc, &walk); | ||
342 | |||
343 | ctx->ops->load_encrypt_keys(&ctx->key[0]); | ||
344 | while ((nbytes = walk.nbytes)) { | ||
345 | unsigned int block_len = nbytes & AES_BLOCK_MASK; | ||
346 | |||
347 | if (likely(block_len)) { | ||
348 | ctx->ops->ctr_crypt(&ctx->key[0], | ||
349 | (const u64 *)walk.src.virt.addr, | ||
350 | (u64 *) walk.dst.virt.addr, | ||
351 | block_len, (u64 *) walk.iv); | ||
352 | } | ||
353 | nbytes &= AES_BLOCK_SIZE - 1; | ||
354 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
355 | } | ||
356 | fprs_write(0); | ||
357 | return err; | ||
358 | } | ||
359 | |||
/* Algorithm registrations: the bare "aes" cipher plus ECB/CBC/CTR
 * blkcipher modes, all backed by the sparc64 opcode routines.  The
 * alignmasks reflect what the asm needs: 32-bit loads for the single
 * block cipher, 64-bit loads for the bulk routines.
 */
static struct crypto_alg algs[] = { {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 3,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey		= aes_set_key,
			.cia_encrypt		= aes_encrypt,
			.cia_decrypt		= aes_decrypt
		}
	}
}, {
	.cra_name		= "ecb(aes)",
	.cra_driver_name	= "ecb-aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(aes)",
	.cra_driver_name	= "cbc-aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	/* NOTE(review): CTR is registered with a 16-byte blocksize, so
	 * requests must be block-aligned; a stream-style CTR (blocksize
	 * 1) would need a final-partial-block path in ctr_crypt(). */
	.cra_name		= "ctr(aes)",
	.cra_driver_name	= "ctr-aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
} };
436 | |||
437 | static bool __init sparc64_has_aes_opcode(void) | ||
438 | { | ||
439 | unsigned long cfr; | ||
440 | |||
441 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
442 | return false; | ||
443 | |||
444 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
445 | if (!(cfr & CFR_AES)) | ||
446 | return false; | ||
447 | |||
448 | return true; | ||
449 | } | ||
450 | |||
451 | static int __init aes_sparc64_mod_init(void) | ||
452 | { | ||
453 | int i; | ||
454 | |||
455 | for (i = 0; i < ARRAY_SIZE(algs); i++) | ||
456 | INIT_LIST_HEAD(&algs[i].cra_list); | ||
457 | |||
458 | if (sparc64_has_aes_opcode()) { | ||
459 | pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); | ||
460 | return crypto_register_algs(algs, ARRAY_SIZE(algs)); | ||
461 | } | ||
462 | pr_info("sparc64 aes opcodes not available.\n"); | ||
463 | return -ENODEV; | ||
464 | } | ||
465 | |||
/* Module unload: drop all four algorithm registrations. */
static void __exit aes_sparc64_mod_fini(void)
{
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}
470 | |||
471 | module_init(aes_sparc64_mod_init); | ||
472 | module_exit(aes_sparc64_mod_fini); | ||
473 | |||
474 | MODULE_LICENSE("GPL"); | ||
475 | MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated"); | ||
476 | |||
477 | MODULE_ALIAS("aes"); | ||
diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S new file mode 100644 index 000000000000..cc39553a4e43 --- /dev/null +++ b/arch/sparc/crypto/camellia_asm.S | |||
@@ -0,0 +1,563 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
/* One Camellia "grand round": six Feistel F rounds using the round
 * keys held in float register pairs starting at KEY_BASE.  I0/I1 are
 * the two 64-bit data halves; the alternating operand order performs
 * the Feistel swap implicitly.
 */
#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)

/* A grand round followed by the FL/FL^-1 diffusion layer that Camellia
 * inserts between grand rounds. */
#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
	CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
	CAMELLIA_FLI(KEY_BASE + 14, I1, I1)
18 | |||
	.data

	/* The six Camellia key-schedule constants (sigma1..sigma6 from
	 * the specification), consumed as 64-bit round keys by the key
	 * expansion routine below. */
	.align	8
SIGMA:	.xword	0xA09E667F3BCC908B
	.xword	0xB67AE8584CAA73B2
	.xword	0xC6EF372FE94F82BE
	.xword	0x54FF53A5F1D36F1C
	.xword	0x10E527FADE682D1D
	.xword	0xB05688C2B3E6C1FD

	.text
30 | |||
	.align	32
ENTRY(camellia_sparc64_key_expand)
	/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
	/* Expands the user key into the encryption schedule at %o1 and
	 * then writes the decryption schedule (same subkeys in reverse
	 * grand-round order) at %o3.  key_len is 16, 24 or 32. */
	VISEntry
	ld	[%o0 + 0x00], %f0	! i0, k[0]
	ld	[%o0 + 0x04], %f1	! i1, k[1]
	ld	[%o0 + 0x08], %f2	! i2, k[2]
	ld	[%o0 + 0x0c], %f3	! i3, k[3]
	std	%f0, [%o1 + 0x00]	! k[0, 1]
	fsrc2	%f0, %f28
	std	%f2, [%o1 + 0x08]	! k[2, 3]
	cmp	%o2, 16
	be	10f
	 fsrc2	%f2, %f30

	/* 192/256-bit keys: fetch KR.  For 24-byte keys the low half of
	 * KR is the complement of its high half (fone + fxor). */
	ld	[%o0 + 0x10], %f0
	ld	[%o0 + 0x14], %f1
	std	%f0, [%o1 + 0x20]	! k[8, 9]
	cmp	%o2, 24
	fone	%f10
	be,a	1f
	 fxor	%f10, %f0, %f2
	ld	[%o0 + 0x18], %f2
	ld	[%o0 + 0x1c], %f3
1:
	std	%f2, [%o1 + 0x28]	! k[10, 11]
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2

10:
	/* Derive KA (and later KB) by running F rounds keyed with the
	 * SIGMA constants over KL (xor KR). */
	sethi	%hi(SIGMA), %g3
	or	%g3, %lo(SIGMA), %g3
	ldd	[%g3 + 0x00], %f16
	ldd	[%g3 + 0x08], %f18
	ldd	[%g3 + 0x10], %f20
	ldd	[%g3 + 0x18], %f22
	ldd	[%g3 + 0x20], %f24
	ldd	[%g3 + 0x28], %f26
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2
	CAMELLIA_F(20, 2, 0, 2)
	CAMELLIA_F(22, 0, 2, 0)

/* 128-bit left-rotate of the S01:S23 register pair by N bits. */
#define ROTL128(S01, S23, TMP1, TMP2, N)	\
	srlx	S01, (64 - N), TMP1;		\
	sllx	S01, N, S01;			\
	srlx	S23, (64 - N), TMP2;		\
	sllx	S23, N, S23;			\
	or	S01, TMP2, S01;			\
	or	S23, TMP1, S23

	cmp	%o2, 16
	bne	1f
	 nop
	/* 128-bit key: subkeys are rotations of KL and KA. */
	std	%f0, [%o1 + 0x10]	! k[ 4, 5]
	std	%f2, [%o1 + 0x18]	! k[ 6, 7]
	MOVDTOX_F0_O4
	MOVDTOX_F2_O5
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x30]	! k[12, 13]
	stx	%o5, [%o1 + 0x38]	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x40]	! k[16, 17]
	stx	%o5, [%o1 + 0x48]	! k[18, 19]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x60]	! k[24, 25]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x70]	! k[28, 29]
	stx	%o5, [%o1 + 0x78]	! k[30, 31]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xc0]	! k[48, 49]
	stx	%o5, [%o1 + 0xc8]	! k[50, 51]

	ldx	[%o1 + 0x00], %o4	! k[ 0, 1]
	ldx	[%o1 + 0x08], %o5	! k[ 2, 3]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x20]	! k[ 8, 9]
	stx	%o5, [%o1 + 0x28]	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x50]	! k[20, 21]
	stx	%o5, [%o1 + 0x58]	! k[22, 23]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o5, [%o1 + 0x68]	! k[26, 27]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0x80]	! k[32, 33]
	stx	%o5, [%o1 + 0x88]	! k[34, 35]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0x90]	! k[36, 37]
	stx	%o5, [%o1 + 0x98]	! k[38, 39]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
	stx	%o5, [%o1 + 0xb8]	! k[46, 47]

	ba,pt	%xcc, 2f
	 mov	(3 * 16 * 4), %o0	! 3 grand rounds' worth of subkeys

1:
	/* 192-bit or 256-bit key: also derive KB from KA xor KR. */
	std	%f0, [%o1 + 0x30]	! k[12, 13]
	std	%f2, [%o1 + 0x38]	! k[14, 15]
	ldd	[%o1 + 0x20], %f4	! k[ 8, 9]
	ldd	[%o1 + 0x28], %f6	! k[10, 11]
	fxor	%f0, %f4, %f0
	fxor	%f2, %f6, %f2
	CAMELLIA_F(24, 2, 0, 2)
	CAMELLIA_F(26, 0, 2, 0)
	std	%f0, [%o1 + 0x10]	! k[ 4, 5]
	std	%f2, [%o1 + 0x18]	! k[ 6, 7]
	MOVDTOX_F0_O4
	MOVDTOX_F2_O5
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x50]	! k[20, 21]
	stx	%o5, [%o1 + 0x58]	! k[22, 23]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
	ROTL128(%o4, %o5, %g2, %g3, 51)
	stx	%o4, [%o1 + 0x100]	! k[64, 65]
	stx	%o5, [%o1 + 0x108]	! k[66, 67]
	ldx	[%o1 + 0x20], %o4	! k[ 8, 9]
	ldx	[%o1 + 0x28], %o5	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x20]	! k[ 8, 9]
	stx	%o5, [%o1 + 0x28]	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x40]	! k[16, 17]
	stx	%o5, [%o1 + 0x48]	! k[18, 19]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x90]	! k[36, 37]
	stx	%o5, [%o1 + 0x98]	! k[38, 39]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xd0]	! k[52, 53]
	stx	%o5, [%o1 + 0xd8]	! k[54, 55]
	ldx	[%o1 + 0x30], %o4	! k[12, 13]
	ldx	[%o1 + 0x38], %o5	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x30]	! k[12, 13]
	stx	%o5, [%o1 + 0x38]	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x70]	! k[28, 29]
	stx	%o5, [%o1 + 0x78]	! k[30, 31]
	/* k[48..51] is stored with its 32-bit halves swapped within
	 * each 64-bit word, hence the four stw's. */
	srlx	%o4, 32, %g2
	srlx	%o5, 32, %g3
	stw	%o4, [%o1 + 0xc0]	! k[48]
	stw	%g3, [%o1 + 0xc4]	! k[49]
	stw	%o5, [%o1 + 0xc8]	! k[50]
	stw	%g2, [%o1 + 0xcc]	! k[51]
	ROTL128(%o4, %o5, %g2, %g3, 49)
	stx	%o4, [%o1 + 0xe0]	! k[56, 57]
	stx	%o5, [%o1 + 0xe8]	! k[58, 59]
	ldx	[%o1 + 0x00], %o4	! k[ 0, 1]
	ldx	[%o1 + 0x08], %o5	! k[ 2, 3]
	ROTL128(%o4, %o5, %g2, %g3, 45)
	stx	%o4, [%o1 + 0x60]	! k[24, 25]
	stx	%o5, [%o1 + 0x68]	! k[26, 27]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x80]	! k[32, 33]
	stx	%o5, [%o1 + 0x88]	! k[34, 35]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xf0]	! k[60, 61]
	stx	%o5, [%o1 + 0xf8]	! k[62, 63]
	mov	(4 * 16 * 4), %o0	! 4 grand rounds' worth of subkeys
2:
	/* Build the decryption schedule: copy the whitening keys from
	 * the end, then copy each 16-word grand-round group of the
	 * encryption schedule in reverse order. */
	add	%o1, %o0, %o1
	ldd	[%o1 + 0x00], %f0
	ldd	[%o1 + 0x08], %f2
	std	%f0, [%o3 + 0x00]
	std	%f2, [%o3 + 0x08]
	add	%o3, 0x10, %o3
1:
	sub	%o1, (16 * 4), %o1
	ldd	[%o1 + 0x38], %f0
	ldd	[%o1 + 0x30], %f2
	ldd	[%o1 + 0x28], %f4
	ldd	[%o1 + 0x20], %f6
	ldd	[%o1 + 0x18], %f8
	ldd	[%o1 + 0x10], %f10
	std	%f0, [%o3 + 0x00]
	std	%f2, [%o3 + 0x08]
	std	%f4, [%o3 + 0x10]
	std	%f6, [%o3 + 0x18]
	std	%f8, [%o3 + 0x20]
	std	%f10, [%o3 + 0x28]

	ldd	[%o1 + 0x08], %f0
	ldd	[%o1 + 0x00], %f2
	std	%f0, [%o3 + 0x30]
	std	%f2, [%o3 + 0x38]
	subcc	%o0, (16 * 4), %o0
	bne,pt	%icc, 1b
	 add	%o3, (16 * 4), %o3

	std	%f2, [%o3 - 0x10]
	std	%f0, [%o3 - 0x08]

	retl
	 VISExit
ENDPROC(camellia_sparc64_key_expand)
238 | |||
	.align	32
ENTRY(camellia_sparc64_crypt)
	/* %o0=key, %o1=input, %o2=output, %o3=key_len */
	/* Single-block Camellia crypt.  Direction is determined by which
	 * schedule (encrypt or decrypt) the caller passes in %o0.
	 * 128-bit keys use 3 grand rounds; 192/256-bit keys run an extra
	 * grand round first (the "be 1f" skip). */
	VISEntry

	ld	[%o1 + 0x00], %f0
	ld	[%o1 + 0x04], %f1
	ld	[%o1 + 0x08], %f2
	ld	[%o1 + 0x0c], %f3

	/* Pre-whitening with k[0..3]. */
	ldd	[%o0 + 0x00], %f4
	ldd	[%o0 + 0x08], %f6

	cmp	%o3, 16
	fxor	%f4, %f0, %f0
	be	1f
	 fxor	%f6, %f2, %f2

	/* Extra grand round for 192/256-bit keys, then advance the key
	 * pointer so the common path below sees a 3-grand-round tail. */
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	add	%o0, 0x40, %o0

	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)

1:
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	ldd	[%o0 + 0x50], %f24
	ldd	[%o0 + 0x58], %f26
	ldd	[%o0 + 0x60], %f28
	ldd	[%o0 + 0x68], %f30
	ldd	[%o0 + 0x70], %f32
	ldd	[%o0 + 0x78], %f34
	ldd	[%o0 + 0x80], %f36
	ldd	[%o0 + 0x88], %f38
	ldd	[%o0 + 0x90], %f40
	ldd	[%o0 + 0x98], %f42
	ldd	[%o0 + 0xa0], %f44
	ldd	[%o0 + 0xa8], %f46
	ldd	[%o0 + 0xb0], %f48
	ldd	[%o0 + 0xb8], %f50
	ldd	[%o0 + 0xc0], %f52
	ldd	[%o0 + 0xc8], %f54

	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	/* Post-whitening; note the halves come out swapped. */
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0

	st	%f2, [%o2 + 0x00]
	st	%f3, [%o2 + 0x04]
	st	%f0, [%o2 + 0x08]
	st	%f1, [%o2 + 0x0c]

	retl
	 VISExit
ENDPROC(camellia_sparc64_crypt)
309 | |||
	.align	32
ENTRY(camellia_sparc64_load_keys)
	/* %o0=key, %o1=key_len */
	/* Preload the first 3 grand rounds' worth of subkeys (plus the
	 * whitening keys) into %f4..%f54 for the bulk ECB/CBC loops.
	 * Callers pair this with VISExit via fprs_write() in glue code. */
	VISEntry
	ldd	[%o0 + 0x00], %f4
	ldd	[%o0 + 0x08], %f6
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	ldd	[%o0 + 0x50], %f24
	ldd	[%o0 + 0x58], %f26
	ldd	[%o0 + 0x60], %f28
	ldd	[%o0 + 0x68], %f30
	ldd	[%o0 + 0x70], %f32
	ldd	[%o0 + 0x78], %f34
	ldd	[%o0 + 0x80], %f36
	ldd	[%o0 + 0x88], %f38
	ldd	[%o0 + 0x90], %f40
	ldd	[%o0 + 0x98], %f42
	ldd	[%o0 + 0xa0], %f44
	ldd	[%o0 + 0xa8], %f46
	ldd	[%o0 + 0xb0], %f48
	ldd	[%o0 + 0xb8], %f50
	ldd	[%o0 + 0xc0], %f52
	retl
	 ldd	[%o0 + 0xc8], %f54
ENDPROC(camellia_sparc64_load_keys)
342 | |||
	.align	32
ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key */
	/* ECB bulk loop for 128-bit keys (3 grand rounds).  Assumes the
	 * key schedule was preloaded by camellia_sparc64_load_keys and
	 * that len is a non-zero multiple of 16. */
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0
	/* Halves are stored swapped after post-whitening. */
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	retl
	 nop
ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)
364 | |||
	.align	32
ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key */
	/* ECB bulk loop for 192/256-bit keys (4 grand rounds).  Only the
	 * first 3 grand rounds fit in preloaded registers, so the 4th
	 * round's subkeys are streamed from memory each iteration and
	 * the clobbered registers reloaded before looping. */
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/* Stream in the 4th grand round's subkeys (overwrites %f8-%f22). */
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	/* Restore the clobbered grand-round-1 subkeys for the next block,
	 * interleaved with the remaining F rounds. */
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f2
	fxor	%f22, %f0, %f0
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	retl
	 nop
ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)
408 | |||
	.align	32
ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	/* CBC encrypt for 128-bit keys.  The running IV is kept in
	 * %f60/%f62 across iterations and written back to *%o4 on exit so
	 * chained calls can continue the stream.  Round keys must already
	 * be loaded (see camellia_sparc64_load_keys).
	 * len must be a non-zero multiple of 16 bytes.
	 */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f60, %f0, %f0		/* XOR plaintext with IV/prev ct */
	fxor	%f62, %f2, %f2
	fxor	%f4, %f0, %f0		/* pre-whitening */
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f60		/* post-whiten straight into the */
	fxor	%f54, %f0, %f62		/* IV regs: ciphertext is next IV */
	std	%f60, [%o1 + 0x00]
	std	%f62, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1		/* delay slot: advance output */
	std	%f60, [%o4 + 0x00]	/* persist final IV */
	retl
	 std	%f62, [%o4 + 0x08]	/* delay slot */
ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)
435 | |||
	.align	32
ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	/* CBC encrypt for 192/256-bit keys: same IV chaining as the
	 * 3-grand-round variant, plus the mid-loop load/restore of
	 * %f8..%f22 for the extra grand round (see the ECB 4-round
	 * routine).  len must be a non-zero multiple of 16 bytes.
	 */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f60, %f0, %f0		/* XOR plaintext with IV/prev ct */
	fxor	%f62, %f2, %f2
	fxor	%f4, %f0, %f0		/* pre-whitening */
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	ldd	[%o3 + 0xd0], %f8	/* fetch 4th-grand-round subkeys */
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8	/* restore first-round subkeys */
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f60		/* post-whiten into IV registers */
	fxor	%f22, %f0, %f62
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	std	%f60, [%o1 + 0x00]
	std	%f62, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1		/* delay slot: advance output */
	std	%f60, [%o4 + 0x00]	/* persist final IV */
	retl
	 std	%f62, [%o4 + 0x08]	/* delay slot */
ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)
484 | |||
	.align	32
ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	/* CBC decrypt for 128-bit keys.  The ciphertext block is kept in
	 * %f56/%f58 so it can become the IV for the next block after the
	 * block cipher output is XORed with the current IV (%f60/%f62).
	 * Final IV is written back to *%o4.  Round keys must already be
	 * loaded.  len must be a non-zero multiple of 16 bytes.
	 */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f56
	ldd	[%o0 + 0x08], %f58
	add	%o0, 0x10, %o0
	fxor	%f4, %f56, %f0		/* pre-whiten, preserving ct copy */
	fxor	%f6, %f58, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f2		/* post-whitening */
	fxor	%f54, %f0, %f0
	fxor	%f60, %f2, %f2		/* XOR with IV -> plaintext */
	fxor	%f62, %f0, %f0
	fsrc2	%f56, %f60		/* this ciphertext is next IV */
	fsrc2	%f58, %f62
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1		/* delay slot: advance output */
	std	%f60, [%o4 + 0x00]	/* persist final IV */
	retl
	 std	%f62, [%o4 + 0x08]	/* delay slot */
ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)
513 | |||
	.align	32
ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	/* CBC decrypt for 192/256-bit keys: combines the ciphertext-as-
	 * next-IV chaining of the 3-round CBC decrypt with the extra
	 * grand round subkey load/restore of the 4-round ECB routine.
	 * len must be a non-zero multiple of 16 bytes.
	 */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f56
	ldd	[%o0 + 0x08], %f58
	add	%o0, 0x10, %o0
	fxor	%f4, %f56, %f0		/* pre-whiten, preserving ct copy */
	fxor	%f6, %f58, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	ldd	[%o3 + 0xd0], %f8	/* fetch 4th-grand-round subkeys */
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8	/* restore first-round subkeys */
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f2		/* post-whitening */
	fxor	%f22, %f0, %f0
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	fxor	%f60, %f2, %f2		/* XOR with IV -> plaintext */
	fxor	%f62, %f0, %f0
	fsrc2	%f56, %f60		/* this ciphertext is next IV */
	fsrc2	%f58, %f62
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1		/* delay slot: advance output */
	std	%f60, [%o4 + 0x00]	/* persist final IV */
	retl
	 std	%f62, [%o4 + 0x08]	/* delay slot */
ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c new file mode 100644 index 000000000000..42905c084299 --- /dev/null +++ b/arch/sparc/crypto/camellia_glue.c | |||
@@ -0,0 +1,322 @@ | |||
1 | /* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
7 | |||
8 | #include <linux/crypto.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/types.h> | ||
13 | #include <crypto/algapi.h> | ||
14 | |||
15 | #include <asm/fpumacro.h> | ||
16 | #include <asm/pstate.h> | ||
17 | #include <asm/elf.h> | ||
18 | |||
19 | #include "opcodes.h" | ||
20 | |||
21 | #define CAMELLIA_MIN_KEY_SIZE 16 | ||
22 | #define CAMELLIA_MAX_KEY_SIZE 32 | ||
23 | #define CAMELLIA_BLOCK_SIZE 16 | ||
24 | #define CAMELLIA_TABLE_BYTE_LEN 272 | ||
25 | |||
/* Per-tfm context: both directions' expanded key schedules, as consumed
 * by the sparc64 camellia opcode routines. */
struct camellia_sparc64_ctx {
	u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
	u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
	int key_len;	/* user key length in bytes: 16, 24 or 32 */
};
31 | |||
32 | extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key, | ||
33 | unsigned int key_len, u64 *decrypt_key); | ||
34 | |||
35 | static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key, | ||
36 | unsigned int key_len) | ||
37 | { | ||
38 | struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
39 | const u32 *in_key = (const u32 *) _in_key; | ||
40 | u32 *flags = &tfm->crt_flags; | ||
41 | |||
42 | if (key_len != 16 && key_len != 24 && key_len != 32) { | ||
43 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
44 | return -EINVAL; | ||
45 | } | ||
46 | |||
47 | ctx->key_len = key_len; | ||
48 | |||
49 | camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0], | ||
50 | key_len, &ctx->decrypt_key[0]); | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | extern void camellia_sparc64_crypt(const u64 *key, const u32 *input, | ||
55 | u32 *output, unsigned int key_len); | ||
56 | |||
57 | static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
58 | { | ||
59 | struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
60 | |||
61 | camellia_sparc64_crypt(&ctx->encrypt_key[0], | ||
62 | (const u32 *) src, | ||
63 | (u32 *) dst, ctx->key_len); | ||
64 | } | ||
65 | |||
66 | static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
67 | { | ||
68 | struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
69 | |||
70 | camellia_sparc64_crypt(&ctx->decrypt_key[0], | ||
71 | (const u32 *) src, | ||
72 | (u32 *) dst, ctx->key_len); | ||
73 | } | ||
74 | |||
75 | extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len); | ||
76 | |||
77 | typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len, | ||
78 | const u64 *key); | ||
79 | |||
80 | extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds; | ||
81 | extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds; | ||
82 | |||
83 | #define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1)) | ||
84 | |||
85 | static int __ecb_crypt(struct blkcipher_desc *desc, | ||
86 | struct scatterlist *dst, struct scatterlist *src, | ||
87 | unsigned int nbytes, bool encrypt) | ||
88 | { | ||
89 | struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
90 | struct blkcipher_walk walk; | ||
91 | ecb_crypt_op *op; | ||
92 | const u64 *key; | ||
93 | int err; | ||
94 | |||
95 | op = camellia_sparc64_ecb_crypt_3_grand_rounds; | ||
96 | if (ctx->key_len != 16) | ||
97 | op = camellia_sparc64_ecb_crypt_4_grand_rounds; | ||
98 | |||
99 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
100 | err = blkcipher_walk_virt(desc, &walk); | ||
101 | |||
102 | if (encrypt) | ||
103 | key = &ctx->encrypt_key[0]; | ||
104 | else | ||
105 | key = &ctx->decrypt_key[0]; | ||
106 | camellia_sparc64_load_keys(key, ctx->key_len); | ||
107 | while ((nbytes = walk.nbytes)) { | ||
108 | unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; | ||
109 | |||
110 | if (likely(block_len)) { | ||
111 | const u64 *src64; | ||
112 | u64 *dst64; | ||
113 | |||
114 | src64 = (const u64 *)walk.src.virt.addr; | ||
115 | dst64 = (u64 *) walk.dst.virt.addr; | ||
116 | op(src64, dst64, block_len, key); | ||
117 | } | ||
118 | nbytes &= CAMELLIA_BLOCK_SIZE - 1; | ||
119 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
120 | } | ||
121 | fprs_write(0); | ||
122 | return err; | ||
123 | } | ||
124 | |||
125 | static int ecb_encrypt(struct blkcipher_desc *desc, | ||
126 | struct scatterlist *dst, struct scatterlist *src, | ||
127 | unsigned int nbytes) | ||
128 | { | ||
129 | return __ecb_crypt(desc, dst, src, nbytes, true); | ||
130 | } | ||
131 | |||
132 | static int ecb_decrypt(struct blkcipher_desc *desc, | ||
133 | struct scatterlist *dst, struct scatterlist *src, | ||
134 | unsigned int nbytes) | ||
135 | { | ||
136 | return __ecb_crypt(desc, dst, src, nbytes, false); | ||
137 | } | ||
138 | |||
139 | typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len, | ||
140 | const u64 *key, u64 *iv); | ||
141 | |||
142 | extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds; | ||
143 | extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds; | ||
144 | extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds; | ||
145 | extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds; | ||
146 | |||
147 | static int cbc_encrypt(struct blkcipher_desc *desc, | ||
148 | struct scatterlist *dst, struct scatterlist *src, | ||
149 | unsigned int nbytes) | ||
150 | { | ||
151 | struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
152 | struct blkcipher_walk walk; | ||
153 | cbc_crypt_op *op; | ||
154 | const u64 *key; | ||
155 | int err; | ||
156 | |||
157 | op = camellia_sparc64_cbc_encrypt_3_grand_rounds; | ||
158 | if (ctx->key_len != 16) | ||
159 | op = camellia_sparc64_cbc_encrypt_4_grand_rounds; | ||
160 | |||
161 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
162 | err = blkcipher_walk_virt(desc, &walk); | ||
163 | |||
164 | key = &ctx->encrypt_key[0]; | ||
165 | camellia_sparc64_load_keys(key, ctx->key_len); | ||
166 | while ((nbytes = walk.nbytes)) { | ||
167 | unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; | ||
168 | |||
169 | if (likely(block_len)) { | ||
170 | const u64 *src64; | ||
171 | u64 *dst64; | ||
172 | |||
173 | src64 = (const u64 *)walk.src.virt.addr; | ||
174 | dst64 = (u64 *) walk.dst.virt.addr; | ||
175 | op(src64, dst64, block_len, key, | ||
176 | (u64 *) walk.iv); | ||
177 | } | ||
178 | nbytes &= CAMELLIA_BLOCK_SIZE - 1; | ||
179 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
180 | } | ||
181 | fprs_write(0); | ||
182 | return err; | ||
183 | } | ||
184 | |||
185 | static int cbc_decrypt(struct blkcipher_desc *desc, | ||
186 | struct scatterlist *dst, struct scatterlist *src, | ||
187 | unsigned int nbytes) | ||
188 | { | ||
189 | struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
190 | struct blkcipher_walk walk; | ||
191 | cbc_crypt_op *op; | ||
192 | const u64 *key; | ||
193 | int err; | ||
194 | |||
195 | op = camellia_sparc64_cbc_decrypt_3_grand_rounds; | ||
196 | if (ctx->key_len != 16) | ||
197 | op = camellia_sparc64_cbc_decrypt_4_grand_rounds; | ||
198 | |||
199 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
200 | err = blkcipher_walk_virt(desc, &walk); | ||
201 | |||
202 | key = &ctx->decrypt_key[0]; | ||
203 | camellia_sparc64_load_keys(key, ctx->key_len); | ||
204 | while ((nbytes = walk.nbytes)) { | ||
205 | unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; | ||
206 | |||
207 | if (likely(block_len)) { | ||
208 | const u64 *src64; | ||
209 | u64 *dst64; | ||
210 | |||
211 | src64 = (const u64 *)walk.src.virt.addr; | ||
212 | dst64 = (u64 *) walk.dst.virt.addr; | ||
213 | op(src64, dst64, block_len, key, | ||
214 | (u64 *) walk.iv); | ||
215 | } | ||
216 | nbytes &= CAMELLIA_BLOCK_SIZE - 1; | ||
217 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
218 | } | ||
219 | fprs_write(0); | ||
220 | return err; | ||
221 | } | ||
222 | |||
/* Three algorithm registrations: the bare cipher plus ECB and CBC
 * blkcipher modes, all at the sparc64-crypto-opcode priority so they
 * beat the generic implementations. */
static struct crypto_alg algs[] = { {
	.cra_name		= "camellia",
	.cra_driver_name	= "camellia-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
	.cra_alignmask		= 3,	/* cipher path needs u32 alignment */
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.cia_max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.cia_setkey		= camellia_set_key,
			.cia_encrypt		= camellia_encrypt,
			.cia_decrypt		= camellia_decrypt
		}
	}
}, {
	.cra_name		= "ecb(camellia)",
	.cra_driver_name	= "ecb-camellia-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
	.cra_alignmask		= 7,	/* asm routines use 64-bit loads */
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.setkey		= camellia_set_key,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(camellia)",
	.cra_driver_name	= "cbc-camellia-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
	.cra_alignmask		= 7,	/* asm routines use 64-bit loads */
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.setkey		= camellia_set_key,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
} };
281 | |||
282 | static bool __init sparc64_has_camellia_opcode(void) | ||
283 | { | ||
284 | unsigned long cfr; | ||
285 | |||
286 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
287 | return false; | ||
288 | |||
289 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
290 | if (!(cfr & CFR_CAMELLIA)) | ||
291 | return false; | ||
292 | |||
293 | return true; | ||
294 | } | ||
295 | |||
296 | static int __init camellia_sparc64_mod_init(void) | ||
297 | { | ||
298 | int i; | ||
299 | |||
300 | for (i = 0; i < ARRAY_SIZE(algs); i++) | ||
301 | INIT_LIST_HEAD(&algs[i].cra_list); | ||
302 | |||
303 | if (sparc64_has_camellia_opcode()) { | ||
304 | pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); | ||
305 | return crypto_register_algs(algs, ARRAY_SIZE(algs)); | ||
306 | } | ||
307 | pr_info("sparc64 camellia opcodes not available.\n"); | ||
308 | return -ENODEV; | ||
309 | } | ||
310 | |||
/* Unregister all three algorithms on module unload. */
static void __exit camellia_sparc64_mod_fini(void)
{
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}
315 | |||
316 | module_init(camellia_sparc64_mod_init); | ||
317 | module_exit(camellia_sparc64_mod_fini); | ||
318 | |||
319 | MODULE_LICENSE("GPL"); | ||
320 | MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); | ||
321 | |||
322 | MODULE_ALIAS("aes"); | ||
diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S new file mode 100644 index 000000000000..2b1976e765b5 --- /dev/null +++ b/arch/sparc/crypto/crc32c_asm.S | |||
@@ -0,0 +1,20 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | #include <asm/asi.h> | ||
4 | |||
5 | #include "opcodes.h" | ||
6 | |||
ENTRY(crc32c_sparc64)
	/* %o0=crc32p, %o1=data_ptr, %o2=len */
	/* Accumulate CRC32C over len bytes (caller guarantees len is a
	 * non-zero multiple of 8 — see the glue's crc32c_compute).  The
	 * crc is loaded/stored little-endian via ASI_PL and folded 8
	 * bytes per CRC32C opcode.  NOTE(review): label "2:" is unused.
	 */
	VISEntryHalf
	lda	[%o0] ASI_PL, %f1	/* little-endian load of the crc */
1:	ldd	[%o1], %f2
	CRC32C(0,2,0)
	subcc	%o2, 8, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x8, %o1		/* delay slot: advance data ptr */
	sta	%f1, [%o0] ASI_PL	/* little-endian store back */
	VISExitHalf
2:	retl
	 nop
ENDPROC(crc32c_sparc64)
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c new file mode 100644 index 000000000000..0bd89cea8d8e --- /dev/null +++ b/arch/sparc/crypto/crc32c_glue.c | |||
@@ -0,0 +1,179 @@ | |||
1 | /* Glue code for CRC32C optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon arch/x86/crypto/crc32c-intel.c | ||
4 | * | ||
5 | * Copyright (C) 2008 Intel Corporation | ||
6 | * Authors: Austin Zhang <austin_zhang@linux.intel.com> | ||
7 | * Kent Liu <kent.liu@intel.com> | ||
8 | */ | ||
9 | |||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/crc32.h> | ||
17 | |||
18 | #include <crypto/internal/hash.h> | ||
19 | |||
20 | #include <asm/pstate.h> | ||
21 | #include <asm/elf.h> | ||
22 | |||
23 | #include "opcodes.h" | ||
24 | |||
25 | /* | ||
26 | * Setting the seed allows arbitrary accumulators and flexible XOR policy | ||
27 | * If your algorithm starts with ~0, then XOR with ~0 before you set | ||
28 | * the seed. | ||
29 | */ | ||
30 | static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, | ||
31 | unsigned int keylen) | ||
32 | { | ||
33 | u32 *mctx = crypto_shash_ctx(hash); | ||
34 | |||
35 | if (keylen != sizeof(u32)) { | ||
36 | crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
37 | return -EINVAL; | ||
38 | } | ||
39 | *(__le32 *)mctx = le32_to_cpup((__le32 *)key); | ||
40 | return 0; | ||
41 | } | ||
42 | |||
43 | static int crc32c_sparc64_init(struct shash_desc *desc) | ||
44 | { | ||
45 | u32 *mctx = crypto_shash_ctx(desc->tfm); | ||
46 | u32 *crcp = shash_desc_ctx(desc); | ||
47 | |||
48 | *crcp = *mctx; | ||
49 | |||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len); | ||
54 | |||
55 | static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len) | ||
56 | { | ||
57 | unsigned int asm_len; | ||
58 | |||
59 | asm_len = len & ~7U; | ||
60 | if (asm_len) { | ||
61 | crc32c_sparc64(crcp, data, asm_len); | ||
62 | data += asm_len / 8; | ||
63 | len -= asm_len; | ||
64 | } | ||
65 | if (len) | ||
66 | *crcp = __crc32c_le(*crcp, (const unsigned char *) data, len); | ||
67 | } | ||
68 | |||
69 | static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
70 | unsigned int len) | ||
71 | { | ||
72 | u32 *crcp = shash_desc_ctx(desc); | ||
73 | |||
74 | crc32c_compute(crcp, (const u64 *) data, len); | ||
75 | |||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len, | ||
80 | u8 *out) | ||
81 | { | ||
82 | u32 tmp = *crcp; | ||
83 | |||
84 | crc32c_compute(&tmp, (const u64 *) data, len); | ||
85 | |||
86 | *(__le32 *) out = ~cpu_to_le32(tmp); | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data, | ||
91 | unsigned int len, u8 *out) | ||
92 | { | ||
93 | return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out); | ||
94 | } | ||
95 | |||
96 | static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out) | ||
97 | { | ||
98 | u32 *crcp = shash_desc_ctx(desc); | ||
99 | |||
100 | *(__le32 *) out = ~cpu_to_le32p(crcp); | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data, | ||
105 | unsigned int len, u8 *out) | ||
106 | { | ||
107 | return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len, | ||
108 | out); | ||
109 | } | ||
110 | |||
111 | static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm) | ||
112 | { | ||
113 | u32 *key = crypto_tfm_ctx(tfm); | ||
114 | |||
115 | *key = ~0; | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
#define CHKSUM_BLOCK_SIZE	1	/* byte-stream checksum: no blocking */
#define CHKSUM_DIGEST_SIZE	4	/* 32-bit CRC */

/* The crc32c shash registration; descsize/ctxsize are a bare u32 CRC
 * accumulator and seed respectively. */
static struct shash_alg alg = {
	.setkey			=	crc32c_sparc64_setkey,
	.init			=	crc32c_sparc64_init,
	.update			=	crc32c_sparc64_update,
	.final			=	crc32c_sparc64_final,
	.finup			=	crc32c_sparc64_finup,
	.digest			=	crc32c_sparc64_digest,
	.descsize		=	sizeof(u32),
	.digestsize		=	CHKSUM_DIGEST_SIZE,
	.base			=	{
		.cra_name		=	"crc32c",
		.cra_driver_name	=	"crc32c-sparc64",
		.cra_priority		=	SPARC_CR_OPCODE_PRIORITY,
		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(u32),
		.cra_alignmask		=	7,	/* asm uses 64-bit loads */
		.cra_module		=	THIS_MODULE,
		.cra_init		=	crc32c_sparc64_cra_init,
	}
};
143 | |||
144 | static bool __init sparc64_has_crc32c_opcode(void) | ||
145 | { | ||
146 | unsigned long cfr; | ||
147 | |||
148 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
149 | return false; | ||
150 | |||
151 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
152 | if (!(cfr & CFR_CRC32C)) | ||
153 | return false; | ||
154 | |||
155 | return true; | ||
156 | } | ||
157 | |||
158 | static int __init crc32c_sparc64_mod_init(void) | ||
159 | { | ||
160 | if (sparc64_has_crc32c_opcode()) { | ||
161 | pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); | ||
162 | return crypto_register_shash(&alg); | ||
163 | } | ||
164 | pr_info("sparc64 crc32c opcode not available.\n"); | ||
165 | return -ENODEV; | ||
166 | } | ||
167 | |||
/* Unregister the shash on module unload. */
static void __exit crc32c_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
172 | |||
module_init(crc32c_sparc64_mod_init);
module_exit(crc32c_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");

/* Allow autoloading when the generic "crc32c" algorithm is requested. */
MODULE_ALIAS("crc32c");
diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c new file mode 100644 index 000000000000..5f5724a0ae22 --- /dev/null +++ b/arch/sparc/crypto/crop_devid.c | |||
@@ -0,0 +1,14 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/of_device.h> | ||
3 | |||
4 | /* This is a dummy device table linked into all of the crypto | ||
5 | * opcode drivers. It serves to trigger the module autoloading | ||
6 | * mechanisms in userspace which scan the OF device tree and | ||
7 | * load any modules which have device table entries that | ||
8 | * match OF device nodes. | ||
9 | */ | ||
/* Matches the sun4v cpu node so udev/modprobe autoload any driver
 * linked against this table (see comment above). */
static const struct of_device_id crypto_opcode_match[] = {
	{ .name = "cpu", .compatible = "sun4v", },
	{},
};
MODULE_DEVICE_TABLE(of, crypto_opcode_match);
diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S new file mode 100644 index 000000000000..30b6e90b28b2 --- /dev/null +++ b/arch/sparc/crypto/des_asm.S | |||
@@ -0,0 +1,418 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
	.align	32
ENTRY(des_sparc64_key_expand)
	/* %o0=input_key, %o1=output_key */
	/* Expand a 64-bit DES key into 16 round keys (128 bytes) using
	 * the DES_KEXPAND opcode, each step deriving new subkeys from a
	 * previously generated pair, then spill %f0..%f30 to the output
	 * schedule.  Runs under VISEntryHalf since only the low FP
	 * registers are used.
	 */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	DES_KEXPAND(0, 0, 0)
	DES_KEXPAND(0, 1, 2)
	DES_KEXPAND(2, 3, 6)
	DES_KEXPAND(2, 2, 4)
	DES_KEXPAND(6, 3, 10)
	DES_KEXPAND(6, 2, 8)
	DES_KEXPAND(10, 3, 14)
	DES_KEXPAND(10, 2, 12)
	DES_KEXPAND(14, 1, 16)
	DES_KEXPAND(16, 3, 20)
	DES_KEXPAND(16, 2, 18)
	DES_KEXPAND(20, 3, 24)
	DES_KEXPAND(20, 2, 22)
	DES_KEXPAND(24, 3, 28)
	DES_KEXPAND(24, 2, 26)
	DES_KEXPAND(28, 1, 30)
	std	%f0, [%o1 + 0x00]
	std	%f2, [%o1 + 0x08]
	std	%f4, [%o1 + 0x10]
	std	%f6, [%o1 + 0x18]
	std	%f8, [%o1 + 0x20]
	std	%f10, [%o1 + 0x28]
	std	%f12, [%o1 + 0x30]
	std	%f14, [%o1 + 0x38]
	std	%f16, [%o1 + 0x40]
	std	%f18, [%o1 + 0x48]
	std	%f20, [%o1 + 0x50]
	std	%f22, [%o1 + 0x58]
	std	%f24, [%o1 + 0x60]
	std	%f26, [%o1 + 0x68]
	std	%f28, [%o1 + 0x70]
	std	%f30, [%o1 + 0x78]
	retl
	 VISExitHalf			/* delay slot */
ENDPROC(des_sparc64_key_expand)
47 | |||
	.align	32
ENTRY(des_sparc64_crypt)
	/* %o0=key, %o1=input, %o2=output */
	/* Single-block DES: load the 16 round keys into %f0..%f30, then
	 * IP, 8 dual-round DES_ROUND steps on the block in %f32, and the
	 * inverse IP.  Encrypt vs decrypt is decided by which schedule
	 * %o0 points to.  Full VISEntry since %f32 is in the upper bank.
	 */
	VISEntry
	ldd	[%o1 + 0x00], %f32
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	ldd	[%o0 + 0x78], %f30
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)
	std	%f32, [%o2 + 0x00]
	retl
	 VISExit			/* delay slot */
ENDPROC(des_sparc64_crypt)
83 | |||
	.align	32
ENTRY(des_sparc64_load_keys)
	/* %o0=key */
	/* Preload all 16 round keys into %f0..%f30 for the bulk ECB/CBC
	 * loops below; enters VIS state that the glue code tears down
	 * afterwards (fprs_write).
	 */
	VISEntry
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	retl
	 ldd	[%o0 + 0x78], %f30	/* delay slot: last subkey */
ENDPROC(des_sparc64_load_keys)
106 | |||
	.align	32
ENTRY(des_sparc64_ecb_crypt)
	/* %o0=input, %o1=output, %o2=len */
	/* Bulk ECB loop; round keys must be preloaded in %f0..%f30 by
	 * des_sparc64_load_keys.  One 8-byte block per iteration; the
	 * loop structure requires len to be a non-zero multiple of 8.
	 */
1:	ldd	[%o0 + 0x00], %f32
	add	%o0, 0x08, %o0
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)
	std	%f32, [%o1 + 0x00]
	subcc	%o2, 0x08, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x08, %o1		/* delay slot: advance output */
	retl
	 nop
ENDPROC(des_sparc64_ecb_crypt)
129 | |||
	.align	32
ENTRY(des_sparc64_cbc_encrypt)
	/* %o0=input, %o1=output, %o2=len, %o3=IV */
	/* Bulk CBC encrypt; round keys preloaded in %f0..%f30.  The
	 * running IV lives in %f32 (each ciphertext is the next IV) and
	 * is written back to *%o3 on exit.  len must be a non-zero
	 * multiple of 8 bytes.
	 */
	ldd	[%o3 + 0x00], %f32
1:	ldd	[%o0 + 0x00], %f34
	fxor	%f32, %f34, %f32	/* plaintext XOR IV/prev ct */
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)
	std	%f32, [%o1 + 0x00]
	add	%o0, 0x08, %o0
	subcc	%o2, 0x08, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x08, %o1		/* delay slot: advance output */
	retl
	 std	%f32, [%o3 + 0x00]	/* delay slot: persist final IV */
ENDPROC(des_sparc64_cbc_encrypt)
154 | |||
	.align	32
ENTRY(des_sparc64_cbc_decrypt)
	/* %o0=input, %o1=output, %o2=len, %o3=IV */
	/* Bulk CBC decrypt; round keys (decrypt schedule) preloaded in
	 * %f0..%f30.  Previous ciphertext/IV is kept in %f34, current
	 * ciphertext in %f36 so it can become the next IV.  Final IV is
	 * written back to *%o3.  len must be a non-zero multiple of 8.
	 */
	ldd	[%o3 + 0x00], %f34
1:	ldd	[%o0 + 0x00], %f36
	DES_IP(36, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)
	fxor	%f32, %f34, %f32	/* XOR with IV -> plaintext */
	fsrc2	%f36, %f34		/* this ciphertext is next IV */
	std	%f32, [%o1 + 0x00]
	add	%o0, 0x08, %o0
	subcc	%o2, 0x08, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x08, %o1		/* delay slot: advance output */
	retl
	 std	%f36, [%o3 + 0x00]	/* delay slot: persist final IV */
ENDPROC(des_sparc64_cbc_decrypt)
180 | |||
	.align	32
ENTRY(des3_ede_sparc64_crypt)
	/* %o0=key, %o1=input, %o2=output */
	/* Single-block triple-DES (EDE): three full 16-round DES passes
	 * over the block in %f32, one per 128-byte key schedule at key
	 * offsets 0x00, 0x80 and 0x100.  The next pass's subkeys are
	 * loaded in the gaps between DES_ROUND steps of the current
	 * pass to hide load latency.  The E-D-E direction comes from how
	 * the three schedules were expanded, not from this routine.
	 */
	VISEntry
	ldd	[%o1 + 0x00], %f32
	/* schedule #1 (offsets 0x00..0x78) */
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	ldd	[%o0 + 0x78], %f30
	/* pass 1, interleaved with loads of schedule #2 (0x80..0xf8) */
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	ldd	[%o0 + 0x80], %f0
	ldd	[%o0 + 0x88], %f2
	DES_ROUND(4, 6, 32, 32)
	ldd	[%o0 + 0x90], %f4
	ldd	[%o0 + 0x98], %f6
	DES_ROUND(8, 10, 32, 32)
	ldd	[%o0 + 0xa0], %f8
	ldd	[%o0 + 0xa8], %f10
	DES_ROUND(12, 14, 32, 32)
	ldd	[%o0 + 0xb0], %f12
	ldd	[%o0 + 0xb8], %f14
	DES_ROUND(16, 18, 32, 32)
	ldd	[%o0 + 0xc0], %f16
	ldd	[%o0 + 0xc8], %f18
	DES_ROUND(20, 22, 32, 32)
	ldd	[%o0 + 0xd0], %f20
	ldd	[%o0 + 0xd8], %f22
	DES_ROUND(24, 26, 32, 32)
	ldd	[%o0 + 0xe0], %f24
	ldd	[%o0 + 0xe8], %f26
	DES_ROUND(28, 30, 32, 32)
	ldd	[%o0 + 0xf0], %f28
	ldd	[%o0 + 0xf8], %f30
	DES_IIP(32, 32)
	/* pass 2, interleaved with loads of schedule #3 (0x100..0x178) */
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	ldd	[%o0 + 0x100], %f0
	ldd	[%o0 + 0x108], %f2
	DES_ROUND(4, 6, 32, 32)
	ldd	[%o0 + 0x110], %f4
	ldd	[%o0 + 0x118], %f6
	DES_ROUND(8, 10, 32, 32)
	ldd	[%o0 + 0x120], %f8
	ldd	[%o0 + 0x128], %f10
	DES_ROUND(12, 14, 32, 32)
	ldd	[%o0 + 0x130], %f12
	ldd	[%o0 + 0x138], %f14
	DES_ROUND(16, 18, 32, 32)
	ldd	[%o0 + 0x140], %f16
	ldd	[%o0 + 0x148], %f18
	DES_ROUND(20, 22, 32, 32)
	ldd	[%o0 + 0x150], %f20
	ldd	[%o0 + 0x158], %f22
	DES_ROUND(24, 26, 32, 32)
	ldd	[%o0 + 0x160], %f24
	ldd	[%o0 + 0x168], %f26
	DES_ROUND(28, 30, 32, 32)
	ldd	[%o0 + 0x170], %f28
	ldd	[%o0 + 0x178], %f30
	DES_IIP(32, 32)
	/* pass 3 */
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)

	std	%f32, [%o2 + 0x00]
	retl
	 VISExit			/* delay slot */
ENDPROC(des3_ede_sparc64_crypt)
269 | |||
	.align	32
ENTRY(des3_ede_sparc64_load_keys)
	/* %o0=key */
	/* Preload the first 0xf0 bytes of the 3DES expanded key schedule
	 * into %f0-%f58; DES3_LOOP_BODY reloads the remainder on the fly.
	 * Enters VIS state without exiting it — the C caller is expected
	 * to clean up with fprs_write(0) when the bulk operation is done
	 * (see the glue code's __ecb3_crypt/cbc3_* loops).
	 */
	VISEntry
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	ldd	[%o0 + 0x78], %f30
	ldd	[%o0 + 0x80], %f32
	ldd	[%o0 + 0x88], %f34
	ldd	[%o0 + 0x90], %f36
	ldd	[%o0 + 0x98], %f38
	ldd	[%o0 + 0xa0], %f40
	ldd	[%o0 + 0xa8], %f42
	ldd	[%o0 + 0xb0], %f44
	ldd	[%o0 + 0xb8], %f46
	ldd	[%o0 + 0xc0], %f48
	ldd	[%o0 + 0xc8], %f50
	ldd	[%o0 + 0xd0], %f52
	ldd	[%o0 + 0xd8], %f54
	ldd	[%o0 + 0xe0], %f56
	retl
	 ldd	[%o0 + 0xe8], %f58	/* delay slot: last preloaded pair */
ENDPROC(des3_ede_sparc64_load_keys)
306 | |||
/* Perform one full triple-DES EDE operation on the 64-bit datum in %fX
 * (X must be an even float register number).  Assumes
 * des3_ede_sparc64_load_keys() has preloaded subkey bytes 0x00-0xe8
 * into %f0-%f58 and that %o0 still points at the expanded key.  The
 * tail subkeys (0xf0-0x178) are streamed in between rounds, and the
 * final rounds restore %f0-%f18 from offsets 0x00-0x48 so the register
 * state is ready for the next loop iteration.  Note the lds use ';'
 * separators so each macro expansion stays one logical line.
 */
#define DES3_LOOP_BODY(X) \
	DES_IP(X, X) \
	DES_ROUND(0, 2, X, X) \
	DES_ROUND(4, 6, X, X) \
	DES_ROUND(8, 10, X, X) \
	DES_ROUND(12, 14, X, X) \
	DES_ROUND(16, 18, X, X) \
	ldd	[%o0 + 0xf0], %f16; \
	ldd	[%o0 + 0xf8], %f18; \
	DES_ROUND(20, 22, X, X) \
	ldd	[%o0 + 0x100], %f20; \
	ldd	[%o0 + 0x108], %f22; \
	DES_ROUND(24, 26, X, X) \
	ldd	[%o0 + 0x110], %f24; \
	ldd	[%o0 + 0x118], %f26; \
	DES_ROUND(28, 30, X, X) \
	ldd	[%o0 + 0x120], %f28; \
	ldd	[%o0 + 0x128], %f30; \
	DES_IIP(X, X) \
	DES_IP(X, X) \
	DES_ROUND(32, 34, X, X) \
	ldd	[%o0 + 0x130], %f0; \
	ldd	[%o0 + 0x138], %f2; \
	DES_ROUND(36, 38, X, X) \
	ldd	[%o0 + 0x140], %f4; \
	ldd	[%o0 + 0x148], %f6; \
	DES_ROUND(40, 42, X, X) \
	ldd	[%o0 + 0x150], %f8; \
	ldd	[%o0 + 0x158], %f10; \
	DES_ROUND(44, 46, X, X) \
	ldd	[%o0 + 0x160], %f12; \
	ldd	[%o0 + 0x168], %f14; \
	DES_ROUND(48, 50, X, X) \
	DES_ROUND(52, 54, X, X) \
	DES_ROUND(56, 58, X, X) \
	DES_ROUND(16, 18, X, X) \
	ldd	[%o0 + 0x170], %f16; \
	ldd	[%o0 + 0x178], %f18; \
	DES_IIP(X, X) \
	DES_IP(X, X) \
	DES_ROUND(20, 22, X, X) \
	ldd	[%o0 + 0x50], %f20; \
	ldd	[%o0 + 0x58], %f22; \
	DES_ROUND(24, 26, X, X) \
	ldd	[%o0 + 0x60], %f24; \
	ldd	[%o0 + 0x68], %f26; \
	DES_ROUND(28, 30, X, X) \
	ldd	[%o0 + 0x70], %f28; \
	ldd	[%o0 + 0x78], %f30; \
	DES_ROUND(0, 2, X, X) \
	ldd	[%o0 + 0x00], %f0; \
	ldd	[%o0 + 0x08], %f2; \
	DES_ROUND(4, 6, X, X) \
	ldd	[%o0 + 0x10], %f4; \
	ldd	[%o0 + 0x18], %f6; \
	DES_ROUND(8, 10, X, X) \
	ldd	[%o0 + 0x20], %f8; \
	ldd	[%o0 + 0x28], %f10; \
	DES_ROUND(12, 14, X, X) \
	ldd	[%o0 + 0x30], %f12; \
	ldd	[%o0 + 0x38], %f14; \
	DES_ROUND(16, 18, X, X) \
	ldd	[%o0 + 0x40], %f16; \
	ldd	[%o0 + 0x48], %f18; \
	DES_IIP(X, X)
372 | |||
373 | .align 32 | ||
374 | ENTRY(des3_ede_sparc64_ecb_crypt) | ||
375 | /* %o0=key, %o1=input, %o2=output, %o3=len */ | ||
376 | 1: ldd [%o1 + 0x00], %f60 | ||
377 | DES3_LOOP_BODY(60) | ||
378 | std %f60, [%o2 + 0x00] | ||
379 | subcc %o3, 0x08, %o3 | ||
380 | bne,pt %icc, 1b | ||
381 | add %o2, 0x08, %o2 | ||
382 | retl | ||
383 | nop | ||
384 | ENDPROC(des3_ede_sparc64_ecb_crypt) | ||
385 | |||
	.align	32
ENTRY(des3_ede_sparc64_cbc_encrypt)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	/* CBC-mode 3DES encryption.  Subkeys must already be loaded by
	 * des3_ede_sparc64_load_keys(); len is a non-zero multiple of 8.
	 * The chain value is kept in %f60 and the final ciphertext block
	 * is written back to the IV buffer on return.
	 */
	ldd	[%o4 + 0x00], %f60	/* %f60 = IV */
1:	ldd	[%o1 + 0x00], %f62	/* %f62 = plaintext block */
	fxor	%f60, %f62, %f60	/* chain ^ plaintext */
	DES3_LOOP_BODY(60)
	std	%f60, [%o2 + 0x00]	/* ciphertext doubles as next chain */
	add	%o1, 0x08, %o1
	subcc	%o3, 0x08, %o3
	bne,pt	%icc, 1b
	 add	%o2, 0x08, %o2		/* delay slot: advance output */
	retl
	 std	%f60, [%o4 + 0x00]	/* delay slot: save new IV */
ENDPROC(des3_ede_sparc64_cbc_encrypt)
401 | |||
	.align	32
ENTRY(des3_ede_sparc64_cbc_decrypt)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	/* CBC-mode 3DES decryption.  The current ciphertext block is
	 * staged through integer register %g1 so it both feeds the cipher
	 * (via MOVXTOD_G1_F60) and survives DES3_LOOP_BODY to become the
	 * next chain value in %f62 and, finally, the written-back IV.
	 */
	ldd	[%o4 + 0x00], %f62	/* %f62 = IV (previous ciphertext) */
1:	ldx	[%o1 + 0x00], %g1	/* %g1 = ciphertext block */
	MOVXTOD_G1_F60
	DES3_LOOP_BODY(60)
	fxor	%f62, %f60, %f60	/* plaintext = D(C[i]) ^ C[i-1] */
	MOVXTOD_G1_F62			/* C[i] becomes next chain value */
	std	%f60, [%o2 + 0x00]
	add	%o1, 0x08, %o1
	subcc	%o3, 0x08, %o3
	bne,pt	%icc, 1b
	 add	%o2, 0x08, %o2		/* delay slot: advance output */
	retl
	 stx	%g1, [%o4 + 0x00]	/* delay slot: save new IV */
ENDPROC(des3_ede_sparc64_cbc_decrypt)
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c new file mode 100644 index 000000000000..c4940c2d3073 --- /dev/null +++ b/arch/sparc/crypto/des_glue.c | |||
@@ -0,0 +1,529 @@ | |||
1 | /* Glue code for DES encryption optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
7 | |||
8 | #include <linux/crypto.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/types.h> | ||
13 | #include <crypto/algapi.h> | ||
14 | #include <crypto/des.h> | ||
15 | |||
16 | #include <asm/fpumacro.h> | ||
17 | #include <asm/pstate.h> | ||
18 | #include <asm/elf.h> | ||
19 | |||
20 | #include "opcodes.h" | ||
21 | |||
/* Per-tfm DES key material: the expanded key schedule as 64-bit words,
 * stored once in encryption order and once reversed for decryption
 * (see encrypt_to_decrypt()). */
struct des_sparc64_ctx {
	u64 encrypt_expkey[DES_EXPKEY_WORDS / 2];
	u64 decrypt_expkey[DES_EXPKEY_WORDS / 2];
};
26 | |||
/* Per-tfm triple-DES key material: three concatenated single-DES
 * expanded schedules in the order the assembler consumes them for
 * encryption, and the mirrored order for decryption
 * (see des3_ede_set_key()). */
struct des3_ede_sparc64_ctx {
	u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
	u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
};
31 | |||
32 | static void encrypt_to_decrypt(u64 *d, const u64 *e) | ||
33 | { | ||
34 | const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1; | ||
35 | int i; | ||
36 | |||
37 | for (i = 0; i < DES_EXPKEY_WORDS / 2; i++) | ||
38 | *d++ = *s--; | ||
39 | } | ||
40 | |||
41 | extern void des_sparc64_key_expand(const u32 *input_key, u64 *key); | ||
42 | |||
/* cipher ->cia_setkey hook for DES.  Expands @key with the sparc64
 * assembler helper and also builds the reversed decryption schedule.
 * Returns -EINVAL (with CRYPTO_TFM_RES_WEAK_KEY set) if the key is
 * weak and the caller requested weak-key rejection. */
static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
		       unsigned int keylen)
{
	struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
	u32 *flags = &tfm->crt_flags;
	u32 tmp[DES_EXPKEY_WORDS];
	int ret;

	/* Even though we have special instructions for key expansion,
	 * we call des_ekey() so that we don't have to write our own
	 * weak key detection code.
	 */
	ret = des_ekey(tmp, key);
	/* des_ekey() returns 0 for a weak key; its expansion in tmp is
	 * otherwise discarded. */
	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
		return -EINVAL;
	}

	/* The real schedules come from the hardware-assisted expander. */
	des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]);
	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]);

	return 0;
}
66 | |||
67 | extern void des_sparc64_crypt(const u64 *key, const u64 *input, | ||
68 | u64 *output); | ||
69 | |||
70 | static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
71 | { | ||
72 | struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
73 | const u64 *K = ctx->encrypt_expkey; | ||
74 | |||
75 | des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); | ||
76 | } | ||
77 | |||
78 | static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
79 | { | ||
80 | struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
81 | const u64 *K = ctx->decrypt_expkey; | ||
82 | |||
83 | des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); | ||
84 | } | ||
85 | |||
86 | extern void des_sparc64_load_keys(const u64 *key); | ||
87 | |||
88 | extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output, | ||
89 | unsigned int len); | ||
90 | |||
91 | #define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1)) | ||
92 | |||
/* Common ECB worker for DES.  Loads the chosen key schedule into the
 * FPU once, then walks the scatterlists feeding whole-block runs to
 * the assembler.  The FPU stays "live" across the whole walk; the
 * final fprs_write(0) marks it clean again (the asm used VISEntry
 * without a matching exit). */
static int __ecb_crypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes, bool encrypt)
{
	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	if (encrypt)
		des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
	else
		des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
	while ((nbytes = walk.nbytes)) {
		/* Round this segment down to whole DES blocks. */
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
					      (u64 *) walk.dst.virt.addr,
					      block_len);
		}
		/* Leave any sub-block remainder for the walk to carry. */
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}
122 | |||
123 | static int ecb_encrypt(struct blkcipher_desc *desc, | ||
124 | struct scatterlist *dst, struct scatterlist *src, | ||
125 | unsigned int nbytes) | ||
126 | { | ||
127 | return __ecb_crypt(desc, dst, src, nbytes, true); | ||
128 | } | ||
129 | |||
130 | static int ecb_decrypt(struct blkcipher_desc *desc, | ||
131 | struct scatterlist *dst, struct scatterlist *src, | ||
132 | unsigned int nbytes) | ||
133 | { | ||
134 | return __ecb_crypt(desc, dst, src, nbytes, false); | ||
135 | } | ||
136 | |||
137 | extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output, | ||
138 | unsigned int len, u64 *iv); | ||
139 | |||
/* blkcipher ->encrypt hook for cbc(des).  Same walk structure as
 * __ecb_crypt(); the assembler carries the chain value and updates
 * walk.iv itself. */
static int cbc_encrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	/* Keys go into the FPU once; they stay resident for the walk. */
	des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
						(u64 *) walk.dst.virt.addr,
						block_len, (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* mark FPU clean again */
	return err;
}
166 | |||
167 | extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, | ||
168 | unsigned int len, u64 *iv); | ||
169 | |||
/* blkcipher ->decrypt hook for cbc(des).  Mirror of cbc_encrypt()
 * using the reversed key schedule. */
static int cbc_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
						(u64 *) walk.dst.virt.addr,
						block_len, (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* mark FPU clean again */
	return err;
}
196 | |||
/* cipher ->cia_setkey hook for 3DES-EDE.  Expands the three component
 * keys and lays them out in the exact order the assembler's three DES
 * passes consume them. */
static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
			    unsigned int keylen)
{
	struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
	const u32 *K = (const u32 *)key;
	u32 *flags = &tfm->crt_flags;
	u64 k1[DES_EXPKEY_WORDS / 2];
	u64 k2[DES_EXPKEY_WORDS / 2];
	u64 k3[DES_EXPKEY_WORDS / 2];

	/* Reject K1 == K2 or K2 == K3 (degenerate, single-DES-strength
	 * keys) when the caller asked for weak-key checking. */
	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
		return -EINVAL;
	}

	/* Expand each 8-byte component key independently. */
	des_sparc64_key_expand((const u32 *)key, k1);
	key += DES_KEY_SIZE;
	des_sparc64_key_expand((const u32 *)key, k2);
	key += DES_KEY_SIZE;
	des_sparc64_key_expand((const u32 *)key, k3);

	/* EDE encryption runs E(k1), D(k2), E(k3); the asm always runs
	 * the same round structure, so the middle "decrypt" is realized
	 * by storing k2's schedule in reverse order. */
	memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1));
	encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]);
	memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
	       &k3[0], sizeof(k3));

	/* Decryption is the mirror image: D(k3), E(k2), D(k1). */
	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]);
	memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2],
	       &k2[0], sizeof(k2));
	encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
	       &k1[0]);

	return 0;
}
233 | |||
234 | extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input, | ||
235 | u64 *output); | ||
236 | |||
237 | static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
238 | { | ||
239 | struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
240 | const u64 *K = ctx->encrypt_expkey; | ||
241 | |||
242 | des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); | ||
243 | } | ||
244 | |||
245 | static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
246 | { | ||
247 | struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
248 | const u64 *K = ctx->decrypt_expkey; | ||
249 | |||
250 | des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); | ||
251 | } | ||
252 | |||
253 | extern void des3_ede_sparc64_load_keys(const u64 *key); | ||
254 | |||
255 | extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input, | ||
256 | u64 *output, unsigned int len); | ||
257 | |||
/* Common ECB worker for 3DES.  Unlike the single-DES version, the key
 * pointer K is also passed to the per-segment asm call because
 * DES3_LOOP_BODY reloads parts of the schedule from memory while it
 * runs. */
static int __ecb3_crypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes, bool encrypt)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	const u64 *K;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	if (encrypt)
		K = &ctx->encrypt_expkey[0];
	else
		K = &ctx->decrypt_expkey[0];
	/* Preload the bulk of the schedule into the FPU once. */
	des3_ede_sparc64_load_keys(K);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64 = (const u64 *)walk.src.virt.addr;
			des3_ede_sparc64_ecb_crypt(K, src64,
						   (u64 *) walk.dst.virt.addr,
						   block_len);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* mark FPU clean again */
	return err;
}
290 | |||
291 | static int ecb3_encrypt(struct blkcipher_desc *desc, | ||
292 | struct scatterlist *dst, struct scatterlist *src, | ||
293 | unsigned int nbytes) | ||
294 | { | ||
295 | return __ecb3_crypt(desc, dst, src, nbytes, true); | ||
296 | } | ||
297 | |||
298 | static int ecb3_decrypt(struct blkcipher_desc *desc, | ||
299 | struct scatterlist *dst, struct scatterlist *src, | ||
300 | unsigned int nbytes) | ||
301 | { | ||
302 | return __ecb3_crypt(desc, dst, src, nbytes, false); | ||
303 | } | ||
304 | |||
305 | extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input, | ||
306 | u64 *output, unsigned int len, | ||
307 | u64 *iv); | ||
308 | |||
/* blkcipher ->encrypt hook for cbc(des3_ede).  The asm maintains the
 * chain value and updates walk.iv itself. */
static int cbc3_encrypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	const u64 *K;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	K = &ctx->encrypt_expkey[0];
	/* Preload key schedule; it stays in the FPU across the walk. */
	des3_ede_sparc64_load_keys(K);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64 = (const u64 *)walk.src.virt.addr;
			des3_ede_sparc64_cbc_encrypt(K, src64,
						     (u64 *) walk.dst.virt.addr,
						     block_len,
						     (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* mark FPU clean again */
	return err;
}
339 | |||
340 | extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input, | ||
341 | u64 *output, unsigned int len, | ||
342 | u64 *iv); | ||
343 | |||
/* blkcipher ->decrypt hook for cbc(des3_ede).  Mirror of
 * cbc3_encrypt() using the reversed key schedule. */
static int cbc3_decrypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	const u64 *K;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	K = &ctx->decrypt_expkey[0];
	des3_ede_sparc64_load_keys(K);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64 = (const u64 *)walk.src.virt.addr;
			des3_ede_sparc64_cbc_decrypt(K, src64,
						     (u64 *) walk.dst.virt.addr,
						     block_len,
						     (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* mark FPU clean again */
	return err;
}
374 | |||
/* Six algorithm instances: raw single-block ciphers plus ECB and CBC
 * blkcipher modes, for both DES and 3DES-EDE.  cra_alignmask of 7
 * guarantees 8-byte alignment so the assembler's ldd/std accesses are
 * legal.  SPARC_CR_OPCODE_PRIORITY (from opcodes.h) is presumably set
 * above the generic C implementations so these win at runtime —
 * NOTE(review): value not visible here, confirm in opcodes.h. */
static struct crypto_alg algs[] = { {
	.cra_name		= "des",
	.cra_driver_name	= "des-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= DES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= DES_KEY_SIZE,
			.cia_max_keysize	= DES_KEY_SIZE,
			.cia_setkey		= des_set_key,
			.cia_encrypt		= des_encrypt,
			.cia_decrypt		= des_decrypt
		}
	}
}, {
	.cra_name		= "ecb(des)",
	.cra_driver_name	= "ecb-des-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES_KEY_SIZE,
			.max_keysize	= DES_KEY_SIZE,
			.setkey		= des_set_key,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(des)",
	.cra_driver_name	= "cbc-des-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES_KEY_SIZE,
			.max_keysize	= DES_KEY_SIZE,
			.setkey		= des_set_key,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "des3_ede",
	.cra_driver_name	= "des3_ede-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= DES3_EDE_KEY_SIZE,
			.cia_max_keysize	= DES3_EDE_KEY_SIZE,
			.cia_setkey		= des3_ede_set_key,
			.cia_encrypt		= des3_ede_encrypt,
			.cia_decrypt		= des3_ede_decrypt
		}
	}
}, {
	.cra_name		= "ecb(des3_ede)",
	.cra_driver_name	= "ecb-des3_ede-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES3_EDE_KEY_SIZE,
			.max_keysize	= DES3_EDE_KEY_SIZE,
			.setkey		= des3_ede_set_key,
			.encrypt	= ecb3_encrypt,
			.decrypt	= ecb3_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(des3_ede)",
	.cra_driver_name	= "cbc-des3_ede-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES3_EDE_KEY_SIZE,
			.max_keysize	= DES3_EDE_KEY_SIZE,
			.setkey		= des3_ede_set_key,
			.encrypt	= cbc3_encrypt,
			.decrypt	= cbc3_decrypt,
		},
	},
} };
488 | |||
/* Runtime capability probe: true iff the CPU advertises the crypto
 * opcode family AND the DES bit in the Configuration Feature Register.
 */
static bool __init sparc64_has_des_opcode(void)
{
	unsigned long cfr;

	/* Without the hwcap, reading %asr26 would not be valid. */
	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	/* %asr26 is the CFR; check its per-algorithm DES enable bit. */
	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_DES))
		return false;

	return true;
}
502 | |||
/* Module load: register all six algorithms, but only when the CPU
 * actually implements the DES opcodes; otherwise bail with -ENODEV so
 * the generic implementations remain in use. */
static int __init des_sparc64_mod_init(void)
{
	int i;

	/* Explicitly initialize each alg's list head before
	 * registration — required by the crypto API of this era. */
	for (i = 0; i < ARRAY_SIZE(algs); i++)
		INIT_LIST_HEAD(&algs[i].cra_list);

	if (sparc64_has_des_opcode()) {
		pr_info("Using sparc64 des opcodes optimized DES implementation\n");
		return crypto_register_algs(algs, ARRAY_SIZE(algs));
	}
	pr_info("sparc64 des opcodes not available.\n");
	return -ENODEV;
}
517 | |||
/* Module unload: unregister every algorithm registered at init. */
static void __exit des_sparc64_mod_fini(void)
{
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}
522 | |||
523 | module_init(des_sparc64_mod_init); | ||
524 | module_exit(des_sparc64_mod_fini); | ||
525 | |||
526 | MODULE_LICENSE("GPL"); | ||
527 | MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); | ||
528 | |||
529 | MODULE_ALIAS("des"); | ||
diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S new file mode 100644 index 000000000000..3150404e602e --- /dev/null +++ b/arch/sparc/crypto/md5_asm.S | |||
@@ -0,0 +1,70 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
ENTRY(md5_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds */
	/* Run the MD5 crypto opcode over `rounds` consecutive 64-byte
	 * blocks.  State lives in %f0-%f3; a block is staged in
	 * %f8-%f22.  A separate path handles input that is not 8-byte
	 * aligned, realigning with alignaddr/faligndata.
	 */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	andcc	%o1, 0x7, %g0		/* input 8-byte aligned? */
	ld	[%o0 + 0x08], %f2
	bne,pn	%xcc, 10f		/* no -> misaligned path */
	 ld	[%o0 + 0x0c], %f3

1:
	/* Aligned path: load one 64-byte block directly. */
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	ldd	[%o1 + 0x38], %f22

	MD5

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1		/* delay slot: next block */

5:
	/* Common exit: write back the updated state. */
	st	%f0, [%o0 + 0x00]
	st	%f1, [%o0 + 0x04]
	st	%f2, [%o0 + 0x08]
	st	%f3, [%o0 + 0x0c]
	retl
	 VISExitHalf
10:
	/* Misaligned path: round %o1 down and set GSR.align, then use
	 * faligndata to shift each pair of doublewords into place.  One
	 * extra doubleword (%f26) is read per block and carried forward
	 * as the next iteration's leading word (fsrc2 below). */
	alignaddr %o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
1:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	ldd	[%o1 + 0x40], %f26

	faligndata %f10, %f12, %f8
	faligndata %f12, %f14, %f10
	faligndata %f14, %f16, %f12
	faligndata %f16, %f18, %f14
	faligndata %f18, %f20, %f16
	faligndata %f20, %f22, %f18
	faligndata %f22, %f24, %f20
	faligndata %f24, %f26, %f22

	MD5

	subcc	%o2, 1, %o2
	fsrc2	%f26, %f10		/* carry trailing dword forward */
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1

	ba,a,pt	%xcc, 5b		/* join the common exit */
ENDPROC(md5_sparc64_transform)
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c new file mode 100644 index 000000000000..603d723038ce --- /dev/null +++ b/arch/sparc/crypto/md5_glue.c | |||
@@ -0,0 +1,188 @@ | |||
1 | /* Glue code for MD5 hashing optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c | ||
4 | * and crypto/md5.c which are: | ||
5 | * | ||
6 | * Copyright (c) Alan Smithee. | ||
7 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
8 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
9 | * Copyright (c) Mathias Krause <minipli@googlemail.com> | ||
10 | * Copyright (c) Cryptoapi developers. | ||
11 | * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> | ||
12 | */ | ||
13 | |||
14 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
15 | |||
16 | #include <crypto/internal/hash.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/cryptohash.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <crypto/md5.h> | ||
23 | |||
24 | #include <asm/pstate.h> | ||
25 | #include <asm/elf.h> | ||
26 | |||
27 | #include "opcodes.h" | ||
28 | |||
29 | asmlinkage void md5_sparc64_transform(u32 *digest, const char *data, | ||
30 | unsigned int rounds); | ||
31 | |||
32 | static int md5_sparc64_init(struct shash_desc *desc) | ||
33 | { | ||
34 | struct md5_state *mctx = shash_desc_ctx(desc); | ||
35 | |||
36 | mctx->hash[0] = cpu_to_le32(0x67452301); | ||
37 | mctx->hash[1] = cpu_to_le32(0xefcdab89); | ||
38 | mctx->hash[2] = cpu_to_le32(0x98badcfe); | ||
39 | mctx->hash[3] = cpu_to_le32(0x10325476); | ||
40 | mctx->byte_count = 0; | ||
41 | |||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data, | ||
46 | unsigned int len, unsigned int partial) | ||
47 | { | ||
48 | unsigned int done = 0; | ||
49 | |||
50 | sctx->byte_count += len; | ||
51 | if (partial) { | ||
52 | done = MD5_HMAC_BLOCK_SIZE - partial; | ||
53 | memcpy((u8 *)sctx->block + partial, data, done); | ||
54 | md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1); | ||
55 | } | ||
56 | if (len - done >= MD5_HMAC_BLOCK_SIZE) { | ||
57 | const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE; | ||
58 | |||
59 | md5_sparc64_transform(sctx->hash, data + done, rounds); | ||
60 | done += rounds * MD5_HMAC_BLOCK_SIZE; | ||
61 | } | ||
62 | |||
63 | memcpy(sctx->block, data + done, len - done); | ||
64 | } | ||
65 | |||
66 | static int md5_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
67 | unsigned int len) | ||
68 | { | ||
69 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
70 | unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; | ||
71 | |||
72 | /* Handle the fast case right here */ | ||
73 | if (partial + len < MD5_HMAC_BLOCK_SIZE) { | ||
74 | sctx->byte_count += len; | ||
75 | memcpy((u8 *)sctx->block + partial, data, len); | ||
76 | } else | ||
77 | __md5_sparc64_update(sctx, data, len, partial); | ||
78 | |||
79 | return 0; | ||
80 | } | ||
81 | |||
/* Add padding and return the message digest. */
static int md5_sparc64_final(struct shash_desc *desc, u8 *out)
{
	struct md5_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	u32 *dst = (u32 *)out;
	__le64 bits;
	static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, };

	/* MD5 appends the message length in bits, little-endian. */
	bits = cpu_to_le64(sctx->byte_count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index);

	/* We need to fill a whole block for __md5_sparc64_update() */
	if (padlen <= 56) {
		sctx->byte_count += padlen;
		memcpy((u8 *)sctx->block + index, padding, padlen);
	} else {
		__md5_sparc64_update(sctx, padding, padlen, index);
	}
	__md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);

	/* Store state in digest.  No byte swap here: the state words are
	 * already kept little-endian (see md5_sparc64_init).
	 */
	for (i = 0; i < MD5_HASH_WORDS; i++)
		dst[i] = sctx->hash[i];

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
115 | |||
116 | static int md5_sparc64_export(struct shash_desc *desc, void *out) | ||
117 | { | ||
118 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
119 | |||
120 | memcpy(out, sctx, sizeof(*sctx)); | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static int md5_sparc64_import(struct shash_desc *desc, const void *in) | ||
126 | { | ||
127 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
128 | |||
129 | memcpy(sctx, in, sizeof(*sctx)); | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
/* shash descriptor for the opcode-accelerated MD5.  The elevated
 * priority makes this implementation win over the generic md5.
 */
static struct shash_alg alg = {
	.digestsize	=	MD5_DIGEST_SIZE,
	.init		=	md5_sparc64_init,
	.update		=	md5_sparc64_update,
	.final		=	md5_sparc64_final,
	.export		=	md5_sparc64_export,
	.import		=	md5_sparc64_import,
	.descsize	=	sizeof(struct md5_state),
	.statesize	=	sizeof(struct md5_state),
	.base		=	{
		.cra_name	=	"md5",
		.cra_driver_name=	"md5-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	MD5_HMAC_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
152 | |||
153 | static bool __init sparc64_has_md5_opcode(void) | ||
154 | { | ||
155 | unsigned long cfr; | ||
156 | |||
157 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
158 | return false; | ||
159 | |||
160 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
161 | if (!(cfr & CFR_MD5)) | ||
162 | return false; | ||
163 | |||
164 | return true; | ||
165 | } | ||
166 | |||
167 | static int __init md5_sparc64_mod_init(void) | ||
168 | { | ||
169 | if (sparc64_has_md5_opcode()) { | ||
170 | pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n"); | ||
171 | return crypto_register_shash(&alg); | ||
172 | } | ||
173 | pr_info("sparc64 md5 opcode not available.\n"); | ||
174 | return -ENODEV; | ||
175 | } | ||
176 | |||
/* Tear down the registration made at module load. */
static void __exit md5_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
181 | |||
module_init(md5_sparc64_mod_init);
module_exit(md5_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated");

/* Allow automatic loading when the generic "md5" name is requested. */
MODULE_ALIAS("md5");
diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h new file mode 100644 index 000000000000..19cbaea6976f --- /dev/null +++ b/arch/sparc/crypto/opcodes.h | |||
@@ -0,0 +1,99 @@ | |||
#ifndef _OPCODES_H
#define _OPCODES_H

/* crypto_alg priority for the sparc64 crypto-opcode drivers; high
 * enough to be preferred over the generic software implementations.
 */
#define SPARC_CR_OPCODE_PRIORITY	300

/* Assemble an F3F-format instruction word from op, op3 and opf. */
#define F3F(x,y,z)	(((x)<<30)|((y)<<19)|((z)<<5))

/* Encode a double-precision FP register number into its 5-bit field,
 * folding the high bit (registers %f32 and above) down into bit 0.
 */
#define FPD_ENCODE(x)	(((x) >> 5) | ((x) & ~(0x20)))

/* Operand field placements within the instruction word. */
#define RS1(x)		(FPD_ENCODE(x) << 14)
#define RS2(x)		(FPD_ENCODE(x) << 0)
#define RS3(x)		(FPD_ENCODE(x) << 9)
#define RD(x)		(FPD_ENCODE(x) << 25)
#define IMM5_0(x)	((x) << 0)
#define IMM5_9(x)	((x) << 9)

#define CRC32C(a,b,c)	\
	.word	(F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c));

/* The hash opcodes take no explicit operands: they operate on fixed
 * FP register ranges holding the digest state and the message block.
 */
#define MD5		\
	.word	0x81b02800;
#define SHA1		\
	.word	0x81b02820;
#define SHA256		\
	.word	0x81b02840;
#define SHA512		\
	.word	0x81b02860;

/* AES round and key-expansion primitives. */
#define AES_EROUND01(a,b,c,d)	\
	.word	(F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND23(a,b,c,d)	\
	.word	(F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND01(a,b,c,d)	\
	.word	(F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND23(a,b,c,d)	\
	.word	(F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND01_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND23_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND01_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND23_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_KEXPAND1(a,b,c,d)	\
	.word	(F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d));
#define AES_KEXPAND0(a,b,c)	\
	.word	(F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c));
#define AES_KEXPAND2(a,b,c)	\
	.word	(F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));

/* DES permutation, key-expansion and round primitives. */
#define DES_IP(a,b)		\
	.word	(F3F(2, 0x36, 0x134)|RS1(a)|RD(b));
#define DES_IIP(a,b)		\
	.word	(F3F(2, 0x36, 0x135)|RS1(a)|RD(b));
#define DES_KEXPAND(a,b,c)	\
	.word	(F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c));
#define DES_ROUND(a,b,c,d)	\
	.word	(F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d));

/* Camellia F/FL/FLI functions. */
#define CAMELLIA_F(a,b,c,d)	\
	.word	(F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define CAMELLIA_FL(a,b,c)	\
	.word	(F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c));
#define CAMELLIA_FLI(a,b,c)	\
	.word	(F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c));

/* Pre-assembled movdtox/movxtod forms for fixed FP/integer register
 * pairs, used to shuttle data between the FP and integer files.
 */
#define MOVDTOX_F0_O4	\
	.word	0x99b02200
#define MOVDTOX_F2_O5	\
	.word	0x9bb02202
#define MOVXTOD_G1_F60	\
	.word	0xbbb02301
#define MOVXTOD_G1_F62	\
	.word	0xbfb02301
#define MOVXTOD_G3_F4	\
	.word	0x89b02303;
#define MOVXTOD_G7_F6	\
	.word	0x8db02307;
#define MOVXTOD_G3_F0	\
	.word	0x81b02303;
#define MOVXTOD_G7_F2	\
	.word	0x85b02307;
#define MOVXTOD_O0_F0	\
	.word	0x81b02308;
#define MOVXTOD_O5_F0	\
	.word	0x81b0230d;
#define MOVXTOD_O5_F2	\
	.word	0x85b0230d;
#define MOVXTOD_O5_F4	\
	.word	0x89b0230d;
#define MOVXTOD_O5_F6	\
	.word	0x8db0230d;
#define MOVXTOD_G3_F60	\
	.word	0xbbb02303;
#define MOVXTOD_G7_F62	\
	.word	0xbfb02307;

#endif /* _OPCODES_H */
diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S new file mode 100644 index 000000000000..219d10c5ae0e --- /dev/null +++ b/arch/sparc/crypto/sha1_asm.S | |||
@@ -0,0 +1,72 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
ENTRY(sha1_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds
	 *
	 * Run the SHA1 crypto opcode over 'rounds' 64-byte blocks.  The
	 * five state words live in %f0-%f4; each block is staged into
	 * %f8-%f22 before SHA1 is issued.
	 */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	ld	[%o0 + 0x08], %f2
	andcc	%o1, 0x7, %g0		/* data 8-byte aligned? */
	ld	[%o0 + 0x0c], %f3
	bne,pn	%xcc, 10f		/* no: take the faligndata path */
	ld	[%o0 + 0x10], %f4	/* delay slot */

1:
	/* Aligned fast path: one 64-byte block per iteration. */
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	ldd	[%o1 + 0x38], %f22

	SHA1

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	add	%o1, 0x40, %o1		/* delay slot: next block */

5:
	/* Store the updated digest back to memory. */
	st	%f0, [%o0 + 0x00]
	st	%f1, [%o0 + 0x04]
	st	%f2, [%o0 + 0x08]
	st	%f3, [%o0 + 0x0c]
	st	%f4, [%o0 + 0x10]
	retl
	VISExitHalf
10:
	/* Unaligned path: re-align each block with alignaddr/faligndata. */
	alignaddr %o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
1:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	ldd	[%o1 + 0x40], %f26

	faligndata %f10, %f12, %f8
	faligndata %f12, %f14, %f10
	faligndata %f14, %f16, %f12
	faligndata %f16, %f18, %f14
	faligndata %f18, %f20, %f16
	faligndata %f20, %f22, %f18
	faligndata %f22, %f24, %f20
	faligndata %f24, %f26, %f22

	SHA1

	subcc	%o2, 1, %o2
	fsrc2	%f26, %f10		/* carry trailing doubleword over */
	bne,pt	%xcc, 1b
	add	%o1, 0x40, %o1		/* delay slot */

	ba,a,pt	%xcc, 5b
ENDPROC(sha1_sparc64_transform)
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c new file mode 100644 index 000000000000..2bbb20bee9f1 --- /dev/null +++ b/arch/sparc/crypto/sha1_glue.c | |||
@@ -0,0 +1,183 @@ | |||
1 | /* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c | ||
4 | * | ||
5 | * Copyright (c) Alan Smithee. | ||
6 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
7 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
8 | * Copyright (c) Mathias Krause <minipli@googlemail.com> | ||
9 | */ | ||
10 | |||
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
12 | |||
13 | #include <crypto/internal/hash.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/cryptohash.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <crypto/sha.h> | ||
20 | |||
21 | #include <asm/pstate.h> | ||
22 | #include <asm/elf.h> | ||
23 | |||
24 | #include "opcodes.h" | ||
25 | |||
26 | asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data, | ||
27 | unsigned int rounds); | ||
28 | |||
29 | static int sha1_sparc64_init(struct shash_desc *desc) | ||
30 | { | ||
31 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
32 | |||
33 | *sctx = (struct sha1_state){ | ||
34 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
35 | }; | ||
36 | |||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data, | ||
41 | unsigned int len, unsigned int partial) | ||
42 | { | ||
43 | unsigned int done = 0; | ||
44 | |||
45 | sctx->count += len; | ||
46 | if (partial) { | ||
47 | done = SHA1_BLOCK_SIZE - partial; | ||
48 | memcpy(sctx->buffer + partial, data, done); | ||
49 | sha1_sparc64_transform(sctx->state, sctx->buffer, 1); | ||
50 | } | ||
51 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
52 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
53 | |||
54 | sha1_sparc64_transform(sctx->state, data + done, rounds); | ||
55 | done += rounds * SHA1_BLOCK_SIZE; | ||
56 | } | ||
57 | |||
58 | memcpy(sctx->buffer, data + done, len - done); | ||
59 | } | ||
60 | |||
61 | static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
62 | unsigned int len) | ||
63 | { | ||
64 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
65 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
66 | |||
67 | /* Handle the fast case right here */ | ||
68 | if (partial + len < SHA1_BLOCK_SIZE) { | ||
69 | sctx->count += len; | ||
70 | memcpy(sctx->buffer + partial, data, len); | ||
71 | } else | ||
72 | __sha1_sparc64_update(sctx, data, len, partial); | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
/* Add padding and return the message digest. */
static int sha1_sparc64_final(struct shash_desc *desc, u8 *out)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };

	/* SHA-1 appends the message length in bits, big-endian. */
	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA1_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);

	/* We need to fill a whole block for __sha1_sparc64_update() */
	if (padlen <= 56) {
		sctx->count += padlen;
		memcpy(sctx->buffer + index, padding, padlen);
	} else {
		__sha1_sparc64_update(sctx, padding, padlen, index);
	}
	__sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);

	/* Store state in digest */
	for (i = 0; i < 5; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
110 | |||
111 | static int sha1_sparc64_export(struct shash_desc *desc, void *out) | ||
112 | { | ||
113 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
114 | |||
115 | memcpy(out, sctx, sizeof(*sctx)); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static int sha1_sparc64_import(struct shash_desc *desc, const void *in) | ||
121 | { | ||
122 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
123 | |||
124 | memcpy(sctx, in, sizeof(*sctx)); | ||
125 | |||
126 | return 0; | ||
127 | } | ||
128 | |||
/* shash descriptor for the opcode-accelerated SHA-1.  The elevated
 * priority makes this implementation win over the generic sha1.
 */
static struct shash_alg alg = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	sha1_sparc64_init,
	.update		=	sha1_sparc64_update,
	.final		=	sha1_sparc64_final,
	.export		=	sha1_sparc64_export,
	.import		=	sha1_sparc64_import,
	.descsize	=	sizeof(struct sha1_state),
	.statesize	=	sizeof(struct sha1_state),
	.base		=	{
		.cra_name	=	"sha1",
		.cra_driver_name=	"sha1-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA1_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
147 | |||
148 | static bool __init sparc64_has_sha1_opcode(void) | ||
149 | { | ||
150 | unsigned long cfr; | ||
151 | |||
152 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
153 | return false; | ||
154 | |||
155 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
156 | if (!(cfr & CFR_SHA1)) | ||
157 | return false; | ||
158 | |||
159 | return true; | ||
160 | } | ||
161 | |||
162 | static int __init sha1_sparc64_mod_init(void) | ||
163 | { | ||
164 | if (sparc64_has_sha1_opcode()) { | ||
165 | pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n"); | ||
166 | return crypto_register_shash(&alg); | ||
167 | } | ||
168 | pr_info("sparc64 sha1 opcode not available.\n"); | ||
169 | return -ENODEV; | ||
170 | } | ||
171 | |||
/* Tear down the registration made at module load. */
static void __exit sha1_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
176 | |||
module_init(sha1_sparc64_mod_init);
module_exit(sha1_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated");

/* Allow automatic loading when the generic "sha1" name is requested. */
MODULE_ALIAS("sha1");
diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S new file mode 100644 index 000000000000..b5f3d5826eb4 --- /dev/null +++ b/arch/sparc/crypto/sha256_asm.S | |||
@@ -0,0 +1,78 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
ENTRY(sha256_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds
	 *
	 * Run the SHA256 crypto opcode over 'rounds' 64-byte blocks.  The
	 * eight state words live in %f0-%f7; each block is staged into
	 * %f8-%f22 before SHA256 is issued.
	 */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	ld	[%o0 + 0x08], %f2
	ld	[%o0 + 0x0c], %f3
	ld	[%o0 + 0x10], %f4
	ld	[%o0 + 0x14], %f5
	andcc	%o1, 0x7, %g0		/* data 8-byte aligned? */
	ld	[%o0 + 0x18], %f6
	bne,pn	%xcc, 10f		/* no: take the faligndata path */
	ld	[%o0 + 0x1c], %f7	/* delay slot */

1:
	/* Aligned fast path: one 64-byte block per iteration. */
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	ldd	[%o1 + 0x38], %f22

	SHA256

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	add	%o1, 0x40, %o1		/* delay slot: next block */

5:
	/* Store the updated digest back to memory. */
	st	%f0, [%o0 + 0x00]
	st	%f1, [%o0 + 0x04]
	st	%f2, [%o0 + 0x08]
	st	%f3, [%o0 + 0x0c]
	st	%f4, [%o0 + 0x10]
	st	%f5, [%o0 + 0x14]
	st	%f6, [%o0 + 0x18]
	st	%f7, [%o0 + 0x1c]
	retl
	VISExitHalf
10:
	/* Unaligned path: re-align each block with alignaddr/faligndata. */
	alignaddr %o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
1:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	ldd	[%o1 + 0x40], %f26

	faligndata %f10, %f12, %f8
	faligndata %f12, %f14, %f10
	faligndata %f14, %f16, %f12
	faligndata %f16, %f18, %f14
	faligndata %f18, %f20, %f16
	faligndata %f20, %f22, %f18
	faligndata %f22, %f24, %f20
	faligndata %f24, %f26, %f22

	SHA256

	subcc	%o2, 1, %o2
	fsrc2	%f26, %f10		/* carry trailing doubleword over */
	bne,pt	%xcc, 1b
	add	%o1, 0x40, %o1		/* delay slot */

	ba,a,pt	%xcc, 5b
ENDPROC(sha256_sparc64_transform)
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c new file mode 100644 index 000000000000..591e656bd891 --- /dev/null +++ b/arch/sparc/crypto/sha256_glue.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon crypto/sha256_generic.c | ||
4 | * | ||
5 | * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> | ||
6 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
7 | * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> | ||
8 | * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com> | ||
9 | */ | ||
10 | |||
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
12 | |||
13 | #include <crypto/internal/hash.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/cryptohash.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <crypto/sha.h> | ||
20 | |||
21 | #include <asm/pstate.h> | ||
22 | #include <asm/elf.h> | ||
23 | |||
24 | #include "opcodes.h" | ||
25 | |||
26 | asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data, | ||
27 | unsigned int rounds); | ||
28 | |||
29 | static int sha224_sparc64_init(struct shash_desc *desc) | ||
30 | { | ||
31 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
32 | sctx->state[0] = SHA224_H0; | ||
33 | sctx->state[1] = SHA224_H1; | ||
34 | sctx->state[2] = SHA224_H2; | ||
35 | sctx->state[3] = SHA224_H3; | ||
36 | sctx->state[4] = SHA224_H4; | ||
37 | sctx->state[5] = SHA224_H5; | ||
38 | sctx->state[6] = SHA224_H6; | ||
39 | sctx->state[7] = SHA224_H7; | ||
40 | sctx->count = 0; | ||
41 | |||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | static int sha256_sparc64_init(struct shash_desc *desc) | ||
46 | { | ||
47 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
48 | sctx->state[0] = SHA256_H0; | ||
49 | sctx->state[1] = SHA256_H1; | ||
50 | sctx->state[2] = SHA256_H2; | ||
51 | sctx->state[3] = SHA256_H3; | ||
52 | sctx->state[4] = SHA256_H4; | ||
53 | sctx->state[5] = SHA256_H5; | ||
54 | sctx->state[6] = SHA256_H6; | ||
55 | sctx->state[7] = SHA256_H7; | ||
56 | sctx->count = 0; | ||
57 | |||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data, | ||
62 | unsigned int len, unsigned int partial) | ||
63 | { | ||
64 | unsigned int done = 0; | ||
65 | |||
66 | sctx->count += len; | ||
67 | if (partial) { | ||
68 | done = SHA256_BLOCK_SIZE - partial; | ||
69 | memcpy(sctx->buf + partial, data, done); | ||
70 | sha256_sparc64_transform(sctx->state, sctx->buf, 1); | ||
71 | } | ||
72 | if (len - done >= SHA256_BLOCK_SIZE) { | ||
73 | const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; | ||
74 | |||
75 | sha256_sparc64_transform(sctx->state, data + done, rounds); | ||
76 | done += rounds * SHA256_BLOCK_SIZE; | ||
77 | } | ||
78 | |||
79 | memcpy(sctx->buf, data + done, len - done); | ||
80 | } | ||
81 | |||
82 | static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
83 | unsigned int len) | ||
84 | { | ||
85 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
86 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
87 | |||
88 | /* Handle the fast case right here */ | ||
89 | if (partial + len < SHA256_BLOCK_SIZE) { | ||
90 | sctx->count += len; | ||
91 | memcpy(sctx->buf + partial, data, len); | ||
92 | } else | ||
93 | __sha256_sparc64_update(sctx, data, len, partial); | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
/* Add the MD-style padding and produce the big-endian digest. */
static int sha256_sparc64_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };

	/* SHA-256 appends the message length in bits, big-endian. */
	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA256_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index);

	/* We need to fill a whole block for __sha256_sparc64_update() */
	if (padlen <= 56) {
		sctx->count += padlen;
		memcpy(sctx->buf + index, padding, padlen);
	} else {
		__sha256_sparc64_update(sctx, padding, padlen, index);
	}
	__sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
130 | |||
/* SHA-224 is SHA-256 truncated to 28 bytes: finalize into a scratch
 * buffer and copy out the leading words.
 */
static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash)
{
	u8 D[SHA256_DIGEST_SIZE];

	sha256_sparc64_final(desc, D);

	memcpy(hash, D, SHA224_DIGEST_SIZE);
	/* NOTE(review): a plain memset of a dying stack buffer can be
	 * elided by the compiler — confirm this wipe is effective.
	 */
	memset(D, 0, SHA256_DIGEST_SIZE);

	return 0;
}
142 | |||
143 | static int sha256_sparc64_export(struct shash_desc *desc, void *out) | ||
144 | { | ||
145 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
146 | |||
147 | memcpy(out, sctx, sizeof(*sctx)); | ||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | static int sha256_sparc64_import(struct shash_desc *desc, const void *in) | ||
152 | { | ||
153 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
154 | |||
155 | memcpy(sctx, in, sizeof(*sctx)); | ||
156 | return 0; | ||
157 | } | ||
158 | |||
/* shash descriptor for the opcode-accelerated SHA-256. */
static struct shash_alg sha256 = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	sha256_sparc64_init,
	.update		=	sha256_sparc64_update,
	.final		=	sha256_sparc64_final,
	.export		=	sha256_sparc64_export,
	.import		=	sha256_sparc64_import,
	.descsize	=	sizeof(struct sha256_state),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name	=	"sha256",
		.cra_driver_name=	"sha256-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA256_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
177 | |||
/* shash descriptor for the opcode-accelerated SHA-224; reuses the
 * SHA-256 update path and truncates in its own .final.
 *
 * NOTE(review): unlike the sha256 alg, no .export/.import/.statesize
 * are set here — confirm the shash defaults are acceptable for sha224.
 */
static struct shash_alg sha224 = {
	.digestsize	=	SHA224_DIGEST_SIZE,
	.init		=	sha224_sparc64_init,
	.update		=	sha256_sparc64_update,
	.final		=	sha224_sparc64_final,
	.descsize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name	=	"sha224",
		.cra_driver_name=	"sha224-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA224_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
193 | |||
194 | static bool __init sparc64_has_sha256_opcode(void) | ||
195 | { | ||
196 | unsigned long cfr; | ||
197 | |||
198 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
199 | return false; | ||
200 | |||
201 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
202 | if (!(cfr & CFR_SHA256)) | ||
203 | return false; | ||
204 | |||
205 | return true; | ||
206 | } | ||
207 | |||
208 | static int __init sha256_sparc64_mod_init(void) | ||
209 | { | ||
210 | if (sparc64_has_sha256_opcode()) { | ||
211 | int ret = crypto_register_shash(&sha224); | ||
212 | if (ret < 0) | ||
213 | return ret; | ||
214 | |||
215 | ret = crypto_register_shash(&sha256); | ||
216 | if (ret < 0) { | ||
217 | crypto_unregister_shash(&sha224); | ||
218 | return ret; | ||
219 | } | ||
220 | |||
221 | pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n"); | ||
222 | return 0; | ||
223 | } | ||
224 | pr_info("sparc64 sha256 opcode not available.\n"); | ||
225 | return -ENODEV; | ||
226 | } | ||
227 | |||
/* Tear down both registrations made at module load. */
static void __exit sha256_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&sha224);
	crypto_unregister_shash(&sha256);
}
233 | |||
module_init(sha256_sparc64_mod_init);
module_exit(sha256_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated");

/* Allow automatic loading via either generic algorithm name. */
MODULE_ALIAS("sha224");
MODULE_ALIAS("sha256");
diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S new file mode 100644 index 000000000000..54bfba713c0e --- /dev/null +++ b/arch/sparc/crypto/sha512_asm.S | |||
@@ -0,0 +1,102 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
ENTRY(sha512_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds
	 *
	 * Run the SHA512 crypto opcode over 'rounds' 128-byte blocks.
	 * The eight 64-bit state words live in %f0-%f14 as doubles; each
	 * block is staged into %f16-%f46.  Full VISEntry is needed since
	 * the upper half of the FP register file (%f32+) is used.
	 */
	VISEntry
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	andcc	%o1, 0x7, %g0		/* data 8-byte aligned? */
	ldd	[%o0 + 0x30], %f12
	bne,pn	%xcc, 10f		/* no: take the faligndata path */
	ldd	[%o0 + 0x38], %f14	/* delay slot */

1:
	/* Aligned fast path: one 128-byte block per iteration. */
	ldd	[%o1 + 0x00], %f16
	ldd	[%o1 + 0x08], %f18
	ldd	[%o1 + 0x10], %f20
	ldd	[%o1 + 0x18], %f22
	ldd	[%o1 + 0x20], %f24
	ldd	[%o1 + 0x28], %f26
	ldd	[%o1 + 0x30], %f28
	ldd	[%o1 + 0x38], %f30
	ldd	[%o1 + 0x40], %f32
	ldd	[%o1 + 0x48], %f34
	ldd	[%o1 + 0x50], %f36
	ldd	[%o1 + 0x58], %f38
	ldd	[%o1 + 0x60], %f40
	ldd	[%o1 + 0x68], %f42
	ldd	[%o1 + 0x70], %f44
	ldd	[%o1 + 0x78], %f46

	SHA512

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	add	%o1, 0x80, %o1		/* delay slot: next block */

5:
	/* Store the updated digest back to memory. */
	std	%f0, [%o0 + 0x00]
	std	%f2, [%o0 + 0x08]
	std	%f4, [%o0 + 0x10]
	std	%f6, [%o0 + 0x18]
	std	%f8, [%o0 + 0x20]
	std	%f10, [%o0 + 0x28]
	std	%f12, [%o0 + 0x30]
	std	%f14, [%o0 + 0x38]
	retl
	VISExit
10:
	/* Unaligned path: re-align each block with alignaddr/faligndata. */
	alignaddr %o1, %g0, %o1

	ldd	[%o1 + 0x00], %f18
1:
	ldd	[%o1 + 0x08], %f20
	ldd	[%o1 + 0x10], %f22
	ldd	[%o1 + 0x18], %f24
	ldd	[%o1 + 0x20], %f26
	ldd	[%o1 + 0x28], %f28
	ldd	[%o1 + 0x30], %f30
	ldd	[%o1 + 0x38], %f32
	ldd	[%o1 + 0x40], %f34
	ldd	[%o1 + 0x48], %f36
	ldd	[%o1 + 0x50], %f38
	ldd	[%o1 + 0x58], %f40
	ldd	[%o1 + 0x60], %f42
	ldd	[%o1 + 0x68], %f44
	ldd	[%o1 + 0x70], %f46
	ldd	[%o1 + 0x78], %f48
	ldd	[%o1 + 0x80], %f50

	faligndata %f18, %f20, %f16
	faligndata %f20, %f22, %f18
	faligndata %f22, %f24, %f20
	faligndata %f24, %f26, %f22
	faligndata %f26, %f28, %f24
	faligndata %f28, %f30, %f26
	faligndata %f30, %f32, %f28
	faligndata %f32, %f34, %f30
	faligndata %f34, %f36, %f32
	faligndata %f36, %f38, %f34
	faligndata %f38, %f40, %f36
	faligndata %f40, %f42, %f38
	faligndata %f42, %f44, %f40
	faligndata %f44, %f46, %f42
	faligndata %f46, %f48, %f44
	faligndata %f48, %f50, %f46

	SHA512

	subcc	%o2, 1, %o2
	fsrc2	%f50, %f18		/* carry trailing doubleword over */
	bne,pt	%xcc, 1b
	add	%o1, 0x80, %o1		/* delay slot */

	ba,a,pt	%xcc, 5b
ENDPROC(sha512_sparc64_transform)
diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c new file mode 100644 index 000000000000..486f0a2b7001 --- /dev/null +++ b/arch/sparc/crypto/sha512_glue.c | |||
@@ -0,0 +1,226 @@ | |||
1 | /* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon crypto/sha512_generic.c | ||
4 | * | ||
5 | * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> | ||
6 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
7 | * Copyright (c) 2003 Kyle McMartin <kyle@debian.org> | ||
8 | */ | ||
9 | |||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
12 | #include <crypto/internal/hash.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/mm.h> | ||
16 | #include <linux/cryptohash.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <crypto/sha.h> | ||
19 | |||
20 | #include <asm/pstate.h> | ||
21 | #include <asm/elf.h> | ||
22 | |||
23 | #include "opcodes.h" | ||
24 | |||
25 | asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data, | ||
26 | unsigned int rounds); | ||
27 | |||
28 | static int sha512_sparc64_init(struct shash_desc *desc) | ||
29 | { | ||
30 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
31 | sctx->state[0] = SHA512_H0; | ||
32 | sctx->state[1] = SHA512_H1; | ||
33 | sctx->state[2] = SHA512_H2; | ||
34 | sctx->state[3] = SHA512_H3; | ||
35 | sctx->state[4] = SHA512_H4; | ||
36 | sctx->state[5] = SHA512_H5; | ||
37 | sctx->state[6] = SHA512_H6; | ||
38 | sctx->state[7] = SHA512_H7; | ||
39 | sctx->count[0] = sctx->count[1] = 0; | ||
40 | |||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | static int sha384_sparc64_init(struct shash_desc *desc) | ||
45 | { | ||
46 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
47 | sctx->state[0] = SHA384_H0; | ||
48 | sctx->state[1] = SHA384_H1; | ||
49 | sctx->state[2] = SHA384_H2; | ||
50 | sctx->state[3] = SHA384_H3; | ||
51 | sctx->state[4] = SHA384_H4; | ||
52 | sctx->state[5] = SHA384_H5; | ||
53 | sctx->state[6] = SHA384_H6; | ||
54 | sctx->state[7] = SHA384_H7; | ||
55 | sctx->count[0] = sctx->count[1] = 0; | ||
56 | |||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data, | ||
61 | unsigned int len, unsigned int partial) | ||
62 | { | ||
63 | unsigned int done = 0; | ||
64 | |||
65 | if ((sctx->count[0] += len) < len) | ||
66 | sctx->count[1]++; | ||
67 | if (partial) { | ||
68 | done = SHA512_BLOCK_SIZE - partial; | ||
69 | memcpy(sctx->buf + partial, data, done); | ||
70 | sha512_sparc64_transform(sctx->state, sctx->buf, 1); | ||
71 | } | ||
72 | if (len - done >= SHA512_BLOCK_SIZE) { | ||
73 | const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; | ||
74 | |||
75 | sha512_sparc64_transform(sctx->state, data + done, rounds); | ||
76 | done += rounds * SHA512_BLOCK_SIZE; | ||
77 | } | ||
78 | |||
79 | memcpy(sctx->buf, data + done, len - done); | ||
80 | } | ||
81 | |||
82 | static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
83 | unsigned int len) | ||
84 | { | ||
85 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
86 | unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
87 | |||
88 | /* Handle the fast case right here */ | ||
89 | if (partial + len < SHA512_BLOCK_SIZE) { | ||
90 | if ((sctx->count[0] += len) < len) | ||
91 | sctx->count[1]++; | ||
92 | memcpy(sctx->buf + partial, data, len); | ||
93 | } else | ||
94 | __sha512_sparc64_update(sctx, data, len, partial); | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | static int sha512_sparc64_final(struct shash_desc *desc, u8 *out) | ||
100 | { | ||
101 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
102 | unsigned int i, index, padlen; | ||
103 | __be64 *dst = (__be64 *)out; | ||
104 | __be64 bits[2]; | ||
105 | static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; | ||
106 | |||
107 | /* Save number of bits */ | ||
108 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
109 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
110 | |||
111 | /* Pad out to 112 mod 128 and append length */ | ||
112 | index = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
113 | padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index); | ||
114 | |||
115 | /* We need to fill a whole block for __sha512_sparc64_update() */ | ||
116 | if (padlen <= 112) { | ||
117 | if ((sctx->count[0] += padlen) < padlen) | ||
118 | sctx->count[1]++; | ||
119 | memcpy(sctx->buf + index, padding, padlen); | ||
120 | } else { | ||
121 | __sha512_sparc64_update(sctx, padding, padlen, index); | ||
122 | } | ||
123 | __sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112); | ||
124 | |||
125 | /* Store state in digest */ | ||
126 | for (i = 0; i < 8; i++) | ||
127 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
128 | |||
129 | /* Wipe context */ | ||
130 | memset(sctx, 0, sizeof(*sctx)); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash) | ||
136 | { | ||
137 | u8 D[64]; | ||
138 | |||
139 | sha512_sparc64_final(desc, D); | ||
140 | |||
141 | memcpy(hash, D, 48); | ||
142 | memset(D, 0, 64); | ||
143 | |||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | static struct shash_alg sha512 = { | ||
148 | .digestsize = SHA512_DIGEST_SIZE, | ||
149 | .init = sha512_sparc64_init, | ||
150 | .update = sha512_sparc64_update, | ||
151 | .final = sha512_sparc64_final, | ||
152 | .descsize = sizeof(struct sha512_state), | ||
153 | .base = { | ||
154 | .cra_name = "sha512", | ||
155 | .cra_driver_name= "sha512-sparc64", | ||
156 | .cra_priority = SPARC_CR_OPCODE_PRIORITY, | ||
157 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
158 | .cra_blocksize = SHA512_BLOCK_SIZE, | ||
159 | .cra_module = THIS_MODULE, | ||
160 | } | ||
161 | }; | ||
162 | |||
163 | static struct shash_alg sha384 = { | ||
164 | .digestsize = SHA384_DIGEST_SIZE, | ||
165 | .init = sha384_sparc64_init, | ||
166 | .update = sha512_sparc64_update, | ||
167 | .final = sha384_sparc64_final, | ||
168 | .descsize = sizeof(struct sha512_state), | ||
169 | .base = { | ||
170 | .cra_name = "sha384", | ||
171 | .cra_driver_name= "sha384-sparc64", | ||
172 | .cra_priority = SPARC_CR_OPCODE_PRIORITY, | ||
173 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
174 | .cra_blocksize = SHA384_BLOCK_SIZE, | ||
175 | .cra_module = THIS_MODULE, | ||
176 | } | ||
177 | }; | ||
178 | |||
179 | static bool __init sparc64_has_sha512_opcode(void) | ||
180 | { | ||
181 | unsigned long cfr; | ||
182 | |||
183 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
184 | return false; | ||
185 | |||
186 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
187 | if (!(cfr & CFR_SHA512)) | ||
188 | return false; | ||
189 | |||
190 | return true; | ||
191 | } | ||
192 | |||
193 | static int __init sha512_sparc64_mod_init(void) | ||
194 | { | ||
195 | if (sparc64_has_sha512_opcode()) { | ||
196 | int ret = crypto_register_shash(&sha384); | ||
197 | if (ret < 0) | ||
198 | return ret; | ||
199 | |||
200 | ret = crypto_register_shash(&sha512); | ||
201 | if (ret < 0) { | ||
202 | crypto_unregister_shash(&sha384); | ||
203 | return ret; | ||
204 | } | ||
205 | |||
206 | pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n"); | ||
207 | return 0; | ||
208 | } | ||
209 | pr_info("sparc64 sha512 opcode not available.\n"); | ||
210 | return -ENODEV; | ||
211 | } | ||
212 | |||
213 | static void __exit sha512_sparc64_mod_fini(void) | ||
214 | { | ||
215 | crypto_unregister_shash(&sha384); | ||
216 | crypto_unregister_shash(&sha512); | ||
217 | } | ||
218 | |||
219 | module_init(sha512_sparc64_mod_init); | ||
220 | module_exit(sha512_sparc64_mod_fini); | ||
221 | |||
222 | MODULE_LICENSE("GPL"); | ||
223 | MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated"); | ||
224 | |||
225 | MODULE_ALIAS("sha384"); | ||
226 | MODULE_ALIAS("sha512"); | ||
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 67f83e0a0d68..f80ff93f6f75 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild | |||
@@ -17,6 +17,7 @@ header-y += uctx.h | |||
17 | header-y += utrap.h | 17 | header-y += utrap.h |
18 | header-y += watchdog.h | 18 | header-y += watchdog.h |
19 | 19 | ||
20 | generic-y += clkdev.h | ||
20 | generic-y += div64.h | 21 | generic-y += div64.h |
21 | generic-y += local64.h | 22 | generic-y += local64.h |
22 | generic-y += irq_regs.h | 23 | generic-y += irq_regs.h |
diff --git a/arch/sparc/include/asm/asi.h b/arch/sparc/include/asm/asi.h index 61ebe7411ceb..aace6f313716 100644 --- a/arch/sparc/include/asm/asi.h +++ b/arch/sparc/include/asm/asi.h | |||
@@ -141,7 +141,8 @@ | |||
141 | /* SpitFire and later extended ASIs. The "(III)" marker designates | 141 | /* SpitFire and later extended ASIs. The "(III)" marker designates |
142 | * UltraSparc-III and later specific ASIs. The "(CMT)" marker designates | 142 | * UltraSparc-III and later specific ASIs. The "(CMT)" marker designates |
143 | * Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific | 143 | * Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific |
144 | * ASIs, "(4V)" designates SUN4V specific ASIs. | 144 | * ASIs, "(4V)" designates SUN4V specific ASIs. "(NG4)" designates SPARC-T4 |
145 | * and later ASIs. | ||
145 | */ | 146 | */ |
146 | #define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */ | 147 | #define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */ |
147 | #define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */ | 148 | #define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */ |
@@ -243,6 +244,7 @@ | |||
243 | #define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/ | 244 | #define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/ |
244 | #define ASI_INTR_R 0x7f /* IRQ vector dispatch read */ | 245 | #define ASI_INTR_R 0x7f /* IRQ vector dispatch read */ |
245 | #define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */ | 246 | #define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */ |
247 | #define ASI_PIC 0xb0 /* (NG4) PIC registers */ | ||
246 | #define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */ | 248 | #define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */ |
247 | #define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */ | 249 | #define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */ |
248 | #define ASI_PST16_P 0xc2 /* Primary, 4 16-bit, partial */ | 250 | #define ASI_PST16_P 0xc2 /* Primary, 4 16-bit, partial */ |
@@ -268,9 +270,28 @@ | |||
268 | #define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* (NG) init-store, twin load, | 270 | #define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* (NG) init-store, twin load, |
269 | * primary, implicit | 271 | * primary, implicit |
270 | */ | 272 | */ |
273 | #define ASI_BLK_INIT_QUAD_LDD_S 0xe3 /* (NG) init-store, twin load, | ||
274 | * secondary, implicit | ||
275 | */ | ||
271 | #define ASI_BLK_P 0xf0 /* Primary, blk ld/st */ | 276 | #define ASI_BLK_P 0xf0 /* Primary, blk ld/st */ |
272 | #define ASI_BLK_S 0xf1 /* Secondary, blk ld/st */ | 277 | #define ASI_BLK_S 0xf1 /* Secondary, blk ld/st */ |
278 | #define ASI_ST_BLKINIT_MRU_P 0xf2 /* (NG4) init-store, twin load, | ||
279 | * Most-Recently-Used, primary, | ||
280 | * implicit | ||
281 | */ | ||
282 | #define ASI_ST_BLKINIT_MRU_S 0xf2 /* (NG4) init-store, twin load, | ||
283 | * Most-Recently-Used, secondary, | ||
284 | * implicit | ||
285 | */ | ||
273 | #define ASI_BLK_PL 0xf8 /* Primary, blk ld/st, little */ | 286 | #define ASI_BLK_PL 0xf8 /* Primary, blk ld/st, little */ |
274 | #define ASI_BLK_SL 0xf9 /* Secondary, blk ld/st, little */ | 287 | #define ASI_BLK_SL 0xf9 /* Secondary, blk ld/st, little */ |
288 | #define ASI_ST_BLKINIT_MRU_PL 0xfa /* (NG4) init-store, twin load, | ||
289 | * Most-Recently-Used, primary, | ||
290 | * implicit, little-endian | ||
291 | */ | ||
292 | #define ASI_ST_BLKINIT_MRU_SL 0xfb /* (NG4) init-store, twin load, | ||
293 | * Most-Recently-Used, secondary, | ||
294 | * implicit, little-endian | ||
295 | */ | ||
275 | 296 | ||
276 | #endif /* _SPARC_ASI_H */ | 297 | #endif /* _SPARC_ASI_H */ |
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index b8be20d42a0a..cef99fbc0a21 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h | |||
@@ -36,6 +36,7 @@ typedef s64 compat_s64; | |||
36 | typedef u32 compat_uint_t; | 36 | typedef u32 compat_uint_t; |
37 | typedef u32 compat_ulong_t; | 37 | typedef u32 compat_ulong_t; |
38 | typedef u64 compat_u64; | 38 | typedef u64 compat_u64; |
39 | typedef u32 compat_uptr_t; | ||
39 | 40 | ||
40 | struct compat_timespec { | 41 | struct compat_timespec { |
41 | compat_time_t tv_sec; | 42 | compat_time_t tv_sec; |
@@ -147,6 +148,65 @@ typedef u32 compat_old_sigset_t; | |||
147 | 148 | ||
148 | typedef u32 compat_sigset_word; | 149 | typedef u32 compat_sigset_word; |
149 | 150 | ||
151 | typedef union compat_sigval { | ||
152 | compat_int_t sival_int; | ||
153 | compat_uptr_t sival_ptr; | ||
154 | } compat_sigval_t; | ||
155 | |||
156 | #define SI_PAD_SIZE32 (128/sizeof(int) - 3) | ||
157 | |||
158 | typedef struct compat_siginfo { | ||
159 | int si_signo; | ||
160 | int si_errno; | ||
161 | int si_code; | ||
162 | |||
163 | union { | ||
164 | int _pad[SI_PAD_SIZE32]; | ||
165 | |||
166 | /* kill() */ | ||
167 | struct { | ||
168 | compat_pid_t _pid; /* sender's pid */ | ||
169 | unsigned int _uid; /* sender's uid */ | ||
170 | } _kill; | ||
171 | |||
172 | /* POSIX.1b timers */ | ||
173 | struct { | ||
174 | compat_timer_t _tid; /* timer id */ | ||
175 | int _overrun; /* overrun count */ | ||
176 | compat_sigval_t _sigval; /* same as below */ | ||
177 | int _sys_private; /* not to be passed to user */ | ||
178 | } _timer; | ||
179 | |||
180 | /* POSIX.1b signals */ | ||
181 | struct { | ||
182 | compat_pid_t _pid; /* sender's pid */ | ||
183 | unsigned int _uid; /* sender's uid */ | ||
184 | compat_sigval_t _sigval; | ||
185 | } _rt; | ||
186 | |||
187 | /* SIGCHLD */ | ||
188 | struct { | ||
189 | compat_pid_t _pid; /* which child */ | ||
190 | unsigned int _uid; /* sender's uid */ | ||
191 | int _status; /* exit code */ | ||
192 | compat_clock_t _utime; | ||
193 | compat_clock_t _stime; | ||
194 | } _sigchld; | ||
195 | |||
196 | /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */ | ||
197 | struct { | ||
198 | u32 _addr; /* faulting insn/memory ref. */ | ||
199 | int _trapno; | ||
200 | } _sigfault; | ||
201 | |||
202 | /* SIGPOLL */ | ||
203 | struct { | ||
204 | int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ | ||
205 | int _fd; | ||
206 | } _sigpoll; | ||
207 | } _sifields; | ||
208 | } compat_siginfo_t; | ||
209 | |||
150 | #define COMPAT_OFF_T_MAX 0x7fffffff | 210 | #define COMPAT_OFF_T_MAX 0x7fffffff |
151 | #define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL | 211 | #define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL |
152 | 212 | ||
@@ -156,7 +216,6 @@ typedef u32 compat_sigset_word; | |||
156 | * as pointers because the syscall entry code will have | 216 | * as pointers because the syscall entry code will have |
157 | * appropriately converted them already. | 217 | * appropriately converted them already. |
158 | */ | 218 | */ |
159 | typedef u32 compat_uptr_t; | ||
160 | 219 | ||
161 | static inline void __user *compat_ptr(compat_uptr_t uptr) | 220 | static inline void __user *compat_ptr(compat_uptr_t uptr) |
162 | { | 221 | { |
diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h index 2d4d755cba9e..ac74a2c98e6d 100644 --- a/arch/sparc/include/asm/elf_32.h +++ b/arch/sparc/include/asm/elf_32.h | |||
@@ -128,6 +128,7 @@ typedef struct { | |||
128 | 128 | ||
129 | #define ELF_PLATFORM (NULL) | 129 | #define ELF_PLATFORM (NULL) |
130 | 130 | ||
131 | #define SET_PERSONALITY(ex) set_personality(PER_LINUX) | 131 | #define SET_PERSONALITY(ex) \ |
132 | set_personality(PER_LINUX | (current->personality & (~PER_MASK))) | ||
132 | 133 | ||
133 | #endif /* !(__ASMSPARC_ELF_H) */ | 134 | #endif /* !(__ASMSPARC_ELF_H) */ |
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 7df8b7f544d4..370ca1e71ffb 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h | |||
@@ -86,6 +86,15 @@ | |||
86 | #define AV_SPARC_IMA 0x00400000 /* integer multiply-add */ | 86 | #define AV_SPARC_IMA 0x00400000 /* integer multiply-add */ |
87 | #define AV_SPARC_ASI_CACHE_SPARING \ | 87 | #define AV_SPARC_ASI_CACHE_SPARING \ |
88 | 0x00800000 /* cache sparing ASIs available */ | 88 | 0x00800000 /* cache sparing ASIs available */ |
89 | #define AV_SPARC_PAUSE 0x01000000 /* PAUSE available */ | ||
90 | #define AV_SPARC_CBCOND 0x02000000 /* CBCOND insns available */ | ||
91 | |||
92 | /* Solaris decided to enumerate every single crypto instruction type | ||
93 | * in the AT_HWCAP bits. This is wasteful, since if crypto is present, | ||
94 | * you still need to look in the CFR register to see if the opcode is | ||
95 | * really available. So we simply advertise only "crypto" support. | ||
96 | */ | ||
97 | #define HWCAP_SPARC_CRYPTO 0x04000000 /* CRYPTO insns available */ | ||
89 | 98 | ||
90 | #define CORE_DUMP_USE_REGSET | 99 | #define CORE_DUMP_USE_REGSET |
91 | 100 | ||
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 177061064ee6..8c5eed6d267f 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h | |||
@@ -10,7 +10,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | |||
10 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | 10 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, |
11 | pte_t *ptep); | 11 | pte_t *ptep); |
12 | 12 | ||
13 | void hugetlb_prefault_arch_hook(struct mm_struct *mm); | 13 | static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) |
14 | { | ||
15 | hugetlb_setup(mm); | ||
16 | } | ||
14 | 17 | ||
15 | static inline int is_hugepage_only_range(struct mm_struct *mm, | 18 | static inline int is_hugepage_only_range(struct mm_struct *mm, |
16 | unsigned long addr, | 19 | unsigned long addr, |
@@ -82,4 +85,8 @@ static inline void arch_release_hugepage(struct page *page) | |||
82 | { | 85 | { |
83 | } | 86 | } |
84 | 87 | ||
88 | static inline void arch_clear_hugepage_flags(struct page *page) | ||
89 | { | ||
90 | } | ||
91 | |||
85 | #endif /* _ASM_SPARC64_HUGETLB_H */ | 92 | #endif /* _ASM_SPARC64_HUGETLB_H */ |
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 015a761eaa32..ca121f0fa3ec 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h | |||
@@ -2934,6 +2934,16 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra, | |||
2934 | unsigned long len); | 2934 | unsigned long len); |
2935 | #endif | 2935 | #endif |
2936 | 2936 | ||
2937 | #define HV_FAST_VT_GET_PERFREG 0x184 | ||
2938 | #define HV_FAST_VT_SET_PERFREG 0x185 | ||
2939 | |||
2940 | #ifndef __ASSEMBLY__ | ||
2941 | extern unsigned long sun4v_vt_get_perfreg(unsigned long reg_num, | ||
2942 | unsigned long *reg_val); | ||
2943 | extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, | ||
2944 | unsigned long reg_val); | ||
2945 | #endif | ||
2946 | |||
2937 | /* Function numbers for HV_CORE_TRAP. */ | 2947 | /* Function numbers for HV_CORE_TRAP. */ |
2938 | #define HV_CORE_SET_VER 0x00 | 2948 | #define HV_CORE_SET_VER 0x00 |
2939 | #define HV_CORE_PUTCHAR 0x01 | 2949 | #define HV_CORE_PUTCHAR 0x01 |
@@ -2964,6 +2974,7 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra, | |||
2964 | #define HV_GRP_NIU 0x0204 | 2974 | #define HV_GRP_NIU 0x0204 |
2965 | #define HV_GRP_VF_CPU 0x0205 | 2975 | #define HV_GRP_VF_CPU 0x0205 |
2966 | #define HV_GRP_KT_CPU 0x0209 | 2976 | #define HV_GRP_KT_CPU 0x0209 |
2977 | #define HV_GRP_VT_CPU 0x020c | ||
2967 | #define HV_GRP_DIAG 0x0300 | 2978 | #define HV_GRP_DIAG 0x0300 |
2968 | 2979 | ||
2969 | #ifndef __ASSEMBLY__ | 2980 | #ifndef __ASSEMBLY__ |
diff --git a/arch/sparc/include/asm/mdesc.h b/arch/sparc/include/asm/mdesc.h index 9faa046713fb..139097f3a67b 100644 --- a/arch/sparc/include/asm/mdesc.h +++ b/arch/sparc/include/asm/mdesc.h | |||
@@ -73,6 +73,7 @@ extern void mdesc_register_notifier(struct mdesc_notifier_client *client); | |||
73 | 73 | ||
74 | extern void mdesc_fill_in_cpu_data(cpumask_t *mask); | 74 | extern void mdesc_fill_in_cpu_data(cpumask_t *mask); |
75 | extern void mdesc_populate_present_mask(cpumask_t *mask); | 75 | extern void mdesc_populate_present_mask(cpumask_t *mask); |
76 | extern void mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask); | ||
76 | 77 | ||
77 | extern void sun4v_mdesc_init(void); | 78 | extern void sun4v_mdesc_init(void); |
78 | 79 | ||
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index 9067dc500535..76092c4dd277 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h | |||
@@ -30,22 +30,8 @@ | |||
30 | #define CTX_PGSZ_MASK ((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \ | 30 | #define CTX_PGSZ_MASK ((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \ |
31 | (CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT)) | 31 | (CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT)) |
32 | 32 | ||
33 | #if defined(CONFIG_SPARC64_PAGE_SIZE_8KB) | ||
34 | #define CTX_PGSZ_BASE CTX_PGSZ_8KB | 33 | #define CTX_PGSZ_BASE CTX_PGSZ_8KB |
35 | #elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB) | 34 | #define CTX_PGSZ_HUGE CTX_PGSZ_4MB |
36 | #define CTX_PGSZ_BASE CTX_PGSZ_64KB | ||
37 | #else | ||
38 | #error No page size specified in kernel configuration | ||
39 | #endif | ||
40 | |||
41 | #if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) | ||
42 | #define CTX_PGSZ_HUGE CTX_PGSZ_4MB | ||
43 | #elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) | ||
44 | #define CTX_PGSZ_HUGE CTX_PGSZ_512KB | ||
45 | #elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) | ||
46 | #define CTX_PGSZ_HUGE CTX_PGSZ_64KB | ||
47 | #endif | ||
48 | |||
49 | #define CTX_PGSZ_KERN CTX_PGSZ_4MB | 35 | #define CTX_PGSZ_KERN CTX_PGSZ_4MB |
50 | 36 | ||
51 | /* Thus, when running on UltraSPARC-III+ and later, we use the following | 37 | /* Thus, when running on UltraSPARC-III+ and later, we use the following |
@@ -96,7 +82,7 @@ struct tsb_config { | |||
96 | 82 | ||
97 | #define MM_TSB_BASE 0 | 83 | #define MM_TSB_BASE 0 |
98 | 84 | ||
99 | #ifdef CONFIG_HUGETLB_PAGE | 85 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
100 | #define MM_TSB_HUGE 1 | 86 | #define MM_TSB_HUGE 1 |
101 | #define MM_NUM_TSBS 2 | 87 | #define MM_NUM_TSBS 2 |
102 | #else | 88 | #else |
@@ -107,6 +93,7 @@ typedef struct { | |||
107 | spinlock_t lock; | 93 | spinlock_t lock; |
108 | unsigned long sparc64_ctx_val; | 94 | unsigned long sparc64_ctx_val; |
109 | unsigned long huge_pte_count; | 95 | unsigned long huge_pte_count; |
96 | struct page *pgtable_page; | ||
110 | struct tsb_config tsb_block[MM_NUM_TSBS]; | 97 | struct tsb_config tsb_block[MM_NUM_TSBS]; |
111 | struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; | 98 | struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; |
112 | } mm_context_t; | 99 | } mm_context_t; |
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index a97fd085cebe..9191ca62ed9c 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h | |||
@@ -36,7 +36,7 @@ static inline void tsb_context_switch(struct mm_struct *mm) | |||
36 | { | 36 | { |
37 | __tsb_context_switch(__pa(mm->pgd), | 37 | __tsb_context_switch(__pa(mm->pgd), |
38 | &mm->context.tsb_block[0], | 38 | &mm->context.tsb_block[0], |
39 | #ifdef CONFIG_HUGETLB_PAGE | 39 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
40 | (mm->context.tsb_block[1].tsb ? | 40 | (mm->context.tsb_block[1].tsb ? |
41 | &mm->context.tsb_block[1] : | 41 | &mm->context.tsb_block[1] : |
42 | NULL) | 42 | NULL) |
diff --git a/arch/sparc/include/asm/oplib_32.h b/arch/sparc/include/asm/oplib_32.h index 27517879a6c2..c72f3045820c 100644 --- a/arch/sparc/include/asm/oplib_32.h +++ b/arch/sparc/include/asm/oplib_32.h | |||
@@ -94,7 +94,7 @@ extern int prom_getprev(void); | |||
94 | extern void prom_console_write_buf(const char *buf, int len); | 94 | extern void prom_console_write_buf(const char *buf, int len); |
95 | 95 | ||
96 | /* Prom's internal routines, don't use in kernel/boot code. */ | 96 | /* Prom's internal routines, don't use in kernel/boot code. */ |
97 | extern void prom_printf(const char *fmt, ...); | 97 | extern __printf(1, 2) void prom_printf(const char *fmt, ...); |
98 | extern void prom_write(const char *buf, unsigned int len); | 98 | extern void prom_write(const char *buf, unsigned int len); |
99 | 99 | ||
100 | /* Multiprocessor operations... */ | 100 | /* Multiprocessor operations... */ |
diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h index 97a90475c314..a12dbe3b7762 100644 --- a/arch/sparc/include/asm/oplib_64.h +++ b/arch/sparc/include/asm/oplib_64.h | |||
@@ -98,7 +98,7 @@ extern unsigned char prom_get_idprom(char *idp_buffer, int idpbuf_size); | |||
98 | extern void prom_console_write_buf(const char *buf, int len); | 98 | extern void prom_console_write_buf(const char *buf, int len); |
99 | 99 | ||
100 | /* Prom's internal routines, don't use in kernel/boot code. */ | 100 | /* Prom's internal routines, don't use in kernel/boot code. */ |
101 | extern void prom_printf(const char *fmt, ...); | 101 | extern __printf(1, 2) void prom_printf(const char *fmt, ...); |
102 | extern void prom_write(const char *buf, unsigned int len); | 102 | extern void prom_write(const char *buf, unsigned int len); |
103 | 103 | ||
104 | /* Multiprocessor operations... */ | 104 | /* Multiprocessor operations... */ |
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index f0d09b401036..4b39f74d6ca0 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h | |||
@@ -3,13 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/const.h> | 4 | #include <linux/const.h> |
5 | 5 | ||
6 | #if defined(CONFIG_SPARC64_PAGE_SIZE_8KB) | ||
7 | #define PAGE_SHIFT 13 | 6 | #define PAGE_SHIFT 13 |
8 | #elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB) | ||
9 | #define PAGE_SHIFT 16 | ||
10 | #else | ||
11 | #error No page size specified in kernel configuration | ||
12 | #endif | ||
13 | 7 | ||
14 | #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) | 8 | #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) |
15 | #define PAGE_MASK (~(PAGE_SIZE-1)) | 9 | #define PAGE_MASK (~(PAGE_SIZE-1)) |
@@ -21,15 +15,9 @@ | |||
21 | #define DCACHE_ALIASING_POSSIBLE | 15 | #define DCACHE_ALIASING_POSSIBLE |
22 | #endif | 16 | #endif |
23 | 17 | ||
24 | #if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) | ||
25 | #define HPAGE_SHIFT 22 | 18 | #define HPAGE_SHIFT 22 |
26 | #elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) | ||
27 | #define HPAGE_SHIFT 19 | ||
28 | #elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) | ||
29 | #define HPAGE_SHIFT 16 | ||
30 | #endif | ||
31 | 19 | ||
32 | #ifdef CONFIG_HUGETLB_PAGE | 20 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
33 | #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) | 21 | #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) |
34 | #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) | 22 | #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) |
35 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | 23 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) |
@@ -38,6 +26,11 @@ | |||
38 | 26 | ||
39 | #ifndef __ASSEMBLY__ | 27 | #ifndef __ASSEMBLY__ |
40 | 28 | ||
29 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) | ||
30 | struct mm_struct; | ||
31 | extern void hugetlb_setup(struct mm_struct *mm); | ||
32 | #endif | ||
33 | |||
41 | #define WANT_PAGE_VIRTUAL | 34 | #define WANT_PAGE_VIRTUAL |
42 | 35 | ||
43 | extern void _clear_page(void *page); | 36 | extern void _clear_page(void *page); |
@@ -98,7 +91,7 @@ typedef unsigned long pgprot_t; | |||
98 | 91 | ||
99 | #endif /* (STRICT_MM_TYPECHECKS) */ | 92 | #endif /* (STRICT_MM_TYPECHECKS) */ |
100 | 93 | ||
101 | typedef struct page *pgtable_t; | 94 | typedef pte_t *pgtable_t; |
102 | 95 | ||
103 | #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ | 96 | #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ |
104 | (_AC(0x0000000070000000,UL)) : \ | 97 | (_AC(0x0000000070000000,UL)) : \ |
diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h index 288d7beba051..942bb17f60cd 100644 --- a/arch/sparc/include/asm/pcr.h +++ b/arch/sparc/include/asm/pcr.h | |||
@@ -2,8 +2,13 @@ | |||
2 | #define __PCR_H | 2 | #define __PCR_H |
3 | 3 | ||
4 | struct pcr_ops { | 4 | struct pcr_ops { |
5 | u64 (*read)(void); | 5 | u64 (*read_pcr)(unsigned long); |
6 | void (*write)(u64); | 6 | void (*write_pcr)(unsigned long, u64); |
7 | u64 (*read_pic)(unsigned long); | ||
8 | void (*write_pic)(unsigned long, u64); | ||
9 | u64 (*nmi_picl_value)(unsigned int nmi_hz); | ||
10 | u64 pcr_nmi_enable; | ||
11 | u64 pcr_nmi_disable; | ||
7 | }; | 12 | }; |
8 | extern const struct pcr_ops *pcr_ops; | 13 | extern const struct pcr_ops *pcr_ops; |
9 | 14 | ||
@@ -27,21 +32,18 @@ extern void schedule_deferred_pcr_work(void); | |||
27 | #define PCR_N2_SL1_SHIFT 27 | 32 | #define PCR_N2_SL1_SHIFT 27 |
28 | #define PCR_N2_OV1 0x80000000 | 33 | #define PCR_N2_OV1 0x80000000 |
29 | 34 | ||
30 | extern unsigned int picl_shift; | 35 | #define PCR_N4_OV 0x00000001 /* PIC overflow */ |
31 | 36 | #define PCR_N4_TOE 0x00000002 /* Trap On Event */ | |
32 | /* In order to commonize as much of the implementation as | 37 | #define PCR_N4_UTRACE 0x00000004 /* Trace user events */ |
33 | * possible, we use PICH as our counter. Mostly this is | 38 | #define PCR_N4_STRACE 0x00000008 /* Trace supervisor events */ |
34 | * to accommodate Niagara-1 which can only count insn cycles | 39 | #define PCR_N4_HTRACE 0x00000010 /* Trace hypervisor events */ |
35 | * in PICH. | 40 | #define PCR_N4_MASK 0x000007e0 /* Event mask */ |
36 | */ | 41 | #define PCR_N4_MASK_SHIFT 5 |
37 | static inline u64 picl_value(unsigned int nmi_hz) | 42 | #define PCR_N4_SL 0x0000f800 /* Event Select */ |
38 | { | 43 | #define PCR_N4_SL_SHIFT 11 |
39 | u32 delta = local_cpu_data().clock_tick / (nmi_hz << picl_shift); | 44 | #define PCR_N4_PICNPT 0x00010000 /* PIC non-privileged trap */ |
40 | 45 | #define PCR_N4_PICNHT 0x00020000 /* PIC non-hypervisor trap */ | |
41 | return ((u64)((0 - delta) & 0xffffffff)) << 32; | 46 | #define PCR_N4_NTC 0x00040000 /* Next-To-Commit wrap */ |
42 | } | ||
43 | |||
44 | extern u64 pcr_enable; | ||
45 | 47 | ||
46 | extern int pcr_arch_init(void); | 48 | extern int pcr_arch_init(void); |
47 | 49 | ||
diff --git a/arch/sparc/include/asm/perfctr.h b/arch/sparc/include/asm/perfctr.h index 3332d2cba6c1..214feefa577c 100644 --- a/arch/sparc/include/asm/perfctr.h +++ b/arch/sparc/include/asm/perfctr.h | |||
@@ -54,11 +54,6 @@ enum perfctr_opcode { | |||
54 | PERFCTR_GETPCR | 54 | PERFCTR_GETPCR |
55 | }; | 55 | }; |
56 | 56 | ||
57 | /* I don't want the kernel's namespace to be polluted with this | ||
58 | * stuff when this file is included. --DaveM | ||
59 | */ | ||
60 | #ifndef __KERNEL__ | ||
61 | |||
62 | #define PRIV 0x00000001 | 57 | #define PRIV 0x00000001 |
63 | #define SYS 0x00000002 | 58 | #define SYS 0x00000002 |
64 | #define USR 0x00000004 | 59 | #define USR 0x00000004 |
@@ -168,29 +163,4 @@ struct vcounter_struct { | |||
168 | unsigned long long vcnt1; | 163 | unsigned long long vcnt1; |
169 | }; | 164 | }; |
170 | 165 | ||
171 | #else /* !(__KERNEL__) */ | ||
172 | |||
173 | #ifndef CONFIG_SPARC32 | ||
174 | |||
175 | /* Performance counter register access. */ | ||
176 | #define read_pcr(__p) __asm__ __volatile__("rd %%pcr, %0" : "=r" (__p)) | ||
177 | #define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p)) | ||
178 | #define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p)) | ||
179 | |||
180 | /* Blackbird errata workaround. See commentary in | ||
181 | * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() | ||
182 | * for more information. | ||
183 | */ | ||
184 | #define write_pic(__p) \ | ||
185 | __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \ | ||
186 | " nop\n\t" \ | ||
187 | ".align 64\n" \ | ||
188 | "99:wr %0, 0x0, %%pic\n\t" \ | ||
189 | "rd %%pic, %%g0" : : "r" (__p)) | ||
190 | #define reset_pic() write_pic(0) | ||
191 | |||
192 | #endif /* !CONFIG_SPARC32 */ | ||
193 | |||
194 | #endif /* !(__KERNEL__) */ | ||
195 | |||
196 | #endif /* !(PERF_COUNTER_API) */ | 166 | #endif /* !(PERF_COUNTER_API) */ |
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 40b2d7a7023d..bcfe063bce23 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h | |||
@@ -38,51 +38,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | |||
38 | kmem_cache_free(pgtable_cache, pmd); | 38 | kmem_cache_free(pgtable_cache, pmd); |
39 | } | 39 | } |
40 | 40 | ||
41 | static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, | 41 | extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, |
42 | unsigned long address) | 42 | unsigned long address); |
43 | { | 43 | extern pgtable_t pte_alloc_one(struct mm_struct *mm, |
44 | return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); | 44 | unsigned long address); |
45 | } | 45 | extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte); |
46 | 46 | extern void pte_free(struct mm_struct *mm, pgtable_t ptepage); | |
47 | static inline pgtable_t pte_alloc_one(struct mm_struct *mm, | ||
48 | unsigned long address) | ||
49 | { | ||
50 | struct page *page; | ||
51 | pte_t *pte; | ||
52 | |||
53 | pte = pte_alloc_one_kernel(mm, address); | ||
54 | if (!pte) | ||
55 | return NULL; | ||
56 | page = virt_to_page(pte); | ||
57 | pgtable_page_ctor(page); | ||
58 | return page; | ||
59 | } | ||
60 | |||
61 | static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) | ||
62 | { | ||
63 | free_page((unsigned long)pte); | ||
64 | } | ||
65 | |||
66 | static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) | ||
67 | { | ||
68 | pgtable_page_dtor(ptepage); | ||
69 | __free_page(ptepage); | ||
70 | } | ||
71 | 47 | ||
72 | #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) | 48 | #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(MM, PMD, PTE) |
73 | #define pmd_populate(MM,PMD,PTE_PAGE) \ | 49 | #define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE) |
74 | pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) | 50 | #define pmd_pgtable(PMD) ((pte_t *)__pmd_page(PMD)) |
75 | #define pmd_pgtable(pmd) pmd_page(pmd) | ||
76 | 51 | ||
77 | #define check_pgt_cache() do { } while (0) | 52 | #define check_pgt_cache() do { } while (0) |
78 | 53 | ||
79 | static inline void pgtable_free(void *table, bool is_page) | 54 | extern void pgtable_free(void *table, bool is_page); |
80 | { | ||
81 | if (is_page) | ||
82 | free_page((unsigned long)table); | ||
83 | else | ||
84 | kmem_cache_free(pgtable_cache, table); | ||
85 | } | ||
86 | 55 | ||
87 | #ifdef CONFIG_SMP | 56 | #ifdef CONFIG_SMP |
88 | 57 | ||
@@ -113,11 +82,10 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, bool is | |||
113 | } | 82 | } |
114 | #endif /* !CONFIG_SMP */ | 83 | #endif /* !CONFIG_SMP */ |
115 | 84 | ||
116 | static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage, | 85 | static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte, |
117 | unsigned long address) | 86 | unsigned long address) |
118 | { | 87 | { |
119 | pgtable_page_dtor(ptepage); | 88 | pgtable_free_tlb(tlb, pte, true); |
120 | pgtable_free_tlb(tlb, page_address(ptepage), true); | ||
121 | } | 89 | } |
122 | 90 | ||
123 | #define __pmd_free_tlb(tlb, pmd, addr) \ | 91 | #define __pmd_free_tlb(tlb, pmd, addr) \ |
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 61210db139fb..95515f1e7cef 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h | |||
@@ -45,40 +45,59 @@ | |||
45 | 45 | ||
46 | #define vmemmap ((struct page *)VMEMMAP_BASE) | 46 | #define vmemmap ((struct page *)VMEMMAP_BASE) |
47 | 47 | ||
48 | /* XXX All of this needs to be rethought so we can take advantage | ||
49 | * XXX cheetah's full 64-bit virtual address space, ie. no more hole | ||
50 | * XXX in the middle like on spitfire. -DaveM | ||
51 | */ | ||
52 | /* | ||
53 | * Given a virtual address, the lowest PAGE_SHIFT bits determine offset | ||
54 | * into the page; the next higher PAGE_SHIFT-3 bits determine the pte# | ||
55 | * in the proper pagetable (the -3 is from the 8 byte ptes, and each page | ||
56 | * table is a single page long). The next higher PMD_BITS determine pmd# | ||
57 | * in the proper pmdtable (where we must have PMD_BITS <= (PAGE_SHIFT-2) | ||
58 | * since the pmd entries are 4 bytes, and each pmd page is a single page | ||
59 | * long). Finally, the higher few bits determine pgde#. | ||
60 | */ | ||
61 | |||
62 | /* PMD_SHIFT determines the size of the area a second-level page | 48 | /* PMD_SHIFT determines the size of the area a second-level page |
63 | * table can map | 49 | * table can map |
64 | */ | 50 | */ |
65 | #define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) | 51 | #define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4)) |
66 | #define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) | 52 | #define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) |
67 | #define PMD_MASK (~(PMD_SIZE-1)) | 53 | #define PMD_MASK (~(PMD_SIZE-1)) |
68 | #define PMD_BITS (PAGE_SHIFT - 2) | 54 | #define PMD_BITS (PAGE_SHIFT - 2) |
69 | 55 | ||
70 | /* PGDIR_SHIFT determines what a third-level page table entry can map */ | 56 | /* PGDIR_SHIFT determines what a third-level page table entry can map */ |
71 | #define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS) | 57 | #define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS) |
72 | #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) | 58 | #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) |
73 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) | 59 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) |
74 | #define PGDIR_BITS (PAGE_SHIFT - 2) | 60 | #define PGDIR_BITS (PAGE_SHIFT - 2) |
75 | 61 | ||
62 | #if (PGDIR_SHIFT + PGDIR_BITS) != 44 | ||
63 | #error Page table parameters do not cover virtual address space properly. | ||
64 | #endif | ||
65 | |||
66 | #if (PMD_SHIFT != HPAGE_SHIFT) | ||
67 | #error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages. | ||
68 | #endif | ||
69 | |||
70 | /* PMDs point to PTE tables which are 4K aligned. */ | ||
71 | #define PMD_PADDR _AC(0xfffffffe,UL) | ||
72 | #define PMD_PADDR_SHIFT _AC(11,UL) | ||
73 | |||
74 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
75 | #define PMD_ISHUGE _AC(0x00000001,UL) | ||
76 | |||
77 | /* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge | ||
78 | * pages, this frees up a bunch of bits in the layout that we can | ||
79 | * use for the protection settings and software metadata. | ||
80 | */ | ||
81 | #define PMD_HUGE_PADDR _AC(0xfffff800,UL) | ||
82 | #define PMD_HUGE_PROTBITS _AC(0x000007ff,UL) | ||
83 | #define PMD_HUGE_PRESENT _AC(0x00000400,UL) | ||
84 | #define PMD_HUGE_WRITE _AC(0x00000200,UL) | ||
85 | #define PMD_HUGE_DIRTY _AC(0x00000100,UL) | ||
86 | #define PMD_HUGE_ACCESSED _AC(0x00000080,UL) | ||
87 | #define PMD_HUGE_EXEC _AC(0x00000040,UL) | ||
88 | #define PMD_HUGE_SPLITTING _AC(0x00000020,UL) | ||
89 | #endif | ||
90 | |||
91 | /* PGDs point to PMD tables which are 8K aligned. */ | ||
92 | #define PGD_PADDR _AC(0xfffffffc,UL) | ||
93 | #define PGD_PADDR_SHIFT _AC(11,UL) | ||
94 | |||
76 | #ifndef __ASSEMBLY__ | 95 | #ifndef __ASSEMBLY__ |
77 | 96 | ||
78 | #include <linux/sched.h> | 97 | #include <linux/sched.h> |
79 | 98 | ||
80 | /* Entries per page directory level. */ | 99 | /* Entries per page directory level. */ |
81 | #define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) | 100 | #define PTRS_PER_PTE (1UL << (PAGE_SHIFT-4)) |
82 | #define PTRS_PER_PMD (1UL << PMD_BITS) | 101 | #define PTRS_PER_PMD (1UL << PMD_BITS) |
83 | #define PTRS_PER_PGD (1UL << PGDIR_BITS) | 102 | #define PTRS_PER_PGD (1UL << PGDIR_BITS) |
84 | 103 | ||
@@ -160,26 +179,11 @@ | |||
160 | #define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */ | 179 | #define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */ |
161 | #define _PAGE_SZALL_4V _AC(0x0000000000000007,UL) /* All pgsz bits */ | 180 | #define _PAGE_SZALL_4V _AC(0x0000000000000007,UL) /* All pgsz bits */ |
162 | 181 | ||
163 | #if PAGE_SHIFT == 13 | ||
164 | #define _PAGE_SZBITS_4U _PAGE_SZ8K_4U | 182 | #define _PAGE_SZBITS_4U _PAGE_SZ8K_4U |
165 | #define _PAGE_SZBITS_4V _PAGE_SZ8K_4V | 183 | #define _PAGE_SZBITS_4V _PAGE_SZ8K_4V |
166 | #elif PAGE_SHIFT == 16 | ||
167 | #define _PAGE_SZBITS_4U _PAGE_SZ64K_4U | ||
168 | #define _PAGE_SZBITS_4V _PAGE_SZ64K_4V | ||
169 | #else | ||
170 | #error Wrong PAGE_SHIFT specified | ||
171 | #endif | ||
172 | 184 | ||
173 | #if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) | ||
174 | #define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U | 185 | #define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U |
175 | #define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V | 186 | #define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V |
176 | #elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) | ||
177 | #define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U | ||
178 | #define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V | ||
179 | #elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) | ||
180 | #define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U | ||
181 | #define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V | ||
182 | #endif | ||
183 | 187 | ||
184 | /* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */ | 188 | /* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */ |
185 | #define __P000 __pgprot(0) | 189 | #define __P000 __pgprot(0) |
@@ -218,7 +222,6 @@ extern unsigned long _PAGE_CACHE; | |||
218 | 222 | ||
219 | extern unsigned long pg_iobits; | 223 | extern unsigned long pg_iobits; |
220 | extern unsigned long _PAGE_ALL_SZ_BITS; | 224 | extern unsigned long _PAGE_ALL_SZ_BITS; |
221 | extern unsigned long _PAGE_SZBITS; | ||
222 | 225 | ||
223 | extern struct page *mem_map_zero; | 226 | extern struct page *mem_map_zero; |
224 | #define ZERO_PAGE(vaddr) (mem_map_zero) | 227 | #define ZERO_PAGE(vaddr) (mem_map_zero) |
@@ -231,25 +234,25 @@ extern struct page *mem_map_zero; | |||
231 | static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) | 234 | static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) |
232 | { | 235 | { |
233 | unsigned long paddr = pfn << PAGE_SHIFT; | 236 | unsigned long paddr = pfn << PAGE_SHIFT; |
234 | unsigned long sz_bits; | 237 | |
235 | 238 | BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL); | |
236 | sz_bits = 0UL; | 239 | return __pte(paddr | pgprot_val(prot)); |
237 | if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) { | ||
238 | __asm__ __volatile__( | ||
239 | "\n661: sethi %%uhi(%1), %0\n" | ||
240 | " sllx %0, 32, %0\n" | ||
241 | " .section .sun4v_2insn_patch, \"ax\"\n" | ||
242 | " .word 661b\n" | ||
243 | " mov %2, %0\n" | ||
244 | " nop\n" | ||
245 | " .previous\n" | ||
246 | : "=r" (sz_bits) | ||
247 | : "i" (_PAGE_SZBITS_4U), "i" (_PAGE_SZBITS_4V)); | ||
248 | } | ||
249 | return __pte(paddr | sz_bits | pgprot_val(prot)); | ||
250 | } | 240 | } |
251 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) | 241 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) |
252 | 242 | ||
243 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
244 | extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot); | ||
245 | #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) | ||
246 | |||
247 | extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); | ||
248 | |||
249 | static inline pmd_t pmd_mkhuge(pmd_t pmd) | ||
250 | { | ||
251 | /* Do nothing, mk_pmd() does this part. */ | ||
252 | return pmd; | ||
253 | } | ||
254 | #endif | ||
255 | |||
253 | /* This one can be done with two shifts. */ | 256 | /* This one can be done with two shifts. */ |
254 | static inline unsigned long pte_pfn(pte_t pte) | 257 | static inline unsigned long pte_pfn(pte_t pte) |
255 | { | 258 | { |
@@ -286,6 +289,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) | |||
286 | * Note: We encode this into 3 sun4v 2-insn patch sequences. | 289 | * Note: We encode this into 3 sun4v 2-insn patch sequences. |
287 | */ | 290 | */ |
288 | 291 | ||
292 | BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL); | ||
289 | __asm__ __volatile__( | 293 | __asm__ __volatile__( |
290 | "\n661: sethi %%uhi(%2), %1\n" | 294 | "\n661: sethi %%uhi(%2), %1\n" |
291 | " sethi %%hi(%2), %0\n" | 295 | " sethi %%hi(%2), %0\n" |
@@ -307,10 +311,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) | |||
307 | : "=r" (mask), "=r" (tmp) | 311 | : "=r" (mask), "=r" (tmp) |
308 | : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | | 312 | : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | |
309 | _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U | | 313 | _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U | |
310 | _PAGE_SZBITS_4U | _PAGE_SPECIAL), | 314 | _PAGE_SPECIAL), |
311 | "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | | 315 | "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | |
312 | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V | | 316 | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V | |
313 | _PAGE_SZBITS_4V | _PAGE_SPECIAL)); | 317 | _PAGE_SPECIAL)); |
314 | 318 | ||
315 | return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); | 319 | return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); |
316 | } | 320 | } |
@@ -618,19 +622,130 @@ static inline unsigned long pte_special(pte_t pte) | |||
618 | return pte_val(pte) & _PAGE_SPECIAL; | 622 | return pte_val(pte) & _PAGE_SPECIAL; |
619 | } | 623 | } |
620 | 624 | ||
621 | #define pmd_set(pmdp, ptep) \ | 625 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
622 | (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL)) | 626 | static inline int pmd_young(pmd_t pmd) |
627 | { | ||
628 | return pmd_val(pmd) & PMD_HUGE_ACCESSED; | ||
629 | } | ||
630 | |||
631 | static inline int pmd_write(pmd_t pmd) | ||
632 | { | ||
633 | return pmd_val(pmd) & PMD_HUGE_WRITE; | ||
634 | } | ||
635 | |||
636 | static inline unsigned long pmd_pfn(pmd_t pmd) | ||
637 | { | ||
638 | unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR; | ||
639 | |||
640 | return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT); | ||
641 | } | ||
642 | |||
643 | static inline int pmd_large(pmd_t pmd) | ||
644 | { | ||
645 | return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) == | ||
646 | (PMD_ISHUGE | PMD_HUGE_PRESENT); | ||
647 | } | ||
648 | |||
649 | static inline int pmd_trans_splitting(pmd_t pmd) | ||
650 | { | ||
651 | return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) == | ||
652 | (PMD_ISHUGE|PMD_HUGE_SPLITTING); | ||
653 | } | ||
654 | |||
655 | static inline int pmd_trans_huge(pmd_t pmd) | ||
656 | { | ||
657 | return pmd_val(pmd) & PMD_ISHUGE; | ||
658 | } | ||
659 | |||
660 | #define has_transparent_hugepage() 1 | ||
661 | |||
662 | static inline pmd_t pmd_mkold(pmd_t pmd) | ||
663 | { | ||
664 | pmd_val(pmd) &= ~PMD_HUGE_ACCESSED; | ||
665 | return pmd; | ||
666 | } | ||
667 | |||
668 | static inline pmd_t pmd_wrprotect(pmd_t pmd) | ||
669 | { | ||
670 | pmd_val(pmd) &= ~PMD_HUGE_WRITE; | ||
671 | return pmd; | ||
672 | } | ||
673 | |||
674 | static inline pmd_t pmd_mkdirty(pmd_t pmd) | ||
675 | { | ||
676 | pmd_val(pmd) |= PMD_HUGE_DIRTY; | ||
677 | return pmd; | ||
678 | } | ||
679 | |||
680 | static inline pmd_t pmd_mkyoung(pmd_t pmd) | ||
681 | { | ||
682 | pmd_val(pmd) |= PMD_HUGE_ACCESSED; | ||
683 | return pmd; | ||
684 | } | ||
685 | |||
686 | static inline pmd_t pmd_mkwrite(pmd_t pmd) | ||
687 | { | ||
688 | pmd_val(pmd) |= PMD_HUGE_WRITE; | ||
689 | return pmd; | ||
690 | } | ||
691 | |||
692 | static inline pmd_t pmd_mknotpresent(pmd_t pmd) | ||
693 | { | ||
694 | pmd_val(pmd) &= ~PMD_HUGE_PRESENT; | ||
695 | return pmd; | ||
696 | } | ||
697 | |||
698 | static inline pmd_t pmd_mksplitting(pmd_t pmd) | ||
699 | { | ||
700 | pmd_val(pmd) |= PMD_HUGE_SPLITTING; | ||
701 | return pmd; | ||
702 | } | ||
703 | |||
704 | extern pgprot_t pmd_pgprot(pmd_t entry); | ||
705 | #endif | ||
706 | |||
707 | static inline int pmd_present(pmd_t pmd) | ||
708 | { | ||
709 | return pmd_val(pmd) != 0U; | ||
710 | } | ||
711 | |||
712 | #define pmd_none(pmd) (!pmd_val(pmd)) | ||
713 | |||
714 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
715 | extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
716 | pmd_t *pmdp, pmd_t pmd); | ||
717 | #else | ||
718 | static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
719 | pmd_t *pmdp, pmd_t pmd) | ||
720 | { | ||
721 | *pmdp = pmd; | ||
722 | } | ||
723 | #endif | ||
724 | |||
725 | static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) | ||
726 | { | ||
727 | unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT; | ||
728 | |||
729 | pmd_val(*pmdp) = val; | ||
730 | } | ||
731 | |||
623 | #define pud_set(pudp, pmdp) \ | 732 | #define pud_set(pudp, pmdp) \ |
624 | (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> 11UL)) | 733 | (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT)) |
625 | #define __pmd_page(pmd) \ | 734 | static inline unsigned long __pmd_page(pmd_t pmd) |
626 | ((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL))) | 735 | { |
736 | unsigned long paddr = (unsigned long) pmd_val(pmd); | ||
737 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
738 | if (pmd_val(pmd) & PMD_ISHUGE) | ||
739 | paddr &= PMD_HUGE_PADDR; | ||
740 | #endif | ||
741 | paddr <<= PMD_PADDR_SHIFT; | ||
742 | return ((unsigned long) __va(paddr)); | ||
743 | } | ||
627 | #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) | 744 | #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) |
628 | #define pud_page_vaddr(pud) \ | 745 | #define pud_page_vaddr(pud) \ |
629 | ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL))) | 746 | ((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT))) |
630 | #define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) | 747 | #define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) |
631 | #define pmd_none(pmd) (!pmd_val(pmd)) | ||
632 | #define pmd_bad(pmd) (0) | 748 | #define pmd_bad(pmd) (0) |
633 | #define pmd_present(pmd) (pmd_val(pmd) != 0U) | ||
634 | #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U) | 749 | #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U) |
635 | #define pud_none(pud) (!pud_val(pud)) | 750 | #define pud_none(pud) (!pud_val(pud)) |
636 | #define pud_bad(pud) (0) | 751 | #define pud_bad(pud) (0) |
@@ -664,6 +779,16 @@ static inline unsigned long pte_special(pte_t pte) | |||
664 | extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, | 779 | extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, |
665 | pte_t *ptep, pte_t orig, int fullmm); | 780 | pte_t *ptep, pte_t orig, int fullmm); |
666 | 781 | ||
782 | #define __HAVE_ARCH_PMDP_GET_AND_CLEAR | ||
783 | static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, | ||
784 | unsigned long addr, | ||
785 | pmd_t *pmdp) | ||
786 | { | ||
787 | pmd_t pmd = *pmdp; | ||
788 | set_pmd_at(mm, addr, pmdp, __pmd(0U)); | ||
789 | return pmd; | ||
790 | } | ||
791 | |||
667 | static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, | 792 | static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, |
668 | pte_t *ptep, pte_t pte, int fullmm) | 793 | pte_t *ptep, pte_t pte, int fullmm) |
669 | { | 794 | { |
@@ -719,6 +844,16 @@ extern void mmu_info(struct seq_file *); | |||
719 | 844 | ||
720 | struct vm_area_struct; | 845 | struct vm_area_struct; |
721 | extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); | 846 | extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); |
847 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
848 | extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, | ||
849 | pmd_t *pmd); | ||
850 | |||
851 | #define __HAVE_ARCH_PGTABLE_DEPOSIT | ||
852 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); | ||
853 | |||
854 | #define __HAVE_ARCH_PGTABLE_WITHDRAW | ||
855 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); | ||
856 | #endif | ||
722 | 857 | ||
723 | /* Encode and de-code a swap entry */ | 858 | /* Encode and de-code a swap entry */ |
724 | #define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL) | 859 | #define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL) |
diff --git a/arch/sparc/include/asm/pstate.h b/arch/sparc/include/asm/pstate.h index a26a53777bb0..4b6b998afd99 100644 --- a/arch/sparc/include/asm/pstate.h +++ b/arch/sparc/include/asm/pstate.h | |||
@@ -88,4 +88,18 @@ | |||
88 | #define VERS_MAXTL _AC(0x000000000000ff00,UL) /* Max Trap Level. */ | 88 | #define VERS_MAXTL _AC(0x000000000000ff00,UL) /* Max Trap Level. */ |
89 | #define VERS_MAXWIN _AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/ | 89 | #define VERS_MAXWIN _AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/ |
90 | 90 | ||
91 | /* Compatability Feature Register (%asr26), SPARC-T4 and later */ | ||
92 | #define CFR_AES _AC(0x0000000000000001,UL) /* Supports AES opcodes */ | ||
93 | #define CFR_DES _AC(0x0000000000000002,UL) /* Supports DES opcodes */ | ||
94 | #define CFR_KASUMI _AC(0x0000000000000004,UL) /* Supports KASUMI opcodes */ | ||
95 | #define CFR_CAMELLIA _AC(0x0000000000000008,UL) /* Supports CAMELLIA opcodes*/ | ||
96 | #define CFR_MD5 _AC(0x0000000000000010,UL) /* Supports MD5 opcodes */ | ||
97 | #define CFR_SHA1 _AC(0x0000000000000020,UL) /* Supports SHA1 opcodes */ | ||
98 | #define CFR_SHA256 _AC(0x0000000000000040,UL) /* Supports SHA256 opcodes */ | ||
99 | #define CFR_SHA512 _AC(0x0000000000000080,UL) /* Supports SHA512 opcodes */ | ||
100 | #define CFR_MPMUL _AC(0x0000000000000100,UL) /* Supports MPMUL opcodes */ | ||
101 | #define CFR_MONTMUL _AC(0x0000000000000200,UL) /* Supports MONTMUL opcodes */ | ||
102 | #define CFR_MONTSQR _AC(0x0000000000000400,UL) /* Supports MONTSQR opcodes */ | ||
103 | #define CFR_CRC32C _AC(0x0000000000000800,UL) /* Supports CRC32C opcodes */ | ||
104 | |||
91 | #endif /* !(_SPARC64_PSTATE_H) */ | 105 | #endif /* !(_SPARC64_PSTATE_H) */ |
diff --git a/arch/sparc/include/asm/siginfo.h b/arch/sparc/include/asm/siginfo.h index 215900fce21b..dbc182c438b4 100644 --- a/arch/sparc/include/asm/siginfo.h +++ b/arch/sparc/include/asm/siginfo.h | |||
@@ -3,7 +3,6 @@ | |||
3 | 3 | ||
4 | #if defined(__sparc__) && defined(__arch64__) | 4 | #if defined(__sparc__) && defined(__arch64__) |
5 | 5 | ||
6 | #define SI_PAD_SIZE32 ((SI_MAX_SIZE/sizeof(int)) - 3) | ||
7 | #define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) | 6 | #define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) |
8 | #define __ARCH_SI_BAND_T int | 7 | #define __ARCH_SI_BAND_T int |
9 | 8 | ||
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index 1a8afd1ad04f..b4c258de4443 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h | |||
@@ -147,20 +147,96 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; | |||
147 | brz,pn REG1, FAIL_LABEL; \ | 147 | brz,pn REG1, FAIL_LABEL; \ |
148 | sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ | 148 | sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ |
149 | srlx REG2, 64 - PAGE_SHIFT, REG2; \ | 149 | srlx REG2, 64 - PAGE_SHIFT, REG2; \ |
150 | sllx REG1, 11, REG1; \ | 150 | sllx REG1, PGD_PADDR_SHIFT, REG1; \ |
151 | andn REG2, 0x3, REG2; \ | 151 | andn REG2, 0x3, REG2; \ |
152 | lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ | 152 | lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ |
153 | brz,pn REG1, FAIL_LABEL; \ | 153 | brz,pn REG1, FAIL_LABEL; \ |
154 | sllx VADDR, 64 - PMD_SHIFT, REG2; \ | 154 | sllx VADDR, 64 - PMD_SHIFT, REG2; \ |
155 | srlx REG2, 64 - PAGE_SHIFT, REG2; \ | 155 | srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ |
156 | sllx REG1, 11, REG1; \ | 156 | sllx REG1, PMD_PADDR_SHIFT, REG1; \ |
157 | andn REG2, 0x7, REG2; \ | 157 | andn REG2, 0x7, REG2; \ |
158 | add REG1, REG2, REG1; | 158 | add REG1, REG2, REG1; |
159 | 159 | ||
160 | /* Do a user page table walk in MMU globals. Leaves physical PTE | 160 | /* This macro exists only to make the PMD translator below easier |
161 | * pointer in REG1. Jumps to FAIL_LABEL on early page table walk | 161 | * to read. It hides the ELF section switch for the sun4v code |
162 | * termination. Physical base of page tables is in PHYS_PGD which | 162 | * patching. |
163 | * will not be modified. | 163 | */ |
164 | #define OR_PTE_BIT(REG, NAME) \ | ||
165 | 661: or REG, _PAGE_##NAME##_4U, REG; \ | ||
166 | .section .sun4v_1insn_patch, "ax"; \ | ||
167 | .word 661b; \ | ||
168 | or REG, _PAGE_##NAME##_4V, REG; \ | ||
169 | .previous; | ||
170 | |||
171 | /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */ | ||
172 | #define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \ | ||
173 | 661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \ | ||
174 | .section .sun4v_1insn_patch, "ax"; \ | ||
175 | .word 661b; \ | ||
176 | sethi %uhi(_PAGE_VALID), REG; \ | ||
177 | .previous; \ | ||
178 | sllx REG, 32, REG; \ | ||
179 | 661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \ | ||
180 | .section .sun4v_1insn_patch, "ax"; \ | ||
181 | .word 661b; \ | ||
182 | or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \ | ||
183 | .previous; | ||
184 | |||
185 | /* PMD has been loaded into REG1, interpret the value, seeing | ||
186 | * if it is a HUGE PMD or a normal one. If it is not valid | ||
187 | * then jump to FAIL_LABEL. If it is a HUGE PMD, and it | ||
188 | * translates to a valid PTE, branch to PTE_LABEL. | ||
189 | * | ||
190 | * We translate the PMD by hand, one bit at a time, | ||
191 | * constructing the huge PTE. | ||
192 | * | ||
193 | * So we construct the PTE in REG2 as follows: | ||
194 | * | ||
195 | * 1) Extract the PMD PFN from REG1 and place it into REG2. | ||
196 | * | ||
197 | * 2) Translate PMD protection bits in REG1 into REG2, one bit | ||
198 | * at a time using andcc tests on REG1 and OR's into REG2. | ||
199 | * | ||
200 | * Only two bits to be concerned with here, EXEC and WRITE. | ||
201 | * Now REG1 is freed up and we can use it as a temporary. | ||
202 | * | ||
203 | * 3) Construct the VALID, CACHE, and page size PTE bits in | ||
204 | * REG1, OR with REG2 to form final PTE. | ||
205 | */ | ||
206 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
207 | #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ | ||
208 | brz,pn REG1, FAIL_LABEL; \ | ||
209 | andcc REG1, PMD_ISHUGE, %g0; \ | ||
210 | be,pt %xcc, 700f; \ | ||
211 | and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \ | ||
212 | cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \ | ||
213 | bne,pn %xcc, FAIL_LABEL; \ | ||
214 | andn REG1, PMD_HUGE_PROTBITS, REG2; \ | ||
215 | sllx REG2, PMD_PADDR_SHIFT, REG2; \ | ||
216 | /* REG2 now holds PFN << PAGE_SHIFT */ \ | ||
217 | andcc REG1, PMD_HUGE_EXEC, %g0; \ | ||
218 | bne,a,pt %xcc, 1f; \ | ||
219 | OR_PTE_BIT(REG2, EXEC); \ | ||
220 | 1: andcc REG1, PMD_HUGE_WRITE, %g0; \ | ||
221 | bne,a,pt %xcc, 1f; \ | ||
222 | OR_PTE_BIT(REG2, W); \ | ||
223 | /* REG1 can now be clobbered, build final PTE */ \ | ||
224 | 1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \ | ||
225 | ba,pt %xcc, PTE_LABEL; \ | ||
226 | or REG1, REG2, REG1; \ | ||
227 | 700: | ||
228 | #else | ||
229 | #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ | ||
230 | brz,pn REG1, FAIL_LABEL; \ | ||
231 | nop; | ||
232 | #endif | ||
233 | |||
234 | /* Do a user page table walk in MMU globals. Leaves final, | ||
235 | * valid, PTE value in REG1. Jumps to FAIL_LABEL on early | ||
236 | * page table walk termination or if the PTE is not valid. | ||
237 | * | ||
238 | * Physical base of page tables is in PHYS_PGD which will not | ||
239 | * be modified. | ||
164 | * | 240 | * |
165 | * VADDR will not be clobbered, but REG1 and REG2 will. | 241 | * VADDR will not be clobbered, but REG1 and REG2 will. |
166 | */ | 242 | */ |
@@ -172,15 +248,19 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; | |||
172 | brz,pn REG1, FAIL_LABEL; \ | 248 | brz,pn REG1, FAIL_LABEL; \ |
173 | sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ | 249 | sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ |
174 | srlx REG2, 64 - PAGE_SHIFT, REG2; \ | 250 | srlx REG2, 64 - PAGE_SHIFT, REG2; \ |
175 | sllx REG1, 11, REG1; \ | 251 | sllx REG1, PGD_PADDR_SHIFT, REG1; \ |
176 | andn REG2, 0x3, REG2; \ | 252 | andn REG2, 0x3, REG2; \ |
177 | lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ | 253 | lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ |
178 | brz,pn REG1, FAIL_LABEL; \ | 254 | USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \ |
179 | sllx VADDR, 64 - PMD_SHIFT, REG2; \ | 255 | sllx VADDR, 64 - PMD_SHIFT, REG2; \ |
180 | srlx REG2, 64 - PAGE_SHIFT, REG2; \ | 256 | srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ |
181 | sllx REG1, 11, REG1; \ | 257 | sllx REG1, PMD_PADDR_SHIFT, REG1; \ |
182 | andn REG2, 0x7, REG2; \ | 258 | andn REG2, 0x7, REG2; \ |
183 | add REG1, REG2, REG1; | 259 | add REG1, REG2, REG1; \ |
260 | ldxa [REG1] ASI_PHYS_USE_EC, REG1; \ | ||
261 | brgez,pn REG1, FAIL_LABEL; \ | ||
262 | nop; \ | ||
263 | 800: | ||
184 | 264 | ||
185 | /* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0. | 265 | /* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0. |
186 | * If no entry is found, FAIL_LABEL will be branched to. On success | 266 | * If no entry is found, FAIL_LABEL will be branched to. On success |
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index fb2693464807..d9a677c51926 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h | |||
@@ -447,6 +447,7 @@ | |||
447 | #else | 447 | #else |
448 | #define __ARCH_WANT_COMPAT_SYS_TIME | 448 | #define __ARCH_WANT_COMPAT_SYS_TIME |
449 | #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND | 449 | #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND |
450 | #define __ARCH_WANT_COMPAT_SYS_SENDFILE | ||
450 | #endif | 451 | #endif |
451 | 452 | ||
452 | /* | 453 | /* |
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild new file mode 100644 index 000000000000..7518ad286963 --- /dev/null +++ b/arch/sparc/include/uapi/asm/Kbuild | |||
@@ -0,0 +1,5 @@ | |||
1 | # UAPI Header export list | ||
2 | # User exported sparc header files | ||
3 | |||
4 | include include/uapi/asm-generic/Kbuild.asm | ||
5 | |||
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index b42ddbf9651e..2feb15c35d9e 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S | |||
@@ -559,10 +559,10 @@ niagara_tlb_fixup: | |||
559 | be,pt %xcc, niagara2_patch | 559 | be,pt %xcc, niagara2_patch |
560 | nop | 560 | nop |
561 | cmp %g1, SUN4V_CHIP_NIAGARA4 | 561 | cmp %g1, SUN4V_CHIP_NIAGARA4 |
562 | be,pt %xcc, niagara2_patch | 562 | be,pt %xcc, niagara4_patch |
563 | nop | 563 | nop |
564 | cmp %g1, SUN4V_CHIP_NIAGARA5 | 564 | cmp %g1, SUN4V_CHIP_NIAGARA5 |
565 | be,pt %xcc, niagara2_patch | 565 | be,pt %xcc, niagara4_patch |
566 | nop | 566 | nop |
567 | 567 | ||
568 | call generic_patch_copyops | 568 | call generic_patch_copyops |
@@ -573,6 +573,16 @@ niagara_tlb_fixup: | |||
573 | nop | 573 | nop |
574 | 574 | ||
575 | ba,a,pt %xcc, 80f | 575 | ba,a,pt %xcc, 80f |
576 | niagara4_patch: | ||
577 | call niagara4_patch_copyops | ||
578 | nop | ||
579 | call niagara4_patch_bzero | ||
580 | nop | ||
581 | call niagara4_patch_pageops | ||
582 | nop | ||
583 | |||
584 | ba,a,pt %xcc, 80f | ||
585 | |||
576 | niagara2_patch: | 586 | niagara2_patch: |
577 | call niagara2_patch_copyops | 587 | call niagara2_patch_copyops |
578 | nop | 588 | nop |
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 8593672838fd..c0a2de0fd624 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c | |||
@@ -45,6 +45,7 @@ static struct api_info api_table[] = { | |||
45 | { .group = HV_GRP_NIU, }, | 45 | { .group = HV_GRP_NIU, }, |
46 | { .group = HV_GRP_VF_CPU, }, | 46 | { .group = HV_GRP_VF_CPU, }, |
47 | { .group = HV_GRP_KT_CPU, }, | 47 | { .group = HV_GRP_KT_CPU, }, |
48 | { .group = HV_GRP_VT_CPU, }, | ||
48 | { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, | 49 | { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, |
49 | }; | 50 | }; |
50 | 51 | ||
@@ -193,7 +194,7 @@ void __init sun4v_hvapi_init(void) | |||
193 | 194 | ||
194 | bad: | 195 | bad: |
195 | prom_printf("HVAPI: Cannot register API group " | 196 | prom_printf("HVAPI: Cannot register API group " |
196 | "%lx with major(%u) minor(%u)\n", | 197 | "%lx with major(%lu) minor(%lu)\n", |
197 | group, major, minor); | 198 | group, major, minor); |
198 | prom_halt(); | 199 | prom_halt(); |
199 | } | 200 | } |
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index 58d60de4d65b..f3ab509b76a8 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S | |||
@@ -805,3 +805,19 @@ ENTRY(sun4v_reboot_data_set) | |||
805 | retl | 805 | retl |
806 | nop | 806 | nop |
807 | ENDPROC(sun4v_reboot_data_set) | 807 | ENDPROC(sun4v_reboot_data_set) |
808 | |||
809 | ENTRY(sun4v_vt_get_perfreg) | ||
810 | mov %o1, %o4 | ||
811 | mov HV_FAST_VT_GET_PERFREG, %o5 | ||
812 | ta HV_FAST_TRAP | ||
813 | stx %o1, [%o4] | ||
814 | retl | ||
815 | nop | ||
816 | ENDPROC(sun4v_vt_get_perfreg) | ||
817 | |||
818 | ENTRY(sun4v_vt_set_perfreg) | ||
819 | mov HV_FAST_VT_SET_PERFREG, %o5 | ||
820 | ta HV_FAST_TRAP | ||
821 | retl | ||
822 | nop | ||
823 | ENDPROC(sun4v_vt_set_perfreg) | ||
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 79f310364849..0746e5e32b37 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S | |||
@@ -188,31 +188,26 @@ valid_addr_bitmap_patch: | |||
188 | be,pn %xcc, kvmap_dtlb_longpath | 188 | be,pn %xcc, kvmap_dtlb_longpath |
189 | 189 | ||
190 | 2: sethi %hi(kpte_linear_bitmap), %g2 | 190 | 2: sethi %hi(kpte_linear_bitmap), %g2 |
191 | or %g2, %lo(kpte_linear_bitmap), %g2 | ||
192 | 191 | ||
193 | /* Get the 256MB physical address index. */ | 192 | /* Get the 256MB physical address index. */ |
194 | sllx %g4, 21, %g5 | 193 | sllx %g4, 21, %g5 |
195 | mov 1, %g7 | 194 | or %g2, %lo(kpte_linear_bitmap), %g2 |
196 | srlx %g5, 21 + 28, %g5 | 195 | srlx %g5, 21 + 28, %g5 |
196 | and %g5, (32 - 1), %g7 | ||
197 | 197 | ||
198 | /* Don't try this at home kids... this depends upon srlx | 198 | /* Divide by 32 to get the offset into the bitmask. */ |
199 | * only taking the low 6 bits of the shift count in %g5. | 199 | srlx %g5, 5, %g5 |
200 | */ | 200 | add %g7, %g7, %g7 |
201 | sllx %g7, %g5, %g7 | ||
202 | |||
203 | /* Divide by 64 to get the offset into the bitmask. */ | ||
204 | srlx %g5, 6, %g5 | ||
205 | sllx %g5, 3, %g5 | 201 | sllx %g5, 3, %g5 |
206 | 202 | ||
207 | /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */ | 203 | /* kern_linear_pte_xor[(mask >> shift) & 3)] */ |
208 | ldx [%g2 + %g5], %g2 | 204 | ldx [%g2 + %g5], %g2 |
209 | andcc %g2, %g7, %g0 | 205 | srlx %g2, %g7, %g7 |
210 | sethi %hi(kern_linear_pte_xor), %g5 | 206 | sethi %hi(kern_linear_pte_xor), %g5 |
207 | and %g7, 3, %g7 | ||
211 | or %g5, %lo(kern_linear_pte_xor), %g5 | 208 | or %g5, %lo(kern_linear_pte_xor), %g5 |
212 | bne,a,pt %xcc, 1f | 209 | sllx %g7, 3, %g7 |
213 | add %g5, 8, %g5 | 210 | ldx [%g5 + %g7], %g2 |
214 | |||
215 | 1: ldx [%g5], %g2 | ||
216 | 211 | ||
217 | .globl kvmap_linear_patch | 212 | .globl kvmap_linear_patch |
218 | kvmap_linear_patch: | 213 | kvmap_linear_patch: |
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index 21dcda75a520..fc0521161568 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c | |||
@@ -102,15 +102,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) | |||
102 | return pci_enable_resources(dev, mask); | 102 | return pci_enable_resources(dev, mask); |
103 | } | 103 | } |
104 | 104 | ||
105 | void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) | ||
106 | { | ||
107 | #ifdef CONFIG_PCI_DEBUG | ||
108 | printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq, | ||
109 | pci_name(dev)); | ||
110 | #endif | ||
111 | pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); | ||
112 | } | ||
113 | |||
114 | /* in/out routines taken from pcic.c | 105 | /* in/out routines taken from pcic.c |
115 | * | 106 | * |
116 | * This probably belongs here rather than ioport.c because | 107 | * This probably belongs here rather than ioport.c because |
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 6dc796280589..831c001604e8 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c | |||
@@ -817,6 +817,30 @@ void __cpuinit mdesc_populate_present_mask(cpumask_t *mask) | |||
817 | mdesc_iterate_over_cpus(record_one_cpu, NULL, mask); | 817 | mdesc_iterate_over_cpus(record_one_cpu, NULL, mask); |
818 | } | 818 | } |
819 | 819 | ||
820 | static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) | ||
821 | { | ||
822 | const u64 *pgsz_prop = mdesc_get_property(hp, mp, "mmu-page-size-list", NULL); | ||
823 | unsigned long *pgsz_mask = arg; | ||
824 | u64 val; | ||
825 | |||
826 | val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K | | ||
827 | HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB); | ||
828 | if (pgsz_prop) | ||
829 | val = *pgsz_prop; | ||
830 | |||
831 | if (!*pgsz_mask) | ||
832 | *pgsz_mask = val; | ||
833 | else | ||
834 | *pgsz_mask &= val; | ||
835 | return NULL; | ||
836 | } | ||
837 | |||
838 | void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask) | ||
839 | { | ||
840 | *pgsz_mask = 0; | ||
841 | mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask); | ||
842 | } | ||
843 | |||
820 | static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) | 844 | static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) |
821 | { | 845 | { |
822 | const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); | 846 | const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); |
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 15e0a1693976..f1ddc0d23679 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c | |||
@@ -48,9 +48,7 @@ void *module_alloc(unsigned long size) | |||
48 | return NULL; | 48 | return NULL; |
49 | 49 | ||
50 | ret = module_map(size); | 50 | ret = module_map(size); |
51 | if (!ret) | 51 | if (ret) |
52 | ret = ERR_PTR(-ENOMEM); | ||
53 | else | ||
54 | memset(ret, 0, size); | 52 | memset(ret, 0, size); |
55 | 53 | ||
56 | return ret; | 54 | return ret; |
@@ -116,6 +114,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, | |||
116 | v = sym->st_value + rel[i].r_addend; | 114 | v = sym->st_value + rel[i].r_addend; |
117 | 115 | ||
118 | switch (ELF_R_TYPE(rel[i].r_info) & 0xff) { | 116 | switch (ELF_R_TYPE(rel[i].r_info) & 0xff) { |
117 | case R_SPARC_DISP32: | ||
118 | v -= (Elf_Addr) location; | ||
119 | *loc32 = v; | ||
120 | break; | ||
119 | #ifdef CONFIG_SPARC64 | 121 | #ifdef CONFIG_SPARC64 |
120 | case R_SPARC_64: | 122 | case R_SPARC_64: |
121 | location[0] = v >> 56; | 123 | location[0] = v >> 56; |
@@ -128,11 +130,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, | |||
128 | location[7] = v >> 0; | 130 | location[7] = v >> 0; |
129 | break; | 131 | break; |
130 | 132 | ||
131 | case R_SPARC_DISP32: | ||
132 | v -= (Elf_Addr) location; | ||
133 | *loc32 = v; | ||
134 | break; | ||
135 | |||
136 | case R_SPARC_WDISP19: | 133 | case R_SPARC_WDISP19: |
137 | v -= (Elf_Addr) location; | 134 | v -= (Elf_Addr) location; |
138 | *loc32 = (*loc32 & ~0x7ffff) | | 135 | *loc32 = (*loc32 & ~0x7ffff) | |
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index eb1c1f010a47..6479256fd5a4 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <asm/perf_event.h> | 22 | #include <asm/perf_event.h> |
23 | #include <asm/ptrace.h> | 23 | #include <asm/ptrace.h> |
24 | #include <asm/pcr.h> | 24 | #include <asm/pcr.h> |
25 | #include <asm/perfctr.h> | ||
26 | 25 | ||
27 | #include "kstack.h" | 26 | #include "kstack.h" |
28 | 27 | ||
@@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) | |||
109 | pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) | 108 | pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) |
110 | touched = 1; | 109 | touched = 1; |
111 | else | 110 | else |
112 | pcr_ops->write(PCR_PIC_PRIV); | 111 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
113 | 112 | ||
114 | sum = local_cpu_data().irq0_irqs; | 113 | sum = local_cpu_data().irq0_irqs; |
115 | if (__get_cpu_var(nmi_touch)) { | 114 | if (__get_cpu_var(nmi_touch)) { |
@@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) | |||
126 | __this_cpu_write(alert_counter, 0); | 125 | __this_cpu_write(alert_counter, 0); |
127 | } | 126 | } |
128 | if (__get_cpu_var(wd_enabled)) { | 127 | if (__get_cpu_var(wd_enabled)) { |
129 | write_pic(picl_value(nmi_hz)); | 128 | pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); |
130 | pcr_ops->write(pcr_enable); | 129 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); |
131 | } | 130 | } |
132 | 131 | ||
133 | restore_hardirq_stack(orig_sp); | 132 | restore_hardirq_stack(orig_sp); |
@@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) | |||
166 | 165 | ||
167 | void stop_nmi_watchdog(void *unused) | 166 | void stop_nmi_watchdog(void *unused) |
168 | { | 167 | { |
169 | pcr_ops->write(PCR_PIC_PRIV); | 168 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
170 | __get_cpu_var(wd_enabled) = 0; | 169 | __get_cpu_var(wd_enabled) = 0; |
171 | atomic_dec(&nmi_active); | 170 | atomic_dec(&nmi_active); |
172 | } | 171 | } |
@@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused) | |||
223 | __get_cpu_var(wd_enabled) = 1; | 222 | __get_cpu_var(wd_enabled) = 1; |
224 | atomic_inc(&nmi_active); | 223 | atomic_inc(&nmi_active); |
225 | 224 | ||
226 | pcr_ops->write(PCR_PIC_PRIV); | 225 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
227 | write_pic(picl_value(nmi_hz)); | 226 | pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); |
228 | 227 | ||
229 | pcr_ops->write(pcr_enable); | 228 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); |
230 | } | 229 | } |
231 | 230 | ||
232 | static void nmi_adjust_hz_one(void *unused) | 231 | static void nmi_adjust_hz_one(void *unused) |
@@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused) | |||
234 | if (!__get_cpu_var(wd_enabled)) | 233 | if (!__get_cpu_var(wd_enabled)) |
235 | return; | 234 | return; |
236 | 235 | ||
237 | pcr_ops->write(PCR_PIC_PRIV); | 236 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
238 | write_pic(picl_value(nmi_hz)); | 237 | pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); |
239 | 238 | ||
240 | pcr_ops->write(pcr_enable); | 239 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); |
241 | } | 240 | } |
242 | 241 | ||
243 | void nmi_adjust_hz(unsigned int new_hz) | 242 | void nmi_adjust_hz(unsigned int new_hz) |
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 065b88c4f868..75b31bcdeadf 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c | |||
@@ -622,10 +622,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus) | |||
622 | { | 622 | { |
623 | } | 623 | } |
624 | 624 | ||
625 | void pcibios_update_irq(struct pci_dev *pdev, int irq) | ||
626 | { | ||
627 | } | ||
628 | |||
629 | resource_size_t pcibios_align_resource(void *data, const struct resource *res, | 625 | resource_size_t pcibios_align_resource(void *data, const struct resource *res, |
630 | resource_size_t size, resource_size_t align) | 626 | resource_size_t size, resource_size_t align) |
631 | { | 627 | { |
@@ -783,7 +779,7 @@ static int __pci_mmap_make_offset(struct pci_dev *pdev, | |||
783 | static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma, | 779 | static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma, |
784 | enum pci_mmap_state mmap_state) | 780 | enum pci_mmap_state mmap_state) |
785 | { | 781 | { |
786 | vma->vm_flags |= (VM_IO | VM_RESERVED); | 782 | vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; |
787 | } | 783 | } |
788 | 784 | ||
789 | /* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci | 785 | /* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci |
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 7661e84a05a0..051b69caeffd 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c | |||
@@ -594,7 +594,7 @@ static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm) | |||
594 | printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n", | 594 | printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n", |
595 | vdma[0], vdma[1]); | 595 | vdma[0], vdma[1]); |
596 | return -EINVAL; | 596 | return -EINVAL; |
597 | }; | 597 | } |
598 | 598 | ||
599 | dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL); | 599 | dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL); |
600 | num_tsb_entries = vdma[1] / IO_PAGE_SIZE; | 600 | num_tsb_entries = vdma[1] / IO_PAGE_SIZE; |
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 0ce0dd2332aa..269af58497aa 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c | |||
@@ -13,23 +13,14 @@ | |||
13 | #include <asm/pil.h> | 13 | #include <asm/pil.h> |
14 | #include <asm/pcr.h> | 14 | #include <asm/pcr.h> |
15 | #include <asm/nmi.h> | 15 | #include <asm/nmi.h> |
16 | #include <asm/asi.h> | ||
16 | #include <asm/spitfire.h> | 17 | #include <asm/spitfire.h> |
17 | #include <asm/perfctr.h> | ||
18 | 18 | ||
19 | /* This code is shared between various users of the performance | 19 | /* This code is shared between various users of the performance |
20 | * counters. Users will be oprofile, pseudo-NMI watchdog, and the | 20 | * counters. Users will be oprofile, pseudo-NMI watchdog, and the |
21 | * perf_event support layer. | 21 | * perf_event support layer. |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) | ||
25 | #define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ | ||
26 | PCR_N2_TOE_OV1 | \ | ||
27 | (2 << PCR_N2_SL1_SHIFT) | \ | ||
28 | (0xff << PCR_N2_MASK1_SHIFT)) | ||
29 | |||
30 | u64 pcr_enable; | ||
31 | unsigned int picl_shift; | ||
32 | |||
33 | /* Performance counter interrupts run unmasked at PIL level 15. | 24 | /* Performance counter interrupts run unmasked at PIL level 15. |
34 | * Therefore we can't do things like wakeups and other work | 25 | * Therefore we can't do things like wakeups and other work |
35 | * that expects IRQ disabling to be adhered to in locking etc. | 26 | * that expects IRQ disabling to be adhered to in locking etc. |
@@ -60,39 +51,144 @@ void arch_irq_work_raise(void) | |||
60 | const struct pcr_ops *pcr_ops; | 51 | const struct pcr_ops *pcr_ops; |
61 | EXPORT_SYMBOL_GPL(pcr_ops); | 52 | EXPORT_SYMBOL_GPL(pcr_ops); |
62 | 53 | ||
63 | static u64 direct_pcr_read(void) | 54 | static u64 direct_pcr_read(unsigned long reg_num) |
64 | { | 55 | { |
65 | u64 val; | 56 | u64 val; |
66 | 57 | ||
67 | read_pcr(val); | 58 | WARN_ON_ONCE(reg_num != 0); |
59 | __asm__ __volatile__("rd %%pcr, %0" : "=r" (val)); | ||
68 | return val; | 60 | return val; |
69 | } | 61 | } |
70 | 62 | ||
71 | static void direct_pcr_write(u64 val) | 63 | static void direct_pcr_write(unsigned long reg_num, u64 val) |
64 | { | ||
65 | WARN_ON_ONCE(reg_num != 0); | ||
66 | __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val)); | ||
67 | } | ||
68 | |||
69 | static u64 direct_pic_read(unsigned long reg_num) | ||
72 | { | 70 | { |
73 | write_pcr(val); | 71 | u64 val; |
72 | |||
73 | WARN_ON_ONCE(reg_num != 0); | ||
74 | __asm__ __volatile__("rd %%pic, %0" : "=r" (val)); | ||
75 | return val; | ||
76 | } | ||
77 | |||
78 | static void direct_pic_write(unsigned long reg_num, u64 val) | ||
79 | { | ||
80 | WARN_ON_ONCE(reg_num != 0); | ||
81 | |||
82 | /* Blackbird errata workaround. See commentary in | ||
83 | * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() | ||
84 | * for more information. | ||
85 | */ | ||
86 | __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" | ||
87 | " nop\n\t" | ||
88 | ".align 64\n" | ||
89 | "99:wr %0, 0x0, %%pic\n\t" | ||
90 | "rd %%pic, %%g0" : : "r" (val)); | ||
91 | } | ||
92 | |||
93 | static u64 direct_picl_value(unsigned int nmi_hz) | ||
94 | { | ||
95 | u32 delta = local_cpu_data().clock_tick / nmi_hz; | ||
96 | |||
97 | return ((u64)((0 - delta) & 0xffffffff)) << 32; | ||
74 | } | 98 | } |
75 | 99 | ||
76 | static const struct pcr_ops direct_pcr_ops = { | 100 | static const struct pcr_ops direct_pcr_ops = { |
77 | .read = direct_pcr_read, | 101 | .read_pcr = direct_pcr_read, |
78 | .write = direct_pcr_write, | 102 | .write_pcr = direct_pcr_write, |
103 | .read_pic = direct_pic_read, | ||
104 | .write_pic = direct_pic_write, | ||
105 | .nmi_picl_value = direct_picl_value, | ||
106 | .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE), | ||
107 | .pcr_nmi_disable = PCR_PIC_PRIV, | ||
79 | }; | 108 | }; |
80 | 109 | ||
81 | static void n2_pcr_write(u64 val) | 110 | static void n2_pcr_write(unsigned long reg_num, u64 val) |
82 | { | 111 | { |
83 | unsigned long ret; | 112 | unsigned long ret; |
84 | 113 | ||
114 | WARN_ON_ONCE(reg_num != 0); | ||
85 | if (val & PCR_N2_HTRACE) { | 115 | if (val & PCR_N2_HTRACE) { |
86 | ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); | 116 | ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); |
87 | if (ret != HV_EOK) | 117 | if (ret != HV_EOK) |
88 | write_pcr(val); | 118 | direct_pcr_write(reg_num, val); |
89 | } else | 119 | } else |
90 | write_pcr(val); | 120 | direct_pcr_write(reg_num, val); |
121 | } | ||
122 | |||
123 | static u64 n2_picl_value(unsigned int nmi_hz) | ||
124 | { | ||
125 | u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2); | ||
126 | |||
127 | return ((u64)((0 - delta) & 0xffffffff)) << 32; | ||
91 | } | 128 | } |
92 | 129 | ||
93 | static const struct pcr_ops n2_pcr_ops = { | 130 | static const struct pcr_ops n2_pcr_ops = { |
94 | .read = direct_pcr_read, | 131 | .read_pcr = direct_pcr_read, |
95 | .write = n2_pcr_write, | 132 | .write_pcr = n2_pcr_write, |
133 | .read_pic = direct_pic_read, | ||
134 | .write_pic = direct_pic_write, | ||
135 | .nmi_picl_value = n2_picl_value, | ||
136 | .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | | ||
137 | PCR_N2_TOE_OV1 | | ||
138 | (2 << PCR_N2_SL1_SHIFT) | | ||
139 | (0xff << PCR_N2_MASK1_SHIFT)), | ||
140 | .pcr_nmi_disable = PCR_PIC_PRIV, | ||
141 | }; | ||
142 | |||
143 | static u64 n4_pcr_read(unsigned long reg_num) | ||
144 | { | ||
145 | unsigned long val; | ||
146 | |||
147 | (void) sun4v_vt_get_perfreg(reg_num, &val); | ||
148 | |||
149 | return val; | ||
150 | } | ||
151 | |||
152 | static void n4_pcr_write(unsigned long reg_num, u64 val) | ||
153 | { | ||
154 | (void) sun4v_vt_set_perfreg(reg_num, val); | ||
155 | } | ||
156 | |||
157 | static u64 n4_pic_read(unsigned long reg_num) | ||
158 | { | ||
159 | unsigned long val; | ||
160 | |||
161 | __asm__ __volatile__("ldxa [%1] %2, %0" | ||
162 | : "=r" (val) | ||
163 | : "r" (reg_num * 0x8UL), "i" (ASI_PIC)); | ||
164 | |||
165 | return val; | ||
166 | } | ||
167 | |||
168 | static void n4_pic_write(unsigned long reg_num, u64 val) | ||
169 | { | ||
170 | __asm__ __volatile__("stxa %0, [%1] %2" | ||
171 | : /* no outputs */ | ||
172 | : "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC)); | ||
173 | } | ||
174 | |||
175 | static u64 n4_picl_value(unsigned int nmi_hz) | ||
176 | { | ||
177 | u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2); | ||
178 | |||
179 | return ((u64)((0 - delta) & 0xffffffff)); | ||
180 | } | ||
181 | |||
182 | static const struct pcr_ops n4_pcr_ops = { | ||
183 | .read_pcr = n4_pcr_read, | ||
184 | .write_pcr = n4_pcr_write, | ||
185 | .read_pic = n4_pic_read, | ||
186 | .write_pic = n4_pic_write, | ||
187 | .nmi_picl_value = n4_picl_value, | ||
188 | .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | | ||
189 | PCR_N4_UTRACE | PCR_N4_TOE | | ||
190 | (26 << PCR_N4_SL_SHIFT)), | ||
191 | .pcr_nmi_disable = PCR_N4_PICNPT, | ||
96 | }; | 192 | }; |
97 | 193 | ||
98 | static unsigned long perf_hsvc_group; | 194 | static unsigned long perf_hsvc_group; |
@@ -115,6 +211,10 @@ static int __init register_perf_hsvc(void) | |||
115 | perf_hsvc_group = HV_GRP_KT_CPU; | 211 | perf_hsvc_group = HV_GRP_KT_CPU; |
116 | break; | 212 | break; |
117 | 213 | ||
214 | case SUN4V_CHIP_NIAGARA4: | ||
215 | perf_hsvc_group = HV_GRP_VT_CPU; | ||
216 | break; | ||
217 | |||
118 | default: | 218 | default: |
119 | return -ENODEV; | 219 | return -ENODEV; |
120 | } | 220 | } |
@@ -139,6 +239,29 @@ static void __init unregister_perf_hsvc(void) | |||
139 | sun4v_hvapi_unregister(perf_hsvc_group); | 239 | sun4v_hvapi_unregister(perf_hsvc_group); |
140 | } | 240 | } |
141 | 241 | ||
242 | static int __init setup_sun4v_pcr_ops(void) | ||
243 | { | ||
244 | int ret = 0; | ||
245 | |||
246 | switch (sun4v_chip_type) { | ||
247 | case SUN4V_CHIP_NIAGARA1: | ||
248 | case SUN4V_CHIP_NIAGARA2: | ||
249 | case SUN4V_CHIP_NIAGARA3: | ||
250 | pcr_ops = &n2_pcr_ops; | ||
251 | break; | ||
252 | |||
253 | case SUN4V_CHIP_NIAGARA4: | ||
254 | pcr_ops = &n4_pcr_ops; | ||
255 | break; | ||
256 | |||
257 | default: | ||
258 | ret = -ENODEV; | ||
259 | break; | ||
260 | } | ||
261 | |||
262 | return ret; | ||
263 | } | ||
264 | |||
142 | int __init pcr_arch_init(void) | 265 | int __init pcr_arch_init(void) |
143 | { | 266 | { |
144 | int err = register_perf_hsvc(); | 267 | int err = register_perf_hsvc(); |
@@ -148,15 +271,14 @@ int __init pcr_arch_init(void) | |||
148 | 271 | ||
149 | switch (tlb_type) { | 272 | switch (tlb_type) { |
150 | case hypervisor: | 273 | case hypervisor: |
151 | pcr_ops = &n2_pcr_ops; | 274 | err = setup_sun4v_pcr_ops(); |
152 | pcr_enable = PCR_N2_ENABLE; | 275 | if (err) |
153 | picl_shift = 2; | 276 | goto out_unregister; |
154 | break; | 277 | break; |
155 | 278 | ||
156 | case cheetah: | 279 | case cheetah: |
157 | case cheetah_plus: | 280 | case cheetah_plus: |
158 | pcr_ops = &direct_pcr_ops; | 281 | pcr_ops = &direct_pcr_ops; |
159 | pcr_enable = PCR_SUN4U_ENABLE; | ||
160 | break; | 282 | break; |
161 | 283 | ||
162 | case spitfire: | 284 | case spitfire: |
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 5713957dcb8a..e48651dace1b 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c | |||
@@ -25,36 +25,48 @@ | |||
25 | #include <linux/atomic.h> | 25 | #include <linux/atomic.h> |
26 | #include <asm/nmi.h> | 26 | #include <asm/nmi.h> |
27 | #include <asm/pcr.h> | 27 | #include <asm/pcr.h> |
28 | #include <asm/perfctr.h> | ||
29 | #include <asm/cacheflush.h> | 28 | #include <asm/cacheflush.h> |
30 | 29 | ||
31 | #include "kernel.h" | 30 | #include "kernel.h" |
32 | #include "kstack.h" | 31 | #include "kstack.h" |
33 | 32 | ||
34 | /* Sparc64 chips have two performance counters, 32-bits each, with | 33 | /* Two classes of sparc64 chips currently exist. All of which have |
35 | * overflow interrupts generated on transition from 0xffffffff to 0. | 34 | * 32-bit counters which can generate overflow interrupts on the |
36 | * The counters are accessed in one go using a 64-bit register. | 35 | * transition from 0xffffffff to 0. |
37 | * | 36 | * |
38 | * Both counters are controlled using a single control register. The | 37 | * All chips upto and including SPARC-T3 have two performance |
39 | * only way to stop all sampling is to clear all of the context (user, | 38 | * counters. The two 32-bit counters are accessed in one go using a |
40 | * supervisor, hypervisor) sampling enable bits. But these bits apply | 39 | * single 64-bit register. |
41 | * to both counters, thus the two counters can't be enabled/disabled | ||
42 | * individually. | ||
43 | * | 40 | * |
44 | * The control register has two event fields, one for each of the two | 41 | * On these older chips both counters are controlled using a single |
45 | * counters. It's thus nearly impossible to have one counter going | 42 | * control register. The only way to stop all sampling is to clear |
46 | * while keeping the other one stopped. Therefore it is possible to | 43 | * all of the context (user, supervisor, hypervisor) sampling enable |
47 | * get overflow interrupts for counters not currently "in use" and | 44 | * bits. But these bits apply to both counters, thus the two counters |
48 | * that condition must be checked in the overflow interrupt handler. | 45 | * can't be enabled/disabled individually. |
46 | * | ||
47 | * Furthermore, the control register on these older chips have two | ||
48 | * event fields, one for each of the two counters. It's thus nearly | ||
49 | * impossible to have one counter going while keeping the other one | ||
50 | * stopped. Therefore it is possible to get overflow interrupts for | ||
51 | * counters not currently "in use" and that condition must be checked | ||
52 | * in the overflow interrupt handler. | ||
49 | * | 53 | * |
50 | * So we use a hack, in that we program inactive counters with the | 54 | * So we use a hack, in that we program inactive counters with the |
51 | * "sw_count0" and "sw_count1" events. These count how many times | 55 | * "sw_count0" and "sw_count1" events. These count how many times |
52 | * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an | 56 | * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an |
53 | * unusual way to encode a NOP and therefore will not trigger in | 57 | * unusual way to encode a NOP and therefore will not trigger in |
54 | * normal code. | 58 | * normal code. |
59 | * | ||
60 | * Starting with SPARC-T4 we have one control register per counter. | ||
61 | * And the counters are stored in individual registers. The registers | ||
62 | * for the counters are 64-bit but only a 32-bit counter is | ||
63 | * implemented. The event selections on SPARC-T4 lack any | ||
64 | * restrictions, therefore we can elide all of the complicated | ||
65 | * conflict resolution code we have for SPARC-T3 and earlier chips. | ||
55 | */ | 66 | */ |
56 | 67 | ||
57 | #define MAX_HWEVENTS 2 | 68 | #define MAX_HWEVENTS 4 |
69 | #define MAX_PCRS 4 | ||
58 | #define MAX_PERIOD ((1UL << 32) - 1) | 70 | #define MAX_PERIOD ((1UL << 32) - 1) |
59 | 71 | ||
60 | #define PIC_UPPER_INDEX 0 | 72 | #define PIC_UPPER_INDEX 0 |
@@ -90,8 +102,8 @@ struct cpu_hw_events { | |||
90 | */ | 102 | */ |
91 | int current_idx[MAX_HWEVENTS]; | 103 | int current_idx[MAX_HWEVENTS]; |
92 | 104 | ||
93 | /* Software copy of %pcr register on this cpu. */ | 105 | /* Software copy of %pcr register(s) on this cpu. */ |
94 | u64 pcr; | 106 | u64 pcr[MAX_HWEVENTS]; |
95 | 107 | ||
96 | /* Enabled/disable state. */ | 108 | /* Enabled/disable state. */ |
97 | int enabled; | 109 | int enabled; |
@@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; | |||
103 | /* An event map describes the characteristics of a performance | 115 | /* An event map describes the characteristics of a performance |
104 | * counter event. In particular it gives the encoding as well as | 116 | * counter event. In particular it gives the encoding as well as |
105 | * a mask telling which counters the event can be measured on. | 117 | * a mask telling which counters the event can be measured on. |
118 | * | ||
119 | * The mask is unused on SPARC-T4 and later. | ||
106 | */ | 120 | */ |
107 | struct perf_event_map { | 121 | struct perf_event_map { |
108 | u16 encoding; | 122 | u16 encoding; |
@@ -142,15 +156,53 @@ struct sparc_pmu { | |||
142 | const struct perf_event_map *(*event_map)(int); | 156 | const struct perf_event_map *(*event_map)(int); |
143 | const cache_map_t *cache_map; | 157 | const cache_map_t *cache_map; |
144 | int max_events; | 158 | int max_events; |
159 | u32 (*read_pmc)(int); | ||
160 | void (*write_pmc)(int, u64); | ||
145 | int upper_shift; | 161 | int upper_shift; |
146 | int lower_shift; | 162 | int lower_shift; |
147 | int event_mask; | 163 | int event_mask; |
164 | int user_bit; | ||
165 | int priv_bit; | ||
148 | int hv_bit; | 166 | int hv_bit; |
149 | int irq_bit; | 167 | int irq_bit; |
150 | int upper_nop; | 168 | int upper_nop; |
151 | int lower_nop; | 169 | int lower_nop; |
170 | unsigned int flags; | ||
171 | #define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001 | ||
172 | #define SPARC_PMU_HAS_CONFLICTS 0x00000002 | ||
173 | int max_hw_events; | ||
174 | int num_pcrs; | ||
175 | int num_pic_regs; | ||
152 | }; | 176 | }; |
153 | 177 | ||
178 | static u32 sparc_default_read_pmc(int idx) | ||
179 | { | ||
180 | u64 val; | ||
181 | |||
182 | val = pcr_ops->read_pic(0); | ||
183 | if (idx == PIC_UPPER_INDEX) | ||
184 | val >>= 32; | ||
185 | |||
186 | return val & 0xffffffff; | ||
187 | } | ||
188 | |||
189 | static void sparc_default_write_pmc(int idx, u64 val) | ||
190 | { | ||
191 | u64 shift, mask, pic; | ||
192 | |||
193 | shift = 0; | ||
194 | if (idx == PIC_UPPER_INDEX) | ||
195 | shift = 32; | ||
196 | |||
197 | mask = ((u64) 0xffffffff) << shift; | ||
198 | val <<= shift; | ||
199 | |||
200 | pic = pcr_ops->read_pic(0); | ||
201 | pic &= ~mask; | ||
202 | pic |= val; | ||
203 | pcr_ops->write_pic(0, pic); | ||
204 | } | ||
205 | |||
154 | static const struct perf_event_map ultra3_perfmon_event_map[] = { | 206 | static const struct perf_event_map ultra3_perfmon_event_map[] = { |
155 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, | 207 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, |
156 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, | 208 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, |
@@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = { | |||
268 | .event_map = ultra3_event_map, | 320 | .event_map = ultra3_event_map, |
269 | .cache_map = &ultra3_cache_map, | 321 | .cache_map = &ultra3_cache_map, |
270 | .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), | 322 | .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), |
323 | .read_pmc = sparc_default_read_pmc, | ||
324 | .write_pmc = sparc_default_write_pmc, | ||
271 | .upper_shift = 11, | 325 | .upper_shift = 11, |
272 | .lower_shift = 4, | 326 | .lower_shift = 4, |
273 | .event_mask = 0x3f, | 327 | .event_mask = 0x3f, |
328 | .user_bit = PCR_UTRACE, | ||
329 | .priv_bit = PCR_STRACE, | ||
274 | .upper_nop = 0x1c, | 330 | .upper_nop = 0x1c, |
275 | .lower_nop = 0x14, | 331 | .lower_nop = 0x14, |
332 | .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | | ||
333 | SPARC_PMU_HAS_CONFLICTS), | ||
334 | .max_hw_events = 2, | ||
335 | .num_pcrs = 1, | ||
336 | .num_pic_regs = 1, | ||
276 | }; | 337 | }; |
277 | 338 | ||
278 | /* Niagara1 is very limited. The upper PIC is hard-locked to count | 339 | /* Niagara1 is very limited. The upper PIC is hard-locked to count |
@@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = { | |||
397 | .event_map = niagara1_event_map, | 458 | .event_map = niagara1_event_map, |
398 | .cache_map = &niagara1_cache_map, | 459 | .cache_map = &niagara1_cache_map, |
399 | .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), | 460 | .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), |
461 | .read_pmc = sparc_default_read_pmc, | ||
462 | .write_pmc = sparc_default_write_pmc, | ||
400 | .upper_shift = 0, | 463 | .upper_shift = 0, |
401 | .lower_shift = 4, | 464 | .lower_shift = 4, |
402 | .event_mask = 0x7, | 465 | .event_mask = 0x7, |
466 | .user_bit = PCR_UTRACE, | ||
467 | .priv_bit = PCR_STRACE, | ||
403 | .upper_nop = 0x0, | 468 | .upper_nop = 0x0, |
404 | .lower_nop = 0x0, | 469 | .lower_nop = 0x0, |
470 | .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | | ||
471 | SPARC_PMU_HAS_CONFLICTS), | ||
472 | .max_hw_events = 2, | ||
473 | .num_pcrs = 1, | ||
474 | .num_pic_regs = 1, | ||
405 | }; | 475 | }; |
406 | 476 | ||
407 | static const struct perf_event_map niagara2_perfmon_event_map[] = { | 477 | static const struct perf_event_map niagara2_perfmon_event_map[] = { |
@@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = { | |||
523 | .event_map = niagara2_event_map, | 593 | .event_map = niagara2_event_map, |
524 | .cache_map = &niagara2_cache_map, | 594 | .cache_map = &niagara2_cache_map, |
525 | .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), | 595 | .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), |
596 | .read_pmc = sparc_default_read_pmc, | ||
597 | .write_pmc = sparc_default_write_pmc, | ||
526 | .upper_shift = 19, | 598 | .upper_shift = 19, |
527 | .lower_shift = 6, | 599 | .lower_shift = 6, |
528 | .event_mask = 0xfff, | 600 | .event_mask = 0xfff, |
529 | .hv_bit = 0x8, | 601 | .user_bit = PCR_UTRACE, |
602 | .priv_bit = PCR_STRACE, | ||
603 | .hv_bit = PCR_N2_HTRACE, | ||
530 | .irq_bit = 0x30, | 604 | .irq_bit = 0x30, |
531 | .upper_nop = 0x220, | 605 | .upper_nop = 0x220, |
532 | .lower_nop = 0x220, | 606 | .lower_nop = 0x220, |
607 | .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | | ||
608 | SPARC_PMU_HAS_CONFLICTS), | ||
609 | .max_hw_events = 2, | ||
610 | .num_pcrs = 1, | ||
611 | .num_pic_regs = 1, | ||
612 | }; | ||
613 | |||
614 | static const struct perf_event_map niagara4_perfmon_event_map[] = { | ||
615 | [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) }, | ||
616 | [PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f }, | ||
617 | [PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 }, | ||
618 | [PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 }, | ||
619 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 }, | ||
620 | [PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f }, | ||
621 | }; | ||
622 | |||
623 | static const struct perf_event_map *niagara4_event_map(int event_id) | ||
624 | { | ||
625 | return &niagara4_perfmon_event_map[event_id]; | ||
626 | } | ||
627 | |||
628 | static const cache_map_t niagara4_cache_map = { | ||
629 | [C(L1D)] = { | ||
630 | [C(OP_READ)] = { | ||
631 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, | ||
632 | [C(RESULT_MISS)] = { (16 << 6) | 0x07 }, | ||
633 | }, | ||
634 | [C(OP_WRITE)] = { | ||
635 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, | ||
636 | [C(RESULT_MISS)] = { (16 << 6) | 0x07 }, | ||
637 | }, | ||
638 | [C(OP_PREFETCH)] = { | ||
639 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
640 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
641 | }, | ||
642 | }, | ||
643 | [C(L1I)] = { | ||
644 | [C(OP_READ)] = { | ||
645 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x3f }, | ||
646 | [C(RESULT_MISS)] = { (11 << 6) | 0x03 }, | ||
647 | }, | ||
648 | [ C(OP_WRITE) ] = { | ||
649 | [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, | ||
650 | [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, | ||
651 | }, | ||
652 | [ C(OP_PREFETCH) ] = { | ||
653 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
654 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
655 | }, | ||
656 | }, | ||
657 | [C(LL)] = { | ||
658 | [C(OP_READ)] = { | ||
659 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, | ||
660 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
661 | }, | ||
662 | [C(OP_WRITE)] = { | ||
663 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, | ||
664 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
665 | }, | ||
666 | [C(OP_PREFETCH)] = { | ||
667 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
668 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
669 | }, | ||
670 | }, | ||
671 | [C(DTLB)] = { | ||
672 | [C(OP_READ)] = { | ||
673 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
674 | [C(RESULT_MISS)] = { (17 << 6) | 0x3f }, | ||
675 | }, | ||
676 | [ C(OP_WRITE) ] = { | ||
677 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
678 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
679 | }, | ||
680 | [ C(OP_PREFETCH) ] = { | ||
681 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
682 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
683 | }, | ||
684 | }, | ||
685 | [C(ITLB)] = { | ||
686 | [C(OP_READ)] = { | ||
687 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
688 | [C(RESULT_MISS)] = { (6 << 6) | 0x3f }, | ||
689 | }, | ||
690 | [ C(OP_WRITE) ] = { | ||
691 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
692 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
693 | }, | ||
694 | [ C(OP_PREFETCH) ] = { | ||
695 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
696 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
697 | }, | ||
698 | }, | ||
699 | [C(BPU)] = { | ||
700 | [C(OP_READ)] = { | ||
701 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
702 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
703 | }, | ||
704 | [ C(OP_WRITE) ] = { | ||
705 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
706 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
707 | }, | ||
708 | [ C(OP_PREFETCH) ] = { | ||
709 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
710 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
711 | }, | ||
712 | }, | ||
713 | [C(NODE)] = { | ||
714 | [C(OP_READ)] = { | ||
715 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
716 | [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
717 | }, | ||
718 | [ C(OP_WRITE) ] = { | ||
719 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
720 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
721 | }, | ||
722 | [ C(OP_PREFETCH) ] = { | ||
723 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
724 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
725 | }, | ||
726 | }, | ||
727 | }; | ||
728 | |||
729 | static u32 sparc_vt_read_pmc(int idx) | ||
730 | { | ||
731 | u64 val = pcr_ops->read_pic(idx); | ||
732 | |||
733 | return val & 0xffffffff; | ||
734 | } | ||
735 | |||
736 | static void sparc_vt_write_pmc(int idx, u64 val) | ||
737 | { | ||
738 | u64 pcr; | ||
739 | |||
740 | /* There seems to be an internal latch on the overflow event | ||
741 | * on SPARC-T4 that prevents it from triggering unless you | ||
742 | * update the PIC exactly as we do here. The requirement | ||
743 | * seems to be that you have to turn off event counting in the | ||
744 | * PCR around the PIC update. | ||
745 | * | ||
746 | * For example, after the following sequence: | ||
747 | * | ||
748 | * 1) set PIC to -1 | ||
749 | * 2) enable event counting and overflow reporting in PCR | ||
750 | * 3) overflow triggers, softint 15 handler invoked | ||
751 | * 4) clear OV bit in PCR | ||
752 | * 5) write PIC to -1 | ||
753 | * | ||
754 | * a subsequent overflow event will not trigger. This | ||
755 | * sequence works on SPARC-T3 and previous chips. | ||
756 | */ | ||
757 | pcr = pcr_ops->read_pcr(idx); | ||
758 | pcr_ops->write_pcr(idx, PCR_N4_PICNPT); | ||
759 | |||
760 | pcr_ops->write_pic(idx, val & 0xffffffff); | ||
761 | |||
762 | pcr_ops->write_pcr(idx, pcr); | ||
763 | } | ||
764 | |||
765 | static const struct sparc_pmu niagara4_pmu = { | ||
766 | .event_map = niagara4_event_map, | ||
767 | .cache_map = &niagara4_cache_map, | ||
768 | .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), | ||
769 | .read_pmc = sparc_vt_read_pmc, | ||
770 | .write_pmc = sparc_vt_write_pmc, | ||
771 | .upper_shift = 5, | ||
772 | .lower_shift = 5, | ||
773 | .event_mask = 0x7ff, | ||
774 | .user_bit = PCR_N4_UTRACE, | ||
775 | .priv_bit = PCR_N4_STRACE, | ||
776 | |||
777 | /* We explicitly don't support hypervisor tracing. The T4 | ||
778 | * generates the overflow event for precise events via a trap | ||
779 | * which will not be generated (ie. it's completely lost) if | ||
780 | * we happen to be in the hypervisor when the event triggers. | ||
781 | * Essentially, the overflow event reporting is completely | ||
782 | * unusable when you have hypervisor mode tracing enabled. | ||
783 | */ | ||
784 | .hv_bit = 0, | ||
785 | |||
786 | .irq_bit = PCR_N4_TOE, | ||
787 | .upper_nop = 0, | ||
788 | .lower_nop = 0, | ||
789 | .flags = 0, | ||
790 | .max_hw_events = 4, | ||
791 | .num_pcrs = 4, | ||
792 | .num_pic_regs = 4, | ||
533 | }; | 793 | }; |
534 | 794 | ||
535 | static const struct sparc_pmu *sparc_pmu __read_mostly; | 795 | static const struct sparc_pmu *sparc_pmu __read_mostly; |
@@ -558,55 +818,35 @@ static u64 nop_for_index(int idx) | |||
558 | static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) | 818 | static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) |
559 | { | 819 | { |
560 | u64 val, mask = mask_for_index(idx); | 820 | u64 val, mask = mask_for_index(idx); |
821 | int pcr_index = 0; | ||
561 | 822 | ||
562 | val = cpuc->pcr; | 823 | if (sparc_pmu->num_pcrs > 1) |
824 | pcr_index = idx; | ||
825 | |||
826 | val = cpuc->pcr[pcr_index]; | ||
563 | val &= ~mask; | 827 | val &= ~mask; |
564 | val |= hwc->config; | 828 | val |= hwc->config; |
565 | cpuc->pcr = val; | 829 | cpuc->pcr[pcr_index] = val; |
566 | 830 | ||
567 | pcr_ops->write(cpuc->pcr); | 831 | pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); |
568 | } | 832 | } |
569 | 833 | ||
570 | static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) | 834 | static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) |
571 | { | 835 | { |
572 | u64 mask = mask_for_index(idx); | 836 | u64 mask = mask_for_index(idx); |
573 | u64 nop = nop_for_index(idx); | 837 | u64 nop = nop_for_index(idx); |
838 | int pcr_index = 0; | ||
574 | u64 val; | 839 | u64 val; |
575 | 840 | ||
576 | val = cpuc->pcr; | 841 | if (sparc_pmu->num_pcrs > 1) |
842 | pcr_index = idx; | ||
843 | |||
844 | val = cpuc->pcr[pcr_index]; | ||
577 | val &= ~mask; | 845 | val &= ~mask; |
578 | val |= nop; | 846 | val |= nop; |
579 | cpuc->pcr = val; | 847 | cpuc->pcr[pcr_index] = val; |
580 | 848 | ||
581 | pcr_ops->write(cpuc->pcr); | 849 | pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); |
582 | } | ||
583 | |||
584 | static u32 read_pmc(int idx) | ||
585 | { | ||
586 | u64 val; | ||
587 | |||
588 | read_pic(val); | ||
589 | if (idx == PIC_UPPER_INDEX) | ||
590 | val >>= 32; | ||
591 | |||
592 | return val & 0xffffffff; | ||
593 | } | ||
594 | |||
595 | static void write_pmc(int idx, u64 val) | ||
596 | { | ||
597 | u64 shift, mask, pic; | ||
598 | |||
599 | shift = 0; | ||
600 | if (idx == PIC_UPPER_INDEX) | ||
601 | shift = 32; | ||
602 | |||
603 | mask = ((u64) 0xffffffff) << shift; | ||
604 | val <<= shift; | ||
605 | |||
606 | read_pic(pic); | ||
607 | pic &= ~mask; | ||
608 | pic |= val; | ||
609 | write_pic(pic); | ||
610 | } | 850 | } |
611 | 851 | ||
612 | static u64 sparc_perf_event_update(struct perf_event *event, | 852 | static u64 sparc_perf_event_update(struct perf_event *event, |
@@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event, | |||
618 | 858 | ||
619 | again: | 859 | again: |
620 | prev_raw_count = local64_read(&hwc->prev_count); | 860 | prev_raw_count = local64_read(&hwc->prev_count); |
621 | new_raw_count = read_pmc(idx); | 861 | new_raw_count = sparc_pmu->read_pmc(idx); |
622 | 862 | ||
623 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | 863 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
624 | new_raw_count) != prev_raw_count) | 864 | new_raw_count) != prev_raw_count) |
@@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event, | |||
658 | 898 | ||
659 | local64_set(&hwc->prev_count, (u64)-left); | 899 | local64_set(&hwc->prev_count, (u64)-left); |
660 | 900 | ||
661 | write_pmc(idx, (u64)(-left) & 0xffffffff); | 901 | sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff); |
662 | 902 | ||
663 | perf_event_update_userpage(event); | 903 | perf_event_update_userpage(event); |
664 | 904 | ||
665 | return ret; | 905 | return ret; |
666 | } | 906 | } |
667 | 907 | ||
668 | /* If performance event entries have been added, move existing | 908 | static void read_in_all_counters(struct cpu_hw_events *cpuc) |
669 | * events around (if necessary) and then assign new entries to | ||
670 | * counters. | ||
671 | */ | ||
672 | static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) | ||
673 | { | 909 | { |
674 | int i; | 910 | int i; |
675 | 911 | ||
676 | if (!cpuc->n_added) | ||
677 | goto out; | ||
678 | |||
679 | /* Read in the counters which are moving. */ | ||
680 | for (i = 0; i < cpuc->n_events; i++) { | 912 | for (i = 0; i < cpuc->n_events; i++) { |
681 | struct perf_event *cp = cpuc->event[i]; | 913 | struct perf_event *cp = cpuc->event[i]; |
682 | 914 | ||
@@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) | |||
687 | cpuc->current_idx[i] = PIC_NO_INDEX; | 919 | cpuc->current_idx[i] = PIC_NO_INDEX; |
688 | } | 920 | } |
689 | } | 921 | } |
922 | } | ||
923 | |||
924 | /* On this PMU all PICs are programmed using a single PCR. Calculate | ||
925 | * the combined control register value. | ||
926 | * | ||
927 | * For such chips we require that all of the events have the same | ||
928 | * configuration, so just fetch the settings from the first entry. | ||
929 | */ | ||
930 | static void calculate_single_pcr(struct cpu_hw_events *cpuc) | ||
931 | { | ||
932 | int i; | ||
933 | |||
934 | if (!cpuc->n_added) | ||
935 | goto out; | ||
690 | 936 | ||
691 | /* Assign to counters all unassigned events. */ | 937 | /* Assign to counters all unassigned events. */ |
692 | for (i = 0; i < cpuc->n_events; i++) { | 938 | for (i = 0; i < cpuc->n_events; i++) { |
@@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) | |||
702 | cpuc->current_idx[i] = idx; | 948 | cpuc->current_idx[i] = idx; |
703 | 949 | ||
704 | enc = perf_event_get_enc(cpuc->events[i]); | 950 | enc = perf_event_get_enc(cpuc->events[i]); |
705 | pcr &= ~mask_for_index(idx); | 951 | cpuc->pcr[0] &= ~mask_for_index(idx); |
706 | if (hwc->state & PERF_HES_STOPPED) | 952 | if (hwc->state & PERF_HES_STOPPED) |
707 | pcr |= nop_for_index(idx); | 953 | cpuc->pcr[0] |= nop_for_index(idx); |
708 | else | 954 | else |
709 | pcr |= event_encoding(enc, idx); | 955 | cpuc->pcr[0] |= event_encoding(enc, idx); |
710 | } | 956 | } |
711 | out: | 957 | out: |
712 | return pcr; | 958 | cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; |
959 | } | ||
960 | |||
961 | /* On this PMU each PIC has it's own PCR control register. */ | ||
962 | static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) | ||
963 | { | ||
964 | int i; | ||
965 | |||
966 | if (!cpuc->n_added) | ||
967 | goto out; | ||
968 | |||
969 | for (i = 0; i < cpuc->n_events; i++) { | ||
970 | struct perf_event *cp = cpuc->event[i]; | ||
971 | struct hw_perf_event *hwc = &cp->hw; | ||
972 | int idx = hwc->idx; | ||
973 | u64 enc; | ||
974 | |||
975 | if (cpuc->current_idx[i] != PIC_NO_INDEX) | ||
976 | continue; | ||
977 | |||
978 | sparc_perf_event_set_period(cp, hwc, idx); | ||
979 | cpuc->current_idx[i] = idx; | ||
980 | |||
981 | enc = perf_event_get_enc(cpuc->events[i]); | ||
982 | cpuc->pcr[idx] &= ~mask_for_index(idx); | ||
983 | if (hwc->state & PERF_HES_STOPPED) | ||
984 | cpuc->pcr[idx] |= nop_for_index(idx); | ||
985 | else | ||
986 | cpuc->pcr[idx] |= event_encoding(enc, idx); | ||
987 | } | ||
988 | out: | ||
989 | for (i = 0; i < cpuc->n_events; i++) { | ||
990 | struct perf_event *cp = cpuc->event[i]; | ||
991 | int idx = cp->hw.idx; | ||
992 | |||
993 | cpuc->pcr[idx] |= cp->hw.config_base; | ||
994 | } | ||
995 | } | ||
996 | |||
997 | /* If performance event entries have been added, move existing events | ||
998 | * around (if necessary) and then assign new entries to counters. | ||
999 | */ | ||
1000 | static void update_pcrs_for_enable(struct cpu_hw_events *cpuc) | ||
1001 | { | ||
1002 | if (cpuc->n_added) | ||
1003 | read_in_all_counters(cpuc); | ||
1004 | |||
1005 | if (sparc_pmu->num_pcrs == 1) { | ||
1006 | calculate_single_pcr(cpuc); | ||
1007 | } else { | ||
1008 | calculate_multiple_pcrs(cpuc); | ||
1009 | } | ||
713 | } | 1010 | } |
714 | 1011 | ||
715 | static void sparc_pmu_enable(struct pmu *pmu) | 1012 | static void sparc_pmu_enable(struct pmu *pmu) |
716 | { | 1013 | { |
717 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1014 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
718 | u64 pcr; | 1015 | int i; |
719 | 1016 | ||
720 | if (cpuc->enabled) | 1017 | if (cpuc->enabled) |
721 | return; | 1018 | return; |
@@ -723,26 +1020,17 @@ static void sparc_pmu_enable(struct pmu *pmu) | |||
723 | cpuc->enabled = 1; | 1020 | cpuc->enabled = 1; |
724 | barrier(); | 1021 | barrier(); |
725 | 1022 | ||
726 | pcr = cpuc->pcr; | 1023 | if (cpuc->n_events) |
727 | if (!cpuc->n_events) { | 1024 | update_pcrs_for_enable(cpuc); |
728 | pcr = 0; | ||
729 | } else { | ||
730 | pcr = maybe_change_configuration(cpuc, pcr); | ||
731 | |||
732 | /* We require that all of the events have the same | ||
733 | * configuration, so just fetch the settings from the | ||
734 | * first entry. | ||
735 | */ | ||
736 | cpuc->pcr = pcr | cpuc->event[0]->hw.config_base; | ||
737 | } | ||
738 | 1025 | ||
739 | pcr_ops->write(cpuc->pcr); | 1026 | for (i = 0; i < sparc_pmu->num_pcrs; i++) |
1027 | pcr_ops->write_pcr(i, cpuc->pcr[i]); | ||
740 | } | 1028 | } |
741 | 1029 | ||
742 | static void sparc_pmu_disable(struct pmu *pmu) | 1030 | static void sparc_pmu_disable(struct pmu *pmu) |
743 | { | 1031 | { |
744 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1032 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
745 | u64 val; | 1033 | int i; |
746 | 1034 | ||
747 | if (!cpuc->enabled) | 1035 | if (!cpuc->enabled) |
748 | return; | 1036 | return; |
@@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu) | |||
750 | cpuc->enabled = 0; | 1038 | cpuc->enabled = 0; |
751 | cpuc->n_added = 0; | 1039 | cpuc->n_added = 0; |
752 | 1040 | ||
753 | val = cpuc->pcr; | 1041 | for (i = 0; i < sparc_pmu->num_pcrs; i++) { |
754 | val &= ~(PCR_UTRACE | PCR_STRACE | | 1042 | u64 val = cpuc->pcr[i]; |
755 | sparc_pmu->hv_bit | sparc_pmu->irq_bit); | ||
756 | cpuc->pcr = val; | ||
757 | 1043 | ||
758 | pcr_ops->write(cpuc->pcr); | 1044 | val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit | |
1045 | sparc_pmu->hv_bit | sparc_pmu->irq_bit); | ||
1046 | cpuc->pcr[i] = val; | ||
1047 | pcr_ops->write_pcr(i, cpuc->pcr[i]); | ||
1048 | } | ||
759 | } | 1049 | } |
760 | 1050 | ||
761 | static int active_event_index(struct cpu_hw_events *cpuc, | 1051 | static int active_event_index(struct cpu_hw_events *cpuc, |
@@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex); | |||
854 | static void perf_stop_nmi_watchdog(void *unused) | 1144 | static void perf_stop_nmi_watchdog(void *unused) |
855 | { | 1145 | { |
856 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1146 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1147 | int i; | ||
857 | 1148 | ||
858 | stop_nmi_watchdog(NULL); | 1149 | stop_nmi_watchdog(NULL); |
859 | cpuc->pcr = pcr_ops->read(); | 1150 | for (i = 0; i < sparc_pmu->num_pcrs; i++) |
1151 | cpuc->pcr[i] = pcr_ops->read_pcr(i); | ||
860 | } | 1152 | } |
861 | 1153 | ||
862 | void perf_event_grab_pmc(void) | 1154 | void perf_event_grab_pmc(void) |
@@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts, | |||
942 | if (!n_ev) | 1234 | if (!n_ev) |
943 | return 0; | 1235 | return 0; |
944 | 1236 | ||
945 | if (n_ev > MAX_HWEVENTS) | 1237 | if (n_ev > sparc_pmu->max_hw_events) |
946 | return -1; | 1238 | return -1; |
947 | 1239 | ||
1240 | if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) { | ||
1241 | int i; | ||
1242 | |||
1243 | for (i = 0; i < n_ev; i++) | ||
1244 | evts[i]->hw.idx = i; | ||
1245 | return 0; | ||
1246 | } | ||
1247 | |||
948 | msk0 = perf_event_get_msk(events[0]); | 1248 | msk0 = perf_event_get_msk(events[0]); |
949 | if (n_ev == 1) { | 1249 | if (n_ev == 1) { |
950 | if (msk0 & PIC_LOWER) | 1250 | if (msk0 & PIC_LOWER) |
@@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new) | |||
1000 | struct perf_event *event; | 1300 | struct perf_event *event; |
1001 | int i, n, first; | 1301 | int i, n, first; |
1002 | 1302 | ||
1303 | if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME)) | ||
1304 | return 0; | ||
1305 | |||
1003 | n = n_prev + n_new; | 1306 | n = n_prev + n_new; |
1004 | if (n <= 1) | 1307 | if (n <= 1) |
1005 | return 0; | 1308 | return 0; |
@@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) | |||
1059 | perf_pmu_disable(event->pmu); | 1362 | perf_pmu_disable(event->pmu); |
1060 | 1363 | ||
1061 | n0 = cpuc->n_events; | 1364 | n0 = cpuc->n_events; |
1062 | if (n0 >= MAX_HWEVENTS) | 1365 | if (n0 >= sparc_pmu->max_hw_events) |
1063 | goto out; | 1366 | goto out; |
1064 | 1367 | ||
1065 | cpuc->event[n0] = event; | 1368 | cpuc->event[n0] = event; |
@@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event) | |||
1146 | /* We save the enable bits in the config_base. */ | 1449 | /* We save the enable bits in the config_base. */ |
1147 | hwc->config_base = sparc_pmu->irq_bit; | 1450 | hwc->config_base = sparc_pmu->irq_bit; |
1148 | if (!attr->exclude_user) | 1451 | if (!attr->exclude_user) |
1149 | hwc->config_base |= PCR_UTRACE; | 1452 | hwc->config_base |= sparc_pmu->user_bit; |
1150 | if (!attr->exclude_kernel) | 1453 | if (!attr->exclude_kernel) |
1151 | hwc->config_base |= PCR_STRACE; | 1454 | hwc->config_base |= sparc_pmu->priv_bit; |
1152 | if (!attr->exclude_hv) | 1455 | if (!attr->exclude_hv) |
1153 | hwc->config_base |= sparc_pmu->hv_bit; | 1456 | hwc->config_base |= sparc_pmu->hv_bit; |
1154 | 1457 | ||
1155 | n = 0; | 1458 | n = 0; |
1156 | if (event->group_leader != event) { | 1459 | if (event->group_leader != event) { |
1157 | n = collect_events(event->group_leader, | 1460 | n = collect_events(event->group_leader, |
1158 | MAX_HWEVENTS - 1, | 1461 | sparc_pmu->max_hw_events - 1, |
1159 | evts, events, current_idx_dmy); | 1462 | evts, events, current_idx_dmy); |
1160 | if (n < 0) | 1463 | if (n < 0) |
1161 | return -EINVAL; | 1464 | return -EINVAL; |
@@ -1254,8 +1557,7 @@ static struct pmu pmu = { | |||
1254 | void perf_event_print_debug(void) | 1557 | void perf_event_print_debug(void) |
1255 | { | 1558 | { |
1256 | unsigned long flags; | 1559 | unsigned long flags; |
1257 | u64 pcr, pic; | 1560 | int cpu, i; |
1258 | int cpu; | ||
1259 | 1561 | ||
1260 | if (!sparc_pmu) | 1562 | if (!sparc_pmu) |
1261 | return; | 1563 | return; |
@@ -1264,12 +1566,13 @@ void perf_event_print_debug(void) | |||
1264 | 1566 | ||
1265 | cpu = smp_processor_id(); | 1567 | cpu = smp_processor_id(); |
1266 | 1568 | ||
1267 | pcr = pcr_ops->read(); | ||
1268 | read_pic(pic); | ||
1269 | |||
1270 | pr_info("\n"); | 1569 | pr_info("\n"); |
1271 | pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", | 1570 | for (i = 0; i < sparc_pmu->num_pcrs; i++) |
1272 | cpu, pcr, pic); | 1571 | pr_info("CPU#%d: PCR%d[%016llx]\n", |
1572 | cpu, i, pcr_ops->read_pcr(i)); | ||
1573 | for (i = 0; i < sparc_pmu->num_pic_regs; i++) | ||
1574 | pr_info("CPU#%d: PIC%d[%016llx]\n", | ||
1575 | cpu, i, pcr_ops->read_pic(i)); | ||
1273 | 1576 | ||
1274 | local_irq_restore(flags); | 1577 | local_irq_restore(flags); |
1275 | } | 1578 | } |
@@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, | |||
1305 | * Do this before we peek at the counters to determine | 1608 | * Do this before we peek at the counters to determine |
1306 | * overflow so we don't lose any events. | 1609 | * overflow so we don't lose any events. |
1307 | */ | 1610 | */ |
1308 | if (sparc_pmu->irq_bit) | 1611 | if (sparc_pmu->irq_bit && |
1309 | pcr_ops->write(cpuc->pcr); | 1612 | sparc_pmu->num_pcrs == 1) |
1613 | pcr_ops->write_pcr(0, cpuc->pcr[0]); | ||
1310 | 1614 | ||
1311 | for (i = 0; i < cpuc->n_events; i++) { | 1615 | for (i = 0; i < cpuc->n_events; i++) { |
1312 | struct perf_event *event = cpuc->event[i]; | 1616 | struct perf_event *event = cpuc->event[i]; |
@@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, | |||
1314 | struct hw_perf_event *hwc; | 1618 | struct hw_perf_event *hwc; |
1315 | u64 val; | 1619 | u64 val; |
1316 | 1620 | ||
1621 | if (sparc_pmu->irq_bit && | ||
1622 | sparc_pmu->num_pcrs > 1) | ||
1623 | pcr_ops->write_pcr(idx, cpuc->pcr[idx]); | ||
1624 | |||
1317 | hwc = &event->hw; | 1625 | hwc = &event->hw; |
1318 | val = sparc_perf_event_update(event, hwc, idx); | 1626 | val = sparc_perf_event_update(event, hwc, idx); |
1319 | if (val & (1ULL << 31)) | 1627 | if (val & (1ULL << 31)) |
@@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void) | |||
1352 | sparc_pmu = &niagara2_pmu; | 1660 | sparc_pmu = &niagara2_pmu; |
1353 | return true; | 1661 | return true; |
1354 | } | 1662 | } |
1663 | if (!strcmp(sparc_pmu_type, "niagara4")) { | ||
1664 | sparc_pmu = &niagara4_pmu; | ||
1665 | return true; | ||
1666 | } | ||
1355 | return false; | 1667 | return false; |
1356 | } | 1668 | } |
1357 | 1669 | ||
diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index 340c5b976d28..d397d7fc5c28 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c | |||
@@ -37,7 +37,7 @@ void * __init prom_early_alloc(unsigned long size) | |||
37 | void *ret; | 37 | void *ret; |
38 | 38 | ||
39 | if (!paddr) { | 39 | if (!paddr) { |
40 | prom_printf("prom_early_alloc(%lu) failed\n"); | 40 | prom_printf("prom_early_alloc(%lu) failed\n", size); |
41 | prom_halt(); | 41 | prom_halt(); |
42 | } | 42 | } |
43 | 43 | ||
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 1414d16712b2..0800e71d8a88 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c | |||
@@ -340,7 +340,12 @@ static const char *hwcaps[] = { | |||
340 | */ | 340 | */ |
341 | "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", | 341 | "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", |
342 | "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", | 342 | "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", |
343 | "ima", "cspare", | 343 | "ima", "cspare", "pause", "cbcond", |
344 | }; | ||
345 | |||
346 | static const char *crypto_hwcaps[] = { | ||
347 | "aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256", | ||
348 | "sha512", "mpmul", "montmul", "montsqr", "crc32c", | ||
344 | }; | 349 | }; |
345 | 350 | ||
346 | void cpucap_info(struct seq_file *m) | 351 | void cpucap_info(struct seq_file *m) |
@@ -357,27 +362,61 @@ void cpucap_info(struct seq_file *m) | |||
357 | printed++; | 362 | printed++; |
358 | } | 363 | } |
359 | } | 364 | } |
365 | if (caps & HWCAP_SPARC_CRYPTO) { | ||
366 | unsigned long cfr; | ||
367 | |||
368 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
369 | for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { | ||
370 | unsigned long bit = 1UL << i; | ||
371 | if (cfr & bit) { | ||
372 | seq_printf(m, "%s%s", | ||
373 | printed ? "," : "", crypto_hwcaps[i]); | ||
374 | printed++; | ||
375 | } | ||
376 | } | ||
377 | } | ||
360 | seq_putc(m, '\n'); | 378 | seq_putc(m, '\n'); |
361 | } | 379 | } |
362 | 380 | ||
381 | static void __init report_one_hwcap(int *printed, const char *name) | ||
382 | { | ||
383 | if ((*printed) == 0) | ||
384 | printk(KERN_INFO "CPU CAPS: ["); | ||
385 | printk(KERN_CONT "%s%s", | ||
386 | (*printed) ? "," : "", name); | ||
387 | if (++(*printed) == 8) { | ||
388 | printk(KERN_CONT "]\n"); | ||
389 | *printed = 0; | ||
390 | } | ||
391 | } | ||
392 | |||
393 | static void __init report_crypto_hwcaps(int *printed) | ||
394 | { | ||
395 | unsigned long cfr; | ||
396 | int i; | ||
397 | |||
398 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
399 | |||
400 | for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { | ||
401 | unsigned long bit = 1UL << i; | ||
402 | if (cfr & bit) | ||
403 | report_one_hwcap(printed, crypto_hwcaps[i]); | ||
404 | } | ||
405 | } | ||
406 | |||
363 | static void __init report_hwcaps(unsigned long caps) | 407 | static void __init report_hwcaps(unsigned long caps) |
364 | { | 408 | { |
365 | int i, printed = 0; | 409 | int i, printed = 0; |
366 | 410 | ||
367 | printk(KERN_INFO "CPU CAPS: ["); | ||
368 | for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { | 411 | for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { |
369 | unsigned long bit = 1UL << i; | 412 | unsigned long bit = 1UL << i; |
370 | if (caps & bit) { | 413 | if (caps & bit) |
371 | printk(KERN_CONT "%s%s", | 414 | report_one_hwcap(&printed, hwcaps[i]); |
372 | printed ? "," : "", hwcaps[i]); | ||
373 | if (++printed == 8) { | ||
374 | printk(KERN_CONT "]\n"); | ||
375 | printk(KERN_INFO "CPU CAPS: ["); | ||
376 | printed = 0; | ||
377 | } | ||
378 | } | ||
379 | } | 415 | } |
380 | printk(KERN_CONT "]\n"); | 416 | if (caps & HWCAP_SPARC_CRYPTO) |
417 | report_crypto_hwcaps(&printed); | ||
418 | if (printed != 0) | ||
419 | printk(KERN_CONT "]\n"); | ||
381 | } | 420 | } |
382 | 421 | ||
383 | static unsigned long __init mdesc_cpu_hwcap_list(void) | 422 | static unsigned long __init mdesc_cpu_hwcap_list(void) |
@@ -411,6 +450,10 @@ static unsigned long __init mdesc_cpu_hwcap_list(void) | |||
411 | break; | 450 | break; |
412 | } | 451 | } |
413 | } | 452 | } |
453 | for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { | ||
454 | if (!strcmp(prop, crypto_hwcaps[i])) | ||
455 | caps |= HWCAP_SPARC_CRYPTO; | ||
456 | } | ||
414 | 457 | ||
415 | plen = strlen(prop) + 1; | 458 | plen = strlen(prop) + 1; |
416 | prop += plen; | 459 | prop += plen; |
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index a53e0a5fd3a3..53e48f721ce3 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c | |||
@@ -54,58 +54,6 @@ struct signal_frame32 { | |||
54 | /* __siginfo_rwin_t * */u32 rwin_save; | 54 | /* __siginfo_rwin_t * */u32 rwin_save; |
55 | } __attribute__((aligned(8))); | 55 | } __attribute__((aligned(8))); |
56 | 56 | ||
57 | typedef struct compat_siginfo{ | ||
58 | int si_signo; | ||
59 | int si_errno; | ||
60 | int si_code; | ||
61 | |||
62 | union { | ||
63 | int _pad[SI_PAD_SIZE32]; | ||
64 | |||
65 | /* kill() */ | ||
66 | struct { | ||
67 | compat_pid_t _pid; /* sender's pid */ | ||
68 | unsigned int _uid; /* sender's uid */ | ||
69 | } _kill; | ||
70 | |||
71 | /* POSIX.1b timers */ | ||
72 | struct { | ||
73 | compat_timer_t _tid; /* timer id */ | ||
74 | int _overrun; /* overrun count */ | ||
75 | compat_sigval_t _sigval; /* same as below */ | ||
76 | int _sys_private; /* not to be passed to user */ | ||
77 | } _timer; | ||
78 | |||
79 | /* POSIX.1b signals */ | ||
80 | struct { | ||
81 | compat_pid_t _pid; /* sender's pid */ | ||
82 | unsigned int _uid; /* sender's uid */ | ||
83 | compat_sigval_t _sigval; | ||
84 | } _rt; | ||
85 | |||
86 | /* SIGCHLD */ | ||
87 | struct { | ||
88 | compat_pid_t _pid; /* which child */ | ||
89 | unsigned int _uid; /* sender's uid */ | ||
90 | int _status; /* exit code */ | ||
91 | compat_clock_t _utime; | ||
92 | compat_clock_t _stime; | ||
93 | } _sigchld; | ||
94 | |||
95 | /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */ | ||
96 | struct { | ||
97 | u32 _addr; /* faulting insn/memory ref. */ | ||
98 | int _trapno; | ||
99 | } _sigfault; | ||
100 | |||
101 | /* SIGPOLL */ | ||
102 | struct { | ||
103 | int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ | ||
104 | int _fd; | ||
105 | } _sigpoll; | ||
106 | } _sifields; | ||
107 | }compat_siginfo_t; | ||
108 | |||
109 | struct rt_signal_frame32 { | 57 | struct rt_signal_frame32 { |
110 | struct sparc_stackf32 ss; | 58 | struct sparc_stackf32 ss; |
111 | compat_siginfo_t info; | 59 | compat_siginfo_t info; |
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index e1fbf8c75787..bde867fd71e8 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S | |||
@@ -176,7 +176,7 @@ sun4v_tsb_miss_common: | |||
176 | 176 | ||
177 | sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2 | 177 | sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2 |
178 | 178 | ||
179 | #ifdef CONFIG_HUGETLB_PAGE | 179 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
180 | mov SCRATCHPAD_UTSBREG2, %g5 | 180 | mov SCRATCHPAD_UTSBREG2, %g5 |
181 | ldxa [%g5] ASI_SCRATCHPAD, %g5 | 181 | ldxa [%g5] ASI_SCRATCHPAD, %g5 |
182 | cmp %g5, -1 | 182 | cmp %g5, -1 |
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index d97f3eb72e06..44025f4ba41f 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S | |||
@@ -90,7 +90,7 @@ SIGN1(sys32_mkdir, sys_mkdir, %o1) | |||
90 | SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) | 90 | SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) |
91 | SIGN1(sys32_sysfs, compat_sys_sysfs, %o0) | 91 | SIGN1(sys32_sysfs, compat_sys_sysfs, %o0) |
92 | SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) | 92 | SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) |
93 | SIGN2(sys32_sendfile64, compat_sys_sendfile64, %o0, %o1) | 93 | SIGN2(sys32_sendfile64, sys_sendfile, %o0, %o1) |
94 | SIGN1(sys32_prctl, sys_prctl, %o0) | 94 | SIGN1(sys32_prctl, sys_prctl, %o0) |
95 | SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0) | 95 | SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0) |
96 | SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2) | 96 | SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2) |
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index f7392336961f..d862499eb01c 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c | |||
@@ -506,52 +506,6 @@ long compat_sys_fadvise64_64(int fd, | |||
506 | advice); | 506 | advice); |
507 | } | 507 | } |
508 | 508 | ||
509 | asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, | ||
510 | compat_off_t __user *offset, | ||
511 | compat_size_t count) | ||
512 | { | ||
513 | mm_segment_t old_fs = get_fs(); | ||
514 | int ret; | ||
515 | off_t of; | ||
516 | |||
517 | if (offset && get_user(of, offset)) | ||
518 | return -EFAULT; | ||
519 | |||
520 | set_fs(KERNEL_DS); | ||
521 | ret = sys_sendfile(out_fd, in_fd, | ||
522 | offset ? (off_t __user *) &of : NULL, | ||
523 | count); | ||
524 | set_fs(old_fs); | ||
525 | |||
526 | if (offset && put_user(of, offset)) | ||
527 | return -EFAULT; | ||
528 | |||
529 | return ret; | ||
530 | } | ||
531 | |||
532 | asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, | ||
533 | compat_loff_t __user *offset, | ||
534 | compat_size_t count) | ||
535 | { | ||
536 | mm_segment_t old_fs = get_fs(); | ||
537 | int ret; | ||
538 | loff_t lof; | ||
539 | |||
540 | if (offset && get_user(lof, offset)) | ||
541 | return -EFAULT; | ||
542 | |||
543 | set_fs(KERNEL_DS); | ||
544 | ret = sys_sendfile64(out_fd, in_fd, | ||
545 | offset ? (loff_t __user *) &lof : NULL, | ||
546 | count); | ||
547 | set_fs(old_fs); | ||
548 | |||
549 | if (offset && put_user(lof, offset)) | ||
550 | return -EFAULT; | ||
551 | |||
552 | return ret; | ||
553 | } | ||
554 | |||
555 | /* This is just a version for 32-bit applications which does | 509 | /* This is just a version for 32-bit applications which does |
556 | * not force O_LARGEFILE on. | 510 | * not force O_LARGEFILE on. |
557 | */ | 511 | */ |
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 0dc1f5786081..11c6c9603e71 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c | |||
@@ -502,12 +502,12 @@ SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality) | |||
502 | { | 502 | { |
503 | int ret; | 503 | int ret; |
504 | 504 | ||
505 | if (current->personality == PER_LINUX32 && | 505 | if (personality(current->personality) == PER_LINUX32 && |
506 | personality == PER_LINUX) | 506 | personality(personality) == PER_LINUX) |
507 | personality = PER_LINUX32; | 507 | personality |= PER_LINUX32; |
508 | ret = sys_personality(personality); | 508 | ret = sys_personality(personality); |
509 | if (ret == PER_LINUX32) | 509 | if (personality(ret) == PER_LINUX32) |
510 | ret = PER_LINUX; | 510 | ret &= ~PER_LINUX32; |
511 | 511 | ||
512 | return ret; | 512 | return ret; |
513 | } | 513 | } |
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 3b05e6697710..fa1f1d375ffc 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c | |||
@@ -850,7 +850,7 @@ void __init cheetah_ecache_flush_init(void) | |||
850 | ecache_flush_physbase = find_ecache_flush_span(ecache_flush_size); | 850 | ecache_flush_physbase = find_ecache_flush_span(ecache_flush_size); |
851 | 851 | ||
852 | if (ecache_flush_physbase == ~0UL) { | 852 | if (ecache_flush_physbase == ~0UL) { |
853 | prom_printf("cheetah_ecache_flush_init: Cannot find %d byte " | 853 | prom_printf("cheetah_ecache_flush_init: Cannot find %ld byte " |
854 | "contiguous physical memory.\n", | 854 | "contiguous physical memory.\n", |
855 | ecache_flush_size); | 855 | ecache_flush_size); |
856 | prom_halt(); | 856 | prom_halt(); |
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index db15d123f054..d4bdc7a62375 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S | |||
@@ -49,7 +49,7 @@ tsb_miss_page_table_walk: | |||
49 | /* Before committing to a full page table walk, | 49 | /* Before committing to a full page table walk, |
50 | * check the huge page TSB. | 50 | * check the huge page TSB. |
51 | */ | 51 | */ |
52 | #ifdef CONFIG_HUGETLB_PAGE | 52 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
53 | 53 | ||
54 | 661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5 | 54 | 661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5 |
55 | nop | 55 | nop |
@@ -110,12 +110,9 @@ tsb_miss_page_table_walk: | |||
110 | tsb_miss_page_table_walk_sun4v_fastpath: | 110 | tsb_miss_page_table_walk_sun4v_fastpath: |
111 | USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) | 111 | USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) |
112 | 112 | ||
113 | /* Load and check PTE. */ | 113 | /* Valid PTE is now in %g5. */ |
114 | ldxa [%g5] ASI_PHYS_USE_EC, %g5 | ||
115 | brgez,pn %g5, tsb_do_fault | ||
116 | nop | ||
117 | 114 | ||
118 | #ifdef CONFIG_HUGETLB_PAGE | 115 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
119 | 661: sethi %uhi(_PAGE_SZALL_4U), %g7 | 116 | 661: sethi %uhi(_PAGE_SZALL_4U), %g7 |
120 | sllx %g7, 32, %g7 | 117 | sllx %g7, 32, %g7 |
121 | .section .sun4v_2insn_patch, "ax" | 118 | .section .sun4v_2insn_patch, "ax" |
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index dff4096f3dec..8410065f2862 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile | |||
@@ -32,6 +32,9 @@ lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o | |||
32 | lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o | 32 | lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o |
33 | lib-$(CONFIG_SPARC64) += NG2patch.o | 33 | lib-$(CONFIG_SPARC64) += NG2patch.o |
34 | 34 | ||
35 | lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o | ||
36 | lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o | ||
37 | |||
35 | lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o | 38 | lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o |
36 | lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o | 39 | lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o |
37 | 40 | ||
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index 03eadf66b0d3..2c20ad63ddbf 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S | |||
@@ -14,7 +14,7 @@ | |||
14 | #define FPRS_FEF 0x04 | 14 | #define FPRS_FEF 0x04 |
15 | #ifdef MEMCPY_DEBUG | 15 | #ifdef MEMCPY_DEBUG |
16 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ | 16 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ |
17 | clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; | 17 | clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; |
18 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | 18 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs |
19 | #else | 19 | #else |
20 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs | 20 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs |
@@ -182,13 +182,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
182 | cmp %g2, 0 | 182 | cmp %g2, 0 |
183 | tne %xcc, 5 | 183 | tne %xcc, 5 |
184 | PREAMBLE | 184 | PREAMBLE |
185 | mov %o0, GLOBAL_SPARE | 185 | mov %o0, %o3 |
186 | cmp %o2, 0 | 186 | cmp %o2, 0 |
187 | be,pn %XCC, 85f | 187 | be,pn %XCC, 85f |
188 | or %o0, %o1, %o3 | 188 | or %o0, %o1, GLOBAL_SPARE |
189 | cmp %o2, 16 | 189 | cmp %o2, 16 |
190 | blu,a,pn %XCC, 80f | 190 | blu,a,pn %XCC, 80f |
191 | or %o3, %o2, %o3 | 191 | or GLOBAL_SPARE, %o2, GLOBAL_SPARE |
192 | 192 | ||
193 | /* 2 blocks (128 bytes) is the minimum we can do the block | 193 | /* 2 blocks (128 bytes) is the minimum we can do the block |
194 | * copy with. We need to ensure that we'll iterate at least | 194 | * copy with. We need to ensure that we'll iterate at least |
@@ -202,7 +202,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
202 | */ | 202 | */ |
203 | cmp %o2, (4 * 64) | 203 | cmp %o2, (4 * 64) |
204 | blu,pt %XCC, 75f | 204 | blu,pt %XCC, 75f |
205 | andcc %o3, 0x7, %g0 | 205 | andcc GLOBAL_SPARE, 0x7, %g0 |
206 | 206 | ||
207 | /* %o0: dst | 207 | /* %o0: dst |
208 | * %o1: src | 208 | * %o1: src |
@@ -404,13 +404,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
404 | * over. If anything is left, we copy it one byte at a time. | 404 | * over. If anything is left, we copy it one byte at a time. |
405 | */ | 405 | */ |
406 | brz,pt %o2, 85f | 406 | brz,pt %o2, 85f |
407 | sub %o0, %o1, %o3 | 407 | sub %o0, %o1, GLOBAL_SPARE |
408 | ba,a,pt %XCC, 90f | 408 | ba,a,pt %XCC, 90f |
409 | 409 | ||
410 | .align 64 | 410 | .align 64 |
411 | 75: /* 16 < len <= 64 */ | 411 | 75: /* 16 < len <= 64 */ |
412 | bne,pn %XCC, 75f | 412 | bne,pn %XCC, 75f |
413 | sub %o0, %o1, %o3 | 413 | sub %o0, %o1, GLOBAL_SPARE |
414 | 414 | ||
415 | 72: | 415 | 72: |
416 | andn %o2, 0xf, %o4 | 416 | andn %o2, 0xf, %o4 |
@@ -420,9 +420,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
420 | add %o1, 0x08, %o1 | 420 | add %o1, 0x08, %o1 |
421 | EX_LD(LOAD(ldx, %o1, %g1)) | 421 | EX_LD(LOAD(ldx, %o1, %g1)) |
422 | sub %o1, 0x08, %o1 | 422 | sub %o1, 0x08, %o1 |
423 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | 423 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) |
424 | add %o1, 0x8, %o1 | 424 | add %o1, 0x8, %o1 |
425 | EX_ST(STORE(stx, %g1, %o1 + %o3)) | 425 | EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) |
426 | bgu,pt %XCC, 1b | 426 | bgu,pt %XCC, 1b |
427 | add %o1, 0x8, %o1 | 427 | add %o1, 0x8, %o1 |
428 | 73: andcc %o2, 0x8, %g0 | 428 | 73: andcc %o2, 0x8, %g0 |
@@ -430,14 +430,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
430 | nop | 430 | nop |
431 | sub %o2, 0x8, %o2 | 431 | sub %o2, 0x8, %o2 |
432 | EX_LD(LOAD(ldx, %o1, %o5)) | 432 | EX_LD(LOAD(ldx, %o1, %o5)) |
433 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | 433 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) |
434 | add %o1, 0x8, %o1 | 434 | add %o1, 0x8, %o1 |
435 | 1: andcc %o2, 0x4, %g0 | 435 | 1: andcc %o2, 0x4, %g0 |
436 | be,pt %XCC, 1f | 436 | be,pt %XCC, 1f |
437 | nop | 437 | nop |
438 | sub %o2, 0x4, %o2 | 438 | sub %o2, 0x4, %o2 |
439 | EX_LD(LOAD(lduw, %o1, %o5)) | 439 | EX_LD(LOAD(lduw, %o1, %o5)) |
440 | EX_ST(STORE(stw, %o5, %o1 + %o3)) | 440 | EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) |
441 | add %o1, 0x4, %o1 | 441 | add %o1, 0x4, %o1 |
442 | 1: cmp %o2, 0 | 442 | 1: cmp %o2, 0 |
443 | be,pt %XCC, 85f | 443 | be,pt %XCC, 85f |
@@ -454,11 +454,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
454 | 454 | ||
455 | 1: subcc %g1, 1, %g1 | 455 | 1: subcc %g1, 1, %g1 |
456 | EX_LD(LOAD(ldub, %o1, %o5)) | 456 | EX_LD(LOAD(ldub, %o1, %o5)) |
457 | EX_ST(STORE(stb, %o5, %o1 + %o3)) | 457 | EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) |
458 | bgu,pt %icc, 1b | 458 | bgu,pt %icc, 1b |
459 | add %o1, 1, %o1 | 459 | add %o1, 1, %o1 |
460 | 460 | ||
461 | 2: add %o1, %o3, %o0 | 461 | 2: add %o1, GLOBAL_SPARE, %o0 |
462 | andcc %o1, 0x7, %g1 | 462 | andcc %o1, 0x7, %g1 |
463 | bne,pt %icc, 8f | 463 | bne,pt %icc, 8f |
464 | sll %g1, 3, %g1 | 464 | sll %g1, 3, %g1 |
@@ -468,16 +468,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
468 | nop | 468 | nop |
469 | ba,a,pt %xcc, 73b | 469 | ba,a,pt %xcc, 73b |
470 | 470 | ||
471 | 8: mov 64, %o3 | 471 | 8: mov 64, GLOBAL_SPARE |
472 | andn %o1, 0x7, %o1 | 472 | andn %o1, 0x7, %o1 |
473 | EX_LD(LOAD(ldx, %o1, %g2)) | 473 | EX_LD(LOAD(ldx, %o1, %g2)) |
474 | sub %o3, %g1, %o3 | 474 | sub GLOBAL_SPARE, %g1, GLOBAL_SPARE |
475 | andn %o2, 0x7, %o4 | 475 | andn %o2, 0x7, %o4 |
476 | sllx %g2, %g1, %g2 | 476 | sllx %g2, %g1, %g2 |
477 | 1: add %o1, 0x8, %o1 | 477 | 1: add %o1, 0x8, %o1 |
478 | EX_LD(LOAD(ldx, %o1, %g3)) | 478 | EX_LD(LOAD(ldx, %o1, %g3)) |
479 | subcc %o4, 0x8, %o4 | 479 | subcc %o4, 0x8, %o4 |
480 | srlx %g3, %o3, %o5 | 480 | srlx %g3, GLOBAL_SPARE, %o5 |
481 | or %o5, %g2, %o5 | 481 | or %o5, %g2, %o5 |
482 | EX_ST(STORE(stx, %o5, %o0)) | 482 | EX_ST(STORE(stx, %o5, %o0)) |
483 | add %o0, 0x8, %o0 | 483 | add %o0, 0x8, %o0 |
@@ -489,32 +489,32 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
489 | be,pn %icc, 85f | 489 | be,pn %icc, 85f |
490 | add %o1, %g1, %o1 | 490 | add %o1, %g1, %o1 |
491 | ba,pt %xcc, 90f | 491 | ba,pt %xcc, 90f |
492 | sub %o0, %o1, %o3 | 492 | sub %o0, %o1, GLOBAL_SPARE |
493 | 493 | ||
494 | .align 64 | 494 | .align 64 |
495 | 80: /* 0 < len <= 16 */ | 495 | 80: /* 0 < len <= 16 */ |
496 | andcc %o3, 0x3, %g0 | 496 | andcc GLOBAL_SPARE, 0x3, %g0 |
497 | bne,pn %XCC, 90f | 497 | bne,pn %XCC, 90f |
498 | sub %o0, %o1, %o3 | 498 | sub %o0, %o1, GLOBAL_SPARE |
499 | 499 | ||
500 | 1: | 500 | 1: |
501 | subcc %o2, 4, %o2 | 501 | subcc %o2, 4, %o2 |
502 | EX_LD(LOAD(lduw, %o1, %g1)) | 502 | EX_LD(LOAD(lduw, %o1, %g1)) |
503 | EX_ST(STORE(stw, %g1, %o1 + %o3)) | 503 | EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) |
504 | bgu,pt %XCC, 1b | 504 | bgu,pt %XCC, 1b |
505 | add %o1, 4, %o1 | 505 | add %o1, 4, %o1 |
506 | 506 | ||
507 | 85: retl | 507 | 85: retl |
508 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | 508 | mov EX_RETVAL(%o3), %o0 |
509 | 509 | ||
510 | .align 32 | 510 | .align 32 |
511 | 90: | 511 | 90: |
512 | subcc %o2, 1, %o2 | 512 | subcc %o2, 1, %o2 |
513 | EX_LD(LOAD(ldub, %o1, %g1)) | 513 | EX_LD(LOAD(ldub, %o1, %g1)) |
514 | EX_ST(STORE(stb, %g1, %o1 + %o3)) | 514 | EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) |
515 | bgu,pt %XCC, 90b | 515 | bgu,pt %XCC, 90b |
516 | add %o1, 1, %o1 | 516 | add %o1, 1, %o1 |
517 | retl | 517 | retl |
518 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | 518 | mov EX_RETVAL(%o3), %o0 |
519 | 519 | ||
520 | .size FUNC_NAME, .-FUNC_NAME | 520 | .size FUNC_NAME, .-FUNC_NAME |
diff --git a/arch/sparc/lib/NG4clear_page.S b/arch/sparc/lib/NG4clear_page.S new file mode 100644 index 000000000000..e16c88204a42 --- /dev/null +++ b/arch/sparc/lib/NG4clear_page.S | |||
@@ -0,0 +1,29 @@ | |||
1 | /* NG4copy_page.S: Niagara-4 optimized clear page. | ||
2 | * | ||
3 | * Copyright (C) 2012 (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | #include <asm/page.h> | ||
8 | |||
9 | .text | ||
10 | |||
11 | .register %g3, #scratch | ||
12 | |||
13 | .align 32 | ||
14 | .globl NG4clear_page | ||
15 | .globl NG4clear_user_page | ||
16 | NG4clear_page: /* %o0=dest */ | ||
17 | NG4clear_user_page: /* %o0=dest, %o1=vaddr */ | ||
18 | set PAGE_SIZE, %g7 | ||
19 | mov 0x20, %g3 | ||
20 | 1: stxa %g0, [%o0 + %g0] ASI_ST_BLKINIT_MRU_P | ||
21 | subcc %g7, 0x40, %g7 | ||
22 | stxa %g0, [%o0 + %g3] ASI_ST_BLKINIT_MRU_P | ||
23 | bne,pt %xcc, 1b | ||
24 | add %o0, 0x40, %o0 | ||
25 | membar #StoreLoad|#StoreStore | ||
26 | retl | ||
27 | nop | ||
28 | .size NG4clear_page,.-NG4clear_page | ||
29 | .size NG4clear_user_page,.-NG4clear_user_page \ No newline at end of file | ||
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S new file mode 100644 index 000000000000..fd9f903ffa32 --- /dev/null +++ b/arch/sparc/lib/NG4copy_from_user.S | |||
@@ -0,0 +1,30 @@ | |||
1 | /* NG4copy_from_user.S: Niagara-4 optimized copy from userspace. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_LD(x) \ | ||
7 | 98: x; \ | ||
8 | .section __ex_table,"a";\ | ||
9 | .align 4; \ | ||
10 | .word 98b, __retl_one_asi;\ | ||
11 | .text; \ | ||
12 | .align 4; | ||
13 | |||
14 | #ifndef ASI_AIUS | ||
15 | #define ASI_AIUS 0x11 | ||
16 | #endif | ||
17 | |||
18 | #define FUNC_NAME NG4copy_from_user | ||
19 | #define LOAD(type,addr,dest) type##a [addr] %asi, dest | ||
20 | #define EX_RETVAL(x) 0 | ||
21 | |||
22 | #ifdef __KERNEL__ | ||
23 | #define PREAMBLE \ | ||
24 | rd %asi, %g1; \ | ||
25 | cmp %g1, ASI_AIUS; \ | ||
26 | bne,pn %icc, ___copy_in_user; \ | ||
27 | nop | ||
28 | #endif | ||
29 | |||
30 | #include "NG4memcpy.S" | ||
diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S new file mode 100644 index 000000000000..28504e88c535 --- /dev/null +++ b/arch/sparc/lib/NG4copy_page.S | |||
@@ -0,0 +1,57 @@ | |||
1 | /* NG4copy_page.S: Niagara-4 optimized copy page. | ||
2 | * | ||
3 | * Copyright (C) 2012 (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | #include <asm/page.h> | ||
8 | |||
9 | .text | ||
10 | .align 32 | ||
11 | |||
12 | .register %g2, #scratch | ||
13 | .register %g3, #scratch | ||
14 | |||
15 | .globl NG4copy_user_page | ||
16 | NG4copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | ||
17 | prefetch [%o1 + 0x000], #n_reads_strong | ||
18 | prefetch [%o1 + 0x040], #n_reads_strong | ||
19 | prefetch [%o1 + 0x080], #n_reads_strong | ||
20 | prefetch [%o1 + 0x0c0], #n_reads_strong | ||
21 | set PAGE_SIZE, %g7 | ||
22 | prefetch [%o1 + 0x100], #n_reads_strong | ||
23 | prefetch [%o1 + 0x140], #n_reads_strong | ||
24 | prefetch [%o1 + 0x180], #n_reads_strong | ||
25 | prefetch [%o1 + 0x1c0], #n_reads_strong | ||
26 | 1: | ||
27 | ldx [%o1 + 0x00], %o2 | ||
28 | subcc %g7, 0x40, %g7 | ||
29 | ldx [%o1 + 0x08], %o3 | ||
30 | ldx [%o1 + 0x10], %o4 | ||
31 | ldx [%o1 + 0x18], %o5 | ||
32 | ldx [%o1 + 0x20], %g1 | ||
33 | stxa %o2, [%o0] ASI_ST_BLKINIT_MRU_P | ||
34 | add %o0, 0x08, %o0 | ||
35 | ldx [%o1 + 0x28], %g2 | ||
36 | stxa %o3, [%o0] ASI_ST_BLKINIT_MRU_P | ||
37 | add %o0, 0x08, %o0 | ||
38 | ldx [%o1 + 0x30], %g3 | ||
39 | stxa %o4, [%o0] ASI_ST_BLKINIT_MRU_P | ||
40 | add %o0, 0x08, %o0 | ||
41 | ldx [%o1 + 0x38], %o2 | ||
42 | add %o1, 0x40, %o1 | ||
43 | stxa %o5, [%o0] ASI_ST_BLKINIT_MRU_P | ||
44 | add %o0, 0x08, %o0 | ||
45 | stxa %g1, [%o0] ASI_ST_BLKINIT_MRU_P | ||
46 | add %o0, 0x08, %o0 | ||
47 | stxa %g2, [%o0] ASI_ST_BLKINIT_MRU_P | ||
48 | add %o0, 0x08, %o0 | ||
49 | stxa %g3, [%o0] ASI_ST_BLKINIT_MRU_P | ||
50 | add %o0, 0x08, %o0 | ||
51 | stxa %o2, [%o0] ASI_ST_BLKINIT_MRU_P | ||
52 | add %o0, 0x08, %o0 | ||
53 | bne,pt %icc, 1b | ||
54 | prefetch [%o1 + 0x200], #n_reads_strong | ||
55 | retl | ||
56 | membar #StoreLoad | #StoreStore | ||
57 | .size NG4copy_user_page,.-NG4copy_user_page | ||
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S new file mode 100644 index 000000000000..9744c4540a8d --- /dev/null +++ b/arch/sparc/lib/NG4copy_to_user.S | |||
@@ -0,0 +1,39 @@ | |||
1 | /* NG4copy_to_user.S: Niagara-4 optimized copy to userspace. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_ST(x) \ | ||
7 | 98: x; \ | ||
8 | .section __ex_table,"a";\ | ||
9 | .align 4; \ | ||
10 | .word 98b, __retl_one_asi;\ | ||
11 | .text; \ | ||
12 | .align 4; | ||
13 | |||
14 | #ifndef ASI_AIUS | ||
15 | #define ASI_AIUS 0x11 | ||
16 | #endif | ||
17 | |||
18 | #ifndef ASI_BLK_INIT_QUAD_LDD_AIUS | ||
19 | #define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 | ||
20 | #endif | ||
21 | |||
22 | #define FUNC_NAME NG4copy_to_user | ||
23 | #define STORE(type,src,addr) type##a src, [addr] %asi | ||
24 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS | ||
25 | #define EX_RETVAL(x) 0 | ||
26 | |||
27 | #ifdef __KERNEL__ | ||
28 | /* Writing to %asi is _expensive_ so we hardcode it. | ||
29 | * Reading %asi to check for KERNEL_DS is comparatively | ||
30 | * cheap. | ||
31 | */ | ||
32 | #define PREAMBLE \ | ||
33 | rd %asi, %g1; \ | ||
34 | cmp %g1, ASI_AIUS; \ | ||
35 | bne,pn %icc, ___copy_in_user; \ | ||
36 | nop | ||
37 | #endif | ||
38 | |||
39 | #include "NG4memcpy.S" | ||
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S new file mode 100644 index 000000000000..9cf2ee01cee3 --- /dev/null +++ b/arch/sparc/lib/NG4memcpy.S | |||
@@ -0,0 +1,360 @@ | |||
1 | /* NG4memcpy.S: Niagara-4 optimized memcpy. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #ifdef __KERNEL__ | ||
7 | #include <asm/visasm.h> | ||
8 | #include <asm/asi.h> | ||
9 | #define GLOBAL_SPARE %g7 | ||
10 | #else | ||
11 | #define ASI_BLK_INIT_QUAD_LDD_P 0xe2 | ||
12 | #define FPRS_FEF 0x04 | ||
13 | |||
14 | /* On T4 it is very expensive to access ASRs like %fprs and | ||
15 | * %asi, avoiding a read or a write can save ~50 cycles. | ||
16 | */ | ||
17 | #define FPU_ENTER \ | ||
18 | rd %fprs, %o5; \ | ||
19 | andcc %o5, FPRS_FEF, %g0; \ | ||
20 | be,a,pn %icc, 999f; \ | ||
21 | wr %g0, FPRS_FEF, %fprs; \ | ||
22 | 999: | ||
23 | |||
24 | #ifdef MEMCPY_DEBUG | ||
25 | #define VISEntryHalf FPU_ENTER; \ | ||
26 | clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; | ||
27 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | ||
28 | #else | ||
29 | #define VISEntryHalf FPU_ENTER | ||
30 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | ||
31 | #endif | ||
32 | |||
33 | #define GLOBAL_SPARE %g5 | ||
34 | #endif | ||
35 | |||
36 | #ifndef STORE_ASI | ||
37 | #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA | ||
38 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P | ||
39 | #else | ||
40 | #define STORE_ASI 0x80 /* ASI_P */ | ||
41 | #endif | ||
42 | #endif | ||
43 | |||
44 | #ifndef EX_LD | ||
45 | #define EX_LD(x) x | ||
46 | #endif | ||
47 | |||
48 | #ifndef EX_ST | ||
49 | #define EX_ST(x) x | ||
50 | #endif | ||
51 | |||
52 | #ifndef EX_RETVAL | ||
53 | #define EX_RETVAL(x) x | ||
54 | #endif | ||
55 | |||
56 | #ifndef LOAD | ||
57 | #define LOAD(type,addr,dest) type [addr], dest | ||
58 | #endif | ||
59 | |||
60 | #ifndef STORE | ||
61 | #ifndef MEMCPY_DEBUG | ||
62 | #define STORE(type,src,addr) type src, [addr] | ||
63 | #else | ||
64 | #define STORE(type,src,addr) type##a src, [addr] %asi | ||
65 | #endif | ||
66 | #endif | ||
67 | |||
68 | #ifndef STORE_INIT | ||
69 | #define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI | ||
70 | #endif | ||
71 | |||
72 | #ifndef FUNC_NAME | ||
73 | #define FUNC_NAME NG4memcpy | ||
74 | #endif | ||
75 | #ifndef PREAMBLE | ||
76 | #define PREAMBLE | ||
77 | #endif | ||
78 | |||
79 | #ifndef XCC | ||
80 | #define XCC xcc | ||
81 | #endif | ||
82 | |||
83 | .register %g2,#scratch | ||
84 | .register %g3,#scratch | ||
85 | |||
86 | .text | ||
87 | .align 64 | ||
88 | |||
89 | .globl FUNC_NAME | ||
90 | .type FUNC_NAME,#function | ||
91 | FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
92 | #ifdef MEMCPY_DEBUG | ||
93 | wr %g0, 0x80, %asi | ||
94 | #endif | ||
95 | srlx %o2, 31, %g2 | ||
96 | cmp %g2, 0 | ||
97 | tne %XCC, 5 | ||
98 | PREAMBLE | ||
99 | mov %o0, %o3 | ||
100 | brz,pn %o2, .Lexit | ||
101 | cmp %o2, 3 | ||
102 | ble,pn %icc, .Ltiny | ||
103 | cmp %o2, 19 | ||
104 | ble,pn %icc, .Lsmall | ||
105 | or %o0, %o1, %g2 | ||
106 | cmp %o2, 128 | ||
107 | bl,pn %icc, .Lmedium | ||
108 | nop | ||
109 | |||
110 | .Llarge:/* len >= 0x80 */ | ||
111 | /* First get dest 8 byte aligned. */ | ||
112 | sub %g0, %o0, %g1 | ||
113 | and %g1, 0x7, %g1 | ||
114 | brz,pt %g1, 51f | ||
115 | sub %o2, %g1, %o2 | ||
116 | |||
117 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | ||
118 | add %o1, 1, %o1 | ||
119 | subcc %g1, 1, %g1 | ||
120 | add %o0, 1, %o0 | ||
121 | bne,pt %icc, 1b | ||
122 | EX_ST(STORE(stb, %g2, %o0 - 0x01)) | ||
123 | |||
124 | 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) | ||
125 | LOAD(prefetch, %o1 + 0x080, #n_reads_strong) | ||
126 | LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong) | ||
127 | LOAD(prefetch, %o1 + 0x100, #n_reads_strong) | ||
128 | LOAD(prefetch, %o1 + 0x140, #n_reads_strong) | ||
129 | LOAD(prefetch, %o1 + 0x180, #n_reads_strong) | ||
130 | LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong) | ||
131 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | ||
132 | |||
133 | /* Check if we can use the straight fully aligned | ||
134 | * loop, or we require the alignaddr/faligndata variant. | ||
135 | */ | ||
136 | andcc %o1, 0x7, %o5 | ||
137 | bne,pn %icc, .Llarge_src_unaligned | ||
138 | sub %g0, %o0, %g1 | ||
139 | |||
140 | /* Legitimize the use of initializing stores by getting dest | ||
141 | * to be 64-byte aligned. | ||
142 | */ | ||
143 | and %g1, 0x3f, %g1 | ||
144 | brz,pt %g1, .Llarge_aligned | ||
145 | sub %o2, %g1, %o2 | ||
146 | |||
147 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) | ||
148 | add %o1, 8, %o1 | ||
149 | subcc %g1, 8, %g1 | ||
150 | add %o0, 8, %o0 | ||
151 | bne,pt %icc, 1b | ||
152 | EX_ST(STORE(stx, %g2, %o0 - 0x08)) | ||
153 | |||
154 | .Llarge_aligned: | ||
155 | /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ | ||
156 | andn %o2, 0x3f, %o4 | ||
157 | sub %o2, %o4, %o2 | ||
158 | |||
159 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
160 | add %o1, 0x40, %o1 | ||
161 | EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) | ||
162 | subcc %o4, 0x40, %o4 | ||
163 | EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) | ||
164 | EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) | ||
165 | EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) | ||
166 | EX_ST(STORE_INIT(%g1, %o0)) | ||
167 | add %o0, 0x08, %o0 | ||
168 | EX_ST(STORE_INIT(%g2, %o0)) | ||
169 | add %o0, 0x08, %o0 | ||
170 | EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) | ||
171 | EX_ST(STORE_INIT(%g3, %o0)) | ||
172 | add %o0, 0x08, %o0 | ||
173 | EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) | ||
174 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | ||
175 | add %o0, 0x08, %o0 | ||
176 | EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) | ||
177 | EX_ST(STORE_INIT(%o5, %o0)) | ||
178 | add %o0, 0x08, %o0 | ||
179 | EX_ST(STORE_INIT(%g2, %o0)) | ||
180 | add %o0, 0x08, %o0 | ||
181 | EX_ST(STORE_INIT(%g3, %o0)) | ||
182 | add %o0, 0x08, %o0 | ||
183 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | ||
184 | add %o0, 0x08, %o0 | ||
185 | bne,pt %icc, 1b | ||
186 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | ||
187 | |||
188 | membar #StoreLoad | #StoreStore | ||
189 | |||
190 | brz,pn %o2, .Lexit | ||
191 | cmp %o2, 19 | ||
192 | ble,pn %icc, .Lsmall_unaligned | ||
193 | nop | ||
194 | ba,a,pt %icc, .Lmedium_noprefetch | ||
195 | |||
196 | .Lexit: retl | ||
197 | mov EX_RETVAL(%o3), %o0 | ||
198 | |||
199 | .Llarge_src_unaligned: | ||
200 | andn %o2, 0x3f, %o4 | ||
201 | sub %o2, %o4, %o2 | ||
202 | VISEntryHalf | ||
203 | alignaddr %o1, %g0, %g1 | ||
204 | add %o1, %o4, %o1 | ||
205 | EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) | ||
206 | 1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) | ||
207 | subcc %o4, 0x40, %o4 | ||
208 | EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) | ||
209 | EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) | ||
210 | EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) | ||
211 | EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) | ||
212 | EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) | ||
213 | EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) | ||
214 | faligndata %f0, %f2, %f16 | ||
215 | EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) | ||
216 | faligndata %f2, %f4, %f18 | ||
217 | add %g1, 0x40, %g1 | ||
218 | faligndata %f4, %f6, %f20 | ||
219 | faligndata %f6, %f8, %f22 | ||
220 | faligndata %f8, %f10, %f24 | ||
221 | faligndata %f10, %f12, %f26 | ||
222 | faligndata %f12, %f14, %f28 | ||
223 | faligndata %f14, %f0, %f30 | ||
224 | EX_ST(STORE(std, %f16, %o0 + 0x00)) | ||
225 | EX_ST(STORE(std, %f18, %o0 + 0x08)) | ||
226 | EX_ST(STORE(std, %f20, %o0 + 0x10)) | ||
227 | EX_ST(STORE(std, %f22, %o0 + 0x18)) | ||
228 | EX_ST(STORE(std, %f24, %o0 + 0x20)) | ||
229 | EX_ST(STORE(std, %f26, %o0 + 0x28)) | ||
230 | EX_ST(STORE(std, %f28, %o0 + 0x30)) | ||
231 | EX_ST(STORE(std, %f30, %o0 + 0x38)) | ||
232 | add %o0, 0x40, %o0 | ||
233 | bne,pt %icc, 1b | ||
234 | LOAD(prefetch, %g1 + 0x200, #n_reads_strong) | ||
235 | VISExitHalf | ||
236 | |||
237 | brz,pn %o2, .Lexit | ||
238 | cmp %o2, 19 | ||
239 | ble,pn %icc, .Lsmall_unaligned | ||
240 | nop | ||
241 | ba,a,pt %icc, .Lmedium_unaligned | ||
242 | |||
243 | .Lmedium: | ||
244 | LOAD(prefetch, %o1 + 0x40, #n_reads_strong) | ||
245 | andcc %g2, 0x7, %g0 | ||
246 | bne,pn %icc, .Lmedium_unaligned | ||
247 | nop | ||
248 | .Lmedium_noprefetch: | ||
249 | andncc %o2, 0x20 - 1, %o5 | ||
250 | be,pn %icc, 2f | ||
251 | sub %o2, %o5, %o2 | ||
252 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
253 | EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) | ||
254 | EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) | ||
255 | EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) | ||
256 | add %o1, 0x20, %o1 | ||
257 | subcc %o5, 0x20, %o5 | ||
258 | EX_ST(STORE(stx, %g1, %o0 + 0x00)) | ||
259 | EX_ST(STORE(stx, %g2, %o0 + 0x08)) | ||
260 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) | ||
261 | EX_ST(STORE(stx, %o4, %o0 + 0x18)) | ||
262 | bne,pt %icc, 1b | ||
263 | add %o0, 0x20, %o0 | ||
264 | 2: andcc %o2, 0x18, %o5 | ||
265 | be,pt %icc, 3f | ||
266 | sub %o2, %o5, %o2 | ||
267 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
268 | add %o1, 0x08, %o1 | ||
269 | add %o0, 0x08, %o0 | ||
270 | subcc %o5, 0x08, %o5 | ||
271 | bne,pt %icc, 1b | ||
272 | EX_ST(STORE(stx, %g1, %o0 - 0x08)) | ||
273 | 3: brz,pt %o2, .Lexit | ||
274 | cmp %o2, 0x04 | ||
275 | bl,pn %icc, .Ltiny | ||
276 | nop | ||
277 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | ||
278 | add %o1, 0x04, %o1 | ||
279 | add %o0, 0x04, %o0 | ||
280 | subcc %o2, 0x04, %o2 | ||
281 | bne,pn %icc, .Ltiny | ||
282 | EX_ST(STORE(stw, %g1, %o0 - 0x04)) | ||
283 | ba,a,pt %icc, .Lexit | ||
284 | .Lmedium_unaligned: | ||
285 | /* First get dest 8 byte aligned. */ | ||
286 | sub %g0, %o0, %g1 | ||
287 | and %g1, 0x7, %g1 | ||
288 | brz,pt %g1, 2f | ||
289 | sub %o2, %g1, %o2 | ||
290 | |||
291 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | ||
292 | add %o1, 1, %o1 | ||
293 | subcc %g1, 1, %g1 | ||
294 | add %o0, 1, %o0 | ||
295 | bne,pt %icc, 1b | ||
296 | EX_ST(STORE(stb, %g2, %o0 - 0x01)) | ||
297 | 2: | ||
298 | and %o1, 0x7, %g1 | ||
299 | brz,pn %g1, .Lmedium_noprefetch | ||
300 | sll %g1, 3, %g1 | ||
301 | mov 64, %g2 | ||
302 | sub %g2, %g1, %g2 | ||
303 | andn %o1, 0x7, %o1 | ||
304 | EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) | ||
305 | sllx %o4, %g1, %o4 | ||
306 | andn %o2, 0x08 - 1, %o5 | ||
307 | sub %o2, %o5, %o2 | ||
308 | 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) | ||
309 | add %o1, 0x08, %o1 | ||
310 | subcc %o5, 0x08, %o5 | ||
311 | srlx %g3, %g2, GLOBAL_SPARE | ||
312 | or GLOBAL_SPARE, %o4, GLOBAL_SPARE | ||
313 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) | ||
314 | add %o0, 0x08, %o0 | ||
315 | bne,pt %icc, 1b | ||
316 | sllx %g3, %g1, %o4 | ||
317 | srl %g1, 3, %g1 | ||
318 | add %o1, %g1, %o1 | ||
319 | brz,pn %o2, .Lexit | ||
320 | nop | ||
321 | ba,pt %icc, .Lsmall_unaligned | ||
322 | |||
323 | .Ltiny: | ||
324 | EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | ||
325 | subcc %o2, 1, %o2 | ||
326 | be,pn %icc, .Lexit | ||
327 | EX_ST(STORE(stb, %g1, %o0 + 0x00)) | ||
328 | EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) | ||
329 | subcc %o2, 1, %o2 | ||
330 | be,pn %icc, .Lexit | ||
331 | EX_ST(STORE(stb, %g1, %o0 + 0x01)) | ||
332 | EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) | ||
333 | ba,pt %icc, .Lexit | ||
334 | EX_ST(STORE(stb, %g1, %o0 + 0x02)) | ||
335 | |||
336 | .Lsmall: | ||
337 | andcc %g2, 0x3, %g0 | ||
338 | bne,pn %icc, .Lsmall_unaligned | ||
339 | andn %o2, 0x4 - 1, %o5 | ||
340 | sub %o2, %o5, %o2 | ||
341 | 1: | ||
342 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | ||
343 | add %o1, 0x04, %o1 | ||
344 | subcc %o5, 0x04, %o5 | ||
345 | add %o0, 0x04, %o0 | ||
346 | bne,pt %icc, 1b | ||
347 | EX_ST(STORE(stw, %g1, %o0 - 0x04)) | ||
348 | brz,pt %o2, .Lexit | ||
349 | nop | ||
350 | ba,a,pt %icc, .Ltiny | ||
351 | |||
352 | .Lsmall_unaligned: | ||
353 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | ||
354 | add %o1, 1, %o1 | ||
355 | add %o0, 1, %o0 | ||
356 | subcc %o2, 1, %o2 | ||
357 | bne,pt %icc, 1b | ||
358 | EX_ST(STORE(stb, %g1, %o0 - 0x01)) | ||
359 | ba,a,pt %icc, .Lexit | ||
360 | .size FUNC_NAME, .-FUNC_NAME | ||
diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S new file mode 100644 index 000000000000..41da4bdd95cb --- /dev/null +++ b/arch/sparc/lib/NG4memset.S | |||
@@ -0,0 +1,105 @@ | |||
1 | /* NG4memset.S: Niagara-4 optimized memset/bzero. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | |||
8 | .register %g2, #scratch | ||
9 | .register %g3, #scratch | ||
10 | |||
11 | .text | ||
12 | .align 32 | ||
13 | .globl NG4memset | ||
14 | NG4memset: | ||
15 | andcc %o1, 0xff, %o4 | ||
16 | be,pt %icc, 1f | ||
17 | mov %o2, %o1 | ||
18 | sllx %o4, 8, %g1 | ||
19 | or %g1, %o4, %o2 | ||
20 | sllx %o2, 16, %g1 | ||
21 | or %g1, %o2, %o2 | ||
22 | sllx %o2, 32, %g1 | ||
23 | ba,pt %icc, 1f | ||
24 | or %g1, %o2, %o4 | ||
25 | .size NG4memset,.-NG4memset | ||
26 | |||
27 | .align 32 | ||
28 | .globl NG4bzero | ||
29 | NG4bzero: | ||
30 | clr %o4 | ||
31 | 1: cmp %o1, 16 | ||
32 | ble %icc, .Ltiny | ||
33 | mov %o0, %o3 | ||
34 | sub %g0, %o0, %g1 | ||
35 | and %g1, 0x7, %g1 | ||
36 | brz,pt %g1, .Laligned8 | ||
37 | sub %o1, %g1, %o1 | ||
38 | 1: stb %o4, [%o0 + 0x00] | ||
39 | subcc %g1, 1, %g1 | ||
40 | bne,pt %icc, 1b | ||
41 | add %o0, 1, %o0 | ||
42 | .Laligned8: | ||
43 | cmp %o1, 64 + (64 - 8) | ||
44 | ble .Lmedium | ||
45 | sub %g0, %o0, %g1 | ||
46 | andcc %g1, (64 - 1), %g1 | ||
47 | brz,pn %g1, .Laligned64 | ||
48 | sub %o1, %g1, %o1 | ||
49 | 1: stx %o4, [%o0 + 0x00] | ||
50 | subcc %g1, 8, %g1 | ||
51 | bne,pt %icc, 1b | ||
52 | add %o0, 0x8, %o0 | ||
53 | .Laligned64: | ||
54 | andn %o1, 64 - 1, %g1 | ||
55 | sub %o1, %g1, %o1 | ||
56 | brnz,pn %o4, .Lnon_bzero_loop | ||
57 | mov 0x20, %g2 | ||
58 | 1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
59 | subcc %g1, 0x40, %g1 | ||
60 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
61 | bne,pt %icc, 1b | ||
62 | add %o0, 0x40, %o0 | ||
63 | .Lpostloop: | ||
64 | cmp %o1, 8 | ||
65 | bl,pn %icc, .Ltiny | ||
66 | membar #StoreStore|#StoreLoad | ||
67 | .Lmedium: | ||
68 | andn %o1, 0x7, %g1 | ||
69 | sub %o1, %g1, %o1 | ||
70 | 1: stx %o4, [%o0 + 0x00] | ||
71 | subcc %g1, 0x8, %g1 | ||
72 | bne,pt %icc, 1b | ||
73 | add %o0, 0x08, %o0 | ||
74 | andcc %o1, 0x4, %g1 | ||
75 | be,pt %icc, .Ltiny | ||
76 | sub %o1, %g1, %o1 | ||
77 | stw %o4, [%o0 + 0x00] | ||
78 | add %o0, 0x4, %o0 | ||
79 | .Ltiny: | ||
80 | cmp %o1, 0 | ||
81 | be,pn %icc, .Lexit | ||
82 | 1: subcc %o1, 1, %o1 | ||
83 | stb %o4, [%o0 + 0x00] | ||
84 | bne,pt %icc, 1b | ||
85 | add %o0, 1, %o0 | ||
86 | .Lexit: | ||
87 | retl | ||
88 | mov %o3, %o0 | ||
89 | .Lnon_bzero_loop: | ||
90 | mov 0x08, %g3 | ||
91 | mov 0x28, %o5 | ||
92 | 1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
93 | subcc %g1, 0x40, %g1 | ||
94 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
95 | stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | ||
96 | stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P | ||
97 | add %o0, 0x10, %o0 | ||
98 | stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
99 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
100 | stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | ||
101 | stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P | ||
102 | bne,pt %icc, 1b | ||
103 | add %o0, 0x30, %o0 | ||
104 | ba,a,pt %icc, .Lpostloop | ||
105 | .size NG4bzero,.-NG4bzero | ||
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S new file mode 100644 index 000000000000..a114cbcf2a48 --- /dev/null +++ b/arch/sparc/lib/NG4patch.S | |||
@@ -0,0 +1,54 @@ | |||
1 | /* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #define BRANCH_ALWAYS 0x10680000 | ||
7 | #define NOP 0x01000000 | ||
8 | #define NG_DO_PATCH(OLD, NEW) \ | ||
9 | sethi %hi(NEW), %g1; \ | ||
10 | or %g1, %lo(NEW), %g1; \ | ||
11 | sethi %hi(OLD), %g2; \ | ||
12 | or %g2, %lo(OLD), %g2; \ | ||
13 | sub %g1, %g2, %g1; \ | ||
14 | sethi %hi(BRANCH_ALWAYS), %g3; \ | ||
15 | sll %g1, 11, %g1; \ | ||
16 | srl %g1, 11 + 2, %g1; \ | ||
17 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ | ||
18 | or %g3, %g1, %g3; \ | ||
19 | stw %g3, [%g2]; \ | ||
20 | sethi %hi(NOP), %g3; \ | ||
21 | or %g3, %lo(NOP), %g3; \ | ||
22 | stw %g3, [%g2 + 0x4]; \ | ||
23 | flush %g2; | ||
24 | |||
25 | .globl niagara4_patch_copyops | ||
26 | .type niagara4_patch_copyops,#function | ||
27 | niagara4_patch_copyops: | ||
28 | NG_DO_PATCH(memcpy, NG4memcpy) | ||
29 | NG_DO_PATCH(___copy_from_user, NG4copy_from_user) | ||
30 | NG_DO_PATCH(___copy_to_user, NG4copy_to_user) | ||
31 | retl | ||
32 | nop | ||
33 | .size niagara4_patch_copyops,.-niagara4_patch_copyops | ||
34 | |||
35 | .globl niagara4_patch_bzero | ||
36 | .type niagara4_patch_bzero,#function | ||
37 | niagara4_patch_bzero: | ||
38 | NG_DO_PATCH(memset, NG4memset) | ||
39 | NG_DO_PATCH(__bzero, NG4bzero) | ||
40 | NG_DO_PATCH(__clear_user, NGclear_user) | ||
41 | NG_DO_PATCH(tsb_init, NGtsb_init) | ||
42 | retl | ||
43 | nop | ||
44 | .size niagara4_patch_bzero,.-niagara4_patch_bzero | ||
45 | |||
46 | .globl niagara4_patch_pageops | ||
47 | .type niagara4_patch_pageops,#function | ||
48 | niagara4_patch_pageops: | ||
49 | NG_DO_PATCH(copy_user_page, NG4copy_user_page) | ||
50 | NG_DO_PATCH(_clear_page, NG4clear_page) | ||
51 | NG_DO_PATCH(clear_user_page, NG4clear_user_page) | ||
52 | retl | ||
53 | nop | ||
54 | .size niagara4_patch_pageops,.-niagara4_patch_pageops | ||
diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S index b9e790b9c6b8..423d46e2258b 100644 --- a/arch/sparc/lib/NGpage.S +++ b/arch/sparc/lib/NGpage.S | |||
@@ -59,6 +59,8 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | |||
59 | restore | 59 | restore |
60 | 60 | ||
61 | .align 32 | 61 | .align 32 |
62 | .globl NGclear_page | ||
63 | .globl NGclear_user_page | ||
62 | NGclear_page: /* %o0=dest */ | 64 | NGclear_page: /* %o0=dest */ |
63 | NGclear_user_page: /* %o0=dest, %o1=vaddr */ | 65 | NGclear_user_page: /* %o0=dest, %o1=vaddr */ |
64 | rd %asi, %g3 | 66 | rd %asi, %g3 |
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 3b31218cafc6..ee31b884c61b 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c | |||
@@ -134,6 +134,10 @@ EXPORT_SYMBOL(copy_user_page); | |||
134 | void VISenter(void); | 134 | void VISenter(void); |
135 | EXPORT_SYMBOL(VISenter); | 135 | EXPORT_SYMBOL(VISenter); |
136 | 136 | ||
137 | /* CRYPTO code needs this */ | ||
138 | void VISenterhalf(void); | ||
139 | EXPORT_SYMBOL(VISenterhalf); | ||
140 | |||
137 | extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); | 141 | extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); |
138 | extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, | 142 | extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, |
139 | unsigned long *); | 143 | unsigned long *); |
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 77ac917be152..e98bfda205a2 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c | |||
@@ -265,6 +265,7 @@ good_area: | |||
265 | } | 265 | } |
266 | if (fault & VM_FAULT_RETRY) { | 266 | if (fault & VM_FAULT_RETRY) { |
267 | flags &= ~FAULT_FLAG_ALLOW_RETRY; | 267 | flags &= ~FAULT_FLAG_ALLOW_RETRY; |
268 | flags |= FAULT_FLAG_TRIED; | ||
268 | 269 | ||
269 | /* No need to up_read(&mm->mmap_sem) as we would | 270 | /* No need to up_read(&mm->mmap_sem) as we would |
270 | * have already released it in __lock_page_or_retry | 271 | * have already released it in __lock_page_or_retry |
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 1fe0429b6314..2976dba1ebaf 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c | |||
@@ -452,6 +452,7 @@ good_area: | |||
452 | } | 452 | } |
453 | if (fault & VM_FAULT_RETRY) { | 453 | if (fault & VM_FAULT_RETRY) { |
454 | flags &= ~FAULT_FLAG_ALLOW_RETRY; | 454 | flags &= ~FAULT_FLAG_ALLOW_RETRY; |
455 | flags |= FAULT_FLAG_TRIED; | ||
455 | 456 | ||
456 | /* No need to up_read(&mm->mmap_sem) as we would | 457 | /* No need to up_read(&mm->mmap_sem) as we would |
457 | * have already released it in __lock_page_or_retry | 458 | * have already released it in __lock_page_or_retry |
@@ -464,13 +465,13 @@ good_area: | |||
464 | up_read(&mm->mmap_sem); | 465 | up_read(&mm->mmap_sem); |
465 | 466 | ||
466 | mm_rss = get_mm_rss(mm); | 467 | mm_rss = get_mm_rss(mm); |
467 | #ifdef CONFIG_HUGETLB_PAGE | 468 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
468 | mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); | 469 | mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); |
469 | #endif | 470 | #endif |
470 | if (unlikely(mm_rss > | 471 | if (unlikely(mm_rss > |
471 | mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) | 472 | mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) |
472 | tsb_grow(mm, MM_TSB_BASE, mm_rss); | 473 | tsb_grow(mm, MM_TSB_BASE, mm_rss); |
473 | #ifdef CONFIG_HUGETLB_PAGE | 474 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
474 | mm_rss = mm->context.huge_pte_count; | 475 | mm_rss = mm->context.huge_pte_count; |
475 | if (unlikely(mm_rss > | 476 | if (unlikely(mm_rss > |
476 | mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) | 477 | mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) |
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 07e14535375c..f76f83d5ac63 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c | |||
@@ -303,53 +303,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
303 | { | 303 | { |
304 | return NULL; | 304 | return NULL; |
305 | } | 305 | } |
306 | |||
307 | static void context_reload(void *__data) | ||
308 | { | ||
309 | struct mm_struct *mm = __data; | ||
310 | |||
311 | if (mm == current->mm) | ||
312 | load_secondary_context(mm); | ||
313 | } | ||
314 | |||
315 | void hugetlb_prefault_arch_hook(struct mm_struct *mm) | ||
316 | { | ||
317 | struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE]; | ||
318 | |||
319 | if (likely(tp->tsb != NULL)) | ||
320 | return; | ||
321 | |||
322 | tsb_grow(mm, MM_TSB_HUGE, 0); | ||
323 | tsb_context_switch(mm); | ||
324 | smp_tsb_sync(mm); | ||
325 | |||
326 | /* On UltraSPARC-III+ and later, configure the second half of | ||
327 | * the Data-TLB for huge pages. | ||
328 | */ | ||
329 | if (tlb_type == cheetah_plus) { | ||
330 | unsigned long ctx; | ||
331 | |||
332 | spin_lock(&ctx_alloc_lock); | ||
333 | ctx = mm->context.sparc64_ctx_val; | ||
334 | ctx &= ~CTX_PGSZ_MASK; | ||
335 | ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; | ||
336 | ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT; | ||
337 | |||
338 | if (ctx != mm->context.sparc64_ctx_val) { | ||
339 | /* When changing the page size fields, we | ||
340 | * must perform a context flush so that no | ||
341 | * stale entries match. This flush must | ||
342 | * occur with the original context register | ||
343 | * settings. | ||
344 | */ | ||
345 | do_flush_tlb_mm(mm); | ||
346 | |||
347 | /* Reload the context register of all processors | ||
348 | * also executing in this address space. | ||
349 | */ | ||
350 | mm->context.sparc64_ctx_val = ctx; | ||
351 | on_each_cpu(context_reload, mm, 0); | ||
352 | } | ||
353 | spin_unlock(&ctx_alloc_lock); | ||
354 | } | ||
355 | } | ||
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 6026fdd1b2ed..9e28a118e6a4 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c | |||
@@ -51,22 +51,40 @@ | |||
51 | 51 | ||
52 | #include "init_64.h" | 52 | #include "init_64.h" |
53 | 53 | ||
54 | unsigned long kern_linear_pte_xor[2] __read_mostly; | 54 | unsigned long kern_linear_pte_xor[4] __read_mostly; |
55 | 55 | ||
56 | /* A bitmap, one bit for every 256MB of physical memory. If the bit | 56 | /* A bitmap, two bits for every 256MB of physical memory. These two |
57 | * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else | 57 | * bits determine what page size we use for kernel linear |
58 | * if set we should use a 256MB page (via kern_linear_pte_xor[1]). | 58 | * translations. They form an index into kern_linear_pte_xor[]. The |
59 | * value in the indexed slot is XOR'd with the TLB miss virtual | ||
60 | * address to form the resulting TTE. The mapping is: | ||
61 | * | ||
62 | * 0 ==> 4MB | ||
63 | * 1 ==> 256MB | ||
64 | * 2 ==> 2GB | ||
65 | * 3 ==> 16GB | ||
66 | * | ||
67 | * All sun4v chips support 256MB pages. Only SPARC-T4 and later | ||
68 | * support 2GB pages, and hopefully future cpus will support the 16GB | ||
69 | * pages as well. For slots 2 and 3, we encode a 256MB TTE xor there | ||
70 | * if these larger page sizes are not supported by the cpu. | ||
71 | * | ||
72 | * It would be nice to determine this from the machine description | ||
73 | * 'cpu' properties, but we need to have this table setup before the | ||
74 | * MDESC is initialized. | ||
59 | */ | 75 | */ |
60 | unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; | 76 | unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; |
61 | 77 | ||
62 | #ifndef CONFIG_DEBUG_PAGEALLOC | 78 | #ifndef CONFIG_DEBUG_PAGEALLOC |
63 | /* A special kernel TSB for 4MB and 256MB linear mappings. | 79 | /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. |
64 | * Space is allocated for this right after the trap table | 80 | * Space is allocated for this right after the trap table in |
65 | * in arch/sparc64/kernel/head.S | 81 | * arch/sparc64/kernel/head.S |
66 | */ | 82 | */ |
67 | extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; | 83 | extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; |
68 | #endif | 84 | #endif |
69 | 85 | ||
86 | static unsigned long cpu_pgsz_mask; | ||
87 | |||
70 | #define MAX_BANKS 32 | 88 | #define MAX_BANKS 32 |
71 | 89 | ||
72 | static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata; | 90 | static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata; |
@@ -101,7 +119,8 @@ static void __init read_obp_memory(const char *property, | |||
101 | 119 | ||
102 | ret = prom_getproperty(node, property, (char *) regs, prop_size); | 120 | ret = prom_getproperty(node, property, (char *) regs, prop_size); |
103 | if (ret == -1) { | 121 | if (ret == -1) { |
104 | prom_printf("Couldn't get %s property from /memory.\n"); | 122 | prom_printf("Couldn't get %s property from /memory.\n", |
123 | property); | ||
105 | prom_halt(); | 124 | prom_halt(); |
106 | } | 125 | } |
107 | 126 | ||
@@ -257,7 +276,6 @@ static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long | |||
257 | } | 276 | } |
258 | 277 | ||
259 | unsigned long _PAGE_ALL_SZ_BITS __read_mostly; | 278 | unsigned long _PAGE_ALL_SZ_BITS __read_mostly; |
260 | unsigned long _PAGE_SZBITS __read_mostly; | ||
261 | 279 | ||
262 | static void flush_dcache(unsigned long pfn) | 280 | static void flush_dcache(unsigned long pfn) |
263 | { | 281 | { |
@@ -288,12 +306,24 @@ static void flush_dcache(unsigned long pfn) | |||
288 | } | 306 | } |
289 | } | 307 | } |
290 | 308 | ||
309 | /* mm->context.lock must be held */ | ||
310 | static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index, | ||
311 | unsigned long tsb_hash_shift, unsigned long address, | ||
312 | unsigned long tte) | ||
313 | { | ||
314 | struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb; | ||
315 | unsigned long tag; | ||
316 | |||
317 | tsb += ((address >> tsb_hash_shift) & | ||
318 | (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL)); | ||
319 | tag = (address >> 22UL); | ||
320 | tsb_insert(tsb, tag, tte); | ||
321 | } | ||
322 | |||
291 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) | 323 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) |
292 | { | 324 | { |
325 | unsigned long tsb_index, tsb_hash_shift, flags; | ||
293 | struct mm_struct *mm; | 326 | struct mm_struct *mm; |
294 | struct tsb *tsb; | ||
295 | unsigned long tag, flags; | ||
296 | unsigned long tsb_index, tsb_hash_shift; | ||
297 | pte_t pte = *ptep; | 327 | pte_t pte = *ptep; |
298 | 328 | ||
299 | if (tlb_type != hypervisor) { | 329 | if (tlb_type != hypervisor) { |
@@ -310,7 +340,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * | |||
310 | 340 | ||
311 | spin_lock_irqsave(&mm->context.lock, flags); | 341 | spin_lock_irqsave(&mm->context.lock, flags); |
312 | 342 | ||
313 | #ifdef CONFIG_HUGETLB_PAGE | 343 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
314 | if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) { | 344 | if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) { |
315 | if ((tlb_type == hypervisor && | 345 | if ((tlb_type == hypervisor && |
316 | (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || | 346 | (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || |
@@ -322,11 +352,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * | |||
322 | } | 352 | } |
323 | #endif | 353 | #endif |
324 | 354 | ||
325 | tsb = mm->context.tsb_block[tsb_index].tsb; | 355 | __update_mmu_tsb_insert(mm, tsb_index, tsb_hash_shift, |
326 | tsb += ((address >> tsb_hash_shift) & | 356 | address, pte_val(pte)); |
327 | (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL)); | ||
328 | tag = (address >> 22UL); | ||
329 | tsb_insert(tsb, tag, pte_val(pte)); | ||
330 | 357 | ||
331 | spin_unlock_irqrestore(&mm->context.lock, flags); | 358 | spin_unlock_irqrestore(&mm->context.lock, flags); |
332 | } | 359 | } |
@@ -403,6 +430,12 @@ EXPORT_SYMBOL(flush_icache_range); | |||
403 | 430 | ||
404 | void mmu_info(struct seq_file *m) | 431 | void mmu_info(struct seq_file *m) |
405 | { | 432 | { |
433 | static const char *pgsz_strings[] = { | ||
434 | "8K", "64K", "512K", "4MB", "32MB", | ||
435 | "256MB", "2GB", "16GB", | ||
436 | }; | ||
437 | int i, printed; | ||
438 | |||
406 | if (tlb_type == cheetah) | 439 | if (tlb_type == cheetah) |
407 | seq_printf(m, "MMU Type\t: Cheetah\n"); | 440 | seq_printf(m, "MMU Type\t: Cheetah\n"); |
408 | else if (tlb_type == cheetah_plus) | 441 | else if (tlb_type == cheetah_plus) |
@@ -414,6 +447,17 @@ void mmu_info(struct seq_file *m) | |||
414 | else | 447 | else |
415 | seq_printf(m, "MMU Type\t: ???\n"); | 448 | seq_printf(m, "MMU Type\t: ???\n"); |
416 | 449 | ||
450 | seq_printf(m, "MMU PGSZs\t: "); | ||
451 | printed = 0; | ||
452 | for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) { | ||
453 | if (cpu_pgsz_mask & (1UL << i)) { | ||
454 | seq_printf(m, "%s%s", | ||
455 | printed ? "," : "", pgsz_strings[i]); | ||
456 | printed++; | ||
457 | } | ||
458 | } | ||
459 | seq_putc(m, '\n'); | ||
460 | |||
417 | #ifdef CONFIG_DEBUG_DCFLUSH | 461 | #ifdef CONFIG_DEBUG_DCFLUSH |
418 | seq_printf(m, "DCPageFlushes\t: %d\n", | 462 | seq_printf(m, "DCPageFlushes\t: %d\n", |
419 | atomic_read(&dcpage_flushes)); | 463 | atomic_read(&dcpage_flushes)); |
@@ -462,7 +506,7 @@ static void __init read_obp_translations(void) | |||
462 | prom_halt(); | 506 | prom_halt(); |
463 | } | 507 | } |
464 | if (unlikely(n > sizeof(prom_trans))) { | 508 | if (unlikely(n > sizeof(prom_trans))) { |
465 | prom_printf("prom_mappings: Size %Zd is too big.\n", n); | 509 | prom_printf("prom_mappings: Size %d is too big.\n", n); |
466 | prom_halt(); | 510 | prom_halt(); |
467 | } | 511 | } |
468 | 512 | ||
@@ -524,7 +568,7 @@ static void __init hypervisor_tlb_lock(unsigned long vaddr, | |||
524 | unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu); | 568 | unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu); |
525 | 569 | ||
526 | if (ret != 0) { | 570 | if (ret != 0) { |
527 | prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: " | 571 | prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: " |
528 | "errors with %lx\n", vaddr, 0, pte, mmu, ret); | 572 | "errors with %lx\n", vaddr, 0, pte, mmu, ret); |
529 | prom_halt(); | 573 | prom_halt(); |
530 | } | 574 | } |
@@ -1358,32 +1402,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, | |||
1358 | extern unsigned int kvmap_linear_patch[1]; | 1402 | extern unsigned int kvmap_linear_patch[1]; |
1359 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | 1403 | #endif /* CONFIG_DEBUG_PAGEALLOC */ |
1360 | 1404 | ||
1361 | static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) | 1405 | static void __init kpte_set_val(unsigned long index, unsigned long val) |
1362 | { | 1406 | { |
1363 | const unsigned long shift_256MB = 28; | 1407 | unsigned long *ptr = kpte_linear_bitmap; |
1364 | const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL); | ||
1365 | const unsigned long size_256MB = (1UL << shift_256MB); | ||
1366 | 1408 | ||
1367 | while (start < end) { | 1409 | val <<= ((index % (BITS_PER_LONG / 2)) * 2); |
1368 | long remains; | 1410 | ptr += (index / (BITS_PER_LONG / 2)); |
1369 | 1411 | ||
1370 | remains = end - start; | 1412 | *ptr |= val; |
1371 | if (remains < size_256MB) | 1413 | } |
1372 | break; | ||
1373 | 1414 | ||
1374 | if (start & mask_256MB) { | 1415 | static const unsigned long kpte_shift_min = 28; /* 256MB */ |
1375 | start = (start + size_256MB) & ~mask_256MB; | 1416 | static const unsigned long kpte_shift_max = 34; /* 16GB */ |
1376 | continue; | 1417 | static const unsigned long kpte_shift_incr = 3; |
1377 | } | 1418 | |
1419 | static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, | ||
1420 | unsigned long shift) | ||
1421 | { | ||
1422 | unsigned long size = (1UL << shift); | ||
1423 | unsigned long mask = (size - 1UL); | ||
1424 | unsigned long remains = end - start; | ||
1425 | unsigned long val; | ||
1378 | 1426 | ||
1379 | while (remains >= size_256MB) { | 1427 | if (remains < size || (start & mask)) |
1380 | unsigned long index = start >> shift_256MB; | 1428 | return start; |
1381 | 1429 | ||
1382 | __set_bit(index, kpte_linear_bitmap); | 1430 | /* VAL maps: |
1431 | * | ||
1432 | * shift 28 --> kern_linear_pte_xor index 1 | ||
1433 | * shift 31 --> kern_linear_pte_xor index 2 | ||
1434 | * shift 34 --> kern_linear_pte_xor index 3 | ||
1435 | */ | ||
1436 | val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; | ||
1437 | |||
1438 | remains &= ~mask; | ||
1439 | if (shift != kpte_shift_max) | ||
1440 | remains = size; | ||
1441 | |||
1442 | while (remains) { | ||
1443 | unsigned long index = start >> kpte_shift_min; | ||
1444 | |||
1445 | kpte_set_val(index, val); | ||
1446 | |||
1447 | start += 1UL << kpte_shift_min; | ||
1448 | remains -= 1UL << kpte_shift_min; | ||
1449 | } | ||
1450 | |||
1451 | return start; | ||
1452 | } | ||
1453 | |||
1454 | static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) | ||
1455 | { | ||
1456 | unsigned long smallest_size, smallest_mask; | ||
1457 | unsigned long s; | ||
1458 | |||
1459 | smallest_size = (1UL << kpte_shift_min); | ||
1460 | smallest_mask = (smallest_size - 1UL); | ||
1383 | 1461 | ||
1384 | start += size_256MB; | 1462 | while (start < end) { |
1385 | remains -= size_256MB; | 1463 | unsigned long orig_start = start; |
1464 | |||
1465 | for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { | ||
1466 | start = kpte_mark_using_shift(start, end, s); | ||
1467 | |||
1468 | if (start != orig_start) | ||
1469 | break; | ||
1386 | } | 1470 | } |
1471 | |||
1472 | if (start == orig_start) | ||
1473 | start = (start + smallest_size) & ~smallest_mask; | ||
1387 | } | 1474 | } |
1388 | } | 1475 | } |
1389 | 1476 | ||
@@ -1577,13 +1664,16 @@ static void __init sun4v_ktsb_init(void) | |||
1577 | ktsb_descr[0].resv = 0; | 1664 | ktsb_descr[0].resv = 0; |
1578 | 1665 | ||
1579 | #ifndef CONFIG_DEBUG_PAGEALLOC | 1666 | #ifndef CONFIG_DEBUG_PAGEALLOC |
1580 | /* Second KTSB for 4MB/256MB mappings. */ | 1667 | /* Second KTSB for 4MB/256MB/2GB/16GB mappings. */ |
1581 | ktsb_pa = (kern_base + | 1668 | ktsb_pa = (kern_base + |
1582 | ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); | 1669 | ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); |
1583 | 1670 | ||
1584 | ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; | 1671 | ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; |
1585 | ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB | | 1672 | ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB | |
1586 | HV_PGSZ_MASK_256MB); | 1673 | HV_PGSZ_MASK_256MB | |
1674 | HV_PGSZ_MASK_2GB | | ||
1675 | HV_PGSZ_MASK_16GB) & | ||
1676 | cpu_pgsz_mask); | ||
1587 | ktsb_descr[1].assoc = 1; | 1677 | ktsb_descr[1].assoc = 1; |
1588 | ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; | 1678 | ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; |
1589 | ktsb_descr[1].ctx_idx = 0; | 1679 | ktsb_descr[1].ctx_idx = 0; |
@@ -1606,6 +1696,47 @@ void __cpuinit sun4v_ktsb_register(void) | |||
1606 | } | 1696 | } |
1607 | } | 1697 | } |
1608 | 1698 | ||
1699 | static void __init sun4u_linear_pte_xor_finalize(void) | ||
1700 | { | ||
1701 | #ifndef CONFIG_DEBUG_PAGEALLOC | ||
1702 | /* This is where we would add Panther support for | ||
1703 | * 32MB and 256MB pages. | ||
1704 | */ | ||
1705 | #endif | ||
1706 | } | ||
1707 | |||
1708 | static void __init sun4v_linear_pte_xor_finalize(void) | ||
1709 | { | ||
1710 | #ifndef CONFIG_DEBUG_PAGEALLOC | ||
1711 | if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { | ||
1712 | kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ | ||
1713 | 0xfffff80000000000UL; | ||
1714 | kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | | ||
1715 | _PAGE_P_4V | _PAGE_W_4V); | ||
1716 | } else { | ||
1717 | kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; | ||
1718 | } | ||
1719 | |||
1720 | if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { | ||
1721 | kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ | ||
1722 | 0xfffff80000000000UL; | ||
1723 | kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | | ||
1724 | _PAGE_P_4V | _PAGE_W_4V); | ||
1725 | } else { | ||
1726 | kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; | ||
1727 | } | ||
1728 | |||
1729 | if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { | ||
1730 | kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ | ||
1731 | 0xfffff80000000000UL; | ||
1732 | kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | | ||
1733 | _PAGE_P_4V | _PAGE_W_4V); | ||
1734 | } else { | ||
1735 | kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; | ||
1736 | } | ||
1737 | #endif | ||
1738 | } | ||
1739 | |||
1609 | /* paging_init() sets up the page tables */ | 1740 | /* paging_init() sets up the page tables */ |
1610 | 1741 | ||
1611 | static unsigned long last_valid_pfn; | 1742 | static unsigned long last_valid_pfn; |
@@ -1665,10 +1796,8 @@ void __init paging_init(void) | |||
1665 | ktsb_phys_patch(); | 1796 | ktsb_phys_patch(); |
1666 | } | 1797 | } |
1667 | 1798 | ||
1668 | if (tlb_type == hypervisor) { | 1799 | if (tlb_type == hypervisor) |
1669 | sun4v_patch_tlb_handlers(); | 1800 | sun4v_patch_tlb_handlers(); |
1670 | sun4v_ktsb_init(); | ||
1671 | } | ||
1672 | 1801 | ||
1673 | /* Find available physical memory... | 1802 | /* Find available physical memory... |
1674 | * | 1803 | * |
@@ -1727,9 +1856,6 @@ void __init paging_init(void) | |||
1727 | 1856 | ||
1728 | __flush_tlb_all(); | 1857 | __flush_tlb_all(); |
1729 | 1858 | ||
1730 | if (tlb_type == hypervisor) | ||
1731 | sun4v_ktsb_register(); | ||
1732 | |||
1733 | prom_build_devicetree(); | 1859 | prom_build_devicetree(); |
1734 | of_populate_present_mask(); | 1860 | of_populate_present_mask(); |
1735 | #ifndef CONFIG_SMP | 1861 | #ifndef CONFIG_SMP |
@@ -1742,8 +1868,36 @@ void __init paging_init(void) | |||
1742 | #ifndef CONFIG_SMP | 1868 | #ifndef CONFIG_SMP |
1743 | mdesc_fill_in_cpu_data(cpu_all_mask); | 1869 | mdesc_fill_in_cpu_data(cpu_all_mask); |
1744 | #endif | 1870 | #endif |
1871 | mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask); | ||
1872 | |||
1873 | sun4v_linear_pte_xor_finalize(); | ||
1874 | |||
1875 | sun4v_ktsb_init(); | ||
1876 | sun4v_ktsb_register(); | ||
1877 | } else { | ||
1878 | unsigned long impl, ver; | ||
1879 | |||
1880 | cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K | | ||
1881 | HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB); | ||
1882 | |||
1883 | __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); | ||
1884 | impl = ((ver >> 32) & 0xffff); | ||
1885 | if (impl == PANTHER_IMPL) | ||
1886 | cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB | | ||
1887 | HV_PGSZ_MASK_256MB); | ||
1888 | |||
1889 | sun4u_linear_pte_xor_finalize(); | ||
1745 | } | 1890 | } |
1746 | 1891 | ||
1892 | /* Flush the TLBs and the 4M TSB so that the updated linear | ||
1893 | * pte XOR settings are realized for all mappings. | ||
1894 | */ | ||
1895 | __flush_tlb_all(); | ||
1896 | #ifndef CONFIG_DEBUG_PAGEALLOC | ||
1897 | memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); | ||
1898 | #endif | ||
1899 | __flush_tlb_all(); | ||
1900 | |||
1747 | /* Setup bootmem... */ | 1901 | /* Setup bootmem... */ |
1748 | last_valid_pfn = end_pfn = bootmem_init(phys_base); | 1902 | last_valid_pfn = end_pfn = bootmem_init(phys_base); |
1749 | 1903 | ||
@@ -2020,6 +2174,9 @@ EXPORT_SYMBOL(_PAGE_CACHE); | |||
2020 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 2174 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
2021 | unsigned long vmemmap_table[VMEMMAP_SIZE]; | 2175 | unsigned long vmemmap_table[VMEMMAP_SIZE]; |
2022 | 2176 | ||
2177 | static long __meminitdata addr_start, addr_end; | ||
2178 | static int __meminitdata node_start; | ||
2179 | |||
2023 | int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) | 2180 | int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) |
2024 | { | 2181 | { |
2025 | unsigned long vstart = (unsigned long) start; | 2182 | unsigned long vstart = (unsigned long) start; |
@@ -2050,15 +2207,30 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) | |||
2050 | 2207 | ||
2051 | *vmem_pp = pte_base | __pa(block); | 2208 | *vmem_pp = pte_base | __pa(block); |
2052 | 2209 | ||
2053 | printk(KERN_INFO "[%p-%p] page_structs=%lu " | 2210 | /* check to see if we have contiguous blocks */ |
2054 | "node=%d entry=%lu/%lu\n", start, block, nr, | 2211 | if (addr_end != addr || node_start != node) { |
2055 | node, | 2212 | if (addr_start) |
2056 | addr >> VMEMMAP_CHUNK_SHIFT, | 2213 | printk(KERN_DEBUG " [%lx-%lx] on node %d\n", |
2057 | VMEMMAP_SIZE); | 2214 | addr_start, addr_end-1, node_start); |
2215 | addr_start = addr; | ||
2216 | node_start = node; | ||
2217 | } | ||
2218 | addr_end = addr + VMEMMAP_CHUNK; | ||
2058 | } | 2219 | } |
2059 | } | 2220 | } |
2060 | return 0; | 2221 | return 0; |
2061 | } | 2222 | } |
2223 | |||
2224 | void __meminit vmemmap_populate_print_last(void) | ||
2225 | { | ||
2226 | if (addr_start) { | ||
2227 | printk(KERN_DEBUG " [%lx-%lx] on node %d\n", | ||
2228 | addr_start, addr_end-1, node_start); | ||
2229 | addr_start = 0; | ||
2230 | addr_end = 0; | ||
2231 | node_start = 0; | ||
2232 | } | ||
2233 | } | ||
2062 | #endif /* CONFIG_SPARSEMEM_VMEMMAP */ | 2234 | #endif /* CONFIG_SPARSEMEM_VMEMMAP */ |
2063 | 2235 | ||
2064 | static void prot_init_common(unsigned long page_none, | 2236 | static void prot_init_common(unsigned long page_none, |
@@ -2092,6 +2264,7 @@ static void __init sun4u_pgprot_init(void) | |||
2092 | { | 2264 | { |
2093 | unsigned long page_none, page_shared, page_copy, page_readonly; | 2265 | unsigned long page_none, page_shared, page_copy, page_readonly; |
2094 | unsigned long page_exec_bit; | 2266 | unsigned long page_exec_bit; |
2267 | int i; | ||
2095 | 2268 | ||
2096 | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | | 2269 | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | |
2097 | _PAGE_CACHE_4U | _PAGE_P_4U | | 2270 | _PAGE_CACHE_4U | _PAGE_P_4U | |
@@ -2110,8 +2283,7 @@ static void __init sun4u_pgprot_init(void) | |||
2110 | __ACCESS_BITS_4U | _PAGE_E_4U); | 2283 | __ACCESS_BITS_4U | _PAGE_E_4U); |
2111 | 2284 | ||
2112 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2285 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2113 | kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4U) ^ | 2286 | kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL; |
2114 | 0xfffff80000000000UL; | ||
2115 | #else | 2287 | #else |
2116 | kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ | 2288 | kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ |
2117 | 0xfffff80000000000UL; | 2289 | 0xfffff80000000000UL; |
@@ -2119,10 +2291,9 @@ static void __init sun4u_pgprot_init(void) | |||
2119 | kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | | 2291 | kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | |
2120 | _PAGE_P_4U | _PAGE_W_4U); | 2292 | _PAGE_P_4U | _PAGE_W_4U); |
2121 | 2293 | ||
2122 | /* XXX Should use 256MB on Panther. XXX */ | 2294 | for (i = 1; i < 4; i++) |
2123 | kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; | 2295 | kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; |
2124 | 2296 | ||
2125 | _PAGE_SZBITS = _PAGE_SZBITS_4U; | ||
2126 | _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | | 2297 | _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | |
2127 | _PAGE_SZ64K_4U | _PAGE_SZ8K_4U | | 2298 | _PAGE_SZ64K_4U | _PAGE_SZ8K_4U | |
2128 | _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U); | 2299 | _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U); |
@@ -2146,6 +2317,7 @@ static void __init sun4v_pgprot_init(void) | |||
2146 | { | 2317 | { |
2147 | unsigned long page_none, page_shared, page_copy, page_readonly; | 2318 | unsigned long page_none, page_shared, page_copy, page_readonly; |
2148 | unsigned long page_exec_bit; | 2319 | unsigned long page_exec_bit; |
2320 | int i; | ||
2149 | 2321 | ||
2150 | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | | 2322 | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | |
2151 | _PAGE_CACHE_4V | _PAGE_P_4V | | 2323 | _PAGE_CACHE_4V | _PAGE_P_4V | |
@@ -2158,8 +2330,7 @@ static void __init sun4v_pgprot_init(void) | |||
2158 | _PAGE_CACHE = _PAGE_CACHE_4V; | 2330 | _PAGE_CACHE = _PAGE_CACHE_4V; |
2159 | 2331 | ||
2160 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2332 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2161 | kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^ | 2333 | kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL; |
2162 | 0xfffff80000000000UL; | ||
2163 | #else | 2334 | #else |
2164 | kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ | 2335 | kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ |
2165 | 0xfffff80000000000UL; | 2336 | 0xfffff80000000000UL; |
@@ -2167,20 +2338,12 @@ static void __init sun4v_pgprot_init(void) | |||
2167 | kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | | 2338 | kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | |
2168 | _PAGE_P_4V | _PAGE_W_4V); | 2339 | _PAGE_P_4V | _PAGE_W_4V); |
2169 | 2340 | ||
2170 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2341 | for (i = 1; i < 4; i++) |
2171 | kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^ | 2342 | kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; |
2172 | 0xfffff80000000000UL; | ||
2173 | #else | ||
2174 | kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ | ||
2175 | 0xfffff80000000000UL; | ||
2176 | #endif | ||
2177 | kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | | ||
2178 | _PAGE_P_4V | _PAGE_W_4V); | ||
2179 | 2343 | ||
2180 | pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | | 2344 | pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | |
2181 | __ACCESS_BITS_4V | _PAGE_E_4V); | 2345 | __ACCESS_BITS_4V | _PAGE_E_4V); |
2182 | 2346 | ||
2183 | _PAGE_SZBITS = _PAGE_SZBITS_4V; | ||
2184 | _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V | | 2347 | _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V | |
2185 | _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V | | 2348 | _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V | |
2186 | _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | | 2349 | _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | |
@@ -2313,3 +2476,281 @@ void __flush_tlb_all(void) | |||
2313 | __asm__ __volatile__("wrpr %0, 0, %%pstate" | 2476 | __asm__ __volatile__("wrpr %0, 0, %%pstate" |
2314 | : : "r" (pstate)); | 2477 | : : "r" (pstate)); |
2315 | } | 2478 | } |
2479 | |||
2480 | static pte_t *get_from_cache(struct mm_struct *mm) | ||
2481 | { | ||
2482 | struct page *page; | ||
2483 | pte_t *ret; | ||
2484 | |||
2485 | spin_lock(&mm->page_table_lock); | ||
2486 | page = mm->context.pgtable_page; | ||
2487 | ret = NULL; | ||
2488 | if (page) { | ||
2489 | void *p = page_address(page); | ||
2490 | |||
2491 | mm->context.pgtable_page = NULL; | ||
2492 | |||
2493 | ret = (pte_t *) (p + (PAGE_SIZE / 2)); | ||
2494 | } | ||
2495 | spin_unlock(&mm->page_table_lock); | ||
2496 | |||
2497 | return ret; | ||
2498 | } | ||
2499 | |||
2500 | static struct page *__alloc_for_cache(struct mm_struct *mm) | ||
2501 | { | ||
2502 | struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | | ||
2503 | __GFP_REPEAT | __GFP_ZERO); | ||
2504 | |||
2505 | if (page) { | ||
2506 | spin_lock(&mm->page_table_lock); | ||
2507 | if (!mm->context.pgtable_page) { | ||
2508 | atomic_set(&page->_count, 2); | ||
2509 | mm->context.pgtable_page = page; | ||
2510 | } | ||
2511 | spin_unlock(&mm->page_table_lock); | ||
2512 | } | ||
2513 | return page; | ||
2514 | } | ||
2515 | |||
2516 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, | ||
2517 | unsigned long address) | ||
2518 | { | ||
2519 | struct page *page; | ||
2520 | pte_t *pte; | ||
2521 | |||
2522 | pte = get_from_cache(mm); | ||
2523 | if (pte) | ||
2524 | return pte; | ||
2525 | |||
2526 | page = __alloc_for_cache(mm); | ||
2527 | if (page) | ||
2528 | pte = (pte_t *) page_address(page); | ||
2529 | |||
2530 | return pte; | ||
2531 | } | ||
2532 | |||
2533 | pgtable_t pte_alloc_one(struct mm_struct *mm, | ||
2534 | unsigned long address) | ||
2535 | { | ||
2536 | struct page *page; | ||
2537 | pte_t *pte; | ||
2538 | |||
2539 | pte = get_from_cache(mm); | ||
2540 | if (pte) | ||
2541 | return pte; | ||
2542 | |||
2543 | page = __alloc_for_cache(mm); | ||
2544 | if (page) { | ||
2545 | pgtable_page_ctor(page); | ||
2546 | pte = (pte_t *) page_address(page); | ||
2547 | } | ||
2548 | |||
2549 | return pte; | ||
2550 | } | ||
2551 | |||
2552 | void pte_free_kernel(struct mm_struct *mm, pte_t *pte) | ||
2553 | { | ||
2554 | struct page *page = virt_to_page(pte); | ||
2555 | if (put_page_testzero(page)) | ||
2556 | free_hot_cold_page(page, 0); | ||
2557 | } | ||
2558 | |||
2559 | static void __pte_free(pgtable_t pte) | ||
2560 | { | ||
2561 | struct page *page = virt_to_page(pte); | ||
2562 | if (put_page_testzero(page)) { | ||
2563 | pgtable_page_dtor(page); | ||
2564 | free_hot_cold_page(page, 0); | ||
2565 | } | ||
2566 | } | ||
2567 | |||
2568 | void pte_free(struct mm_struct *mm, pgtable_t pte) | ||
2569 | { | ||
2570 | __pte_free(pte); | ||
2571 | } | ||
2572 | |||
2573 | void pgtable_free(void *table, bool is_page) | ||
2574 | { | ||
2575 | if (is_page) | ||
2576 | __pte_free(table); | ||
2577 | else | ||
2578 | kmem_cache_free(pgtable_cache, table); | ||
2579 | } | ||
2580 | |||
2581 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
2582 | static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify) | ||
2583 | { | ||
2584 | if (pgprot_val(pgprot) & _PAGE_VALID) | ||
2585 | pmd_val(pmd) |= PMD_HUGE_PRESENT; | ||
2586 | if (tlb_type == hypervisor) { | ||
2587 | if (pgprot_val(pgprot) & _PAGE_WRITE_4V) | ||
2588 | pmd_val(pmd) |= PMD_HUGE_WRITE; | ||
2589 | if (pgprot_val(pgprot) & _PAGE_EXEC_4V) | ||
2590 | pmd_val(pmd) |= PMD_HUGE_EXEC; | ||
2591 | |||
2592 | if (!for_modify) { | ||
2593 | if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V) | ||
2594 | pmd_val(pmd) |= PMD_HUGE_ACCESSED; | ||
2595 | if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V) | ||
2596 | pmd_val(pmd) |= PMD_HUGE_DIRTY; | ||
2597 | } | ||
2598 | } else { | ||
2599 | if (pgprot_val(pgprot) & _PAGE_WRITE_4U) | ||
2600 | pmd_val(pmd) |= PMD_HUGE_WRITE; | ||
2601 | if (pgprot_val(pgprot) & _PAGE_EXEC_4U) | ||
2602 | pmd_val(pmd) |= PMD_HUGE_EXEC; | ||
2603 | |||
2604 | if (!for_modify) { | ||
2605 | if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U) | ||
2606 | pmd_val(pmd) |= PMD_HUGE_ACCESSED; | ||
2607 | if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U) | ||
2608 | pmd_val(pmd) |= PMD_HUGE_DIRTY; | ||
2609 | } | ||
2610 | } | ||
2611 | |||
2612 | return pmd; | ||
2613 | } | ||
2614 | |||
2615 | pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) | ||
2616 | { | ||
2617 | pmd_t pmd; | ||
2618 | |||
2619 | pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT))); | ||
2620 | pmd_val(pmd) |= PMD_ISHUGE; | ||
2621 | pmd = pmd_set_protbits(pmd, pgprot, false); | ||
2622 | return pmd; | ||
2623 | } | ||
2624 | |||
2625 | pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) | ||
2626 | { | ||
2627 | pmd_val(pmd) &= ~(PMD_HUGE_PRESENT | | ||
2628 | PMD_HUGE_WRITE | | ||
2629 | PMD_HUGE_EXEC); | ||
2630 | pmd = pmd_set_protbits(pmd, newprot, true); | ||
2631 | return pmd; | ||
2632 | } | ||
2633 | |||
2634 | pgprot_t pmd_pgprot(pmd_t entry) | ||
2635 | { | ||
2636 | unsigned long pte = 0; | ||
2637 | |||
2638 | if (pmd_val(entry) & PMD_HUGE_PRESENT) | ||
2639 | pte |= _PAGE_VALID; | ||
2640 | |||
2641 | if (tlb_type == hypervisor) { | ||
2642 | if (pmd_val(entry) & PMD_HUGE_PRESENT) | ||
2643 | pte |= _PAGE_PRESENT_4V; | ||
2644 | if (pmd_val(entry) & PMD_HUGE_EXEC) | ||
2645 | pte |= _PAGE_EXEC_4V; | ||
2646 | if (pmd_val(entry) & PMD_HUGE_WRITE) | ||
2647 | pte |= _PAGE_W_4V; | ||
2648 | if (pmd_val(entry) & PMD_HUGE_ACCESSED) | ||
2649 | pte |= _PAGE_ACCESSED_4V; | ||
2650 | if (pmd_val(entry) & PMD_HUGE_DIRTY) | ||
2651 | pte |= _PAGE_MODIFIED_4V; | ||
2652 | pte |= _PAGE_CP_4V|_PAGE_CV_4V; | ||
2653 | } else { | ||
2654 | if (pmd_val(entry) & PMD_HUGE_PRESENT) | ||
2655 | pte |= _PAGE_PRESENT_4U; | ||
2656 | if (pmd_val(entry) & PMD_HUGE_EXEC) | ||
2657 | pte |= _PAGE_EXEC_4U; | ||
2658 | if (pmd_val(entry) & PMD_HUGE_WRITE) | ||
2659 | pte |= _PAGE_W_4U; | ||
2660 | if (pmd_val(entry) & PMD_HUGE_ACCESSED) | ||
2661 | pte |= _PAGE_ACCESSED_4U; | ||
2662 | if (pmd_val(entry) & PMD_HUGE_DIRTY) | ||
2663 | pte |= _PAGE_MODIFIED_4U; | ||
2664 | pte |= _PAGE_CP_4U|_PAGE_CV_4U; | ||
2665 | } | ||
2666 | |||
2667 | return __pgprot(pte); | ||
2668 | } | ||
2669 | |||
2670 | void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, | ||
2671 | pmd_t *pmd) | ||
2672 | { | ||
2673 | unsigned long pte, flags; | ||
2674 | struct mm_struct *mm; | ||
2675 | pmd_t entry = *pmd; | ||
2676 | pgprot_t prot; | ||
2677 | |||
2678 | if (!pmd_large(entry) || !pmd_young(entry)) | ||
2679 | return; | ||
2680 | |||
2681 | pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS); | ||
2682 | pte <<= PMD_PADDR_SHIFT; | ||
2683 | pte |= _PAGE_VALID; | ||
2684 | |||
2685 | prot = pmd_pgprot(entry); | ||
2686 | |||
2687 | if (tlb_type == hypervisor) | ||
2688 | pgprot_val(prot) |= _PAGE_SZHUGE_4V; | ||
2689 | else | ||
2690 | pgprot_val(prot) |= _PAGE_SZHUGE_4U; | ||
2691 | |||
2692 | pte |= pgprot_val(prot); | ||
2693 | |||
2694 | mm = vma->vm_mm; | ||
2695 | |||
2696 | spin_lock_irqsave(&mm->context.lock, flags); | ||
2697 | |||
2698 | if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) | ||
2699 | __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT, | ||
2700 | addr, pte); | ||
2701 | |||
2702 | spin_unlock_irqrestore(&mm->context.lock, flags); | ||
2703 | } | ||
2704 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
2705 | |||
2706 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) | ||
2707 | static void context_reload(void *__data) | ||
2708 | { | ||
2709 | struct mm_struct *mm = __data; | ||
2710 | |||
2711 | if (mm == current->mm) | ||
2712 | load_secondary_context(mm); | ||
2713 | } | ||
2714 | |||
2715 | void hugetlb_setup(struct mm_struct *mm) | ||
2716 | { | ||
2717 | struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE]; | ||
2718 | |||
2719 | if (likely(tp->tsb != NULL)) | ||
2720 | return; | ||
2721 | |||
2722 | tsb_grow(mm, MM_TSB_HUGE, 0); | ||
2723 | tsb_context_switch(mm); | ||
2724 | smp_tsb_sync(mm); | ||
2725 | |||
2726 | /* On UltraSPARC-III+ and later, configure the second half of | ||
2727 | * the Data-TLB for huge pages. | ||
2728 | */ | ||
2729 | if (tlb_type == cheetah_plus) { | ||
2730 | unsigned long ctx; | ||
2731 | |||
2732 | spin_lock(&ctx_alloc_lock); | ||
2733 | ctx = mm->context.sparc64_ctx_val; | ||
2734 | ctx &= ~CTX_PGSZ_MASK; | ||
2735 | ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; | ||
2736 | ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT; | ||
2737 | |||
2738 | if (ctx != mm->context.sparc64_ctx_val) { | ||
2739 | /* When changing the page size fields, we | ||
2740 | * must perform a context flush so that no | ||
2741 | * stale entries match. This flush must | ||
2742 | * occur with the original context register | ||
2743 | * settings. | ||
2744 | */ | ||
2745 | do_flush_tlb_mm(mm); | ||
2746 | |||
2747 | /* Reload the context register of all processors | ||
2748 | * also executing in this address space. | ||
2749 | */ | ||
2750 | mm->context.sparc64_ctx_val = ctx; | ||
2751 | on_each_cpu(context_reload, mm, 0); | ||
2752 | } | ||
2753 | spin_unlock(&ctx_alloc_lock); | ||
2754 | } | ||
2755 | } | ||
2756 | #endif | ||
diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index 3e1ac8b96cae..0661aa606dec 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h | |||
@@ -8,12 +8,12 @@ | |||
8 | #define MAX_PHYS_ADDRESS (1UL << 41UL) | 8 | #define MAX_PHYS_ADDRESS (1UL << 41UL) |
9 | #define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) | 9 | #define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) |
10 | #define KPTE_BITMAP_BYTES \ | 10 | #define KPTE_BITMAP_BYTES \ |
11 | ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8) | 11 | ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) |
12 | #define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) | 12 | #define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) |
13 | #define VALID_ADDR_BITMAP_BYTES \ | 13 | #define VALID_ADDR_BITMAP_BYTES \ |
14 | ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) | 14 | ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) |
15 | 15 | ||
16 | extern unsigned long kern_linear_pte_xor[2]; | 16 | extern unsigned long kern_linear_pte_xor[4]; |
17 | extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; | 17 | extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; |
18 | extern unsigned int sparc64_highest_unlocked_tlb_ent; | 18 | extern unsigned int sparc64_highest_unlocked_tlb_ent; |
19 | extern unsigned long sparc64_kern_pri_context; | 19 | extern unsigned long sparc64_kern_pri_context; |
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index a8a58cad9d2b..0f4f7191fbba 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c | |||
@@ -90,8 +90,8 @@ static void __init sbus_iommu_init(struct platform_device *op) | |||
90 | it to us. */ | 90 | it to us. */ |
91 | tmp = __get_free_pages(GFP_KERNEL, IOMMU_ORDER); | 91 | tmp = __get_free_pages(GFP_KERNEL, IOMMU_ORDER); |
92 | if (!tmp) { | 92 | if (!tmp) { |
93 | prom_printf("Unable to allocate iommu table [0x%08x]\n", | 93 | prom_printf("Unable to allocate iommu table [0x%lx]\n", |
94 | IOMMU_NPTES*sizeof(iopte_t)); | 94 | IOMMU_NPTES * sizeof(iopte_t)); |
95 | prom_halt(); | 95 | prom_halt(); |
96 | } | 96 | } |
97 | iommu->page_table = (iopte_t *)tmp; | 97 | iommu->page_table = (iopte_t *)tmp; |
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index b1f279cd00bf..3e8fec391fe0 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c | |||
@@ -43,16 +43,37 @@ void flush_tlb_pending(void) | |||
43 | put_cpu_var(tlb_batch); | 43 | put_cpu_var(tlb_batch); |
44 | } | 44 | } |
45 | 45 | ||
46 | void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, | 46 | static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, |
47 | pte_t *ptep, pte_t orig, int fullmm) | 47 | bool exec) |
48 | { | 48 | { |
49 | struct tlb_batch *tb = &get_cpu_var(tlb_batch); | 49 | struct tlb_batch *tb = &get_cpu_var(tlb_batch); |
50 | unsigned long nr; | 50 | unsigned long nr; |
51 | 51 | ||
52 | vaddr &= PAGE_MASK; | 52 | vaddr &= PAGE_MASK; |
53 | if (pte_exec(orig)) | 53 | if (exec) |
54 | vaddr |= 0x1UL; | 54 | vaddr |= 0x1UL; |
55 | 55 | ||
56 | nr = tb->tlb_nr; | ||
57 | |||
58 | if (unlikely(nr != 0 && mm != tb->mm)) { | ||
59 | flush_tlb_pending(); | ||
60 | nr = 0; | ||
61 | } | ||
62 | |||
63 | if (nr == 0) | ||
64 | tb->mm = mm; | ||
65 | |||
66 | tb->vaddrs[nr] = vaddr; | ||
67 | tb->tlb_nr = ++nr; | ||
68 | if (nr >= TLB_BATCH_NR) | ||
69 | flush_tlb_pending(); | ||
70 | |||
71 | put_cpu_var(tlb_batch); | ||
72 | } | ||
73 | |||
74 | void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, | ||
75 | pte_t *ptep, pte_t orig, int fullmm) | ||
76 | { | ||
56 | if (tlb_type != hypervisor && | 77 | if (tlb_type != hypervisor && |
57 | pte_dirty(orig)) { | 78 | pte_dirty(orig)) { |
58 | unsigned long paddr, pfn = pte_pfn(orig); | 79 | unsigned long paddr, pfn = pte_pfn(orig); |
@@ -77,26 +98,91 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, | |||
77 | } | 98 | } |
78 | 99 | ||
79 | no_cache_flush: | 100 | no_cache_flush: |
101 | if (!fullmm) | ||
102 | tlb_batch_add_one(mm, vaddr, pte_exec(orig)); | ||
103 | } | ||
104 | |||
105 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
106 | static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, | ||
107 | pmd_t pmd, bool exec) | ||
108 | { | ||
109 | unsigned long end; | ||
110 | pte_t *pte; | ||
111 | |||
112 | pte = pte_offset_map(&pmd, vaddr); | ||
113 | end = vaddr + HPAGE_SIZE; | ||
114 | while (vaddr < end) { | ||
115 | if (pte_val(*pte) & _PAGE_VALID) | ||
116 | tlb_batch_add_one(mm, vaddr, exec); | ||
117 | pte++; | ||
118 | vaddr += PAGE_SIZE; | ||
119 | } | ||
120 | pte_unmap(pte); | ||
121 | } | ||
80 | 122 | ||
81 | if (fullmm) { | 123 | void set_pmd_at(struct mm_struct *mm, unsigned long addr, |
82 | put_cpu_var(tlb_batch); | 124 | pmd_t *pmdp, pmd_t pmd) |
125 | { | ||
126 | pmd_t orig = *pmdp; | ||
127 | |||
128 | *pmdp = pmd; | ||
129 | |||
130 | if (mm == &init_mm) | ||
83 | return; | 131 | return; |
132 | |||
133 | if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) { | ||
134 | if (pmd_val(pmd) & PMD_ISHUGE) | ||
135 | mm->context.huge_pte_count++; | ||
136 | else | ||
137 | mm->context.huge_pte_count--; | ||
138 | if (mm->context.huge_pte_count == 1) | ||
139 | hugetlb_setup(mm); | ||
84 | } | 140 | } |
85 | 141 | ||
86 | nr = tb->tlb_nr; | 142 | if (!pmd_none(orig)) { |
143 | bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0); | ||
87 | 144 | ||
88 | if (unlikely(nr != 0 && mm != tb->mm)) { | 145 | addr &= HPAGE_MASK; |
89 | flush_tlb_pending(); | 146 | if (pmd_val(orig) & PMD_ISHUGE) |
90 | nr = 0; | 147 | tlb_batch_add_one(mm, addr, exec); |
148 | else | ||
149 | tlb_batch_pmd_scan(mm, addr, orig, exec); | ||
91 | } | 150 | } |
151 | } | ||
92 | 152 | ||
93 | if (nr == 0) | 153 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) |
94 | tb->mm = mm; | 154 | { |
155 | struct list_head *lh = (struct list_head *) pgtable; | ||
95 | 156 | ||
96 | tb->vaddrs[nr] = vaddr; | 157 | assert_spin_locked(&mm->page_table_lock); |
97 | tb->tlb_nr = ++nr; | ||
98 | if (nr >= TLB_BATCH_NR) | ||
99 | flush_tlb_pending(); | ||
100 | 158 | ||
101 | put_cpu_var(tlb_batch); | 159 | /* FIFO */ |
160 | if (!mm->pmd_huge_pte) | ||
161 | INIT_LIST_HEAD(lh); | ||
162 | else | ||
163 | list_add(lh, (struct list_head *) mm->pmd_huge_pte); | ||
164 | mm->pmd_huge_pte = pgtable; | ||
165 | } | ||
166 | |||
167 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) | ||
168 | { | ||
169 | struct list_head *lh; | ||
170 | pgtable_t pgtable; | ||
171 | |||
172 | assert_spin_locked(&mm->page_table_lock); | ||
173 | |||
174 | /* FIFO */ | ||
175 | pgtable = mm->pmd_huge_pte; | ||
176 | lh = (struct list_head *) pgtable; | ||
177 | if (list_empty(lh)) | ||
178 | mm->pmd_huge_pte = NULL; | ||
179 | else { | ||
180 | mm->pmd_huge_pte = (pgtable_t) lh->next; | ||
181 | list_del(lh); | ||
182 | } | ||
183 | pte_val(pgtable[0]) = 0; | ||
184 | pte_val(pgtable[1]) = 0; | ||
185 | |||
186 | return pgtable; | ||
102 | } | 187 | } |
188 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index c52add79b83d..7f6474347491 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c | |||
@@ -78,7 +78,7 @@ void flush_tsb_user(struct tlb_batch *tb) | |||
78 | base = __pa(base); | 78 | base = __pa(base); |
79 | __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); | 79 | __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); |
80 | 80 | ||
81 | #ifdef CONFIG_HUGETLB_PAGE | 81 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
82 | if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { | 82 | if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { |
83 | base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; | 83 | base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; |
84 | nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; | 84 | nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; |
@@ -90,29 +90,12 @@ void flush_tsb_user(struct tlb_batch *tb) | |||
90 | spin_unlock_irqrestore(&mm->context.lock, flags); | 90 | spin_unlock_irqrestore(&mm->context.lock, flags); |
91 | } | 91 | } |
92 | 92 | ||
93 | #if defined(CONFIG_SPARC64_PAGE_SIZE_8KB) | ||
94 | #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K | 93 | #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K |
95 | #define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K | 94 | #define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K |
96 | #elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB) | ||
97 | #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K | ||
98 | #define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K | ||
99 | #else | ||
100 | #error Broken base page size setting... | ||
101 | #endif | ||
102 | 95 | ||
103 | #ifdef CONFIG_HUGETLB_PAGE | 96 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
104 | #if defined(CONFIG_HUGETLB_PAGE_SIZE_64K) | ||
105 | #define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_64K | ||
106 | #define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_64K | ||
107 | #elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) | ||
108 | #define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_512K | ||
109 | #define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_512K | ||
110 | #elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) | ||
111 | #define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB | 97 | #define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB |
112 | #define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB | 98 | #define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB |
113 | #else | ||
114 | #error Broken huge page size setting... | ||
115 | #endif | ||
116 | #endif | 99 | #endif |
117 | 100 | ||
118 | static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes) | 101 | static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes) |
@@ -207,7 +190,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign | |||
207 | case MM_TSB_BASE: | 190 | case MM_TSB_BASE: |
208 | hp->pgsz_idx = HV_PGSZ_IDX_BASE; | 191 | hp->pgsz_idx = HV_PGSZ_IDX_BASE; |
209 | break; | 192 | break; |
210 | #ifdef CONFIG_HUGETLB_PAGE | 193 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
211 | case MM_TSB_HUGE: | 194 | case MM_TSB_HUGE: |
212 | hp->pgsz_idx = HV_PGSZ_IDX_HUGE; | 195 | hp->pgsz_idx = HV_PGSZ_IDX_HUGE; |
213 | break; | 196 | break; |
@@ -222,7 +205,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign | |||
222 | case MM_TSB_BASE: | 205 | case MM_TSB_BASE: |
223 | hp->pgsz_mask = HV_PGSZ_MASK_BASE; | 206 | hp->pgsz_mask = HV_PGSZ_MASK_BASE; |
224 | break; | 207 | break; |
225 | #ifdef CONFIG_HUGETLB_PAGE | 208 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
226 | case MM_TSB_HUGE: | 209 | case MM_TSB_HUGE: |
227 | hp->pgsz_mask = HV_PGSZ_MASK_HUGE; | 210 | hp->pgsz_mask = HV_PGSZ_MASK_HUGE; |
228 | break; | 211 | break; |
@@ -444,7 +427,7 @@ retry_tsb_alloc: | |||
444 | 427 | ||
445 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | 428 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) |
446 | { | 429 | { |
447 | #ifdef CONFIG_HUGETLB_PAGE | 430 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
448 | unsigned long huge_pte_count; | 431 | unsigned long huge_pte_count; |
449 | #endif | 432 | #endif |
450 | unsigned int i; | 433 | unsigned int i; |
@@ -453,7 +436,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | |||
453 | 436 | ||
454 | mm->context.sparc64_ctx_val = 0UL; | 437 | mm->context.sparc64_ctx_val = 0UL; |
455 | 438 | ||
456 | #ifdef CONFIG_HUGETLB_PAGE | 439 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
457 | /* We reset it to zero because the fork() page copying | 440 | /* We reset it to zero because the fork() page copying |
458 | * will re-increment the counters as the parent PTEs are | 441 | * will re-increment the counters as the parent PTEs are |
459 | * copied into the child address space. | 442 | * copied into the child address space. |
@@ -462,6 +445,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | |||
462 | mm->context.huge_pte_count = 0; | 445 | mm->context.huge_pte_count = 0; |
463 | #endif | 446 | #endif |
464 | 447 | ||
448 | mm->context.pgtable_page = NULL; | ||
449 | |||
465 | /* copy_mm() copies over the parent's mm_struct before calling | 450 | /* copy_mm() copies over the parent's mm_struct before calling |
466 | * us, so we need to zero out the TSB pointer or else tsb_grow() | 451 | * us, so we need to zero out the TSB pointer or else tsb_grow() |
467 | * will be confused and think there is an older TSB to free up. | 452 | * will be confused and think there is an older TSB to free up. |
@@ -474,7 +459,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | |||
474 | */ | 459 | */ |
475 | tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); | 460 | tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); |
476 | 461 | ||
477 | #ifdef CONFIG_HUGETLB_PAGE | 462 | #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) |
478 | if (unlikely(huge_pte_count)) | 463 | if (unlikely(huge_pte_count)) |
479 | tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); | 464 | tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); |
480 | #endif | 465 | #endif |
@@ -500,10 +485,17 @@ static void tsb_destroy_one(struct tsb_config *tp) | |||
500 | void destroy_context(struct mm_struct *mm) | 485 | void destroy_context(struct mm_struct *mm) |
501 | { | 486 | { |
502 | unsigned long flags, i; | 487 | unsigned long flags, i; |
488 | struct page *page; | ||
503 | 489 | ||
504 | for (i = 0; i < MM_NUM_TSBS; i++) | 490 | for (i = 0; i < MM_NUM_TSBS; i++) |
505 | tsb_destroy_one(&mm->context.tsb_block[i]); | 491 | tsb_destroy_one(&mm->context.tsb_block[i]); |
506 | 492 | ||
493 | page = mm->context.pgtable_page; | ||
494 | if (page && put_page_testzero(page)) { | ||
495 | pgtable_page_dtor(page); | ||
496 | free_hot_cold_page(page, 0); | ||
497 | } | ||
498 | |||
507 | spin_lock_irqsave(&ctx_alloc_lock, flags); | 499 | spin_lock_irqsave(&ctx_alloc_lock, flags); |
508 | 500 | ||
509 | if (CTX_VALID(mm->context)) { | 501 | if (CTX_VALID(mm->context)) { |
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index e9073e9501b3..28368701ef79 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c | |||
@@ -464,8 +464,12 @@ void bpf_jit_compile(struct sk_filter *fp) | |||
464 | emit_alu_K(OR, K); | 464 | emit_alu_K(OR, K); |
465 | break; | 465 | break; |
466 | case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ | 466 | case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ |
467 | case BPF_S_ALU_XOR_X: | ||
467 | emit_alu_X(XOR); | 468 | emit_alu_X(XOR); |
468 | break; | 469 | break; |
470 | case BPF_S_ALU_XOR_K: /* A ^= K */ | ||
471 | emit_alu_K(XOR, K); | ||
472 | break; | ||
469 | case BPF_S_ALU_LSH_X: /* A <<= X */ | 473 | case BPF_S_ALU_LSH_X: /* A <<= X */ |
470 | emit_alu_X(SLL); | 474 | emit_alu_X(SLL); |
471 | break; | 475 | break; |