aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com>2016-07-18 11:26:25 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2016-07-19 03:42:43 -0400
commit11c6e16ee13ab68b8ff04c17ab41611a4fcc5c81 (patch)
tree93c72797fc0a50b1177343762f5a5838ca8e23ca
parent5c562338dea29dc09415f39676e7107fa48ce763 (diff)
crypto: vmx - Adding asm subroutines for XTS
This patch add XTS subroutines using VMX-crypto driver. It gives a boost of 20 times using XTS. These code has been adopted from OpenSSL project in collaboration with the original author (Andy Polyakov <appro@openssl.org>). Signed-off-by: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com> Signed-off-by: Paulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.h4
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.pl1865
2 files changed, 1867 insertions, 2 deletions
diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h
index 4cd34ee54a94..01972e16a6c0 100644
--- a/drivers/crypto/vmx/aesp8-ppc.h
+++ b/drivers/crypto/vmx/aesp8-ppc.h
@@ -19,3 +19,7 @@ void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
19void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, 19void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out,
20 size_t len, const struct aes_key *key, 20 size_t len, const struct aes_key *key,
21 const u8 *iv); 21 const u8 *iv);
22void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len,
23 const struct aes_key *key1, const struct aes_key *key2, u8 *iv);
24void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len,
25 const struct aes_key *key1, const struct aes_key *key2, u8 *iv);
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
index 228053921b3f..813ffcc4d17c 100644
--- a/drivers/crypto/vmx/aesp8-ppc.pl
+++ b/drivers/crypto/vmx/aesp8-ppc.pl
@@ -1,4 +1,11 @@
1#!/usr/bin/env perl 1#! /usr/bin/env perl
2# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
2# 9#
3# ==================================================================== 10# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -20,6 +27,19 @@
20# instructions are interleaved. It's reckoned that eventual 27# instructions are interleaved. It's reckoned that eventual
21# misalignment penalties at page boundaries are in average lower 28# misalignment penalties at page boundaries are in average lower
22# than additional overhead in pure AltiVec approach. 29# than additional overhead in pure AltiVec approach.
30#
31# May 2016
32#
33# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
34# systems were measured.
35#
36######################################################################
37# Current large-block performance in cycles per byte processed with
38# 128-bit key (less is better).
39#
40# CBC en-/decrypt CTR XTS
41# POWER8[le] 3.96/0.72 0.74 1.1
42# POWER8[be] 3.75/0.65 0.66 1.0
23 43
24$flavour = shift; 44$flavour = shift;
25 45
@@ -1875,6 +1895,1847 @@ Lctr32_enc8x_done:
1875___ 1895___
1876}} }}} 1896}} }}}
1877 1897
1898#########################################################################
1899{{{ # XTS procedures #
1900# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1901# const AES_KEY *key1, const AES_KEY *key2, #
1902# [const] unsigned char iv[16]); #
1903# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1904# input tweak value is assumed to be encrypted already, and last tweak #
1905# value, one suitable for consecutive call on same chunk of data, is #
1906# written back to original buffer. In addition, in "tweak chaining" #
1907# mode only complete input blocks are processed. #
1908
1909my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1910my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1911my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1912my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
1913my $taillen = $key2;
1914
1915 ($inp,$idx) = ($idx,$inp); # reassign
1916
1917$code.=<<___;
1918.globl .${prefix}_xts_encrypt
1919 mr $inp,r3 # reassign
1920 li r3,-1
1921 ${UCMP}i $len,16
1922 bltlr-
1923
1924 lis r0,0xfff0
1925 mfspr r12,256 # save vrsave
1926 li r11,0
1927 mtspr 256,r0
1928
1929 vspltisb $seven,0x07 # 0x070707..07
1930 le?lvsl $leperm,r11,r11
1931 le?vspltisb $tmp,0x0f
1932 le?vxor $leperm,$leperm,$seven
1933
1934 li $idx,15
1935 lvx $tweak,0,$ivp # load [unaligned] iv
1936 lvsl $inpperm,0,$ivp
1937 lvx $inptail,$idx,$ivp
1938 le?vxor $inpperm,$inpperm,$tmp
1939 vperm $tweak,$tweak,$inptail,$inpperm
1940
1941 neg r11,$inp
1942 lvsr $inpperm,0,r11 # prepare for unaligned load
1943 lvx $inout,0,$inp
1944 addi $inp,$inp,15 # 15 is not typo
1945 le?vxor $inpperm,$inpperm,$tmp
1946
1947 ${UCMP}i $key2,0 # key2==NULL?
1948 beq Lxts_enc_no_key2
1949
1950 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
1951 lwz $rounds,240($key2)
1952 srwi $rounds,$rounds,1
1953 subi $rounds,$rounds,1
1954 li $idx,16
1955
1956 lvx $rndkey0,0,$key2
1957 lvx $rndkey1,$idx,$key2
1958 addi $idx,$idx,16
1959 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1960 vxor $tweak,$tweak,$rndkey0
1961 lvx $rndkey0,$idx,$key2
1962 addi $idx,$idx,16
1963 mtctr $rounds
1964
1965Ltweak_xts_enc:
1966 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1967 vcipher $tweak,$tweak,$rndkey1
1968 lvx $rndkey1,$idx,$key2
1969 addi $idx,$idx,16
1970 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1971 vcipher $tweak,$tweak,$rndkey0
1972 lvx $rndkey0,$idx,$key2
1973 addi $idx,$idx,16
1974 bdnz Ltweak_xts_enc
1975
1976 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1977 vcipher $tweak,$tweak,$rndkey1
1978 lvx $rndkey1,$idx,$key2
1979 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1980 vcipherlast $tweak,$tweak,$rndkey0
1981
1982 li $ivp,0 # don't chain the tweak
1983 b Lxts_enc
1984
1985Lxts_enc_no_key2:
1986 li $idx,-16
1987 and $len,$len,$idx # in "tweak chaining"
1988 # mode only complete
1989 # blocks are processed
1990Lxts_enc:
1991 lvx $inptail,0,$inp
1992 addi $inp,$inp,16
1993
1994 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
1995 lwz $rounds,240($key1)
1996 srwi $rounds,$rounds,1
1997 subi $rounds,$rounds,1
1998 li $idx,16
1999
2000 vslb $eighty7,$seven,$seven # 0x808080..80
2001 vor $eighty7,$eighty7,$seven # 0x878787..87
2002 vspltisb $tmp,1 # 0x010101..01
2003 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2004
2005 ${UCMP}i $len,96
2006 bge _aesp8_xts_encrypt6x
2007
2008 andi. $taillen,$len,15
2009 subic r0,$len,32
2010 subi $taillen,$taillen,16
2011 subfe r0,r0,r0
2012 and r0,r0,$taillen
2013 add $inp,$inp,r0
2014
2015 lvx $rndkey0,0,$key1
2016 lvx $rndkey1,$idx,$key1
2017 addi $idx,$idx,16
2018 vperm $inout,$inout,$inptail,$inpperm
2019 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2020 vxor $inout,$inout,$tweak
2021 vxor $inout,$inout,$rndkey0
2022 lvx $rndkey0,$idx,$key1
2023 addi $idx,$idx,16
2024 mtctr $rounds
2025 b Loop_xts_enc
2026
2027.align 5
2028Loop_xts_enc:
2029 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2030 vcipher $inout,$inout,$rndkey1
2031 lvx $rndkey1,$idx,$key1
2032 addi $idx,$idx,16
2033 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2034 vcipher $inout,$inout,$rndkey0
2035 lvx $rndkey0,$idx,$key1
2036 addi $idx,$idx,16
2037 bdnz Loop_xts_enc
2038
2039 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2040 vcipher $inout,$inout,$rndkey1
2041 lvx $rndkey1,$idx,$key1
2042 li $idx,16
2043 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2044 vxor $rndkey0,$rndkey0,$tweak
2045 vcipherlast $output,$inout,$rndkey0
2046
2047 le?vperm $tmp,$output,$output,$leperm
2048 be?nop
2049 le?stvx_u $tmp,0,$out
2050 be?stvx_u $output,0,$out
2051 addi $out,$out,16
2052
2053 subic. $len,$len,16
2054 beq Lxts_enc_done
2055
2056 vmr $inout,$inptail
2057 lvx $inptail,0,$inp
2058 addi $inp,$inp,16
2059 lvx $rndkey0,0,$key1
2060 lvx $rndkey1,$idx,$key1
2061 addi $idx,$idx,16
2062
2063 subic r0,$len,32
2064 subfe r0,r0,r0
2065 and r0,r0,$taillen
2066 add $inp,$inp,r0
2067
2068 vsrab $tmp,$tweak,$seven # next tweak value
2069 vaddubm $tweak,$tweak,$tweak
2070 vsldoi $tmp,$tmp,$tmp,15
2071 vand $tmp,$tmp,$eighty7
2072 vxor $tweak,$tweak,$tmp
2073
2074 vperm $inout,$inout,$inptail,$inpperm
2075 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2076 vxor $inout,$inout,$tweak
2077 vxor $output,$output,$rndkey0 # just in case $len<16
2078 vxor $inout,$inout,$rndkey0
2079 lvx $rndkey0,$idx,$key1
2080 addi $idx,$idx,16
2081
2082 mtctr $rounds
2083 ${UCMP}i $len,16
2084 bge Loop_xts_enc
2085
2086 vxor $output,$output,$tweak
2087 lvsr $inpperm,0,$len # $inpperm is no longer needed
2088 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2089 vspltisb $tmp,-1
2090 vperm $inptail,$inptail,$tmp,$inpperm
2091 vsel $inout,$inout,$output,$inptail
2092
2093 subi r11,$out,17
2094 subi $out,$out,16
2095 mtctr $len
2096 li $len,16
2097Loop_xts_enc_steal:
2098 lbzu r0,1(r11)
2099 stb r0,16(r11)
2100 bdnz Loop_xts_enc_steal
2101
2102 mtctr $rounds
2103 b Loop_xts_enc # one more time...
2104
2105Lxts_enc_done:
2106 ${UCMP}i $ivp,0
2107 beq Lxts_enc_ret
2108
2109 vsrab $tmp,$tweak,$seven # next tweak value
2110 vaddubm $tweak,$tweak,$tweak
2111 vsldoi $tmp,$tmp,$tmp,15
2112 vand $tmp,$tmp,$eighty7
2113 vxor $tweak,$tweak,$tmp
2114
2115 le?vperm $tweak,$tweak,$tweak,$leperm
2116 stvx_u $tweak,0,$ivp
2117
2118Lxts_enc_ret:
2119 mtspr 256,r12 # restore vrsave
2120 li r3,0
2121 blr
2122 .long 0
2123 .byte 0,12,0x04,0,0x80,6,6,0
2124 .long 0
2125.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2126
2127.globl .${prefix}_xts_decrypt
2128.align 5
2129.${prefix}_xts_decrypt:
2130 mr $inp,r3 # reassign
2131 li r3,-1
2132 ${UCMP}i $len,16
2133 bltlr-
2134
2135 lis r0,0xfff8
2136 mfspr r12,256 # save vrsave
2137 li r11,0
2138 mtspr 256,r0
2139
2140 andi. r0,$len,15
2141 neg r0,r0
2142 andi. r0,r0,16
2143 sub $len,$len,r0
2144
2145 vspltisb $seven,0x07 # 0x070707..07
2146 le?lvsl $leperm,r11,r11
2147 le?vspltisb $tmp,0x0f
2148 le?vxor $leperm,$leperm,$seven
2149
2150 li $idx,15
2151 lvx $tweak,0,$ivp # load [unaligned] iv
2152 lvsl $inpperm,0,$ivp
2153 lvx $inptail,$idx,$ivp
2154 le?vxor $inpperm,$inpperm,$tmp
2155 vperm $tweak,$tweak,$inptail,$inpperm
2156
2157 neg r11,$inp
2158 lvsr $inpperm,0,r11 # prepare for unaligned load
2159 lvx $inout,0,$inp
2160 addi $inp,$inp,15 # 15 is not typo
2161 le?vxor $inpperm,$inpperm,$tmp
2162
2163 ${UCMP}i $key2,0 # key2==NULL?
2164 beq Lxts_dec_no_key2
2165
2166 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2167 lwz $rounds,240($key2)
2168 srwi $rounds,$rounds,1
2169 subi $rounds,$rounds,1
2170 li $idx,16
2171
2172 lvx $rndkey0,0,$key2
2173 lvx $rndkey1,$idx,$key2
2174 addi $idx,$idx,16
2175 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2176 vxor $tweak,$tweak,$rndkey0
2177 lvx $rndkey0,$idx,$key2
2178 addi $idx,$idx,16
2179 mtctr $rounds
2180
2181Ltweak_xts_dec:
2182 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2183 vcipher $tweak,$tweak,$rndkey1
2184 lvx $rndkey1,$idx,$key2
2185 addi $idx,$idx,16
2186 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2187 vcipher $tweak,$tweak,$rndkey0
2188 lvx $rndkey0,$idx,$key2
2189 addi $idx,$idx,16
2190 bdnz Ltweak_xts_dec
2191
2192 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2193 vcipher $tweak,$tweak,$rndkey1
2194 lvx $rndkey1,$idx,$key2
2195 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2196 vcipherlast $tweak,$tweak,$rndkey0
2197
2198 li $ivp,0 # don't chain the tweak
2199 b Lxts_dec
2200
2201Lxts_dec_no_key2:
2202 neg $idx,$len
2203 andi. $idx,$idx,15
2204 add $len,$len,$idx # in "tweak chaining"
2205 # mode only complete
2206 # blocks are processed
2207Lxts_dec:
2208 lvx $inptail,0,$inp
2209 addi $inp,$inp,16
2210
2211 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2212 lwz $rounds,240($key1)
2213 srwi $rounds,$rounds,1
2214 subi $rounds,$rounds,1
2215 li $idx,16
2216
2217 vslb $eighty7,$seven,$seven # 0x808080..80
2218 vor $eighty7,$eighty7,$seven # 0x878787..87
2219 vspltisb $tmp,1 # 0x010101..01
2220 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2221
2222 ${UCMP}i $len,96
2223 bge _aesp8_xts_decrypt6x
2224
2225 lvx $rndkey0,0,$key1
2226 lvx $rndkey1,$idx,$key1
2227 addi $idx,$idx,16
2228 vperm $inout,$inout,$inptail,$inpperm
2229 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2230 vxor $inout,$inout,$tweak
2231 vxor $inout,$inout,$rndkey0
2232 lvx $rndkey0,$idx,$key1
2233 addi $idx,$idx,16
2234 mtctr $rounds
2235
2236 ${UCMP}i $len,16
2237 blt Ltail_xts_dec
2238 be?b Loop_xts_dec
2239
2240.align 5
2241Loop_xts_dec:
2242 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2243 vncipher $inout,$inout,$rndkey1
2244 lvx $rndkey1,$idx,$key1
2245 addi $idx,$idx,16
2246 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2247 vncipher $inout,$inout,$rndkey0
2248 lvx $rndkey0,$idx,$key1
2249 addi $idx,$idx,16
2250 bdnz Loop_xts_dec
2251
2252 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2253 vncipher $inout,$inout,$rndkey1
2254 lvx $rndkey1,$idx,$key1
2255 li $idx,16
2256 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2257 vxor $rndkey0,$rndkey0,$tweak
2258 vncipherlast $output,$inout,$rndkey0
2259
2260 le?vperm $tmp,$output,$output,$leperm
2261 be?nop
2262 le?stvx_u $tmp,0,$out
2263 be?stvx_u $output,0,$out
2264 addi $out,$out,16
2265
2266 subic. $len,$len,16
2267 beq Lxts_dec_done
2268
2269 vmr $inout,$inptail
2270 lvx $inptail,0,$inp
2271 addi $inp,$inp,16
2272 lvx $rndkey0,0,$key1
2273 lvx $rndkey1,$idx,$key1
2274 addi $idx,$idx,16
2275
2276 vsrab $tmp,$tweak,$seven # next tweak value
2277 vaddubm $tweak,$tweak,$tweak
2278 vsldoi $tmp,$tmp,$tmp,15
2279 vand $tmp,$tmp,$eighty7
2280 vxor $tweak,$tweak,$tmp
2281
2282 vperm $inout,$inout,$inptail,$inpperm
2283 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2284 vxor $inout,$inout,$tweak
2285 vxor $inout,$inout,$rndkey0
2286 lvx $rndkey0,$idx,$key1
2287 addi $idx,$idx,16
2288
2289 mtctr $rounds
2290 ${UCMP}i $len,16
2291 bge Loop_xts_dec
2292
2293Ltail_xts_dec:
2294 vsrab $tmp,$tweak,$seven # next tweak value
2295 vaddubm $tweak1,$tweak,$tweak
2296 vsldoi $tmp,$tmp,$tmp,15
2297 vand $tmp,$tmp,$eighty7
2298 vxor $tweak1,$tweak1,$tmp
2299
2300 subi $inp,$inp,16
2301 add $inp,$inp,$len
2302
2303 vxor $inout,$inout,$tweak # :-(
2304 vxor $inout,$inout,$tweak1 # :-)
2305
2306Loop_xts_dec_short:
2307 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2308 vncipher $inout,$inout,$rndkey1
2309 lvx $rndkey1,$idx,$key1
2310 addi $idx,$idx,16
2311 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2312 vncipher $inout,$inout,$rndkey0
2313 lvx $rndkey0,$idx,$key1
2314 addi $idx,$idx,16
2315 bdnz Loop_xts_dec_short
2316
2317 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2318 vncipher $inout,$inout,$rndkey1
2319 lvx $rndkey1,$idx,$key1
2320 li $idx,16
2321 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2322 vxor $rndkey0,$rndkey0,$tweak1
2323 vncipherlast $output,$inout,$rndkey0
2324
2325 le?vperm $tmp,$output,$output,$leperm
2326 be?nop
2327 le?stvx_u $tmp,0,$out
2328 be?stvx_u $output,0,$out
2329
2330 vmr $inout,$inptail
2331 lvx $inptail,0,$inp
2332 #addi $inp,$inp,16
2333 lvx $rndkey0,0,$key1
2334 lvx $rndkey1,$idx,$key1
2335 addi $idx,$idx,16
2336 vperm $inout,$inout,$inptail,$inpperm
2337 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2338
2339 lvsr $inpperm,0,$len # $inpperm is no longer needed
2340 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2341 vspltisb $tmp,-1
2342 vperm $inptail,$inptail,$tmp,$inpperm
2343 vsel $inout,$inout,$output,$inptail
2344
2345 vxor $rndkey0,$rndkey0,$tweak
2346 vxor $inout,$inout,$rndkey0
2347 lvx $rndkey0,$idx,$key1
2348 addi $idx,$idx,16
2349
2350 subi r11,$out,1
2351 mtctr $len
2352 li $len,16
2353Loop_xts_dec_steal:
2354 lbzu r0,1(r11)
2355 stb r0,16(r11)
2356 bdnz Loop_xts_dec_steal
2357
2358 mtctr $rounds
2359 b Loop_xts_dec # one more time...
2360
2361Lxts_dec_done:
2362 ${UCMP}i $ivp,0
2363 beq Lxts_dec_ret
2364
2365 vsrab $tmp,$tweak,$seven # next tweak value
2366 vaddubm $tweak,$tweak,$tweak
2367 vsldoi $tmp,$tmp,$tmp,15
2368 vand $tmp,$tmp,$eighty7
2369 vxor $tweak,$tweak,$tmp
2370
2371 le?vperm $tweak,$tweak,$tweak,$leperm
2372 stvx_u $tweak,0,$ivp
2373
2374Lxts_dec_ret:
2375 mtspr 256,r12 # restore vrsave
2376 li r3,0
2377 blr
2378 .long 0
2379 .byte 0,12,0x04,0,0x80,6,6,0
2380 .long 0
2381.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2382___
2383#########################################################################
2384{{ # Optimized XTS procedures #
2385my $key_=$key2;
2386my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2387 $x00=0 if ($flavour =~ /osx/);
2388my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2389my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2390my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2391my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2392 # v26-v31 last 6 round keys
2393my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2394my $taillen=$x70;
2395
2396$code.=<<___;
2397.align 5
2398_aesp8_xts_encrypt6x:
2399 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2400 mflr r11
2401 li r7,`$FRAME+8*16+15`
2402 li r3,`$FRAME+8*16+31`
2403 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2404 stvx v20,r7,$sp # ABI says so
2405 addi r7,r7,32
2406 stvx v21,r3,$sp
2407 addi r3,r3,32
2408 stvx v22,r7,$sp
2409 addi r7,r7,32
2410 stvx v23,r3,$sp
2411 addi r3,r3,32
2412 stvx v24,r7,$sp
2413 addi r7,r7,32
2414 stvx v25,r3,$sp
2415 addi r3,r3,32
2416 stvx v26,r7,$sp
2417 addi r7,r7,32
2418 stvx v27,r3,$sp
2419 addi r3,r3,32
2420 stvx v28,r7,$sp
2421 addi r7,r7,32
2422 stvx v29,r3,$sp
2423 addi r3,r3,32
2424 stvx v30,r7,$sp
2425 stvx v31,r3,$sp
2426 li r0,-1
2427 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2428 li $x10,0x10
2429 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2430 li $x20,0x20
2431 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2432 li $x30,0x30
2433 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2434 li $x40,0x40
2435 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2436 li $x50,0x50
2437 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2438 li $x60,0x60
2439 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2440 li $x70,0x70
2441 mtspr 256,r0
2442
2443 subi $rounds,$rounds,3 # -4 in total
2444
2445 lvx $rndkey0,$x00,$key1 # load key schedule
2446 lvx v30,$x10,$key1
2447 addi $key1,$key1,0x20
2448 lvx v31,$x00,$key1
2449 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2450 addi $key_,$sp,$FRAME+15
2451 mtctr $rounds
2452
2453Load_xts_enc_key:
2454 ?vperm v24,v30,v31,$keyperm
2455 lvx v30,$x10,$key1
2456 addi $key1,$key1,0x20
2457 stvx v24,$x00,$key_ # off-load round[1]
2458 ?vperm v25,v31,v30,$keyperm
2459 lvx v31,$x00,$key1
2460 stvx v25,$x10,$key_ # off-load round[2]
2461 addi $key_,$key_,0x20
2462 bdnz Load_xts_enc_key
2463
2464 lvx v26,$x10,$key1
2465 ?vperm v24,v30,v31,$keyperm
2466 lvx v27,$x20,$key1
2467 stvx v24,$x00,$key_ # off-load round[3]
2468 ?vperm v25,v31,v26,$keyperm
2469 lvx v28,$x30,$key1
2470 stvx v25,$x10,$key_ # off-load round[4]
2471 addi $key_,$sp,$FRAME+15 # rewind $key_
2472 ?vperm v26,v26,v27,$keyperm
2473 lvx v29,$x40,$key1
2474 ?vperm v27,v27,v28,$keyperm
2475 lvx v30,$x50,$key1
2476 ?vperm v28,v28,v29,$keyperm
2477 lvx v31,$x60,$key1
2478 ?vperm v29,v29,v30,$keyperm
2479 lvx $twk5,$x70,$key1 # borrow $twk5
2480 ?vperm v30,v30,v31,$keyperm
2481 lvx v24,$x00,$key_ # pre-load round[1]
2482 ?vperm v31,v31,$twk5,$keyperm
2483 lvx v25,$x10,$key_ # pre-load round[2]
2484
2485 vperm $in0,$inout,$inptail,$inpperm
2486 subi $inp,$inp,31 # undo "caller"
2487 vxor $twk0,$tweak,$rndkey0
2488 vsrab $tmp,$tweak,$seven # next tweak value
2489 vaddubm $tweak,$tweak,$tweak
2490 vsldoi $tmp,$tmp,$tmp,15
2491 vand $tmp,$tmp,$eighty7
2492 vxor $out0,$in0,$twk0
2493 vxor $tweak,$tweak,$tmp
2494
2495 lvx_u $in1,$x10,$inp
2496 vxor $twk1,$tweak,$rndkey0
2497 vsrab $tmp,$tweak,$seven # next tweak value
2498 vaddubm $tweak,$tweak,$tweak
2499 vsldoi $tmp,$tmp,$tmp,15
2500 le?vperm $in1,$in1,$in1,$leperm
2501 vand $tmp,$tmp,$eighty7
2502 vxor $out1,$in1,$twk1
2503 vxor $tweak,$tweak,$tmp
2504
2505 lvx_u $in2,$x20,$inp
2506 andi. $taillen,$len,15
2507 vxor $twk2,$tweak,$rndkey0
2508 vsrab $tmp,$tweak,$seven # next tweak value
2509 vaddubm $tweak,$tweak,$tweak
2510 vsldoi $tmp,$tmp,$tmp,15
2511 le?vperm $in2,$in2,$in2,$leperm
2512 vand $tmp,$tmp,$eighty7
2513 vxor $out2,$in2,$twk2
2514 vxor $tweak,$tweak,$tmp
2515
2516 lvx_u $in3,$x30,$inp
2517 sub $len,$len,$taillen
2518 vxor $twk3,$tweak,$rndkey0
2519 vsrab $tmp,$tweak,$seven # next tweak value
2520 vaddubm $tweak,$tweak,$tweak
2521 vsldoi $tmp,$tmp,$tmp,15
2522 le?vperm $in3,$in3,$in3,$leperm
2523 vand $tmp,$tmp,$eighty7
2524 vxor $out3,$in3,$twk3
2525 vxor $tweak,$tweak,$tmp
2526
2527 lvx_u $in4,$x40,$inp
2528 subi $len,$len,0x60
2529 vxor $twk4,$tweak,$rndkey0
2530 vsrab $tmp,$tweak,$seven # next tweak value
2531 vaddubm $tweak,$tweak,$tweak
2532 vsldoi $tmp,$tmp,$tmp,15
2533 le?vperm $in4,$in4,$in4,$leperm
2534 vand $tmp,$tmp,$eighty7
2535 vxor $out4,$in4,$twk4
2536 vxor $tweak,$tweak,$tmp
2537
2538 lvx_u $in5,$x50,$inp
2539 addi $inp,$inp,0x60
2540 vxor $twk5,$tweak,$rndkey0
2541 vsrab $tmp,$tweak,$seven # next tweak value
2542 vaddubm $tweak,$tweak,$tweak
2543 vsldoi $tmp,$tmp,$tmp,15
2544 le?vperm $in5,$in5,$in5,$leperm
2545 vand $tmp,$tmp,$eighty7
2546 vxor $out5,$in5,$twk5
2547 vxor $tweak,$tweak,$tmp
2548
2549 vxor v31,v31,$rndkey0
2550 mtctr $rounds
2551 b Loop_xts_enc6x
2552
2553.align 5
2554Loop_xts_enc6x:
2555 vcipher $out0,$out0,v24
2556 vcipher $out1,$out1,v24
2557 vcipher $out2,$out2,v24
2558 vcipher $out3,$out3,v24
2559 vcipher $out4,$out4,v24
2560 vcipher $out5,$out5,v24
2561 lvx v24,$x20,$key_ # round[3]
2562 addi $key_,$key_,0x20
2563
2564 vcipher $out0,$out0,v25
2565 vcipher $out1,$out1,v25
2566 vcipher $out2,$out2,v25
2567 vcipher $out3,$out3,v25
2568 vcipher $out4,$out4,v25
2569 vcipher $out5,$out5,v25
2570 lvx v25,$x10,$key_ # round[4]
2571 bdnz Loop_xts_enc6x
2572
2573 subic $len,$len,96 # $len-=96
2574 vxor $in0,$twk0,v31 # xor with last round key
2575 vcipher $out0,$out0,v24
2576 vcipher $out1,$out1,v24
2577 vsrab $tmp,$tweak,$seven # next tweak value
2578 vxor $twk0,$tweak,$rndkey0
2579 vaddubm $tweak,$tweak,$tweak
2580 vcipher $out2,$out2,v24
2581 vcipher $out3,$out3,v24
2582 vsldoi $tmp,$tmp,$tmp,15
2583 vcipher $out4,$out4,v24
2584 vcipher $out5,$out5,v24
2585
2586 subfe. r0,r0,r0 # borrow?-1:0
2587 vand $tmp,$tmp,$eighty7
2588 vcipher $out0,$out0,v25
2589 vcipher $out1,$out1,v25
2590 vxor $tweak,$tweak,$tmp
2591 vcipher $out2,$out2,v25
2592 vcipher $out3,$out3,v25
2593 vxor $in1,$twk1,v31
2594 vsrab $tmp,$tweak,$seven # next tweak value
2595 vxor $twk1,$tweak,$rndkey0
2596 vcipher $out4,$out4,v25
2597 vcipher $out5,$out5,v25
2598
2599 and r0,r0,$len
2600 vaddubm $tweak,$tweak,$tweak
2601 vsldoi $tmp,$tmp,$tmp,15
2602 vcipher $out0,$out0,v26
2603 vcipher $out1,$out1,v26
2604 vand $tmp,$tmp,$eighty7
2605 vcipher $out2,$out2,v26
2606 vcipher $out3,$out3,v26
2607 vxor $tweak,$tweak,$tmp
2608 vcipher $out4,$out4,v26
2609 vcipher $out5,$out5,v26
2610
2611 add $inp,$inp,r0 # $inp is adjusted in such
2612 # way that at exit from the
2613 # loop inX-in5 are loaded
2614 # with last "words"
2615 vxor $in2,$twk2,v31
2616 vsrab $tmp,$tweak,$seven # next tweak value
2617 vxor $twk2,$tweak,$rndkey0
2618 vaddubm $tweak,$tweak,$tweak
2619 vcipher $out0,$out0,v27
2620 vcipher $out1,$out1,v27
2621 vsldoi $tmp,$tmp,$tmp,15
2622 vcipher $out2,$out2,v27
2623 vcipher $out3,$out3,v27
2624 vand $tmp,$tmp,$eighty7
2625 vcipher $out4,$out4,v27
2626 vcipher $out5,$out5,v27
2627
2628 addi $key_,$sp,$FRAME+15 # rewind $key_
2629 vxor $tweak,$tweak,$tmp
2630 vcipher $out0,$out0,v28
2631 vcipher $out1,$out1,v28
2632 vxor $in3,$twk3,v31
2633 vsrab $tmp,$tweak,$seven # next tweak value
2634 vxor $twk3,$tweak,$rndkey0
2635 vcipher $out2,$out2,v28
2636 vcipher $out3,$out3,v28
2637 vaddubm $tweak,$tweak,$tweak
2638 vsldoi $tmp,$tmp,$tmp,15
2639 vcipher $out4,$out4,v28
2640 vcipher $out5,$out5,v28
2641 lvx v24,$x00,$key_ # re-pre-load round[1]
2642 vand $tmp,$tmp,$eighty7
2643
2644 vcipher $out0,$out0,v29
2645 vcipher $out1,$out1,v29
2646 vxor $tweak,$tweak,$tmp
2647 vcipher $out2,$out2,v29
2648 vcipher $out3,$out3,v29
2649 vxor $in4,$twk4,v31
2650 vsrab $tmp,$tweak,$seven # next tweak value
2651 vxor $twk4,$tweak,$rndkey0
2652 vcipher $out4,$out4,v29
2653 vcipher $out5,$out5,v29
2654 lvx v25,$x10,$key_ # re-pre-load round[2]
2655 vaddubm $tweak,$tweak,$tweak
2656 vsldoi $tmp,$tmp,$tmp,15
2657
2658 vcipher $out0,$out0,v30
2659 vcipher $out1,$out1,v30
2660 vand $tmp,$tmp,$eighty7
2661 vcipher $out2,$out2,v30
2662 vcipher $out3,$out3,v30
2663 vxor $tweak,$tweak,$tmp
2664 vcipher $out4,$out4,v30
2665 vcipher $out5,$out5,v30
2666 vxor $in5,$twk5,v31
2667 vsrab $tmp,$tweak,$seven # next tweak value
2668 vxor $twk5,$tweak,$rndkey0
2669
2670 vcipherlast $out0,$out0,$in0
2671 lvx_u $in0,$x00,$inp # load next input block
2672 vaddubm $tweak,$tweak,$tweak
2673 vsldoi $tmp,$tmp,$tmp,15
2674 vcipherlast $out1,$out1,$in1
2675 lvx_u $in1,$x10,$inp
2676 vcipherlast $out2,$out2,$in2
2677 le?vperm $in0,$in0,$in0,$leperm
2678 lvx_u $in2,$x20,$inp
2679 vand $tmp,$tmp,$eighty7
2680 vcipherlast $out3,$out3,$in3
2681 le?vperm $in1,$in1,$in1,$leperm
2682 lvx_u $in3,$x30,$inp
2683 vcipherlast $out4,$out4,$in4
2684 le?vperm $in2,$in2,$in2,$leperm
2685 lvx_u $in4,$x40,$inp
2686 vxor $tweak,$tweak,$tmp
2687 vcipherlast $tmp,$out5,$in5 # last block might be needed
2688 # in stealing mode
2689 le?vperm $in3,$in3,$in3,$leperm
2690 lvx_u $in5,$x50,$inp
2691 addi $inp,$inp,0x60
2692 le?vperm $in4,$in4,$in4,$leperm
2693 le?vperm $in5,$in5,$in5,$leperm
2694
2695 le?vperm $out0,$out0,$out0,$leperm
2696 le?vperm $out1,$out1,$out1,$leperm
2697 stvx_u $out0,$x00,$out # store output
2698 vxor $out0,$in0,$twk0
2699 le?vperm $out2,$out2,$out2,$leperm
2700 stvx_u $out1,$x10,$out
2701 vxor $out1,$in1,$twk1
2702 le?vperm $out3,$out3,$out3,$leperm
2703 stvx_u $out2,$x20,$out
2704 vxor $out2,$in2,$twk2
2705 le?vperm $out4,$out4,$out4,$leperm
2706 stvx_u $out3,$x30,$out
2707 vxor $out3,$in3,$twk3
2708 le?vperm $out5,$tmp,$tmp,$leperm
2709 stvx_u $out4,$x40,$out
2710 vxor $out4,$in4,$twk4
2711 le?stvx_u $out5,$x50,$out
2712 be?stvx_u $tmp, $x50,$out
2713 vxor $out5,$in5,$twk5
2714 addi $out,$out,0x60
2715
2716 mtctr $rounds
2717 beq Loop_xts_enc6x # did $len-=96 borrow?
2718
2719 addic. $len,$len,0x60
2720 beq Lxts_enc6x_zero
2721 cmpwi $len,0x20
2722 blt Lxts_enc6x_one
2723 nop
2724 beq Lxts_enc6x_two
2725 cmpwi $len,0x40
2726 blt Lxts_enc6x_three
2727 nop
2728 beq Lxts_enc6x_four
2729
2730Lxts_enc6x_five:
2731 vxor $out0,$in1,$twk0
2732 vxor $out1,$in2,$twk1
2733 vxor $out2,$in3,$twk2
2734 vxor $out3,$in4,$twk3
2735 vxor $out4,$in5,$twk4
2736
2737 bl _aesp8_xts_enc5x
2738
2739 le?vperm $out0,$out0,$out0,$leperm
2740 vmr $twk0,$twk5 # unused tweak
2741 le?vperm $out1,$out1,$out1,$leperm
2742 stvx_u $out0,$x00,$out # store output
2743 le?vperm $out2,$out2,$out2,$leperm
2744 stvx_u $out1,$x10,$out
2745 le?vperm $out3,$out3,$out3,$leperm
2746 stvx_u $out2,$x20,$out
2747 vxor $tmp,$out4,$twk5 # last block prep for stealing
2748 le?vperm $out4,$out4,$out4,$leperm
2749 stvx_u $out3,$x30,$out
2750 stvx_u $out4,$x40,$out
2751 addi $out,$out,0x50
2752 bne Lxts_enc6x_steal
2753 b Lxts_enc6x_done
2754
2755.align 4
2756Lxts_enc6x_four:
2757 vxor $out0,$in2,$twk0
2758 vxor $out1,$in3,$twk1
2759 vxor $out2,$in4,$twk2
2760 vxor $out3,$in5,$twk3
2761 vxor $out4,$out4,$out4
2762
2763 bl _aesp8_xts_enc5x
2764
2765 le?vperm $out0,$out0,$out0,$leperm
2766 vmr $twk0,$twk4 # unused tweak
2767 le?vperm $out1,$out1,$out1,$leperm
2768 stvx_u $out0,$x00,$out # store output
2769 le?vperm $out2,$out2,$out2,$leperm
2770 stvx_u $out1,$x10,$out
2771 vxor $tmp,$out3,$twk4 # last block prep for stealing
2772 le?vperm $out3,$out3,$out3,$leperm
2773 stvx_u $out2,$x20,$out
2774 stvx_u $out3,$x30,$out
2775 addi $out,$out,0x40
2776 bne Lxts_enc6x_steal
2777 b Lxts_enc6x_done
2778
2779.align 4
2780Lxts_enc6x_three:
2781 vxor $out0,$in3,$twk0
2782 vxor $out1,$in4,$twk1
2783 vxor $out2,$in5,$twk2
2784 vxor $out3,$out3,$out3
2785 vxor $out4,$out4,$out4
2786
2787 bl _aesp8_xts_enc5x
2788
2789 le?vperm $out0,$out0,$out0,$leperm
2790 vmr $twk0,$twk3 # unused tweak
2791 le?vperm $out1,$out1,$out1,$leperm
2792 stvx_u $out0,$x00,$out # store output
2793 vxor $tmp,$out2,$twk3 # last block prep for stealing
2794 le?vperm $out2,$out2,$out2,$leperm
2795 stvx_u $out1,$x10,$out
2796 stvx_u $out2,$x20,$out
2797 addi $out,$out,0x30
2798 bne Lxts_enc6x_steal
2799 b Lxts_enc6x_done
2800
2801.align 4
2802Lxts_enc6x_two:
2803 vxor $out0,$in4,$twk0
2804 vxor $out1,$in5,$twk1
2805 vxor $out2,$out2,$out2
2806 vxor $out3,$out3,$out3
2807 vxor $out4,$out4,$out4
2808
2809 bl _aesp8_xts_enc5x
2810
2811 le?vperm $out0,$out0,$out0,$leperm
2812 vmr $twk0,$twk2 # unused tweak
2813 vxor $tmp,$out1,$twk2 # last block prep for stealing
2814 le?vperm $out1,$out1,$out1,$leperm
2815 stvx_u $out0,$x00,$out # store output
2816 stvx_u $out1,$x10,$out
2817 addi $out,$out,0x20
2818 bne Lxts_enc6x_steal
2819 b Lxts_enc6x_done
2820
2821.align 4
2822Lxts_enc6x_one:
2823 vxor $out0,$in5,$twk0
2824 nop
2825Loop_xts_enc1x:
2826 vcipher $out0,$out0,v24
2827 lvx v24,$x20,$key_ # round[3]
2828 addi $key_,$key_,0x20
2829
2830 vcipher $out0,$out0,v25
2831 lvx v25,$x10,$key_ # round[4]
2832 bdnz Loop_xts_enc1x
2833
2834 add $inp,$inp,$taillen
2835 cmpwi $taillen,0
2836 vcipher $out0,$out0,v24
2837
2838 subi $inp,$inp,16
2839 vcipher $out0,$out0,v25
2840
2841 lvsr $inpperm,0,$taillen
2842 vcipher $out0,$out0,v26
2843
2844 lvx_u $in0,0,$inp
2845 vcipher $out0,$out0,v27
2846
2847 addi $key_,$sp,$FRAME+15 # rewind $key_
2848 vcipher $out0,$out0,v28
2849 lvx v24,$x00,$key_ # re-pre-load round[1]
2850
2851 vcipher $out0,$out0,v29
2852 lvx v25,$x10,$key_ # re-pre-load round[2]
2853 vxor $twk0,$twk0,v31
2854
2855 le?vperm $in0,$in0,$in0,$leperm
2856 vcipher $out0,$out0,v30
2857
2858 vperm $in0,$in0,$in0,$inpperm
2859 vcipherlast $out0,$out0,$twk0
2860
2861 vmr $twk0,$twk1 # unused tweak
2862 vxor $tmp,$out0,$twk1 # last block prep for stealing
2863 le?vperm $out0,$out0,$out0,$leperm
2864 stvx_u $out0,$x00,$out # store output
2865 addi $out,$out,0x10
2866 bne Lxts_enc6x_steal
2867 b Lxts_enc6x_done
2868
2869.align 4
2870Lxts_enc6x_zero:
2871 cmpwi $taillen,0
2872 beq Lxts_enc6x_done
2873
2874 add $inp,$inp,$taillen
2875 subi $inp,$inp,16
2876 lvx_u $in0,0,$inp
2877 lvsr $inpperm,0,$taillen # $in5 is no more
2878 le?vperm $in0,$in0,$in0,$leperm
2879 vperm $in0,$in0,$in0,$inpperm
2880 vxor $tmp,$tmp,$twk0
2881Lxts_enc6x_steal:
2882 vxor $in0,$in0,$twk0
2883 vxor $out0,$out0,$out0
2884 vspltisb $out1,-1
2885 vperm $out0,$out0,$out1,$inpperm
2886 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2887
2888 subi r30,$out,17
2889 subi $out,$out,16
2890 mtctr $taillen
2891Loop_xts_enc6x_steal:
2892 lbzu r0,1(r30)
2893 stb r0,16(r30)
2894 bdnz Loop_xts_enc6x_steal
2895
2896 li $taillen,0
2897 mtctr $rounds
2898 b Loop_xts_enc1x # one more time...
2899
2900.align 4
2901Lxts_enc6x_done:
2902 ${UCMP}i $ivp,0
2903 beq Lxts_enc6x_ret
2904
2905 vxor $tweak,$twk0,$rndkey0
2906 le?vperm $tweak,$tweak,$tweak,$leperm
2907 stvx_u $tweak,0,$ivp
2908
2909Lxts_enc6x_ret:
2910 mtlr r11
2911 li r10,`$FRAME+15`
2912 li r11,`$FRAME+31`
2913 stvx $seven,r10,$sp # wipe copies of round keys
2914 addi r10,r10,32
2915 stvx $seven,r11,$sp
2916 addi r11,r11,32
2917 stvx $seven,r10,$sp
2918 addi r10,r10,32
2919 stvx $seven,r11,$sp
2920 addi r11,r11,32
2921 stvx $seven,r10,$sp
2922 addi r10,r10,32
2923 stvx $seven,r11,$sp
2924 addi r11,r11,32
2925 stvx $seven,r10,$sp
2926 addi r10,r10,32
2927 stvx $seven,r11,$sp
2928 addi r11,r11,32
2929
2930 mtspr 256,$vrsave
2931 lvx v20,r10,$sp # ABI says so
2932 addi r10,r10,32
2933 lvx v21,r11,$sp
2934 addi r11,r11,32
2935 lvx v22,r10,$sp
2936 addi r10,r10,32
2937 lvx v23,r11,$sp
2938 addi r11,r11,32
2939 lvx v24,r10,$sp
2940 addi r10,r10,32
2941 lvx v25,r11,$sp
2942 addi r11,r11,32
2943 lvx v26,r10,$sp
2944 addi r10,r10,32
2945 lvx v27,r11,$sp
2946 addi r11,r11,32
2947 lvx v28,r10,$sp
2948 addi r10,r10,32
2949 lvx v29,r11,$sp
2950 addi r11,r11,32
2951 lvx v30,r10,$sp
2952 lvx v31,r11,$sp
2953 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2954 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2955 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2956 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2957 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2958 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2959 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2960 blr
2961 .long 0
2962 .byte 0,12,0x04,1,0x80,6,6,0
2963 .long 0
2964
2965.align 5
# _aesp8_xts_enc5x: shared AES-XTS encrypt tail for the 1..5-remaining-block
# paths of the 6x encrypt routine.  On entry $out0..$out4 hold input blocks
# already xored with their tweak values ($twk0..$twk4), and CTR holds the
# count of remaining paired middle rounds.  The tweaks still include
# round[0] (they were formed as tweak^$rndkey0 by the caller), and v31 was
# pre-xored with $rndkey0, so the final "vxor $twkN,...,v31" below yields
# tweak^lastkey for the vcipherlast, completing both the last AES round and
# the output-side XTS tweak xor in one instruction.
2966_aesp8_xts_enc5x:
2967 vcipher $out0,$out0,v24
2968 vcipher $out1,$out1,v24
2969 vcipher $out2,$out2,v24
2970 vcipher $out3,$out3,v24
2971 vcipher $out4,$out4,v24
2972 lvx v24,$x20,$key_ # round[3]
2973 addi $key_,$key_,0x20
2974
2975 vcipher $out0,$out0,v25
2976 vcipher $out1,$out1,v25
2977 vcipher $out2,$out2,v25
2978 vcipher $out3,$out3,v25
2979 vcipher $out4,$out4,v25
2980 lvx v25,$x10,$key_ # round[4]
2981 bdnz _aesp8_xts_enc5x
2982
# Final rounds, interleaved with tail preparation: point $inp at the last
# 16 input bytes before the partial tail ($inp+$taillen-16), set CR0 from
# $taillen for the caller's bne/steal decision, and load/permute that block
# into $in0 for ciphertext stealing.
2983 add $inp,$inp,$taillen
2984 cmpwi $taillen,0
2985 vcipher $out0,$out0,v24
2986 vcipher $out1,$out1,v24
2987 vcipher $out2,$out2,v24
2988 vcipher $out3,$out3,v24
2989 vcipher $out4,$out4,v24
2990
2991 subi $inp,$inp,16
2992 vcipher $out0,$out0,v25
2993 vcipher $out1,$out1,v25
2994 vcipher $out2,$out2,v25
2995 vcipher $out3,$out3,v25
2996 vcipher $out4,$out4,v25
2997 vxor $twk0,$twk0,v31
2998
2999 vcipher $out0,$out0,v26
# NOTE: "r0" as the RA operand of lvsr is translated to literal 0 by the
# ppc-xlate layer, so this is the same shift-permute setup as the
# "lvsr $inpperm,0,$taillen" form used in the other tail paths.
3000 lvsr $inpperm,r0,$taillen # $in5 is no more
3001 vcipher $out1,$out1,v26
3002 vcipher $out2,$out2,v26
3003 vcipher $out3,$out3,v26
3004 vcipher $out4,$out4,v26
3005 vxor $in1,$twk1,v31
3006
3007 vcipher $out0,$out0,v27
3008 lvx_u $in0,0,$inp
3009 vcipher $out1,$out1,v27
3010 vcipher $out2,$out2,v27
3011 vcipher $out3,$out3,v27
3012 vcipher $out4,$out4,v27
3013 vxor $in2,$twk2,v31
3014
# Rewind $key_ to the on-stack round-key copies and re-prime v24/v25 so the
# caller can immediately run another single-block pass if needed.
3015 addi $key_,$sp,$FRAME+15 # rewind $key_
3016 vcipher $out0,$out0,v28
3017 vcipher $out1,$out1,v28
3018 vcipher $out2,$out2,v28
3019 vcipher $out3,$out3,v28
3020 vcipher $out4,$out4,v28
3021 lvx v24,$x00,$key_ # re-pre-load round[1]
3022 vxor $in3,$twk3,v31
3023
3024 vcipher $out0,$out0,v29
3025 le?vperm $in0,$in0,$in0,$leperm
3026 vcipher $out1,$out1,v29
3027 vcipher $out2,$out2,v29
3028 vcipher $out3,$out3,v29
3029 vcipher $out4,$out4,v29
3030 lvx v25,$x10,$key_ # re-pre-load round[2]
3031 vxor $in4,$twk4,v31
3032
3033 vcipher $out0,$out0,v30
3034 vperm $in0,$in0,$in0,$inpperm
3035 vcipher $out1,$out1,v30
3036 vcipher $out2,$out2,v30
3037 vcipher $out3,$out3,v30
3038 vcipher $out4,$out4,v30
3039
# Last round: key operand is tweak^lastkey, so the XTS output xor happens
# here for all five blocks.
3040 vcipherlast $out0,$out0,$twk0
3041 vcipherlast $out1,$out1,$in1
3042 vcipherlast $out2,$out2,$in2
3043 vcipherlast $out3,$out3,$in3
3044 vcipherlast $out4,$out4,$in4
3045 blr
3046 .long 0
3047 .byte 0,12,0x14,0,0,0,0,0
3048
3049.align 5
# _aesp8_xts_decrypt6x: AES-XTS decryption processing six blocks per
# iteration on POWER8 vector-crypto (vncipher/vncipherlast).  Reached from
# the scalar aes_p8_xts_decrypt entry (outside this view) with $inp/$out,
# byte count in $len, the decryption key schedule at $key1, the current
# tweak in $tweak and round[0] in $rndkey0.  Round keys are byte-aligned
# via $keyperm and cached in the stack frame so v24..v31 can be cycled
# through the whole schedule while all six pipelines stay busy; tweak
# updates (GF(2^128) multiply by alpha) are interleaved with the cipher
# rounds.  NOTE(review): $eighty7 is initialized by the caller outside
# this view; from its name and its vand/vxor use it presumably holds the
# XTS reduction constant 0x87 — confirm against the routine prologue.
3050_aesp8_xts_decrypt6x:
# Prologue: allocate the frame, save LR, non-volatile v20-v31 and r26-r31
# per the ABI, save/overwrite VRSAVE, and preset the 0x10..0x70 offset
# registers used for unrolled loads/stores.
3051 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3052 mflr r11
3053 li r7,`$FRAME+8*16+15`
3054 li r3,`$FRAME+8*16+31`
3055 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3056 stvx v20,r7,$sp # ABI says so
3057 addi r7,r7,32
3058 stvx v21,r3,$sp
3059 addi r3,r3,32
3060 stvx v22,r7,$sp
3061 addi r7,r7,32
3062 stvx v23,r3,$sp
3063 addi r3,r3,32
3064 stvx v24,r7,$sp
3065 addi r7,r7,32
3066 stvx v25,r3,$sp
3067 addi r3,r3,32
3068 stvx v26,r7,$sp
3069 addi r7,r7,32
3070 stvx v27,r3,$sp
3071 addi r3,r3,32
3072 stvx v28,r7,$sp
3073 addi r7,r7,32
3074 stvx v29,r3,$sp
3075 addi r3,r3,32
3076 stvx v30,r7,$sp
3077 stvx v31,r3,$sp
3078 li r0,-1
3079 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3080 li $x10,0x10
3081 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3082 li $x20,0x20
3083 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3084 li $x30,0x30
3085 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3086 li $x40,0x40
3087 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3088 li $x50,0x50
3089 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3090 li $x60,0x60
3091 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3092 li $x70,0x70
3093 mtspr 256,r0
3094
3095 subi $rounds,$rounds,3 # -4 in total
3096
# Align the key schedule (?vperm under $keyperm) and off-load the aligned
# round keys into the stack frame at $key_, two per loop iteration, so the
# main loop can stream them back without re-permuting.
3097 lvx $rndkey0,$x00,$key1 # load key schedule
3098 lvx v30,$x10,$key1
3099 addi $key1,$key1,0x20
3100 lvx v31,$x00,$key1
3101 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3102 addi $key_,$sp,$FRAME+15
3103 mtctr $rounds
3104
3105Load_xts_dec_key:
3106 ?vperm v24,v30,v31,$keyperm
3107 lvx v30,$x10,$key1
3108 addi $key1,$key1,0x20
3109 stvx v24,$x00,$key_ # off-load round[1]
3110 ?vperm v25,v31,v30,$keyperm
3111 lvx v31,$x00,$key1
3112 stvx v25,$x10,$key_ # off-load round[2]
3113 addi $key_,$key_,0x20
3114 bdnz Load_xts_dec_key
3115
# Tail of the key pipeline: align the remaining round keys directly into
# v26..v31 and re-prime v24/v25 from the stack copies.
3116 lvx v26,$x10,$key1
3117 ?vperm v24,v30,v31,$keyperm
3118 lvx v27,$x20,$key1
3119 stvx v24,$x00,$key_ # off-load round[3]
3120 ?vperm v25,v31,v26,$keyperm
3121 lvx v28,$x30,$key1
3122 stvx v25,$x10,$key_ # off-load round[4]
3123 addi $key_,$sp,$FRAME+15 # rewind $key_
3124 ?vperm v26,v26,v27,$keyperm
3125 lvx v29,$x40,$key1
3126 ?vperm v27,v27,v28,$keyperm
3127 lvx v30,$x50,$key1
3128 ?vperm v28,v28,v29,$keyperm
3129 lvx v31,$x60,$key1
3130 ?vperm v29,v29,v30,$keyperm
3131 lvx $twk5,$x70,$key1 # borrow $twk5
3132 ?vperm v30,v30,v31,$keyperm
3133 lvx v24,$x00,$key_ # pre-load round[1]
3134 ?vperm v31,v31,$twk5,$keyperm
3135 lvx v25,$x10,$key_ # pre-load round[2]
3136
# Prepare the first six blocks.  Each stanza: form twkN = tweak^round[0],
# advance the tweak (vsrab/vaddubm double the 128-bit value; vsldoi+vand
# select the carry-out and $eighty7 folds in the reduction), load and
# (on LE) byte-swap the input block, and xor it with its tweak+round[0].
3137 vperm $in0,$inout,$inptail,$inpperm
3138 subi $inp,$inp,31 # undo "caller"
3139 vxor $twk0,$tweak,$rndkey0
3140 vsrab $tmp,$tweak,$seven # next tweak value
3141 vaddubm $tweak,$tweak,$tweak
3142 vsldoi $tmp,$tmp,$tmp,15
3143 vand $tmp,$tmp,$eighty7
3144 vxor $out0,$in0,$twk0
3145 vxor $tweak,$tweak,$tmp
3146
3147 lvx_u $in1,$x10,$inp
3148 vxor $twk1,$tweak,$rndkey0
3149 vsrab $tmp,$tweak,$seven # next tweak value
3150 vaddubm $tweak,$tweak,$tweak
3151 vsldoi $tmp,$tmp,$tmp,15
3152 le?vperm $in1,$in1,$in1,$leperm
3153 vand $tmp,$tmp,$eighty7
3154 vxor $out1,$in1,$twk1
3155 vxor $tweak,$tweak,$tmp
3156
3157 lvx_u $in2,$x20,$inp
3158 andi. $taillen,$len,15
3159 vxor $twk2,$tweak,$rndkey0
3160 vsrab $tmp,$tweak,$seven # next tweak value
3161 vaddubm $tweak,$tweak,$tweak
3162 vsldoi $tmp,$tmp,$tmp,15
3163 le?vperm $in2,$in2,$in2,$leperm
3164 vand $tmp,$tmp,$eighty7
3165 vxor $out2,$in2,$twk2
3166 vxor $tweak,$tweak,$tmp
3167
3168 lvx_u $in3,$x30,$inp
3169 sub $len,$len,$taillen
3170 vxor $twk3,$tweak,$rndkey0
3171 vsrab $tmp,$tweak,$seven # next tweak value
3172 vaddubm $tweak,$tweak,$tweak
3173 vsldoi $tmp,$tmp,$tmp,15
3174 le?vperm $in3,$in3,$in3,$leperm
3175 vand $tmp,$tmp,$eighty7
3176 vxor $out3,$in3,$twk3
3177 vxor $tweak,$tweak,$tmp
3178
3179 lvx_u $in4,$x40,$inp
3180 subi $len,$len,0x60
3181 vxor $twk4,$tweak,$rndkey0
3182 vsrab $tmp,$tweak,$seven # next tweak value
3183 vaddubm $tweak,$tweak,$tweak
3184 vsldoi $tmp,$tmp,$tmp,15
3185 le?vperm $in4,$in4,$in4,$leperm
3186 vand $tmp,$tmp,$eighty7
3187 vxor $out4,$in4,$twk4
3188 vxor $tweak,$tweak,$tmp
3189
3190 lvx_u $in5,$x50,$inp
3191 addi $inp,$inp,0x60
3192 vxor $twk5,$tweak,$rndkey0
3193 vsrab $tmp,$tweak,$seven # next tweak value
3194 vaddubm $tweak,$tweak,$tweak
3195 vsldoi $tmp,$tmp,$tmp,15
3196 le?vperm $in5,$in5,$in5,$leperm
3197 vand $tmp,$tmp,$eighty7
3198 vxor $out5,$in5,$twk5
3199 vxor $tweak,$tweak,$tmp
3200
# Pre-fold round[0] into the last round key: since each twkN already
# carries round[0], xoring twkN with this v31 later yields tweak^lastkey
# for vncipherlast.
3201 vxor v31,v31,$rndkey0
3202 mtctr $rounds
3203 b Loop_xts_dec6x
3204
3205.align 5
# Main loop: two AES decrypt rounds per iteration across all six blocks,
# streaming round keys v24/v25 from the on-stack copies; CTR counts the
# middle rounds.
3206Loop_xts_dec6x:
3207 vncipher $out0,$out0,v24
3208 vncipher $out1,$out1,v24
3209 vncipher $out2,$out2,v24
3210 vncipher $out3,$out3,v24
3211 vncipher $out4,$out4,v24
3212 vncipher $out5,$out5,v24
3213 lvx v24,$x20,$key_ # round[3]
3214 addi $key_,$key_,0x20
3215
3216 vncipher $out0,$out0,v25
3217 vncipher $out1,$out1,v25
3218 vncipher $out2,$out2,v25
3219 vncipher $out3,$out3,v25
3220 vncipher $out4,$out4,v25
3221 vncipher $out5,$out5,v25
3222 lvx v25,$x10,$key_ # round[4]
3223 bdnz Loop_xts_dec6x
3224
# Final rounds, interleaved with: $len-=96 (subic's carry feeds subfe
# below), generation of the next six tweaks, and loading of the next six
# input blocks.
3225 subic $len,$len,96 # $len-=96
3226 vxor $in0,$twk0,v31 # xor with last round key
3227 vncipher $out0,$out0,v24
3228 vncipher $out1,$out1,v24
3229 vsrab $tmp,$tweak,$seven # next tweak value
3230 vxor $twk0,$tweak,$rndkey0
3231 vaddubm $tweak,$tweak,$tweak
3232 vncipher $out2,$out2,v24
3233 vncipher $out3,$out3,v24
3234 vsldoi $tmp,$tmp,$tmp,15
3235 vncipher $out4,$out4,v24
3236 vncipher $out5,$out5,v24
3237
3238 subfe. r0,r0,r0 # borrow?-1:0
3239 vand $tmp,$tmp,$eighty7
3240 vncipher $out0,$out0,v25
3241 vncipher $out1,$out1,v25
3242 vxor $tweak,$tweak,$tmp
3243 vncipher $out2,$out2,v25
3244 vncipher $out3,$out3,v25
3245 vxor $in1,$twk1,v31
3246 vsrab $tmp,$tweak,$seven # next tweak value
3247 vxor $twk1,$tweak,$rndkey0
3248 vncipher $out4,$out4,v25
3249 vncipher $out5,$out5,v25
3250
# r0 is -1 only when $len went negative, so "add $inp,r0&$len" rewinds
# $inp on the final pass to re-load the last full blocks.
3251 and r0,r0,$len
3252 vaddubm $tweak,$tweak,$tweak
3253 vsldoi $tmp,$tmp,$tmp,15
3254 vncipher $out0,$out0,v26
3255 vncipher $out1,$out1,v26
3256 vand $tmp,$tmp,$eighty7
3257 vncipher $out2,$out2,v26
3258 vncipher $out3,$out3,v26
3259 vxor $tweak,$tweak,$tmp
3260 vncipher $out4,$out4,v26
3261 vncipher $out5,$out5,v26
3262
3263 add $inp,$inp,r0 # $inp is adjusted in such
3264 # way that at exit from the
3265 # loop inX-in5 are loaded
3266 # with last "words"
3267 vxor $in2,$twk2,v31
3268 vsrab $tmp,$tweak,$seven # next tweak value
3269 vxor $twk2,$tweak,$rndkey0
3270 vaddubm $tweak,$tweak,$tweak
3271 vncipher $out0,$out0,v27
3272 vncipher $out1,$out1,v27
3273 vsldoi $tmp,$tmp,$tmp,15
3274 vncipher $out2,$out2,v27
3275 vncipher $out3,$out3,v27
3276 vand $tmp,$tmp,$eighty7
3277 vncipher $out4,$out4,v27
3278 vncipher $out5,$out5,v27
3279
3280 addi $key_,$sp,$FRAME+15 # rewind $key_
3281 vxor $tweak,$tweak,$tmp
3282 vncipher $out0,$out0,v28
3283 vncipher $out1,$out1,v28
3284 vxor $in3,$twk3,v31
3285 vsrab $tmp,$tweak,$seven # next tweak value
3286 vxor $twk3,$tweak,$rndkey0
3287 vncipher $out2,$out2,v28
3288 vncipher $out3,$out3,v28
3289 vaddubm $tweak,$tweak,$tweak
3290 vsldoi $tmp,$tmp,$tmp,15
3291 vncipher $out4,$out4,v28
3292 vncipher $out5,$out5,v28
3293 lvx v24,$x00,$key_ # re-pre-load round[1]
3294 vand $tmp,$tmp,$eighty7
3295
3296 vncipher $out0,$out0,v29
3297 vncipher $out1,$out1,v29
3298 vxor $tweak,$tweak,$tmp
3299 vncipher $out2,$out2,v29
3300 vncipher $out3,$out3,v29
3301 vxor $in4,$twk4,v31
3302 vsrab $tmp,$tweak,$seven # next tweak value
3303 vxor $twk4,$tweak,$rndkey0
3304 vncipher $out4,$out4,v29
3305 vncipher $out5,$out5,v29
3306 lvx v25,$x10,$key_ # re-pre-load round[2]
3307 vaddubm $tweak,$tweak,$tweak
3308 vsldoi $tmp,$tmp,$tmp,15
3309
3310 vncipher $out0,$out0,v30
3311 vncipher $out1,$out1,v30
3312 vand $tmp,$tmp,$eighty7
3313 vncipher $out2,$out2,v30
3314 vncipher $out3,$out3,v30
3315 vxor $tweak,$tweak,$tmp
3316 vncipher $out4,$out4,v30
3317 vncipher $out5,$out5,v30
3318 vxor $in5,$twk5,v31
3319 vsrab $tmp,$tweak,$seven # next tweak value
3320 vxor $twk5,$tweak,$rndkey0
3321
# Last round with tweak^lastkey as the key operand, interleaved with the
# next six input loads (and their LE byte-swaps).
3322 vncipherlast $out0,$out0,$in0
3323 lvx_u $in0,$x00,$inp # load next input block
3324 vaddubm $tweak,$tweak,$tweak
3325 vsldoi $tmp,$tmp,$tmp,15
3326 vncipherlast $out1,$out1,$in1
3327 lvx_u $in1,$x10,$inp
3328 vncipherlast $out2,$out2,$in2
3329 le?vperm $in0,$in0,$in0,$leperm
3330 lvx_u $in2,$x20,$inp
3331 vand $tmp,$tmp,$eighty7
3332 vncipherlast $out3,$out3,$in3
3333 le?vperm $in1,$in1,$in1,$leperm
3334 lvx_u $in3,$x30,$inp
3335 vncipherlast $out4,$out4,$in4
3336 le?vperm $in2,$in2,$in2,$leperm
3337 lvx_u $in4,$x40,$inp
3338 vxor $tweak,$tweak,$tmp
3339 vncipherlast $out5,$out5,$in5
3340 le?vperm $in3,$in3,$in3,$leperm
3341 lvx_u $in5,$x50,$inp
3342 addi $inp,$inp,0x60
3343 le?vperm $in4,$in4,$in4,$leperm
3344 le?vperm $in5,$in5,$in5,$leperm
3345
# Store the six output blocks and pre-xor the next six inputs with their
# tweaks for the following iteration.
3346 le?vperm $out0,$out0,$out0,$leperm
3347 le?vperm $out1,$out1,$out1,$leperm
3348 stvx_u $out0,$x00,$out # store output
3349 vxor $out0,$in0,$twk0
3350 le?vperm $out2,$out2,$out2,$leperm
3351 stvx_u $out1,$x10,$out
3352 vxor $out1,$in1,$twk1
3353 le?vperm $out3,$out3,$out3,$leperm
3354 stvx_u $out2,$x20,$out
3355 vxor $out2,$in2,$twk2
3356 le?vperm $out4,$out4,$out4,$leperm
3357 stvx_u $out3,$x30,$out
3358 vxor $out3,$in3,$twk3
3359 le?vperm $out5,$out5,$out5,$leperm
3360 stvx_u $out4,$x40,$out
3361 vxor $out4,$in4,$twk4
3362 stvx_u $out5,$x50,$out
3363 vxor $out5,$in5,$twk5
3364 addi $out,$out,0x60
3365
3366 mtctr $rounds
3367 beq Loop_xts_dec6x # did $len-=96 borrow?
3368
# 0..5 full blocks (plus an optional partial tail in $taillen) remain:
# dispatch.  Each tail path falls through to ciphertext stealing (bne,
# CR0 still holds $taillen!=0 from the 5x helper) or to the finish.
3369 addic. $len,$len,0x60
3370 beq Lxts_dec6x_zero
3371 cmpwi $len,0x20
3372 blt Lxts_dec6x_one
3373 nop
3374 beq Lxts_dec6x_two
3375 cmpwi $len,0x40
3376 blt Lxts_dec6x_three
3377 nop
3378 beq Lxts_dec6x_four
3379
3380Lxts_dec6x_five:
# Five blocks remain: re-pair in1..in5 with twk0..twk4 and decrypt.
3381 vxor $out0,$in1,$twk0
3382 vxor $out1,$in2,$twk1
3383 vxor $out2,$in3,$twk2
3384 vxor $out3,$in4,$twk3
3385 vxor $out4,$in5,$twk4
3386
3387 bl _aesp8_xts_dec5x
3388
3389 le?vperm $out0,$out0,$out0,$leperm
3390 vmr $twk0,$twk5 # unused tweak
3391 vxor $twk1,$tweak,$rndkey0
3392 le?vperm $out1,$out1,$out1,$leperm
3393 stvx_u $out0,$x00,$out # store output
3394 vxor $out0,$in0,$twk1
3395 le?vperm $out2,$out2,$out2,$leperm
3396 stvx_u $out1,$x10,$out
3397 le?vperm $out3,$out3,$out3,$leperm
3398 stvx_u $out2,$x20,$out
3399 le?vperm $out4,$out4,$out4,$leperm
3400 stvx_u $out3,$x30,$out
3401 stvx_u $out4,$x40,$out
3402 addi $out,$out,0x50
3403 bne Lxts_dec6x_steal
3404 b Lxts_dec6x_done
3405
3406.align 4
3407Lxts_dec6x_four:
# Four blocks remain ($out4 zeroed as a dummy lane for the 5x helper).
3408 vxor $out0,$in2,$twk0
3409 vxor $out1,$in3,$twk1
3410 vxor $out2,$in4,$twk2
3411 vxor $out3,$in5,$twk3
3412 vxor $out4,$out4,$out4
3413
3414 bl _aesp8_xts_dec5x
3415
3416 le?vperm $out0,$out0,$out0,$leperm
3417 vmr $twk0,$twk4 # unused tweak
3418 vmr $twk1,$twk5
3419 le?vperm $out1,$out1,$out1,$leperm
3420 stvx_u $out0,$x00,$out # store output
3421 vxor $out0,$in0,$twk5
3422 le?vperm $out2,$out2,$out2,$leperm
3423 stvx_u $out1,$x10,$out
3424 le?vperm $out3,$out3,$out3,$leperm
3425 stvx_u $out2,$x20,$out
3426 stvx_u $out3,$x30,$out
3427 addi $out,$out,0x40
3428 bne Lxts_dec6x_steal
3429 b Lxts_dec6x_done
3430
3431.align 4
3432Lxts_dec6x_three:
# Three blocks remain (two dummy lanes).
3433 vxor $out0,$in3,$twk0
3434 vxor $out1,$in4,$twk1
3435 vxor $out2,$in5,$twk2
3436 vxor $out3,$out3,$out3
3437 vxor $out4,$out4,$out4
3438
3439 bl _aesp8_xts_dec5x
3440
3441 le?vperm $out0,$out0,$out0,$leperm
3442 vmr $twk0,$twk3 # unused tweak
3443 vmr $twk1,$twk4
3444 le?vperm $out1,$out1,$out1,$leperm
3445 stvx_u $out0,$x00,$out # store output
3446 vxor $out0,$in0,$twk4
3447 le?vperm $out2,$out2,$out2,$leperm
3448 stvx_u $out1,$x10,$out
3449 stvx_u $out2,$x20,$out
3450 addi $out,$out,0x30
3451 bne Lxts_dec6x_steal
3452 b Lxts_dec6x_done
3453
3454.align 4
3455Lxts_dec6x_two:
# Two blocks remain (three dummy lanes).
3456 vxor $out0,$in4,$twk0
3457 vxor $out1,$in5,$twk1
3458 vxor $out2,$out2,$out2
3459 vxor $out3,$out3,$out3
3460 vxor $out4,$out4,$out4
3461
3462 bl _aesp8_xts_dec5x
3463
3464 le?vperm $out0,$out0,$out0,$leperm
3465 vmr $twk0,$twk2 # unused tweak
3466 vmr $twk1,$twk3
3467 le?vperm $out1,$out1,$out1,$leperm
3468 stvx_u $out0,$x00,$out # store output
3469 vxor $out0,$in0,$twk3
3470 stvx_u $out1,$x10,$out
3471 addi $out,$out,0x20
3472 bne Lxts_dec6x_steal
3473 b Lxts_dec6x_done
3474
3475.align 4
3476Lxts_dec6x_one:
3477 vxor $out0,$in5,$twk0
3478 nop
# Single-block decrypt loop; also re-entered from Lxts_dec6x_steal for the
# final stolen block.
3479Loop_xts_dec1x:
3480 vncipher $out0,$out0,v24
3481 lvx v24,$x20,$key_ # round[3]
3482 addi $key_,$key_,0x20
3483
3484 vncipher $out0,$out0,v25
3485 lvx v25,$x10,$key_ # round[4]
3486 bdnz Loop_xts_dec1x
3487
# r0 = ($taillen-1)&16: 16 only when $taillen==0, so $inp backs up one
# extra block in the no-tail case and $in0 receives the last full block.
3488 subi r0,$taillen,1
3489 vncipher $out0,$out0,v24
3490
3491 andi. r0,r0,16
3492 cmpwi $taillen,0
3493 vncipher $out0,$out0,v25
3494
3495 sub $inp,$inp,r0
3496 vncipher $out0,$out0,v26
3497
3498 lvx_u $in0,0,$inp
3499 vncipher $out0,$out0,v27
3500
3501 addi $key_,$sp,$FRAME+15 # rewind $key_
3502 vncipher $out0,$out0,v28
3503 lvx v24,$x00,$key_ # re-pre-load round[1]
3504
3505 vncipher $out0,$out0,v29
3506 lvx v25,$x10,$key_ # re-pre-load round[2]
3507 vxor $twk0,$twk0,v31
3508
3509 le?vperm $in0,$in0,$in0,$leperm
3510 vncipher $out0,$out0,v30
3511
3512 mtctr $rounds
3513 vncipherlast $out0,$out0,$twk0
3514
3515 vmr $twk0,$twk1 # unused tweak
3516 vmr $twk1,$twk2
3517 le?vperm $out0,$out0,$out0,$leperm
3518 stvx_u $out0,$x00,$out # store output
3519 addi $out,$out,0x10
3520 vxor $out0,$in0,$twk2
3521 bne Lxts_dec6x_steal
3522 b Lxts_dec6x_done
3523
3524.align 4
3525Lxts_dec6x_zero:
# No full block left: only the partial tail (if any) needs the stealing
# path.
3526 cmpwi $taillen,0
3527 beq Lxts_dec6x_done
3528
3529 lvx_u $in0,0,$inp
3530 le?vperm $in0,$in0,$in0,$leperm
3531 vxor $out0,$in0,$twk1
# Ciphertext stealing: decrypt the last full block under the next tweak
# ($twk1), splice its head with the partial tail via vsel/$inpperm, and
# loop back through Loop_xts_dec1x to decrypt the spliced block under
# $twk0.
3532Lxts_dec6x_steal:
3533 vncipher $out0,$out0,v24
3534 lvx v24,$x20,$key_ # round[3]
3535 addi $key_,$key_,0x20
3536
3537 vncipher $out0,$out0,v25
3538 lvx v25,$x10,$key_ # round[4]
3539 bdnz Lxts_dec6x_steal
3540
3541 add $inp,$inp,$taillen
3542 vncipher $out0,$out0,v24
3543
3544 cmpwi $taillen,0
3545 vncipher $out0,$out0,v25
3546
3547 lvx_u $in0,0,$inp
3548 vncipher $out0,$out0,v26
3549
3550 lvsr $inpperm,0,$taillen # $in5 is no more
3551 vncipher $out0,$out0,v27
3552
3553 addi $key_,$sp,$FRAME+15 # rewind $key_
3554 vncipher $out0,$out0,v28
3555 lvx v24,$x00,$key_ # re-pre-load round[1]
3556
3557 vncipher $out0,$out0,v29
3558 lvx v25,$x10,$key_ # re-pre-load round[2]
3559 vxor $twk1,$twk1,v31
3560
3561 le?vperm $in0,$in0,$in0,$leperm
3562 vncipher $out0,$out0,v30
3563
3564 vperm $in0,$in0,$in0,$inpperm
3565 vncipherlast $tmp,$out0,$twk1
3566
3567 le?vperm $out0,$tmp,$tmp,$leperm
3568 le?stvx_u $out0,0,$out
3569 be?stvx_u $tmp,0,$out
3570
# Build the spliced block: a 0x00..0xff mask shifted by $taillen selects
# tail bytes from $in0 and the rest from $tmp, then pre-xor with $twk0
# for the final single-block pass.
3571 vxor $out0,$out0,$out0
3572 vspltisb $out1,-1
3573 vperm $out0,$out0,$out1,$inpperm
3574 vsel $out0,$in0,$tmp,$out0
3575 vxor $out0,$out0,$twk0
3576
# Byte-copy the stolen ciphertext tail forward by 16 bytes in the output
# buffer ($taillen iterations).
3577 subi r30,$out,1
3578 mtctr $taillen
3579Loop_xts_dec6x_steal:
3580 lbzu r0,1(r30)
3581 stb r0,16(r30)
3582 bdnz Loop_xts_dec6x_steal
3583
3584 li $taillen,0
3585 mtctr $rounds
3586 b Loop_xts_dec1x # one more time...
3587
3588.align 4
# If the caller passed a non-NULL $ivp, write the next tweak back
# (stripping the folded-in round[0]).
3589Lxts_dec6x_done:
3590 ${UCMP}i $ivp,0
3591 beq Lxts_dec6x_ret
3592
3593 vxor $tweak,$twk0,$rndkey0
3594 le?vperm $tweak,$tweak,$tweak,$leperm
3595 stvx_u $tweak,0,$ivp
3596
# Epilogue: scrub the on-stack round-key copies (overwriting them with
# $seven, a non-secret constant), restore VRSAVE, the non-volatile
# vectors and GPRs, pop the frame and return.
3597Lxts_dec6x_ret:
3598 mtlr r11
3599 li r10,`$FRAME+15`
3600 li r11,`$FRAME+31`
3601 stvx $seven,r10,$sp # wipe copies of round keys
3602 addi r10,r10,32
3603 stvx $seven,r11,$sp
3604 addi r11,r11,32
3605 stvx $seven,r10,$sp
3606 addi r10,r10,32
3607 stvx $seven,r11,$sp
3608 addi r11,r11,32
3609 stvx $seven,r10,$sp
3610 addi r10,r10,32
3611 stvx $seven,r11,$sp
3612 addi r11,r11,32
3613 stvx $seven,r10,$sp
3614 addi r10,r10,32
3615 stvx $seven,r11,$sp
3616 addi r11,r11,32
3617
3618 mtspr 256,$vrsave
3619 lvx v20,r10,$sp # ABI says so
3620 addi r10,r10,32
3621 lvx v21,r11,$sp
3622 addi r11,r11,32
3623 lvx v22,r10,$sp
3624 addi r10,r10,32
3625 lvx v23,r11,$sp
3626 addi r11,r11,32
3627 lvx v24,r10,$sp
3628 addi r10,r10,32
3629 lvx v25,r11,$sp
3630 addi r11,r11,32
3631 lvx v26,r10,$sp
3632 addi r10,r10,32
3633 lvx v27,r11,$sp
3634 addi r11,r11,32
3635 lvx v28,r10,$sp
3636 addi r10,r10,32
3637 lvx v29,r11,$sp
3638 addi r11,r11,32
3639 lvx v30,r10,$sp
3640 lvx v31,r11,$sp
3641 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3642 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3643 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3644 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3645 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3646 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3647 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3648 blr
3649 .long 0
3650 .byte 0,12,0x04,1,0x80,6,6,0
3651 .long 0
3652
3653.align 5
# _aesp8_xts_dec5x: shared AES-XTS decrypt tail for the 2..5-remaining-
# block paths.  Mirrors _aesp8_xts_enc5x: on entry $out0..$out4 hold
# (possibly dummy) blocks pre-xored with $twk0..$twk4 (which include
# round[0]), and CTR counts the remaining paired middle rounds.  v31 was
# pre-xored with round[0], so "vxor ...,v31" below produces tweak^lastkey
# for the vncipherlast.  Also pre-loads the last relevant input block into
# $in0 for ciphertext stealing and restores CTR to $rounds before
# returning.
3654_aesp8_xts_dec5x:
3655 vncipher $out0,$out0,v24
3656 vncipher $out1,$out1,v24
3657 vncipher $out2,$out2,v24
3658 vncipher $out3,$out3,v24
3659 vncipher $out4,$out4,v24
3660 lvx v24,$x20,$key_ # round[3]
3661 addi $key_,$key_,0x20
3662
3663 vncipher $out0,$out0,v25
3664 vncipher $out1,$out1,v25
3665 vncipher $out2,$out2,v25
3666 vncipher $out3,$out3,v25
3667 vncipher $out4,$out4,v25
3668 lvx v25,$x10,$key_ # round[4]
3669 bdnz _aesp8_xts_dec5x
3670
# r0 = ($taillen-1)&16: equals 16 only when $taillen==0, so $inp backs up
# one extra block in the no-tail case (decrypt stealing reads the last
# FULL block, unlike the encrypt side); CR0 is set from $taillen for the
# caller's bne/steal decision.
3671 subi r0,$taillen,1
3672 vncipher $out0,$out0,v24
3673 vncipher $out1,$out1,v24
3674 vncipher $out2,$out2,v24
3675 vncipher $out3,$out3,v24
3676 vncipher $out4,$out4,v24
3677
3678 andi. r0,r0,16
3679 cmpwi $taillen,0
3680 vncipher $out0,$out0,v25
3681 vncipher $out1,$out1,v25
3682 vncipher $out2,$out2,v25
3683 vncipher $out3,$out3,v25
3684 vncipher $out4,$out4,v25
3685 vxor $twk0,$twk0,v31
3686
3687 sub $inp,$inp,r0
3688 vncipher $out0,$out0,v26
3689 vncipher $out1,$out1,v26
3690 vncipher $out2,$out2,v26
3691 vncipher $out3,$out3,v26
3692 vncipher $out4,$out4,v26
3693 vxor $in1,$twk1,v31
3694
3695 vncipher $out0,$out0,v27
3696 lvx_u $in0,0,$inp
3697 vncipher $out1,$out1,v27
3698 vncipher $out2,$out2,v27
3699 vncipher $out3,$out3,v27
3700 vncipher $out4,$out4,v27
3701 vxor $in2,$twk2,v31
3702
# Rewind $key_ and re-prime v24/v25 so the caller can run the stealing /
# single-block loop immediately after return.
3703 addi $key_,$sp,$FRAME+15 # rewind $key_
3704 vncipher $out0,$out0,v28
3705 vncipher $out1,$out1,v28
3706 vncipher $out2,$out2,v28
3707 vncipher $out3,$out3,v28
3708 vncipher $out4,$out4,v28
3709 lvx v24,$x00,$key_ # re-pre-load round[1]
3710 vxor $in3,$twk3,v31
3711
3712 vncipher $out0,$out0,v29
3713 le?vperm $in0,$in0,$in0,$leperm
3714 vncipher $out1,$out1,v29
3715 vncipher $out2,$out2,v29
3716 vncipher $out3,$out3,v29
3717 vncipher $out4,$out4,v29
3718 lvx v25,$x10,$key_ # re-pre-load round[2]
3719 vxor $in4,$twk4,v31
3720
3721 vncipher $out0,$out0,v30
3722 vncipher $out1,$out1,v30
3723 vncipher $out2,$out2,v30
3724 vncipher $out3,$out3,v30
3725 vncipher $out4,$out4,v30
3726
# Last round keyed with tweak^lastkey: completes the output-side XTS xor.
3727 vncipherlast $out0,$out0,$twk0
3728 vncipherlast $out1,$out1,$in1
3729 vncipherlast $out2,$out2,$in2
3730 vncipherlast $out3,$out3,$in3
3731 vncipherlast $out4,$out4,$in4
# Restore the round counter for the caller's follow-up 1x loop.
3732 mtctr $rounds
3733 blr
3734 .long 0
3735 .byte 0,12,0x14,0,0,0,0,0
3736___
3737}} }}}
3738
1878my $consts=1; 3739my $consts=1;
1879foreach(split("\n",$code)) { 3740foreach(split("\n",$code)) {
1880 s/\`([^\`]*)\`/eval($1)/geo; 3741 s/\`([^\`]*)\`/eval($1)/geo;
@@ -1898,7 +3759,7 @@ foreach(split("\n",$code)) {
1898 if ($flavour =~ /le$/o) { 3759 if ($flavour =~ /le$/o) {
1899 SWITCH: for($conv) { 3760 SWITCH: for($conv) {
1900 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; 3761 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1901 /\?rev/ && do { @bytes=reverse(@bytes); last; }; 3762 /\?rev/ && do { @bytes=reverse(@bytes); last; };
1902 } 3763 }
1903 } 3764 }
1904 3765