diff options
author | Paulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com> | 2016-07-18 11:26:25 -0400 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2016-07-19 03:42:43 -0400 |
commit | 11c6e16ee13ab68b8ff04c17ab41611a4fcc5c81 (patch) | |
tree | 93c72797fc0a50b1177343762f5a5838ca8e23ca | |
parent | 5c562338dea29dc09415f39676e7107fa48ce763 (diff) |
crypto: vmx - Adding asm subroutines for XTS
This patch adds XTS subroutines using the VMX-crypto driver.
It gives a boost of 20 times using XTS.
This code has been adapted from the OpenSSL project in collaboration
with the original author (Andy Polyakov <appro@openssl.org>).
Signed-off-by: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
Signed-off-by: Paulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- | drivers/crypto/vmx/aesp8-ppc.h | 4 | ||||
-rw-r--r-- | drivers/crypto/vmx/aesp8-ppc.pl | 1865 |
2 files changed, 1867 insertions, 2 deletions
diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h index 4cd34ee54a94..01972e16a6c0 100644 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ b/drivers/crypto/vmx/aesp8-ppc.h | |||
@@ -19,3 +19,7 @@ void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, | |||
19 | void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, | 19 | void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, |
20 | size_t len, const struct aes_key *key, | 20 | size_t len, const struct aes_key *key, |
21 | const u8 *iv); | 21 | const u8 *iv); |
22 | void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len, | ||
23 | const struct aes_key *key1, const struct aes_key *key2, u8 *iv); | ||
24 | void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len, | ||
25 | const struct aes_key *key1, const struct aes_key *key2, u8 *iv); | ||
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index 228053921b3f..813ffcc4d17c 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl | |||
@@ -1,4 +1,11 @@ | |||
1 | #!/usr/bin/env perl | 1 | #! /usr/bin/env perl |
2 | # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. | ||
3 | # | ||
4 | # Licensed under the OpenSSL license (the "License"). You may not use | ||
5 | # this file except in compliance with the License. You can obtain a copy | ||
6 | # in the file LICENSE in the source distribution or at | ||
7 | # https://www.openssl.org/source/license.html | ||
8 | |||
2 | # | 9 | # |
3 | # ==================================================================== | 10 | # ==================================================================== |
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | 11 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL |
@@ -20,6 +27,19 @@ | |||
20 | # instructions are interleaved. It's reckoned that eventual | 27 | # instructions are interleaved. It's reckoned that eventual |
21 | # misalignment penalties at page boundaries are in average lower | 28 | # misalignment penalties at page boundaries are in average lower |
22 | # than additional overhead in pure AltiVec approach. | 29 | # than additional overhead in pure AltiVec approach. |
30 | # | ||
31 | # May 2016 | ||
32 | # | ||
33 | # Add XTS subroutine, 9x on little- and 12x improvement on big-endian | ||
34 | # systems were measured. | ||
35 | # | ||
36 | ###################################################################### | ||
37 | # Current large-block performance in cycles per byte processed with | ||
38 | # 128-bit key (less is better). | ||
39 | # | ||
40 | # CBC en-/decrypt CTR XTS | ||
41 | # POWER8[le] 3.96/0.72 0.74 1.1 | ||
42 | # POWER8[be] 3.75/0.65 0.66 1.0 | ||
23 | 43 | ||
24 | $flavour = shift; | 44 | $flavour = shift; |
25 | 45 | ||
@@ -1875,6 +1895,1847 @@ Lctr32_enc8x_done: | |||
1875 | ___ | 1895 | ___ |
1876 | }} }}} | 1896 | }} }}} |
1877 | 1897 | ||
1898 | ######################################################################### | ||
1899 | {{{ # XTS procedures # | ||
1900 | # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # | ||
1901 | # const AES_KEY *key1, const AES_KEY *key2, # | ||
1902 | # [const] unsigned char iv[16]); # | ||
1903 | # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # | ||
1904 | # input tweak value is assumed to be encrypted already, and last tweak # | ||
1905 | # value, one suitable for consecutive call on same chunk of data, is # | ||
1906 | # written back to original buffer. In addition, in "tweak chaining" # | ||
1907 | # mode only complete input blocks are processed. # | ||
1908 | |||
1909 | my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); | ||
1910 | my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); | ||
1911 | my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); | ||
1912 | my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); | ||
1913 | my $taillen = $key2; | ||
1914 | |||
1915 | ($inp,$idx) = ($idx,$inp); # reassign | ||
1916 | |||
1917 | $code.=<<___; | ||
1918 | .globl .${prefix}_xts_encrypt | ||
1919 | mr $inp,r3 # reassign | ||
1920 | li r3,-1 | ||
1921 | ${UCMP}i $len,16 | ||
1922 | bltlr- | ||
1923 | |||
1924 | lis r0,0xfff0 | ||
1925 | mfspr r12,256 # save vrsave | ||
1926 | li r11,0 | ||
1927 | mtspr 256,r0 | ||
1928 | |||
1929 | vspltisb $seven,0x07 # 0x070707..07 | ||
1930 | le?lvsl $leperm,r11,r11 | ||
1931 | le?vspltisb $tmp,0x0f | ||
1932 | le?vxor $leperm,$leperm,$seven | ||
1933 | |||
1934 | li $idx,15 | ||
1935 | lvx $tweak,0,$ivp # load [unaligned] iv | ||
1936 | lvsl $inpperm,0,$ivp | ||
1937 | lvx $inptail,$idx,$ivp | ||
1938 | le?vxor $inpperm,$inpperm,$tmp | ||
1939 | vperm $tweak,$tweak,$inptail,$inpperm | ||
1940 | |||
1941 | neg r11,$inp | ||
1942 | lvsr $inpperm,0,r11 # prepare for unaligned load | ||
1943 | lvx $inout,0,$inp | ||
1944 | addi $inp,$inp,15 # 15 is not typo | ||
1945 | le?vxor $inpperm,$inpperm,$tmp | ||
1946 | |||
1947 | ${UCMP}i $key2,0 # key2==NULL? | ||
1948 | beq Lxts_enc_no_key2 | ||
1949 | |||
1950 | ?lvsl $keyperm,0,$key2 # prepare for unaligned key | ||
1951 | lwz $rounds,240($key2) | ||
1952 | srwi $rounds,$rounds,1 | ||
1953 | subi $rounds,$rounds,1 | ||
1954 | li $idx,16 | ||
1955 | |||
1956 | lvx $rndkey0,0,$key2 | ||
1957 | lvx $rndkey1,$idx,$key2 | ||
1958 | addi $idx,$idx,16 | ||
1959 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
1960 | vxor $tweak,$tweak,$rndkey0 | ||
1961 | lvx $rndkey0,$idx,$key2 | ||
1962 | addi $idx,$idx,16 | ||
1963 | mtctr $rounds | ||
1964 | |||
1965 | Ltweak_xts_enc: | ||
1966 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
1967 | vcipher $tweak,$tweak,$rndkey1 | ||
1968 | lvx $rndkey1,$idx,$key2 | ||
1969 | addi $idx,$idx,16 | ||
1970 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
1971 | vcipher $tweak,$tweak,$rndkey0 | ||
1972 | lvx $rndkey0,$idx,$key2 | ||
1973 | addi $idx,$idx,16 | ||
1974 | bdnz Ltweak_xts_enc | ||
1975 | |||
1976 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
1977 | vcipher $tweak,$tweak,$rndkey1 | ||
1978 | lvx $rndkey1,$idx,$key2 | ||
1979 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
1980 | vcipherlast $tweak,$tweak,$rndkey0 | ||
1981 | |||
1982 | li $ivp,0 # don't chain the tweak | ||
1983 | b Lxts_enc | ||
1984 | |||
1985 | Lxts_enc_no_key2: | ||
1986 | li $idx,-16 | ||
1987 | and $len,$len,$idx # in "tweak chaining" | ||
1988 | # mode only complete | ||
1989 | # blocks are processed | ||
1990 | Lxts_enc: | ||
1991 | lvx $inptail,0,$inp | ||
1992 | addi $inp,$inp,16 | ||
1993 | |||
1994 | ?lvsl $keyperm,0,$key1 # prepare for unaligned key | ||
1995 | lwz $rounds,240($key1) | ||
1996 | srwi $rounds,$rounds,1 | ||
1997 | subi $rounds,$rounds,1 | ||
1998 | li $idx,16 | ||
1999 | |||
2000 | vslb $eighty7,$seven,$seven # 0x808080..80 | ||
2001 | vor $eighty7,$eighty7,$seven # 0x878787..87 | ||
2002 | vspltisb $tmp,1 # 0x010101..01 | ||
2003 | vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 | ||
2004 | |||
2005 | ${UCMP}i $len,96 | ||
2006 | bge _aesp8_xts_encrypt6x | ||
2007 | |||
2008 | andi. $taillen,$len,15 | ||
2009 | subic r0,$len,32 | ||
2010 | subi $taillen,$taillen,16 | ||
2011 | subfe r0,r0,r0 | ||
2012 | and r0,r0,$taillen | ||
2013 | add $inp,$inp,r0 | ||
2014 | |||
2015 | lvx $rndkey0,0,$key1 | ||
2016 | lvx $rndkey1,$idx,$key1 | ||
2017 | addi $idx,$idx,16 | ||
2018 | vperm $inout,$inout,$inptail,$inpperm | ||
2019 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2020 | vxor $inout,$inout,$tweak | ||
2021 | vxor $inout,$inout,$rndkey0 | ||
2022 | lvx $rndkey0,$idx,$key1 | ||
2023 | addi $idx,$idx,16 | ||
2024 | mtctr $rounds | ||
2025 | b Loop_xts_enc | ||
2026 | |||
2027 | .align 5 | ||
2028 | Loop_xts_enc: | ||
2029 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
2030 | vcipher $inout,$inout,$rndkey1 | ||
2031 | lvx $rndkey1,$idx,$key1 | ||
2032 | addi $idx,$idx,16 | ||
2033 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2034 | vcipher $inout,$inout,$rndkey0 | ||
2035 | lvx $rndkey0,$idx,$key1 | ||
2036 | addi $idx,$idx,16 | ||
2037 | bdnz Loop_xts_enc | ||
2038 | |||
2039 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
2040 | vcipher $inout,$inout,$rndkey1 | ||
2041 | lvx $rndkey1,$idx,$key1 | ||
2042 | li $idx,16 | ||
2043 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2044 | vxor $rndkey0,$rndkey0,$tweak | ||
2045 | vcipherlast $output,$inout,$rndkey0 | ||
2046 | |||
2047 | le?vperm $tmp,$output,$output,$leperm | ||
2048 | be?nop | ||
2049 | le?stvx_u $tmp,0,$out | ||
2050 | be?stvx_u $output,0,$out | ||
2051 | addi $out,$out,16 | ||
2052 | |||
2053 | subic. $len,$len,16 | ||
2054 | beq Lxts_enc_done | ||
2055 | |||
2056 | vmr $inout,$inptail | ||
2057 | lvx $inptail,0,$inp | ||
2058 | addi $inp,$inp,16 | ||
2059 | lvx $rndkey0,0,$key1 | ||
2060 | lvx $rndkey1,$idx,$key1 | ||
2061 | addi $idx,$idx,16 | ||
2062 | |||
2063 | subic r0,$len,32 | ||
2064 | subfe r0,r0,r0 | ||
2065 | and r0,r0,$taillen | ||
2066 | add $inp,$inp,r0 | ||
2067 | |||
2068 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2069 | vaddubm $tweak,$tweak,$tweak | ||
2070 | vsldoi $tmp,$tmp,$tmp,15 | ||
2071 | vand $tmp,$tmp,$eighty7 | ||
2072 | vxor $tweak,$tweak,$tmp | ||
2073 | |||
2074 | vperm $inout,$inout,$inptail,$inpperm | ||
2075 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2076 | vxor $inout,$inout,$tweak | ||
2077 | vxor $output,$output,$rndkey0 # just in case $len<16 | ||
2078 | vxor $inout,$inout,$rndkey0 | ||
2079 | lvx $rndkey0,$idx,$key1 | ||
2080 | addi $idx,$idx,16 | ||
2081 | |||
2082 | mtctr $rounds | ||
2083 | ${UCMP}i $len,16 | ||
2084 | bge Loop_xts_enc | ||
2085 | |||
2086 | vxor $output,$output,$tweak | ||
2087 | lvsr $inpperm,0,$len # $inpperm is no longer needed | ||
2088 | vxor $inptail,$inptail,$inptail # $inptail is no longer needed | ||
2089 | vspltisb $tmp,-1 | ||
2090 | vperm $inptail,$inptail,$tmp,$inpperm | ||
2091 | vsel $inout,$inout,$output,$inptail | ||
2092 | |||
2093 | subi r11,$out,17 | ||
2094 | subi $out,$out,16 | ||
2095 | mtctr $len | ||
2096 | li $len,16 | ||
2097 | Loop_xts_enc_steal: | ||
2098 | lbzu r0,1(r11) | ||
2099 | stb r0,16(r11) | ||
2100 | bdnz Loop_xts_enc_steal | ||
2101 | |||
2102 | mtctr $rounds | ||
2103 | b Loop_xts_enc # one more time... | ||
2104 | |||
2105 | Lxts_enc_done: | ||
2106 | ${UCMP}i $ivp,0 | ||
2107 | beq Lxts_enc_ret | ||
2108 | |||
2109 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2110 | vaddubm $tweak,$tweak,$tweak | ||
2111 | vsldoi $tmp,$tmp,$tmp,15 | ||
2112 | vand $tmp,$tmp,$eighty7 | ||
2113 | vxor $tweak,$tweak,$tmp | ||
2114 | |||
2115 | le?vperm $tweak,$tweak,$tweak,$leperm | ||
2116 | stvx_u $tweak,0,$ivp | ||
2117 | |||
2118 | Lxts_enc_ret: | ||
2119 | mtspr 256,r12 # restore vrsave | ||
2120 | li r3,0 | ||
2121 | blr | ||
2122 | .long 0 | ||
2123 | .byte 0,12,0x04,0,0x80,6,6,0 | ||
2124 | .long 0 | ||
2125 | .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt | ||
2126 | |||
2127 | .globl .${prefix}_xts_decrypt | ||
2128 | .align 5 | ||
2129 | .${prefix}_xts_decrypt: | ||
2130 | mr $inp,r3 # reassign | ||
2131 | li r3,-1 | ||
2132 | ${UCMP}i $len,16 | ||
2133 | bltlr- | ||
2134 | |||
2135 | lis r0,0xfff8 | ||
2136 | mfspr r12,256 # save vrsave | ||
2137 | li r11,0 | ||
2138 | mtspr 256,r0 | ||
2139 | |||
2140 | andi. r0,$len,15 | ||
2141 | neg r0,r0 | ||
2142 | andi. r0,r0,16 | ||
2143 | sub $len,$len,r0 | ||
2144 | |||
2145 | vspltisb $seven,0x07 # 0x070707..07 | ||
2146 | le?lvsl $leperm,r11,r11 | ||
2147 | le?vspltisb $tmp,0x0f | ||
2148 | le?vxor $leperm,$leperm,$seven | ||
2149 | |||
2150 | li $idx,15 | ||
2151 | lvx $tweak,0,$ivp # load [unaligned] iv | ||
2152 | lvsl $inpperm,0,$ivp | ||
2153 | lvx $inptail,$idx,$ivp | ||
2154 | le?vxor $inpperm,$inpperm,$tmp | ||
2155 | vperm $tweak,$tweak,$inptail,$inpperm | ||
2156 | |||
2157 | neg r11,$inp | ||
2158 | lvsr $inpperm,0,r11 # prepare for unaligned load | ||
2159 | lvx $inout,0,$inp | ||
2160 | addi $inp,$inp,15 # 15 is not typo | ||
2161 | le?vxor $inpperm,$inpperm,$tmp | ||
2162 | |||
2163 | ${UCMP}i $key2,0 # key2==NULL? | ||
2164 | beq Lxts_dec_no_key2 | ||
2165 | |||
2166 | ?lvsl $keyperm,0,$key2 # prepare for unaligned key | ||
2167 | lwz $rounds,240($key2) | ||
2168 | srwi $rounds,$rounds,1 | ||
2169 | subi $rounds,$rounds,1 | ||
2170 | li $idx,16 | ||
2171 | |||
2172 | lvx $rndkey0,0,$key2 | ||
2173 | lvx $rndkey1,$idx,$key2 | ||
2174 | addi $idx,$idx,16 | ||
2175 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2176 | vxor $tweak,$tweak,$rndkey0 | ||
2177 | lvx $rndkey0,$idx,$key2 | ||
2178 | addi $idx,$idx,16 | ||
2179 | mtctr $rounds | ||
2180 | |||
2181 | Ltweak_xts_dec: | ||
2182 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
2183 | vcipher $tweak,$tweak,$rndkey1 | ||
2184 | lvx $rndkey1,$idx,$key2 | ||
2185 | addi $idx,$idx,16 | ||
2186 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2187 | vcipher $tweak,$tweak,$rndkey0 | ||
2188 | lvx $rndkey0,$idx,$key2 | ||
2189 | addi $idx,$idx,16 | ||
2190 | bdnz Ltweak_xts_dec | ||
2191 | |||
2192 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
2193 | vcipher $tweak,$tweak,$rndkey1 | ||
2194 | lvx $rndkey1,$idx,$key2 | ||
2195 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2196 | vcipherlast $tweak,$tweak,$rndkey0 | ||
2197 | |||
2198 | li $ivp,0 # don't chain the tweak | ||
2199 | b Lxts_dec | ||
2200 | |||
2201 | Lxts_dec_no_key2: | ||
2202 | neg $idx,$len | ||
2203 | andi. $idx,$idx,15 | ||
2204 | add $len,$len,$idx # in "tweak chaining" | ||
2205 | # mode only complete | ||
2206 | # blocks are processed | ||
2207 | Lxts_dec: | ||
2208 | lvx $inptail,0,$inp | ||
2209 | addi $inp,$inp,16 | ||
2210 | |||
2211 | ?lvsl $keyperm,0,$key1 # prepare for unaligned key | ||
2212 | lwz $rounds,240($key1) | ||
2213 | srwi $rounds,$rounds,1 | ||
2214 | subi $rounds,$rounds,1 | ||
2215 | li $idx,16 | ||
2216 | |||
2217 | vslb $eighty7,$seven,$seven # 0x808080..80 | ||
2218 | vor $eighty7,$eighty7,$seven # 0x878787..87 | ||
2219 | vspltisb $tmp,1 # 0x010101..01 | ||
2220 | vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 | ||
2221 | |||
2222 | ${UCMP}i $len,96 | ||
2223 | bge _aesp8_xts_decrypt6x | ||
2224 | |||
2225 | lvx $rndkey0,0,$key1 | ||
2226 | lvx $rndkey1,$idx,$key1 | ||
2227 | addi $idx,$idx,16 | ||
2228 | vperm $inout,$inout,$inptail,$inpperm | ||
2229 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2230 | vxor $inout,$inout,$tweak | ||
2231 | vxor $inout,$inout,$rndkey0 | ||
2232 | lvx $rndkey0,$idx,$key1 | ||
2233 | addi $idx,$idx,16 | ||
2234 | mtctr $rounds | ||
2235 | |||
2236 | ${UCMP}i $len,16 | ||
2237 | blt Ltail_xts_dec | ||
2238 | be?b Loop_xts_dec | ||
2239 | |||
2240 | .align 5 | ||
2241 | Loop_xts_dec: | ||
2242 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
2243 | vncipher $inout,$inout,$rndkey1 | ||
2244 | lvx $rndkey1,$idx,$key1 | ||
2245 | addi $idx,$idx,16 | ||
2246 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2247 | vncipher $inout,$inout,$rndkey0 | ||
2248 | lvx $rndkey0,$idx,$key1 | ||
2249 | addi $idx,$idx,16 | ||
2250 | bdnz Loop_xts_dec | ||
2251 | |||
2252 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
2253 | vncipher $inout,$inout,$rndkey1 | ||
2254 | lvx $rndkey1,$idx,$key1 | ||
2255 | li $idx,16 | ||
2256 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2257 | vxor $rndkey0,$rndkey0,$tweak | ||
2258 | vncipherlast $output,$inout,$rndkey0 | ||
2259 | |||
2260 | le?vperm $tmp,$output,$output,$leperm | ||
2261 | be?nop | ||
2262 | le?stvx_u $tmp,0,$out | ||
2263 | be?stvx_u $output,0,$out | ||
2264 | addi $out,$out,16 | ||
2265 | |||
2266 | subic. $len,$len,16 | ||
2267 | beq Lxts_dec_done | ||
2268 | |||
2269 | vmr $inout,$inptail | ||
2270 | lvx $inptail,0,$inp | ||
2271 | addi $inp,$inp,16 | ||
2272 | lvx $rndkey0,0,$key1 | ||
2273 | lvx $rndkey1,$idx,$key1 | ||
2274 | addi $idx,$idx,16 | ||
2275 | |||
2276 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2277 | vaddubm $tweak,$tweak,$tweak | ||
2278 | vsldoi $tmp,$tmp,$tmp,15 | ||
2279 | vand $tmp,$tmp,$eighty7 | ||
2280 | vxor $tweak,$tweak,$tmp | ||
2281 | |||
2282 | vperm $inout,$inout,$inptail,$inpperm | ||
2283 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2284 | vxor $inout,$inout,$tweak | ||
2285 | vxor $inout,$inout,$rndkey0 | ||
2286 | lvx $rndkey0,$idx,$key1 | ||
2287 | addi $idx,$idx,16 | ||
2288 | |||
2289 | mtctr $rounds | ||
2290 | ${UCMP}i $len,16 | ||
2291 | bge Loop_xts_dec | ||
2292 | |||
2293 | Ltail_xts_dec: | ||
2294 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2295 | vaddubm $tweak1,$tweak,$tweak | ||
2296 | vsldoi $tmp,$tmp,$tmp,15 | ||
2297 | vand $tmp,$tmp,$eighty7 | ||
2298 | vxor $tweak1,$tweak1,$tmp | ||
2299 | |||
2300 | subi $inp,$inp,16 | ||
2301 | add $inp,$inp,$len | ||
2302 | |||
2303 | vxor $inout,$inout,$tweak # :-( | ||
2304 | vxor $inout,$inout,$tweak1 # :-) | ||
2305 | |||
2306 | Loop_xts_dec_short: | ||
2307 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
2308 | vncipher $inout,$inout,$rndkey1 | ||
2309 | lvx $rndkey1,$idx,$key1 | ||
2310 | addi $idx,$idx,16 | ||
2311 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2312 | vncipher $inout,$inout,$rndkey0 | ||
2313 | lvx $rndkey0,$idx,$key1 | ||
2314 | addi $idx,$idx,16 | ||
2315 | bdnz Loop_xts_dec_short | ||
2316 | |||
2317 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
2318 | vncipher $inout,$inout,$rndkey1 | ||
2319 | lvx $rndkey1,$idx,$key1 | ||
2320 | li $idx,16 | ||
2321 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2322 | vxor $rndkey0,$rndkey0,$tweak1 | ||
2323 | vncipherlast $output,$inout,$rndkey0 | ||
2324 | |||
2325 | le?vperm $tmp,$output,$output,$leperm | ||
2326 | be?nop | ||
2327 | le?stvx_u $tmp,0,$out | ||
2328 | be?stvx_u $output,0,$out | ||
2329 | |||
2330 | vmr $inout,$inptail | ||
2331 | lvx $inptail,0,$inp | ||
2332 | #addi $inp,$inp,16 | ||
2333 | lvx $rndkey0,0,$key1 | ||
2334 | lvx $rndkey1,$idx,$key1 | ||
2335 | addi $idx,$idx,16 | ||
2336 | vperm $inout,$inout,$inptail,$inpperm | ||
2337 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
2338 | |||
2339 | lvsr $inpperm,0,$len # $inpperm is no longer needed | ||
2340 | vxor $inptail,$inptail,$inptail # $inptail is no longer needed | ||
2341 | vspltisb $tmp,-1 | ||
2342 | vperm $inptail,$inptail,$tmp,$inpperm | ||
2343 | vsel $inout,$inout,$output,$inptail | ||
2344 | |||
2345 | vxor $rndkey0,$rndkey0,$tweak | ||
2346 | vxor $inout,$inout,$rndkey0 | ||
2347 | lvx $rndkey0,$idx,$key1 | ||
2348 | addi $idx,$idx,16 | ||
2349 | |||
2350 | subi r11,$out,1 | ||
2351 | mtctr $len | ||
2352 | li $len,16 | ||
2353 | Loop_xts_dec_steal: | ||
2354 | lbzu r0,1(r11) | ||
2355 | stb r0,16(r11) | ||
2356 | bdnz Loop_xts_dec_steal | ||
2357 | |||
2358 | mtctr $rounds | ||
2359 | b Loop_xts_dec # one more time... | ||
2360 | |||
2361 | Lxts_dec_done: | ||
2362 | ${UCMP}i $ivp,0 | ||
2363 | beq Lxts_dec_ret | ||
2364 | |||
2365 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2366 | vaddubm $tweak,$tweak,$tweak | ||
2367 | vsldoi $tmp,$tmp,$tmp,15 | ||
2368 | vand $tmp,$tmp,$eighty7 | ||
2369 | vxor $tweak,$tweak,$tmp | ||
2370 | |||
2371 | le?vperm $tweak,$tweak,$tweak,$leperm | ||
2372 | stvx_u $tweak,0,$ivp | ||
2373 | |||
2374 | Lxts_dec_ret: | ||
2375 | mtspr 256,r12 # restore vrsave | ||
2376 | li r3,0 | ||
2377 | blr | ||
2378 | .long 0 | ||
2379 | .byte 0,12,0x04,0,0x80,6,6,0 | ||
2380 | .long 0 | ||
2381 | .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt | ||
2382 | ___ | ||
2383 | ######################################################################### | ||
2384 | {{ # Optimized XTS procedures # | ||
2385 | my $key_=$key2; | ||
2386 | my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); | ||
2387 | $x00=0 if ($flavour =~ /osx/); | ||
2388 | my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); | ||
2389 | my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); | ||
2390 | my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); | ||
2391 | my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys | ||
2392 | # v26-v31 last 6 round keys | ||
2393 | my ($keyperm)=($out0); # aliases with "caller", redundant assignment | ||
2394 | my $taillen=$x70; | ||
2395 | |||
2396 | $code.=<<___; | ||
2397 | .align 5 | ||
2398 | _aesp8_xts_encrypt6x: | ||
2399 | $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) | ||
2400 | mflr r11 | ||
2401 | li r7,`$FRAME+8*16+15` | ||
2402 | li r3,`$FRAME+8*16+31` | ||
2403 | $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) | ||
2404 | stvx v20,r7,$sp # ABI says so | ||
2405 | addi r7,r7,32 | ||
2406 | stvx v21,r3,$sp | ||
2407 | addi r3,r3,32 | ||
2408 | stvx v22,r7,$sp | ||
2409 | addi r7,r7,32 | ||
2410 | stvx v23,r3,$sp | ||
2411 | addi r3,r3,32 | ||
2412 | stvx v24,r7,$sp | ||
2413 | addi r7,r7,32 | ||
2414 | stvx v25,r3,$sp | ||
2415 | addi r3,r3,32 | ||
2416 | stvx v26,r7,$sp | ||
2417 | addi r7,r7,32 | ||
2418 | stvx v27,r3,$sp | ||
2419 | addi r3,r3,32 | ||
2420 | stvx v28,r7,$sp | ||
2421 | addi r7,r7,32 | ||
2422 | stvx v29,r3,$sp | ||
2423 | addi r3,r3,32 | ||
2424 | stvx v30,r7,$sp | ||
2425 | stvx v31,r3,$sp | ||
2426 | li r0,-1 | ||
2427 | stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave | ||
2428 | li $x10,0x10 | ||
2429 | $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
2430 | li $x20,0x20 | ||
2431 | $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
2432 | li $x30,0x30 | ||
2433 | $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
2434 | li $x40,0x40 | ||
2435 | $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
2436 | li $x50,0x50 | ||
2437 | $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
2438 | li $x60,0x60 | ||
2439 | $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
2440 | li $x70,0x70 | ||
2441 | mtspr 256,r0 | ||
2442 | |||
2443 | subi $rounds,$rounds,3 # -4 in total | ||
2444 | |||
2445 | lvx $rndkey0,$x00,$key1 # load key schedule | ||
2446 | lvx v30,$x10,$key1 | ||
2447 | addi $key1,$key1,0x20 | ||
2448 | lvx v31,$x00,$key1 | ||
2449 | ?vperm $rndkey0,$rndkey0,v30,$keyperm | ||
2450 | addi $key_,$sp,$FRAME+15 | ||
2451 | mtctr $rounds | ||
2452 | |||
2453 | Load_xts_enc_key: | ||
2454 | ?vperm v24,v30,v31,$keyperm | ||
2455 | lvx v30,$x10,$key1 | ||
2456 | addi $key1,$key1,0x20 | ||
2457 | stvx v24,$x00,$key_ # off-load round[1] | ||
2458 | ?vperm v25,v31,v30,$keyperm | ||
2459 | lvx v31,$x00,$key1 | ||
2460 | stvx v25,$x10,$key_ # off-load round[2] | ||
2461 | addi $key_,$key_,0x20 | ||
2462 | bdnz Load_xts_enc_key | ||
2463 | |||
2464 | lvx v26,$x10,$key1 | ||
2465 | ?vperm v24,v30,v31,$keyperm | ||
2466 | lvx v27,$x20,$key1 | ||
2467 | stvx v24,$x00,$key_ # off-load round[3] | ||
2468 | ?vperm v25,v31,v26,$keyperm | ||
2469 | lvx v28,$x30,$key1 | ||
2470 | stvx v25,$x10,$key_ # off-load round[4] | ||
2471 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
2472 | ?vperm v26,v26,v27,$keyperm | ||
2473 | lvx v29,$x40,$key1 | ||
2474 | ?vperm v27,v27,v28,$keyperm | ||
2475 | lvx v30,$x50,$key1 | ||
2476 | ?vperm v28,v28,v29,$keyperm | ||
2477 | lvx v31,$x60,$key1 | ||
2478 | ?vperm v29,v29,v30,$keyperm | ||
2479 | lvx $twk5,$x70,$key1 # borrow $twk5 | ||
2480 | ?vperm v30,v30,v31,$keyperm | ||
2481 | lvx v24,$x00,$key_ # pre-load round[1] | ||
2482 | ?vperm v31,v31,$twk5,$keyperm | ||
2483 | lvx v25,$x10,$key_ # pre-load round[2] | ||
2484 | |||
2485 | vperm $in0,$inout,$inptail,$inpperm | ||
2486 | subi $inp,$inp,31 # undo "caller" | ||
2487 | vxor $twk0,$tweak,$rndkey0 | ||
2488 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2489 | vaddubm $tweak,$tweak,$tweak | ||
2490 | vsldoi $tmp,$tmp,$tmp,15 | ||
2491 | vand $tmp,$tmp,$eighty7 | ||
2492 | vxor $out0,$in0,$twk0 | ||
2493 | vxor $tweak,$tweak,$tmp | ||
2494 | |||
2495 | lvx_u $in1,$x10,$inp | ||
2496 | vxor $twk1,$tweak,$rndkey0 | ||
2497 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2498 | vaddubm $tweak,$tweak,$tweak | ||
2499 | vsldoi $tmp,$tmp,$tmp,15 | ||
2500 | le?vperm $in1,$in1,$in1,$leperm | ||
2501 | vand $tmp,$tmp,$eighty7 | ||
2502 | vxor $out1,$in1,$twk1 | ||
2503 | vxor $tweak,$tweak,$tmp | ||
2504 | |||
2505 | lvx_u $in2,$x20,$inp | ||
2506 | andi. $taillen,$len,15 | ||
2507 | vxor $twk2,$tweak,$rndkey0 | ||
2508 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2509 | vaddubm $tweak,$tweak,$tweak | ||
2510 | vsldoi $tmp,$tmp,$tmp,15 | ||
2511 | le?vperm $in2,$in2,$in2,$leperm | ||
2512 | vand $tmp,$tmp,$eighty7 | ||
2513 | vxor $out2,$in2,$twk2 | ||
2514 | vxor $tweak,$tweak,$tmp | ||
2515 | |||
2516 | lvx_u $in3,$x30,$inp | ||
2517 | sub $len,$len,$taillen | ||
2518 | vxor $twk3,$tweak,$rndkey0 | ||
2519 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2520 | vaddubm $tweak,$tweak,$tweak | ||
2521 | vsldoi $tmp,$tmp,$tmp,15 | ||
2522 | le?vperm $in3,$in3,$in3,$leperm | ||
2523 | vand $tmp,$tmp,$eighty7 | ||
2524 | vxor $out3,$in3,$twk3 | ||
2525 | vxor $tweak,$tweak,$tmp | ||
2526 | |||
2527 | lvx_u $in4,$x40,$inp | ||
2528 | subi $len,$len,0x60 | ||
2529 | vxor $twk4,$tweak,$rndkey0 | ||
2530 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2531 | vaddubm $tweak,$tweak,$tweak | ||
2532 | vsldoi $tmp,$tmp,$tmp,15 | ||
2533 | le?vperm $in4,$in4,$in4,$leperm | ||
2534 | vand $tmp,$tmp,$eighty7 | ||
2535 | vxor $out4,$in4,$twk4 | ||
2536 | vxor $tweak,$tweak,$tmp | ||
2537 | |||
2538 | lvx_u $in5,$x50,$inp | ||
2539 | addi $inp,$inp,0x60 | ||
2540 | vxor $twk5,$tweak,$rndkey0 | ||
2541 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2542 | vaddubm $tweak,$tweak,$tweak | ||
2543 | vsldoi $tmp,$tmp,$tmp,15 | ||
2544 | le?vperm $in5,$in5,$in5,$leperm | ||
2545 | vand $tmp,$tmp,$eighty7 | ||
2546 | vxor $out5,$in5,$twk5 | ||
2547 | vxor $tweak,$tweak,$tmp | ||
2548 | |||
2549 | vxor v31,v31,$rndkey0 | ||
2550 | mtctr $rounds | ||
2551 | b Loop_xts_enc6x | ||
2552 | |||
2553 | .align 5 | ||
2554 | Loop_xts_enc6x: | ||
2555 | vcipher $out0,$out0,v24 | ||
2556 | vcipher $out1,$out1,v24 | ||
2557 | vcipher $out2,$out2,v24 | ||
2558 | vcipher $out3,$out3,v24 | ||
2559 | vcipher $out4,$out4,v24 | ||
2560 | vcipher $out5,$out5,v24 | ||
2561 | lvx v24,$x20,$key_ # round[3] | ||
2562 | addi $key_,$key_,0x20 | ||
2563 | |||
2564 | vcipher $out0,$out0,v25 | ||
2565 | vcipher $out1,$out1,v25 | ||
2566 | vcipher $out2,$out2,v25 | ||
2567 | vcipher $out3,$out3,v25 | ||
2568 | vcipher $out4,$out4,v25 | ||
2569 | vcipher $out5,$out5,v25 | ||
2570 | lvx v25,$x10,$key_ # round[4] | ||
2571 | bdnz Loop_xts_enc6x | ||
2572 | |||
2573 | subic $len,$len,96 # $len-=96 | ||
2574 | vxor $in0,$twk0,v31 # xor with last round key | ||
2575 | vcipher $out0,$out0,v24 | ||
2576 | vcipher $out1,$out1,v24 | ||
2577 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2578 | vxor $twk0,$tweak,$rndkey0 | ||
2579 | vaddubm $tweak,$tweak,$tweak | ||
2580 | vcipher $out2,$out2,v24 | ||
2581 | vcipher $out3,$out3,v24 | ||
2582 | vsldoi $tmp,$tmp,$tmp,15 | ||
2583 | vcipher $out4,$out4,v24 | ||
2584 | vcipher $out5,$out5,v24 | ||
2585 | |||
2586 | subfe. r0,r0,r0 # borrow?-1:0 | ||
2587 | vand $tmp,$tmp,$eighty7 | ||
2588 | vcipher $out0,$out0,v25 | ||
2589 | vcipher $out1,$out1,v25 | ||
2590 | vxor $tweak,$tweak,$tmp | ||
2591 | vcipher $out2,$out2,v25 | ||
2592 | vcipher $out3,$out3,v25 | ||
2593 | vxor $in1,$twk1,v31 | ||
2594 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2595 | vxor $twk1,$tweak,$rndkey0 | ||
2596 | vcipher $out4,$out4,v25 | ||
2597 | vcipher $out5,$out5,v25 | ||
2598 | |||
2599 | and r0,r0,$len | ||
2600 | vaddubm $tweak,$tweak,$tweak | ||
2601 | vsldoi $tmp,$tmp,$tmp,15 | ||
2602 | vcipher $out0,$out0,v26 | ||
2603 | vcipher $out1,$out1,v26 | ||
2604 | vand $tmp,$tmp,$eighty7 | ||
2605 | vcipher $out2,$out2,v26 | ||
2606 | vcipher $out3,$out3,v26 | ||
2607 | vxor $tweak,$tweak,$tmp | ||
2608 | vcipher $out4,$out4,v26 | ||
2609 | vcipher $out5,$out5,v26 | ||
2610 | |||
2611 | add $inp,$inp,r0 # $inp is adjusted in such | ||
2612 | # way that at exit from the | ||
2613 | # loop inX-in5 are loaded | ||
2614 | # with last "words" | ||
2615 | vxor $in2,$twk2,v31 | ||
2616 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2617 | vxor $twk2,$tweak,$rndkey0 | ||
2618 | vaddubm $tweak,$tweak,$tweak | ||
2619 | vcipher $out0,$out0,v27 | ||
2620 | vcipher $out1,$out1,v27 | ||
2621 | vsldoi $tmp,$tmp,$tmp,15 | ||
2622 | vcipher $out2,$out2,v27 | ||
2623 | vcipher $out3,$out3,v27 | ||
2624 | vand $tmp,$tmp,$eighty7 | ||
2625 | vcipher $out4,$out4,v27 | ||
2626 | vcipher $out5,$out5,v27 | ||
2627 | |||
2628 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
2629 | vxor $tweak,$tweak,$tmp | ||
2630 | vcipher $out0,$out0,v28 | ||
2631 | vcipher $out1,$out1,v28 | ||
2632 | vxor $in3,$twk3,v31 | ||
2633 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2634 | vxor $twk3,$tweak,$rndkey0 | ||
2635 | vcipher $out2,$out2,v28 | ||
2636 | vcipher $out3,$out3,v28 | ||
2637 | vaddubm $tweak,$tweak,$tweak | ||
2638 | vsldoi $tmp,$tmp,$tmp,15 | ||
2639 | vcipher $out4,$out4,v28 | ||
2640 | vcipher $out5,$out5,v28 | ||
2641 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
2642 | vand $tmp,$tmp,$eighty7 | ||
2643 | |||
2644 | vcipher $out0,$out0,v29 | ||
2645 | vcipher $out1,$out1,v29 | ||
2646 | vxor $tweak,$tweak,$tmp | ||
2647 | vcipher $out2,$out2,v29 | ||
2648 | vcipher $out3,$out3,v29 | ||
2649 | vxor $in4,$twk4,v31 | ||
2650 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2651 | vxor $twk4,$tweak,$rndkey0 | ||
2652 | vcipher $out4,$out4,v29 | ||
2653 | vcipher $out5,$out5,v29 | ||
2654 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
2655 | vaddubm $tweak,$tweak,$tweak | ||
2656 | vsldoi $tmp,$tmp,$tmp,15 | ||
2657 | |||
2658 | vcipher $out0,$out0,v30 | ||
2659 | vcipher $out1,$out1,v30 | ||
2660 | vand $tmp,$tmp,$eighty7 | ||
2661 | vcipher $out2,$out2,v30 | ||
2662 | vcipher $out3,$out3,v30 | ||
2663 | vxor $tweak,$tweak,$tmp | ||
2664 | vcipher $out4,$out4,v30 | ||
2665 | vcipher $out5,$out5,v30 | ||
2666 | vxor $in5,$twk5,v31 | ||
2667 | vsrab $tmp,$tweak,$seven # next tweak value | ||
2668 | vxor $twk5,$tweak,$rndkey0 | ||
2669 | |||
2670 | vcipherlast $out0,$out0,$in0 | ||
2671 | lvx_u $in0,$x00,$inp # load next input block | ||
2672 | vaddubm $tweak,$tweak,$tweak | ||
2673 | vsldoi $tmp,$tmp,$tmp,15 | ||
2674 | vcipherlast $out1,$out1,$in1 | ||
2675 | lvx_u $in1,$x10,$inp | ||
2676 | vcipherlast $out2,$out2,$in2 | ||
2677 | le?vperm $in0,$in0,$in0,$leperm | ||
2678 | lvx_u $in2,$x20,$inp | ||
2679 | vand $tmp,$tmp,$eighty7 | ||
2680 | vcipherlast $out3,$out3,$in3 | ||
2681 | le?vperm $in1,$in1,$in1,$leperm | ||
2682 | lvx_u $in3,$x30,$inp | ||
2683 | vcipherlast $out4,$out4,$in4 | ||
2684 | le?vperm $in2,$in2,$in2,$leperm | ||
2685 | lvx_u $in4,$x40,$inp | ||
2686 | vxor $tweak,$tweak,$tmp | ||
2687 | vcipherlast $tmp,$out5,$in5 # last block might be needed | ||
2688 | # in stealing mode | ||
2689 | le?vperm $in3,$in3,$in3,$leperm | ||
2690 | lvx_u $in5,$x50,$inp | ||
2691 | addi $inp,$inp,0x60 | ||
2692 | le?vperm $in4,$in4,$in4,$leperm | ||
2693 | le?vperm $in5,$in5,$in5,$leperm | ||
2694 | |||
2695 | le?vperm $out0,$out0,$out0,$leperm | ||
2696 | le?vperm $out1,$out1,$out1,$leperm | ||
2697 | stvx_u $out0,$x00,$out # store output | ||
2698 | vxor $out0,$in0,$twk0 | ||
2699 | le?vperm $out2,$out2,$out2,$leperm | ||
2700 | stvx_u $out1,$x10,$out | ||
2701 | vxor $out1,$in1,$twk1 | ||
2702 | le?vperm $out3,$out3,$out3,$leperm | ||
2703 | stvx_u $out2,$x20,$out | ||
2704 | vxor $out2,$in2,$twk2 | ||
2705 | le?vperm $out4,$out4,$out4,$leperm | ||
2706 | stvx_u $out3,$x30,$out | ||
2707 | vxor $out3,$in3,$twk3 | ||
2708 | le?vperm $out5,$tmp,$tmp,$leperm | ||
2709 | stvx_u $out4,$x40,$out | ||
2710 | vxor $out4,$in4,$twk4 | ||
2711 | le?stvx_u $out5,$x50,$out | ||
2712 | be?stvx_u $tmp, $x50,$out | ||
2713 | vxor $out5,$in5,$twk5 | ||
2714 | addi $out,$out,0x60 | ||
2715 | |||
2716 | mtctr $rounds | ||
2717 | beq Loop_xts_enc6x # did $len-=96 borrow? | ||
2718 | |||
2719 | addic. $len,$len,0x60 | ||
2720 | beq Lxts_enc6x_zero | ||
2721 | cmpwi $len,0x20 | ||
2722 | blt Lxts_enc6x_one | ||
2723 | nop | ||
2724 | beq Lxts_enc6x_two | ||
2725 | cmpwi $len,0x40 | ||
2726 | blt Lxts_enc6x_three | ||
2727 | nop | ||
2728 | beq Lxts_enc6x_four | ||
2729 | |||
2730 | Lxts_enc6x_five: | ||
2731 | vxor $out0,$in1,$twk0 | ||
2732 | vxor $out1,$in2,$twk1 | ||
2733 | vxor $out2,$in3,$twk2 | ||
2734 | vxor $out3,$in4,$twk3 | ||
2735 | vxor $out4,$in5,$twk4 | ||
2736 | |||
2737 | bl _aesp8_xts_enc5x | ||
2738 | |||
2739 | le?vperm $out0,$out0,$out0,$leperm | ||
2740 | vmr $twk0,$twk5 # unused tweak | ||
2741 | le?vperm $out1,$out1,$out1,$leperm | ||
2742 | stvx_u $out0,$x00,$out # store output | ||
2743 | le?vperm $out2,$out2,$out2,$leperm | ||
2744 | stvx_u $out1,$x10,$out | ||
2745 | le?vperm $out3,$out3,$out3,$leperm | ||
2746 | stvx_u $out2,$x20,$out | ||
2747 | vxor $tmp,$out4,$twk5 # last block prep for stealing | ||
2748 | le?vperm $out4,$out4,$out4,$leperm | ||
2749 | stvx_u $out3,$x30,$out | ||
2750 | stvx_u $out4,$x40,$out | ||
2751 | addi $out,$out,0x50 | ||
2752 | bne Lxts_enc6x_steal | ||
2753 | b Lxts_enc6x_done | ||
2754 | |||
2755 | .align 4 | ||
2756 | Lxts_enc6x_four: | ||
2757 | vxor $out0,$in2,$twk0 | ||
2758 | vxor $out1,$in3,$twk1 | ||
2759 | vxor $out2,$in4,$twk2 | ||
2760 | vxor $out3,$in5,$twk3 | ||
2761 | vxor $out4,$out4,$out4 | ||
2762 | |||
2763 | bl _aesp8_xts_enc5x | ||
2764 | |||
2765 | le?vperm $out0,$out0,$out0,$leperm | ||
2766 | vmr $twk0,$twk4 # unused tweak | ||
2767 | le?vperm $out1,$out1,$out1,$leperm | ||
2768 | stvx_u $out0,$x00,$out # store output | ||
2769 | le?vperm $out2,$out2,$out2,$leperm | ||
2770 | stvx_u $out1,$x10,$out | ||
2771 | vxor $tmp,$out3,$twk4 # last block prep for stealing | ||
2772 | le?vperm $out3,$out3,$out3,$leperm | ||
2773 | stvx_u $out2,$x20,$out | ||
2774 | stvx_u $out3,$x30,$out | ||
2775 | addi $out,$out,0x40 | ||
2776 | bne Lxts_enc6x_steal | ||
2777 | b Lxts_enc6x_done | ||
2778 | |||
2779 | .align 4 | ||
2780 | Lxts_enc6x_three: | ||
2781 | vxor $out0,$in3,$twk0 | ||
2782 | vxor $out1,$in4,$twk1 | ||
2783 | vxor $out2,$in5,$twk2 | ||
2784 | vxor $out3,$out3,$out3 | ||
2785 | vxor $out4,$out4,$out4 | ||
2786 | |||
2787 | bl _aesp8_xts_enc5x | ||
2788 | |||
2789 | le?vperm $out0,$out0,$out0,$leperm | ||
2790 | vmr $twk0,$twk3 # unused tweak | ||
2791 | le?vperm $out1,$out1,$out1,$leperm | ||
2792 | stvx_u $out0,$x00,$out # store output | ||
2793 | vxor $tmp,$out2,$twk3 # last block prep for stealing | ||
2794 | le?vperm $out2,$out2,$out2,$leperm | ||
2795 | stvx_u $out1,$x10,$out | ||
2796 | stvx_u $out2,$x20,$out | ||
2797 | addi $out,$out,0x30 | ||
2798 | bne Lxts_enc6x_steal | ||
2799 | b Lxts_enc6x_done | ||
2800 | |||
2801 | .align 4 | ||
2802 | Lxts_enc6x_two: | ||
2803 | vxor $out0,$in4,$twk0 | ||
2804 | vxor $out1,$in5,$twk1 | ||
2805 | vxor $out2,$out2,$out2 | ||
2806 | vxor $out3,$out3,$out3 | ||
2807 | vxor $out4,$out4,$out4 | ||
2808 | |||
2809 | bl _aesp8_xts_enc5x | ||
2810 | |||
2811 | le?vperm $out0,$out0,$out0,$leperm | ||
2812 | vmr $twk0,$twk2 # unused tweak | ||
2813 | vxor $tmp,$out1,$twk2 # last block prep for stealing | ||
2814 | le?vperm $out1,$out1,$out1,$leperm | ||
2815 | stvx_u $out0,$x00,$out # store output | ||
2816 | stvx_u $out1,$x10,$out | ||
2817 | addi $out,$out,0x20 | ||
2818 | bne Lxts_enc6x_steal | ||
2819 | b Lxts_enc6x_done | ||
2820 | |||
2821 | .align 4 | ||
2822 | Lxts_enc6x_one: | ||
2823 | vxor $out0,$in5,$twk0 | ||
2824 | nop | ||
2825 | Loop_xts_enc1x: | ||
2826 | vcipher $out0,$out0,v24 | ||
2827 | lvx v24,$x20,$key_ # round[3] | ||
2828 | addi $key_,$key_,0x20 | ||
2829 | |||
2830 | vcipher $out0,$out0,v25 | ||
2831 | lvx v25,$x10,$key_ # round[4] | ||
2832 | bdnz Loop_xts_enc1x | ||
2833 | |||
2834 | add $inp,$inp,$taillen | ||
2835 | cmpwi $taillen,0 | ||
2836 | vcipher $out0,$out0,v24 | ||
2837 | |||
2838 | subi $inp,$inp,16 | ||
2839 | vcipher $out0,$out0,v25 | ||
2840 | |||
2841 | lvsr $inpperm,0,$taillen | ||
2842 | vcipher $out0,$out0,v26 | ||
2843 | |||
2844 | lvx_u $in0,0,$inp | ||
2845 | vcipher $out0,$out0,v27 | ||
2846 | |||
2847 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
2848 | vcipher $out0,$out0,v28 | ||
2849 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
2850 | |||
2851 | vcipher $out0,$out0,v29 | ||
2852 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
2853 | vxor $twk0,$twk0,v31 | ||
2854 | |||
2855 | le?vperm $in0,$in0,$in0,$leperm | ||
2856 | vcipher $out0,$out0,v30 | ||
2857 | |||
2858 | vperm $in0,$in0,$in0,$inpperm | ||
2859 | vcipherlast $out0,$out0,$twk0 | ||
2860 | |||
2861 | vmr $twk0,$twk1 # unused tweak | ||
2862 | vxor $tmp,$out0,$twk1 # last block prep for stealing | ||
2863 | le?vperm $out0,$out0,$out0,$leperm | ||
2864 | stvx_u $out0,$x00,$out # store output | ||
2865 | addi $out,$out,0x10 | ||
2866 | bne Lxts_enc6x_steal | ||
2867 | b Lxts_enc6x_done | ||
2868 | |||
2869 | .align 4 | ||
2870 | Lxts_enc6x_zero: | ||
2871 | cmpwi $taillen,0 | ||
2872 | beq Lxts_enc6x_done | ||
2873 | |||
2874 | add $inp,$inp,$taillen | ||
2875 | subi $inp,$inp,16 | ||
2876 | lvx_u $in0,0,$inp | ||
2877 | lvsr $inpperm,0,$taillen # $in5 is no more | ||
2878 | le?vperm $in0,$in0,$in0,$leperm | ||
2879 | vperm $in0,$in0,$in0,$inpperm | ||
2880 | vxor $tmp,$tmp,$twk0 | ||
2881 | Lxts_enc6x_steal: | ||
2882 | vxor $in0,$in0,$twk0 | ||
2883 | vxor $out0,$out0,$out0 | ||
2884 | vspltisb $out1,-1 | ||
2885 | vperm $out0,$out0,$out1,$inpperm | ||
2886 | vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? | ||
2887 | |||
2888 | subi r30,$out,17 | ||
2889 | subi $out,$out,16 | ||
2890 | mtctr $taillen | ||
2891 | Loop_xts_enc6x_steal: | ||
2892 | lbzu r0,1(r30) | ||
2893 | stb r0,16(r30) | ||
2894 | bdnz Loop_xts_enc6x_steal | ||
2895 | |||
2896 | li $taillen,0 | ||
2897 | mtctr $rounds | ||
2898 | b Loop_xts_enc1x # one more time... | ||
2899 | |||
2900 | .align 4 | ||
2901 | Lxts_enc6x_done: | ||
2902 | ${UCMP}i $ivp,0 | ||
2903 | beq Lxts_enc6x_ret | ||
2904 | |||
2905 | vxor $tweak,$twk0,$rndkey0 | ||
2906 | le?vperm $tweak,$tweak,$tweak,$leperm | ||
2907 | stvx_u $tweak,0,$ivp | ||
2908 | |||
2909 | Lxts_enc6x_ret: | ||
2910 | mtlr r11 | ||
2911 | li r10,`$FRAME+15` | ||
2912 | li r11,`$FRAME+31` | ||
2913 | stvx $seven,r10,$sp # wipe copies of round keys | ||
2914 | addi r10,r10,32 | ||
2915 | stvx $seven,r11,$sp | ||
2916 | addi r11,r11,32 | ||
2917 | stvx $seven,r10,$sp | ||
2918 | addi r10,r10,32 | ||
2919 | stvx $seven,r11,$sp | ||
2920 | addi r11,r11,32 | ||
2921 | stvx $seven,r10,$sp | ||
2922 | addi r10,r10,32 | ||
2923 | stvx $seven,r11,$sp | ||
2924 | addi r11,r11,32 | ||
2925 | stvx $seven,r10,$sp | ||
2926 | addi r10,r10,32 | ||
2927 | stvx $seven,r11,$sp | ||
2928 | addi r11,r11,32 | ||
2929 | |||
2930 | mtspr 256,$vrsave | ||
2931 | lvx v20,r10,$sp # ABI says so | ||
2932 | addi r10,r10,32 | ||
2933 | lvx v21,r11,$sp | ||
2934 | addi r11,r11,32 | ||
2935 | lvx v22,r10,$sp | ||
2936 | addi r10,r10,32 | ||
2937 | lvx v23,r11,$sp | ||
2938 | addi r11,r11,32 | ||
2939 | lvx v24,r10,$sp | ||
2940 | addi r10,r10,32 | ||
2941 | lvx v25,r11,$sp | ||
2942 | addi r11,r11,32 | ||
2943 | lvx v26,r10,$sp | ||
2944 | addi r10,r10,32 | ||
2945 | lvx v27,r11,$sp | ||
2946 | addi r11,r11,32 | ||
2947 | lvx v28,r10,$sp | ||
2948 | addi r10,r10,32 | ||
2949 | lvx v29,r11,$sp | ||
2950 | addi r11,r11,32 | ||
2951 | lvx v30,r10,$sp | ||
2952 | lvx v31,r11,$sp | ||
2953 | $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
2954 | $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
2955 | $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
2956 | $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
2957 | $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
2958 | $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
2959 | addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` | ||
2960 | blr | ||
2961 | .long 0 | ||
2962 | .byte 0,12,0x04,1,0x80,6,6,0 | ||
2963 | .long 0 | ||
2964 | |||
# _aesp8_xts_enc5x: shared AES-encrypt tail for the 1..5-block XTS cases.
# On entry CTR holds the remaining middle-round count and $out0..$out4 already
# contain plaintext XORed with their tweaks ($twk0..$twk4); the loop runs the
# middle rounds two at a time from the stack copy of the key schedule at
# $key_ = $sp+$FRAME+15, then the fixed final rounds (v24..v30) finish up.
# NOTE(review): $out0's last-round key is $twk0^v31 (tweak folded into the
# final AddRoundKey), while $out1..$out4 use $in1..$in4 = $twkN^v31 computed
# below — the caller XORs the tweak back in after return.
2965 | .align 5 | ||
2966 | _aesp8_xts_enc5x: | ||
2967 | vcipher $out0,$out0,v24 | ||
2968 | vcipher $out1,$out1,v24 | ||
2969 | vcipher $out2,$out2,v24 | ||
2970 | vcipher $out3,$out3,v24 | ||
2971 | vcipher $out4,$out4,v24 | ||
2972 | lvx v24,$x20,$key_ # round[3] | ||
2973 | addi $key_,$key_,0x20 | ||
2974 | |||
2975 | vcipher $out0,$out0,v25 | ||
2976 | vcipher $out1,$out1,v25 | ||
2977 | vcipher $out2,$out2,v25 | ||
2978 | vcipher $out3,$out3,v25 | ||
2979 | vcipher $out4,$out4,v25 | ||
2980 | lvx v25,$x10,$key_ # round[4] | ||
2981 | bdnz _aesp8_xts_enc5x | ||
# Middle rounds done; interleave the final four rounds with tail bookkeeping:
# back the input pointer up to the last full block (for ciphertext stealing)
# and build the per-block last-round keys $inN = $twkN ^ v31.
2982 | |||
2983 | add $inp,$inp,$taillen | ||
2984 | cmpwi $taillen,0 | ||
2985 | vcipher $out0,$out0,v24 | ||
2986 | vcipher $out1,$out1,v24 | ||
2987 | vcipher $out2,$out2,v24 | ||
2988 | vcipher $out3,$out3,v24 | ||
2989 | vcipher $out4,$out4,v24 | ||
2990 | |||
2991 | subi $inp,$inp,16 | ||
2992 | vcipher $out0,$out0,v25 | ||
2993 | vcipher $out1,$out1,v25 | ||
2994 | vcipher $out2,$out2,v25 | ||
2995 | vcipher $out3,$out3,v25 | ||
2996 | vcipher $out4,$out4,v25 | ||
2997 | vxor $twk0,$twk0,v31 | ||
2998 | |||
2999 | vcipher $out0,$out0,v26 | ||
3000 | lvsr $inpperm,r0,$taillen # $in5 is no more | ||
3001 | vcipher $out1,$out1,v26 | ||
3002 | vcipher $out2,$out2,v26 | ||
3003 | vcipher $out3,$out3,v26 | ||
3004 | vcipher $out4,$out4,v26 | ||
3005 | vxor $in1,$twk1,v31 | ||
3006 | |||
3007 | vcipher $out0,$out0,v27 | ||
3008 | lvx_u $in0,0,$inp | ||
3009 | vcipher $out1,$out1,v27 | ||
3010 | vcipher $out2,$out2,v27 | ||
3011 | vcipher $out3,$out3,v27 | ||
3012 | vcipher $out4,$out4,v27 | ||
3013 | vxor $in2,$twk2,v31 | ||
3014 | |||
3015 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
3016 | vcipher $out0,$out0,v28 | ||
3017 | vcipher $out1,$out1,v28 | ||
3018 | vcipher $out2,$out2,v28 | ||
3019 | vcipher $out3,$out3,v28 | ||
3020 | vcipher $out4,$out4,v28 | ||
3021 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
3022 | vxor $in3,$twk3,v31 | ||
3023 | |||
3024 | vcipher $out0,$out0,v29 | ||
3025 | le?vperm $in0,$in0,$in0,$leperm | ||
3026 | vcipher $out1,$out1,v29 | ||
3027 | vcipher $out2,$out2,v29 | ||
3028 | vcipher $out3,$out3,v29 | ||
3029 | vcipher $out4,$out4,v29 | ||
3030 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
3031 | vxor $in4,$twk4,v31 | ||
3032 | |||
3033 | vcipher $out0,$out0,v30 | ||
3034 | vperm $in0,$in0,$in0,$inpperm | ||
3035 | vcipher $out1,$out1,v30 | ||
3036 | vcipher $out2,$out2,v30 | ||
3037 | vcipher $out3,$out3,v30 | ||
3038 | vcipher $out4,$out4,v30 | ||
# Final round: vcipherlast folds the per-block last-round key into each block.
3039 | |||
3040 | vcipherlast $out0,$out0,$twk0 | ||
3041 | vcipherlast $out1,$out1,$in1 | ||
3042 | vcipherlast $out2,$out2,$in2 | ||
3043 | vcipherlast $out3,$out3,$in3 | ||
3044 | vcipherlast $out4,$out4,$in4 | ||
3045 | blr | ||
3046 | .long 0 | ||
3047 | .byte 0,12,0x14,0,0,0,0,0 | ||
3048 | |||
# _aesp8_xts_decrypt6x: 6-way interleaved AES-XTS decryption for lengths
# >= 96 bytes (the C glue / short-input path dispatches here).  Layout and
# scheduling mirror the encrypt-side _aesp8_xts_encrypt6x above: the key
# schedule is copied onto the stack so middle rounds can stream from $key_,
# six blocks are pipelined per iteration of Loop_xts_dec6x, and tweaks are
# advanced by GF(2^128) doubling (vsrab/vaddubm/vsldoi/vand with $eighty7).
# Tail-block counts of 1..5 fall through to Lxts_dec6x_one..five, which call
# the shared _aesp8_xts_dec5x; a non-multiple-of-16 残 tail is handled by the
# ciphertext-stealing path at Lxts_dec6x_steal.
# NOTE(review): decryption stealing differs from encryption — the LAST full
# block must be processed with the tweak that follows the tail's tweak, hence
# the extra $twk1 bookkeeping in the tail cases below.
3049 | .align 5 | ||
3050 | _aesp8_xts_decrypt6x: | ||
# Prologue: allocate frame, save LR, non-volatile VRs v20-v31, vrsave and
# r26-r31 per ABI; r0=-1 into VRSAVE marks all vector registers live.
3051 | $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) | ||
3052 | mflr r11 | ||
3053 | li r7,`$FRAME+8*16+15` | ||
3054 | li r3,`$FRAME+8*16+31` | ||
3055 | $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) | ||
3056 | stvx v20,r7,$sp # ABI says so | ||
3057 | addi r7,r7,32 | ||
3058 | stvx v21,r3,$sp | ||
3059 | addi r3,r3,32 | ||
3060 | stvx v22,r7,$sp | ||
3061 | addi r7,r7,32 | ||
3062 | stvx v23,r3,$sp | ||
3063 | addi r3,r3,32 | ||
3064 | stvx v24,r7,$sp | ||
3065 | addi r7,r7,32 | ||
3066 | stvx v25,r3,$sp | ||
3067 | addi r3,r3,32 | ||
3068 | stvx v26,r7,$sp | ||
3069 | addi r7,r7,32 | ||
3070 | stvx v27,r3,$sp | ||
3071 | addi r3,r3,32 | ||
3072 | stvx v28,r7,$sp | ||
3073 | addi r7,r7,32 | ||
3074 | stvx v29,r3,$sp | ||
3075 | addi r3,r3,32 | ||
3076 | stvx v30,r7,$sp | ||
3077 | stvx v31,r3,$sp | ||
3078 | li r0,-1 | ||
3079 | stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave | ||
3080 | li $x10,0x10 | ||
3081 | $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
3082 | li $x20,0x20 | ||
3083 | $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
3084 | li $x30,0x30 | ||
3085 | $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
3086 | li $x40,0x40 | ||
3087 | $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
3088 | li $x50,0x50 | ||
3089 | $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
3090 | li $x60,0x60 | ||
3091 | $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
3092 | li $x70,0x70 | ||
3093 | mtspr 256,r0 | ||
3094 | |||
3095 | subi $rounds,$rounds,3 # -4 in total | ||
# Copy the (alignment-fixed) decryption key schedule to the stack so the hot
# loop can reload round keys with plain lvx from $key_.
3096 | |||
3097 | lvx $rndkey0,$x00,$key1 # load key schedule | ||
3098 | lvx v30,$x10,$key1 | ||
3099 | addi $key1,$key1,0x20 | ||
3100 | lvx v31,$x00,$key1 | ||
3101 | ?vperm $rndkey0,$rndkey0,v30,$keyperm | ||
3102 | addi $key_,$sp,$FRAME+15 | ||
3103 | mtctr $rounds | ||
3104 | |||
3105 | Load_xts_dec_key: | ||
3106 | ?vperm v24,v30,v31,$keyperm | ||
3107 | lvx v30,$x10,$key1 | ||
3108 | addi $key1,$key1,0x20 | ||
3109 | stvx v24,$x00,$key_ # off-load round[1] | ||
3110 | ?vperm v25,v31,v30,$keyperm | ||
3111 | lvx v31,$x00,$key1 | ||
3112 | stvx v25,$x10,$key_ # off-load round[2] | ||
3113 | addi $key_,$key_,0x20 | ||
3114 | bdnz Load_xts_dec_key | ||
3115 | |||
3116 | lvx v26,$x10,$key1 | ||
3117 | ?vperm v24,v30,v31,$keyperm | ||
3118 | lvx v27,$x20,$key1 | ||
3119 | stvx v24,$x00,$key_ # off-load round[3] | ||
3120 | ?vperm v25,v31,v26,$keyperm | ||
3121 | lvx v28,$x30,$key1 | ||
3122 | stvx v25,$x10,$key_ # off-load round[4] | ||
3123 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
3124 | ?vperm v26,v26,v27,$keyperm | ||
3125 | lvx v29,$x40,$key1 | ||
3126 | ?vperm v27,v27,v28,$keyperm | ||
3127 | lvx v30,$x50,$key1 | ||
3128 | ?vperm v28,v28,v29,$keyperm | ||
3129 | lvx v31,$x60,$key1 | ||
3130 | ?vperm v29,v29,v30,$keyperm | ||
3131 | lvx $twk5,$x70,$key1 # borrow $twk5 | ||
3132 | ?vperm v30,v30,v31,$keyperm | ||
3133 | lvx v24,$x00,$key_ # pre-load round[1] | ||
3134 | ?vperm v31,v31,$twk5,$keyperm | ||
3135 | lvx v25,$x10,$key_ # pre-load round[2] | ||
# Load the first six ciphertext blocks, deriving six consecutive tweaks
# ($twk0..$twk5) by repeated GF doubling, and pre-XOR each block with its
# tweak (the "pre-whitening" half of XTS).
3136 | |||
3137 | vperm $in0,$inout,$inptail,$inpperm | ||
3138 | subi $inp,$inp,31 # undo "caller" | ||
3139 | vxor $twk0,$tweak,$rndkey0 | ||
3140 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3141 | vaddubm $tweak,$tweak,$tweak | ||
3142 | vsldoi $tmp,$tmp,$tmp,15 | ||
3143 | vand $tmp,$tmp,$eighty7 | ||
3144 | vxor $out0,$in0,$twk0 | ||
3145 | vxor $tweak,$tweak,$tmp | ||
3146 | |||
3147 | lvx_u $in1,$x10,$inp | ||
3148 | vxor $twk1,$tweak,$rndkey0 | ||
3149 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3150 | vaddubm $tweak,$tweak,$tweak | ||
3151 | vsldoi $tmp,$tmp,$tmp,15 | ||
3152 | le?vperm $in1,$in1,$in1,$leperm | ||
3153 | vand $tmp,$tmp,$eighty7 | ||
3154 | vxor $out1,$in1,$twk1 | ||
3155 | vxor $tweak,$tweak,$tmp | ||
3156 | |||
3157 | lvx_u $in2,$x20,$inp | ||
3158 | andi. $taillen,$len,15 | ||
3159 | vxor $twk2,$tweak,$rndkey0 | ||
3160 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3161 | vaddubm $tweak,$tweak,$tweak | ||
3162 | vsldoi $tmp,$tmp,$tmp,15 | ||
3163 | le?vperm $in2,$in2,$in2,$leperm | ||
3164 | vand $tmp,$tmp,$eighty7 | ||
3165 | vxor $out2,$in2,$twk2 | ||
3166 | vxor $tweak,$tweak,$tmp | ||
3167 | |||
3168 | lvx_u $in3,$x30,$inp | ||
3169 | sub $len,$len,$taillen | ||
3170 | vxor $twk3,$tweak,$rndkey0 | ||
3171 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3172 | vaddubm $tweak,$tweak,$tweak | ||
3173 | vsldoi $tmp,$tmp,$tmp,15 | ||
3174 | le?vperm $in3,$in3,$in3,$leperm | ||
3175 | vand $tmp,$tmp,$eighty7 | ||
3176 | vxor $out3,$in3,$twk3 | ||
3177 | vxor $tweak,$tweak,$tmp | ||
3178 | |||
3179 | lvx_u $in4,$x40,$inp | ||
3180 | subi $len,$len,0x60 | ||
3181 | vxor $twk4,$tweak,$rndkey0 | ||
3182 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3183 | vaddubm $tweak,$tweak,$tweak | ||
3184 | vsldoi $tmp,$tmp,$tmp,15 | ||
3185 | le?vperm $in4,$in4,$in4,$leperm | ||
3186 | vand $tmp,$tmp,$eighty7 | ||
3187 | vxor $out4,$in4,$twk4 | ||
3188 | vxor $tweak,$tweak,$tmp | ||
3189 | |||
3190 | lvx_u $in5,$x50,$inp | ||
3191 | addi $inp,$inp,0x60 | ||
3192 | vxor $twk5,$tweak,$rndkey0 | ||
3193 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3194 | vaddubm $tweak,$tweak,$tweak | ||
3195 | vsldoi $tmp,$tmp,$tmp,15 | ||
3196 | le?vperm $in5,$in5,$in5,$leperm | ||
3197 | vand $tmp,$tmp,$eighty7 | ||
3198 | vxor $out5,$in5,$twk5 | ||
3199 | vxor $tweak,$tweak,$tmp | ||
3200 | |||
3201 | vxor v31,v31,$rndkey0 | ||
3202 | mtctr $rounds | ||
3203 | b Loop_xts_dec6x | ||
# Main 6-way pipeline: middle rounds two at a time; after the counter runs
# out, the final rounds are interleaved with next-iteration tweak generation,
# block loads and stores, exactly as on the encrypt side.
3204 | |||
3205 | .align 5 | ||
3206 | Loop_xts_dec6x: | ||
3207 | vncipher $out0,$out0,v24 | ||
3208 | vncipher $out1,$out1,v24 | ||
3209 | vncipher $out2,$out2,v24 | ||
3210 | vncipher $out3,$out3,v24 | ||
3211 | vncipher $out4,$out4,v24 | ||
3212 | vncipher $out5,$out5,v24 | ||
3213 | lvx v24,$x20,$key_ # round[3] | ||
3214 | addi $key_,$key_,0x20 | ||
3215 | |||
3216 | vncipher $out0,$out0,v25 | ||
3217 | vncipher $out1,$out1,v25 | ||
3218 | vncipher $out2,$out2,v25 | ||
3219 | vncipher $out3,$out3,v25 | ||
3220 | vncipher $out4,$out4,v25 | ||
3221 | vncipher $out5,$out5,v25 | ||
3222 | lvx v25,$x10,$key_ # round[4] | ||
3223 | bdnz Loop_xts_dec6x | ||
3224 | |||
3225 | subic $len,$len,96 # $len-=96 | ||
3226 | vxor $in0,$twk0,v31 # xor with last round key | ||
3227 | vncipher $out0,$out0,v24 | ||
3228 | vncipher $out1,$out1,v24 | ||
3229 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3230 | vxor $twk0,$tweak,$rndkey0 | ||
3231 | vaddubm $tweak,$tweak,$tweak | ||
3232 | vncipher $out2,$out2,v24 | ||
3233 | vncipher $out3,$out3,v24 | ||
3234 | vsldoi $tmp,$tmp,$tmp,15 | ||
3235 | vncipher $out4,$out4,v24 | ||
3236 | vncipher $out5,$out5,v24 | ||
3237 | |||
3238 | subfe. r0,r0,r0 # borrow?-1:0 | ||
3239 | vand $tmp,$tmp,$eighty7 | ||
3240 | vncipher $out0,$out0,v25 | ||
3241 | vncipher $out1,$out1,v25 | ||
3242 | vxor $tweak,$tweak,$tmp | ||
3243 | vncipher $out2,$out2,v25 | ||
3244 | vncipher $out3,$out3,v25 | ||
3245 | vxor $in1,$twk1,v31 | ||
3246 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3247 | vxor $twk1,$tweak,$rndkey0 | ||
3248 | vncipher $out4,$out4,v25 | ||
3249 | vncipher $out5,$out5,v25 | ||
3250 | |||
3251 | and r0,r0,$len | ||
3252 | vaddubm $tweak,$tweak,$tweak | ||
3253 | vsldoi $tmp,$tmp,$tmp,15 | ||
3254 | vncipher $out0,$out0,v26 | ||
3255 | vncipher $out1,$out1,v26 | ||
3256 | vand $tmp,$tmp,$eighty7 | ||
3257 | vncipher $out2,$out2,v26 | ||
3258 | vncipher $out3,$out3,v26 | ||
3259 | vxor $tweak,$tweak,$tmp | ||
3260 | vncipher $out4,$out4,v26 | ||
3261 | vncipher $out5,$out5,v26 | ||
3262 | |||
3263 | add $inp,$inp,r0 # $inp is adjusted in such | ||
3264 | # way that at exit from the | ||
3265 | # loop inX-in5 are loaded | ||
3266 | # with last "words" | ||
3267 | vxor $in2,$twk2,v31 | ||
3268 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3269 | vxor $twk2,$tweak,$rndkey0 | ||
3270 | vaddubm $tweak,$tweak,$tweak | ||
3271 | vncipher $out0,$out0,v27 | ||
3272 | vncipher $out1,$out1,v27 | ||
3273 | vsldoi $tmp,$tmp,$tmp,15 | ||
3274 | vncipher $out2,$out2,v27 | ||
3275 | vncipher $out3,$out3,v27 | ||
3276 | vand $tmp,$tmp,$eighty7 | ||
3277 | vncipher $out4,$out4,v27 | ||
3278 | vncipher $out5,$out5,v27 | ||
3279 | |||
3280 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
3281 | vxor $tweak,$tweak,$tmp | ||
3282 | vncipher $out0,$out0,v28 | ||
3283 | vncipher $out1,$out1,v28 | ||
3284 | vxor $in3,$twk3,v31 | ||
3285 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3286 | vxor $twk3,$tweak,$rndkey0 | ||
3287 | vncipher $out2,$out2,v28 | ||
3288 | vncipher $out3,$out3,v28 | ||
3289 | vaddubm $tweak,$tweak,$tweak | ||
3290 | vsldoi $tmp,$tmp,$tmp,15 | ||
3291 | vncipher $out4,$out4,v28 | ||
3292 | vncipher $out5,$out5,v28 | ||
3293 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
3294 | vand $tmp,$tmp,$eighty7 | ||
3295 | |||
3296 | vncipher $out0,$out0,v29 | ||
3297 | vncipher $out1,$out1,v29 | ||
3298 | vxor $tweak,$tweak,$tmp | ||
3299 | vncipher $out2,$out2,v29 | ||
3300 | vncipher $out3,$out3,v29 | ||
3301 | vxor $in4,$twk4,v31 | ||
3302 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3303 | vxor $twk4,$tweak,$rndkey0 | ||
3304 | vncipher $out4,$out4,v29 | ||
3305 | vncipher $out5,$out5,v29 | ||
3306 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
3307 | vaddubm $tweak,$tweak,$tweak | ||
3308 | vsldoi $tmp,$tmp,$tmp,15 | ||
3309 | |||
3310 | vncipher $out0,$out0,v30 | ||
3311 | vncipher $out1,$out1,v30 | ||
3312 | vand $tmp,$tmp,$eighty7 | ||
3313 | vncipher $out2,$out2,v30 | ||
3314 | vncipher $out3,$out3,v30 | ||
3315 | vxor $tweak,$tweak,$tmp | ||
3316 | vncipher $out4,$out4,v30 | ||
3317 | vncipher $out5,$out5,v30 | ||
3318 | vxor $in5,$twk5,v31 | ||
3319 | vsrab $tmp,$tweak,$seven # next tweak value | ||
3320 | vxor $twk5,$tweak,$rndkey0 | ||
3321 | |||
3322 | vncipherlast $out0,$out0,$in0 | ||
3323 | lvx_u $in0,$x00,$inp # load next input block | ||
3324 | vaddubm $tweak,$tweak,$tweak | ||
3325 | vsldoi $tmp,$tmp,$tmp,15 | ||
3326 | vncipherlast $out1,$out1,$in1 | ||
3327 | lvx_u $in1,$x10,$inp | ||
3328 | vncipherlast $out2,$out2,$in2 | ||
3329 | le?vperm $in0,$in0,$in0,$leperm | ||
3330 | lvx_u $in2,$x20,$inp | ||
3331 | vand $tmp,$tmp,$eighty7 | ||
3332 | vncipherlast $out3,$out3,$in3 | ||
3333 | le?vperm $in1,$in1,$in1,$leperm | ||
3334 | lvx_u $in3,$x30,$inp | ||
3335 | vncipherlast $out4,$out4,$in4 | ||
3336 | le?vperm $in2,$in2,$in2,$leperm | ||
3337 | lvx_u $in4,$x40,$inp | ||
3338 | vxor $tweak,$tweak,$tmp | ||
3339 | vncipherlast $out5,$out5,$in5 | ||
3340 | le?vperm $in3,$in3,$in3,$leperm | ||
3341 | lvx_u $in5,$x50,$inp | ||
3342 | addi $inp,$inp,0x60 | ||
3343 | le?vperm $in4,$in4,$in4,$leperm | ||
3344 | le?vperm $in5,$in5,$in5,$leperm | ||
3345 | |||
3346 | le?vperm $out0,$out0,$out0,$leperm | ||
3347 | le?vperm $out1,$out1,$out1,$leperm | ||
3348 | stvx_u $out0,$x00,$out # store output | ||
3349 | vxor $out0,$in0,$twk0 | ||
3350 | le?vperm $out2,$out2,$out2,$leperm | ||
3351 | stvx_u $out1,$x10,$out | ||
3352 | vxor $out1,$in1,$twk1 | ||
3353 | le?vperm $out3,$out3,$out3,$leperm | ||
3354 | stvx_u $out2,$x20,$out | ||
3355 | vxor $out2,$in2,$twk2 | ||
3356 | le?vperm $out4,$out4,$out4,$leperm | ||
3357 | stvx_u $out3,$x30,$out | ||
3358 | vxor $out3,$in3,$twk3 | ||
3359 | le?vperm $out5,$out5,$out5,$leperm | ||
3360 | stvx_u $out4,$x40,$out | ||
3361 | vxor $out4,$in4,$twk4 | ||
3362 | stvx_u $out5,$x50,$out | ||
3363 | vxor $out5,$in5,$twk5 | ||
3364 | addi $out,$out,0x60 | ||
3365 | |||
3366 | mtctr $rounds | ||
3367 | beq Loop_xts_dec6x # did $len-=96 borrow? | ||
# Fewer than 6 full blocks remain: dispatch on the residual count.
3368 | |||
3369 | addic. $len,$len,0x60 | ||
3370 | beq Lxts_dec6x_zero | ||
3371 | cmpwi $len,0x20 | ||
3372 | blt Lxts_dec6x_one | ||
3373 | nop | ||
3374 | beq Lxts_dec6x_two | ||
3375 | cmpwi $len,0x40 | ||
3376 | blt Lxts_dec6x_three | ||
3377 | nop | ||
3378 | beq Lxts_dec6x_four | ||
3379 | |||
3380 | Lxts_dec6x_five: | ||
3381 | vxor $out0,$in1,$twk0 | ||
3382 | vxor $out1,$in2,$twk1 | ||
3383 | vxor $out2,$in3,$twk2 | ||
3384 | vxor $out3,$in4,$twk3 | ||
3385 | vxor $out4,$in5,$twk4 | ||
3386 | |||
3387 | bl _aesp8_xts_dec5x | ||
3388 | |||
3389 | le?vperm $out0,$out0,$out0,$leperm | ||
3390 | vmr $twk0,$twk5 # unused tweak | ||
3391 | vxor $twk1,$tweak,$rndkey0 | ||
3392 | le?vperm $out1,$out1,$out1,$leperm | ||
3393 | stvx_u $out0,$x00,$out # store output | ||
3394 | vxor $out0,$in0,$twk1 | ||
3395 | le?vperm $out2,$out2,$out2,$leperm | ||
3396 | stvx_u $out1,$x10,$out | ||
3397 | le?vperm $out3,$out3,$out3,$leperm | ||
3398 | stvx_u $out2,$x20,$out | ||
3399 | le?vperm $out4,$out4,$out4,$leperm | ||
3400 | stvx_u $out3,$x30,$out | ||
3401 | stvx_u $out4,$x40,$out | ||
3402 | addi $out,$out,0x50 | ||
3403 | bne Lxts_dec6x_steal | ||
3404 | b Lxts_dec6x_done | ||
3405 | |||
3406 | .align 4 | ||
3407 | Lxts_dec6x_four: | ||
3408 | vxor $out0,$in2,$twk0 | ||
3409 | vxor $out1,$in3,$twk1 | ||
3410 | vxor $out2,$in4,$twk2 | ||
3411 | vxor $out3,$in5,$twk3 | ||
3412 | vxor $out4,$out4,$out4 | ||
3413 | |||
3414 | bl _aesp8_xts_dec5x | ||
3415 | |||
3416 | le?vperm $out0,$out0,$out0,$leperm | ||
3417 | vmr $twk0,$twk4 # unused tweak | ||
3418 | vmr $twk1,$twk5 | ||
3419 | le?vperm $out1,$out1,$out1,$leperm | ||
3420 | stvx_u $out0,$x00,$out # store output | ||
3421 | vxor $out0,$in0,$twk5 | ||
3422 | le?vperm $out2,$out2,$out2,$leperm | ||
3423 | stvx_u $out1,$x10,$out | ||
3424 | le?vperm $out3,$out3,$out3,$leperm | ||
3425 | stvx_u $out2,$x20,$out | ||
3426 | stvx_u $out3,$x30,$out | ||
3427 | addi $out,$out,0x40 | ||
3428 | bne Lxts_dec6x_steal | ||
3429 | b Lxts_dec6x_done | ||
3430 | |||
3431 | .align 4 | ||
3432 | Lxts_dec6x_three: | ||
3433 | vxor $out0,$in3,$twk0 | ||
3434 | vxor $out1,$in4,$twk1 | ||
3435 | vxor $out2,$in5,$twk2 | ||
3436 | vxor $out3,$out3,$out3 | ||
3437 | vxor $out4,$out4,$out4 | ||
3438 | |||
3439 | bl _aesp8_xts_dec5x | ||
3440 | |||
3441 | le?vperm $out0,$out0,$out0,$leperm | ||
3442 | vmr $twk0,$twk3 # unused tweak | ||
3443 | vmr $twk1,$twk4 | ||
3444 | le?vperm $out1,$out1,$out1,$leperm | ||
3445 | stvx_u $out0,$x00,$out # store output | ||
3446 | vxor $out0,$in0,$twk4 | ||
3447 | le?vperm $out2,$out2,$out2,$leperm | ||
3448 | stvx_u $out1,$x10,$out | ||
3449 | stvx_u $out2,$x20,$out | ||
3450 | addi $out,$out,0x30 | ||
3451 | bne Lxts_dec6x_steal | ||
3452 | b Lxts_dec6x_done | ||
3453 | |||
3454 | .align 4 | ||
3455 | Lxts_dec6x_two: | ||
3456 | vxor $out0,$in4,$twk0 | ||
3457 | vxor $out1,$in5,$twk1 | ||
3458 | vxor $out2,$out2,$out2 | ||
3459 | vxor $out3,$out3,$out3 | ||
3460 | vxor $out4,$out4,$out4 | ||
3461 | |||
3462 | bl _aesp8_xts_dec5x | ||
3463 | |||
3464 | le?vperm $out0,$out0,$out0,$leperm | ||
3465 | vmr $twk0,$twk2 # unused tweak | ||
3466 | vmr $twk1,$twk3 | ||
3467 | le?vperm $out1,$out1,$out1,$leperm | ||
3468 | stvx_u $out0,$x00,$out # store output | ||
3469 | vxor $out0,$in0,$twk3 | ||
3470 | stvx_u $out1,$x10,$out | ||
3471 | addi $out,$out,0x20 | ||
3472 | bne Lxts_dec6x_steal | ||
3473 | b Lxts_dec6x_done | ||
# Single remaining full block: scalar (1-wide) round loop.
3474 | |||
3475 | .align 4 | ||
3476 | Lxts_dec6x_one: | ||
3477 | vxor $out0,$in5,$twk0 | ||
3478 | nop | ||
3479 | Loop_xts_dec1x: | ||
3480 | vncipher $out0,$out0,v24 | ||
3481 | lvx v24,$x20,$key_ # round[3] | ||
3482 | addi $key_,$key_,0x20 | ||
3483 | |||
3484 | vncipher $out0,$out0,v25 | ||
3485 | lvx v25,$x10,$key_ # round[4] | ||
3486 | bdnz Loop_xts_dec1x | ||
3487 | |||
3488 | subi r0,$taillen,1 | ||
3489 | vncipher $out0,$out0,v24 | ||
3490 | |||
3491 | andi. r0,r0,16 | ||
3492 | cmpwi $taillen,0 | ||
3493 | vncipher $out0,$out0,v25 | ||
3494 | |||
3495 | sub $inp,$inp,r0 | ||
3496 | vncipher $out0,$out0,v26 | ||
3497 | |||
3498 | lvx_u $in0,0,$inp | ||
3499 | vncipher $out0,$out0,v27 | ||
3500 | |||
3501 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
3502 | vncipher $out0,$out0,v28 | ||
3503 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
3504 | |||
3505 | vncipher $out0,$out0,v29 | ||
3506 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
3507 | vxor $twk0,$twk0,v31 | ||
3508 | |||
3509 | le?vperm $in0,$in0,$in0,$leperm | ||
3510 | vncipher $out0,$out0,v30 | ||
3511 | |||
3512 | mtctr $rounds | ||
3513 | vncipherlast $out0,$out0,$twk0 | ||
3514 | |||
3515 | vmr $twk0,$twk1 # unused tweak | ||
3516 | vmr $twk1,$twk2 | ||
3517 | le?vperm $out0,$out0,$out0,$leperm | ||
3518 | stvx_u $out0,$x00,$out # store output | ||
3519 | addi $out,$out,0x10 | ||
3520 | vxor $out0,$in0,$twk2 | ||
3521 | bne Lxts_dec6x_steal | ||
3522 | b Lxts_dec6x_done | ||
3523 | |||
3524 | .align 4 | ||
3525 | Lxts_dec6x_zero: | ||
3526 | cmpwi $taillen,0 | ||
3527 | beq Lxts_dec6x_done | ||
# Ciphertext stealing: decrypt the final full block under the *next* tweak,
# splice the partial tail onto it, then re-run Loop_xts_dec1x once more for
# the stolen block under the original tweak ($twk0).
3528 | |||
3529 | lvx_u $in0,0,$inp | ||
3530 | le?vperm $in0,$in0,$in0,$leperm | ||
3531 | vxor $out0,$in0,$twk1 | ||
3532 | Lxts_dec6x_steal: | ||
3533 | vncipher $out0,$out0,v24 | ||
3534 | lvx v24,$x20,$key_ # round[3] | ||
3535 | addi $key_,$key_,0x20 | ||
3536 | |||
3537 | vncipher $out0,$out0,v25 | ||
3538 | lvx v25,$x10,$key_ # round[4] | ||
3539 | bdnz Lxts_dec6x_steal | ||
3540 | |||
3541 | add $inp,$inp,$taillen | ||
3542 | vncipher $out0,$out0,v24 | ||
3543 | |||
3544 | cmpwi $taillen,0 | ||
3545 | vncipher $out0,$out0,v25 | ||
3546 | |||
3547 | lvx_u $in0,0,$inp | ||
3548 | vncipher $out0,$out0,v26 | ||
3549 | |||
3550 | lvsr $inpperm,0,$taillen # $in5 is no more | ||
3551 | vncipher $out0,$out0,v27 | ||
3552 | |||
3553 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
3554 | vncipher $out0,$out0,v28 | ||
3555 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
3556 | |||
3557 | vncipher $out0,$out0,v29 | ||
3558 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
3559 | vxor $twk1,$twk1,v31 | ||
3560 | |||
3561 | le?vperm $in0,$in0,$in0,$leperm | ||
3562 | vncipher $out0,$out0,v30 | ||
3563 | |||
3564 | vperm $in0,$in0,$in0,$inpperm | ||
3565 | vncipherlast $tmp,$out0,$twk1 | ||
3566 | |||
3567 | le?vperm $out0,$tmp,$tmp,$leperm | ||
3568 | le?stvx_u $out0,0,$out | ||
3569 | be?stvx_u $tmp,0,$out | ||
# Build the stolen block: first $taillen bytes from the tail input, the rest
# from $tmp (the just-decrypted block); pre-whiten it with $twk0 and copy the
# tail bytes forward with the byte loop below.
3570 | |||
3571 | vxor $out0,$out0,$out0 | ||
3572 | vspltisb $out1,-1 | ||
3573 | vperm $out0,$out0,$out1,$inpperm | ||
3574 | vsel $out0,$in0,$tmp,$out0 | ||
3575 | vxor $out0,$out0,$twk0 | ||
3576 | |||
3577 | subi r30,$out,1 | ||
3578 | mtctr $taillen | ||
3579 | Loop_xts_dec6x_steal: | ||
3580 | lbzu r0,1(r30) | ||
3581 | stb r0,16(r30) | ||
3582 | bdnz Loop_xts_dec6x_steal | ||
3583 | |||
3584 | li $taillen,0 | ||
3585 | mtctr $rounds | ||
3586 | b Loop_xts_dec1x # one more time... | ||
# Done: optionally write the final tweak back to *$ivp, then wipe the stack
# copies of the round keys and restore the ABI-mandated register state.
3587 | |||
3588 | .align 4 | ||
3589 | Lxts_dec6x_done: | ||
3590 | ${UCMP}i $ivp,0 | ||
3591 | beq Lxts_dec6x_ret | ||
3592 | |||
3593 | vxor $tweak,$twk0,$rndkey0 | ||
3594 | le?vperm $tweak,$tweak,$tweak,$leperm | ||
3595 | stvx_u $tweak,0,$ivp | ||
3596 | |||
3597 | Lxts_dec6x_ret: | ||
3598 | mtlr r11 | ||
3599 | li r10,`$FRAME+15` | ||
3600 | li r11,`$FRAME+31` | ||
3601 | stvx $seven,r10,$sp # wipe copies of round keys | ||
3602 | addi r10,r10,32 | ||
3603 | stvx $seven,r11,$sp | ||
3604 | addi r11,r11,32 | ||
3605 | stvx $seven,r10,$sp | ||
3606 | addi r10,r10,32 | ||
3607 | stvx $seven,r11,$sp | ||
3608 | addi r11,r11,32 | ||
3609 | stvx $seven,r10,$sp | ||
3610 | addi r10,r10,32 | ||
3611 | stvx $seven,r11,$sp | ||
3612 | addi r11,r11,32 | ||
3613 | stvx $seven,r10,$sp | ||
3614 | addi r10,r10,32 | ||
3615 | stvx $seven,r11,$sp | ||
3616 | addi r11,r11,32 | ||
3617 | |||
3618 | mtspr 256,$vrsave | ||
3619 | lvx v20,r10,$sp # ABI says so | ||
3620 | addi r10,r10,32 | ||
3621 | lvx v21,r11,$sp | ||
3622 | addi r11,r11,32 | ||
3623 | lvx v22,r10,$sp | ||
3624 | addi r10,r10,32 | ||
3625 | lvx v23,r11,$sp | ||
3626 | addi r11,r11,32 | ||
3627 | lvx v24,r10,$sp | ||
3628 | addi r10,r10,32 | ||
3629 | lvx v25,r11,$sp | ||
3630 | addi r11,r11,32 | ||
3631 | lvx v26,r10,$sp | ||
3632 | addi r10,r10,32 | ||
3633 | lvx v27,r11,$sp | ||
3634 | addi r11,r11,32 | ||
3635 | lvx v28,r10,$sp | ||
3636 | addi r10,r10,32 | ||
3637 | lvx v29,r11,$sp | ||
3638 | addi r11,r11,32 | ||
3639 | lvx v30,r10,$sp | ||
3640 | lvx v31,r11,$sp | ||
3641 | $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
3642 | $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
3643 | $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
3644 | $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
3645 | $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
3646 | $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
3647 | addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` | ||
3648 | blr | ||
3649 | .long 0 | ||
3650 | .byte 0,12,0x04,1,0x80,6,6,0 | ||
3651 | .long 0 | ||
3652 | |||
# _aesp8_xts_dec5x: shared AES-decrypt tail for the 1..5-block XTS cases;
# structural twin of _aesp8_xts_enc5x above but using vncipher/vncipherlast.
# On entry $out0..$out4 hold tweak-whitened ciphertext and CTR the remaining
# middle-round count.  Unlike the encrypt variant, the input-pointer rewind
# for the stealing case is conditional (only when a tail exists, via the
# subi/andi./sub on r0 below), and it restores CTR before returning.
3653 | .align 5 | ||
3654 | _aesp8_xts_dec5x: | ||
3655 | vncipher $out0,$out0,v24 | ||
3656 | vncipher $out1,$out1,v24 | ||
3657 | vncipher $out2,$out2,v24 | ||
3658 | vncipher $out3,$out3,v24 | ||
3659 | vncipher $out4,$out4,v24 | ||
3660 | lvx v24,$x20,$key_ # round[3] | ||
3661 | addi $key_,$key_,0x20 | ||
3662 | |||
3663 | vncipher $out0,$out0,v25 | ||
3664 | vncipher $out1,$out1,v25 | ||
3665 | vncipher $out2,$out2,v25 | ||
3666 | vncipher $out3,$out3,v25 | ||
3667 | vncipher $out4,$out4,v25 | ||
3668 | lvx v25,$x10,$key_ # round[4] | ||
3669 | bdnz _aesp8_xts_dec5x | ||
# Final rounds interleaved with tail bookkeeping: r0 = ($taillen!=0)?16:0 so
# $inp backs up one block only when ciphertext stealing will follow, and the
# per-block last-round keys $inN = $twkN ^ v31 are prepared on the fly.
3670 | |||
3671 | subi r0,$taillen,1 | ||
3672 | vncipher $out0,$out0,v24 | ||
3673 | vncipher $out1,$out1,v24 | ||
3674 | vncipher $out2,$out2,v24 | ||
3675 | vncipher $out3,$out3,v24 | ||
3676 | vncipher $out4,$out4,v24 | ||
3677 | |||
3678 | andi. r0,r0,16 | ||
3679 | cmpwi $taillen,0 | ||
3680 | vncipher $out0,$out0,v25 | ||
3681 | vncipher $out1,$out1,v25 | ||
3682 | vncipher $out2,$out2,v25 | ||
3683 | vncipher $out3,$out3,v25 | ||
3684 | vncipher $out4,$out4,v25 | ||
3685 | vxor $twk0,$twk0,v31 | ||
3686 | |||
3687 | sub $inp,$inp,r0 | ||
3688 | vncipher $out0,$out0,v26 | ||
3689 | vncipher $out1,$out1,v26 | ||
3690 | vncipher $out2,$out2,v26 | ||
3691 | vncipher $out3,$out3,v26 | ||
3692 | vncipher $out4,$out4,v26 | ||
3693 | vxor $in1,$twk1,v31 | ||
3694 | |||
3695 | vncipher $out0,$out0,v27 | ||
3696 | lvx_u $in0,0,$inp | ||
3697 | vncipher $out1,$out1,v27 | ||
3698 | vncipher $out2,$out2,v27 | ||
3699 | vncipher $out3,$out3,v27 | ||
3700 | vncipher $out4,$out4,v27 | ||
3701 | vxor $in2,$twk2,v31 | ||
3702 | |||
3703 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
3704 | vncipher $out0,$out0,v28 | ||
3705 | vncipher $out1,$out1,v28 | ||
3706 | vncipher $out2,$out2,v28 | ||
3707 | vncipher $out3,$out3,v28 | ||
3708 | vncipher $out4,$out4,v28 | ||
3709 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
3710 | vxor $in3,$twk3,v31 | ||
3711 | |||
3712 | vncipher $out0,$out0,v29 | ||
3713 | le?vperm $in0,$in0,$in0,$leperm | ||
3714 | vncipher $out1,$out1,v29 | ||
3715 | vncipher $out2,$out2,v29 | ||
3716 | vncipher $out3,$out3,v29 | ||
3717 | vncipher $out4,$out4,v29 | ||
3718 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
3719 | vxor $in4,$twk4,v31 | ||
3720 | |||
3721 | vncipher $out0,$out0,v30 | ||
3722 | vncipher $out1,$out1,v30 | ||
3723 | vncipher $out2,$out2,v30 | ||
3724 | vncipher $out3,$out3,v30 | ||
3725 | vncipher $out4,$out4,v30 | ||
# Last round folds the per-block key; CTR is reloaded so the caller's next
# 1-wide loop (Loop_xts_dec1x / steal path) can run immediately.
3726 | |||
3727 | vncipherlast $out0,$out0,$twk0 | ||
3728 | vncipherlast $out1,$out1,$in1 | ||
3729 | vncipherlast $out2,$out2,$in2 | ||
3730 | vncipherlast $out3,$out3,$in3 | ||
3731 | vncipherlast $out4,$out4,$in4 | ||
3732 | mtctr $rounds | ||
3733 | blr | ||
3734 | .long 0 | ||
3735 | .byte 0,12,0x14,0,0,0,0,0 | ||
3736 | ___ | ||
3737 | }} }}} | ||
3738 | |||
1878 | my $consts=1; | 3739 | my $consts=1; |
1879 | foreach(split("\n",$code)) { | 3740 | foreach(split("\n",$code)) { |
1880 | s/\`([^\`]*)\`/eval($1)/geo; | 3741 | s/\`([^\`]*)\`/eval($1)/geo; |
@@ -1898,7 +3759,7 @@ foreach(split("\n",$code)) { | |||
1898 | if ($flavour =~ /le$/o) { | 3759 | if ($flavour =~ /le$/o) { |
1899 | SWITCH: for($conv) { | 3760 | SWITCH: for($conv) { |
1900 | /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; | 3761 | /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; |
1901 | /\?rev/ && do { @bytes=reverse(@bytes); last; }; | 3762 | /\?rev/ && do { @bytes=reverse(@bytes); last; }; |
1902 | } | 3763 | } |
1903 | } | 3764 | } |
1904 | 3765 | ||