aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/crypto/sha1-powerpc-asm.S
blob: 125e16520061289aff815417fd7aa18ec41e3df7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * Rolling register assignment for the SHA-1 working values.
 *
 * T, A, B, C, D and E occupy r7 - r12.  No data is moved between
 * rounds; instead the name-to-register mapping shifts by one each
 * round, so the register written as T in round t is read as A in
 * round t+1, as B in round t+2, and so on.  The mapping repeats
 * every 6 rounds.
 */
#define RE(t)	(7+((t)%6))
#define RD(t)	(7+(((t)+1)%6))
#define RC(t)	(7+(((t)+2)%6))
#define RB(t)	(7+(((t)+3)%6))
#define RA(t)	(7+(((t)+4)%6))
#define RT(t)	(7+(((t)+5)%6))

/*
 * The W (message schedule) values are kept in r16 - r31; W(t) selects
 * the register by t modulo 16, so W(t) and W(t+16) share a register.
 */
#define W(t)	(16+((t)%16))

/*
 * SHA1_LWZ_BE(rt, d, ra): load the 32-bit word at displacement d from
 * base register ra into rt, interpreting the bytes in memory as a
 * big-endian value.  SHA-1 defines its message words as big-endian, so a
 * plain lwz is only correct on a big-endian build.
 *
 * On a little-endian build the byte-reversed load lwbrx is used instead.
 * lwbrx has no displacement form, so the offset is first placed in r0;
 * r0 is therefore clobbered on little-endian.  ra must not be r0, because
 * an RA field of 0 in lwbrx means "no base register".
 *
 * The test is on __LITTLE_ENDIAN__ so that a toolchain defining neither
 * endianness macro keeps the original lwz behaviour.
 */
#ifdef __LITTLE_ENDIAN__
#define SHA1_LWZ_BE(rt, d, ra)			\
	li	r0,d;				\
	lwbrx	rt,ra,r0
#else
#define SHA1_LWZ_BE(rt, d, ra)			\
	lwz	rt,d(ra)
#endif

/*
 * LOADW(t): load message word t (byte offset t*4 from r4) into W(t).
 * Clobbers r0 on little-endian builds.
 */
#define LOADW(t)				\
	SHA1_LWZ_BE(W(t),(t)*4,r4)

/*
 * STEPD0_LOAD(t): one round using f = (B & C) | (D & ~B), which also
 * loads the message word needed four rounds later.
 *
 *   T      = rotl(A,5) + f + E + K + W(t)	(K is held in r15)
 *   B      = rotl(B,30)
 *   W(t+4) = message word t+4, loaded from r4
 *
 * Clobbers r0, r6 and r14.  The load is placed after the last read of r0
 * in this round, so the little-endian form of SHA1_LWZ_BE may overwrite
 * r0 safely; the next round rewrites r0 before reading it.
 */
#define STEPD0_LOAD(t)				\
	andc	r0,RD(t),RB(t);		\
	and	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r14,r0,W(t);			\
	SHA1_LWZ_BE(W((t)+4),((t)+4)*4,r4);	\
	rotlwi	RB(t),RB(t),30;			\
	add	RT(t),RT(t),r14

/*
 * STEPD0_UPDATE(t): one round using f = (B & C) | (D & ~B), interleaved
 * with the in-register computation of the schedule word four rounds
 * ahead.
 *
 *   T      = rotl(A,5) + f + E + K + W(t)	(K is held in r15)
 *   B      = rotl(B,30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * W(t+4) and W(t-12) are the same register (W indexes modulo 16), so the
 * schedule word is updated in place.  Only invoked with t >= 12 in this
 * file, which keeps every W index non-negative.
 *
 * Clobbers r0, r5 and r6.  B is rotated in place only after its last
 * unrotated read; keep the instruction order when editing (the
 * interleaving is presumably for scheduling).
 */
#define STEPD0_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	andc	r0,RD(t),RB(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * STEPD1(t): one round using f = B ^ C ^ D, with no message schedule
 * update.
 *
 *   T = rotl(A,5) + f + E + K + W(t)	(K is held in r15)
 *   B = rotl(B,30)
 *
 * Clobbers r0 and r6.  B is rotated in place after it has been read for
 * the xor, so the first and third instructions must stay in that order.
 */
#define STEPD1(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	add	RT(t),RT(t),r0

/*
 * STEPD1_UPDATE(t): one round using f = B ^ C ^ D, interleaved with the
 * in-register computation of the schedule word four rounds ahead.
 *
 *   T      = rotl(A,5) + f + E + K + W(t)	(K is held in r15)
 *   B      = rotl(B,30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * W(t+4) and W(t-12) are the same register (W indexes modulo 16), so the
 * schedule word is updated in place.
 *
 * Clobbers r0, r5 and r6.  B is rotated in place after it has been read
 * for the xor; keep the instruction order when editing.
 */
#define STEPD1_UPDATE(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * STEPD2_UPDATE(t): one round using f = (B & C) | (B & D) | (C & D),
 * interleaved with the in-register computation of the schedule word four
 * rounds ahead.
 *
 *   T      = rotl(A,5) + f + E + K + W(t)	(K is held in r15)
 *   B      = rotl(B,30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * W(t+4) and W(t-12) are the same register (W indexes modulo 16), so the
 * schedule word is updated in place.
 *
 * Clobbers r0, r5 and r6.  Both terms that read B are computed before B
 * is rotated in place; the remaining term (C & D) does not involve B.
 * Keep the instruction order when editing.
 */
#define STEPD2_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	and	r0,RB(t),RD(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	rotlwi	RB(t),RB(t),30;			\
	and	r0,RC(t),RD(t);		\
	xor	r5,W((t)+4-3),W((t)+4-8);	\
	or	r6,r6,r0;			\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;		\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/* STEP0LD2(t): rounds t and t+1, each a STEPD0_LOAD. */
#define STEP0LD2(t)				\
	STEPD0_LOAD(t);				\
	STEPD0_LOAD((t)+1)

/*
 * STEP0LD4(t): four consecutive STEPD0_LOAD rounds, t through t+3,
 * built from two round pairs.
 */
#define STEP0LD4(t)				\
	STEP0LD2(t);				\
	STEP0LD2((t)+2)

/*
 * STEPUP4(t, fn): four consecutive schedule-updating rounds, t through
 * t+3.  fn is pasted between "STEP" and "_UPDATE" to pick the round
 * macro, so it must be one of D0, D1 or D2 (giving STEPD0_UPDATE,
 * STEPD1_UPDATE or STEPD2_UPDATE).
 */
#define STEPUP4(t, fn)				\
	STEP##fn##_UPDATE(t);			\
	STEP##fn##_UPDATE((t)+1);		\
	STEP##fn##_UPDATE((t)+2);		\
	STEP##fn##_UPDATE((t)+3)

/* STEPUP8(t, fn): eight schedule-updating rounds, t through t+7. */
#define STEPUP8(t, fn)				\
	STEPUP4(t, fn);				\
	STEPUP4((t)+4, fn)

/*
 * STEPUP20(t, fn): twenty consecutive schedule-updating rounds, t
 * through t+19, all using the round macro selected by fn (see STEPUP4).
 * Composed as 8 + 8 + 4 rounds.
 */
#define STEPUP20(t, fn)				\
	STEPUP8(t, fn);				\
	STEPUP8((t)+8, fn);			\
	STEPUP4((t)+16, fn)

/*
 * powerpc_sha_transform: run the 80 SHA-1 rounds over one block of
 * sixteen 32-bit message words and fold the result into the state.
 *
 * In:   r3 = address of the five 32-bit state words A, B, C, D, E at
 *            byte offsets 0, 4, 8, 12, 16; read on entry and overwritten
 *            with (old value + value after round 79) on exit
 *       r4 = address of the sixteen message words (read through LOADW
 *            and STEPD0_LOAD)
 * Regs: r0, r5, r6, r14  scratch inside the round macros
 *       r7 - r12         rolling T, A, B, C, D, E (see RT()..RE())
 *       r15              round constant K for the current 20 rounds
 *       r16 - r31        W values; r16 - r20 are reused at the end to
 *                        hold the previous state
 *
 * NOTE(review): the C prototype is not visible in this file; confirm the
 * argument types against the caller.
 */
_GLOBAL(powerpc_sha_transform)
	/*
	 * Open a stack frame of INT_FRAME_SIZE bytes (released by the addi
	 * before blr).  r14 - r31 are all written below; SAVE_8GPRS(14) and
	 * SAVE_10GPRS(22) come from the included headers and presumably
	 * store r14 - r21 and r22 - r31 into the frame.
	 */
	PPC_STLU r1,-INT_FRAME_SIZE(r1)
	SAVE_8GPRS(14, r1)
	SAVE_10GPRS(22, r1)

	/* Load up A - E */
	lwz	RA(0),0(r3)	/* A */
	lwz	RB(0),4(r3)	/* B */
	lwz	RC(0),8(r3)	/* C */
	lwz	RD(0),12(r3)	/* D */
	lwz	RE(0),16(r3)	/* E */

	/*
	 * Prime the first four message words; each STEPD0_LOAD round then
	 * loads the word needed four rounds later.
	 */
	LOADW(0)
	LOADW(1)
	LOADW(2)
	LOADW(3)

	/*
	 * Rounds 0-11 load message words 4-15 as they go; from round 12 on
	 * the schedule words are computed in registers instead.
	 */
	lis	r15,0x5a82	/* K0-19 */
	ori	r15,r15,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	r15,0x6ed9	/* K20-39 */
	ori	r15,r15,0xeba1
	STEPUP20(20, D1)

	lis	r15,0x8f1b	/* K40-59 */
	ori	r15,r15,0xbcdc
	STEPUP20(40, D2)

	/*
	 * Rounds 60-79 use the D1 function again.  Round 75 produces W(79),
	 * the last schedule word, so rounds 76-79 use STEPD1 (no update).
	 * Those rounds read only W(76)-W(79), i.e. r28 - r31, which leaves
	 * r16 - r20 free: the previous state is reloaded into them between
	 * the final rounds.
	 */
	lis	r15,0xca62	/* K60-79 */
	ori	r15,r15,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
	lwz	r20,16(r3)	/* old E */
	STEPD1(76)
	lwz	r19,12(r3)	/* old D */
	STEPD1(77)
	lwz	r18,8(r3)	/* old C */
	STEPD1(78)
	lwz	r17,4(r3)	/* old B */
	STEPD1(79)

	/*
	 * Add the previous state to the round-80 values, moving each sum to
	 * its round-0 register.  The order matters because the two register
	 * mappings overlap: every RX(0) written here is a round-80 register
	 * that has already been consumed.  E is the exception: RE(0) (r7) is
	 * also RA(80), which is still needed for the A sum, so the E sum is
	 * parked in r20 and moved into place last.
	 */
	lwz	r16,0(r3)	/* old A */
	add	r20,RE(80),r20
	add	RD(0),RD(80),r19
	add	RC(0),RC(80),r18
	add	RB(0),RB(80),r17
	add	RA(0),RA(80),r16
	mr	RE(0),r20
	/* Write the updated state back over the input state at r3. */
	stw	RA(0),0(r3)
	stw	RB(0),4(r3)
	stw	RC(0),8(r3)
	stw	RD(0),12(r3)
	stw	RE(0),16(r3)

	/* Undo the prologue: restore the saved registers and pop the frame. */
	REST_8GPRS(14, r1)
	REST_10GPRS(22, r1)
	addi	r1,r1,INT_FRAME_SIZE
	blr