1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Vinodh Gopal
* Erdinc Ozturk
* Deniz Karakoyunlu
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
DATA .req v0
SHASH .req v1
IN1 .req v2
T1 .req v2
T2 .req v3
T3 .req v4
VZR .req v5
.text
.arch armv8-a+crypto
/*
* void pmull_ghash_update(int blocks, u64 dg[], const char *src,
* struct ghash_key const *k, const char *head)
*/
ENTRY(pmull_ghash_update)
ld1 {DATA.16b}, [x1]
ld1 {SHASH.16b}, [x3]
eor VZR.16b, VZR.16b, VZR.16b
/* do the head block first, if supplied */
cbz x4, 0f
ld1 {IN1.2d}, [x4]
b 1f
0: ld1 {IN1.2d}, [x2], #16
sub w0, w0, #1
1: ext IN1.16b, IN1.16b, IN1.16b, #8
CPU_LE( rev64 IN1.16b, IN1.16b )
eor DATA.16b, DATA.16b, IN1.16b
/* multiply DATA by SHASH in GF(2^128) */
ext T2.16b, DATA.16b, DATA.16b, #8
ext T3.16b, SHASH.16b, SHASH.16b, #8
eor T2.16b, T2.16b, DATA.16b
eor T3.16b, T3.16b, SHASH.16b
pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1
pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0
pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0)
eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0)
eor T2.16b, T2.16b, DATA.16b
ext T3.16b, VZR.16b, T2.16b, #8
ext T2.16b, T2.16b, VZR.16b, #8
eor DATA.16b, DATA.16b, T3.16b
eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of
// carry-less multiplication
/* first phase of the reduction */
shl T3.2d, DATA.2d, #1
eor T3.16b, T3.16b, DATA.16b
shl T3.2d, T3.2d, #5
eor T3.16b, T3.16b, DATA.16b
shl T3.2d, T3.2d, #57
ext T2.16b, VZR.16b, T3.16b, #8
ext T3.16b, T3.16b, VZR.16b, #8
eor DATA.16b, DATA.16b, T2.16b
eor T1.16b, T1.16b, T3.16b
/* second phase of the reduction */
ushr T2.2d, DATA.2d, #5
eor T2.16b, T2.16b, DATA.16b
ushr T2.2d, T2.2d, #1
eor T2.16b, T2.16b, DATA.16b
ushr T2.2d, T2.2d, #1
eor T1.16b, T1.16b, T2.16b
eor DATA.16b, DATA.16b, T1.16b
cbnz w0, 0b
st1 {DATA.16b}, [x1]
ret
ENDPROC(pmull_ghash_update)
|