 MAINTAINERS                                           |    8 -
 arch/m68k/configs/amiga_defconfig                     |    1 -
 arch/m68k/configs/apollo_defconfig                    |    1 -
 arch/m68k/configs/atari_defconfig                     |    1 -
 arch/m68k/configs/bvme6000_defconfig                  |    1 -
 arch/m68k/configs/hp300_defconfig                     |    1 -
 arch/m68k/configs/mac_defconfig                       |    1 -
 arch/m68k/configs/multi_defconfig                     |    1 -
 arch/m68k/configs/mvme147_defconfig                   |    1 -
 arch/m68k/configs/mvme16x_defconfig                   |    1 -
 arch/m68k/configs/q40_defconfig                       |    1 -
 arch/m68k/configs/sun3_defconfig                      |    1 -
 arch/m68k/configs/sun3x_defconfig                     |    1 -
 arch/s390/configs/debug_defconfig                     |    1 -
 arch/s390/configs/performance_defconfig               |    1 -
 arch/x86/crypto/Makefile                              |    3 -
 arch/x86/crypto/sha1-mb/Makefile                      |   14 -
 arch/x86/crypto/sha1-mb/sha1_mb.c                     | 1011 -
 arch/x86/crypto/sha1-mb/sha1_mb_ctx.h                 |  134 -
 arch/x86/crypto/sha1-mb/sha1_mb_mgr.h                 |  110 -
 arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S      |  287 -
 arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S      |  304 -
 arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c       |   64 -
 arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S     |  209 -
 arch/x86/crypto/sha1-mb/sha1_x8_avx2.S                |  492 -
 arch/x86/crypto/sha256-mb/Makefile                    |   14 -
 arch/x86/crypto/sha256-mb/sha256_mb.c                 | 1013 -
 arch/x86/crypto/sha256-mb/sha256_mb_ctx.h             |  134 -
 arch/x86/crypto/sha256-mb/sha256_mb_mgr.h             |  108 -
 arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S  |  304 -
 arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S  |  307 -
 arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c   |   65 -
 arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S |  214 -
 arch/x86/crypto/sha256-mb/sha256_x8_avx2.S            |  598 -
 arch/x86/crypto/sha512-mb/Makefile                    |   12 -
 arch/x86/crypto/sha512-mb/sha512_mb.c                 | 1047 -
 arch/x86/crypto/sha512-mb/sha512_mb_ctx.h             |  128 -
 arch/x86/crypto/sha512-mb/sha512_mb_mgr.h             |  104 -
 arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S  |  281 -
 arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S  |  297 -
 arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c   |   69 -
 arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S |  224 -
 arch/x86/crypto/sha512-mb/sha512_x4_avx2.S            |  531 -
 crypto/Kconfig                                        |   62 -
 crypto/Makefile                                       |    1 -
 crypto/mcryptd.c                                      |  675 -
 include/crypto/mcryptd.h                              |  114 -
 47 files changed, 0 insertions(+), 8952 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 9ad052aeac39..9c91490baa3d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7503,14 +7503,6 @@ S: Supported
 F:	drivers/infiniband/hw/i40iw/
 F:	include/uapi/rdma/i40iw-abi.h
 
-INTEL SHA MULTIBUFFER DRIVER
-M:	Megha Dey <megha.dey@linux.intel.com>
-R:	Tim Chen <tim.c.chen@linux.intel.com>
-L:	linux-crypto@vger.kernel.org
-S:	Supported
-F:	arch/x86/crypto/sha*-mb/
-F:	crypto/mcryptd.c
-
 INTEL TELEMETRY DRIVER
 M:	Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:	platform-driver-x86@vger.kernel.org
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 93a3c3c0238c..85904b73e261 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index e3d0efd6397d..9b3818bbb68b 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 75ac0c76e884..769677809945 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index c6e492700188..7dd264ddf2ea 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index b00d1c477432..515f7439c755 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 85cac3770d89..8e1038ceb407 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index b3a5d1e99d27..62c8aaa15cc7 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 0ca22608453f..733973f91297 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 8e3d10d12d9c..fee30cc9ac16 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index ff7e653ec7fa..eebf9c9088e7 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 612cf46f6d0c..dabc54318c09 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index a6a7bb6dc3fd..0d9a5c2a311a 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 941d8cc6c9f5..259d1698ac50 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index eb6f75f24208..37fd60c20e22 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a450ad573dcb..9edfa5469f9f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -60,9 +60,6 @@ endif
 ifeq ($(avx2_supported),yes)
 	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
 	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
-	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
-	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
-	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
 
 	obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
 endif
diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
deleted file mode 100644
index 815ded3ba90e..000000000000
--- a/arch/x86/crypto/sha1-mb/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-OBJECT_FILES_NON_STANDARD := y
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
-				$(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
-	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
-	sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
-	     sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
-endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c
deleted file mode 100644
index b93805664c1d..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- * Multi buffer SHA1 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha1_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha1_mb_alg_state;
-
-struct sha1_mb_ctx {
-	struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
-		*cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
-{
-	struct ahash_request *areq;
-
-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
-	return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
-			 struct ahash_request *areq)
-{
-	rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)
-			(struct sha1_mb_mgr *state, struct job_sha1 *job);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
-			(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
-			(struct sha1_mb_mgr *state);
-
-static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
-				uint64_t total_len)
-{
-	uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
-
-	memset(&padblock[i], 0, SHA1_BLOCK_SIZE);
-	padblock[i] = 0x80;
-
-	i += ((SHA1_BLOCK_SIZE - 1) &
-	      (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1)))
-	     + 1 + SHA1_PADLENGTHFIELD_SIZE;
-
-#if SHA1_PADLENGTHFIELD_SIZE == 16
-	*((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
-	/* Number of extra blocks to hash */
-	return i >> SHA1_LOG2_BLOCK_SIZE;
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr,
-						   struct sha1_hash_ctx *ctx)
-{
-	while (ctx) {
-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
-			/* Clear PROCESSING bit */
-			ctx->status = HASH_CTX_STS_COMPLETE;
-			return ctx;
-		}
-
-		/*
-		 * If the extra blocks are empty, begin hashing what remains
-		 * in the user's buffer.
-		 */
-		if (ctx->partial_block_buffer_length == 0 &&
-		    ctx->incoming_buffer_length) {
-
-			const void *buffer = ctx->incoming_buffer;
-			uint32_t len = ctx->incoming_buffer_length;
-			uint32_t copy_len;
-
-			/*
-			 * Only entire blocks can be hashed.
-			 * Copy remainder to extra blocks buffer.
-			 */
-			copy_len = len & (SHA1_BLOCK_SIZE-1);
-
-			if (copy_len) {
-				len -= copy_len;
-				memcpy(ctx->partial_block_buffer,
-				       ((const char *) buffer + len),
-				       copy_len);
-				ctx->partial_block_buffer_length = copy_len;
-			}
-
-			ctx->incoming_buffer_length = 0;
-
-			/* len should be a multiple of the block size now */
-			assert((len % SHA1_BLOCK_SIZE) == 0);
-
-			/* Set len to the number of blocks to be hashed */
-			len >>= SHA1_LOG2_BLOCK_SIZE;
-
-			if (len) {
-
-				ctx->job.buffer = (uint8_t *) buffer;
-				ctx->job.len = len;
-				ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr,
-										  &ctx->job);
-				continue;
-			}
-		}
-
-		/*
-		 * If the extra blocks are not empty, then we are
-		 * either on the last block(s) or we need more
-		 * user input before continuing.
-		 */
-		if (ctx->status & HASH_CTX_STS_LAST) {
-
-			uint8_t *buf = ctx->partial_block_buffer;
-			uint32_t n_extra_blocks =
-				sha1_pad(buf, ctx->total_length);
-
-			ctx->status = (HASH_CTX_STS_PROCESSING |
-				       HASH_CTX_STS_COMPLETE);
-			ctx->job.buffer = buf;
-			ctx->job.len = (uint32_t) n_extra_blocks;
-			ctx = (struct sha1_hash_ctx *)
-				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
-			continue;
-		}
-
-		ctx->status = HASH_CTX_STS_IDLE;
-		return ctx;
-	}
-
-	return NULL;
-}
-
-static struct sha1_hash_ctx
-		*sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
-{
-	/*
-	 * If get_comp_job returns NULL, there are no jobs complete.
-	 * If get_comp_job returns a job, verify that it is safe to return to
-	 * the user.
-	 * If it is not ready, resubmit the job to finish processing.
-	 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
-	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
-	 * still need processing.
-	 */
-	struct sha1_hash_ctx *ctx;
-
-	ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr);
-	return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr)
-{
-	sha1_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
-					  struct sha1_hash_ctx *ctx,
-					  const void *buffer,
-					  uint32_t len,
-					  int flags)
-{
-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
-		/* User should not pass anything other than UPDATE or LAST */
-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
-		/* Cannot submit to a currently processing job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		return ctx;
-	}
-
-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
-		/* Cannot update a finished job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		return ctx;
-	}
-
-	/*
-	 * If we made it here, there were no errors during this call to
-	 * submit
-	 */
-	ctx->error = HASH_CTX_ERROR_NONE;
-
-	/* Store buffer ptr info from user */
-	ctx->incoming_buffer = buffer;
-	ctx->incoming_buffer_length = len;
-
-	/*
-	 * Store the user's request flags and mark this ctx as currently
-	 * being processed.
-	 */
-	ctx->status = (flags & HASH_LAST) ?
-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
-			HASH_CTX_STS_PROCESSING;
-
-	/* Advance byte counter */
-	ctx->total_length += len;
-
-	/*
-	 * If there is anything currently buffered in the extra blocks,
-	 * append to it until it contains a whole block.
-	 * Or if the user's buffer contains less than a whole block,
-	 * append as much as possible to the extra block.
-	 */
-	if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) {
-		/*
-		 * Compute how many bytes to copy from user buffer into
-		 * extra block
-		 */
-		uint32_t copy_len = SHA1_BLOCK_SIZE -
-					ctx->partial_block_buffer_length;
-		if (len < copy_len)
-			copy_len = len;
-
-		if (copy_len) {
-			/* Copy and update relevant pointers and counters */
-			memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
-				buffer, copy_len);
-
-			ctx->partial_block_buffer_length += copy_len;
-			ctx->incoming_buffer = (const void *)
-					((const char *)buffer + copy_len);
-			ctx->incoming_buffer_length = len - copy_len;
-		}
-
-		/*
-		 * The extra block should never contain more than 1 block
-		 * here
-		 */
-		assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
-
-		/*
-		 * If the extra block buffer contains exactly 1 block, it can
-		 * be hashed.
-		 */
-		if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
-			ctx->partial_block_buffer_length = 0;
-
-			ctx->job.buffer = ctx->partial_block_buffer;
-			ctx->job.len = 1;
-			ctx = (struct sha1_hash_ctx *)
-				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
-		}
-	}
-
-	return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr)
-{
-	struct sha1_hash_ctx *ctx;
-
-	while (1) {
-		ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr);
-
-		/* If flush returned 0, there are no more jobs in flight. */
-		if (!ctx)
-			return NULL;
-
-		/*
-		 * If flush returned a job, resubmit the job to finish
-		 * processing.
-		 */
-		ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
-
-		/*
-		 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be
-		 * returned. Otherwise, all jobs currently being managed by the
-		 * sha1_ctx_mgr still need processing. Loop.
-		 */
-		if (ctx)
-			return ctx;
-	}
-}
-
-static int sha1_mb_init(struct ahash_request *areq)
-{
-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	hash_ctx_init(sctx);
-	sctx->job.result_digest[0] = SHA1_H0;
-	sctx->job.result_digest[1] = SHA1_H1;
-	sctx->job.result_digest[2] = SHA1_H2;
-	sctx->job.result_digest[3] = SHA1_H3;
-	sctx->job.result_digest[4] = SHA1_H4;
-	sctx->total_length = 0;
-	sctx->partial_block_buffer_length = 0;
-	sctx->status = HASH_CTX_STS_IDLE;
-
-	return 0;
-}
-
-static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
-	int	i;
-	struct	sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
-	__be32	*dst = (__be32 *) rctx->out;
-
-	for (i = 0; i < 5; ++i)
-		dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
-
-	return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
-			struct mcryptd_alg_cstate *cstate, bool flush)
-{
-	int	flag = HASH_UPDATE;
-	int	nbytes, err = 0;
-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
-	struct sha1_hash_ctx *sha_ctx;
-
-	/* more work ? */
-	while (!(rctx->flag & HASH_DONE)) {
-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
-		if (nbytes < 0) {
-			err = nbytes;
-			goto out;
-		}
-		/* check if the walk is done */
-		if (crypto_ahash_walk_last(&rctx->walk)) {
-			rctx->flag |= HASH_DONE;
-			if (rctx->flag & HASH_FINAL)
-				flag |= HASH_LAST;
-
-		}
-		sha_ctx = (struct sha1_hash_ctx *)
-						ahash_request_ctx(&rctx->areq);
-		kernel_fpu_begin();
-		sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx,
-						rctx->walk.data, nbytes, flag);
-		if (!sha_ctx) {
-			if (flush)
-				sha_ctx = sha1_ctx_mgr_flush(cstate->mgr);
-		}
-		kernel_fpu_end();
-		if (sha_ctx)
-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		else {
-			rctx = NULL;
-			goto out;
-		}
-	}
-
-	/* copy the results */
-	if (rctx->flag & HASH_FINAL)
-		sha1_mb_set_results(rctx);
-
-out:
-	*ret_rctx = rctx;
-	return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
-			    struct mcryptd_alg_cstate *cstate,
-			    int err)
-{
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha1_hash_ctx *sha_ctx;
-	struct mcryptd_hash_request_ctx *req_ctx;
-	int ret;
-
-	/* remove from work list */
-	spin_lock(&cstate->work_lock);
-	list_del(&rctx->waiter);
-	spin_unlock(&cstate->work_lock);
-
-	if (irqs_disabled())
-		rctx->complete(&req->base, err);
-	else {
-		local_bh_disable();
-		rctx->complete(&req->base, err);
-		local_bh_enable();
-	}
-
-	/* check to see if there are other jobs that are done */
-	sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
-	while (sha_ctx) {
-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		ret = sha_finish_walk(&req_ctx, cstate, false);
-		if (req_ctx) {
-			spin_lock(&cstate->work_lock);
-			list_del(&req_ctx->waiter);
-			spin_unlock(&cstate->work_lock);
-
-			req = cast_mcryptd_ctx_to_req(req_ctx);
-			if (irqs_disabled())
-				req_ctx->complete(&req->base, ret);
-			else {
-				local_bh_disable();
-				req_ctx->complete(&req->base, ret);
-				local_bh_enable();
-			}
-		}
-		sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
-	}
-
-	return 0;
-}
-
-static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
-			     struct mcryptd_alg_cstate *cstate)
-{
-	unsigned long next_flush;
-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-
-	/* initialize tag */
-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
-	rctx->tag.seq_num = cstate->next_seq_num++;
-	next_flush = rctx->tag.arrival + delay;
-	rctx->tag.expire = next_flush;
-
-	spin_lock(&cstate->work_lock);
-	list_add_tail(&rctx->waiter, &cstate->work_list);
-	spin_unlock(&cstate->work_lock);
-
-	mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha1_mb_update(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha1_hash_ctx *sha_ctx;
-	int ret = 0, nbytes;
-
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk))
-		rctx->flag |= HASH_DONE;
-
-	/* submit */
-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
-	sha1_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-							nbytes, HASH_UPDATE);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha1_mb_finup(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha1_hash_ctx *sha_ctx;
-	int ret = 0, flag = HASH_UPDATE, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk)) {
-		rctx->flag |= HASH_DONE;
-		flag = HASH_LAST;
-	}
-
-	/* submit */
-	rctx->flag |= HASH_FINAL;
-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
-	sha1_mb_add_list(rctx, cstate);
-
-	kernel_fpu_begin();
-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
-								nbytes, flag);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha1_mb_final(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-		container_of(areq, struct mcryptd_hash_request_ctx, areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
-	struct sha1_hash_ctx *sha_ctx;
-	int ret = 0;
-	u8 data;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	rctx->flag |= HASH_DONE | HASH_FINAL;
-
-	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
-	/* flag HASH_FINAL and 0 data size */
-	sha1_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
-								HASH_LAST);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha1_mb_export(struct ahash_request *areq, void *out)
-{
-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha1_mb_import(struct ahash_request *areq, const void *in)
-{
-	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_ahash *mcryptd_tfm;
-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mcryptd_hash_ctx *mctx;
-
-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
-					  CRYPTO_ALG_INTERNAL,
-					  CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(mcryptd_tfm))
-		return PTR_ERR(mcryptd_tfm);
-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
-	mctx->alg_state = &sha1_mb_alg_state;
-	ctx->mcryptd_tfm = mcryptd_tfm;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct ahash_request) +
-				 crypto_ahash_reqsize(&mcryptd_tfm->base));
-
-	return 0;
-}
-
-static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct ahash_request) +
-				 sizeof(struct sha1_hash_ctx));
-
-	return 0;
-}
-
-static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha1_mb_areq_alg = {
-	.init		=	sha1_mb_init,
-	.update		=	sha1_mb_update,
-	.final		=	sha1_mb_final,
-	.finup		=	sha1_mb_finup,
-	.export		=	sha1_mb_export,
-	.import		=	sha1_mb_import,
-	.halg		=	{
-		.digestsize	=	SHA1_DIGEST_SIZE,
-		.statesize	=	sizeof(struct sha1_hash_ctx),
-		.base		=	{
-			.cra_name	 = "__sha1-mb",
-			.cra_driver_name = "__intel_sha1-mb",
-			.cra_priority	 = 100,
-			/*
-			 * use ASYNC flag as some buffers in multi-buffer
-			 * algo may not have completed before hashing thread
-			 * sleep
-			 */
-			.cra_flags	= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_INTERNAL,
-			.cra_blocksize	= SHA1_BLOCK_SIZE,
-			.cra_module	= THIS_MODULE,
-			.cra_list	= LIST_HEAD_INIT
-					(sha1_mb_areq_alg.halg.base.cra_list),
-			.cra_init	= sha1_mb_areq_init_tfm,
-			.cra_exit	= sha1_mb_areq_exit_tfm,
-			.cra_ctxsize	= sizeof(struct sha1_hash_ctx),
-		}
-	}
-};
-
-static int sha1_mb_async_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha1_mb_async_update(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha1_mb_async_finup(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha1_mb_async_final(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha1_mb_async_digest(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha1_mb_async_export(struct ahash_request *req, void *out)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha1_mb_async_import(struct ahash_request *req, const void *in)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
-	struct mcryptd_hash_request_ctx *rctx;
-	struct ahash_request *areq;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	rctx = ahash_request_ctx(mcryptd_req);
-	areq = &rctx->areq;
-
-	ahash_request_set_tfm(areq, child);
-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
-					rctx->complete, req);
-
-	return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha1_mb_async_alg = {
-	.init           = sha1_mb_async_init,
-	.update         = sha1_mb_async_update,
-	.final          = sha1_mb_async_final,
-	.finup          = sha1_mb_async_finup,
-	.digest         = sha1_mb_async_digest,
-	.export		= sha1_mb_async_export,
-	.import		= sha1_mb_async_import,
-	.halg = {
-		.digestsize     = SHA1_DIGEST_SIZE,
-		.statesize	= sizeof(struct sha1_hash_ctx),
-		.base = {
-			.cra_name               = "sha1",
-			.cra_driver_name        = "sha1_mb",
-			/*
-			 * Low priority, since with few concurrent hash requests
-			 * this is extremely slow due to the flush delay.  Users
-			 * whose workloads would benefit from this can request
-			 * it explicitly by driver name, or can increase its
-			 * priority at runtime using NETLINK_CRYPTO.
-			 */
-			.cra_priority           = 50,
-			.cra_flags              = CRYPTO_ALG_ASYNC,
-			.cra_blocksize          = SHA1_BLOCK_SIZE,
-			.cra_module             = THIS_MODULE,
-			.cra_list               = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list),
-			.cra_init               = sha1_mb_async_init_tfm,
-			.cra_exit               = sha1_mb_async_exit_tfm,
-			.cra_ctxsize		= sizeof(struct sha1_mb_ctx),
-			.cra_alignmask		= 0,
-		},
-	},
-};
-
-static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
-	struct mcryptd_hash_request_ctx *rctx;
-	unsigned long cur_time;
-	unsigned long next_flush = 0;
-	struct sha1_hash_ctx *sha_ctx;
-
-
-	cur_time = jiffies;
-
-	while (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		if (time_before(cur_time, rctx->tag.expire))
-			break;
-		kernel_fpu_begin();
-		sha_ctx = (struct sha1_hash_ctx *)
-					sha1_ctx_mgr_flush(cstate->mgr);
-		kernel_fpu_end();
-		if (!sha_ctx) {
-			pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
-			break;
-		}
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		sha_finish_walk(&rctx, cstate, true);
-		sha_complete_job(rctx, cstate, 0);
-	}
-
-	if (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		/* get the hash context and then flush time */
-		next_flush = rctx->tag.expire;
-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
-	}
-	return next_flush;
-}
-
-static int __init sha1_mb_mod_init(void)
-{
-
-	int cpu;
-	int err;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	/* check for dependent cpu features */
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_BMI2))
-		return -ENODEV;
-
-	/* initialize multibuffer structures */
-	sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate);
-
-	sha1_job_mgr_init = sha1_mb_mgr_init_avx2;
-	sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2;
-	sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2;
-	sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2;
-
-	if (!sha1_mb_alg_state.alg_cstate)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
-		cpu_state->next_flush = 0;
-		cpu_state->next_seq_num = 0;
-		cpu_state->flusher_engaged = false;
-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
-		cpu_state->cpu = cpu;
-		cpu_state->alg_state = &sha1_mb_alg_state;
-		cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr),
-					GFP_KERNEL);
-		if (!cpu_state->mgr)
-			goto err2;
-		sha1_ctx_mgr_init(cpu_state->mgr);
-		INIT_LIST_HEAD(&cpu_state->work_list);
-		spin_lock_init(&cpu_state->work_lock);
-	}
-	sha1_mb_alg_state.flusher = &sha1_mb_flusher;
-
-	err = crypto_register_ahash(&sha1_mb_areq_alg);
-	if (err)
-		goto err2;
-	err = crypto_register_ahash(&sha1_mb_async_alg);
-	if (err)
-		goto err1;
-
-
-	return 0;
-err1:
-	crypto_unregister_ahash(&sha1_mb_areq_alg);
-err2:
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha1_mb_alg_state.alg_cstate);
-	return -ENODEV;
-}
-
-static void __exit sha1_mb_mod_fini(void)
-{
-	int cpu;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	crypto_unregister_ahash(&sha1_mb_async_alg);
-	crypto_unregister_ahash(&sha1_mb_areq_alg);
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha1_mb_alg_state.alg_cstate);
-}
-
-module_init(sha1_mb_mod_init);
-module_exit(sha1_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS_CRYPTO("sha1");
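For reference, the sha1_pad() helper in the file above implements standard SHA-1 message padding: append a 0x80 terminator, zero-fill to a block boundary that leaves room for the length field, and store the message length in bits big-endian in the last 8 bytes. A minimal standalone sketch of the same computation (plain C, assuming a 64-byte block; a byte loop stands in for the kernel's cpu_to_be64()):

#include <stdint.h>
#include <string.h>

#define BLOCK 64				/* SHA1_BLOCK_SIZE */

/* Returns how many extra blocks (1 or 2) must be hashed to finish. */
static uint32_t pad_blocks(uint8_t pad[BLOCK * 2], uint64_t total_len)
{
	uint32_t i = total_len & (BLOCK - 1);	/* bytes in the partial block */
	uint64_t bits = total_len << 3;
	int j;

	memset(&pad[i], 0, BLOCK);
	pad[i] = 0x80;				/* the mandatory 1 bit */

	/* round up to the next block boundary that leaves 8 length bytes */
	i += ((BLOCK - 1) & (0 - (total_len + 8 + 1))) + 1 + 8;

	for (j = 0; j < 8; j++)			/* bit count, big-endian */
		pad[i - 1 - j] = bits >> (8 * j);

	return i >> 6;				/* i is now 64 or 128 */
}

So a 3-byte tail pads out to one extra block, while a 60-byte tail (too full to also hold the 8-byte length field) pads out to two.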
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
deleted file mode 100644
index 9454bd16f9f8..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Header file for multi buffer SHA context
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha1_mb_mgr.h"
-
-#define HASH_UPDATE          0x00
-#define HASH_LAST            0x01
-#define HASH_DONE	     0x02
-#define HASH_FINAL	     0x04
-
-#define HASH_CTX_STS_IDLE       0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST       0x02
-#define HASH_CTX_STS_COMPLETE   0x04
-
-enum hash_ctx_error {
-	HASH_CTX_ERROR_NONE               =  0,
-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
-
-#ifdef HASH_CTX_DEBUG
-	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
-#endif
-};
-
-
-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx)     ((ctx)->status)
-#define hash_ctx_error(ctx)      ((ctx)->error)
-#define hash_ctx_init(ctx) \
-	do { \
-		(ctx)->error = HASH_CTX_ERROR_NONE; \
-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
-	} while (0)
-
-
-/* Hash Constants and Typedefs */
-#define SHA1_DIGEST_LENGTH          5
-#define SHA1_LOG2_BLOCK_SIZE        6
-
-#define SHA1_PADLENGTHFIELD_SIZE    8
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
-	if (unlikely(!(expr))) { \
-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
-		#expr, __FILE__, __func__, __LINE__); \
-	} \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha1_ctx_mgr {
-	struct sha1_mb_mgr mgr;
-};
-
-/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
-
-struct sha1_hash_ctx {
-	/* Must be at struct offset 0 */
-	struct job_sha1       job;
-	/* status flag */
-	int status;
-	/* error flag */
-	int error;
-
-	uint64_t	total_length;
-	const void	*incoming_buffer;
-	uint32_t	incoming_buffer_length;
-	uint8_t		partial_block_buffer[SHA1_BLOCK_SIZE * 2];
-	uint32_t	partial_block_buffer_length;
-	void		*user_data;
-};
-
-#endif
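The status and flag values above drive the submit/resubmit/flush state machine in sha1_mb.c. As a rough usage sketch only (these are static functions, so this never compiled outside that file; the real callers also wrap each submit/flush in kernel_fpu_begin()/kernel_fpu_end() and walk scatterlists rather than flat buffers):

/* Hash one flat buffer through the context manager defined above. */
static int digest_one_buffer(struct sha1_ctx_mgr *mgr,
			     struct sha1_hash_ctx *ctx,
			     const void *data, uint32_t len)
{
	struct sha1_hash_ctx *done;

	hash_ctx_init(ctx);	/* error = NONE, status = COMPLETE */

	/* HASH_LAST: this single submission finishes the message */
	done = sha1_ctx_mgr_submit(mgr, ctx, data, len, HASH_LAST);

	/*
	 * NULL means the job is parked in a lane waiting for batch-mates;
	 * flushing forces the partial batch through.  With a single job
	 * in flight, the context that comes back is ours.
	 */
	while (!done)
		done = sha1_ctx_mgr_flush(mgr);

	/* digest words now sit in done->job.result_digest */
	return done->error;
}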
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
deleted file mode 100644
index 08ad1a9acfd7..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Header file for multi buffer SHA1 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *	James Guilford <james.guilford@intel.com>
- *	Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-
-#include <linux/types.h>
-
-#define NUM_SHA1_DIGEST_WORDS 5
-
-enum job_sts {	STS_UNKNOWN = 0,
-		STS_BEING_PROCESSED = 1,
-		STS_COMPLETED = 2,
-		STS_INTERNAL_ERROR = 3,
-		STS_ERROR = 4
-};
-
-struct job_sha1 {
-	u8	*buffer;
-	u32	len;
-	u32	result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
-	enum	job_sts status;
-	void	*user_data;
-};
-
-/* SHA1 out-of-order scheduler */
-
-/* typedef uint32_t sha1_digest_array[5][8]; */
-
-struct sha1_args_x8 {
-	uint32_t	digest[5][8];
-	uint8_t		*data_ptr[8];
-};
-
-struct sha1_lane_data {
-	struct job_sha1 *job_in_lane;
-};
-
-struct sha1_mb_mgr {
-	struct sha1_args_x8 args;
-
-	uint32_t lens[8];
-
-	/* each byte is index (0...7) of unused lanes */
-	uint64_t unused_lanes;
-	/* byte 4 is set to FF as a flag */
-	struct sha1_lane_data ldata[8];
-};
-
-
-#define SHA1_MB_MGR_NUM_LANES_AVX2 8
-
-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
-					 struct job_sha1 *job);
-struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
-
-#endif
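The unused_lanes field above is the free-lane bookkeeping for the eight AVX2 lanes: the manager treats it as a small stack of lane indices, popped on submit and pushed back when a lane's job completes. A C sketch of the idea, not the actual assembly, assuming the 4-bit-per-entry encoding that the AVX2 init routine (sha1_mb_mgr_init_avx2.c, also removed by this commit) seeds as 0xF76543210, with 0xF as the empty sentinel:

#include <stdint.h>

/* Pop the next free lane index; 0xF means no lane is free. */
static unsigned int lane_pop(uint64_t *unused_lanes)
{
	unsigned int lane = *unused_lanes & 0xF;

	*unused_lanes >>= 4;
	return lane;
}

/* Push a lane back once its job completes. */
static void lane_push(uint64_t *unused_lanes, unsigned int lane)
{
	*unused_lanes = (*unused_lanes << 4) | lane;
}

Once all eight lanes are busy the stack is down to the sentinel, which is what forces submitters to wait for a flush before another job can start.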
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
deleted file mode 100644
index 86688c6e7a25..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,287 +0,0 @@
1/*
2 * Header file for multi buffer SHA1 algorithm data structure
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2014 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * James Guilford <james.guilford@intel.com>
22 * Tim Chen <tim.c.chen@linux.intel.com>
23 *
24 * BSD LICENSE
25 *
26 * Copyright(c) 2014 Intel Corporation.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
37 * distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 */
54
55# Macros for defining data structures
56
57# Usage example
58
59#START_FIELDS # JOB_AES
60### name size align
61#FIELD _plaintext, 8, 8 # pointer to plaintext
62#FIELD _ciphertext, 8, 8 # pointer to ciphertext
63#FIELD _IV, 16, 8 # IV
64#FIELD _keys, 8, 8 # pointer to keys
65#FIELD _len, 4, 4 # length in bytes
66#FIELD _status, 4, 4 # status enumeration
67#FIELD _user_data, 8, 8 # pointer to user data
68#UNION _union, size1, align1, \
69# size2, align2, \
70# size3, align3, \
71# ...
72#END_FIELDS
73#%assign _JOB_AES_size _FIELD_OFFSET
74#%assign _JOB_AES_align _STRUCT_ALIGN
75
76#########################################################################
77
78# Alternate "struc-like" syntax:
79# STRUCT job_aes2
80# RES_Q .plaintext, 1
81# RES_Q .ciphertext, 1
82# RES_DQ .IV, 1
83# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
84# RES_U .union, size1, align1, \
85# size2, align2, \
86# ...
87# ENDSTRUCT
88# # Following only needed if nesting
89# %assign job_aes2_size _FIELD_OFFSET
90# %assign job_aes2_align _STRUCT_ALIGN
91#
92# RES_* macros take a name, a count and an optional alignment.
93# The count in in terms of the base size of the macro, and the
94# default alignment is the base size.
95# The macros are:
96# Macro Base size
97# RES_B 1
98# RES_W 2
99# RES_D 4
100# RES_Q 8
101# RES_DQ 16
102# RES_Y 32
103# RES_Z 64
104#
105# RES_U defines a union. It's arguments are a name and two or more
106# pairs of "size, alignment"
107#
108# The two assigns are only needed if this structure is being nested
109# within another. Even if the assigns are not done, one can still use
110# STRUCT_NAME_size as the size of the structure.
111#
112# Note that for nesting, you still need to assign to STRUCT_NAME_size.
113#
114# The differences between this and using "struc" directly are that each
115# type is implicitly aligned to its natural length (although this can be
116# over-ridden with an explicit third parameter), and that the structure
117# is padded at the end to its overall alignment.
118#
119
120#########################################################################
121
122#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
123#define _SHA1_MB_MGR_DATASTRUCT_ASM_
124
125## START_FIELDS
126.macro START_FIELDS
127 _FIELD_OFFSET = 0
128 _STRUCT_ALIGN = 0
129.endm
130
131## FIELD name size align
132.macro FIELD name size align
133 _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
134 \name = _FIELD_OFFSET
135 _FIELD_OFFSET = _FIELD_OFFSET + (\size)
136.if (\align > _STRUCT_ALIGN)
137 _STRUCT_ALIGN = \align
138.endif
139.endm
140
141## END_FIELDS
142.macro END_FIELDS
143 _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
144.endm
145
146########################################################################
147
148.macro STRUCT p1
149START_FIELDS
150.struc \p1
151.endm
152
153.macro ENDSTRUCT
154 tmp = _FIELD_OFFSET
155 END_FIELDS
156 tmp = (_FIELD_OFFSET - tmp)
157 .if (tmp > 0)
158 .skip tmp
159.endif
160.endstruc
161.endm
162
163## RES_int name size align
164.macro RES_int p1 p2 p3
165 name = \p1
166 size = \p2
167 align = \p3
168
169 _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
170.align align
171.lcomm name size
172 _FIELD_OFFSET = _FIELD_OFFSET + (size)
173.if (align > _STRUCT_ALIGN)
174 _STRUCT_ALIGN = align
175.endif
176.endm
177
178
179
180# macro RES_B name, size [, align]
181.macro RES_B _name, _size, _align=1
182RES_int _name _size _align
183.endm
184
185# macro RES_W name, size [, align]
186.macro RES_W _name, _size, _align=2
187RES_int _name 2*(_size) _align
188.endm
189
190# macro RES_D name, size [, align]
191.macro RES_D _name, _size, _align=4
192RES_int _name 4*(_size) _align
193.endm
194
195# macro RES_Q name, size [, align]
196.macro RES_Q _name, _size, _align=8
197RES_int _name 8*(_size) _align
198.endm
199
200# macro RES_DQ name, size [, align]
201.macro RES_DQ _name, _size, _align=16
202RES_int _name 16*(_size) _align
203.endm
204
205# macro RES_Y name, size [, align]
206.macro RES_Y _name, _size, _align=32
207RES_int _name 32*(_size) _align
208.endm
209
210# macro RES_Z name, size [, align]
211.macro RES_Z _name, _size, _align=64
212RES_int _name 64*(_size) _align
213.endm
214
215
216#endif
217
218########################################################################
219#### Define constants
220########################################################################
221
222########################################################################
223#### Define SHA1 Out Of Order Data Structures
224########################################################################
225
226START_FIELDS # LANE_DATA
227### name size align
228FIELD _job_in_lane, 8, 8 # pointer to job object
229END_FIELDS
230
231_LANE_DATA_size = _FIELD_OFFSET
232_LANE_DATA_align = _STRUCT_ALIGN
233
234########################################################################
235
236START_FIELDS # SHA1_ARGS_X8
237### name size align
238FIELD _digest, 4*5*8, 16 # transposed digest
239FIELD _data_ptr, 8*8, 8 # array of pointers to data
240END_FIELDS
241
242_SHA1_ARGS_X4_size = _FIELD_OFFSET
243_SHA1_ARGS_X4_align = _STRUCT_ALIGN
244_SHA1_ARGS_X8_size = _FIELD_OFFSET
245_SHA1_ARGS_X8_align = _STRUCT_ALIGN
246
247########################################################################
248
249START_FIELDS # MB_MGR
250### name size align
251FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
252FIELD _lens, 4*8, 8
253FIELD _unused_lanes, 8, 8
254FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
255END_FIELDS
256
257_MB_MGR_size = _FIELD_OFFSET
258_MB_MGR_align = _STRUCT_ALIGN
259
260_args_digest = _args + _digest
261_args_data_ptr = _args + _data_ptr
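For orientation, the MB_MGR layout just defined corresponds roughly to the following C structure (field types inferred from the FIELD sizes and alignments above; a sketch, not a verbatim copy of sha1_mb_mgr.h). The digest is stored transposed, digest[word][lane], so word w of lane l sits at byte offset w*32 + l*4 from _args_digest -- the addressing the flush and submit routines below rely on.

#include <stdint.h>

/* Hypothetical C mirror of the layout above; sizes and alignments
 * follow the FIELD directives rather than the real header. */
struct lane_data {
        void *job_in_lane;                   /* _job_in_lane: 8, 8 */
};

struct sha1_args_x8 {
        /* _digest: 4*5*8 bytes, transposed as digest[word][lane] */
        uint32_t digest[5][8] __attribute__((aligned(16)));
        const uint8_t *data_ptr[8];          /* _data_ptr: 8*8     */
};

struct sha1_mb_mgr {
        struct sha1_args_x8 args;            /* _args              */
        uint32_t lens[8];                    /* _lens: 4*8         */
        uint64_t unused_lanes;               /* _unused_lanes      */
        struct lane_data ldata[8];           /* _ldata             */
};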
262
263
264########################################################################
265#### Define constants
266########################################################################
267
268#define STS_UNKNOWN 0
269#define STS_BEING_PROCESSED 1
270#define STS_COMPLETED 2
271
272########################################################################
273#### Define JOB_SHA1 structure
274########################################################################
275
276START_FIELDS # JOB_SHA1
277
278### name size align
279FIELD _buffer, 8, 8 # pointer to buffer
280FIELD _len, 4, 4 # length in bytes
281FIELD _result_digest, 5*4, 32 # Digest (output)
282FIELD _status, 4, 4
283FIELD _user_data, 8, 8
284END_FIELDS
285
286_JOB_SHA1_size = _FIELD_OFFSET
287_JOB_SHA1_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7cfba738f104..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,304 +0,0 @@
1/*
2 * Flush routine for SHA1 multibuffer
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2014 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * James Guilford <james.guilford@intel.com>
22 * Tim Chen <tim.c.chen@linux.intel.com>
23 *
24 * BSD LICENSE
25 *
26 * Copyright(c) 2014 Intel Corporation.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
37 * distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 */
54#include <linux/linkage.h>
55#include <asm/frame.h>
56#include "sha1_mb_mgr_datastruct.S"
57
58
59.extern sha1_x8_avx2
60
61# LINUX register definitions
62#define arg1 %rdi
63#define arg2 %rsi
64
65# Common definitions
66#define state arg1
67#define job arg2
68#define len2 arg2
69
70# idx must be a register not clobbered by sha1_x8_avx2
71#define idx %r8
72#define DWORD_idx %r8d
73
74#define unused_lanes %rbx
75#define lane_data %rbx
76#define tmp2 %rbx
77#define tmp2_w %ebx
78
79#define job_rax %rax
80#define tmp1 %rax
81#define size_offset %rax
82#define tmp %rax
83#define start_offset %rax
84
85 #define tmp3 arg1
86
87 #define extra_blocks arg2
88 #define p arg2
89
90.macro LABEL prefix n
91\prefix\n\():
92.endm
93
94.macro JNE_SKIP i
95jne skip_\i
96.endm
97
98.altmacro
99.macro SET_OFFSET _offset
100offset = \_offset
101.endm
102.noaltmacro
103
104# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
105 # arg 1 : rdi : state
106ENTRY(sha1_mb_mgr_flush_avx2)
107 FRAME_BEGIN
108 push %rbx
109
110 # If bit (32+3) is set, then all lanes are empty
111 mov _unused_lanes(state), unused_lanes
112 bt $32+3, unused_lanes
113 jc return_null
114
115 # find a lane with a non-null job
116 xor idx, idx
117 offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
118 cmpq $0, offset(state)
119 cmovne one(%rip), idx
120 offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
121 cmpq $0, offset(state)
122 cmovne two(%rip), idx
123 offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
124 cmpq $0, offset(state)
125 cmovne three(%rip), idx
126 offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
127 cmpq $0, offset(state)
128 cmovne four(%rip), idx
129 offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
130 cmpq $0, offset(state)
131 cmovne five(%rip), idx
132 offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
133 cmpq $0, offset(state)
134 cmovne six(%rip), idx
135 offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
136 cmpq $0, offset(state)
137 cmovne seven(%rip), idx
138
139 # copy idx to empty lanes
140copy_lane_data:
141 offset = (_args + _data_ptr)
142 mov offset(state,idx,8), tmp
143
144 I = 0
145.rep 8
146 offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
147 cmpq $0, offset(state)
148.altmacro
149 JNE_SKIP %I
150 offset = (_args + _data_ptr + 8*I)
151 mov tmp, offset(state)
152 offset = (_lens + 4*I)
153 movl $0xFFFFFFFF, offset(state)
154LABEL skip_ %I
155 I = (I+1)
156.noaltmacro
157.endr
158
159 # Find min length
160 vmovdqu _lens+0*16(state), %xmm0
161 vmovdqu _lens+1*16(state), %xmm1
162
163 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
164 vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
165 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
166 vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
167 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
168
169 vmovd %xmm2, DWORD_idx
170 mov idx, len2
171 and $0xF, idx
172 shr $4, len2
173 jz len_is_0
174
175 vpand clear_low_nibble(%rip), %xmm2, %xmm2
176 vpshufd $0, %xmm2, %xmm2
177
178 vpsubd %xmm2, %xmm0, %xmm0
179 vpsubd %xmm2, %xmm1, %xmm1
180
181 vmovdqu %xmm0, _lens+0*16(state)
182 vmovdqu %xmm1, _lens+1*16(state)
183
184 # "state" and "args" are the same address, arg1
185 # len is arg2
186 call sha1_x8_avx2
187 # state and idx are intact
188
189
190len_is_0:
191 # process completed job "idx"
192 imul $_LANE_DATA_size, idx, lane_data
193 lea _ldata(state, lane_data), lane_data
194
195 mov _job_in_lane(lane_data), job_rax
196 movq $0, _job_in_lane(lane_data)
197 movl $STS_COMPLETED, _status(job_rax)
198 mov _unused_lanes(state), unused_lanes
199 shl $4, unused_lanes
200 or idx, unused_lanes
201 mov unused_lanes, _unused_lanes(state)
202
203 movl $0xFFFFFFFF, _lens(state, idx, 4)
204
205 vmovd _args_digest(state , idx, 4) , %xmm0
206 vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
207 vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
208 vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
209 movl _args_digest+4*32(state, idx, 4), tmp2_w
210
211 vmovdqu %xmm0, _result_digest(job_rax)
212 offset = (_result_digest + 1*16)
213 mov tmp2_w, offset(job_rax)
214
215return:
216 pop %rbx
217 FRAME_END
218 ret
219
220return_null:
221 xor job_rax, job_rax
222 jmp return
223ENDPROC(sha1_mb_mgr_flush_avx2)
224
225
226#################################################################
227
228.align 16
229ENTRY(sha1_mb_mgr_get_comp_job_avx2)
230 push %rbx
231
232 ## if bit 32+3 is set, then all lanes are empty
233 mov _unused_lanes(state), unused_lanes
234 bt $(32+3), unused_lanes
235 jc .return_null
236
237 # Find min length
238 vmovdqu _lens(state), %xmm0
239 vmovdqu _lens+1*16(state), %xmm1
240
241 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
242 vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
243 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
244 vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
245 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
246
247 vmovd %xmm2, DWORD_idx
248 test $~0xF, idx
249 jnz .return_null
250
251 # process completed job "idx"
252 imul $_LANE_DATA_size, idx, lane_data
253 lea _ldata(state, lane_data), lane_data
254
255 mov _job_in_lane(lane_data), job_rax
256 movq $0, _job_in_lane(lane_data)
257 movl $STS_COMPLETED, _status(job_rax)
258 mov _unused_lanes(state), unused_lanes
259 shl $4, unused_lanes
260 or idx, unused_lanes
261 mov unused_lanes, _unused_lanes(state)
262
263 movl $0xFFFFFFFF, _lens(state, idx, 4)
264
265 vmovd _args_digest(state, idx, 4), %xmm0
266 vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
267 vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
268 vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
269 movl _args_digest+4*32(state, idx, 4), tmp2_w
270
271 vmovdqu %xmm0, _result_digest(job_rax)
272 movl tmp2_w, _result_digest+1*16(job_rax)
273
274 pop %rbx
275
276 ret
277
278.return_null:
279 xor job_rax, job_rax
280 pop %rbx
281 ret
282ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
283
284.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
285.align 16
286clear_low_nibble:
287.octa 0x000000000000000000000000FFFFFFF0
288
289.section .rodata.cst8, "aM", @progbits, 8
290.align 8
291one:
292.quad 1
293two:
294.quad 2
295three:
296.quad 3
297four:
298.quad 4
299five:
300.quad 5
301six:
302.quad 6
303seven:
304.quad 7
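The vpminud/vpalignr sequence above is a horizontal minimum over the eight packed lens[] entries. Each entry stores (blocks << 4) | lane, and empty lanes are parked at 0xFFFFFFFF, so the numeric minimum simultaneously yields the shortest remaining length and the lane holding it. A scalar C sketch of the same reduction (illustrative only, not the kernel code):

#include <stdint.h>
#include <stdio.h>

/* Scalar equivalent of the vector min-reduction: each lens[] entry
 * packs (blocks << 4) | lane, so the smallest value identifies both
 * the shortest job and its lane. */
static uint32_t min_packed_len(const uint32_t lens[8])
{
        uint32_t min = lens[0];

        for (int i = 1; i < 8; i++)
                if (lens[i] < min)
                        min = lens[i];
        return min;
}

int main(void)
{
        uint32_t lens[8] = {
                (7u << 4) | 0, (3u << 4) | 1, (9u << 4) | 2, (3u << 4) | 3,
                0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, /* empty */
        };
        uint32_t idx = min_packed_len(lens);

        /* Prints "lane=1 blocks=3": the tie between lanes 1 and 3
         * breaks toward the lower lane number. */
        printf("lane=%u blocks=%u\n", idx & 0xF, idx >> 4);
        return 0;
}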
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
deleted file mode 100644
index d2add0d35f43..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,64 +0,0 @@
1/*
2 * Initialization code for multi buffer SHA1 algorithm for AVX2
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2014 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Tim Chen <tim.c.chen@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2014 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#include "sha1_mb_mgr.h"
55
56void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
57{
58 unsigned int j;
59 state->unused_lanes = 0xF76543210ULL;
60 for (j = 0; j < 8; j++) {
61 state->lens[j] = 0xFFFFFFFF;
62 state->ldata[j].job_in_lane = NULL;
63 }
64}
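The magic constant 0xF76543210 is a nibble stack: lane numbers 0 through 7 stacked with a 0xF sentinel on top. Submission pops the low nibble to claim a lane; completion shifts left and pushes the lane back. This is also why the flush path tests bit 32+3 (with all lanes free, the sentinel's top bit sits at bit 35) and the submit path compares against 0xF (with all lanes busy, only the sentinel remains). A C sketch of the convention (assumed helper names):

#include <assert.h>
#include <stdint.h>

static unsigned pop_lane(uint64_t *unused_lanes)    /* "and $0xF; shr $4" */
{
        unsigned lane = *unused_lanes & 0xF;

        *unused_lanes >>= 4;
        return lane;
}

static void push_lane(uint64_t *unused_lanes, unsigned lane) /* "shl $4; or" */
{
        *unused_lanes = (*unused_lanes << 4) | lane;
}

int main(void)
{
        uint64_t ul = 0xF76543210ULL;

        assert(ul & (1ULL << 35));      /* all lanes free: bit 32+3 set  */

        for (unsigned i = 0; i < 8; i++)
                assert(pop_lane(&ul) == i);

        assert(ul == 0xF);              /* all lanes busy: sentinel only */

        push_lane(&ul, 5);              /* lane 5 completed, reusable    */
        assert(ul == 0xF5);
        return 0;
}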
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
deleted file mode 100644
index 7a93b1c0d69a..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,209 +0,0 @@
1/*
2 * Buffer submit code for multi buffer SHA1 algorithm
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2014 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * James Guilford <james.guilford@intel.com>
22 * Tim Chen <tim.c.chen@linux.intel.com>
23 *
24 * BSD LICENSE
25 *
26 * Copyright(c) 2014 Intel Corporation.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
37 * distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 */
54
55#include <linux/linkage.h>
56#include <asm/frame.h>
57#include "sha1_mb_mgr_datastruct.S"
58
59
60 .extern sha1_x8_avx2
61
62# LINUX register definitions
63arg1 = %rdi
64arg2 = %rsi
65size_offset = %rcx
66tmp2 = %rcx
67extra_blocks = %rdx
68
69# Common definitions
70#define state arg1
71#define job %rsi
72#define len2 arg2
73#define p2 arg2
74
75 # idx must be a register not clobbered by sha1_x8_avx2
76idx = %r8
77DWORD_idx = %r8d
78last_len = %r8
79
80p = %r11
81start_offset = %r11
82
83unused_lanes = %rbx
84BYTE_unused_lanes = %bl
85
86job_rax = %rax
87len = %rax
88DWORD_len = %eax
89
90lane = %r12
91tmp3 = %r12
92
93tmp = %r9
94DWORD_tmp = %r9d
95
96lane_data = %r10
97
98 # JOB* sha1_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
99 # arg 1 : rdi : state
100 # arg 2 : rsi : job
101ENTRY(sha1_mb_mgr_submit_avx2)
102 FRAME_BEGIN
103 push %rbx
104 push %r12
105
106 mov _unused_lanes(state), unused_lanes
107 mov unused_lanes, lane
108 and $0xF, lane
109 shr $4, unused_lanes
110 imul $_LANE_DATA_size, lane, lane_data
111 movl $STS_BEING_PROCESSED, _status(job)
112 lea _ldata(state, lane_data), lane_data
113 mov unused_lanes, _unused_lanes(state)
114 movl _len(job), DWORD_len
115
116 mov job, _job_in_lane(lane_data)
117 shl $4, len
118 or lane, len
119
120 movl DWORD_len, _lens(state , lane, 4)
121
122 # Load digest words from result_digest
123 vmovdqu _result_digest(job), %xmm0
124 mov _result_digest+1*16(job), DWORD_tmp
125 vmovd %xmm0, _args_digest(state, lane, 4)
126 vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
127 vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
128 vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
129 movl DWORD_tmp, _args_digest+4*32(state , lane, 4)
130
131 mov _buffer(job), p
132 mov p, _args_data_ptr(state, lane, 8)
133
134 cmp $0xF, unused_lanes
135 jne return_null
136
137start_loop:
138 # Find min length
139 vmovdqa _lens(state), %xmm0
140 vmovdqa _lens+1*16(state), %xmm1
141
142 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
143 vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
144 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
145 vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
146 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
147
148 vmovd %xmm2, DWORD_idx
149 mov idx, len2
150 and $0xF, idx
151 shr $4, len2
152 jz len_is_0
153
154 vpand clear_low_nibble(%rip), %xmm2, %xmm2
155 vpshufd $0, %xmm2, %xmm2
156
157 vpsubd %xmm2, %xmm0, %xmm0
158 vpsubd %xmm2, %xmm1, %xmm1
159
160 vmovdqa %xmm0, _lens + 0*16(state)
161 vmovdqa %xmm1, _lens + 1*16(state)
162
163
164 # "state" and "args" are the same address, arg1
165 # len is arg2
166 call sha1_x8_avx2
167
168 # state and idx are intact
169
170len_is_0:
171 # process completed job "idx"
172 imul $_LANE_DATA_size, idx, lane_data
173 lea _ldata(state, lane_data), lane_data
174
175 mov _job_in_lane(lane_data), job_rax
176 mov _unused_lanes(state), unused_lanes
177 movq $0, _job_in_lane(lane_data)
178 movl $STS_COMPLETED, _status(job_rax)
179 shl $4, unused_lanes
180 or idx, unused_lanes
181 mov unused_lanes, _unused_lanes(state)
182
183 movl $0xFFFFFFFF, _lens(state, idx, 4)
184
185 vmovd _args_digest(state, idx, 4), %xmm0
186 vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
187 vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
188 vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
189 movl _args_digest+4*32(state, idx, 4), DWORD_tmp
190
191 vmovdqu %xmm0, _result_digest(job_rax)
192 movl DWORD_tmp, _result_digest+1*16(job_rax)
193
194return:
195 pop %r12
196 pop %rbx
197 FRAME_END
198 ret
199
200return_null:
201 xor job_rax, job_rax
202 jmp return
203
204ENDPROC(sha1_mb_mgr_submit_avx2)
205
206.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
207.align 16
208clear_low_nibble:
209 .octa 0x000000000000000000000000FFFFFFF0
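The vpextrd/vpinsrd offsets used above (_args_digest + w*32 plus lane*4 from the index register) reflect the transposed digest layout: word w of all eight lanes is contiguous, ready to be loaded into one ymm register by the x8 rounds. A C sketch of the scatter/gather that the submit and completion paths perform (hypothetical helpers, not the kernel code):

#include <stdint.h>

#define NUM_LANES  8
#define SHA1_WORDS 5

/* Scatter one job's digest into the transposed array: the lane's
 * word w lands at byte offset w*32 + lane*4, as in the assembly. */
static void scatter_digest(uint32_t args_digest[SHA1_WORDS][NUM_LANES],
                           const uint32_t result[SHA1_WORDS], unsigned lane)
{
        for (int w = 0; w < SHA1_WORDS; w++)
                args_digest[w][lane] = result[w];
}

/* Gather a finished lane's digest back out for the job's result. */
static void gather_digest(const uint32_t args_digest[SHA1_WORDS][NUM_LANES],
                          uint32_t result[SHA1_WORDS], unsigned lane)
{
        for (int w = 0; w < SHA1_WORDS; w++)
                result[w] = args_digest[w][lane];
}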
diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
deleted file mode 100644
index 20f77aa633de..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
+++ /dev/null
@@ -1,492 +0,0 @@
1/*
2 * Multi-buffer SHA1 algorithm hash compute routine
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2014 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * James Guilford <james.guilford@intel.com>
22 * Tim Chen <tim.c.chen@linux.intel.com>
23 *
24 * BSD LICENSE
25 *
26 * Copyright(c) 2014 Intel Corporation.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
37 * distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 */
54
55#include <linux/linkage.h>
56#include "sha1_mb_mgr_datastruct.S"
57
58 ## code to compute eight-lane ("oct") SHA1 using AVX2
59## outer calling routine takes care of save and restore of XMM registers
60
61 ## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
62##
63## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
64## Linux preserves: rdi rbp r8
65##
66## clobbers ymm0-15
67
68
69# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
70# "transpose" data in {r0...r7} using temps {t0...t1}
71# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
72# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
73# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
74# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
75# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
76# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
77# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
78# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
79# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
80#
81# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
82# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
83# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
84# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
85# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
86# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
87# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
88# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
89# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
90#
91
92.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
93 # process top half (r0..r3) {a...d}
94 vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
95 vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
96 vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
97 vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
98 vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
99 vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
100 vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
101 vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
102
103 # use r2 in place of t0
104 # process bottom half (r4..r7) {e...h}
105 vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
106 vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
107 vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
108 vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
109 vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
110 vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
111 vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
112 vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
113
114 vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
115 vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
116 vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
117 vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
118 vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
119 vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
120 vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
121 vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
122
123.endm
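Functionally the shuffle sequence is a plain 8x8 dword transpose; the register comments above track which element lands where. The scalar reference, for comparison:

#include <stdint.h>

/* Scalar reference for TRANSPOSE8: out[i][j] = in[j][i].  The macro
 * computes the same permutation entirely in ymm registers with
 * vshufps and vperm2f128. */
static void transpose8(const uint32_t in[8][8], uint32_t out[8][8])
{
        for (int i = 0; i < 8; i++)
                for (int j = 0; j < 8; j++)
                        out[i][j] = in[j][i];
}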
124##
125## Magic functions defined in FIPS 180-1
126##
127# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D)))
128.macro MAGIC_F0 regF regB regC regD regT
129 vpxor \regD, \regC, \regF
130 vpand \regB, \regF, \regF
131 vpxor \regD, \regF, \regF
132.endm
133
134# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D)
135.macro MAGIC_F1 regF regB regC regD regT
136 vpxor \regC, \regD, \regF
137 vpxor \regB, \regF, \regF
138.endm
139
140# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D))
141.macro MAGIC_F2 regF regB regC regD regT
142 vpor \regC, \regB, \regF
143 vpand \regC, \regB, \regT
144 vpand \regD, \regF, \regF
145 vpor \regT, \regF, \regF
146.endm
147
148# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D)
149.macro MAGIC_F3 regF regB regC regD regT
150 MAGIC_F1 \regF,\regB,\regC,\regD,\regT
151.endm
152
153# PROLD reg, imm, tmp
154.macro PROLD reg imm tmp
155 vpsrld $(32-\imm), \reg, \tmp
156 vpslld $\imm, \reg, \reg
157 vpor \tmp, \reg, \reg
158.endm
159
160.macro PROLD_nd reg imm tmp src
161 vpsrld $(32-\imm), \src, \tmp
162 vpslld $\imm, \src, \reg
163 vpor \tmp, \reg, \reg
164.endm
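Per 32-bit lane these macros compute the standard FIPS 180-1 round functions and rotation; the vector code simply applies them to eight lanes at once. The per-lane C equivalents, for reference:

#include <stdint.h>

static uint32_t rotl32(uint32_t x, unsigned n)          /* PROLD    */
{
        return (x << n) | (x >> (32 - n));
}

static uint32_t f0(uint32_t b, uint32_t c, uint32_t d)  /* MAGIC_F0 */
{
        return d ^ (b & (c ^ d));       /* "choose", rounds 0-19  */
}

static uint32_t f1(uint32_t b, uint32_t c, uint32_t d)  /* MAGIC_F1 */
{
        return b ^ c ^ d;               /* "parity", rounds 20-39 */
}

static uint32_t f2(uint32_t b, uint32_t c, uint32_t d)  /* MAGIC_F2 */
{
        /* (b&c) | ((b|c)&d), equivalent to the majority function
         * (b&c)|(b&d)|(c&d) named in the comment above. */
        return (b & c) | ((b | c) & d);
}

/* MAGIC_F3 (rounds 60-79) reuses f1, exactly as the macro does. */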
165
166.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
167 vpaddd \immCNT, \regE, \regE
168 vpaddd \memW*32(%rsp), \regE, \regE
169 PROLD_nd \regT, 5, \regF, \regA
170 vpaddd \regT, \regE, \regE
171 \MAGIC \regF, \regB, \regC, \regD, \regT
172 PROLD \regB, 30, \regT
173 vpaddd \regF, \regE, \regE
174.endm
175
176.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
177 vpaddd \immCNT, \regE, \regE
178 offset = ((\memW - 14) & 15) * 32
179 vmovdqu offset(%rsp), W14
180 vpxor W14, W16, W16
181 offset = ((\memW - 8) & 15) * 32
182 vpxor offset(%rsp), W16, W16
183 offset = ((\memW - 3) & 15) * 32
184 vpxor offset(%rsp), W16, W16
185 vpsrld $(32-1), W16, \regF
186 vpslld $1, W16, W16
187 vpor W16, \regF, \regF
188
189 ROTATE_W
190
191 offset = ((\memW - 0) & 15) * 32
192 vmovdqu \regF, offset(%rsp)
193 vpaddd \regF, \regE, \regE
194 PROLD_nd \regT, 5, \regF, \regA
195 vpaddd \regT, \regE, \regE
196 \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D)
197 PROLD \regB,30, \regT
198 vpaddd \regF, \regE, \regE
199.endm
200
201########################################################################
202########################################################################
203########################################################################
204
205## FRAMESZ plus pushes must be an odd multiple of 8
206YMM_SAVE = (15-15)*32
207FRAMESZ = 32*16 + YMM_SAVE
208_YMM = FRAMESZ - YMM_SAVE
209
210#define VMOVPS vmovups
211
212IDX = %rax
213inp0 = %r9
214inp1 = %r10
215inp2 = %r11
216inp3 = %r12
217inp4 = %r13
218inp5 = %r14
219inp6 = %r15
220inp7 = %rcx
221arg1 = %rdi
222arg2 = %rsi
223RSP_SAVE = %rdx
224
225# ymm0 A
226# ymm1 B
227# ymm2 C
228# ymm3 D
229# ymm4 E
230# ymm5 F AA
231# ymm6 T0 BB
232# ymm7 T1 CC
233# ymm8 T2 DD
234# ymm9 T3 EE
235# ymm10 T4 TMP
236# ymm11 T5 FUN
237# ymm12 T6 K
238# ymm13 T7 W14
239# ymm14 T8 W15
240# ymm15 T9 W16
241
242
243A = %ymm0
244B = %ymm1
245C = %ymm2
246D = %ymm3
247E = %ymm4
248F = %ymm5
249T0 = %ymm6
250T1 = %ymm7
251T2 = %ymm8
252T3 = %ymm9
253T4 = %ymm10
254T5 = %ymm11
255T6 = %ymm12
256T7 = %ymm13
257T8 = %ymm14
258T9 = %ymm15
259
260AA = %ymm5
261BB = %ymm6
262CC = %ymm7
263DD = %ymm8
264EE = %ymm9
265TMP = %ymm10
266FUN = %ymm11
267K = %ymm12
268W14 = %ymm13
269W15 = %ymm14
270W16 = %ymm15
271
272.macro ROTATE_ARGS
273 TMP_ = E
274 E = D
275 D = C
276 C = B
277 B = A
278 A = TMP_
279.endm
280
281.macro ROTATE_W
282TMP_ = W16
283W16 = W15
284W15 = W14
285W14 = TMP_
286.endm
287
288# 8 streams x 5 32bit words per digest x 4 bytes per word
289#define DIGEST_SIZE (8*5*4)
290
291.align 32
292
293# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size)
294 # arg 1 : pointer to array[8] of pointers to input data
295 # arg 2 : size (in blocks); assumed to be >= 1
296#
297ENTRY(sha1_x8_avx2)
298
299 # save callee-saved clobbered registers to comply with C function ABI
300 push %r12
301 push %r13
302 push %r14
303 push %r15
304
305 # save rsp
306 mov %rsp, RSP_SAVE
307 sub $FRAMESZ, %rsp
308
309 # align rsp to 32 bytes
310 and $~0x1F, %rsp
311
312 ## Initialize digests
313 vmovdqu 0*32(arg1), A
314 vmovdqu 1*32(arg1), B
315 vmovdqu 2*32(arg1), C
316 vmovdqu 3*32(arg1), D
317 vmovdqu 4*32(arg1), E
318
319 ## transpose input onto stack
320 mov _data_ptr+0*8(arg1),inp0
321 mov _data_ptr+1*8(arg1),inp1
322 mov _data_ptr+2*8(arg1),inp2
323 mov _data_ptr+3*8(arg1),inp3
324 mov _data_ptr+4*8(arg1),inp4
325 mov _data_ptr+5*8(arg1),inp5
326 mov _data_ptr+6*8(arg1),inp6
327 mov _data_ptr+7*8(arg1),inp7
328
329 xor IDX, IDX
330lloop:
331 vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F
332 I=0
333.rep 2
334 VMOVPS (inp0, IDX), T0
335 VMOVPS (inp1, IDX), T1
336 VMOVPS (inp2, IDX), T2
337 VMOVPS (inp3, IDX), T3
338 VMOVPS (inp4, IDX), T4
339 VMOVPS (inp5, IDX), T5
340 VMOVPS (inp6, IDX), T6
341 VMOVPS (inp7, IDX), T7
342
343 TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
344 vpshufb F, T0, T0
345 vmovdqu T0, (I*8)*32(%rsp)
346 vpshufb F, T1, T1
347 vmovdqu T1, (I*8+1)*32(%rsp)
348 vpshufb F, T2, T2
349 vmovdqu T2, (I*8+2)*32(%rsp)
350 vpshufb F, T3, T3
351 vmovdqu T3, (I*8+3)*32(%rsp)
352 vpshufb F, T4, T4
353 vmovdqu T4, (I*8+4)*32(%rsp)
354 vpshufb F, T5, T5
355 vmovdqu T5, (I*8+5)*32(%rsp)
356 vpshufb F, T6, T6
357 vmovdqu T6, (I*8+6)*32(%rsp)
358 vpshufb F, T7, T7
359 vmovdqu T7, (I*8+7)*32(%rsp)
360 add $32, IDX
361 I = (I+1)
362.endr
363 # save old digests
364 vmovdqu A,AA
365 vmovdqu B,BB
366 vmovdqu C,CC
367 vmovdqu D,DD
368 vmovdqu E,EE
369
370##
371## perform 0-79 steps
372##
373 vmovdqu K00_19(%rip), K
374## do rounds 0...15
375 I = 0
376.rep 16
377 SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
378 ROTATE_ARGS
379 I = (I+1)
380.endr
381
382## do rounds 16...19
383 vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16
384 vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15
385.rep 4
386 SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
387 ROTATE_ARGS
388 I = (I+1)
389.endr
390
391## do rounds 20...39
392 vmovdqu K20_39(%rip), K
393.rep 20
394 SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
395 ROTATE_ARGS
396 I = (I+1)
397.endr
398
399## do rounds 40...59
400 vmovdqu K40_59(%rip), K
401.rep 20
402 SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
403 ROTATE_ARGS
404 I = (I+1)
405.endr
406
407## do rounds 60...79
408 vmovdqu K60_79(%rip), K
409.rep 20
410 SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
411 ROTATE_ARGS
412 I = (I+1)
413.endr
414
415 vpaddd AA,A,A
416 vpaddd BB,B,B
417 vpaddd CC,C,C
418 vpaddd DD,D,D
419 vpaddd EE,E,E
420
421 sub $1, arg2
422 jne lloop
423
424 # write out digests
425 vmovdqu A, 0*32(arg1)
426 vmovdqu B, 1*32(arg1)
427 vmovdqu C, 2*32(arg1)
428 vmovdqu D, 3*32(arg1)
429 vmovdqu E, 4*32(arg1)
430
431 # update input pointers
432 add IDX, inp0
433 add IDX, inp1
434 add IDX, inp2
435 add IDX, inp3
436 add IDX, inp4
437 add IDX, inp5
438 add IDX, inp6
439 add IDX, inp7
440 mov inp0, _data_ptr (arg1)
441 mov inp1, _data_ptr + 1*8(arg1)
442 mov inp2, _data_ptr + 2*8(arg1)
443 mov inp3, _data_ptr + 3*8(arg1)
444 mov inp4, _data_ptr + 4*8(arg1)
445 mov inp5, _data_ptr + 5*8(arg1)
446 mov inp6, _data_ptr + 6*8(arg1)
447 mov inp7, _data_ptr + 7*8(arg1)
448
449 ################
450 ## Postamble
451
452 mov RSP_SAVE, %rsp
453
454 # restore callee-saved clobbered registers
455 pop %r15
456 pop %r14
457 pop %r13
458 pop %r12
459
460 ret
461ENDPROC(sha1_x8_avx2)
462
463
464.section .rodata.cst32.K00_19, "aM", @progbits, 32
465.align 32
466K00_19:
467.octa 0x5A8279995A8279995A8279995A827999
468.octa 0x5A8279995A8279995A8279995A827999
469
470.section .rodata.cst32.K20_39, "aM", @progbits, 32
471.align 32
472K20_39:
473.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
474.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
475
476.section .rodata.cst32.K40_59, "aM", @progbits, 32
477.align 32
478K40_59:
479.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
480.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
481
482.section .rodata.cst32.K60_79, "aM", @progbits, 32
483.align 32
484K60_79:
485.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
486.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
487
488.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
489.align 32
490PSHUFFLE_BYTE_FLIP_MASK:
491.octa 0x0c0d0e0f08090a0b0405060700010203
492.octa 0x0c0d0e0f08090a0b0405060700010203
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
deleted file mode 100644
index 53ad6e7db747..000000000000
--- a/arch/x86/crypto/sha256-mb/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
1# SPDX-License-Identifier: GPL-2.0
2#
3# Arch-specific CryptoAPI modules.
4#
5
6OBJECT_FILES_NON_STANDARD := y
7
8avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
9 $(comma)4)$(comma)%ymm2,yes,no)
10ifeq ($(avx2_supported),yes)
11 obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
12 sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
13 sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
14endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
deleted file mode 100644
index 97c5fc43e115..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb.c
+++ /dev/null
@@ -1,1013 +0,0 @@
1/*
2 * Multi buffer SHA256 algorithm Glue Code
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
55
56#include <crypto/internal/hash.h>
57#include <linux/init.h>
58#include <linux/module.h>
59#include <linux/mm.h>
60#include <linux/cryptohash.h>
61#include <linux/types.h>
62#include <linux/list.h>
63#include <crypto/scatterwalk.h>
64#include <crypto/sha.h>
65#include <crypto/mcryptd.h>
66#include <crypto/crypto_wq.h>
67#include <asm/byteorder.h>
68#include <linux/hardirq.h>
69#include <asm/fpu/api.h>
70#include "sha256_mb_ctx.h"
71
72#define FLUSH_INTERVAL 1000 /* in usec */
73
74static struct mcryptd_alg_state sha256_mb_alg_state;
75
76struct sha256_mb_ctx {
77 struct mcryptd_ahash *mcryptd_tfm;
78};
79
80static inline struct mcryptd_hash_request_ctx
81 *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx)
82{
83 struct ahash_request *areq;
84
85 areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
86 return container_of(areq, struct mcryptd_hash_request_ctx, areq);
87}
88
89static inline struct ahash_request
90 *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
91{
92 return container_of((void *) ctx, struct ahash_request, __ctx);
93}
94
95static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
96 struct ahash_request *areq)
97{
98 rctx->flag = HASH_UPDATE;
99}
100
101static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state);
102static asmlinkage struct job_sha256* (*sha256_job_mgr_submit)
103 (struct sha256_mb_mgr *state, struct job_sha256 *job);
104static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
105 (struct sha256_mb_mgr *state);
106static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
107 (struct sha256_mb_mgr *state);
108
109inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
110 uint64_t total_len)
111{
112 uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
113
114 memset(&padblock[i], 0, SHA256_BLOCK_SIZE);
115 padblock[i] = 0x80;
116
117 i += ((SHA256_BLOCK_SIZE - 1) &
118 (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1)))
119 + 1 + SHA256_PADLENGTHFIELD_SIZE;
120
121#if SHA256_PADLENGTHFIELD_SIZE == 16
122 *((uint64_t *) &padblock[i - 16]) = 0;
123#endif
124
125 *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
126
127 /* Number of extra blocks to hash */
128 return i >> SHA256_LOG2_BLOCK_SIZE;
129}
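A quick check of the arithmetic, assuming the 8-byte length field that the #if above implies for SHA-256 (SHA256_PADLENGTHFIELD_SIZE == 8): a final fragment of up to 55 bytes pads into one extra block, anything longer into two. A standalone sketch mirroring the expression:

#include <assert.h>
#include <stdint.h>

/* Number of extra blocks sha256_pad() reports, with an 8-byte
 * length field and SHA256_LOG2_BLOCK_SIZE == 6.  Sketch only. */
static uint32_t pad_blocks(uint64_t total_len)
{
        uint32_t i = total_len & 63;    /* bytes in the last partial block */

        i += ((63) & (0 - (total_len + 8 + 1))) + 1 + 8;
        return i >> 6;
}

int main(void)
{
        assert(pad_blocks(0)  == 1);    /* empty message: one pad block   */
        assert(pad_blocks(55) == 1);    /* 55 + 0x80 + 8 fits in 64 bytes */
        assert(pad_blocks(56) == 2);    /* length field no longer fits    */
        assert(pad_blocks(64) == 1);    /* block-aligned, same as empty   */
        return 0;
}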
130
131static struct sha256_hash_ctx
132 *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr,
133 struct sha256_hash_ctx *ctx)
134{
135 while (ctx) {
136 if (ctx->status & HASH_CTX_STS_COMPLETE) {
137 /* Clear PROCESSING bit */
138 ctx->status = HASH_CTX_STS_COMPLETE;
139 return ctx;
140 }
141
142 /*
143 * If the extra blocks are empty, begin hashing what remains
144 * in the user's buffer.
145 */
146 if (ctx->partial_block_buffer_length == 0 &&
147 ctx->incoming_buffer_length) {
148
149 const void *buffer = ctx->incoming_buffer;
150 uint32_t len = ctx->incoming_buffer_length;
151 uint32_t copy_len;
152
153 /*
154 * Only entire blocks can be hashed.
155 * Copy remainder to extra blocks buffer.
156 */
157 copy_len = len & (SHA256_BLOCK_SIZE-1);
158
159 if (copy_len) {
160 len -= copy_len;
161 memcpy(ctx->partial_block_buffer,
162 ((const char *) buffer + len),
163 copy_len);
164 ctx->partial_block_buffer_length = copy_len;
165 }
166
167 ctx->incoming_buffer_length = 0;
168
169 /* len should be a multiple of the block size now */
170 assert((len % SHA256_BLOCK_SIZE) == 0);
171
172 /* Set len to the number of blocks to be hashed */
173 len >>= SHA256_LOG2_BLOCK_SIZE;
174
175 if (len) {
176
177 ctx->job.buffer = (uint8_t *) buffer;
178 ctx->job.len = len;
179 ctx = (struct sha256_hash_ctx *)
180 sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
181 continue;
182 }
183 }
184
185 /*
186 * If the extra blocks are not empty, then we are
187 * either on the last block(s) or we need more
188 * user input before continuing.
189 */
190 if (ctx->status & HASH_CTX_STS_LAST) {
191
192 uint8_t *buf = ctx->partial_block_buffer;
193 uint32_t n_extra_blocks =
194 sha256_pad(buf, ctx->total_length);
195
196 ctx->status = (HASH_CTX_STS_PROCESSING |
197 HASH_CTX_STS_COMPLETE);
198 ctx->job.buffer = buf;
199 ctx->job.len = (uint32_t) n_extra_blocks;
200 ctx = (struct sha256_hash_ctx *)
201 sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
202 continue;
203 }
204
205 ctx->status = HASH_CTX_STS_IDLE;
206 return ctx;
207 }
208
209 return NULL;
210}
211
212static struct sha256_hash_ctx
213 *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr)
214{
215 /*
216 * If get_comp_job returns NULL, there are no jobs complete.
217 * If get_comp_job returns a job, verify that it is safe to return to
218 * the user. If it is not ready, resubmit the job to finish processing.
219 * If sha256_ctx_mgr_resubmit returned a job, it is ready to be
220 * returned. Otherwise, all jobs currently being managed by the
221 * hash_ctx_mgr still need processing.
222 */
223 struct sha256_hash_ctx *ctx;
224
225 ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr);
226 return sha256_ctx_mgr_resubmit(mgr, ctx);
227}
228
229static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr)
230{
231 sha256_job_mgr_init(&mgr->mgr);
232}
233
234static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
235 struct sha256_hash_ctx *ctx,
236 const void *buffer,
237 uint32_t len,
238 int flags)
239{
240 if (flags & ~(HASH_UPDATE | HASH_LAST)) {
241 /* User should not pass anything other than UPDATE or LAST */
242 ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
243 return ctx;
244 }
245
246 if (ctx->status & HASH_CTX_STS_PROCESSING) {
247 /* Cannot submit to a currently processing job. */
248 ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
249 return ctx;
250 }
251
252 if (ctx->status & HASH_CTX_STS_COMPLETE) {
253 /* Cannot update a finished job. */
254 ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
255 return ctx;
256 }
257
258 /* If we made it here, there was no error during this call to submit */
259 ctx->error = HASH_CTX_ERROR_NONE;
260
261 /* Store buffer ptr info from user */
262 ctx->incoming_buffer = buffer;
263 ctx->incoming_buffer_length = len;
264
265 /*
266 * Store the user's request flags and mark this ctx as currently
267 * being processed.
268 */
269 ctx->status = (flags & HASH_LAST) ?
270 (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
271 HASH_CTX_STS_PROCESSING;
272
273 /* Advance byte counter */
274 ctx->total_length += len;
275
276 /*
277 * If there is anything currently buffered in the extra blocks,
278 * append to it until it contains a whole block.
279 * Or if the user's buffer contains less than a whole block,
280 * append as much as possible to the extra block.
281 */
282 if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) {
283 /*
284 * Compute how many bytes to copy from user buffer into
285 * extra block
286 */
287 uint32_t copy_len = SHA256_BLOCK_SIZE -
288 ctx->partial_block_buffer_length;
289 if (len < copy_len)
290 copy_len = len;
291
292 if (copy_len) {
293 /* Copy and update relevant pointers and counters */
294 memcpy(
295 &ctx->partial_block_buffer[ctx->partial_block_buffer_length],
296 buffer, copy_len);
297
298 ctx->partial_block_buffer_length += copy_len;
299 ctx->incoming_buffer = (const void *)
300 ((const char *)buffer + copy_len);
301 ctx->incoming_buffer_length = len - copy_len;
302 }
303
304 /* The extra block should never contain more than 1 block */
305 assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE);
306
307 /*
308 * If the extra block buffer contains exactly 1 block,
309 * it can be hashed.
310 */
311 if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) {
312 ctx->partial_block_buffer_length = 0;
313
314 ctx->job.buffer = ctx->partial_block_buffer;
315 ctx->job.len = 1;
316 ctx = (struct sha256_hash_ctx *)
317 sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
318 }
319 }
320
321 return sha256_ctx_mgr_resubmit(mgr, ctx);
322}
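For all its length, the function enforces one short invariant: stray bytes accumulate in the one-block side buffer, the buffer is submitted only once it holds exactly one full block, and everything else is hashed straight from the user's buffer in whole blocks. A minimal C sketch of that rule (hypothetical helper, not the kernel code):

#include <stdint.h>
#include <string.h>

#define BLOCK 64        /* SHA256_BLOCK_SIZE */

struct partial {
        uint8_t buf[BLOCK];
        uint32_t len;   /* always < BLOCK between calls */
};

/* Copy input bytes into the side buffer; returns how many were
 * consumed.  When *ready is set the caller hashes buf as one block,
 * then hashes the rest of the input in whole blocks. */
static uint32_t buffer_partial(struct partial *p, const uint8_t *data,
                               uint32_t len, int *ready)
{
        uint32_t copy = BLOCK - p->len;

        if (copy > len)
                copy = len;
        memcpy(p->buf + p->len, data, copy);
        p->len += copy;
        *ready = (p->len == BLOCK);
        if (*ready)
                p->len = 0;
        return copy;
}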
323
324static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr)
325{
326 struct sha256_hash_ctx *ctx;
327
328 while (1) {
329 ctx = (struct sha256_hash_ctx *)
330 sha256_job_mgr_flush(&mgr->mgr);
331
332 /* If flush returned 0, there are no more jobs in flight. */
333 if (!ctx)
334 return NULL;
335
336 /*
337 * If flush returned a job, resubmit the job to finish
338 * processing.
339 */
340 ctx = sha256_ctx_mgr_resubmit(mgr, ctx);
341
342 /*
343 * If sha256_ctx_mgr_resubmit returned a job, it is ready to
344 * be returned. Otherwise, all jobs currently being managed by
345 * the sha256_ctx_mgr still need processing. Loop.
346 */
347 if (ctx)
348 return ctx;
349 }
350}
351
352static int sha256_mb_init(struct ahash_request *areq)
353{
354 struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
355
356 hash_ctx_init(sctx);
357 sctx->job.result_digest[0] = SHA256_H0;
358 sctx->job.result_digest[1] = SHA256_H1;
359 sctx->job.result_digest[2] = SHA256_H2;
360 sctx->job.result_digest[3] = SHA256_H3;
361 sctx->job.result_digest[4] = SHA256_H4;
362 sctx->job.result_digest[5] = SHA256_H5;
363 sctx->job.result_digest[6] = SHA256_H6;
364 sctx->job.result_digest[7] = SHA256_H7;
365 sctx->total_length = 0;
366 sctx->partial_block_buffer_length = 0;
367 sctx->status = HASH_CTX_STS_IDLE;
368
369 return 0;
370}
371
372static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
373{
374 int i;
375 struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
376 __be32 *dst = (__be32 *) rctx->out;
377
378 for (i = 0; i < 8; ++i)
379 dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
380
381 return 0;
382}
383
384static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
385 struct mcryptd_alg_cstate *cstate, bool flush)
386{
387 int flag = HASH_UPDATE;
388 int nbytes, err = 0;
389 struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
390 struct sha256_hash_ctx *sha_ctx;
391
392 /* more work ? */
393 while (!(rctx->flag & HASH_DONE)) {
394 nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
395 if (nbytes < 0) {
396 err = nbytes;
397 goto out;
398 }
399 /* check if the walk is done */
400 if (crypto_ahash_walk_last(&rctx->walk)) {
401 rctx->flag |= HASH_DONE;
402 if (rctx->flag & HASH_FINAL)
403 flag |= HASH_LAST;
404
405 }
406 sha_ctx = (struct sha256_hash_ctx *)
407 ahash_request_ctx(&rctx->areq);
408 kernel_fpu_begin();
409 sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx,
410 rctx->walk.data, nbytes, flag);
411 if (!sha_ctx) {
412 if (flush)
413 sha_ctx = sha256_ctx_mgr_flush(cstate->mgr);
414 }
415 kernel_fpu_end();
416 if (sha_ctx)
417 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
418 else {
419 rctx = NULL;
420 goto out;
421 }
422 }
423
424 /* copy the results */
425 if (rctx->flag & HASH_FINAL)
426 sha256_mb_set_results(rctx);
427
428out:
429 *ret_rctx = rctx;
430 return err;
431}
432
433static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
434 struct mcryptd_alg_cstate *cstate,
435 int err)
436{
437 struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
438 struct sha256_hash_ctx *sha_ctx;
439 struct mcryptd_hash_request_ctx *req_ctx;
440 int ret;
441
442 /* remove from work list */
443 spin_lock(&cstate->work_lock);
444 list_del(&rctx->waiter);
445 spin_unlock(&cstate->work_lock);
446
447 if (irqs_disabled())
448 rctx->complete(&req->base, err);
449 else {
450 local_bh_disable();
451 rctx->complete(&req->base, err);
452 local_bh_enable();
453 }
454
455 /* check to see if there are other jobs that are done */
456 sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
457 while (sha_ctx) {
458 req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
459 ret = sha_finish_walk(&req_ctx, cstate, false);
460 if (req_ctx) {
461 spin_lock(&cstate->work_lock);
462 list_del(&req_ctx->waiter);
463 spin_unlock(&cstate->work_lock);
464
465 req = cast_mcryptd_ctx_to_req(req_ctx);
466 if (irqs_disabled())
467 req_ctx->complete(&req->base, ret);
468 else {
469 local_bh_disable();
470 req_ctx->complete(&req->base, ret);
471 local_bh_enable();
472 }
473 }
474 sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
475 }
476
477 return 0;
478}
479
480static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
481 struct mcryptd_alg_cstate *cstate)
482{
483 unsigned long next_flush;
484 unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
485
486 /* initialize tag */
487 rctx->tag.arrival = jiffies; /* tag the arrival time */
488 rctx->tag.seq_num = cstate->next_seq_num++;
489 next_flush = rctx->tag.arrival + delay;
490 rctx->tag.expire = next_flush;
491
492 spin_lock(&cstate->work_lock);
493 list_add_tail(&rctx->waiter, &cstate->work_list);
494 spin_unlock(&cstate->work_lock);
495
496 mcryptd_arm_flusher(cstate, delay);
497}
498
499static int sha256_mb_update(struct ahash_request *areq)
500{
501 struct mcryptd_hash_request_ctx *rctx =
502 container_of(areq, struct mcryptd_hash_request_ctx, areq);
503 struct mcryptd_alg_cstate *cstate =
504 this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
505
506 struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
507 struct sha256_hash_ctx *sha_ctx;
508 int ret = 0, nbytes;
509
510 /* sanity check */
511 if (rctx->tag.cpu != smp_processor_id()) {
512 pr_err("mcryptd error: cpu clash\n");
513 goto done;
514 }
515
516 /* need to init context */
517 req_ctx_init(rctx, areq);
518
519 nbytes = crypto_ahash_walk_first(req, &rctx->walk);
520
521 if (nbytes < 0) {
522 ret = nbytes;
523 goto done;
524 }
525
526 if (crypto_ahash_walk_last(&rctx->walk))
527 rctx->flag |= HASH_DONE;
528
529 /* submit */
530 sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
531 sha256_mb_add_list(rctx, cstate);
532 kernel_fpu_begin();
533 sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
534 nbytes, HASH_UPDATE);
535 kernel_fpu_end();
536
537 /* check if anything is returned */
538 if (!sha_ctx)
539 return -EINPROGRESS;
540
541 if (sha_ctx->error) {
542 ret = sha_ctx->error;
543 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
544 goto done;
545 }
546
547 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
548 ret = sha_finish_walk(&rctx, cstate, false);
549
550 if (!rctx)
551 return -EINPROGRESS;
552done:
553 sha_complete_job(rctx, cstate, ret);
554 return ret;
555}
556
557static int sha256_mb_finup(struct ahash_request *areq)
558{
559 struct mcryptd_hash_request_ctx *rctx =
560 container_of(areq, struct mcryptd_hash_request_ctx, areq);
561 struct mcryptd_alg_cstate *cstate =
562 this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
563
564 struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
565 struct sha256_hash_ctx *sha_ctx;
566 int ret = 0, flag = HASH_UPDATE, nbytes;
567
568 /* sanity check */
569 if (rctx->tag.cpu != smp_processor_id()) {
570 pr_err("mcryptd error: cpu clash\n");
571 goto done;
572 }
573
574 /* need to init context */
575 req_ctx_init(rctx, areq);
576
577 nbytes = crypto_ahash_walk_first(req, &rctx->walk);
578
579 if (nbytes < 0) {
580 ret = nbytes;
581 goto done;
582 }
583
584 if (crypto_ahash_walk_last(&rctx->walk)) {
585 rctx->flag |= HASH_DONE;
586 flag = HASH_LAST;
587 }
588
589 /* submit */
590 rctx->flag |= HASH_FINAL;
591 sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
592 sha256_mb_add_list(rctx, cstate);
593
594 kernel_fpu_begin();
595 sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
596 nbytes, flag);
597 kernel_fpu_end();
598
599 /* check if anything is returned */
600 if (!sha_ctx)
601 return -EINPROGRESS;
602
603 if (sha_ctx->error) {
604 ret = sha_ctx->error;
605 goto done;
606 }
607
608 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
609 ret = sha_finish_walk(&rctx, cstate, false);
610 if (!rctx)
611 return -EINPROGRESS;
612done:
613 sha_complete_job(rctx, cstate, ret);
614 return ret;
615}
616
617static int sha256_mb_final(struct ahash_request *areq)
618{
619 struct mcryptd_hash_request_ctx *rctx =
620 container_of(areq, struct mcryptd_hash_request_ctx,
621 areq);
622 struct mcryptd_alg_cstate *cstate =
623 this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
624
625 struct sha256_hash_ctx *sha_ctx;
626 int ret = 0;
627 u8 data;
628
629 /* sanity check */
630 if (rctx->tag.cpu != smp_processor_id()) {
631 pr_err("mcryptd error: cpu clash\n");
632 goto done;
633 }
634
635 /* need to init context */
636 req_ctx_init(rctx, areq);
637
638 rctx->flag |= HASH_DONE | HASH_FINAL;
639
640 sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
641 /* flag HASH_FINAL and 0 data size */
642 sha256_mb_add_list(rctx, cstate);
643 kernel_fpu_begin();
644 sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
645 HASH_LAST);
646 kernel_fpu_end();
647
648 /* check if anything is returned */
649 if (!sha_ctx)
650 return -EINPROGRESS;
651
652 if (sha_ctx->error) {
653 ret = sha_ctx->error;
654 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
655 goto done;
656 }
657
658 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
659 ret = sha_finish_walk(&rctx, cstate, false);
660 if (!rctx)
661 return -EINPROGRESS;
662done:
663 sha_complete_job(rctx, cstate, ret);
664 return ret;
665}
666
667static int sha256_mb_export(struct ahash_request *areq, void *out)
668{
669 struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
670
671 memcpy(out, sctx, sizeof(*sctx));
672
673 return 0;
674}
675
676static int sha256_mb_import(struct ahash_request *areq, const void *in)
677{
678 struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
679
680 memcpy(sctx, in, sizeof(*sctx));
681
682 return 0;
683}
684
685static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm)
686{
687 struct mcryptd_ahash *mcryptd_tfm;
688 struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
689 struct mcryptd_hash_ctx *mctx;
690
691 mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb",
692 CRYPTO_ALG_INTERNAL,
693 CRYPTO_ALG_INTERNAL);
694 if (IS_ERR(mcryptd_tfm))
695 return PTR_ERR(mcryptd_tfm);
696 mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
697 mctx->alg_state = &sha256_mb_alg_state;
698 ctx->mcryptd_tfm = mcryptd_tfm;
699 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
700 sizeof(struct ahash_request) +
701 crypto_ahash_reqsize(&mcryptd_tfm->base));
702
703 return 0;
704}
705
706static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm)
707{
708 struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
709
710 mcryptd_free_ahash(ctx->mcryptd_tfm);
711}
712
713static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm)
714{
715 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
716 sizeof(struct ahash_request) +
717 sizeof(struct sha256_hash_ctx));
718
719 return 0;
720}
721
722static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm)
723{
724 struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
725
726 mcryptd_free_ahash(ctx->mcryptd_tfm);
727}
728
729static struct ahash_alg sha256_mb_areq_alg = {
730 .init = sha256_mb_init,
731 .update = sha256_mb_update,
732 .final = sha256_mb_final,
733 .finup = sha256_mb_finup,
734 .export = sha256_mb_export,
735 .import = sha256_mb_import,
736 .halg = {
737 .digestsize = SHA256_DIGEST_SIZE,
738 .statesize = sizeof(struct sha256_hash_ctx),
739 .base = {
740 .cra_name = "__sha256-mb",
741 .cra_driver_name = "__intel_sha256-mb",
742 .cra_priority = 100,
743			/*
744			 * use the ASYNC flag, as some buffers in the
745			 * multi-buffer algo may not have completed before
746			 * the hashing thread sleeps
747			 */
748 .cra_flags = CRYPTO_ALG_ASYNC |
749 CRYPTO_ALG_INTERNAL,
750 .cra_blocksize = SHA256_BLOCK_SIZE,
751 .cra_module = THIS_MODULE,
752 .cra_list = LIST_HEAD_INIT
753 (sha256_mb_areq_alg.halg.base.cra_list),
754 .cra_init = sha256_mb_areq_init_tfm,
755 .cra_exit = sha256_mb_areq_exit_tfm,
756 .cra_ctxsize = sizeof(struct sha256_hash_ctx),
757 }
758 }
759};
760
761static int sha256_mb_async_init(struct ahash_request *req)
762{
763 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
764 struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
765 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
766 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
767
768 memcpy(mcryptd_req, req, sizeof(*req));
769 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
770 return crypto_ahash_init(mcryptd_req);
771}
772
773static int sha256_mb_async_update(struct ahash_request *req)
774{
775 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
776
777 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
778 struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
779 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
780
781 memcpy(mcryptd_req, req, sizeof(*req));
782 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
783 return crypto_ahash_update(mcryptd_req);
784}
785
786static int sha256_mb_async_finup(struct ahash_request *req)
787{
788 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
789
790 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
791 struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
792 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
793
794 memcpy(mcryptd_req, req, sizeof(*req));
795 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
796 return crypto_ahash_finup(mcryptd_req);
797}
798
799static int sha256_mb_async_final(struct ahash_request *req)
800{
801 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
802
803 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
804 struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
805 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
806
807 memcpy(mcryptd_req, req, sizeof(*req));
808 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
809 return crypto_ahash_final(mcryptd_req);
810}
811
812static int sha256_mb_async_digest(struct ahash_request *req)
813{
814 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
815 struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
816 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
817 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
818
819 memcpy(mcryptd_req, req, sizeof(*req));
820 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
821 return crypto_ahash_digest(mcryptd_req);
822}
823
824static int sha256_mb_async_export(struct ahash_request *req, void *out)
825{
826 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
827 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
828 struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
829 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
830
831 memcpy(mcryptd_req, req, sizeof(*req));
832 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
833 return crypto_ahash_export(mcryptd_req, out);
834}
835
836static int sha256_mb_async_import(struct ahash_request *req, const void *in)
837{
838 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
839 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
840 struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
841 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
842 struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
843 struct mcryptd_hash_request_ctx *rctx;
844 struct ahash_request *areq;
845
846 memcpy(mcryptd_req, req, sizeof(*req));
847 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
848 rctx = ahash_request_ctx(mcryptd_req);
849 areq = &rctx->areq;
850
851 ahash_request_set_tfm(areq, child);
852 ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
853 rctx->complete, req);
854
855 return crypto_ahash_import(mcryptd_req, in);
856}
857
858static struct ahash_alg sha256_mb_async_alg = {
859 .init = sha256_mb_async_init,
860 .update = sha256_mb_async_update,
861 .final = sha256_mb_async_final,
862 .finup = sha256_mb_async_finup,
863 .export = sha256_mb_async_export,
864 .import = sha256_mb_async_import,
865 .digest = sha256_mb_async_digest,
866 .halg = {
867 .digestsize = SHA256_DIGEST_SIZE,
868 .statesize = sizeof(struct sha256_hash_ctx),
869 .base = {
870 .cra_name = "sha256",
871 .cra_driver_name = "sha256_mb",
872 /*
873 * Low priority, since with few concurrent hash requests
874 * this is extremely slow due to the flush delay. Users
875 * whose workloads would benefit from this can request
876 * it explicitly by driver name, or can increase its
877			 * priority at runtime using NETLINK_CRYPTO (sketch below).
878 */
879 .cra_priority = 50,
880 .cra_flags = CRYPTO_ALG_ASYNC,
881 .cra_blocksize = SHA256_BLOCK_SIZE,
882 .cra_module = THIS_MODULE,
883 .cra_list = LIST_HEAD_INIT
884 (sha256_mb_async_alg.halg.base.cra_list),
885 .cra_init = sha256_mb_async_init_tfm,
886 .cra_exit = sha256_mb_async_exit_tfm,
887 .cra_ctxsize = sizeof(struct sha256_mb_ctx),
888 .cra_alignmask = 0,
889 },
890 },
891};
892
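As the comment in the struct above notes, the async algorithm registers at priority 50 so generic callers do not pick it up by accident; a workload that wants it can raise cra_priority over the crypto netlink interface. A hedged userspace sketch using CRYPTO_MSG_UPDATEALG with a CRYPTOCFGA_PRIORITY_VAL attribute; this is an untested illustration (needs CAP_NET_ADMIN and CONFIG_CRYPTO_USER, and the priority value 300 is arbitrary):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/cryptouser.h>

	int main(void)
	{
		struct {
			struct nlmsghdr nh;
			struct crypto_user_alg alg;	/* selects the algorithm */
			struct nlattr attr;		/* CRYPTOCFGA_PRIORITY_VAL */
			__u32 prio;
		} req;
		struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_CRYPTO);

		memset(&req, 0, sizeof(req));
		req.nh.nlmsg_len = sizeof(req);
		req.nh.nlmsg_type = CRYPTO_MSG_UPDATEALG;
		req.nh.nlmsg_flags = NLM_F_REQUEST;
		strcpy(req.alg.cru_driver_name, "sha256_mb");
		req.attr.nla_len = sizeof(req.attr) + sizeof(req.prio);
		req.attr.nla_type = CRYPTOCFGA_PRIORITY_VAL;
		req.prio = 300;		/* hypothetical new cra_priority */

		if (fd < 0 || sendto(fd, &req, sizeof(req), 0,
				     (struct sockaddr *)&sa, sizeof(sa)) < 0)
			perror("NETLINK_CRYPTO");
		close(fd);
		return 0;
	}
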
893static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate)
894{
895 struct mcryptd_hash_request_ctx *rctx;
896 unsigned long cur_time;
897 unsigned long next_flush = 0;
898 struct sha256_hash_ctx *sha_ctx;
899
901 cur_time = jiffies;
902
903 while (!list_empty(&cstate->work_list)) {
904 rctx = list_entry(cstate->work_list.next,
905 struct mcryptd_hash_request_ctx, waiter);
906 if (time_before(cur_time, rctx->tag.expire))
907 break;
908 kernel_fpu_begin();
909 sha_ctx = (struct sha256_hash_ctx *)
910 sha256_ctx_mgr_flush(cstate->mgr);
911 kernel_fpu_end();
912 if (!sha_ctx) {
913			pr_err("sha256_mb error: nothing got flushed for non-empty list\n");
915 break;
916 }
917 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
918 sha_finish_walk(&rctx, cstate, true);
919 sha_complete_job(rctx, cstate, 0);
920 }
921
922 if (!list_empty(&cstate->work_list)) {
923 rctx = list_entry(cstate->work_list.next,
924 struct mcryptd_hash_request_ctx, waiter);
925		/* get the hash context and the next flush time */
926 next_flush = rctx->tag.expire;
927 mcryptd_arm_flusher(cstate, get_delay(next_flush));
928 }
929 return next_flush;
930}
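
The work_list is appended to in submission order, so its head always carries the earliest expiry; once everything past its deadline has been flushed, the flusher re-arms itself for the head's expiry via mcryptd_arm_flusher(). get_delay() lives in crypto/mcryptd.c; a sketch of the shape that helper presumably has (an assumption, not copied from that file):

	/* assumed helper shape: clamp an absolute jiffies deadline to a
	 * non-negative relative delay for the delayed work */
	static unsigned long get_delay(unsigned long expire)
	{
		long delay = (long)expire - (long)jiffies;

		return delay > 0 ? (unsigned long)delay : 0;
	}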
931
932static int __init sha256_mb_mod_init(void)
933{
935 int cpu;
936 int err;
937 struct mcryptd_alg_cstate *cpu_state;
938
939 /* check for dependent cpu features */
940 if (!boot_cpu_has(X86_FEATURE_AVX2) ||
941 !boot_cpu_has(X86_FEATURE_BMI2))
942 return -ENODEV;
943
944 /* initialize multibuffer structures */
945 sha256_mb_alg_state.alg_cstate = alloc_percpu
946 (struct mcryptd_alg_cstate);
947
948 sha256_job_mgr_init = sha256_mb_mgr_init_avx2;
949 sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2;
950 sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2;
951 sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2;
952
953 if (!sha256_mb_alg_state.alg_cstate)
954 return -ENOMEM;
955 for_each_possible_cpu(cpu) {
956 cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
957 cpu_state->next_flush = 0;
958 cpu_state->next_seq_num = 0;
959 cpu_state->flusher_engaged = false;
960 INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
961 cpu_state->cpu = cpu;
962 cpu_state->alg_state = &sha256_mb_alg_state;
963 cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr),
964 GFP_KERNEL);
965 if (!cpu_state->mgr)
966 goto err2;
967 sha256_ctx_mgr_init(cpu_state->mgr);
968 INIT_LIST_HEAD(&cpu_state->work_list);
969 spin_lock_init(&cpu_state->work_lock);
970 }
971 sha256_mb_alg_state.flusher = &sha256_mb_flusher;
972
973 err = crypto_register_ahash(&sha256_mb_areq_alg);
974 if (err)
975 goto err2;
976 err = crypto_register_ahash(&sha256_mb_async_alg);
977 if (err)
978 goto err1;
979
981 return 0;
982err1:
983 crypto_unregister_ahash(&sha256_mb_areq_alg);
984err2:
985 for_each_possible_cpu(cpu) {
986 cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
987 kfree(cpu_state->mgr);
988 }
989 free_percpu(sha256_mb_alg_state.alg_cstate);
990 return -ENODEV;
991}
992
993static void __exit sha256_mb_mod_fini(void)
994{
995 int cpu;
996 struct mcryptd_alg_cstate *cpu_state;
997
998 crypto_unregister_ahash(&sha256_mb_async_alg);
999 crypto_unregister_ahash(&sha256_mb_areq_alg);
1000 for_each_possible_cpu(cpu) {
1001 cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
1002 kfree(cpu_state->mgr);
1003 }
1004 free_percpu(sha256_mb_alg_state.alg_cstate);
1005}
1006
1007module_init(sha256_mb_mod_init);
1008module_exit(sha256_mb_mod_fini);
1009
1010MODULE_LICENSE("GPL");
1011MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated");
1012
1013MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
deleted file mode 100644
index 7c432543dc7f..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * Header file for multi buffer SHA256 context
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#ifndef _SHA_MB_CTX_INTERNAL_H
55#define _SHA_MB_CTX_INTERNAL_H
56
57#include "sha256_mb_mgr.h"
58
59#define HASH_UPDATE 0x00
60#define HASH_LAST 0x01
61#define HASH_DONE 0x02
62#define HASH_FINAL 0x04
63
64#define HASH_CTX_STS_IDLE 0x00
65#define HASH_CTX_STS_PROCESSING 0x01
66#define HASH_CTX_STS_LAST 0x02
67#define HASH_CTX_STS_COMPLETE 0x04
68
69enum hash_ctx_error {
70 HASH_CTX_ERROR_NONE = 0,
71 HASH_CTX_ERROR_INVALID_FLAGS = -1,
72 HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
73 HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
74
75#ifdef HASH_CTX_DEBUG
76 HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
77#endif
78};
79
80
81#define hash_ctx_user_data(ctx) ((ctx)->user_data)
82#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
83#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
84#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
85#define hash_ctx_status(ctx) ((ctx)->status)
86#define hash_ctx_error(ctx) ((ctx)->error)
87#define hash_ctx_init(ctx) \
88 do { \
89 (ctx)->error = HASH_CTX_ERROR_NONE; \
90 (ctx)->status = HASH_CTX_STS_COMPLETE; \
91 } while (0)
92
93
94/* Hash Constants and Typedefs */
95#define SHA256_DIGEST_LENGTH 8
96#define SHA256_LOG2_BLOCK_SIZE 6
97
98#define SHA256_PADLENGTHFIELD_SIZE 8
99
100#ifdef SHA_MB_DEBUG
101#define assert(expr) \
102do { \
103 if (unlikely(!(expr))) { \
104 printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
105 #expr, __FILE__, __func__, __LINE__); \
106 } \
107} while (0)
108#else
109#define assert(expr) do {} while (0)
110#endif
111
112struct sha256_ctx_mgr {
113 struct sha256_mb_mgr mgr;
114};
115
116/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
117
118struct sha256_hash_ctx {
119 /* Must be at struct offset 0 */
120 struct job_sha256 job;
121 /* status flag */
122 int status;
123 /* error flag */
124 int error;
125
126 uint64_t total_length;
127 const void *incoming_buffer;
128 uint32_t incoming_buffer_length;
129 uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2];
130 uint32_t partial_block_buffer_length;
131 void *user_data;
132};
133
134#endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
deleted file mode 100644
index b01ae408c56d..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
+++ /dev/null
@@ -1,108 +0,0 @@
1/*
2 * Header file for multi buffer SHA256 algorithm manager
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53#ifndef __SHA_MB_MGR_H
54#define __SHA_MB_MGR_H
55
56#include <linux/types.h>
57
58#define NUM_SHA256_DIGEST_WORDS 8
59
60enum job_sts { STS_UNKNOWN = 0,
61 STS_BEING_PROCESSED = 1,
62 STS_COMPLETED = 2,
63 STS_INTERNAL_ERROR = 3,
64 STS_ERROR = 4
65};
66
67struct job_sha256 {
68 u8 *buffer;
69 u32 len;
70 u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
71 enum job_sts status;
72 void *user_data;
73};
74
75/* SHA256 out-of-order scheduler */
76
77/* typedef uint32_t sha8_digest_array[8][8]; */
78
79struct sha256_args_x8 {
80 uint32_t digest[8][8];
81 uint8_t *data_ptr[8];
82};
83
84struct sha256_lane_data {
85 struct job_sha256 *job_in_lane;
86};
87
88struct sha256_mb_mgr {
89 struct sha256_args_x8 args;
90
91 uint32_t lens[8];
92
93	/* a nibble stack of unused lane indices (0...7), capped with an
94	 * 0xF sentinel nibble (see the sketch after this header) */
95	uint64_t unused_lanes;
96	struct sha256_lane_data ldata[8];
97};
98
99
100#define SHA256_MB_MGR_NUM_LANES_AVX2 8
101
102void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
103struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
104 struct job_sha256 *job);
105struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
106struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
107
108#endif
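
The unused_lanes word is a small stack of free-lane nibbles: submit pops the low nibble, flush pushes a freed lane back, and the 0xF cap means an idle manager reads 0xF76543210 (bit 32+3 set, which the flush entry points test for "all lanes empty") while a fully loaded one reads just 0xF. Hypothetical C helpers, not part of this header, mirroring what the AVX2 submit/flush assembly does:

	static inline unsigned int lane_pop(struct sha256_mb_mgr *state)
	{
		unsigned int lane = state->unused_lanes & 0xF;

		state->unused_lanes >>= 4;	/* as in submit_avx2 */
		return lane;
	}

	static inline void lane_push(struct sha256_mb_mgr *state,
				     unsigned int lane)
	{
		/* as in flush_avx2 / get_comp_job_avx2 */
		state->unused_lanes = (state->unused_lanes << 4) | lane;
	}
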
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
deleted file mode 100644
index 5c377bac21d0..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,304 +0,0 @@
1/*
2 * Header file for multi buffer SHA256 algorithm data structure
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54# Macros for defining data structures
55
56# Usage example
57
58#START_FIELDS # JOB_AES
59### name size align
60#FIELD _plaintext, 8, 8 # pointer to plaintext
61#FIELD _ciphertext, 8, 8 # pointer to ciphertext
62#FIELD _IV, 16, 8 # IV
63#FIELD _keys, 8, 8 # pointer to keys
64#FIELD _len, 4, 4 # length in bytes
65#FIELD _status, 4, 4 # status enumeration
66#FIELD _user_data, 8, 8 # pointer to user data
67#UNION _union, size1, align1, \
68# size2, align2, \
69# size3, align3, \
70# ...
71#END_FIELDS
72#%assign _JOB_AES_size _FIELD_OFFSET
73#%assign _JOB_AES_align _STRUCT_ALIGN
74
75#########################################################################
76
77# Alternate "struc-like" syntax:
78# STRUCT job_aes2
79# RES_Q .plaintext, 1
80# RES_Q .ciphertext, 1
81# RES_DQ .IV, 1
82# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
83# RES_U .union, size1, align1, \
84# size2, align2, \
85# ...
86# ENDSTRUCT
87# # Following only needed if nesting
88# %assign job_aes2_size _FIELD_OFFSET
89# %assign job_aes2_align _STRUCT_ALIGN
90#
91# RES_* macros take a name, a count and an optional alignment.
92# The count is in terms of the base size of the macro, and the
93# default alignment is the base size.
94# The macros are:
95# Macro Base size
96# RES_B 1
97# RES_W 2
98# RES_D 4
99# RES_Q 8
100# RES_DQ 16
101# RES_Y 32
102# RES_Z 64
103#
104# RES_U defines a union. Its arguments are a name and two or more
105# pairs of "size, alignment"
106#
107# The two assigns are only needed if this structure is being nested
108# within another. Even if the assigns are not done, one can still use
109# STRUCT_NAME_size as the size of the structure.
110#
111# Note that for nesting, you still need to assign to STRUCT_NAME_size.
112#
113# The differences between this and using "struc" directly are that each
114# type is implicitly aligned to its natural length (although this can be
115# over-ridden with an explicit third parameter), and that the structure
116# is padded at the end to its overall alignment.
117#
118
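Read the FIELD machinery below as offset bookkeeping: each field's offset is rounded up to its alignment, the running struct alignment is the maximum field alignment seen, and END_FIELDS pads the total size to that alignment. A C restatement of the same arithmetic (illustrative, mirroring the .macro bodies that follow):

	static unsigned int field_offset, struct_align;

	/* FIELD: returns this field's offset and advances the cursor */
	static unsigned int field(unsigned int size, unsigned int align)
	{
		unsigned int off = (field_offset + align - 1) & ~(align - 1);

		field_offset = off + size;
		if (align > struct_align)
			struct_align = align;
		return off;
	}

	/* END_FIELDS: pad the struct to its overall alignment */
	static unsigned int end_fields(void)
	{
		field_offset = (field_offset + struct_align - 1) &
			       ~(struct_align - 1);
		return field_offset;	/* final struct size */
	}
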
119#########################################################################
120
121#ifndef _DATASTRUCT_ASM_
122#define _DATASTRUCT_ASM_
123
124#define SZ8 8*SHA256_DIGEST_WORD_SIZE
125#define ROUNDS 64*SZ8
126#define PTR_SZ 8
127#define SHA256_DIGEST_WORD_SIZE 4
128#define MAX_SHA256_LANES 8
129#define SHA256_DIGEST_WORDS 8
130#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
131#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
132#define SHA256_BLK_SZ 64
133
134# START_FIELDS
135.macro START_FIELDS
136 _FIELD_OFFSET = 0
137 _STRUCT_ALIGN = 0
138.endm
139
140# FIELD name size align
141.macro FIELD name size align
142 _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
143 \name = _FIELD_OFFSET
144 _FIELD_OFFSET = _FIELD_OFFSET + (\size)
145.if (\align > _STRUCT_ALIGN)
146 _STRUCT_ALIGN = \align
147.endif
148.endm
149
150# END_FIELDS
151.macro END_FIELDS
152 _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
153.endm
154
155########################################################################
156
157.macro STRUCT p1
158START_FIELDS
159.struc \p1
160.endm
161
162.macro ENDSTRUCT
163 tmp = _FIELD_OFFSET
164 END_FIELDS
165	tmp = (_FIELD_OFFSET - tmp)
166.if (tmp > 0)
167 .lcomm tmp
168.endif
169.endstruc
170.endm
171
172## RES_int name size align
173.macro RES_int p1 p2 p3
174 name = \p1
175 size = \p2
176	align = \p3
177
178 _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
179.align align
180.lcomm name size
181 _FIELD_OFFSET = _FIELD_OFFSET + (size)
182.if (align > _STRUCT_ALIGN)
183 _STRUCT_ALIGN = align
184.endif
185.endm
186
187# macro RES_B name, size [, align]
188.macro RES_B _name, _size, _align=1
189RES_int _name _size _align
190.endm
191
192# macro RES_W name, size [, align]
193.macro RES_W _name, _size, _align=2
194RES_int _name 2*(_size) _align
195.endm
196
197# macro RES_D name, size [, align]
198.macro RES_D _name, _size, _align=4
199RES_int _name 4*(_size) _align
200.endm
201
202# macro RES_Q name, size [, align]
203.macro RES_Q _name, _size, _align=8
204RES_int _name 8*(_size) _align
205.endm
206
207# macro RES_DQ name, size [, align]
208.macro RES_DQ _name, _size, _align=16
209RES_int _name 16*(_size) _align
210.endm
211
212# macro RES_Y name, size [, align]
213.macro RES_Y _name, _size, _align=32
214RES_int _name 32*(_size) _align
215.endm
216
217# macro RES_Z name, size [, align]
218.macro RES_Z _name, _size, _align=64
219RES_int _name 64*(_size) _align
220.endm
221
222#endif
223
224
225########################################################################
226#### Define SHA256 Out Of Order Data Structures
227########################################################################
228
229START_FIELDS # LANE_DATA
230### name size align
231FIELD _job_in_lane, 8, 8 # pointer to job object
232END_FIELDS
233
234 _LANE_DATA_size = _FIELD_OFFSET
235 _LANE_DATA_align = _STRUCT_ALIGN
236
237########################################################################
238
239START_FIELDS # SHA256_ARGS_X4
240### name size align
241FIELD _digest, 4*8*8, 4 # transposed digest
242FIELD _data_ptr, 8*8, 8 # array of pointers to data
243END_FIELDS
244
245 _SHA256_ARGS_X4_size = _FIELD_OFFSET
246 _SHA256_ARGS_X4_align = _STRUCT_ALIGN
247 _SHA256_ARGS_X8_size = _FIELD_OFFSET
248 _SHA256_ARGS_X8_align = _STRUCT_ALIGN
249
250#######################################################################
251
252START_FIELDS # MB_MGR
253### name size align
254FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
255FIELD _lens, 4*8, 8
256FIELD _unused_lanes, 8, 8
257FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
258END_FIELDS
259
260 _MB_MGR_size = _FIELD_OFFSET
261 _MB_MGR_align = _STRUCT_ALIGN
262
263_args_digest = _args + _digest
264_args_data_ptr = _args + _data_ptr
265
266#######################################################################
267
268START_FIELDS #STACK_FRAME
269### name size align
270FIELD _data, 16*SZ8, 1 # transposed digest
271FIELD _digest, 8*SZ8, 1 # array of pointers to data
272FIELD _ytmp, 4*SZ8, 1
273FIELD _rsp, 8, 1
274END_FIELDS
275
276 _STACK_FRAME_size = _FIELD_OFFSET
277 _STACK_FRAME_align = _STRUCT_ALIGN
278
279#######################################################################
280
281########################################################################
282#### Define constants
283########################################################################
284
285#define STS_UNKNOWN 0
286#define STS_BEING_PROCESSED 1
287#define STS_COMPLETED 2
288
289########################################################################
290#### Define JOB_SHA256 structure
291########################################################################
292
293START_FIELDS # JOB_SHA256
294
295### name size align
296FIELD _buffer, 8, 8 # pointer to buffer
297FIELD _len, 8, 8 # length in bytes
298FIELD _result_digest, 8*4, 32 # Digest (output)
299FIELD _status, 4, 4
300FIELD _user_data, 8, 8
301END_FIELDS
302
303 _JOB_SHA256_size = _FIELD_OFFSET
304 _JOB_SHA256_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
deleted file mode 100644
index d2364c55bbde..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,307 +0,0 @@
1/*
2 * Flush routine for SHA256 multibuffer
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53#include <linux/linkage.h>
54#include <asm/frame.h>
55#include "sha256_mb_mgr_datastruct.S"
56
57.extern sha256_x8_avx2
58
59#LINUX register definitions
60#define arg1 %rdi
61#define arg2 %rsi
62
63# Common register definitions
64#define state arg1
65#define job arg2
66#define len2 arg2
67
68# idx must be a register not clobbered by sha256_x8_avx2
69#define idx %r8
70#define DWORD_idx %r8d
71
72#define unused_lanes %rbx
73#define lane_data %rbx
74#define tmp2 %rbx
75#define tmp2_w %ebx
76
77#define job_rax %rax
78#define tmp1 %rax
79#define size_offset %rax
80#define tmp %rax
81#define start_offset %rax
82
83#define tmp3 %arg1
84
85#define extra_blocks %arg2
86#define p %arg2
87
88.macro LABEL prefix n
89\prefix\n\():
90.endm
91
92.macro JNE_SKIP i
93jne skip_\i
94.endm
95
96.altmacro
97.macro SET_OFFSET _offset
98offset = \_offset
99.endm
100.noaltmacro
101
102# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
103# arg 1 : rdi : state
104ENTRY(sha256_mb_mgr_flush_avx2)
105 FRAME_BEGIN
106 push %rbx
107
108 # If bit (32+3) is set, then all lanes are empty
109 mov _unused_lanes(state), unused_lanes
110 bt $32+3, unused_lanes
111 jc return_null
112
113 # find a lane with a non-null job
114 xor idx, idx
115 offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
116 cmpq $0, offset(state)
117 cmovne one(%rip), idx
118 offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
119 cmpq $0, offset(state)
120 cmovne two(%rip), idx
121 offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
122 cmpq $0, offset(state)
123 cmovne three(%rip), idx
124 offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
125 cmpq $0, offset(state)
126 cmovne four(%rip), idx
127 offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
128 cmpq $0, offset(state)
129 cmovne five(%rip), idx
130 offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
131 cmpq $0, offset(state)
132 cmovne six(%rip), idx
133 offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
134 cmpq $0, offset(state)
135 cmovne seven(%rip), idx
136
137 # copy idx to empty lanes
138copy_lane_data:
139 offset = (_args + _data_ptr)
140 mov offset(state,idx,8), tmp
141
142 I = 0
143.rep 8
144 offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
145 cmpq $0, offset(state)
146.altmacro
147 JNE_SKIP %I
148 offset = (_args + _data_ptr + 8*I)
149 mov tmp, offset(state)
150 offset = (_lens + 4*I)
151 movl $0xFFFFFFFF, offset(state)
152LABEL skip_ %I
153 I = (I+1)
154.noaltmacro
155.endr
156
157 # Find min length
158 vmovdqu _lens+0*16(state), %xmm0
159 vmovdqu _lens+1*16(state), %xmm1
160
161 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
162 vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
163 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
164 vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
165 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
166
167 vmovd %xmm2, DWORD_idx
168 mov idx, len2
169 and $0xF, idx
170 shr $4, len2
171 jz len_is_0
172
173 vpand clear_low_nibble(%rip), %xmm2, %xmm2
174 vpshufd $0, %xmm2, %xmm2
175
176 vpsubd %xmm2, %xmm0, %xmm0
177 vpsubd %xmm2, %xmm1, %xmm1
178
179 vmovdqu %xmm0, _lens+0*16(state)
180 vmovdqu %xmm1, _lens+1*16(state)
181
182 # "state" and "args" are the same address, arg1
183 # len is arg2
184 call sha256_x8_avx2
185 # state and idx are intact
186
187len_is_0:
188 # process completed job "idx"
189 imul $_LANE_DATA_size, idx, lane_data
190 lea _ldata(state, lane_data), lane_data
191
192 mov _job_in_lane(lane_data), job_rax
193 movq $0, _job_in_lane(lane_data)
194 movl $STS_COMPLETED, _status(job_rax)
195 mov _unused_lanes(state), unused_lanes
196 shl $4, unused_lanes
197 or idx, unused_lanes
198
199 mov unused_lanes, _unused_lanes(state)
200 movl $0xFFFFFFFF, _lens(state,idx,4)
201
202 vmovd _args_digest(state , idx, 4) , %xmm0
203 vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
204 vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
205 vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
206 vmovd _args_digest+4*32(state, idx, 4), %xmm1
207 vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
208 vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
209 vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
210
211 vmovdqu %xmm0, _result_digest(job_rax)
212 offset = (_result_digest + 1*16)
213 vmovdqu %xmm1, offset(job_rax)
214
215return:
216 pop %rbx
217 FRAME_END
218 ret
219
220return_null:
221 xor job_rax, job_rax
222 jmp return
223ENDPROC(sha256_mb_mgr_flush_avx2)
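
Both entry points above locate work with the same vpminud/vpalignr reduction: every lens[] slot packs (length_in_blocks << 4) | lane, so the numeric minimum simultaneously identifies the shortest pending job and the lane holding it, while empty lanes sit at 0xFFFFFFFF and never win. A scalar model of that reduction (illustrative only):

	static unsigned int find_min_len(const unsigned int lens[8],
					 unsigned int *lane)
	{
		unsigned int i, min = lens[0];

		for (i = 1; i < 8; i++)
			if (lens[i] < min)
				min = lens[i];
		*lane = min & 0xF;	/* low nibble: lane index */
		return min >> 4;	/* remaining bits: blocks left */
	}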
224
225##############################################################################
226
227.align 16
228ENTRY(sha256_mb_mgr_get_comp_job_avx2)
229 push %rbx
230
231 ## if bit 32+3 is set, then all lanes are empty
232 mov _unused_lanes(state), unused_lanes
233 bt $(32+3), unused_lanes
234 jc .return_null
235
236 # Find min length
237 vmovdqu _lens(state), %xmm0
238 vmovdqu _lens+1*16(state), %xmm1
239
240 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
241 vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
242 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
243 vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
244 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
245
246 vmovd %xmm2, DWORD_idx
247 test $~0xF, idx
248 jnz .return_null
249
250 # process completed job "idx"
251 imul $_LANE_DATA_size, idx, lane_data
252 lea _ldata(state, lane_data), lane_data
253
254 mov _job_in_lane(lane_data), job_rax
255 movq $0, _job_in_lane(lane_data)
256 movl $STS_COMPLETED, _status(job_rax)
257 mov _unused_lanes(state), unused_lanes
258 shl $4, unused_lanes
259 or idx, unused_lanes
260 mov unused_lanes, _unused_lanes(state)
261
262 movl $0xFFFFFFFF, _lens(state, idx, 4)
263
264 vmovd _args_digest(state, idx, 4), %xmm0
265 vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
266 vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
267 vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
268 vmovd _args_digest+4*32(state, idx, 4), %xmm1
269 vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
270 vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
271 vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
272
273 vmovdqu %xmm0, _result_digest(job_rax)
274 offset = (_result_digest + 1*16)
275 vmovdqu %xmm1, offset(job_rax)
276
277 pop %rbx
278
279 ret
280
281.return_null:
282 xor job_rax, job_rax
283 pop %rbx
284 ret
285ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
286
287.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
288.align 16
289clear_low_nibble:
290.octa 0x000000000000000000000000FFFFFFF0
291
292.section .rodata.cst8, "aM", @progbits, 8
293.align 8
294one:
295.quad 1
296two:
297.quad 2
298three:
299.quad 3
300four:
301.quad 4
302five:
303.quad 5
304six:
305.quad 6
306seven:
307.quad 7
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
deleted file mode 100644
index b0c498371e67..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,65 +0,0 @@
1/*
2 * Initialization code for multi buffer SHA256 algorithm for AVX2
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#include "sha256_mb_mgr.h"
55
56void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
57{
58 unsigned int j;
59
60 state->unused_lanes = 0xF76543210ULL;
61 for (j = 0; j < 8; j++) {
62 state->lens[j] = 0xFFFFFFFF;
63 state->ldata[j].job_in_lane = NULL;
64 }
65}
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
deleted file mode 100644
index b36ae7454084..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,214 +0,0 @@
1/*
2 * Buffer submit code for multi buffer SHA256 algorithm
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#include <linux/linkage.h>
55#include <asm/frame.h>
56#include "sha256_mb_mgr_datastruct.S"
57
58.extern sha256_x8_avx2
59
60# LINUX register definitions
61arg1 = %rdi
62arg2 = %rsi
63size_offset = %rcx
64tmp2 = %rcx
65extra_blocks = %rdx
66
67# Common definitions
68#define state arg1
69#define job %rsi
70#define len2 arg2
71#define p2 arg2
72
73# idx must be a register not clobbered by sha256_x8_avx2
74idx = %r8
75DWORD_idx = %r8d
76last_len = %r8
77
78p = %r11
79start_offset = %r11
80
81unused_lanes = %rbx
82BYTE_unused_lanes = %bl
83
84job_rax = %rax
85len = %rax
86DWORD_len = %eax
87
88lane = %r12
89tmp3 = %r12
90
91tmp = %r9
92DWORD_tmp = %r9d
93
94lane_data = %r10
95
96# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
97# arg 1 : rdi : state
98# arg 2 : rsi : job
99ENTRY(sha256_mb_mgr_submit_avx2)
100 FRAME_BEGIN
101 push %rbx
102 push %r12
103
104 mov _unused_lanes(state), unused_lanes
105 mov unused_lanes, lane
106 and $0xF, lane
107 shr $4, unused_lanes
108 imul $_LANE_DATA_size, lane, lane_data
109 movl $STS_BEING_PROCESSED, _status(job)
110 lea _ldata(state, lane_data), lane_data
111 mov unused_lanes, _unused_lanes(state)
112 movl _len(job), DWORD_len
113
114 mov job, _job_in_lane(lane_data)
115 shl $4, len
116 or lane, len
117
118 movl DWORD_len, _lens(state , lane, 4)
119
120 # Load digest words from result_digest
121 vmovdqu _result_digest(job), %xmm0
122 vmovdqu _result_digest+1*16(job), %xmm1
123 vmovd %xmm0, _args_digest(state, lane, 4)
124 vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
125 vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
126 vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
127 vmovd %xmm1, _args_digest+4*32(state , lane, 4)
128
129 vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4)
130 vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4)
131 vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4)
132
133 mov _buffer(job), p
134 mov p, _args_data_ptr(state, lane, 8)
135
136 cmp $0xF, unused_lanes
137 jne return_null
138
139start_loop:
140 # Find min length
141 vmovdqa _lens(state), %xmm0
142 vmovdqa _lens+1*16(state), %xmm1
143
144 vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
145 vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
146 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
147 vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
148 vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
149
150 vmovd %xmm2, DWORD_idx
151 mov idx, len2
152 and $0xF, idx
153 shr $4, len2
154 jz len_is_0
155
156 vpand clear_low_nibble(%rip), %xmm2, %xmm2
157 vpshufd $0, %xmm2, %xmm2
158
159 vpsubd %xmm2, %xmm0, %xmm0
160 vpsubd %xmm2, %xmm1, %xmm1
161
162 vmovdqa %xmm0, _lens + 0*16(state)
163 vmovdqa %xmm1, _lens + 1*16(state)
164
165 # "state" and "args" are the same address, arg1
166 # len is arg2
167 call sha256_x8_avx2
168
169 # state and idx are intact
170
171len_is_0:
172 # process completed job "idx"
173 imul $_LANE_DATA_size, idx, lane_data
174 lea _ldata(state, lane_data), lane_data
175
176 mov _job_in_lane(lane_data), job_rax
177 mov _unused_lanes(state), unused_lanes
178 movq $0, _job_in_lane(lane_data)
179 movl $STS_COMPLETED, _status(job_rax)
180 shl $4, unused_lanes
181 or idx, unused_lanes
182 mov unused_lanes, _unused_lanes(state)
183
184 movl $0xFFFFFFFF, _lens(state,idx,4)
185
186 vmovd _args_digest(state, idx, 4), %xmm0
187 vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
188 vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
189 vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
190 vmovd _args_digest+4*32(state, idx, 4), %xmm1
191
192 vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
193 vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
194 vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
195
196 vmovdqu %xmm0, _result_digest(job_rax)
197 vmovdqu %xmm1, _result_digest+1*16(job_rax)
198
199return:
200 pop %r12
201 pop %rbx
202 FRAME_END
203 ret
204
205return_null:
206 xor job_rax, job_rax
207 jmp return
208
209ENDPROC(sha256_mb_mgr_submit_avx2)
210
211.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
212.align 16
213clear_low_nibble:
214 .octa 0x000000000000000000000000FFFFFFF0
diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
deleted file mode 100644
index 1687c80c5995..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
+++ /dev/null
@@ -1,598 +0,0 @@
1/*
2 * Multi-buffer SHA256 algorithm hash compute routine
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#include <linux/linkage.h>
55#include "sha256_mb_mgr_datastruct.S"
56
57## code to compute oct (8-way) SHA256 using AVX2 256-bit vectors
58## outer calling routine takes care of save and restore of XMM registers
59## Logic designed/laid out by JDG
60
61## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15
62## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
63## Linux preserves: rdi rbp r8
64##
65## clobbers %ymm0-15
66
67arg1 = %rdi
68arg2 = %rsi
69reg3 = %rcx
70reg4 = %rdx
71
72# Common definitions
73STATE = arg1
74INP_SIZE = arg2
75
76IDX = %rax
77ROUND = %rbx
78TBL = reg3
79
80inp0 = %r9
81inp1 = %r10
82inp2 = %r11
83inp3 = %r12
84inp4 = %r13
85inp5 = %r14
86inp6 = %r15
87inp7 = reg4
88
89a = %ymm0
90b = %ymm1
91c = %ymm2
92d = %ymm3
93e = %ymm4
94f = %ymm5
95g = %ymm6
96h = %ymm7
97
98T1 = %ymm8
99
100a0 = %ymm12
101a1 = %ymm13
102a2 = %ymm14
103TMP = %ymm15
104TMP0 = %ymm6
105TMP1 = %ymm7
106
107TT0 = %ymm8
108TT1 = %ymm9
109TT2 = %ymm10
110TT3 = %ymm11
111TT4 = %ymm12
112TT5 = %ymm13
113TT6 = %ymm14
114TT7 = %ymm15
115
116# Define stack usage
117
118# Assume stack aligned to 32 bytes before call
119# Therefore FRAMESZ mod 32 must be 32-8 = 24
120
121#define FRAMESZ 0x388
122
123#define VMOVPS vmovups
124
125# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
126# "transpose" data in {r0...r7} using temps {t0...t1}
127# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
128# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
129# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
130# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
131# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
132# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
133# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
134# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
135# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
136#
137# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
138# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
139# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
140# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
141# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
142# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
143# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
144# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
145# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
146#
147
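For reference, the same permutation written as scalar C (an illustration of the macro's contract, not a replacement for it):

	#include <string.h>

	/* before: r[i] = eight dwords of lane i; after: r[j] = dword j
	 * of every lane (an 8x8 dword transpose) */
	static void transpose8(unsigned int r[8][8])
	{
		unsigned int t[8][8];
		int i, j;

		for (i = 0; i < 8; i++)
			for (j = 0; j < 8; j++)
				t[j][i] = r[i][j];
		memcpy(r, t, sizeof(t));
	}
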
148.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
149 # process top half (r0..r3) {a...d}
150 vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
151 vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
152 vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
153 vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
154 vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
155 vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
156 vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
157 vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
158
159 # use r2 in place of t0
160 # process bottom half (r4..r7) {e...h}
161 vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
162 vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
163 vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
164 vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
165 vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
166 vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
167 vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
168 vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
169
170 vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
171 vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
172 vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
173 vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
174 vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
175 vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
176 vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
177 vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
178
179.endm
180
181.macro ROTATE_ARGS
182TMP_ = h
183h = g
184g = f
185f = e
186e = d
187d = c
188c = b
189b = a
190a = TMP_
191.endm
192
193.macro _PRORD reg imm tmp
194 vpslld $(32-\imm),\reg,\tmp
195 vpsrld $\imm,\reg, \reg
196 vpor \tmp,\reg, \reg
197.endm
198
199# PRORD_nd reg, imm, tmp, src
200.macro _PRORD_nd reg imm tmp src
201 vpslld $(32-\imm), \src, \tmp
202 vpsrld $\imm, \src, \reg
203 vpor \tmp, \reg, \reg
204.endm
205
206# PRORD dst/src, amt
207.macro PRORD reg imm
208 _PRORD \reg,\imm,TMP
209.endm
210
211# PRORD_nd dst, src, amt
212.macro PRORD_nd reg tmp imm
213 _PRORD_nd \reg, \imm, TMP, \tmp
214.endm
215
216# arguments passed implicitly in preprocessor symbols i, a...h
217.macro ROUND_00_15 _T1 i
218 PRORD_nd a0,e,5 # sig1: a0 = (e >> 5)
219
220 vpxor g, f, a2 # ch: a2 = f^g
221 vpand e,a2, a2 # ch: a2 = (f^g)&e
222 vpxor g, a2, a2 # a2 = ch
223
224 PRORD_nd a1,e,25 # sig1: a1 = (e >> 25)
225
226 vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp)
227 vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
228 vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
229 PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11)
230 vpaddd a2, h, h # h = h + ch
231 PRORD_nd a2,a,11 # sig0: a2 = (a >> 11)
232 vpaddd \_T1,h, h # h = h + ch + W + K
233 vpxor a1, a0, a0 # a0 = sigma1
234 PRORD_nd a1,a,22 # sig0: a1 = (a >> 22)
235 vpxor c, a, \_T1 # maj: T1 = a^c
236 add $SZ8, ROUND # ROUND++
237 vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
238 vpaddd a0, h, h
239 vpaddd h, d, d
240 vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
241 PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13)
242 vpxor a1, a2, a2 # a2 = sig0
243 vpand c, a, a1 # maj: a1 = a&c
244 vpor \_T1, a1, a1 # a1 = maj
245 vpaddd a1, h, h # h = h + ch + W + K + maj
246 vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0
247 ROTATE_ARGS
248.endm
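#
# Per lane, the interleaved instructions above compute one standard
# SHA-256 round. A scalar C sketch of the same step (illustrative;
# ror32() is rotate right by n bits):
#
#	t1 = h + (ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25))
#		+ ((e & f) ^ (~e & g)) + K[i] + W[i];
#	t2 = (ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22))
#		+ ((a & b) ^ (a & c) ^ (b & c));
#	d += t1;
#	h = t1 + t2;
#	/* then rename the working variables, as ROTATE_ARGS does */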
249
250# arguments passed implicitly in preprocessor symbols i, a...h
251.macro ROUND_16_XX _T1 i
252 vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1
253 vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1
254 vmovdqu \_T1, a0
255 PRORD \_T1,11
256 vmovdqu a1, a2
257 PRORD a1,2
258 vpxor a0, \_T1, \_T1
259 PRORD \_T1, 7
260 vpxor a2, a1, a1
261 PRORD a1, 17
262 vpsrld $3, a0, a0
263 vpxor a0, \_T1, \_T1
264 vpsrld $10, a2, a2
265 vpxor a2, a1, a1
266 vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
267 vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1
268 vpaddd a1, \_T1, \_T1
269
270 ROUND_00_15 \_T1,\i
271.endm
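#
# ROUND_16_XX first extends the message schedule, then falls through to
# ROUND_00_15. Scalar C sketch of the schedule step (illustrative; the
# stack slots are indexed mod 16, matching the &0xf masks above):
#
#	s0 = ror32(W[i-15], 7) ^ ror32(W[i-15], 18) ^ (W[i-15] >> 3);
#	s1 = ror32(W[i-2], 17) ^ ror32(W[i-2], 19) ^ (W[i-2] >> 10);
#	W[i] = W[i-16] + s0 + W[i-7] + s1;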
272
273# SHA256_ARGS:
274# UINT256 digest[8]; // transposed digests, one 8-lane row per state word
275# UINT8 *data_ptr[8];
276
277# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
278# arg 1 : STATE : pointer to SHA256_ARGS (transposed digests + data pointers)
279# arg 2 : INP_SIZE : size of input in blocks
280 # general registers preserved in outer calling routine
281 # outer calling routine saves all the XMM registers
282 # save rsp, allocate 32-byte aligned for local variables
283ENTRY(sha256_x8_avx2)
284
285 # save callee-saved clobbered registers to comply with C function ABI
286 push %r12
287 push %r13
288 push %r14
289 push %r15
290
291 mov %rsp, IDX
292 sub $FRAMESZ, %rsp
293 and $~0x1F, %rsp
294 mov IDX, _rsp(%rsp)
295
296 # Load the pre-transposed incoming digest.
297 vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a
298 vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b
299 vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c
300 vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d
301 vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e
302 vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f
303 vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g
304 vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h
305
306 lea K256_8(%rip),TBL
307
308	# load the address of each of the 8 message lanes
309 # getting ready to transpose input onto stack
310 mov _args_data_ptr+0*PTR_SZ(STATE),inp0
311 mov _args_data_ptr+1*PTR_SZ(STATE),inp1
312 mov _args_data_ptr+2*PTR_SZ(STATE),inp2
313 mov _args_data_ptr+3*PTR_SZ(STATE),inp3
314 mov _args_data_ptr+4*PTR_SZ(STATE),inp4
315 mov _args_data_ptr+5*PTR_SZ(STATE),inp5
316 mov _args_data_ptr+6*PTR_SZ(STATE),inp6
317 mov _args_data_ptr+7*PTR_SZ(STATE),inp7
318
319 xor IDX, IDX
320lloop:
321 xor ROUND, ROUND
322
323 # save old digest
324 vmovdqu a, _digest(%rsp)
325 vmovdqu b, _digest+1*SZ8(%rsp)
326 vmovdqu c, _digest+2*SZ8(%rsp)
327 vmovdqu d, _digest+3*SZ8(%rsp)
328 vmovdqu e, _digest+4*SZ8(%rsp)
329 vmovdqu f, _digest+5*SZ8(%rsp)
330 vmovdqu g, _digest+6*SZ8(%rsp)
331 vmovdqu h, _digest+7*SZ8(%rsp)
332 i = 0
333.rep 2
334 VMOVPS i*32(inp0, IDX), TT0
335 VMOVPS i*32(inp1, IDX), TT1
336 VMOVPS i*32(inp2, IDX), TT2
337 VMOVPS i*32(inp3, IDX), TT3
338 VMOVPS i*32(inp4, IDX), TT4
339 VMOVPS i*32(inp5, IDX), TT5
340 VMOVPS i*32(inp6, IDX), TT6
341 VMOVPS i*32(inp7, IDX), TT7
342 vmovdqu g, _ytmp(%rsp)
343 vmovdqu h, _ytmp+1*SZ8(%rsp)
344 TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1
345 vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
346 vmovdqu _ytmp(%rsp), g
347 vpshufb TMP1, TT0, TT0
348 vpshufb TMP1, TT1, TT1
349 vpshufb TMP1, TT2, TT2
350 vpshufb TMP1, TT3, TT3
351 vpshufb TMP1, TT4, TT4
352 vpshufb TMP1, TT5, TT5
353 vpshufb TMP1, TT6, TT6
354 vpshufb TMP1, TT7, TT7
355 vmovdqu _ytmp+1*SZ8(%rsp), h
356 vmovdqu TT4, _ytmp(%rsp)
357 vmovdqu TT5, _ytmp+1*SZ8(%rsp)
358 vmovdqu TT6, _ytmp+2*SZ8(%rsp)
359 vmovdqu TT7, _ytmp+3*SZ8(%rsp)
360 ROUND_00_15 TT0,(i*8+0)
361 vmovdqu _ytmp(%rsp), TT0
362 ROUND_00_15 TT1,(i*8+1)
363 vmovdqu _ytmp+1*SZ8(%rsp), TT1
364 ROUND_00_15 TT2,(i*8+2)
365 vmovdqu _ytmp+2*SZ8(%rsp), TT2
366 ROUND_00_15 TT3,(i*8+3)
367 vmovdqu _ytmp+3*SZ8(%rsp), TT3
368 ROUND_00_15 TT0,(i*8+4)
369 ROUND_00_15 TT1,(i*8+5)
370 ROUND_00_15 TT2,(i*8+6)
371 ROUND_00_15 TT3,(i*8+7)
372 i = (i+1)
373.endr
374 add $64, IDX
375 i = (i*8)
376
377 jmp Lrounds_16_xx
378.align 16
379Lrounds_16_xx:
380.rep 16
381 ROUND_16_XX T1, i
382 i = (i+1)
383.endr
384
385 cmp $ROUNDS,ROUND
386 jb Lrounds_16_xx
387
388 # add old digest
389 vpaddd _digest+0*SZ8(%rsp), a, a
390 vpaddd _digest+1*SZ8(%rsp), b, b
391 vpaddd _digest+2*SZ8(%rsp), c, c
392 vpaddd _digest+3*SZ8(%rsp), d, d
393 vpaddd _digest+4*SZ8(%rsp), e, e
394 vpaddd _digest+5*SZ8(%rsp), f, f
395 vpaddd _digest+6*SZ8(%rsp), g, g
396 vpaddd _digest+7*SZ8(%rsp), h, h
397
398 sub $1, INP_SIZE # unit is blocks
399 jne lloop
400
401 # write back to memory (state object) the transposed digest
402 vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
403 vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
404 vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
405 vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
406 vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
407 vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
408 vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
409 vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
410
411 # update input pointers
412 add IDX, inp0
413 mov inp0, _args_data_ptr+0*8(STATE)
414 add IDX, inp1
415 mov inp1, _args_data_ptr+1*8(STATE)
416 add IDX, inp2
417 mov inp2, _args_data_ptr+2*8(STATE)
418 add IDX, inp3
419 mov inp3, _args_data_ptr+3*8(STATE)
420 add IDX, inp4
421 mov inp4, _args_data_ptr+4*8(STATE)
422 add IDX, inp5
423 mov inp5, _args_data_ptr+5*8(STATE)
424 add IDX, inp6
425 mov inp6, _args_data_ptr+6*8(STATE)
426 add IDX, inp7
427 mov inp7, _args_data_ptr+7*8(STATE)
428
429 # Postamble
430 mov _rsp(%rsp), %rsp
431
432 # restore callee-saved clobbered registers
433 pop %r15
434 pop %r14
435 pop %r13
436 pop %r12
437
438 ret
439ENDPROC(sha256_x8_avx2)
440
441.section .rodata.K256_8, "a", @progbits
442.align 64
443K256_8:
444 .octa 0x428a2f98428a2f98428a2f98428a2f98
445 .octa 0x428a2f98428a2f98428a2f98428a2f98
446 .octa 0x71374491713744917137449171374491
447 .octa 0x71374491713744917137449171374491
448 .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
449 .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
450 .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
451 .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
452 .octa 0x3956c25b3956c25b3956c25b3956c25b
453 .octa 0x3956c25b3956c25b3956c25b3956c25b
454 .octa 0x59f111f159f111f159f111f159f111f1
455 .octa 0x59f111f159f111f159f111f159f111f1
456 .octa 0x923f82a4923f82a4923f82a4923f82a4
457 .octa 0x923f82a4923f82a4923f82a4923f82a4
458 .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
459 .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
460 .octa 0xd807aa98d807aa98d807aa98d807aa98
461 .octa 0xd807aa98d807aa98d807aa98d807aa98
462 .octa 0x12835b0112835b0112835b0112835b01
463 .octa 0x12835b0112835b0112835b0112835b01
464 .octa 0x243185be243185be243185be243185be
465 .octa 0x243185be243185be243185be243185be
466 .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
467 .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
468 .octa 0x72be5d7472be5d7472be5d7472be5d74
469 .octa 0x72be5d7472be5d7472be5d7472be5d74
470 .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
471 .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
472 .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
473 .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
474 .octa 0xc19bf174c19bf174c19bf174c19bf174
475 .octa 0xc19bf174c19bf174c19bf174c19bf174
476 .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
477 .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
478 .octa 0xefbe4786efbe4786efbe4786efbe4786
479 .octa 0xefbe4786efbe4786efbe4786efbe4786
480 .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
481 .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
482 .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
483 .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
484 .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
485 .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
486 .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
487 .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
488 .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
489 .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
490 .octa 0x76f988da76f988da76f988da76f988da
491 .octa 0x76f988da76f988da76f988da76f988da
492 .octa 0x983e5152983e5152983e5152983e5152
493 .octa 0x983e5152983e5152983e5152983e5152
494 .octa 0xa831c66da831c66da831c66da831c66d
495 .octa 0xa831c66da831c66da831c66da831c66d
496 .octa 0xb00327c8b00327c8b00327c8b00327c8
497 .octa 0xb00327c8b00327c8b00327c8b00327c8
498 .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
499 .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
500 .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
501 .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
502 .octa 0xd5a79147d5a79147d5a79147d5a79147
503 .octa 0xd5a79147d5a79147d5a79147d5a79147
504 .octa 0x06ca635106ca635106ca635106ca6351
505 .octa 0x06ca635106ca635106ca635106ca6351
506 .octa 0x14292967142929671429296714292967
507 .octa 0x14292967142929671429296714292967
508 .octa 0x27b70a8527b70a8527b70a8527b70a85
509 .octa 0x27b70a8527b70a8527b70a8527b70a85
510 .octa 0x2e1b21382e1b21382e1b21382e1b2138
511 .octa 0x2e1b21382e1b21382e1b21382e1b2138
512 .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
513 .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
514 .octa 0x53380d1353380d1353380d1353380d13
515 .octa 0x53380d1353380d1353380d1353380d13
516 .octa 0x650a7354650a7354650a7354650a7354
517 .octa 0x650a7354650a7354650a7354650a7354
518 .octa 0x766a0abb766a0abb766a0abb766a0abb
519 .octa 0x766a0abb766a0abb766a0abb766a0abb
520 .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
521 .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
522 .octa 0x92722c8592722c8592722c8592722c85
523 .octa 0x92722c8592722c8592722c8592722c85
524 .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
525 .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
526 .octa 0xa81a664ba81a664ba81a664ba81a664b
527 .octa 0xa81a664ba81a664ba81a664ba81a664b
528 .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
529 .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
530 .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
531 .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
532 .octa 0xd192e819d192e819d192e819d192e819
533 .octa 0xd192e819d192e819d192e819d192e819
534 .octa 0xd6990624d6990624d6990624d6990624
535 .octa 0xd6990624d6990624d6990624d6990624
536 .octa 0xf40e3585f40e3585f40e3585f40e3585
537 .octa 0xf40e3585f40e3585f40e3585f40e3585
538 .octa 0x106aa070106aa070106aa070106aa070
539 .octa 0x106aa070106aa070106aa070106aa070
540 .octa 0x19a4c11619a4c11619a4c11619a4c116
541 .octa 0x19a4c11619a4c11619a4c11619a4c116
542 .octa 0x1e376c081e376c081e376c081e376c08
543 .octa 0x1e376c081e376c081e376c081e376c08
544 .octa 0x2748774c2748774c2748774c2748774c
545 .octa 0x2748774c2748774c2748774c2748774c
546 .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
547 .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
548 .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
549 .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
550 .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
551 .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
552 .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
553 .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
554 .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
555 .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
556 .octa 0x748f82ee748f82ee748f82ee748f82ee
557 .octa 0x748f82ee748f82ee748f82ee748f82ee
558 .octa 0x78a5636f78a5636f78a5636f78a5636f
559 .octa 0x78a5636f78a5636f78a5636f78a5636f
560 .octa 0x84c8781484c8781484c8781484c87814
561 .octa 0x84c8781484c8781484c8781484c87814
562 .octa 0x8cc702088cc702088cc702088cc70208
563 .octa 0x8cc702088cc702088cc702088cc70208
564 .octa 0x90befffa90befffa90befffa90befffa
565 .octa 0x90befffa90befffa90befffa90befffa
566 .octa 0xa4506ceba4506ceba4506ceba4506ceb
567 .octa 0xa4506ceba4506ceba4506ceba4506ceb
568 .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
569 .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
570 .octa 0xc67178f2c67178f2c67178f2c67178f2
571 .octa 0xc67178f2c67178f2c67178f2c67178f2
572
573.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
574.align 32
575PSHUFFLE_BYTE_FLIP_MASK:
576.octa 0x0c0d0e0f08090a0b0405060700010203
577.octa 0x0c0d0e0f08090a0b0405060700010203
578
579.section .rodata.cst256.K256, "aM", @progbits, 256
580.align 64
581.global K256
582K256:
583 .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
584 .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
585 .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
586 .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
587 .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
588 .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
589 .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
590 .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
591 .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
592 .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
593 .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
594 .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
595 .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
596 .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
597 .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
598 .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile
deleted file mode 100644
index 90f1ef69152e..000000000000
--- a/arch/x86/crypto/sha512-mb/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
1# SPDX-License-Identifier: GPL-2.0
2#
3# Arch-specific CryptoAPI modules.
4#
5
6avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
7 $(comma)4)$(comma)%ymm2,yes,no)
8ifeq ($(avx2_supported),yes)
9 obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
10 sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
11 sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
12endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
deleted file mode 100644
index 26b85678012d..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ /dev/null
@@ -1,1047 +0,0 @@
1/*
2 * Multi buffer SHA512 algorithm Glue Code
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
55
56#include <crypto/internal/hash.h>
57#include <linux/init.h>
58#include <linux/module.h>
59#include <linux/mm.h>
60#include <linux/cryptohash.h>
61#include <linux/types.h>
62#include <linux/list.h>
63#include <crypto/scatterwalk.h>
64#include <crypto/sha.h>
65#include <crypto/mcryptd.h>
66#include <crypto/crypto_wq.h>
67#include <asm/byteorder.h>
68#include <linux/hardirq.h>
69#include <asm/fpu/api.h>
70#include "sha512_mb_ctx.h"
71
72#define FLUSH_INTERVAL 1000 /* in usec */
73
74static struct mcryptd_alg_state sha512_mb_alg_state;
75
76struct sha512_mb_ctx {
77 struct mcryptd_ahash *mcryptd_tfm;
78};
79
80static inline struct mcryptd_hash_request_ctx
81 *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
82{
83 struct ahash_request *areq;
84
85 areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
86 return container_of(areq, struct mcryptd_hash_request_ctx, areq);
87}
88
89static inline struct ahash_request
90 *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
91{
92 return container_of((void *) ctx, struct ahash_request, __ctx);
93}
94
95static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
96 struct ahash_request *areq)
97{
98 rctx->flag = HASH_UPDATE;
99}
100
101static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
102static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
103 (struct sha512_mb_mgr *state,
104 struct job_sha512 *job);
105static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
106 (struct sha512_mb_mgr *state);
107static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
108 (struct sha512_mb_mgr *state);
109
110inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
111 uint64_t total_len)
112{
113 uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
114
115 memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
116 padblock[i] = 0x80;
117
118 i += ((SHA512_BLOCK_SIZE - 1) &
119 (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
120 + 1 + SHA512_PADLENGTHFIELD_SIZE;
121
122#if SHA512_PADLENGTHFIELD_SIZE == 16
123 *((uint64_t *) &padblock[i - 16]) = 0;
124#endif
125
126 *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
127
128 /* Number of extra blocks to hash */
129 return i >> SHA512_LOG2_BLOCK_SIZE;
130}
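
/*
 * Worked example (illustrative): for total_len = 200, i starts at
 * 200 % 128 = 72, so the 0x80 byte lands at offset 72. The padding
 * arithmetic then advances i to 128: bytes 112..127 hold the 128-bit
 * length field (only the low 64 bits, 200 << 3, are non-zero), and
 * i >> 7 == 1, so one extra block is hashed.
 */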
131
132static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
133 (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
134{
135 while (ctx) {
136 if (ctx->status & HASH_CTX_STS_COMPLETE) {
137 /* Clear PROCESSING bit */
138 ctx->status = HASH_CTX_STS_COMPLETE;
139 return ctx;
140 }
141
142 /*
143 * If the extra blocks are empty, begin hashing what remains
144 * in the user's buffer.
145 */
146 if (ctx->partial_block_buffer_length == 0 &&
147 ctx->incoming_buffer_length) {
148
149 const void *buffer = ctx->incoming_buffer;
150 uint32_t len = ctx->incoming_buffer_length;
151 uint32_t copy_len;
152
153 /*
154 * Only entire blocks can be hashed.
155 * Copy remainder to extra blocks buffer.
156 */
157 copy_len = len & (SHA512_BLOCK_SIZE-1);
158
159 if (copy_len) {
160 len -= copy_len;
161 memcpy(ctx->partial_block_buffer,
162 ((const char *) buffer + len),
163 copy_len);
164 ctx->partial_block_buffer_length = copy_len;
165 }
166
167 ctx->incoming_buffer_length = 0;
168
169 /* len should be a multiple of the block size now */
170 assert((len % SHA512_BLOCK_SIZE) == 0);
171
172 /* Set len to the number of blocks to be hashed */
173 len >>= SHA512_LOG2_BLOCK_SIZE;
174
175 if (len) {
176
177 ctx->job.buffer = (uint8_t *) buffer;
178 ctx->job.len = len;
179 ctx = (struct sha512_hash_ctx *)
180 sha512_job_mgr_submit(&mgr->mgr,
181 &ctx->job);
182 continue;
183 }
184 }
185
186 /*
187 * If the extra blocks are not empty, then we are
188 * either on the last block(s) or we need more
189 * user input before continuing.
190 */
191 if (ctx->status & HASH_CTX_STS_LAST) {
192
193 uint8_t *buf = ctx->partial_block_buffer;
194 uint32_t n_extra_blocks =
195 sha512_pad(buf, ctx->total_length);
196
197 ctx->status = (HASH_CTX_STS_PROCESSING |
198 HASH_CTX_STS_COMPLETE);
199 ctx->job.buffer = buf;
200 ctx->job.len = (uint32_t) n_extra_blocks;
201 ctx = (struct sha512_hash_ctx *)
202 sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
203 continue;
204 }
205
206 if (ctx)
207 ctx->status = HASH_CTX_STS_IDLE;
208 return ctx;
209 }
210
211 return NULL;
212}
213
214static struct sha512_hash_ctx
215 *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
216{
217 /*
218 * If get_comp_job returns NULL, there are no jobs complete.
219 * If get_comp_job returns a job, verify that it is safe to return to
220 * the user.
221 * If it is not ready, resubmit the job to finish processing.
222 * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
223 * returned.
224 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
225 * still need processing.
226 */
227 struct sha512_ctx_mgr *mgr;
228 struct sha512_hash_ctx *ctx;
229 unsigned long flags;
230
231 mgr = cstate->mgr;
232 spin_lock_irqsave(&cstate->work_lock, flags);
233 ctx = (struct sha512_hash_ctx *)
234 sha512_job_mgr_get_comp_job(&mgr->mgr);
235 ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
236 spin_unlock_irqrestore(&cstate->work_lock, flags);
237 return ctx;
238}
239
240static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
241{
242 sha512_job_mgr_init(&mgr->mgr);
243}
244
245static struct sha512_hash_ctx
246 *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
247 struct sha512_hash_ctx *ctx,
248 const void *buffer,
249 uint32_t len,
250 int flags)
251{
252 struct sha512_ctx_mgr *mgr;
253 unsigned long irqflags;
254
255 mgr = cstate->mgr;
256 spin_lock_irqsave(&cstate->work_lock, irqflags);
257 if (flags & ~(HASH_UPDATE | HASH_LAST)) {
258 /* User should not pass anything other than UPDATE or LAST */
259 ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
260 goto unlock;
261 }
262
263 if (ctx->status & HASH_CTX_STS_PROCESSING) {
264 /* Cannot submit to a currently processing job. */
265 ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
266 goto unlock;
267 }
268
269 if (ctx->status & HASH_CTX_STS_COMPLETE) {
270 /* Cannot update a finished job. */
271 ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
272 goto unlock;
273 }
274
275 /*
276 * If we made it here, there were no errors during this call to
277 * submit
278 */
279 ctx->error = HASH_CTX_ERROR_NONE;
280
281 /* Store buffer ptr info from user */
282 ctx->incoming_buffer = buffer;
283 ctx->incoming_buffer_length = len;
284
285 /*
286 * Store the user's request flags and mark this ctx as currently being
287 * processed.
288 */
289 ctx->status = (flags & HASH_LAST) ?
290 (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
291 HASH_CTX_STS_PROCESSING;
292
293 /* Advance byte counter */
294 ctx->total_length += len;
295
296 /*
297 * If there is anything currently buffered in the extra blocks,
298 * append to it until it contains a whole block.
299 * Or if the user's buffer contains less than a whole block,
300 * append as much as possible to the extra block.
301 */
302 if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
303 /* Compute how many bytes to copy from user buffer into extra
304 * block
305 */
306 uint32_t copy_len = SHA512_BLOCK_SIZE -
307 ctx->partial_block_buffer_length;
308 if (len < copy_len)
309 copy_len = len;
310
311 if (copy_len) {
312 /* Copy and update relevant pointers and counters */
313 memcpy
314 (&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
315 buffer, copy_len);
316
317 ctx->partial_block_buffer_length += copy_len;
318 ctx->incoming_buffer = (const void *)
319 ((const char *)buffer + copy_len);
320 ctx->incoming_buffer_length = len - copy_len;
321 }
322
323 /* The extra block should never contain more than 1 block
324 * here
325 */
326 assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
327
328 /* If the extra block buffer contains exactly 1 block, it can
329 * be hashed.
330 */
331 if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
332 ctx->partial_block_buffer_length = 0;
333
334 ctx->job.buffer = ctx->partial_block_buffer;
335 ctx->job.len = 1;
336 ctx = (struct sha512_hash_ctx *)
337 sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
338 }
339 }
340
341 ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
342unlock:
343 spin_unlock_irqrestore(&cstate->work_lock, irqflags);
344 return ctx;
345}
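
/*
 * Illustrative example: submitting 200 bytes to a fresh idle context
 * takes neither early-copy branch (no buffered partial data, and len is
 * at least one block); sha512_ctx_mgr_resubmit() then queues one full
 * 128-byte block with the job manager and parks the trailing 72 bytes
 * in partial_block_buffer until more data or a HASH_LAST submission
 * arrives.
 */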
346
347static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
348{
349 struct sha512_ctx_mgr *mgr;
350 struct sha512_hash_ctx *ctx;
351 unsigned long flags;
352
353 mgr = cstate->mgr;
354 spin_lock_irqsave(&cstate->work_lock, flags);
355 while (1) {
356 ctx = (struct sha512_hash_ctx *)
357 sha512_job_mgr_flush(&mgr->mgr);
358
359 /* If flush returned 0, there are no more jobs in flight. */
360 if (!ctx)
361 break;
362
363 /*
364 * If flush returned a job, resubmit the job to finish
365 * processing.
366 */
367 ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
368
369 /*
370 * If sha512_ctx_mgr_resubmit returned a job, it is ready to
371 * be returned. Otherwise, all jobs currently being managed by
372 * the sha512_ctx_mgr still need processing. Loop.
373 */
374 if (ctx)
375 break;
376 }
377 spin_unlock_irqrestore(&cstate->work_lock, flags);
378 return ctx;
379}
380
381static int sha512_mb_init(struct ahash_request *areq)
382{
383 struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
384
385 hash_ctx_init(sctx);
386 sctx->job.result_digest[0] = SHA512_H0;
387 sctx->job.result_digest[1] = SHA512_H1;
388 sctx->job.result_digest[2] = SHA512_H2;
389 sctx->job.result_digest[3] = SHA512_H3;
390 sctx->job.result_digest[4] = SHA512_H4;
391 sctx->job.result_digest[5] = SHA512_H5;
392 sctx->job.result_digest[6] = SHA512_H6;
393 sctx->job.result_digest[7] = SHA512_H7;
394 sctx->total_length = 0;
395 sctx->partial_block_buffer_length = 0;
396 sctx->status = HASH_CTX_STS_IDLE;
397
398 return 0;
399}
400
401static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
402{
403 int i;
404 struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
405 __be64 *dst = (__be64 *) rctx->out;
406
407 for (i = 0; i < 8; ++i)
408 dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
409
410 return 0;
411}
412
413static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
414 struct mcryptd_alg_cstate *cstate, bool flush)
415{
416 int flag = HASH_UPDATE;
417 int nbytes, err = 0;
418 struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
419 struct sha512_hash_ctx *sha_ctx;
420
421 /* more work ? */
422 while (!(rctx->flag & HASH_DONE)) {
423 nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
424 if (nbytes < 0) {
425 err = nbytes;
426 goto out;
427 }
428 /* check if the walk is done */
429 if (crypto_ahash_walk_last(&rctx->walk)) {
430 rctx->flag |= HASH_DONE;
431 if (rctx->flag & HASH_FINAL)
432 flag |= HASH_LAST;
433
434 }
435 sha_ctx = (struct sha512_hash_ctx *)
436 ahash_request_ctx(&rctx->areq);
437 kernel_fpu_begin();
438 sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
439 rctx->walk.data, nbytes, flag);
440 if (!sha_ctx) {
441 if (flush)
442 sha_ctx = sha512_ctx_mgr_flush(cstate);
443 }
444 kernel_fpu_end();
445 if (sha_ctx)
446 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
447 else {
448 rctx = NULL;
449 goto out;
450 }
451 }
452
453 /* copy the results */
454 if (rctx->flag & HASH_FINAL)
455 sha512_mb_set_results(rctx);
456
457out:
458 *ret_rctx = rctx;
459 return err;
460}
461
462static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
463 struct mcryptd_alg_cstate *cstate,
464 int err)
465{
466 struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
467 struct sha512_hash_ctx *sha_ctx;
468 struct mcryptd_hash_request_ctx *req_ctx;
469 int ret;
470 unsigned long flags;
471
472 /* remove from work list */
473 spin_lock_irqsave(&cstate->work_lock, flags);
474 list_del(&rctx->waiter);
475 spin_unlock_irqrestore(&cstate->work_lock, flags);
476
477 if (irqs_disabled())
478 rctx->complete(&req->base, err);
479 else {
480 local_bh_disable();
481 rctx->complete(&req->base, err);
482 local_bh_enable();
483 }
484
485 /* check to see if there are other jobs that are done */
486 sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
487 while (sha_ctx) {
488 req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
489 ret = sha_finish_walk(&req_ctx, cstate, false);
490 if (req_ctx) {
491 spin_lock_irqsave(&cstate->work_lock, flags);
492 list_del(&req_ctx->waiter);
493 spin_unlock_irqrestore(&cstate->work_lock, flags);
494
495 req = cast_mcryptd_ctx_to_req(req_ctx);
496 if (irqs_disabled())
497 req_ctx->complete(&req->base, ret);
498 else {
499 local_bh_disable();
500 req_ctx->complete(&req->base, ret);
501 local_bh_enable();
502 }
503 }
504 sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
505 }
506
507 return 0;
508}
509
510static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
511 struct mcryptd_alg_cstate *cstate)
512{
513 unsigned long next_flush;
514 unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
515 unsigned long flags;
516
517 /* initialize tag */
518 rctx->tag.arrival = jiffies; /* tag the arrival time */
519 rctx->tag.seq_num = cstate->next_seq_num++;
520 next_flush = rctx->tag.arrival + delay;
521 rctx->tag.expire = next_flush;
522
523 spin_lock_irqsave(&cstate->work_lock, flags);
524 list_add_tail(&rctx->waiter, &cstate->work_list);
525 spin_unlock_irqrestore(&cstate->work_lock, flags);
526
527 mcryptd_arm_flusher(cstate, delay);
528}
529
530static int sha512_mb_update(struct ahash_request *areq)
531{
532 struct mcryptd_hash_request_ctx *rctx =
533 container_of(areq, struct mcryptd_hash_request_ctx,
534 areq);
535 struct mcryptd_alg_cstate *cstate =
536 this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
537
538 struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
539 struct sha512_hash_ctx *sha_ctx;
540 int ret = 0, nbytes;
541
542
543 /* sanity check */
544 if (rctx->tag.cpu != smp_processor_id()) {
545 pr_err("mcryptd error: cpu clash\n");
546 goto done;
547 }
548
549 /* need to init context */
550 req_ctx_init(rctx, areq);
551
552 nbytes = crypto_ahash_walk_first(req, &rctx->walk);
553
554 if (nbytes < 0) {
555 ret = nbytes;
556 goto done;
557 }
558
559 if (crypto_ahash_walk_last(&rctx->walk))
560 rctx->flag |= HASH_DONE;
561
562 /* submit */
563 sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
564 sha512_mb_add_list(rctx, cstate);
565 kernel_fpu_begin();
566 sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
567 nbytes, HASH_UPDATE);
568 kernel_fpu_end();
569
570 /* check if anything is returned */
571 if (!sha_ctx)
572 return -EINPROGRESS;
573
574 if (sha_ctx->error) {
575 ret = sha_ctx->error;
576 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
577 goto done;
578 }
579
580 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
581 ret = sha_finish_walk(&rctx, cstate, false);
582
583 if (!rctx)
584 return -EINPROGRESS;
585done:
586 sha_complete_job(rctx, cstate, ret);
587 return ret;
588}
589
590static int sha512_mb_finup(struct ahash_request *areq)
591{
592 struct mcryptd_hash_request_ctx *rctx =
593 container_of(areq, struct mcryptd_hash_request_ctx,
594 areq);
595 struct mcryptd_alg_cstate *cstate =
596 this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
597
598 struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
599 struct sha512_hash_ctx *sha_ctx;
600 int ret = 0, flag = HASH_UPDATE, nbytes;
601
602 /* sanity check */
603 if (rctx->tag.cpu != smp_processor_id()) {
604 pr_err("mcryptd error: cpu clash\n");
605 goto done;
606 }
607
608 /* need to init context */
609 req_ctx_init(rctx, areq);
610
611 nbytes = crypto_ahash_walk_first(req, &rctx->walk);
612
613 if (nbytes < 0) {
614 ret = nbytes;
615 goto done;
616 }
617
618 if (crypto_ahash_walk_last(&rctx->walk)) {
619 rctx->flag |= HASH_DONE;
620 flag = HASH_LAST;
621 }
622
623 /* submit */
624 rctx->flag |= HASH_FINAL;
625 sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
626 sha512_mb_add_list(rctx, cstate);
627
628 kernel_fpu_begin();
629 sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
630 nbytes, flag);
631 kernel_fpu_end();
632
633 /* check if anything is returned */
634 if (!sha_ctx)
635 return -EINPROGRESS;
636
637 if (sha_ctx->error) {
638 ret = sha_ctx->error;
639 goto done;
640 }
641
642 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
643 ret = sha_finish_walk(&rctx, cstate, false);
644 if (!rctx)
645 return -EINPROGRESS;
646done:
647 sha_complete_job(rctx, cstate, ret);
648 return ret;
649}
650
651static int sha512_mb_final(struct ahash_request *areq)
652{
653 struct mcryptd_hash_request_ctx *rctx =
654 container_of(areq, struct mcryptd_hash_request_ctx,
655 areq);
656 struct mcryptd_alg_cstate *cstate =
657 this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
658
659 struct sha512_hash_ctx *sha_ctx;
660 int ret = 0;
661 u8 data;
662
663 /* sanity check */
664 if (rctx->tag.cpu != smp_processor_id()) {
665 pr_err("mcryptd error: cpu clash\n");
666 goto done;
667 }
668
669 /* need to init context */
670 req_ctx_init(rctx, areq);
671
672 rctx->flag |= HASH_DONE | HASH_FINAL;
673
674 sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
675 /* flag HASH_FINAL and 0 data size */
676 sha512_mb_add_list(rctx, cstate);
677 kernel_fpu_begin();
678 sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
679 kernel_fpu_end();
680
681 /* check if anything is returned */
682 if (!sha_ctx)
683 return -EINPROGRESS;
684
685 if (sha_ctx->error) {
686 ret = sha_ctx->error;
687 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
688 goto done;
689 }
690
691 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
692 ret = sha_finish_walk(&rctx, cstate, false);
693 if (!rctx)
694 return -EINPROGRESS;
695done:
696 sha_complete_job(rctx, cstate, ret);
697 return ret;
698}
699
700static int sha512_mb_export(struct ahash_request *areq, void *out)
701{
702 struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
703
704 memcpy(out, sctx, sizeof(*sctx));
705
706 return 0;
707}
708
709static int sha512_mb_import(struct ahash_request *areq, const void *in)
710{
711 struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
712
713 memcpy(sctx, in, sizeof(*sctx));
714
715 return 0;
716}
717
718static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
719{
720 struct mcryptd_ahash *mcryptd_tfm;
721 struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
722 struct mcryptd_hash_ctx *mctx;
723
724 mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
725 CRYPTO_ALG_INTERNAL,
726 CRYPTO_ALG_INTERNAL);
727 if (IS_ERR(mcryptd_tfm))
728 return PTR_ERR(mcryptd_tfm);
729 mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
730 mctx->alg_state = &sha512_mb_alg_state;
731 ctx->mcryptd_tfm = mcryptd_tfm;
732 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
733 sizeof(struct ahash_request) +
734 crypto_ahash_reqsize(&mcryptd_tfm->base));
735
736 return 0;
737}
738
739static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
740{
741 struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
742
743 mcryptd_free_ahash(ctx->mcryptd_tfm);
744}
745
746static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
747{
748 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
749 sizeof(struct ahash_request) +
750 sizeof(struct sha512_hash_ctx));
751
752 return 0;
753}
754
755static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
756{
757 struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
758
759 mcryptd_free_ahash(ctx->mcryptd_tfm);
760}
761
762static struct ahash_alg sha512_mb_areq_alg = {
763 .init = sha512_mb_init,
764 .update = sha512_mb_update,
765 .final = sha512_mb_final,
766 .finup = sha512_mb_finup,
767 .export = sha512_mb_export,
768 .import = sha512_mb_import,
769 .halg = {
770 .digestsize = SHA512_DIGEST_SIZE,
771 .statesize = sizeof(struct sha512_hash_ctx),
772 .base = {
773 .cra_name = "__sha512-mb",
774 .cra_driver_name = "__intel_sha512-mb",
775 .cra_priority = 100,
776 /*
777                          * use ASYNC flag as some buffers in the multi-buffer
778                          * algo may not have completed before the hashing
779                          * thread sleeps
780 */
781 .cra_flags = CRYPTO_ALG_ASYNC |
782 CRYPTO_ALG_INTERNAL,
783 .cra_blocksize = SHA512_BLOCK_SIZE,
784 .cra_module = THIS_MODULE,
785 .cra_list = LIST_HEAD_INIT
786 (sha512_mb_areq_alg.halg.base.cra_list),
787 .cra_init = sha512_mb_areq_init_tfm,
788 .cra_exit = sha512_mb_areq_exit_tfm,
789 .cra_ctxsize = sizeof(struct sha512_hash_ctx),
790 }
791 }
792};
793
794static int sha512_mb_async_init(struct ahash_request *req)
795{
796 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
797 struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
798 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
799 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
800
801 memcpy(mcryptd_req, req, sizeof(*req));
802 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
803 return crypto_ahash_init(mcryptd_req);
804}
805
806static int sha512_mb_async_update(struct ahash_request *req)
807{
808 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
809
810 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
811 struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
812 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
813
814 memcpy(mcryptd_req, req, sizeof(*req));
815 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
816 return crypto_ahash_update(mcryptd_req);
817}
818
819static int sha512_mb_async_finup(struct ahash_request *req)
820{
821 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
822
823 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
824 struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
825 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
826
827 memcpy(mcryptd_req, req, sizeof(*req));
828 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
829 return crypto_ahash_finup(mcryptd_req);
830}
831
832static int sha512_mb_async_final(struct ahash_request *req)
833{
834 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
835
836 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
837 struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
838 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
839
840 memcpy(mcryptd_req, req, sizeof(*req));
841 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
842 return crypto_ahash_final(mcryptd_req);
843}
844
845static int sha512_mb_async_digest(struct ahash_request *req)
846{
847 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
848 struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
849 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
850 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
851
852 memcpy(mcryptd_req, req, sizeof(*req));
853 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
854 return crypto_ahash_digest(mcryptd_req);
855}
856
857static int sha512_mb_async_export(struct ahash_request *req, void *out)
858{
859 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
860 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
861 struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
862 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
863
864 memcpy(mcryptd_req, req, sizeof(*req));
865 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
866 return crypto_ahash_export(mcryptd_req, out);
867}
868
869static int sha512_mb_async_import(struct ahash_request *req, const void *in)
870{
871 struct ahash_request *mcryptd_req = ahash_request_ctx(req);
872 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
873 struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
874 struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
875 struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
876 struct mcryptd_hash_request_ctx *rctx;
877 struct ahash_request *areq;
878
879 memcpy(mcryptd_req, req, sizeof(*req));
880 ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
881 rctx = ahash_request_ctx(mcryptd_req);
882
883 areq = &rctx->areq;
884
885 ahash_request_set_tfm(areq, child);
886 ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
887 rctx->complete, req);
888
889 return crypto_ahash_import(mcryptd_req, in);
890}
891
892static struct ahash_alg sha512_mb_async_alg = {
893 .init = sha512_mb_async_init,
894 .update = sha512_mb_async_update,
895 .final = sha512_mb_async_final,
896 .finup = sha512_mb_async_finup,
897 .digest = sha512_mb_async_digest,
898 .export = sha512_mb_async_export,
899 .import = sha512_mb_async_import,
900 .halg = {
901 .digestsize = SHA512_DIGEST_SIZE,
902 .statesize = sizeof(struct sha512_hash_ctx),
903 .base = {
904 .cra_name = "sha512",
905 .cra_driver_name = "sha512_mb",
906 /*
907 * Low priority, since with few concurrent hash requests
908 * this is extremely slow due to the flush delay. Users
909 * whose workloads would benefit from this can request
910 * it explicitly by driver name, or can increase its
911 * priority at runtime using NETLINK_CRYPTO.
912 */
913 .cra_priority = 50,
914 .cra_flags = CRYPTO_ALG_ASYNC,
915 .cra_blocksize = SHA512_BLOCK_SIZE,
916 .cra_module = THIS_MODULE,
917 .cra_list = LIST_HEAD_INIT
918 (sha512_mb_async_alg.halg.base.cra_list),
919 .cra_init = sha512_mb_async_init_tfm,
920 .cra_exit = sha512_mb_async_exit_tfm,
921 .cra_ctxsize = sizeof(struct sha512_mb_ctx),
922 .cra_alignmask = 0,
923 },
924 },
925};
926
927static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
928{
929 struct mcryptd_hash_request_ctx *rctx;
930 unsigned long cur_time;
931 unsigned long next_flush = 0;
932 struct sha512_hash_ctx *sha_ctx;
933
934
935 cur_time = jiffies;
936
937 while (!list_empty(&cstate->work_list)) {
938 rctx = list_entry(cstate->work_list.next,
939 struct mcryptd_hash_request_ctx, waiter);
940                 if (time_before(cur_time, rctx->tag.expire))
941 break;
942 kernel_fpu_begin();
943 sha_ctx = (struct sha512_hash_ctx *)
944 sha512_ctx_mgr_flush(cstate);
945 kernel_fpu_end();
946 if (!sha_ctx) {
947 pr_err("sha512_mb error: nothing got flushed for"
948 " non-empty list\n");
949 break;
950 }
951 rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
952 sha_finish_walk(&rctx, cstate, true);
953 sha_complete_job(rctx, cstate, 0);
954 }
955
956 if (!list_empty(&cstate->work_list)) {
957 rctx = list_entry(cstate->work_list.next,
958 struct mcryptd_hash_request_ctx, waiter);
959                 /* get the next expiry time and re-arm the flusher */
960 next_flush = rctx->tag.expire;
961 mcryptd_arm_flusher(cstate, get_delay(next_flush));
962 }
963 return next_flush;
964}
965
966static int __init sha512_mb_mod_init(void)
967{
968
969 int cpu;
970 int err;
971 struct mcryptd_alg_cstate *cpu_state;
972
973 /* check for dependent cpu features */
974 if (!boot_cpu_has(X86_FEATURE_AVX2) ||
975 !boot_cpu_has(X86_FEATURE_BMI2))
976 return -ENODEV;
977
978 /* initialize multibuffer structures */
979 sha512_mb_alg_state.alg_cstate =
980 alloc_percpu(struct mcryptd_alg_cstate);
981
982 sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
983 sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
984 sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
985 sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
986
987 if (!sha512_mb_alg_state.alg_cstate)
988 return -ENOMEM;
989 for_each_possible_cpu(cpu) {
990 cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
991 cpu_state->next_flush = 0;
992 cpu_state->next_seq_num = 0;
993 cpu_state->flusher_engaged = false;
994 INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
995 cpu_state->cpu = cpu;
996 cpu_state->alg_state = &sha512_mb_alg_state;
997 cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
998 GFP_KERNEL);
999 if (!cpu_state->mgr)
1000 goto err2;
1001 sha512_ctx_mgr_init(cpu_state->mgr);
1002 INIT_LIST_HEAD(&cpu_state->work_list);
1003 spin_lock_init(&cpu_state->work_lock);
1004 }
1005 sha512_mb_alg_state.flusher = &sha512_mb_flusher;
1006
1007 err = crypto_register_ahash(&sha512_mb_areq_alg);
1008 if (err)
1009 goto err2;
1010 err = crypto_register_ahash(&sha512_mb_async_alg);
1011 if (err)
1012 goto err1;
1013
1014
1015 return 0;
1016err1:
1017 crypto_unregister_ahash(&sha512_mb_areq_alg);
1018err2:
1019 for_each_possible_cpu(cpu) {
1020 cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
1021 kfree(cpu_state->mgr);
1022 }
1023 free_percpu(sha512_mb_alg_state.alg_cstate);
1024 return -ENODEV;
1025}
1026
1027static void __exit sha512_mb_mod_fini(void)
1028{
1029 int cpu;
1030 struct mcryptd_alg_cstate *cpu_state;
1031
1032 crypto_unregister_ahash(&sha512_mb_async_alg);
1033 crypto_unregister_ahash(&sha512_mb_areq_alg);
1034 for_each_possible_cpu(cpu) {
1035 cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
1036 kfree(cpu_state->mgr);
1037 }
1038 free_percpu(sha512_mb_alg_state.alg_cstate);
1039}
1040
1041module_init(sha512_mb_mod_init);
1042module_exit(sha512_mb_mod_fini);
1043
1044MODULE_LICENSE("GPL");
1045MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
1046
1047MODULE_ALIAS("sha512");
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
deleted file mode 100644
index e5c465bd821e..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ /dev/null
@@ -1,128 +0,0 @@
1/*
2 * Header file for multi buffer SHA512 context
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#ifndef _SHA_MB_CTX_INTERNAL_H
55#define _SHA_MB_CTX_INTERNAL_H
56
57#include "sha512_mb_mgr.h"
58
59#define HASH_UPDATE 0x00
60#define HASH_LAST 0x01
61#define HASH_DONE 0x02
62#define HASH_FINAL 0x04
63
64#define HASH_CTX_STS_IDLE 0x00
65#define HASH_CTX_STS_PROCESSING 0x01
66#define HASH_CTX_STS_LAST 0x02
67#define HASH_CTX_STS_COMPLETE 0x04
68
69enum hash_ctx_error {
70 HASH_CTX_ERROR_NONE = 0,
71 HASH_CTX_ERROR_INVALID_FLAGS = -1,
72 HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
73 HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
74};
75
76#define hash_ctx_user_data(ctx) ((ctx)->user_data)
77#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
78#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
79#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
80#define hash_ctx_status(ctx) ((ctx)->status)
81#define hash_ctx_error(ctx) ((ctx)->error)
82#define hash_ctx_init(ctx) \
83 do { \
84 (ctx)->error = HASH_CTX_ERROR_NONE; \
85 (ctx)->status = HASH_CTX_STS_COMPLETE; \
86 } while (0)
87
88/* Hash Constants and Typedefs */
89#define SHA512_DIGEST_LENGTH 8
90#define SHA512_LOG2_BLOCK_SIZE 7
91
92#define SHA512_PADLENGTHFIELD_SIZE 16
93
94#ifdef SHA_MB_DEBUG
95#define assert(expr) \
96do { \
97 if (unlikely(!(expr))) { \
98 printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
99 #expr, __FILE__, __func__, __LINE__); \
100 } \
101} while (0)
102#else
103#define assert(expr) do {} while (0)
104#endif
105
106struct sha512_ctx_mgr {
107 struct sha512_mb_mgr mgr;
108};
109
110/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
111
112struct sha512_hash_ctx {
113 /* Must be at struct offset 0 */
114 struct job_sha512 job;
115 /* status flag */
116 int status;
117 /* error flag */
118 int error;
119
120 uint64_t total_length;
121 const void *incoming_buffer;
122 uint32_t incoming_buffer_length;
123 uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2];
124 uint32_t partial_block_buffer_length;
125 void *user_data;
126};
127
128#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
deleted file mode 100644
index 178f17eef382..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
+++ /dev/null
@@ -1,104 +0,0 @@
1/*
2 * Header file for multi buffer SHA512 algorithm manager
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#ifndef __SHA_MB_MGR_H
55#define __SHA_MB_MGR_H
56
57#include <linux/types.h>
58
59#define NUM_SHA512_DIGEST_WORDS 8
60
61enum job_sts {STS_UNKNOWN = 0,
62 STS_BEING_PROCESSED = 1,
63 STS_COMPLETED = 2,
64 STS_INTERNAL_ERROR = 3,
65 STS_ERROR = 4
66};
67
68struct job_sha512 {
69 u8 *buffer;
70 u64 len;
71 u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
72 enum job_sts status;
73 void *user_data;
74};
75
76struct sha512_args_x4 {
77 uint64_t digest[8][4];
78 uint8_t *data_ptr[4];
79};
80
81struct sha512_lane_data {
82 struct job_sha512 *job_in_lane;
83};
84
85struct sha512_mb_mgr {
86 struct sha512_args_x4 args;
87
88 uint64_t lens[4];
89
90 /* each byte is index (0...7) of unused lanes */
91 uint64_t unused_lanes;
92 /* byte 4 is set to FF as a flag */
93 struct sha512_lane_data ldata[4];
94};
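
/*
 * Illustrative note (based on this manager's init routine):
 * unused_lanes packs the free lane indices one byte at a time and
 * starts as 0xFF03020100 with all four lanes free. Submit pops a lane
 * index from the low byte; the 0xFF byte is the end-of-stack flag
 * referred to in the "byte 4" comment above.
 */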
95
96#define SHA512_MB_MGR_NUM_LANES_AVX2 4
97
98void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
99struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
100 struct job_sha512 *job);
101struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
102struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
103
104#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
deleted file mode 100644
index cf2636d4c9ba..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,281 +0,0 @@
1/*
2 * Header file for multi buffer SHA512 algorithm data structure
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54# Macros for defining data structures
55
56# Usage example
57
58#START_FIELDS # JOB_AES
59### name size align
60#FIELD _plaintext, 8, 8 # pointer to plaintext
61#FIELD _ciphertext, 8, 8 # pointer to ciphertext
62#FIELD _IV, 16, 8 # IV
63#FIELD _keys, 8, 8 # pointer to keys
64#FIELD _len, 4, 4 # length in bytes
65#FIELD _status, 4, 4 # status enumeration
66#FIELD _user_data, 8, 8 # pointer to user data
67#UNION _union, size1, align1, \
68# size2, align2, \
69# size3, align3, \
70# ...
71#END_FIELDS
72#%assign _JOB_AES_size _FIELD_OFFSET
73#%assign _JOB_AES_align _STRUCT_ALIGN
74
75#########################################################################
76
77# Alternate "struc-like" syntax:
78# STRUCT job_aes2
79# RES_Q .plaintext, 1
80# RES_Q .ciphertext, 1
81# RES_DQ .IV, 1
82# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
83# RES_U .union, size1, align1, \
84# size2, align2, \
85# ...
86# ENDSTRUCT
87# # Following only needed if nesting
88# %assign job_aes2_size _FIELD_OFFSET
89# %assign job_aes2_align _STRUCT_ALIGN
90#
91# RES_* macros take a name, a count and an optional alignment.
92# The count is in terms of the base size of the macro, and the
93# default alignment is the base size.
94# The macros are:
95# Macro Base size
96# RES_B 1
97# RES_W 2
98# RES_D 4
99# RES_Q 8
100# RES_DQ 16
101# RES_Y 32
102# RES_Z 64
103#
104# RES_U defines a union. Its arguments are a name and two or more
105# pairs of "size, alignment"
106#
107# The two assigns are only needed if this structure is being nested
108# within another. Even if the assigns are not done, one can still use
109# STRUCT_NAME_size as the size of the structure.
110#
111# Note that for nesting, you still need to assign to STRUCT_NAME_size.
112#
113# The differences between this and using "struc" directly are that each
114# type is implicitly aligned to its natural length (although this can be
115# overridden with an explicit third parameter), and that the structure
116# is padded at the end to its overall alignment.
117#
118
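# Worked example (a hand expansion of the macros defined below): for the
# LANE_DATA structure later in this file,
#	START_FIELDS			# _FIELD_OFFSET = 0, _STRUCT_ALIGN = 0
#	FIELD _job_in_lane, 8, 8	# _job_in_lane = 0, _FIELD_OFFSET = 8
#	END_FIELDS			# round _FIELD_OFFSET up to _STRUCT_ALIGN
# leaves _FIELD_OFFSET = 8 and _STRUCT_ALIGN = 8, so _LANE_DATA_size
# resolves to 8 and _LANE_DATA_align to 8.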
119#########################################################################
120
121#ifndef _DATASTRUCT_ASM_
122#define _DATASTRUCT_ASM_
123
124#define PTR_SZ 8
125#define SHA512_DIGEST_WORD_SIZE 8
126#define SHA512_MB_MGR_NUM_LANES_AVX2 4
127#define NUM_SHA512_DIGEST_WORDS 8
128#define SZ4 4*SHA512_DIGEST_WORD_SIZE
129#define ROUNDS 80*SZ4
130#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
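# SZ4 is one YMM row of four 64-bit lanes (32 bytes); ROUNDS (80*SZ4 =
# 0xa00) is the byte size of the lane-replicated K512_4 round-constant
# table, matching the loop bound checked in sha512_x4_avx2.S.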
131
132# START_FIELDS
133.macro START_FIELDS
134 _FIELD_OFFSET = 0
135 _STRUCT_ALIGN = 0
136.endm
137
138# FIELD name size align
139.macro FIELD name size align
140 _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
141 \name = _FIELD_OFFSET
142 _FIELD_OFFSET = _FIELD_OFFSET + (\size)
143.if (\align > _STRUCT_ALIGN)
144 _STRUCT_ALIGN = \align
145.endif
146.endm
147
148# END_FIELDS
149.macro END_FIELDS
150 _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
151.endm
152
153.macro STRUCT p1
154START_FIELDS
155.struc \p1
156.endm
157
158.macro ENDSTRUCT
159 tmp = _FIELD_OFFSET
160 END_FIELDS
161 tmp = (_FIELD_OFFSET - ##tmp)
162.if (tmp > 0)
163	.lcomm	tmp
164.endif
165.endm
166## RES_int name size align
167.macro RES_int p1 p2 p3
168 name = \p1
169 size = \p2
170	align = \p3
171
172 _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
173.align align
174.lcomm name size
175 _FIELD_OFFSET = _FIELD_OFFSET + (size)
176.if (align > _STRUCT_ALIGN)
177 _STRUCT_ALIGN = align
178.endif
179.endm
180
181# macro RES_B name, size [, align]
182.macro RES_B _name, _size, _align=1
183RES_int _name _size _align
184.endm
185
186# macro RES_W name, size [, align]
187.macro RES_W _name, _size, _align=2
188RES_int _name 2*(_size) _align
189.endm
190
191# macro RES_D name, size [, align]
192.macro RES_D _name, _size, _align=4
193RES_int _name 4*(_size) _align
194.endm
195
196# macro RES_Q name, size [, align]
197.macro RES_Q _name, _size, _align=8
198RES_int _name 8*(_size) _align
199.endm
200
201# macro RES_DQ name, size [, align]
202.macro RES_DQ _name, _size, _align=16
203RES_int _name 16*(_size) _align
204.endm
205
206# macro RES_Y name, size [, align]
207.macro RES_Y _name, _size, _align=32
208RES_int _name 32*(_size) _align
209.endm
210
211# macro RES_Z name, size [, align]
212.macro RES_Z _name, _size, _align=64
213RES_int _name 64*(_size) _align
214.endm
215
216#endif
217
218###################################################################
219### Define SHA512 Out Of Order Data Structures
220###################################################################
221
222START_FIELDS # LANE_DATA
223### name size align
224FIELD _job_in_lane, 8, 8 # pointer to job object
225END_FIELDS
226
227 _LANE_DATA_size = _FIELD_OFFSET
228 _LANE_DATA_align = _STRUCT_ALIGN
229
230####################################################################
231
232START_FIELDS # SHA512_ARGS_X4
233### name size align
234FIELD _digest, 8*8*4, 4 # transposed digest
235FIELD _data_ptr, 8*4, 8 # array of pointers to data
236END_FIELDS
237
238 _SHA512_ARGS_X4_size = _FIELD_OFFSET
239 _SHA512_ARGS_X4_align = _STRUCT_ALIGN
240
241#####################################################################
242
243START_FIELDS # MB_MGR
244### name size align
245FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
246FIELD _lens, 8*4, 8
247FIELD _unused_lanes, 8, 8
248FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align
249END_FIELDS
250
251 _MB_MGR_size = _FIELD_OFFSET
252 _MB_MGR_align = _STRUCT_ALIGN
253
254_args_digest = _args + _digest
255_args_data_ptr = _args + _data_ptr
256
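# For reference, this byte layout mirrors the C-side manager structure in
# sha512_mb_mgr.h (a sketch reconstructed from the field sizes above, not
# a verbatim copy of that header):
#
#	struct sha512_mb_mgr {
#		uint64_t digest[8][4];		/* _args + _digest, transposed */
#		uint8_t *data_ptr[4];		/* _args + _data_ptr */
#		uint64_t lens[4];		/* _lens */
#		uint64_t unused_lanes;		/* _unused_lanes */
#		struct { void *job_in_lane; } ldata[4];	/* _ldata */
#	};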
257#######################################################################
258
259#######################################################################
260#### Define constants
261#######################################################################
262
263#define STS_UNKNOWN 0
264#define STS_BEING_PROCESSED 1
265#define STS_COMPLETED 2
266
267#######################################################################
268#### Define JOB_SHA512 structure
269#######################################################################
270
271START_FIELDS # JOB_SHA512
272### name size align
273FIELD _buffer, 8, 8 # pointer to buffer
274FIELD _len, 8, 8 # length in bytes
275FIELD _result_digest, 8*8, 32 # Digest (output)
276FIELD _status, 4, 4
277FIELD _user_data, 8, 8
278END_FIELDS
279
280 _JOB_SHA512_size = _FIELD_OFFSET
281 _JOB_SHA512_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7c629caebc05..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,297 +0,0 @@
1/*
2 * Flush routine for SHA512 multibuffer
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#include <linux/linkage.h>
55#include <asm/frame.h>
56#include "sha512_mb_mgr_datastruct.S"
57
58.extern sha512_x4_avx2
59
60# LINUX register definitions
61#define arg1 %rdi
62#define arg2 %rsi
63
64# idx needs to be other than arg1, arg2, rbx, r12
65#define idx %rdx
66
67# Common definitions
68#define state arg1
69#define job arg2
70#define len2 arg2
71
72#define unused_lanes %rbx
73#define lane_data %rbx
74#define tmp2 %rbx
75
76#define job_rax %rax
77#define tmp1 %rax
78#define size_offset %rax
79#define tmp %rax
80#define start_offset %rax
81
82#define tmp3 arg1
83
84#define extra_blocks arg2
85#define p arg2
86
87#define tmp4 %r8
88#define lens0 %r8
89
90#define lens1 %r9
91#define lens2 %r10
92#define lens3 %r11
93
94.macro LABEL prefix n
95\prefix\n\():
96.endm
97
98.macro JNE_SKIP i
99jne skip_\i
100.endm
101
102.altmacro
103.macro SET_OFFSET _offset
104offset = \_offset
105.endm
106.noaltmacro
107
108# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
109# arg 1 : rcx : state
110ENTRY(sha512_mb_mgr_flush_avx2)
111 FRAME_BEGIN
112 push %rbx
113
114	# If bit (32+7) is set, then all lanes are empty
115 mov _unused_lanes(state), unused_lanes
116 bt $32+7, unused_lanes
117 jc return_null
118
119 # find a lane with a non-null job
120 xor idx, idx
121 offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
122 cmpq $0, offset(state)
123 cmovne one(%rip), idx
124 offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
125 cmpq $0, offset(state)
126 cmovne two(%rip), idx
127 offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
128 cmpq $0, offset(state)
129 cmovne three(%rip), idx
130
131 # copy idx to empty lanes
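	# Empty lanes borrow the chosen lane's data pointer and get an
	# effectively infinite (0xFFFFFFFF) length, so sha512_x4_avx2 never
	# reads through a NULL pointer and idle lanes never win the
	# minimum-length selection below.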
132copy_lane_data:
133 offset = (_args + _data_ptr)
134 mov offset(state,idx,8), tmp
135
136 I = 0
137.rep 4
138 offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
139 cmpq $0, offset(state)
140.altmacro
141 JNE_SKIP %I
142 offset = (_args + _data_ptr + 8*I)
143 mov tmp, offset(state)
144 offset = (_lens + 8*I +4)
145 movl $0xFFFFFFFF, offset(state)
146LABEL skip_ %I
147 I = (I+1)
148.noaltmacro
149.endr
150
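	# Scheduling note: each lens[] entry packs (blocks << 32) | lane,
	# so an unsigned 64-bit minimum over the four entries picks the
	# lane with the least remaining work while carrying its index in
	# the low bits.  Roughly, the cmov chain below computes:
	#
	#	uint64_t min = lens[0];
	#	for (i = 1; i < 4; i++)
	#		if (lens[i] < min)
	#			min = lens[i];
	#	idx  = min & 0xF;	/* lane index */
	#	len2 = min >> 32;	/* common block count */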
151 # Find min length
152 mov _lens + 0*8(state),lens0
153 mov lens0,idx
154 mov _lens + 1*8(state),lens1
155 cmp idx,lens1
156 cmovb lens1,idx
157 mov _lens + 2*8(state),lens2
158 cmp idx,lens2
159 cmovb lens2,idx
160 mov _lens + 3*8(state),lens3
161 cmp idx,lens3
162 cmovb lens3,idx
163 mov idx,len2
164 and $0xF,idx
165 and $~0xFF,len2
166 jz len_is_0
167
168 sub len2, lens0
169 sub len2, lens1
170 sub len2, lens2
171 sub len2, lens3
172 shr $32,len2
173 mov lens0, _lens + 0*8(state)
174 mov lens1, _lens + 1*8(state)
175 mov lens2, _lens + 2*8(state)
176 mov lens3, _lens + 3*8(state)
177
178 # "state" and "args" are the same address, arg1
179 # len is arg2
180 call sha512_x4_avx2
181 # state and idx are intact
182
183len_is_0:
184 # process completed job "idx"
185 imul $_LANE_DATA_size, idx, lane_data
186 lea _ldata(state, lane_data), lane_data
187
188 mov _job_in_lane(lane_data), job_rax
189 movq $0, _job_in_lane(lane_data)
190 movl $STS_COMPLETED, _status(job_rax)
191 mov _unused_lanes(state), unused_lanes
192 shl $8, unused_lanes
193 or idx, unused_lanes
194 mov unused_lanes, _unused_lanes(state)
195
196 movl $0xFFFFFFFF, _lens+4(state, idx, 8)
197
198 vmovq _args_digest+0*32(state, idx, 8), %xmm0
199 vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
200 vmovq _args_digest+2*32(state, idx, 8), %xmm1
201 vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
202 vmovq _args_digest+4*32(state, idx, 8), %xmm2
203 vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
204 vmovq _args_digest+6*32(state, idx, 8), %xmm3
205 vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
206
207 vmovdqu %xmm0, _result_digest(job_rax)
208 vmovdqu %xmm1, _result_digest+1*16(job_rax)
209 vmovdqu %xmm2, _result_digest+2*16(job_rax)
210 vmovdqu %xmm3, _result_digest+3*16(job_rax)
211
212return:
213 pop %rbx
214 FRAME_END
215 ret
216
217return_null:
218 xor job_rax, job_rax
219 jmp return
220ENDPROC(sha512_mb_mgr_flush_avx2)
221.align 16
222
223ENTRY(sha512_mb_mgr_get_comp_job_avx2)
224 push %rbx
225
226 mov _unused_lanes(state), unused_lanes
227 bt $(32+7), unused_lanes
228 jc .return_null
229
230 # Find min length
231 mov _lens(state),lens0
232 mov lens0,idx
233 mov _lens+1*8(state),lens1
234 cmp idx,lens1
235 cmovb lens1,idx
236 mov _lens+2*8(state),lens2
237 cmp idx,lens2
238 cmovb lens2,idx
239 mov _lens+3*8(state),lens3
240 cmp idx,lens3
241 cmovb lens3,idx
242 test $~0xF,idx
243 jnz .return_null
244 and $0xF,idx
245
246 #process completed job "idx"
247 imul $_LANE_DATA_size, idx, lane_data
248 lea _ldata(state, lane_data), lane_data
249
250 mov _job_in_lane(lane_data), job_rax
251 movq $0, _job_in_lane(lane_data)
252 movl $STS_COMPLETED, _status(job_rax)
253 mov _unused_lanes(state), unused_lanes
254 shl $8, unused_lanes
255 or idx, unused_lanes
256 mov unused_lanes, _unused_lanes(state)
257
258 movl $0xFFFFFFFF, _lens+4(state, idx, 8)
259
260 vmovq _args_digest(state, idx, 8), %xmm0
261 vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
262 vmovq _args_digest+2*32(state, idx, 8), %xmm1
263 vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
264 vmovq _args_digest+4*32(state, idx, 8), %xmm2
265 vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
266 vmovq _args_digest+6*32(state, idx, 8), %xmm3
267 vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
268
269 vmovdqu %xmm0, _result_digest+0*16(job_rax)
270 vmovdqu %xmm1, _result_digest+1*16(job_rax)
271 vmovdqu %xmm2, _result_digest+2*16(job_rax)
272 vmovdqu %xmm3, _result_digest+3*16(job_rax)
273
274 pop %rbx
275
276 ret
277
278.return_null:
279 xor job_rax, job_rax
280 pop %rbx
281 ret
282ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
283
284.section .rodata.cst8.one, "aM", @progbits, 8
285.align 8
286one:
287.quad 1
288
289.section .rodata.cst8.two, "aM", @progbits, 8
290.align 8
291two:
292.quad 2
293
294.section .rodata.cst8.three, "aM", @progbits, 8
295.align 8
296three:
297.quad 3
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
deleted file mode 100644
index d08805032f01..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/*
2 * Initialization code for multi buffer SHA512 algorithm for AVX2
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#include "sha512_mb_mgr.h"
55
56void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
57{
58 unsigned int j;
59
60 /* initially all lanes are unused */
61 state->lens[0] = 0xFFFFFFFF00000000;
62 state->lens[1] = 0xFFFFFFFF00000001;
63 state->lens[2] = 0xFFFFFFFF00000002;
64 state->lens[3] = 0xFFFFFFFF00000003;
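	/*
	 * Each lens[] word packs (length-in-blocks << 32) | lane-index;
	 * the 0xFFFFFFFF upper half marks the lane idle, so it can never
	 * win the unsigned-minimum length selection done in the submit
	 * and flush paths.
	 */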
65
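	/*
	 * unused_lanes is a stack of free lane indices, one byte each,
	 * capped by a 0xFF sentinel: submit pops the low byte, flush
	 * pushes a freed index back with "shl $8; or idx".  With all
	 * four lanes free, the sentinel sits at bits 32-39, which is
	 * what the "bt $(32+7)" tests in the flush paths check for.
	 */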
66 state->unused_lanes = 0xFF03020100;
67 for (j = 0; j < 4; j++)
68 state->ldata[j].job_in_lane = NULL;
69}
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
deleted file mode 100644
index 4ba709ba78e5..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,224 +0,0 @@
1/*
2 * Buffer submit code for multi buffer SHA512 algorithm
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54#include <linux/linkage.h>
55#include <asm/frame.h>
56#include "sha512_mb_mgr_datastruct.S"
57
58.extern sha512_x4_avx2
59
60#define arg1 %rdi
61#define arg2 %rsi
62
63#define idx %rdx
64#define last_len %rdx
65
66#define size_offset %rcx
67#define tmp2 %rcx
68
69# Common definitions
70#define state arg1
71#define job arg2
72#define len2 arg2
73#define p2 arg2
74
75#define p %r11
76#define start_offset %r11
77
78#define unused_lanes %rbx
79
80#define job_rax %rax
81#define len %rax
82
83#define lane %r12
84#define tmp3 %r12
85#define lens3 %r12
86
87#define extra_blocks %r8
88#define lens0 %r8
89
90#define tmp %r9
91#define lens1 %r9
92
93#define lane_data %r10
94#define lens2 %r10
95
96#define DWORD_len %eax
97
98# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
99# arg 1 : rcx : state
100# arg 2 : rdx : job
101ENTRY(sha512_mb_mgr_submit_avx2)
102 FRAME_BEGIN
103 push %rbx
104 push %r12
105
106 mov _unused_lanes(state), unused_lanes
107 movzb %bl,lane
108 shr $8, unused_lanes
109 imul $_LANE_DATA_size, lane,lane_data
110 movl $STS_BEING_PROCESSED, _status(job)
111 lea _ldata(state, lane_data), lane_data
112 mov unused_lanes, _unused_lanes(state)
113 movl _len(job), DWORD_len
114
115 mov job, _job_in_lane(lane_data)
116 movl DWORD_len,_lens+4(state , lane, 8)
117
118 # Load digest words from result_digest
119 vmovdqu _result_digest+0*16(job), %xmm0
120 vmovdqu _result_digest+1*16(job), %xmm1
121 vmovdqu _result_digest+2*16(job), %xmm2
122 vmovdqu _result_digest+3*16(job), %xmm3
123
124 vmovq %xmm0, _args_digest(state, lane, 8)
125 vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8)
126 vmovq %xmm1, _args_digest+2*32(state , lane, 8)
127 vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8)
128 vmovq %xmm2, _args_digest+4*32(state , lane, 8)
129 vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8)
130 vmovq %xmm3, _args_digest+6*32(state , lane, 8)
131 vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8)
132
133 mov _buffer(job), p
134 mov p, _args_data_ptr(state, lane, 8)
135
136 cmp $0xFF, unused_lanes
137 jne return_null
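	# Unless this submission filled the last free lane (leaving only
	# the 0xFF sentinel on the stack), return NULL here so further
	# jobs can be batched before the 4-lane hash kernel is run.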
138
139start_loop:
140
141 # Find min length
142 mov _lens+0*8(state),lens0
143 mov lens0,idx
144 mov _lens+1*8(state),lens1
145 cmp idx,lens1
146 cmovb lens1, idx
147 mov _lens+2*8(state),lens2
148 cmp idx,lens2
149 cmovb lens2,idx
150 mov _lens+3*8(state),lens3
151 cmp idx,lens3
152 cmovb lens3,idx
153 mov idx,len2
154 and $0xF,idx
155 and $~0xFF,len2
156 jz len_is_0
157
158 sub len2,lens0
159 sub len2,lens1
160 sub len2,lens2
161 sub len2,lens3
162 shr $32,len2
163 mov lens0, _lens + 0*8(state)
164 mov lens1, _lens + 1*8(state)
165 mov lens2, _lens + 2*8(state)
166 mov lens3, _lens + 3*8(state)
167
168 # "state" and "args" are the same address, arg1
169 # len is arg2
170 call sha512_x4_avx2
171 # state and idx are intact
172
173len_is_0:
174
175 # process completed job "idx"
176 imul $_LANE_DATA_size, idx, lane_data
177 lea _ldata(state, lane_data), lane_data
178
179 mov _job_in_lane(lane_data), job_rax
180 mov _unused_lanes(state), unused_lanes
181 movq $0, _job_in_lane(lane_data)
182 movl $STS_COMPLETED, _status(job_rax)
183 shl $8, unused_lanes
184 or idx, unused_lanes
185 mov unused_lanes, _unused_lanes(state)
186
187 movl $0xFFFFFFFF,_lens+4(state,idx,8)
188 vmovq _args_digest+0*32(state , idx, 8), %xmm0
189 vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
190 vmovq _args_digest+2*32(state , idx, 8), %xmm1
191 vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
192 vmovq _args_digest+4*32(state , idx, 8), %xmm2
193 vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
194 vmovq _args_digest+6*32(state , idx, 8), %xmm3
195 vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
196
197 vmovdqu %xmm0, _result_digest + 0*16(job_rax)
198 vmovdqu %xmm1, _result_digest + 1*16(job_rax)
199 vmovdqu %xmm2, _result_digest + 2*16(job_rax)
200 vmovdqu %xmm3, _result_digest + 3*16(job_rax)
201
202return:
203 pop %r12
204 pop %rbx
205 FRAME_END
206 ret
207
208return_null:
209 xor job_rax, job_rax
210 jmp return
211ENDPROC(sha512_mb_mgr_submit_avx2)
212
213/* UNUSED? These are the SHA-256 initial hash values, never used by SHA-512:
214.section .rodata.cst16, "aM", @progbits, 16
215.align 16
216H0: .int 0x6a09e667
217H1: .int 0xbb67ae85
218H2: .int 0x3c6ef372
219H3: .int 0xa54ff53a
220H4: .int 0x510e527f
221H5: .int 0x9b05688c
222H6: .int 0x1f83d9ab
223H7: .int 0x5be0cd19
224*/
diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
deleted file mode 100644
index e22e907643a6..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
+++ /dev/null
@@ -1,531 +0,0 @@
1/*
2 * Multi-buffer SHA512 algorithm hash compute routine
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2016 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * Contact Information:
21 * Megha Dey <megha.dey@linux.intel.com>
22 *
23 * BSD LICENSE
24 *
25 * Copyright(c) 2016 Intel Corporation.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in
35 * the documentation and/or other materials provided with the
36 * distribution.
37 * * Neither the name of Intel Corporation nor the names of its
38 * contributors may be used to endorse or promote products derived
39 * from this software without specific prior written permission.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
44 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
45 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54# code to compute quad SHA512 using AVX2
55# use YMMs to tackle the larger digest size
56# outer calling routine takes care of save and restore of XMM registers
57# Logic designed/laid out by JDG
58
59# Function clobbers: rax, rbx, rsi, r8-r11; ymm0-15
60# Stack must be aligned to 32 bytes before call
61# Linux clobbers: rax rbx rsi r8 r9 r10 r11
62# Linux preserves: rcx rdx rdi rbp r12 r13 r14 r15
63# clobbers ymm0-15
64
65#include <linux/linkage.h>
66#include "sha512_mb_mgr_datastruct.S"
67
68arg1 = %rdi
69arg2 = %rsi
70
71# Common definitions
72STATE = arg1
73INP_SIZE = arg2
74
75IDX = %rax
76ROUND = %rbx
77TBL = %r8
78
79inp0 = %r9
80inp1 = %r10
81inp2 = %r11
82inp3 = %r12
83
84a = %ymm0
85b = %ymm1
86c = %ymm2
87d = %ymm3
88e = %ymm4
89f = %ymm5
90g = %ymm6
91h = %ymm7
92
93a0 = %ymm8
94a1 = %ymm9
95a2 = %ymm10
96
97TT0 = %ymm14
98TT1 = %ymm13
99TT2 = %ymm12
100TT3 = %ymm11
101TT4 = %ymm10
102TT5 = %ymm9
103
104T1 = %ymm14
105TMP = %ymm15
106
107# Define stack usage
108STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
109
110#define VMOVPD vmovupd
111_digest = SZ4*16
112
113# transpose r0, r1, r2, r3, t0, t1
114# "transpose" data in {r0..r3} using temps {t0..t3}
115# Input looks like: {r0 r1 r2 r3}
116# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
117# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
118# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
119# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
120#
121# output looks like: {t0 r1 r0 r3}
122# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
123# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
124# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
125# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
126
127.macro TRANSPOSE r0 r1 r2 r3 t0 t1
128 vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
129 vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
130 vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
131 vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
132
133 vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6
134 vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2
135 vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5
136 vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1
137.endm
138
139.macro ROTATE_ARGS
140TMP_ = h
141h = g
142g = f
143f = e
144e = d
145d = c
146c = b
147b = a
148a = TMP_
149.endm
150
151# PRORQ reg, imm, tmp
152# packed-rotate-right-double
153# does a rotate by doing two shifts and an or
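# i.e. reg = (reg >> imm) | (reg << (64 - imm)), a right rotate by imm bits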
154.macro _PRORQ reg imm tmp
155 vpsllq $(64-\imm),\reg,\tmp
156 vpsrlq $\imm,\reg, \reg
157 vpor \tmp,\reg, \reg
158.endm
159
160# non-destructive
161# PRORQ_nd reg, imm, tmp, src
162.macro _PRORQ_nd reg imm tmp src
163 vpsllq $(64-\imm), \src, \tmp
164 vpsrlq $\imm, \src, \reg
165 vpor \tmp, \reg, \reg
166.endm
167
168# PRORQ dst/src, amt
169.macro PRORQ reg imm
170 _PRORQ \reg, \imm, TMP
171.endm
172
173# PRORQ_nd dst, src, amt
174.macro PRORQ_nd reg tmp imm
175 _PRORQ_nd \reg, \imm, TMP, \tmp
176.endm
177
178#; arguments passed implicitly in preprocessor symbols i, a...h
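#; Each body computes one SHA-512 round for all four lanes:
#;	h += W + K + Ch(e,f,g) + Sigma1(e);  d += h;
#;	h += Maj(a,b,c) + Sigma0(a);
#; then ROTATE_ARGS renames the working variables for the next round.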
179.macro ROUND_00_15 _T1 i
180	PRORQ_nd a0, e, (18-14)	# sig1: a0 = (e ror 4)
181
182 vpxor g, f, a2 # ch: a2 = f^g
183 vpand e,a2, a2 # ch: a2 = (f^g)&e
184 vpxor g, a2, a2 # a2 = ch
185
186	PRORQ_nd a1,e,41	# sig1: a1 = (e ror 41)
187
188 offset = SZ4*(\i & 0xf)
189 vmovdqu \_T1,offset(%rsp)
190 vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
191	vpxor	e,a0, a0	# sig1: a0 = e ^ (e ror 4)
192	PRORQ	a0, 14		# sig1: a0 = (e ror 14) ^ (e ror 18)
193 vpaddq a2, h, h # h = h + ch
194	PRORQ_nd a2,a,6		# sig0: a2 = (a ror 6)
195 vpaddq \_T1,h, h # h = h + ch + W + K
196 vpxor a1, a0, a0 # a0 = sigma1
197 vmovdqu a,\_T1
198	PRORQ_nd a1,a,39	# sig0: a1 = (a ror 39)
199 vpxor c, \_T1, \_T1 # maj: T1 = a^c
200 add $SZ4, ROUND # ROUND++
201 vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
202 vpaddq a0, h, h
203 vpaddq h, d, d
204	vpxor	a, a2, a2	# sig0: a2 = a ^ (a ror 6)
205	PRORQ	a2,28		# sig0: a2 = (a ror 28) ^ (a ror 34)
206 vpxor a1, a2, a2 # a2 = sig0
207 vpand c, a, a1 # maj: a1 = a&c
208 vpor \_T1, a1, a1 # a1 = maj
209 vpaddq a1, h, h # h = h + ch + W + K + maj
210 vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0
211 ROTATE_ARGS
212.endm
213
214
215#; arguments passed implicitly in preprocessor symbols i, a...h
216.macro ROUND_16_XX _T1 i
217 vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
218 vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
219 vmovdqu \_T1, a0
220 PRORQ \_T1,7
221 vmovdqu a1, a2
222 PRORQ a1,42
223 vpxor a0, \_T1, \_T1
224 PRORQ \_T1, 1
225 vpxor a2, a1, a1
226 PRORQ a1, 19
227 vpsrlq $7, a0, a0
228 vpxor a0, \_T1, \_T1
229 vpsrlq $6, a2, a2
230 vpxor a2, a1, a1
231 vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
232 vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1
233 vpaddq a1, \_T1, \_T1
234
235 ROUND_00_15 \_T1,\i
236.endm
237
238
239# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
240# arg 1 : STATE : pointer to args state (transposed digests plus data pointers)
241# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
242ENTRY(sha512_x4_avx2)
243 # general registers preserved in outer calling routine
244 # outer calling routine saves all the XMM registers
245 # save callee-saved clobbered registers to comply with C function ABI
246 push %r12
247 push %r13
248 push %r14
249 push %r15
250
251 sub $STACK_SPACE1, %rsp
252
253 # Load the pre-transposed incoming digest.
254 vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
255 vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
256 vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
257 vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
258 vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
259 vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
260 vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
261 vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
262
263 lea K512_4(%rip),TBL
264
265 # load the address of each of the 4 message lanes
266 # getting ready to transpose input onto stack
267 mov _data_ptr+0*PTR_SZ(STATE),inp0
268 mov _data_ptr+1*PTR_SZ(STATE),inp1
269 mov _data_ptr+2*PTR_SZ(STATE),inp2
270 mov _data_ptr+3*PTR_SZ(STATE),inp3
271
272 xor IDX, IDX
273lloop:
274 xor ROUND, ROUND
275
276 # save old digest
277 vmovdqu a, _digest(%rsp)
278 vmovdqu b, _digest+1*SZ4(%rsp)
279 vmovdqu c, _digest+2*SZ4(%rsp)
280 vmovdqu d, _digest+3*SZ4(%rsp)
281 vmovdqu e, _digest+4*SZ4(%rsp)
282 vmovdqu f, _digest+5*SZ4(%rsp)
283 vmovdqu g, _digest+6*SZ4(%rsp)
284 vmovdqu h, _digest+7*SZ4(%rsp)
285 i = 0
286.rep 4
287 vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
288 VMOVPD i*32(inp0, IDX), TT2
289 VMOVPD i*32(inp1, IDX), TT1
290 VMOVPD i*32(inp2, IDX), TT4
291 VMOVPD i*32(inp3, IDX), TT3
292 TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5
293 vpshufb TMP, TT0, TT0
294 vpshufb TMP, TT1, TT1
295 vpshufb TMP, TT2, TT2
296 vpshufb TMP, TT3, TT3
297 ROUND_00_15 TT0,(i*4+0)
298 ROUND_00_15 TT1,(i*4+1)
299 ROUND_00_15 TT2,(i*4+2)
300 ROUND_00_15 TT3,(i*4+3)
301 i = (i+1)
302.endr
303 add $128, IDX
304
305 i = (i*4)
306
307 jmp Lrounds_16_xx
308.align 16
309Lrounds_16_xx:
310.rep 16
311 ROUND_16_XX T1, i
312 i = (i+1)
313.endr
314 cmp $0xa00,ROUND
315 jb Lrounds_16_xx
316
317 # add old digest
318 vpaddq _digest(%rsp), a, a
319 vpaddq _digest+1*SZ4(%rsp), b, b
320 vpaddq _digest+2*SZ4(%rsp), c, c
321 vpaddq _digest+3*SZ4(%rsp), d, d
322 vpaddq _digest+4*SZ4(%rsp), e, e
323 vpaddq _digest+5*SZ4(%rsp), f, f
324 vpaddq _digest+6*SZ4(%rsp), g, g
325 vpaddq _digest+7*SZ4(%rsp), h, h
326
327 sub $1, INP_SIZE # unit is blocks
328 jne lloop
329
330 # write back to memory (state object) the transposed digest
331 vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
332 vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
333 vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
334 vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
335 vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
336 vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
337 vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
338 vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
339
340 # update input data pointers
341 add IDX, inp0
342 mov inp0, _data_ptr+0*PTR_SZ(STATE)
343 add IDX, inp1
344 mov inp1, _data_ptr+1*PTR_SZ(STATE)
345 add IDX, inp2
346 mov inp2, _data_ptr+2*PTR_SZ(STATE)
347 add IDX, inp3
348 mov inp3, _data_ptr+3*PTR_SZ(STATE)
349
350 #;;;;;;;;;;;;;;;
351 #; Postamble
352 add $STACK_SPACE1, %rsp
353 # restore callee-saved clobbered registers
354
355 pop %r15
356 pop %r14
357 pop %r13
358 pop %r12
359
360 # outer calling routine restores XMM and other GP registers
361 ret
362ENDPROC(sha512_x4_avx2)
363
364.section .rodata.K512_4, "a", @progbits
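# The 80 SHA-512 round constants, each replicated into all four 64-bit
# lanes of a YMM row (each .octa holds the constant doubled; two .octa
# per constant).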
365.align 64
366K512_4:
367 .octa 0x428a2f98d728ae22428a2f98d728ae22,\
368 0x428a2f98d728ae22428a2f98d728ae22
369 .octa 0x7137449123ef65cd7137449123ef65cd,\
370 0x7137449123ef65cd7137449123ef65cd
371 .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
372 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
373 .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
374 0xe9b5dba58189dbbce9b5dba58189dbbc
375 .octa 0x3956c25bf348b5383956c25bf348b538,\
376 0x3956c25bf348b5383956c25bf348b538
377 .octa 0x59f111f1b605d01959f111f1b605d019,\
378 0x59f111f1b605d01959f111f1b605d019
379 .octa 0x923f82a4af194f9b923f82a4af194f9b,\
380 0x923f82a4af194f9b923f82a4af194f9b
381 .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
382 0xab1c5ed5da6d8118ab1c5ed5da6d8118
383 .octa 0xd807aa98a3030242d807aa98a3030242,\
384 0xd807aa98a3030242d807aa98a3030242
385 .octa 0x12835b0145706fbe12835b0145706fbe,\
386 0x12835b0145706fbe12835b0145706fbe
387 .octa 0x243185be4ee4b28c243185be4ee4b28c,\
388 0x243185be4ee4b28c243185be4ee4b28c
389 .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
390 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
391 .octa 0x72be5d74f27b896f72be5d74f27b896f,\
392 0x72be5d74f27b896f72be5d74f27b896f
393 .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
394 0x80deb1fe3b1696b180deb1fe3b1696b1
395 .octa 0x9bdc06a725c712359bdc06a725c71235,\
396 0x9bdc06a725c712359bdc06a725c71235
397 .octa 0xc19bf174cf692694c19bf174cf692694,\
398 0xc19bf174cf692694c19bf174cf692694
399 .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
400 0xe49b69c19ef14ad2e49b69c19ef14ad2
401 .octa 0xefbe4786384f25e3efbe4786384f25e3,\
402 0xefbe4786384f25e3efbe4786384f25e3
403 .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
404 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
405 .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
406 0x240ca1cc77ac9c65240ca1cc77ac9c65
407 .octa 0x2de92c6f592b02752de92c6f592b0275,\
408 0x2de92c6f592b02752de92c6f592b0275
409 .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
410 0x4a7484aa6ea6e4834a7484aa6ea6e483
411 .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
412 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
413 .octa 0x76f988da831153b576f988da831153b5,\
414 0x76f988da831153b576f988da831153b5
415 .octa 0x983e5152ee66dfab983e5152ee66dfab,\
416 0x983e5152ee66dfab983e5152ee66dfab
417 .octa 0xa831c66d2db43210a831c66d2db43210,\
418 0xa831c66d2db43210a831c66d2db43210
419 .octa 0xb00327c898fb213fb00327c898fb213f,\
420 0xb00327c898fb213fb00327c898fb213f
421 .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
422 0xbf597fc7beef0ee4bf597fc7beef0ee4
423 .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
424 0xc6e00bf33da88fc2c6e00bf33da88fc2
425 .octa 0xd5a79147930aa725d5a79147930aa725,\
426 0xd5a79147930aa725d5a79147930aa725
427 .octa 0x06ca6351e003826f06ca6351e003826f,\
428 0x06ca6351e003826f06ca6351e003826f
429 .octa 0x142929670a0e6e70142929670a0e6e70,\
430 0x142929670a0e6e70142929670a0e6e70
431 .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
432 0x27b70a8546d22ffc27b70a8546d22ffc
433 .octa 0x2e1b21385c26c9262e1b21385c26c926,\
434 0x2e1b21385c26c9262e1b21385c26c926
435 .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
436 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
437 .octa 0x53380d139d95b3df53380d139d95b3df,\
438 0x53380d139d95b3df53380d139d95b3df
439 .octa 0x650a73548baf63de650a73548baf63de,\
440 0x650a73548baf63de650a73548baf63de
441 .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
442 0x766a0abb3c77b2a8766a0abb3c77b2a8
443 .octa 0x81c2c92e47edaee681c2c92e47edaee6,\
444 0x81c2c92e47edaee681c2c92e47edaee6
445 .octa 0x92722c851482353b92722c851482353b,\
446 0x92722c851482353b92722c851482353b
447 .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
448 0xa2bfe8a14cf10364a2bfe8a14cf10364
449 .octa 0xa81a664bbc423001a81a664bbc423001,\
450 0xa81a664bbc423001a81a664bbc423001
451 .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
452 0xc24b8b70d0f89791c24b8b70d0f89791
453 .octa 0xc76c51a30654be30c76c51a30654be30,\
454 0xc76c51a30654be30c76c51a30654be30
455 .octa 0xd192e819d6ef5218d192e819d6ef5218,\
456 0xd192e819d6ef5218d192e819d6ef5218
457 .octa 0xd69906245565a910d69906245565a910,\
458 0xd69906245565a910d69906245565a910
459 .octa 0xf40e35855771202af40e35855771202a,\
460 0xf40e35855771202af40e35855771202a
461 .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
462 0x106aa07032bbd1b8106aa07032bbd1b8
463 .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
464 0x19a4c116b8d2d0c819a4c116b8d2d0c8
465 .octa 0x1e376c085141ab531e376c085141ab53,\
466 0x1e376c085141ab531e376c085141ab53
467 .octa 0x2748774cdf8eeb992748774cdf8eeb99,\
468 0x2748774cdf8eeb992748774cdf8eeb99
469 .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
470 0x34b0bcb5e19b48a834b0bcb5e19b48a8
471 .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
472 0x391c0cb3c5c95a63391c0cb3c5c95a63
473 .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
474 0x4ed8aa4ae3418acb4ed8aa4ae3418acb
475 .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
476 0x5b9cca4f7763e3735b9cca4f7763e373
477 .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
478 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
479 .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
480 0x748f82ee5defb2fc748f82ee5defb2fc
481 .octa 0x78a5636f43172f6078a5636f43172f60,\
482 0x78a5636f43172f6078a5636f43172f60
483 .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
484 0x84c87814a1f0ab7284c87814a1f0ab72
485 .octa 0x8cc702081a6439ec8cc702081a6439ec,\
486 0x8cc702081a6439ec8cc702081a6439ec
487 .octa 0x90befffa23631e2890befffa23631e28,\
488 0x90befffa23631e2890befffa23631e28
489 .octa 0xa4506cebde82bde9a4506cebde82bde9,\
490 0xa4506cebde82bde9a4506cebde82bde9
491 .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
492 0xbef9a3f7b2c67915bef9a3f7b2c67915
493 .octa 0xc67178f2e372532bc67178f2e372532b,\
494 0xc67178f2e372532bc67178f2e372532b
495 .octa 0xca273eceea26619cca273eceea26619c,\
496 0xca273eceea26619cca273eceea26619c
497 .octa 0xd186b8c721c0c207d186b8c721c0c207,\
498 0xd186b8c721c0c207d186b8c721c0c207
499 .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
500 0xeada7dd6cde0eb1eeada7dd6cde0eb1e
501 .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
502 0xf57d4f7fee6ed178f57d4f7fee6ed178
503 .octa 0x06f067aa72176fba06f067aa72176fba,\
504 0x06f067aa72176fba06f067aa72176fba
505 .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
506 0x0a637dc5a2c898a60a637dc5a2c898a6
507 .octa 0x113f9804bef90dae113f9804bef90dae,\
508 0x113f9804bef90dae113f9804bef90dae
509 .octa 0x1b710b35131c471b1b710b35131c471b,\
510 0x1b710b35131c471b1b710b35131c471b
511 .octa 0x28db77f523047d8428db77f523047d84,\
512 0x28db77f523047d8428db77f523047d84
513 .octa 0x32caab7b40c7249332caab7b40c72493,\
514 0x32caab7b40c7249332caab7b40c72493
515 .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
516 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
517 .octa 0x431d67c49c100d4c431d67c49c100d4c,\
518 0x431d67c49c100d4c431d67c49c100d4c
519 .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
520 0x4cc5d4becb3e42b64cc5d4becb3e42b6
521 .octa 0x597f299cfc657e2a597f299cfc657e2a,\
522 0x597f299cfc657e2a597f299cfc657e2a
523 .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
524 0x5fcb6fab3ad6faec5fcb6fab3ad6faec
525 .octa 0x6c44198c4a4758176c44198c4a475817,\
526 0x6c44198c4a4758176c44198c4a475817
527
528.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
529.align 32
530PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
531 .octa 0x18191a1b1c1d1e1f1011121314151617
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 59e32623a7ce..90f2811fac5f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -213,20 +213,6 @@ config CRYPTO_CRYPTD
213 converts an arbitrary synchronous software crypto algorithm 213 converts an arbitrary synchronous software crypto algorithm
214 into an asynchronous algorithm that executes in a kernel thread. 214 into an asynchronous algorithm that executes in a kernel thread.
215 215
216config CRYPTO_MCRYPTD
217 tristate "Software async multi-buffer crypto daemon"
218 select CRYPTO_BLKCIPHER
219 select CRYPTO_HASH
220 select CRYPTO_MANAGER
221 select CRYPTO_WORKQUEUE
222 help
223 This is a generic software asynchronous crypto daemon that
224 provides the kernel thread to assist multi-buffer crypto
225 algorithms for submitting jobs and flushing jobs in multi-buffer
226 crypto algorithms. Multi-buffer crypto algorithms are executed
227 in the context of this kernel thread and drivers can post
228 their crypto request asynchronously to be processed by this daemon.
229
230config CRYPTO_AUTHENC 216config CRYPTO_AUTHENC
231 tristate "Authenc support" 217 tristate "Authenc support"
232 select CRYPTO_AEAD 218 select CRYPTO_AEAD
@@ -848,54 +834,6 @@ config CRYPTO_SHA1_PPC_SPE
848 SHA-1 secure hash standard (DFIPS 180-4) implemented 834 SHA-1 secure hash standard (DFIPS 180-4) implemented
849 using powerpc SPE SIMD instruction set. 835 using powerpc SPE SIMD instruction set.
850 836
851config CRYPTO_SHA1_MB
852 tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
853 depends on X86 && 64BIT
854 select CRYPTO_SHA1
855 select CRYPTO_HASH
856 select CRYPTO_MCRYPTD
857 help
858 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
859 using multi-buffer technique. This algorithm computes on
860 multiple data lanes concurrently with SIMD instructions for
861 better throughput. It should not be enabled by default but
862	  used when there is a significant amount of work to keep the
863	  data lanes filled to get a performance benefit. If the data
864 lanes remain unfilled, a flush operation will be initiated to
865 process the crypto jobs, adding a slight latency.
866
867config CRYPTO_SHA256_MB
868 tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)"
869 depends on X86 && 64BIT
870 select CRYPTO_SHA256
871 select CRYPTO_HASH
872 select CRYPTO_MCRYPTD
873 help
874 SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
875 using multi-buffer technique. This algorithm computes on
876 multiple data lanes concurrently with SIMD instructions for
877 better throughput. It should not be enabled by default but
878	  used when there is a significant amount of work to keep the
879	  data lanes filled to get a performance benefit. If the data
880 lanes remain unfilled, a flush operation will be initiated to
881 process the crypto jobs, adding a slight latency.
882
883config CRYPTO_SHA512_MB
884 tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)"
885 depends on X86 && 64BIT
886 select CRYPTO_SHA512
887 select CRYPTO_HASH
888 select CRYPTO_MCRYPTD
889 help
890 SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
891 using multi-buffer technique. This algorithm computes on
892 multiple data lanes concurrently with SIMD instructions for
893 better throughput. It should not be enabled by default but
894	  used when there is a significant amount of work to keep the
895	  data lanes filled to get a performance benefit. If the data
896 lanes remain unfilled, a flush operation will be initiated to
897 process the crypto jobs, adding a slight latency.
898
899config CRYPTO_SHA256 837config CRYPTO_SHA256
900 tristate "SHA224 and SHA256 digest algorithm" 838 tristate "SHA224 and SHA256 digest algorithm"
901 select CRYPTO_HASH 839 select CRYPTO_HASH
diff --git a/crypto/Makefile b/crypto/Makefile
index f6a234d08882..d719843f8b6e 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -93,7 +93,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
93obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o 93obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
94obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o 94obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
95obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o 95obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
96obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
97obj-$(CONFIG_CRYPTO_DES) += des_generic.o 96obj-$(CONFIG_CRYPTO_DES) += des_generic.o
98obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o 97obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
99obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o 98obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
deleted file mode 100644
index f14152147ce8..000000000000
--- a/crypto/mcryptd.c
+++ /dev/null
@@ -1,675 +0,0 @@
1/*
2 * Software multibuffer async crypto daemon.
3 *
4 * Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com>
5 *
6 * Adapted from crypto daemon.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
11 * any later version.
12 *
13 */
14
15#include <crypto/algapi.h>
16#include <crypto/internal/hash.h>
17#include <crypto/internal/aead.h>
18#include <crypto/mcryptd.h>
19#include <crypto/crypto_wq.h>
20#include <linux/err.h>
21#include <linux/init.h>
22#include <linux/kernel.h>
23#include <linux/list.h>
24#include <linux/module.h>
25#include <linux/scatterlist.h>
26#include <linux/sched.h>
27#include <linux/sched/stat.h>
28#include <linux/slab.h>
29
30#define MCRYPTD_MAX_CPU_QLEN 100
31#define MCRYPTD_BATCH 9
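/*
 * MCRYPTD_MAX_CPU_QLEN bounds each per-CPU request queue;
 * MCRYPTD_BATCH bounds how many requests mcryptd_queue_worker
 * handles per invocation before it reschedules itself.
 */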
32
33static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
34 unsigned int tail);
35
36struct mcryptd_flush_list {
37 struct list_head list;
38 struct mutex lock;
39};
40
41static struct mcryptd_flush_list __percpu *mcryptd_flist;
42
43struct hashd_instance_ctx {
44 struct crypto_ahash_spawn spawn;
45 struct mcryptd_queue *queue;
46};
47
48static void mcryptd_queue_worker(struct work_struct *work);
49
50void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay)
51{
52 struct mcryptd_flush_list *flist;
53
54 if (!cstate->flusher_engaged) {
55 /* put the flusher on the flush list */
56 flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
57 mutex_lock(&flist->lock);
58 list_add_tail(&cstate->flush_list, &flist->list);
59 cstate->flusher_engaged = true;
60 cstate->next_flush = jiffies + delay;
61 queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
62 &cstate->flush, delay);
63 mutex_unlock(&flist->lock);
64 }
65}
66EXPORT_SYMBOL(mcryptd_arm_flusher);
67
68static int mcryptd_init_queue(struct mcryptd_queue *queue,
69 unsigned int max_cpu_qlen)
70{
71 int cpu;
72 struct mcryptd_cpu_queue *cpu_queue;
73
74 queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue);
75 pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue);
76 if (!queue->cpu_queue)
77 return -ENOMEM;
78 for_each_possible_cpu(cpu) {
79 cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
80 pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
81 crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
82 INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
83 spin_lock_init(&cpu_queue->q_lock);
84 }
85 return 0;
86}
87
88static void mcryptd_fini_queue(struct mcryptd_queue *queue)
89{
90 int cpu;
91 struct mcryptd_cpu_queue *cpu_queue;
92
93 for_each_possible_cpu(cpu) {
94 cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
95 BUG_ON(cpu_queue->queue.qlen);
96 }
97 free_percpu(queue->cpu_queue);
98}
99
100static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
101 struct crypto_async_request *request,
102 struct mcryptd_hash_request_ctx *rctx)
103{
104 int cpu, err;
105 struct mcryptd_cpu_queue *cpu_queue;
106
107 cpu_queue = raw_cpu_ptr(queue->cpu_queue);
108 spin_lock(&cpu_queue->q_lock);
109 cpu = smp_processor_id();
110 rctx->tag.cpu = smp_processor_id();
111
112 err = crypto_enqueue_request(&cpu_queue->queue, request);
113 pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
114 cpu, cpu_queue, request);
115 spin_unlock(&cpu_queue->q_lock);
116 queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
117
118 return err;
119}
120
121/*
122 * Try to opportunistically flush the partially completed jobs if
123 * the crypto daemon is the only task running.
124 */
125static void mcryptd_opportunistic_flush(void)
126{
127 struct mcryptd_flush_list *flist;
128 struct mcryptd_alg_cstate *cstate;
129
130 flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
131 while (single_task_running()) {
132 mutex_lock(&flist->lock);
133 cstate = list_first_entry_or_null(&flist->list,
134 struct mcryptd_alg_cstate, flush_list);
135 if (!cstate || !cstate->flusher_engaged) {
136 mutex_unlock(&flist->lock);
137 return;
138 }
139 list_del(&cstate->flush_list);
140 cstate->flusher_engaged = false;
141 mutex_unlock(&flist->lock);
142 cstate->alg_state->flusher(cstate);
143 }
144}
145
146/*
147 * Called in workqueue context: performs one piece of real crypto
148 * work (via req->complete) and reschedules itself if there is more
149 * work to do.
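 *
 * A batch is capped at MCRYPTD_BATCH requests per invocation unless
 * this worker is the only runnable task; once the queue drains, any
 * partially filled multi-buffer lanes are flushed opportunistically.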
150 */
151static void mcryptd_queue_worker(struct work_struct *work)
152{
153 struct mcryptd_cpu_queue *cpu_queue;
154 struct crypto_async_request *req, *backlog;
155 int i;
156
157 /*
158	 * Need to loop through more than one request per invocation
159	 * for multi-buffer to be effective.
160 */
161
162 cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
163 i = 0;
164 while (i < MCRYPTD_BATCH || single_task_running()) {
165
166 spin_lock_bh(&cpu_queue->q_lock);
167 backlog = crypto_get_backlog(&cpu_queue->queue);
168 req = crypto_dequeue_request(&cpu_queue->queue);
169 spin_unlock_bh(&cpu_queue->q_lock);
170
171 if (!req) {
172 mcryptd_opportunistic_flush();
173 return;
174 }
175
176 if (backlog)
177 backlog->complete(backlog, -EINPROGRESS);
178 req->complete(req, 0);
179 if (!cpu_queue->queue.qlen)
180 return;
181 ++i;
182 }
183 if (cpu_queue->queue.qlen)
184 queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
185}
186
187void mcryptd_flusher(struct work_struct *__work)
188{
189 struct mcryptd_alg_cstate *alg_cpu_state;
190 struct mcryptd_alg_state *alg_state;
191 struct mcryptd_flush_list *flist;
192 int cpu;
193
194 cpu = smp_processor_id();
195 alg_cpu_state = container_of(to_delayed_work(__work),
196 struct mcryptd_alg_cstate, flush);
197 alg_state = alg_cpu_state->alg_state;
198 if (alg_cpu_state->cpu != cpu)
199 pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
200 cpu, alg_cpu_state->cpu);
201
202 if (alg_cpu_state->flusher_engaged) {
203 flist = per_cpu_ptr(mcryptd_flist, cpu);
204 mutex_lock(&flist->lock);
205 list_del(&alg_cpu_state->flush_list);
206 alg_cpu_state->flusher_engaged = false;
207 mutex_unlock(&flist->lock);
208 alg_state->flusher(alg_cpu_state);
209 }
210}
211EXPORT_SYMBOL_GPL(mcryptd_flusher);
212
213static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm)
214{
215 struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
216 struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
217
218 return ictx->queue;
219}
220
221static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
222 unsigned int tail)
223{
224 char *p;
225 struct crypto_instance *inst;
226 int err;
227
228 p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL);
229 if (!p)
230 return ERR_PTR(-ENOMEM);
231
232 inst = (void *)(p + head);
233
234 err = -ENAMETOOLONG;
235 if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
236 "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
237 goto out_free_inst;
238
239 memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
240
241 inst->alg.cra_priority = alg->cra_priority + 50;
242 inst->alg.cra_blocksize = alg->cra_blocksize;
243 inst->alg.cra_alignmask = alg->cra_alignmask;
244
245out:
246 return p;
247
248out_free_inst:
249 kfree(p);
250 p = ERR_PTR(err);
251 goto out;
252}
253
254static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type,
255 u32 *mask)
256{
257 struct crypto_attr_type *algt;
258
259 algt = crypto_get_attr_type(tb);
260 if (IS_ERR(algt))
261 return false;
262
263 *type |= algt->type & CRYPTO_ALG_INTERNAL;
264 *mask |= algt->mask & CRYPTO_ALG_INTERNAL;
265
266 if (*type & *mask & CRYPTO_ALG_INTERNAL)
267 return true;
268 else
269 return false;
270}
271
272static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
273{
274 struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
275 struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst);
276 struct crypto_ahash_spawn *spawn = &ictx->spawn;
277 struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
278 struct crypto_ahash *hash;
279
280 hash = crypto_spawn_ahash(spawn);
281 if (IS_ERR(hash))
282 return PTR_ERR(hash);
283
284 ctx->child = hash;
285 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
286 sizeof(struct mcryptd_hash_request_ctx) +
287 crypto_ahash_reqsize(hash));
288 return 0;
289}
290
291static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm)
292{
293 struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
294
295 crypto_free_ahash(ctx->child);
296}
297
298static int mcryptd_hash_setkey(struct crypto_ahash *parent,
299 const u8 *key, unsigned int keylen)
300{
301 struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent);
302 struct crypto_ahash *child = ctx->child;
303 int err;
304
305 crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
306 crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) &
307 CRYPTO_TFM_REQ_MASK);
308 err = crypto_ahash_setkey(child, key, keylen);
309 crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) &
310 CRYPTO_TFM_RES_MASK);
311 return err;
312}
313
314static int mcryptd_hash_enqueue(struct ahash_request *req,
315 crypto_completion_t complete)
316{
317 int ret;
318
319 struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
320 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
321 struct mcryptd_queue *queue =
322 mcryptd_get_queue(crypto_ahash_tfm(tfm));
323
324 rctx->complete = req->base.complete;
325 req->base.complete = complete;
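	/* the caller's completion is stashed in rctx->complete; each
	 * per-op handler invokes it (under local_bh_disable) once the
	 * inner multi-buffer hash step finishes or fails */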
326
327 ret = mcryptd_enqueue_request(queue, &req->base, rctx);
328
329 return ret;
330}
331
332static void mcryptd_hash_init(struct crypto_async_request *req_async, int err)
333{
334 struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
335 struct crypto_ahash *child = ctx->child;
336 struct ahash_request *req = ahash_request_cast(req_async);
337 struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
338 struct ahash_request *desc = &rctx->areq;
339
340 if (unlikely(err == -EINPROGRESS))
341 goto out;
342
343 ahash_request_set_tfm(desc, child);
344 ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
345 rctx->complete, req_async);
346
347 rctx->out = req->result;
348 err = crypto_ahash_init(desc);
349
350out:
351 local_bh_disable();
352 rctx->complete(&req->base, err);
353 local_bh_enable();
354}
355
356static int mcryptd_hash_init_enqueue(struct ahash_request *req)
357{
358 return mcryptd_hash_enqueue(req, mcryptd_hash_init);
359}
360
361static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
362{
363 struct ahash_request *req = ahash_request_cast(req_async);
364 struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
365
366 if (unlikely(err == -EINPROGRESS))
367 goto out;
368
369 rctx->out = req->result;
370 err = crypto_ahash_update(&rctx->areq);
371 if (err) {
372 req->base.complete = rctx->complete;
373 goto out;
374 }
375
376 return;
377out:
378 local_bh_disable();
379 rctx->complete(&req->base, err);
380 local_bh_enable();
381}
382
383static int mcryptd_hash_update_enqueue(struct ahash_request *req)
384{
385 return mcryptd_hash_enqueue(req, mcryptd_hash_update);
386}
387
388static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
389{
390 struct ahash_request *req = ahash_request_cast(req_async);
391 struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
392
393 if (unlikely(err == -EINPROGRESS))
394 goto out;
395
396 rctx->out = req->result;
397 err = crypto_ahash_final(&rctx->areq);
398 if (err) {
399 req->base.complete = rctx->complete;
400 goto out;
401 }
402
403 return;
404out:
405 local_bh_disable();
406 rctx->complete(&req->base, err);
407 local_bh_enable();
408}
409
410static int mcryptd_hash_final_enqueue(struct ahash_request *req)
411{
412 return mcryptd_hash_enqueue(req, mcryptd_hash_final);
413}
414
415static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
416{
417 struct ahash_request *req = ahash_request_cast(req_async);
418 struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
419
420 if (unlikely(err == -EINPROGRESS))
421 goto out;
422 rctx->out = req->result;
423 err = crypto_ahash_finup(&rctx->areq);
424
425 if (err) {
426 req->base.complete = rctx->complete;
427 goto out;
428 }
429
430 return;
431out:
432 local_bh_disable();
433 rctx->complete(&req->base, err);
434 local_bh_enable();
435}
436
437static int mcryptd_hash_finup_enqueue(struct ahash_request *req)
438{
439 return mcryptd_hash_enqueue(req, mcryptd_hash_finup);
440}
441
442static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
443{
444 struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
445 struct crypto_ahash *child = ctx->child;
446 struct ahash_request *req = ahash_request_cast(req_async);
447 struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
448 struct ahash_request *desc = &rctx->areq;
449
450 if (unlikely(err == -EINPROGRESS))
451 goto out;
452
453 ahash_request_set_tfm(desc, child);
454 ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
455 rctx->complete, req_async);
456
457 rctx->out = req->result;
458 err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
459
460out:
461 local_bh_disable();
462 rctx->complete(&req->base, err);
463 local_bh_enable();
464}
465
466static int mcryptd_hash_digest_enqueue(struct ahash_request *req)
467{
468 return mcryptd_hash_enqueue(req, mcryptd_hash_digest);
469}
470
471static int mcryptd_hash_export(struct ahash_request *req, void *out)
472{
473 struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
474
475 return crypto_ahash_export(&rctx->areq, out);
476}
477
478static int mcryptd_hash_import(struct ahash_request *req, const void *in)
479{
480 struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
481
482 return crypto_ahash_import(&rctx->areq, in);
483}
484
485static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
486 struct mcryptd_queue *queue)
487{
488 struct hashd_instance_ctx *ctx;
489 struct ahash_instance *inst;
490 struct hash_alg_common *halg;
491 struct crypto_alg *alg;
492 u32 type = 0;
493 u32 mask = 0;
494 int err;
495
496 if (!mcryptd_check_internal(tb, &type, &mask))
497 return -EINVAL;
498
499 halg = ahash_attr_alg(tb[1], type, mask);
500 if (IS_ERR(halg))
501 return PTR_ERR(halg);
502
503 alg = &halg->base;
504 pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name);
505 inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(),
506 sizeof(*ctx));
507 err = PTR_ERR(inst);
508 if (IS_ERR(inst))
509 goto out_put_alg;
510
511 ctx = ahash_instance_ctx(inst);
512 ctx->queue = queue;
513
514 err = crypto_init_ahash_spawn(&ctx->spawn, halg,
515 ahash_crypto_instance(inst));
516 if (err)
517 goto out_free_inst;
518
519 inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
520 (alg->cra_flags & (CRYPTO_ALG_INTERNAL |
521 CRYPTO_ALG_OPTIONAL_KEY));
522
523 inst->alg.halg.digestsize = halg->digestsize;
524 inst->alg.halg.statesize = halg->statesize;
525 inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
526
527 inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm;
528 inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm;
529
530 inst->alg.init = mcryptd_hash_init_enqueue;
531 inst->alg.update = mcryptd_hash_update_enqueue;
532 inst->alg.final = mcryptd_hash_final_enqueue;
533 inst->alg.finup = mcryptd_hash_finup_enqueue;
534 inst->alg.export = mcryptd_hash_export;
535 inst->alg.import = mcryptd_hash_import;
536 if (crypto_hash_alg_has_setkey(halg))
537 inst->alg.setkey = mcryptd_hash_setkey;
538 inst->alg.digest = mcryptd_hash_digest_enqueue;
539
540 err = ahash_register_instance(tmpl, inst);
541 if (err) {
542 crypto_drop_ahash(&ctx->spawn);
543out_free_inst:	/* also reached via the goto on spawn-init failure above */
544 kfree(inst);
545 }
546
547out_put_alg:
548 crypto_mod_put(alg);
549 return err;
550}
551
552static struct mcryptd_queue mqueue;
553
554static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
555{
556 struct crypto_attr_type *algt;
557
558 algt = crypto_get_attr_type(tb);
559 if (IS_ERR(algt))
560 return PTR_ERR(algt);
561
562 switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
563 case CRYPTO_ALG_TYPE_DIGEST:
564 return mcryptd_create_hash(tmpl, tb, &mqueue);
566 }
567
568 return -EINVAL;
569}
570
571static void mcryptd_free(struct crypto_instance *inst)
572{
573	struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst);	/* generic view */
574	struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);	/* ahash view of the same ctx */
575
576 switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
577 case CRYPTO_ALG_TYPE_AHASH:
578 crypto_drop_ahash(&hctx->spawn);
579 kfree(ahash_instance(inst));
580 return;
581 default:
582 crypto_drop_spawn(&ctx->spawn);
583 kfree(inst);
584 }
585}
586
587static struct crypto_template mcryptd_tmpl = {
588 .name = "mcryptd",
589 .create = mcryptd_create,
590 .free = mcryptd_free,
591 .module = THIS_MODULE,
592};
593
594struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
595 u32 type, u32 mask)
596{
597 char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME];
598 struct crypto_ahash *tfm;
599
600 if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME,
601 "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
602 return ERR_PTR(-EINVAL);
603 tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask);
604 if (IS_ERR(tfm))
605 return ERR_CAST(tfm);
606 if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
607 crypto_free_ahash(tfm);
608 return ERR_PTR(-EINVAL);
609 }
610
611 return __mcryptd_ahash_cast(tfm);
612}
613EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
614
615struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
616{
617 struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
618
619 return ctx->child;
620}
621EXPORT_SYMBOL_GPL(mcryptd_ahash_child);
622
623struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req)
624{
625 struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
626 return &rctx->areq;
627}
628EXPORT_SYMBOL_GPL(mcryptd_ahash_desc);
629
630void mcryptd_free_ahash(struct mcryptd_ahash *tfm)
631{
632 crypto_free_ahash(&tfm->base);
633}
634EXPORT_SYMBOL_GPL(mcryptd_free_ahash);
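These four exports formed mcryptd's whole external API; a minimal caller sketch in the style of the sha*-mb glue removed elsewhere in this commit (the algorithm name "__hypothetical-mb" is illustrative, standing in for an internal multibuffer implementation):

	struct mcryptd_ahash *mtfm;
	struct crypto_ahash *child;

	mtfm = mcryptd_alloc_ahash("__hypothetical-mb",
				   CRYPTO_ALG_INTERNAL, CRYPTO_ALG_INTERNAL);
	if (IS_ERR(mtfm))
		return PTR_ERR(mtfm);

	child = mcryptd_ahash_child(mtfm);	/* the wrapped, inner tfm */

	/* ... drive an ahash_request against &mtfm->base; inside the
	 * completion path, mcryptd_ahash_desc() yields the inner request ... */

	mcryptd_free_ahash(mtfm);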
635
636static int __init mcryptd_init(void)
637{
638 int err, cpu;
639 struct mcryptd_flush_list *flist;
640
641	mcryptd_flist = alloc_percpu(struct mcryptd_flush_list);
	if (!mcryptd_flist)		/* alloc_percpu() can fail */
		return -ENOMEM;
642 for_each_possible_cpu(cpu) {
643 flist = per_cpu_ptr(mcryptd_flist, cpu);
644 INIT_LIST_HEAD(&flist->list);
645 mutex_init(&flist->lock);
646 }
647
648 err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN);
649 if (err) {
650 free_percpu(mcryptd_flist);
651 return err;
652 }
653
654 err = crypto_register_template(&mcryptd_tmpl);
655 if (err) {
656 mcryptd_fini_queue(&mqueue);
657 free_percpu(mcryptd_flist);
658 }
659
660 return err;
661}
662
663static void __exit mcryptd_exit(void)
664{
665 mcryptd_fini_queue(&mqueue);
666 crypto_unregister_template(&mcryptd_tmpl);
667 free_percpu(mcryptd_flist);
668}
669
670subsys_initcall(mcryptd_init);
671module_exit(mcryptd_exit);
672
673MODULE_LICENSE("GPL");
674MODULE_DESCRIPTION("Software async multibuffer crypto daemon");
675MODULE_ALIAS_CRYPTO("mcryptd");
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
deleted file mode 100644
index b67404fc4b34..000000000000
--- a/include/crypto/mcryptd.h
+++ /dev/null
@@ -1,114 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Software async multibuffer crypto daemon headers
4 *
5 * Author:
6 * Tim Chen <tim.c.chen@linux.intel.com>
7 *
8 * Copyright (c) 2014, Intel Corporation.
9 */
10
11#ifndef _CRYPTO_MCRYPT_H
12#define _CRYPTO_MCRYPT_H
13
14#include <linux/crypto.h>
15#include <linux/kernel.h>
16#include <crypto/hash.h>
17
18struct mcryptd_ahash {
19 struct crypto_ahash base;
20};
21
22static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
23 struct crypto_ahash *tfm)
24{
25 return (struct mcryptd_ahash *)tfm;
26}
27
28struct mcryptd_cpu_queue {
29 struct crypto_queue queue;
30 spinlock_t q_lock;
31 struct work_struct work;
32};
33
34struct mcryptd_queue {
35 struct mcryptd_cpu_queue __percpu *cpu_queue;
36};
37
38struct mcryptd_instance_ctx {
39 struct crypto_spawn spawn;
40 struct mcryptd_queue *queue;
41};
42
43struct mcryptd_hash_ctx {
44 struct crypto_ahash *child;
45 struct mcryptd_alg_state *alg_state;
46};
47
48struct mcryptd_tag {
49 /* seq number of request */
50 unsigned seq_num;
51 /* arrival time of request */
52 unsigned long arrival;
53 unsigned long expire;
54 int cpu;
55};
56
57struct mcryptd_hash_request_ctx {
58 struct list_head waiter;
59 crypto_completion_t complete;
60 struct mcryptd_tag tag;
61 struct crypto_hash_walk walk;
62 u8 *out;
63 int flag;
64 struct ahash_request areq;
65};
66
67struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
68 u32 type, u32 mask);
69struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm);
70struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req);
71void mcryptd_free_ahash(struct mcryptd_ahash *tfm);
72void mcryptd_flusher(struct work_struct *work);
73
74enum mcryptd_req_type {
75 MCRYPTD_NONE,
76 MCRYPTD_UPDATE,
77 MCRYPTD_FINUP,
78 MCRYPTD_DIGEST,
79 MCRYPTD_FINAL
80};
81
82struct mcryptd_alg_cstate {
83 unsigned long next_flush;
84 unsigned next_seq_num;
85 bool flusher_engaged;
86 struct delayed_work flush;
87 int cpu;
88 struct mcryptd_alg_state *alg_state;
89 void *mgr;
90 spinlock_t work_lock;
91 struct list_head work_list;
92 struct list_head flush_list;
93};
94
95struct mcryptd_alg_state {
96 struct mcryptd_alg_cstate __percpu *alg_cstate;
97 unsigned long (*flusher)(struct mcryptd_alg_cstate *cstate);
98};
99
100/* Return the delay, in jiffies, from now until @t; 0 if @t has already passed. */
101static inline unsigned long get_delay(unsigned long t)
102{
103 long delay;
104
105 delay = (long) t - (long) jiffies;
106 if (delay <= 0)
107 return 0;
108 else
109 return (unsigned long) delay;
110}
111
112void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay);
113
114#endif
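For completeness, a sketch of how the flusher pieces at the bottom of this header (struct mcryptd_alg_cstate, get_delay(), mcryptd_arm_flusher()) were combined, modelled loosely on the sha*-mb drivers removed elsewhere in this commit; here cstate is a per-CPU mcryptd_alg_cstate and rctx a queued request context:

	/* After queueing a partial job whose deadline is rctx->tag.expire:
	 * if no flush is pending, arm one for when that job times out.
	 * get_delay() clamps to zero if the deadline has already passed. */
	if (!cstate->flusher_engaged) {
		unsigned long delay = get_delay(rctx->tag.expire);

		cstate->next_flush = rctx->tag.expire;
		cstate->flusher_engaged = true;
		mcryptd_arm_flusher(cstate, delay);
	}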