diff options
47 files changed, 0 insertions, 8952 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 9ad052aeac39..9c91490baa3d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -7503,14 +7503,6 @@ S: Supported | |||
7503 | F: drivers/infiniband/hw/i40iw/ | 7503 | F: drivers/infiniband/hw/i40iw/ |
7504 | F: include/uapi/rdma/i40iw-abi.h | 7504 | F: include/uapi/rdma/i40iw-abi.h |
7505 | 7505 | ||
7506 | INTEL SHA MULTIBUFFER DRIVER | ||
7507 | M: Megha Dey <megha.dey@linux.intel.com> | ||
7508 | R: Tim Chen <tim.c.chen@linux.intel.com> | ||
7509 | L: linux-crypto@vger.kernel.org | ||
7510 | S: Supported | ||
7511 | F: arch/x86/crypto/sha*-mb/ | ||
7512 | F: crypto/mcryptd.c | ||
7513 | |||
7514 | INTEL TELEMETRY DRIVER | 7506 | INTEL TELEMETRY DRIVER |
7515 | M: Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com> | 7507 | M: Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com> |
7516 | L: platform-driver-x86@vger.kernel.org | 7508 | L: platform-driver-x86@vger.kernel.org |
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 93a3c3c0238c..85904b73e261 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig | |||
@@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m | |||
621 | CONFIG_CRYPTO_MANAGER=y | 621 | CONFIG_CRYPTO_MANAGER=y |
622 | CONFIG_CRYPTO_USER=m | 622 | CONFIG_CRYPTO_USER=m |
623 | CONFIG_CRYPTO_CRYPTD=m | 623 | CONFIG_CRYPTO_CRYPTD=m |
624 | CONFIG_CRYPTO_MCRYPTD=m | ||
625 | CONFIG_CRYPTO_TEST=m | 624 | CONFIG_CRYPTO_TEST=m |
626 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 625 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
627 | CONFIG_CRYPTO_AEGIS128=m | 626 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index e3d0efd6397d..9b3818bbb68b 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig | |||
@@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m | |||
578 | CONFIG_CRYPTO_MANAGER=y | 578 | CONFIG_CRYPTO_MANAGER=y |
579 | CONFIG_CRYPTO_USER=m | 579 | CONFIG_CRYPTO_USER=m |
580 | CONFIG_CRYPTO_CRYPTD=m | 580 | CONFIG_CRYPTO_CRYPTD=m |
581 | CONFIG_CRYPTO_MCRYPTD=m | ||
582 | CONFIG_CRYPTO_TEST=m | 581 | CONFIG_CRYPTO_TEST=m |
583 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 582 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
584 | CONFIG_CRYPTO_AEGIS128=m | 583 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 75ac0c76e884..769677809945 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig | |||
@@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m | |||
599 | CONFIG_CRYPTO_MANAGER=y | 599 | CONFIG_CRYPTO_MANAGER=y |
600 | CONFIG_CRYPTO_USER=m | 600 | CONFIG_CRYPTO_USER=m |
601 | CONFIG_CRYPTO_CRYPTD=m | 601 | CONFIG_CRYPTO_CRYPTD=m |
602 | CONFIG_CRYPTO_MCRYPTD=m | ||
603 | CONFIG_CRYPTO_TEST=m | 602 | CONFIG_CRYPTO_TEST=m |
604 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 603 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
605 | CONFIG_CRYPTO_AEGIS128=m | 604 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index c6e492700188..7dd264ddf2ea 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig | |||
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m | |||
570 | CONFIG_CRYPTO_MANAGER=y | 570 | CONFIG_CRYPTO_MANAGER=y |
571 | CONFIG_CRYPTO_USER=m | 571 | CONFIG_CRYPTO_USER=m |
572 | CONFIG_CRYPTO_CRYPTD=m | 572 | CONFIG_CRYPTO_CRYPTD=m |
573 | CONFIG_CRYPTO_MCRYPTD=m | ||
574 | CONFIG_CRYPTO_TEST=m | 573 | CONFIG_CRYPTO_TEST=m |
575 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 574 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
576 | CONFIG_CRYPTO_AEGIS128=m | 575 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index b00d1c477432..515f7439c755 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig | |||
@@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m | |||
580 | CONFIG_CRYPTO_MANAGER=y | 580 | CONFIG_CRYPTO_MANAGER=y |
581 | CONFIG_CRYPTO_USER=m | 581 | CONFIG_CRYPTO_USER=m |
582 | CONFIG_CRYPTO_CRYPTD=m | 582 | CONFIG_CRYPTO_CRYPTD=m |
583 | CONFIG_CRYPTO_MCRYPTD=m | ||
584 | CONFIG_CRYPTO_TEST=m | 583 | CONFIG_CRYPTO_TEST=m |
585 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 584 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
586 | CONFIG_CRYPTO_AEGIS128=m | 585 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 85cac3770d89..8e1038ceb407 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig | |||
@@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m | |||
602 | CONFIG_CRYPTO_MANAGER=y | 602 | CONFIG_CRYPTO_MANAGER=y |
603 | CONFIG_CRYPTO_USER=m | 603 | CONFIG_CRYPTO_USER=m |
604 | CONFIG_CRYPTO_CRYPTD=m | 604 | CONFIG_CRYPTO_CRYPTD=m |
605 | CONFIG_CRYPTO_MCRYPTD=m | ||
606 | CONFIG_CRYPTO_TEST=m | 605 | CONFIG_CRYPTO_TEST=m |
607 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 606 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
608 | CONFIG_CRYPTO_AEGIS128=m | 607 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index b3a5d1e99d27..62c8aaa15cc7 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig | |||
@@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m | |||
684 | CONFIG_CRYPTO_MANAGER=y | 684 | CONFIG_CRYPTO_MANAGER=y |
685 | CONFIG_CRYPTO_USER=m | 685 | CONFIG_CRYPTO_USER=m |
686 | CONFIG_CRYPTO_CRYPTD=m | 686 | CONFIG_CRYPTO_CRYPTD=m |
687 | CONFIG_CRYPTO_MCRYPTD=m | ||
688 | CONFIG_CRYPTO_TEST=m | 687 | CONFIG_CRYPTO_TEST=m |
689 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 688 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
690 | CONFIG_CRYPTO_AEGIS128=m | 689 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 0ca22608453f..733973f91297 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig | |||
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m | |||
570 | CONFIG_CRYPTO_MANAGER=y | 570 | CONFIG_CRYPTO_MANAGER=y |
571 | CONFIG_CRYPTO_USER=m | 571 | CONFIG_CRYPTO_USER=m |
572 | CONFIG_CRYPTO_CRYPTD=m | 572 | CONFIG_CRYPTO_CRYPTD=m |
573 | CONFIG_CRYPTO_MCRYPTD=m | ||
574 | CONFIG_CRYPTO_TEST=m | 573 | CONFIG_CRYPTO_TEST=m |
575 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 574 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
576 | CONFIG_CRYPTO_AEGIS128=m | 575 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 8e3d10d12d9c..fee30cc9ac16 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig | |||
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m | |||
570 | CONFIG_CRYPTO_MANAGER=y | 570 | CONFIG_CRYPTO_MANAGER=y |
571 | CONFIG_CRYPTO_USER=m | 571 | CONFIG_CRYPTO_USER=m |
572 | CONFIG_CRYPTO_CRYPTD=m | 572 | CONFIG_CRYPTO_CRYPTD=m |
573 | CONFIG_CRYPTO_MCRYPTD=m | ||
574 | CONFIG_CRYPTO_TEST=m | 573 | CONFIG_CRYPTO_TEST=m |
575 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 574 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
576 | CONFIG_CRYPTO_AEGIS128=m | 575 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index ff7e653ec7fa..eebf9c9088e7 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig | |||
@@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m | |||
593 | CONFIG_CRYPTO_MANAGER=y | 593 | CONFIG_CRYPTO_MANAGER=y |
594 | CONFIG_CRYPTO_USER=m | 594 | CONFIG_CRYPTO_USER=m |
595 | CONFIG_CRYPTO_CRYPTD=m | 595 | CONFIG_CRYPTO_CRYPTD=m |
596 | CONFIG_CRYPTO_MCRYPTD=m | ||
597 | CONFIG_CRYPTO_TEST=m | 596 | CONFIG_CRYPTO_TEST=m |
598 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 597 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
599 | CONFIG_CRYPTO_AEGIS128=m | 598 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 612cf46f6d0c..dabc54318c09 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig | |||
@@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m | |||
571 | CONFIG_CRYPTO_MANAGER=y | 571 | CONFIG_CRYPTO_MANAGER=y |
572 | CONFIG_CRYPTO_USER=m | 572 | CONFIG_CRYPTO_USER=m |
573 | CONFIG_CRYPTO_CRYPTD=m | 573 | CONFIG_CRYPTO_CRYPTD=m |
574 | CONFIG_CRYPTO_MCRYPTD=m | ||
575 | CONFIG_CRYPTO_TEST=m | 574 | CONFIG_CRYPTO_TEST=m |
576 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 575 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
577 | CONFIG_CRYPTO_AEGIS128=m | 576 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index a6a7bb6dc3fd..0d9a5c2a311a 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig | |||
@@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m | |||
572 | CONFIG_CRYPTO_MANAGER=y | 572 | CONFIG_CRYPTO_MANAGER=y |
573 | CONFIG_CRYPTO_USER=m | 573 | CONFIG_CRYPTO_USER=m |
574 | CONFIG_CRYPTO_CRYPTD=m | 574 | CONFIG_CRYPTO_CRYPTD=m |
575 | CONFIG_CRYPTO_MCRYPTD=m | ||
576 | CONFIG_CRYPTO_TEST=m | 575 | CONFIG_CRYPTO_TEST=m |
577 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 576 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
578 | CONFIG_CRYPTO_AEGIS128=m | 577 | CONFIG_CRYPTO_AEGIS128=m |
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 941d8cc6c9f5..259d1698ac50 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig | |||
@@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m | |||
668 | # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set | 668 | # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set |
669 | CONFIG_CRYPTO_PCRYPT=m | 669 | CONFIG_CRYPTO_PCRYPT=m |
670 | CONFIG_CRYPTO_CRYPTD=m | 670 | CONFIG_CRYPTO_CRYPTD=m |
671 | CONFIG_CRYPTO_MCRYPTD=m | ||
672 | CONFIG_CRYPTO_TEST=m | 671 | CONFIG_CRYPTO_TEST=m |
673 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 672 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
674 | CONFIG_CRYPTO_LRW=m | 673 | CONFIG_CRYPTO_LRW=m |
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index eb6f75f24208..37fd60c20e22 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig | |||
@@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m | |||
610 | # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set | 610 | # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set |
611 | CONFIG_CRYPTO_PCRYPT=m | 611 | CONFIG_CRYPTO_PCRYPT=m |
612 | CONFIG_CRYPTO_CRYPTD=m | 612 | CONFIG_CRYPTO_CRYPTD=m |
613 | CONFIG_CRYPTO_MCRYPTD=m | ||
614 | CONFIG_CRYPTO_TEST=m | 613 | CONFIG_CRYPTO_TEST=m |
615 | CONFIG_CRYPTO_CHACHA20POLY1305=m | 614 | CONFIG_CRYPTO_CHACHA20POLY1305=m |
616 | CONFIG_CRYPTO_LRW=m | 615 | CONFIG_CRYPTO_LRW=m |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index a450ad573dcb..9edfa5469f9f 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -60,9 +60,6 @@ endif | |||
60 | ifeq ($(avx2_supported),yes) | 60 | ifeq ($(avx2_supported),yes) |
61 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o | 61 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o |
62 | obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o | 62 | obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o |
63 | obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/ | ||
64 | obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/ | ||
65 | obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/ | ||
66 | 63 | ||
67 | obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o | 64 | obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o |
68 | endif | 65 | endif |
diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile deleted file mode 100644 index 815ded3ba90e..000000000000 --- a/arch/x86/crypto/sha1-mb/Makefile +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | # SPDX-License-Identifier: GPL-2.0 | ||
2 | # | ||
3 | # Arch-specific CryptoAPI modules. | ||
4 | # | ||
5 | |||
6 | OBJECT_FILES_NON_STANDARD := y | ||
7 | |||
8 | avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ | ||
9 | $(comma)4)$(comma)%ymm2,yes,no) | ||
10 | ifeq ($(avx2_supported),yes) | ||
11 | obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o | ||
12 | sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \ | ||
13 | sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o | ||
14 | endif | ||
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c deleted file mode 100644 index b93805664c1d..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb.c +++ /dev/null | |||
@@ -1,1011 +0,0 @@ | |||
1 | /* | ||
2 | * Multi buffer SHA1 algorithm Glue Code | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2014 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2014 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
55 | |||
56 | #include <crypto/internal/hash.h> | ||
57 | #include <linux/init.h> | ||
58 | #include <linux/module.h> | ||
59 | #include <linux/mm.h> | ||
60 | #include <linux/cryptohash.h> | ||
61 | #include <linux/types.h> | ||
62 | #include <linux/list.h> | ||
63 | #include <crypto/scatterwalk.h> | ||
64 | #include <crypto/sha.h> | ||
65 | #include <crypto/mcryptd.h> | ||
66 | #include <crypto/crypto_wq.h> | ||
67 | #include <asm/byteorder.h> | ||
68 | #include <linux/hardirq.h> | ||
69 | #include <asm/fpu/api.h> | ||
70 | #include "sha1_mb_ctx.h" | ||
71 | |||
72 | #define FLUSH_INTERVAL 1000 /* in usec */ | ||
73 | |||
74 | static struct mcryptd_alg_state sha1_mb_alg_state; | ||
75 | |||
76 | struct sha1_mb_ctx { | ||
77 | struct mcryptd_ahash *mcryptd_tfm; | ||
78 | }; | ||
79 | |||
80 | static inline struct mcryptd_hash_request_ctx | ||
81 | *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) | ||
82 | { | ||
83 | struct ahash_request *areq; | ||
84 | |||
85 | areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); | ||
86 | return container_of(areq, struct mcryptd_hash_request_ctx, areq); | ||
87 | } | ||
88 | |||
89 | static inline struct ahash_request | ||
90 | *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) | ||
91 | { | ||
92 | return container_of((void *) ctx, struct ahash_request, __ctx); | ||
93 | } | ||
94 | |||
95 | static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, | ||
96 | struct ahash_request *areq) | ||
97 | { | ||
98 | rctx->flag = HASH_UPDATE; | ||
99 | } | ||
100 | |||
101 | static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state); | ||
102 | static asmlinkage struct job_sha1* (*sha1_job_mgr_submit) | ||
103 | (struct sha1_mb_mgr *state, struct job_sha1 *job); | ||
104 | static asmlinkage struct job_sha1* (*sha1_job_mgr_flush) | ||
105 | (struct sha1_mb_mgr *state); | ||
106 | static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job) | ||
107 | (struct sha1_mb_mgr *state); | ||
108 | |||
109 | static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], | ||
110 | uint64_t total_len) | ||
111 | { | ||
112 | uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1); | ||
113 | |||
114 | memset(&padblock[i], 0, SHA1_BLOCK_SIZE); | ||
115 | padblock[i] = 0x80; | ||
116 | |||
117 | i += ((SHA1_BLOCK_SIZE - 1) & | ||
118 | (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) | ||
119 | + 1 + SHA1_PADLENGTHFIELD_SIZE; | ||
120 | |||
121 | #if SHA1_PADLENGTHFIELD_SIZE == 16 | ||
122 | *((uint64_t *) &padblock[i - 16]) = 0; | ||
123 | #endif | ||
124 | |||
125 | *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); | ||
126 | |||
127 | /* Number of extra blocks to hash */ | ||
128 | return i >> SHA1_LOG2_BLOCK_SIZE; | ||
129 | } | ||
130 | |||
131 | static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, | ||
132 | struct sha1_hash_ctx *ctx) | ||
133 | { | ||
134 | while (ctx) { | ||
135 | if (ctx->status & HASH_CTX_STS_COMPLETE) { | ||
136 | /* Clear PROCESSING bit */ | ||
137 | ctx->status = HASH_CTX_STS_COMPLETE; | ||
138 | return ctx; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * If the extra blocks are empty, begin hashing what remains | ||
143 | * in the user's buffer. | ||
144 | */ | ||
145 | if (ctx->partial_block_buffer_length == 0 && | ||
146 | ctx->incoming_buffer_length) { | ||
147 | |||
148 | const void *buffer = ctx->incoming_buffer; | ||
149 | uint32_t len = ctx->incoming_buffer_length; | ||
150 | uint32_t copy_len; | ||
151 | |||
152 | /* | ||
153 | * Only entire blocks can be hashed. | ||
154 | * Copy remainder to extra blocks buffer. | ||
155 | */ | ||
156 | copy_len = len & (SHA1_BLOCK_SIZE-1); | ||
157 | |||
158 | if (copy_len) { | ||
159 | len -= copy_len; | ||
160 | memcpy(ctx->partial_block_buffer, | ||
161 | ((const char *) buffer + len), | ||
162 | copy_len); | ||
163 | ctx->partial_block_buffer_length = copy_len; | ||
164 | } | ||
165 | |||
166 | ctx->incoming_buffer_length = 0; | ||
167 | |||
168 | /* len should be a multiple of the block size now */ | ||
169 | assert((len % SHA1_BLOCK_SIZE) == 0); | ||
170 | |||
171 | /* Set len to the number of blocks to be hashed */ | ||
172 | len >>= SHA1_LOG2_BLOCK_SIZE; | ||
173 | |||
174 | if (len) { | ||
175 | |||
176 | ctx->job.buffer = (uint8_t *) buffer; | ||
177 | ctx->job.len = len; | ||
178 | ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr, | ||
179 | &ctx->job); | ||
180 | continue; | ||
181 | } | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * If the extra blocks are not empty, then we are | ||
186 | * either on the last block(s) or we need more | ||
187 | * user input before continuing. | ||
188 | */ | ||
189 | if (ctx->status & HASH_CTX_STS_LAST) { | ||
190 | |||
191 | uint8_t *buf = ctx->partial_block_buffer; | ||
192 | uint32_t n_extra_blocks = | ||
193 | sha1_pad(buf, ctx->total_length); | ||
194 | |||
195 | ctx->status = (HASH_CTX_STS_PROCESSING | | ||
196 | HASH_CTX_STS_COMPLETE); | ||
197 | ctx->job.buffer = buf; | ||
198 | ctx->job.len = (uint32_t) n_extra_blocks; | ||
199 | ctx = (struct sha1_hash_ctx *) | ||
200 | sha1_job_mgr_submit(&mgr->mgr, &ctx->job); | ||
201 | continue; | ||
202 | } | ||
203 | |||
204 | ctx->status = HASH_CTX_STS_IDLE; | ||
205 | return ctx; | ||
206 | } | ||
207 | |||
208 | return NULL; | ||
209 | } | ||
210 | |||
211 | static struct sha1_hash_ctx | ||
212 | *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) | ||
213 | { | ||
214 | /* | ||
215 | * If get_comp_job returns NULL, there are no jobs complete. | ||
216 | * If get_comp_job returns a job, verify that it is safe to return to | ||
217 | * the user. | ||
218 | * If it is not ready, resubmit the job to finish processing. | ||
219 | * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. | ||
220 | * Otherwise, all jobs currently being managed by the hash_ctx_mgr | ||
221 | * still need processing. | ||
222 | */ | ||
223 | struct sha1_hash_ctx *ctx; | ||
224 | |||
225 | ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr); | ||
226 | return sha1_ctx_mgr_resubmit(mgr, ctx); | ||
227 | } | ||
228 | |||
229 | static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr) | ||
230 | { | ||
231 | sha1_job_mgr_init(&mgr->mgr); | ||
232 | } | ||
233 | |||
234 | static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, | ||
235 | struct sha1_hash_ctx *ctx, | ||
236 | const void *buffer, | ||
237 | uint32_t len, | ||
238 | int flags) | ||
239 | { | ||
240 | if (flags & ~(HASH_UPDATE | HASH_LAST)) { | ||
241 | /* User should not pass anything other than UPDATE or LAST */ | ||
242 | ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; | ||
243 | return ctx; | ||
244 | } | ||
245 | |||
246 | if (ctx->status & HASH_CTX_STS_PROCESSING) { | ||
247 | /* Cannot submit to a currently processing job. */ | ||
248 | ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; | ||
249 | return ctx; | ||
250 | } | ||
251 | |||
252 | if (ctx->status & HASH_CTX_STS_COMPLETE) { | ||
253 | /* Cannot update a finished job. */ | ||
254 | ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; | ||
255 | return ctx; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * If we made it here, there were no errors during this call to | ||
260 | * submit | ||
261 | */ | ||
262 | ctx->error = HASH_CTX_ERROR_NONE; | ||
263 | |||
264 | /* Store buffer ptr info from user */ | ||
265 | ctx->incoming_buffer = buffer; | ||
266 | ctx->incoming_buffer_length = len; | ||
267 | |||
268 | /* | ||
269 | * Store the user's request flags and mark this ctx as currently | ||
270 | * being processed. | ||
271 | */ | ||
272 | ctx->status = (flags & HASH_LAST) ? | ||
273 | (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : | ||
274 | HASH_CTX_STS_PROCESSING; | ||
275 | |||
276 | /* Advance byte counter */ | ||
277 | ctx->total_length += len; | ||
278 | |||
279 | /* | ||
280 | * If there is anything currently buffered in the extra blocks, | ||
281 | * append to it until it contains a whole block. | ||
282 | * Or if the user's buffer contains less than a whole block, | ||
283 | * append as much as possible to the extra block. | ||
284 | */ | ||
285 | if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) { | ||
286 | /* | ||
287 | * Compute how many bytes to copy from user buffer into | ||
288 | * extra block | ||
289 | */ | ||
290 | uint32_t copy_len = SHA1_BLOCK_SIZE - | ||
291 | ctx->partial_block_buffer_length; | ||
292 | if (len < copy_len) | ||
293 | copy_len = len; | ||
294 | |||
295 | if (copy_len) { | ||
296 | /* Copy and update relevant pointers and counters */ | ||
297 | memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length], | ||
298 | buffer, copy_len); | ||
299 | |||
300 | ctx->partial_block_buffer_length += copy_len; | ||
301 | ctx->incoming_buffer = (const void *) | ||
302 | ((const char *)buffer + copy_len); | ||
303 | ctx->incoming_buffer_length = len - copy_len; | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * The extra block should never contain more than 1 block | ||
308 | * here | ||
309 | */ | ||
310 | assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); | ||
311 | |||
312 | /* | ||
313 | * If the extra block buffer contains exactly 1 block, it can | ||
314 | * be hashed. | ||
315 | */ | ||
316 | if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { | ||
317 | ctx->partial_block_buffer_length = 0; | ||
318 | |||
319 | ctx->job.buffer = ctx->partial_block_buffer; | ||
320 | ctx->job.len = 1; | ||
321 | ctx = (struct sha1_hash_ctx *) | ||
322 | sha1_job_mgr_submit(&mgr->mgr, &ctx->job); | ||
323 | } | ||
324 | } | ||
325 | |||
326 | return sha1_ctx_mgr_resubmit(mgr, ctx); | ||
327 | } | ||
328 | |||
329 | static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr) | ||
330 | { | ||
331 | struct sha1_hash_ctx *ctx; | ||
332 | |||
333 | while (1) { | ||
334 | ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr); | ||
335 | |||
336 | /* If flush returned 0, there are no more jobs in flight. */ | ||
337 | if (!ctx) | ||
338 | return NULL; | ||
339 | |||
340 | /* | ||
341 | * If flush returned a job, resubmit the job to finish | ||
342 | * processing. | ||
343 | */ | ||
344 | ctx = sha1_ctx_mgr_resubmit(mgr, ctx); | ||
345 | |||
346 | /* | ||
347 | * If sha1_ctx_mgr_resubmit returned a job, it is ready to be | ||
348 | * returned. Otherwise, all jobs currently being managed by the | ||
349 | * sha1_ctx_mgr still need processing. Loop. | ||
350 | */ | ||
351 | if (ctx) | ||
352 | return ctx; | ||
353 | } | ||
354 | } | ||
355 | |||
356 | static int sha1_mb_init(struct ahash_request *areq) | ||
357 | { | ||
358 | struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); | ||
359 | |||
360 | hash_ctx_init(sctx); | ||
361 | sctx->job.result_digest[0] = SHA1_H0; | ||
362 | sctx->job.result_digest[1] = SHA1_H1; | ||
363 | sctx->job.result_digest[2] = SHA1_H2; | ||
364 | sctx->job.result_digest[3] = SHA1_H3; | ||
365 | sctx->job.result_digest[4] = SHA1_H4; | ||
366 | sctx->total_length = 0; | ||
367 | sctx->partial_block_buffer_length = 0; | ||
368 | sctx->status = HASH_CTX_STS_IDLE; | ||
369 | |||
370 | return 0; | ||
371 | } | ||
372 | |||
373 | static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx) | ||
374 | { | ||
375 | int i; | ||
376 | struct sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); | ||
377 | __be32 *dst = (__be32 *) rctx->out; | ||
378 | |||
379 | for (i = 0; i < 5; ++i) | ||
380 | dst[i] = cpu_to_be32(sctx->job.result_digest[i]); | ||
381 | |||
382 | return 0; | ||
383 | } | ||
384 | |||
385 | static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, | ||
386 | struct mcryptd_alg_cstate *cstate, bool flush) | ||
387 | { | ||
388 | int flag = HASH_UPDATE; | ||
389 | int nbytes, err = 0; | ||
390 | struct mcryptd_hash_request_ctx *rctx = *ret_rctx; | ||
391 | struct sha1_hash_ctx *sha_ctx; | ||
392 | |||
393 | /* more work ? */ | ||
394 | while (!(rctx->flag & HASH_DONE)) { | ||
395 | nbytes = crypto_ahash_walk_done(&rctx->walk, 0); | ||
396 | if (nbytes < 0) { | ||
397 | err = nbytes; | ||
398 | goto out; | ||
399 | } | ||
400 | /* check if the walk is done */ | ||
401 | if (crypto_ahash_walk_last(&rctx->walk)) { | ||
402 | rctx->flag |= HASH_DONE; | ||
403 | if (rctx->flag & HASH_FINAL) | ||
404 | flag |= HASH_LAST; | ||
405 | |||
406 | } | ||
407 | sha_ctx = (struct sha1_hash_ctx *) | ||
408 | ahash_request_ctx(&rctx->areq); | ||
409 | kernel_fpu_begin(); | ||
410 | sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, | ||
411 | rctx->walk.data, nbytes, flag); | ||
412 | if (!sha_ctx) { | ||
413 | if (flush) | ||
414 | sha_ctx = sha1_ctx_mgr_flush(cstate->mgr); | ||
415 | } | ||
416 | kernel_fpu_end(); | ||
417 | if (sha_ctx) | ||
418 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
419 | else { | ||
420 | rctx = NULL; | ||
421 | goto out; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | /* copy the results */ | ||
426 | if (rctx->flag & HASH_FINAL) | ||
427 | sha1_mb_set_results(rctx); | ||
428 | |||
429 | out: | ||
430 | *ret_rctx = rctx; | ||
431 | return err; | ||
432 | } | ||
433 | |||
434 | static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, | ||
435 | struct mcryptd_alg_cstate *cstate, | ||
436 | int err) | ||
437 | { | ||
438 | struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); | ||
439 | struct sha1_hash_ctx *sha_ctx; | ||
440 | struct mcryptd_hash_request_ctx *req_ctx; | ||
441 | int ret; | ||
442 | |||
443 | /* remove from work list */ | ||
444 | spin_lock(&cstate->work_lock); | ||
445 | list_del(&rctx->waiter); | ||
446 | spin_unlock(&cstate->work_lock); | ||
447 | |||
448 | if (irqs_disabled()) | ||
449 | rctx->complete(&req->base, err); | ||
450 | else { | ||
451 | local_bh_disable(); | ||
452 | rctx->complete(&req->base, err); | ||
453 | local_bh_enable(); | ||
454 | } | ||
455 | |||
456 | /* check to see if there are other jobs that are done */ | ||
457 | sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); | ||
458 | while (sha_ctx) { | ||
459 | req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
460 | ret = sha_finish_walk(&req_ctx, cstate, false); | ||
461 | if (req_ctx) { | ||
462 | spin_lock(&cstate->work_lock); | ||
463 | list_del(&req_ctx->waiter); | ||
464 | spin_unlock(&cstate->work_lock); | ||
465 | |||
466 | req = cast_mcryptd_ctx_to_req(req_ctx); | ||
467 | if (irqs_disabled()) | ||
468 | req_ctx->complete(&req->base, ret); | ||
469 | else { | ||
470 | local_bh_disable(); | ||
471 | req_ctx->complete(&req->base, ret); | ||
472 | local_bh_enable(); | ||
473 | } | ||
474 | } | ||
475 | sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); | ||
476 | } | ||
477 | |||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx, | ||
482 | struct mcryptd_alg_cstate *cstate) | ||
483 | { | ||
484 | unsigned long next_flush; | ||
485 | unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); | ||
486 | |||
487 | /* initialize tag */ | ||
488 | rctx->tag.arrival = jiffies; /* tag the arrival time */ | ||
489 | rctx->tag.seq_num = cstate->next_seq_num++; | ||
490 | next_flush = rctx->tag.arrival + delay; | ||
491 | rctx->tag.expire = next_flush; | ||
492 | |||
493 | spin_lock(&cstate->work_lock); | ||
494 | list_add_tail(&rctx->waiter, &cstate->work_list); | ||
495 | spin_unlock(&cstate->work_lock); | ||
496 | |||
497 | mcryptd_arm_flusher(cstate, delay); | ||
498 | } | ||
499 | |||
/*
 * Feed the next chunk of request data to the multibuffer manager.
 *
 * Runs on the mcryptd worker for the CPU the request was queued on.
 * sha1_ctx_mgr_submit() may return this request's context, a *different*
 * completed job's context, or NULL when nothing has completed yet; hence
 * rctx is re-derived from the returned sha_ctx before completing a job.
 * Returns -EINPROGRESS while the job is still in flight.
 */
static int sha1_mb_update(struct ahash_request *areq)
{
	struct mcryptd_hash_request_ctx *rctx =
		container_of(areq, struct mcryptd_hash_request_ctx, areq);
	struct mcryptd_alg_cstate *cstate =
		this_cpu_ptr(sha1_mb_alg_state.alg_cstate);

	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
	struct sha1_hash_ctx *sha_ctx;
	int ret = 0, nbytes;


	/* sanity check: request must complete on the CPU it was tagged for */
	if (rctx->tag.cpu != smp_processor_id()) {
		pr_err("mcryptd error: cpu clash\n");
		goto done;
	}

	/* need to init context */
	req_ctx_init(rctx, areq);

	nbytes = crypto_ahash_walk_first(req, &rctx->walk);

	if (nbytes < 0) {
		ret = nbytes;
		goto done;
	}

	if (crypto_ahash_walk_last(&rctx->walk))
		rctx->flag |= HASH_DONE;

	/* submit; list first so the flusher can find the pending job */
	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
	sha1_mb_add_list(rctx, cstate);
	/* SIMD registers are only usable inside kernel_fpu_begin/end */
	kernel_fpu_begin();
	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
							nbytes, HASH_UPDATE);
	kernel_fpu_end();

	/* check if anything is returned */
	if (!sha_ctx)
		return -EINPROGRESS;

	if (sha_ctx->error) {
		ret = sha_ctx->error;
		/* complete the job the error belongs to, not necessarily ours */
		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
		goto done;
	}

	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
	ret = sha_finish_walk(&rctx, cstate, false);

	if (!rctx)
		return -EINPROGRESS;
done:
	sha_complete_job(rctx, cstate, ret);
	return ret;
}
558 | |||
559 | static int sha1_mb_finup(struct ahash_request *areq) | ||
560 | { | ||
561 | struct mcryptd_hash_request_ctx *rctx = | ||
562 | container_of(areq, struct mcryptd_hash_request_ctx, areq); | ||
563 | struct mcryptd_alg_cstate *cstate = | ||
564 | this_cpu_ptr(sha1_mb_alg_state.alg_cstate); | ||
565 | |||
566 | struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); | ||
567 | struct sha1_hash_ctx *sha_ctx; | ||
568 | int ret = 0, flag = HASH_UPDATE, nbytes; | ||
569 | |||
570 | /* sanity check */ | ||
571 | if (rctx->tag.cpu != smp_processor_id()) { | ||
572 | pr_err("mcryptd error: cpu clash\n"); | ||
573 | goto done; | ||
574 | } | ||
575 | |||
576 | /* need to init context */ | ||
577 | req_ctx_init(rctx, areq); | ||
578 | |||
579 | nbytes = crypto_ahash_walk_first(req, &rctx->walk); | ||
580 | |||
581 | if (nbytes < 0) { | ||
582 | ret = nbytes; | ||
583 | goto done; | ||
584 | } | ||
585 | |||
586 | if (crypto_ahash_walk_last(&rctx->walk)) { | ||
587 | rctx->flag |= HASH_DONE; | ||
588 | flag = HASH_LAST; | ||
589 | } | ||
590 | |||
591 | /* submit */ | ||
592 | rctx->flag |= HASH_FINAL; | ||
593 | sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); | ||
594 | sha1_mb_add_list(rctx, cstate); | ||
595 | |||
596 | kernel_fpu_begin(); | ||
597 | sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, | ||
598 | nbytes, flag); | ||
599 | kernel_fpu_end(); | ||
600 | |||
601 | /* check if anything is returned */ | ||
602 | if (!sha_ctx) | ||
603 | return -EINPROGRESS; | ||
604 | |||
605 | if (sha_ctx->error) { | ||
606 | ret = sha_ctx->error; | ||
607 | goto done; | ||
608 | } | ||
609 | |||
610 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
611 | ret = sha_finish_walk(&rctx, cstate, false); | ||
612 | if (!rctx) | ||
613 | return -EINPROGRESS; | ||
614 | done: | ||
615 | sha_complete_job(rctx, cstate, ret); | ||
616 | return ret; | ||
617 | } | ||
618 | |||
/*
 * Finalize with no new data: submit a zero-length HASH_LAST job so the
 * manager pads the message and emits the digest.  Returns -EINPROGRESS
 * while the job is still in flight.
 */
static int sha1_mb_final(struct ahash_request *areq)
{
	struct mcryptd_hash_request_ctx *rctx =
		container_of(areq, struct mcryptd_hash_request_ctx, areq);
	struct mcryptd_alg_cstate *cstate =
		this_cpu_ptr(sha1_mb_alg_state.alg_cstate);

	struct sha1_hash_ctx *sha_ctx;
	int ret = 0;
	/* dummy byte: gives the zero-length submit a valid buffer address */
	u8 data;

	/* sanity check: request must complete on the CPU it was tagged for */
	if (rctx->tag.cpu != smp_processor_id()) {
		pr_err("mcryptd error: cpu clash\n");
		goto done;
	}

	/* need to init context */
	req_ctx_init(rctx, areq);

	rctx->flag |= HASH_DONE | HASH_FINAL;

	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
	/* flag HASH_FINAL and 0 data size */
	sha1_mb_add_list(rctx, cstate);
	kernel_fpu_begin();
	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
								HASH_LAST);
	kernel_fpu_end();

	/* check if anything is returned */
	if (!sha_ctx)
		return -EINPROGRESS;

	if (sha_ctx->error) {
		ret = sha_ctx->error;
		/* returned ctx may belong to a different job; re-derive rctx */
		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
		goto done;
	}

	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
	ret = sha_finish_walk(&rctx, cstate, false);
	if (!rctx)
		return -EINPROGRESS;
done:
	sha_complete_job(rctx, cstate, ret);
	return ret;
}
667 | |||
668 | static int sha1_mb_export(struct ahash_request *areq, void *out) | ||
669 | { | ||
670 | struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); | ||
671 | |||
672 | memcpy(out, sctx, sizeof(*sctx)); | ||
673 | |||
674 | return 0; | ||
675 | } | ||
676 | |||
677 | static int sha1_mb_import(struct ahash_request *areq, const void *in) | ||
678 | { | ||
679 | struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); | ||
680 | |||
681 | memcpy(sctx, in, sizeof(*sctx)); | ||
682 | |||
683 | return 0; | ||
684 | } | ||
685 | |||
686 | static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) | ||
687 | { | ||
688 | struct mcryptd_ahash *mcryptd_tfm; | ||
689 | struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); | ||
690 | struct mcryptd_hash_ctx *mctx; | ||
691 | |||
692 | mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", | ||
693 | CRYPTO_ALG_INTERNAL, | ||
694 | CRYPTO_ALG_INTERNAL); | ||
695 | if (IS_ERR(mcryptd_tfm)) | ||
696 | return PTR_ERR(mcryptd_tfm); | ||
697 | mctx = crypto_ahash_ctx(&mcryptd_tfm->base); | ||
698 | mctx->alg_state = &sha1_mb_alg_state; | ||
699 | ctx->mcryptd_tfm = mcryptd_tfm; | ||
700 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
701 | sizeof(struct ahash_request) + | ||
702 | crypto_ahash_reqsize(&mcryptd_tfm->base)); | ||
703 | |||
704 | return 0; | ||
705 | } | ||
706 | |||
/* Release the mcryptd-wrapped transform allocated in async_init_tfm(). */
static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
{
	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);

	mcryptd_free_ahash(ctx->mcryptd_tfm);
}
713 | |||
/* Size the internal algorithm's request context for a sha1_hash_ctx. */
static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm)
{
	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
				sizeof(struct ahash_request) +
				sizeof(struct sha1_hash_ctx));

	return 0;
}
722 | |||
/*
 * Exit handler for the internal (__sha1-mb) algorithm.
 *
 * NOTE(review): this tfm's cra_ctxsize is sizeof(struct sha1_hash_ctx)
 * (see sha1_mb_areq_alg), yet the ctx is reinterpreted here as a
 * sha1_mb_ctx and its mcryptd_tfm pointer freed -- looks questionable;
 * verify whether this exit hook can ever run with a valid pointer there.
 */
static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm)
{
	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);

	mcryptd_free_ahash(ctx->mcryptd_tfm);
}
729 | |||
/*
 * Internal multibuffer algorithm, only reachable through mcryptd
 * (CRYPTO_ALG_INTERNAL keeps it out of generic algorithm lookup).
 */
static struct ahash_alg sha1_mb_areq_alg = {
	.init		=	sha1_mb_init,
	.update		=	sha1_mb_update,
	.final		=	sha1_mb_final,
	.finup		=	sha1_mb_finup,
	.export		=	sha1_mb_export,
	.import		=	sha1_mb_import,
	.halg		=	{
		.digestsize	=	SHA1_DIGEST_SIZE,
		.statesize	=	sizeof(struct sha1_hash_ctx),
		.base		=	{
			.cra_name	 = "__sha1-mb",
			.cra_driver_name = "__intel_sha1-mb",
			.cra_priority	 = 100,
			/*
			 * use ASYNC flag as some buffers in multi-buffer
			 * algo may not have completed before hashing thread
			 * sleep
			 */
			.cra_flags	= CRYPTO_ALG_ASYNC |
					  CRYPTO_ALG_INTERNAL,
			.cra_blocksize	= SHA1_BLOCK_SIZE,
			.cra_module	= THIS_MODULE,
			.cra_list	= LIST_HEAD_INIT
					(sha1_mb_areq_alg.halg.base.cra_list),
			.cra_init	= sha1_mb_areq_init_tfm,
			.cra_exit	= sha1_mb_areq_exit_tfm,
			.cra_ctxsize	= sizeof(struct sha1_hash_ctx),
		}
	}
};
761 | |||
762 | static int sha1_mb_async_init(struct ahash_request *req) | ||
763 | { | ||
764 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
765 | struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
766 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
767 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
768 | |||
769 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
770 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
771 | return crypto_ahash_init(mcryptd_req); | ||
772 | } | ||
773 | |||
774 | static int sha1_mb_async_update(struct ahash_request *req) | ||
775 | { | ||
776 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
777 | |||
778 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
779 | struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
780 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
781 | |||
782 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
783 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
784 | return crypto_ahash_update(mcryptd_req); | ||
785 | } | ||
786 | |||
787 | static int sha1_mb_async_finup(struct ahash_request *req) | ||
788 | { | ||
789 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
790 | |||
791 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
792 | struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
793 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
794 | |||
795 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
796 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
797 | return crypto_ahash_finup(mcryptd_req); | ||
798 | } | ||
799 | |||
800 | static int sha1_mb_async_final(struct ahash_request *req) | ||
801 | { | ||
802 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
803 | |||
804 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
805 | struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
806 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
807 | |||
808 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
809 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
810 | return crypto_ahash_final(mcryptd_req); | ||
811 | } | ||
812 | |||
813 | static int sha1_mb_async_digest(struct ahash_request *req) | ||
814 | { | ||
815 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
816 | struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
817 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
818 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
819 | |||
820 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
821 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
822 | return crypto_ahash_digest(mcryptd_req); | ||
823 | } | ||
824 | |||
825 | static int sha1_mb_async_export(struct ahash_request *req, void *out) | ||
826 | { | ||
827 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
828 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
829 | struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
830 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
831 | |||
832 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
833 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
834 | return crypto_ahash_export(mcryptd_req, out); | ||
835 | } | ||
836 | |||
/*
 * Forward state import to the inner transform.
 *
 * Unlike the other wrappers, this also re-wires the innermost child
 * request's tfm and completion callback before importing.
 * NOTE(review): presumably required because import can be the first
 * operation on a request, before the mcryptd queue path has set these
 * up -- confirm against the mcryptd hash request flow.
 */
static int sha1_mb_async_import(struct ahash_request *req, const void *in)
{
	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
	struct mcryptd_hash_request_ctx *rctx;
	struct ahash_request *areq;

	memcpy(mcryptd_req, req, sizeof(*req));
	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
	rctx = ahash_request_ctx(mcryptd_req);
	areq = &rctx->areq;

	ahash_request_set_tfm(areq, child);
	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
					rctx->complete, req);

	return crypto_ahash_import(mcryptd_req, in);
}
858 | |||
/*
 * Public "sha1" algorithm: every operation is forwarded to the
 * mcryptd-wrapped internal multibuffer implementation.
 */
static struct ahash_alg sha1_mb_async_alg = {
	.init           = sha1_mb_async_init,
	.update         = sha1_mb_async_update,
	.final          = sha1_mb_async_final,
	.finup          = sha1_mb_async_finup,
	.digest         = sha1_mb_async_digest,
	.export		= sha1_mb_async_export,
	.import		= sha1_mb_async_import,
	.halg = {
		.digestsize     = SHA1_DIGEST_SIZE,
		.statesize      = sizeof(struct sha1_hash_ctx),
		.base = {
			.cra_name               = "sha1",
			.cra_driver_name        = "sha1_mb",
			/*
			 * Low priority, since with few concurrent hash requests
			 * this is extremely slow due to the flush delay.  Users
			 * whose workloads would benefit from this can request
			 * it explicitly by driver name, or can increase its
			 * priority at runtime using NETLINK_CRYPTO.
			 */
			.cra_priority           = 50,
			.cra_flags              = CRYPTO_ALG_ASYNC,
			.cra_blocksize          = SHA1_BLOCK_SIZE,
			.cra_module             = THIS_MODULE,
			.cra_list               = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list),
			.cra_init               = sha1_mb_async_init_tfm,
			.cra_exit               = sha1_mb_async_exit_tfm,
			.cra_ctxsize		= sizeof(struct sha1_mb_ctx),
			.cra_alignmask		= 0,
		},
	},
};
892 | |||
/*
 * Per-cpu flusher: force completion of queued jobs whose flush deadline
 * has passed, then re-arm the timer for the next pending job.
 *
 * Returns the jiffies deadline of the next pending job, or 0 when the
 * work list is empty.
 *
 * NOTE(review): cstate->work_list is read here without taking
 * cstate->work_lock -- presumably serialized by running on the owning
 * CPU's workqueue; confirm against mcryptd_flusher.
 */
static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
{
	struct mcryptd_hash_request_ctx *rctx;
	unsigned long cur_time;
	unsigned long next_flush = 0;
	struct sha1_hash_ctx *sha_ctx;


	cur_time = jiffies;

	while (!list_empty(&cstate->work_list)) {
		rctx = list_entry(cstate->work_list.next,
				struct mcryptd_hash_request_ctx, waiter);
		/* oldest entry not yet expired: nothing more to flush */
		if (time_before(cur_time, rctx->tag.expire))
			break;
		kernel_fpu_begin();
		sha_ctx = (struct sha1_hash_ctx *)
					sha1_ctx_mgr_flush(cstate->mgr);
		kernel_fpu_end();
		if (!sha_ctx) {
			pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
			break;
		}
		/* complete whichever job the manager flushed out */
		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
		sha_finish_walk(&rctx, cstate, true);
		sha_complete_job(rctx, cstate, 0);
	}

	if (!list_empty(&cstate->work_list)) {
		rctx = list_entry(cstate->work_list.next,
				struct mcryptd_hash_request_ctx, waiter);
		/* get the hash context and then flush time */
		next_flush = rctx->tag.expire;
		mcryptd_arm_flusher(cstate, get_delay(next_flush));
	}
	return next_flush;
}
930 | |||
931 | static int __init sha1_mb_mod_init(void) | ||
932 | { | ||
933 | |||
934 | int cpu; | ||
935 | int err; | ||
936 | struct mcryptd_alg_cstate *cpu_state; | ||
937 | |||
938 | /* check for dependent cpu features */ | ||
939 | if (!boot_cpu_has(X86_FEATURE_AVX2) || | ||
940 | !boot_cpu_has(X86_FEATURE_BMI2)) | ||
941 | return -ENODEV; | ||
942 | |||
943 | /* initialize multibuffer structures */ | ||
944 | sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate); | ||
945 | |||
946 | sha1_job_mgr_init = sha1_mb_mgr_init_avx2; | ||
947 | sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2; | ||
948 | sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2; | ||
949 | sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2; | ||
950 | |||
951 | if (!sha1_mb_alg_state.alg_cstate) | ||
952 | return -ENOMEM; | ||
953 | for_each_possible_cpu(cpu) { | ||
954 | cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); | ||
955 | cpu_state->next_flush = 0; | ||
956 | cpu_state->next_seq_num = 0; | ||
957 | cpu_state->flusher_engaged = false; | ||
958 | INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); | ||
959 | cpu_state->cpu = cpu; | ||
960 | cpu_state->alg_state = &sha1_mb_alg_state; | ||
961 | cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr), | ||
962 | GFP_KERNEL); | ||
963 | if (!cpu_state->mgr) | ||
964 | goto err2; | ||
965 | sha1_ctx_mgr_init(cpu_state->mgr); | ||
966 | INIT_LIST_HEAD(&cpu_state->work_list); | ||
967 | spin_lock_init(&cpu_state->work_lock); | ||
968 | } | ||
969 | sha1_mb_alg_state.flusher = &sha1_mb_flusher; | ||
970 | |||
971 | err = crypto_register_ahash(&sha1_mb_areq_alg); | ||
972 | if (err) | ||
973 | goto err2; | ||
974 | err = crypto_register_ahash(&sha1_mb_async_alg); | ||
975 | if (err) | ||
976 | goto err1; | ||
977 | |||
978 | |||
979 | return 0; | ||
980 | err1: | ||
981 | crypto_unregister_ahash(&sha1_mb_areq_alg); | ||
982 | err2: | ||
983 | for_each_possible_cpu(cpu) { | ||
984 | cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); | ||
985 | kfree(cpu_state->mgr); | ||
986 | } | ||
987 | free_percpu(sha1_mb_alg_state.alg_cstate); | ||
988 | return -ENODEV; | ||
989 | } | ||
990 | |||
991 | static void __exit sha1_mb_mod_fini(void) | ||
992 | { | ||
993 | int cpu; | ||
994 | struct mcryptd_alg_cstate *cpu_state; | ||
995 | |||
996 | crypto_unregister_ahash(&sha1_mb_async_alg); | ||
997 | crypto_unregister_ahash(&sha1_mb_areq_alg); | ||
998 | for_each_possible_cpu(cpu) { | ||
999 | cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); | ||
1000 | kfree(cpu_state->mgr); | ||
1001 | } | ||
1002 | free_percpu(sha1_mb_alg_state.alg_cstate); | ||
1003 | } | ||
1004 | |||
module_init(sha1_mb_mod_init);
module_exit(sha1_mb_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");

/* Allow auto-loading when a "sha1" implementation is requested. */
MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h deleted file mode 100644 index 9454bd16f9f8..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h +++ /dev/null | |||
@@ -1,134 +0,0 @@ | |||
1 | /* | ||
2 | * Header file for multi buffer SHA context | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2014 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2014 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
#ifndef _SHA_MB_CTX_INTERNAL_H
#define _SHA_MB_CTX_INTERNAL_H

#include "sha1_mb_mgr.h"

/*
 * Per-request operation/progress flags; stored in the request context
 * and passed to sha1_ctx_mgr_submit() (HASH_UPDATE / HASH_LAST).
 */
#define HASH_UPDATE          0x00
#define HASH_LAST            0x01
#define HASH_DONE            0x02
#define HASH_FINAL           0x04

/* Lifecycle states kept in sha1_hash_ctx::status. */
#define HASH_CTX_STS_IDLE       0x00
#define HASH_CTX_STS_PROCESSING 0x01
#define HASH_CTX_STS_LAST       0x02
#define HASH_CTX_STS_COMPLETE   0x04

/* Error codes stored in sha1_hash_ctx::error (0 means no error). */
enum hash_ctx_error {
	HASH_CTX_ERROR_NONE = 0,
	HASH_CTX_ERROR_INVALID_FLAGS = -1,
	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
	HASH_CTX_ERROR_ALREADY_COMPLETED = -3,

#ifdef HASH_CTX_DEBUG
	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
#endif
};


/* Convenience accessors for a sha1_hash_ctx. */
#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
#define hash_ctx_status(ctx)     ((ctx)->status)
#define hash_ctx_error(ctx)      ((ctx)->error)
/* Reset a context to "complete, no error" so it can take a new job. */
#define hash_ctx_init(ctx) \
	do { \
		(ctx)->error = HASH_CTX_ERROR_NONE; \
		(ctx)->status = HASH_CTX_STS_COMPLETE; \
	} while (0)


/* Hash Constants and Typedefs */
#define SHA1_DIGEST_LENGTH          5	/* digest length in 32-bit words */
#define SHA1_LOG2_BLOCK_SIZE        6	/* log2 of the 64-byte SHA-1 block */

#define SHA1_PADLENGTHFIELD_SIZE    8	/* bytes of the pad length field */

#ifdef SHA_MB_DEBUG
#define assert(expr) \
do { \
	if (unlikely(!(expr))) { \
		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
		#expr, __FILE__, __func__, __LINE__); \
	} \
} while (0)
#else
#define assert(expr) do {} while (0)
#endif

/* Wrapper around the raw job manager state. */
struct sha1_ctx_mgr {
	struct sha1_mb_mgr mgr;
};

/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */

struct sha1_hash_ctx {
	/* Must be at struct offset 0 */
	struct job_sha1 job;
	/* status flag */
	int status;
	/* error flag */
	int error;

	/* presumably the running total of input bytes -- confirm vs. mgr */
	uint64_t total_length;
	const void *incoming_buffer;
	uint32_t incoming_buffer_length;
	/* staging space for up to two not-yet-submitted blocks */
	uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2];
	uint32_t partial_block_buffer_length;
	void *user_data;
};

#endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h deleted file mode 100644 index 08ad1a9acfd7..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h +++ /dev/null | |||
@@ -1,110 +0,0 @@ | |||
1 | /* | ||
2 | * Header file for multi buffer SHA1 algorithm manager | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2014 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * James Guilford <james.guilford@intel.com> | ||
22 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
23 | * | ||
24 | * BSD LICENSE | ||
25 | * | ||
26 | * Copyright(c) 2014 Intel Corporation. | ||
27 | * | ||
28 | * Redistribution and use in source and binary forms, with or without | ||
29 | * modification, are permitted provided that the following conditions | ||
30 | * are met: | ||
31 | * | ||
32 | * * Redistributions of source code must retain the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer. | ||
34 | * * Redistributions in binary form must reproduce the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer in | ||
36 | * the documentation and/or other materials provided with the | ||
37 | * distribution. | ||
38 | * * Neither the name of Intel Corporation nor the names of its | ||
39 | * contributors may be used to endorse or promote products derived | ||
40 | * from this software without specific prior written permission. | ||
41 | * | ||
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
53 | */ | ||
#ifndef __SHA_MB_MGR_H
#define __SHA_MB_MGR_H


#include <linux/types.h>

/* SHA-1 digest is 5 x 32-bit words (20 bytes). */
#define NUM_SHA1_DIGEST_WORDS 5

/* Lifecycle states of a job_sha1. */
enum job_sts {	STS_UNKNOWN = 0,
		STS_BEING_PROCESSED = 1,
		STS_COMPLETED = 2,
		STS_INTERNAL_ERROR = 3,
		STS_ERROR = 4
};

/* One hash job as seen by the out-of-order job manager. */
struct job_sha1 {
	u8	*buffer;
	u32	len;
	/* __aligned(32) -- presumably for vector access in asm; confirm */
	u32	result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
	enum	job_sts status;
	void	*user_data;
};

/* SHA1 out-of-order scheduler */

/* typedef uint32_t sha1_digest_array[5][8]; */

/* Transposed per-lane state for the 8-lane implementation. */
struct sha1_args_x8 {
	uint32_t	digest[5][8];
	uint8_t		*data_ptr[8];
};

struct sha1_lane_data {
	struct job_sha1 *job_in_lane;
};

struct sha1_mb_mgr {
	struct sha1_args_x8 args;

	uint32_t lens[8];

	/* each byte is index (0...7) of unused lanes */
	uint64_t unused_lanes;
	/* byte 4 is set to FF as a flag */
	struct sha1_lane_data ldata[8];
};


#define SHA1_MB_MGR_NUM_LANES_AVX2 8

/* AVX2 implementations of the job-manager entry points. */
void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
					 struct job_sha1 *job);
struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);

#endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S deleted file mode 100644 index 86688c6e7a25..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S +++ /dev/null | |||
@@ -1,287 +0,0 @@ | |||
1 | /* | ||
2 | * Header file for multi buffer SHA1 algorithm data structure | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2014 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * James Guilford <james.guilford@intel.com> | ||
22 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
23 | * | ||
24 | * BSD LICENSE | ||
25 | * | ||
26 | * Copyright(c) 2014 Intel Corporation. | ||
27 | * | ||
28 | * Redistribution and use in source and binary forms, with or without | ||
29 | * modification, are permitted provided that the following conditions | ||
30 | * are met: | ||
31 | * | ||
32 | * * Redistributions of source code must retain the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer. | ||
34 | * * Redistributions in binary form must reproduce the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer in | ||
36 | * the documentation and/or other materials provided with the | ||
37 | * distribution. | ||
38 | * * Neither the name of Intel Corporation nor the names of its | ||
39 | * contributors may be used to endorse or promote products derived | ||
40 | * from this software without specific prior written permission. | ||
41 | * | ||
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
53 | */ | ||
54 | |||
55 | # Macros for defining data structures | ||
56 | |||
57 | # Usage example | ||
58 | |||
59 | #START_FIELDS # JOB_AES | ||
60 | ### name size align | ||
61 | #FIELD _plaintext, 8, 8 # pointer to plaintext | ||
62 | #FIELD _ciphertext, 8, 8 # pointer to ciphertext | ||
63 | #FIELD _IV, 16, 8 # IV | ||
64 | #FIELD _keys, 8, 8 # pointer to keys | ||
65 | #FIELD _len, 4, 4 # length in bytes | ||
66 | #FIELD _status, 4, 4 # status enumeration | ||
67 | #FIELD _user_data, 8, 8 # pointer to user data | ||
68 | #UNION _union, size1, align1, \ | ||
69 | # size2, align2, \ | ||
70 | # size3, align3, \ | ||
71 | # ... | ||
72 | #END_FIELDS | ||
73 | #%assign _JOB_AES_size _FIELD_OFFSET | ||
74 | #%assign _JOB_AES_align _STRUCT_ALIGN | ||
75 | |||
76 | ######################################################################### | ||
77 | |||
78 | # Alternate "struc-like" syntax: | ||
79 | # STRUCT job_aes2 | ||
80 | # RES_Q .plaintext, 1 | ||
81 | # RES_Q .ciphertext, 1 | ||
82 | # RES_DQ .IV, 1 | ||
83 | # RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN | ||
84 | # RES_U .union, size1, align1, \ | ||
85 | # size2, align2, \ | ||
86 | # ... | ||
87 | # ENDSTRUCT | ||
88 | # # Following only needed if nesting | ||
89 | # %assign job_aes2_size _FIELD_OFFSET | ||
90 | # %assign job_aes2_align _STRUCT_ALIGN | ||
91 | # | ||
92 | # RES_* macros take a name, a count and an optional alignment. | ||
93 | # The count in in terms of the base size of the macro, and the | ||
94 | # default alignment is the base size. | ||
95 | # The macros are: | ||
96 | # Macro Base size | ||
97 | # RES_B 1 | ||
98 | # RES_W 2 | ||
99 | # RES_D 4 | ||
100 | # RES_Q 8 | ||
101 | # RES_DQ 16 | ||
102 | # RES_Y 32 | ||
103 | # RES_Z 64 | ||
104 | # | ||
105 | # RES_U defines a union. It's arguments are a name and two or more | ||
106 | # pairs of "size, alignment" | ||
107 | # | ||
108 | # The two assigns are only needed if this structure is being nested | ||
109 | # within another. Even if the assigns are not done, one can still use | ||
110 | # STRUCT_NAME_size as the size of the structure. | ||
111 | # | ||
112 | # Note that for nesting, you still need to assign to STRUCT_NAME_size. | ||
113 | # | ||
114 | # The differences between this and using "struc" directly are that each | ||
115 | # type is implicitly aligned to its natural length (although this can be | ||
116 | # over-ridden with an explicit third parameter), and that the structure | ||
117 | # is padded at the end to its overall alignment. | ||
118 | # | ||
119 | |||
120 | ######################################################################### | ||
121 | |||
122 | #ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_ | ||
123 | #define _SHA1_MB_MGR_DATASTRUCT_ASM_ | ||
124 | |||
125 | ## START_FIELDS | ||
126 | .macro START_FIELDS | ||
127 | _FIELD_OFFSET = 0 | ||
128 | _STRUCT_ALIGN = 0 | ||
129 | .endm | ||
130 | |||
131 | ## FIELD name size align | ||
132 | .macro FIELD name size align | ||
133 | _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) | ||
134 | \name = _FIELD_OFFSET | ||
135 | _FIELD_OFFSET = _FIELD_OFFSET + (\size) | ||
136 | .if (\align > _STRUCT_ALIGN) | ||
137 | _STRUCT_ALIGN = \align | ||
138 | .endif | ||
139 | .endm | ||
140 | |||
141 | ## END_FIELDS | ||
142 | .macro END_FIELDS | ||
143 | _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) | ||
144 | .endm | ||
145 | |||
146 | ######################################################################## | ||
147 | |||
148 | .macro STRUCT p1 | ||
149 | START_FIELDS | ||
150 | .struc \p1 | ||
151 | .endm | ||
152 | |||
153 | .macro ENDSTRUCT | ||
154 | tmp = _FIELD_OFFSET | ||
155 | END_FIELDS | ||
156 | tmp = (_FIELD_OFFSET - %%tmp) | ||
157 | .if (tmp > 0) | ||
158 | .lcomm tmp | ||
159 | .endif | ||
160 | .endstruc | ||
161 | .endm | ||
162 | |||
163 | ## RES_int name size align | ||
164 | .macro RES_int p1 p2 p3 | ||
165 | name = \p1 | ||
166 | size = \p2 | ||
167 | align = .\p3 | ||
168 | |||
169 | _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) | ||
170 | .align align | ||
171 | .lcomm name size | ||
172 | _FIELD_OFFSET = _FIELD_OFFSET + (size) | ||
173 | .if (align > _STRUCT_ALIGN) | ||
174 | _STRUCT_ALIGN = align | ||
175 | .endif | ||
176 | .endm | ||
177 | |||
178 | |||
179 | |||
180 | # macro RES_B name, size [, align] | ||
181 | .macro RES_B _name, _size, _align=1 | ||
182 | RES_int _name _size _align | ||
183 | .endm | ||
184 | |||
185 | # macro RES_W name, size [, align] | ||
186 | .macro RES_W _name, _size, _align=2 | ||
187 | RES_int _name 2*(_size) _align | ||
188 | .endm | ||
189 | |||
190 | # macro RES_D name, size [, align] | ||
191 | .macro RES_D _name, _size, _align=4 | ||
192 | RES_int _name 4*(_size) _align | ||
193 | .endm | ||
194 | |||
195 | # macro RES_Q name, size [, align] | ||
196 | .macro RES_Q _name, _size, _align=8 | ||
197 | RES_int _name 8*(_size) _align | ||
198 | .endm | ||
199 | |||
200 | # macro RES_DQ name, size [, align] | ||
201 | .macro RES_DQ _name, _size, _align=16 | ||
202 | RES_int _name 16*(_size) _align | ||
203 | .endm | ||
204 | |||
205 | # macro RES_Y name, size [, align] | ||
206 | .macro RES_Y _name, _size, _align=32 | ||
207 | RES_int _name 32*(_size) _align | ||
208 | .endm | ||
209 | |||
210 | # macro RES_Z name, size [, align] | ||
211 | .macro RES_Z _name, _size, _align=64 | ||
212 | RES_int _name 64*(_size) _align | ||
213 | .endm | ||
214 | |||
215 | |||
216 | #endif | ||
217 | |||
218 | ######################################################################## | ||
219 | #### Define constants | ||
220 | ######################################################################## | ||
221 | |||
222 | ######################################################################## | ||
223 | #### Define SHA1 Out Of Order Data Structures | ||
224 | ######################################################################## | ||
225 | |||
226 | START_FIELDS # LANE_DATA | ||
227 | ### name size align | ||
228 | FIELD _job_in_lane, 8, 8 # pointer to job object | ||
229 | END_FIELDS | ||
230 | |||
231 | _LANE_DATA_size = _FIELD_OFFSET | ||
232 | _LANE_DATA_align = _STRUCT_ALIGN | ||
233 | |||
234 | ######################################################################## | ||
235 | |||
236 | START_FIELDS # SHA1_ARGS_X8 | ||
237 | ### name size align | ||
238 | FIELD _digest, 4*5*8, 16 # transposed digest | ||
239 | FIELD _data_ptr, 8*8, 8 # array of pointers to data | ||
240 | END_FIELDS | ||
241 | |||
242 | _SHA1_ARGS_X4_size = _FIELD_OFFSET | ||
243 | _SHA1_ARGS_X4_align = _STRUCT_ALIGN | ||
244 | _SHA1_ARGS_X8_size = _FIELD_OFFSET | ||
245 | _SHA1_ARGS_X8_align = _STRUCT_ALIGN | ||
246 | |||
247 | ######################################################################## | ||
248 | |||
249 | START_FIELDS # MB_MGR | ||
250 | ### name size align | ||
251 | FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align | ||
252 | FIELD _lens, 4*8, 8 | ||
253 | FIELD _unused_lanes, 8, 8 | ||
254 | FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align | ||
255 | END_FIELDS | ||
256 | |||
257 | _MB_MGR_size = _FIELD_OFFSET | ||
258 | _MB_MGR_align = _STRUCT_ALIGN | ||
259 | |||
260 | _args_digest = _args + _digest | ||
261 | _args_data_ptr = _args + _data_ptr | ||
262 | |||
263 | |||
264 | ######################################################################## | ||
265 | #### Define constants | ||
266 | ######################################################################## | ||
267 | |||
268 | #define STS_UNKNOWN 0 | ||
269 | #define STS_BEING_PROCESSED 1 | ||
270 | #define STS_COMPLETED 2 | ||
271 | |||
272 | ######################################################################## | ||
273 | #### Define JOB_SHA1 structure | ||
274 | ######################################################################## | ||
275 | |||
276 | START_FIELDS # JOB_SHA1 | ||
277 | |||
278 | ### name size align | ||
279 | FIELD _buffer, 8, 8 # pointer to buffer | ||
280 | FIELD _len, 4, 4 # length in bytes | ||
281 | FIELD _result_digest, 5*4, 32 # Digest (output) | ||
282 | FIELD _status, 4, 4 | ||
283 | FIELD _user_data, 8, 8 | ||
284 | END_FIELDS | ||
285 | |||
286 | _JOB_SHA1_size = _FIELD_OFFSET | ||
287 | _JOB_SHA1_align = _STRUCT_ALIGN | ||
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S deleted file mode 100644 index 7cfba738f104..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S +++ /dev/null | |||
@@ -1,304 +0,0 @@ | |||
1 | /* | ||
2 | * Flush routine for SHA1 multibuffer | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2014 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * James Guilford <james.guilford@intel.com> | ||
22 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
23 | * | ||
24 | * BSD LICENSE | ||
25 | * | ||
26 | * Copyright(c) 2014 Intel Corporation. | ||
27 | * | ||
28 | * Redistribution and use in source and binary forms, with or without | ||
29 | * modification, are permitted provided that the following conditions | ||
30 | * are met: | ||
31 | * | ||
32 | * * Redistributions of source code must retain the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer. | ||
34 | * * Redistributions in binary form must reproduce the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer in | ||
36 | * the documentation and/or other materials provided with the | ||
37 | * distribution. | ||
38 | * * Neither the name of Intel Corporation nor the names of its | ||
39 | * contributors may be used to endorse or promote products derived | ||
40 | * from this software without specific prior written permission. | ||
41 | * | ||
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
53 | */ | ||
54 | #include <linux/linkage.h> | ||
55 | #include <asm/frame.h> | ||
56 | #include "sha1_mb_mgr_datastruct.S" | ||
57 | |||
58 | |||
59 | .extern sha1_x8_avx2 | ||
60 | |||
61 | # LINUX register definitions | ||
62 | #define arg1 %rdi | ||
63 | #define arg2 %rsi | ||
64 | |||
65 | # Common definitions | ||
66 | #define state arg1 | ||
67 | #define job arg2 | ||
68 | #define len2 arg2 | ||
69 | |||
70 | # idx must be a register not clobbered by sha1_x8_avx2 | ||
71 | #define idx %r8 | ||
72 | #define DWORD_idx %r8d | ||
73 | |||
74 | #define unused_lanes %rbx | ||
75 | #define lane_data %rbx | ||
76 | #define tmp2 %rbx | ||
77 | #define tmp2_w %ebx | ||
78 | |||
79 | #define job_rax %rax | ||
80 | #define tmp1 %rax | ||
81 | #define size_offset %rax | ||
82 | #define tmp %rax | ||
83 | #define start_offset %rax | ||
84 | |||
85 | #define tmp3 %arg1 | ||
86 | |||
87 | #define extra_blocks %arg2 | ||
88 | #define p %arg2 | ||
89 | |||
90 | .macro LABEL prefix n | ||
91 | \prefix\n\(): | ||
92 | .endm | ||
93 | |||
94 | .macro JNE_SKIP i | ||
95 | jne skip_\i | ||
96 | .endm | ||
97 | |||
98 | .altmacro | ||
99 | .macro SET_OFFSET _offset | ||
100 | offset = \_offset | ||
101 | .endm | ||
102 | .noaltmacro | ||
103 | |||
104 | # JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state) | ||
105 | # arg 1 : rcx : state | ||
106 | ENTRY(sha1_mb_mgr_flush_avx2) | ||
107 | FRAME_BEGIN | ||
108 | push %rbx | ||
109 | |||
110 | # If bit (32+3) is set, then all lanes are empty | ||
111 | mov _unused_lanes(state), unused_lanes | ||
112 | bt $32+3, unused_lanes | ||
113 | jc return_null | ||
114 | |||
115 | # find a lane with a non-null job | ||
116 | xor idx, idx | ||
117 | offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) | ||
118 | cmpq $0, offset(state) | ||
119 | cmovne one(%rip), idx | ||
120 | offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) | ||
121 | cmpq $0, offset(state) | ||
122 | cmovne two(%rip), idx | ||
123 | offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) | ||
124 | cmpq $0, offset(state) | ||
125 | cmovne three(%rip), idx | ||
126 | offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) | ||
127 | cmpq $0, offset(state) | ||
128 | cmovne four(%rip), idx | ||
129 | offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) | ||
130 | cmpq $0, offset(state) | ||
131 | cmovne five(%rip), idx | ||
132 | offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) | ||
133 | cmpq $0, offset(state) | ||
134 | cmovne six(%rip), idx | ||
135 | offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) | ||
136 | cmpq $0, offset(state) | ||
137 | cmovne seven(%rip), idx | ||
138 | |||
139 | # copy idx to empty lanes | ||
140 | copy_lane_data: | ||
141 | offset = (_args + _data_ptr) | ||
142 | mov offset(state,idx,8), tmp | ||
143 | |||
144 | I = 0 | ||
145 | .rep 8 | ||
146 | offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) | ||
147 | cmpq $0, offset(state) | ||
148 | .altmacro | ||
149 | JNE_SKIP %I | ||
150 | offset = (_args + _data_ptr + 8*I) | ||
151 | mov tmp, offset(state) | ||
152 | offset = (_lens + 4*I) | ||
153 | movl $0xFFFFFFFF, offset(state) | ||
154 | LABEL skip_ %I | ||
155 | I = (I+1) | ||
156 | .noaltmacro | ||
157 | .endr | ||
158 | |||
159 | # Find min length | ||
160 | vmovdqu _lens+0*16(state), %xmm0 | ||
161 | vmovdqu _lens+1*16(state), %xmm1 | ||
162 | |||
163 | vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} | ||
164 | vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} | ||
165 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} | ||
166 | vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} | ||
167 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword | ||
168 | |||
169 | vmovd %xmm2, DWORD_idx | ||
170 | mov idx, len2 | ||
171 | and $0xF, idx | ||
172 | shr $4, len2 | ||
173 | jz len_is_0 | ||
174 | |||
175 | vpand clear_low_nibble(%rip), %xmm2, %xmm2 | ||
176 | vpshufd $0, %xmm2, %xmm2 | ||
177 | |||
178 | vpsubd %xmm2, %xmm0, %xmm0 | ||
179 | vpsubd %xmm2, %xmm1, %xmm1 | ||
180 | |||
181 | vmovdqu %xmm0, _lens+0*16(state) | ||
182 | vmovdqu %xmm1, _lens+1*16(state) | ||
183 | |||
184 | # "state" and "args" are the same address, arg1 | ||
185 | # len is arg2 | ||
186 | call sha1_x8_avx2 | ||
187 | # state and idx are intact | ||
188 | |||
189 | |||
190 | len_is_0: | ||
191 | # process completed job "idx" | ||
192 | imul $_LANE_DATA_size, idx, lane_data | ||
193 | lea _ldata(state, lane_data), lane_data | ||
194 | |||
195 | mov _job_in_lane(lane_data), job_rax | ||
196 | movq $0, _job_in_lane(lane_data) | ||
197 | movl $STS_COMPLETED, _status(job_rax) | ||
198 | mov _unused_lanes(state), unused_lanes | ||
199 | shl $4, unused_lanes | ||
200 | or idx, unused_lanes | ||
201 | mov unused_lanes, _unused_lanes(state) | ||
202 | |||
203 | movl $0xFFFFFFFF, _lens(state, idx, 4) | ||
204 | |||
205 | vmovd _args_digest(state , idx, 4) , %xmm0 | ||
206 | vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 | ||
207 | vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 | ||
208 | vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 | ||
209 | movl _args_digest+4*32(state, idx, 4), tmp2_w | ||
210 | |||
211 | vmovdqu %xmm0, _result_digest(job_rax) | ||
212 | offset = (_result_digest + 1*16) | ||
213 | mov tmp2_w, offset(job_rax) | ||
214 | |||
215 | return: | ||
216 | pop %rbx | ||
217 | FRAME_END | ||
218 | ret | ||
219 | |||
220 | return_null: | ||
221 | xor job_rax, job_rax | ||
222 | jmp return | ||
223 | ENDPROC(sha1_mb_mgr_flush_avx2) | ||
224 | |||
225 | |||
226 | ################################################################# | ||
227 | |||
228 | .align 16 | ||
229 | ENTRY(sha1_mb_mgr_get_comp_job_avx2) | ||
230 | push %rbx | ||
231 | |||
232 | ## if bit 32+3 is set, then all lanes are empty | ||
233 | mov _unused_lanes(state), unused_lanes | ||
234 | bt $(32+3), unused_lanes | ||
235 | jc .return_null | ||
236 | |||
237 | # Find min length | ||
238 | vmovdqu _lens(state), %xmm0 | ||
239 | vmovdqu _lens+1*16(state), %xmm1 | ||
240 | |||
241 | vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} | ||
242 | vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} | ||
243 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} | ||
244 | vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} | ||
245 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword | ||
246 | |||
247 | vmovd %xmm2, DWORD_idx | ||
248 | test $~0xF, idx | ||
249 | jnz .return_null | ||
250 | |||
251 | # process completed job "idx" | ||
252 | imul $_LANE_DATA_size, idx, lane_data | ||
253 | lea _ldata(state, lane_data), lane_data | ||
254 | |||
255 | mov _job_in_lane(lane_data), job_rax | ||
256 | movq $0, _job_in_lane(lane_data) | ||
257 | movl $STS_COMPLETED, _status(job_rax) | ||
258 | mov _unused_lanes(state), unused_lanes | ||
259 | shl $4, unused_lanes | ||
260 | or idx, unused_lanes | ||
261 | mov unused_lanes, _unused_lanes(state) | ||
262 | |||
263 | movl $0xFFFFFFFF, _lens(state, idx, 4) | ||
264 | |||
265 | vmovd _args_digest(state, idx, 4), %xmm0 | ||
266 | vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 | ||
267 | vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 | ||
268 | vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 | ||
269 | movl _args_digest+4*32(state, idx, 4), tmp2_w | ||
270 | |||
271 | vmovdqu %xmm0, _result_digest(job_rax) | ||
272 | movl tmp2_w, _result_digest+1*16(job_rax) | ||
273 | |||
274 | pop %rbx | ||
275 | |||
276 | ret | ||
277 | |||
278 | .return_null: | ||
279 | xor job_rax, job_rax | ||
280 | pop %rbx | ||
281 | ret | ||
282 | ENDPROC(sha1_mb_mgr_get_comp_job_avx2) | ||
283 | |||
284 | .section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 | ||
285 | .align 16 | ||
286 | clear_low_nibble: | ||
287 | .octa 0x000000000000000000000000FFFFFFF0 | ||
288 | |||
289 | .section .rodata.cst8, "aM", @progbits, 8 | ||
290 | .align 8 | ||
291 | one: | ||
292 | .quad 1 | ||
293 | two: | ||
294 | .quad 2 | ||
295 | three: | ||
296 | .quad 3 | ||
297 | four: | ||
298 | .quad 4 | ||
299 | five: | ||
300 | .quad 5 | ||
301 | six: | ||
302 | .quad 6 | ||
303 | seven: | ||
304 | .quad 7 | ||
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c deleted file mode 100644 index d2add0d35f43..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c +++ /dev/null | |||
@@ -1,64 +0,0 @@ | |||
1 | /* | ||
2 | * Initialization code for multi buffer SHA1 algorithm for AVX2 | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2014 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2014 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #include "sha1_mb_mgr.h" | ||
55 | |||
56 | void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) | ||
57 | { | ||
58 | unsigned int j; | ||
59 | state->unused_lanes = 0xF76543210ULL; | ||
60 | for (j = 0; j < 8; j++) { | ||
61 | state->lens[j] = 0xFFFFFFFF; | ||
62 | state->ldata[j].job_in_lane = NULL; | ||
63 | } | ||
64 | } | ||
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S deleted file mode 100644 index 7a93b1c0d69a..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S +++ /dev/null | |||
@@ -1,209 +0,0 @@ | |||
1 | /* | ||
2 | * Buffer submit code for multi buffer SHA1 algorithm | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2014 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * James Guilford <james.guilford@intel.com> | ||
22 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
23 | * | ||
24 | * BSD LICENSE | ||
25 | * | ||
26 | * Copyright(c) 2014 Intel Corporation. | ||
27 | * | ||
28 | * Redistribution and use in source and binary forms, with or without | ||
29 | * modification, are permitted provided that the following conditions | ||
30 | * are met: | ||
31 | * | ||
32 | * * Redistributions of source code must retain the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer. | ||
34 | * * Redistributions in binary form must reproduce the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer in | ||
36 | * the documentation and/or other materials provided with the | ||
37 | * distribution. | ||
38 | * * Neither the name of Intel Corporation nor the names of its | ||
39 | * contributors may be used to endorse or promote products derived | ||
40 | * from this software without specific prior written permission. | ||
41 | * | ||
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
53 | */ | ||
54 | |||
55 | #include <linux/linkage.h> | ||
56 | #include <asm/frame.h> | ||
57 | #include "sha1_mb_mgr_datastruct.S" | ||
58 | |||
59 | |||
60 | .extern sha1_x8_avx | ||
61 | |||
62 | # LINUX register definitions | ||
63 | arg1 = %rdi | ||
64 | arg2 = %rsi | ||
65 | size_offset = %rcx | ||
66 | tmp2 = %rcx | ||
67 | extra_blocks = %rdx | ||
68 | |||
69 | # Common definitions | ||
70 | #define state arg1 | ||
71 | #define job %rsi | ||
72 | #define len2 arg2 | ||
73 | #define p2 arg2 | ||
74 | |||
75 | # idx must be a register not clobberred by sha1_x8_avx2 | ||
76 | idx = %r8 | ||
77 | DWORD_idx = %r8d | ||
78 | last_len = %r8 | ||
79 | |||
80 | p = %r11 | ||
81 | start_offset = %r11 | ||
82 | |||
83 | unused_lanes = %rbx | ||
84 | BYTE_unused_lanes = %bl | ||
85 | |||
86 | job_rax = %rax | ||
87 | len = %rax | ||
88 | DWORD_len = %eax | ||
89 | |||
90 | lane = %r12 | ||
91 | tmp3 = %r12 | ||
92 | |||
93 | tmp = %r9 | ||
94 | DWORD_tmp = %r9d | ||
95 | |||
96 | lane_data = %r10 | ||
97 | |||
98 | # JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job) | ||
99 | # arg 1 : rcx : state | ||
100 | # arg 2 : rdx : job | ||
101 | ENTRY(sha1_mb_mgr_submit_avx2) | ||
102 | FRAME_BEGIN | ||
103 | push %rbx | ||
104 | push %r12 | ||
105 | |||
106 | mov _unused_lanes(state), unused_lanes | ||
107 | mov unused_lanes, lane | ||
108 | and $0xF, lane | ||
109 | shr $4, unused_lanes | ||
110 | imul $_LANE_DATA_size, lane, lane_data | ||
111 | movl $STS_BEING_PROCESSED, _status(job) | ||
112 | lea _ldata(state, lane_data), lane_data | ||
113 | mov unused_lanes, _unused_lanes(state) | ||
114 | movl _len(job), DWORD_len | ||
115 | |||
116 | mov job, _job_in_lane(lane_data) | ||
117 | shl $4, len | ||
118 | or lane, len | ||
119 | |||
120 | movl DWORD_len, _lens(state , lane, 4) | ||
121 | |||
122 | # Load digest words from result_digest | ||
123 | vmovdqu _result_digest(job), %xmm0 | ||
124 | mov _result_digest+1*16(job), DWORD_tmp | ||
125 | vmovd %xmm0, _args_digest(state, lane, 4) | ||
126 | vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) | ||
127 | vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) | ||
128 | vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) | ||
129 | movl DWORD_tmp, _args_digest+4*32(state , lane, 4) | ||
130 | |||
131 | mov _buffer(job), p | ||
132 | mov p, _args_data_ptr(state, lane, 8) | ||
133 | |||
134 | cmp $0xF, unused_lanes | ||
135 | jne return_null | ||
136 | |||
137 | start_loop: | ||
138 | # Find min length | ||
139 | vmovdqa _lens(state), %xmm0 | ||
140 | vmovdqa _lens+1*16(state), %xmm1 | ||
141 | |||
142 | vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} | ||
143 | vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} | ||
144 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} | ||
145 | vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} | ||
146 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword | ||
147 | |||
148 | vmovd %xmm2, DWORD_idx | ||
149 | mov idx, len2 | ||
150 | and $0xF, idx | ||
151 | shr $4, len2 | ||
152 | jz len_is_0 | ||
153 | |||
154 | vpand clear_low_nibble(%rip), %xmm2, %xmm2 | ||
155 | vpshufd $0, %xmm2, %xmm2 | ||
156 | |||
157 | vpsubd %xmm2, %xmm0, %xmm0 | ||
158 | vpsubd %xmm2, %xmm1, %xmm1 | ||
159 | |||
160 | vmovdqa %xmm0, _lens + 0*16(state) | ||
161 | vmovdqa %xmm1, _lens + 1*16(state) | ||
162 | |||
163 | |||
164 | # "state" and "args" are the same address, arg1 | ||
165 | # len is arg2 | ||
166 | call sha1_x8_avx2 | ||
167 | |||
168 | # state and idx are intact | ||
169 | |||
170 | len_is_0: | ||
171 | # process completed job "idx" | ||
172 | imul $_LANE_DATA_size, idx, lane_data | ||
173 | lea _ldata(state, lane_data), lane_data | ||
174 | |||
175 | mov _job_in_lane(lane_data), job_rax | ||
176 | mov _unused_lanes(state), unused_lanes | ||
177 | movq $0, _job_in_lane(lane_data) | ||
178 | movl $STS_COMPLETED, _status(job_rax) | ||
179 | shl $4, unused_lanes | ||
180 | or idx, unused_lanes | ||
181 | mov unused_lanes, _unused_lanes(state) | ||
182 | |||
183 | movl $0xFFFFFFFF, _lens(state, idx, 4) | ||
184 | |||
185 | vmovd _args_digest(state, idx, 4), %xmm0 | ||
186 | vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 | ||
187 | vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 | ||
188 | vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 | ||
189 | movl _args_digest+4*32(state, idx, 4), DWORD_tmp | ||
190 | |||
191 | vmovdqu %xmm0, _result_digest(job_rax) | ||
192 | movl DWORD_tmp, _result_digest+1*16(job_rax) | ||
193 | |||
194 | return: | ||
195 | pop %r12 | ||
196 | pop %rbx | ||
197 | FRAME_END | ||
198 | ret | ||
199 | |||
200 | return_null: | ||
201 | xor job_rax, job_rax | ||
202 | jmp return | ||
203 | |||
204 | ENDPROC(sha1_mb_mgr_submit_avx2) | ||
205 | |||
206 | .section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 | ||
207 | .align 16 | ||
208 | clear_low_nibble: | ||
209 | .octa 0x000000000000000000000000FFFFFFF0 | ||
diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S deleted file mode 100644 index 20f77aa633de..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S +++ /dev/null | |||
@@ -1,492 +0,0 @@ | |||
1 | /* | ||
2 | * Multi-buffer SHA1 algorithm hash compute routine | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2014 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * James Guilford <james.guilford@intel.com> | ||
22 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
23 | * | ||
24 | * BSD LICENSE | ||
25 | * | ||
26 | * Copyright(c) 2014 Intel Corporation. | ||
27 | * | ||
28 | * Redistribution and use in source and binary forms, with or without | ||
29 | * modification, are permitted provided that the following conditions | ||
30 | * are met: | ||
31 | * | ||
32 | * * Redistributions of source code must retain the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer. | ||
34 | * * Redistributions in binary form must reproduce the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer in | ||
36 | * the documentation and/or other materials provided with the | ||
37 | * distribution. | ||
38 | * * Neither the name of Intel Corporation nor the names of its | ||
39 | * contributors may be used to endorse or promote products derived | ||
40 | * from this software without specific prior written permission. | ||
41 | * | ||
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
53 | */ | ||
54 | |||
55 | #include <linux/linkage.h> | ||
56 | #include "sha1_mb_mgr_datastruct.S" | ||
57 | |||
58 | ## code to compute oct SHA1 using SSE-256 | ||
59 | ## outer calling routine takes care of save and restore of XMM registers | ||
60 | |||
61 | ## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15 | ||
62 | ## | ||
63 | ## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 | ||
64 | ## Linux preserves: rdi rbp r8 | ||
65 | ## | ||
66 | ## clobbers ymm0-15 | ||
67 | |||
68 | |||
69 | # TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 | ||
70 | # "transpose" data in {r0...r7} using temps {t0...t1} | ||
71 | # Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} | ||
72 | # r0 = {a7 a6 a5 a4 a3 a2 a1 a0} | ||
73 | # r1 = {b7 b6 b5 b4 b3 b2 b1 b0} | ||
74 | # r2 = {c7 c6 c5 c4 c3 c2 c1 c0} | ||
75 | # r3 = {d7 d6 d5 d4 d3 d2 d1 d0} | ||
76 | # r4 = {e7 e6 e5 e4 e3 e2 e1 e0} | ||
77 | # r5 = {f7 f6 f5 f4 f3 f2 f1 f0} | ||
78 | # r6 = {g7 g6 g5 g4 g3 g2 g1 g0} | ||
79 | # r7 = {h7 h6 h5 h4 h3 h2 h1 h0} | ||
80 | # | ||
81 | # Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} | ||
82 | # r0 = {h0 g0 f0 e0 d0 c0 b0 a0} | ||
83 | # r1 = {h1 g1 f1 e1 d1 c1 b1 a1} | ||
84 | # r2 = {h2 g2 f2 e2 d2 c2 b2 a2} | ||
85 | # r3 = {h3 g3 f3 e3 d3 c3 b3 a3} | ||
86 | # r4 = {h4 g4 f4 e4 d4 c4 b4 a4} | ||
87 | # r5 = {h5 g5 f5 e5 d5 c5 b5 a5} | ||
88 | # r6 = {h6 g6 f6 e6 d6 c6 b6 a6} | ||
89 | # r7 = {h7 g7 f7 e7 d7 c7 b7 a7} | ||
90 | # | ||
91 | |||
92 | .macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 | ||
93 | # process top half (r0..r3) {a...d} | ||
94 | vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} | ||
95 | vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} | ||
96 | vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} | ||
97 | vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} | ||
98 | vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} | ||
99 | vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} | ||
100 | vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} | ||
101 | vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} | ||
102 | |||
103 | # use r2 in place of t0 | ||
104 | # process bottom half (r4..r7) {e...h} | ||
105 | vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} | ||
106 | vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} | ||
107 | vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} | ||
108 | vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} | ||
109 | vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} | ||
110 | vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} | ||
111 | vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} | ||
112 | vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} | ||
113 | |||
114 | vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 | ||
115 | vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 | ||
116 | vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 | ||
117 | vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 | ||
118 | vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 | ||
119 | vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 | ||
120 | vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 | ||
121 | vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 | ||
122 | |||
123 | .endm | ||
124 | ## | ||
125 | ## Magic functions defined in FIPS 180-1 | ||
126 | ## | ||
127 | # macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D))) | ||
128 | .macro MAGIC_F0 regF regB regC regD regT | ||
129 | vpxor \regD, \regC, \regF | ||
130 | vpand \regB, \regF, \regF | ||
131 | vpxor \regD, \regF, \regF | ||
132 | .endm | ||
133 | |||
134 | # macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D) | ||
135 | .macro MAGIC_F1 regF regB regC regD regT | ||
136 | vpxor \regC, \regD, \regF | ||
137 | vpxor \regB, \regF, \regF | ||
138 | .endm | ||
139 | |||
140 | # macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D)) | ||
141 | .macro MAGIC_F2 regF regB regC regD regT | ||
142 | vpor \regC, \regB, \regF | ||
143 | vpand \regC, \regB, \regT | ||
144 | vpand \regD, \regF, \regF | ||
145 | vpor \regT, \regF, \regF | ||
146 | .endm | ||
147 | |||
148 | # macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D) | ||
149 | .macro MAGIC_F3 regF regB regC regD regT | ||
150 | MAGIC_F1 \regF,\regB,\regC,\regD,\regT | ||
151 | .endm | ||
152 | |||
153 | # PROLD reg, imm, tmp | ||
154 | .macro PROLD reg imm tmp | ||
155 | vpsrld $(32-\imm), \reg, \tmp | ||
156 | vpslld $\imm, \reg, \reg | ||
157 | vpor \tmp, \reg, \reg | ||
158 | .endm | ||
159 | |||
160 | .macro PROLD_nd reg imm tmp src | ||
161 | vpsrld $(32-\imm), \src, \tmp | ||
162 | vpslld $\imm, \src, \reg | ||
163 | vpor \tmp, \reg, \reg | ||
164 | .endm | ||
165 | |||
166 | .macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC | ||
167 | vpaddd \immCNT, \regE, \regE | ||
168 | vpaddd \memW*32(%rsp), \regE, \regE | ||
169 | PROLD_nd \regT, 5, \regF, \regA | ||
170 | vpaddd \regT, \regE, \regE | ||
171 | \MAGIC \regF, \regB, \regC, \regD, \regT | ||
172 | PROLD \regB, 30, \regT | ||
173 | vpaddd \regF, \regE, \regE | ||
174 | .endm | ||
175 | |||
176 | .macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC | ||
177 | vpaddd \immCNT, \regE, \regE | ||
178 | offset = ((\memW - 14) & 15) * 32 | ||
179 | vmovdqu offset(%rsp), W14 | ||
180 | vpxor W14, W16, W16 | ||
181 | offset = ((\memW - 8) & 15) * 32 | ||
182 | vpxor offset(%rsp), W16, W16 | ||
183 | offset = ((\memW - 3) & 15) * 32 | ||
184 | vpxor offset(%rsp), W16, W16 | ||
185 | vpsrld $(32-1), W16, \regF | ||
186 | vpslld $1, W16, W16 | ||
187 | vpor W16, \regF, \regF | ||
188 | |||
189 | ROTATE_W | ||
190 | |||
191 | offset = ((\memW - 0) & 15) * 32 | ||
192 | vmovdqu \regF, offset(%rsp) | ||
193 | vpaddd \regF, \regE, \regE | ||
194 | PROLD_nd \regT, 5, \regF, \regA | ||
195 | vpaddd \regT, \regE, \regE | ||
196 | \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D) | ||
197 | PROLD \regB,30, \regT | ||
198 | vpaddd \regF, \regE, \regE | ||
199 | .endm | ||
200 | |||
201 | ######################################################################## | ||
202 | ######################################################################## | ||
203 | ######################################################################## | ||
204 | |||
205 | ## FRAMESZ plus pushes must be an odd multiple of 8 | ||
206 | YMM_SAVE = (15-15)*32 | ||
207 | FRAMESZ = 32*16 + YMM_SAVE | ||
208 | _YMM = FRAMESZ - YMM_SAVE | ||
209 | |||
210 | #define VMOVPS vmovups | ||
211 | |||
212 | IDX = %rax | ||
213 | inp0 = %r9 | ||
214 | inp1 = %r10 | ||
215 | inp2 = %r11 | ||
216 | inp3 = %r12 | ||
217 | inp4 = %r13 | ||
218 | inp5 = %r14 | ||
219 | inp6 = %r15 | ||
220 | inp7 = %rcx | ||
221 | arg1 = %rdi | ||
222 | arg2 = %rsi | ||
223 | RSP_SAVE = %rdx | ||
224 | |||
225 | # ymm0 A | ||
226 | # ymm1 B | ||
227 | # ymm2 C | ||
228 | # ymm3 D | ||
229 | # ymm4 E | ||
230 | # ymm5 F AA | ||
231 | # ymm6 T0 BB | ||
232 | # ymm7 T1 CC | ||
233 | # ymm8 T2 DD | ||
234 | # ymm9 T3 EE | ||
235 | # ymm10 T4 TMP | ||
236 | # ymm11 T5 FUN | ||
237 | # ymm12 T6 K | ||
238 | # ymm13 T7 W14 | ||
239 | # ymm14 T8 W15 | ||
240 | # ymm15 T9 W16 | ||
241 | |||
242 | |||
243 | A = %ymm0 | ||
244 | B = %ymm1 | ||
245 | C = %ymm2 | ||
246 | D = %ymm3 | ||
247 | E = %ymm4 | ||
248 | F = %ymm5 | ||
249 | T0 = %ymm6 | ||
250 | T1 = %ymm7 | ||
251 | T2 = %ymm8 | ||
252 | T3 = %ymm9 | ||
253 | T4 = %ymm10 | ||
254 | T5 = %ymm11 | ||
255 | T6 = %ymm12 | ||
256 | T7 = %ymm13 | ||
257 | T8 = %ymm14 | ||
258 | T9 = %ymm15 | ||
259 | |||
260 | AA = %ymm5 | ||
261 | BB = %ymm6 | ||
262 | CC = %ymm7 | ||
263 | DD = %ymm8 | ||
264 | EE = %ymm9 | ||
265 | TMP = %ymm10 | ||
266 | FUN = %ymm11 | ||
267 | K = %ymm12 | ||
268 | W14 = %ymm13 | ||
269 | W15 = %ymm14 | ||
270 | W16 = %ymm15 | ||
271 | |||
272 | .macro ROTATE_ARGS | ||
273 | TMP_ = E | ||
274 | E = D | ||
275 | D = C | ||
276 | C = B | ||
277 | B = A | ||
278 | A = TMP_ | ||
279 | .endm | ||
280 | |||
281 | .macro ROTATE_W | ||
282 | TMP_ = W16 | ||
283 | W16 = W15 | ||
284 | W15 = W14 | ||
285 | W14 = TMP_ | ||
286 | .endm | ||
287 | |||
288 | # 8 streams x 5 32bit words per digest x 4 bytes per word | ||
289 | #define DIGEST_SIZE (8*5*4) | ||
290 | |||
291 | .align 32 | ||
292 | |||
293 | # void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size) | ||
294 | # arg 1 : pointer to array[4] of pointer to input data | ||
295 | # arg 2 : size (in blocks) ;; assumed to be >= 1 | ||
296 | # | ||
297 | ENTRY(sha1_x8_avx2) | ||
298 | |||
299 | # save callee-saved clobbered registers to comply with C function ABI | ||
300 | push %r12 | ||
301 | push %r13 | ||
302 | push %r14 | ||
303 | push %r15 | ||
304 | |||
305 | #save rsp | ||
306 | mov %rsp, RSP_SAVE | ||
307 | sub $FRAMESZ, %rsp | ||
308 | |||
309 | #align rsp to 32 Bytes | ||
310 | and $~0x1F, %rsp | ||
311 | |||
312 | ## Initialize digests | ||
313 | vmovdqu 0*32(arg1), A | ||
314 | vmovdqu 1*32(arg1), B | ||
315 | vmovdqu 2*32(arg1), C | ||
316 | vmovdqu 3*32(arg1), D | ||
317 | vmovdqu 4*32(arg1), E | ||
318 | |||
319 | ## transpose input onto stack | ||
320 | mov _data_ptr+0*8(arg1),inp0 | ||
321 | mov _data_ptr+1*8(arg1),inp1 | ||
322 | mov _data_ptr+2*8(arg1),inp2 | ||
323 | mov _data_ptr+3*8(arg1),inp3 | ||
324 | mov _data_ptr+4*8(arg1),inp4 | ||
325 | mov _data_ptr+5*8(arg1),inp5 | ||
326 | mov _data_ptr+6*8(arg1),inp6 | ||
327 | mov _data_ptr+7*8(arg1),inp7 | ||
328 | |||
329 | xor IDX, IDX | ||
330 | lloop: | ||
331 | vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F | ||
332 | I=0 | ||
333 | .rep 2 | ||
334 | VMOVPS (inp0, IDX), T0 | ||
335 | VMOVPS (inp1, IDX), T1 | ||
336 | VMOVPS (inp2, IDX), T2 | ||
337 | VMOVPS (inp3, IDX), T3 | ||
338 | VMOVPS (inp4, IDX), T4 | ||
339 | VMOVPS (inp5, IDX), T5 | ||
340 | VMOVPS (inp6, IDX), T6 | ||
341 | VMOVPS (inp7, IDX), T7 | ||
342 | |||
343 | TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 | ||
344 | vpshufb F, T0, T0 | ||
345 | vmovdqu T0, (I*8)*32(%rsp) | ||
346 | vpshufb F, T1, T1 | ||
347 | vmovdqu T1, (I*8+1)*32(%rsp) | ||
348 | vpshufb F, T2, T2 | ||
349 | vmovdqu T2, (I*8+2)*32(%rsp) | ||
350 | vpshufb F, T3, T3 | ||
351 | vmovdqu T3, (I*8+3)*32(%rsp) | ||
352 | vpshufb F, T4, T4 | ||
353 | vmovdqu T4, (I*8+4)*32(%rsp) | ||
354 | vpshufb F, T5, T5 | ||
355 | vmovdqu T5, (I*8+5)*32(%rsp) | ||
356 | vpshufb F, T6, T6 | ||
357 | vmovdqu T6, (I*8+6)*32(%rsp) | ||
358 | vpshufb F, T7, T7 | ||
359 | vmovdqu T7, (I*8+7)*32(%rsp) | ||
360 | add $32, IDX | ||
361 | I = (I+1) | ||
362 | .endr | ||
363 | # save old digests | ||
364 | vmovdqu A,AA | ||
365 | vmovdqu B,BB | ||
366 | vmovdqu C,CC | ||
367 | vmovdqu D,DD | ||
368 | vmovdqu E,EE | ||
369 | |||
370 | ## | ||
371 | ## perform 0-79 steps | ||
372 | ## | ||
373 | vmovdqu K00_19(%rip), K | ||
374 | ## do rounds 0...15 | ||
375 | I = 0 | ||
376 | .rep 16 | ||
377 | SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 | ||
378 | ROTATE_ARGS | ||
379 | I = (I+1) | ||
380 | .endr | ||
381 | |||
382 | ## do rounds 16...19 | ||
383 | vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16 | ||
384 | vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15 | ||
385 | .rep 4 | ||
386 | SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 | ||
387 | ROTATE_ARGS | ||
388 | I = (I+1) | ||
389 | .endr | ||
390 | |||
391 | ## do rounds 20...39 | ||
392 | vmovdqu K20_39(%rip), K | ||
393 | .rep 20 | ||
394 | SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1 | ||
395 | ROTATE_ARGS | ||
396 | I = (I+1) | ||
397 | .endr | ||
398 | |||
399 | ## do rounds 40...59 | ||
400 | vmovdqu K40_59(%rip), K | ||
401 | .rep 20 | ||
402 | SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2 | ||
403 | ROTATE_ARGS | ||
404 | I = (I+1) | ||
405 | .endr | ||
406 | |||
407 | ## do rounds 60...79 | ||
408 | vmovdqu K60_79(%rip), K | ||
409 | .rep 20 | ||
410 | SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3 | ||
411 | ROTATE_ARGS | ||
412 | I = (I+1) | ||
413 | .endr | ||
414 | |||
415 | vpaddd AA,A,A | ||
416 | vpaddd BB,B,B | ||
417 | vpaddd CC,C,C | ||
418 | vpaddd DD,D,D | ||
419 | vpaddd EE,E,E | ||
420 | |||
421 | sub $1, arg2 | ||
422 | jne lloop | ||
423 | |||
424 | # write out digests | ||
425 | vmovdqu A, 0*32(arg1) | ||
426 | vmovdqu B, 1*32(arg1) | ||
427 | vmovdqu C, 2*32(arg1) | ||
428 | vmovdqu D, 3*32(arg1) | ||
429 | vmovdqu E, 4*32(arg1) | ||
430 | |||
431 | # update input pointers | ||
432 | add IDX, inp0 | ||
433 | add IDX, inp1 | ||
434 | add IDX, inp2 | ||
435 | add IDX, inp3 | ||
436 | add IDX, inp4 | ||
437 | add IDX, inp5 | ||
438 | add IDX, inp6 | ||
439 | add IDX, inp7 | ||
440 | mov inp0, _data_ptr (arg1) | ||
441 | mov inp1, _data_ptr + 1*8(arg1) | ||
442 | mov inp2, _data_ptr + 2*8(arg1) | ||
443 | mov inp3, _data_ptr + 3*8(arg1) | ||
444 | mov inp4, _data_ptr + 4*8(arg1) | ||
445 | mov inp5, _data_ptr + 5*8(arg1) | ||
446 | mov inp6, _data_ptr + 6*8(arg1) | ||
447 | mov inp7, _data_ptr + 7*8(arg1) | ||
448 | |||
449 | ################ | ||
450 | ## Postamble | ||
451 | |||
452 | mov RSP_SAVE, %rsp | ||
453 | |||
454 | # restore callee-saved clobbered registers | ||
455 | pop %r15 | ||
456 | pop %r14 | ||
457 | pop %r13 | ||
458 | pop %r12 | ||
459 | |||
460 | ret | ||
461 | ENDPROC(sha1_x8_avx2) | ||
462 | |||
463 | |||
464 | .section .rodata.cst32.K00_19, "aM", @progbits, 32 | ||
465 | .align 32 | ||
466 | K00_19: | ||
467 | .octa 0x5A8279995A8279995A8279995A827999 | ||
468 | .octa 0x5A8279995A8279995A8279995A827999 | ||
469 | |||
470 | .section .rodata.cst32.K20_39, "aM", @progbits, 32 | ||
471 | .align 32 | ||
472 | K20_39: | ||
473 | .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 | ||
474 | .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 | ||
475 | |||
476 | .section .rodata.cst32.K40_59, "aM", @progbits, 32 | ||
477 | .align 32 | ||
478 | K40_59: | ||
479 | .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC | ||
480 | .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC | ||
481 | |||
482 | .section .rodata.cst32.K60_79, "aM", @progbits, 32 | ||
483 | .align 32 | ||
484 | K60_79: | ||
485 | .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 | ||
486 | .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 | ||
487 | |||
488 | .section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 | ||
489 | .align 32 | ||
490 | PSHUFFLE_BYTE_FLIP_MASK: | ||
491 | .octa 0x0c0d0e0f08090a0b0405060700010203 | ||
492 | .octa 0x0c0d0e0f08090a0b0405060700010203 | ||
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile deleted file mode 100644 index 53ad6e7db747..000000000000 --- a/arch/x86/crypto/sha256-mb/Makefile +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | # SPDX-License-Identifier: GPL-2.0 | ||
2 | # | ||
3 | # Arch-specific CryptoAPI modules. | ||
4 | # | ||
5 | |||
6 | OBJECT_FILES_NON_STANDARD := y | ||
7 | |||
8 | avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ | ||
9 | $(comma)4)$(comma)%ymm2,yes,no) | ||
10 | ifeq ($(avx2_supported),yes) | ||
11 | obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o | ||
12 | sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \ | ||
13 | sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o | ||
14 | endif | ||
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c deleted file mode 100644 index 97c5fc43e115..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ /dev/null | |||
@@ -1,1013 +0,0 @@ | |||
1 | /* | ||
2 | * Multi buffer SHA256 algorithm Glue Code | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
55 | |||
56 | #include <crypto/internal/hash.h> | ||
57 | #include <linux/init.h> | ||
58 | #include <linux/module.h> | ||
59 | #include <linux/mm.h> | ||
60 | #include <linux/cryptohash.h> | ||
61 | #include <linux/types.h> | ||
62 | #include <linux/list.h> | ||
63 | #include <crypto/scatterwalk.h> | ||
64 | #include <crypto/sha.h> | ||
65 | #include <crypto/mcryptd.h> | ||
66 | #include <crypto/crypto_wq.h> | ||
67 | #include <asm/byteorder.h> | ||
68 | #include <linux/hardirq.h> | ||
69 | #include <asm/fpu/api.h> | ||
70 | #include "sha256_mb_ctx.h" | ||
71 | |||
72 | #define FLUSH_INTERVAL 1000 /* in usec */ | ||
73 | |||
74 | static struct mcryptd_alg_state sha256_mb_alg_state; | ||
75 | |||
76 | struct sha256_mb_ctx { | ||
77 | struct mcryptd_ahash *mcryptd_tfm; | ||
78 | }; | ||
79 | |||
80 | static inline struct mcryptd_hash_request_ctx | ||
81 | *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx) | ||
82 | { | ||
83 | struct ahash_request *areq; | ||
84 | |||
85 | areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); | ||
86 | return container_of(areq, struct mcryptd_hash_request_ctx, areq); | ||
87 | } | ||
88 | |||
89 | static inline struct ahash_request | ||
90 | *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) | ||
91 | { | ||
92 | return container_of((void *) ctx, struct ahash_request, __ctx); | ||
93 | } | ||
94 | |||
95 | static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, | ||
96 | struct ahash_request *areq) | ||
97 | { | ||
98 | rctx->flag = HASH_UPDATE; | ||
99 | } | ||
100 | |||
101 | static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state); | ||
102 | static asmlinkage struct job_sha256* (*sha256_job_mgr_submit) | ||
103 | (struct sha256_mb_mgr *state, struct job_sha256 *job); | ||
104 | static asmlinkage struct job_sha256* (*sha256_job_mgr_flush) | ||
105 | (struct sha256_mb_mgr *state); | ||
106 | static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job) | ||
107 | (struct sha256_mb_mgr *state); | ||
108 | |||
109 | inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], | ||
110 | uint64_t total_len) | ||
111 | { | ||
112 | uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1); | ||
113 | |||
114 | memset(&padblock[i], 0, SHA256_BLOCK_SIZE); | ||
115 | padblock[i] = 0x80; | ||
116 | |||
117 | i += ((SHA256_BLOCK_SIZE - 1) & | ||
118 | (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1))) | ||
119 | + 1 + SHA256_PADLENGTHFIELD_SIZE; | ||
120 | |||
121 | #if SHA256_PADLENGTHFIELD_SIZE == 16 | ||
122 | *((uint64_t *) &padblock[i - 16]) = 0; | ||
123 | #endif | ||
124 | |||
125 | *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); | ||
126 | |||
127 | /* Number of extra blocks to hash */ | ||
128 | return i >> SHA256_LOG2_BLOCK_SIZE; | ||
129 | } | ||
130 | |||
131 | static struct sha256_hash_ctx | ||
132 | *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr, | ||
133 | struct sha256_hash_ctx *ctx) | ||
134 | { | ||
135 | while (ctx) { | ||
136 | if (ctx->status & HASH_CTX_STS_COMPLETE) { | ||
137 | /* Clear PROCESSING bit */ | ||
138 | ctx->status = HASH_CTX_STS_COMPLETE; | ||
139 | return ctx; | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * If the extra blocks are empty, begin hashing what remains | ||
144 | * in the user's buffer. | ||
145 | */ | ||
146 | if (ctx->partial_block_buffer_length == 0 && | ||
147 | ctx->incoming_buffer_length) { | ||
148 | |||
149 | const void *buffer = ctx->incoming_buffer; | ||
150 | uint32_t len = ctx->incoming_buffer_length; | ||
151 | uint32_t copy_len; | ||
152 | |||
153 | /* | ||
154 | * Only entire blocks can be hashed. | ||
155 | * Copy remainder to extra blocks buffer. | ||
156 | */ | ||
157 | copy_len = len & (SHA256_BLOCK_SIZE-1); | ||
158 | |||
159 | if (copy_len) { | ||
160 | len -= copy_len; | ||
161 | memcpy(ctx->partial_block_buffer, | ||
162 | ((const char *) buffer + len), | ||
163 | copy_len); | ||
164 | ctx->partial_block_buffer_length = copy_len; | ||
165 | } | ||
166 | |||
167 | ctx->incoming_buffer_length = 0; | ||
168 | |||
169 | /* len should be a multiple of the block size now */ | ||
170 | assert((len % SHA256_BLOCK_SIZE) == 0); | ||
171 | |||
172 | /* Set len to the number of blocks to be hashed */ | ||
173 | len >>= SHA256_LOG2_BLOCK_SIZE; | ||
174 | |||
175 | if (len) { | ||
176 | |||
177 | ctx->job.buffer = (uint8_t *) buffer; | ||
178 | ctx->job.len = len; | ||
179 | ctx = (struct sha256_hash_ctx *) | ||
180 | sha256_job_mgr_submit(&mgr->mgr, &ctx->job); | ||
181 | continue; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * If the extra blocks are not empty, then we are | ||
187 | * either on the last block(s) or we need more | ||
188 | * user input before continuing. | ||
189 | */ | ||
190 | if (ctx->status & HASH_CTX_STS_LAST) { | ||
191 | |||
192 | uint8_t *buf = ctx->partial_block_buffer; | ||
193 | uint32_t n_extra_blocks = | ||
194 | sha256_pad(buf, ctx->total_length); | ||
195 | |||
196 | ctx->status = (HASH_CTX_STS_PROCESSING | | ||
197 | HASH_CTX_STS_COMPLETE); | ||
198 | ctx->job.buffer = buf; | ||
199 | ctx->job.len = (uint32_t) n_extra_blocks; | ||
200 | ctx = (struct sha256_hash_ctx *) | ||
201 | sha256_job_mgr_submit(&mgr->mgr, &ctx->job); | ||
202 | continue; | ||
203 | } | ||
204 | |||
205 | ctx->status = HASH_CTX_STS_IDLE; | ||
206 | return ctx; | ||
207 | } | ||
208 | |||
209 | return NULL; | ||
210 | } | ||
211 | |||
212 | static struct sha256_hash_ctx | ||
213 | *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr) | ||
214 | { | ||
215 | /* | ||
216 | * If get_comp_job returns NULL, there are no jobs complete. | ||
217 | * If get_comp_job returns a job, verify that it is safe to return to | ||
218 | * the user. If it is not ready, resubmit the job to finish processing. | ||
219 | * If sha256_ctx_mgr_resubmit returned a job, it is ready to be | ||
220 | * returned. Otherwise, all jobs currently being managed by the | ||
221 | * hash_ctx_mgr still need processing. | ||
222 | */ | ||
223 | struct sha256_hash_ctx *ctx; | ||
224 | |||
225 | ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr); | ||
226 | return sha256_ctx_mgr_resubmit(mgr, ctx); | ||
227 | } | ||
228 | |||
229 | static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr) | ||
230 | { | ||
231 | sha256_job_mgr_init(&mgr->mgr); | ||
232 | } | ||
233 | |||
234 | static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, | ||
235 | struct sha256_hash_ctx *ctx, | ||
236 | const void *buffer, | ||
237 | uint32_t len, | ||
238 | int flags) | ||
239 | { | ||
240 | if (flags & ~(HASH_UPDATE | HASH_LAST)) { | ||
241 | /* User should not pass anything other than UPDATE or LAST */ | ||
242 | ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; | ||
243 | return ctx; | ||
244 | } | ||
245 | |||
246 | if (ctx->status & HASH_CTX_STS_PROCESSING) { | ||
247 | /* Cannot submit to a currently processing job. */ | ||
248 | ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; | ||
249 | return ctx; | ||
250 | } | ||
251 | |||
252 | if (ctx->status & HASH_CTX_STS_COMPLETE) { | ||
253 | /* Cannot update a finished job. */ | ||
254 | ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; | ||
255 | return ctx; | ||
256 | } | ||
257 | |||
258 | /* If we made it here, there was no error during this call to submit */ | ||
259 | ctx->error = HASH_CTX_ERROR_NONE; | ||
260 | |||
261 | /* Store buffer ptr info from user */ | ||
262 | ctx->incoming_buffer = buffer; | ||
263 | ctx->incoming_buffer_length = len; | ||
264 | |||
265 | /* | ||
266 | * Store the user's request flags and mark this ctx as currently | ||
267 | * being processed. | ||
268 | */ | ||
269 | ctx->status = (flags & HASH_LAST) ? | ||
270 | (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : | ||
271 | HASH_CTX_STS_PROCESSING; | ||
272 | |||
273 | /* Advance byte counter */ | ||
274 | ctx->total_length += len; | ||
275 | |||
276 | /* | ||
277 | * If there is anything currently buffered in the extra blocks, | ||
278 | * append to it until it contains a whole block. | ||
279 | * Or if the user's buffer contains less than a whole block, | ||
280 | * append as much as possible to the extra block. | ||
281 | */ | ||
282 | if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) { | ||
283 | /* | ||
284 | * Compute how many bytes to copy from user buffer into | ||
285 | * extra block | ||
286 | */ | ||
287 | uint32_t copy_len = SHA256_BLOCK_SIZE - | ||
288 | ctx->partial_block_buffer_length; | ||
289 | if (len < copy_len) | ||
290 | copy_len = len; | ||
291 | |||
292 | if (copy_len) { | ||
293 | /* Copy and update relevant pointers and counters */ | ||
294 | memcpy( | ||
295 | &ctx->partial_block_buffer[ctx->partial_block_buffer_length], | ||
296 | buffer, copy_len); | ||
297 | |||
298 | ctx->partial_block_buffer_length += copy_len; | ||
299 | ctx->incoming_buffer = (const void *) | ||
300 | ((const char *)buffer + copy_len); | ||
301 | ctx->incoming_buffer_length = len - copy_len; | ||
302 | } | ||
303 | |||
304 | /* The extra block should never contain more than 1 block */ | ||
305 | assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE); | ||
306 | |||
307 | /* | ||
308 | * If the extra block buffer contains exactly 1 block, | ||
309 | * it can be hashed. | ||
310 | */ | ||
311 | if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) { | ||
312 | ctx->partial_block_buffer_length = 0; | ||
313 | |||
314 | ctx->job.buffer = ctx->partial_block_buffer; | ||
315 | ctx->job.len = 1; | ||
316 | ctx = (struct sha256_hash_ctx *) | ||
317 | sha256_job_mgr_submit(&mgr->mgr, &ctx->job); | ||
318 | } | ||
319 | } | ||
320 | |||
321 | return sha256_ctx_mgr_resubmit(mgr, ctx); | ||
322 | } | ||
323 | |||
324 | static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr) | ||
325 | { | ||
326 | struct sha256_hash_ctx *ctx; | ||
327 | |||
328 | while (1) { | ||
329 | ctx = (struct sha256_hash_ctx *) | ||
330 | sha256_job_mgr_flush(&mgr->mgr); | ||
331 | |||
332 | /* If flush returned 0, there are no more jobs in flight. */ | ||
333 | if (!ctx) | ||
334 | return NULL; | ||
335 | |||
336 | /* | ||
337 | * If flush returned a job, resubmit the job to finish | ||
338 | * processing. | ||
339 | */ | ||
340 | ctx = sha256_ctx_mgr_resubmit(mgr, ctx); | ||
341 | |||
342 | /* | ||
343 | * If sha256_ctx_mgr_resubmit returned a job, it is ready to | ||
344 | * be returned. Otherwise, all jobs currently being managed by | ||
345 | * the sha256_ctx_mgr still need processing. Loop. | ||
346 | */ | ||
347 | if (ctx) | ||
348 | return ctx; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | static int sha256_mb_init(struct ahash_request *areq) | ||
353 | { | ||
354 | struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); | ||
355 | |||
356 | hash_ctx_init(sctx); | ||
357 | sctx->job.result_digest[0] = SHA256_H0; | ||
358 | sctx->job.result_digest[1] = SHA256_H1; | ||
359 | sctx->job.result_digest[2] = SHA256_H2; | ||
360 | sctx->job.result_digest[3] = SHA256_H3; | ||
361 | sctx->job.result_digest[4] = SHA256_H4; | ||
362 | sctx->job.result_digest[5] = SHA256_H5; | ||
363 | sctx->job.result_digest[6] = SHA256_H6; | ||
364 | sctx->job.result_digest[7] = SHA256_H7; | ||
365 | sctx->total_length = 0; | ||
366 | sctx->partial_block_buffer_length = 0; | ||
367 | sctx->status = HASH_CTX_STS_IDLE; | ||
368 | |||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx) | ||
373 | { | ||
374 | int i; | ||
375 | struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); | ||
376 | __be32 *dst = (__be32 *) rctx->out; | ||
377 | |||
378 | for (i = 0; i < 8; ++i) | ||
379 | dst[i] = cpu_to_be32(sctx->job.result_digest[i]); | ||
380 | |||
381 | return 0; | ||
382 | } | ||
383 | |||
384 | static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, | ||
385 | struct mcryptd_alg_cstate *cstate, bool flush) | ||
386 | { | ||
387 | int flag = HASH_UPDATE; | ||
388 | int nbytes, err = 0; | ||
389 | struct mcryptd_hash_request_ctx *rctx = *ret_rctx; | ||
390 | struct sha256_hash_ctx *sha_ctx; | ||
391 | |||
392 | /* more work ? */ | ||
393 | while (!(rctx->flag & HASH_DONE)) { | ||
394 | nbytes = crypto_ahash_walk_done(&rctx->walk, 0); | ||
395 | if (nbytes < 0) { | ||
396 | err = nbytes; | ||
397 | goto out; | ||
398 | } | ||
399 | /* check if the walk is done */ | ||
400 | if (crypto_ahash_walk_last(&rctx->walk)) { | ||
401 | rctx->flag |= HASH_DONE; | ||
402 | if (rctx->flag & HASH_FINAL) | ||
403 | flag |= HASH_LAST; | ||
404 | |||
405 | } | ||
406 | sha_ctx = (struct sha256_hash_ctx *) | ||
407 | ahash_request_ctx(&rctx->areq); | ||
408 | kernel_fpu_begin(); | ||
409 | sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, | ||
410 | rctx->walk.data, nbytes, flag); | ||
411 | if (!sha_ctx) { | ||
412 | if (flush) | ||
413 | sha_ctx = sha256_ctx_mgr_flush(cstate->mgr); | ||
414 | } | ||
415 | kernel_fpu_end(); | ||
416 | if (sha_ctx) | ||
417 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
418 | else { | ||
419 | rctx = NULL; | ||
420 | goto out; | ||
421 | } | ||
422 | } | ||
423 | |||
424 | /* copy the results */ | ||
425 | if (rctx->flag & HASH_FINAL) | ||
426 | sha256_mb_set_results(rctx); | ||
427 | |||
428 | out: | ||
429 | *ret_rctx = rctx; | ||
430 | return err; | ||
431 | } | ||
432 | |||
433 | static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, | ||
434 | struct mcryptd_alg_cstate *cstate, | ||
435 | int err) | ||
436 | { | ||
437 | struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); | ||
438 | struct sha256_hash_ctx *sha_ctx; | ||
439 | struct mcryptd_hash_request_ctx *req_ctx; | ||
440 | int ret; | ||
441 | |||
442 | /* remove from work list */ | ||
443 | spin_lock(&cstate->work_lock); | ||
444 | list_del(&rctx->waiter); | ||
445 | spin_unlock(&cstate->work_lock); | ||
446 | |||
447 | if (irqs_disabled()) | ||
448 | rctx->complete(&req->base, err); | ||
449 | else { | ||
450 | local_bh_disable(); | ||
451 | rctx->complete(&req->base, err); | ||
452 | local_bh_enable(); | ||
453 | } | ||
454 | |||
455 | /* check to see if there are other jobs that are done */ | ||
456 | sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); | ||
457 | while (sha_ctx) { | ||
458 | req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
459 | ret = sha_finish_walk(&req_ctx, cstate, false); | ||
460 | if (req_ctx) { | ||
461 | spin_lock(&cstate->work_lock); | ||
462 | list_del(&req_ctx->waiter); | ||
463 | spin_unlock(&cstate->work_lock); | ||
464 | |||
465 | req = cast_mcryptd_ctx_to_req(req_ctx); | ||
466 | if (irqs_disabled()) | ||
467 | req_ctx->complete(&req->base, ret); | ||
468 | else { | ||
469 | local_bh_disable(); | ||
470 | req_ctx->complete(&req->base, ret); | ||
471 | local_bh_enable(); | ||
472 | } | ||
473 | } | ||
474 | sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); | ||
475 | } | ||
476 | |||
477 | return 0; | ||
478 | } | ||
479 | |||
480 | static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx, | ||
481 | struct mcryptd_alg_cstate *cstate) | ||
482 | { | ||
483 | unsigned long next_flush; | ||
484 | unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); | ||
485 | |||
486 | /* initialize tag */ | ||
487 | rctx->tag.arrival = jiffies; /* tag the arrival time */ | ||
488 | rctx->tag.seq_num = cstate->next_seq_num++; | ||
489 | next_flush = rctx->tag.arrival + delay; | ||
490 | rctx->tag.expire = next_flush; | ||
491 | |||
492 | spin_lock(&cstate->work_lock); | ||
493 | list_add_tail(&rctx->waiter, &cstate->work_list); | ||
494 | spin_unlock(&cstate->work_lock); | ||
495 | |||
496 | mcryptd_arm_flusher(cstate, delay); | ||
497 | } | ||
498 | |||
499 | static int sha256_mb_update(struct ahash_request *areq) | ||
500 | { | ||
501 | struct mcryptd_hash_request_ctx *rctx = | ||
502 | container_of(areq, struct mcryptd_hash_request_ctx, areq); | ||
503 | struct mcryptd_alg_cstate *cstate = | ||
504 | this_cpu_ptr(sha256_mb_alg_state.alg_cstate); | ||
505 | |||
506 | struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); | ||
507 | struct sha256_hash_ctx *sha_ctx; | ||
508 | int ret = 0, nbytes; | ||
509 | |||
510 | /* sanity check */ | ||
511 | if (rctx->tag.cpu != smp_processor_id()) { | ||
512 | pr_err("mcryptd error: cpu clash\n"); | ||
513 | goto done; | ||
514 | } | ||
515 | |||
516 | /* need to init context */ | ||
517 | req_ctx_init(rctx, areq); | ||
518 | |||
519 | nbytes = crypto_ahash_walk_first(req, &rctx->walk); | ||
520 | |||
521 | if (nbytes < 0) { | ||
522 | ret = nbytes; | ||
523 | goto done; | ||
524 | } | ||
525 | |||
526 | if (crypto_ahash_walk_last(&rctx->walk)) | ||
527 | rctx->flag |= HASH_DONE; | ||
528 | |||
529 | /* submit */ | ||
530 | sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); | ||
531 | sha256_mb_add_list(rctx, cstate); | ||
532 | kernel_fpu_begin(); | ||
533 | sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, | ||
534 | nbytes, HASH_UPDATE); | ||
535 | kernel_fpu_end(); | ||
536 | |||
537 | /* check if anything is returned */ | ||
538 | if (!sha_ctx) | ||
539 | return -EINPROGRESS; | ||
540 | |||
541 | if (sha_ctx->error) { | ||
542 | ret = sha_ctx->error; | ||
543 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
544 | goto done; | ||
545 | } | ||
546 | |||
547 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
548 | ret = sha_finish_walk(&rctx, cstate, false); | ||
549 | |||
550 | if (!rctx) | ||
551 | return -EINPROGRESS; | ||
552 | done: | ||
553 | sha_complete_job(rctx, cstate, ret); | ||
554 | return ret; | ||
555 | } | ||
556 | |||
557 | static int sha256_mb_finup(struct ahash_request *areq) | ||
558 | { | ||
559 | struct mcryptd_hash_request_ctx *rctx = | ||
560 | container_of(areq, struct mcryptd_hash_request_ctx, areq); | ||
561 | struct mcryptd_alg_cstate *cstate = | ||
562 | this_cpu_ptr(sha256_mb_alg_state.alg_cstate); | ||
563 | |||
564 | struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); | ||
565 | struct sha256_hash_ctx *sha_ctx; | ||
566 | int ret = 0, flag = HASH_UPDATE, nbytes; | ||
567 | |||
568 | /* sanity check */ | ||
569 | if (rctx->tag.cpu != smp_processor_id()) { | ||
570 | pr_err("mcryptd error: cpu clash\n"); | ||
571 | goto done; | ||
572 | } | ||
573 | |||
574 | /* need to init context */ | ||
575 | req_ctx_init(rctx, areq); | ||
576 | |||
577 | nbytes = crypto_ahash_walk_first(req, &rctx->walk); | ||
578 | |||
579 | if (nbytes < 0) { | ||
580 | ret = nbytes; | ||
581 | goto done; | ||
582 | } | ||
583 | |||
584 | if (crypto_ahash_walk_last(&rctx->walk)) { | ||
585 | rctx->flag |= HASH_DONE; | ||
586 | flag = HASH_LAST; | ||
587 | } | ||
588 | |||
589 | /* submit */ | ||
590 | rctx->flag |= HASH_FINAL; | ||
591 | sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); | ||
592 | sha256_mb_add_list(rctx, cstate); | ||
593 | |||
594 | kernel_fpu_begin(); | ||
595 | sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, | ||
596 | nbytes, flag); | ||
597 | kernel_fpu_end(); | ||
598 | |||
599 | /* check if anything is returned */ | ||
600 | if (!sha_ctx) | ||
601 | return -EINPROGRESS; | ||
602 | |||
603 | if (sha_ctx->error) { | ||
604 | ret = sha_ctx->error; | ||
605 | goto done; | ||
606 | } | ||
607 | |||
608 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
609 | ret = sha_finish_walk(&rctx, cstate, false); | ||
610 | if (!rctx) | ||
611 | return -EINPROGRESS; | ||
612 | done: | ||
613 | sha_complete_job(rctx, cstate, ret); | ||
614 | return ret; | ||
615 | } | ||
616 | |||
617 | static int sha256_mb_final(struct ahash_request *areq) | ||
618 | { | ||
619 | struct mcryptd_hash_request_ctx *rctx = | ||
620 | container_of(areq, struct mcryptd_hash_request_ctx, | ||
621 | areq); | ||
622 | struct mcryptd_alg_cstate *cstate = | ||
623 | this_cpu_ptr(sha256_mb_alg_state.alg_cstate); | ||
624 | |||
625 | struct sha256_hash_ctx *sha_ctx; | ||
626 | int ret = 0; | ||
627 | u8 data; | ||
628 | |||
629 | /* sanity check */ | ||
630 | if (rctx->tag.cpu != smp_processor_id()) { | ||
631 | pr_err("mcryptd error: cpu clash\n"); | ||
632 | goto done; | ||
633 | } | ||
634 | |||
635 | /* need to init context */ | ||
636 | req_ctx_init(rctx, areq); | ||
637 | |||
638 | rctx->flag |= HASH_DONE | HASH_FINAL; | ||
639 | |||
640 | sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); | ||
641 | /* flag HASH_FINAL and 0 data size */ | ||
642 | sha256_mb_add_list(rctx, cstate); | ||
643 | kernel_fpu_begin(); | ||
644 | sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, | ||
645 | HASH_LAST); | ||
646 | kernel_fpu_end(); | ||
647 | |||
648 | /* check if anything is returned */ | ||
649 | if (!sha_ctx) | ||
650 | return -EINPROGRESS; | ||
651 | |||
652 | if (sha_ctx->error) { | ||
653 | ret = sha_ctx->error; | ||
654 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
655 | goto done; | ||
656 | } | ||
657 | |||
658 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
659 | ret = sha_finish_walk(&rctx, cstate, false); | ||
660 | if (!rctx) | ||
661 | return -EINPROGRESS; | ||
662 | done: | ||
663 | sha_complete_job(rctx, cstate, ret); | ||
664 | return ret; | ||
665 | } | ||
666 | |||
667 | static int sha256_mb_export(struct ahash_request *areq, void *out) | ||
668 | { | ||
669 | struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); | ||
670 | |||
671 | memcpy(out, sctx, sizeof(*sctx)); | ||
672 | |||
673 | return 0; | ||
674 | } | ||
675 | |||
676 | static int sha256_mb_import(struct ahash_request *areq, const void *in) | ||
677 | { | ||
678 | struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); | ||
679 | |||
680 | memcpy(sctx, in, sizeof(*sctx)); | ||
681 | |||
682 | return 0; | ||
683 | } | ||
684 | |||
685 | static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm) | ||
686 | { | ||
687 | struct mcryptd_ahash *mcryptd_tfm; | ||
688 | struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); | ||
689 | struct mcryptd_hash_ctx *mctx; | ||
690 | |||
691 | mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb", | ||
692 | CRYPTO_ALG_INTERNAL, | ||
693 | CRYPTO_ALG_INTERNAL); | ||
694 | if (IS_ERR(mcryptd_tfm)) | ||
695 | return PTR_ERR(mcryptd_tfm); | ||
696 | mctx = crypto_ahash_ctx(&mcryptd_tfm->base); | ||
697 | mctx->alg_state = &sha256_mb_alg_state; | ||
698 | ctx->mcryptd_tfm = mcryptd_tfm; | ||
699 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
700 | sizeof(struct ahash_request) + | ||
701 | crypto_ahash_reqsize(&mcryptd_tfm->base)); | ||
702 | |||
703 | return 0; | ||
704 | } | ||
705 | |||
706 | static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm) | ||
707 | { | ||
708 | struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); | ||
709 | |||
710 | mcryptd_free_ahash(ctx->mcryptd_tfm); | ||
711 | } | ||
712 | |||
713 | static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm) | ||
714 | { | ||
715 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
716 | sizeof(struct ahash_request) + | ||
717 | sizeof(struct sha256_hash_ctx)); | ||
718 | |||
719 | return 0; | ||
720 | } | ||
721 | |||
722 | static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm) | ||
723 | { | ||
724 | struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); | ||
725 | |||
726 | mcryptd_free_ahash(ctx->mcryptd_tfm); | ||
727 | } | ||
728 | |||
729 | static struct ahash_alg sha256_mb_areq_alg = { | ||
730 | .init = sha256_mb_init, | ||
731 | .update = sha256_mb_update, | ||
732 | .final = sha256_mb_final, | ||
733 | .finup = sha256_mb_finup, | ||
734 | .export = sha256_mb_export, | ||
735 | .import = sha256_mb_import, | ||
736 | .halg = { | ||
737 | .digestsize = SHA256_DIGEST_SIZE, | ||
738 | .statesize = sizeof(struct sha256_hash_ctx), | ||
739 | .base = { | ||
740 | .cra_name = "__sha256-mb", | ||
741 | .cra_driver_name = "__intel_sha256-mb", | ||
742 | .cra_priority = 100, | ||
743 | /* | ||
744 | * use ASYNC flag as some buffers in multi-buffer | ||
745 | * algo may not have completed before hashing thread | ||
746 | * sleep | ||
747 | */ | ||
748 | .cra_flags = CRYPTO_ALG_ASYNC | | ||
749 | CRYPTO_ALG_INTERNAL, | ||
750 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
751 | .cra_module = THIS_MODULE, | ||
752 | .cra_list = LIST_HEAD_INIT | ||
753 | (sha256_mb_areq_alg.halg.base.cra_list), | ||
754 | .cra_init = sha256_mb_areq_init_tfm, | ||
755 | .cra_exit = sha256_mb_areq_exit_tfm, | ||
756 | .cra_ctxsize = sizeof(struct sha256_hash_ctx), | ||
757 | } | ||
758 | } | ||
759 | }; | ||
760 | |||
761 | static int sha256_mb_async_init(struct ahash_request *req) | ||
762 | { | ||
763 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
764 | struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
765 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
766 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
767 | |||
768 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
769 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
770 | return crypto_ahash_init(mcryptd_req); | ||
771 | } | ||
772 | |||
773 | static int sha256_mb_async_update(struct ahash_request *req) | ||
774 | { | ||
775 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
776 | |||
777 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
778 | struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
779 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
780 | |||
781 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
782 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
783 | return crypto_ahash_update(mcryptd_req); | ||
784 | } | ||
785 | |||
786 | static int sha256_mb_async_finup(struct ahash_request *req) | ||
787 | { | ||
788 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
789 | |||
790 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
791 | struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
792 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
793 | |||
794 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
795 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
796 | return crypto_ahash_finup(mcryptd_req); | ||
797 | } | ||
798 | |||
799 | static int sha256_mb_async_final(struct ahash_request *req) | ||
800 | { | ||
801 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
802 | |||
803 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
804 | struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
805 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
806 | |||
807 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
808 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
809 | return crypto_ahash_final(mcryptd_req); | ||
810 | } | ||
811 | |||
812 | static int sha256_mb_async_digest(struct ahash_request *req) | ||
813 | { | ||
814 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
815 | struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
816 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
817 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
818 | |||
819 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
820 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
821 | return crypto_ahash_digest(mcryptd_req); | ||
822 | } | ||
823 | |||
824 | static int sha256_mb_async_export(struct ahash_request *req, void *out) | ||
825 | { | ||
826 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
827 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
828 | struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
829 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
830 | |||
831 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
832 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
833 | return crypto_ahash_export(mcryptd_req, out); | ||
834 | } | ||
835 | |||
836 | static int sha256_mb_async_import(struct ahash_request *req, const void *in) | ||
837 | { | ||
838 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
839 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
840 | struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
841 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
842 | struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); | ||
843 | struct mcryptd_hash_request_ctx *rctx; | ||
844 | struct ahash_request *areq; | ||
845 | |||
846 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
847 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
848 | rctx = ahash_request_ctx(mcryptd_req); | ||
849 | areq = &rctx->areq; | ||
850 | |||
851 | ahash_request_set_tfm(areq, child); | ||
852 | ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, | ||
853 | rctx->complete, req); | ||
854 | |||
855 | return crypto_ahash_import(mcryptd_req, in); | ||
856 | } | ||
857 | |||
858 | static struct ahash_alg sha256_mb_async_alg = { | ||
859 | .init = sha256_mb_async_init, | ||
860 | .update = sha256_mb_async_update, | ||
861 | .final = sha256_mb_async_final, | ||
862 | .finup = sha256_mb_async_finup, | ||
863 | .export = sha256_mb_async_export, | ||
864 | .import = sha256_mb_async_import, | ||
865 | .digest = sha256_mb_async_digest, | ||
866 | .halg = { | ||
867 | .digestsize = SHA256_DIGEST_SIZE, | ||
868 | .statesize = sizeof(struct sha256_hash_ctx), | ||
869 | .base = { | ||
870 | .cra_name = "sha256", | ||
871 | .cra_driver_name = "sha256_mb", | ||
872 | /* | ||
873 | * Low priority, since with few concurrent hash requests | ||
874 | * this is extremely slow due to the flush delay. Users | ||
875 | * whose workloads would benefit from this can request | ||
876 | * it explicitly by driver name, or can increase its | ||
877 | * priority at runtime using NETLINK_CRYPTO. | ||
878 | */ | ||
879 | .cra_priority = 50, | ||
880 | .cra_flags = CRYPTO_ALG_ASYNC, | ||
881 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
882 | .cra_module = THIS_MODULE, | ||
883 | .cra_list = LIST_HEAD_INIT | ||
884 | (sha256_mb_async_alg.halg.base.cra_list), | ||
885 | .cra_init = sha256_mb_async_init_tfm, | ||
886 | .cra_exit = sha256_mb_async_exit_tfm, | ||
887 | .cra_ctxsize = sizeof(struct sha256_mb_ctx), | ||
888 | .cra_alignmask = 0, | ||
889 | }, | ||
890 | }, | ||
891 | }; | ||
892 | |||
893 | static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate) | ||
894 | { | ||
895 | struct mcryptd_hash_request_ctx *rctx; | ||
896 | unsigned long cur_time; | ||
897 | unsigned long next_flush = 0; | ||
898 | struct sha256_hash_ctx *sha_ctx; | ||
899 | |||
900 | |||
901 | cur_time = jiffies; | ||
902 | |||
903 | while (!list_empty(&cstate->work_list)) { | ||
904 | rctx = list_entry(cstate->work_list.next, | ||
905 | struct mcryptd_hash_request_ctx, waiter); | ||
906 | if (time_before(cur_time, rctx->tag.expire)) | ||
907 | break; | ||
908 | kernel_fpu_begin(); | ||
909 | sha_ctx = (struct sha256_hash_ctx *) | ||
910 | sha256_ctx_mgr_flush(cstate->mgr); | ||
911 | kernel_fpu_end(); | ||
912 | if (!sha_ctx) { | ||
913 | pr_err("sha256_mb error: nothing got" | ||
914 | " flushed for non-empty list\n"); | ||
915 | break; | ||
916 | } | ||
917 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
918 | sha_finish_walk(&rctx, cstate, true); | ||
919 | sha_complete_job(rctx, cstate, 0); | ||
920 | } | ||
921 | |||
922 | if (!list_empty(&cstate->work_list)) { | ||
923 | rctx = list_entry(cstate->work_list.next, | ||
924 | struct mcryptd_hash_request_ctx, waiter); | ||
925 | /* get the hash context and then flush time */ | ||
926 | next_flush = rctx->tag.expire; | ||
927 | mcryptd_arm_flusher(cstate, get_delay(next_flush)); | ||
928 | } | ||
929 | return next_flush; | ||
930 | } | ||
931 | |||
932 | static int __init sha256_mb_mod_init(void) | ||
933 | { | ||
934 | |||
935 | int cpu; | ||
936 | int err; | ||
937 | struct mcryptd_alg_cstate *cpu_state; | ||
938 | |||
939 | /* check for dependent cpu features */ | ||
940 | if (!boot_cpu_has(X86_FEATURE_AVX2) || | ||
941 | !boot_cpu_has(X86_FEATURE_BMI2)) | ||
942 | return -ENODEV; | ||
943 | |||
944 | /* initialize multibuffer structures */ | ||
945 | sha256_mb_alg_state.alg_cstate = alloc_percpu | ||
946 | (struct mcryptd_alg_cstate); | ||
947 | |||
948 | sha256_job_mgr_init = sha256_mb_mgr_init_avx2; | ||
949 | sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2; | ||
950 | sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2; | ||
951 | sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2; | ||
952 | |||
953 | if (!sha256_mb_alg_state.alg_cstate) | ||
954 | return -ENOMEM; | ||
955 | for_each_possible_cpu(cpu) { | ||
956 | cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); | ||
957 | cpu_state->next_flush = 0; | ||
958 | cpu_state->next_seq_num = 0; | ||
959 | cpu_state->flusher_engaged = false; | ||
960 | INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); | ||
961 | cpu_state->cpu = cpu; | ||
962 | cpu_state->alg_state = &sha256_mb_alg_state; | ||
963 | cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr), | ||
964 | GFP_KERNEL); | ||
965 | if (!cpu_state->mgr) | ||
966 | goto err2; | ||
967 | sha256_ctx_mgr_init(cpu_state->mgr); | ||
968 | INIT_LIST_HEAD(&cpu_state->work_list); | ||
969 | spin_lock_init(&cpu_state->work_lock); | ||
970 | } | ||
971 | sha256_mb_alg_state.flusher = &sha256_mb_flusher; | ||
972 | |||
973 | err = crypto_register_ahash(&sha256_mb_areq_alg); | ||
974 | if (err) | ||
975 | goto err2; | ||
976 | err = crypto_register_ahash(&sha256_mb_async_alg); | ||
977 | if (err) | ||
978 | goto err1; | ||
979 | |||
980 | |||
981 | return 0; | ||
982 | err1: | ||
983 | crypto_unregister_ahash(&sha256_mb_areq_alg); | ||
984 | err2: | ||
985 | for_each_possible_cpu(cpu) { | ||
986 | cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); | ||
987 | kfree(cpu_state->mgr); | ||
988 | } | ||
989 | free_percpu(sha256_mb_alg_state.alg_cstate); | ||
990 | return -ENODEV; | ||
991 | } | ||
992 | |||
993 | static void __exit sha256_mb_mod_fini(void) | ||
994 | { | ||
995 | int cpu; | ||
996 | struct mcryptd_alg_cstate *cpu_state; | ||
997 | |||
998 | crypto_unregister_ahash(&sha256_mb_async_alg); | ||
999 | crypto_unregister_ahash(&sha256_mb_areq_alg); | ||
1000 | for_each_possible_cpu(cpu) { | ||
1001 | cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); | ||
1002 | kfree(cpu_state->mgr); | ||
1003 | } | ||
1004 | free_percpu(sha256_mb_alg_state.alg_cstate); | ||
1005 | } | ||
1006 | |||
1007 | module_init(sha256_mb_mod_init); | ||
1008 | module_exit(sha256_mb_mod_fini); | ||
1009 | |||
1010 | MODULE_LICENSE("GPL"); | ||
1011 | MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated"); | ||
1012 | |||
1013 | MODULE_ALIAS_CRYPTO("sha256"); | ||
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h deleted file mode 100644 index 7c432543dc7f..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h +++ /dev/null | |||
@@ -1,134 +0,0 @@ | |||
1 | /* | ||
2 | * Header file for multi buffer SHA256 context | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #ifndef _SHA_MB_CTX_INTERNAL_H | ||
55 | #define _SHA_MB_CTX_INTERNAL_H | ||
56 | |||
57 | #include "sha256_mb_mgr.h" | ||
58 | |||
59 | #define HASH_UPDATE 0x00 | ||
60 | #define HASH_LAST 0x01 | ||
61 | #define HASH_DONE 0x02 | ||
62 | #define HASH_FINAL 0x04 | ||
63 | |||
64 | #define HASH_CTX_STS_IDLE 0x00 | ||
65 | #define HASH_CTX_STS_PROCESSING 0x01 | ||
66 | #define HASH_CTX_STS_LAST 0x02 | ||
67 | #define HASH_CTX_STS_COMPLETE 0x04 | ||
68 | |||
69 | enum hash_ctx_error { | ||
70 | HASH_CTX_ERROR_NONE = 0, | ||
71 | HASH_CTX_ERROR_INVALID_FLAGS = -1, | ||
72 | HASH_CTX_ERROR_ALREADY_PROCESSING = -2, | ||
73 | HASH_CTX_ERROR_ALREADY_COMPLETED = -3, | ||
74 | |||
75 | #ifdef HASH_CTX_DEBUG | ||
76 | HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, | ||
77 | #endif | ||
78 | }; | ||
79 | |||
80 | |||
81 | #define hash_ctx_user_data(ctx) ((ctx)->user_data) | ||
82 | #define hash_ctx_digest(ctx) ((ctx)->job.result_digest) | ||
83 | #define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) | ||
84 | #define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) | ||
85 | #define hash_ctx_status(ctx) ((ctx)->status) | ||
86 | #define hash_ctx_error(ctx) ((ctx)->error) | ||
87 | #define hash_ctx_init(ctx) \ | ||
88 | do { \ | ||
89 | (ctx)->error = HASH_CTX_ERROR_NONE; \ | ||
90 | (ctx)->status = HASH_CTX_STS_COMPLETE; \ | ||
91 | } while (0) | ||
92 | |||
93 | |||
94 | /* Hash Constants and Typedefs */ | ||
95 | #define SHA256_DIGEST_LENGTH 8 | ||
96 | #define SHA256_LOG2_BLOCK_SIZE 6 | ||
97 | |||
98 | #define SHA256_PADLENGTHFIELD_SIZE 8 | ||
99 | |||
100 | #ifdef SHA_MB_DEBUG | ||
101 | #define assert(expr) \ | ||
102 | do { \ | ||
103 | if (unlikely(!(expr))) { \ | ||
104 | printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ | ||
105 | #expr, __FILE__, __func__, __LINE__); \ | ||
106 | } \ | ||
107 | } while (0) | ||
108 | #else | ||
109 | #define assert(expr) do {} while (0) | ||
110 | #endif | ||
111 | |||
112 | struct sha256_ctx_mgr { | ||
113 | struct sha256_mb_mgr mgr; | ||
114 | }; | ||
115 | |||
116 | /* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */ | ||
117 | |||
118 | struct sha256_hash_ctx { | ||
119 | /* Must be at struct offset 0 */ | ||
120 | struct job_sha256 job; | ||
121 | /* status flag */ | ||
122 | int status; | ||
123 | /* error flag */ | ||
124 | int error; | ||
125 | |||
126 | uint64_t total_length; | ||
127 | const void *incoming_buffer; | ||
128 | uint32_t incoming_buffer_length; | ||
129 | uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; | ||
130 | uint32_t partial_block_buffer_length; | ||
131 | void *user_data; | ||
132 | }; | ||
133 | |||
134 | #endif | ||
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h deleted file mode 100644 index b01ae408c56d..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h +++ /dev/null | |||
@@ -1,108 +0,0 @@ | |||
1 | /* | ||
2 | * Header file for multi buffer SHA256 algorithm manager | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | #ifndef __SHA_MB_MGR_H | ||
54 | #define __SHA_MB_MGR_H | ||
55 | |||
56 | #include <linux/types.h> | ||
57 | |||
58 | #define NUM_SHA256_DIGEST_WORDS 8 | ||
59 | |||
60 | enum job_sts { STS_UNKNOWN = 0, | ||
61 | STS_BEING_PROCESSED = 1, | ||
62 | STS_COMPLETED = 2, | ||
63 | STS_INTERNAL_ERROR = 3, | ||
64 | STS_ERROR = 4 | ||
65 | }; | ||
66 | |||
67 | struct job_sha256 { | ||
68 | u8 *buffer; | ||
69 | u32 len; | ||
70 | u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32); | ||
71 | enum job_sts status; | ||
72 | void *user_data; | ||
73 | }; | ||
74 | |||
75 | /* SHA256 out-of-order scheduler */ | ||
76 | |||
77 | /* typedef uint32_t sha8_digest_array[8][8]; */ | ||
78 | |||
79 | struct sha256_args_x8 { | ||
80 | uint32_t digest[8][8]; | ||
81 | uint8_t *data_ptr[8]; | ||
82 | }; | ||
83 | |||
84 | struct sha256_lane_data { | ||
85 | struct job_sha256 *job_in_lane; | ||
86 | }; | ||
87 | |||
88 | struct sha256_mb_mgr { | ||
89 | struct sha256_args_x8 args; | ||
90 | |||
91 | uint32_t lens[8]; | ||
92 | |||
93 | /* each byte is index (0...7) of unused lanes */ | ||
94 | uint64_t unused_lanes; | ||
95 | /* byte 4 is set to FF as a flag */ | ||
96 | struct sha256_lane_data ldata[8]; | ||
97 | }; | ||
98 | |||
99 | |||
100 | #define SHA256_MB_MGR_NUM_LANES_AVX2 8 | ||
101 | |||
102 | void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state); | ||
103 | struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state, | ||
104 | struct job_sha256 *job); | ||
105 | struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state); | ||
106 | struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state); | ||
107 | |||
108 | #endif | ||
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S deleted file mode 100644 index 5c377bac21d0..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S +++ /dev/null | |||
@@ -1,304 +0,0 @@ | |||
1 | /* | ||
2 | * Header file for multi buffer SHA256 algorithm data structure | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | # Macros for defining data structures | ||
55 | |||
56 | # Usage example | ||
57 | |||
58 | #START_FIELDS # JOB_AES | ||
59 | ### name size align | ||
60 | #FIELD _plaintext, 8, 8 # pointer to plaintext | ||
61 | #FIELD _ciphertext, 8, 8 # pointer to ciphertext | ||
62 | #FIELD _IV, 16, 8 # IV | ||
63 | #FIELD _keys, 8, 8 # pointer to keys | ||
64 | #FIELD _len, 4, 4 # length in bytes | ||
65 | #FIELD _status, 4, 4 # status enumeration | ||
66 | #FIELD _user_data, 8, 8 # pointer to user data | ||
67 | #UNION _union, size1, align1, \ | ||
68 | # size2, align2, \ | ||
69 | # size3, align3, \ | ||
70 | # ... | ||
71 | #END_FIELDS | ||
72 | #%assign _JOB_AES_size _FIELD_OFFSET | ||
73 | #%assign _JOB_AES_align _STRUCT_ALIGN | ||
74 | |||
75 | ######################################################################### | ||
76 | |||
77 | # Alternate "struc-like" syntax: | ||
78 | # STRUCT job_aes2 | ||
79 | # RES_Q .plaintext, 1 | ||
80 | # RES_Q .ciphertext, 1 | ||
81 | # RES_DQ .IV, 1 | ||
82 | # RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN | ||
83 | # RES_U .union, size1, align1, \ | ||
84 | # size2, align2, \ | ||
85 | # ... | ||
86 | # ENDSTRUCT | ||
87 | # # Following only needed if nesting | ||
88 | # %assign job_aes2_size _FIELD_OFFSET | ||
89 | # %assign job_aes2_align _STRUCT_ALIGN | ||
90 | # | ||
91 | # RES_* macros take a name, a count and an optional alignment. | ||
92 | # The count in in terms of the base size of the macro, and the | ||
93 | # default alignment is the base size. | ||
94 | # The macros are: | ||
95 | # Macro Base size | ||
96 | # RES_B 1 | ||
97 | # RES_W 2 | ||
98 | # RES_D 4 | ||
99 | # RES_Q 8 | ||
100 | # RES_DQ 16 | ||
101 | # RES_Y 32 | ||
102 | # RES_Z 64 | ||
103 | # | ||
104 | # RES_U defines a union. It's arguments are a name and two or more | ||
105 | # pairs of "size, alignment" | ||
106 | # | ||
107 | # The two assigns are only needed if this structure is being nested | ||
108 | # within another. Even if the assigns are not done, one can still use | ||
109 | # STRUCT_NAME_size as the size of the structure. | ||
110 | # | ||
111 | # Note that for nesting, you still need to assign to STRUCT_NAME_size. | ||
112 | # | ||
113 | # The differences between this and using "struc" directly are that each | ||
114 | # type is implicitly aligned to its natural length (although this can be | ||
115 | # over-ridden with an explicit third parameter), and that the structure | ||
116 | # is padded at the end to its overall alignment. | ||
117 | # | ||
118 | |||
119 | ######################################################################### | ||
120 | |||
121 | #ifndef _DATASTRUCT_ASM_ | ||
122 | #define _DATASTRUCT_ASM_ | ||
123 | |||
124 | #define SZ8 8*SHA256_DIGEST_WORD_SIZE | ||
125 | #define ROUNDS 64*SZ8 | ||
126 | #define PTR_SZ 8 | ||
127 | #define SHA256_DIGEST_WORD_SIZE 4 | ||
128 | #define MAX_SHA256_LANES 8 | ||
129 | #define SHA256_DIGEST_WORDS 8 | ||
130 | #define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE) | ||
131 | #define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS) | ||
132 | #define SHA256_BLK_SZ 64 | ||
133 | |||
134 | # START_FIELDS | ||
135 | .macro START_FIELDS | ||
136 | _FIELD_OFFSET = 0 | ||
137 | _STRUCT_ALIGN = 0 | ||
138 | .endm | ||
139 | |||
140 | # FIELD name size align | ||
141 | .macro FIELD name size align | ||
142 | _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) | ||
143 | \name = _FIELD_OFFSET | ||
144 | _FIELD_OFFSET = _FIELD_OFFSET + (\size) | ||
145 | .if (\align > _STRUCT_ALIGN) | ||
146 | _STRUCT_ALIGN = \align | ||
147 | .endif | ||
148 | .endm | ||
149 | |||
150 | # END_FIELDS | ||
151 | .macro END_FIELDS | ||
152 | _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) | ||
153 | .endm | ||
154 | |||
155 | ######################################################################## | ||
156 | |||
157 | .macro STRUCT p1 | ||
158 | START_FIELDS | ||
159 | .struc \p1 | ||
160 | .endm | ||
161 | |||
162 | .macro ENDSTRUCT | ||
163 | tmp = _FIELD_OFFSET | ||
164 | END_FIELDS | ||
165 | tmp = (_FIELD_OFFSET - %%tmp) | ||
166 | .if (tmp > 0) | ||
167 | .lcomm tmp | ||
168 | .endif | ||
169 | .endstruc | ||
170 | .endm | ||
171 | |||
172 | ## RES_int name size align | ||
173 | .macro RES_int p1 p2 p3 | ||
174 | name = \p1 | ||
175 | size = \p2 | ||
176 | align = .\p3 | ||
177 | |||
178 | _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) | ||
179 | .align align | ||
180 | .lcomm name size | ||
181 | _FIELD_OFFSET = _FIELD_OFFSET + (size) | ||
182 | .if (align > _STRUCT_ALIGN) | ||
183 | _STRUCT_ALIGN = align | ||
184 | .endif | ||
185 | .endm | ||
186 | |||
187 | # macro RES_B name, size [, align] | ||
188 | .macro RES_B _name, _size, _align=1 | ||
189 | RES_int _name _size _align | ||
190 | .endm | ||
191 | |||
192 | # macro RES_W name, size [, align] | ||
193 | .macro RES_W _name, _size, _align=2 | ||
194 | RES_int _name 2*(_size) _align | ||
195 | .endm | ||
196 | |||
197 | # macro RES_D name, size [, align] | ||
198 | .macro RES_D _name, _size, _align=4 | ||
199 | RES_int _name 4*(_size) _align | ||
200 | .endm | ||
201 | |||
202 | # macro RES_Q name, size [, align] | ||
203 | .macro RES_Q _name, _size, _align=8 | ||
204 | RES_int _name 8*(_size) _align | ||
205 | .endm | ||
206 | |||
207 | # macro RES_DQ name, size [, align] | ||
208 | .macro RES_DQ _name, _size, _align=16 | ||
209 | RES_int _name 16*(_size) _align | ||
210 | .endm | ||
211 | |||
212 | # macro RES_Y name, size [, align] | ||
213 | .macro RES_Y _name, _size, _align=32 | ||
214 | RES_int _name 32*(_size) _align | ||
215 | .endm | ||
216 | |||
217 | # macro RES_Z name, size [, align] | ||
218 | .macro RES_Z _name, _size, _align=64 | ||
219 | RES_int _name 64*(_size) _align | ||
220 | .endm | ||
221 | |||
222 | #endif | ||
223 | |||
224 | |||
225 | ######################################################################## | ||
226 | #### Define SHA256 Out Of Order Data Structures | ||
227 | ######################################################################## | ||
228 | |||
229 | START_FIELDS # LANE_DATA | ||
230 | ### name size align | ||
231 | FIELD _job_in_lane, 8, 8 # pointer to job object | ||
232 | END_FIELDS | ||
233 | |||
234 | _LANE_DATA_size = _FIELD_OFFSET | ||
235 | _LANE_DATA_align = _STRUCT_ALIGN | ||
236 | |||
237 | ######################################################################## | ||
238 | |||
239 | START_FIELDS # SHA256_ARGS_X4 | ||
240 | ### name size align | ||
241 | FIELD _digest, 4*8*8, 4 # transposed digest | ||
242 | FIELD _data_ptr, 8*8, 8 # array of pointers to data | ||
243 | END_FIELDS | ||
244 | |||
245 | _SHA256_ARGS_X4_size = _FIELD_OFFSET | ||
246 | _SHA256_ARGS_X4_align = _STRUCT_ALIGN | ||
247 | _SHA256_ARGS_X8_size = _FIELD_OFFSET | ||
248 | _SHA256_ARGS_X8_align = _STRUCT_ALIGN | ||
249 | |||
250 | ####################################################################### | ||
251 | |||
252 | START_FIELDS # MB_MGR | ||
253 | ### name size align | ||
254 | FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align | ||
255 | FIELD _lens, 4*8, 8 | ||
256 | FIELD _unused_lanes, 8, 8 | ||
257 | FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align | ||
258 | END_FIELDS | ||
259 | |||
260 | _MB_MGR_size = _FIELD_OFFSET | ||
261 | _MB_MGR_align = _STRUCT_ALIGN | ||
262 | |||
263 | _args_digest = _args + _digest | ||
264 | _args_data_ptr = _args + _data_ptr | ||
265 | |||
266 | ####################################################################### | ||
267 | |||
268 | START_FIELDS #STACK_FRAME | ||
269 | ### name size align | ||
270 | FIELD _data, 16*SZ8, 1 # transposed digest | ||
271 | FIELD _digest, 8*SZ8, 1 # array of pointers to data | ||
272 | FIELD _ytmp, 4*SZ8, 1 | ||
273 | FIELD _rsp, 8, 1 | ||
274 | END_FIELDS | ||
275 | |||
276 | _STACK_FRAME_size = _FIELD_OFFSET | ||
277 | _STACK_FRAME_align = _STRUCT_ALIGN | ||
278 | |||
279 | ####################################################################### | ||
280 | |||
281 | ######################################################################## | ||
282 | #### Define constants | ||
283 | ######################################################################## | ||
284 | |||
285 | #define STS_UNKNOWN 0 | ||
286 | #define STS_BEING_PROCESSED 1 | ||
287 | #define STS_COMPLETED 2 | ||
288 | |||
289 | ######################################################################## | ||
290 | #### Define JOB_SHA256 structure | ||
291 | ######################################################################## | ||
292 | |||
293 | START_FIELDS # JOB_SHA256 | ||
294 | |||
295 | ### name size align | ||
296 | FIELD _buffer, 8, 8 # pointer to buffer | ||
297 | FIELD _len, 8, 8 # length in bytes | ||
298 | FIELD _result_digest, 8*4, 32 # Digest (output) | ||
299 | FIELD _status, 4, 4 | ||
300 | FIELD _user_data, 8, 8 | ||
301 | END_FIELDS | ||
302 | |||
303 | _JOB_SHA256_size = _FIELD_OFFSET | ||
304 | _JOB_SHA256_align = _STRUCT_ALIGN | ||
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S deleted file mode 100644 index d2364c55bbde..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ /dev/null | |||
@@ -1,307 +0,0 @@ | |||
1 | /* | ||
2 | * Flush routine for SHA256 multibuffer | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | #include <linux/linkage.h> | ||
54 | #include <asm/frame.h> | ||
55 | #include "sha256_mb_mgr_datastruct.S" | ||
56 | |||
57 | .extern sha256_x8_avx2 | ||
58 | |||
59 | #LINUX register definitions | ||
60 | #define arg1 %rdi | ||
61 | #define arg2 %rsi | ||
62 | |||
63 | # Common register definitions | ||
64 | #define state arg1 | ||
65 | #define job arg2 | ||
66 | #define len2 arg2 | ||
67 | |||
68 | # idx must be a register not clobberred by sha1_mult | ||
69 | #define idx %r8 | ||
70 | #define DWORD_idx %r8d | ||
71 | |||
72 | #define unused_lanes %rbx | ||
73 | #define lane_data %rbx | ||
74 | #define tmp2 %rbx | ||
75 | #define tmp2_w %ebx | ||
76 | |||
77 | #define job_rax %rax | ||
78 | #define tmp1 %rax | ||
79 | #define size_offset %rax | ||
80 | #define tmp %rax | ||
81 | #define start_offset %rax | ||
82 | |||
83 | #define tmp3 %arg1 | ||
84 | |||
85 | #define extra_blocks %arg2 | ||
86 | #define p %arg2 | ||
87 | |||
88 | .macro LABEL prefix n | ||
89 | \prefix\n\(): | ||
90 | .endm | ||
91 | |||
92 | .macro JNE_SKIP i | ||
93 | jne skip_\i | ||
94 | .endm | ||
95 | |||
96 | .altmacro | ||
97 | .macro SET_OFFSET _offset | ||
98 | offset = \_offset | ||
99 | .endm | ||
100 | .noaltmacro | ||
101 | |||
102 | # JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state) | ||
103 | # arg 1 : rcx : state | ||
104 | ENTRY(sha256_mb_mgr_flush_avx2) | ||
105 | FRAME_BEGIN | ||
106 | push %rbx | ||
107 | |||
108 | # If bit (32+3) is set, then all lanes are empty | ||
109 | mov _unused_lanes(state), unused_lanes | ||
110 | bt $32+3, unused_lanes | ||
111 | jc return_null | ||
112 | |||
113 | # find a lane with a non-null job | ||
114 | xor idx, idx | ||
115 | offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) | ||
116 | cmpq $0, offset(state) | ||
117 | cmovne one(%rip), idx | ||
118 | offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) | ||
119 | cmpq $0, offset(state) | ||
120 | cmovne two(%rip), idx | ||
121 | offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) | ||
122 | cmpq $0, offset(state) | ||
123 | cmovne three(%rip), idx | ||
124 | offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) | ||
125 | cmpq $0, offset(state) | ||
126 | cmovne four(%rip), idx | ||
127 | offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) | ||
128 | cmpq $0, offset(state) | ||
129 | cmovne five(%rip), idx | ||
130 | offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) | ||
131 | cmpq $0, offset(state) | ||
132 | cmovne six(%rip), idx | ||
133 | offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) | ||
134 | cmpq $0, offset(state) | ||
135 | cmovne seven(%rip), idx | ||
136 | |||
137 | # copy idx to empty lanes | ||
138 | copy_lane_data: | ||
139 | offset = (_args + _data_ptr) | ||
140 | mov offset(state,idx,8), tmp | ||
141 | |||
142 | I = 0 | ||
143 | .rep 8 | ||
144 | offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) | ||
145 | cmpq $0, offset(state) | ||
146 | .altmacro | ||
147 | JNE_SKIP %I | ||
148 | offset = (_args + _data_ptr + 8*I) | ||
149 | mov tmp, offset(state) | ||
150 | offset = (_lens + 4*I) | ||
151 | movl $0xFFFFFFFF, offset(state) | ||
152 | LABEL skip_ %I | ||
153 | I = (I+1) | ||
154 | .noaltmacro | ||
155 | .endr | ||
156 | |||
157 | # Find min length | ||
158 | vmovdqu _lens+0*16(state), %xmm0 | ||
159 | vmovdqu _lens+1*16(state), %xmm1 | ||
160 | |||
161 | vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} | ||
162 | vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} | ||
163 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} | ||
164 | vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} | ||
165 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword | ||
166 | |||
167 | vmovd %xmm2, DWORD_idx | ||
168 | mov idx, len2 | ||
169 | and $0xF, idx | ||
170 | shr $4, len2 | ||
171 | jz len_is_0 | ||
172 | |||
173 | vpand clear_low_nibble(%rip), %xmm2, %xmm2 | ||
174 | vpshufd $0, %xmm2, %xmm2 | ||
175 | |||
176 | vpsubd %xmm2, %xmm0, %xmm0 | ||
177 | vpsubd %xmm2, %xmm1, %xmm1 | ||
178 | |||
179 | vmovdqu %xmm0, _lens+0*16(state) | ||
180 | vmovdqu %xmm1, _lens+1*16(state) | ||
181 | |||
182 | # "state" and "args" are the same address, arg1 | ||
183 | # len is arg2 | ||
184 | call sha256_x8_avx2 | ||
185 | # state and idx are intact | ||
186 | |||
187 | len_is_0: | ||
188 | # process completed job "idx" | ||
189 | imul $_LANE_DATA_size, idx, lane_data | ||
190 | lea _ldata(state, lane_data), lane_data | ||
191 | |||
192 | mov _job_in_lane(lane_data), job_rax | ||
193 | movq $0, _job_in_lane(lane_data) | ||
194 | movl $STS_COMPLETED, _status(job_rax) | ||
195 | mov _unused_lanes(state), unused_lanes | ||
196 | shl $4, unused_lanes | ||
197 | or idx, unused_lanes | ||
198 | |||
199 | mov unused_lanes, _unused_lanes(state) | ||
200 | movl $0xFFFFFFFF, _lens(state,idx,4) | ||
201 | |||
202 | vmovd _args_digest(state , idx, 4) , %xmm0 | ||
203 | vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 | ||
204 | vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 | ||
205 | vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 | ||
206 | vmovd _args_digest+4*32(state, idx, 4), %xmm1 | ||
207 | vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 | ||
208 | vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 | ||
209 | vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 | ||
210 | |||
211 | vmovdqu %xmm0, _result_digest(job_rax) | ||
212 | offset = (_result_digest + 1*16) | ||
213 | vmovdqu %xmm1, offset(job_rax) | ||
214 | |||
215 | return: | ||
216 | pop %rbx | ||
217 | FRAME_END | ||
218 | ret | ||
219 | |||
220 | return_null: | ||
221 | xor job_rax, job_rax | ||
222 | jmp return | ||
223 | ENDPROC(sha256_mb_mgr_flush_avx2) | ||
224 | |||
225 | ############################################################################## | ||
226 | |||
227 | .align 16 | ||
228 | ENTRY(sha256_mb_mgr_get_comp_job_avx2) | ||
229 | push %rbx | ||
230 | |||
231 | ## if bit 32+3 is set, then all lanes are empty | ||
232 | mov _unused_lanes(state), unused_lanes | ||
233 | bt $(32+3), unused_lanes | ||
234 | jc .return_null | ||
235 | |||
236 | # Find min length | ||
237 | vmovdqu _lens(state), %xmm0 | ||
238 | vmovdqu _lens+1*16(state), %xmm1 | ||
239 | |||
240 | vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} | ||
241 | vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} | ||
242 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} | ||
243 | vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} | ||
244 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword | ||
245 | |||
246 | vmovd %xmm2, DWORD_idx | ||
247 | test $~0xF, idx | ||
248 | jnz .return_null | ||
249 | |||
250 | # process completed job "idx" | ||
251 | imul $_LANE_DATA_size, idx, lane_data | ||
252 | lea _ldata(state, lane_data), lane_data | ||
253 | |||
254 | mov _job_in_lane(lane_data), job_rax | ||
255 | movq $0, _job_in_lane(lane_data) | ||
256 | movl $STS_COMPLETED, _status(job_rax) | ||
257 | mov _unused_lanes(state), unused_lanes | ||
258 | shl $4, unused_lanes | ||
259 | or idx, unused_lanes | ||
260 | mov unused_lanes, _unused_lanes(state) | ||
261 | |||
262 | movl $0xFFFFFFFF, _lens(state, idx, 4) | ||
263 | |||
264 | vmovd _args_digest(state, idx, 4), %xmm0 | ||
265 | vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 | ||
266 | vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 | ||
267 | vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 | ||
268 | vmovd _args_digest+4*32(state, idx, 4), %xmm1 | ||
269 | vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 | ||
270 | vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 | ||
271 | vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 | ||
272 | |||
273 | vmovdqu %xmm0, _result_digest(job_rax) | ||
274 | offset = (_result_digest + 1*16) | ||
275 | vmovdqu %xmm1, offset(job_rax) | ||
276 | |||
277 | pop %rbx | ||
278 | |||
279 | ret | ||
280 | |||
281 | .return_null: | ||
282 | xor job_rax, job_rax | ||
283 | pop %rbx | ||
284 | ret | ||
285 | ENDPROC(sha256_mb_mgr_get_comp_job_avx2) | ||
286 | |||
287 | .section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 | ||
288 | .align 16 | ||
289 | clear_low_nibble: | ||
290 | .octa 0x000000000000000000000000FFFFFFF0 | ||
291 | |||
292 | .section .rodata.cst8, "aM", @progbits, 8 | ||
293 | .align 8 | ||
294 | one: | ||
295 | .quad 1 | ||
296 | two: | ||
297 | .quad 2 | ||
298 | three: | ||
299 | .quad 3 | ||
300 | four: | ||
301 | .quad 4 | ||
302 | five: | ||
303 | .quad 5 | ||
304 | six: | ||
305 | .quad 6 | ||
306 | seven: | ||
307 | .quad 7 | ||
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c deleted file mode 100644 index b0c498371e67..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c +++ /dev/null | |||
@@ -1,65 +0,0 @@ | |||
1 | /* | ||
2 | * Initialization code for multi buffer SHA256 algorithm for AVX2 | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #include "sha256_mb_mgr.h" | ||
55 | |||
56 | void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state) | ||
57 | { | ||
58 | unsigned int j; | ||
59 | |||
60 | state->unused_lanes = 0xF76543210ULL; | ||
61 | for (j = 0; j < 8; j++) { | ||
62 | state->lens[j] = 0xFFFFFFFF; | ||
63 | state->ldata[j].job_in_lane = NULL; | ||
64 | } | ||
65 | } | ||
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S deleted file mode 100644 index b36ae7454084..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S +++ /dev/null | |||
@@ -1,214 +0,0 @@ | |||
1 | /* | ||
2 | * Buffer submit code for multi buffer SHA256 algorithm | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #include <linux/linkage.h> | ||
55 | #include <asm/frame.h> | ||
56 | #include "sha256_mb_mgr_datastruct.S" | ||
57 | |||
58 | .extern sha256_x8_avx2 | ||
59 | |||
60 | # LINUX register definitions | ||
61 | arg1 = %rdi | ||
62 | arg2 = %rsi | ||
63 | size_offset = %rcx | ||
64 | tmp2 = %rcx | ||
65 | extra_blocks = %rdx | ||
66 | |||
67 | # Common definitions | ||
68 | #define state arg1 | ||
69 | #define job %rsi | ||
70 | #define len2 arg2 | ||
71 | #define p2 arg2 | ||
72 | |||
73 | # idx must be a register not clobberred by sha1_x8_avx2 | ||
74 | idx = %r8 | ||
75 | DWORD_idx = %r8d | ||
76 | last_len = %r8 | ||
77 | |||
78 | p = %r11 | ||
79 | start_offset = %r11 | ||
80 | |||
81 | unused_lanes = %rbx | ||
82 | BYTE_unused_lanes = %bl | ||
83 | |||
84 | job_rax = %rax | ||
85 | len = %rax | ||
86 | DWORD_len = %eax | ||
87 | |||
88 | lane = %r12 | ||
89 | tmp3 = %r12 | ||
90 | |||
91 | tmp = %r9 | ||
92 | DWORD_tmp = %r9d | ||
93 | |||
94 | lane_data = %r10 | ||
95 | |||
96 | # JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job) | ||
97 | # arg 1 : rcx : state | ||
98 | # arg 2 : rdx : job | ||
99 | ENTRY(sha256_mb_mgr_submit_avx2) | ||
100 | FRAME_BEGIN | ||
101 | push %rbx | ||
102 | push %r12 | ||
103 | |||
104 | mov _unused_lanes(state), unused_lanes | ||
105 | mov unused_lanes, lane | ||
106 | and $0xF, lane | ||
107 | shr $4, unused_lanes | ||
108 | imul $_LANE_DATA_size, lane, lane_data | ||
109 | movl $STS_BEING_PROCESSED, _status(job) | ||
110 | lea _ldata(state, lane_data), lane_data | ||
111 | mov unused_lanes, _unused_lanes(state) | ||
112 | movl _len(job), DWORD_len | ||
113 | |||
114 | mov job, _job_in_lane(lane_data) | ||
115 | shl $4, len | ||
116 | or lane, len | ||
117 | |||
118 | movl DWORD_len, _lens(state , lane, 4) | ||
119 | |||
120 | # Load digest words from result_digest | ||
121 | vmovdqu _result_digest(job), %xmm0 | ||
122 | vmovdqu _result_digest+1*16(job), %xmm1 | ||
123 | vmovd %xmm0, _args_digest(state, lane, 4) | ||
124 | vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) | ||
125 | vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) | ||
126 | vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) | ||
127 | vmovd %xmm1, _args_digest+4*32(state , lane, 4) | ||
128 | |||
129 | vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4) | ||
130 | vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4) | ||
131 | vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4) | ||
132 | |||
133 | mov _buffer(job), p | ||
134 | mov p, _args_data_ptr(state, lane, 8) | ||
135 | |||
136 | cmp $0xF, unused_lanes | ||
137 | jne return_null | ||
138 | |||
139 | start_loop: | ||
140 | # Find min length | ||
141 | vmovdqa _lens(state), %xmm0 | ||
142 | vmovdqa _lens+1*16(state), %xmm1 | ||
143 | |||
144 | vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} | ||
145 | vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} | ||
146 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} | ||
147 | vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} | ||
148 | vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword | ||
149 | |||
150 | vmovd %xmm2, DWORD_idx | ||
151 | mov idx, len2 | ||
152 | and $0xF, idx | ||
153 | shr $4, len2 | ||
154 | jz len_is_0 | ||
155 | |||
156 | vpand clear_low_nibble(%rip), %xmm2, %xmm2 | ||
157 | vpshufd $0, %xmm2, %xmm2 | ||
158 | |||
159 | vpsubd %xmm2, %xmm0, %xmm0 | ||
160 | vpsubd %xmm2, %xmm1, %xmm1 | ||
161 | |||
162 | vmovdqa %xmm0, _lens + 0*16(state) | ||
163 | vmovdqa %xmm1, _lens + 1*16(state) | ||
164 | |||
165 | # "state" and "args" are the same address, arg1 | ||
166 | # len is arg2 | ||
167 | call sha256_x8_avx2 | ||
168 | |||
169 | # state and idx are intact | ||
170 | |||
171 | len_is_0: | ||
172 | # process completed job "idx" | ||
173 | imul $_LANE_DATA_size, idx, lane_data | ||
174 | lea _ldata(state, lane_data), lane_data | ||
175 | |||
176 | mov _job_in_lane(lane_data), job_rax | ||
177 | mov _unused_lanes(state), unused_lanes | ||
178 | movq $0, _job_in_lane(lane_data) | ||
179 | movl $STS_COMPLETED, _status(job_rax) | ||
180 | shl $4, unused_lanes | ||
181 | or idx, unused_lanes | ||
182 | mov unused_lanes, _unused_lanes(state) | ||
183 | |||
184 | movl $0xFFFFFFFF, _lens(state,idx,4) | ||
185 | |||
186 | vmovd _args_digest(state, idx, 4), %xmm0 | ||
187 | vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 | ||
188 | vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 | ||
189 | vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 | ||
190 | vmovd _args_digest+4*32(state, idx, 4), %xmm1 | ||
191 | |||
192 | vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1 | ||
193 | vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1 | ||
194 | vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1 | ||
195 | |||
196 | vmovdqu %xmm0, _result_digest(job_rax) | ||
197 | vmovdqu %xmm1, _result_digest+1*16(job_rax) | ||
198 | |||
199 | return: | ||
200 | pop %r12 | ||
201 | pop %rbx | ||
202 | FRAME_END | ||
203 | ret | ||
204 | |||
205 | return_null: | ||
206 | xor job_rax, job_rax | ||
207 | jmp return | ||
208 | |||
209 | ENDPROC(sha256_mb_mgr_submit_avx2) | ||
210 | |||
211 | .section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 | ||
212 | .align 16 | ||
213 | clear_low_nibble: | ||
214 | .octa 0x000000000000000000000000FFFFFFF0 | ||
diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S deleted file mode 100644 index 1687c80c5995..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S +++ /dev/null | |||
@@ -1,598 +0,0 @@ | |||
1 | /* | ||
2 | * Multi-buffer SHA256 algorithm hash compute routine | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #include <linux/linkage.h> | ||
55 | #include "sha256_mb_mgr_datastruct.S" | ||
56 | |||
57 | ## code to compute oct SHA256 using SSE-256 | ||
58 | ## outer calling routine takes care of save and restore of XMM registers | ||
59 | ## Logic designed/laid out by JDG | ||
60 | |||
61 | ## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15 | ||
62 | ## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 | ||
63 | ## Linux preserves: rdi rbp r8 | ||
64 | ## | ||
65 | ## clobbers %ymm0-15 | ||
66 | |||
67 | arg1 = %rdi | ||
68 | arg2 = %rsi | ||
69 | reg3 = %rcx | ||
70 | reg4 = %rdx | ||
71 | |||
72 | # Common definitions | ||
73 | STATE = arg1 | ||
74 | INP_SIZE = arg2 | ||
75 | |||
76 | IDX = %rax | ||
77 | ROUND = %rbx | ||
78 | TBL = reg3 | ||
79 | |||
80 | inp0 = %r9 | ||
81 | inp1 = %r10 | ||
82 | inp2 = %r11 | ||
83 | inp3 = %r12 | ||
84 | inp4 = %r13 | ||
85 | inp5 = %r14 | ||
86 | inp6 = %r15 | ||
87 | inp7 = reg4 | ||
88 | |||
89 | a = %ymm0 | ||
90 | b = %ymm1 | ||
91 | c = %ymm2 | ||
92 | d = %ymm3 | ||
93 | e = %ymm4 | ||
94 | f = %ymm5 | ||
95 | g = %ymm6 | ||
96 | h = %ymm7 | ||
97 | |||
98 | T1 = %ymm8 | ||
99 | |||
100 | a0 = %ymm12 | ||
101 | a1 = %ymm13 | ||
102 | a2 = %ymm14 | ||
103 | TMP = %ymm15 | ||
104 | TMP0 = %ymm6 | ||
105 | TMP1 = %ymm7 | ||
106 | |||
107 | TT0 = %ymm8 | ||
108 | TT1 = %ymm9 | ||
109 | TT2 = %ymm10 | ||
110 | TT3 = %ymm11 | ||
111 | TT4 = %ymm12 | ||
112 | TT5 = %ymm13 | ||
113 | TT6 = %ymm14 | ||
114 | TT7 = %ymm15 | ||
115 | |||
116 | # Define stack usage | ||
117 | |||
118 | # Assume stack aligned to 32 bytes before call | ||
119 | # Therefore FRAMESZ mod 32 must be 32-8 = 24 | ||
120 | |||
121 | #define FRAMESZ 0x388 | ||
122 | |||
123 | #define VMOVPS vmovups | ||
124 | |||
125 | # TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 | ||
126 | # "transpose" data in {r0...r7} using temps {t0...t1} | ||
127 | # Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} | ||
128 | # r0 = {a7 a6 a5 a4 a3 a2 a1 a0} | ||
129 | # r1 = {b7 b6 b5 b4 b3 b2 b1 b0} | ||
130 | # r2 = {c7 c6 c5 c4 c3 c2 c1 c0} | ||
131 | # r3 = {d7 d6 d5 d4 d3 d2 d1 d0} | ||
132 | # r4 = {e7 e6 e5 e4 e3 e2 e1 e0} | ||
133 | # r5 = {f7 f6 f5 f4 f3 f2 f1 f0} | ||
134 | # r6 = {g7 g6 g5 g4 g3 g2 g1 g0} | ||
135 | # r7 = {h7 h6 h5 h4 h3 h2 h1 h0} | ||
136 | # | ||
137 | # Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} | ||
138 | # r0 = {h0 g0 f0 e0 d0 c0 b0 a0} | ||
139 | # r1 = {h1 g1 f1 e1 d1 c1 b1 a1} | ||
140 | # r2 = {h2 g2 f2 e2 d2 c2 b2 a2} | ||
141 | # r3 = {h3 g3 f3 e3 d3 c3 b3 a3} | ||
142 | # r4 = {h4 g4 f4 e4 d4 c4 b4 a4} | ||
143 | # r5 = {h5 g5 f5 e5 d5 c5 b5 a5} | ||
144 | # r6 = {h6 g6 f6 e6 d6 c6 b6 a6} | ||
145 | # r7 = {h7 g7 f7 e7 d7 c7 b7 a7} | ||
146 | # | ||
147 | |||
148 | .macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 | ||
149 | # process top half (r0..r3) {a...d} | ||
150 | vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} | ||
151 | vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} | ||
152 | vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} | ||
153 | vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} | ||
154 | vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} | ||
155 | vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} | ||
156 | vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} | ||
157 | vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} | ||
158 | |||
159 | # use r2 in place of t0 | ||
160 | # process bottom half (r4..r7) {e...h} | ||
161 | vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} | ||
162 | vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} | ||
163 | vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} | ||
164 | vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} | ||
165 | vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} | ||
166 | vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} | ||
167 | vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} | ||
168 | vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} | ||
169 | |||
170 | vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 | ||
171 | vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 | ||
172 | vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 | ||
173 | vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 | ||
174 | vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 | ||
175 | vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 | ||
176 | vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 | ||
177 | vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 | ||
178 | |||
179 | .endm | ||
180 | |||
181 | .macro ROTATE_ARGS | ||
182 | TMP_ = h | ||
183 | h = g | ||
184 | g = f | ||
185 | f = e | ||
186 | e = d | ||
187 | d = c | ||
188 | c = b | ||
189 | b = a | ||
190 | a = TMP_ | ||
191 | .endm | ||
192 | |||
193 | .macro _PRORD reg imm tmp | ||
194 | vpslld $(32-\imm),\reg,\tmp | ||
195 | vpsrld $\imm,\reg, \reg | ||
196 | vpor \tmp,\reg, \reg | ||
197 | .endm | ||
198 | |||
199 | # PRORD_nd reg, imm, tmp, src | ||
200 | .macro _PRORD_nd reg imm tmp src | ||
201 | vpslld $(32-\imm), \src, \tmp | ||
202 | vpsrld $\imm, \src, \reg | ||
203 | vpor \tmp, \reg, \reg | ||
204 | .endm | ||
205 | |||
206 | # PRORD dst/src, amt | ||
207 | .macro PRORD reg imm | ||
208 | _PRORD \reg,\imm,TMP | ||
209 | .endm | ||
210 | |||
211 | # PRORD_nd dst, src, amt | ||
212 | .macro PRORD_nd reg tmp imm | ||
213 | _PRORD_nd \reg, \imm, TMP, \tmp | ||
214 | .endm | ||
215 | |||
216 | # arguments passed implicitly in preprocessor symbols i, a...h | ||
217 | .macro ROUND_00_15 _T1 i | ||
218 | PRORD_nd a0,e,5 # sig1: a0 = (e >> 5) | ||
219 | |||
220 | vpxor g, f, a2 # ch: a2 = f^g | ||
221 | vpand e,a2, a2 # ch: a2 = (f^g)&e | ||
222 | vpxor g, a2, a2 # a2 = ch | ||
223 | |||
224 | PRORD_nd a1,e,25 # sig1: a1 = (e >> 25) | ||
225 | |||
226 | vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp) | ||
227 | vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K | ||
228 | vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) | ||
229 | PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11) | ||
230 | vpaddd a2, h, h # h = h + ch | ||
231 | PRORD_nd a2,a,11 # sig0: a2 = (a >> 11) | ||
232 | vpaddd \_T1,h, h # h = h + ch + W + K | ||
233 | vpxor a1, a0, a0 # a0 = sigma1 | ||
234 | PRORD_nd a1,a,22 # sig0: a1 = (a >> 22) | ||
235 | vpxor c, a, \_T1 # maj: T1 = a^c | ||
236 | add $SZ8, ROUND # ROUND++ | ||
237 | vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b | ||
238 | vpaddd a0, h, h | ||
239 | vpaddd h, d, d | ||
240 | vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) | ||
241 | PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13) | ||
242 | vpxor a1, a2, a2 # a2 = sig0 | ||
243 | vpand c, a, a1 # maj: a1 = a&c | ||
244 | vpor \_T1, a1, a1 # a1 = maj | ||
245 | vpaddd a1, h, h # h = h + ch + W + K + maj | ||
246 | vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0 | ||
247 | ROTATE_ARGS | ||
248 | .endm | ||
249 | |||
250 | # arguments passed implicitly in preprocessor symbols i, a...h | ||
251 | .macro ROUND_16_XX _T1 i | ||
252 | vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1 | ||
253 | vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1 | ||
254 | vmovdqu \_T1, a0 | ||
255 | PRORD \_T1,11 | ||
256 | vmovdqu a1, a2 | ||
257 | PRORD a1,2 | ||
258 | vpxor a0, \_T1, \_T1 | ||
259 | PRORD \_T1, 7 | ||
260 | vpxor a2, a1, a1 | ||
261 | PRORD a1, 17 | ||
262 | vpsrld $3, a0, a0 | ||
263 | vpxor a0, \_T1, \_T1 | ||
264 | vpsrld $10, a2, a2 | ||
265 | vpxor a2, a1, a1 | ||
266 | vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1 | ||
267 | vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1 | ||
268 | vpaddd a1, \_T1, \_T1 | ||
269 | |||
270 | ROUND_00_15 \_T1,\i | ||
271 | .endm | ||
272 | |||
273 | # SHA256_ARGS: | ||
274 | # UINT128 digest[8]; // transposed digests | ||
275 | # UINT8 *data_ptr[4]; | ||
276 | |||
277 | # void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes); | ||
278 | # arg 1 : STATE : pointer to array of pointers to input data | ||
279 | # arg 2 : INP_SIZE : size of input in blocks | ||
280 | # general registers preserved in outer calling routine | ||
281 | # outer calling routine saves all the XMM registers | ||
282 | # save rsp, allocate 32-byte aligned for local variables | ||
283 | ENTRY(sha256_x8_avx2) | ||
284 | |||
285 | # save callee-saved clobbered registers to comply with C function ABI | ||
286 | push %r12 | ||
287 | push %r13 | ||
288 | push %r14 | ||
289 | push %r15 | ||
290 | |||
291 | mov %rsp, IDX | ||
292 | sub $FRAMESZ, %rsp | ||
293 | and $~0x1F, %rsp | ||
294 | mov IDX, _rsp(%rsp) | ||
295 | |||
296 | # Load the pre-transposed incoming digest. | ||
297 | vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a | ||
298 | vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b | ||
299 | vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c | ||
300 | vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d | ||
301 | vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e | ||
302 | vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f | ||
303 | vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g | ||
304 | vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h | ||
305 | |||
306 | lea K256_8(%rip),TBL | ||
307 | |||
308 | # load the address of each of the 4 message lanes | ||
309 | # getting ready to transpose input onto stack | ||
310 | mov _args_data_ptr+0*PTR_SZ(STATE),inp0 | ||
311 | mov _args_data_ptr+1*PTR_SZ(STATE),inp1 | ||
312 | mov _args_data_ptr+2*PTR_SZ(STATE),inp2 | ||
313 | mov _args_data_ptr+3*PTR_SZ(STATE),inp3 | ||
314 | mov _args_data_ptr+4*PTR_SZ(STATE),inp4 | ||
315 | mov _args_data_ptr+5*PTR_SZ(STATE),inp5 | ||
316 | mov _args_data_ptr+6*PTR_SZ(STATE),inp6 | ||
317 | mov _args_data_ptr+7*PTR_SZ(STATE),inp7 | ||
318 | |||
319 | xor IDX, IDX | ||
320 | lloop: | ||
321 | xor ROUND, ROUND | ||
322 | |||
323 | # save old digest | ||
324 | vmovdqu a, _digest(%rsp) | ||
325 | vmovdqu b, _digest+1*SZ8(%rsp) | ||
326 | vmovdqu c, _digest+2*SZ8(%rsp) | ||
327 | vmovdqu d, _digest+3*SZ8(%rsp) | ||
328 | vmovdqu e, _digest+4*SZ8(%rsp) | ||
329 | vmovdqu f, _digest+5*SZ8(%rsp) | ||
330 | vmovdqu g, _digest+6*SZ8(%rsp) | ||
331 | vmovdqu h, _digest+7*SZ8(%rsp) | ||
332 | i = 0 | ||
333 | .rep 2 | ||
334 | VMOVPS i*32(inp0, IDX), TT0 | ||
335 | VMOVPS i*32(inp1, IDX), TT1 | ||
336 | VMOVPS i*32(inp2, IDX), TT2 | ||
337 | VMOVPS i*32(inp3, IDX), TT3 | ||
338 | VMOVPS i*32(inp4, IDX), TT4 | ||
339 | VMOVPS i*32(inp5, IDX), TT5 | ||
340 | VMOVPS i*32(inp6, IDX), TT6 | ||
341 | VMOVPS i*32(inp7, IDX), TT7 | ||
342 | vmovdqu g, _ytmp(%rsp) | ||
343 | vmovdqu h, _ytmp+1*SZ8(%rsp) | ||
344 | TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1 | ||
345 | vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1 | ||
346 | vmovdqu _ytmp(%rsp), g | ||
347 | vpshufb TMP1, TT0, TT0 | ||
348 | vpshufb TMP1, TT1, TT1 | ||
349 | vpshufb TMP1, TT2, TT2 | ||
350 | vpshufb TMP1, TT3, TT3 | ||
351 | vpshufb TMP1, TT4, TT4 | ||
352 | vpshufb TMP1, TT5, TT5 | ||
353 | vpshufb TMP1, TT6, TT6 | ||
354 | vpshufb TMP1, TT7, TT7 | ||
355 | vmovdqu _ytmp+1*SZ8(%rsp), h | ||
356 | vmovdqu TT4, _ytmp(%rsp) | ||
357 | vmovdqu TT5, _ytmp+1*SZ8(%rsp) | ||
358 | vmovdqu TT6, _ytmp+2*SZ8(%rsp) | ||
359 | vmovdqu TT7, _ytmp+3*SZ8(%rsp) | ||
360 | ROUND_00_15 TT0,(i*8+0) | ||
361 | vmovdqu _ytmp(%rsp), TT0 | ||
362 | ROUND_00_15 TT1,(i*8+1) | ||
363 | vmovdqu _ytmp+1*SZ8(%rsp), TT1 | ||
364 | ROUND_00_15 TT2,(i*8+2) | ||
365 | vmovdqu _ytmp+2*SZ8(%rsp), TT2 | ||
366 | ROUND_00_15 TT3,(i*8+3) | ||
367 | vmovdqu _ytmp+3*SZ8(%rsp), TT3 | ||
368 | ROUND_00_15 TT0,(i*8+4) | ||
369 | ROUND_00_15 TT1,(i*8+5) | ||
370 | ROUND_00_15 TT2,(i*8+6) | ||
371 | ROUND_00_15 TT3,(i*8+7) | ||
372 | i = (i+1) | ||
373 | .endr | ||
374 | add $64, IDX | ||
375 | i = (i*8) | ||
376 | |||
377 | jmp Lrounds_16_xx | ||
378 | .align 16 | ||
379 | Lrounds_16_xx: | ||
380 | .rep 16 | ||
381 | ROUND_16_XX T1, i | ||
382 | i = (i+1) | ||
383 | .endr | ||
384 | |||
385 | cmp $ROUNDS,ROUND | ||
386 | jb Lrounds_16_xx | ||
387 | |||
388 | # add old digest | ||
389 | vpaddd _digest+0*SZ8(%rsp), a, a | ||
390 | vpaddd _digest+1*SZ8(%rsp), b, b | ||
391 | vpaddd _digest+2*SZ8(%rsp), c, c | ||
392 | vpaddd _digest+3*SZ8(%rsp), d, d | ||
393 | vpaddd _digest+4*SZ8(%rsp), e, e | ||
394 | vpaddd _digest+5*SZ8(%rsp), f, f | ||
395 | vpaddd _digest+6*SZ8(%rsp), g, g | ||
396 | vpaddd _digest+7*SZ8(%rsp), h, h | ||
397 | |||
398 | sub $1, INP_SIZE # unit is blocks | ||
399 | jne lloop | ||
400 | |||
401 | # write back to memory (state object) the transposed digest | ||
402 | vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE) | ||
403 | vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE) | ||
404 | vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE) | ||
405 | vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE) | ||
406 | vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE) | ||
407 | vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE) | ||
408 | vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE) | ||
409 | vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE) | ||
410 | |||
411 | # update input pointers | ||
412 | add IDX, inp0 | ||
413 | mov inp0, _args_data_ptr+0*8(STATE) | ||
414 | add IDX, inp1 | ||
415 | mov inp1, _args_data_ptr+1*8(STATE) | ||
416 | add IDX, inp2 | ||
417 | mov inp2, _args_data_ptr+2*8(STATE) | ||
418 | add IDX, inp3 | ||
419 | mov inp3, _args_data_ptr+3*8(STATE) | ||
420 | add IDX, inp4 | ||
421 | mov inp4, _args_data_ptr+4*8(STATE) | ||
422 | add IDX, inp5 | ||
423 | mov inp5, _args_data_ptr+5*8(STATE) | ||
424 | add IDX, inp6 | ||
425 | mov inp6, _args_data_ptr+6*8(STATE) | ||
426 | add IDX, inp7 | ||
427 | mov inp7, _args_data_ptr+7*8(STATE) | ||
428 | |||
429 | # Postamble | ||
430 | mov _rsp(%rsp), %rsp | ||
431 | |||
432 | # restore callee-saved clobbered registers | ||
433 | pop %r15 | ||
434 | pop %r14 | ||
435 | pop %r13 | ||
436 | pop %r12 | ||
437 | |||
438 | ret | ||
439 | ENDPROC(sha256_x8_avx2) | ||
440 | |||
441 | .section .rodata.K256_8, "a", @progbits | ||
442 | .align 64 | ||
443 | K256_8: | ||
444 | .octa 0x428a2f98428a2f98428a2f98428a2f98 | ||
445 | .octa 0x428a2f98428a2f98428a2f98428a2f98 | ||
446 | .octa 0x71374491713744917137449171374491 | ||
447 | .octa 0x71374491713744917137449171374491 | ||
448 | .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf | ||
449 | .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf | ||
450 | .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 | ||
451 | .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 | ||
452 | .octa 0x3956c25b3956c25b3956c25b3956c25b | ||
453 | .octa 0x3956c25b3956c25b3956c25b3956c25b | ||
454 | .octa 0x59f111f159f111f159f111f159f111f1 | ||
455 | .octa 0x59f111f159f111f159f111f159f111f1 | ||
456 | .octa 0x923f82a4923f82a4923f82a4923f82a4 | ||
457 | .octa 0x923f82a4923f82a4923f82a4923f82a4 | ||
458 | .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 | ||
459 | .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 | ||
460 | .octa 0xd807aa98d807aa98d807aa98d807aa98 | ||
461 | .octa 0xd807aa98d807aa98d807aa98d807aa98 | ||
462 | .octa 0x12835b0112835b0112835b0112835b01 | ||
463 | .octa 0x12835b0112835b0112835b0112835b01 | ||
464 | .octa 0x243185be243185be243185be243185be | ||
465 | .octa 0x243185be243185be243185be243185be | ||
466 | .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 | ||
467 | .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 | ||
468 | .octa 0x72be5d7472be5d7472be5d7472be5d74 | ||
469 | .octa 0x72be5d7472be5d7472be5d7472be5d74 | ||
470 | .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe | ||
471 | .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe | ||
472 | .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 | ||
473 | .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 | ||
474 | .octa 0xc19bf174c19bf174c19bf174c19bf174 | ||
475 | .octa 0xc19bf174c19bf174c19bf174c19bf174 | ||
476 | .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 | ||
477 | .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 | ||
478 | .octa 0xefbe4786efbe4786efbe4786efbe4786 | ||
479 | .octa 0xefbe4786efbe4786efbe4786efbe4786 | ||
480 | .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 | ||
481 | .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 | ||
482 | .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc | ||
483 | .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc | ||
484 | .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f | ||
485 | .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f | ||
486 | .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa | ||
487 | .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa | ||
488 | .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc | ||
489 | .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc | ||
490 | .octa 0x76f988da76f988da76f988da76f988da | ||
491 | .octa 0x76f988da76f988da76f988da76f988da | ||
492 | .octa 0x983e5152983e5152983e5152983e5152 | ||
493 | .octa 0x983e5152983e5152983e5152983e5152 | ||
494 | .octa 0xa831c66da831c66da831c66da831c66d | ||
495 | .octa 0xa831c66da831c66da831c66da831c66d | ||
496 | .octa 0xb00327c8b00327c8b00327c8b00327c8 | ||
497 | .octa 0xb00327c8b00327c8b00327c8b00327c8 | ||
498 | .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 | ||
499 | .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 | ||
500 | .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 | ||
501 | .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 | ||
502 | .octa 0xd5a79147d5a79147d5a79147d5a79147 | ||
503 | .octa 0xd5a79147d5a79147d5a79147d5a79147 | ||
504 | .octa 0x06ca635106ca635106ca635106ca6351 | ||
505 | .octa 0x06ca635106ca635106ca635106ca6351 | ||
506 | .octa 0x14292967142929671429296714292967 | ||
507 | .octa 0x14292967142929671429296714292967 | ||
508 | .octa 0x27b70a8527b70a8527b70a8527b70a85 | ||
509 | .octa 0x27b70a8527b70a8527b70a8527b70a85 | ||
510 | .octa 0x2e1b21382e1b21382e1b21382e1b2138 | ||
511 | .octa 0x2e1b21382e1b21382e1b21382e1b2138 | ||
512 | .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc | ||
513 | .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc | ||
514 | .octa 0x53380d1353380d1353380d1353380d13 | ||
515 | .octa 0x53380d1353380d1353380d1353380d13 | ||
516 | .octa 0x650a7354650a7354650a7354650a7354 | ||
517 | .octa 0x650a7354650a7354650a7354650a7354 | ||
518 | .octa 0x766a0abb766a0abb766a0abb766a0abb | ||
519 | .octa 0x766a0abb766a0abb766a0abb766a0abb | ||
520 | .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e | ||
521 | .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e | ||
522 | .octa 0x92722c8592722c8592722c8592722c85 | ||
523 | .octa 0x92722c8592722c8592722c8592722c85 | ||
524 | .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 | ||
525 | .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 | ||
526 | .octa 0xa81a664ba81a664ba81a664ba81a664b | ||
527 | .octa 0xa81a664ba81a664ba81a664ba81a664b | ||
528 | .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 | ||
529 | .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 | ||
530 | .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 | ||
531 | .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 | ||
532 | .octa 0xd192e819d192e819d192e819d192e819 | ||
533 | .octa 0xd192e819d192e819d192e819d192e819 | ||
534 | .octa 0xd6990624d6990624d6990624d6990624 | ||
535 | .octa 0xd6990624d6990624d6990624d6990624 | ||
536 | .octa 0xf40e3585f40e3585f40e3585f40e3585 | ||
537 | .octa 0xf40e3585f40e3585f40e3585f40e3585 | ||
538 | .octa 0x106aa070106aa070106aa070106aa070 | ||
539 | .octa 0x106aa070106aa070106aa070106aa070 | ||
540 | .octa 0x19a4c11619a4c11619a4c11619a4c116 | ||
541 | .octa 0x19a4c11619a4c11619a4c11619a4c116 | ||
542 | .octa 0x1e376c081e376c081e376c081e376c08 | ||
543 | .octa 0x1e376c081e376c081e376c081e376c08 | ||
544 | .octa 0x2748774c2748774c2748774c2748774c | ||
545 | .octa 0x2748774c2748774c2748774c2748774c | ||
546 | .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 | ||
547 | .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 | ||
548 | .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 | ||
549 | .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 | ||
550 | .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a | ||
551 | .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a | ||
552 | .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f | ||
553 | .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f | ||
554 | .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 | ||
555 | .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 | ||
556 | .octa 0x748f82ee748f82ee748f82ee748f82ee | ||
557 | .octa 0x748f82ee748f82ee748f82ee748f82ee | ||
558 | .octa 0x78a5636f78a5636f78a5636f78a5636f | ||
559 | .octa 0x78a5636f78a5636f78a5636f78a5636f | ||
560 | .octa 0x84c8781484c8781484c8781484c87814 | ||
561 | .octa 0x84c8781484c8781484c8781484c87814 | ||
562 | .octa 0x8cc702088cc702088cc702088cc70208 | ||
563 | .octa 0x8cc702088cc702088cc702088cc70208 | ||
564 | .octa 0x90befffa90befffa90befffa90befffa | ||
565 | .octa 0x90befffa90befffa90befffa90befffa | ||
566 | .octa 0xa4506ceba4506ceba4506ceba4506ceb | ||
567 | .octa 0xa4506ceba4506ceba4506ceba4506ceb | ||
568 | .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 | ||
569 | .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 | ||
570 | .octa 0xc67178f2c67178f2c67178f2c67178f2 | ||
571 | .octa 0xc67178f2c67178f2c67178f2c67178f2 | ||
572 | |||
573 | .section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 | ||
574 | .align 32 | ||
575 | PSHUFFLE_BYTE_FLIP_MASK: | ||
576 | .octa 0x0c0d0e0f08090a0b0405060700010203 | ||
577 | .octa 0x0c0d0e0f08090a0b0405060700010203 | ||
578 | |||
579 | .section .rodata.cst256.K256, "aM", @progbits, 256 | ||
580 | .align 64 | ||
581 | .global K256 | ||
582 | K256: | ||
583 | .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
584 | .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
585 | .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
586 | .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
587 | .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
588 | .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
589 | .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
590 | .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
591 | .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
592 | .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
593 | .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
594 | .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
595 | .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
596 | .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
597 | .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
598 | .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile deleted file mode 100644 index 90f1ef69152e..000000000000 --- a/arch/x86/crypto/sha512-mb/Makefile +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | # SPDX-License-Identifier: GPL-2.0 | ||
2 | # | ||
3 | # Arch-specific CryptoAPI modules. | ||
4 | # | ||
5 | |||
6 | avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ | ||
7 | $(comma)4)$(comma)%ymm2,yes,no) | ||
8 | ifeq ($(avx2_supported),yes) | ||
9 | obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o | ||
10 | sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \ | ||
11 | sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o | ||
12 | endif | ||
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c deleted file mode 100644 index 26b85678012d..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ /dev/null | |||
@@ -1,1047 +0,0 @@ | |||
1 | /* | ||
2 | * Multi buffer SHA512 algorithm Glue Code | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
55 | |||
56 | #include <crypto/internal/hash.h> | ||
57 | #include <linux/init.h> | ||
58 | #include <linux/module.h> | ||
59 | #include <linux/mm.h> | ||
60 | #include <linux/cryptohash.h> | ||
61 | #include <linux/types.h> | ||
62 | #include <linux/list.h> | ||
63 | #include <crypto/scatterwalk.h> | ||
64 | #include <crypto/sha.h> | ||
65 | #include <crypto/mcryptd.h> | ||
66 | #include <crypto/crypto_wq.h> | ||
67 | #include <asm/byteorder.h> | ||
68 | #include <linux/hardirq.h> | ||
69 | #include <asm/fpu/api.h> | ||
70 | #include "sha512_mb_ctx.h" | ||
71 | |||
72 | #define FLUSH_INTERVAL 1000 /* in usec */ | ||
73 | |||
74 | static struct mcryptd_alg_state sha512_mb_alg_state; | ||
75 | |||
76 | struct sha512_mb_ctx { | ||
77 | struct mcryptd_ahash *mcryptd_tfm; | ||
78 | }; | ||
79 | |||
80 | static inline struct mcryptd_hash_request_ctx | ||
81 | *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx) | ||
82 | { | ||
83 | struct ahash_request *areq; | ||
84 | |||
85 | areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); | ||
86 | return container_of(areq, struct mcryptd_hash_request_ctx, areq); | ||
87 | } | ||
88 | |||
89 | static inline struct ahash_request | ||
90 | *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) | ||
91 | { | ||
92 | return container_of((void *) ctx, struct ahash_request, __ctx); | ||
93 | } | ||
94 | |||
95 | static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, | ||
96 | struct ahash_request *areq) | ||
97 | { | ||
98 | rctx->flag = HASH_UPDATE; | ||
99 | } | ||
100 | |||
101 | static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state); | ||
102 | static asmlinkage struct job_sha512* (*sha512_job_mgr_submit) | ||
103 | (struct sha512_mb_mgr *state, | ||
104 | struct job_sha512 *job); | ||
105 | static asmlinkage struct job_sha512* (*sha512_job_mgr_flush) | ||
106 | (struct sha512_mb_mgr *state); | ||
107 | static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job) | ||
108 | (struct sha512_mb_mgr *state); | ||
109 | |||
110 | inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], | ||
111 | uint64_t total_len) | ||
112 | { | ||
113 | uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1); | ||
114 | |||
115 | memset(&padblock[i], 0, SHA512_BLOCK_SIZE); | ||
116 | padblock[i] = 0x80; | ||
117 | |||
118 | i += ((SHA512_BLOCK_SIZE - 1) & | ||
119 | (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1))) | ||
120 | + 1 + SHA512_PADLENGTHFIELD_SIZE; | ||
121 | |||
122 | #if SHA512_PADLENGTHFIELD_SIZE == 16 | ||
123 | *((uint64_t *) &padblock[i - 16]) = 0; | ||
124 | #endif | ||
125 | |||
126 | *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); | ||
127 | |||
128 | /* Number of extra blocks to hash */ | ||
129 | return i >> SHA512_LOG2_BLOCK_SIZE; | ||
130 | } | ||
131 | |||
132 | static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit | ||
133 | (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx) | ||
134 | { | ||
135 | while (ctx) { | ||
136 | if (ctx->status & HASH_CTX_STS_COMPLETE) { | ||
137 | /* Clear PROCESSING bit */ | ||
138 | ctx->status = HASH_CTX_STS_COMPLETE; | ||
139 | return ctx; | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * If the extra blocks are empty, begin hashing what remains | ||
144 | * in the user's buffer. | ||
145 | */ | ||
146 | if (ctx->partial_block_buffer_length == 0 && | ||
147 | ctx->incoming_buffer_length) { | ||
148 | |||
149 | const void *buffer = ctx->incoming_buffer; | ||
150 | uint32_t len = ctx->incoming_buffer_length; | ||
151 | uint32_t copy_len; | ||
152 | |||
153 | /* | ||
154 | * Only entire blocks can be hashed. | ||
155 | * Copy remainder to extra blocks buffer. | ||
156 | */ | ||
157 | copy_len = len & (SHA512_BLOCK_SIZE-1); | ||
158 | |||
159 | if (copy_len) { | ||
160 | len -= copy_len; | ||
161 | memcpy(ctx->partial_block_buffer, | ||
162 | ((const char *) buffer + len), | ||
163 | copy_len); | ||
164 | ctx->partial_block_buffer_length = copy_len; | ||
165 | } | ||
166 | |||
167 | ctx->incoming_buffer_length = 0; | ||
168 | |||
169 | /* len should be a multiple of the block size now */ | ||
170 | assert((len % SHA512_BLOCK_SIZE) == 0); | ||
171 | |||
172 | /* Set len to the number of blocks to be hashed */ | ||
173 | len >>= SHA512_LOG2_BLOCK_SIZE; | ||
174 | |||
175 | if (len) { | ||
176 | |||
177 | ctx->job.buffer = (uint8_t *) buffer; | ||
178 | ctx->job.len = len; | ||
179 | ctx = (struct sha512_hash_ctx *) | ||
180 | sha512_job_mgr_submit(&mgr->mgr, | ||
181 | &ctx->job); | ||
182 | continue; | ||
183 | } | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * If the extra blocks are not empty, then we are | ||
188 | * either on the last block(s) or we need more | ||
189 | * user input before continuing. | ||
190 | */ | ||
191 | if (ctx->status & HASH_CTX_STS_LAST) { | ||
192 | |||
193 | uint8_t *buf = ctx->partial_block_buffer; | ||
194 | uint32_t n_extra_blocks = | ||
195 | sha512_pad(buf, ctx->total_length); | ||
196 | |||
197 | ctx->status = (HASH_CTX_STS_PROCESSING | | ||
198 | HASH_CTX_STS_COMPLETE); | ||
199 | ctx->job.buffer = buf; | ||
200 | ctx->job.len = (uint32_t) n_extra_blocks; | ||
201 | ctx = (struct sha512_hash_ctx *) | ||
202 | sha512_job_mgr_submit(&mgr->mgr, &ctx->job); | ||
203 | continue; | ||
204 | } | ||
205 | |||
206 | if (ctx) | ||
207 | ctx->status = HASH_CTX_STS_IDLE; | ||
208 | return ctx; | ||
209 | } | ||
210 | |||
211 | return NULL; | ||
212 | } | ||
213 | |||
214 | static struct sha512_hash_ctx | ||
215 | *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate) | ||
216 | { | ||
217 | /* | ||
218 | * If get_comp_job returns NULL, there are no jobs complete. | ||
219 | * If get_comp_job returns a job, verify that it is safe to return to | ||
220 | * the user. | ||
221 | * If it is not ready, resubmit the job to finish processing. | ||
222 | * If sha512_ctx_mgr_resubmit returned a job, it is ready to be | ||
223 | * returned. | ||
224 | * Otherwise, all jobs currently being managed by the hash_ctx_mgr | ||
225 | * still need processing. | ||
226 | */ | ||
227 | struct sha512_ctx_mgr *mgr; | ||
228 | struct sha512_hash_ctx *ctx; | ||
229 | unsigned long flags; | ||
230 | |||
231 | mgr = cstate->mgr; | ||
232 | spin_lock_irqsave(&cstate->work_lock, flags); | ||
233 | ctx = (struct sha512_hash_ctx *) | ||
234 | sha512_job_mgr_get_comp_job(&mgr->mgr); | ||
235 | ctx = sha512_ctx_mgr_resubmit(mgr, ctx); | ||
236 | spin_unlock_irqrestore(&cstate->work_lock, flags); | ||
237 | return ctx; | ||
238 | } | ||
239 | |||
240 | static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) | ||
241 | { | ||
242 | sha512_job_mgr_init(&mgr->mgr); | ||
243 | } | ||
244 | |||
245 | static struct sha512_hash_ctx | ||
246 | *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate, | ||
247 | struct sha512_hash_ctx *ctx, | ||
248 | const void *buffer, | ||
249 | uint32_t len, | ||
250 | int flags) | ||
251 | { | ||
252 | struct sha512_ctx_mgr *mgr; | ||
253 | unsigned long irqflags; | ||
254 | |||
255 | mgr = cstate->mgr; | ||
256 | spin_lock_irqsave(&cstate->work_lock, irqflags); | ||
257 | if (flags & ~(HASH_UPDATE | HASH_LAST)) { | ||
258 | /* User should not pass anything other than UPDATE or LAST */ | ||
259 | ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; | ||
260 | goto unlock; | ||
261 | } | ||
262 | |||
263 | if (ctx->status & HASH_CTX_STS_PROCESSING) { | ||
264 | /* Cannot submit to a currently processing job. */ | ||
265 | ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; | ||
266 | goto unlock; | ||
267 | } | ||
268 | |||
269 | if (ctx->status & HASH_CTX_STS_COMPLETE) { | ||
270 | /* Cannot update a finished job. */ | ||
271 | ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; | ||
272 | goto unlock; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * If we made it here, there were no errors during this call to | ||
277 | * submit | ||
278 | */ | ||
279 | ctx->error = HASH_CTX_ERROR_NONE; | ||
280 | |||
281 | /* Store buffer ptr info from user */ | ||
282 | ctx->incoming_buffer = buffer; | ||
283 | ctx->incoming_buffer_length = len; | ||
284 | |||
285 | /* | ||
286 | * Store the user's request flags and mark this ctx as currently being | ||
287 | * processed. | ||
288 | */ | ||
289 | ctx->status = (flags & HASH_LAST) ? | ||
290 | (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : | ||
291 | HASH_CTX_STS_PROCESSING; | ||
292 | |||
293 | /* Advance byte counter */ | ||
294 | ctx->total_length += len; | ||
295 | |||
296 | /* | ||
297 | * If there is anything currently buffered in the extra blocks, | ||
298 | * append to it until it contains a whole block. | ||
299 | * Or if the user's buffer contains less than a whole block, | ||
300 | * append as much as possible to the extra block. | ||
301 | */ | ||
302 | if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) { | ||
303 | /* Compute how many bytes to copy from user buffer into extra | ||
304 | * block | ||
305 | */ | ||
306 | uint32_t copy_len = SHA512_BLOCK_SIZE - | ||
307 | ctx->partial_block_buffer_length; | ||
308 | if (len < copy_len) | ||
309 | copy_len = len; | ||
310 | |||
311 | if (copy_len) { | ||
312 | /* Copy and update relevant pointers and counters */ | ||
313 | memcpy | ||
314 | (&ctx->partial_block_buffer[ctx->partial_block_buffer_length], | ||
315 | buffer, copy_len); | ||
316 | |||
317 | ctx->partial_block_buffer_length += copy_len; | ||
318 | ctx->incoming_buffer = (const void *) | ||
319 | ((const char *)buffer + copy_len); | ||
320 | ctx->incoming_buffer_length = len - copy_len; | ||
321 | } | ||
322 | |||
323 | /* The extra block should never contain more than 1 block | ||
324 | * here | ||
325 | */ | ||
326 | assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE); | ||
327 | |||
328 | /* If the extra block buffer contains exactly 1 block, it can | ||
329 | * be hashed. | ||
330 | */ | ||
331 | if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) { | ||
332 | ctx->partial_block_buffer_length = 0; | ||
333 | |||
334 | ctx->job.buffer = ctx->partial_block_buffer; | ||
335 | ctx->job.len = 1; | ||
336 | ctx = (struct sha512_hash_ctx *) | ||
337 | sha512_job_mgr_submit(&mgr->mgr, &ctx->job); | ||
338 | } | ||
339 | } | ||
340 | |||
341 | ctx = sha512_ctx_mgr_resubmit(mgr, ctx); | ||
342 | unlock: | ||
343 | spin_unlock_irqrestore(&cstate->work_lock, irqflags); | ||
344 | return ctx; | ||
345 | } | ||
346 | |||
347 | static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate) | ||
348 | { | ||
349 | struct sha512_ctx_mgr *mgr; | ||
350 | struct sha512_hash_ctx *ctx; | ||
351 | unsigned long flags; | ||
352 | |||
353 | mgr = cstate->mgr; | ||
354 | spin_lock_irqsave(&cstate->work_lock, flags); | ||
355 | while (1) { | ||
356 | ctx = (struct sha512_hash_ctx *) | ||
357 | sha512_job_mgr_flush(&mgr->mgr); | ||
358 | |||
359 | /* If flush returned 0, there are no more jobs in flight. */ | ||
360 | if (!ctx) | ||
361 | break; | ||
362 | |||
363 | /* | ||
364 | * If flush returned a job, resubmit the job to finish | ||
365 | * processing. | ||
366 | */ | ||
367 | ctx = sha512_ctx_mgr_resubmit(mgr, ctx); | ||
368 | |||
369 | /* | ||
370 | * If sha512_ctx_mgr_resubmit returned a job, it is ready to | ||
371 | * be returned. Otherwise, all jobs currently being managed by | ||
372 | * the sha512_ctx_mgr still need processing. Loop. | ||
373 | */ | ||
374 | if (ctx) | ||
375 | break; | ||
376 | } | ||
377 | spin_unlock_irqrestore(&cstate->work_lock, flags); | ||
378 | return ctx; | ||
379 | } | ||
380 | |||
381 | static int sha512_mb_init(struct ahash_request *areq) | ||
382 | { | ||
383 | struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); | ||
384 | |||
385 | hash_ctx_init(sctx); | ||
386 | sctx->job.result_digest[0] = SHA512_H0; | ||
387 | sctx->job.result_digest[1] = SHA512_H1; | ||
388 | sctx->job.result_digest[2] = SHA512_H2; | ||
389 | sctx->job.result_digest[3] = SHA512_H3; | ||
390 | sctx->job.result_digest[4] = SHA512_H4; | ||
391 | sctx->job.result_digest[5] = SHA512_H5; | ||
392 | sctx->job.result_digest[6] = SHA512_H6; | ||
393 | sctx->job.result_digest[7] = SHA512_H7; | ||
394 | sctx->total_length = 0; | ||
395 | sctx->partial_block_buffer_length = 0; | ||
396 | sctx->status = HASH_CTX_STS_IDLE; | ||
397 | |||
398 | return 0; | ||
399 | } | ||
400 | |||
401 | static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx) | ||
402 | { | ||
403 | int i; | ||
404 | struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); | ||
405 | __be64 *dst = (__be64 *) rctx->out; | ||
406 | |||
407 | for (i = 0; i < 8; ++i) | ||
408 | dst[i] = cpu_to_be64(sctx->job.result_digest[i]); | ||
409 | |||
410 | return 0; | ||
411 | } | ||
412 | |||
413 | static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, | ||
414 | struct mcryptd_alg_cstate *cstate, bool flush) | ||
415 | { | ||
416 | int flag = HASH_UPDATE; | ||
417 | int nbytes, err = 0; | ||
418 | struct mcryptd_hash_request_ctx *rctx = *ret_rctx; | ||
419 | struct sha512_hash_ctx *sha_ctx; | ||
420 | |||
421 | /* more work ? */ | ||
422 | while (!(rctx->flag & HASH_DONE)) { | ||
423 | nbytes = crypto_ahash_walk_done(&rctx->walk, 0); | ||
424 | if (nbytes < 0) { | ||
425 | err = nbytes; | ||
426 | goto out; | ||
427 | } | ||
428 | /* check if the walk is done */ | ||
429 | if (crypto_ahash_walk_last(&rctx->walk)) { | ||
430 | rctx->flag |= HASH_DONE; | ||
431 | if (rctx->flag & HASH_FINAL) | ||
432 | flag |= HASH_LAST; | ||
433 | |||
434 | } | ||
435 | sha_ctx = (struct sha512_hash_ctx *) | ||
436 | ahash_request_ctx(&rctx->areq); | ||
437 | kernel_fpu_begin(); | ||
438 | sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, | ||
439 | rctx->walk.data, nbytes, flag); | ||
440 | if (!sha_ctx) { | ||
441 | if (flush) | ||
442 | sha_ctx = sha512_ctx_mgr_flush(cstate); | ||
443 | } | ||
444 | kernel_fpu_end(); | ||
445 | if (sha_ctx) | ||
446 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
447 | else { | ||
448 | rctx = NULL; | ||
449 | goto out; | ||
450 | } | ||
451 | } | ||
452 | |||
453 | /* copy the results */ | ||
454 | if (rctx->flag & HASH_FINAL) | ||
455 | sha512_mb_set_results(rctx); | ||
456 | |||
457 | out: | ||
458 | *ret_rctx = rctx; | ||
459 | return err; | ||
460 | } | ||
461 | |||
462 | static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, | ||
463 | struct mcryptd_alg_cstate *cstate, | ||
464 | int err) | ||
465 | { | ||
466 | struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); | ||
467 | struct sha512_hash_ctx *sha_ctx; | ||
468 | struct mcryptd_hash_request_ctx *req_ctx; | ||
469 | int ret; | ||
470 | unsigned long flags; | ||
471 | |||
472 | /* remove from work list */ | ||
473 | spin_lock_irqsave(&cstate->work_lock, flags); | ||
474 | list_del(&rctx->waiter); | ||
475 | spin_unlock_irqrestore(&cstate->work_lock, flags); | ||
476 | |||
477 | if (irqs_disabled()) | ||
478 | rctx->complete(&req->base, err); | ||
479 | else { | ||
480 | local_bh_disable(); | ||
481 | rctx->complete(&req->base, err); | ||
482 | local_bh_enable(); | ||
483 | } | ||
484 | |||
485 | /* check to see if there are other jobs that are done */ | ||
486 | sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); | ||
487 | while (sha_ctx) { | ||
488 | req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
489 | ret = sha_finish_walk(&req_ctx, cstate, false); | ||
490 | if (req_ctx) { | ||
491 | spin_lock_irqsave(&cstate->work_lock, flags); | ||
492 | list_del(&req_ctx->waiter); | ||
493 | spin_unlock_irqrestore(&cstate->work_lock, flags); | ||
494 | |||
495 | req = cast_mcryptd_ctx_to_req(req_ctx); | ||
496 | if (irqs_disabled()) | ||
497 | req_ctx->complete(&req->base, ret); | ||
498 | else { | ||
499 | local_bh_disable(); | ||
500 | req_ctx->complete(&req->base, ret); | ||
501 | local_bh_enable(); | ||
502 | } | ||
503 | } | ||
504 | sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); | ||
505 | } | ||
506 | |||
507 | return 0; | ||
508 | } | ||
509 | |||
510 | static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, | ||
511 | struct mcryptd_alg_cstate *cstate) | ||
512 | { | ||
513 | unsigned long next_flush; | ||
514 | unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); | ||
515 | unsigned long flags; | ||
516 | |||
517 | /* initialize tag */ | ||
518 | rctx->tag.arrival = jiffies; /* tag the arrival time */ | ||
519 | rctx->tag.seq_num = cstate->next_seq_num++; | ||
520 | next_flush = rctx->tag.arrival + delay; | ||
521 | rctx->tag.expire = next_flush; | ||
522 | |||
523 | spin_lock_irqsave(&cstate->work_lock, flags); | ||
524 | list_add_tail(&rctx->waiter, &cstate->work_list); | ||
525 | spin_unlock_irqrestore(&cstate->work_lock, flags); | ||
526 | |||
527 | mcryptd_arm_flusher(cstate, delay); | ||
528 | } | ||
529 | |||
530 | static int sha512_mb_update(struct ahash_request *areq) | ||
531 | { | ||
532 | struct mcryptd_hash_request_ctx *rctx = | ||
533 | container_of(areq, struct mcryptd_hash_request_ctx, | ||
534 | areq); | ||
535 | struct mcryptd_alg_cstate *cstate = | ||
536 | this_cpu_ptr(sha512_mb_alg_state.alg_cstate); | ||
537 | |||
538 | struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); | ||
539 | struct sha512_hash_ctx *sha_ctx; | ||
540 | int ret = 0, nbytes; | ||
541 | |||
542 | |||
543 | /* sanity check */ | ||
544 | if (rctx->tag.cpu != smp_processor_id()) { | ||
545 | pr_err("mcryptd error: cpu clash\n"); | ||
546 | goto done; | ||
547 | } | ||
548 | |||
549 | /* need to init context */ | ||
550 | req_ctx_init(rctx, areq); | ||
551 | |||
552 | nbytes = crypto_ahash_walk_first(req, &rctx->walk); | ||
553 | |||
554 | if (nbytes < 0) { | ||
555 | ret = nbytes; | ||
556 | goto done; | ||
557 | } | ||
558 | |||
559 | if (crypto_ahash_walk_last(&rctx->walk)) | ||
560 | rctx->flag |= HASH_DONE; | ||
561 | |||
562 | /* submit */ | ||
563 | sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); | ||
564 | sha512_mb_add_list(rctx, cstate); | ||
565 | kernel_fpu_begin(); | ||
566 | sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, | ||
567 | nbytes, HASH_UPDATE); | ||
568 | kernel_fpu_end(); | ||
569 | |||
570 | /* check if anything is returned */ | ||
571 | if (!sha_ctx) | ||
572 | return -EINPROGRESS; | ||
573 | |||
574 | if (sha_ctx->error) { | ||
575 | ret = sha_ctx->error; | ||
576 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
577 | goto done; | ||
578 | } | ||
579 | |||
580 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
581 | ret = sha_finish_walk(&rctx, cstate, false); | ||
582 | |||
583 | if (!rctx) | ||
584 | return -EINPROGRESS; | ||
585 | done: | ||
586 | sha_complete_job(rctx, cstate, ret); | ||
587 | return ret; | ||
588 | } | ||
589 | |||
590 | static int sha512_mb_finup(struct ahash_request *areq) | ||
591 | { | ||
592 | struct mcryptd_hash_request_ctx *rctx = | ||
593 | container_of(areq, struct mcryptd_hash_request_ctx, | ||
594 | areq); | ||
595 | struct mcryptd_alg_cstate *cstate = | ||
596 | this_cpu_ptr(sha512_mb_alg_state.alg_cstate); | ||
597 | |||
598 | struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); | ||
599 | struct sha512_hash_ctx *sha_ctx; | ||
600 | int ret = 0, flag = HASH_UPDATE, nbytes; | ||
601 | |||
602 | /* sanity check */ | ||
603 | if (rctx->tag.cpu != smp_processor_id()) { | ||
604 | pr_err("mcryptd error: cpu clash\n"); | ||
605 | goto done; | ||
606 | } | ||
607 | |||
608 | /* need to init context */ | ||
609 | req_ctx_init(rctx, areq); | ||
610 | |||
611 | nbytes = crypto_ahash_walk_first(req, &rctx->walk); | ||
612 | |||
613 | if (nbytes < 0) { | ||
614 | ret = nbytes; | ||
615 | goto done; | ||
616 | } | ||
617 | |||
618 | if (crypto_ahash_walk_last(&rctx->walk)) { | ||
619 | rctx->flag |= HASH_DONE; | ||
620 | flag = HASH_LAST; | ||
621 | } | ||
622 | |||
623 | /* submit */ | ||
624 | rctx->flag |= HASH_FINAL; | ||
625 | sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); | ||
626 | sha512_mb_add_list(rctx, cstate); | ||
627 | |||
628 | kernel_fpu_begin(); | ||
629 | sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, | ||
630 | nbytes, flag); | ||
631 | kernel_fpu_end(); | ||
632 | |||
633 | /* check if anything is returned */ | ||
634 | if (!sha_ctx) | ||
635 | return -EINPROGRESS; | ||
636 | |||
637 | if (sha_ctx->error) { | ||
638 | ret = sha_ctx->error; | ||
639 | goto done; | ||
640 | } | ||
641 | |||
642 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
643 | ret = sha_finish_walk(&rctx, cstate, false); | ||
644 | if (!rctx) | ||
645 | return -EINPROGRESS; | ||
646 | done: | ||
647 | sha_complete_job(rctx, cstate, ret); | ||
648 | return ret; | ||
649 | } | ||
650 | |||
651 | static int sha512_mb_final(struct ahash_request *areq) | ||
652 | { | ||
653 | struct mcryptd_hash_request_ctx *rctx = | ||
654 | container_of(areq, struct mcryptd_hash_request_ctx, | ||
655 | areq); | ||
656 | struct mcryptd_alg_cstate *cstate = | ||
657 | this_cpu_ptr(sha512_mb_alg_state.alg_cstate); | ||
658 | |||
659 | struct sha512_hash_ctx *sha_ctx; | ||
660 | int ret = 0; | ||
661 | u8 data; | ||
662 | |||
663 | /* sanity check */ | ||
664 | if (rctx->tag.cpu != smp_processor_id()) { | ||
665 | pr_err("mcryptd error: cpu clash\n"); | ||
666 | goto done; | ||
667 | } | ||
668 | |||
669 | /* need to init context */ | ||
670 | req_ctx_init(rctx, areq); | ||
671 | |||
672 | rctx->flag |= HASH_DONE | HASH_FINAL; | ||
673 | |||
674 | sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); | ||
675 | /* flag HASH_FINAL and 0 data size */ | ||
676 | sha512_mb_add_list(rctx, cstate); | ||
677 | kernel_fpu_begin(); | ||
678 | sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST); | ||
679 | kernel_fpu_end(); | ||
680 | |||
681 | /* check if anything is returned */ | ||
682 | if (!sha_ctx) | ||
683 | return -EINPROGRESS; | ||
684 | |||
685 | if (sha_ctx->error) { | ||
686 | ret = sha_ctx->error; | ||
687 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
688 | goto done; | ||
689 | } | ||
690 | |||
691 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
692 | ret = sha_finish_walk(&rctx, cstate, false); | ||
693 | if (!rctx) | ||
694 | return -EINPROGRESS; | ||
695 | done: | ||
696 | sha_complete_job(rctx, cstate, ret); | ||
697 | return ret; | ||
698 | } | ||
699 | |||
700 | static int sha512_mb_export(struct ahash_request *areq, void *out) | ||
701 | { | ||
702 | struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); | ||
703 | |||
704 | memcpy(out, sctx, sizeof(*sctx)); | ||
705 | |||
706 | return 0; | ||
707 | } | ||
708 | |||
709 | static int sha512_mb_import(struct ahash_request *areq, const void *in) | ||
710 | { | ||
711 | struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); | ||
712 | |||
713 | memcpy(sctx, in, sizeof(*sctx)); | ||
714 | |||
715 | return 0; | ||
716 | } | ||
717 | |||
718 | static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm) | ||
719 | { | ||
720 | struct mcryptd_ahash *mcryptd_tfm; | ||
721 | struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); | ||
722 | struct mcryptd_hash_ctx *mctx; | ||
723 | |||
724 | mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb", | ||
725 | CRYPTO_ALG_INTERNAL, | ||
726 | CRYPTO_ALG_INTERNAL); | ||
727 | if (IS_ERR(mcryptd_tfm)) | ||
728 | return PTR_ERR(mcryptd_tfm); | ||
729 | mctx = crypto_ahash_ctx(&mcryptd_tfm->base); | ||
730 | mctx->alg_state = &sha512_mb_alg_state; | ||
731 | ctx->mcryptd_tfm = mcryptd_tfm; | ||
732 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
733 | sizeof(struct ahash_request) + | ||
734 | crypto_ahash_reqsize(&mcryptd_tfm->base)); | ||
735 | |||
736 | return 0; | ||
737 | } | ||
738 | |||
739 | static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm) | ||
740 | { | ||
741 | struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); | ||
742 | |||
743 | mcryptd_free_ahash(ctx->mcryptd_tfm); | ||
744 | } | ||
745 | |||
746 | static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm) | ||
747 | { | ||
748 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
749 | sizeof(struct ahash_request) + | ||
750 | sizeof(struct sha512_hash_ctx)); | ||
751 | |||
752 | return 0; | ||
753 | } | ||
754 | |||
755 | static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm) | ||
756 | { | ||
757 | struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); | ||
758 | |||
759 | mcryptd_free_ahash(ctx->mcryptd_tfm); | ||
760 | } | ||
761 | |||
762 | static struct ahash_alg sha512_mb_areq_alg = { | ||
763 | .init = sha512_mb_init, | ||
764 | .update = sha512_mb_update, | ||
765 | .final = sha512_mb_final, | ||
766 | .finup = sha512_mb_finup, | ||
767 | .export = sha512_mb_export, | ||
768 | .import = sha512_mb_import, | ||
769 | .halg = { | ||
770 | .digestsize = SHA512_DIGEST_SIZE, | ||
771 | .statesize = sizeof(struct sha512_hash_ctx), | ||
772 | .base = { | ||
773 | .cra_name = "__sha512-mb", | ||
774 | .cra_driver_name = "__intel_sha512-mb", | ||
775 | .cra_priority = 100, | ||
776 | /* | ||
777 | * use ASYNC flag as some buffers in multi-buffer | ||
778 | * algo may not have completed before hashing thread | ||
779 | * sleep | ||
780 | */ | ||
781 | .cra_flags = CRYPTO_ALG_ASYNC | | ||
782 | CRYPTO_ALG_INTERNAL, | ||
783 | .cra_blocksize = SHA512_BLOCK_SIZE, | ||
784 | .cra_module = THIS_MODULE, | ||
785 | .cra_list = LIST_HEAD_INIT | ||
786 | (sha512_mb_areq_alg.halg.base.cra_list), | ||
787 | .cra_init = sha512_mb_areq_init_tfm, | ||
788 | .cra_exit = sha512_mb_areq_exit_tfm, | ||
789 | .cra_ctxsize = sizeof(struct sha512_hash_ctx), | ||
790 | } | ||
791 | } | ||
792 | }; | ||
793 | |||
794 | static int sha512_mb_async_init(struct ahash_request *req) | ||
795 | { | ||
796 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
797 | struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
798 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
799 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
800 | |||
801 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
802 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
803 | return crypto_ahash_init(mcryptd_req); | ||
804 | } | ||
805 | |||
806 | static int sha512_mb_async_update(struct ahash_request *req) | ||
807 | { | ||
808 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
809 | |||
810 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
811 | struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
812 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
813 | |||
814 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
815 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
816 | return crypto_ahash_update(mcryptd_req); | ||
817 | } | ||
818 | |||
819 | static int sha512_mb_async_finup(struct ahash_request *req) | ||
820 | { | ||
821 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
822 | |||
823 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
824 | struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
825 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
826 | |||
827 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
828 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
829 | return crypto_ahash_finup(mcryptd_req); | ||
830 | } | ||
831 | |||
832 | static int sha512_mb_async_final(struct ahash_request *req) | ||
833 | { | ||
834 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
835 | |||
836 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
837 | struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
838 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
839 | |||
840 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
841 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
842 | return crypto_ahash_final(mcryptd_req); | ||
843 | } | ||
844 | |||
845 | static int sha512_mb_async_digest(struct ahash_request *req) | ||
846 | { | ||
847 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
848 | struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
849 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
850 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
851 | |||
852 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
853 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
854 | return crypto_ahash_digest(mcryptd_req); | ||
855 | } | ||
856 | |||
857 | static int sha512_mb_async_export(struct ahash_request *req, void *out) | ||
858 | { | ||
859 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
860 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
861 | struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
862 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
863 | |||
864 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
865 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
866 | return crypto_ahash_export(mcryptd_req, out); | ||
867 | } | ||
868 | |||
869 | static int sha512_mb_async_import(struct ahash_request *req, const void *in) | ||
870 | { | ||
871 | struct ahash_request *mcryptd_req = ahash_request_ctx(req); | ||
872 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
873 | struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); | ||
874 | struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; | ||
875 | struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); | ||
876 | struct mcryptd_hash_request_ctx *rctx; | ||
877 | struct ahash_request *areq; | ||
878 | |||
879 | memcpy(mcryptd_req, req, sizeof(*req)); | ||
880 | ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); | ||
881 | rctx = ahash_request_ctx(mcryptd_req); | ||
882 | |||
883 | areq = &rctx->areq; | ||
884 | |||
885 | ahash_request_set_tfm(areq, child); | ||
886 | ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, | ||
887 | rctx->complete, req); | ||
888 | |||
889 | return crypto_ahash_import(mcryptd_req, in); | ||
890 | } | ||
891 | |||
892 | static struct ahash_alg sha512_mb_async_alg = { | ||
893 | .init = sha512_mb_async_init, | ||
894 | .update = sha512_mb_async_update, | ||
895 | .final = sha512_mb_async_final, | ||
896 | .finup = sha512_mb_async_finup, | ||
897 | .digest = sha512_mb_async_digest, | ||
898 | .export = sha512_mb_async_export, | ||
899 | .import = sha512_mb_async_import, | ||
900 | .halg = { | ||
901 | .digestsize = SHA512_DIGEST_SIZE, | ||
902 | .statesize = sizeof(struct sha512_hash_ctx), | ||
903 | .base = { | ||
904 | .cra_name = "sha512", | ||
905 | .cra_driver_name = "sha512_mb", | ||
906 | /* | ||
907 | * Low priority, since with few concurrent hash requests | ||
908 | * this is extremely slow due to the flush delay. Users | ||
909 | * whose workloads would benefit from this can request | ||
910 | * it explicitly by driver name, or can increase its | ||
911 | * priority at runtime using NETLINK_CRYPTO. | ||
912 | */ | ||
913 | .cra_priority = 50, | ||
914 | .cra_flags = CRYPTO_ALG_ASYNC, | ||
915 | .cra_blocksize = SHA512_BLOCK_SIZE, | ||
916 | .cra_module = THIS_MODULE, | ||
917 | .cra_list = LIST_HEAD_INIT | ||
918 | (sha512_mb_async_alg.halg.base.cra_list), | ||
919 | .cra_init = sha512_mb_async_init_tfm, | ||
920 | .cra_exit = sha512_mb_async_exit_tfm, | ||
921 | .cra_ctxsize = sizeof(struct sha512_mb_ctx), | ||
922 | .cra_alignmask = 0, | ||
923 | }, | ||
924 | }, | ||
925 | }; | ||
926 | |||
927 | static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate) | ||
928 | { | ||
929 | struct mcryptd_hash_request_ctx *rctx; | ||
930 | unsigned long cur_time; | ||
931 | unsigned long next_flush = 0; | ||
932 | struct sha512_hash_ctx *sha_ctx; | ||
933 | |||
934 | |||
935 | cur_time = jiffies; | ||
936 | |||
937 | while (!list_empty(&cstate->work_list)) { | ||
938 | rctx = list_entry(cstate->work_list.next, | ||
939 | struct mcryptd_hash_request_ctx, waiter); | ||
940 | if time_before(cur_time, rctx->tag.expire) | ||
941 | break; | ||
942 | kernel_fpu_begin(); | ||
943 | sha_ctx = (struct sha512_hash_ctx *) | ||
944 | sha512_ctx_mgr_flush(cstate); | ||
945 | kernel_fpu_end(); | ||
946 | if (!sha_ctx) { | ||
947 | pr_err("sha512_mb error: nothing got flushed for" | ||
948 | " non-empty list\n"); | ||
949 | break; | ||
950 | } | ||
951 | rctx = cast_hash_to_mcryptd_ctx(sha_ctx); | ||
952 | sha_finish_walk(&rctx, cstate, true); | ||
953 | sha_complete_job(rctx, cstate, 0); | ||
954 | } | ||
955 | |||
956 | if (!list_empty(&cstate->work_list)) { | ||
957 | rctx = list_entry(cstate->work_list.next, | ||
958 | struct mcryptd_hash_request_ctx, waiter); | ||
959 | /* get the hash context and then flush time */ | ||
960 | next_flush = rctx->tag.expire; | ||
961 | mcryptd_arm_flusher(cstate, get_delay(next_flush)); | ||
962 | } | ||
963 | return next_flush; | ||
964 | } | ||
965 | |||
966 | static int __init sha512_mb_mod_init(void) | ||
967 | { | ||
968 | |||
969 | int cpu; | ||
970 | int err; | ||
971 | struct mcryptd_alg_cstate *cpu_state; | ||
972 | |||
973 | /* check for dependent cpu features */ | ||
974 | if (!boot_cpu_has(X86_FEATURE_AVX2) || | ||
975 | !boot_cpu_has(X86_FEATURE_BMI2)) | ||
976 | return -ENODEV; | ||
977 | |||
978 | /* initialize multibuffer structures */ | ||
979 | sha512_mb_alg_state.alg_cstate = | ||
980 | alloc_percpu(struct mcryptd_alg_cstate); | ||
981 | |||
982 | sha512_job_mgr_init = sha512_mb_mgr_init_avx2; | ||
983 | sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2; | ||
984 | sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2; | ||
985 | sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2; | ||
986 | |||
987 | if (!sha512_mb_alg_state.alg_cstate) | ||
988 | return -ENOMEM; | ||
989 | for_each_possible_cpu(cpu) { | ||
990 | cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); | ||
991 | cpu_state->next_flush = 0; | ||
992 | cpu_state->next_seq_num = 0; | ||
993 | cpu_state->flusher_engaged = false; | ||
994 | INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); | ||
995 | cpu_state->cpu = cpu; | ||
996 | cpu_state->alg_state = &sha512_mb_alg_state; | ||
997 | cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr), | ||
998 | GFP_KERNEL); | ||
999 | if (!cpu_state->mgr) | ||
1000 | goto err2; | ||
1001 | sha512_ctx_mgr_init(cpu_state->mgr); | ||
1002 | INIT_LIST_HEAD(&cpu_state->work_list); | ||
1003 | spin_lock_init(&cpu_state->work_lock); | ||
1004 | } | ||
1005 | sha512_mb_alg_state.flusher = &sha512_mb_flusher; | ||
1006 | |||
1007 | err = crypto_register_ahash(&sha512_mb_areq_alg); | ||
1008 | if (err) | ||
1009 | goto err2; | ||
1010 | err = crypto_register_ahash(&sha512_mb_async_alg); | ||
1011 | if (err) | ||
1012 | goto err1; | ||
1013 | |||
1014 | |||
1015 | return 0; | ||
1016 | err1: | ||
1017 | crypto_unregister_ahash(&sha512_mb_areq_alg); | ||
1018 | err2: | ||
1019 | for_each_possible_cpu(cpu) { | ||
1020 | cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); | ||
1021 | kfree(cpu_state->mgr); | ||
1022 | } | ||
1023 | free_percpu(sha512_mb_alg_state.alg_cstate); | ||
1024 | return -ENODEV; | ||
1025 | } | ||
1026 | |||
1027 | static void __exit sha512_mb_mod_fini(void) | ||
1028 | { | ||
1029 | int cpu; | ||
1030 | struct mcryptd_alg_cstate *cpu_state; | ||
1031 | |||
1032 | crypto_unregister_ahash(&sha512_mb_async_alg); | ||
1033 | crypto_unregister_ahash(&sha512_mb_areq_alg); | ||
1034 | for_each_possible_cpu(cpu) { | ||
1035 | cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); | ||
1036 | kfree(cpu_state->mgr); | ||
1037 | } | ||
1038 | free_percpu(sha512_mb_alg_state.alg_cstate); | ||
1039 | } | ||
1040 | |||
1041 | module_init(sha512_mb_mod_init); | ||
1042 | module_exit(sha512_mb_mod_fini); | ||
1043 | |||
1044 | MODULE_LICENSE("GPL"); | ||
1045 | MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated"); | ||
1046 | |||
1047 | MODULE_ALIAS("sha512"); | ||
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h deleted file mode 100644 index e5c465bd821e..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h +++ /dev/null | |||
@@ -1,128 +0,0 @@ | |||
1 | /* | ||
2 | * Header file for multi buffer SHA512 context | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #ifndef _SHA_MB_CTX_INTERNAL_H | ||
55 | #define _SHA_MB_CTX_INTERNAL_H | ||
56 | |||
57 | #include "sha512_mb_mgr.h" | ||
58 | |||
59 | #define HASH_UPDATE 0x00 | ||
60 | #define HASH_LAST 0x01 | ||
61 | #define HASH_DONE 0x02 | ||
62 | #define HASH_FINAL 0x04 | ||
63 | |||
64 | #define HASH_CTX_STS_IDLE 0x00 | ||
65 | #define HASH_CTX_STS_PROCESSING 0x01 | ||
66 | #define HASH_CTX_STS_LAST 0x02 | ||
67 | #define HASH_CTX_STS_COMPLETE 0x04 | ||
68 | |||
69 | enum hash_ctx_error { | ||
70 | HASH_CTX_ERROR_NONE = 0, | ||
71 | HASH_CTX_ERROR_INVALID_FLAGS = -1, | ||
72 | HASH_CTX_ERROR_ALREADY_PROCESSING = -2, | ||
73 | HASH_CTX_ERROR_ALREADY_COMPLETED = -3, | ||
74 | }; | ||
75 | |||
76 | #define hash_ctx_user_data(ctx) ((ctx)->user_data) | ||
77 | #define hash_ctx_digest(ctx) ((ctx)->job.result_digest) | ||
78 | #define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) | ||
79 | #define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) | ||
80 | #define hash_ctx_status(ctx) ((ctx)->status) | ||
81 | #define hash_ctx_error(ctx) ((ctx)->error) | ||
82 | #define hash_ctx_init(ctx) \ | ||
83 | do { \ | ||
84 | (ctx)->error = HASH_CTX_ERROR_NONE; \ | ||
85 | (ctx)->status = HASH_CTX_STS_COMPLETE; \ | ||
86 | } while (0) | ||
87 | |||
88 | /* Hash Constants and Typedefs */ | ||
89 | #define SHA512_DIGEST_LENGTH 8 | ||
90 | #define SHA512_LOG2_BLOCK_SIZE 7 | ||
91 | |||
92 | #define SHA512_PADLENGTHFIELD_SIZE 16 | ||
93 | |||
94 | #ifdef SHA_MB_DEBUG | ||
95 | #define assert(expr) \ | ||
96 | do { \ | ||
97 | if (unlikely(!(expr))) { \ | ||
98 | printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ | ||
99 | #expr, __FILE__, __func__, __LINE__); \ | ||
100 | } \ | ||
101 | } while (0) | ||
102 | #else | ||
103 | #define assert(expr) do {} while (0) | ||
104 | #endif | ||
105 | |||
106 | struct sha512_ctx_mgr { | ||
107 | struct sha512_mb_mgr mgr; | ||
108 | }; | ||
109 | |||
110 | /* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */ | ||
111 | |||
112 | struct sha512_hash_ctx { | ||
113 | /* Must be at struct offset 0 */ | ||
114 | struct job_sha512 job; | ||
115 | /* status flag */ | ||
116 | int status; | ||
117 | /* error flag */ | ||
118 | int error; | ||
119 | |||
120 | uint64_t total_length; | ||
121 | const void *incoming_buffer; | ||
122 | uint32_t incoming_buffer_length; | ||
123 | uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; | ||
124 | uint32_t partial_block_buffer_length; | ||
125 | void *user_data; | ||
126 | }; | ||
127 | |||
128 | #endif | ||
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h deleted file mode 100644 index 178f17eef382..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h +++ /dev/null | |||
@@ -1,104 +0,0 @@ | |||
1 | /* | ||
2 | * Header file for multi buffer SHA512 algorithm manager | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #ifndef __SHA_MB_MGR_H | ||
55 | #define __SHA_MB_MGR_H | ||
56 | |||
57 | #include <linux/types.h> | ||
58 | |||
59 | #define NUM_SHA512_DIGEST_WORDS 8 | ||
60 | |||
61 | enum job_sts {STS_UNKNOWN = 0, | ||
62 | STS_BEING_PROCESSED = 1, | ||
63 | STS_COMPLETED = 2, | ||
64 | STS_INTERNAL_ERROR = 3, | ||
65 | STS_ERROR = 4 | ||
66 | }; | ||
67 | |||
68 | struct job_sha512 { | ||
69 | u8 *buffer; | ||
70 | u64 len; | ||
71 | u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32); | ||
72 | enum job_sts status; | ||
73 | void *user_data; | ||
74 | }; | ||
75 | |||
76 | struct sha512_args_x4 { | ||
77 | uint64_t digest[8][4]; | ||
78 | uint8_t *data_ptr[4]; | ||
79 | }; | ||
80 | |||
81 | struct sha512_lane_data { | ||
82 | struct job_sha512 *job_in_lane; | ||
83 | }; | ||
84 | |||
85 | struct sha512_mb_mgr { | ||
86 | struct sha512_args_x4 args; | ||
87 | |||
88 | uint64_t lens[4]; | ||
89 | |||
90 | /* each byte is index (0...7) of unused lanes */ | ||
91 | uint64_t unused_lanes; | ||
92 | /* byte 4 is set to FF as a flag */ | ||
93 | struct sha512_lane_data ldata[4]; | ||
94 | }; | ||
95 | |||
96 | #define SHA512_MB_MGR_NUM_LANES_AVX2 4 | ||
97 | |||
98 | void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state); | ||
99 | struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state, | ||
100 | struct job_sha512 *job); | ||
101 | struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state); | ||
102 | struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state); | ||
103 | |||
104 | #endif | ||
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S deleted file mode 100644 index cf2636d4c9ba..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S +++ /dev/null | |||
@@ -1,281 +0,0 @@ | |||
1 | /* | ||
2 | * Header file for multi buffer SHA256 algorithm data structure | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | # Macros for defining data structures | ||
55 | |||
56 | # Usage example | ||
57 | |||
58 | #START_FIELDS # JOB_AES | ||
59 | ### name size align | ||
60 | #FIELD _plaintext, 8, 8 # pointer to plaintext | ||
61 | #FIELD _ciphertext, 8, 8 # pointer to ciphertext | ||
62 | #FIELD _IV, 16, 8 # IV | ||
63 | #FIELD _keys, 8, 8 # pointer to keys | ||
64 | #FIELD _len, 4, 4 # length in bytes | ||
65 | #FIELD _status, 4, 4 # status enumeration | ||
66 | #FIELD _user_data, 8, 8 # pointer to user data | ||
67 | #UNION _union, size1, align1, \ | ||
68 | # size2, align2, \ | ||
69 | # size3, align3, \ | ||
70 | # ... | ||
71 | #END_FIELDS | ||
72 | #%assign _JOB_AES_size _FIELD_OFFSET | ||
73 | #%assign _JOB_AES_align _STRUCT_ALIGN | ||
74 | |||
75 | ######################################################################### | ||
76 | |||
77 | # Alternate "struc-like" syntax: | ||
78 | # STRUCT job_aes2 | ||
79 | # RES_Q .plaintext, 1 | ||
80 | # RES_Q .ciphertext, 1 | ||
81 | # RES_DQ .IV, 1 | ||
82 | # RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN | ||
83 | # RES_U .union, size1, align1, \ | ||
84 | # size2, align2, \ | ||
85 | # ... | ||
86 | # ENDSTRUCT | ||
87 | # # Following only needed if nesting | ||
88 | # %assign job_aes2_size _FIELD_OFFSET | ||
89 | # %assign job_aes2_align _STRUCT_ALIGN | ||
90 | # | ||
91 | # RES_* macros take a name, a count and an optional alignment. | ||
92 | # The count in in terms of the base size of the macro, and the | ||
93 | # default alignment is the base size. | ||
94 | # The macros are: | ||
95 | # Macro Base size | ||
96 | # RES_B 1 | ||
97 | # RES_W 2 | ||
98 | # RES_D 4 | ||
99 | # RES_Q 8 | ||
100 | # RES_DQ 16 | ||
101 | # RES_Y 32 | ||
102 | # RES_Z 64 | ||
103 | # | ||
104 | # RES_U defines a union. It's arguments are a name and two or more | ||
105 | # pairs of "size, alignment" | ||
106 | # | ||
107 | # The two assigns are only needed if this structure is being nested | ||
108 | # within another. Even if the assigns are not done, one can still use | ||
109 | # STRUCT_NAME_size as the size of the structure. | ||
110 | # | ||
111 | # Note that for nesting, you still need to assign to STRUCT_NAME_size. | ||
112 | # | ||
113 | # The differences between this and using "struc" directly are that each | ||
114 | # type is implicitly aligned to its natural length (although this can be | ||
115 | # over-ridden with an explicit third parameter), and that the structure | ||
116 | # is padded at the end to its overall alignment. | ||
117 | # | ||
118 | |||
119 | ######################################################################### | ||
120 | |||
121 | #ifndef _DATASTRUCT_ASM_ | ||
122 | #define _DATASTRUCT_ASM_ | ||
123 | |||
124 | #define PTR_SZ 8 | ||
125 | #define SHA512_DIGEST_WORD_SIZE 8 | ||
126 | #define SHA512_MB_MGR_NUM_LANES_AVX2 4 | ||
127 | #define NUM_SHA512_DIGEST_WORDS 8 | ||
128 | #define SZ4 4*SHA512_DIGEST_WORD_SIZE | ||
129 | #define ROUNDS 80*SZ4 | ||
130 | #define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8) | ||
131 | |||
132 | # START_FIELDS | ||
133 | .macro START_FIELDS | ||
134 | _FIELD_OFFSET = 0 | ||
135 | _STRUCT_ALIGN = 0 | ||
136 | .endm | ||
137 | |||
138 | # FIELD name size align | ||
139 | .macro FIELD name size align | ||
140 | _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) | ||
141 | \name = _FIELD_OFFSET | ||
142 | _FIELD_OFFSET = _FIELD_OFFSET + (\size) | ||
143 | .if (\align > _STRUCT_ALIGN) | ||
144 | _STRUCT_ALIGN = \align | ||
145 | .endif | ||
146 | .endm | ||
147 | |||
148 | # END_FIELDS | ||
149 | .macro END_FIELDS | ||
150 | _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) | ||
151 | .endm | ||
152 | |||
153 | .macro STRUCT p1 | ||
154 | START_FIELDS | ||
155 | .struc \p1 | ||
156 | .endm | ||
157 | |||
158 | .macro ENDSTRUCT | ||
159 | tmp = _FIELD_OFFSET | ||
160 | END_FIELDS | ||
161 | tmp = (_FIELD_OFFSET - ##tmp) | ||
162 | .if (tmp > 0) | ||
163 | .lcomm tmp | ||
164 | .endm | ||
165 | |||
166 | ## RES_int name size align | ||
167 | .macro RES_int p1 p2 p3 | ||
168 | name = \p1 | ||
169 | size = \p2 | ||
170 | align = .\p3 | ||
171 | |||
172 | _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) | ||
173 | .align align | ||
174 | .lcomm name size | ||
175 | _FIELD_OFFSET = _FIELD_OFFSET + (size) | ||
176 | .if (align > _STRUCT_ALIGN) | ||
177 | _STRUCT_ALIGN = align | ||
178 | .endif | ||
179 | .endm | ||
180 | |||
181 | # macro RES_B name, size [, align] | ||
182 | .macro RES_B _name, _size, _align=1 | ||
183 | RES_int _name _size _align | ||
184 | .endm | ||
185 | |||
186 | # macro RES_W name, size [, align] | ||
187 | .macro RES_W _name, _size, _align=2 | ||
188 | RES_int _name 2*(_size) _align | ||
189 | .endm | ||
190 | |||
191 | # macro RES_D name, size [, align] | ||
192 | .macro RES_D _name, _size, _align=4 | ||
193 | RES_int _name 4*(_size) _align | ||
194 | .endm | ||
195 | |||
196 | # macro RES_Q name, size [, align] | ||
197 | .macro RES_Q _name, _size, _align=8 | ||
198 | RES_int _name 8*(_size) _align | ||
199 | .endm | ||
200 | |||
201 | # macro RES_DQ name, size [, align] | ||
202 | .macro RES_DQ _name, _size, _align=16 | ||
203 | RES_int _name 16*(_size) _align | ||
204 | .endm | ||
205 | |||
206 | # macro RES_Y name, size [, align] | ||
207 | .macro RES_Y _name, _size, _align=32 | ||
208 | RES_int _name 32*(_size) _align | ||
209 | .endm | ||
210 | |||
211 | # macro RES_Z name, size [, align] | ||
212 | .macro RES_Z _name, _size, _align=64 | ||
213 | RES_int _name 64*(_size) _align | ||
214 | .endm | ||
215 | |||
216 | #endif | ||
217 | |||
218 | ################################################################### | ||
219 | ### Define SHA512 Out Of Order Data Structures | ||
220 | ################################################################### | ||
221 | |||
222 | START_FIELDS # LANE_DATA | ||
223 | ### name size align | ||
224 | FIELD _job_in_lane, 8, 8 # pointer to job object | ||
225 | END_FIELDS | ||
226 | |||
227 | _LANE_DATA_size = _FIELD_OFFSET | ||
228 | _LANE_DATA_align = _STRUCT_ALIGN | ||
229 | |||
230 | #################################################################### | ||
231 | |||
232 | START_FIELDS # SHA512_ARGS_X4 | ||
233 | ### name size align | ||
234 | FIELD _digest, 8*8*4, 4 # transposed digest | ||
235 | FIELD _data_ptr, 8*4, 8 # array of pointers to data | ||
236 | END_FIELDS | ||
237 | |||
238 | _SHA512_ARGS_X4_size = _FIELD_OFFSET | ||
239 | _SHA512_ARGS_X4_align = _STRUCT_ALIGN | ||
240 | |||
241 | ##################################################################### | ||
242 | |||
243 | START_FIELDS # MB_MGR | ||
244 | ### name size align | ||
245 | FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align | ||
246 | FIELD _lens, 8*4, 8 | ||
247 | FIELD _unused_lanes, 8, 8 | ||
248 | FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align | ||
249 | END_FIELDS | ||
250 | |||
251 | _MB_MGR_size = _FIELD_OFFSET | ||
252 | _MB_MGR_align = _STRUCT_ALIGN | ||
253 | |||
254 | _args_digest = _args + _digest | ||
255 | _args_data_ptr = _args + _data_ptr | ||
256 | |||
257 | ####################################################################### | ||
258 | |||
259 | ####################################################################### | ||
260 | #### Define constants | ||
261 | ####################################################################### | ||
262 | |||
263 | #define STS_UNKNOWN 0 | ||
264 | #define STS_BEING_PROCESSED 1 | ||
265 | #define STS_COMPLETED 2 | ||
266 | |||
267 | ####################################################################### | ||
268 | #### Define JOB_SHA512 structure | ||
269 | ####################################################################### | ||
270 | |||
271 | START_FIELDS # JOB_SHA512 | ||
272 | ### name size align | ||
273 | FIELD _buffer, 8, 8 # pointer to buffer | ||
274 | FIELD _len, 8, 8 # length in bytes | ||
275 | FIELD _result_digest, 8*8, 32 # Digest (output) | ||
276 | FIELD _status, 4, 4 | ||
277 | FIELD _user_data, 8, 8 | ||
278 | END_FIELDS | ||
279 | |||
280 | _JOB_SHA512_size = _FIELD_OFFSET | ||
281 | _JOB_SHA512_align = _STRUCT_ALIGN | ||
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S deleted file mode 100644 index 7c629caebc05..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S +++ /dev/null | |||
@@ -1,297 +0,0 @@ | |||
1 | /* | ||
2 | * Flush routine for SHA512 multibuffer | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #include <linux/linkage.h> | ||
55 | #include <asm/frame.h> | ||
56 | #include "sha512_mb_mgr_datastruct.S" | ||
57 | |||
58 | .extern sha512_x4_avx2 | ||
59 | |||
60 | # LINUX register definitions | ||
61 | #define arg1 %rdi | ||
62 | #define arg2 %rsi | ||
63 | |||
64 | # idx needs to be other than arg1, arg2, rbx, r12 | ||
65 | #define idx %rdx | ||
66 | |||
67 | # Common definitions | ||
68 | #define state arg1 | ||
69 | #define job arg2 | ||
70 | #define len2 arg2 | ||
71 | |||
72 | #define unused_lanes %rbx | ||
73 | #define lane_data %rbx | ||
74 | #define tmp2 %rbx | ||
75 | |||
76 | #define job_rax %rax | ||
77 | #define tmp1 %rax | ||
78 | #define size_offset %rax | ||
79 | #define tmp %rax | ||
80 | #define start_offset %rax | ||
81 | |||
82 | #define tmp3 arg1 | ||
83 | |||
84 | #define extra_blocks arg2 | ||
85 | #define p arg2 | ||
86 | |||
87 | #define tmp4 %r8 | ||
88 | #define lens0 %r8 | ||
89 | |||
90 | #define lens1 %r9 | ||
91 | #define lens2 %r10 | ||
92 | #define lens3 %r11 | ||
93 | |||
94 | .macro LABEL prefix n | ||
95 | \prefix\n\(): | ||
96 | .endm | ||
97 | |||
98 | .macro JNE_SKIP i | ||
99 | jne skip_\i | ||
100 | .endm | ||
101 | |||
102 | .altmacro | ||
103 | .macro SET_OFFSET _offset | ||
104 | offset = \_offset | ||
105 | .endm | ||
106 | .noaltmacro | ||
107 | |||
108 | # JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state) | ||
109 | # arg 1 : rcx : state | ||
110 | ENTRY(sha512_mb_mgr_flush_avx2) | ||
111 | FRAME_BEGIN | ||
112 | push %rbx | ||
113 | |||
114 | # If bit (32+3) is set, then all lanes are empty | ||
115 | mov _unused_lanes(state), unused_lanes | ||
116 | bt $32+7, unused_lanes | ||
117 | jc return_null | ||
118 | |||
119 | # find a lane with a non-null job | ||
120 | xor idx, idx | ||
121 | offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane) | ||
122 | cmpq $0, offset(state) | ||
123 | cmovne one(%rip), idx | ||
124 | offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane) | ||
125 | cmpq $0, offset(state) | ||
126 | cmovne two(%rip), idx | ||
127 | offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane) | ||
128 | cmpq $0, offset(state) | ||
129 | cmovne three(%rip), idx | ||
130 | |||
131 | # copy idx to empty lanes | ||
132 | copy_lane_data: | ||
133 | offset = (_args + _data_ptr) | ||
134 | mov offset(state,idx,8), tmp | ||
135 | |||
136 | I = 0 | ||
137 | .rep 4 | ||
138 | offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) | ||
139 | cmpq $0, offset(state) | ||
140 | .altmacro | ||
141 | JNE_SKIP %I | ||
142 | offset = (_args + _data_ptr + 8*I) | ||
143 | mov tmp, offset(state) | ||
144 | offset = (_lens + 8*I +4) | ||
145 | movl $0xFFFFFFFF, offset(state) | ||
146 | LABEL skip_ %I | ||
147 | I = (I+1) | ||
148 | .noaltmacro | ||
149 | .endr | ||
150 | |||
151 | # Find min length | ||
152 | mov _lens + 0*8(state),lens0 | ||
153 | mov lens0,idx | ||
154 | mov _lens + 1*8(state),lens1 | ||
155 | cmp idx,lens1 | ||
156 | cmovb lens1,idx | ||
157 | mov _lens + 2*8(state),lens2 | ||
158 | cmp idx,lens2 | ||
159 | cmovb lens2,idx | ||
160 | mov _lens + 3*8(state),lens3 | ||
161 | cmp idx,lens3 | ||
162 | cmovb lens3,idx | ||
163 | mov idx,len2 | ||
164 | and $0xF,idx | ||
165 | and $~0xFF,len2 | ||
166 | jz len_is_0 | ||
167 | |||
168 | sub len2, lens0 | ||
169 | sub len2, lens1 | ||
170 | sub len2, lens2 | ||
171 | sub len2, lens3 | ||
172 | shr $32,len2 | ||
173 | mov lens0, _lens + 0*8(state) | ||
174 | mov lens1, _lens + 1*8(state) | ||
175 | mov lens2, _lens + 2*8(state) | ||
176 | mov lens3, _lens + 3*8(state) | ||
177 | |||
178 | # "state" and "args" are the same address, arg1 | ||
179 | # len is arg2 | ||
180 | call sha512_x4_avx2 | ||
181 | # state and idx are intact | ||
182 | |||
183 | len_is_0: | ||
184 | # process completed job "idx" | ||
185 | imul $_LANE_DATA_size, idx, lane_data | ||
186 | lea _ldata(state, lane_data), lane_data | ||
187 | |||
188 | mov _job_in_lane(lane_data), job_rax | ||
189 | movq $0, _job_in_lane(lane_data) | ||
190 | movl $STS_COMPLETED, _status(job_rax) | ||
191 | mov _unused_lanes(state), unused_lanes | ||
192 | shl $8, unused_lanes | ||
193 | or idx, unused_lanes | ||
194 | mov unused_lanes, _unused_lanes(state) | ||
195 | |||
196 | movl $0xFFFFFFFF, _lens+4(state, idx, 8) | ||
197 | |||
198 | vmovq _args_digest+0*32(state, idx, 8), %xmm0 | ||
199 | vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 | ||
200 | vmovq _args_digest+2*32(state, idx, 8), %xmm1 | ||
201 | vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 | ||
202 | vmovq _args_digest+4*32(state, idx, 8), %xmm2 | ||
203 | vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 | ||
204 | vmovq _args_digest+6*32(state, idx, 8), %xmm3 | ||
205 | vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 | ||
206 | |||
207 | vmovdqu %xmm0, _result_digest(job_rax) | ||
208 | vmovdqu %xmm1, _result_digest+1*16(job_rax) | ||
209 | vmovdqu %xmm2, _result_digest+2*16(job_rax) | ||
210 | vmovdqu %xmm3, _result_digest+3*16(job_rax) | ||
211 | |||
212 | return: | ||
213 | pop %rbx | ||
214 | FRAME_END | ||
215 | ret | ||
216 | |||
217 | return_null: | ||
218 | xor job_rax, job_rax | ||
219 | jmp return | ||
220 | ENDPROC(sha512_mb_mgr_flush_avx2) | ||
221 | .align 16 | ||
222 | |||
223 | ENTRY(sha512_mb_mgr_get_comp_job_avx2) | ||
224 | push %rbx | ||
225 | |||
226 | mov _unused_lanes(state), unused_lanes | ||
227 | bt $(32+7), unused_lanes | ||
228 | jc .return_null | ||
229 | |||
230 | # Find min length | ||
231 | mov _lens(state),lens0 | ||
232 | mov lens0,idx | ||
233 | mov _lens+1*8(state),lens1 | ||
234 | cmp idx,lens1 | ||
235 | cmovb lens1,idx | ||
236 | mov _lens+2*8(state),lens2 | ||
237 | cmp idx,lens2 | ||
238 | cmovb lens2,idx | ||
239 | mov _lens+3*8(state),lens3 | ||
240 | cmp idx,lens3 | ||
241 | cmovb lens3,idx | ||
242 | test $~0xF,idx | ||
243 | jnz .return_null | ||
244 | and $0xF,idx | ||
245 | |||
246 | #process completed job "idx" | ||
247 | imul $_LANE_DATA_size, idx, lane_data | ||
248 | lea _ldata(state, lane_data), lane_data | ||
249 | |||
250 | mov _job_in_lane(lane_data), job_rax | ||
251 | movq $0, _job_in_lane(lane_data) | ||
252 | movl $STS_COMPLETED, _status(job_rax) | ||
253 | mov _unused_lanes(state), unused_lanes | ||
254 | shl $8, unused_lanes | ||
255 | or idx, unused_lanes | ||
256 | mov unused_lanes, _unused_lanes(state) | ||
257 | |||
258 | movl $0xFFFFFFFF, _lens+4(state, idx, 8) | ||
259 | |||
260 | vmovq _args_digest(state, idx, 8), %xmm0 | ||
261 | vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 | ||
262 | vmovq _args_digest+2*32(state, idx, 8), %xmm1 | ||
263 | vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 | ||
264 | vmovq _args_digest+4*32(state, idx, 8), %xmm2 | ||
265 | vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 | ||
266 | vmovq _args_digest+6*32(state, idx, 8), %xmm3 | ||
267 | vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 | ||
268 | |||
269 | vmovdqu %xmm0, _result_digest+0*16(job_rax) | ||
270 | vmovdqu %xmm1, _result_digest+1*16(job_rax) | ||
271 | vmovdqu %xmm2, _result_digest+2*16(job_rax) | ||
272 | vmovdqu %xmm3, _result_digest+3*16(job_rax) | ||
273 | |||
274 | pop %rbx | ||
275 | |||
276 | ret | ||
277 | |||
278 | .return_null: | ||
279 | xor job_rax, job_rax | ||
280 | pop %rbx | ||
281 | ret | ||
282 | ENDPROC(sha512_mb_mgr_get_comp_job_avx2) | ||
283 | |||
284 | .section .rodata.cst8.one, "aM", @progbits, 8 | ||
285 | .align 8 | ||
286 | one: | ||
287 | .quad 1 | ||
288 | |||
289 | .section .rodata.cst8.two, "aM", @progbits, 8 | ||
290 | .align 8 | ||
291 | two: | ||
292 | .quad 2 | ||
293 | |||
294 | .section .rodata.cst8.three, "aM", @progbits, 8 | ||
295 | .align 8 | ||
296 | three: | ||
297 | .quad 3 | ||
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c deleted file mode 100644 index d08805032f01..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ /dev/null | |||
@@ -1,69 +0,0 @@ | |||
1 | /* | ||
2 | * Initialization code for multi buffer SHA256 algorithm for AVX2 | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #include "sha512_mb_mgr.h" | ||
55 | |||
56 | void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) | ||
57 | { | ||
58 | unsigned int j; | ||
59 | |||
60 | /* initially all lanes are unused */ | ||
61 | state->lens[0] = 0xFFFFFFFF00000000; | ||
62 | state->lens[1] = 0xFFFFFFFF00000001; | ||
63 | state->lens[2] = 0xFFFFFFFF00000002; | ||
64 | state->lens[3] = 0xFFFFFFFF00000003; | ||
65 | |||
66 | state->unused_lanes = 0xFF03020100; | ||
67 | for (j = 0; j < 4; j++) | ||
68 | state->ldata[j].job_in_lane = NULL; | ||
69 | } | ||
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S deleted file mode 100644 index 4ba709ba78e5..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S +++ /dev/null | |||
@@ -1,224 +0,0 @@ | |||
1 | /* | ||
2 | * Buffer submit code for multi buffer SHA512 algorithm | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | #include <linux/linkage.h> | ||
55 | #include <asm/frame.h> | ||
56 | #include "sha512_mb_mgr_datastruct.S" | ||
57 | |||
58 | .extern sha512_x4_avx2 | ||
59 | |||
60 | #define arg1 %rdi | ||
61 | #define arg2 %rsi | ||
62 | |||
63 | #define idx %rdx | ||
64 | #define last_len %rdx | ||
65 | |||
66 | #define size_offset %rcx | ||
67 | #define tmp2 %rcx | ||
68 | |||
69 | # Common definitions | ||
70 | #define state arg1 | ||
71 | #define job arg2 | ||
72 | #define len2 arg2 | ||
73 | #define p2 arg2 | ||
74 | |||
75 | #define p %r11 | ||
76 | #define start_offset %r11 | ||
77 | |||
78 | #define unused_lanes %rbx | ||
79 | |||
80 | #define job_rax %rax | ||
81 | #define len %rax | ||
82 | |||
83 | #define lane %r12 | ||
84 | #define tmp3 %r12 | ||
85 | #define lens3 %r12 | ||
86 | |||
87 | #define extra_blocks %r8 | ||
88 | #define lens0 %r8 | ||
89 | |||
90 | #define tmp %r9 | ||
91 | #define lens1 %r9 | ||
92 | |||
93 | #define lane_data %r10 | ||
94 | #define lens2 %r10 | ||
95 | |||
96 | #define DWORD_len %eax | ||
97 | |||
98 | # JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job) | ||
99 | # arg 1 : rcx : state | ||
100 | # arg 2 : rdx : job | ||
101 | ENTRY(sha512_mb_mgr_submit_avx2) | ||
102 | FRAME_BEGIN | ||
103 | push %rbx | ||
104 | push %r12 | ||
105 | |||
106 | mov _unused_lanes(state), unused_lanes | ||
107 | movzb %bl,lane | ||
108 | shr $8, unused_lanes | ||
109 | imul $_LANE_DATA_size, lane,lane_data | ||
110 | movl $STS_BEING_PROCESSED, _status(job) | ||
111 | lea _ldata(state, lane_data), lane_data | ||
112 | mov unused_lanes, _unused_lanes(state) | ||
113 | movl _len(job), DWORD_len | ||
114 | |||
115 | mov job, _job_in_lane(lane_data) | ||
116 | movl DWORD_len,_lens+4(state , lane, 8) | ||
117 | |||
118 | # Load digest words from result_digest | ||
119 | vmovdqu _result_digest+0*16(job), %xmm0 | ||
120 | vmovdqu _result_digest+1*16(job), %xmm1 | ||
121 | vmovdqu _result_digest+2*16(job), %xmm2 | ||
122 | vmovdqu _result_digest+3*16(job), %xmm3 | ||
123 | |||
124 | vmovq %xmm0, _args_digest(state, lane, 8) | ||
125 | vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8) | ||
126 | vmovq %xmm1, _args_digest+2*32(state , lane, 8) | ||
127 | vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8) | ||
128 | vmovq %xmm2, _args_digest+4*32(state , lane, 8) | ||
129 | vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8) | ||
130 | vmovq %xmm3, _args_digest+6*32(state , lane, 8) | ||
131 | vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8) | ||
132 | |||
133 | mov _buffer(job), p | ||
134 | mov p, _args_data_ptr(state, lane, 8) | ||
135 | |||
136 | cmp $0xFF, unused_lanes | ||
137 | jne return_null | ||
138 | |||
139 | start_loop: | ||
140 | |||
141 | # Find min length | ||
142 | mov _lens+0*8(state),lens0 | ||
143 | mov lens0,idx | ||
144 | mov _lens+1*8(state),lens1 | ||
145 | cmp idx,lens1 | ||
146 | cmovb lens1, idx | ||
147 | mov _lens+2*8(state),lens2 | ||
148 | cmp idx,lens2 | ||
149 | cmovb lens2,idx | ||
150 | mov _lens+3*8(state),lens3 | ||
151 | cmp idx,lens3 | ||
152 | cmovb lens3,idx | ||
153 | mov idx,len2 | ||
154 | and $0xF,idx | ||
155 | and $~0xFF,len2 | ||
156 | jz len_is_0 | ||
157 | |||
158 | sub len2,lens0 | ||
159 | sub len2,lens1 | ||
160 | sub len2,lens2 | ||
161 | sub len2,lens3 | ||
162 | shr $32,len2 | ||
163 | mov lens0, _lens + 0*8(state) | ||
164 | mov lens1, _lens + 1*8(state) | ||
165 | mov lens2, _lens + 2*8(state) | ||
166 | mov lens3, _lens + 3*8(state) | ||
167 | |||
168 | # "state" and "args" are the same address, arg1 | ||
169 | # len is arg2 | ||
170 | call sha512_x4_avx2 | ||
171 | # state and idx are intact | ||
172 | |||
173 | len_is_0: | ||
174 | |||
175 | # process completed job "idx" | ||
176 | imul $_LANE_DATA_size, idx, lane_data | ||
177 | lea _ldata(state, lane_data), lane_data | ||
178 | |||
179 | mov _job_in_lane(lane_data), job_rax | ||
180 | mov _unused_lanes(state), unused_lanes | ||
181 | movq $0, _job_in_lane(lane_data) | ||
182 | movl $STS_COMPLETED, _status(job_rax) | ||
183 | shl $8, unused_lanes | ||
184 | or idx, unused_lanes | ||
185 | mov unused_lanes, _unused_lanes(state) | ||
186 | |||
187 | movl $0xFFFFFFFF,_lens+4(state,idx,8) | ||
188 | vmovq _args_digest+0*32(state , idx, 8), %xmm0 | ||
189 | vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0 | ||
190 | vmovq _args_digest+2*32(state , idx, 8), %xmm1 | ||
191 | vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1 | ||
192 | vmovq _args_digest+4*32(state , idx, 8), %xmm2 | ||
193 | vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2 | ||
194 | vmovq _args_digest+6*32(state , idx, 8), %xmm3 | ||
195 | vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3 | ||
196 | |||
197 | vmovdqu %xmm0, _result_digest + 0*16(job_rax) | ||
198 | vmovdqu %xmm1, _result_digest + 1*16(job_rax) | ||
199 | vmovdqu %xmm2, _result_digest + 2*16(job_rax) | ||
200 | vmovdqu %xmm3, _result_digest + 3*16(job_rax) | ||
201 | |||
202 | return: | ||
203 | pop %r12 | ||
204 | pop %rbx | ||
205 | FRAME_END | ||
206 | ret | ||
207 | |||
208 | return_null: | ||
209 | xor job_rax, job_rax | ||
210 | jmp return | ||
211 | ENDPROC(sha512_mb_mgr_submit_avx2) | ||
212 | |||
213 | /* UNUSED? | ||
214 | .section .rodata.cst16, "aM", @progbits, 16 | ||
215 | .align 16 | ||
216 | H0: .int 0x6a09e667 | ||
217 | H1: .int 0xbb67ae85 | ||
218 | H2: .int 0x3c6ef372 | ||
219 | H3: .int 0xa54ff53a | ||
220 | H4: .int 0x510e527f | ||
221 | H5: .int 0x9b05688c | ||
222 | H6: .int 0x1f83d9ab | ||
223 | H7: .int 0x5be0cd19 | ||
224 | */ | ||
diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S deleted file mode 100644 index e22e907643a6..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S +++ /dev/null | |||
@@ -1,531 +0,0 @@ | |||
1 | /* | ||
2 | * Multi-buffer SHA512 algorithm hash compute routine | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2016 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Megha Dey <megha.dey@linux.intel.com> | ||
22 | * | ||
23 | * BSD LICENSE | ||
24 | * | ||
25 | * Copyright(c) 2016 Intel Corporation. | ||
26 | * | ||
27 | * Redistribution and use in source and binary forms, with or without | ||
28 | * modification, are permitted provided that the following conditions | ||
29 | * are met: | ||
30 | * | ||
31 | * * Redistributions of source code must retain the above copyright | ||
32 | * notice, this list of conditions and the following disclaimer. | ||
33 | * * Redistributions in binary form must reproduce the above copyright | ||
34 | * notice, this list of conditions and the following disclaimer in | ||
35 | * the documentation and/or other materials provided with the | ||
36 | * distribution. | ||
37 | * * Neither the name of Intel Corporation nor the names of its | ||
38 | * contributors may be used to endorse or promote products derived | ||
39 | * from this software without specific prior written permission. | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
42 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
43 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
44 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
45 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
48 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
49 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
50 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
51 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
52 | */ | ||
53 | |||
54 | # code to compute quad SHA512 using AVX2 | ||
55 | # use YMMs to tackle the larger digest size | ||
56 | # outer calling routine takes care of save and restore of XMM registers | ||
57 | # Logic designed/laid out by JDG | ||
58 | |||
59 | # Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15 | ||
60 | # Stack must be aligned to 32 bytes before call | ||
61 | # Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12 | ||
62 | # Linux preserves: rcx rdx rdi rbp r13 r14 r15 | ||
63 | # clobbers ymm0-15 | ||
64 | |||
65 | #include <linux/linkage.h> | ||
66 | #include "sha512_mb_mgr_datastruct.S" | ||
67 | |||
68 | arg1 = %rdi | ||
69 | arg2 = %rsi | ||
70 | |||
71 | # Common definitions | ||
72 | STATE = arg1 | ||
73 | INP_SIZE = arg2 | ||
74 | |||
75 | IDX = %rax | ||
76 | ROUND = %rbx | ||
77 | TBL = %r8 | ||
78 | |||
79 | inp0 = %r9 | ||
80 | inp1 = %r10 | ||
81 | inp2 = %r11 | ||
82 | inp3 = %r12 | ||
83 | |||
84 | a = %ymm0 | ||
85 | b = %ymm1 | ||
86 | c = %ymm2 | ||
87 | d = %ymm3 | ||
88 | e = %ymm4 | ||
89 | f = %ymm5 | ||
90 | g = %ymm6 | ||
91 | h = %ymm7 | ||
92 | |||
93 | a0 = %ymm8 | ||
94 | a1 = %ymm9 | ||
95 | a2 = %ymm10 | ||
96 | |||
97 | TT0 = %ymm14 | ||
98 | TT1 = %ymm13 | ||
99 | TT2 = %ymm12 | ||
100 | TT3 = %ymm11 | ||
101 | TT4 = %ymm10 | ||
102 | TT5 = %ymm9 | ||
103 | |||
104 | T1 = %ymm14 | ||
105 | TMP = %ymm15 | ||
106 | |||
107 | # Define stack usage | ||
108 | STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24 | ||
109 | |||
110 | #define VMOVPD vmovupd | ||
111 | _digest = SZ4*16 | ||
112 | |||
113 | # transpose r0, r1, r2, r3, t0, t1 | ||
114 | # "transpose" data in {r0..r3} using temps {t0..t3} | ||
115 | # Input looks like: {r0 r1 r2 r3} | ||
116 | # r0 = {a7 a6 a5 a4 a3 a2 a1 a0} | ||
117 | # r1 = {b7 b6 b5 b4 b3 b2 b1 b0} | ||
118 | # r2 = {c7 c6 c5 c4 c3 c2 c1 c0} | ||
119 | # r3 = {d7 d6 d5 d4 d3 d2 d1 d0} | ||
120 | # | ||
121 | # output looks like: {t0 r1 r0 r3} | ||
122 | # t0 = {d1 d0 c1 c0 b1 b0 a1 a0} | ||
123 | # r1 = {d3 d2 c3 c2 b3 b2 a3 a2} | ||
124 | # r0 = {d5 d4 c5 c4 b5 b4 a5 a4} | ||
125 | # r3 = {d7 d6 c7 c6 b7 b6 a7 a6} | ||
126 | |||
127 | .macro TRANSPOSE r0 r1 r2 r3 t0 t1 | ||
128 | vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} | ||
129 | vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} | ||
130 | vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} | ||
131 | vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} | ||
132 | |||
133 | vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6 | ||
134 | vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2 | ||
135 | vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5 | ||
136 | vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1 | ||
137 | .endm | ||
138 | |||
139 | .macro ROTATE_ARGS | ||
140 | TMP_ = h | ||
141 | h = g | ||
142 | g = f | ||
143 | f = e | ||
144 | e = d | ||
145 | d = c | ||
146 | c = b | ||
147 | b = a | ||
148 | a = TMP_ | ||
149 | .endm | ||
150 | |||
151 | # PRORQ reg, imm, tmp | ||
152 | # packed-rotate-right-double | ||
153 | # does a rotate by doing two shifts and an or | ||
154 | .macro _PRORQ reg imm tmp | ||
155 | vpsllq $(64-\imm),\reg,\tmp | ||
156 | vpsrlq $\imm,\reg, \reg | ||
157 | vpor \tmp,\reg, \reg | ||
158 | .endm | ||
159 | |||
160 | # non-destructive | ||
161 | # PRORQ_nd reg, imm, tmp, src | ||
162 | .macro _PRORQ_nd reg imm tmp src | ||
163 | vpsllq $(64-\imm), \src, \tmp | ||
164 | vpsrlq $\imm, \src, \reg | ||
165 | vpor \tmp, \reg, \reg | ||
166 | .endm | ||
167 | |||
168 | # PRORQ dst/src, amt | ||
169 | .macro PRORQ reg imm | ||
170 | _PRORQ \reg, \imm, TMP | ||
171 | .endm | ||
172 | |||
173 | # PRORQ_nd dst, src, amt | ||
174 | .macro PRORQ_nd reg tmp imm | ||
175 | _PRORQ_nd \reg, \imm, TMP, \tmp | ||
176 | .endm | ||
177 | |||
178 | #; arguments passed implicitly in preprocessor symbols i, a...h | ||
179 | .macro ROUND_00_15 _T1 i | ||
180 | PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4) | ||
181 | |||
182 | vpxor g, f, a2 # ch: a2 = f^g | ||
183 | vpand e,a2, a2 # ch: a2 = (f^g)&e | ||
184 | vpxor g, a2, a2 # a2 = ch | ||
185 | |||
186 | PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25) | ||
187 | |||
188 | offset = SZ4*(\i & 0xf) | ||
189 | vmovdqu \_T1,offset(%rsp) | ||
190 | vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K | ||
191 | vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) | ||
192 | PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11) | ||
193 | vpaddq a2, h, h # h = h + ch | ||
194 | PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11) | ||
195 | vpaddq \_T1,h, h # h = h + ch + W + K | ||
196 | vpxor a1, a0, a0 # a0 = sigma1 | ||
197 | vmovdqu a,\_T1 | ||
198 | PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22) | ||
199 | vpxor c, \_T1, \_T1 # maj: T1 = a^c | ||
200 | add $SZ4, ROUND # ROUND++ | ||
201 | vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b | ||
202 | vpaddq a0, h, h | ||
203 | vpaddq h, d, d | ||
204 | vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) | ||
205 | PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13) | ||
206 | vpxor a1, a2, a2 # a2 = sig0 | ||
207 | vpand c, a, a1 # maj: a1 = a&c | ||
208 | vpor \_T1, a1, a1 # a1 = maj | ||
209 | vpaddq a1, h, h # h = h + ch + W + K + maj | ||
210 | vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0 | ||
211 | ROTATE_ARGS | ||
212 | .endm | ||
213 | |||
214 | |||
215 | #; arguments passed implicitly in preprocessor symbols i, a...h | ||
216 | .macro ROUND_16_XX _T1 i | ||
217 | vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1 | ||
218 | vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1 | ||
219 | vmovdqu \_T1, a0 | ||
220 | PRORQ \_T1,7 | ||
221 | vmovdqu a1, a2 | ||
222 | PRORQ a1,42 | ||
223 | vpxor a0, \_T1, \_T1 | ||
224 | PRORQ \_T1, 1 | ||
225 | vpxor a2, a1, a1 | ||
226 | PRORQ a1, 19 | ||
227 | vpsrlq $7, a0, a0 | ||
228 | vpxor a0, \_T1, \_T1 | ||
229 | vpsrlq $6, a2, a2 | ||
230 | vpxor a2, a1, a1 | ||
231 | vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1 | ||
232 | vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1 | ||
233 | vpaddq a1, \_T1, \_T1 | ||
234 | |||
235 | ROUND_00_15 \_T1,\i | ||
236 | .endm | ||
237 | |||
238 | |||
239 | # void sha512_x4_avx2(void *STATE, const int INP_SIZE) | ||
240 | # arg 1 : STATE : pointer to input data | ||
241 | # arg 2 : INP_SIZE : size of data in blocks (assumed >= 1) | ||
242 | ENTRY(sha512_x4_avx2) | ||
243 | # general registers preserved in outer calling routine | ||
244 | # outer calling routine saves all the XMM registers | ||
245 | # save callee-saved clobbered registers to comply with C function ABI | ||
246 | push %r12 | ||
247 | push %r13 | ||
248 | push %r14 | ||
249 | push %r15 | ||
250 | |||
251 | sub $STACK_SPACE1, %rsp | ||
252 | |||
253 | # Load the pre-transposed incoming digest. | ||
254 | vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a | ||
255 | vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b | ||
256 | vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c | ||
257 | vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d | ||
258 | vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e | ||
259 | vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f | ||
260 | vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g | ||
261 | vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h | ||
262 | |||
263 | lea K512_4(%rip),TBL | ||
264 | |||
265 | # load the address of each of the 4 message lanes | ||
266 | # getting ready to transpose input onto stack | ||
267 | mov _data_ptr+0*PTR_SZ(STATE),inp0 | ||
268 | mov _data_ptr+1*PTR_SZ(STATE),inp1 | ||
269 | mov _data_ptr+2*PTR_SZ(STATE),inp2 | ||
270 | mov _data_ptr+3*PTR_SZ(STATE),inp3 | ||
271 | |||
272 | xor IDX, IDX | ||
273 | lloop: | ||
274 | xor ROUND, ROUND | ||
275 | |||
276 | # save old digest | ||
277 | vmovdqu a, _digest(%rsp) | ||
278 | vmovdqu b, _digest+1*SZ4(%rsp) | ||
279 | vmovdqu c, _digest+2*SZ4(%rsp) | ||
280 | vmovdqu d, _digest+3*SZ4(%rsp) | ||
281 | vmovdqu e, _digest+4*SZ4(%rsp) | ||
282 | vmovdqu f, _digest+5*SZ4(%rsp) | ||
283 | vmovdqu g, _digest+6*SZ4(%rsp) | ||
284 | vmovdqu h, _digest+7*SZ4(%rsp) | ||
285 | i = 0 | ||
286 | .rep 4 | ||
287 | vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP | ||
288 | VMOVPD i*32(inp0, IDX), TT2 | ||
289 | VMOVPD i*32(inp1, IDX), TT1 | ||
290 | VMOVPD i*32(inp2, IDX), TT4 | ||
291 | VMOVPD i*32(inp3, IDX), TT3 | ||
292 | TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5 | ||
293 | vpshufb TMP, TT0, TT0 | ||
294 | vpshufb TMP, TT1, TT1 | ||
295 | vpshufb TMP, TT2, TT2 | ||
296 | vpshufb TMP, TT3, TT3 | ||
297 | ROUND_00_15 TT0,(i*4+0) | ||
298 | ROUND_00_15 TT1,(i*4+1) | ||
299 | ROUND_00_15 TT2,(i*4+2) | ||
300 | ROUND_00_15 TT3,(i*4+3) | ||
301 | i = (i+1) | ||
302 | .endr | ||
303 | add $128, IDX | ||
304 | |||
305 | i = (i*4) | ||
306 | |||
307 | jmp Lrounds_16_xx | ||
308 | .align 16 | ||
309 | Lrounds_16_xx: | ||
310 | .rep 16 | ||
311 | ROUND_16_XX T1, i | ||
312 | i = (i+1) | ||
313 | .endr | ||
314 | cmp $0xa00,ROUND | ||
315 | jb Lrounds_16_xx | ||
316 | |||
317 | # add old digest | ||
318 | vpaddq _digest(%rsp), a, a | ||
319 | vpaddq _digest+1*SZ4(%rsp), b, b | ||
320 | vpaddq _digest+2*SZ4(%rsp), c, c | ||
321 | vpaddq _digest+3*SZ4(%rsp), d, d | ||
322 | vpaddq _digest+4*SZ4(%rsp), e, e | ||
323 | vpaddq _digest+5*SZ4(%rsp), f, f | ||
324 | vpaddq _digest+6*SZ4(%rsp), g, g | ||
325 | vpaddq _digest+7*SZ4(%rsp), h, h | ||
326 | |||
327 | sub $1, INP_SIZE # unit is blocks | ||
328 | jne lloop | ||
329 | |||
330 | # write back to memory (state object) the transposed digest | ||
331 | vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE) | ||
332 | vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE) | ||
333 | vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE) | ||
334 | vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE) | ||
335 | vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE) | ||
336 | vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE) | ||
337 | vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE) | ||
338 | vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE) | ||
339 | |||
340 | # update input data pointers | ||
341 | add IDX, inp0 | ||
342 | mov inp0, _data_ptr+0*PTR_SZ(STATE) | ||
343 | add IDX, inp1 | ||
344 | mov inp1, _data_ptr+1*PTR_SZ(STATE) | ||
345 | add IDX, inp2 | ||
346 | mov inp2, _data_ptr+2*PTR_SZ(STATE) | ||
347 | add IDX, inp3 | ||
348 | mov inp3, _data_ptr+3*PTR_SZ(STATE) | ||
349 | |||
350 | #;;;;;;;;;;;;;;; | ||
351 | #; Postamble | ||
352 | add $STACK_SPACE1, %rsp | ||
353 | # restore callee-saved clobbered registers | ||
354 | |||
355 | pop %r15 | ||
356 | pop %r14 | ||
357 | pop %r13 | ||
358 | pop %r12 | ||
359 | |||
360 | # outer calling routine restores XMM and other GP registers | ||
361 | ret | ||
362 | ENDPROC(sha512_x4_avx2) | ||
363 | |||
364 | .section .rodata.K512_4, "a", @progbits | ||
365 | .align 64 | ||
366 | K512_4: | ||
367 | .octa 0x428a2f98d728ae22428a2f98d728ae22,\ | ||
368 | 0x428a2f98d728ae22428a2f98d728ae22 | ||
369 | .octa 0x7137449123ef65cd7137449123ef65cd,\ | ||
370 | 0x7137449123ef65cd7137449123ef65cd | ||
371 | .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\ | ||
372 | 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f | ||
373 | .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\ | ||
374 | 0xe9b5dba58189dbbce9b5dba58189dbbc | ||
375 | .octa 0x3956c25bf348b5383956c25bf348b538,\ | ||
376 | 0x3956c25bf348b5383956c25bf348b538 | ||
377 | .octa 0x59f111f1b605d01959f111f1b605d019,\ | ||
378 | 0x59f111f1b605d01959f111f1b605d019 | ||
379 | .octa 0x923f82a4af194f9b923f82a4af194f9b,\ | ||
380 | 0x923f82a4af194f9b923f82a4af194f9b | ||
381 | .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\ | ||
382 | 0xab1c5ed5da6d8118ab1c5ed5da6d8118 | ||
383 | .octa 0xd807aa98a3030242d807aa98a3030242,\ | ||
384 | 0xd807aa98a3030242d807aa98a3030242 | ||
385 | .octa 0x12835b0145706fbe12835b0145706fbe,\ | ||
386 | 0x12835b0145706fbe12835b0145706fbe | ||
387 | .octa 0x243185be4ee4b28c243185be4ee4b28c,\ | ||
388 | 0x243185be4ee4b28c243185be4ee4b28c | ||
389 | .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\ | ||
390 | 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2 | ||
391 | .octa 0x72be5d74f27b896f72be5d74f27b896f,\ | ||
392 | 0x72be5d74f27b896f72be5d74f27b896f | ||
393 | .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\ | ||
394 | 0x80deb1fe3b1696b180deb1fe3b1696b1 | ||
395 | .octa 0x9bdc06a725c712359bdc06a725c71235,\ | ||
396 | 0x9bdc06a725c712359bdc06a725c71235 | ||
397 | .octa 0xc19bf174cf692694c19bf174cf692694,\ | ||
398 | 0xc19bf174cf692694c19bf174cf692694 | ||
399 | .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\ | ||
400 | 0xe49b69c19ef14ad2e49b69c19ef14ad2 | ||
401 | .octa 0xefbe4786384f25e3efbe4786384f25e3,\ | ||
402 | 0xefbe4786384f25e3efbe4786384f25e3 | ||
403 | .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\ | ||
404 | 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5 | ||
405 | .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\ | ||
406 | 0x240ca1cc77ac9c65240ca1cc77ac9c65 | ||
407 | .octa 0x2de92c6f592b02752de92c6f592b0275,\ | ||
408 | 0x2de92c6f592b02752de92c6f592b0275 | ||
409 | .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\ | ||
410 | 0x4a7484aa6ea6e4834a7484aa6ea6e483 | ||
411 | .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\ | ||
412 | 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4 | ||
413 | .octa 0x76f988da831153b576f988da831153b5,\ | ||
414 | 0x76f988da831153b576f988da831153b5 | ||
415 | .octa 0x983e5152ee66dfab983e5152ee66dfab,\ | ||
416 | 0x983e5152ee66dfab983e5152ee66dfab | ||
417 | .octa 0xa831c66d2db43210a831c66d2db43210,\ | ||
418 | 0xa831c66d2db43210a831c66d2db43210 | ||
419 | .octa 0xb00327c898fb213fb00327c898fb213f,\ | ||
420 | 0xb00327c898fb213fb00327c898fb213f | ||
421 | .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\ | ||
422 | 0xbf597fc7beef0ee4bf597fc7beef0ee4 | ||
423 | .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\ | ||
424 | 0xc6e00bf33da88fc2c6e00bf33da88fc2 | ||
425 | .octa 0xd5a79147930aa725d5a79147930aa725,\ | ||
426 | 0xd5a79147930aa725d5a79147930aa725 | ||
427 | .octa 0x06ca6351e003826f06ca6351e003826f,\ | ||
428 | 0x06ca6351e003826f06ca6351e003826f | ||
429 | .octa 0x142929670a0e6e70142929670a0e6e70,\ | ||
430 | 0x142929670a0e6e70142929670a0e6e70 | ||
431 | .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\ | ||
432 | 0x27b70a8546d22ffc27b70a8546d22ffc | ||
433 | .octa 0x2e1b21385c26c9262e1b21385c26c926,\ | ||
434 | 0x2e1b21385c26c9262e1b21385c26c926 | ||
435 | .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\ | ||
436 | 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed | ||
437 | .octa 0x53380d139d95b3df53380d139d95b3df,\ | ||
438 | 0x53380d139d95b3df53380d139d95b3df | ||
439 | .octa 0x650a73548baf63de650a73548baf63de,\ | ||
440 | 0x650a73548baf63de650a73548baf63de | ||
441 | .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\ | ||
442 | 0x766a0abb3c77b2a8766a0abb3c77b2a8 | ||
443 | .octa 0x81c2c92e47edaee681c2c92e47edaee6,\ | ||
444 | 0x81c2c92e47edaee681c2c92e47edaee6 | ||
445 | .octa 0x92722c851482353b92722c851482353b,\ | ||
446 | 0x92722c851482353b92722c851482353b | ||
447 | .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\ | ||
448 | 0xa2bfe8a14cf10364a2bfe8a14cf10364 | ||
449 | .octa 0xa81a664bbc423001a81a664bbc423001,\ | ||
450 | 0xa81a664bbc423001a81a664bbc423001 | ||
451 | .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\ | ||
452 | 0xc24b8b70d0f89791c24b8b70d0f89791 | ||
453 | .octa 0xc76c51a30654be30c76c51a30654be30,\ | ||
454 | 0xc76c51a30654be30c76c51a30654be30 | ||
455 | .octa 0xd192e819d6ef5218d192e819d6ef5218,\ | ||
456 | 0xd192e819d6ef5218d192e819d6ef5218 | ||
457 | .octa 0xd69906245565a910d69906245565a910,\ | ||
458 | 0xd69906245565a910d69906245565a910 | ||
459 | .octa 0xf40e35855771202af40e35855771202a,\ | ||
460 | 0xf40e35855771202af40e35855771202a | ||
461 | .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\ | ||
462 | 0x106aa07032bbd1b8106aa07032bbd1b8 | ||
463 | .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\ | ||
464 | 0x19a4c116b8d2d0c819a4c116b8d2d0c8 | ||
465 | .octa 0x1e376c085141ab531e376c085141ab53,\ | ||
466 | 0x1e376c085141ab531e376c085141ab53 | ||
467 | .octa 0x2748774cdf8eeb992748774cdf8eeb99,\ | ||
468 | 0x2748774cdf8eeb992748774cdf8eeb99 | ||
469 | .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\ | ||
470 | 0x34b0bcb5e19b48a834b0bcb5e19b48a8 | ||
471 | .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\ | ||
472 | 0x391c0cb3c5c95a63391c0cb3c5c95a63 | ||
473 | .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\ | ||
474 | 0x4ed8aa4ae3418acb4ed8aa4ae3418acb | ||
475 | .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\ | ||
476 | 0x5b9cca4f7763e3735b9cca4f7763e373 | ||
477 | .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\ | ||
478 | 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3 | ||
479 | .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\ | ||
480 | 0x748f82ee5defb2fc748f82ee5defb2fc | ||
481 | .octa 0x78a5636f43172f6078a5636f43172f60,\ | ||
482 | 0x78a5636f43172f6078a5636f43172f60 | ||
483 | .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\ | ||
484 | 0x84c87814a1f0ab7284c87814a1f0ab72 | ||
485 | .octa 0x8cc702081a6439ec8cc702081a6439ec,\ | ||
486 | 0x8cc702081a6439ec8cc702081a6439ec | ||
487 | .octa 0x90befffa23631e2890befffa23631e28,\ | ||
488 | 0x90befffa23631e2890befffa23631e28 | ||
489 | .octa 0xa4506cebde82bde9a4506cebde82bde9,\ | ||
490 | 0xa4506cebde82bde9a4506cebde82bde9 | ||
491 | .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\ | ||
492 | 0xbef9a3f7b2c67915bef9a3f7b2c67915 | ||
493 | .octa 0xc67178f2e372532bc67178f2e372532b,\ | ||
494 | 0xc67178f2e372532bc67178f2e372532b | ||
495 | .octa 0xca273eceea26619cca273eceea26619c,\ | ||
496 | 0xca273eceea26619cca273eceea26619c | ||
497 | .octa 0xd186b8c721c0c207d186b8c721c0c207,\ | ||
498 | 0xd186b8c721c0c207d186b8c721c0c207 | ||
499 | .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\ | ||
500 | 0xeada7dd6cde0eb1eeada7dd6cde0eb1e | ||
501 | .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\ | ||
502 | 0xf57d4f7fee6ed178f57d4f7fee6ed178 | ||
503 | .octa 0x06f067aa72176fba06f067aa72176fba,\ | ||
504 | 0x06f067aa72176fba06f067aa72176fba | ||
505 | .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\ | ||
506 | 0x0a637dc5a2c898a60a637dc5a2c898a6 | ||
507 | .octa 0x113f9804bef90dae113f9804bef90dae,\ | ||
508 | 0x113f9804bef90dae113f9804bef90dae | ||
509 | .octa 0x1b710b35131c471b1b710b35131c471b,\ | ||
510 | 0x1b710b35131c471b1b710b35131c471b | ||
511 | .octa 0x28db77f523047d8428db77f523047d84,\ | ||
512 | 0x28db77f523047d8428db77f523047d84 | ||
513 | .octa 0x32caab7b40c7249332caab7b40c72493,\ | ||
514 | 0x32caab7b40c7249332caab7b40c72493 | ||
515 | .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\ | ||
516 | 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc | ||
517 | .octa 0x431d67c49c100d4c431d67c49c100d4c,\ | ||
518 | 0x431d67c49c100d4c431d67c49c100d4c | ||
519 | .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\ | ||
520 | 0x4cc5d4becb3e42b64cc5d4becb3e42b6 | ||
521 | .octa 0x597f299cfc657e2a597f299cfc657e2a,\ | ||
522 | 0x597f299cfc657e2a597f299cfc657e2a | ||
523 | .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\ | ||
524 | 0x5fcb6fab3ad6faec5fcb6fab3ad6faec | ||
525 | .octa 0x6c44198c4a4758176c44198c4a475817,\ | ||
526 | 0x6c44198c4a4758176c44198c4a475817 | ||
527 | |||
528 | .section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 | ||
529 | .align 32 | ||
530 | PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 | ||
531 | .octa 0x18191a1b1c1d1e1f1011121314151617 | ||
diff --git a/crypto/Kconfig b/crypto/Kconfig index 59e32623a7ce..90f2811fac5f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
@@ -213,20 +213,6 @@ config CRYPTO_CRYPTD | |||
213 | converts an arbitrary synchronous software crypto algorithm | 213 | converts an arbitrary synchronous software crypto algorithm |
214 | into an asynchronous algorithm that executes in a kernel thread. | 214 | into an asynchronous algorithm that executes in a kernel thread. |
215 | 215 | ||
216 | config CRYPTO_MCRYPTD | ||
217 | tristate "Software async multi-buffer crypto daemon" | ||
218 | select CRYPTO_BLKCIPHER | ||
219 | select CRYPTO_HASH | ||
220 | select CRYPTO_MANAGER | ||
221 | select CRYPTO_WORKQUEUE | ||
222 | help | ||
223 | This is a generic software asynchronous crypto daemon that | ||
224 | provides the kernel thread to assist multi-buffer crypto | ||
225 | algorithms for submitting jobs and flushing jobs in multi-buffer | ||
226 | crypto algorithms. Multi-buffer crypto algorithms are executed | ||
227 | in the context of this kernel thread and drivers can post | ||
228 | their crypto request asynchronously to be processed by this daemon. | ||
229 | |||
230 | config CRYPTO_AUTHENC | 216 | config CRYPTO_AUTHENC |
231 | tristate "Authenc support" | 217 | tristate "Authenc support" |
232 | select CRYPTO_AEAD | 218 | select CRYPTO_AEAD |
@@ -848,54 +834,6 @@ config CRYPTO_SHA1_PPC_SPE | |||
848 | SHA-1 secure hash standard (DFIPS 180-4) implemented | 834 | SHA-1 secure hash standard (DFIPS 180-4) implemented |
849 | using powerpc SPE SIMD instruction set. | 835 | using powerpc SPE SIMD instruction set. |
850 | 836 | ||
851 | config CRYPTO_SHA1_MB | ||
852 | tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)" | ||
853 | depends on X86 && 64BIT | ||
854 | select CRYPTO_SHA1 | ||
855 | select CRYPTO_HASH | ||
856 | select CRYPTO_MCRYPTD | ||
857 | help | ||
858 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | ||
859 | using multi-buffer technique. This algorithm computes on | ||
860 | multiple data lanes concurrently with SIMD instructions for | ||
861 | better throughput. It should not be enabled by default but | ||
862 | used when there is significant amount of work to keep the keep | ||
863 | the data lanes filled to get performance benefit. If the data | ||
864 | lanes remain unfilled, a flush operation will be initiated to | ||
865 | process the crypto jobs, adding a slight latency. | ||
866 | |||
867 | config CRYPTO_SHA256_MB | ||
868 | tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)" | ||
869 | depends on X86 && 64BIT | ||
870 | select CRYPTO_SHA256 | ||
871 | select CRYPTO_HASH | ||
872 | select CRYPTO_MCRYPTD | ||
873 | help | ||
874 | SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | ||
875 | using multi-buffer technique. This algorithm computes on | ||
876 | multiple data lanes concurrently with SIMD instructions for | ||
877 | better throughput. It should not be enabled by default but | ||
878 | used when there is significant amount of work to keep the keep | ||
879 | the data lanes filled to get performance benefit. If the data | ||
880 | lanes remain unfilled, a flush operation will be initiated to | ||
881 | process the crypto jobs, adding a slight latency. | ||
882 | |||
883 | config CRYPTO_SHA512_MB | ||
884 | tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)" | ||
885 | depends on X86 && 64BIT | ||
886 | select CRYPTO_SHA512 | ||
887 | select CRYPTO_HASH | ||
888 | select CRYPTO_MCRYPTD | ||
889 | help | ||
890 | SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | ||
891 | using multi-buffer technique. This algorithm computes on | ||
892 | multiple data lanes concurrently with SIMD instructions for | ||
893 | better throughput. It should not be enabled by default but | ||
894 | used when there is significant amount of work to keep the keep | ||
895 | the data lanes filled to get performance benefit. If the data | ||
896 | lanes remain unfilled, a flush operation will be initiated to | ||
897 | process the crypto jobs, adding a slight latency. | ||
898 | |||
899 | config CRYPTO_SHA256 | 837 | config CRYPTO_SHA256 |
900 | tristate "SHA224 and SHA256 digest algorithm" | 838 | tristate "SHA224 and SHA256 digest algorithm" |
901 | select CRYPTO_HASH | 839 | select CRYPTO_HASH |
diff --git a/crypto/Makefile b/crypto/Makefile index f6a234d08882..d719843f8b6e 100644 --- a/crypto/Makefile +++ b/crypto/Makefile | |||
@@ -93,7 +93,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o | |||
93 | obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o | 93 | obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o |
94 | obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o | 94 | obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o |
95 | obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o | 95 | obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o |
96 | obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o | ||
97 | obj-$(CONFIG_CRYPTO_DES) += des_generic.o | 96 | obj-$(CONFIG_CRYPTO_DES) += des_generic.o |
98 | obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o | 97 | obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o |
99 | obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o | 98 | obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o |
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c deleted file mode 100644 index f14152147ce8..000000000000 --- a/crypto/mcryptd.c +++ /dev/null | |||
@@ -1,675 +0,0 @@ | |||
1 | /* | ||
2 | * Software multibuffer async crypto daemon. | ||
3 | * | ||
4 | * Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com> | ||
5 | * | ||
6 | * Adapted from crypto daemon. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the Free | ||
10 | * Software Foundation; either version 2 of the License, or (at your option) | ||
11 | * any later version. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <crypto/algapi.h> | ||
16 | #include <crypto/internal/hash.h> | ||
17 | #include <crypto/internal/aead.h> | ||
18 | #include <crypto/mcryptd.h> | ||
19 | #include <crypto/crypto_wq.h> | ||
20 | #include <linux/err.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/list.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/scatterlist.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/sched/stat.h> | ||
28 | #include <linux/slab.h> | ||
29 | |||
30 | #define MCRYPTD_MAX_CPU_QLEN 100 | ||
31 | #define MCRYPTD_BATCH 9 | ||
32 | |||
33 | static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, | ||
34 | unsigned int tail); | ||
35 | |||
36 | struct mcryptd_flush_list { | ||
37 | struct list_head list; | ||
38 | struct mutex lock; | ||
39 | }; | ||
40 | |||
41 | static struct mcryptd_flush_list __percpu *mcryptd_flist; | ||
42 | |||
43 | struct hashd_instance_ctx { | ||
44 | struct crypto_ahash_spawn spawn; | ||
45 | struct mcryptd_queue *queue; | ||
46 | }; | ||
47 | |||
48 | static void mcryptd_queue_worker(struct work_struct *work); | ||
49 | |||
50 | void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay) | ||
51 | { | ||
52 | struct mcryptd_flush_list *flist; | ||
53 | |||
54 | if (!cstate->flusher_engaged) { | ||
55 | /* put the flusher on the flush list */ | ||
56 | flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); | ||
57 | mutex_lock(&flist->lock); | ||
58 | list_add_tail(&cstate->flush_list, &flist->list); | ||
59 | cstate->flusher_engaged = true; | ||
60 | cstate->next_flush = jiffies + delay; | ||
61 | queue_delayed_work_on(smp_processor_id(), kcrypto_wq, | ||
62 | &cstate->flush, delay); | ||
63 | mutex_unlock(&flist->lock); | ||
64 | } | ||
65 | } | ||
66 | EXPORT_SYMBOL(mcryptd_arm_flusher); | ||
67 | |||
68 | static int mcryptd_init_queue(struct mcryptd_queue *queue, | ||
69 | unsigned int max_cpu_qlen) | ||
70 | { | ||
71 | int cpu; | ||
72 | struct mcryptd_cpu_queue *cpu_queue; | ||
73 | |||
74 | queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue); | ||
75 | pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue); | ||
76 | if (!queue->cpu_queue) | ||
77 | return -ENOMEM; | ||
78 | for_each_possible_cpu(cpu) { | ||
79 | cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); | ||
80 | pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); | ||
81 | crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); | ||
82 | INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); | ||
83 | spin_lock_init(&cpu_queue->q_lock); | ||
84 | } | ||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | static void mcryptd_fini_queue(struct mcryptd_queue *queue) | ||
89 | { | ||
90 | int cpu; | ||
91 | struct mcryptd_cpu_queue *cpu_queue; | ||
92 | |||
93 | for_each_possible_cpu(cpu) { | ||
94 | cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); | ||
95 | BUG_ON(cpu_queue->queue.qlen); | ||
96 | } | ||
97 | free_percpu(queue->cpu_queue); | ||
98 | } | ||
99 | |||
100 | static int mcryptd_enqueue_request(struct mcryptd_queue *queue, | ||
101 | struct crypto_async_request *request, | ||
102 | struct mcryptd_hash_request_ctx *rctx) | ||
103 | { | ||
104 | int cpu, err; | ||
105 | struct mcryptd_cpu_queue *cpu_queue; | ||
106 | |||
107 | cpu_queue = raw_cpu_ptr(queue->cpu_queue); | ||
108 | spin_lock(&cpu_queue->q_lock); | ||
109 | cpu = smp_processor_id(); | ||
110 | rctx->tag.cpu = smp_processor_id(); | ||
111 | |||
112 | err = crypto_enqueue_request(&cpu_queue->queue, request); | ||
113 | pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", | ||
114 | cpu, cpu_queue, request); | ||
115 | spin_unlock(&cpu_queue->q_lock); | ||
116 | queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); | ||
117 | |||
118 | return err; | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Try to opportunisticlly flush the partially completed jobs if | ||
123 | * crypto daemon is the only task running. | ||
124 | */ | ||
125 | static void mcryptd_opportunistic_flush(void) | ||
126 | { | ||
127 | struct mcryptd_flush_list *flist; | ||
128 | struct mcryptd_alg_cstate *cstate; | ||
129 | |||
130 | flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); | ||
131 | while (single_task_running()) { | ||
132 | mutex_lock(&flist->lock); | ||
133 | cstate = list_first_entry_or_null(&flist->list, | ||
134 | struct mcryptd_alg_cstate, flush_list); | ||
135 | if (!cstate || !cstate->flusher_engaged) { | ||
136 | mutex_unlock(&flist->lock); | ||
137 | return; | ||
138 | } | ||
139 | list_del(&cstate->flush_list); | ||
140 | cstate->flusher_engaged = false; | ||
141 | mutex_unlock(&flist->lock); | ||
142 | cstate->alg_state->flusher(cstate); | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * Called in workqueue context, do one real cryption work (via | ||
148 | * req->complete) and reschedule itself if there are more work to | ||
149 | * do. | ||
150 | */ | ||
151 | static void mcryptd_queue_worker(struct work_struct *work) | ||
152 | { | ||
153 | struct mcryptd_cpu_queue *cpu_queue; | ||
154 | struct crypto_async_request *req, *backlog; | ||
155 | int i; | ||
156 | |||
157 | /* | ||
158 | * Need to loop through more than once for multi-buffer to | ||
159 | * be effective. | ||
160 | */ | ||
161 | |||
162 | cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); | ||
163 | i = 0; | ||
164 | while (i < MCRYPTD_BATCH || single_task_running()) { | ||
165 | |||
166 | spin_lock_bh(&cpu_queue->q_lock); | ||
167 | backlog = crypto_get_backlog(&cpu_queue->queue); | ||
168 | req = crypto_dequeue_request(&cpu_queue->queue); | ||
169 | spin_unlock_bh(&cpu_queue->q_lock); | ||
170 | |||
171 | if (!req) { | ||
172 | mcryptd_opportunistic_flush(); | ||
173 | return; | ||
174 | } | ||
175 | |||
176 | if (backlog) | ||
177 | backlog->complete(backlog, -EINPROGRESS); | ||
178 | req->complete(req, 0); | ||
179 | if (!cpu_queue->queue.qlen) | ||
180 | return; | ||
181 | ++i; | ||
182 | } | ||
183 | if (cpu_queue->queue.qlen) | ||
184 | queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); | ||
185 | } | ||
186 | |||
187 | void mcryptd_flusher(struct work_struct *__work) | ||
188 | { | ||
189 | struct mcryptd_alg_cstate *alg_cpu_state; | ||
190 | struct mcryptd_alg_state *alg_state; | ||
191 | struct mcryptd_flush_list *flist; | ||
192 | int cpu; | ||
193 | |||
194 | cpu = smp_processor_id(); | ||
195 | alg_cpu_state = container_of(to_delayed_work(__work), | ||
196 | struct mcryptd_alg_cstate, flush); | ||
197 | alg_state = alg_cpu_state->alg_state; | ||
198 | if (alg_cpu_state->cpu != cpu) | ||
199 | pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n", | ||
200 | cpu, alg_cpu_state->cpu); | ||
201 | |||
202 | if (alg_cpu_state->flusher_engaged) { | ||
203 | flist = per_cpu_ptr(mcryptd_flist, cpu); | ||
204 | mutex_lock(&flist->lock); | ||
205 | list_del(&alg_cpu_state->flush_list); | ||
206 | alg_cpu_state->flusher_engaged = false; | ||
207 | mutex_unlock(&flist->lock); | ||
208 | alg_state->flusher(alg_cpu_state); | ||
209 | } | ||
210 | } | ||
211 | EXPORT_SYMBOL_GPL(mcryptd_flusher); | ||
212 | |||
213 | static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm) | ||
214 | { | ||
215 | struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); | ||
216 | struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst); | ||
217 | |||
218 | return ictx->queue; | ||
219 | } | ||
220 | |||
221 | static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, | ||
222 | unsigned int tail) | ||
223 | { | ||
224 | char *p; | ||
225 | struct crypto_instance *inst; | ||
226 | int err; | ||
227 | |||
228 | p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL); | ||
229 | if (!p) | ||
230 | return ERR_PTR(-ENOMEM); | ||
231 | |||
232 | inst = (void *)(p + head); | ||
233 | |||
234 | err = -ENAMETOOLONG; | ||
235 | if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, | ||
236 | "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) | ||
237 | goto out_free_inst; | ||
238 | |||
239 | memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); | ||
240 | |||
241 | inst->alg.cra_priority = alg->cra_priority + 50; | ||
242 | inst->alg.cra_blocksize = alg->cra_blocksize; | ||
243 | inst->alg.cra_alignmask = alg->cra_alignmask; | ||
244 | |||
245 | out: | ||
246 | return p; | ||
247 | |||
248 | out_free_inst: | ||
249 | kfree(p); | ||
250 | p = ERR_PTR(err); | ||
251 | goto out; | ||
252 | } | ||
253 | |||
254 | static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type, | ||
255 | u32 *mask) | ||
256 | { | ||
257 | struct crypto_attr_type *algt; | ||
258 | |||
259 | algt = crypto_get_attr_type(tb); | ||
260 | if (IS_ERR(algt)) | ||
261 | return false; | ||
262 | |||
263 | *type |= algt->type & CRYPTO_ALG_INTERNAL; | ||
264 | *mask |= algt->mask & CRYPTO_ALG_INTERNAL; | ||
265 | |||
266 | if (*type & *mask & CRYPTO_ALG_INTERNAL) | ||
267 | return true; | ||
268 | else | ||
269 | return false; | ||
270 | } | ||
271 | |||
272 | static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) | ||
273 | { | ||
274 | struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); | ||
275 | struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst); | ||
276 | struct crypto_ahash_spawn *spawn = &ictx->spawn; | ||
277 | struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); | ||
278 | struct crypto_ahash *hash; | ||
279 | |||
280 | hash = crypto_spawn_ahash(spawn); | ||
281 | if (IS_ERR(hash)) | ||
282 | return PTR_ERR(hash); | ||
283 | |||
284 | ctx->child = hash; | ||
285 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
286 | sizeof(struct mcryptd_hash_request_ctx) + | ||
287 | crypto_ahash_reqsize(hash)); | ||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm) | ||
292 | { | ||
293 | struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); | ||
294 | |||
295 | crypto_free_ahash(ctx->child); | ||
296 | } | ||
297 | |||
298 | static int mcryptd_hash_setkey(struct crypto_ahash *parent, | ||
299 | const u8 *key, unsigned int keylen) | ||
300 | { | ||
301 | struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent); | ||
302 | struct crypto_ahash *child = ctx->child; | ||
303 | int err; | ||
304 | |||
305 | crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
306 | crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) & | ||
307 | CRYPTO_TFM_REQ_MASK); | ||
308 | err = crypto_ahash_setkey(child, key, keylen); | ||
309 | crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) & | ||
310 | CRYPTO_TFM_RES_MASK); | ||
311 | return err; | ||
312 | } | ||
313 | |||
314 | static int mcryptd_hash_enqueue(struct ahash_request *req, | ||
315 | crypto_completion_t complete) | ||
316 | { | ||
317 | int ret; | ||
318 | |||
319 | struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
320 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
321 | struct mcryptd_queue *queue = | ||
322 | mcryptd_get_queue(crypto_ahash_tfm(tfm)); | ||
323 | |||
324 | rctx->complete = req->base.complete; | ||
325 | req->base.complete = complete; | ||
326 | |||
327 | ret = mcryptd_enqueue_request(queue, &req->base, rctx); | ||
328 | |||
329 | return ret; | ||
330 | } | ||
331 | |||
332 | static void mcryptd_hash_init(struct crypto_async_request *req_async, int err) | ||
333 | { | ||
334 | struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); | ||
335 | struct crypto_ahash *child = ctx->child; | ||
336 | struct ahash_request *req = ahash_request_cast(req_async); | ||
337 | struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
338 | struct ahash_request *desc = &rctx->areq; | ||
339 | |||
340 | if (unlikely(err == -EINPROGRESS)) | ||
341 | goto out; | ||
342 | |||
343 | ahash_request_set_tfm(desc, child); | ||
344 | ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, | ||
345 | rctx->complete, req_async); | ||
346 | |||
347 | rctx->out = req->result; | ||
348 | err = crypto_ahash_init(desc); | ||
349 | |||
350 | out: | ||
351 | local_bh_disable(); | ||
352 | rctx->complete(&req->base, err); | ||
353 | local_bh_enable(); | ||
354 | } | ||
355 | |||
356 | static int mcryptd_hash_init_enqueue(struct ahash_request *req) | ||
357 | { | ||
358 | return mcryptd_hash_enqueue(req, mcryptd_hash_init); | ||
359 | } | ||
360 | |||
361 | static void mcryptd_hash_update(struct crypto_async_request *req_async, int err) | ||
362 | { | ||
363 | struct ahash_request *req = ahash_request_cast(req_async); | ||
364 | struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
365 | |||
366 | if (unlikely(err == -EINPROGRESS)) | ||
367 | goto out; | ||
368 | |||
369 | rctx->out = req->result; | ||
370 | err = crypto_ahash_update(&rctx->areq); | ||
371 | if (err) { | ||
372 | req->base.complete = rctx->complete; | ||
373 | goto out; | ||
374 | } | ||
375 | |||
376 | return; | ||
377 | out: | ||
378 | local_bh_disable(); | ||
379 | rctx->complete(&req->base, err); | ||
380 | local_bh_enable(); | ||
381 | } | ||
382 | |||
383 | static int mcryptd_hash_update_enqueue(struct ahash_request *req) | ||
384 | { | ||
385 | return mcryptd_hash_enqueue(req, mcryptd_hash_update); | ||
386 | } | ||
387 | |||
388 | static void mcryptd_hash_final(struct crypto_async_request *req_async, int err) | ||
389 | { | ||
390 | struct ahash_request *req = ahash_request_cast(req_async); | ||
391 | struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
392 | |||
393 | if (unlikely(err == -EINPROGRESS)) | ||
394 | goto out; | ||
395 | |||
396 | rctx->out = req->result; | ||
397 | err = crypto_ahash_final(&rctx->areq); | ||
398 | if (err) { | ||
399 | req->base.complete = rctx->complete; | ||
400 | goto out; | ||
401 | } | ||
402 | |||
403 | return; | ||
404 | out: | ||
405 | local_bh_disable(); | ||
406 | rctx->complete(&req->base, err); | ||
407 | local_bh_enable(); | ||
408 | } | ||
409 | |||
410 | static int mcryptd_hash_final_enqueue(struct ahash_request *req) | ||
411 | { | ||
412 | return mcryptd_hash_enqueue(req, mcryptd_hash_final); | ||
413 | } | ||
414 | |||
415 | static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err) | ||
416 | { | ||
417 | struct ahash_request *req = ahash_request_cast(req_async); | ||
418 | struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
419 | |||
420 | if (unlikely(err == -EINPROGRESS)) | ||
421 | goto out; | ||
422 | rctx->out = req->result; | ||
423 | err = crypto_ahash_finup(&rctx->areq); | ||
424 | |||
425 | if (err) { | ||
426 | req->base.complete = rctx->complete; | ||
427 | goto out; | ||
428 | } | ||
429 | |||
430 | return; | ||
431 | out: | ||
432 | local_bh_disable(); | ||
433 | rctx->complete(&req->base, err); | ||
434 | local_bh_enable(); | ||
435 | } | ||
436 | |||
437 | static int mcryptd_hash_finup_enqueue(struct ahash_request *req) | ||
438 | { | ||
439 | return mcryptd_hash_enqueue(req, mcryptd_hash_finup); | ||
440 | } | ||
441 | |||
442 | static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err) | ||
443 | { | ||
444 | struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); | ||
445 | struct crypto_ahash *child = ctx->child; | ||
446 | struct ahash_request *req = ahash_request_cast(req_async); | ||
447 | struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
448 | struct ahash_request *desc = &rctx->areq; | ||
449 | |||
450 | if (unlikely(err == -EINPROGRESS)) | ||
451 | goto out; | ||
452 | |||
453 | ahash_request_set_tfm(desc, child); | ||
454 | ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP, | ||
455 | rctx->complete, req_async); | ||
456 | |||
457 | rctx->out = req->result; | ||
458 | err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc); | ||
459 | |||
460 | out: | ||
461 | local_bh_disable(); | ||
462 | rctx->complete(&req->base, err); | ||
463 | local_bh_enable(); | ||
464 | } | ||
465 | |||
466 | static int mcryptd_hash_digest_enqueue(struct ahash_request *req) | ||
467 | { | ||
468 | return mcryptd_hash_enqueue(req, mcryptd_hash_digest); | ||
469 | } | ||
470 | |||
471 | static int mcryptd_hash_export(struct ahash_request *req, void *out) | ||
472 | { | ||
473 | struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
474 | |||
475 | return crypto_ahash_export(&rctx->areq, out); | ||
476 | } | ||
477 | |||
478 | static int mcryptd_hash_import(struct ahash_request *req, const void *in) | ||
479 | { | ||
480 | struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
481 | |||
482 | return crypto_ahash_import(&rctx->areq, in); | ||
483 | } | ||
484 | |||
485 | static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, | ||
486 | struct mcryptd_queue *queue) | ||
487 | { | ||
488 | struct hashd_instance_ctx *ctx; | ||
489 | struct ahash_instance *inst; | ||
490 | struct hash_alg_common *halg; | ||
491 | struct crypto_alg *alg; | ||
492 | u32 type = 0; | ||
493 | u32 mask = 0; | ||
494 | int err; | ||
495 | |||
496 | if (!mcryptd_check_internal(tb, &type, &mask)) | ||
497 | return -EINVAL; | ||
498 | |||
499 | halg = ahash_attr_alg(tb[1], type, mask); | ||
500 | if (IS_ERR(halg)) | ||
501 | return PTR_ERR(halg); | ||
502 | |||
503 | alg = &halg->base; | ||
504 | pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name); | ||
505 | inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(), | ||
506 | sizeof(*ctx)); | ||
507 | err = PTR_ERR(inst); | ||
508 | if (IS_ERR(inst)) | ||
509 | goto out_put_alg; | ||
510 | |||
511 | ctx = ahash_instance_ctx(inst); | ||
512 | ctx->queue = queue; | ||
513 | |||
514 | err = crypto_init_ahash_spawn(&ctx->spawn, halg, | ||
515 | ahash_crypto_instance(inst)); | ||
516 | if (err) | ||
517 | goto out_free_inst; | ||
518 | |||
519 | inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC | | ||
520 | (alg->cra_flags & (CRYPTO_ALG_INTERNAL | | ||
521 | CRYPTO_ALG_OPTIONAL_KEY)); | ||
522 | |||
523 | inst->alg.halg.digestsize = halg->digestsize; | ||
524 | inst->alg.halg.statesize = halg->statesize; | ||
525 | inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); | ||
526 | |||
527 | inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm; | ||
528 | inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm; | ||
529 | |||
530 | inst->alg.init = mcryptd_hash_init_enqueue; | ||
531 | inst->alg.update = mcryptd_hash_update_enqueue; | ||
532 | inst->alg.final = mcryptd_hash_final_enqueue; | ||
533 | inst->alg.finup = mcryptd_hash_finup_enqueue; | ||
534 | inst->alg.export = mcryptd_hash_export; | ||
535 | inst->alg.import = mcryptd_hash_import; | ||
536 | if (crypto_hash_alg_has_setkey(halg)) | ||
537 | inst->alg.setkey = mcryptd_hash_setkey; | ||
538 | inst->alg.digest = mcryptd_hash_digest_enqueue; | ||
539 | |||
540 | err = ahash_register_instance(tmpl, inst); | ||
541 | if (err) { | ||
542 | crypto_drop_ahash(&ctx->spawn); | ||
543 | out_free_inst: | ||
544 | kfree(inst); | ||
545 | } | ||
546 | |||
547 | out_put_alg: | ||
548 | crypto_mod_put(alg); | ||
549 | return err; | ||
550 | } | ||
551 | |||
552 | static struct mcryptd_queue mqueue; | ||
553 | |||
554 | static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb) | ||
555 | { | ||
556 | struct crypto_attr_type *algt; | ||
557 | |||
558 | algt = crypto_get_attr_type(tb); | ||
559 | if (IS_ERR(algt)) | ||
560 | return PTR_ERR(algt); | ||
561 | |||
562 | switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { | ||
563 | case CRYPTO_ALG_TYPE_DIGEST: | ||
564 | return mcryptd_create_hash(tmpl, tb, &mqueue); | ||
565 | break; | ||
566 | } | ||
567 | |||
568 | return -EINVAL; | ||
569 | } | ||
570 | |||
571 | static void mcryptd_free(struct crypto_instance *inst) | ||
572 | { | ||
573 | struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst); | ||
574 | struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst); | ||
575 | |||
576 | switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) { | ||
577 | case CRYPTO_ALG_TYPE_AHASH: | ||
578 | crypto_drop_ahash(&hctx->spawn); | ||
579 | kfree(ahash_instance(inst)); | ||
580 | return; | ||
581 | default: | ||
582 | crypto_drop_spawn(&ctx->spawn); | ||
583 | kfree(inst); | ||
584 | } | ||
585 | } | ||
586 | |||
587 | static struct crypto_template mcryptd_tmpl = { | ||
588 | .name = "mcryptd", | ||
589 | .create = mcryptd_create, | ||
590 | .free = mcryptd_free, | ||
591 | .module = THIS_MODULE, | ||
592 | }; | ||
593 | |||
594 | struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, | ||
595 | u32 type, u32 mask) | ||
596 | { | ||
597 | char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME]; | ||
598 | struct crypto_ahash *tfm; | ||
599 | |||
600 | if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME, | ||
601 | "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) | ||
602 | return ERR_PTR(-EINVAL); | ||
603 | tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask); | ||
604 | if (IS_ERR(tfm)) | ||
605 | return ERR_CAST(tfm); | ||
606 | if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { | ||
607 | crypto_free_ahash(tfm); | ||
608 | return ERR_PTR(-EINVAL); | ||
609 | } | ||
610 | |||
611 | return __mcryptd_ahash_cast(tfm); | ||
612 | } | ||
613 | EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash); | ||
614 | |||
615 | struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) | ||
616 | { | ||
617 | struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); | ||
618 | |||
619 | return ctx->child; | ||
620 | } | ||
621 | EXPORT_SYMBOL_GPL(mcryptd_ahash_child); | ||
622 | |||
623 | struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req) | ||
624 | { | ||
625 | struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
626 | return &rctx->areq; | ||
627 | } | ||
628 | EXPORT_SYMBOL_GPL(mcryptd_ahash_desc); | ||
629 | |||
630 | void mcryptd_free_ahash(struct mcryptd_ahash *tfm) | ||
631 | { | ||
632 | crypto_free_ahash(&tfm->base); | ||
633 | } | ||
634 | EXPORT_SYMBOL_GPL(mcryptd_free_ahash); | ||
635 | |||
636 | static int __init mcryptd_init(void) | ||
637 | { | ||
638 | int err, cpu; | ||
639 | struct mcryptd_flush_list *flist; | ||
640 | |||
641 | mcryptd_flist = alloc_percpu(struct mcryptd_flush_list); | ||
642 | for_each_possible_cpu(cpu) { | ||
643 | flist = per_cpu_ptr(mcryptd_flist, cpu); | ||
644 | INIT_LIST_HEAD(&flist->list); | ||
645 | mutex_init(&flist->lock); | ||
646 | } | ||
647 | |||
648 | err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN); | ||
649 | if (err) { | ||
650 | free_percpu(mcryptd_flist); | ||
651 | return err; | ||
652 | } | ||
653 | |||
654 | err = crypto_register_template(&mcryptd_tmpl); | ||
655 | if (err) { | ||
656 | mcryptd_fini_queue(&mqueue); | ||
657 | free_percpu(mcryptd_flist); | ||
658 | } | ||
659 | |||
660 | return err; | ||
661 | } | ||
662 | |||
663 | static void __exit mcryptd_exit(void) | ||
664 | { | ||
665 | mcryptd_fini_queue(&mqueue); | ||
666 | crypto_unregister_template(&mcryptd_tmpl); | ||
667 | free_percpu(mcryptd_flist); | ||
668 | } | ||
669 | |||
670 | subsys_initcall(mcryptd_init); | ||
671 | module_exit(mcryptd_exit); | ||
672 | |||
673 | MODULE_LICENSE("GPL"); | ||
674 | MODULE_DESCRIPTION("Software async multibuffer crypto daemon"); | ||
675 | MODULE_ALIAS_CRYPTO("mcryptd"); | ||
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h deleted file mode 100644 index b67404fc4b34..000000000000 --- a/include/crypto/mcryptd.h +++ /dev/null | |||
@@ -1,114 +0,0 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Software async multibuffer crypto daemon headers | ||
4 | * | ||
5 | * Author: | ||
6 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
7 | * | ||
8 | * Copyright (c) 2014, Intel Corporation. | ||
9 | */ | ||
10 | |||
11 | #ifndef _CRYPTO_MCRYPT_H | ||
12 | #define _CRYPTO_MCRYPT_H | ||
13 | |||
14 | #include <linux/crypto.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <crypto/hash.h> | ||
17 | |||
18 | struct mcryptd_ahash { | ||
19 | struct crypto_ahash base; | ||
20 | }; | ||
21 | |||
22 | static inline struct mcryptd_ahash *__mcryptd_ahash_cast( | ||
23 | struct crypto_ahash *tfm) | ||
24 | { | ||
25 | return (struct mcryptd_ahash *)tfm; | ||
26 | } | ||
27 | |||
28 | struct mcryptd_cpu_queue { | ||
29 | struct crypto_queue queue; | ||
30 | spinlock_t q_lock; | ||
31 | struct work_struct work; | ||
32 | }; | ||
33 | |||
34 | struct mcryptd_queue { | ||
35 | struct mcryptd_cpu_queue __percpu *cpu_queue; | ||
36 | }; | ||
37 | |||
38 | struct mcryptd_instance_ctx { | ||
39 | struct crypto_spawn spawn; | ||
40 | struct mcryptd_queue *queue; | ||
41 | }; | ||
42 | |||
43 | struct mcryptd_hash_ctx { | ||
44 | struct crypto_ahash *child; | ||
45 | struct mcryptd_alg_state *alg_state; | ||
46 | }; | ||
47 | |||
48 | struct mcryptd_tag { | ||
49 | /* seq number of request */ | ||
50 | unsigned seq_num; | ||
51 | /* arrival time of request */ | ||
52 | unsigned long arrival; | ||
53 | unsigned long expire; | ||
54 | int cpu; | ||
55 | }; | ||
56 | |||
57 | struct mcryptd_hash_request_ctx { | ||
58 | struct list_head waiter; | ||
59 | crypto_completion_t complete; | ||
60 | struct mcryptd_tag tag; | ||
61 | struct crypto_hash_walk walk; | ||
62 | u8 *out; | ||
63 | int flag; | ||
64 | struct ahash_request areq; | ||
65 | }; | ||
66 | |||
67 | struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, | ||
68 | u32 type, u32 mask); | ||
69 | struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm); | ||
70 | struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req); | ||
71 | void mcryptd_free_ahash(struct mcryptd_ahash *tfm); | ||
72 | void mcryptd_flusher(struct work_struct *work); | ||
73 | |||
74 | enum mcryptd_req_type { | ||
75 | MCRYPTD_NONE, | ||
76 | MCRYPTD_UPDATE, | ||
77 | MCRYPTD_FINUP, | ||
78 | MCRYPTD_DIGEST, | ||
79 | MCRYPTD_FINAL | ||
80 | }; | ||
81 | |||
82 | struct mcryptd_alg_cstate { | ||
83 | unsigned long next_flush; | ||
84 | unsigned next_seq_num; | ||
85 | bool flusher_engaged; | ||
86 | struct delayed_work flush; | ||
87 | int cpu; | ||
88 | struct mcryptd_alg_state *alg_state; | ||
89 | void *mgr; | ||
90 | spinlock_t work_lock; | ||
91 | struct list_head work_list; | ||
92 | struct list_head flush_list; | ||
93 | }; | ||
94 | |||
95 | struct mcryptd_alg_state { | ||
96 | struct mcryptd_alg_cstate __percpu *alg_cstate; | ||
97 | unsigned long (*flusher)(struct mcryptd_alg_cstate *cstate); | ||
98 | }; | ||
99 | |||
100 | /* return delay in jiffies from current time */ | ||
101 | static inline unsigned long get_delay(unsigned long t) | ||
102 | { | ||
103 | long delay; | ||
104 | |||
105 | delay = (long) t - (long) jiffies; | ||
106 | if (delay <= 0) | ||
107 | return 0; | ||
108 | else | ||
109 | return (unsigned long) delay; | ||
110 | } | ||
111 | |||
112 | void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay); | ||
113 | |||
114 | #endif | ||