summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2016-02-02 08:40:40 -0500
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2016-02-23 02:56:17 -0500
commit2cfc5f9ce7f5e17553e84d36ea9563e677e369d1 (patch)
tree0a612a3d95c87ca7b72c3cb44574531bbabad2ea
parent9a99649f2a89fdfc9dde5d5401675561567bf99a (diff)
s390/xor: optimized xor routing using the XC instruction
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r--arch/s390/include/asm/xor.h21
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/xor.c134
3 files changed, 155 insertions, 2 deletions
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
index c82eb12a5b18..c988df744a70 100644
--- a/arch/s390/include/asm/xor.h
+++ b/arch/s390/include/asm/xor.h
@@ -1 +1,20 @@
1#include <asm-generic/xor.h> 1/*
2 * Optimized xor routines
3 *
4 * Copyright IBM Corp. 2016
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 */
7#ifndef _ASM_S390_XOR_H
8#define _ASM_S390_XOR_H
9
/* XC-based xor template; the object itself is defined in arch/s390/lib/xor.c. */
10extern struct xor_block_template xor_block_xc;
11
/*
 * Drop any earlier definition of XOR_TRY_TEMPLATES and replace it with one
 * that passes only xor_block_xc to xor_speed().
 * NOTE(review): xor_speed() is not declared in this header; it is presumably
 * provided by whichever file includes this one - confirm.
 */
12#undef XOR_TRY_TEMPLATES
13#define XOR_TRY_TEMPLATES \
14do { \
15 xor_speed(&xor_block_xc); \
16} while (0)
17
/* Always evaluates to &xor_block_xc; the FASTEST argument is ignored. */
18#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc)
19
20#endif /* _ASM_S390_XOR_H */
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 0e8fefe5b0ce..1d1af31e8354 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,7 @@
3# 3#
4 4
5lib-y += delay.o string.o uaccess.o find.o 5lib-y += delay.o string.o uaccess.o find.o
6obj-y += mem.o 6obj-y += mem.o xor.o
7lib-$(CONFIG_SMP) += spinlock.o 7lib-$(CONFIG_SMP) += spinlock.o
8lib-$(CONFIG_KPROBES) += probes.o 8lib-$(CONFIG_KPROBES) += probes.o
9lib-$(CONFIG_UPROBES) += probes.o 9lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
new file mode 100644
index 000000000000..7d94e3ec34a9
--- /dev/null
+++ b/arch/s390/lib/xor.c
@@ -0,0 +1,134 @@
1/*
2 * Optimized xor_block operation for RAID4/5
3 *
4 * Copyright IBM Corp. 2016
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 */
7
8#include <linux/types.h>
9#include <linux/module.h>
10#include <linux/raid/xor.h>
11
12static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
13{
14 asm volatile(
15 " larl 1,2f\n"
16 " aghi %0,-1\n"
17 " jm 3f\n"
18 " srlg 0,%0,8\n"
19 " ltgr 0,0\n"
20 " jz 1f\n"
21 "0: xc 0(256,%1),0(%2)\n"
22 " la %1,256(%1)\n"
23 " la %2,256(%2)\n"
24 " brctg 0,0b\n"
25 "1: ex %0,0(1)\n"
26 " j 3f\n"
27 "2: xc 0(1,%1),0(%2)\n"
28 "3:\n"
29 : : "d" (bytes), "a" (p1), "a" (p2)
30 : "0", "1", "cc", "memory");
31}
32
/*
 * xor_xc_3 - XOR the buffers at p2 and p3 into the buffer at p1 using XC.
 * @bytes: number of bytes to process; when 0 the asm branches straight to
 *         its end label
 * @p1:    buffer that is read and overwritten with the result
 * @p2:    first buffer XORed into p1
 * @p3:    second buffer XORed into p1
 *
 * (bytes - 1) >> 8 chunks of 256 bytes are handled by the loop at label 0.
 * The remaining ((bytes - 1) & 0xff) + 1 bytes are handled by EXECUTE of the
 * one-byte XC templates at label 2.
 *
 * All operands are read-write ("+") because the asm decrements %0 and
 * advances the pointers. General registers 0 and 1 are scratch and are
 * listed as clobbers.
 */
33static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
34 unsigned long *p3)
35{
36 asm volatile(
37 " larl 1,2f\n" /* r1 = address of the XC templates at label 2 */
38 " aghi %0,-1\n" /* bytes - 1 (XC length code is len - 1) */
39 " jm 3f\n" /* bytes was 0: nothing to do */
40 " srlg 0,%0,8\n" /* r0 = number of 256-byte chunks before the tail */
41 " ltgr 0,0\n"
42 " jz 1f\n" /* no full chunk: go straight to the tail */
43 "0: xc 0(256,%1),0(%2)\n"
44 " xc 0(256,%1),0(%3)\n"
45 " la %1,256(%1)\n"
46 " la %2,256(%2)\n"
47 " la %3,256(%3)\n"
48 " brctg 0,0b\n" /* decrement r0, loop while non-zero */
49 "1: ex %0,0(1)\n" /* tail: first template, length from low byte of %0 */
50 " ex %0,6(1)\n" /* second template; each XC occupies 6 bytes */
51 " j 3f\n" /* the templates below are only run via EX */
52 "2: xc 0(1,%1),0(%2)\n"
53 " xc 0(1,%1),0(%3)\n"
54 "3:\n"
55 : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
56 : : "0", "1", "cc", "memory");
57}
58
/*
 * xor_xc_4 - XOR the buffers at p2, p3 and p4 into the buffer at p1 using XC.
 * @bytes: number of bytes to process; when 0 the asm branches straight to
 *         its end label
 * @p1:    buffer that is read and overwritten with the result
 * @p2:    first buffer XORed into p1
 * @p3:    second buffer XORed into p1
 * @p4:    third buffer XORed into p1
 *
 * (bytes - 1) >> 8 chunks of 256 bytes are handled by the loop at label 0.
 * The remaining ((bytes - 1) & 0xff) + 1 bytes are handled by EXECUTE of the
 * one-byte XC templates at label 2.
 *
 * All operands are read-write ("+") because the asm decrements %0 and
 * advances the pointers. General registers 0 and 1 are scratch and are
 * listed as clobbers.
 */
59static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
60 unsigned long *p3, unsigned long *p4)
61{
62 asm volatile(
63 " larl 1,2f\n" /* r1 = address of the XC templates at label 2 */
64 " aghi %0,-1\n" /* bytes - 1 (XC length code is len - 1) */
65 " jm 3f\n" /* bytes was 0: nothing to do */
66 " srlg 0,%0,8\n" /* r0 = number of 256-byte chunks before the tail */
67 " ltgr 0,0\n"
68 " jz 1f\n" /* no full chunk: go straight to the tail */
69 "0: xc 0(256,%1),0(%2)\n"
70 " xc 0(256,%1),0(%3)\n"
71 " xc 0(256,%1),0(%4)\n"
72 " la %1,256(%1)\n"
73 " la %2,256(%2)\n"
74 " la %3,256(%3)\n"
75 " la %4,256(%4)\n"
76 " brctg 0,0b\n" /* decrement r0, loop while non-zero */
77 "1: ex %0,0(1)\n" /* tail: first template, length from low byte of %0 */
78 " ex %0,6(1)\n" /* second template; each XC occupies 6 bytes */
79 " ex %0,12(1)\n" /* third template */
80 " j 3f\n" /* the templates below are only run via EX */
81 "2: xc 0(1,%1),0(%2)\n"
82 " xc 0(1,%1),0(%3)\n"
83 " xc 0(1,%1),0(%4)\n"
84 "3:\n"
85 : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
86 : : "0", "1", "cc", "memory");
87}
88
/*
 * xor_xc_5 - XOR the buffers at p2..p5 into the buffer at p1 using XC.
 * @bytes: number of bytes to process; when 0 the asm branches straight to
 *         its end label
 * @p1:    buffer that is read and overwritten with the result
 * @p2:    first buffer XORed into p1
 * @p3:    second buffer XORed into p1
 * @p4:    third buffer XORed into p1
 * @p5:    fourth buffer XORed into p1 (passed to the asm via reg7)
 *
 * (bytes - 1) >> 8 chunks of 256 bytes are handled by the loop at label 0.
 * The remaining ((bytes - 1) & 0xff) + 1 bytes are handled by EXECUTE of the
 * one-byte XC templates at label 2.
 *
 * All operands are read-write ("+") because the asm decrements %0 and
 * advances the pointers. General registers 0 and 1 are scratch and are
 * listed as clobbers.
 */
89static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
90 unsigned long *p3, unsigned long *p4, unsigned long *p5)
91{
92 /* Get around a gcc oddity */
/*
 * p5 is pinned to general register 7 instead of being left to the
 * register allocator.
 * NOTE(review): the exact compiler problem is not described here;
 * presumably allocation failed with this many "a" operands - confirm.
 */
93 register unsigned long *reg7 asm ("7") = p5;
94
95 asm volatile(
96 " larl 1,2f\n" /* r1 = address of the XC templates at label 2 */
97 " aghi %0,-1\n" /* bytes - 1 (XC length code is len - 1) */
98 " jm 3f\n" /* bytes was 0: nothing to do */
99 " srlg 0,%0,8\n" /* r0 = number of 256-byte chunks before the tail */
100 " ltgr 0,0\n"
101 " jz 1f\n" /* no full chunk: go straight to the tail */
102 "0: xc 0(256,%1),0(%2)\n"
103 " xc 0(256,%1),0(%3)\n"
104 " xc 0(256,%1),0(%4)\n"
105 " xc 0(256,%1),0(%5)\n"
106 " la %1,256(%1)\n"
107 " la %2,256(%2)\n"
108 " la %3,256(%3)\n"
109 " la %4,256(%4)\n"
110 " la %5,256(%5)\n"
111 " brctg 0,0b\n" /* decrement r0, loop while non-zero */
112 "1: ex %0,0(1)\n" /* tail: first template, length from low byte of %0 */
113 " ex %0,6(1)\n" /* second template; each XC occupies 6 bytes */
114 " ex %0,12(1)\n" /* third template */
115 " ex %0,18(1)\n" /* fourth template */
116 " j 3f\n" /* the templates below are only run via EX */
117 "2: xc 0(1,%1),0(%2)\n"
118 " xc 0(1,%1),0(%3)\n"
119 " xc 0(1,%1),0(%4)\n"
120 " xc 0(1,%1),0(%5)\n"
121 "3:\n"
122 : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
123 "+a" (reg7)
124 : : "0", "1", "cc", "memory");
125}
126
/*
 * xor template named "xc": binds the 2- to 5-buffer handlers to the XC-based
 * routines defined in this file. Declared extern in
 * arch/s390/include/asm/xor.h and exported with EXPORT_SYMBOL.
 */
127struct xor_block_template xor_block_xc = {
128 .name = "xc",
129 .do_2 = xor_xc_2,
130 .do_3 = xor_xc_3,
131 .do_4 = xor_xc_4,
132 .do_5 = xor_xc_5,
133};
134EXPORT_SYMBOL(xor_block_xc);