diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2016-02-02 08:40:40 -0500 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2016-02-23 02:56:17 -0500 |
commit | 2cfc5f9ce7f5e17553e84d36ea9563e677e369d1 (patch) | |
tree | 0a612a3d95c87ca7b72c3cb44574531bbabad2ea | |
parent | 9a99649f2a89fdfc9dde5d5401675561567bf99a (diff) |
s390/xor: optimized xor routines using the XC instruction
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r-- | arch/s390/include/asm/xor.h | 21 | ||||
-rw-r--r-- | arch/s390/lib/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/lib/xor.c | 134 |
3 files changed, 155 insertions, 2 deletions
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h index c82eb12a5b18..c988df744a70 100644 --- a/arch/s390/include/asm/xor.h +++ b/arch/s390/include/asm/xor.h | |||
@@ -1 +1,20 @@ | |||
1 | #include <asm-generic/xor.h> | 1 | /* |
2 | * Optimized xor routines | ||
3 | * | ||
4 | * Copyright IBM Corp. 2016 | ||
5 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
6 | */ | ||
#ifndef _ASM_S390_XOR_H
#define _ASM_S390_XOR_H

/* XC based xor template; the definition lives outside this header. */
extern struct xor_block_template xor_block_xc;

/*
 * Replace any earlier XOR_TRY_TEMPLATES definition: the only template
 * handed to xor_speed() is the XC based one.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES			\
do {						\
	xor_speed(&xor_block_xc);		\
} while (0)

/* The FASTEST argument is ignored; the XC template is always selected. */
#define XOR_SELECT_TEMPLATE(FASTEST)	(&xor_block_xc)

#endif /* _ASM_S390_XOR_H */
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 0e8fefe5b0ce..1d1af31e8354 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile | |||
@@ -3,7 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y += delay.o string.o uaccess.o find.o | 5 | lib-y += delay.o string.o uaccess.o find.o |
6 | obj-y += mem.o | 6 | obj-y += mem.o xor.o |
7 | lib-$(CONFIG_SMP) += spinlock.o | 7 | lib-$(CONFIG_SMP) += spinlock.o |
8 | lib-$(CONFIG_KPROBES) += probes.o | 8 | lib-$(CONFIG_KPROBES) += probes.o |
9 | lib-$(CONFIG_UPROBES) += probes.o | 9 | lib-$(CONFIG_UPROBES) += probes.o |
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c new file mode 100644 index 000000000000..7d94e3ec34a9 --- /dev/null +++ b/arch/s390/lib/xor.c | |||
@@ -0,0 +1,134 @@ | |||
1 | /* | ||
2 | * Optimized xor_block operation for RAID4/5 | ||
3 | * | ||
4 | * Copyright IBM Corp. 2016 | ||
5 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
6 | */ | ||
7 | |||
8 | #include <linux/types.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/raid/xor.h> | ||
11 | |||
12 | static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) | ||
13 | { | ||
14 | asm volatile( | ||
15 | " larl 1,2f\n" | ||
16 | " aghi %0,-1\n" | ||
17 | " jm 3f\n" | ||
18 | " srlg 0,%0,8\n" | ||
19 | " ltgr 0,0\n" | ||
20 | " jz 1f\n" | ||
21 | "0: xc 0(256,%1),0(%2)\n" | ||
22 | " la %1,256(%1)\n" | ||
23 | " la %2,256(%2)\n" | ||
24 | " brctg 0,0b\n" | ||
25 | "1: ex %0,0(1)\n" | ||
26 | " j 3f\n" | ||
27 | "2: xc 0(1,%1),0(%2)\n" | ||
28 | "3:\n" | ||
29 | : : "d" (bytes), "a" (p1), "a" (p2) | ||
30 | : "0", "1", "cc", "memory"); | ||
31 | } | ||
32 | |||
33 | static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
34 | unsigned long *p3) | ||
35 | { | ||
36 | asm volatile( | ||
37 | " larl 1,2f\n" | ||
38 | " aghi %0,-1\n" | ||
39 | " jm 3f\n" | ||
40 | " srlg 0,%0,8\n" | ||
41 | " ltgr 0,0\n" | ||
42 | " jz 1f\n" | ||
43 | "0: xc 0(256,%1),0(%2)\n" | ||
44 | " xc 0(256,%1),0(%3)\n" | ||
45 | " la %1,256(%1)\n" | ||
46 | " la %2,256(%2)\n" | ||
47 | " la %3,256(%3)\n" | ||
48 | " brctg 0,0b\n" | ||
49 | "1: ex %0,0(1)\n" | ||
50 | " ex %0,6(1)\n" | ||
51 | " j 3f\n" | ||
52 | "2: xc 0(1,%1),0(%2)\n" | ||
53 | " xc 0(1,%1),0(%3)\n" | ||
54 | "3:\n" | ||
55 | : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) | ||
56 | : : "0", "1", "cc", "memory"); | ||
57 | } | ||
58 | |||
59 | static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
60 | unsigned long *p3, unsigned long *p4) | ||
61 | { | ||
62 | asm volatile( | ||
63 | " larl 1,2f\n" | ||
64 | " aghi %0,-1\n" | ||
65 | " jm 3f\n" | ||
66 | " srlg 0,%0,8\n" | ||
67 | " ltgr 0,0\n" | ||
68 | " jz 1f\n" | ||
69 | "0: xc 0(256,%1),0(%2)\n" | ||
70 | " xc 0(256,%1),0(%3)\n" | ||
71 | " xc 0(256,%1),0(%4)\n" | ||
72 | " la %1,256(%1)\n" | ||
73 | " la %2,256(%2)\n" | ||
74 | " la %3,256(%3)\n" | ||
75 | " la %4,256(%4)\n" | ||
76 | " brctg 0,0b\n" | ||
77 | "1: ex %0,0(1)\n" | ||
78 | " ex %0,6(1)\n" | ||
79 | " ex %0,12(1)\n" | ||
80 | " j 3f\n" | ||
81 | "2: xc 0(1,%1),0(%2)\n" | ||
82 | " xc 0(1,%1),0(%3)\n" | ||
83 | " xc 0(1,%1),0(%4)\n" | ||
84 | "3:\n" | ||
85 | : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) | ||
86 | : : "0", "1", "cc", "memory"); | ||
87 | } | ||
88 | |||
89 | static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
90 | unsigned long *p3, unsigned long *p4, unsigned long *p5) | ||
91 | { | ||
92 | /* Get around a gcc oddity */ | ||
93 | register unsigned long *reg7 asm ("7") = p5; | ||
94 | |||
95 | asm volatile( | ||
96 | " larl 1,2f\n" | ||
97 | " aghi %0,-1\n" | ||
98 | " jm 3f\n" | ||
99 | " srlg 0,%0,8\n" | ||
100 | " ltgr 0,0\n" | ||
101 | " jz 1f\n" | ||
102 | "0: xc 0(256,%1),0(%2)\n" | ||
103 | " xc 0(256,%1),0(%3)\n" | ||
104 | " xc 0(256,%1),0(%4)\n" | ||
105 | " xc 0(256,%1),0(%5)\n" | ||
106 | " la %1,256(%1)\n" | ||
107 | " la %2,256(%2)\n" | ||
108 | " la %3,256(%3)\n" | ||
109 | " la %4,256(%4)\n" | ||
110 | " la %5,256(%5)\n" | ||
111 | " brctg 0,0b\n" | ||
112 | "1: ex %0,0(1)\n" | ||
113 | " ex %0,6(1)\n" | ||
114 | " ex %0,12(1)\n" | ||
115 | " ex %0,18(1)\n" | ||
116 | " j 3f\n" | ||
117 | "2: xc 0(1,%1),0(%2)\n" | ||
118 | " xc 0(1,%1),0(%3)\n" | ||
119 | " xc 0(1,%1),0(%4)\n" | ||
120 | " xc 0(1,%1),0(%5)\n" | ||
121 | "3:\n" | ||
122 | : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), | ||
123 | "+a" (reg7) | ||
124 | : : "0", "1", "cc", "memory"); | ||
125 | } | ||
126 | |||
127 | struct xor_block_template xor_block_xc = { | ||
128 | .name = "xc", | ||
129 | .do_2 = xor_xc_2, | ||
130 | .do_3 = xor_xc_3, | ||
131 | .do_4 = xor_xc_4, | ||
132 | .do_5 = xor_xc_5, | ||
133 | }; | ||
134 | EXPORT_SYMBOL(xor_block_xc); | ||