aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/raid6/neon.c13
-rw-r--r--lib/raid6/neon.uc46
2 files changed, 58 insertions, 1 deletions
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
index d9ad6ee284f4..7076ef1ba3dd 100644
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -40,9 +40,20 @@
40 (unsigned long)bytes, ptrs); \ 40 (unsigned long)bytes, ptrs); \
41 kernel_neon_end(); \ 41 kernel_neon_end(); \
42 } \ 42 } \
43 static void raid6_neon ## _n ## _xor_syndrome(int disks, \
44 int start, int stop, \
45 size_t bytes, void **ptrs) \
46 { \
47 void raid6_neon ## _n ## _xor_syndrome_real(int, \
48 int, int, unsigned long, void**); \
49 kernel_neon_begin(); \
50 raid6_neon ## _n ## _xor_syndrome_real(disks, \
51 start, stop, (unsigned long)bytes, ptrs); \
52 kernel_neon_end(); \
53 } \
43 struct raid6_calls const raid6_neonx ## _n = { \ 54 struct raid6_calls const raid6_neonx ## _n = { \
44 raid6_neon ## _n ## _gen_syndrome, \ 55 raid6_neon ## _n ## _gen_syndrome, \
45 NULL, /* XOR not yet implemented */ \ 56 raid6_neon ## _n ## _xor_syndrome, \
46 raid6_have_neon, \ 57 raid6_have_neon, \
47 "neonx" #_n, \ 58 "neonx" #_n, \
48 0 \ 59 0 \
diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc
index 1b9ed793342d..4fa51b761dd0 100644
--- a/lib/raid6/neon.uc
+++ b/lib/raid6/neon.uc
@@ -3,6 +3,7 @@
3 * neon.uc - RAID-6 syndrome calculation using ARM NEON instructions 3 * neon.uc - RAID-6 syndrome calculation using ARM NEON instructions
4 * 4 *
5 * Copyright (C) 2012 Rob Herring 5 * Copyright (C) 2012 Rob Herring
6 * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
6 * 7 *
7 * Based on altivec.uc: 8 * Based on altivec.uc:
8 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved 9 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
@@ -78,3 +79,48 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
78 vst1q_u8(&q[d+NSIZE*$$], wq$$); 79 vst1q_u8(&q[d+NSIZE*$$], wq$$);
79 } 80 }
80} 81}
82
83void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop,
84 unsigned long bytes, void **ptrs)
85{
86 uint8_t **dptr = (uint8_t **)ptrs;
87 uint8_t *p, *q;
88 int d, z, z0;
89
90 register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
91 const unative_t x1d = NBYTES(0x1d);
92
93 z0 = stop; /* P/Q right side optimization */
94 p = dptr[disks-2]; /* XOR parity */
95 q = dptr[disks-1]; /* RS syndrome */
96
97 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
98 wq$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
99 wp$$ = veorq_u8(vld1q_u8(&p[d+$$*NSIZE]), wq$$);
100
101 /* P/Q data pages */
102 for ( z = z0-1 ; z >= start ; z-- ) {
103 wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
104 wp$$ = veorq_u8(wp$$, wd$$);
105 w2$$ = MASK(wq$$);
106 w1$$ = SHLBYTE(wq$$);
107
108 w2$$ = vandq_u8(w2$$, x1d);
109 w1$$ = veorq_u8(w1$$, w2$$);
110 wq$$ = veorq_u8(w1$$, wd$$);
111 }
112 /* P/Q left side optimization */
113 for ( z = start-1 ; z >= 0 ; z-- ) {
114 w2$$ = MASK(wq$$);
115 w1$$ = SHLBYTE(wq$$);
116
117 w2$$ = vandq_u8(w2$$, x1d);
118 wq$$ = veorq_u8(w1$$, w2$$);
119 }
120 w1$$ = vld1q_u8(&q[d+NSIZE*$$]);
121 wq$$ = veorq_u8(wq$$, w1$$);
122
123 vst1q_u8(&p[d+NSIZE*$$], wp$$);
124 vst1q_u8(&q[d+NSIZE*$$], wq$$);
125 }
126}