diff options
author | Aaron Wu <aaron.wu@analog.com> | 2018-03-15 06:50:11 -0400 |
---|---|---|
committer | Arnd Bergmann <arnd@arndb.de> | 2018-03-26 09:56:37 -0400 |
commit | dabad54949930844d237af4b55c14eaff829c888 (patch) | |
tree | b6926e97e750e913bed9851cea058a63d670064d | |
parent | 64f5fdd951d5e1558d355aefbe661739eef0c8e4 (diff) |
misc: Remove Blackfin DSP echo support
Remove Blackfin DSP echo support
Signed-off-by: Aaron Wu <aaron.wu@analog.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
-rw-r--r-- | drivers/misc/echo/echo.c | 73 | ||||
-rw-r--r-- | drivers/misc/echo/fir.h | 50 |
2 files changed, 0 insertions, 123 deletions
diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c index 9597e9523cac..8a5adc0d2e88 100644 --- a/drivers/misc/echo/echo.c +++ b/drivers/misc/echo/echo.c | |||
@@ -115,78 +115,6 @@ | |||
115 | 115 | ||
116 | /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ | 116 | /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ |
117 | 117 | ||
118 | #ifdef __bfin__ | ||
119 | static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) | ||
120 | { | ||
121 | int i; | ||
122 | int offset1; | ||
123 | int offset2; | ||
124 | int factor; | ||
125 | int exp; | ||
126 | int16_t *phist; | ||
127 | int n; | ||
128 | |||
129 | if (shift > 0) | ||
130 | factor = clean << shift; | ||
131 | else | ||
132 | factor = clean >> -shift; | ||
133 | |||
134 | /* Update the FIR taps */ | ||
135 | |||
136 | offset2 = ec->curr_pos; | ||
137 | offset1 = ec->taps - offset2; | ||
138 | phist = &ec->fir_state_bg.history[offset2]; | ||
139 | |||
140 | /* st: and en: help us locate the assembler in echo.s */ | ||
141 | |||
142 | /* asm("st:"); */ | ||
143 | n = ec->taps; | ||
144 | for (i = 0; i < n; i++) { | ||
145 | exp = *phist++ * factor; | ||
146 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | ||
147 | } | ||
148 | /* asm("en:"); */ | ||
149 | |||
150 | /* Note the asm for the inner loop above generated by Blackfin gcc | ||
151 | 4.1.1 is pretty good (note even parallel instructions used): | ||
152 | |||
153 | R0 = W [P0++] (X); | ||
154 | R0 *= R2; | ||
155 | R0 = R0 + R3 (NS) || | ||
156 | R1 = W [P1] (X) || | ||
157 | nop; | ||
158 | R0 >>>= 15; | ||
159 | R0 = R0 + R1; | ||
160 | W [P1++] = R0; | ||
161 | |||
162 | A block based update algorithm would be much faster but the | ||
163 | above can't be improved on much. Every instruction saved in | ||
164 | the loop above is 2 MIPs/ch! The for loop above is where the | ||
165 | Blackfin spends most of its time - about 17 MIPs/ch measured | ||
166 | with speedtest.c with 256 taps (32ms). Write-back and | ||
167 | Write-through cache gave about the same performance. | ||
168 | */ | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | IDEAS for further optimisation of lms_adapt_bg(): | ||
173 | |||
174 | 1/ The rounding is quite costly. Could we keep as 32 bit coeffs | ||
175 | then make filter pluck the MS 16-bits of the coeffs when filtering? | ||
176 | However this would lower potential optimisation of filter, as I | ||
177 | think the dual-MAC architecture requires packed 16 bit coeffs. | ||
178 | |||
179 | 2/ Block based update would be more efficient, as per comments above, | ||
180 | could use dual MAC architecture. | ||
181 | |||
182 | 3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC | ||
183 | packing. | ||
184 | |||
185 | 4/ Execute the whole e/c in a block of say 20ms rather than sample | ||
186 | by sample. Processing a few samples every ms is inefficient. | ||
187 | */ | ||
188 | |||
189 | #else | ||
190 | static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) | 118 | static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) |
191 | { | 119 | { |
192 | int i; | 120 | int i; |
@@ -215,7 +143,6 @@ static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) | |||
215 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | 143 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); |
216 | } | 144 | } |
217 | } | 145 | } |
218 | #endif | ||
219 | 146 | ||
220 | static inline int top_bit(unsigned int bits) | 147 | static inline int top_bit(unsigned int bits) |
221 | { | 148 | { |
diff --git a/drivers/misc/echo/fir.h b/drivers/misc/echo/fir.h index 7b9fabf1fea5..4e0f365f0577 100644 --- a/drivers/misc/echo/fir.h +++ b/drivers/misc/echo/fir.h | |||
@@ -27,14 +27,6 @@ | |||
27 | #define _FIR_H_ | 27 | #define _FIR_H_ |
28 | 28 | ||
29 | /* | 29 | /* |
30 | Blackfin NOTES & IDEAS: | ||
31 | |||
32 | A simple dot product function is used to implement the filter. This performs | ||
33 | just one MAC/cycle which is inefficient but was easy to implement as a first | ||
34 | pass. The current Blackfin code also uses an unrolled form of the filter | ||
35 | history to avoid 0 length hardware loop issues. This is wasteful of | ||
36 | memory. | ||
37 | |||
38 | Ideas for improvement: | 30 | Ideas for improvement: |
39 | 31 | ||
40 | 1/ Rewrite filter for dual MAC inner loop. The issue here is handling | 32 | 1/ Rewrite filter for dual MAC inner loop. The issue here is handling |
@@ -94,21 +86,13 @@ static inline const int16_t *fir16_create(struct fir16_state_t *fir, | |||
94 | fir->taps = taps; | 86 | fir->taps = taps; |
95 | fir->curr_pos = taps - 1; | 87 | fir->curr_pos = taps - 1; |
96 | fir->coeffs = coeffs; | 88 | fir->coeffs = coeffs; |
97 | #if defined(__bfin__) | ||
98 | fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL); | ||
99 | #else | ||
100 | fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); | 89 | fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); |
101 | #endif | ||
102 | return fir->history; | 90 | return fir->history; |
103 | } | 91 | } |
104 | 92 | ||
105 | static inline void fir16_flush(struct fir16_state_t *fir) | 93 | static inline void fir16_flush(struct fir16_state_t *fir) |
106 | { | 94 | { |
107 | #if defined(__bfin__) | ||
108 | memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t)); | ||
109 | #else | ||
110 | memset(fir->history, 0, fir->taps * sizeof(int16_t)); | 95 | memset(fir->history, 0, fir->taps * sizeof(int16_t)); |
111 | #endif | ||
112 | } | 96 | } |
113 | 97 | ||
114 | static inline void fir16_free(struct fir16_state_t *fir) | 98 | static inline void fir16_free(struct fir16_state_t *fir) |
@@ -116,42 +100,9 @@ static inline void fir16_free(struct fir16_state_t *fir) | |||
116 | kfree(fir->history); | 100 | kfree(fir->history); |
117 | } | 101 | } |
118 | 102 | ||
119 | #ifdef __bfin__ | ||
120 | static inline int32_t dot_asm(short *x, short *y, int len) | ||
121 | { | ||
122 | int dot; | ||
123 | |||
124 | len--; | ||
125 | |||
126 | __asm__("I0 = %1;\n\t" | ||
127 | "I1 = %2;\n\t" | ||
128 | "A0 = 0;\n\t" | ||
129 | "R0.L = W[I0++] || R1.L = W[I1++];\n\t" | ||
130 | "LOOP dot%= LC0 = %3;\n\t" | ||
131 | "LOOP_BEGIN dot%=;\n\t" | ||
132 | "A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t" | ||
133 | "LOOP_END dot%=;\n\t" | ||
134 | "A0 += R0.L*R1.L (IS);\n\t" | ||
135 | "R0 = A0;\n\t" | ||
136 | "%0 = R0;\n\t" | ||
137 | : "=&d"(dot) | ||
138 | : "a"(x), "a"(y), "a"(len) | ||
139 | : "I0", "I1", "A1", "A0", "R0", "R1" | ||
140 | ); | ||
141 | |||
142 | return dot; | ||
143 | } | ||
144 | #endif | ||
145 | |||
146 | static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) | 103 | static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) |
147 | { | 104 | { |
148 | int32_t y; | 105 | int32_t y; |
149 | #if defined(__bfin__) | ||
150 | fir->history[fir->curr_pos] = sample; | ||
151 | fir->history[fir->curr_pos + fir->taps] = sample; | ||
152 | y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos], | ||
153 | fir->taps); | ||
154 | #else | ||
155 | int i; | 106 | int i; |
156 | int offset1; | 107 | int offset1; |
157 | int offset2; | 108 | int offset2; |
@@ -165,7 +116,6 @@ static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) | |||
165 | y += fir->coeffs[i] * fir->history[i - offset1]; | 116 | y += fir->coeffs[i] * fir->history[i - offset1]; |
166 | for (; i >= 0; i--) | 117 | for (; i >= 0; i--) |
167 | y += fir->coeffs[i] * fir->history[i + offset2]; | 118 | y += fir->coeffs[i] * fir->history[i + offset2]; |
168 | #endif | ||
169 | if (fir->curr_pos <= 0) | 119 | if (fir->curr_pos <= 0) |
170 | fir->curr_pos = fir->taps; | 120 | fir->curr_pos = fir->taps; |
171 | fir->curr_pos--; | 121 | fir->curr_pos--; |