diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2014-02-28 17:08:42 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2014-02-28 17:08:42 -0500 |
commit | 6e2055a9e56e292715f935a85f381e54c1f54269 (patch) | |
tree | 5cdb033f7da95ba47c37a42602c6d88d55e11db5 /drivers/misc | |
parent | dc93c85235efa5201e9a3c116bc3fbd1afc1a182 (diff) |
staging: echo: move to drivers/misc/
The code is clean, there are users of it, so it doesn't belong in
staging anymore, move it to drivers/misc/.
Cc: Steve Underwood <steveu@coppice.org>
Cc: David Rowe <david@rowetel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/misc')
-rw-r--r-- | drivers/misc/Kconfig | 1 | ||||
-rw-r--r-- | drivers/misc/Makefile | 1 | ||||
-rw-r--r-- | drivers/misc/echo/Kconfig | 9 | ||||
-rw-r--r-- | drivers/misc/echo/Makefile | 1 | ||||
-rw-r--r-- | drivers/misc/echo/echo.c | 674 | ||||
-rw-r--r-- | drivers/misc/echo/echo.h | 187 | ||||
-rw-r--r-- | drivers/misc/echo/fir.h | 216 | ||||
-rw-r--r-- | drivers/misc/echo/oslec.h | 94 |
8 files changed, 1183 insertions, 0 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 6cb388e8fb7d..3816b59d3e1e 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig | |||
@@ -526,4 +526,5 @@ source "drivers/misc/mei/Kconfig" | |||
526 | source "drivers/misc/vmw_vmci/Kconfig" | 526 | source "drivers/misc/vmw_vmci/Kconfig" |
527 | source "drivers/misc/mic/Kconfig" | 527 | source "drivers/misc/mic/Kconfig" |
528 | source "drivers/misc/genwqe/Kconfig" | 528 | source "drivers/misc/genwqe/Kconfig" |
529 | source "drivers/misc/echo/Kconfig" | ||
529 | endmenu | 530 | endmenu |
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 99b9424ce31d..7eb4b69580c0 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile | |||
@@ -54,3 +54,4 @@ obj-$(CONFIG_LATTICE_ECP3_CONFIG) += lattice-ecp3-config.o | |||
54 | obj-$(CONFIG_SRAM) += sram.o | 54 | obj-$(CONFIG_SRAM) += sram.o |
55 | obj-y += mic/ | 55 | obj-y += mic/ |
56 | obj-$(CONFIG_GENWQE) += genwqe/ | 56 | obj-$(CONFIG_GENWQE) += genwqe/ |
57 | obj-$(CONFIG_ECHO) += echo/ | ||
diff --git a/drivers/misc/echo/Kconfig b/drivers/misc/echo/Kconfig new file mode 100644 index 000000000000..f1d41ea9cd48 --- /dev/null +++ b/drivers/misc/echo/Kconfig | |||
@@ -0,0 +1,9 @@ | |||
1 | config ECHO | ||
2 | tristate "Line Echo Canceller support" | ||
3 | default n | ||
4 | ---help--- | ||
5 | This driver provides line echo cancelling support for mISDN and | ||
6 | Zaptel drivers. | ||
7 | |||
8 | To compile this driver as a module, choose M here. The module | ||
9 | will be called echo. | ||
diff --git a/drivers/misc/echo/Makefile b/drivers/misc/echo/Makefile new file mode 100644 index 000000000000..7d4caac12a8d --- /dev/null +++ b/drivers/misc/echo/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-$(CONFIG_ECHO) += echo.o | |||
diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c new file mode 100644 index 000000000000..9597e9523cac --- /dev/null +++ b/drivers/misc/echo/echo.c | |||
@@ -0,0 +1,674 @@ | |||
1 | /* | ||
2 | * SpanDSP - a series of DSP components for telephony | ||
3 | * | ||
4 | * echo.c - A line echo canceller. This code is being developed | ||
5 | * against and partially complies with G168. | ||
6 | * | ||
7 | * Written by Steve Underwood <steveu@coppice.org> | ||
8 | * and David Rowe <david_at_rowetel_dot_com> | ||
9 | * | ||
10 | * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe | ||
11 | * | ||
12 | * Based on a bit from here, a bit from there, eye of toad, ear of | ||
13 | * bat, 15 years of failed attempts by David and a few fried brain | ||
14 | * cells. | ||
15 | * | ||
16 | * All rights reserved. | ||
17 | * | ||
18 | * This program is free software; you can redistribute it and/or modify | ||
19 | * it under the terms of the GNU General Public License version 2, as | ||
20 | * published by the Free Software Foundation. | ||
21 | * | ||
22 | * This program is distributed in the hope that it will be useful, | ||
23 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
24 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
25 | * GNU General Public License for more details. | ||
26 | * | ||
27 | * You should have received a copy of the GNU General Public License | ||
28 | * along with this program; if not, write to the Free Software | ||
29 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
30 | */ | ||
31 | |||
32 | /*! \file */ | ||
33 | |||
34 | /* Implementation Notes | ||
35 | David Rowe | ||
36 | April 2007 | ||
37 | |||
38 | This code started life as Steve's NLMS algorithm with a tap | ||
39 | rotation algorithm to handle divergence during double talk. I | ||
40 | added a Geigel Double Talk Detector (DTD) [2] and performed some | ||
41 | G168 tests. However I had trouble meeting the G168 requirements, | ||
42 | especially for double talk - there were always cases where my DTD | ||
43 | failed, for example where near end speech was under the 6dB | ||
44 | threshold required for declaring double talk. | ||
45 | |||
46 | So I tried a two path algorithm [1], which has so far given better | ||
47 | results. The original tap rotation/Geigel algorithm is available | ||
48 | in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit. | ||
49 | It's probably possible to make it work if someone wants to put some | ||
50 | serious work into it. | ||
51 | |||
52 | At present no special treatment is provided for tones, which | ||
53 | generally cause NLMS algorithms to diverge. Initial runs of a | ||
54 | subset of the G168 tests for tones (e.g ./echo_test 6) show the | ||
55 | current algorithm is passing OK, which is kind of surprising. The | ||
56 | full set of tests needs to be performed to confirm this result. | ||
57 | |||
58 | One other interesting change is that I have managed to get the NLMS | ||
59 | code to work with 16 bit coefficients, rather than the original 32 | ||
60 | bit coefficients. This reduces the MIPs and storage required. | ||
61 | I evaluated the 16 bit port using g168_tests.sh and listening tests | ||
62 | on 4 real-world samples. | ||
63 | |||
64 | I also attempted the implementation of a block based NLMS update | ||
65 | [2] but although this passes g168_tests.sh it didn't converge well | ||
66 | on the real-world samples. I have no idea why, perhaps a scaling | ||
67 | problem. The block based code is also available in SVN | ||
68 | http://svn.rowetel.com/software/oslec/tags/before_16bit. If this | ||
69 | code can be debugged, it will lead to further reduction in MIPS, as | ||
70 | the block update code maps nicely onto DSP instruction sets (it's a | ||
71 | dot product) compared to the current sample-by-sample update. | ||
72 | |||
73 | Steve also has some nice notes on echo cancellers in echo.h | ||
74 | |||
75 | References: | ||
76 | |||
77 | [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo | ||
78 | Path Models", IEEE Transactions on communications, COM-25, | ||
79 | No. 6, June | ||
80 | 1977. | ||
81 | http://www.rowetel.com/images/echo/dual_path_paper.pdf | ||
82 | |||
83 | [2] The classic, very useful paper that tells you how to | ||
84 | actually build a real world echo canceller: | ||
85 | Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice | ||
86 | Echo Canceller with a TMS32020", | ||
87 | http://www.rowetel.com/images/echo/spra129.pdf | ||
88 | |||
89 | [3] I have written a series of blog posts on this work, here is | ||
90 | Part 1: http://www.rowetel.com/blog/?p=18 | ||
91 | |||
92 | [4] The source code http://svn.rowetel.com/software/oslec/ | ||
93 | |||
94 | [5] A nice reference on LMS filters: | ||
95 | http://en.wikipedia.org/wiki/Least_mean_squares_filter | ||
96 | |||
97 | Credits: | ||
98 | |||
99 | Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan | ||
100 | Muthukrishnan for their suggestions and email discussions. Thanks | ||
101 | also to those people who collected echo samples for me such as | ||
102 | Mark, Pawel, and Pavel. | ||
103 | */ | ||
104 | |||
105 | #include <linux/kernel.h> | ||
106 | #include <linux/module.h> | ||
107 | #include <linux/slab.h> | ||
108 | |||
109 | #include "echo.h" | ||
110 | |||
111 | #define MIN_TX_POWER_FOR_ADAPTION 64 | ||
112 | #define MIN_RX_POWER_FOR_ADAPTION 64 | ||
113 | #define DTD_HANGOVER 600 /* 600 samples, or 75ms */ | ||
114 | #define DC_LOG2BETA 3 /* log2() of DC filter Beta */ | ||
115 | |||
116 | /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ | ||
117 | |||
118 | #ifdef __bfin__ | ||
119 | static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) | ||
120 | { | ||
121 | int i; | ||
122 | int offset1; | ||
123 | int offset2; | ||
124 | int factor; | ||
125 | int exp; | ||
126 | int16_t *phist; | ||
127 | int n; | ||
128 | |||
129 | if (shift > 0) | ||
130 | factor = clean << shift; | ||
131 | else | ||
132 | factor = clean >> -shift; | ||
133 | |||
134 | /* Update the FIR taps */ | ||
135 | |||
136 | offset2 = ec->curr_pos; | ||
137 | offset1 = ec->taps - offset2; | ||
138 | phist = &ec->fir_state_bg.history[offset2]; | ||
139 | |||
140 | /* st: and en: help us locate the assembler in echo.s */ | ||
141 | |||
142 | /* asm("st:"); */ | ||
143 | n = ec->taps; | ||
144 | for (i = 0; i < n; i++) { | ||
145 | exp = *phist++ * factor; | ||
146 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | ||
147 | } | ||
148 | /* asm("en:"); */ | ||
149 | |||
150 | /* Note the asm for the inner loop above generated by Blackfin gcc | ||
151 | 4.1.1 is pretty good (note even parallel instructions used): | ||
152 | |||
153 | R0 = W [P0++] (X); | ||
154 | R0 *= R2; | ||
155 | R0 = R0 + R3 (NS) || | ||
156 | R1 = W [P1] (X) || | ||
157 | nop; | ||
158 | R0 >>>= 15; | ||
159 | R0 = R0 + R1; | ||
160 | W [P1++] = R0; | ||
161 | |||
162 | A block based update algorithm would be much faster but the | ||
163 | above can't be improved on much. Every instruction saved in | ||
164 | the loop above is 2 MIPs/ch! The for loop above is where the | ||
165 | Blackfin spends most of its time - about 17 MIPs/ch measured | ||
166 | with speedtest.c with 256 taps (32ms). Write-back and | ||
167 | Write-through cache gave about the same performance. | ||
168 | */ | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | IDEAS for further optimisation of lms_adapt_bg(): | ||
173 | |||
174 | 1/ The rounding is quite costly. Could we keep as 32 bit coeffs | ||
175 | then make filter pluck the MS 16-bits of the coeffs when filtering? | ||
176 | However this would lower potential optimisation of filter, as I | ||
177 | think the dual-MAC architecture requires packed 16 bit coeffs. | ||
178 | |||
179 | 2/ Block based update would be more efficient, as per comments above, | ||
180 | could use dual MAC architecture. | ||
181 | |||
182 | 3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC | ||
183 | packing. | ||
184 | |||
185 | 4/ Execute the whole e/c in a block of say 20ms rather than sample | ||
186 | by sample. Processing a few samples every ms is inefficient. | ||
187 | */ | ||
188 | |||
189 | #else | ||
190 | static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) | ||
191 | { | ||
192 | int i; | ||
193 | |||
194 | int offset1; | ||
195 | int offset2; | ||
196 | int factor; | ||
197 | int exp; | ||
198 | |||
199 | if (shift > 0) | ||
200 | factor = clean << shift; | ||
201 | else | ||
202 | factor = clean >> -shift; | ||
203 | |||
204 | /* Update the FIR taps */ | ||
205 | |||
206 | offset2 = ec->curr_pos; | ||
207 | offset1 = ec->taps - offset2; | ||
208 | |||
209 | for (i = ec->taps - 1; i >= offset1; i--) { | ||
210 | exp = (ec->fir_state_bg.history[i - offset1] * factor); | ||
211 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | ||
212 | } | ||
213 | for (; i >= 0; i--) { | ||
214 | exp = (ec->fir_state_bg.history[i + offset2] * factor); | ||
215 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | ||
216 | } | ||
217 | } | ||
218 | #endif | ||
219 | |||
220 | static inline int top_bit(unsigned int bits) | ||
221 | { | ||
222 | if (bits == 0) | ||
223 | return -1; | ||
224 | else | ||
225 | return (int)fls((int32_t) bits) - 1; | ||
226 | } | ||
227 | |||
228 | struct oslec_state *oslec_create(int len, int adaption_mode) | ||
229 | { | ||
230 | struct oslec_state *ec; | ||
231 | int i; | ||
232 | const int16_t *history; | ||
233 | |||
234 | ec = kzalloc(sizeof(*ec), GFP_KERNEL); | ||
235 | if (!ec) | ||
236 | return NULL; | ||
237 | |||
238 | ec->taps = len; | ||
239 | ec->log2taps = top_bit(len); | ||
240 | ec->curr_pos = ec->taps - 1; | ||
241 | |||
242 | ec->fir_taps16[0] = | ||
243 | kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); | ||
244 | if (!ec->fir_taps16[0]) | ||
245 | goto error_oom_0; | ||
246 | |||
247 | ec->fir_taps16[1] = | ||
248 | kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); | ||
249 | if (!ec->fir_taps16[1]) | ||
250 | goto error_oom_1; | ||
251 | |||
252 | history = fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps); | ||
253 | if (!history) | ||
254 | goto error_state; | ||
255 | history = fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps); | ||
256 | if (!history) | ||
257 | goto error_state_bg; | ||
258 | |||
259 | for (i = 0; i < 5; i++) | ||
260 | ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0; | ||
261 | |||
262 | ec->cng_level = 1000; | ||
263 | oslec_adaption_mode(ec, adaption_mode); | ||
264 | |||
265 | ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); | ||
266 | if (!ec->snapshot) | ||
267 | goto error_snap; | ||
268 | |||
269 | ec->cond_met = 0; | ||
270 | ec->pstates = 0; | ||
271 | ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; | ||
272 | ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; | ||
273 | ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; | ||
274 | ec->lbgn = ec->lbgn_acc = 0; | ||
275 | ec->lbgn_upper = 200; | ||
276 | ec->lbgn_upper_acc = ec->lbgn_upper << 13; | ||
277 | |||
278 | return ec; | ||
279 | |||
280 | error_snap: | ||
281 | fir16_free(&ec->fir_state_bg); | ||
282 | error_state_bg: | ||
283 | fir16_free(&ec->fir_state); | ||
284 | error_state: | ||
285 | kfree(ec->fir_taps16[1]); | ||
286 | error_oom_1: | ||
287 | kfree(ec->fir_taps16[0]); | ||
288 | error_oom_0: | ||
289 | kfree(ec); | ||
290 | return NULL; | ||
291 | } | ||
292 | EXPORT_SYMBOL_GPL(oslec_create); | ||
293 | |||
294 | void oslec_free(struct oslec_state *ec) | ||
295 | { | ||
296 | int i; | ||
297 | |||
298 | fir16_free(&ec->fir_state); | ||
299 | fir16_free(&ec->fir_state_bg); | ||
300 | for (i = 0; i < 2; i++) | ||
301 | kfree(ec->fir_taps16[i]); | ||
302 | kfree(ec->snapshot); | ||
303 | kfree(ec); | ||
304 | } | ||
305 | EXPORT_SYMBOL_GPL(oslec_free); | ||
306 | |||
307 | void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode) | ||
308 | { | ||
309 | ec->adaption_mode = adaption_mode; | ||
310 | } | ||
311 | EXPORT_SYMBOL_GPL(oslec_adaption_mode); | ||
312 | |||
313 | void oslec_flush(struct oslec_state *ec) | ||
314 | { | ||
315 | int i; | ||
316 | |||
317 | ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; | ||
318 | ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; | ||
319 | ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; | ||
320 | |||
321 | ec->lbgn = ec->lbgn_acc = 0; | ||
322 | ec->lbgn_upper = 200; | ||
323 | ec->lbgn_upper_acc = ec->lbgn_upper << 13; | ||
324 | |||
325 | ec->nonupdate_dwell = 0; | ||
326 | |||
327 | fir16_flush(&ec->fir_state); | ||
328 | fir16_flush(&ec->fir_state_bg); | ||
329 | ec->fir_state.curr_pos = ec->taps - 1; | ||
330 | ec->fir_state_bg.curr_pos = ec->taps - 1; | ||
331 | for (i = 0; i < 2; i++) | ||
332 | memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t)); | ||
333 | |||
334 | ec->curr_pos = ec->taps - 1; | ||
335 | ec->pstates = 0; | ||
336 | } | ||
337 | EXPORT_SYMBOL_GPL(oslec_flush); | ||
338 | |||
339 | void oslec_snapshot(struct oslec_state *ec) | ||
340 | { | ||
341 | memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t)); | ||
342 | } | ||
343 | EXPORT_SYMBOL_GPL(oslec_snapshot); | ||
344 | |||
345 | /* Dual Path Echo Canceller */ | ||
346 | |||
347 | int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx) | ||
348 | { | ||
349 | int32_t echo_value; | ||
350 | int clean_bg; | ||
351 | int tmp; | ||
352 | int tmp1; | ||
353 | |||
354 | /* | ||
355 | * Input scaling was found to be required to prevent problems when tx | ||
356 | * starts clipping. Another possible way to handle this would be the | ||
357 | * filter coefficient scaling. | ||
358 | */ | ||
359 | |||
360 | ec->tx = tx; | ||
361 | ec->rx = rx; | ||
362 | tx >>= 1; | ||
363 | rx >>= 1; | ||
364 | |||
365 | /* | ||
366 | * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision | ||
367 | * required otherwise values do not track down to 0. Zero at DC, Pole | ||
368 | * at (1-Beta) on real axis. Some chip sets (like Si labs) don't | ||
369 | * need this, but something like a $10 X100P card does. Any DC really | ||
370 | * slows down convergence. | ||
371 | * | ||
372 | * Note: removes some low frequency from the signal, this reduces the | ||
373 | * speech quality when listening to samples through headphones but may | ||
374 | * not be obvious through a telephone handset. | ||
375 | * | ||
376 | * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta | ||
377 | * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz. | ||
378 | */ | ||
379 | |||
380 | if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) { | ||
381 | tmp = rx << 15; | ||
382 | |||
383 | /* | ||
384 | * Make sure the gain of the HPF is 1.0. This can still | ||
385 | * saturate a little under impulse conditions, and it might | ||
386 | * roll to 32768 and need clipping on sustained peak level | ||
387 | * signals. However, the scale of such clipping is small, and | ||
388 | * the error due to any saturation should not markedly affect | ||
389 | * the downstream processing. | ||
390 | */ | ||
391 | tmp -= (tmp >> 4); | ||
392 | |||
393 | ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2; | ||
394 | |||
395 | /* | ||
396 | * hard limit filter to prevent clipping. Note that at this | ||
397 | * stage rx should be limited to +/- 16383 due to right shift | ||
398 | * above | ||
399 | */ | ||
400 | tmp1 = ec->rx_1 >> 15; | ||
401 | if (tmp1 > 16383) | ||
402 | tmp1 = 16383; | ||
403 | if (tmp1 < -16383) | ||
404 | tmp1 = -16383; | ||
405 | rx = tmp1; | ||
406 | ec->rx_2 = tmp; | ||
407 | } | ||
408 | |||
409 | /* Block average of power in the filter states. Used for | ||
410 | adaption power calculation. */ | ||
411 | |||
412 | { | ||
413 | int new, old; | ||
414 | |||
415 | /* efficient "out with the old and in with the new" algorithm so | ||
416 | we don't have to recalculate over the whole block of | ||
417 | samples. */ | ||
418 | new = (int)tx * (int)tx; | ||
419 | old = (int)ec->fir_state.history[ec->fir_state.curr_pos] * | ||
420 | (int)ec->fir_state.history[ec->fir_state.curr_pos]; | ||
421 | ec->pstates += | ||
422 | ((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps; | ||
423 | if (ec->pstates < 0) | ||
424 | ec->pstates = 0; | ||
425 | } | ||
426 | |||
427 | /* Calculate short term average levels using simple single pole IIRs */ | ||
428 | |||
429 | ec->ltxacc += abs(tx) - ec->ltx; | ||
430 | ec->ltx = (ec->ltxacc + (1 << 4)) >> 5; | ||
431 | ec->lrxacc += abs(rx) - ec->lrx; | ||
432 | ec->lrx = (ec->lrxacc + (1 << 4)) >> 5; | ||
433 | |||
434 | /* Foreground filter */ | ||
435 | |||
436 | ec->fir_state.coeffs = ec->fir_taps16[0]; | ||
437 | echo_value = fir16(&ec->fir_state, tx); | ||
438 | ec->clean = rx - echo_value; | ||
439 | ec->lcleanacc += abs(ec->clean) - ec->lclean; | ||
440 | ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5; | ||
441 | |||
442 | /* Background filter */ | ||
443 | |||
444 | echo_value = fir16(&ec->fir_state_bg, tx); | ||
445 | clean_bg = rx - echo_value; | ||
446 | ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg; | ||
447 | ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5; | ||
448 | |||
449 | /* Background Filter adaption */ | ||
450 | |||
451 | /* Almost always adapt bg filter, just simple DT and energy | ||
452 | detection to minimise adaption in cases of strong double talk. | ||
453 | However this is not critical for the dual path algorithm. | ||
454 | */ | ||
455 | ec->factor = 0; | ||
456 | ec->shift = 0; | ||
457 | if ((ec->nonupdate_dwell == 0)) { | ||
458 | int p, logp, shift; | ||
459 | |||
460 | /* Determine: | ||
461 | |||
462 | f = Beta * clean_bg_rx/P ------ (1) | ||
463 | |||
464 | where P is the total power in the filter states. | ||
465 | |||
466 | The Boffins have shown that if we obey (1) we converge | ||
467 | quickly and avoid instability. | ||
468 | |||
469 | The correct factor f must be in Q30, as this is the fixed | ||
470 | point format required by the lms_adapt_bg() function, | ||
471 | therefore the scaled version of (1) is: | ||
472 | |||
473 | (2^30) * f = (2^30) * Beta * clean_bg_rx/P | ||
474 | factor = (2^30) * Beta * clean_bg_rx/P ----- (2) | ||
475 | |||
476 | We have chosen Beta = 0.25 by experiment, so: | ||
477 | |||
478 | factor = (2^30) * (2^-2) * clean_bg_rx/P | ||
479 | |||
480 | (30 - 2 - log2(P)) | ||
481 | factor = clean_bg_rx 2 ----- (3) | ||
482 | |||
483 | To avoid a divide we approximate log2(P) as top_bit(P), | ||
484 | which returns the position of the highest non-zero bit in | ||
485 | P. This approximation introduces an error as large as a | ||
486 | factor of 2, but the algorithm seems to handle it OK. | ||
487 | |||
488 | Come to think of it a divide may not be a big deal on a | ||
489 | modern DSP, so it's probably worth checking out the cycles | ||
490 | for a divide versus a top_bit() implementation. | ||
491 | */ | ||
492 | |||
493 | p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates; | ||
494 | logp = top_bit(p) + ec->log2taps; | ||
495 | shift = 30 - 2 - logp; | ||
496 | ec->shift = shift; | ||
497 | |||
498 | lms_adapt_bg(ec, clean_bg, shift); | ||
499 | } | ||
500 | |||
501 | /* very simple DTD to make sure we don't try and adapt with strong | ||
502 | near end speech */ | ||
503 | |||
504 | ec->adapt = 0; | ||
505 | if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx)) | ||
506 | ec->nonupdate_dwell = DTD_HANGOVER; | ||
507 | if (ec->nonupdate_dwell) | ||
508 | ec->nonupdate_dwell--; | ||
509 | |||
510 | /* Transfer logic */ | ||
511 | |||
512 | /* These conditions are from the dual path paper [1], I messed with | ||
513 | them a bit to improve performance. */ | ||
514 | |||
515 | if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) && | ||
516 | (ec->nonupdate_dwell == 0) && | ||
517 | /* (ec->Lclean_bg < 0.875*ec->Lclean) */ | ||
518 | (8 * ec->lclean_bg < 7 * ec->lclean) && | ||
519 | /* (ec->Lclean_bg < 0.125*ec->Ltx) */ | ||
520 | (8 * ec->lclean_bg < ec->ltx)) { | ||
521 | if (ec->cond_met == 6) { | ||
522 | /* | ||
523 | * BG filter has had better results for 6 consecutive | ||
524 | * samples | ||
525 | */ | ||
526 | ec->adapt = 1; | ||
527 | memcpy(ec->fir_taps16[0], ec->fir_taps16[1], | ||
528 | ec->taps * sizeof(int16_t)); | ||
529 | } else | ||
530 | ec->cond_met++; | ||
531 | } else | ||
532 | ec->cond_met = 0; | ||
533 | |||
534 | /* Non-Linear Processing */ | ||
535 | |||
536 | ec->clean_nlp = ec->clean; | ||
537 | if (ec->adaption_mode & ECHO_CAN_USE_NLP) { | ||
538 | /* | ||
539 | * Non-linear processor - a fancy way to say "zap small | ||
540 | * signals, to avoid residual echo due to (uLaw/ALaw) | ||
541 | * non-linearity in the channel.". | ||
542 | */ | ||
543 | |||
544 | if ((16 * ec->lclean < ec->ltx)) { | ||
545 | /* | ||
546 | * Our e/c has improved echo by at least 24 dB (each | ||
547 | * factor of 2 is 6dB, so 2*2*2*2=16 is the same as | ||
548 | * 6+6+6+6=24dB) | ||
549 | */ | ||
550 | if (ec->adaption_mode & ECHO_CAN_USE_CNG) { | ||
551 | ec->cng_level = ec->lbgn; | ||
552 | |||
553 | /* | ||
554 | * Very elementary comfort noise generation. | ||
555 | * Just random numbers rolled off very vaguely | ||
556 | * Hoth-like. DR: This noise doesn't sound | ||
557 | * quite right to me - I suspect there are some | ||
558 | * overflow issues in the filtering as it's too | ||
559 | * "crackly". | ||
560 | * TODO: debug this, maybe just play noise at | ||
561 | * high level or look at spectrum. | ||
562 | */ | ||
563 | |||
564 | ec->cng_rndnum = | ||
565 | 1664525U * ec->cng_rndnum + 1013904223U; | ||
566 | ec->cng_filter = | ||
567 | ((ec->cng_rndnum & 0xFFFF) - 32768 + | ||
568 | 5 * ec->cng_filter) >> 3; | ||
569 | ec->clean_nlp = | ||
570 | (ec->cng_filter * ec->cng_level * 8) >> 14; | ||
571 | |||
572 | } else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) { | ||
573 | /* This sounds much better than CNG */ | ||
574 | if (ec->clean_nlp > ec->lbgn) | ||
575 | ec->clean_nlp = ec->lbgn; | ||
576 | if (ec->clean_nlp < -ec->lbgn) | ||
577 | ec->clean_nlp = -ec->lbgn; | ||
578 | } else { | ||
579 | /* | ||
580 | * just mute the residual, doesn't sound very | ||
581 | * good, used mainly in G168 tests | ||
582 | */ | ||
583 | ec->clean_nlp = 0; | ||
584 | } | ||
585 | } else { | ||
586 | /* | ||
587 | * Background noise estimator. I tried a few | ||
588 | * algorithms here without much luck. This very simple | ||
589 | * one seems to work best, we just average the level | ||
590 | * using a slow (1 sec time const) filter if the | ||
591 | * current level is less than a (experimentally | ||
592 | * derived) constant. This means we don't include high | ||
593 | * level signals like near end speech. When combined | ||
594 | * with CNG or especially CLIP seems to work OK. | ||
595 | */ | ||
596 | if (ec->lclean < 40) { | ||
597 | ec->lbgn_acc += abs(ec->clean) - ec->lbgn; | ||
598 | ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12; | ||
599 | } | ||
600 | } | ||
601 | } | ||
602 | |||
603 | /* Roll around the taps buffer */ | ||
604 | if (ec->curr_pos <= 0) | ||
605 | ec->curr_pos = ec->taps; | ||
606 | ec->curr_pos--; | ||
607 | |||
608 | if (ec->adaption_mode & ECHO_CAN_DISABLE) | ||
609 | ec->clean_nlp = rx; | ||
610 | |||
611 | /* Output scaled back up again to match input scaling */ | ||
612 | |||
613 | return (int16_t) ec->clean_nlp << 1; | ||
614 | } | ||
615 | EXPORT_SYMBOL_GPL(oslec_update); | ||
616 | |||
617 | /* This function is separated from the echo canceller as it is usually called | ||
618 | as part of the tx process. See rx HP (DC blocking) filter above, it's | ||
619 | the same design. | ||
620 | |||
621 | Some soft phones send speech signals with a lot of low frequency | ||
622 | energy, e.g. down to 20Hz. This can make the hybrid non-linear | ||
623 | which causes the echo canceller to fall over. This filter can help | ||
624 | by removing any low frequency before it gets to the tx port of the | ||
625 | hybrid. | ||
626 | |||
627 | It can also help by removing any DC in the tx signal. DC is bad | ||
628 | for LMS algorithms. | ||
629 | |||
630 | This is one of the classic DC removal filters, adjusted to provide | ||
631 | sufficient bass rolloff to meet the above requirement to protect hybrids | ||
632 | from things that upset them. The difference between successive samples | ||
633 | produces a lousy HPF, and then a suitably placed pole flattens things out. | ||
634 | The final result is a nicely rolled off bass end. The filtering is | ||
635 | implemented with extended fractional precision, which noise shapes things, | ||
636 | giving very clean DC removal. | ||
637 | */ | ||
638 | |||
639 | int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) | ||
640 | { | ||
641 | int tmp; | ||
642 | int tmp1; | ||
643 | |||
644 | if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) { | ||
645 | tmp = tx << 15; | ||
646 | |||
647 | /* | ||
648 | * Make sure the gain of the HPF is 1.0. The first can still | ||
649 | * saturate a little under impulse conditions, and it might | ||
650 | * roll to 32768 and need clipping on sustained peak level | ||
651 | * signals. However, the scale of such clipping is small, and | ||
652 | * the error due to any saturation should not markedly affect | ||
653 | * the downstream processing. | ||
654 | */ | ||
655 | tmp -= (tmp >> 4); | ||
656 | |||
657 | ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2; | ||
658 | tmp1 = ec->tx_1 >> 15; | ||
659 | if (tmp1 > 32767) | ||
660 | tmp1 = 32767; | ||
661 | if (tmp1 < -32767) | ||
662 | tmp1 = -32767; | ||
663 | tx = tmp1; | ||
664 | ec->tx_2 = tmp; | ||
665 | } | ||
666 | |||
667 | return tx; | ||
668 | } | ||
669 | EXPORT_SYMBOL_GPL(oslec_hpf_tx); | ||
670 | |||
671 | MODULE_LICENSE("GPL"); | ||
672 | MODULE_AUTHOR("David Rowe"); | ||
673 | MODULE_DESCRIPTION("Open Source Line Echo Canceller"); | ||
674 | MODULE_VERSION("0.3.0"); | ||
diff --git a/drivers/misc/echo/echo.h b/drivers/misc/echo/echo.h new file mode 100644 index 000000000000..9b08c63e6369 --- /dev/null +++ b/drivers/misc/echo/echo.h | |||
@@ -0,0 +1,187 @@ | |||
1 | /* | ||
2 | * SpanDSP - a series of DSP components for telephony | ||
3 | * | ||
4 | * echo.h - A line echo canceller. This code is being developed | ||
5 | * against and partially complies with G168. | ||
6 | * | ||
7 | * Written by Steve Underwood <steveu@coppice.org> | ||
8 | * and David Rowe <david_at_rowetel_dot_com> | ||
9 | * | ||
10 | * Copyright (C) 2001 Steve Underwood and 2007 David Rowe | ||
11 | * | ||
12 | * All rights reserved. | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify | ||
15 | * it under the terms of the GNU General Public License version 2, as | ||
16 | * published by the Free Software Foundation. | ||
17 | * | ||
18 | * This program is distributed in the hope that it will be useful, | ||
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
21 | * GNU General Public License for more details. | ||
22 | * | ||
23 | * You should have received a copy of the GNU General Public License | ||
24 | * along with this program; if not, write to the Free Software | ||
25 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
26 | */ | ||
27 | |||
28 | #ifndef __ECHO_H | ||
29 | #define __ECHO_H | ||
30 | |||
31 | /* | ||
32 | Line echo cancellation for voice | ||
33 | |||
34 | What does it do? | ||
35 | |||
36 | This module aims to provide G.168-2002 compliant echo cancellation, to remove | ||
37 | electrical echoes (e.g. from 2-4 wire hybrids) from voice calls. | ||
38 | |||
39 | How does it work? | ||
40 | |||
41 | The heart of the echo canceller is an FIR filter. This is adapted to match the | ||
42 | echo impulse response of the telephone line. It must be long enough to | ||
43 | adequately cover the duration of that impulse response. The signal transmitted | ||
44 | to the telephone line is passed through the FIR filter. Once the FIR is | ||
45 | properly adapted, the resulting output is an estimate of the echo signal | ||
46 | received from the line. This is subtracted from the received signal. The result | ||
47 | is an estimate of the signal which originated at the far end of the line, free | ||
48 | from echoes of our own transmitted signal. | ||
49 | |||
50 | The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and | ||
51 | was introduced in 1960. It is the commonest form of filter adaption used in | ||
52 | things like modem line equalisers and line echo cancellers. There it works very | ||
53 | well. However, it only works well for signals of constant amplitude. It works | ||
54 | very poorly for things like speech echo cancellation, where the signal level | ||
55 | varies widely. This is quite easy to fix. If the signal level is normalised - | ||
56 | similar to applying AGC - LMS can work as well for a signal of varying | ||
57 | amplitude as it does for a modem signal. This normalised least mean squares | ||
58 | (NLMS) algorithm is the commonest one used for speech echo cancellation. Many | ||
59 | other algorithms exist - e.g. RLS (essentially the same as Kalman filtering), | ||
60 | FAP, etc. Some perform significantly better than NLMS. However, factors such | ||
61 | as computational complexity and patents favour the use of NLMS. | ||
62 | |||
63 | A simple refinement to NLMS can improve its performance with speech. NLMS tends | ||
64 | to adapt best to the strongest parts of a signal. If the signal is white noise, | ||
65 | the NLMS algorithm works very well. However, speech has more low frequency than | ||
66 | high frequency content. Pre-whitening (i.e. filtering the signal to flatten its | ||
67 | spectrum) the echo signal improves the adapt rate for speech, and ensures the | ||
68 | final residual signal is not heavily biased towards high frequencies. A very | ||
69 | low complexity filter is adequate for this, so pre-whitening adds little to the | ||
70 | compute requirements of the echo canceller. | ||
71 | |||
72 | An FIR filter adapted using pre-whitened NLMS performs well, provided certain | ||
73 | conditions are met: | ||
74 | |||
75 | - The transmitted signal has poor self-correlation. | ||
76 | - There is no signal being generated within the environment being | ||
77 | cancelled. | ||
78 | |||
79 | The difficulty is that neither of these can be guaranteed. | ||
80 | |||
81 | If the adaption is performed while transmitting noise (or something fairly | ||
82 | noise like, such as voice) the adaption works very well. If the adaption is | ||
83 | performed while transmitting something highly correlative (typically narrow | ||
84 | band energy such as signalling tones or DTMF), the adaption can go seriously | ||
85 | wrong. The reason is there is only one solution for the adaption on a near | ||
86 | random signal - the impulse response of the line. For a repetitive signal, | ||
87 | there are any number of solutions which converge the adaption, and nothing | ||
88 | guides the adaption to choose the generalised one. Allowing an untrained | ||
89 | canceller to converge on this kind of narrowband energy is probably a good thing, | ||
90 | since at least it cancels the tones. Allowing a well converged canceller to | ||
91 | continue converging on such energy is just a way to ruin its generalised | ||
92 | adaption. A narrowband detector is needed, so adaption can be suspended at | ||
93 | appropriate times. | ||
94 | |||
95 | The adaption process is based on trying to eliminate the received signal. When | ||
96 | there is any signal from within the environment being cancelled it may upset | ||
97 | the adaption process. Similarly, if the signal we are transmitting is small, | ||
98 | noise may dominate and disturb the adaption process. If we can ensure that the | ||
99 | adaption is only performed when we are transmitting a significant signal level, | ||
100 | and the environment is not, things will be OK. Clearly, it is easy to tell when | ||
101 | we are sending a significant signal. Telling if the environment is generating | ||
102 | a significant signal, and doing it with sufficient speed that the adaption will | ||
103 | not have diverged too much before we stop it, is a little harder. | ||
104 | |||
105 | The key problem in detecting when the environment is sourcing significant | ||
106 | energy is that we must do this very quickly. Given a reasonably long sample of | ||
107 | the received signal, there are a number of strategies which may be used to | ||
108 | assess whether that signal contains a strong far end component. However, by the | ||
109 | time that assessment is complete the far end signal will have already caused | ||
110 | major mis-convergence in the adaption process. An assessment algorithm is | ||
111 | needed which produces a fairly accurate result from a very short burst of far | ||
112 | end energy. | ||
113 | |||
114 | How do I use it? | ||
115 | |||
116 | The echo canceller processes both the transmit and receive streams sample by | ||
117 | sample. The processing function is not declared inline. Unfortunately, | ||
118 | cancellation requires many operations per sample, so the call overhead is only | ||
119 | a minor burden. | ||
120 | */ | ||
121 | |||
122 | #include "fir.h" | ||
123 | #include "oslec.h" | ||
124 | |||
125 | /* | ||
126 | G.168 echo canceller descriptor. This defines the working state for a line | ||
127 | echo canceller. | ||
128 | */ | ||
129 | struct oslec_state { | ||
130 | int16_t tx; | ||
131 | int16_t rx; | ||
132 | int16_t clean; | ||
133 | int16_t clean_nlp; | ||
134 | |||
135 | int nonupdate_dwell; | ||
136 | int curr_pos; | ||
137 | int taps; | ||
138 | int log2taps; | ||
139 | int adaption_mode; | ||
140 | |||
141 | int cond_met; | ||
142 | int32_t pstates; | ||
143 | int16_t adapt; | ||
144 | int32_t factor; | ||
145 | int16_t shift; | ||
146 | |||
147 | /* Average levels and averaging filter states */ | ||
148 | int ltxacc; | ||
149 | int lrxacc; | ||
150 | int lcleanacc; | ||
151 | int lclean_bgacc; | ||
152 | int ltx; | ||
153 | int lrx; | ||
154 | int lclean; | ||
155 | int lclean_bg; | ||
156 | int lbgn; | ||
157 | int lbgn_acc; | ||
158 | int lbgn_upper; | ||
159 | int lbgn_upper_acc; | ||
160 | |||
161 | /* foreground and background filter states */ | ||
162 | struct fir16_state_t fir_state; | ||
163 | struct fir16_state_t fir_state_bg; | ||
164 | int16_t *fir_taps16[2]; | ||
165 | |||
166 | /* DC blocking filter states */ | ||
167 | int tx_1; | ||
168 | int tx_2; | ||
169 | int rx_1; | ||
170 | int rx_2; | ||
171 | |||
172 | /* optional High Pass Filter states */ | ||
173 | int32_t xvtx[5]; | ||
174 | int32_t yvtx[5]; | ||
175 | int32_t xvrx[5]; | ||
176 | int32_t yvrx[5]; | ||
177 | |||
178 | /* Parameters for the optional Hoth noise generator */ | ||
179 | int cng_level; | ||
180 | int cng_rndnum; | ||
181 | int cng_filter; | ||
182 | |||
183 | /* snapshot sample of coeffs used for development */ | ||
184 | int16_t *snapshot; | ||
185 | }; | ||
186 | |||
187 | #endif /* __ECHO_H */ | ||
diff --git a/drivers/misc/echo/fir.h b/drivers/misc/echo/fir.h new file mode 100644 index 000000000000..7b9fabf1fea5 --- /dev/null +++ b/drivers/misc/echo/fir.h | |||
@@ -0,0 +1,216 @@ | |||
1 | /* | ||
2 | * SpanDSP - a series of DSP components for telephony | ||
3 | * | ||
4 | * fir.h - General telephony FIR routines | ||
5 | * | ||
6 | * Written by Steve Underwood <steveu@coppice.org> | ||
7 | * | ||
8 | * Copyright (C) 2002 Steve Underwood | ||
9 | * | ||
10 | * All rights reserved. | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License version 2, as | ||
14 | * published by the Free Software Foundation. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | */ | ||
25 | |||
26 | #if !defined(_FIR_H_) | ||
27 | #define _FIR_H_ | ||
28 | |||
29 | /* | ||
30 | Blackfin NOTES & IDEAS: | ||
31 | |||
32 | A simple dot product function is used to implement the filter. This performs | ||
33 | just one MAC/cycle which is inefficient but was easy to implement as a first | ||
34 | pass. The current Blackfin code also uses an unrolled form of the filter | ||
35 | history to avoid 0 length hardware loop issues. This is wasteful of | ||
36 | memory. | ||
37 | |||
38 | Ideas for improvement: | ||
39 | |||
40 | 1/ Rewrite filter for dual MAC inner loop. The issue here is handling | ||
41 | history sample offsets that are 16 bit aligned - the dual MAC needs | ||
42 | 32 bit alignment. There are some good examples in libbfdsp. | ||
43 | |||
44 | 2/ Use the hardware circular buffer facility to halve memory usage. | ||
45 | |||
46 | 3/ Consider using internal memory. | ||
47 | |||
48 | Using less memory might also improve speed as cache misses will be | ||
49 | reduced. A drop in MIPs and memory approaching 50% should be | ||
50 | possible. | ||
51 | |||
52 | The foreground and background filters currently use a total of | ||
53 | about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo | ||
54 | can. | ||
55 | */ | ||
56 | |||
57 | /* | ||
58 | * 16 bit integer FIR descriptor. This defines the working state for a single | ||
59 | * instance of an FIR filter using 16 bit integer coefficients. | ||
60 | */ | ||
61 | struct fir16_state_t { | ||
62 | int taps; | ||
63 | int curr_pos; | ||
64 | const int16_t *coeffs; | ||
65 | int16_t *history; | ||
66 | }; | ||
67 | |||
68 | /* | ||
69 | * 32 bit integer FIR descriptor. This defines the working state for a single | ||
70 | * instance of an FIR filter using 32 bit integer coefficients, and filtering | ||
71 | * 16 bit integer data. | ||
72 | */ | ||
73 | struct fir32_state_t { | ||
74 | int taps; | ||
75 | int curr_pos; | ||
76 | const int32_t *coeffs; | ||
77 | int16_t *history; | ||
78 | }; | ||
79 | |||
80 | /* | ||
81 | * Floating point FIR descriptor. This defines the working state for a single | ||
82 | * instance of an FIR filter using floating point coefficients and data. | ||
83 | */ | ||
84 | struct fir_float_state_t { | ||
85 | int taps; | ||
86 | int curr_pos; | ||
87 | const float *coeffs; | ||
88 | float *history; | ||
89 | }; | ||
90 | |||
91 | static inline const int16_t *fir16_create(struct fir16_state_t *fir, | ||
92 | const int16_t *coeffs, int taps) | ||
93 | { | ||
94 | fir->taps = taps; | ||
95 | fir->curr_pos = taps - 1; | ||
96 | fir->coeffs = coeffs; | ||
97 | #if defined(__bfin__) | ||
98 | fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL); | ||
99 | #else | ||
100 | fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); | ||
101 | #endif | ||
102 | return fir->history; | ||
103 | } | ||
104 | |||
105 | static inline void fir16_flush(struct fir16_state_t *fir) | ||
106 | { | ||
107 | #if defined(__bfin__) | ||
108 | memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t)); | ||
109 | #else | ||
110 | memset(fir->history, 0, fir->taps * sizeof(int16_t)); | ||
111 | #endif | ||
112 | } | ||
113 | |||
114 | static inline void fir16_free(struct fir16_state_t *fir) | ||
115 | { | ||
116 | kfree(fir->history); | ||
117 | } | ||
118 | |||
119 | #ifdef __bfin__ | ||
120 | static inline int32_t dot_asm(short *x, short *y, int len) | ||
121 | { | ||
122 | int dot; | ||
123 | |||
124 | len--; | ||
125 | |||
126 | __asm__("I0 = %1;\n\t" | ||
127 | "I1 = %2;\n\t" | ||
128 | "A0 = 0;\n\t" | ||
129 | "R0.L = W[I0++] || R1.L = W[I1++];\n\t" | ||
130 | "LOOP dot%= LC0 = %3;\n\t" | ||
131 | "LOOP_BEGIN dot%=;\n\t" | ||
132 | "A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t" | ||
133 | "LOOP_END dot%=;\n\t" | ||
134 | "A0 += R0.L*R1.L (IS);\n\t" | ||
135 | "R0 = A0;\n\t" | ||
136 | "%0 = R0;\n\t" | ||
137 | : "=&d"(dot) | ||
138 | : "a"(x), "a"(y), "a"(len) | ||
139 | : "I0", "I1", "A1", "A0", "R0", "R1" | ||
140 | ); | ||
141 | |||
142 | return dot; | ||
143 | } | ||
144 | #endif | ||
145 | |||
146 | static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) | ||
147 | { | ||
148 | int32_t y; | ||
149 | #if defined(__bfin__) | ||
150 | fir->history[fir->curr_pos] = sample; | ||
151 | fir->history[fir->curr_pos + fir->taps] = sample; | ||
152 | y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos], | ||
153 | fir->taps); | ||
154 | #else | ||
155 | int i; | ||
156 | int offset1; | ||
157 | int offset2; | ||
158 | |||
159 | fir->history[fir->curr_pos] = sample; | ||
160 | |||
161 | offset2 = fir->curr_pos; | ||
162 | offset1 = fir->taps - offset2; | ||
163 | y = 0; | ||
164 | for (i = fir->taps - 1; i >= offset1; i--) | ||
165 | y += fir->coeffs[i] * fir->history[i - offset1]; | ||
166 | for (; i >= 0; i--) | ||
167 | y += fir->coeffs[i] * fir->history[i + offset2]; | ||
168 | #endif | ||
169 | if (fir->curr_pos <= 0) | ||
170 | fir->curr_pos = fir->taps; | ||
171 | fir->curr_pos--; | ||
172 | return (int16_t) (y >> 15); | ||
173 | } | ||
174 | |||
175 | static inline const int16_t *fir32_create(struct fir32_state_t *fir, | ||
176 | const int32_t *coeffs, int taps) | ||
177 | { | ||
178 | fir->taps = taps; | ||
179 | fir->curr_pos = taps - 1; | ||
180 | fir->coeffs = coeffs; | ||
181 | fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); | ||
182 | return fir->history; | ||
183 | } | ||
184 | |||
185 | static inline void fir32_flush(struct fir32_state_t *fir) | ||
186 | { | ||
187 | memset(fir->history, 0, fir->taps * sizeof(int16_t)); | ||
188 | } | ||
189 | |||
190 | static inline void fir32_free(struct fir32_state_t *fir) | ||
191 | { | ||
192 | kfree(fir->history); | ||
193 | } | ||
194 | |||
195 | static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) | ||
196 | { | ||
197 | int i; | ||
198 | int32_t y; | ||
199 | int offset1; | ||
200 | int offset2; | ||
201 | |||
202 | fir->history[fir->curr_pos] = sample; | ||
203 | offset2 = fir->curr_pos; | ||
204 | offset1 = fir->taps - offset2; | ||
205 | y = 0; | ||
206 | for (i = fir->taps - 1; i >= offset1; i--) | ||
207 | y += fir->coeffs[i] * fir->history[i - offset1]; | ||
208 | for (; i >= 0; i--) | ||
209 | y += fir->coeffs[i] * fir->history[i + offset2]; | ||
210 | if (fir->curr_pos <= 0) | ||
211 | fir->curr_pos = fir->taps; | ||
212 | fir->curr_pos--; | ||
213 | return (int16_t) (y >> 15); | ||
214 | } | ||
215 | |||
216 | #endif | ||
diff --git a/drivers/misc/echo/oslec.h b/drivers/misc/echo/oslec.h new file mode 100644 index 000000000000..f4175360ce27 --- /dev/null +++ b/drivers/misc/echo/oslec.h | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * OSLEC - A line echo canceller. This code is being developed | ||
3 | * against and partially complies with G168. Using code from SpanDSP | ||
4 | * | ||
5 | * Written by Steve Underwood <steveu@coppice.org> | ||
6 | * and David Rowe <david_at_rowetel_dot_com> | ||
7 | * | ||
8 | * Copyright (C) 2001 Steve Underwood and 2007-2008 David Rowe | ||
9 | * | ||
10 | * All rights reserved. | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License version 2, as | ||
14 | * published by the Free Software Foundation. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #ifndef __OSLEC_H | ||
28 | #define __OSLEC_H | ||
29 | |||
30 | /* Mask bits for the adaption mode */ | ||
31 | #define ECHO_CAN_USE_ADAPTION 0x01 | ||
32 | #define ECHO_CAN_USE_NLP 0x02 | ||
33 | #define ECHO_CAN_USE_CNG 0x04 | ||
34 | #define ECHO_CAN_USE_CLIP 0x08 | ||
35 | #define ECHO_CAN_USE_TX_HPF 0x10 | ||
36 | #define ECHO_CAN_USE_RX_HPF 0x20 | ||
37 | #define ECHO_CAN_DISABLE 0x40 | ||
38 | |||
39 | /** | ||
40 | * oslec_state: G.168 echo canceller descriptor. | ||
41 | * | ||
42 | * This defines the working state for a line echo canceller. | ||
43 | */ | ||
44 | struct oslec_state; | ||
45 | |||
46 | /** | ||
47 | * oslec_create - Create a voice echo canceller context. | ||
48 | * @len: The length of the canceller, in samples. | ||
49 | * @return: The new canceller context, or NULL if the canceller could not be | ||
50 | * created. | ||
51 | */ | ||
52 | struct oslec_state *oslec_create(int len, int adaption_mode); | ||
53 | |||
54 | /** | ||
55 | * oslec_free - Free a voice echo canceller context. | ||
56 | * @ec: The echo canceller context. | ||
57 | */ | ||
58 | void oslec_free(struct oslec_state *ec); | ||
59 | |||
60 | /** | ||
61 | * oslec_flush - Flush (reinitialise) a voice echo canceller context. | ||
62 | * @ec: The echo canceller context. | ||
63 | */ | ||
64 | void oslec_flush(struct oslec_state *ec); | ||
65 | |||
66 | /** | ||
67 | * oslec_adaption_mode - set the adaption mode of a voice echo canceller context. | ||
68 | * @ec: The echo canceller context. | ||
69 | * @adaption_mode: The mode. | ||
70 | */ | ||
71 | void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode); | ||
72 | |||
73 | void oslec_snapshot(struct oslec_state *ec); | ||
74 | |||
75 | /** | ||
76 | * oslec_update: Process a sample through a voice echo canceller. | ||
77 | * @ec: The echo canceller context. | ||
78 | * @tx: The transmitted audio sample. | ||
79 | * @rx: The received audio sample. | ||
80 | * | ||
81 | * The return value is the clean (echo cancelled) received sample. | ||
82 | */ | ||
83 | int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx); | ||
84 | |||
85 | /** | ||
86 | * oslec_hpf_tx: Process to high pass filter the tx signal. | ||
87 | * @ec: The echo canceller context. | ||
88 | * @tx: The transmitted audio sample. | ||
89 | * | ||
90 | * The return value is the HP filtered transmit sample, send this to your D/A. | ||
91 | */ | ||
92 | int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx); | ||
93 | |||
94 | #endif /* __OSLEC_H */ | ||