diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2014-02-28 17:08:42 -0500 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2014-02-28 17:08:42 -0500 |
| commit | 6e2055a9e56e292715f935a85f381e54c1f54269 (patch) | |
| tree | 5cdb033f7da95ba47c37a42602c6d88d55e11db5 /drivers/misc/echo | |
| parent | dc93c85235efa5201e9a3c116bc3fbd1afc1a182 (diff) | |
staging: echo: move to drivers/misc/
The code is clean, there are users of it, so it doesn't belong in
staging anymore, move it to drivers/misc/.
Cc: Steve Underwood <steveu@coppice.org>
Cc: David Rowe <david@rowetel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/misc/echo')
| -rw-r--r-- | drivers/misc/echo/Kconfig | 9 | ||||
| -rw-r--r-- | drivers/misc/echo/Makefile | 1 | ||||
| -rw-r--r-- | drivers/misc/echo/echo.c | 674 | ||||
| -rw-r--r-- | drivers/misc/echo/echo.h | 187 | ||||
| -rw-r--r-- | drivers/misc/echo/fir.h | 216 | ||||
| -rw-r--r-- | drivers/misc/echo/oslec.h | 94 |
6 files changed, 1181 insertions, 0 deletions
diff --git a/drivers/misc/echo/Kconfig b/drivers/misc/echo/Kconfig new file mode 100644 index 000000000000..f1d41ea9cd48 --- /dev/null +++ b/drivers/misc/echo/Kconfig | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | config ECHO | ||
| 2 | tristate "Line Echo Canceller support" | ||
| 3 | default n | ||
| 4 | ---help--- | ||
| 5 | This driver provides line echo cancelling support for mISDN and | ||
| 6 | Zaptel drivers. | ||
| 7 | |||
| 8 | To compile this driver as a module, choose M here. The module | ||
| 9 | will be called echo. | ||
diff --git a/drivers/misc/echo/Makefile b/drivers/misc/echo/Makefile new file mode 100644 index 000000000000..7d4caac12a8d --- /dev/null +++ b/drivers/misc/echo/Makefile | |||
| @@ -0,0 +1 @@ | |||
| obj-$(CONFIG_ECHO) += echo.o | |||
diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c new file mode 100644 index 000000000000..9597e9523cac --- /dev/null +++ b/drivers/misc/echo/echo.c | |||
| @@ -0,0 +1,674 @@ | |||
| 1 | /* | ||
| 2 | * SpanDSP - a series of DSP components for telephony | ||
| 3 | * | ||
| 4 | * echo.c - A line echo canceller. This code is being developed | ||
| 5 | * against and partially complies with G168. | ||
| 6 | * | ||
| 7 | * Written by Steve Underwood <steveu@coppice.org> | ||
| 8 | * and David Rowe <david_at_rowetel_dot_com> | ||
| 9 | * | ||
| 10 | * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe | ||
| 11 | * | ||
| 12 | * Based on a bit from here, a bit from there, eye of toad, ear of | ||
| 13 | * bat, 15 years of failed attempts by David and a few fried brain | ||
| 14 | * cells. | ||
| 15 | * | ||
| 16 | * All rights reserved. | ||
| 17 | * | ||
| 18 | * This program is free software; you can redistribute it and/or modify | ||
| 19 | * it under the terms of the GNU General Public License version 2, as | ||
| 20 | * published by the Free Software Foundation. | ||
| 21 | * | ||
| 22 | * This program is distributed in the hope that it will be useful, | ||
| 23 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 24 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 25 | * GNU General Public License for more details. | ||
| 26 | * | ||
| 27 | * You should have received a copy of the GNU General Public License | ||
| 28 | * along with this program; if not, write to the Free Software | ||
| 29 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 30 | */ | ||
| 31 | |||
| 32 | /*! \file */ | ||
| 33 | |||
| 34 | /* Implementation Notes | ||
| 35 | David Rowe | ||
| 36 | April 2007 | ||
| 37 | |||
| 38 | This code started life as Steve's NLMS algorithm with a tap | ||
| 39 | rotation algorithm to handle divergence during double talk. I | ||
| 40 | added a Geigel Double Talk Detector (DTD) [2] and performed some | ||
| 41 | G168 tests. However I had trouble meeting the G168 requirements, | ||
| 42 | especially for double talk - there were always cases where my DTD | ||
| 43 | failed, for example where near end speech was under the 6dB | ||
| 44 | threshold required for declaring double talk. | ||
| 45 | |||
| 46 | So I tried a two path algorithm [1], which has so far given better | ||
| 47 | results. The original tap rotation/Geigel algorithm is available | ||
| 48 | in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit. | ||
| 49 | It's probably possible to make it work if someone wants to put some | ||
| 50 | serious work into it. | ||
| 51 | |||
| 52 | At present no special treatment is provided for tones, which | ||
| 53 | generally cause NLMS algorithms to diverge. Initial runs of a | ||
| 54 | subset of the G168 tests for tones (e.g ./echo_test 6) show the | ||
| 55 | current algorithm is passing OK, which is kind of surprising. The | ||
| 56 | full set of tests needs to be performed to confirm this result. | ||
| 57 | |||
| 58 | One other interesting change is that I have managed to get the NLMS | ||
| 59 | code to work with 16 bit coefficients, rather than the original 32 | ||
| 60 | bit coefficients. This reduces the MIPs and storage required. | ||
| 61 | I evaluated the 16 bit port using g168_tests.sh and listening tests | ||
| 62 | on 4 real-world samples. | ||
| 63 | |||
| 64 | I also attempted the implementation of a block based NLMS update | ||
| 65 | [2] but although this passes g168_tests.sh it didn't converge well | ||
| 66 | on the real-world samples. I have no idea why, perhaps a scaling | ||
| 67 | problem. The block based code is also available in SVN | ||
| 68 | http://svn.rowetel.com/software/oslec/tags/before_16bit. If this | ||
| 69 | code can be debugged, it will lead to further reduction in MIPS, as | ||
| 70 | the block update code maps nicely onto DSP instruction sets (it's a | ||
| 71 | dot product) compared to the current sample-by-sample update. | ||
| 72 | |||
| 73 | Steve also has some nice notes on echo cancellers in echo.h | ||
| 74 | |||
| 75 | References: | ||
| 76 | |||
| 77 | [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo | ||
| 78 | Path Models", IEEE Transactions on communications, COM-25, | ||
| 79 | No. 6, June | ||
| 80 | 1977. | ||
| 81 | http://www.rowetel.com/images/echo/dual_path_paper.pdf | ||
| 82 | |||
| 83 | [2] The classic, very useful paper that tells you how to | ||
| 84 | actually build a real world echo canceller: | ||
| 85 | Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice | ||
| 86 | Echo Canceller with a TMS32020", | ||
| 87 | http://www.rowetel.com/images/echo/spra129.pdf | ||
| 88 | |||
| 89 | [3] I have written a series of blog posts on this work, here is | ||
| 90 | Part 1: http://www.rowetel.com/blog/?p=18 | ||
| 91 | |||
| 92 | [4] The source code http://svn.rowetel.com/software/oslec/ | ||
| 93 | |||
| 94 | [5] A nice reference on LMS filters: | ||
| 95 | http://en.wikipedia.org/wiki/Least_mean_squares_filter | ||
| 96 | |||
| 97 | Credits: | ||
| 98 | |||
| 99 | Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan | ||
| 100 | Muthukrishnan for their suggestions and email discussions. Thanks | ||
| 101 | also to those people who collected echo samples for me such as | ||
| 102 | Mark, Pawel, and Pavel. | ||
| 103 | */ | ||
| 104 | |||
| 105 | #include <linux/kernel.h> | ||
| 106 | #include <linux/module.h> | ||
| 107 | #include <linux/slab.h> | ||
| 108 | |||
| 109 | #include "echo.h" | ||
| 110 | |||
| 111 | #define MIN_TX_POWER_FOR_ADAPTION 64 | ||
| 112 | #define MIN_RX_POWER_FOR_ADAPTION 64 | ||
| 113 | #define DTD_HANGOVER 600 /* 600 samples, or 75ms */ | ||
| 114 | #define DC_LOG2BETA 3 /* log2() of DC filter Beta */ | ||
| 115 | |||
| 116 | /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ | ||
| 117 | |||
| 118 | #ifdef __bfin__ | ||
| 119 | static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) | ||
| 120 | { | ||
| 121 | int i; | ||
| 122 | int offset1; | ||
| 123 | int offset2; | ||
| 124 | int factor; | ||
| 125 | int exp; | ||
| 126 | int16_t *phist; | ||
| 127 | int n; | ||
| 128 | |||
| 129 | if (shift > 0) | ||
| 130 | factor = clean << shift; | ||
| 131 | else | ||
| 132 | factor = clean >> -shift; | ||
| 133 | |||
| 134 | /* Update the FIR taps */ | ||
| 135 | |||
| 136 | offset2 = ec->curr_pos; | ||
| 137 | offset1 = ec->taps - offset2; | ||
| 138 | phist = &ec->fir_state_bg.history[offset2]; | ||
| 139 | |||
| 140 | /* st: and en: help us locate the assembler in echo.s */ | ||
| 141 | |||
| 142 | /* asm("st:"); */ | ||
| 143 | n = ec->taps; | ||
| 144 | for (i = 0; i < n; i++) { | ||
| 145 | exp = *phist++ * factor; | ||
| 146 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | ||
| 147 | } | ||
| 148 | /* asm("en:"); */ | ||
| 149 | |||
| 150 | /* Note the asm for the inner loop above generated by Blackfin gcc | ||
| 151 | 4.1.1 is pretty good (note even parallel instructions used): | ||
| 152 | |||
| 153 | R0 = W [P0++] (X); | ||
| 154 | R0 *= R2; | ||
| 155 | R0 = R0 + R3 (NS) || | ||
| 156 | R1 = W [P1] (X) || | ||
| 157 | nop; | ||
| 158 | R0 >>>= 15; | ||
| 159 | R0 = R0 + R1; | ||
| 160 | W [P1++] = R0; | ||
| 161 | |||
| 162 | A block based update algorithm would be much faster but the | ||
| 163 | above can't be improved on much. Every instruction saved in | ||
| 164 | the loop above is 2 MIPs/ch! The for loop above is where the | ||
| 165 | Blackfin spends most of it's time - about 17 MIPs/ch measured | ||
| 166 | with speedtest.c with 256 taps (32ms). Write-back and | ||
| 167 | Write-through cache gave about the same performance. | ||
| 168 | */ | ||
| 169 | } | ||
| 170 | |||
| 171 | /* | ||
| 172 | IDEAS for further optimisation of lms_adapt_bg(): | ||
| 173 | |||
| 174 | 1/ The rounding is quite costly. Could we keep as 32 bit coeffs | ||
| 175 | then make filter pluck the MS 16-bits of the coeffs when filtering? | ||
| 176 | However this would lower potential optimisation of filter, as I | ||
| 177 | think the dual-MAC architecture requires packed 16 bit coeffs. | ||
| 178 | |||
| 179 | 2/ Block based update would be more efficient, as per comments above, | ||
| 180 | could use dual MAC architecture. | ||
| 181 | |||
| 182 | 3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC | ||
| 183 | packing. | ||
| 184 | |||
| 185 | 4/ Execute the whole e/c in a block of say 20ms rather than sample | ||
| 186 | by sample. Processing a few samples every ms is inefficient. | ||
| 187 | */ | ||
| 188 | |||
| 189 | #else | ||
| 190 | static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) | ||
| 191 | { | ||
| 192 | int i; | ||
| 193 | |||
| 194 | int offset1; | ||
| 195 | int offset2; | ||
| 196 | int factor; | ||
| 197 | int exp; | ||
| 198 | |||
| 199 | if (shift > 0) | ||
| 200 | factor = clean << shift; | ||
| 201 | else | ||
| 202 | factor = clean >> -shift; | ||
| 203 | |||
| 204 | /* Update the FIR taps */ | ||
| 205 | |||
| 206 | offset2 = ec->curr_pos; | ||
| 207 | offset1 = ec->taps - offset2; | ||
| 208 | |||
| 209 | for (i = ec->taps - 1; i >= offset1; i--) { | ||
| 210 | exp = (ec->fir_state_bg.history[i - offset1] * factor); | ||
| 211 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | ||
| 212 | } | ||
| 213 | for (; i >= 0; i--) { | ||
| 214 | exp = (ec->fir_state_bg.history[i + offset2] * factor); | ||
| 215 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | ||
| 216 | } | ||
| 217 | } | ||
| 218 | #endif | ||
| 219 | |||
/*
 * top_bit - index of the most significant set bit of @bits.
 *
 * Returns fls(bits) - 1, or -1 when @bits is zero.
 */
static inline int top_bit(unsigned int bits)
{
	return bits ? (int)fls((int32_t) bits) - 1 : -1;
}
| 227 | |||
| 228 | struct oslec_state *oslec_create(int len, int adaption_mode) | ||
| 229 | { | ||
| 230 | struct oslec_state *ec; | ||
| 231 | int i; | ||
| 232 | const int16_t *history; | ||
| 233 | |||
| 234 | ec = kzalloc(sizeof(*ec), GFP_KERNEL); | ||
| 235 | if (!ec) | ||
| 236 | return NULL; | ||
| 237 | |||
| 238 | ec->taps = len; | ||
| 239 | ec->log2taps = top_bit(len); | ||
| 240 | ec->curr_pos = ec->taps - 1; | ||
| 241 | |||
| 242 | ec->fir_taps16[0] = | ||
| 243 | kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); | ||
| 244 | if (!ec->fir_taps16[0]) | ||
| 245 | goto error_oom_0; | ||
| 246 | |||
| 247 | ec->fir_taps16[1] = | ||
| 248 | kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); | ||
| 249 | if (!ec->fir_taps16[1]) | ||
| 250 | goto error_oom_1; | ||
| 251 | |||
| 252 | history = fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps); | ||
| 253 | if (!history) | ||
| 254 | goto error_state; | ||
| 255 | history = fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps); | ||
| 256 | if (!history) | ||
| 257 | goto error_state_bg; | ||
| 258 | |||
| 259 | for (i = 0; i < 5; i++) | ||
| 260 | ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0; | ||
| 261 | |||
| 262 | ec->cng_level = 1000; | ||
| 263 | oslec_adaption_mode(ec, adaption_mode); | ||
| 264 | |||
| 265 | ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); | ||
| 266 | if (!ec->snapshot) | ||
| 267 | goto error_snap; | ||
| 268 | |||
| 269 | ec->cond_met = 0; | ||
| 270 | ec->pstates = 0; | ||
| 271 | ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; | ||
| 272 | ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; | ||
| 273 | ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; | ||
| 274 | ec->lbgn = ec->lbgn_acc = 0; | ||
| 275 | ec->lbgn_upper = 200; | ||
| 276 | ec->lbgn_upper_acc = ec->lbgn_upper << 13; | ||
| 277 | |||
| 278 | return ec; | ||
| 279 | |||
| 280 | error_snap: | ||
| 281 | fir16_free(&ec->fir_state_bg); | ||
| 282 | error_state_bg: | ||
| 283 | fir16_free(&ec->fir_state); | ||
| 284 | error_state: | ||
| 285 | kfree(ec->fir_taps16[1]); | ||
| 286 | error_oom_1: | ||
| 287 | kfree(ec->fir_taps16[0]); | ||
| 288 | error_oom_0: | ||
| 289 | kfree(ec); | ||
| 290 | return NULL; | ||
| 291 | } | ||
| 292 | EXPORT_SYMBOL_GPL(oslec_create); | ||
| 293 | |||
| 294 | void oslec_free(struct oslec_state *ec) | ||
| 295 | { | ||
| 296 | int i; | ||
| 297 | |||
| 298 | fir16_free(&ec->fir_state); | ||
| 299 | fir16_free(&ec->fir_state_bg); | ||
| 300 | for (i = 0; i < 2; i++) | ||
| 301 | kfree(ec->fir_taps16[i]); | ||
| 302 | kfree(ec->snapshot); | ||
| 303 | kfree(ec); | ||
| 304 | } | ||
| 305 | EXPORT_SYMBOL_GPL(oslec_free); | ||
| 306 | |||
| 307 | void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode) | ||
| 308 | { | ||
| 309 | ec->adaption_mode = adaption_mode; | ||
| 310 | } | ||
| 311 | EXPORT_SYMBOL_GPL(oslec_adaption_mode); | ||
| 312 | |||
| 313 | void oslec_flush(struct oslec_state *ec) | ||
| 314 | { | ||
| 315 | int i; | ||
| 316 | |||
| 317 | ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; | ||
| 318 | ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; | ||
| 319 | ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; | ||
| 320 | |||
| 321 | ec->lbgn = ec->lbgn_acc = 0; | ||
| 322 | ec->lbgn_upper = 200; | ||
| 323 | ec->lbgn_upper_acc = ec->lbgn_upper << 13; | ||
| 324 | |||
| 325 | ec->nonupdate_dwell = 0; | ||
| 326 | |||
| 327 | fir16_flush(&ec->fir_state); | ||
| 328 | fir16_flush(&ec->fir_state_bg); | ||
| 329 | ec->fir_state.curr_pos = ec->taps - 1; | ||
| 330 | ec->fir_state_bg.curr_pos = ec->taps - 1; | ||
| 331 | for (i = 0; i < 2; i++) | ||
| 332 | memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t)); | ||
| 333 | |||
| 334 | ec->curr_pos = ec->taps - 1; | ||
| 335 | ec->pstates = 0; | ||
| 336 | } | ||
| 337 | EXPORT_SYMBOL_GPL(oslec_flush); | ||
| 338 | |||
| 339 | void oslec_snapshot(struct oslec_state *ec) | ||
| 340 | { | ||
| 341 | memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t)); | ||
| 342 | } | ||
| 343 | EXPORT_SYMBOL_GPL(oslec_snapshot); | ||
| 344 | |||
| 345 | /* Dual Path Echo Canceller */ | ||
| 346 | |||
| 347 | int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx) | ||
| 348 | { | ||
| 349 | int32_t echo_value; | ||
| 350 | int clean_bg; | ||
| 351 | int tmp; | ||
| 352 | int tmp1; | ||
| 353 | |||
| 354 | /* | ||
| 355 | * Input scaling was found be required to prevent problems when tx | ||
| 356 | * starts clipping. Another possible way to handle this would be the | ||
| 357 | * filter coefficent scaling. | ||
| 358 | */ | ||
| 359 | |||
| 360 | ec->tx = tx; | ||
| 361 | ec->rx = rx; | ||
| 362 | tx >>= 1; | ||
| 363 | rx >>= 1; | ||
| 364 | |||
| 365 | /* | ||
| 366 | * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision | ||
| 367 | * required otherwise values do not track down to 0. Zero at DC, Pole | ||
| 368 | * at (1-Beta) on real axis. Some chip sets (like Si labs) don't | ||
| 369 | * need this, but something like a $10 X100P card does. Any DC really | ||
| 370 | * slows down convergence. | ||
| 371 | * | ||
| 372 | * Note: removes some low frequency from the signal, this reduces the | ||
| 373 | * speech quality when listening to samples through headphones but may | ||
| 374 | * not be obvious through a telephone handset. | ||
| 375 | * | ||
| 376 | * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta | ||
| 377 | * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz. | ||
| 378 | */ | ||
| 379 | |||
| 380 | if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) { | ||
| 381 | tmp = rx << 15; | ||
| 382 | |||
| 383 | /* | ||
| 384 | * Make sure the gain of the HPF is 1.0. This can still | ||
| 385 | * saturate a little under impulse conditions, and it might | ||
| 386 | * roll to 32768 and need clipping on sustained peak level | ||
| 387 | * signals. However, the scale of such clipping is small, and | ||
| 388 | * the error due to any saturation should not markedly affect | ||
| 389 | * the downstream processing. | ||
| 390 | */ | ||
| 391 | tmp -= (tmp >> 4); | ||
| 392 | |||
| 393 | ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2; | ||
| 394 | |||
| 395 | /* | ||
| 396 | * hard limit filter to prevent clipping. Note that at this | ||
| 397 | * stage rx should be limited to +/- 16383 due to right shift | ||
| 398 | * above | ||
| 399 | */ | ||
| 400 | tmp1 = ec->rx_1 >> 15; | ||
| 401 | if (tmp1 > 16383) | ||
| 402 | tmp1 = 16383; | ||
| 403 | if (tmp1 < -16383) | ||
| 404 | tmp1 = -16383; | ||
| 405 | rx = tmp1; | ||
| 406 | ec->rx_2 = tmp; | ||
| 407 | } | ||
| 408 | |||
| 409 | /* Block average of power in the filter states. Used for | ||
| 410 | adaption power calculation. */ | ||
| 411 | |||
| 412 | { | ||
| 413 | int new, old; | ||
| 414 | |||
| 415 | /* efficient "out with the old and in with the new" algorithm so | ||
| 416 | we don't have to recalculate over the whole block of | ||
| 417 | samples. */ | ||
| 418 | new = (int)tx * (int)tx; | ||
| 419 | old = (int)ec->fir_state.history[ec->fir_state.curr_pos] * | ||
| 420 | (int)ec->fir_state.history[ec->fir_state.curr_pos]; | ||
| 421 | ec->pstates += | ||
| 422 | ((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps; | ||
| 423 | if (ec->pstates < 0) | ||
| 424 | ec->pstates = 0; | ||
| 425 | } | ||
| 426 | |||
| 427 | /* Calculate short term average levels using simple single pole IIRs */ | ||
| 428 | |||
| 429 | ec->ltxacc += abs(tx) - ec->ltx; | ||
| 430 | ec->ltx = (ec->ltxacc + (1 << 4)) >> 5; | ||
| 431 | ec->lrxacc += abs(rx) - ec->lrx; | ||
| 432 | ec->lrx = (ec->lrxacc + (1 << 4)) >> 5; | ||
| 433 | |||
| 434 | /* Foreground filter */ | ||
| 435 | |||
| 436 | ec->fir_state.coeffs = ec->fir_taps16[0]; | ||
| 437 | echo_value = fir16(&ec->fir_state, tx); | ||
| 438 | ec->clean = rx - echo_value; | ||
| 439 | ec->lcleanacc += abs(ec->clean) - ec->lclean; | ||
| 440 | ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5; | ||
| 441 | |||
| 442 | /* Background filter */ | ||
| 443 | |||
| 444 | echo_value = fir16(&ec->fir_state_bg, tx); | ||
| 445 | clean_bg = rx - echo_value; | ||
| 446 | ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg; | ||
| 447 | ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5; | ||
| 448 | |||
| 449 | /* Background Filter adaption */ | ||
| 450 | |||
| 451 | /* Almost always adap bg filter, just simple DT and energy | ||
| 452 | detection to minimise adaption in cases of strong double talk. | ||
| 453 | However this is not critical for the dual path algorithm. | ||
| 454 | */ | ||
| 455 | ec->factor = 0; | ||
| 456 | ec->shift = 0; | ||
| 457 | if ((ec->nonupdate_dwell == 0)) { | ||
| 458 | int p, logp, shift; | ||
| 459 | |||
| 460 | /* Determine: | ||
| 461 | |||
| 462 | f = Beta * clean_bg_rx/P ------ (1) | ||
| 463 | |||
| 464 | where P is the total power in the filter states. | ||
| 465 | |||
| 466 | The Boffins have shown that if we obey (1) we converge | ||
| 467 | quickly and avoid instability. | ||
| 468 | |||
| 469 | The correct factor f must be in Q30, as this is the fixed | ||
| 470 | point format required by the lms_adapt_bg() function, | ||
| 471 | therefore the scaled version of (1) is: | ||
| 472 | |||
| 473 | (2^30) * f = (2^30) * Beta * clean_bg_rx/P | ||
| 474 | factor = (2^30) * Beta * clean_bg_rx/P ----- (2) | ||
| 475 | |||
| 476 | We have chosen Beta = 0.25 by experiment, so: | ||
| 477 | |||
| 478 | factor = (2^30) * (2^-2) * clean_bg_rx/P | ||
| 479 | |||
| 480 | (30 - 2 - log2(P)) | ||
| 481 | factor = clean_bg_rx 2 ----- (3) | ||
| 482 | |||
| 483 | To avoid a divide we approximate log2(P) as top_bit(P), | ||
| 484 | which returns the position of the highest non-zero bit in | ||
| 485 | P. This approximation introduces an error as large as a | ||
| 486 | factor of 2, but the algorithm seems to handle it OK. | ||
| 487 | |||
| 488 | Come to think of it a divide may not be a big deal on a | ||
| 489 | modern DSP, so its probably worth checking out the cycles | ||
| 490 | for a divide versus a top_bit() implementation. | ||
| 491 | */ | ||
| 492 | |||
| 493 | p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates; | ||
| 494 | logp = top_bit(p) + ec->log2taps; | ||
| 495 | shift = 30 - 2 - logp; | ||
| 496 | ec->shift = shift; | ||
| 497 | |||
| 498 | lms_adapt_bg(ec, clean_bg, shift); | ||
| 499 | } | ||
| 500 | |||
| 501 | /* very simple DTD to make sure we dont try and adapt with strong | ||
| 502 | near end speech */ | ||
| 503 | |||
| 504 | ec->adapt = 0; | ||
| 505 | if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx)) | ||
| 506 | ec->nonupdate_dwell = DTD_HANGOVER; | ||
| 507 | if (ec->nonupdate_dwell) | ||
| 508 | ec->nonupdate_dwell--; | ||
| 509 | |||
| 510 | /* Transfer logic */ | ||
| 511 | |||
| 512 | /* These conditions are from the dual path paper [1], I messed with | ||
| 513 | them a bit to improve performance. */ | ||
| 514 | |||
| 515 | if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) && | ||
| 516 | (ec->nonupdate_dwell == 0) && | ||
| 517 | /* (ec->Lclean_bg < 0.875*ec->Lclean) */ | ||
| 518 | (8 * ec->lclean_bg < 7 * ec->lclean) && | ||
| 519 | /* (ec->Lclean_bg < 0.125*ec->Ltx) */ | ||
| 520 | (8 * ec->lclean_bg < ec->ltx)) { | ||
| 521 | if (ec->cond_met == 6) { | ||
| 522 | /* | ||
| 523 | * BG filter has had better results for 6 consecutive | ||
| 524 | * samples | ||
| 525 | */ | ||
| 526 | ec->adapt = 1; | ||
| 527 | memcpy(ec->fir_taps16[0], ec->fir_taps16[1], | ||
| 528 | ec->taps * sizeof(int16_t)); | ||
| 529 | } else | ||
| 530 | ec->cond_met++; | ||
| 531 | } else | ||
| 532 | ec->cond_met = 0; | ||
| 533 | |||
| 534 | /* Non-Linear Processing */ | ||
| 535 | |||
| 536 | ec->clean_nlp = ec->clean; | ||
| 537 | if (ec->adaption_mode & ECHO_CAN_USE_NLP) { | ||
| 538 | /* | ||
| 539 | * Non-linear processor - a fancy way to say "zap small | ||
| 540 | * signals, to avoid residual echo due to (uLaw/ALaw) | ||
| 541 | * non-linearity in the channel.". | ||
| 542 | */ | ||
| 543 | |||
| 544 | if ((16 * ec->lclean < ec->ltx)) { | ||
| 545 | /* | ||
| 546 | * Our e/c has improved echo by at least 24 dB (each | ||
| 547 | * factor of 2 is 6dB, so 2*2*2*2=16 is the same as | ||
| 548 | * 6+6+6+6=24dB) | ||
| 549 | */ | ||
| 550 | if (ec->adaption_mode & ECHO_CAN_USE_CNG) { | ||
| 551 | ec->cng_level = ec->lbgn; | ||
| 552 | |||
| 553 | /* | ||
| 554 | * Very elementary comfort noise generation. | ||
| 555 | * Just random numbers rolled off very vaguely | ||
| 556 | * Hoth-like. DR: This noise doesn't sound | ||
| 557 | * quite right to me - I suspect there are some | ||
| 558 | * overflow issues in the filtering as it's too | ||
| 559 | * "crackly". | ||
| 560 | * TODO: debug this, maybe just play noise at | ||
| 561 | * high level or look at spectrum. | ||
| 562 | */ | ||
| 563 | |||
| 564 | ec->cng_rndnum = | ||
| 565 | 1664525U * ec->cng_rndnum + 1013904223U; | ||
| 566 | ec->cng_filter = | ||
| 567 | ((ec->cng_rndnum & 0xFFFF) - 32768 + | ||
| 568 | 5 * ec->cng_filter) >> 3; | ||
| 569 | ec->clean_nlp = | ||
| 570 | (ec->cng_filter * ec->cng_level * 8) >> 14; | ||
| 571 | |||
| 572 | } else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) { | ||
| 573 | /* This sounds much better than CNG */ | ||
| 574 | if (ec->clean_nlp > ec->lbgn) | ||
| 575 | ec->clean_nlp = ec->lbgn; | ||
| 576 | if (ec->clean_nlp < -ec->lbgn) | ||
| 577 | ec->clean_nlp = -ec->lbgn; | ||
| 578 | } else { | ||
| 579 | /* | ||
| 580 | * just mute the residual, doesn't sound very | ||
| 581 | * good, used mainly in G168 tests | ||
| 582 | */ | ||
| 583 | ec->clean_nlp = 0; | ||
| 584 | } | ||
| 585 | } else { | ||
| 586 | /* | ||
| 587 | * Background noise estimator. I tried a few | ||
| 588 | * algorithms here without much luck. This very simple | ||
| 589 | * one seems to work best, we just average the level | ||
| 590 | * using a slow (1 sec time const) filter if the | ||
| 591 | * current level is less than a (experimentally | ||
| 592 | * derived) constant. This means we dont include high | ||
| 593 | * level signals like near end speech. When combined | ||
| 594 | * with CNG or especially CLIP seems to work OK. | ||
| 595 | */ | ||
| 596 | if (ec->lclean < 40) { | ||
| 597 | ec->lbgn_acc += abs(ec->clean) - ec->lbgn; | ||
| 598 | ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12; | ||
| 599 | } | ||
| 600 | } | ||
| 601 | } | ||
| 602 | |||
| 603 | /* Roll around the taps buffer */ | ||
| 604 | if (ec->curr_pos <= 0) | ||
| 605 | ec->curr_pos = ec->taps; | ||
| 606 | ec->curr_pos--; | ||
| 607 | |||
| 608 | if (ec->adaption_mode & ECHO_CAN_DISABLE) | ||
| 609 | ec->clean_nlp = rx; | ||
| 610 | |||
| 611 | /* Output scaled back up again to match input scaling */ | ||
| 612 | |||
| 613 | return (int16_t) ec->clean_nlp << 1; | ||
| 614 | } | ||
| 615 | EXPORT_SYMBOL_GPL(oslec_update); | ||
| 616 | |||
| 617 | /* This function is separated from the echo canceller as it is usually called | ||
| 618 | as part of the tx process. See rx HP (DC blocking) filter above, it's | ||
| 619 | the same design. | ||
| 620 | |||
| 621 | Some soft phones send speech signals with a lot of low frequency | ||
| 622 | energy, e.g. down to 20Hz. This can make the hybrid non-linear | ||
| 623 | which causes the echo canceller to fall over. This filter can help | ||
| 624 | by removing any low frequency before it gets to the tx port of the | ||
| 625 | hybrid. | ||
| 626 | |||
| 627 | It can also help by removing any DC in the tx signal. DC is bad | ||
| 628 | for LMS algorithms. | ||
| 629 | |||
| 630 | This is one of the classic DC removal filters, adjusted to provide | ||
| 631 | sufficient bass rolloff to meet the above requirement to protect hybrids | ||
| 632 | from things that upset them. The difference between successive samples | ||
| 633 | produces a lousy HPF, and then a suitably placed pole flattens things out. | ||
| 634 | The final result is a nicely rolled off bass end. The filtering is | ||
| 635 | implemented with extended fractional precision, which noise shapes things, | ||
| 636 | giving very clean DC removal. | ||
| 637 | */ | ||
| 638 | |||
| 639 | int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) | ||
| 640 | { | ||
| 641 | int tmp; | ||
| 642 | int tmp1; | ||
| 643 | |||
| 644 | if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) { | ||
| 645 | tmp = tx << 15; | ||
| 646 | |||
| 647 | /* | ||
| 648 | * Make sure the gain of the HPF is 1.0. The first can still | ||
| 649 | * saturate a little under impulse conditions, and it might | ||
| 650 | * roll to 32768 and need clipping on sustained peak level | ||
| 651 | * signals. However, the scale of such clipping is small, and | ||
| 652 | * the error due to any saturation should not markedly affect | ||
| 653 | * the downstream processing. | ||
| 654 | */ | ||
| 655 | tmp -= (tmp >> 4); | ||
| 656 | |||
| 657 | ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2; | ||
| 658 | tmp1 = ec->tx_1 >> 15; | ||
| 659 | if (tmp1 > 32767) | ||
| 660 | tmp1 = 32767; | ||
| 661 | if (tmp1 < -32767) | ||
| 662 | tmp1 = -32767; | ||
| 663 | tx = tmp1; | ||
| 664 | ec->tx_2 = tmp; | ||
| 665 | } | ||
| 666 | |||
| 667 | return tx; | ||
| 668 | } | ||
| 669 | EXPORT_SYMBOL_GPL(oslec_hpf_tx); | ||
| 670 | |||
| 671 | MODULE_LICENSE("GPL"); | ||
| 672 | MODULE_AUTHOR("David Rowe"); | ||
| 673 | MODULE_DESCRIPTION("Open Source Line Echo Canceller"); | ||
| 674 | MODULE_VERSION("0.3.0"); | ||
diff --git a/drivers/misc/echo/echo.h b/drivers/misc/echo/echo.h new file mode 100644 index 000000000000..9b08c63e6369 --- /dev/null +++ b/drivers/misc/echo/echo.h | |||
| @@ -0,0 +1,187 @@ | |||
| 1 | /* | ||
| 2 | * SpanDSP - a series of DSP components for telephony | ||
| 3 | * | ||
| 4 | * echo.c - A line echo canceller. This code is being developed | ||
| 5 | * against and partially complies with G168. | ||
| 6 | * | ||
| 7 | * Written by Steve Underwood <steveu@coppice.org> | ||
| 8 | * and David Rowe <david_at_rowetel_dot_com> | ||
| 9 | * | ||
| 10 | * Copyright (C) 2001 Steve Underwood and 2007 David Rowe | ||
| 11 | * | ||
| 12 | * All rights reserved. | ||
| 13 | * | ||
| 14 | * This program is free software; you can redistribute it and/or modify | ||
| 15 | * it under the terms of the GNU General Public License version 2, as | ||
| 16 | * published by the Free Software Foundation. | ||
| 17 | * | ||
| 18 | * This program is distributed in the hope that it will be useful, | ||
| 19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 21 | * GNU General Public License for more details. | ||
| 22 | * | ||
| 23 | * You should have received a copy of the GNU General Public License | ||
| 24 | * along with this program; if not, write to the Free Software | ||
| 25 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 26 | */ | ||
| 27 | |||
| 28 | #ifndef __ECHO_H | ||
| 29 | #define __ECHO_H | ||
| 30 | |||
| 31 | /* | ||
| 32 | Line echo cancellation for voice | ||
| 33 | |||
| 34 | What does it do? | ||
| 35 | |||
| 36 | This module aims to provide G.168-2002 compliant echo cancellation, to remove | ||
| 37 | electrical echoes (e.g. from 2-4 wire hybrids) from voice calls. | ||
| 38 | |||
| 39 | How does it work? | ||
| 40 | |||
| 41 | The heart of the echo canceller is an FIR filter. This is adapted to match the | ||
| 42 | echo impulse response of the telephone line. It must be long enough to | ||
| 43 | adequately cover the duration of that impulse response. The signal transmitted | ||
| 44 | to the telephone line is passed through the FIR filter. Once the FIR is | ||
| 45 | properly adapted, the resulting output is an estimate of the echo signal | ||
| 46 | received from the line. This is subtracted from the received signal. The result | ||
| 47 | is an estimate of the signal which originated at the far end of the line, free | ||
| 48 | from echos of our own transmitted signal. | ||
| 49 | |||
| 50 | The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and | ||
| 51 | was introduced in 1960. It is the commonest form of filter adaption used in | ||
| 52 | things like modem line equalisers and line echo cancellers. There it works very | ||
| 53 | well. However, it only works well for signals of constant amplitude. It works | ||
| 54 | very poorly for things like speech echo cancellation, where the signal level | ||
| 55 | varies widely. This is quite easy to fix. If the signal level is normalised - | ||
| 56 | similar to applying AGC - LMS can work as well for a signal of varying | ||
| 57 | amplitude as it does for a modem signal. This normalised least mean squares | ||
| 58 | (NLMS) algorithm is the commonest one used for speech echo cancellation. Many | ||
| 59 | other algorithms exist - e.g. RLS (essentially the same as Kalman filtering), | ||
| 60 | FAP, etc. Some perform significantly better than NLMS. However, factors such | ||
| 61 | as computational complexity and patents favour the use of NLMS. | ||
| 62 | |||
| 63 | A simple refinement to NLMS can improve its performance with speech. NLMS tends | ||
| 64 | to adapt best to the strongest parts of a signal. If the signal is white noise, | ||
| 65 | the NLMS algorithm works very well. However, speech has more low frequency than | ||
| 66 | high frequency content. Pre-whitening (i.e. filtering the signal to flatten its | ||
| 67 | spectrum) the echo signal improves the adapt rate for speech, and ensures the | ||
| 68 | final residual signal is not heavily biased towards high frequencies. A very | ||
| 69 | low complexity filter is adequate for this, so pre-whitening adds little to the | ||
| 70 | compute requirements of the echo canceller. | ||
| 71 | |||
| 72 | An FIR filter adapted using pre-whitened NLMS performs well, provided certain | ||
| 73 | conditions are met: | ||
| 74 | |||
| 75 | - The transmitted signal has poor self-correlation. | ||
| 76 | - There is no signal being generated within the environment being | ||
| 77 | cancelled. | ||
| 78 | |||
| 79 | The difficulty is that neither of these can be guaranteed. | ||
| 80 | |||
| 81 | If the adaption is performed while transmitting noise (or something fairly | ||
| 82 | noise like, such as voice) the adaption works very well. If the adaption is | ||
| 83 | performed while transmitting something highly correlative (typically narrow | ||
| 84 | band energy such as signalling tones or DTMF), the adaption can go seriously | ||
| 85 | wrong. The reason is there is only one solution for the adaption on a near | ||
| 86 | random signal - the impulse response of the line. For a repetitive signal, | ||
| 87 | there are any number of solutions which converge the adaption, and nothing | ||
| 88 | guides the adaption to choose the generalised one. Allowing an untrained | ||
| 89 | canceller to converge on this kind of narrowband energy is probably a good thing, | ||
| 90 | since at least it cancels the tones. Allowing a well converged canceller to | ||
| 91 | continue converging on such energy is just a way to ruin its generalised | ||
| 92 | adaption. A narrowband detector is needed, so adaption can be suspended at | ||
| 93 | appropriate times. | ||
| 94 | |||
| 95 | The adaption process is based on trying to eliminate the received signal. When | ||
| 96 | there is any signal from within the environment being cancelled it may upset | ||
| 97 | the adaption process. Similarly, if the signal we are transmitting is small, | ||
| 98 | noise may dominate and disturb the adaption process. If we can ensure that the | ||
| 99 | adaption is only performed when we are transmitting a significant signal level, | ||
| 100 | and the environment is not, things will be OK. Clearly, it is easy to tell when | ||
| 101 | we are sending a significant signal. Telling, if the environment is generating | ||
| 102 | a significant signal, and doing it with sufficient speed that the adaption will | ||
| 103 | not have diverged too much before we stop it, is a little harder. | ||
| 104 | |||
| 105 | The key problem in detecting when the environment is sourcing significant | ||
| 106 | energy is that we must do this very quickly. Given a reasonably long sample of | ||
| 107 | the received signal, there are a number of strategies which may be used to | ||
| 108 | assess whether that signal contains a strong far end component. However, by the | ||
| 109 | time that assessment is complete the far end signal will have already caused | ||
| 110 | major mis-convergence in the adaption process. An assessment algorithm is | ||
| 111 | needed which produces a fairly accurate result from a very short burst of far | ||
| 112 | end energy. | ||
| 113 | |||
| 114 | How do I use it? | ||
| 115 | |||
| 116 | The echo cancellor processes both the transmit and receive streams sample by | ||
| 117 | sample. The processing function is not declared inline. Unfortunately, | ||
| 118 | cancellation requires many operations per sample, so the call overhead is only | ||
| 119 | a minor burden. | ||
| 120 | */ | ||
| 121 | |||
| 122 | #include "fir.h" | ||
| 123 | #include "oslec.h" | ||
| 124 | |||
| 125 | /* | ||
| 126 | G.168 echo canceller descriptor. This defines the working state for a line | ||
| 127 | echo canceller. | ||
| 128 | */ | ||
struct oslec_state {
	/* NOTE(review): presumably the latest tx/rx samples and canceller outputs - confirm in echo.c */
	int16_t tx;
	int16_t rx;
	int16_t clean;
	int16_t clean_nlp;

	int nonupdate_dwell;
	int curr_pos;
	int taps;
	int log2taps;	/* NOTE(review): presumably log2 of taps - confirm */
	int adaption_mode;	/* NOTE(review): presumably a mask of ECHO_CAN_* bits from oslec.h - confirm */

	int cond_met;
	int32_t pstates;
	int16_t adapt;
	int32_t factor;
	int16_t shift;

	/* Average levels and averaging filter states */
	int ltxacc;
	int lrxacc;
	int lcleanacc;
	int lclean_bgacc;
	int ltx;
	int lrx;
	int lclean;
	int lclean_bg;
	int lbgn;
	int lbgn_acc;
	int lbgn_upper;
	int lbgn_upper_acc;

	/* foreground and background filter states */
	struct fir16_state_t fir_state;
	struct fir16_state_t fir_state_bg;
	/* two coefficient arrays; NOTE(review): presumably one per filter above - confirm */
	int16_t *fir_taps16[2];

	/* DC blocking filter states */
	int tx_1;
	int tx_2;
	int rx_1;
	int rx_2;

	/* optional High Pass Filter states */
	int32_t xvtx[5];
	int32_t yvtx[5];
	int32_t xvrx[5];
	int32_t yvrx[5];

	/* Parameters for the optional Hoth noise generator */
	int cng_level;
	int cng_rndnum;
	int cng_filter;

	/* snapshot sample of coeffs used for development */
	int16_t *snapshot;
};
| 186 | |||
| 187 | #endif /* __ECHO_H */ | ||
diff --git a/drivers/misc/echo/fir.h b/drivers/misc/echo/fir.h new file mode 100644 index 000000000000..7b9fabf1fea5 --- /dev/null +++ b/drivers/misc/echo/fir.h | |||
| @@ -0,0 +1,216 @@ | |||
| 1 | /* | ||
| 2 | * SpanDSP - a series of DSP components for telephony | ||
| 3 | * | ||
| 4 | * fir.h - General telephony FIR routines | ||
| 5 | * | ||
| 6 | * Written by Steve Underwood <steveu@coppice.org> | ||
| 7 | * | ||
| 8 | * Copyright (C) 2002 Steve Underwood | ||
| 9 | * | ||
| 10 | * All rights reserved. | ||
| 11 | * | ||
| 12 | * This program is free software; you can redistribute it and/or modify | ||
| 13 | * it under the terms of the GNU General Public License version 2, as | ||
| 14 | * published by the Free Software Foundation. | ||
| 15 | * | ||
| 16 | * This program is distributed in the hope that it will be useful, | ||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 19 | * GNU General Public License for more details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public License | ||
| 22 | * along with this program; if not, write to the Free Software | ||
| 23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #if !defined(_FIR_H_) | ||
| 27 | #define _FIR_H_ | ||
| 28 | |||
| 29 | /* | ||
| 30 | Blackfin NOTES & IDEAS: | ||
| 31 | |||
| 32 | A simple dot product function is used to implement the filter. This performs | ||
| 33 | just one MAC/cycle which is inefficient but was easy to implement as a first | ||
| 34 | pass. The current Blackfin code also uses an unrolled form of the filter | ||
| 35 | history to avoid 0 length hardware loop issues. This is wasteful of | ||
| 36 | memory. | ||
| 37 | |||
| 38 | Ideas for improvement: | ||
| 39 | |||
| 40 | 1/ Rewrite filter for dual MAC inner loop. The issue here is handling | ||
| 41 | history sample offsets that are 16 bit aligned - the dual MAC needs | ||
| 42 | 32 bit alignment. There are some good examples in libbfdsp. | ||
| 43 | |||
| 44 | 2/ Use the hardware circular buffer facility to halve memory usage. | ||
| 45 | |||
| 46 | 3/ Consider using internal memory. | ||
| 47 | |||
| 48 | Using less memory might also improve speed as cache misses will be | ||
| 49 | reduced. A drop in MIPs and memory approaching 50% should be | ||
| 50 | possible. | ||
| 51 | |||
| 52 | The foreground and background filters currently use a total of | ||
| 53 | about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo | ||
| 54 | can. | ||
| 55 | */ | ||
| 56 | |||
| 57 | /* | ||
| 58 | * 16 bit integer FIR descriptor. This defines the working state for a single | ||
| 59 | * instance of an FIR filter using 16 bit integer coefficients. | ||
| 60 | */ | ||
struct fir16_state_t {
	int taps;		/* number of coefficients, as passed to fir16_create() */
	int curr_pos;		/* history index where fir16() stores the next sample */
	const int16_t *coeffs;	/* caller-supplied coefficients; not freed by fir16_free() */
	int16_t *history;	/* sample history allocated by fir16_create() */
};
| 67 | |||
| 68 | /* | ||
| 69 | * 32 bit integer FIR descriptor. This defines the working state for a single | ||
| 70 | * instance of an FIR filter using 32 bit integer coefficients, and filtering | ||
| 71 | * 16 bit integer data. | ||
| 72 | */ | ||
struct fir32_state_t {
	int taps;		/* number of coefficients, as passed to fir32_create() */
	int curr_pos;		/* history index where fir32() stores the next sample */
	const int32_t *coeffs;	/* caller-supplied coefficients; not freed by fir32_free() */
	int16_t *history;	/* sample history allocated by fir32_create() */
};
| 79 | |||
| 80 | /* | ||
| 81 | * Floating point FIR descriptor. This defines the working state for a single | ||
| 82 | * instance of an FIR filter using floating point coefficients and data. | ||
| 83 | */ | ||
/*
 * NOTE(review): no create/flush/free/filter helpers for this descriptor are
 * visible in this header; the field meanings below presumably mirror
 * fir16_state_t - confirm against the users.
 */
struct fir_float_state_t {
	int taps;		/* presumably the number of coefficients */
	int curr_pos;		/* presumably the current index into history */
	const float *coeffs;	/* filter coefficients */
	float *history;		/* sample history */
};
| 90 | |||
| 91 | static inline const int16_t *fir16_create(struct fir16_state_t *fir, | ||
| 92 | const int16_t *coeffs, int taps) | ||
| 93 | { | ||
| 94 | fir->taps = taps; | ||
| 95 | fir->curr_pos = taps - 1; | ||
| 96 | fir->coeffs = coeffs; | ||
| 97 | #if defined(__bfin__) | ||
| 98 | fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL); | ||
| 99 | #else | ||
| 100 | fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); | ||
| 101 | #endif | ||
| 102 | return fir->history; | ||
| 103 | } | ||
| 104 | |||
| 105 | static inline void fir16_flush(struct fir16_state_t *fir) | ||
| 106 | { | ||
| 107 | #if defined(__bfin__) | ||
| 108 | memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t)); | ||
| 109 | #else | ||
| 110 | memset(fir->history, 0, fir->taps * sizeof(int16_t)); | ||
| 111 | #endif | ||
| 112 | } | ||
| 113 | |||
| 114 | static inline void fir16_free(struct fir16_state_t *fir) | ||
| 115 | { | ||
| 116 | kfree(fir->history); | ||
| 117 | } | ||
| 118 | |||
#ifdef __bfin__
/*
 * dot_asm - Blackfin inline-assembly dot product of two 16 bit arrays.
 * @x: first array.
 * @y: second array.
 * @len: number of elements.
 *
 * Returns the value left in accumulator A0 after the multiply-accumulate
 * sequence below.
 */
static inline int32_t dot_asm(short *x, short *y, int len)
{
	int dot;

	/*
	 * LC0 is loaded with len - 1; the first operand pair is fetched before
	 * the loop and one further multiply-accumulate follows it, so presumably
	 * len products are summed in total.
	 * NOTE(review): len == 1 gives a zero loop count - confirm this is safe.
	 */
	len--;

	__asm__("I0 = %1;\n\t"
		"I1 = %2;\n\t"
		"A0 = 0;\n\t"
		"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
		"LOOP dot%= LC0 = %3;\n\t"
		"LOOP_BEGIN dot%=;\n\t"
		"A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
		"LOOP_END dot%=;\n\t"
		"A0 += R0.L*R1.L (IS);\n\t"
		"R0 = A0;\n\t"
		"%0 = R0;\n\t"
		: "=&d"(dot)
		: "a"(x), "a"(y), "a"(len)
		: "I0", "I1", "A1", "A0", "R0", "R1"
	);

	return dot;
}
#endif
| 145 | |||
| 146 | static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) | ||
| 147 | { | ||
| 148 | int32_t y; | ||
| 149 | #if defined(__bfin__) | ||
| 150 | fir->history[fir->curr_pos] = sample; | ||
| 151 | fir->history[fir->curr_pos + fir->taps] = sample; | ||
| 152 | y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos], | ||
| 153 | fir->taps); | ||
| 154 | #else | ||
| 155 | int i; | ||
| 156 | int offset1; | ||
| 157 | int offset2; | ||
| 158 | |||
| 159 | fir->history[fir->curr_pos] = sample; | ||
| 160 | |||
| 161 | offset2 = fir->curr_pos; | ||
| 162 | offset1 = fir->taps - offset2; | ||
| 163 | y = 0; | ||
| 164 | for (i = fir->taps - 1; i >= offset1; i--) | ||
| 165 | y += fir->coeffs[i] * fir->history[i - offset1]; | ||
| 166 | for (; i >= 0; i--) | ||
| 167 | y += fir->coeffs[i] * fir->history[i + offset2]; | ||
| 168 | #endif | ||
| 169 | if (fir->curr_pos <= 0) | ||
| 170 | fir->curr_pos = fir->taps; | ||
| 171 | fir->curr_pos--; | ||
| 172 | return (int16_t) (y >> 15); | ||
| 173 | } | ||
| 174 | |||
| 175 | static inline const int16_t *fir32_create(struct fir32_state_t *fir, | ||
| 176 | const int32_t *coeffs, int taps) | ||
| 177 | { | ||
| 178 | fir->taps = taps; | ||
| 179 | fir->curr_pos = taps - 1; | ||
| 180 | fir->coeffs = coeffs; | ||
| 181 | fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); | ||
| 182 | return fir->history; | ||
| 183 | } | ||
| 184 | |||
| 185 | static inline void fir32_flush(struct fir32_state_t *fir) | ||
| 186 | { | ||
| 187 | memset(fir->history, 0, fir->taps * sizeof(int16_t)); | ||
| 188 | } | ||
| 189 | |||
| 190 | static inline void fir32_free(struct fir32_state_t *fir) | ||
| 191 | { | ||
| 192 | kfree(fir->history); | ||
| 193 | } | ||
| 194 | |||
| 195 | static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) | ||
| 196 | { | ||
| 197 | int i; | ||
| 198 | int32_t y; | ||
| 199 | int offset1; | ||
| 200 | int offset2; | ||
| 201 | |||
| 202 | fir->history[fir->curr_pos] = sample; | ||
| 203 | offset2 = fir->curr_pos; | ||
| 204 | offset1 = fir->taps - offset2; | ||
| 205 | y = 0; | ||
| 206 | for (i = fir->taps - 1; i >= offset1; i--) | ||
| 207 | y += fir->coeffs[i] * fir->history[i - offset1]; | ||
| 208 | for (; i >= 0; i--) | ||
| 209 | y += fir->coeffs[i] * fir->history[i + offset2]; | ||
| 210 | if (fir->curr_pos <= 0) | ||
| 211 | fir->curr_pos = fir->taps; | ||
| 212 | fir->curr_pos--; | ||
| 213 | return (int16_t) (y >> 15); | ||
| 214 | } | ||
| 215 | |||
| 216 | #endif | ||
diff --git a/drivers/misc/echo/oslec.h b/drivers/misc/echo/oslec.h new file mode 100644 index 000000000000..f4175360ce27 --- /dev/null +++ b/drivers/misc/echo/oslec.h | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | /* | ||
| 2 | * OSLEC - A line echo canceller. This code is being developed | ||
| 3 | * against and partially complies with G168. Using code from SpanDSP | ||
| 4 | * | ||
| 5 | * Written by Steve Underwood <steveu@coppice.org> | ||
| 6 | * and David Rowe <david_at_rowetel_dot_com> | ||
| 7 | * | ||
| 8 | * Copyright (C) 2001 Steve Underwood and 2007-2008 David Rowe | ||
| 9 | * | ||
| 10 | * All rights reserved. | ||
| 11 | * | ||
| 12 | * This program is free software; you can redistribute it and/or modify | ||
| 13 | * it under the terms of the GNU General Public License version 2, as | ||
| 14 | * published by the Free Software Foundation. | ||
| 15 | * | ||
| 16 | * This program is distributed in the hope that it will be useful, | ||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 19 | * GNU General Public License for more details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public License | ||
| 22 | * along with this program; if not, write to the Free Software | ||
| 23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 24 | * | ||
| 25 | */ | ||
| 26 | |||
| 27 | #ifndef __OSLEC_H | ||
| 28 | #define __OSLEC_H | ||
| 29 | |||
| 30 | /* Mask bits for the adaption mode */ | ||
| 31 | #define ECHO_CAN_USE_ADAPTION 0x01 | ||
| 32 | #define ECHO_CAN_USE_NLP 0x02 | ||
| 33 | #define ECHO_CAN_USE_CNG 0x04 | ||
| 34 | #define ECHO_CAN_USE_CLIP 0x08 | ||
| 35 | #define ECHO_CAN_USE_TX_HPF 0x10 | ||
| 36 | #define ECHO_CAN_USE_RX_HPF 0x20 | ||
| 37 | #define ECHO_CAN_DISABLE 0x40 | ||
| 38 | |||
| 39 | /** | ||
| 40 | * oslec_state: G.168 echo canceller descriptor. | ||
| 41 | * | ||
| 42 | * This defines the working state for a line echo canceller. | ||
| 43 | */ | ||
| 44 | struct oslec_state; | ||
| 45 | |||
| 46 | /** | ||
| 47 | * oslec_create - Create a voice echo canceller context. | ||
| 48 | * @len: The length of the canceller, in samples. | ||
| 49 | * @return: The new canceller context, or NULL if the canceller could not be | ||
| 50 | * created. | ||
| 51 | */ | ||
| 52 | struct oslec_state *oslec_create(int len, int adaption_mode); | ||
| 53 | |||
| 54 | /** | ||
| 55 | * oslec_free - Free a voice echo canceller context. | ||
| 56 | * @ec: The echo canceller context. | ||
| 57 | */ | ||
| 58 | void oslec_free(struct oslec_state *ec); | ||
| 59 | |||
| 60 | /** | ||
| 61 | * oslec_flush - Flush (reinitialise) a voice echo canceller context. | ||
| 62 | * @ec: The echo canceller context. | ||
| 63 | */ | ||
| 64 | void oslec_flush(struct oslec_state *ec); | ||
| 65 | |||
| 66 | /** | ||
| 67 | * oslec_adaption_mode - set the adaption mode of a voice echo canceller context. | ||
| 68 | * @ec: The echo canceller context. | ||
| 69 | * @adaption_mode: The mode. | ||
| 70 | */ | ||
| 71 | void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode); | ||
| 72 | |||
| 73 | void oslec_snapshot(struct oslec_state *ec); | ||
| 74 | |||
| 75 | /** | ||
| 76 | * oslec_update: Process a sample through a voice echo canceller. | ||
| 77 | * @ec: The echo canceller context. | ||
| 78 | * @tx: The transmitted audio sample. | ||
| 79 | * @rx: The received audio sample. | ||
| 80 | * | ||
| 81 | * The return value is the clean (echo cancelled) received sample. | ||
| 82 | */ | ||
| 83 | int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx); | ||
| 84 | |||
| 85 | /** | ||
| 86 | * oslec_hpf_tx: Process to high pass filter the tx signal. | ||
| 87 | * @ec: The echo canceller context. | ||
| 88 | * @tx: The transmitted audio sample. | ||
| 89 | * | ||
| 90 | * The return value is the HP filtered transmit sample, send this to your D/A. | ||
| 91 | */ | ||
| 92 | int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx); | ||
| 93 | |||
| 94 | #endif /* __OSLEC_H */ | ||
