author    Dan Williams <dan.j.williams@intel.com>    2007-01-02 13:10:44 -0500
committer Dan Williams <dan.j.williams@intel.com>    2007-07-13 11:06:14 -0400
commit    9bc89cd82d6f88fb0ca39b30445c329a430fd66b (patch)
tree      7bd0e856abd359f84edea1bacfd1dd32edd93fbb /crypto/async_tx/async_tx.c
parent    685784aaf3cd0e3ff5e36c7ecf6f441cdbf57f73 (diff)
async_tx: add the async_tx api
The async_tx api provides methods for describing a chain of asynchronous
bulk memory transfers/transforms with support for inter-transactional
dependencies.  It is implemented as a dmaengine client that smooths over
the details of different hardware offload engine implementations.  Code
that is written to the api can optimize for asynchronous operation and
the api will fit the chain of operations to the available offload
resources.

	I imagine that any piece of ADMA hardware would register with the
	'async_*' subsystem, and a call to async_X would be routed as
	appropriate, or be run in-line. - Neil Brown

async_tx exploits the capabilities of struct dma_async_tx_descriptor to
provide an api of the following general format:

struct dma_async_tx_descriptor *
async_<operation>(..., struct dma_async_tx_descriptor *depend_tx,
			dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, <operation>);
	struct dma_device *device = chan ? chan->device : NULL;
	int int_en = cb_fn ? 1 : 0;
	struct dma_async_tx_descriptor *tx = device ?
		device->device_prep_dma_<operation>(chan, len, int_en) : NULL;

	if (tx) { /* run <operation> asynchronously */
		...
		tx->tx_set_dest(addr, tx, index);
		...
		tx->tx_set_src(addr, tx, index);
		...
		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else { /* run <operation> synchronously */
		...
		<operation>
		...
		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
	}

	return tx;
}

async_tx_find_channel() returns a capable channel from its pool.  The
channel pool is organized as a per-cpu array of channel pointers.  The
async_tx_rebalance() routine is tasked with managing these arrays.  In
the uniprocessor case async_tx_rebalance() tries to spread responsibility
evenly over channels of similar capabilities.  For example if there are
two copy+xor channels, one will handle copy operations and the other will
handle xor.  In the SMP case async_tx_rebalance() attempts to spread the
operations evenly over the cpus, e.g. cpu0 gets copy channel0 and xor
channel0 while cpu1 gets copy channel 1 and xor channel 1.  When a
dependency is specified async_tx_find_channel defaults to keeping the
operation on the same channel.  A xor->copy->xor chain will stay on one
channel if it supports both operation types, otherwise the transaction
will transition between a copy and a xor resource.

Currently the raid5 implementation in the MD raid456 driver has been
converted to the async_tx api.  A driver for the offload engines on the
Intel Xscale series of I/O processors, iop-adma, is provided in a later
commit.  With the iop-adma driver and async_tx, raid456 is able to
offload copy, xor, and xor-zero-sum operations to hardware engines.

On iop342 tiobench showed higher throughput for sequential writes (20 -
30% improvement) and sequential reads to a degraded array (40 - 55%
improvement).  For the other cases performance was roughly equal, +/- a
few percentage points.  On a x86-smp platform the performance of the
async_tx implementation (in synchronous mode) was also +/- a few
percentage points of the original implementation.  According to 'top' on
iop342 CPU utilization drops from ~50% to ~15% during a 'resync' while
the speed according to /proc/mdstat doubles from ~25 MB/s to ~50 MB/s.
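As a concrete illustration of the calling convention (not part of this
patch), a client chains operations by threading the returned descriptor
through the depend_tx parameter of the next call.  The sketch below
assumes async_memcpy()/async_xor() follow the general signature shown in
the template above; the buffer variables and helper names are
placeholders, and xor destination-handling flags are omitted for brevity:

#include <linux/kernel.h>
#include <linux/async_tx.h>

/* hypothetical client: copy a block, then run an xor that depends on the
 * copy; async_tx routes each step to an offload channel or runs it
 * synchronously
 */
static void chain_done(void *param)
{
	pr_debug("copy+xor chain complete\n");
}

static void run_copy_then_xor(struct page *copy_dest, struct page *copy_src,
			      struct page *xor_dest, struct page **srcs,
			      int src_cnt, size_t len)
{
	struct dma_async_tx_descriptor *tx;

	/* step 1: copy, no dependency */
	tx = async_memcpy(copy_dest, copy_src, 0, 0, len, 0,
			  NULL, NULL, NULL);

	/* step 2: xor ordered after the copy; ack both descriptors when done
	 * (destination-handling flags omitted in this sketch)
	 */
	tx = async_xor(xor_dest, srcs, 0, src_cnt, len,
		       ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
		       tx, chain_done, NULL);

	/* kick any descriptors queued on hardware channels */
	async_tx_issue_pending_all();
}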
The tiobench command line used for testing was:
	tiobench --size 2048 --block 4096 --block 131072 --dir /mnt/raid --numruns 5
* iop342 had 1GB of memory available

Details:
* if CONFIG_DMA_ENGINE=n the asynchronous path is compiled away by making
  async_tx_find_channel a static inline routine that always returns NULL
* when a callback is specified for a given transaction an interrupt will
  fire at operation completion time and the callback will occur in a
  tasklet.  if the channel does not support interrupts then a live
  polling wait will be performed
* the api is written as a dmaengine client that requests all available
  channels
* In support of dependencies the api implicitly schedules channel-switch
  interrupts.  The interrupt triggers the cleanup tasklet which causes
  pending operations to be scheduled on the next channel
* Xor engines treat an xor destination address differently than a
  software xor routine.  To the software routine the destination address
  is an implied source, whereas engines treat it as a write-only
  destination.  This patch modifies the xor_blocks routine to take an
  explicit destination address to mirror the hardware.

Changelog:
* fixed a leftover debug print
* don't allow callbacks in async_interrupt_cond
* fixed xor_block changes
* fixed usage of ASYNC_TX_XOR_DROP_DEST
* drop dma mapping methods, suggested by Chris Leech
* printk warning fixups from Andrew Morton
* don't use inline in C files, Adrian Bunk
* select the API when MD is enabled
* BUG_ON xor source counts <= 1
* implicitly handle hardware concerns like channel switching and
  interrupts, Neil Brown
* remove the per operation type list, and distribute operation
  capabilities evenly amongst the available channels
* simplify async_tx_find_channel to optimize the fast path
* introduce the channel_table_initialized flag to prevent early calls to
  the api
* reorganize the code to mimic crypto
* include mm.h as not all archs include it in dma-mapping.h
* make the Kconfig options non-user visible, Adrian Bunk
* move async_tx under crypto since it is meant as 'core' functionality,
  and the two may share algorithms in the future
* move large inline functions into c files
* checkpatch.pl fixes
* gpl v2 only correction

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
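The CONFIG_DMA_ENGINE=n fallback noted in the details above amounts to a
header-side stub of roughly the following shape; this is a sketch of the
described behaviour, not the exact hunk from the header:

/* with CONFIG_DMA_ENGINE=n the channel lookup compiles away, so every
 * async_<operation> falls through to its synchronous branch
 */
static inline struct dma_chan *
async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
		      enum dma_transaction_type tx_type)
{
	return NULL;
}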
Diffstat (limited to 'crypto/async_tx/async_tx.c')
-rw-r--r--    crypto/async_tx/async_tx.c    497
1 files changed, 497 insertions, 0 deletions
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
new file mode 100644
index 000000000000..035007145e78
--- /dev/null
+++ b/crypto/async_tx/async_tx.c
@@ -0,0 +1,497 @@
/*
 * core routines for the asynchronous memory transfer/transform api
 *
 * Copyright © 2006, Intel Corporation.
 *
 *	Dan Williams <dan.j.williams@intel.com>
 *
 *	with architecture considerations by:
 *	Neil Brown <neilb@suse.de>
 *	Jeff Garzik <jeff@garzik.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
#include <linux/kernel.h>
#include <linux/async_tx.h>

#ifdef CONFIG_DMA_ENGINE
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
	struct dma_chan *chan, enum dma_state state);

static struct dma_client async_tx_dma = {
	.event_callback = dma_channel_add_remove,
	/* .cap_mask == 0 defaults to all channels */
};

/**
 * dma_cap_mask_all - enable iteration over all operation types
 */
static dma_cap_mask_t dma_cap_mask_all;

/**
 * chan_ref_percpu - tracks channel allocations per core/operation
 */
struct chan_ref_percpu {
	struct dma_chan_ref *ref;
};

static int channel_table_initialized;
static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];

/**
 * async_tx_lock - protect modification of async_tx_master_list and serialize
 *	rebalance operations
 */
static spinlock_t async_tx_lock;

static struct list_head
async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list);

/* async_tx_issue_pending_all - start all transactions on all channels */
void async_tx_issue_pending_all(void)
{
	struct dma_chan_ref *ref;

	rcu_read_lock();
	list_for_each_entry_rcu(ref, &async_tx_master_list, node)
		ref->chan->device->device_issue_pending(ref->chan);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);

/* dma_wait_for_async_tx - spin wait for a transaction to complete
 * @tx: transaction to wait on
 */
enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
	enum dma_status status;
	struct dma_async_tx_descriptor *iter;

	if (!tx)
		return DMA_SUCCESS;

	/* poll through the dependency chain, return when tx is complete */
	do {
		iter = tx;
		while (iter->cookie == -EBUSY)
			iter = iter->parent;

		status = dma_sync_wait(iter->chan, iter->cookie);
	} while (status == DMA_IN_PROGRESS || (iter != tx));

	return status;
}
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);

/* async_tx_run_dependencies - helper routine for dma drivers to process
 *	(start) dependent operations on their target channel
 * @tx: transaction with dependencies
 */
void
async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
{
	struct dma_async_tx_descriptor *dep_tx, *_dep_tx;
	struct dma_device *dev;
	struct dma_chan *chan;

	list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list,
		depend_node) {
		chan = dep_tx->chan;
		dev = chan->device;
		/* we can't depend on ourselves */
		BUG_ON(chan == tx->chan);
		list_del(&dep_tx->depend_node);
		tx->tx_submit(dep_tx);

		/* we need to poke the engine as client code does not
		 * know about dependency submission events
		 */
		dev->device_issue_pending(chan);
	}
}
EXPORT_SYMBOL_GPL(async_tx_run_dependencies);

static void
free_dma_chan_ref(struct rcu_head *rcu)
{
	struct dma_chan_ref *ref;
	ref = container_of(rcu, struct dma_chan_ref, rcu);
	kfree(ref);
}

static void
init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
{
	INIT_LIST_HEAD(&ref->node);
	INIT_RCU_HEAD(&ref->rcu);
	ref->chan = chan;
	atomic_set(&ref->count, 0);
}

/**
 * get_chan_ref_by_cap - returns the nth channel of the given capability
 *	defaults to returning the channel with the desired capability and the
 *	lowest reference count if the index can not be satisfied
 * @cap: capability to match
 * @index: nth channel desired, passing -1 has the effect of forcing the
 *	default return value
 */
static struct dma_chan_ref *
get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
{
	struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;

	rcu_read_lock();
	list_for_each_entry_rcu(ref, &async_tx_master_list, node)
		if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
			if (!min_ref)
				min_ref = ref;
			else if (atomic_read(&ref->count) <
				atomic_read(&min_ref->count))
				min_ref = ref;

			if (index-- == 0) {
				ret_ref = ref;
				break;
			}
		}
	rcu_read_unlock();

	if (!ret_ref)
		ret_ref = min_ref;

	if (ret_ref)
		atomic_inc(&ret_ref->count);

	return ret_ref;
}

/**
 * async_tx_rebalance - redistribute the available channels, optimize
 *	for cpu isolation in the SMP case, and operation isolation in the
 *	uniprocessor case
 */
static void async_tx_rebalance(void)
{
	int cpu, cap, cpu_idx = 0;
	unsigned long flags;

	if (!channel_table_initialized)
		return;

	spin_lock_irqsave(&async_tx_lock, flags);

	/* undo the last distribution */
	for_each_dma_cap_mask(cap, dma_cap_mask_all)
		for_each_possible_cpu(cpu) {
			struct dma_chan_ref *ref =
				per_cpu_ptr(channel_table[cap], cpu)->ref;
			if (ref) {
				atomic_set(&ref->count, 0);
				per_cpu_ptr(channel_table[cap], cpu)->ref =
					NULL;
			}
		}

	for_each_dma_cap_mask(cap, dma_cap_mask_all)
		for_each_online_cpu(cpu) {
			struct dma_chan_ref *new;
			if (NR_CPUS > 1)
				new = get_chan_ref_by_cap(cap, cpu_idx++);
			else
				new = get_chan_ref_by_cap(cap, -1);

			per_cpu_ptr(channel_table[cap], cpu)->ref = new;
		}

	spin_unlock_irqrestore(&async_tx_lock, flags);
}

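/**
 * dma_channel_add_remove - dma_client event callback: track channel
 *	arrival/removal, maintain async_tx_master_list, and rebalance the
 *	per-cpu channel table
 * @client: the async_tx dma_client
 * @chan: channel being added or removed
 * @state: DMA_RESOURCE_AVAILABLE, DMA_RESOURCE_REMOVED, etc.
 */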
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
	struct dma_chan *chan, enum dma_state state)
{
	unsigned long found, flags;
	struct dma_chan_ref *master_ref, *ref;
	enum dma_state_client ack = DMA_DUP; /* default: take no action */

	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		found = 0;
		rcu_read_lock();
		list_for_each_entry_rcu(ref, &async_tx_master_list, node)
			if (ref->chan == chan) {
				found = 1;
				break;
			}
		rcu_read_unlock();

		pr_debug("async_tx: dma resource available [%s]\n",
			found ? "old" : "new");

		if (!found)
			ack = DMA_ACK;
		else
			break;

		/* add the channel to the generic management list */
		master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
		if (master_ref) {
			/* keep a reference until async_tx is unloaded */
			dma_chan_get(chan);
			init_dma_chan_ref(master_ref, chan);
			spin_lock_irqsave(&async_tx_lock, flags);
			list_add_tail_rcu(&master_ref->node,
				&async_tx_master_list);
			spin_unlock_irqrestore(&async_tx_lock,
				flags);
		} else {
			printk(KERN_WARNING "async_tx: unable to create"
				" new master entry in response to"
				" a DMA_RESOURCE_ADDED event"
				" (-ENOMEM)\n");
			return 0;
		}

		async_tx_rebalance();
		break;
	case DMA_RESOURCE_REMOVED:
		found = 0;
		spin_lock_irqsave(&async_tx_lock, flags);
		list_for_each_entry_rcu(ref, &async_tx_master_list, node)
			if (ref->chan == chan) {
				/* permit backing devices to go away */
				dma_chan_put(ref->chan);
				list_del_rcu(&ref->node);
				call_rcu(&ref->rcu, free_dma_chan_ref);
				found = 1;
				break;
			}
		spin_unlock_irqrestore(&async_tx_lock, flags);

		pr_debug("async_tx: dma resource removed [%s]\n",
			found ? "ours" : "not ours");

		if (found)
			ack = DMA_ACK;
		else
			break;

		async_tx_rebalance();
		break;
	case DMA_RESOURCE_SUSPEND:
	case DMA_RESOURCE_RESUME:
		printk(KERN_WARNING "async_tx: does not support dma channel"
			" suspend/resume\n");
		break;
	default:
		BUG();
	}

	return ack;
}

static int __init
async_tx_init(void)
{
	enum dma_transaction_type cap;

	spin_lock_init(&async_tx_lock);
	bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);

	/* an interrupt will never be an explicit operation type.
	 * clearing this bit prevents allocation to a slot in 'channel_table'
	 */
	clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);

	for_each_dma_cap_mask(cap, dma_cap_mask_all) {
		channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
		if (!channel_table[cap])
			goto err;
	}

	channel_table_initialized = 1;
	dma_async_client_register(&async_tx_dma);
	dma_async_client_chan_request(&async_tx_dma);

	printk(KERN_INFO "async_tx: api initialized (async)\n");

	return 0;
err:
	printk(KERN_ERR "async_tx: initialization failure\n");

	while (--cap >= 0)
		free_percpu(channel_table[cap]);

	return 1;
}

static void __exit async_tx_exit(void)
{
	enum dma_transaction_type cap;

	channel_table_initialized = 0;

	for_each_dma_cap_mask(cap, dma_cap_mask_all)
		if (channel_table[cap])
			free_percpu(channel_table[cap]);

	dma_async_client_unregister(&async_tx_dma);
}

/**
 * async_tx_find_channel - find a channel to carry out the operation or let
 *	the transaction execute synchronously
 * @depend_tx: transaction dependency
 * @tx_type: transaction type
 */
struct dma_chan *
async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
	enum dma_transaction_type tx_type)
{
	/* see if we can keep the chain on one channel */
	if (depend_tx &&
		dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
		return depend_tx->chan;
	else if (likely(channel_table_initialized)) {
		struct dma_chan_ref *ref;
		int cpu = get_cpu();
		ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
		put_cpu();
		return ref ? ref->chan : NULL;
	} else
		return NULL;
}
EXPORT_SYMBOL_GPL(async_tx_find_channel);
#else
static int __init async_tx_init(void)
{
	printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
	return 0;
}

static void __exit async_tx_exit(void)
{
	do { } while (0);
}
#endif

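/**
 * async_tx_submit - set the completion callback, resolve any cross-channel
 *	dependency via an interrupt descriptor, and submit the transaction
 * @chan: channel selected by async_tx_find_channel
 * @tx: descriptor returned from device_prep_dma_<operation>
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: transaction that must complete before @tx executes, or NULL
 * @cb_fn: callback routine to run at completion, or NULL
 * @cb_param: parameter passed to @cb_fn
 */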
void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	tx->callback = cb_fn;
	tx->callback_param = cb_param;

	/* set this new tx to run after depend_tx if:
	 * 1/ a dependency exists (depend_tx is !NULL)
	 * 2/ the tx can not be submitted to the current channel
	 */
	if (depend_tx && depend_tx->chan != chan) {
		/* if ack is already set then we cannot be sure
		 * we are referring to the correct operation
		 */
		BUG_ON(depend_tx->ack);

		tx->parent = depend_tx;
		spin_lock_bh(&depend_tx->lock);
		list_add_tail(&tx->depend_node, &depend_tx->depend_list);
		if (depend_tx->cookie == 0) {
			struct dma_chan *dep_chan = depend_tx->chan;
			struct dma_device *dep_dev = dep_chan->device;
			dep_dev->device_dependency_added(dep_chan);
		}
		spin_unlock_bh(&depend_tx->lock);

		/* schedule an interrupt to trigger the channel switch */
		async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL);
	} else {
		tx->parent = NULL;
		tx->tx_submit(tx);
	}

	if (flags & ASYNC_TX_ACK)
		async_tx_ack(tx);

	if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
		async_tx_ack(depend_tx);
}
EXPORT_SYMBOL_GPL(async_tx_submit);

/**
 * async_trigger_callback - schedules the callback function to be run after
 *	any dependent operations have been completed.
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: 'callback' requires the completion of this transaction
 * @cb_fn: function to call after depend_tx completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan;
	struct dma_device *device;
	struct dma_async_tx_descriptor *tx;

	if (depend_tx) {
		chan = depend_tx->chan;
		device = chan->device;

		/* see if we can schedule an interrupt
		 * otherwise poll for completion
		 */
		if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
			device = NULL;

		tx = device ? device->device_prep_dma_interrupt(chan) : NULL;
	} else
		tx = NULL;

	if (tx) {
		pr_debug("%s: (async)\n", __FUNCTION__);

		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else {
		pr_debug("%s: (sync)\n", __FUNCTION__);

		/* wait for any prerequisite operations */
		if (depend_tx) {
			/* if ack is already set then we cannot be sure
			 * we are referring to the correct operation
			 */
			BUG_ON(depend_tx->ack);
			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
				panic("%s: DMA_ERROR waiting for depend_tx\n",
					__FUNCTION__);
		}

		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
	}

	return tx;
}
EXPORT_SYMBOL_GPL(async_trigger_callback);

module_init(async_tx_init);
module_exit(async_tx_exit);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
MODULE_LICENSE("GPL");