aboutsummaryrefslogtreecommitdiffstats
path: root/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'crypto')
-rw-r--r--crypto/Kconfig12
-rw-r--r--crypto/Makefile6
-rw-r--r--crypto/async_tx/Kconfig16
-rw-r--r--crypto/async_tx/Makefile4
-rw-r--r--crypto/async_tx/async_memcpy.c131
-rw-r--r--crypto/async_tx/async_memset.c109
-rw-r--r--crypto/async_tx/async_tx.c497
-rw-r--r--crypto/async_tx/async_xor.c327
-rw-r--r--crypto/xor.c155
9 files changed, 1256 insertions, 1 deletions
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 935301ee5ac8..3d1a1e27944f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1,7 +1,17 @@
1# 1#
2# Cryptographic API Configuration 2# Generic algorithms support
3#
4config XOR_BLOCKS
5 tristate
6
3# 7#
8# async_tx api: hardware offloaded memory transfer/transform support
9#
10source "crypto/async_tx/Kconfig"
4 11
12#
13# Cryptographic API Configuration
14#
5menuconfig CRYPTO 15menuconfig CRYPTO
6 bool "Cryptographic API" 16 bool "Cryptographic API"
7 help 17 help
diff --git a/crypto/Makefile b/crypto/Makefile
index cce46a1c9dc7..0cf17f1ea151 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -50,3 +50,9 @@ obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
50obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o 50obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
51 51
52obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o 52obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
53
54#
55# generic algorithms and the async_tx api
56#
57obj-$(CONFIG_XOR_BLOCKS) += xor.o
58obj-$(CONFIG_ASYNC_CORE) += async_tx/
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
new file mode 100644
index 000000000000..d8fb39145986
--- /dev/null
+++ b/crypto/async_tx/Kconfig
@@ -0,0 +1,16 @@
1config ASYNC_CORE
2 tristate
3
4config ASYNC_MEMCPY
5 tristate
6 select ASYNC_CORE
7
8config ASYNC_XOR
9 tristate
10 select ASYNC_CORE
11 select XOR_BLOCKS
12
13config ASYNC_MEMSET
14 tristate
15 select ASYNC_CORE
16
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
new file mode 100644
index 000000000000..27baa7d52fbc
--- /dev/null
+++ b/crypto/async_tx/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_ASYNC_CORE) += async_tx.o
2obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
3obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
4obj-$(CONFIG_ASYNC_XOR) += async_xor.o
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
new file mode 100644
index 000000000000..a973f4ef897d
--- /dev/null
+++ b/crypto/async_tx/async_memcpy.c
@@ -0,0 +1,131 @@
1/*
2 * copy offload engine support
3 *
4 * Copyright © 2006, Intel Corporation.
5 *
6 * Dan Williams <dan.j.williams@intel.com>
7 *
8 * with architecture considerations by:
9 * Neil Brown <neilb@suse.de>
10 * Jeff Garzik <jeff@garzik.org>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 */
26#include <linux/kernel.h>
27#include <linux/highmem.h>
28#include <linux/mm.h>
29#include <linux/dma-mapping.h>
30#include <linux/async_tx.h>
31
32/**
33 * async_memcpy - attempt to copy memory with a dma engine.
34 * @dest: destination page
35 * @src: src page
36 * @offset: offset in pages to start transaction
37 * @len: length in bytes
38 * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
39 * ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST
40 * @depend_tx: memcpy depends on the result of this transaction
41 * @cb_fn: function to call when the memcpy completes
42 * @cb_param: parameter to pass to the callback routine
43 */
44struct dma_async_tx_descriptor *
45async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
46 unsigned int src_offset, size_t len, enum async_tx_flags flags,
47 struct dma_async_tx_descriptor *depend_tx,
48 dma_async_tx_callback cb_fn, void *cb_param)
49{
50 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
51 struct dma_device *device = chan ? chan->device : NULL;
52 int int_en = cb_fn ? 1 : 0;
53 struct dma_async_tx_descriptor *tx = device ?
54 device->device_prep_dma_memcpy(chan, len,
55 int_en) : NULL;
56
57 if (tx) { /* run the memcpy asynchronously */
58 dma_addr_t addr;
59 enum dma_data_direction dir;
60
61 pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
62
63 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
64 DMA_NONE : DMA_FROM_DEVICE;
65
66 addr = dma_map_page(device->dev, dest, dest_offset, len, dir);
67 tx->tx_set_dest(addr, tx, 0);
68
69 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
70 DMA_NONE : DMA_TO_DEVICE;
71
72 addr = dma_map_page(device->dev, src, src_offset, len, dir);
73 tx->tx_set_src(addr, tx, 0);
74
75 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
76 } else { /* run the memcpy synchronously */
77 void *dest_buf, *src_buf;
78 pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
79
80 /* wait for any prerequisite operations */
81 if (depend_tx) {
82 /* if ack is already set then we cannot be sure
83 * we are referring to the correct operation
84 */
85 BUG_ON(depend_tx->ack);
86 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
87 panic("%s: DMA_ERROR waiting for depend_tx\n",
88 __FUNCTION__);
89 }
90
91 if (flags & ASYNC_TX_KMAP_DST)
92 dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
93 else
94 dest_buf = page_address(dest) + dest_offset;
95
96 if (flags & ASYNC_TX_KMAP_SRC)
97 src_buf = kmap_atomic(src, KM_USER0) + src_offset;
98 else
99 src_buf = page_address(src) + src_offset;
100
101 memcpy(dest_buf, src_buf, len);
102
103 if (flags & ASYNC_TX_KMAP_DST)
104 kunmap_atomic(dest_buf, KM_USER0);
105
106 if (flags & ASYNC_TX_KMAP_SRC)
107 kunmap_atomic(src_buf, KM_USER0);
108
109 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
110 }
111
112 return tx;
113}
114EXPORT_SYMBOL_GPL(async_memcpy);
115
116static int __init async_memcpy_init(void)
117{
118 return 0;
119}
120
121static void __exit async_memcpy_exit(void)
122{
123 do { } while (0);
124}
125
126module_init(async_memcpy_init);
127module_exit(async_memcpy_exit);
128
129MODULE_AUTHOR("Intel Corporation");
130MODULE_DESCRIPTION("asynchronous memcpy api");
131MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
new file mode 100644
index 000000000000..66ef6351202e
--- /dev/null
+++ b/crypto/async_tx/async_memset.c
@@ -0,0 +1,109 @@
1/*
2 * memory fill offload engine support
3 *
4 * Copyright © 2006, Intel Corporation.
5 *
6 * Dan Williams <dan.j.williams@intel.com>
7 *
8 * with architecture considerations by:
9 * Neil Brown <neilb@suse.de>
10 * Jeff Garzik <jeff@garzik.org>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 */
26#include <linux/kernel.h>
27#include <linux/interrupt.h>
28#include <linux/mm.h>
29#include <linux/dma-mapping.h>
30#include <linux/async_tx.h>
31
32/**
33 * async_memset - attempt to fill memory with a dma engine.
34 * @dest: destination page
35 * @val: fill value
36 * @offset: offset in pages to start transaction
37 * @len: length in bytes
38 * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
39 * @depend_tx: memset depends on the result of this transaction
40 * @cb_fn: function to call when the memcpy completes
41 * @cb_param: parameter to pass to the callback routine
42 */
43struct dma_async_tx_descriptor *
44async_memset(struct page *dest, int val, unsigned int offset,
45 size_t len, enum async_tx_flags flags,
46 struct dma_async_tx_descriptor *depend_tx,
47 dma_async_tx_callback cb_fn, void *cb_param)
48{
49 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET);
50 struct dma_device *device = chan ? chan->device : NULL;
51 int int_en = cb_fn ? 1 : 0;
52 struct dma_async_tx_descriptor *tx = device ?
53 device->device_prep_dma_memset(chan, val, len,
54 int_en) : NULL;
55
56 if (tx) { /* run the memset asynchronously */
57 dma_addr_t dma_addr;
58 enum dma_data_direction dir;
59
60 pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
61 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
62 DMA_NONE : DMA_FROM_DEVICE;
63
64 dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
65 tx->tx_set_dest(dma_addr, tx, 0);
66
67 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
68 } else { /* run the memset synchronously */
69 void *dest_buf;
70 pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
71
72 dest_buf = (void *) (((char *) page_address(dest)) + offset);
73
74 /* wait for any prerequisite operations */
75 if (depend_tx) {
76 /* if ack is already set then we cannot be sure
77 * we are referring to the correct operation
78 */
79 BUG_ON(depend_tx->ack);
80 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
81 panic("%s: DMA_ERROR waiting for depend_tx\n",
82 __FUNCTION__);
83 }
84
85 memset(dest_buf, val, len);
86
87 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
88 }
89
90 return tx;
91}
92EXPORT_SYMBOL_GPL(async_memset);
93
94static int __init async_memset_init(void)
95{
96 return 0;
97}
98
99static void __exit async_memset_exit(void)
100{
101 do { } while (0);
102}
103
104module_init(async_memset_init);
105module_exit(async_memset_exit);
106
107MODULE_AUTHOR("Intel Corporation");
108MODULE_DESCRIPTION("asynchronous memset api");
109MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
new file mode 100644
index 000000000000..035007145e78
--- /dev/null
+++ b/crypto/async_tx/async_tx.c
@@ -0,0 +1,497 @@
1/*
2 * core routines for the asynchronous memory transfer/transform api
3 *
4 * Copyright © 2006, Intel Corporation.
5 *
6 * Dan Williams <dan.j.williams@intel.com>
7 *
8 * with architecture considerations by:
9 * Neil Brown <neilb@suse.de>
10 * Jeff Garzik <jeff@garzik.org>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 */
26#include <linux/kernel.h>
27#include <linux/async_tx.h>
28
29#ifdef CONFIG_DMA_ENGINE
30static enum dma_state_client
31dma_channel_add_remove(struct dma_client *client,
32 struct dma_chan *chan, enum dma_state state);
33
34static struct dma_client async_tx_dma = {
35 .event_callback = dma_channel_add_remove,
36 /* .cap_mask == 0 defaults to all channels */
37};
38
39/**
40 * dma_cap_mask_all - enable iteration over all operation types
41 */
42static dma_cap_mask_t dma_cap_mask_all;
43
44/**
45 * chan_ref_percpu - tracks channel allocations per core/opertion
46 */
47struct chan_ref_percpu {
48 struct dma_chan_ref *ref;
49};
50
51static int channel_table_initialized;
52static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];
53
54/**
55 * async_tx_lock - protect modification of async_tx_master_list and serialize
56 * rebalance operations
57 */
58static spinlock_t async_tx_lock;
59
60static struct list_head
61async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list);
62
63/* async_tx_issue_pending_all - start all transactions on all channels */
64void async_tx_issue_pending_all(void)
65{
66 struct dma_chan_ref *ref;
67
68 rcu_read_lock();
69 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
70 ref->chan->device->device_issue_pending(ref->chan);
71 rcu_read_unlock();
72}
73EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
74
75/* dma_wait_for_async_tx - spin wait for a transcation to complete
76 * @tx: transaction to wait on
77 */
78enum dma_status
79dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
80{
81 enum dma_status status;
82 struct dma_async_tx_descriptor *iter;
83
84 if (!tx)
85 return DMA_SUCCESS;
86
87 /* poll through the dependency chain, return when tx is complete */
88 do {
89 iter = tx;
90 while (iter->cookie == -EBUSY)
91 iter = iter->parent;
92
93 status = dma_sync_wait(iter->chan, iter->cookie);
94 } while (status == DMA_IN_PROGRESS || (iter != tx));
95
96 return status;
97}
98EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
99
100/* async_tx_run_dependencies - helper routine for dma drivers to process
101 * (start) dependent operations on their target channel
102 * @tx: transaction with dependencies
103 */
104void
105async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
106{
107 struct dma_async_tx_descriptor *dep_tx, *_dep_tx;
108 struct dma_device *dev;
109 struct dma_chan *chan;
110
111 list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list,
112 depend_node) {
113 chan = dep_tx->chan;
114 dev = chan->device;
115 /* we can't depend on ourselves */
116 BUG_ON(chan == tx->chan);
117 list_del(&dep_tx->depend_node);
118 tx->tx_submit(dep_tx);
119
120 /* we need to poke the engine as client code does not
121 * know about dependency submission events
122 */
123 dev->device_issue_pending(chan);
124 }
125}
126EXPORT_SYMBOL_GPL(async_tx_run_dependencies);
127
128static void
129free_dma_chan_ref(struct rcu_head *rcu)
130{
131 struct dma_chan_ref *ref;
132 ref = container_of(rcu, struct dma_chan_ref, rcu);
133 kfree(ref);
134}
135
136static void
137init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
138{
139 INIT_LIST_HEAD(&ref->node);
140 INIT_RCU_HEAD(&ref->rcu);
141 ref->chan = chan;
142 atomic_set(&ref->count, 0);
143}
144
145/**
146 * get_chan_ref_by_cap - returns the nth channel of the given capability
147 * defaults to returning the channel with the desired capability and the
148 * lowest reference count if the index can not be satisfied
149 * @cap: capability to match
150 * @index: nth channel desired, passing -1 has the effect of forcing the
151 * default return value
152 */
153static struct dma_chan_ref *
154get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
155{
156 struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;
157
158 rcu_read_lock();
159 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
160 if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
161 if (!min_ref)
162 min_ref = ref;
163 else if (atomic_read(&ref->count) <
164 atomic_read(&min_ref->count))
165 min_ref = ref;
166
167 if (index-- == 0) {
168 ret_ref = ref;
169 break;
170 }
171 }
172 rcu_read_unlock();
173
174 if (!ret_ref)
175 ret_ref = min_ref;
176
177 if (ret_ref)
178 atomic_inc(&ret_ref->count);
179
180 return ret_ref;
181}
182
183/**
184 * async_tx_rebalance - redistribute the available channels, optimize
185 * for cpu isolation in the SMP case, and opertaion isolation in the
186 * uniprocessor case
187 */
188static void async_tx_rebalance(void)
189{
190 int cpu, cap, cpu_idx = 0;
191 unsigned long flags;
192
193 if (!channel_table_initialized)
194 return;
195
196 spin_lock_irqsave(&async_tx_lock, flags);
197
198 /* undo the last distribution */
199 for_each_dma_cap_mask(cap, dma_cap_mask_all)
200 for_each_possible_cpu(cpu) {
201 struct dma_chan_ref *ref =
202 per_cpu_ptr(channel_table[cap], cpu)->ref;
203 if (ref) {
204 atomic_set(&ref->count, 0);
205 per_cpu_ptr(channel_table[cap], cpu)->ref =
206 NULL;
207 }
208 }
209
210 for_each_dma_cap_mask(cap, dma_cap_mask_all)
211 for_each_online_cpu(cpu) {
212 struct dma_chan_ref *new;
213 if (NR_CPUS > 1)
214 new = get_chan_ref_by_cap(cap, cpu_idx++);
215 else
216 new = get_chan_ref_by_cap(cap, -1);
217
218 per_cpu_ptr(channel_table[cap], cpu)->ref = new;
219 }
220
221 spin_unlock_irqrestore(&async_tx_lock, flags);
222}
223
224static enum dma_state_client
225dma_channel_add_remove(struct dma_client *client,
226 struct dma_chan *chan, enum dma_state state)
227{
228 unsigned long found, flags;
229 struct dma_chan_ref *master_ref, *ref;
230 enum dma_state_client ack = DMA_DUP; /* default: take no action */
231
232 switch (state) {
233 case DMA_RESOURCE_AVAILABLE:
234 found = 0;
235 rcu_read_lock();
236 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
237 if (ref->chan == chan) {
238 found = 1;
239 break;
240 }
241 rcu_read_unlock();
242
243 pr_debug("async_tx: dma resource available [%s]\n",
244 found ? "old" : "new");
245
246 if (!found)
247 ack = DMA_ACK;
248 else
249 break;
250
251 /* add the channel to the generic management list */
252 master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
253 if (master_ref) {
254 /* keep a reference until async_tx is unloaded */
255 dma_chan_get(chan);
256 init_dma_chan_ref(master_ref, chan);
257 spin_lock_irqsave(&async_tx_lock, flags);
258 list_add_tail_rcu(&master_ref->node,
259 &async_tx_master_list);
260 spin_unlock_irqrestore(&async_tx_lock,
261 flags);
262 } else {
263 printk(KERN_WARNING "async_tx: unable to create"
264 " new master entry in response to"
265 " a DMA_RESOURCE_ADDED event"
266 " (-ENOMEM)\n");
267 return 0;
268 }
269
270 async_tx_rebalance();
271 break;
272 case DMA_RESOURCE_REMOVED:
273 found = 0;
274 spin_lock_irqsave(&async_tx_lock, flags);
275 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
276 if (ref->chan == chan) {
277 /* permit backing devices to go away */
278 dma_chan_put(ref->chan);
279 list_del_rcu(&ref->node);
280 call_rcu(&ref->rcu, free_dma_chan_ref);
281 found = 1;
282 break;
283 }
284 spin_unlock_irqrestore(&async_tx_lock, flags);
285
286 pr_debug("async_tx: dma resource removed [%s]\n",
287 found ? "ours" : "not ours");
288
289 if (found)
290 ack = DMA_ACK;
291 else
292 break;
293
294 async_tx_rebalance();
295 break;
296 case DMA_RESOURCE_SUSPEND:
297 case DMA_RESOURCE_RESUME:
298 printk(KERN_WARNING "async_tx: does not support dma channel"
299 " suspend/resume\n");
300 break;
301 default:
302 BUG();
303 }
304
305 return ack;
306}
307
308static int __init
309async_tx_init(void)
310{
311 enum dma_transaction_type cap;
312
313 spin_lock_init(&async_tx_lock);
314 bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
315
316 /* an interrupt will never be an explicit operation type.
317 * clearing this bit prevents allocation to a slot in 'channel_table'
318 */
319 clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
320
321 for_each_dma_cap_mask(cap, dma_cap_mask_all) {
322 channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
323 if (!channel_table[cap])
324 goto err;
325 }
326
327 channel_table_initialized = 1;
328 dma_async_client_register(&async_tx_dma);
329 dma_async_client_chan_request(&async_tx_dma);
330
331 printk(KERN_INFO "async_tx: api initialized (async)\n");
332
333 return 0;
334err:
335 printk(KERN_ERR "async_tx: initialization failure\n");
336
337 while (--cap >= 0)
338 free_percpu(channel_table[cap]);
339
340 return 1;
341}
342
343static void __exit async_tx_exit(void)
344{
345 enum dma_transaction_type cap;
346
347 channel_table_initialized = 0;
348
349 for_each_dma_cap_mask(cap, dma_cap_mask_all)
350 if (channel_table[cap])
351 free_percpu(channel_table[cap]);
352
353 dma_async_client_unregister(&async_tx_dma);
354}
355
356/**
357 * async_tx_find_channel - find a channel to carry out the operation or let
358 * the transaction execute synchronously
359 * @depend_tx: transaction dependency
360 * @tx_type: transaction type
361 */
362struct dma_chan *
363async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
364 enum dma_transaction_type tx_type)
365{
366 /* see if we can keep the chain on one channel */
367 if (depend_tx &&
368 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
369 return depend_tx->chan;
370 else if (likely(channel_table_initialized)) {
371 struct dma_chan_ref *ref;
372 int cpu = get_cpu();
373 ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
374 put_cpu();
375 return ref ? ref->chan : NULL;
376 } else
377 return NULL;
378}
379EXPORT_SYMBOL_GPL(async_tx_find_channel);
380#else
381static int __init async_tx_init(void)
382{
383 printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
384 return 0;
385}
386
387static void __exit async_tx_exit(void)
388{
389 do { } while (0);
390}
391#endif
392
393void
394async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
395 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
396 dma_async_tx_callback cb_fn, void *cb_param)
397{
398 tx->callback = cb_fn;
399 tx->callback_param = cb_param;
400
401 /* set this new tx to run after depend_tx if:
402 * 1/ a dependency exists (depend_tx is !NULL)
403 * 2/ the tx can not be submitted to the current channel
404 */
405 if (depend_tx && depend_tx->chan != chan) {
406 /* if ack is already set then we cannot be sure
407 * we are referring to the correct operation
408 */
409 BUG_ON(depend_tx->ack);
410
411 tx->parent = depend_tx;
412 spin_lock_bh(&depend_tx->lock);
413 list_add_tail(&tx->depend_node, &depend_tx->depend_list);
414 if (depend_tx->cookie == 0) {
415 struct dma_chan *dep_chan = depend_tx->chan;
416 struct dma_device *dep_dev = dep_chan->device;
417 dep_dev->device_dependency_added(dep_chan);
418 }
419 spin_unlock_bh(&depend_tx->lock);
420
421 /* schedule an interrupt to trigger the channel switch */
422 async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL);
423 } else {
424 tx->parent = NULL;
425 tx->tx_submit(tx);
426 }
427
428 if (flags & ASYNC_TX_ACK)
429 async_tx_ack(tx);
430
431 if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
432 async_tx_ack(depend_tx);
433}
434EXPORT_SYMBOL_GPL(async_tx_submit);
435
436/**
437 * async_trigger_callback - schedules the callback function to be run after
438 * any dependent operations have been completed.
439 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
440 * @depend_tx: 'callback' requires the completion of this transaction
441 * @cb_fn: function to call after depend_tx completes
442 * @cb_param: parameter to pass to the callback routine
443 */
444struct dma_async_tx_descriptor *
445async_trigger_callback(enum async_tx_flags flags,
446 struct dma_async_tx_descriptor *depend_tx,
447 dma_async_tx_callback cb_fn, void *cb_param)
448{
449 struct dma_chan *chan;
450 struct dma_device *device;
451 struct dma_async_tx_descriptor *tx;
452
453 if (depend_tx) {
454 chan = depend_tx->chan;
455 device = chan->device;
456
457 /* see if we can schedule an interrupt
458 * otherwise poll for completion
459 */
460 if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
461 device = NULL;
462
463 tx = device ? device->device_prep_dma_interrupt(chan) : NULL;
464 } else
465 tx = NULL;
466
467 if (tx) {
468 pr_debug("%s: (async)\n", __FUNCTION__);
469
470 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
471 } else {
472 pr_debug("%s: (sync)\n", __FUNCTION__);
473
474 /* wait for any prerequisite operations */
475 if (depend_tx) {
476 /* if ack is already set then we cannot be sure
477 * we are referring to the correct operation
478 */
479 BUG_ON(depend_tx->ack);
480 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
481 panic("%s: DMA_ERROR waiting for depend_tx\n",
482 __FUNCTION__);
483 }
484
485 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
486 }
487
488 return tx;
489}
490EXPORT_SYMBOL_GPL(async_trigger_callback);
491
492module_init(async_tx_init);
493module_exit(async_tx_exit);
494
495MODULE_AUTHOR("Intel Corporation");
496MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
497MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
new file mode 100644
index 000000000000..2575f674dcd5
--- /dev/null
+++ b/crypto/async_tx/async_xor.c
@@ -0,0 +1,327 @@
1/*
2 * xor offload engine api
3 *
4 * Copyright © 2006, Intel Corporation.
5 *
6 * Dan Williams <dan.j.williams@intel.com>
7 *
8 * with architecture considerations by:
9 * Neil Brown <neilb@suse.de>
10 * Jeff Garzik <jeff@garzik.org>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 */
26#include <linux/kernel.h>
27#include <linux/interrupt.h>
28#include <linux/mm.h>
29#include <linux/dma-mapping.h>
30#include <linux/raid/xor.h>
31#include <linux/async_tx.h>
32
33static void
34do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device,
35 struct dma_chan *chan, struct page *dest, struct page **src_list,
36 unsigned int offset, unsigned int src_cnt, size_t len,
37 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
38 dma_async_tx_callback cb_fn, void *cb_param)
39{
40 dma_addr_t dma_addr;
41 enum dma_data_direction dir;
42 int i;
43
44 pr_debug("%s: len: %zu\n", __FUNCTION__, len);
45
46 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
47 DMA_NONE : DMA_FROM_DEVICE;
48
49 dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
50 tx->tx_set_dest(dma_addr, tx, 0);
51
52 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
53 DMA_NONE : DMA_TO_DEVICE;
54
55 for (i = 0; i < src_cnt; i++) {
56 dma_addr = dma_map_page(device->dev, src_list[i],
57 offset, len, dir);
58 tx->tx_set_src(dma_addr, tx, i);
59 }
60
61 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
62}
63
64static void
65do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
66 unsigned int src_cnt, size_t len, enum async_tx_flags flags,
67 struct dma_async_tx_descriptor *depend_tx,
68 dma_async_tx_callback cb_fn, void *cb_param)
69{
70 void *_dest;
71 int i;
72
73 pr_debug("%s: len: %zu\n", __FUNCTION__, len);
74
75 /* reuse the 'src_list' array to convert to buffer pointers */
76 for (i = 0; i < src_cnt; i++)
77 src_list[i] = (struct page *)
78 (page_address(src_list[i]) + offset);
79
80 /* set destination address */
81 _dest = page_address(dest) + offset;
82
83 if (flags & ASYNC_TX_XOR_ZERO_DST)
84 memset(_dest, 0, len);
85
86 xor_blocks(src_cnt, len, _dest,
87 (void **) src_list);
88
89 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
90}
91
92/**
93 * async_xor - attempt to xor a set of blocks with a dma engine.
94 * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
95 * flag must be set to not include dest data in the calculation. The
96 * assumption with dma eninges is that they only use the destination
97 * buffer as a source when it is explicity specified in the source list.
98 * @dest: destination page
99 * @src_list: array of source pages (if the dest is also a source it must be
100 * at index zero). The contents of this array may be overwritten.
101 * @offset: offset in pages to start transaction
102 * @src_cnt: number of source pages
103 * @len: length in bytes
104 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
105 * ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
106 * @depend_tx: xor depends on the result of this transaction.
107 * @cb_fn: function to call when the xor completes
108 * @cb_param: parameter to pass to the callback routine
109 */
110struct dma_async_tx_descriptor *
111async_xor(struct page *dest, struct page **src_list, unsigned int offset,
112 int src_cnt, size_t len, enum async_tx_flags flags,
113 struct dma_async_tx_descriptor *depend_tx,
114 dma_async_tx_callback cb_fn, void *cb_param)
115{
116 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR);
117 struct dma_device *device = chan ? chan->device : NULL;
118 struct dma_async_tx_descriptor *tx = NULL;
119 dma_async_tx_callback _cb_fn;
120 void *_cb_param;
121 unsigned long local_flags;
122 int xor_src_cnt;
123 int i = 0, src_off = 0, int_en;
124
125 BUG_ON(src_cnt <= 1);
126
127 while (src_cnt) {
128 local_flags = flags;
129 if (device) { /* run the xor asynchronously */
130 xor_src_cnt = min(src_cnt, device->max_xor);
131 /* if we are submitting additional xors
132 * only set the callback on the last transaction
133 */
134 if (src_cnt > xor_src_cnt) {
135 local_flags &= ~ASYNC_TX_ACK;
136 _cb_fn = NULL;
137 _cb_param = NULL;
138 } else {
139 _cb_fn = cb_fn;
140 _cb_param = cb_param;
141 }
142
143 int_en = _cb_fn ? 1 : 0;
144
145 tx = device->device_prep_dma_xor(
146 chan, xor_src_cnt, len, int_en);
147
148 if (tx) {
149 do_async_xor(tx, device, chan, dest,
150 &src_list[src_off], offset, xor_src_cnt, len,
151 local_flags, depend_tx, _cb_fn,
152 _cb_param);
153 } else /* fall through */
154 goto xor_sync;
155 } else { /* run the xor synchronously */
156xor_sync:
157 /* in the sync case the dest is an implied source
158 * (assumes the dest is at the src_off index)
159 */
160 if (flags & ASYNC_TX_XOR_DROP_DST) {
161 src_cnt--;
162 src_off++;
163 }
164
165 /* process up to 'MAX_XOR_BLOCKS' sources */
166 xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
167
168 /* if we are submitting additional xors
169 * only set the callback on the last transaction
170 */
171 if (src_cnt > xor_src_cnt) {
172 local_flags &= ~ASYNC_TX_ACK;
173 _cb_fn = NULL;
174 _cb_param = NULL;
175 } else {
176 _cb_fn = cb_fn;
177 _cb_param = cb_param;
178 }
179
180 /* wait for any prerequisite operations */
181 if (depend_tx) {
182 /* if ack is already set then we cannot be sure
183 * we are referring to the correct operation
184 */
185 BUG_ON(depend_tx->ack);
186 if (dma_wait_for_async_tx(depend_tx) ==
187 DMA_ERROR)
188 panic("%s: DMA_ERROR waiting for "
189 "depend_tx\n",
190 __FUNCTION__);
191 }
192
193 do_sync_xor(dest, &src_list[src_off], offset,
194 xor_src_cnt, len, local_flags, depend_tx,
195 _cb_fn, _cb_param);
196 }
197
198 /* the previous tx is hidden from the client,
199 * so ack it
200 */
201 if (i && depend_tx)
202 async_tx_ack(depend_tx);
203
204 depend_tx = tx;
205
206 if (src_cnt > xor_src_cnt) {
207 /* drop completed sources */
208 src_cnt -= xor_src_cnt;
209 src_off += xor_src_cnt;
210
211 /* unconditionally preserve the destination */
212 flags &= ~ASYNC_TX_XOR_ZERO_DST;
213
214 /* use the intermediate result a source, but remember
215 * it's dropped, because it's implied, in the sync case
216 */
217 src_list[--src_off] = dest;
218 src_cnt++;
219 flags |= ASYNC_TX_XOR_DROP_DST;
220 } else
221 src_cnt = 0;
222 i++;
223 }
224
225 return tx;
226}
227EXPORT_SYMBOL_GPL(async_xor);
228
229static int page_is_zero(struct page *p, unsigned int offset, size_t len)
230{
231 char *a = page_address(p) + offset;
232 return ((*(u32 *) a) == 0 &&
233 memcmp(a, a + 4, len - 4) == 0);
234}
235
236/**
237 * async_xor_zero_sum - attempt a xor parity check with a dma engine.
238 * @dest: destination page used if the xor is performed synchronously
239 * @src_list: array of source pages. The dest page must be listed as a source
240 * at index zero. The contents of this array may be overwritten.
241 * @offset: offset in pages to start transaction
242 * @src_cnt: number of source pages
243 * @len: length in bytes
244 * @result: 0 if sum == 0 else non-zero
245 * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
246 * @depend_tx: xor depends on the result of this transaction.
247 * @cb_fn: function to call when the xor completes
248 * @cb_param: parameter to pass to the callback routine
249 */
250struct dma_async_tx_descriptor *
251async_xor_zero_sum(struct page *dest, struct page **src_list,
252 unsigned int offset, int src_cnt, size_t len,
253 u32 *result, enum async_tx_flags flags,
254 struct dma_async_tx_descriptor *depend_tx,
255 dma_async_tx_callback cb_fn, void *cb_param)
256{
257 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM);
258 struct dma_device *device = chan ? chan->device : NULL;
259 int int_en = cb_fn ? 1 : 0;
260 struct dma_async_tx_descriptor *tx = device ?
261 device->device_prep_dma_zero_sum(chan, src_cnt, len, result,
262 int_en) : NULL;
263 int i;
264
265 BUG_ON(src_cnt <= 1);
266
267 if (tx) {
268 dma_addr_t dma_addr;
269 enum dma_data_direction dir;
270
271 pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
272
273 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
274 DMA_NONE : DMA_TO_DEVICE;
275
276 for (i = 0; i < src_cnt; i++) {
277 dma_addr = dma_map_page(device->dev, src_list[i],
278 offset, len, dir);
279 tx->tx_set_src(dma_addr, tx, i);
280 }
281
282 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
283 } else {
284 unsigned long xor_flags = flags;
285
286 pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
287
288 xor_flags |= ASYNC_TX_XOR_DROP_DST;
289 xor_flags &= ~ASYNC_TX_ACK;
290
291 tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
292 depend_tx, NULL, NULL);
293
294 if (tx) {
295 if (dma_wait_for_async_tx(tx) == DMA_ERROR)
296 panic("%s: DMA_ERROR waiting for tx\n",
297 __FUNCTION__);
298 async_tx_ack(tx);
299 }
300
301 *result = page_is_zero(dest, offset, len) ? 0 : 1;
302
303 tx = NULL;
304
305 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
306 }
307
308 return tx;
309}
310EXPORT_SYMBOL_GPL(async_xor_zero_sum);
311
312static int __init async_xor_init(void)
313{
314 return 0;
315}
316
317static void __exit async_xor_exit(void)
318{
319 do { } while (0);
320}
321
322module_init(async_xor_init);
323module_exit(async_xor_exit);
324
325MODULE_AUTHOR("Intel Corporation");
326MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
327MODULE_LICENSE("GPL");
diff --git a/crypto/xor.c b/crypto/xor.c
new file mode 100644
index 000000000000..b2e6db075e49
--- /dev/null
+++ b/crypto/xor.c
@@ -0,0 +1,155 @@
1/*
2 * xor.c : Multiple Devices driver for Linux
3 *
4 * Copyright (C) 1996, 1997, 1998, 1999, 2000,
5 * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
6 *
7 * Dispatch optimized RAID-5 checksumming functions.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * You should have received a copy of the GNU General Public License
15 * (for example /usr/src/linux/COPYING); if not, write to the Free
16 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19#define BH_TRACE 0
20#include <linux/module.h>
21#include <linux/raid/md.h>
22#include <linux/raid/xor.h>
23#include <asm/xor.h>
24
25/* The xor routines to use. */
26static struct xor_block_template *active_template;
27
28void
29xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
30{
31 unsigned long *p1, *p2, *p3, *p4;
32
33 p1 = (unsigned long *) srcs[0];
34 if (src_count == 1) {
35 active_template->do_2(bytes, dest, p1);
36 return;
37 }
38
39 p2 = (unsigned long *) srcs[1];
40 if (src_count == 2) {
41 active_template->do_3(bytes, dest, p1, p2);
42 return;
43 }
44
45 p3 = (unsigned long *) srcs[2];
46 if (src_count == 3) {
47 active_template->do_4(bytes, dest, p1, p2, p3);
48 return;
49 }
50
51 p4 = (unsigned long *) srcs[3];
52 active_template->do_5(bytes, dest, p1, p2, p3, p4);
53}
54EXPORT_SYMBOL(xor_blocks);
55
56/* Set of all registered templates. */
57static struct xor_block_template *template_list;
58
59#define BENCH_SIZE (PAGE_SIZE)
60
61static void
62do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
63{
64 int speed;
65 unsigned long now;
66 int i, count, max;
67
68 tmpl->next = template_list;
69 template_list = tmpl;
70
71 /*
72 * Count the number of XORs done during a whole jiffy, and use
73 * this to calculate the speed of checksumming. We use a 2-page
74 * allocation to have guaranteed color L1-cache layout.
75 */
76 max = 0;
77 for (i = 0; i < 5; i++) {
78 now = jiffies;
79 count = 0;
80 while (jiffies == now) {
81 mb(); /* prevent loop optimzation */
82 tmpl->do_2(BENCH_SIZE, b1, b2);
83 mb();
84 count++;
85 mb();
86 }
87 if (count > max)
88 max = count;
89 }
90
91 speed = max * (HZ * BENCH_SIZE / 1024);
92 tmpl->speed = speed;
93
94 printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name,
95 speed / 1000, speed % 1000);
96}
97
98static int __init
99calibrate_xor_blocks(void)
100{
101 void *b1, *b2;
102 struct xor_block_template *f, *fastest;
103
104 b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
105 if (!b1) {
106 printk(KERN_WARNING "xor: Yikes! No memory available.\n");
107 return -ENOMEM;
108 }
109 b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
110
111 /*
112 * If this arch/cpu has a short-circuited selection, don't loop through
113 * all the possible functions, just test the best one
114 */
115
116 fastest = NULL;
117
118#ifdef XOR_SELECT_TEMPLATE
119 fastest = XOR_SELECT_TEMPLATE(fastest);
120#endif
121
122#define xor_speed(templ) do_xor_speed((templ), b1, b2)
123
124 if (fastest) {
125 printk(KERN_INFO "xor: automatically using best "
126 "checksumming function: %s\n",
127 fastest->name);
128 xor_speed(fastest);
129 } else {
130 printk(KERN_INFO "xor: measuring software checksum speed\n");
131 XOR_TRY_TEMPLATES;
132 fastest = template_list;
133 for (f = fastest; f; f = f->next)
134 if (f->speed > fastest->speed)
135 fastest = f;
136 }
137
138 printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
139 fastest->name, fastest->speed / 1000, fastest->speed % 1000);
140
141#undef xor_speed
142
143 free_pages((unsigned long)b1, 2);
144
145 active_template = fastest;
146 return 0;
147}
148
149static __exit void xor_exit(void) { }
150
151MODULE_LICENSE("GPL");
152
153/* when built-in xor.o must initialize before drivers/md/md.o */
154core_initcall(calibrate_xor_blocks);
155module_exit(xor_exit);