diff options
-rw-r--r-- | crypto/async_tx/async_tx.c | 146 | ||||
-rw-r--r-- | drivers/dma/dmaengine.c | 168 | ||||
-rw-r--r-- | include/linux/dmaengine.h | 3 |
3 files changed, 174 insertions, 143 deletions
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 43fe4cbe71e6..b88bb1f608fc 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c | |||
@@ -38,25 +38,10 @@ static struct dma_client async_tx_dma = { | |||
38 | }; | 38 | }; |
39 | 39 | ||
40 | /** | 40 | /** |
41 | * dma_cap_mask_all - enable iteration over all operation types | ||
42 | */ | ||
43 | static dma_cap_mask_t dma_cap_mask_all; | ||
44 | |||
45 | /** | ||
46 | * chan_ref_percpu - tracks channel allocations per core/operation | |
47 | */ | ||
48 | struct chan_ref_percpu { | ||
49 | struct dma_chan_ref *ref; | ||
50 | }; | ||
51 | |||
52 | static int channel_table_initialized; | ||
53 | static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END]; | ||
54 | |||
55 | /** | ||
56 | * async_tx_lock - protect modification of async_tx_master_list and serialize | 41 | * async_tx_lock - protect modification of async_tx_master_list and serialize |
57 | * rebalance operations | 42 | * rebalance operations |
58 | */ | 43 | */ |
59 | static spinlock_t async_tx_lock; | 44 | static DEFINE_SPINLOCK(async_tx_lock); |
60 | 45 | ||
61 | static LIST_HEAD(async_tx_master_list); | 46 | static LIST_HEAD(async_tx_master_list); |
62 | 47 | ||
@@ -89,85 +74,6 @@ init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan) | |||
89 | atomic_set(&ref->count, 0); | 74 | atomic_set(&ref->count, 0); |
90 | } | 75 | } |
91 | 76 | ||
92 | /** | ||
93 | * get_chan_ref_by_cap - returns the nth channel of the given capability | ||
94 | * defaults to returning the channel with the desired capability and the | ||
95 | * lowest reference count if the index can not be satisfied | ||
96 | * @cap: capability to match | ||
97 | * @index: nth channel desired, passing -1 has the effect of forcing the | ||
98 | * default return value | ||
99 | */ | ||
100 | static struct dma_chan_ref * | ||
101 | get_chan_ref_by_cap(enum dma_transaction_type cap, int index) | ||
102 | { | ||
103 | struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref; | ||
104 | |||
105 | rcu_read_lock(); | ||
106 | list_for_each_entry_rcu(ref, &async_tx_master_list, node) | ||
107 | if (dma_has_cap(cap, ref->chan->device->cap_mask)) { | ||
108 | if (!min_ref) | ||
109 | min_ref = ref; | ||
110 | else if (atomic_read(&ref->count) < | ||
111 | atomic_read(&min_ref->count)) | ||
112 | min_ref = ref; | ||
113 | |||
114 | if (index-- == 0) { | ||
115 | ret_ref = ref; | ||
116 | break; | ||
117 | } | ||
118 | } | ||
119 | rcu_read_unlock(); | ||
120 | |||
121 | if (!ret_ref) | ||
122 | ret_ref = min_ref; | ||
123 | |||
124 | if (ret_ref) | ||
125 | atomic_inc(&ret_ref->count); | ||
126 | |||
127 | return ret_ref; | ||
128 | } | ||
129 | |||
130 | /** | ||
131 | * async_tx_rebalance - redistribute the available channels, optimize | ||
132 | * for cpu isolation in the SMP case, and operation isolation in the | |
133 | * uniprocessor case | ||
134 | */ | ||
135 | static void async_tx_rebalance(void) | ||
136 | { | ||
137 | int cpu, cap, cpu_idx = 0; | ||
138 | unsigned long flags; | ||
139 | |||
140 | if (!channel_table_initialized) | ||
141 | return; | ||
142 | |||
143 | spin_lock_irqsave(&async_tx_lock, flags); | ||
144 | |||
145 | /* undo the last distribution */ | ||
146 | for_each_dma_cap_mask(cap, dma_cap_mask_all) | ||
147 | for_each_possible_cpu(cpu) { | ||
148 | struct dma_chan_ref *ref = | ||
149 | per_cpu_ptr(channel_table[cap], cpu)->ref; | ||
150 | if (ref) { | ||
151 | atomic_set(&ref->count, 0); | ||
152 | per_cpu_ptr(channel_table[cap], cpu)->ref = | ||
153 | NULL; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | for_each_dma_cap_mask(cap, dma_cap_mask_all) | ||
158 | for_each_online_cpu(cpu) { | ||
159 | struct dma_chan_ref *new; | ||
160 | if (NR_CPUS > 1) | ||
161 | new = get_chan_ref_by_cap(cap, cpu_idx++); | ||
162 | else | ||
163 | new = get_chan_ref_by_cap(cap, -1); | ||
164 | |||
165 | per_cpu_ptr(channel_table[cap], cpu)->ref = new; | ||
166 | } | ||
167 | |||
168 | spin_unlock_irqrestore(&async_tx_lock, flags); | ||
169 | } | ||
170 | |||
171 | static enum dma_state_client | 77 | static enum dma_state_client |
172 | dma_channel_add_remove(struct dma_client *client, | 78 | dma_channel_add_remove(struct dma_client *client, |
173 | struct dma_chan *chan, enum dma_state state) | 79 | struct dma_chan *chan, enum dma_state state) |
@@ -211,8 +117,6 @@ dma_channel_add_remove(struct dma_client *client, | |||
211 | " (-ENOMEM)\n"); | 117 | " (-ENOMEM)\n"); |
212 | return 0; | 118 | return 0; |
213 | } | 119 | } |
214 | |||
215 | async_tx_rebalance(); | ||
216 | break; | 120 | break; |
217 | case DMA_RESOURCE_REMOVED: | 121 | case DMA_RESOURCE_REMOVED: |
218 | found = 0; | 122 | found = 0; |
@@ -233,8 +137,6 @@ dma_channel_add_remove(struct dma_client *client, | |||
233 | ack = DMA_ACK; | 137 | ack = DMA_ACK; |
234 | else | 138 | else |
235 | break; | 139 | break; |
236 | |||
237 | async_tx_rebalance(); | ||
238 | break; | 140 | break; |
239 | case DMA_RESOURCE_SUSPEND: | 141 | case DMA_RESOURCE_SUSPEND: |
240 | case DMA_RESOURCE_RESUME: | 142 | case DMA_RESOURCE_RESUME: |
@@ -248,51 +150,18 @@ dma_channel_add_remove(struct dma_client *client, | |||
248 | return ack; | 150 | return ack; |
249 | } | 151 | } |
250 | 152 | ||
251 | static int __init | 153 | static int __init async_tx_init(void) |
252 | async_tx_init(void) | ||
253 | { | 154 | { |
254 | enum dma_transaction_type cap; | ||
255 | |||
256 | spin_lock_init(&async_tx_lock); | ||
257 | bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END); | ||
258 | |||
259 | /* an interrupt will never be an explicit operation type. | ||
260 | * clearing this bit prevents allocation to a slot in 'channel_table' | ||
261 | */ | ||
262 | clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits); | ||
263 | |||
264 | for_each_dma_cap_mask(cap, dma_cap_mask_all) { | ||
265 | channel_table[cap] = alloc_percpu(struct chan_ref_percpu); | ||
266 | if (!channel_table[cap]) | ||
267 | goto err; | ||
268 | } | ||
269 | |||
270 | channel_table_initialized = 1; | ||
271 | dma_async_client_register(&async_tx_dma); | 155 | dma_async_client_register(&async_tx_dma); |
272 | dma_async_client_chan_request(&async_tx_dma); | 156 | dma_async_client_chan_request(&async_tx_dma); |
273 | 157 | ||
274 | printk(KERN_INFO "async_tx: api initialized (async)\n"); | 158 | printk(KERN_INFO "async_tx: api initialized (async)\n"); |
275 | 159 | ||
276 | return 0; | 160 | return 0; |
277 | err: | ||
278 | printk(KERN_ERR "async_tx: initialization failure\n"); | ||
279 | |||
280 | while (--cap >= 0) | ||
281 | free_percpu(channel_table[cap]); | ||
282 | |||
283 | return 1; | ||
284 | } | 161 | } |
285 | 162 | ||
286 | static void __exit async_tx_exit(void) | 163 | static void __exit async_tx_exit(void) |
287 | { | 164 | { |
288 | enum dma_transaction_type cap; | ||
289 | |||
290 | channel_table_initialized = 0; | ||
291 | |||
292 | for_each_dma_cap_mask(cap, dma_cap_mask_all) | ||
293 | if (channel_table[cap]) | ||
294 | free_percpu(channel_table[cap]); | ||
295 | |||
296 | dma_async_client_unregister(&async_tx_dma); | 165 | dma_async_client_unregister(&async_tx_dma); |
297 | } | 166 | } |
298 | 167 | ||
@@ -308,16 +177,9 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | |||
308 | { | 177 | { |
309 | /* see if we can keep the chain on one channel */ | 178 | /* see if we can keep the chain on one channel */ |
310 | if (depend_tx && | 179 | if (depend_tx && |
311 | dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) | 180 | dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) |
312 | return depend_tx->chan; | 181 | return depend_tx->chan; |
313 | else if (likely(channel_table_initialized)) { | 182 | return dma_find_channel(tx_type); |
314 | struct dma_chan_ref *ref; | ||
315 | int cpu = get_cpu(); | ||
316 | ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref; | ||
317 | put_cpu(); | ||
318 | return ref ? ref->chan : NULL; | ||
319 | } else | ||
320 | return NULL; | ||
321 | } | 183 | } |
322 | EXPORT_SYMBOL_GPL(__async_tx_find_channel); | 184 | EXPORT_SYMBOL_GPL(__async_tx_find_channel); |
323 | #else | 185 | #else |
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index d4d925912c47..87a8cd4791ed 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c | |||
@@ -295,6 +295,164 @@ static void dma_chan_release(struct dma_chan *chan) | |||
295 | } | 295 | } |
296 | 296 | ||
297 | /** | 297 | /** |
298 | * dma_cap_mask_all - enable iteration over all operation types | ||
299 | */ | ||
300 | static dma_cap_mask_t dma_cap_mask_all; | ||
301 | |||
302 | /** | ||
303 | * dma_chan_tbl_ent - tracks channel allocations per core/operation | ||
304 | * @chan: associated channel for this entry | |
305 | */ | ||
306 | struct dma_chan_tbl_ent { | ||
307 | struct dma_chan *chan; | ||
308 | }; | ||
309 | |||
310 | /** | ||
311 | * channel_table - percpu lookup table for memory-to-memory offload providers | ||
312 | */ | ||
313 | static struct dma_chan_tbl_ent *channel_table[DMA_TX_TYPE_END]; | ||
314 | |||
315 | static int __init dma_channel_table_init(void) | ||
316 | { | ||
317 | enum dma_transaction_type cap; | ||
318 | int err = 0; | ||
319 | |||
320 | bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END); | ||
321 | |||
322 | /* 'interrupt' and 'slave' are channel capabilities, but are not | ||
323 | * associated with an operation so they do not need an entry in the | ||
324 | * channel_table | ||
325 | */ | ||
326 | clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits); | ||
327 | clear_bit(DMA_SLAVE, dma_cap_mask_all.bits); | ||
328 | |||
329 | for_each_dma_cap_mask(cap, dma_cap_mask_all) { | ||
330 | channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent); | ||
331 | if (!channel_table[cap]) { | ||
332 | err = -ENOMEM; | ||
333 | break; | ||
334 | } | ||
335 | } | ||
336 | |||
337 | if (err) { | ||
338 | pr_err("dmaengine: initialization failure\n"); | ||
339 | for_each_dma_cap_mask(cap, dma_cap_mask_all) | ||
340 | if (channel_table[cap]) | ||
341 | free_percpu(channel_table[cap]); | ||
342 | } | ||
343 | |||
344 | return err; | ||
345 | } | ||
346 | subsys_initcall(dma_channel_table_init); | ||
347 | |||
348 | /** | ||
349 | * dma_find_channel - find a channel to carry out the operation | ||
350 | * @tx_type: transaction type | ||
351 | */ | ||
352 | struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type) | ||
353 | { | ||
354 | struct dma_chan *chan; | ||
355 | int cpu; | ||
356 | |||
357 | WARN_ONCE(dmaengine_ref_count == 0, | ||
358 | "client called %s without a reference", __func__); | ||
359 | |||
360 | cpu = get_cpu(); | ||
361 | chan = per_cpu_ptr(channel_table[tx_type], cpu)->chan; | ||
362 | put_cpu(); | ||
363 | |||
364 | return chan; | ||
365 | } | ||
366 | EXPORT_SYMBOL(dma_find_channel); | ||
367 | |||
368 | /** | ||
369 | * nth_chan - returns the nth channel of the given capability | ||
370 | * @cap: capability to match | ||
371 | * @n: nth channel desired | ||
372 | * | ||
373 | * Defaults to returning the channel with the desired capability and the | ||
374 | * lowest reference count when 'n' cannot be satisfied. Must be called | ||
375 | * under dma_list_mutex. | ||
376 | */ | ||
377 | static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n) | ||
378 | { | ||
379 | struct dma_device *device; | ||
380 | struct dma_chan *chan; | ||
381 | struct dma_chan *ret = NULL; | ||
382 | struct dma_chan *min = NULL; | ||
383 | |||
384 | list_for_each_entry(device, &dma_device_list, global_node) { | ||
385 | if (!dma_has_cap(cap, device->cap_mask)) | ||
386 | continue; | ||
387 | list_for_each_entry(chan, &device->channels, device_node) { | ||
388 | if (!chan->client_count) | ||
389 | continue; | ||
390 | if (!min) | ||
391 | min = chan; | ||
392 | else if (chan->table_count < min->table_count) | ||
393 | min = chan; | ||
394 | |||
395 | if (n-- == 0) { | ||
396 | ret = chan; | ||
397 | break; /* done */ | ||
398 | } | ||
399 | } | ||
400 | if (ret) | ||
401 | break; /* done */ | ||
402 | } | ||
403 | |||
404 | if (!ret) | ||
405 | ret = min; | ||
406 | |||
407 | if (ret) | ||
408 | ret->table_count++; | ||
409 | |||
410 | return ret; | ||
411 | } | ||
412 | |||
413 | /** | ||
414 | * dma_channel_rebalance - redistribute the available channels | ||
415 | * | ||
416 | * Optimize for cpu isolation (each cpu gets a dedicated channel for an | ||
417 | * operation type) in the SMP case, and operation isolation (avoid | ||
418 | * multi-tasking channels) in the non-SMP case. Must be called under | ||
419 | * dma_list_mutex. | ||
420 | */ | ||
421 | static void dma_channel_rebalance(void) | ||
422 | { | ||
423 | struct dma_chan *chan; | ||
424 | struct dma_device *device; | ||
425 | int cpu; | ||
426 | int cap; | ||
427 | int n; | ||
428 | |||
429 | /* undo the last distribution */ | ||
430 | for_each_dma_cap_mask(cap, dma_cap_mask_all) | ||
431 | for_each_possible_cpu(cpu) | ||
432 | per_cpu_ptr(channel_table[cap], cpu)->chan = NULL; | ||
433 | |||
434 | list_for_each_entry(device, &dma_device_list, global_node) | ||
435 | list_for_each_entry(chan, &device->channels, device_node) | ||
436 | chan->table_count = 0; | ||
437 | |||
438 | /* don't populate the channel_table if no clients are available */ | ||
439 | if (!dmaengine_ref_count) | ||
440 | return; | ||
441 | |||
442 | /* redistribute available channels */ | ||
443 | n = 0; | ||
444 | for_each_dma_cap_mask(cap, dma_cap_mask_all) | ||
445 | for_each_online_cpu(cpu) { | ||
446 | if (num_possible_cpus() > 1) | ||
447 | chan = nth_chan(cap, n++); | ||
448 | else | ||
449 | chan = nth_chan(cap, -1); | ||
450 | |||
451 | per_cpu_ptr(channel_table[cap], cpu)->chan = chan; | ||
452 | } | ||
453 | } | ||
454 | |||
455 | /** | ||
298 | * dma_clients_notify_available - broadcast available channels to the clients | 456 | * dma_clients_notify_available - broadcast available channels to the clients |
299 | */ | 457 | */ |
300 | static void dma_clients_notify_available(void) | 458 | static void dma_clients_notify_available(void) |
@@ -339,7 +497,12 @@ void dma_async_client_register(struct dma_client *client) | |||
339 | dev_name(&chan->dev), err); | 497 | dev_name(&chan->dev), err); |
340 | } | 498 | } |
341 | 499 | ||
342 | 500 | /* if this is the first reference and there were channels | |
501 | * waiting we need to rebalance to get those channels | ||
502 | * incorporated into the channel table | ||
503 | */ | ||
504 | if (dmaengine_ref_count == 1) | ||
505 | dma_channel_rebalance(); | ||
343 | list_add_tail(&client->global_node, &dma_client_list); | 506 | list_add_tail(&client->global_node, &dma_client_list); |
344 | mutex_unlock(&dma_list_mutex); | 507 | mutex_unlock(&dma_list_mutex); |
345 | } | 508 | } |
@@ -473,6 +636,7 @@ int dma_async_device_register(struct dma_device *device) | |||
473 | } | 636 | } |
474 | } | 637 | } |
475 | list_add_tail(&device->global_node, &dma_device_list); | 638 | list_add_tail(&device->global_node, &dma_device_list); |
639 | dma_channel_rebalance(); | ||
476 | mutex_unlock(&dma_list_mutex); | 640 | mutex_unlock(&dma_list_mutex); |
477 | 641 | ||
478 | dma_clients_notify_available(); | 642 | dma_clients_notify_available(); |
@@ -514,6 +678,7 @@ void dma_async_device_unregister(struct dma_device *device) | |||
514 | 678 | ||
515 | mutex_lock(&dma_list_mutex); | 679 | mutex_lock(&dma_list_mutex); |
516 | list_del(&device->global_node); | 680 | list_del(&device->global_node); |
681 | dma_channel_rebalance(); | ||
517 | mutex_unlock(&dma_list_mutex); | 682 | mutex_unlock(&dma_list_mutex); |
518 | 683 | ||
519 | list_for_each_entry(chan, &device->channels, device_node) { | 684 | list_for_each_entry(chan, &device->channels, device_node) { |
@@ -768,3 +933,4 @@ static int __init dma_bus_init(void) | |||
768 | } | 933 | } |
769 | subsys_initcall(dma_bus_init); | 934 | subsys_initcall(dma_bus_init); |
770 | 935 | ||
936 | |||
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d18d37d1015d..b466f02e2433 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h | |||
@@ -182,6 +182,7 @@ struct dma_chan_percpu { | |||
182 | * @device_node: used to add this to the device chan list | 182 | * @device_node: used to add this to the device chan list |
183 | * @local: per-cpu pointer to a struct dma_chan_percpu | 183 | * @local: per-cpu pointer to a struct dma_chan_percpu |
184 | * @client_count: how many clients are using this channel | 184 | * @client_count: how many clients are using this channel |
185 | * @table_count: number of appearances in the mem-to-mem allocation table | ||
185 | */ | 186 | */ |
186 | struct dma_chan { | 187 | struct dma_chan { |
187 | struct dma_device *device; | 188 | struct dma_device *device; |
@@ -198,6 +199,7 @@ struct dma_chan { | |||
198 | struct list_head device_node; | 199 | struct list_head device_node; |
199 | struct dma_chan_percpu *local; | 200 | struct dma_chan_percpu *local; |
200 | int client_count; | 201 | int client_count; |
202 | int table_count; | ||
201 | }; | 203 | }; |
202 | 204 | ||
203 | #define to_dma_chan(p) container_of(p, struct dma_chan, dev) | 205 | #define to_dma_chan(p) container_of(p, struct dma_chan, dev) |
@@ -468,6 +470,7 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript | |||
468 | int dma_async_device_register(struct dma_device *device); | 470 | int dma_async_device_register(struct dma_device *device); |
469 | void dma_async_device_unregister(struct dma_device *device); | 471 | void dma_async_device_unregister(struct dma_device *device); |
470 | void dma_run_dependencies(struct dma_async_tx_descriptor *tx); | 472 | void dma_run_dependencies(struct dma_async_tx_descriptor *tx); |
473 | struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); | ||
471 | 474 | ||
472 | /* --- Helper iov-locking functions --- */ | 475 | /* --- Helper iov-locking functions --- */ |
473 | 476 | ||